"""Command-line entry point that runs FCECorpusHandler over the FCE Corpus.

Reads the corpus download directory and the txt output directory from the
command line, asks the handler for the train/dev/test sets and then calls
``xml_to_txt`` once per set.

Original note: module to parse XML in Python using minidom.
"""
import argparse

import utils
from FCECorpusHandler import FCECorpusHandler

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extracts transcript and summary from FCE Corpus.')
    parser.add_argument('--fce_xml_dir', type=str,
                        default=utils.project_dir_name() + '/data/',
                        help='FCE Corpus download directory')
    parser.add_argument('--results_dir', type=str,
                        default=utils.project_dir_name() + '/data/fce_txt/',
                        help='FCE Corpus txt format')
    args = parser.parse_args()

    fceCorpusHandler = FCECorpusHandler(args)
    fceCorpusHandler.get_train_dev_test_sets()
    # Same three conversions as before, in the same order: train, dev, test.
    for data_type in ("train", "dev", "test"):
        fceCorpusHandler.xml_to_txt(data_type=data_type)
import unittest import utils import os import pandas as pd from collections import defaultdict """ Handles .dat emotion information """ __author__ = "Gwena Cunha" params = {'root': utils.project_dir_name() + 'data_test/BMI/', 'seconds': 10} class DatEmotionHandler: def __init__(self, root_dir, dat_filename='intended_1.dat'): self.root_dir = root_dir self.dat_filename = dat_filename self.dat_file = utils.get_file(self.root_dir, self.dat_filename) self.dat_sentences = self.csv_data = None def dat_to_csv(self, csv_filename='intended_1.csv'): """ Transform .dat files into .csv files with time, valence_score_raw, and valence_score emotion information :param csv_filename: filename where dataframe will be saved to in csv format :return: """ self.csv_data = defaultdict(list) for i in range(0, len(self.dat_sentences)):
""" Test steps: takes a movie file, resizes, downsamples, does RGB2HSV color transformation and saves data in .npz, saves new video, loads that video for testing, loads .npz, converts from HSV2RGB, saves in restored video for testing. DONE: add emotion and text """ __author__ = "Gwena Cunha" params = { 'fps': 10, 'root': utils.project_dir_name() + 'data_test/', 'new_size': 100, 'sr': 16000 } def load_video(filename): # Load videos (fps = 30) clip = VideoFileClip(filename) # Resize to 100 x 100 clip_resized = clip.resize(newsize=(params['new_size'], params['new_size'])) print("clip: {}, resized: {}".format(clip.size, clip_resized.size)) # Downsample downsampled_frames, new_filename = utils.downsample_video(clip_resized, params, save_downsampled=True)
""" Pre-processes data considering already spliced video and audio only Synchronize video and audio TODO: add emotion and text """ __author__ = "Gwena Cunha" params = { 'fps': 10, # 'root': '/media/ceslea/DATA/VideoEmotion/DataWithEmotionTags_noText_correctedAudio_hsio/', 'root': utils.project_dir_name() + 'data/cognimuse_10secs/', 'new_size': 100, 'sr': 16000, 'results_dir': utils.project_dir_name() + 'data/cognimuse_10secs/', 'seconds': 10, 'audio_len': 10*16000, # seconds*sr } def load_video(filename, params_substitute=None): # To use in external scripts if params_substitute is not None: params = params_substitute # Load videos (fps = 30) clip = VideoFileClip(filename)
import utils import glob from natsort import natsorted import pandas as pd from import read from splices2npz import load_video, process_audio """ Pre-processes data considering already spliced video and audio only Synchronize video and audio """ __author__ = "Gwena Cunha" is_test = False if is_test: ROOT = utils.project_dir_name() + 'data/deap/test_data2/' else: ROOT = utils.project_dir_name() + 'data/deap30frames/mp4/' params = { 'fps': 10, 'root': ROOT, 'new_size': 100, # new frame size (100x100) 'sr': 16000, 'audio_len': 48000, 'results_dir': ROOT, 'seconds': 3, } def save_npz(videos, type='train',
LOR: 75 (0: 57, 1: 18), 1239.75secs 30 seconds (2D emotion) BMI: 62 (0: 18, 1: 42, 2: 2, 3: 0) CHI: 60 (0: 0, 1: 0, 2: 29, 3: 31) CRA: 53 (0: 23, 1: 29, 2: 0, 3: 1) DEP: 60 (0: 8, 1: 20, 2: 29, 3: 3) FNE: 60 (0: 3, 1: 6, 2: 46, 3: 5) GLA: 60 (0: 2, 1: 11, 2: 23, 3: 24) LOR: 75 (0: 22, 1: 35, 2: 11, 3: 7) """ __author__ = "Gwena Cunha" params = { 'root': utils.project_dir_name() + 'data/cognimuse_3secs/', #'data/cognimuse_multimodal_robust_bert/', 'sr': 16000, 'seconds': 3, 'num_samples': -1 # default = -1 for all samples. Use different to test code } def splice_video(video_clip, num_samples=-1, params_substitute=None): # To use in external scripts if params_substitute is not None: params = params_substitute # Load videos (fps = 30) vid_fps = round(video_clip.fps) vid_num_frames = round(vid_fps * video_clip.duration) vid_frames_per_splice = params['seconds'] * vid_fps print("Video - num frames: {}, size: {}, fps: {}, frames_per_splice: {}".
"""Command-line options for extracting data from the AMI Corpus.

Declares the corpus download directory and the three result directories
(transcripts per speaker, transcripts, summaries) and parses them into
``args``.

Original note: module to parse XML in Python using minidom.
"""
import argparse

import utils
from AMICorpusHandler import AMICorpusHandler

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Extracts transcript and summary from AMI Corpus.')

    # (flag, path appended to the project directory, help text)
    directory_options = (
        ('--ami_xml_dir', '/data/',
         'AMI Corpus download directory'),
        ('--results_transcripts_speaker_dir', '/data/ami-transcripts-speaker/',
         'AMI Corpus transcripts per speaker'),
        ('--results_transcripts_dir', '/data/ami-transcripts/',
         'AMI Corpus transcripts'),
        ('--results_summary_dir', '/data/ami-summary/',
         'AMI Corpus summaries'),
    )
    # Register the options in the same order as the original explicit calls.
    for flag, relative_dir, help_text in directory_options:
        parser.add_argument(flag, type=str,
                            default=utils.project_dir_name() + relative_dir,
                            help=help_text)

    args = parser.parse_args()
    # print(args.ami_xml_dir)
Train (raw videos): same emotion as above, but number of splices per video is different: Em 1 (total=2,069): Vid 1 (46), 2 (67), 3 (77), 4 (92), 5 (40), 6 (70), 7 (48), 9 (90), -> 530 10 (83), 11 (76), 12 (72), 13 (61), 15 (23), 20 (78), 22 (78), -> 471 23 (78), 24 (79), 25 (89), 27 (112), 29 (90), 30 (69), 31 (95), -> 612 32 (75), 34 (79), 35 (59), 36 (66), 37 (104), 38 (73) -> 456 Em 0 (total=781): Vid 8 (70), 14 (73), 16 (74), 19 (55), 21 (94), -> 366 26 (82), 28 (63), 33 (60), 39 (124), 40 (86) -> 415 P.S.: Raw 21, 23 have compromised frames and splicing doesn't work. Solved by converting to .mov and back to .mp4 """ __author__ = "Gwena Cunha" is_test = False if is_test: ROOT = utils.project_dir_name() + 'data/deap/test_data/' else: ROOT = utils.project_dir_name() + 'data/deap_raw/mp4/' params = { 'root': ROOT, 'emotion_root': utils.project_dir_name() + 'data/deap_raw/', 'sr': 16000, 'seconds': 3, 'num_samples': -1 # default = -1 for all samples. Use different to test code } def get_num_video_splices(video_splices_dir): return len(glob.glob(video_splices_dir + '*.mp4'))
tree = ET.parse(filename) root = tree.getroot() # one specific item attribute print('Item #2 attribute:') print(root[0][1].attrib) # all item attributes print('\nAll attributes:') for elem in root: for subelem in elem: print(subelem.attrib) # one specific item's data print('\nItem #2 data:') print(root[0][1].text) # all items data print('\nAll item data:') for elem in root: for subelem in elem: print(subelem.text) if __name__ == '__main__': filename = utils.project_dir_name() + 'data/test.xml' # test_minidom(filename) test_element_tree(filename)
import pandas as pd
import csv

"""
DEAP has scores from 32 participants for 40 videos.
This script will output a .csv file with the average emotion scores from all 32 participants.

Valence: 1 (most negative) - 9 (most positive) -> 5 is right in the middle
    [1, 5) = neg
    [5, 9] = pos

DEAP scale: valence, arousal (continuous 1-9) - Russell's scale
    ( J. A. Russell, “A circumplex model of affect,” Journal of Personality and Social Psychology,
      vol. 39, no. 6, pp. 1161–1178, 1980)

"Arousal can range from inactive (e.g. uninterested, bored) to active (e.g. alert, excited), whereas valence ranges
 from unpleasant (e.g. sad, stressed) to pleasant (e.g. happy, elated)"
 Source:
"""

__author__ = "Gwena Cunha"

ROOT = utils.project_dir_name() + 'data/deap/'


def get_emotion_score_dictionary(filename='data/deap/participant_ratings.csv'):
    """Group the valence and arousal ratings in a ratings CSV by trial.

    The file is read with ``csv.DictReader``; every row must provide the
    columns ``Trial``, ``Valence`` and ``Arousal``.

    :param filename: path of the ratings CSV file to read
    :return: dictionary with the keys ``'valence'`` and ``'arousal'``; each
        value maps a ``Trial`` entry (kept as the string read from the file)
        to the list of its ratings converted to ``float``, in file order
    :raises ValueError: if a Valence or Arousal cell cannot be parsed as float
    :raises KeyError: if one of the three required columns is missing
    """
    # defaultdict(list) is the idiomatic spelling of defaultdict(lambda: []).
    valence_dict = defaultdict(list)
    arousal_dict = defaultdict(list)

    with open(filename, newline='') as csvfile:
        # Fieldnames: Participant_id,Trial,Experiment_id,Start_time,Valence,
        #             Arousal,Dominance,Liking,Familiarity
        for row in csv.DictReader(csvfile):
            trial = row['Trial']
            valence_dict[trial].append(float(row['Valence']))
            arousal_dict[trial].append(float(row['Arousal']))

    # Returned directly instead of through a local called `dict`, which
    # shadowed the builtin of the same name.
    return {'valence': valence_dict, 'arousal': arousal_dict}