# Google Sheets CSV export of the "Template" sheet holding the annotations.
filename_annotations = (
    'https://docs.google.com/spreadsheets/d/'
    '1Rqu1sJiD-ogc4a6R491JTiaYacptOTqh6DKqhwTa8NA'
    '/gviz/tq?tqx=out:csv&sheet=Template'
)

# Retrieve labels
# The sheet is read without a header row; the rows labelled 0-3 are dropped
# before the video names are taken from column 1.
df_annotations = pd.read_csv(filename_annotations, header=None).drop([0, 1, 2, 3])
video_names = set(df_annotations[1].values)

# Maps '<video>_<i>.txt' -> (label, gender flag, group id).
labels_by_file = {}
# Iterate in order of first appearance in the sheet (not over the set) so that
# group ids and the row order of df_labels are identical from run to run.
for group, video in enumerate(df_annotations[1].unique()):
    # One lookup per video: the result does not depend on the segment index.
    annotations = get_annotations_video(filename_annotations, video, 'max')
    label = annotations[2]
    gender = annotations[4]
    # NOTE(review): 'H' is presumably the male code ("Homme") - confirm.
    gender_bool = 1.0 if gender == 'H' else 0.0
    # Each video contributes four text files, numbered 1 to 4; all of them
    # share the video's group id.
    for i in range(1, 5):
        text_file = f'{video}_{i}.txt'
        labels_by_file[text_file] = (label[i - 1], gender_bool, group)

df_labels = pd.DataFrame.from_dict(
    labels_by_file,
    columns=['Label', 'Gender', 'Group'],
    orient='index',
)
text_files = df_labels.index

# Retrieve Word Embedding
# NOTE(review): rows of X_vect are presumably aligned with text_files - confirm
# against the code that wrote file_word_embedding.
X_vect = np.load(file_word_embedding)
# Optional exclusion of specific videos (currently disabled):
# videos_excluded = ['WIN_20210329_14_13_45_Pro', 'WIN_20210402_14_27_50_Pro']
# multi_feat = multi_feat.drop(videos_excluded, axis=0)

# Retrieve labels
# The sheet is read without a header row; the rows labelled 0-3 are dropped
# before the video names are taken from column 1.
df_annotations = pd.read_csv(filename_annotations, header=None).drop([0, 1, 2, 3])
# Slide ("diapo") numbers; the i-th label of a video is assigned to diapos[i].
diapos = [1, 8, 9, 10, 11, 12, 17, 18]
video_names = set(df_annotations[1].values)

# Maps video name -> {slide number: label}.
labels_by_video = {}
# Iterate in order of first appearance in the sheet (not over the set) so the
# row order of df_labels is identical from run to run.
for video_name in df_annotations[1].unique():
    labels = get_annotations_video(filename_annotations, video_name, 'max')[2]
    labels_by_video[video_name] = {
        diapo: labels[i] for i, diapo in enumerate(diapos)
    }

# One row per video, one column per slide ...
df_labels = pd.DataFrame.from_dict(labels_by_video, orient='index')
# ... then reshaped into a long Series indexed by (video, slide).
df_labels = df_labels.stack()
# df_labels = df_labels.drop(videos_excluded, axis=0)

# Merge multi_features and labels
# NOTE(review): concat along axis=1 aligns on the index, so multi_feat is
# presumably indexed by (video, slide) as well - confirm.
data = pd.concat([multi_feat, df_labels], axis=1)