import os
import argparse
# The functions below parse EAF (ELAN) XML files through `pr`; this assumes `pr` is an
# alias for xml.dom.minidom.parse, which matches the DOM calls used throughout.
from xml.dom.minidom import parse as pr


def generate_elan(elan, audio):
    # For each existing new_elan<i>.eaf, point its media descriptors at the session's
    # video (.mp4) and extracted audio (.wav), then write the result to elan<i>.eaf.
    for j in range(80):
        i = j + 1
        if os.path.isfile(elan + 'new_elan' + str(i) + '.eaf'):
            count = 0
            tree = pr(elan + 'new_elan' + str(i) + '.eaf')
            annotationTree = tree.documentElement
            headers = annotationTree.getElementsByTagName("HEADER")
            for header in headers:
                media = header.getElementsByTagName("MEDIA_DESCRIPTOR")
                for med in media:
                    if count == 0:
                        # first media descriptor: the video file
                        if med.hasAttribute("MEDIA_URL") and not med.hasAttribute("EXTRACTED_FROM"):
                            count = 1
                            name = "file:///" + audio + str(i) + ".mp4"
                            med.setAttribute("MEDIA_URL", name)
                        if med.hasAttribute("RELATIVE_MEDIA_URL") and not med.hasAttribute("EXTRACTED_FROM"):
                            count = 1
                            name = "./" + str(i) + ".mp4"
                            med.setAttribute("RELATIVE_MEDIA_URL", name)
                    else:
                        # remaining descriptors: the audio extracted from the video
                        if med.hasAttribute("EXTRACTED_FROM"):
                            name = "file:///" + audio + str(i) + ".wav"
                            med.setAttribute("EXTRACTED_FROM", name)
                        if med.hasAttribute("MEDIA_URL"):
                            name = "file:///" + audio + str(i) + ".wav"
                            med.setAttribute("MEDIA_URL", name)
                        if med.hasAttribute("RELATIVE_MEDIA_URL"):
                            name = "./" + str(i) + ".wav"
                            med.setAttribute("RELATIVE_MEDIA_URL", name)
            new_efname = elan + 'elan' + str(i) + '.eaf'
            with open(new_efname, 'w') as out:
                tree.writexml(out, indent=" ", addindent=" ", newl='\n')
def getElanTime(efname):
    tree = pr(efname)
    annotationTree = tree.documentElement
    timeslot = None
    timeslot2 = None
    t = None
    t2 = None
    # go to the notes tier
    tiers = annotationTree.getElementsByTagName("TIER")
    for tier in tiers:
        if tier.getAttribute("TIER_ID") == "default":  # "notes":
            annotations = tier.getElementsByTagName("ANNOTATION")
            for annotation in annotations:
                align_annotations = annotation.getElementsByTagName("ALIGNABLE_ANNOTATION")
                for align_annotation in align_annotations:
                    annotation_value = align_annotation.getElementsByTagName("ANNOTATION_VALUE")[0]
                    if "start_Being_Heard_one" in annotation_value.childNodes[0].data:
                        timeslot = align_annotation.getAttribute("TIME_SLOT_REF1")
                    if "start_Being_Heard_two" in annotation_value.childNodes[0].data:
                        timeslot2 = align_annotation.getAttribute("TIME_SLOT_REF1")
    print("Found start_Being_Heard_one annotation at ", timeslot)
    if timeslot2 is not None:
        print("Found start_Being_Heard_two annotation at ", timeslot2)
    # go to the time tier
    times = annotationTree.getElementsByTagName("TIME_ORDER")
    for time in times:
        time_slots = time.getElementsByTagName("TIME_SLOT")
        for time_slot in time_slots:
            if time_slot.getAttribute("TIME_SLOT_ID") == timeslot:
                t = time_slot.getAttribute("TIME_VALUE")
            if time_slot.getAttribute("TIME_SLOT_ID") == timeslot2:
                t2 = time_slot.getAttribute("TIME_VALUE")
    print("Elan annotation times - ", t, "", t2)
    return t, t2
def generate_elan(dirc):
    tree = pr('elan.eaf')
    annotationTree = tree.documentElement
    for j in range(80):
        i = j + 1
        audiofile = dirc + str(i) + ".wav"
        if os.path.isfile(audiofile):
            headers = annotationTree.getElementsByTagName("HEADER")
            for header in headers:
                media = header.getElementsByTagName("MEDIA_DESCRIPTOR")
                for med in media:
                    if med.hasAttribute("MEDIA_URL") and not med.hasAttribute("EXTRACTED_FROM"):
                        name = "file:///" + dirc + str(i) + ".mp4"
                        med.setAttribute("MEDIA_URL", name)
                    if med.hasAttribute("RELATIVE_MEDIA_URL") and not med.hasAttribute("EXTRACTED_FROM"):
                        name = "./" + str(i) + ".mp4"
                        med.setAttribute("RELATIVE_MEDIA_URL", name)
                    if med.hasAttribute("EXTRACTED_FROM"):
                        name = "file:///" + dirc + str(i) + ".mp4"
                        med.setAttribute("EXTRACTED_FROM", name)
                    if med.hasAttribute("MEDIA_URL") and med.hasAttribute("EXTRACTED_FROM"):
                        name = "file:///" + dirc + str(i) + ".wav"
                        med.setAttribute("MEDIA_URL", name)
                    if med.hasAttribute("RELATIVE_MEDIA_URL") and med.hasAttribute("EXTRACTED_FROM"):
                        name = "./" + str(i) + ".wav"
                        med.setAttribute("RELATIVE_MEDIA_URL", name)
            new_efname = dirc + 'elan' + str(i) + '.eaf'
            with open(new_efname, 'w') as out:
                tree.writexml(out, indent=" ", addindent=" ", newl='\n')
def fixElanOffest(efname, dirc, offset):
    tree = pr(dirc + "/" + efname)
    annotationTree = tree.documentElement
    # go to the time tier and shift each TIME_VALUE by `offset` (milliseconds),
    # skipping any slot whose shifted value would not be positive
    times = annotationTree.getElementsByTagName("TIME_ORDER")
    for time in times:
        time_slots = time.getElementsByTagName("TIME_SLOT")
        for time_slot in time_slots:
            if int(time_slot.getAttribute("TIME_VALUE")) + offset > 0:
                time_slot.setAttribute(
                    "TIME_VALUE",
                    str(int(time_slot.getAttribute("TIME_VALUE")) + offset))
    new_efname = dirc + "/" + 'new_' + efname
    with open(new_efname, 'w') as out:
        tree.writexml(out, indent=" ", addindent=" ", newl='\n')
    print("Fixed offset output at ", new_efname)
    return
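
# A minimal usage sketch (not part of the original code): align a second EAF file with a
# reference file on their shared "start_Being_Heard_one" marker. The function name and
# file paths here are hypothetical; getElanTime returns TIME_VALUE strings in milliseconds,
# and fixElanOffest writes the shifted copy as new_<efname> in the given directory.
def align_on_marker(reference_eaf, other_eaf, other_dir):
    ref_t, _ = getElanTime(reference_eaf)
    other_t, _ = getElanTime(other_dir + "/" + other_eaf)
    if ref_t is None or other_t is None:
        print("Marker not found in one of the files; nothing to align.")
        return
    offset = int(ref_t) - int(other_t)  # positive offset shifts the second file later
    fixElanOffest(other_eaf, other_dir, offset)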
def main(args):
    """Parse command-line arguments and compute Fleiss' kappa over the emotion annotations."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--block")
    parser.add_argument("-f", "--file", nargs='*')
    parser.add_argument("-m", "--facet", nargs='*')
    parser.add_argument("-s", "--start", nargs='*')
    parser.add_argument("-e", "--end", nargs='*')
    parser.add_argument('--combine', dest='combine', action='store_true')
    parser.add_argument('--no-combine', dest='combine', action='store_false')
    parser.set_defaults(combine=True)
    parser.add_argument('--weak', dest='weak', action='store_true')
    parser.add_argument('--no-weak', dest='weak', action='store_false')
    parser.set_defaults(weak=False)
    parser.add_argument('--all', dest='all', action='store_true')
    parser.add_argument('--only-human', dest='all', action='store_false')
    parser.set_defaults(all=True)
    parser.add_argument('--mean-normalize', dest='mean', action='store_true')
    parser.add_argument('--no-mean-normalize', dest='mean', action='store_false')
    parser.set_defaults(mean=True)
    parser.add_argument('--global-mean', dest='global_mean', action='store_true')
    parser.add_argument('--no-global-mean', dest='global_mean', action='store_false')
    parser.set_defaults(global_mean=False)
    parser.add_argument('--binary', dest='binary', action='store_true')
    parser.add_argument('--no-binary', dest='binary', action='store_false')
    parser.set_defaults(binary=False)
    args = parser.parse_args()

    if args.block is not None:
        block = int(args.block)
    else:
        block = 1000
    filename = args.file
    print(filename)
    facet_file = args.facet
    combine = args.combine
    weak = args.weak
    all = args.all
    mean = args.mean
    global_mean = args.global_mean
    binary = args.binary
    start_time_list = args.start
    end_time_list = args.end
    print("file =", filename, "facet file =", facet_file, "block =", block,
          "combine =", combine, "weak =", weak)

    time_dict_list = []
    emotion_kappa = []
    total = len(filename)
    for i in range(total):
        print(filename[i])
        tree = pr(filename[i])
        annotationTree = tree.documentElement
        tiers = annotationTree.getElementsByTagName("TIER")
        # put time tier to a dictionary
        ts_dict = {}
        times = annotationTree.getElementsByTagName("TIME_ORDER")
        for time in times:
            time_slots = time.getElementsByTagName("TIME_SLOT")
            for time_slot in time_slots:
                ts_dict[time_slot.getAttribute("TIME_SLOT_ID")] = time_slot.getAttribute("TIME_VALUE")
        time_dict_list.append(ts_dict)

    # get start and end time
    if start_time_list is None or end_time_list is None:
        start_time_list = []
        end_time_list = []
        for i in range(total):
            ts_dict = time_dict_list[i]
            start_time_slot = None
            end_time_slot = None
            # go to the Shamya_looking-at-screenTier tier
            tree = pr(filename[i])
            annotationTree = tree.documentElement
            tiers = annotationTree.getElementsByTagName("TIER")
            for tier in tiers:
                if tier.getAttribute("TIER_ID") == "Shamya_looking-at-screenTier":
                    annotations = tier.getElementsByTagName("ANNOTATION")
                    for annotation in annotations:
                        align_annotations = annotation.getElementsByTagName("ALIGNABLE_ANNOTATION")
                        for align_annotation in align_annotations:
                            if start_time_slot is None:
                                start_time_slot = align_annotation.getAttribute("TIME_SLOT_REF1")
                            end_time_slot = align_annotation.getAttribute("TIME_SLOT_REF2")
            print("start_time_slot = ", start_time_slot, " end_time_slot = ", end_time_slot)
            # pick start time
            start_time_list.append(int(ts_dict[start_time_slot]))
            end_time_list.append(int(ts_dict[end_time_slot]))

    if global_mean is True:
        print("Finding global mean...")
        global_mean_neutral, global_mean_other = get_global_mean(facet_file, start_time_list, end_time_list)
    else:
        global_mean_neutral = None
        global_mean_other = None

    for i in range(total):
        print(filename[i])
        tree = pr(filename[i])
        annotationTree = tree.documentElement
        tiers = annotationTree.getElementsByTagName("TIER")
        # put time tier to a dictionary
        ts_dict = time_dict_list[i]
        start_time = int(start_time_list[i])
        end_time = int(end_time_list[i])
        print("start_time = ", start_time, " end_time = ", end_time)

        # Shamya annotations
        shamya_ts = get_annotations(tiers, "Shamya_EmotionTier")
        print("shamya ts ", shamya_ts)
        shamya_full_ts = transform_annotation(shamya_ts, start_time, end_time, ts_dict)
        print("shamya full ts ", shamya_full_ts)

        # Mark annotations
        mark_ts = get_annotations(tiers, "Mark_EmotionTier")
        print("mark ts", mark_ts)
        mark_full_ts = transform_annotation(mark_ts, start_time, end_time, ts_dict)
        print("mark full ts", mark_full_ts)

        # get annotation based on the block size
        shamya_annot = regularize_annotations(shamya_full_ts, block, weak)
        print("shamya block sized annot", shamya_annot)
        mark_annot = regularize_annotations(mark_full_ts, block, weak)
        print("mark block sized annot", mark_annot)

        if all is True:
            print(facet_file[i])
            # get FACET data
            # Mean Normalized
            if mean is True:
                if combine is True:
                    # has to be separate as mean of max would differ with/without the confusion column
                    facet = get_facet_data_combined(facet_file[i], start_time, end_time,
                                                    global_mean_neutral, global_mean_other)
                else:
                    facet = get_facet_data_all(facet_file[i], start_time, end_time,
                                               global_mean_neutral, global_mean_other)
            else:
                facet = get_facet_data(facet_file[i], start_time, end_time)
            print("facet full", facet[:10])
            if binary is True:
                get_binary_emotion(facet)
            facet_annot = regularize_annotations(facet, block, weak)
            print("facet block sized annot", facet_annot)

        #print("FACET - Neutral =", facet_annot.count('Neutral'), "Confused =", facet_annot.count('Confused'), "Other =", facet_annot.count('Other'), "Skip =", facet_annot.count('Skip'))
        #print("SHAMYA - Neutral =", shamya_annot.count('Neutral'), "Confused =", shamya_annot.count('Confused'), "Other =", shamya_annot.count('Other'), "Skip =", shamya_annot.count('Skip'))
        #print("MARK - Neutral =", mark_annot.count('Neutral'), "Confused =", mark_annot.count('Confused'), "Other =", mark_annot.count('Other'), "Skip =", mark_annot.count('Skip'))

        # Delete skips and combine annotations
        if all is True:
            # change [s,m,f] if you need pairwise [m,f] or [s,f]
            annot = [[s, m, f] for s, m, f in zip(shamya_annot, mark_annot, facet_annot)
                     if s != "Skip" and m != "Skip"]
        else:
            annot = [[s, m] for s, m in zip(shamya_annot, mark_annot)
                     if s != "Skip" and m != "Skip"]

        # [Neutral, Confused, Other]
        emot = [[0, 0, 0] for i in range(len(annot))]
        # update kappa matrix
        emot = update_kappa_matrix(annot, emot)
        emotion_kappa = emotion_kappa + emot

    # if combine, merge Other and Confused
    if combine is True:
        emotion_kappa = [[l[0], (l[1] + l[2])] for l in emotion_kappa]
    print(len(emotion_kappa))
    # compute Fleiss' kappa
    kappa = fleissKappa(emotion_kappa)
    print(kappa)
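
# A hypothetical entry point: main() ignores its parameter and re-parses sys.argv via
# argparse, so passing sys.argv is only a convention. The script name and file paths in
# the example invocation below are placeholders:
#   python kappa_analysis.py -f session1.eaf session2.eaf -m facet1.csv facet2.csv \
#       -b 1000 --combine --global-mean
if __name__ == "__main__":
    import sys
    main(sys.argv)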
def tweetsFromFile(filename):
    tree = pr(filename)
    tweets_tree = tree.documentElement
    tweets = tweets_tree.getElementsByTagName("tweet")
    return tweets
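
# A minimal usage sketch for tweetsFromFile; "tweets.xml" is a hypothetical input file
# whose root element contains <tweet> elements. Each returned item is a minidom Element,
# so toxml() shows its raw markup.
def print_tweet_summary(filename="tweets.xml"):
    tweets = tweetsFromFile(filename)
    print(len(tweets), "tweets loaded from", filename)
    if tweets:
        print("first tweet:", tweets[0].toxml())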