Ejemplo n.º 1
0
def generate_elan(elan, audio):
    """Repoint the media links of ``new_elan<i>.eaf`` and save as ``elan<i>.eaf``.

    For every ``i`` from 1 to 80 for which ``<elan>new_elan<i>.eaf`` exists,
    the file is parsed, the ``MEDIA_DESCRIPTOR`` elements under each
    ``HEADER`` are updated, and the document is written to
    ``<elan>elan<i>.eaf``.

    Descriptors are processed in document order:

    * Until a first descriptor has been matched, a descriptor *without*
      ``EXTRACTED_FROM`` gets its ``MEDIA_URL`` / ``RELATIVE_MEDIA_URL``
      (whichever are present) pointed at ``<i>.mp4``; descriptors that do
      carry ``EXTRACTED_FROM`` are left untouched.
    * Every descriptor after that first match gets ``EXTRACTED_FROM``,
      ``MEDIA_URL`` and ``RELATIVE_MEDIA_URL`` (whichever are present)
      pointed at ``<i>.wav``.

    Args:
        elan: Path prefix of the ELAN files. It is concatenated as-is, so it
            must already end with a path separator if it names a directory.
        audio: Path prefix used to build the absolute ``file:///`` media
            URLs; also concatenated as-is.
    """
    for i in range(1, 81):
        source_name = elan + 'new_elan' + str(i) + '.eaf'
        if not os.path.isfile(source_name):
            continue

        tree = pr(source_name)
        annotationTree = tree.documentElement
        # Flips to True once the first (video) descriptor has been rewritten.
        first_matched = False

        for header in annotationTree.getElementsByTagName("HEADER"):
            for med in header.getElementsByTagName("MEDIA_DESCRIPTOR"):
                if not first_matched:
                    if med.hasAttribute("EXTRACTED_FROM"):
                        # Not eligible as the first descriptor; leave as is.
                        continue

                    if med.hasAttribute("MEDIA_URL"):
                        first_matched = True
                        name = "file:///" + audio + str(i) + ".mp4"
                        med.setAttribute("MEDIA_URL", name)

                    if med.hasAttribute("RELATIVE_MEDIA_URL"):
                        first_matched = True
                        name = "./" + str(i) + ".mp4"
                        med.setAttribute("RELATIVE_MEDIA_URL", name)
                else:
                    if med.hasAttribute("EXTRACTED_FROM"):
                        name = "file:///" + audio + str(i) + ".wav"
                        med.setAttribute("EXTRACTED_FROM", name)

                    if med.hasAttribute("MEDIA_URL"):
                        name = "file:///" + audio + str(i) + ".wav"
                        med.setAttribute("MEDIA_URL", name)

                    if med.hasAttribute("RELATIVE_MEDIA_URL"):
                        name = "./" + str(i) + ".wav"
                        med.setAttribute("RELATIVE_MEDIA_URL", name)

        new_efname = elan + 'elan' + str(i) + '.eaf'
        # Mode 'w' already truncates, so a single open is enough; the context
        # manager guarantees the handle is flushed and closed.
        with open(new_efname, 'w') as out_file:
            tree.writexml(out_file,
                          indent="  ",
                          addindent="  ",
                          newl='\n')
def getElanTime(efname):
    """Look up the times of the two ``start_Being_Heard`` markers in an ELAN file.

    The tier whose ``TIER_ID`` is ``"default"`` is scanned for alignable
    annotations whose text contains ``start_Being_Heard_one`` or
    ``start_Being_Heard_two``. The ``TIME_SLOT_REF1`` of each match is then
    resolved against the ``TIME_ORDER`` section. If several annotations
    match the same marker, the last one wins.

    Args:
        efname: Path of the ELAN ``.eaf`` file to read.

    Returns:
        A tuple ``(t, t2)`` holding the ``TIME_VALUE`` strings of the first
        and second marker; an entry is ``None`` when its marker (or its time
        slot) was not found.
    """
    tree = pr(efname)
    annotationTree = tree.documentElement
    timeslot = None
    timeslot2 = None
    t = None
    t2 = None

    # Go to the notes tier.
    for tier in annotationTree.getElementsByTagName("TIER"):
        if tier.getAttribute("TIER_ID") != "default":  # "notes"
            continue
        for annotation in tier.getElementsByTagName("ANNOTATION"):
            for align_annotation in annotation.getElementsByTagName(
                    "ALIGNABLE_ANNOTATION"):
                annotation_value = align_annotation.getElementsByTagName(
                    "ANNOTATION_VALUE")[0]
                # An empty ANNOTATION_VALUE has no child node at all; treat
                # it as empty text instead of failing with IndexError.
                text_node = annotation_value.firstChild
                text = text_node.data if text_node is not None else ""

                if "start_Being_Heard_one" in text:
                    timeslot = align_annotation.getAttribute("TIME_SLOT_REF1")
                if "start_Being_Heard_two" in text:
                    timeslot2 = align_annotation.getAttribute("TIME_SLOT_REF1")

    print("Found start_Being_Heard_one annotation at ", timeslot)
    if timeslot2 is not None:
        print("Found start_Being_Heard_two annotation at ", timeslot2)

    # Go to the time tier and resolve the slot ids to their time values.
    for time_order in annotationTree.getElementsByTagName("TIME_ORDER"):
        for time_slot in time_order.getElementsByTagName("TIME_SLOT"):
            slot_id = time_slot.getAttribute("TIME_SLOT_ID")
            if slot_id == timeslot:
                t = time_slot.getAttribute("TIME_VALUE")

            if slot_id == timeslot2:
                t2 = time_slot.getAttribute("TIME_VALUE")

    print("Elan annotation times - ", t, "", t2)
    return t, t2
Ejemplo n.º 3
0
def generate_elan(dirc):
    """Create ``elan<i>.eaf`` files in *dirc* from the template ``elan.eaf``.

    The template ``elan.eaf`` is read once from the current working
    directory. For every ``i`` from 1 to 80 for which ``<dirc><i>.wav``
    exists, the ``MEDIA_DESCRIPTOR`` elements under each ``HEADER`` are
    updated in the shared in-memory document:

    * descriptors without ``EXTRACTED_FROM`` get ``MEDIA_URL`` /
      ``RELATIVE_MEDIA_URL`` (whichever are present) pointed at ``<i>.mp4``;
    * descriptors with ``EXTRACTED_FROM`` get that attribute pointed at
      ``<i>.mp4`` and ``MEDIA_URL`` / ``RELATIVE_MEDIA_URL`` (whichever are
      present) pointed at ``<i>.wav``.

    The document is written to ``<dirc>elan<i>.eaf`` each time a descriptor
    carrying both ``EXTRACTED_FROM`` and ``RELATIVE_MEDIA_URL`` is processed.

    Args:
        dirc: Path prefix of the media/output files. It is concatenated
            as-is, so it must already end with a path separator.
    """
    tree = pr('elan.eaf')
    annotationTree = tree.documentElement

    for i in range(1, 81):
        if not os.path.isfile(dirc + str(i) + ".wav"):
            continue

        video_url = "file:///" + dirc + str(i) + ".mp4"
        audio_url = "file:///" + dirc + str(i) + ".wav"

        for header in annotationTree.getElementsByTagName("HEADER"):
            for med in header.getElementsByTagName("MEDIA_DESCRIPTOR"):
                if not med.hasAttribute("EXTRACTED_FROM"):
                    # Descriptor of the video itself.
                    if med.hasAttribute("MEDIA_URL"):
                        med.setAttribute("MEDIA_URL", video_url)
                    if med.hasAttribute("RELATIVE_MEDIA_URL"):
                        med.setAttribute("RELATIVE_MEDIA_URL",
                                         "./" + str(i) + ".mp4")
                    continue

                # Descriptor of the audio extracted from the video.
                med.setAttribute("EXTRACTED_FROM", video_url)
                if med.hasAttribute("MEDIA_URL"):
                    med.setAttribute("MEDIA_URL", audio_url)
                if med.hasAttribute("RELATIVE_MEDIA_URL"):
                    med.setAttribute("RELATIVE_MEDIA_URL",
                                     "./" + str(i) + ".wav")

                    # NOTE(review): the file is only written from inside this
                    # branch, so descriptors that come later in the document
                    # are not yet updated in this write. Kept as in the
                    # original; confirm whether the write was meant to happen
                    # once per file, after the loops.
                    new_efname = dirc + 'elan' + str(i) + '.eaf'
                    # Context manager so the handle is flushed and closed.
                    with open(new_efname, 'w') as out_file:
                        tree.writexml(out_file,
                                      indent="  ",
                                      addindent="  ",
                                      newl='\n')
def fixElanOffest(efname, dirc, offset):
    """Shift the time slots of an ELAN file and save it as ``new_<efname>``.

    Every ``TIME_SLOT`` under ``TIME_ORDER`` that carries a ``TIME_VALUE``
    has *offset* added to it, provided the shifted value stays strictly
    positive; otherwise the slot keeps its original value. Slots without a
    ``TIME_VALUE`` are left untouched. The result is written to
    ``<dirc>/new_<efname>``.

    Args:
        efname: File name of the ELAN ``.eaf`` file inside *dirc*.
        dirc: Directory containing the file; also receives the output.
        offset: Integer amount added to each ``TIME_VALUE`` (may be
            negative).
    """
    tree = pr(dirc + "/" + efname)
    annotationTree = tree.documentElement

    # Go to the time tier.
    for time_order in annotationTree.getElementsByTagName("TIME_ORDER"):
        for time_slot in time_order.getElementsByTagName("TIME_SLOT"):
            raw_value = time_slot.getAttribute("TIME_VALUE")
            if not raw_value:
                # getAttribute() returns "" for a slot with no TIME_VALUE;
                # int("") would raise, and there is nothing to shift anyway.
                continue
            shifted = int(raw_value) + offset
            if shifted > 0:
                time_slot.setAttribute("TIME_VALUE", str(shifted))

    new_efname = dirc + "/" + 'new_' + efname
    # Context manager so the handle is flushed and closed.
    with open(new_efname, 'w') as out_file:
        tree.writexml(out_file,
                      indent="  ",
                      addindent="  ",
                      newl='\n')
    print("Fixed offfset output at ", new_efname)
Ejemplo n.º 5
0
def main(args):
    """Compute and print Fleiss' kappa for the emotion annotations.

    Command-line options select the ELAN annotation files (``-f``), the
    matching FACET files (``-m``), the block size (``-b``, default 1000),
    optional explicit start/end times (``-s`` / ``-e``) and a set of
    boolean switches (``--combine``, ``--weak``, ``--all``,
    ``--mean-normalize``, ``--global-mean``, ``--binary`` and their
    negations).

    For each annotation file the "Shamya_EmotionTier" and "Mark_EmotionTier"
    tiers (and, with ``--all``, the FACET data) are regularized to blocks,
    blocks either annotator marked "Skip" are dropped, and the per-block
    category counts are accumulated; Fleiss' kappa over all files is printed
    at the end.

    Args:
        args: Not used. NOTE(review): the options are parsed from
            ``sys.argv`` by ``parse_args()`` and the parameter is
            overwritten; kept for interface compatibility.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--block")
    parser.add_argument("-f", "--file", nargs='*')
    parser.add_argument("-m", "--facet", nargs='*')
    parser.add_argument("-s", "--start", nargs='*')
    parser.add_argument("-e", "--end", nargs='*')

    parser.add_argument('--combine', dest='combine', action='store_true')
    parser.add_argument('--no-combine', dest='combine', action='store_false')
    parser.set_defaults(combine=True)

    parser.add_argument('--weak', dest='weak', action='store_true')
    parser.add_argument('--no-weak', dest='weak', action='store_false')
    parser.set_defaults(weak=False)

    parser.add_argument('--all', dest='all', action='store_true')
    parser.add_argument('--only-human', dest='all', action='store_false')
    parser.set_defaults(all=True)

    parser.add_argument('--mean-normalize', dest='mean', action='store_true')
    parser.add_argument('--no-mean-normalize', dest='mean', action='store_false')
    parser.set_defaults(mean=True)

    # The defaults below used to be set on ``mean`` (copy-paste slip), which
    # silently turned the ``mean=True`` default above into False and left
    # ``global_mean`` / ``binary`` without an explicit default.
    parser.add_argument('--global-mean', dest='global_mean', action='store_true')
    parser.add_argument('--no-global-mean', dest='global_mean', action='store_false')
    parser.set_defaults(global_mean=False)

    parser.add_argument('--binary', dest='binary', action='store_true')
    parser.add_argument('--no-binary', dest='binary', action='store_false')
    parser.set_defaults(binary=False)

    args = parser.parse_args()

    if args.file is None:
        # Fail with a usage message instead of a TypeError on len(None).
        parser.error("at least one annotation file is required (-f/--file)")

    block = int(args.block) if args.block is not None else 1000

    filename = args.file
    print(filename)
    facet_file = args.facet
    combine = args.combine
    weak = args.weak
    include_facet = args.all  # local renamed so the builtin all() stays usable
    mean = args.mean
    global_mean = args.global_mean
    binary = args.binary
    start_time_list = args.start
    end_time_list = args.end

    print("file =", filename, "facet file =", facet_file, "block =", block, "combine =", combine, "weak =", weak)
    time_dict_list = []
    tiers_list = []
    emotion_kappa = []

    total = len(filename)

    # Parse every file once; keep its tiers and its slot-id -> time mapping.
    for i in range(total):
        print(filename[i])
        annotationTree = pr(filename[i]).documentElement
        tiers_list.append(annotationTree.getElementsByTagName("TIER"))

        ts_dict = {}
        for time_order in annotationTree.getElementsByTagName("TIME_ORDER"):
            for time_slot in time_order.getElementsByTagName("TIME_SLOT"):
                ts_dict[time_slot.getAttribute("TIME_SLOT_ID")] = time_slot.getAttribute("TIME_VALUE")

        time_dict_list.append(ts_dict)

    # Derive start and end times from the annotations unless both were given.
    if start_time_list is None or end_time_list is None:
        start_time_list = []
        end_time_list = []

        for i in range(total):
            ts_dict = time_dict_list[i]
            start_time_slot = None
            end_time_slot = None
            # Go to the Shamya_looking-at-screenTier tier: the start is the
            # first annotation's begin slot, the end is the last one's end slot.
            for tier in tiers_list[i]:
                if tier.getAttribute("TIER_ID") == "Shamya_looking-at-screenTier":
                    for annotation in tier.getElementsByTagName("ANNOTATION"):
                        for align_annotation in annotation.getElementsByTagName("ALIGNABLE_ANNOTATION"):
                            if start_time_slot is None:
                                start_time_slot = align_annotation.getAttribute("TIME_SLOT_REF1")
                            end_time_slot = align_annotation.getAttribute("TIME_SLOT_REF2")

            print("start_time_slot = ", start_time_slot, " end_time_slot = ", end_time_slot)

            start_time_list.append(int(ts_dict[start_time_slot]))
            end_time_list.append(int(ts_dict[end_time_slot]))

    if global_mean:
        print("Finding global mean...")
        global_mean_neutral, global_mean_other = get_global_mean(facet_file, start_time_list, end_time_list)
    else:
        global_mean_neutral = None
        global_mean_other = None

    for i in range(total):
        print(filename[i])
        tiers = tiers_list[i]
        ts_dict = time_dict_list[i]

        start_time = int(start_time_list[i])
        end_time = int(end_time_list[i])

        print("start_time = ", start_time, " end_time = ", end_time)

        # Shamya annotations
        shamya_ts = get_annotations(tiers, "Shamya_EmotionTier")
        print("shamya ts ", shamya_ts)
        shamya_full_ts = transform_annotation(shamya_ts, start_time, end_time, ts_dict)
        print("shamya full ts ", shamya_full_ts)

        # Mark annotations
        mark_ts = get_annotations(tiers, "Mark_EmotionTier")
        print("mark ts", mark_ts)
        mark_full_ts = transform_annotation(mark_ts, start_time, end_time, ts_dict)
        print("mark full ts", mark_full_ts)

        # Get annotations based on the block size.
        shamya_annot = regularize_annotations(shamya_full_ts, block, weak)
        print("shamya block sized annot", shamya_annot)
        mark_annot = regularize_annotations(mark_full_ts, block, weak)
        print("mark block sized annot", mark_annot)

        if include_facet:
            print(facet_file[i])

            # Get FACET data.
            if mean:
                # Mean normalized. The two variants have to be separate, as
                # the mean of max would differ with/without the confusion
                # column.
                if combine:
                    facet = get_facet_data_combined(facet_file[i], start_time, end_time, global_mean_neutral, global_mean_other)
                else:
                    facet = get_facet_data_all(facet_file[i], start_time, end_time, global_mean_neutral, global_mean_other)
            else:
                facet = get_facet_data(facet_file[i], start_time, end_time)

            print("facet full", facet[:10])

            if binary:
                # NOTE(review): the return value is discarded; presumably
                # get_binary_emotion modifies ``facet`` in place -- confirm.
                get_binary_emotion(facet)

            facet_annot = regularize_annotations(facet, block, weak)
            print("facet block sized annot", facet_annot)

        # Delete skips and combine annotations.
        if include_facet:
            # Change [s,m,f] if you need pairwise [m,f] or [s,f].
            annot = [[s, m, f] for s, m, f in zip(shamya_annot, mark_annot, facet_annot) if s != "Skip" and m != "Skip"]
        else:
            annot = [[s, m] for s, m in zip(shamya_annot, mark_annot) if s != "Skip" and m != "Skip"]

        # One [Neutral, Confused, Other] counter row per remaining block.
        emot = [[0, 0, 0] for _ in range(len(annot))]

        # Update kappa matrix.
        emot = update_kappa_matrix(annot, emot)

        emotion_kappa = emotion_kappa + emot

    # If combine, merge other and confused.
    if combine:
        emotion_kappa = [[row[0], (row[1] + row[2])] for row in emotion_kappa]

    print(len(emotion_kappa))

    # Compute Fleiss' kappa.
    kappa = fleissKappa(emotion_kappa)
    print(kappa)
def tweetsFromFile(filename):
    """Parse the XML file *filename* and return all of its ``tweet`` elements."""
    root = pr(filename).documentElement
    return root.getElementsByTagName("tweet")