예제 #1
0
def elan_to_df(filename):
    """Open an ELAN ``.eaf`` file and print the data of its ``Parent*`` tiers.

    Despite the name, no DataFrame is built here: the function only prints
    annotation data and always returns ``0``.

    Args:
        filename: Path of the ``.eaf`` file to open.

    Returns:
        Always ``0``. When the file cannot be loaded, its name is printed and
        ``0`` is returned without looking at any tier.
    """
    try:
        # NOTE(review): the second positional argument looks like it lands in
        # pympi's ``author`` parameter rather than an encoding -- confirm.
        eaf = pympi.Eaf(filename, 'utf-8')
        tiers = list(eaf.get_tier_names())
    except Exception:
        # Report the offending file and stop. Falling through would crash on
        # the unbound ``tiers`` / ``eaf`` names in the loop below.
        print(filename)
        return 0

    for name in tiers:
        # 'default' never starts with 'Parent', so one test is enough.
        if name.startswith('Parent'):
            print(eaf.get_annotation_data_for_tier(name))

    return 0
예제 #2
0
def extract_videos_from_annotations_colab(video_name, eaf_file_name, gloss_list):
    """
    Cut one sub-clip out of ``video_name`` for every annotation in
    ``eaf_file_name`` whose value equals an entry of ``gloss_list``.

    Clips are written to ``openpose/<gloss>/NNNNN.mp4``; the gloss folder is
    created on demand. The clip counter restarts for every tier, and a message
    is printed for each tier that produced no clip.
    """
    eaf = pympi.Eaf(file_path=eaf_file_name)

    for tier in eaf.get_tier_names():
        extracted = 0
        for annotation in eaf.get_annotation_data_for_tier(tier):
            for gloss in gloss_list:
                if annotation[2] != gloss:
                    continue

                # Annotation times are divided by 1000 before being printed
                # and handed to ffmpeg.
                clip_start = annotation[0] / 1000
                clip_end = annotation[1] / 1000
                print(clip_start, clip_end)

                # Make sure the per-gloss output folder exists.
                gloss_dir = "openpose/" + gloss + "/"
                if not os.path.exists(gloss_dir):
                    os.makedirs(gloss_dir)

                extracted += 1
                clip_path = ("openpose/" + str(gloss) + "/" +
                             "%#05d.mp4" % extracted)
                ffmpeg_extract_subclip(video_name, clip_start, clip_end,
                                       targetname=clip_path)
        if extracted == 0:
            print("No annotation found with this name")
예제 #3
0
    def extract_one(self):
        """Extract the currently selected annotation into a video clip.

        Asks the user for an output directory, then cuts
        ``<annotation text>.mp4`` out of the selected video for every
        annotation on the selected tier whose value equals the selected
        annotation text. Because the target name only depends on that text,
        several matching annotations write to the same file.

        Does nothing when the directory dialog returns an empty path.
        """
        path = QFileDialog.getExistingDirectory(
            parent=self,
            caption='Select directory to save extracted video',
        )
        if not path:
            # No directory chosen: carrying on would target '/<name>.mp4'.
            return
        print(path)

        wanted = self.ann_label.text()
        video_name = self.video_label.text()
        selected_file = pympi.Eaf(file_path=self.labl.text())
        annotations = selected_file.get_annotation_data_for_tier(
            self.all_label.text())

        for annotation in annotations:
            if annotation[2] != wanted:
                continue
            # Annotation times are divided by 1000 before going to ffmpeg.
            ffmpeg_extract_subclip(video_name,
                                   annotation[0] / 1000,
                                   annotation[1] / 1000,
                                   targetname=path + '/' + str(wanted) + ".mp4")
예제 #4
0
    def to_print(self, text):
        """Remember the chosen tier name and list that tier's annotations.

        ``text`` is written to the (hidden) ``all_label`` widget, then the
        annotation combo box is refilled with the value of every annotation
        found on that tier of the currently loaded ELAN file.
        """
        tier_name = str(text)

        label = self.all_label
        label.setText(tier_name)
        label.adjustSize()
        label.move(200, 150)
        label.hide()

        # Re-read the ELAN file named in ``labl`` and rebuild the combo box.
        eaf = pympi.Eaf(file_path=self.labl.text())
        entries = eaf.get_annotation_data_for_tier(tier_name)
        combo = self.combo_annotations
        combo.clear()
        for entry in entries:
            combo.addItem(str(entry[2]))
예제 #5
0
def extract_videos_from_annotations(root, gloss_list):
    """
    Extract a video clip, and its frames, for every matching ELAN annotation.

    Walks the current working directory for ``*.eaf`` files. For each one, the
    ``.mp4`` file with the same base name in the same folder is cut into one
    clip per annotation whose value equals an entry of ``gloss_list``. Clips
    are written to ``Data/<gloss>/Videos/NNNNN.mp4`` and each clip is then
    passed to ``video_to_frames`` with ``Data/<gloss>/NNNNN`` as its target.

    Args:
        root: Currently ignored -- the search always starts at
            ``os.getcwd()``. Kept so existing callers keep working.
        gloss_list: Annotation values to extract, e.g. ``["NS", "1H", "2H"]``.

    NOTE(review): the clip counter restarts for every tier (and every file),
    so later matches can reuse the clip numbers of earlier ones -- confirm
    whether overwriting is intended.
    """
    def check_folders(gl_name):
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs("./Data/" + gl_name + "/Videos/", exist_ok=True)

    pattern = "*.eaf"

    for folder, _dirs, files in os.walk(os.getcwd(), topdown=False):
        for name in files:
            if not fnmatch(name, pattern):
                continue

            # The video is expected next to the .eaf file, same base name.
            video_name = re.sub(r'\.eaf$', '', name) + ".mp4"
            print(video_name)
            video_name = folder + "/" + video_name
            eaf = pympi.Eaf(file_path=folder + "/" + name)

            for tier_name in eaf.get_tier_names():
                count = 0
                for annotation in eaf.get_annotation_data_for_tier(tier_name):
                    for gloss in gloss_list:
                        if annotation[2] != gloss:
                            continue

                        # Times are divided by 1000 before use.
                        start = annotation[0] / 1000
                        end = annotation[1] / 1000
                        print(start, end)
                        check_folders(gloss)

                        clip_id = "%#05d" % (count + 1)
                        clip_path = ("Data/" + str(gloss) + "/Videos/" +
                                     clip_id + ".mp4")
                        ffmpeg_extract_subclip(video_name, start, end,
                                               targetname=clip_path)
                        # Comment next line if you don't want to extract
                        # the frames for each video
                        video_to_frames(clip_path,
                                        "Data/" + str(gloss) + "/" + clip_id)
                        count += 1
                if count == 0:
                    print("No annotation found with this name")
예제 #6
0
def process(file, aclew_data, t=5, start=30, n=5):
    """Add up to ``n`` randomly placed coding clips to an ELAN file.

    Candidate start minutes are drawn in random order from
    ``range(start, length - t)``; a candidate is accepted when ``overlaps``
    reports no clash with the existing ``context`` windows (including the ones
    accepted earlier in this call). For every accepted window, annotations are
    added on the ``code``, ``code_num``, ``on_off`` and ``context`` tiers, a
    row is appended to the global ``selected`` table, and the file is written
    to ``output_dir`` under its original base name.

    Args:
        file: Path of the ``.eaf`` file to extend.
        aclew_data: Object providing ``length_of_recording``, ``aclew_id`` and
            ``corpus`` attributes.
        t: Passed to ``overlaps`` and subtracted from the recording length
            when building the candidate range.
        start: First candidate minute.
        n: Number of clips to add.

    Raises:
        IndexError: If the ``code_num`` tier has no annotations.

    NOTE(review): the accepted window is always ``(x, x + 5)`` regardless of
    ``t`` -- confirm whether that should follow ``t``.
    NOTE(review): ``selected.append(dict, ignore_index=True)`` looks like the
    pandas DataFrame API, which newer pandas releases dropped -- confirm.
    """
    global selected
    length = int(float(aclew_data.length_of_recording))

    eaf = pympi.Eaf(file)

    # New clip numbers continue after the highest existing one.
    existing_nums = sorted(
        int(num) for _, _, num in eaf.get_annotation_data_for_tier("code_num"))
    new_n = existing_nums[-1] + 1

    # Existing context windows, with times divided by 60000 so they are on
    # the same scale as the candidate minutes below.
    existing = [(onset / 60000, offset / 60000)
                for onset, offset, _ in eaf.get_annotation_data_for_tier('context')]

    # shuffle() needs a mutable sequence; a bare range cannot be shuffled.
    minute_range = list(range(start, length - t))
    shuffle(minute_range)

    # Track the newly accepted windows separately: slicing ``existing[-n:]``
    # would pick up pre-existing windows whenever fewer than n slots are found.
    new_windows = []
    for x in minute_range:
        if len(new_windows) == n:
            break
        if not overlaps(x, existing, t):
            window = (x, x + 5)
            existing.append(window)
            new_windows.append(window)

    rand_ints = list(range(new_n, new_n + n))
    shuffle(rand_ints)

    if not new_windows:
        return

    eaf.add_tier("notes")
    for window, clip_num in zip(new_windows, rand_ints):
        # The coded span starts 2 minutes into the window and stops 1 minute
        # before its end; the context annotation spans the whole window again.
        onset = (window[0] + 2) * 60000
        offset = (window[1] - 1) * 60000
        eaf.add_annotation("code", onset, offset)
        eaf.add_annotation("code_num", onset, offset, value=str(clip_num))
        eaf.add_annotation("on_off",
                           onset,
                           offset,
                           value="{}_{}".format(onset, offset))
        eaf.add_annotation("context", onset - 120000, offset + 60000)
        selected = selected.append(
            {
                'aclew_id': aclew_data.aclew_id,
                'corpus': aclew_data.corpus,
                'clip_num': clip_num,
                'onset': onset,
                'offset': offset
            },
            ignore_index=True)

    # One write after all clips are added gives the same final file.
    eaf.to_file(os.path.join(output_dir, os.path.basename(file)))
예제 #7
0
 def on_click(self):
     """Let the user choose a file and list its tiers in the combo box.

     When a file is chosen, its path is shown in ``labl``, the file is
     loaded with ``pympi.Eaf`` into ``file_eaf``, its tier names are stored
     in ``tier_names`` and added to ``combo``. Nothing changes when the
     dialog returns an empty file name.
     """
     dialog_options = QFileDialog.Options()
     dialog_options |= QFileDialog.DontUseNativeDialog
     chosen, _selected_filter = QFileDialog.getOpenFileName(
         None,
         "Choose a file",
         "",
         "All Files (*);;Python Files (*.py)",
         options=dialog_options)
     if not chosen:
         return

     self.combo.clear()

     # Show the chosen path in the label.
     path_label = self.labl
     path_label.setText(str(chosen))
     path_label.adjustSize()
     path_label.move(190, 16)

     # Load the file and offer each of its tiers for selection.
     self.file_eaf = pympi.Eaf(file_path=chosen)
     self.tier_names = self.file_eaf.get_tier_names()
     for tier in self.tier_names:
         self.combo.addItem(str(tier))
예제 #8
0
    def all_annotations(self):
        """Extract every annotation of the selected tier into its own clip.

        Asks the user for an output directory, then writes one
        ``<annotation value>_<index>.mp4`` file per annotation of the tier
        named in ``all_label``, cut from the video named in ``video_label``.
        Prints ``FINISHED`` when all clips have been written.

        Does nothing when the directory dialog returns an empty path.
        """
        path = QFileDialog.getExistingDirectory(
            parent=self,
            caption='Select directory to save extracted videos',
        )
        if not path:
            # No directory chosen: carrying on would write clips under '/'.
            return
        print(path)

        selected_file = pympi.Eaf(file_path=self.labl.text())
        annotations = selected_file.get_annotation_data_for_tier(
            self.all_label.text())
        video_name = self.video_label.text()

        for count, annotation in enumerate(annotations):
            # Annotation times are divided by 1000 before going to ffmpeg.
            start = annotation[0] / 1000
            end = annotation[1] / 1000
            print(start, end)

            ffmpeg_extract_subclip(video_name,
                                   start,
                                   end,
                                   targetname=path + '/' + str(annotation[2]) +
                                   '_' + str(count) + ".mp4")
        print("FINISHED")
예제 #9
0
        out.write(frame)  # Write out frame to video

        cv2.imshow('video', frame)
        if (cv2.waitKey(1) & 0xFF) == ord('q'):  # Hit `q` to exit
            break

    # Release everything if job is finished
    out.release()
    cv2.destroyAllWindows()

    print("The output video is {}".format(output))

    # Create annotations based on predictions array

    x = pympi.Eaf(author="Manolis")
    x.add_tier(tier_id="tier1")
    p = np.asarray(predictions_array)
    s = np.asarray(p)
    size = s.size
    # print(size)
    # print(size)
    st = 0
    end = 0
    arr = []
    j = 0
    i = 0

    # master_boolean = True
    # print("strarting values: " + str(st))
    while i < size - 1:
import pympi
import re

# Load the recording and pick every tier whose linguistic type is 'orthT'.
elan_file = pympi.Eaf("kpv_izva20150705-02-b.eaf")
tiers = elan_file.get_tier_ids_for_linguistic_type('orthT')

# Each entry of `utterances` is a run of consecutive annotations that ends
# with one whose text finishes in '.', '!', '?' or '…'.
utterances = []

for tier in tiers:
    annotations = elan_file.get_annotation_data_for_tier(tier)
    sent = []
    for a in annotations:
        # The annotation always joins the current run ...
        sent.append(a)
        # ... and closes it when its text ends with closing punctuation.
        if re.search(r"[\.!?…]$", a[2]):
            utterances.append(sent)
            sent = []
    # A run still open at the end of a tier is not added to `utterances`.

data = []

for u in utterances:
예제 #11
0
 def __init__(self, filename, config):
     """Create a new ``pympi.Eaf()`` document and store the output settings.

     ``filename`` is kept as given. ``config`` is queried through ``.get``
     for the ``'overwrite-output'`` entry, which defaults to ``False`` and
     is stored as a bool.
     """
     self.__document = pympi.Eaf()
     self.__filename = filename
     overwrite = config.get('overwrite-output', False)
     self.__overwrite = bool(overwrite)
예제 #12
0
def check_folders():
    """Create the ``./Extracted_videos`` output folder if it is missing."""
    # exist_ok makes this a single call with no gap between checking for the
    # folder and creating it.
    os.makedirs("./Extracted_videos", exist_ok=True)


# Walk the tree below './' and, for every ELAN file found, cut one clip per
# annotation whose value equals the requested gloss (args.g) into
# Extracted_videos/NNNNN.mp4.
# NOTE(review): the clip counter restarts for every tier and file, so later
# matches reuse earlier clip numbers -- confirm whether that is intended.
root = './'
pattern = "*.eaf"
for root, dirs, files in os.walk(root, topdown=False):
    for name in files:
        if not fnmatch(name, pattern):
            continue

        if args.v == "default":
            # Default video: same folder and base name as the .eaf file.
            video_name = os.path.join(
                root, re.sub(r'\.eaf$', '', name) + ".mp4")
        else:
            video_name = args.v

        # os.walk yields bare file names; join them with the folder so that
        # files found in sub-directories can be opened too.
        file = pympi.Eaf(file_path=os.path.join(root, name))
        tier_names = file.get_tier_names()
        gloss = args.g
        check_folders()
        for tier_name in tier_names:
            annotations = file.get_annotation_data_for_tier(tier_name)
            count = 0
            for annotation in annotations:
                if annotation[2] != gloss:
                    continue
                # Annotation times are divided by 1000 before use.
                start = annotation[0]
                end = annotation[1]
                print(start/1000,end/1000)
                ffmpeg_extract_subclip(
                    video_name, start/1000, end/1000,
                    targetname="Extracted_videos/"+"%#05d.mp4" % (count+1))
                count = count+1
            if count == 0:
                print("No annotation found with this name")
예제 #13
0
            timestamps = timestamps[0][1:-1].split(",")
    if len(c) > 0:
        complete_transcript.insert(i, c)
        complete_speaker_data.insert(i, timestamps)
        i += 1


# ///////////////////////////////////////////////////////////////////////////////////////////////////////
# Loads ELAN file as an ELAN object
# Use MasterTemplateFinal if starting new annotation,
# otherwise load pre-processed file with hand-entered annotations
# ///////////////////////////////////////////////////////////////////////////////////////////////////////

#elan_file = "MasterTemplateFinal.etf"
elan_file = "Trial_" + str(trial_num) + "_" + str(trial_date) + '/Trial_'+ str(trial_num) +".eaf"
# pympi.Eaf parses the file itself when it is given a path, so no separate
# parse_eaf call is needed. NOTE(review): a second parse into the same object
# re-reads the file and, as far as we can tell, appends header entries such
# as media descriptors a second time -- confirm against the pympi source.
elan_obj = pympi.Eaf(elan_file)

# ///////////////////////////////////////////////////////////////////////////////////////////////////////
# Make sure the lines are clear and fix the timestamps to the correct time
# Convert the IBM transcript to the error-checked version
# ///////////////////////////////////////////////////////////////////////////////////////////////////////

clear_dictated_lines()
correct_app_time(complete_speaker_data)
transcript = correct_IBM_to_error_checked(transcript, complete_transcript)

# ///////////////////////////////////////////////////////////////////////////////////////////////////////
# Fill in tiers, starting with Dictated/Typed errors and Corrections
# ///////////////////////////////////////////////////////////////////////////////////////////////////////
for i in range(1, len(app_conversation)):