def process_corpus(self, cutMedia=True):
    """
    Convert the corpus and, optionally, cut its media files.

    The conversion itself is delegated to Txt2JSON.process_corpus
    (presumably turning every eaf file of the source subtree into a
    parsed json in the target directory; that code is not shown here).

    If cutMedia is true, the media directory is walked afterwards and
    every file whose lowercased extension is listed in
    self.mediaExtensions is passed to self.mc.cut_media together with
    self.usedMediaFiles and the privacy segments registered for that
    file name in self.privacySegments (an empty list if there are none).

    The media directory is corpusSettings['media_dir'] when that key is
    present, otherwise <corpus_dir>/<srcExt>.
    """
    Txt2JSON.process_corpus(self)
    if not cutMedia:
        return

    settings = self.corpusSettings
    # The default location is always computed first, so a missing
    # 'corpus_dir' key is reported even when 'media_dir' overrides it.
    mediaRoot = os.path.join(settings['corpus_dir'], self.srcExt)
    if 'media_dir' in settings:
        mediaRoot = settings['media_dir']

    def cuttingRank(name):
        # 0: .avi/.mts/.mov video, 1: .mp4, 2: anything else.
        lowered = name.lower()
        if lowered.endswith(('.avi', '.mts', '.mov')):
            return 0
        if lowered.endswith('.mp4'):
            return 1
        return 2

    for folder, _subdirs, names in os.walk(mediaRoot):
        # Process video files first. sorted() is stable, so files of
        # the same rank keep the order in which os.walk listed them.
        for name in sorted(names, key=cuttingRank):
            extension = os.path.splitext(name.lower())[1]
            if extension not in self.mediaExtensions:
                continue
            # Privacy segments are looked up by the bare file name,
            # before it is turned into an absolute path.
            segments = self.privacySegments[name] if name in self.privacySegments else []
            mediaPath = os.path.abspath(os.path.join(folder, name))
            print('Cutting media file', mediaPath)
            self.mc.cut_media(mediaPath,
                              usedFilenames=self.usedMediaFiles,
                              privacySegments=segments)
def process_corpus(self, cutMedia=False):
    """
    Convert the corpus and, if requested, cut its media files.

    Txt2JSON.process_corpus does the conversion (presumably eaf files
    of the source subtree to parsed json in the target directory; that
    code is not shown here).

    When cutMedia is true, the directory '../<srcExt>' is walked and
    every file whose lowercased extension is in self.mediaExtensions
    is handed to self.mc.cut_media as an absolute path.
    NOTE: the walked path is relative, so it depends on the current
    working directory of the process.
    """
    Txt2JSON.process_corpus(self)
    if cutMedia:
        mediaRoot = os.path.join('..', self.srcExt)
        for folder, _subdirs, names in os.walk(mediaRoot):
            for name in names:
                extension = os.path.splitext(name.lower())[1]
                if extension not in self.mediaExtensions:
                    # Not a media file: nothing to cut.
                    continue
                mediaPath = os.path.abspath(os.path.join(folder, name))
                print('Cutting media file', mediaPath)
                self.mc.cut_media(mediaPath)
def process_corpus(self):
    """
    Convert the whole corpus and cut all of its media files.
    This is the main function of the class.

    First, Txt2JSON.process_corpus is run (presumably turning every
    Exmaralda file of the source subtree into a parsed json in the
    target directory; that code is not shown here). Then
    <corpus_dir>/<srcExt> is walked and each file whose lowercased
    extension is in self.mediaExtensions is passed, as an absolute
    path, to self.mc.cut_media (which, according to the original
    description, splits media into overlapping chunks of small
    duration).
    """
    Txt2JSON.process_corpus(self)
    mediaRoot = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
    for folder, _subdirs, names in os.walk(mediaRoot):
        # Lazy filter: each name is checked right before it is cut.
        mediaNames = (
            name for name in names
            if os.path.splitext(name.lower())[1] in self.mediaExtensions
        )
        for name in mediaNames:
            mediaPath = os.path.abspath(os.path.join(folder, name))
            print('Cutting media file', mediaPath)
            self.mc.cut_media(mediaPath)
def process_corpus(self, cutMedia=True, filenames=None, isToProcessCorpus=True):
    """
    Convert the corpus and/or cut its media files.

    cutMedia -- if false, no media files are touched.
    filenames -- optional collection of bare file names; when it is
        non-empty, only files whose name is contained in it are
        considered for cutting. None or an empty collection means
        "all files".
    isToProcessCorpus -- if true, Txt2JSON.process_corpus is run first
        (presumably turning every eaf file of the source subtree into
        a parsed json in the target directory; that code is not shown
        here). Pass False to only cut media.

    Media files are looked for in <corpus_dir>/<srcExt>; a file counts
    as media if its lowercased extension is in self.mediaExtensions.
    Each such file is passed to self.mc.cut_media as an absolute path.
    Returns None.
    """
    if isToProcessCorpus:
        Txt2JSON.process_corpus(self)
    if not cutMedia:
        return

    # Build the walk root once; it is printed and then walked.
    mediaRoot = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
    print(mediaRoot)
    for path, _dirs, files in os.walk(mediaRoot):
        for fname in files:
            # The filter works on the bare file name, not on the path.
            if filenames and fname not in filenames:
                continue
            fileExt = os.path.splitext(fname.lower())[1]
            if fileExt not in self.mediaExtensions:
                continue
            mediaPath = os.path.abspath(os.path.join(path, fname))
            print('Cutting media file', mediaPath)
            self.mc.cut_media(mediaPath)