def pronunciation_variants_corpus(textgrid_test_dir):
    """Load ``pronunc_variants_corpus.TextGrid`` as a discourse named 'test'.

    The annotation types detected in the TextGrid are relabelled before
    loading: the first is named 'spelling', the second and third are both
    named 'transcription', and the third additionally has its ``token``
    flag set to True.

    Parameters
    ----------
    textgrid_test_dir : str
        Directory that contains ``pronunc_variants_corpus.TextGrid``.

    Returns
    -------
    Whatever ``load_discourse_textgrid`` returns for the configured
    annotation types.
    """
    textgrid_path = os.path.join(
        textgrid_test_dir, 'pronunc_variants_corpus.TextGrid')
    annotation_types = inspect_discourse_textgrid(textgrid_path)

    # Attribute names to assign, in positional order of the detected types.
    tier_names = ('spelling', 'transcription', 'transcription')
    for position, tier_name in enumerate(tier_names):
        annotation_types[position].attribute.name = tier_name

    # Only the third annotation type is flagged as a token-level one.
    annotation_types[2].token = True

    return load_discourse_textgrid('test', textgrid_path, annotation_types)
def run(self):
    """Import a corpus as described by ``self.kwargs`` and signal the outcome.

    ``'text_type'`` and ``'isDirectory'`` are popped from ``self.kwargs``;
    every remaining entry is forwarded as keyword arguments to the loader
    selected by those two values. For the 'buckeye' and 'timit' text types
    a ``'dialect'`` entry holding the text type is added before forwarding.

    Outcome is reported through signals rather than a return value:

    * ``errorEncountered`` is emitted with the exception when loading
      raises ``PCTError``, or with a ``PCTPythonError`` wrapping any other
      exception (including an unsupported text type).
    * ``finishedCancelling`` is emitted when ``self.stopped`` is set once
      loading has returned.
    * ``dataReady`` is emitted with the loaded corpus otherwise.
    """
    # Short delay retained from the existing behaviour; its purpose is not
    # visible from this method.
    time.sleep(0.1)
    textType = self.kwargs.pop('text_type')
    isDirectory = self.kwargs.pop('isDirectory')
    logging.info('Importing {} corpus named {}'.format(
        textType, self.kwargs['corpus_name']))
    # The format string needs a placeholder, otherwise the path is dropped
    # and only the literal 'Path: ' reaches the log.
    logging.info('Path: {}'.format(self.kwargs['path']))
    log_annotation_types(self.kwargs['annotation_types'])
    try:
        if textType == 'spelling':
            if isDirectory:
                corpus = load_directory_spelling(**self.kwargs)
            else:
                corpus = load_discourse_spelling(**self.kwargs)
        elif textType == 'transcription':
            if isDirectory:
                corpus = load_directory_transcription(**self.kwargs)
            else:
                corpus = load_discourse_transcription(**self.kwargs)
        elif textType == 'ilg':
            if isDirectory:
                corpus = load_directory_ilg(**self.kwargs)
            else:
                corpus = load_discourse_ilg(**self.kwargs)
        elif textType == 'textgrid':
            if isDirectory:
                corpus = load_directory_textgrid(**self.kwargs)
            else:
                corpus = load_discourse_textgrid(**self.kwargs)
        elif textType == 'csv':
            # CSV import has a single loader; isDirectory is not consulted.
            corpus = load_corpus_csv(**self.kwargs)
        elif textType in ['buckeye', 'timit']:
            self.kwargs['dialect'] = textType
            if isDirectory:
                corpus = load_directory_multiple_files(**self.kwargs)
            else:
                corpus = load_discourse_multiple_files(**self.kwargs)
        else:
            # Without this branch ``corpus`` would stay unbound and the
            # emit below would fail with UnboundLocalError outside the
            # try block; raising here routes the problem through the
            # regular error signal instead.
            raise ValueError(
                'Unsupported text type: {!r}'.format(textType))
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        # Wrap unexpected exceptions so receivers get a PCT error object.
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(corpus)
def run(self):
    """Import a corpus as described by ``self.kwargs`` and signal the outcome.

    ``'text_type'`` and ``'isDirectory'`` are popped from ``self.kwargs``;
    every remaining entry is forwarded as keyword arguments to the loader
    selected by those two values. For the 'buckeye' and 'timit' text types
    a ``'dialect'`` entry holding the text type is added before forwarding.

    Outcome is reported through signals rather than a return value:

    * ``errorEncountered`` is emitted with the exception when loading
      raises ``PCTError``, or with a ``PCTPythonError`` wrapping any other
      exception (including an unsupported text type).
    * ``finishedCancelling`` is emitted when ``self.stopped`` is set once
      loading has returned.
    * ``dataReady`` is emitted with the loaded corpus otherwise.
    """
    # Short delay retained from the existing behaviour; its purpose is not
    # visible from this method.
    time.sleep(0.1)
    textType = self.kwargs.pop('text_type')
    isDirectory = self.kwargs.pop('isDirectory')
    logging.info('Importing {} corpus named {}'.format(
        textType, self.kwargs['corpus_name']))
    # The format string needs a placeholder, otherwise the path is dropped
    # and only the literal 'Path: ' reaches the log.
    logging.info('Path: {}'.format(self.kwargs['path']))
    log_annotation_types(self.kwargs['annotation_types'])
    try:
        if textType == 'spelling':
            if isDirectory:
                corpus = load_directory_spelling(**self.kwargs)
            else:
                corpus = load_discourse_spelling(**self.kwargs)
        elif textType == 'transcription':
            if isDirectory:
                corpus = load_directory_transcription(**self.kwargs)
            else:
                corpus = load_discourse_transcription(**self.kwargs)
        elif textType == 'ilg':
            if isDirectory:
                corpus = load_directory_ilg(**self.kwargs)
            else:
                corpus = load_discourse_ilg(**self.kwargs)
        elif textType == 'textgrid':
            if isDirectory:
                corpus = load_directory_textgrid(**self.kwargs)
            else:
                corpus = load_discourse_textgrid(**self.kwargs)
        elif textType == 'csv':
            # CSV import has a single loader; isDirectory is not consulted.
            corpus = load_corpus_csv(**self.kwargs)
        elif textType in ['buckeye', 'timit']:
            self.kwargs['dialect'] = textType
            if isDirectory:
                corpus = load_directory_multiple_files(**self.kwargs)
            else:
                corpus = load_discourse_multiple_files(**self.kwargs)
        else:
            # Without this branch ``corpus`` would stay unbound and the
            # emit below would fail with UnboundLocalError outside the
            # try block; raising here routes the problem through the
            # regular error signal instead.
            raise ValueError(
                'Unsupported text type: {!r}'.format(textType))
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        # Wrap unexpected exceptions so receivers get a PCT error object.
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(corpus)