def test_ilg_mismatched(ilg_test_dir):
    """Loading ``test_mismatched.txt`` must raise ``ILGWordMismatchError``.

    The file is read with a spelling anchor tier and a transcription base
    tier whose ``trans_delimiter`` is ``'.'``.
    """
    mismatched_path = os.path.join(ilg_test_dir, 'test_mismatched.txt')
    ats = [
        AnnotationType('spelling', 'transcription', None,
                       token=False, anchor=True),
        AnnotationType('transcription', None, None,
                       token=False, base=True,
                       attribute=Attribute('transcription', 'tier')),
    ]
    ats[1].trans_delimiter = '.'
    with pytest.raises(ILGWordMismatchError):
        # Only the raised exception matters; the return value is not kept.
        load_discourse_ilg('test', mismatched_path, ats)
def test_export_ilg(export_test_dir, unspecified_test_corpus):
    """Round-trip a discourse through ILG export and compare lexicons.

    A discourse generated from ``unspecified_test_corpus`` is exported to
    ``test_export_ilg.txt``, loaded back, and every entry's spelling,
    transcription and frequency is compared with the source corpus before
    the two lexicons are compared as a whole.
    """
    discourse = generate_discourse(unspecified_test_corpus)
    target = os.path.join(export_test_dir, 'test_export_ilg.txt')
    export_discourse_ilg(discourse, target)

    reloaded = load_discourse_ilg('test', target)
    for key in unspecified_test_corpus.keys():
        actual = reloaded.lexicon[key]
        expected = unspecified_test_corpus[key]
        assert actual.spelling == expected.spelling
        assert actual.transcription == expected.transcription
        assert actual.frequency == expected.frequency

    assert reloaded.lexicon == unspecified_test_corpus
def run(self):
    """Load a corpus with the loader matching the text type and signal the result.

    ``text_type`` and ``isDirectory`` are popped from ``self.kwargs``; the
    remaining keyword arguments are passed unchanged to the selected loader.
    For ``'buckeye'`` and ``'timit'`` the text type is additionally stored
    in ``self.kwargs`` under ``'dialect'`` before loading.

    Signals emitted:
        errorEncountered: with the ``PCTError`` raised by the loader, or
            with a ``PCTPythonError`` wrapping any other exception
            (including an unrecognised text type).
        finishedCancelling: when ``self.stopped`` is set after loading.
        dataReady: with the loaded corpus otherwise.
    """
    time.sleep(0.1)
    textType = self.kwargs.pop('text_type')
    isDirectory = self.kwargs.pop('isDirectory')
    logging.info('Importing {} corpus named {}'.format(
        textType, self.kwargs['corpus_name']))
    # The format string previously had no placeholder, so the path was
    # silently dropped from the log line.
    logging.info('Path: {}'.format(self.kwargs['path']))
    log_annotation_types(self.kwargs['annotation_types'])
    try:
        if textType == 'spelling':
            if isDirectory:
                corpus = load_directory_spelling(**self.kwargs)
            else:
                corpus = load_discourse_spelling(**self.kwargs)
        elif textType == 'transcription':
            if isDirectory:
                corpus = load_directory_transcription(**self.kwargs)
            else:
                corpus = load_discourse_transcription(**self.kwargs)
        elif textType == 'ilg':
            if isDirectory:
                corpus = load_directory_ilg(**self.kwargs)
            else:
                corpus = load_discourse_ilg(**self.kwargs)
        elif textType == 'textgrid':
            if isDirectory:
                corpus = load_directory_textgrid(**self.kwargs)
            else:
                corpus = load_discourse_textgrid(**self.kwargs)
        elif textType == 'csv':
            corpus = load_corpus_csv(**self.kwargs)
        elif textType in ['buckeye', 'timit']:
            self.kwargs['dialect'] = textType
            if isDirectory:
                corpus = load_directory_multiple_files(**self.kwargs)
            else:
                corpus = load_discourse_multiple_files(**self.kwargs)
        else:
            # Without this branch ``corpus`` stays unbound and the final
            # emit fails with UnboundLocalError outside the error handling.
            raise ValueError(
                'Unsupported text type: {!r}'.format(textType))
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        # Wrap unexpected failures so listeners always receive a PCT error.
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(corpus)
def run(self):
    """Load a corpus with the loader matching the text type and signal the result.

    ``text_type`` and ``isDirectory`` are popped from ``self.kwargs``; the
    remaining keyword arguments are passed unchanged to the selected loader.
    For ``'buckeye'`` and ``'timit'`` the text type is additionally stored
    in ``self.kwargs`` under ``'dialect'`` before loading.

    Signals emitted:
        errorEncountered: with the ``PCTError`` raised by the loader, or
            with a ``PCTPythonError`` wrapping any other exception
            (including an unrecognised text type).
        finishedCancelling: when ``self.stopped`` is set after loading.
        dataReady: with the loaded corpus otherwise.
    """
    time.sleep(0.1)
    textType = self.kwargs.pop('text_type')
    isDirectory = self.kwargs.pop('isDirectory')
    logging.info('Importing {} corpus named {}'.format(
        textType, self.kwargs['corpus_name']))
    # The format string previously had no placeholder, so the path was
    # silently dropped from the log line.
    logging.info('Path: {}'.format(self.kwargs['path']))
    log_annotation_types(self.kwargs['annotation_types'])
    try:
        if textType == 'spelling':
            if isDirectory:
                corpus = load_directory_spelling(**self.kwargs)
            else:
                corpus = load_discourse_spelling(**self.kwargs)
        elif textType == 'transcription':
            if isDirectory:
                corpus = load_directory_transcription(**self.kwargs)
            else:
                corpus = load_discourse_transcription(**self.kwargs)
        elif textType == 'ilg':
            if isDirectory:
                corpus = load_directory_ilg(**self.kwargs)
            else:
                corpus = load_discourse_ilg(**self.kwargs)
        elif textType == 'textgrid':
            if isDirectory:
                corpus = load_directory_textgrid(**self.kwargs)
            else:
                corpus = load_discourse_textgrid(**self.kwargs)
        elif textType == 'csv':
            corpus = load_corpus_csv(**self.kwargs)
        elif textType in ['buckeye', 'timit']:
            self.kwargs['dialect'] = textType
            if isDirectory:
                corpus = load_directory_multiple_files(**self.kwargs)
            else:
                corpus = load_discourse_multiple_files(**self.kwargs)
        else:
            # Without this branch ``corpus`` stays unbound and the final
            # emit fails with UnboundLocalError outside the error handling.
            raise ValueError(
                'Unsupported text type: {!r}'.format(textType))
    except PCTError as e:
        self.errorEncountered.emit(e)
        return
    except Exception as e:
        # Wrap unexpected failures so listeners always receive a PCT error.
        e = PCTPythonError(e)
        self.errorEncountered.emit(e)
        return
    if self.stopped:
        time.sleep(0.1)
        self.finishedCancelling.emit()
        return
    self.dataReady.emit(corpus)
def test_export_ilg(export_test_dir, unspecified_test_corpus):
    """Check that an ILG export can be loaded back without losing data.

    Exports a discourse built from ``unspecified_test_corpus``, reloads the
    written file, then compares each entry's spelling, transcription and
    frequency with the source corpus, followed by a whole-lexicon
    equality check.
    """
    original = generate_discourse(unspecified_test_corpus)
    ilg_file = os.path.join(export_test_dir, 'test_export_ilg.txt')
    export_discourse_ilg(original, ilg_file)

    round_tripped = load_discourse_ilg('test', ilg_file)
    for word_key in unspecified_test_corpus.keys():
        loaded_word = round_tripped.lexicon[word_key]
        source_word = unspecified_test_corpus[word_key]
        assert loaded_word.spelling == source_word.spelling
        assert loaded_word.transcription == source_word.transcription
        assert loaded_word.frequency == source_word.frequency

    assert round_tripped.lexicon == unspecified_test_corpus
def test_ilg_basic(ilg_test_dir):
    """Load ``test_basic.txt`` as ILG and check the frequency of ``'a'``.

    The file is read with a spelling anchor tier and a transcription base
    tier, both using ``'.'`` as delimiter; the lexicon entry found for
    ``'a'`` is expected to have frequency 2.
    """
    source_path = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    transcription_type = AnnotationType(
        'transcription', None, None, token=False, base=True,
        attribute=transcription_attribute)
    transcription_type.trans_delimiter = '.'

    corpus = load_discourse_ilg(
        'test', source_path, [spelling_type, transcription_type])
    print(corpus.words)
    print(corpus.lexicon.words)

    entry = corpus.lexicon.find('a')
    assert entry.frequency == 2
def test_ilg_mismatched(ilg_test_dir):
    """Loading ``test_mismatched.txt`` must raise ``ILGWordMismatchError``.

    The file is read with a spelling anchor tier and a transcription base
    tier whose ``trans_delimiter`` is ``'.'``.
    """
    mismatched_path = os.path.join(ilg_test_dir, 'test_mismatched.txt')
    ats = [
        AnnotationType('spelling', 'transcription', None,
                       token=False, anchor=True),
        AnnotationType('transcription', None, None,
                       token=False, base=True,
                       attribute=Attribute('transcription', 'tier')),
    ]
    ats[1].trans_delimiter = '.'
    with pytest.raises(ILGWordMismatchError):
        # Only the raised exception matters; the return value is not kept.
        load_discourse_ilg('test', mismatched_path, ats)
def test_ilg_basic(ilg_test_dir):
    """Read the basic ILG fixture and verify that ``'a'`` occurs twice.

    Uses a spelling anchor tier plus a transcription base tier delimited by
    ``'.'``, prints the loaded words for inspection, and asserts that the
    lexicon entry for ``'a'`` has frequency 2.
    """
    fixture = os.path.join(ilg_test_dir, 'test_basic.txt')

    tier_attribute = Attribute('transcription', 'tier')
    tier_attribute.delimiter = '.'

    anchor_tier = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True)
    base_tier = AnnotationType(
        'transcription', None, None, token=False, base=True,
        attribute=tier_attribute)
    annotation_types = [anchor_tier, base_tier]
    annotation_types[1].trans_delimiter = '.'

    loaded = load_discourse_ilg('test', fixture, annotation_types)
    print(loaded.words)
    print(loaded.lexicon.words)

    word_a = loaded.lexicon.find('a')
    assert word_a.frequency == 2