def test_ilg_basic(ilg_test_dir):
    """Load ``test_basic.txt`` as an ILG discourse and check a word frequency.

    Builds a 'spelling' annotation type anchored on 'transcription' and a
    base 'transcription' annotation type (both '.'-delimited), loads the
    file with ``load_discourse_ilg`` and asserts that the lexicon entry
    for 'a' has a frequency of 2.
    """
    # NOTE(review): a second function with this same name is defined later
    # in this file; if both live in the same module scope, that later
    # definition replaces this one and this copy is never collected.
    ilg_path = os.path.join(ilg_test_dir, 'test_basic.txt')

    # The delimiter is set on the attribute before it is handed to the
    # annotation type, exactly as the fixture setup requires.
    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'

    spelling_type = AnnotationType('spelling', 'transcription', None,
                                   token=False, anchor=True)
    transcription_type = AnnotationType('transcription', None, None,
                                        token=False, base=True,
                                        attribute=transcription_attribute)
    transcription_type.trans_delimiter = '.'

    discourse = load_discourse_ilg('test', ilg_path,
                                   [spelling_type, transcription_type])

    # Diagnostic output, shown by pytest when the test fails.
    print(discourse.words)
    print(discourse.lexicon.words)

    assert discourse.lexicon.find('a').frequency == 2
def test_ilg_data(ilg_test_dir):
    """Parse ``test_basic.txt`` with ``ilg_to_data`` and check both tiers.

    The 'spelling' tier is expected to hold three annotations ('a', 'a',
    'b'), each referencing 'transcription' with begin/end indices (0, 2),
    (2, 4) and (4, 6). The 'transcription' tier is expected to hold the
    base annotations a, b, a, b, c, d in that order.
    """
    # NOTE(review): a second function with this same name is defined later
    # in this file; if both live in the same module scope, that later
    # definition replaces this one and this copy is never collected.
    ilg_path = os.path.join(ilg_test_dir, 'test_basic.txt')

    transcription_attribute = Attribute('transcription', 'tier')
    transcription_attribute.delimiter = '.'
    annotation_types = [
        AnnotationType('spelling', 'transcription', None,
                       token=False, anchor=True),
        AnnotationType('transcription', None, None,
                       token=False, base=True,
                       attribute=transcription_attribute),
    ]
    annotation_types[1].trans_delimiter = '.'

    parsed = ilg_to_data(ilg_path, annotation_types)

    # (label, begin, end) for every expected spelling annotation.
    expected_spelling = []
    for label, begin, end in (('a', 0, 2), ('a', 2, 4), ('b', 4, 6)):
        word = Annotation(label)
        word.references.append('transcription')
        word.begins.append(begin)
        word.ends.append(end)
        expected_spelling.append(word)

    assert parsed['spelling']._list == expected_spelling
    assert parsed['transcription']._list == [
        BaseAnnotation(segment)
        for segment in ('a', 'b', 'a', 'b', 'c', 'd')
    ]
def test_ilg_basic(ilg_test_dir):
    """Check that loading the basic ILG file yields frequency 2 for 'a'.

    ``ilg_test_dir`` is the directory containing ``test_basic.txt``. The
    file is loaded through ``load_discourse_ilg`` using a 'spelling'
    anchor type and a '.'-delimited base 'transcription' type.
    """
    # NOTE(review): an earlier function in this file carries the same
    # name; if both live in the same module scope, this definition is the
    # one that takes effect and the earlier copy is dead code.
    source_file = os.path.join(ilg_test_dir, 'test_basic.txt')

    tier_attribute = Attribute('transcription', 'tier')
    tier_attribute.delimiter = '.'

    spelling = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True
    )
    transcription = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=tier_attribute,
    )
    types = [spelling, transcription]
    types[1].trans_delimiter = '.'

    loaded = load_discourse_ilg('test', source_file, types)

    # Printed for debugging; pytest shows captured output on failure.
    print(loaded.words)
    print(loaded.lexicon.words)

    entry = loaded.lexicon.find('a')
    assert entry.frequency == 2
def test_ilg_data(ilg_test_dir):
    """Check the raw annotation data produced by ``ilg_to_data``.

    Reads ``test_basic.txt`` from ``ilg_test_dir`` and compares the
    'spelling' and 'transcription' entries of the result against
    hand-built ``Annotation`` / ``BaseAnnotation`` lists.
    """
    # NOTE(review): an earlier function in this file carries the same
    # name; if both live in the same module scope, this definition is the
    # one that takes effect and the earlier copy is dead code.

    def build_word(label, begin, end):
        # One expected spelling annotation pointing into 'transcription'.
        annotation = Annotation(label)
        annotation.references.append('transcription')
        annotation.begins.append(begin)
        annotation.ends.append(end)
        return annotation

    source_file = os.path.join(ilg_test_dir, 'test_basic.txt')

    tier_attribute = Attribute('transcription', 'tier')
    tier_attribute.delimiter = '.'

    spelling = AnnotationType(
        'spelling', 'transcription', None, token=False, anchor=True
    )
    transcription = AnnotationType(
        'transcription', None, None,
        token=False, base=True, attribute=tier_attribute,
    )
    types = [spelling, transcription]
    types[1].trans_delimiter = '.'

    result = ilg_to_data(source_file, types)

    word_spans = [('a', 0, 2), ('a', 2, 4), ('b', 4, 6)]
    wanted_words = [build_word(*span) for span in word_spans]

    assert result['spelling']._list == wanted_words
    assert result['transcription']._list == [
        BaseAnnotation(symbol) for symbol in ['a', 'b', 'a', 'b', 'c', 'd']
    ]