def test_corpus_csv(graph_db, csv_test_dir):
    """Check delimiter validation and a successful import of ``example.txt``.

    First, the inspected parser's column delimiter is overridden with a tab
    and the load is expected to raise ``DelimiterError``.  Then a freshly
    inspected parser is used to load the file, and the lexicon entry for
    ``'mata'`` is checked for its frequency and transcription.
    """
    csv_path = os.path.join(csv_test_dir, 'example.txt')

    # Phase 1: a mismatched column delimiter must be rejected on load.
    with CorpusContext('basic_csv', **graph_db) as corpus:
        corpus.reset()
        bad_parser = inspect_csv(csv_path)
        bad_parser.column_delimiter = '\t'
        with pytest.raises(DelimiterError):
            corpus.load(bad_parser, csv_path)

    # Phase 2: load with the inspected settings and verify the lexicon.
    with CorpusContext('basic_csv', **graph_db) as corpus:
        good_parser = inspect_csv(csv_path)
        # The second annotation type is renamed to 'word' before loading
        # (presumably so that column supplies the word labels -- confirm).
        good_parser.annotation_types[1].name = 'word'
        corpus.load(good_parser, csv_path)

        assert corpus.lexicon['mata'].frequency == 2
        assert corpus.lexicon['mata'].transcription == 'm.ɑ.t.ɑ'
def test_corpus_csv(graph_db, csv_test_dir):
    """Load ``example.txt`` into the 'basic_csv' corpus and verify its lexicon.

    The test has two stages: loading with a tab as the column delimiter must
    raise ``DelimiterError``; loading with the parser exactly as inspected
    (apart from renaming one annotation type) must yield the expected
    frequency and transcription for the word ``'mata'``.
    """
    source_file = os.path.join(csv_test_dir, 'example.txt')

    with CorpusContext('basic_csv', **graph_db) as ctx:
        # Start from an empty corpus.
        ctx.reset()
        tab_parser = inspect_csv(source_file)
        tab_parser.column_delimiter = '\t'
        with pytest.raises(DelimiterError):
            ctx.load(tab_parser, source_file)

    with CorpusContext('basic_csv', **graph_db) as ctx:
        csv_parser = inspect_csv(source_file)
        # NOTE(review): index 1 is assumed to be the column that should act
        # as the word annotation -- verify against example.txt.
        csv_parser.annotation_types[1].name = 'word'
        ctx.load(csv_parser, source_file)

        assert ctx.lexicon['mata'].frequency == 2
        assert ctx.lexicon['mata'].transcription == 'm.ɑ.t.ɑ'
def test_stressed(graph_db, csv_test_dir):
    """Smoke-test loading ``stressed.txt`` into the 'stressed_csv' corpus.

    The test makes no assertions; it passes as long as inspection and
    loading complete without raising.
    """
    csv_path = os.path.join(csv_test_dir, 'stressed.txt')

    with CorpusContext('stressed_csv', **graph_db) as corpus:
        corpus.reset()

        csv_parser = inspect_csv(csv_path)
        word_type, second_type = csv_parser.annotation_types[0], csv_parser.annotation_types[1]
        word_type.name = 'word'
        # Digits in the second annotation type are handled as 'stress'
        # (presumably stress marks on segments -- confirm in the parser).
        second_type.number_behavior = 'stress'

        corpus.load(csv_parser, csv_path)
def test_corpus_csv_tiered(graph_db, csv_test_dir):
    """Load ``tiered.txt`` into the 'tiered_csv' corpus and check 'tusi'.

    After loading, the lexicon entry for ``'tusi'`` must have a frequency of
    13 and the transcription ``'t.u.s.i'``.
    """
    tiered_path = os.path.join(csv_test_dir, 'tiered.txt')

    with CorpusContext('tiered_csv', **graph_db) as corpus:
        corpus.reset()

        csv_parser = inspect_csv(tiered_path)
        # The first annotation type is renamed to 'word' before loading.
        csv_parser.annotation_types[0].name = 'word'
        corpus.load(csv_parser, tiered_path)

        assert corpus.lexicon['tusi'].frequency == 13
        assert corpus.lexicon['tusi'].transcription == 't.u.s.i'
def test_inspect_example(csv_test_dir):
    """Verify what ``inspect_csv`` detects for ``example.txt``.

    The detected column delimiter must be a comma, and the annotation types
    named 'frequency', 'transcription' and 'spelling' must have the expected
    classes.  Annotation types with any other name are not checked.
    """
    csv_path = os.path.join(csv_test_dir, 'example.txt')
    detected = inspect_csv(csv_path)

    assert detected.column_delimiter == ','

    for annotation in detected.annotation_types:
        if annotation.name == 'frequency':
            assert isinstance(annotation, NumericAnnotationType)
        elif annotation.name == 'transcription':
            assert isinstance(annotation, TranscriptionAnnotationType)
            # Transcriptions must also have '.' as their delimiter.
            assert annotation.trans_delimiter == '.'
        elif annotation.name == 'spelling':
            assert isinstance(annotation, OrthographyAnnotationType)
def test_inspect_example(csv_test_dir):
    """Inspect ``example.txt`` and check the inferred parser configuration.

    Checks the column delimiter (``','``) and, for each annotation type whose
    name is 'frequency', 'transcription' or 'spelling', the class that the
    inspection chose for it.  The transcription type must additionally have
    ``'.'`` as its ``trans_delimiter``.
    """
    parser = inspect_csv(os.path.join(csv_test_dir, 'example.txt'))
    assert parser.column_delimiter == ','

    for a_type in parser.annotation_types:
        # Only the three known column names are verified; others pass through.
        if a_type.name == 'frequency':
            assert isinstance(a_type, NumericAnnotationType)
        elif a_type.name == 'transcription':
            assert isinstance(a_type, TranscriptionAnnotationType)
            assert a_type.trans_delimiter == '.'
        elif a_type.name == 'spelling':
            assert isinstance(a_type, OrthographyAnnotationType)
def test_corpus_csv_tiered(graph_db, csv_test_dir):
    """Import ``tiered.txt`` and verify the resulting lexicon entry for 'tusi'.

    The corpus 'tiered_csv' is reset, the file is loaded with its first
    annotation type renamed to 'word', and the entry ``'tusi'`` is expected
    to have frequency 13 and transcription ``'t.u.s.i'``.
    """
    source_file = os.path.join(csv_test_dir, 'tiered.txt')

    with CorpusContext('tiered_csv', **graph_db) as ctx:
        # Clear any data left in this corpus before loading.
        ctx.reset()

        tiered_parser = inspect_csv(source_file)
        tiered_parser.annotation_types[0].name = 'word'
        ctx.load(tiered_parser, source_file)

        assert ctx.lexicon['tusi'].frequency == 13
        assert ctx.lexicon['tusi'].transcription == 't.u.s.i'