def test_fetches_line_content_for_key(sample_data):
    """Fetching key 'A011' yields exactly one row with the expected fields."""
    expected_row = [
        'A011',
        '',
        'A011',
        'Paratyphoid fever A',
        'Paratyphoid fever A',
        'Paratyphoid fever A',
    ]

    # Materialise whatever iterable ``fetch`` hands back before comparing.
    fetched = list(weetabix.Reader(sample_data).fetch('A011'))

    assert fetched == [expected_row], 'returned expected row content'
def test_gets_correct_number_of_lines_for_each_key(sample_data):
    """Each listed key fetches the listed number of rows from the sample data."""
    # (key, number of rows the reader should return for that key)
    expected_counts = (
        ('A010', 2),
        ('A011', 1),
        ('A012', 1),
        ('A013', 1),
        ('A014', 1),
        ('A02', 2),
        ('W5622', 1),
    )
    reader = weetabix.Reader(sample_data)

    for key, wanted in expected_counts:
        # Count by iterating; works for any iterable returned by ``fetch``.
        found = sum(1 for _ in reader.fetch(key))
        assert found == wanted, (
            'found expected number of rows for key {}'.format(key)
        )
def test_writer_indexes_tsv(sample_data_tsv):
    """Index a tab-delimited file with the writer, then read a row back."""
    # Build the index from column 1, skipping one leading line.
    indexer = weetabix.Writer(sample_data_tsv, skip_lines=1, delimiter='\t')
    indexer.make_index(1)

    indexed_keys = list(indexer.get_entries())
    assert indexed_keys == ['2', '1'], (
        'Indexed correct keys for tab delimited file'
    )

    # The reader is created only after the index has been built above.
    row_iter = weetabix.Reader(sample_data_tsv).fetch('2')
    leading_row = next(row_iter)
    assert len(leading_row) == 5, 'Reader was able to parse tab delimited file'
def test_nonstrict_mode_returns_empty_list_for_absent_key(sample_data):
    """With ``strict=False``, fetching 'not_a_key' returns an empty list."""
    lenient_reader = weetabix.Reader(sample_data)

    assert lenient_reader.fetch('not_a_key', strict=False) == []
def test_strict_mode_fails_if_key_not_in_index(sample_data):
    """With ``strict=True``, fetching 'not_a_key' raises ``KeyError``."""
    # Construct the reader outside the ``raises`` block so that only the
    # fetch call can satisfy the expectation: a KeyError raised while
    # building the reader must surface as a failure, not a spurious pass.
    reader = weetabix.Reader(sample_data)

    with pytest.raises(KeyError):
        reader.fetch('not_a_key', strict=True)
def test_uses_secondary_index(sample_data, secondary_index):
    """A reader given ``index_fn`` lists the entries 'A' and 'W'."""
    indexed_reader = weetabix.Reader(sample_data, index_fn=secondary_index)

    entries = list(indexed_reader.get_entries())

    assert len(entries) == 2
    assert entries == ['A', 'W']