Example #1
0
def test_predicate_on_multiple_fields(synthetic_dataset, reader_factory):
    """Check that the first row read with a two-field equality predicate matches both fields.

    The reader is opened with row-group shuffling disabled and an ``EqualPredicate`` built from
    ``{'id': 11, 'id2': 1}``; the first row it yields must carry exactly those values.
    """
    expected_values = {'id': 11, 'id2': 1}
    reader_context = reader_factory(synthetic_dataset.url,
                                    shuffle_row_groups=False,
                                    predicate=EqualPredicate(expected_values))
    with reader_context as reader:
        first_row = next(reader)
        # Verify the fields one by one, in the same order as they are listed above.
        assert first_row.id == expected_values['id']
        assert first_row.id2 == expected_values['id2']
Example #2
0
def test_two_column_predicate(synthetic_dataset, reader_factory):
    """Test querying two columns with a predicate on the same two columns.

    Only ``id2`` and ``partition_key`` are requested from the reader, and the predicate requires
    ``id2 == 1`` and ``partition_key == 'p_2'``. Every row that comes back must satisfy both conditions.
    """
    with reader_factory(synthetic_dataset.url, schema_fields=[TestSchema.id2, TestSchema.partition_key],
                        predicate=EqualPredicate({'id2': 1, 'partition_key': 'p_2'})) as reader:
        all_rows = list(reader)
        # An empty result would satisfy the ``.all()`` checks below vacuously (or fail them in an
        # obscure way), so require at least one matching row up front.
        assert all_rows, 'The predicate did not match any rows'
        all_id2 = np.array([row.id2 for row in all_rows])
        all_partition_key = np.array([row.partition_key for row in all_rows])
        assert (all_id2 == 1).all()
        assert (all_partition_key == 'p_2').all()
Example #3
0
def test_single_column_predicate(synthetic_dataset):
    """Test querying a single column with a predicate on the same column.

    Only the ``id`` field is requested and the predicate requires ``id == 1``. At least one row must be
    returned, every returned row must have ``id == 1``, and that id must be present in the reference data
    exposed by ``synthetic_dataset.data``.
    """
    with Reader(synthetic_dataset.url, schema_fields=[TestSchema.id], predicate=EqualPredicate({'id': 1}),
                reader_pool=ThreadPool(1)) as reader:
        rows_read = 0
        for row in reader:
            rows_read += 1
            actual = dict(row._asdict())
            # This is the check that exercises the predicate. The reference lookup below selects its row
            # by the id that was just read, so comparing those two ids on its own could never fail.
            assert actual['id'] == 1
            # Still make sure the id exists in the reference data (``next`` raises if it does not).
            expected = next(d for d in synthetic_dataset.data if d['id'] == actual['id'])
            np.testing.assert_equal(expected['id'], actual['id'])
        # Without this, a reader that yields nothing would pass the test without checking anything.
        assert rows_read > 0, 'The predicate did not match any rows'
Example #4
0
def test_predicate_with_invalid_fields(synthetic_dataset, reader_factory):
    """Reading with a predicate built from an invalid field specification must raise ``ValueError``.

    The specifications tried are: one invalid name alone, an empty specification, an invalid name
    together with ``id``, and two invalid names. For each, the error is expected on the first read.
    """
    invalid_specs = (
        {'invalid_field_name': 1},
        {},
        {'invalid_field_name': 1, 'id': 11},
        {'invalid_field_name': 1, 'invalid_field_name_2': 11},
    )

    for spec in invalid_specs:
        reader_context = reader_factory(synthetic_dataset.url,
                                        shuffle_row_groups=False,
                                        predicate=EqualPredicate(spec))
        # Only the read itself is expected to raise; constructing the reader is outside ``raises``.
        with reader_context as reader, pytest.raises(ValueError):
            next(reader)
Example #5
0
def test_invalid_schema_field(synthetic_dataset):
    """Constructing a reader with a field that is not part of the stored schema must raise ``ValueError``.

    The error message is expected to name the offending field (``bogus_key``).
    """
    # Let's assume we are selecting columns using a schema which is different from the one
    # stored in the dataset. Would expect to get a reasonable error message
    BogusSchema = Unischema('BogusSchema', [
        UnischemaField('partition_key', np.string_, (), ScalarCodec(StringType()), False),
        UnischemaField('id', np.int64, (), ScalarCodec(LongType()), False),
        UnischemaField('bogus_key', np.int32, (), ScalarCodec(ShortType()), False)])

    expected_values = {'bogus_key': 11, 'id': 1}
    with pytest.raises(ValueError) as exc_info:
        Reader(synthetic_dataset.url, schema_fields=BogusSchema.fields.values(),
               shuffle_options=ShuffleOptions(False),
               predicate=EqualPredicate(expected_values), reader_pool=ThreadPool(1))

    # ``exc_info`` is pytest's ExceptionInfo wrapper, and its ``str()`` is not the exception message.
    # Inspect the raised exception itself via ``.value``.
    assert 'bogus_key' in str(exc_info.value)
Example #6
0
def test_predicate_with_invalid_fields(synthetic_dataset):
    """Reading with a predicate built from an invalid field specification must raise ``ValueError``.

    The specifications tried are: one invalid name alone, an empty specification, an invalid name
    together with ``id``, and two invalid names. A fresh single-worker thread pool reader is created
    for each specification, and the error is expected on the first read.
    """
    invalid_specs = (
        {'invalid_field_name': 1},
        {},
        {'invalid_field_name': 1, 'id': 11},
        {'invalid_field_name': 1, 'invalid_field_name_2': 11},
    )

    for spec in invalid_specs:
        reader_context = Reader(synthetic_dataset.url,
                                shuffle_options=ShuffleOptions(False),
                                predicate=EqualPredicate(spec),
                                reader_pool=ThreadPool(1))
        # Only the read itself is expected to raise; constructing the reader is outside ``raises``.
        with reader_context as reader, pytest.raises(ValueError):
            next(reader)
Example #7
0
def test_invalid_schema_field(synthetic_dataset, reader_factory):
    """Creating a reader with a schema that differs from the stored one must raise ``ValueError``.

    The columns are selected through a schema that is different from the one stored in the dataset.
    The resulting error message is required to mention ``bogus_key``.
    """
    mismatched_fields = [
        UnischemaField('partition_key', np.string_, (), ScalarCodec(StringType()), False),
        UnischemaField('id', np.int64, (), ScalarCodec(LongType()), False),
        UnischemaField('bogus_key', np.int32, (), ScalarCodec(ShortType()), False),
    ]
    bogus_schema = Unischema('BogusSchema', mismatched_fields)
    predicate_spec = {'bogus_key': 11, 'id': 1}

    # Both the predicate and the reader are constructed inside ``raises``, as either may be the
    # point where the mismatch is reported.
    with pytest.raises(ValueError, match='bogus_key'):
        reader_factory(synthetic_dataset.url,
                       schema_fields=bogus_schema.fields.values(),
                       shuffle_row_groups=False,
                       predicate=EqualPredicate(predicate_spec))
Example #8
0
def test_single_column_predicate(synthetic_dataset, reader_factory):
    """Read only the ``id`` column with the predicate ``id == 1`` and expect exactly one row with id 1."""
    reader_context = reader_factory(synthetic_dataset.url,
                                    schema_fields=[TestSchema.id],
                                    predicate=EqualPredicate({'id': 1}))
    with reader_context as reader:
        matching_rows = list(reader)
        # Check the row count first so that indexing below cannot fail on an empty result.
        assert len(matching_rows) == 1
        assert matching_rows[0].id == 1