Example #1
0
def test_infer_type(num_distinct_values, distinct_values, img_values, missing_vals, expected):
    """Assert that ``infer_type`` returns ``expected`` for the described field.

    A ``FieldInfo`` named ``"foo"`` with dtype ``"object"`` is built from the
    supplied distinct-value count, distinct values and image values, then
    passed to ``infer_type`` together with ``missing_vals`` and the
    module-level ``ROW_COUNT``.
    """
    candidate = FieldInfo(
        name="foo",
        dtype="object",
        num_distinct_values=num_distinct_values,
        distinct_values=distinct_values,
        image_values=img_values,
    )
    inferred = infer_type(candidate, missing_vals, ROW_COUNT)
    assert inferred == expected
Example #2
0
def test_infer_type(distinct_values, avg_words, img_values, expected):
    """Assert that ``infer_type`` returns ``expected`` for the described field.

    The field under test is a ``FieldInfo`` named ``'foo'`` with dtype
    ``'object'``, populated with the given distinct values, average word
    count and image values.
    """
    field_kwargs = {
        'name': 'foo',
        'dtype': 'object',
        'distinct_values': distinct_values,
        'avg_words': avg_words,
        'image_values': img_values,
    }
    inferred = infer_type(FieldInfo(**field_kwargs))
    assert inferred == expected
Example #3
0
def get_dataset_info_from_source(source: DataSource) -> DatasetInfo:
    """Summarise every column of ``source`` into a ``DatasetInfo``.

    For each entry in ``source.columns`` a ``FieldInfo`` is built from the
    column's dtype, distinct values, non-null values and image values as
    reported by ``source``. ``avg_words`` is filled in from
    ``source.get_avg_num_tokens`` only when ``source.is_string_type(dtype)``
    is truthy; otherwise it stays ``None``.

    Args:
        source: Data source to describe; must support ``len()`` and expose
            ``columns`` plus the per-column getters used below.

    Returns:
        A ``DatasetInfo`` holding one ``FieldInfo`` per column and the row
        count taken from ``len(source)``.
    """
    total_rows = len(source)

    def describe(column):
        # Query the source in a fixed order: dtype first, since the
        # string-type check below depends on it.
        column_dtype = source.get_dtype(column)
        distinct = source.get_distinct_values(column)
        nonnull = source.get_nonnull_values(column)
        images = source.get_image_values(column)
        if source.is_string_type(column_dtype):
            mean_words = source.get_avg_num_tokens(column)
        else:
            mean_words = None
        return FieldInfo(
            name=column,
            dtype=column_dtype,
            distinct_values=distinct,
            nonnull_values=nonnull,
            image_values=images,
            avg_words=mean_words,
        )

    return DatasetInfo(
        fields=[describe(column) for column in source.columns],
        row_count=total_rows,
    )
Example #4
0
def test_should_exclude(idx, distinct_values, dtype, name, expected):
    """Assert that ``should_exclude`` returns ``expected`` for the given field.

    A ``FieldInfo`` is built from ``name``, ``dtype`` and ``distinct_values``
    and passed to ``should_exclude`` along with ``idx``, ``dtype`` and the
    module-level ``ROW_COUNT`` and ``TARGET_NAME``.
    """
    candidate = FieldInfo(
        name=name,
        dtype=dtype,
        distinct_values=distinct_values,
    )
    excluded = should_exclude(idx, candidate, dtype, ROW_COUNT, TARGET_NAME)
    assert excluded == expected