Example #1
0
    def test_detect_types(self):
        """Check ``pr.detect_types`` against an endless stream of one record.

        Every field of the sample record holds a string, and each field name
        is also the type label the test expects to be reported for it.
        """
        sample = {
            "null": "None",
            "bool": "false",
            "int": "1",
            "float": "1.5",
            "text": "Iñtërnâtiônàližætiøn",
            "date": "5/4/82",
            "time": "2:30",
            "datetime": "5/4/82 2pm",
        }

        # Default call: the stream never ends, so the reported count shows
        # how many records were examined before detection stopped.
        stream, report = pr.detect_types(it.repeat(sample))
        nt.assert_equal(17, report["count"])
        nt.assert_equal(Decimal("0.95"), report["confidence"])
        nt.assert_true(report["accurate"])

        # Each field is expected to be typed as its own name.
        expected = {field: field for field in sample}
        detected = {entry["id"]: entry["type"] for entry in report["types"]}
        nt.assert_equal(expected, detected)

        # The returned iterator must still yield the original record.
        nt.assert_equal(sample, next(stream))

        # Second positional argument 0.99 -- presumably a stricter confidence
        # threshold (TODO confirm against detect_types' signature).
        _, strict = pr.detect_types(stream, 0.99)
        nt.assert_equal(100, strict["count"])
        nt.assert_equal(Decimal("0.97"), strict["confidence"])
        nt.assert_false(strict["accurate"])

        # A finite two-record input is reported as not accurate.
        _, short = pr.detect_types([sample, sample])
        nt.assert_equal(2, short["count"])
        nt.assert_equal(Decimal("0.87"), short["confidence"])
        nt.assert_false(short["accurate"])
Example #2
0
    def test_detect_types(self):
        """Verify the summary returned by ``pr.detect_types`` for string data.

        One record of string values is fed in three ways: as an endless
        stream, as the same stream with a second argument of 0.99, and as a
        two-item list.
        """
        row = {
            'null': 'None',
            'bool': 'false',
            'int': '1',
            'float': '1.5',
            'text': 'Iñtërnâtiônàližætiøn',
            'date': '5/4/82',
            'time': '2:30',
            'datetime': '5/4/82 2pm',
        }
        wanted_types = {
            'null': 'null',
            'bool': 'bool',
            'int': 'int',
            'float': 'float',
            'text': 'text',
            'date': 'date',
            'time': 'time',
            'datetime': 'datetime',
        }

        endless = it.repeat(row)
        endless, outcome = pr.detect_types(endless)
        nt.assert_equal(17, outcome['count'])
        nt.assert_equal(Decimal('0.95'), outcome['confidence'])
        nt.assert_true(outcome['accurate'])

        # Collapse the list of {'id', 'type'} entries into a flat mapping.
        found_types = {}
        for item in outcome['types']:
            found_types[item['id']] = item['type']

        nt.assert_equal(wanted_types, found_types)
        # The iterator handed back must still produce the input record.
        nt.assert_equal(row, next(endless))

        # 0.99 is passed positionally; presumably a confidence threshold
        # (TODO confirm against detect_types' signature).
        _, outcome = pr.detect_types(endless, 0.99)
        nt.assert_equal(100, outcome['count'])
        nt.assert_equal(Decimal('0.97'), outcome['confidence'])
        nt.assert_false(outcome['accurate'])

        _, outcome = pr.detect_types([row, row])
        nt.assert_equal(2, outcome['count'])
        nt.assert_equal(Decimal('0.87'), outcome['confidence'])
        nt.assert_false(outcome['accurate'])
Example #3
0
    def test_detect_types(self):
        """Assert count, confidence, accuracy and types from ``pr.detect_types``.

        The fixture record maps each expected type name to a string value of
        that kind, so the expected id-to-type mapping is the identity mapping
        over the record's keys.
        """
        fixture = {
            'null': 'None',
            'bool': 'false',
            'int': '1',
            'float': '1.5',
            'text': 'Iñtërnâtiônàližætiøn',
            'date': '5/4/82',
            'time': '2:30',
            'datetime': '5/4/82 2pm',
        }

        remaining, summary = pr.detect_types(it.repeat(fixture))
        nt.assert_equal(17, summary['count'])
        nt.assert_equal(Decimal('0.95'), summary['confidence'])
        nt.assert_true(summary['accurate'])

        # Every key is expected to be reported with itself as the type.
        by_name = {key: key for key in fixture}
        reported = {t['id']: t['type'] for t in summary['types']}
        nt.assert_equal(by_name, reported)

        # Detection must hand back an iterator that still yields the record.
        nt.assert_equal(fixture, next(remaining))

        # Positional 0.99: presumably a confidence threshold (TODO confirm).
        _, summary = pr.detect_types(remaining, 0.99)
        nt.assert_equal(100, summary['count'])
        nt.assert_equal(Decimal('0.97'), summary['confidence'])
        nt.assert_false(summary['accurate'])

        # Only two records are available here.
        pair = [fixture, fixture]
        _, summary = pr.detect_types(pair)
        nt.assert_equal(2, summary['count'])
        nt.assert_equal(Decimal('0.87'), summary['confidence'])
        nt.assert_false(summary['accurate'])
Example #4
0
def test_csv_columns_seq(data):
    """Draw a headerless 40-line CSV and check the detected column types.

    The columns are generated from a text, an integer and a bounded float
    strategy, in that order; the detected types must come back as
    ``["text", "int", "float"]``.
    """
    # Text cells are restricted to ASCII letters and digits.
    alphanumeric = (
        string.ascii_lowercase + string.ascii_uppercase + string.digits
    )
    column_strategies = [
        st.text(min_size=1, max_size=100, alphabet=alphanumeric),
        st.integers(),
        st.floats(min_value=1.2, max_value=100.12),
    ]

    csv_text = data.draw(csv(columns=column_strategies, lines=40))
    rows = csv2records(csv_text, has_header=False)
    report = detect_types(rows)[1]
    inferred = [entry["type"] for entry in report["types"]]

    assert len(rows) == 40
    assert inferred == ["text", "int", "float"]
Example #5
0
def test_csv_columns_and_header_seq(data):
    """Draw a 10-line CSV with a header and check types and column names.

    The detected types must be ``["text", "int", "float"]`` and the keys of
    the first parsed record must equal the header that was supplied.
    """
    # Text cells are restricted to ASCII letters and digits.
    alphanumeric = (
        string.ascii_lowercase + string.ascii_uppercase + string.digits
    )
    column_strategies = [
        st.text(min_size=1, max_size=100, alphabet=alphanumeric),
        st.integers(),
        st.floats(min_value=1.2, max_value=100.12),
    ]
    column_names = ["x", "y", "z"]

    csv_text = data.draw(
        csv(header=column_names, columns=column_strategies, lines=10)
    )
    rows = csv2records(csv_text)
    report = detect_types(rows)[1]
    inferred = [entry["type"] for entry in report["types"]]

    assert inferred == ["text", "int", "float"]

    # Key order of the first record must match the header order.
    first_row = rows[0]
    assert list(first_row) == column_names
Example #6
0
 def test_detect_types_datetimes_midnight(self):
     """Check that a midnight timestamp string is typed as ``datetime``."""
     midnight = {"foo": "2000-01-01 00:00:00"}
     _, report = pr.detect_types(it.repeat(midnight))
     wanted = [{"id": "foo", "type": "datetime"}]
     nt.assert_equal(report["types"], wanted)