def test_detect_types(self):
    """Exercise ``pr.detect_types`` on an endless stream and on a short list.

    Three scenarios are checked against the reported ``count``,
    ``confidence`` and ``accurate`` fields: the default call on an infinite
    iterator, a call with ``0.99`` as the second argument, and a call on a
    two-record list.
    """
    sample = {
        "null": "None",
        "bool": "false",
        "int": "1",
        "float": "1.5",
        "text": "Iñtërnâtiônàližætiøn",
        "date": "5/4/82",
        "time": "2:30",
        "datetime": "5/4/82 2pm",
    }

    # Default call on an infinite iterator of identical records.
    stream, report = pr.detect_types(it.repeat(sample))
    nt.assert_equal(17, report["count"])
    nt.assert_equal(Decimal("0.95"), report["confidence"])
    nt.assert_true(report["accurate"])

    # Every field is named after the type it is expected to be detected as.
    expected = {name: name for name in sample}
    detected = {entry["id"]: entry["type"] for entry in report["types"]}
    nt.assert_equal(expected, detected)

    # The returned iterator must still yield the original record.
    nt.assert_equal(sample, next(stream))

    # Same stream, with 0.99 passed as the second positional argument.
    _, report = pr.detect_types(stream, 0.99)
    nt.assert_equal(100, report["count"])
    nt.assert_equal(Decimal("0.97"), report["confidence"])
    nt.assert_false(report["accurate"])

    # A finite input of only two records.
    _, report = pr.detect_types([sample, sample])
    nt.assert_equal(2, report["count"])
    nt.assert_equal(Decimal("0.87"), report["confidence"])
    nt.assert_false(report["accurate"])
def test_detect_types(self):
    """Verify the summary returned by ``pr.detect_types`` for several inputs.

    Covers an infinite iterator with the default call, the same iterator
    with ``0.99`` as the second argument, and a list holding two records.
    """
    row = {
        'null': 'None',
        'bool': 'false',
        'int': '1',
        'float': '1.5',
        'text': 'Iñtërnâtiônàližætiøn',
        'date': '5/4/82',
        'time': '2:30',
        'datetime': '5/4/82 2pm',
    }
    endless = it.repeat(row)

    # --- default call ---
    endless, summary = pr.detect_types(endless)
    nt.assert_equal(17, summary['count'])
    nt.assert_equal(Decimal('0.95'), summary['confidence'])
    nt.assert_true(summary['accurate'])

    wanted = {
        'null': 'null',
        'bool': 'bool',
        'int': 'int',
        'float': 'float',
        'text': 'text',
        'date': 'date',
        'time': 'time',
        'datetime': 'datetime',
    }
    found = {}
    for item in summary['types']:
        found[item['id']] = item['type']
    nt.assert_equal(wanted, found)

    # The iterator handed back should keep producing the same record.
    nt.assert_equal(row, next(endless))

    # --- 0.99 as the second argument ---
    _, summary = pr.detect_types(endless, 0.99)
    nt.assert_equal(100, summary['count'])
    nt.assert_equal(Decimal('0.97'), summary['confidence'])
    nt.assert_false(summary['accurate'])

    # --- only two records available ---
    _, summary = pr.detect_types([row, row])
    nt.assert_equal(2, summary['count'])
    nt.assert_equal(Decimal('0.87'), summary['confidence'])
    nt.assert_false(summary['accurate'])
def test_csv_columns_seq(data):
    """Check detected column types for a generated header-less CSV.

    A 40-line CSV is drawn with text, integer and float columns, parsed with
    ``has_header=False``, and the types reported by ``detect_types`` are
    compared against ``["text", "int", "float"]``.
    """
    # ascii_letters is exactly ascii_lowercase + ascii_uppercase.
    alphanumeric = string.ascii_letters + string.digits
    column_strategies = [
        st.text(min_size=1, max_size=100, alphabet=alphanumeric),
        st.integers(),
        st.floats(min_value=1.2, max_value=100.12),
    ]

    csv_text = data.draw(csv(columns=column_strategies, lines=40))
    records = csv2records(csv_text, has_header=False)

    type_report = detect_types(records)[1]
    inferred = [entry["type"] for entry in type_report["types"]]

    assert len(records) == 40
    assert inferred == ["text", "int", "float"]
def test_csv_columns_and_header_seq(data):
    """Check detected types and header round-trip for a generated CSV.

    A 10-line CSV with header ``x, y, z`` and text/integer/float columns is
    drawn and parsed; the detected types and the keys of the first record
    are then compared with the expected values.
    """
    # ascii_letters is exactly ascii_lowercase + ascii_uppercase.
    alphanumeric = string.ascii_letters + string.digits
    column_strategies = [
        st.text(min_size=1, max_size=100, alphabet=alphanumeric),
        st.integers(),
        st.floats(min_value=1.2, max_value=100.12),
    ]
    header = ["x", "y", "z"]

    generated = csv(header=header, columns=column_strategies, lines=10)
    csv_text = data.draw(generated)
    records = csv2records(csv_text)

    type_report = detect_types(records)[1]
    inferred = [entry["type"] for entry in type_report["types"]]
    assert inferred == ["text", "int", "float"]

    first_row = records[0]
    assert list(first_row.keys()) == header
def test_detect_types_datetimes_midnight(self):
    """Check that a midnight timestamp string is reported as ``datetime``.

    Feeds an endless stream of ``{"foo": "2000-01-01 00:00:00"}`` records to
    ``pr.detect_types`` and compares the reported ``types`` list.
    """
    midnight_row = {"foo": "2000-01-01 00:00:00"}
    _stream, report = pr.detect_types(it.repeat(midnight_row))

    expected_types = [{"id": "foo", "type": "datetime"}]
    nt.assert_equal(report["types"], expected_types)