Beispiel #1
0
    def test_type_guess(self):
        # Six-column CSV fixture mixing numbers, two date layouts, booleans,
        # blanks and non-matching strings ("foo", "bar").
        raw_csv = '''
            1,   2012/2/12, 2,   02 October 2011,  yes,   1
            2,   2012/2/12, 2,   02 October 2011,  true,  1
            2.4, 2012/2/12, 1,   1 May 2011,       no,    0
            foo, bar,       1000, ,                false, 0
            4.3, ,          42,  24 October 2012,,
             ,   2012/2/12, 21,  24 December 2013, true,  1'''
        first_table = CSVTableSet(StringIO.StringIO(raw_csv)).tables[0]
        guessed = type_guess(first_table.sample)

        # One expected type per column, in column order.
        expected = [
            DecimalType(),
            DateType('%Y/%m/%d'),
            IntegerType(),
            DateType('%d %B %Y'),
            BoolType(),
            BoolType(),
        ]
        assert_equal(guessed, expected)
Beispiel #2
0
 def test_read_type_know_simple(self):
     # The second sampled row of simple.xls is expected to carry date,
     # float and string cell types without any guessing step.
     xls_fh = horror_fobj('simple.xls')
     first_table = XLSTableSet(xls_fh).tables[0]
     sampled_rows = list(first_table.sample)
     second_row = sampled_rows[1]
     cell_types = [cell.type for cell in second_row]
     assert_equal(cell_types, [DateType(None), FloatType(), StringType()])
Beispiel #3
0
    def test_read_type_guess_simple(self):
        # Guess the column types of simple.csv, then check that applying them
        # via types_processor leaves the first row typed as strings and gives
        # the third row the guessed types.
        def cell_types(row):
            return [cell.type for cell in row]

        first_table = CSVTableSet(horror_fobj('simple.csv')).tables[0]
        guessed = type_guess(first_table.sample)
        expected_types = [DateType("%Y-%m-%d"), IntegerType(), StringType()]
        assert_equal(guessed, expected_types)

        first_table.register_processor(types_processor(guessed))
        all_rows = list(first_table)
        assert_equal(cell_types(all_rows[0]), [StringType()] * 3)
        assert_equal(expected_types, cell_types(all_rows[2]))
Beispiel #4
0
 def test_type_guess_strict(self):
     # Strict type guessing over a CSV whose numeric columns use en_GB
     # formatting (thousands separators such as "100,000,000.12").
     import locale
     # setlocale() changes process-wide state; remember the current setting
     # so it can be restored and does not leak into other tests.
     previous_locale = locale.setlocale(locale.LC_ALL)
     locale.setlocale(locale.LC_ALL, 'en_GB.UTF-8')
     try:
         csv_file = StringIO.StringIO('''
         1,   2012/2/12, 2,      2,02 October 2011,"100.234354"
         2,   2012/2/12, 1.1,    0,1 May 2011,"100,000,000.12"
         foo, bar,       1500,   0,,"NaN"
         4,   2012/2/12, 42,"-2,000",24 October 2012,"42"
         ,,,,,''')
         rows = CSVTableSet(csv_file).tables[0]
         guessed_types = type_guess(rows.sample, strict=True)
         # Columns containing "foo" / "bar" are expected to fall back to
         # strings under strict guessing.
         assert_equal(guessed_types, [
             StringType(), StringType(),
             DecimalType(), IntegerType(), DateType('%d %B %Y'),
             DecimalType()])
     finally:
         locale.setlocale(locale.LC_ALL, previous_locale)
# guessing
# Open the first table of the CSV, detect the header row, and register
# processors so that later reads use the headers and start after them.
row_set = CSVTableSet(f, delimiter=delimiter).tables[0]
offset, headers = headers_guess(row_set.sample)
row_set.register_processor(headers_processor(headers))
# presumably offset + 1 skips the header row itself -- confirm against
# offset_processor's contract
row_set.register_processor(offset_processor(offset + 1))
types = type_guess(row_set.sample)
# Alternative: types = type_guess(row_set.sample, strict=True)
print('guessed types:', types)

# constructing ddl
# Build one column definition per guessed type; columns are named by their
# position (a0, a1, ...).
cols = []
for indx, typ in enumerate(types):
    if typ == StringType():
        cols.append("  `a%s` string" % (indx))
    # NOTE(review): this presumably matches only dates guessed with exactly
    # `date_format`; other date formats fall through to the error below --
    # confirm DateType equality semantics.
    elif typ == DateType(date_format):
        cols.append("  `a%s` date" % (indx))
    elif typ == DecimalType():
        cols.append("  `a%s` double" % (indx))
    elif typ == IntegerType():
        cols.append("  `a%s` int" % (indx))
    elif typ == BoolType():
        cols.append("  `a%s` boolean" % (indx))
    else:
        # "%s" (not "%indx", which parses as "%i" + literal "ndx") so the
        # message reads "column 3", not "column 3ndx".
        raise Exception("A type of column %s cannot be handled. %s " %
                        (indx, typ))
cols_str = ",\n".join(cols)

ddl = '''----------------------------------------------------------
CREATE EXTERNAL TABLE IF NOT EXISTS default.%s (
%s