def test_csv_column_reader_row_key_column_key(self):
    # Parse "row_key,column_key,timestamp" records with the
    # csv_row_key_column_key format function and compare the result with
    # the equivalent hand-built Column objects.
    source = StringIO(u"""
        one,ten,683793200
        five,twenty,683793300
        three,forty-one,683793385
        ten,a-big-number,683793385
    """)
    parsed = list(csv_column_reader(source, formatfunc=csv_row_key_column_key))

    expected = [
        Column(row_key=row_key, column_key=column_key, timestamp=timestamp)
        for row_key, column_key, timestamp in (
            ("one", "ten", 683793200),
            ("five", "twenty", 683793300),
            ("three", "forty-one", 683793385),
            ("ten", "a-big-number", 683793385),
        )
    ]
    self.assertEqual(expected, parsed)
def test_serialize_row_key_column_key(self):
    # Serialize an import request holding one key-addressed column, decode
    # the bytes with the generated protobuf class, and check every field.
    field = get_schema(True, True)
    columns = [Column(row_key="one", column_key="two", timestamp=3)]
    payload = _ImportRequest(field, 0, columns).to_protobuf(False)
    self.assertIsNotNone(payload)

    decoded = internal.ImportRequest()
    decoded.ParseFromString(payload)
    # assertEqual is used instead of the assertEquals alias, which is
    # deprecated and no longer exists in Python 3.12+.
    self.assertEqual("foo", decoded.Index)
    self.assertEqual("bar", decoded.Field)
    # Only the key lists are populated; the ID lists stay empty.
    self.assertEqual([], decoded.RowIDs)
    self.assertEqual([], decoded.ColumnIDs)
    self.assertEqual(["one"], decoded.RowKeys)
    self.assertEqual(["two"], decoded.ColumnKeys)
    self.assertEqual([3], decoded.Timestamps)
def test_serialize_row_id_column_id(self):
    # Serialize an import request holding one ID-addressed column, decode
    # the bytes with the generated protobuf class, and check every field.
    field = get_schema(False, False)
    columns = [Column(row_id=1, column_id=2, timestamp=3)]
    payload = _ImportRequest(field, 0, columns).to_protobuf(False)
    self.assertIsNotNone(payload)

    decoded = internal.ImportRequest()
    decoded.ParseFromString(payload)
    # assertEqual is used instead of the assertEquals alias, which is
    # deprecated and no longer exists in Python 3.12+.
    self.assertEqual("foo", decoded.Index)
    self.assertEqual("bar", decoded.Field)
    # Only the ID lists are populated; the key lists stay empty.
    self.assertEqual([1], decoded.RowIDs)
    self.assertEqual([2], decoded.ColumnIDs)
    self.assertEqual([], decoded.RowKeys)
    self.assertEqual([], decoded.ColumnKeys)
    self.assertEqual([3], decoded.Timestamps)
def test_csvbititerator_customtimefunc(self):
    # Feed csv_column_reader a custom timefunc that turns ISO-like UTC
    # strings into epoch seconds, then compare against known timestamps.

    class UtcTzinfo(datetime.tzinfo):
        # Minimal fixed-offset tzinfo representing UTC.
        ZERO = datetime.timedelta(0)

        def utcoffset(self, dt):
            return UtcTzinfo.ZERO

        def dst(self, dt):
            return UtcTzinfo.ZERO

        def tzname(self, dt):
            return "UTC"

    def timefunc_utcstr(timeval):
        parsed = datetime.datetime.strptime(timeval, '%Y-%m-%dT%H:%M:%S')
        as_utc = parsed.replace(tzinfo=UtcTzinfo())
        return calendar.timegm(as_utc.timetuple())

    source = StringIO(u"""
        1,10,1991-09-02T06:33:20
        5,20,1991-09-02T06:35:00
        3,41,1991-09-02T06:36:25
        10,10485760,1991-09-02T06:36:25
    """)
    rows = list(csv_column_reader(source, timefunc=timefunc_utcstr))

    expected = (
        (1, 10, 683793200),
        (5, 20, 683793300),
        (3, 41, 683793385),
        (10, 10485760, 683793385),
    )
    self.assertEqual(len(rows), 4)
    for actual, (row_id, column_id, timestamp) in zip(rows, expected):
        self.assertEqual(
            actual,
            Column(row_id=row_id, column_id=column_id, timestamp=timestamp))
def __init__(self, file_obj, field, column_index=0, row_index=1,
             has_header=True):
    """Store the input and choose how split CSV fields become records.

    Args:
        file_obj: Iterator of input lines. It is kept as
            ``self.file_obj``; when ``has_header`` is true one item is
            consumed from it here.
        field: Object exposing ``field_type``, ``index.keys`` and (for
            non-int fields) ``keys``; these select the record builder.
        column_index: Position of the column value within a split line.
        row_index: Position of the row (or integer value) within a
            split line.
        has_header: When true, the first line is skipped.

    After construction ``self.yield_fun`` is a callable that takes the
    sequence of fields of one line and returns a ``FieldValue`` (for
    ``"int"`` fields) or a ``Column`` (for every other field type).
    """
    self.file_obj = file_obj
    if has_header:
        # Skip the header line. The ``None`` default keeps an empty
        # input from raising StopIteration out of the constructor.
        next(self.file_obj, None)

    ci = column_index
    ri = row_index

    # Keyed indexes/fields pass the raw string through as a key;
    # otherwise the string is converted to an integer ID.
    if field.field_type == "int":
        if field.index.keys:
            self.yield_fun = lambda fs: FieldValue(
                column_key=fs[ci], value=int(fs[ri]))
        else:
            self.yield_fun = lambda fs: FieldValue(
                column_id=int(fs[ci]), value=int(fs[ri]))
    elif field.index.keys:
        if field.keys:
            self.yield_fun = lambda fs: Column(
                column_key=fs[ci], row_key=fs[ri])
        else:
            self.yield_fun = lambda fs: Column(
                column_key=fs[ci], row_id=int(fs[ri]))
    elif field.keys:
        self.yield_fun = lambda fs: Column(
            column_id=int(fs[ci]), row_key=fs[ri])
    else:
        self.yield_fun = lambda fs: Column(
            column_id=int(fs[ci]), row_id=int(fs[ri]))
def test_hash(self):
    # Two Column objects built from the same field values must produce
    # the same hash.
    fields = dict(row_id=1, column_id=100, timestamp=123456)
    first = Column(**fields)
    second = Column(**fields)
    self.assertEqual(hash(first), hash(second))
def test_import_request_invalid_format(self):
    # Clearing the request's format must make serialization fail with
    # PilosaError.
    schema_field = get_schema(False, False)
    columns = [Column(row_key="one", column_key="two", timestamp=3)]
    request = _ImportRequest(schema_field, 0, columns)
    request.format = None
    with self.assertRaises(PilosaError):
        request.to_protobuf(False)