def test_binarydata(self):
    # A bytes value containing NUL and 0xff bytes must come back unchanged
    # when the database is finalized with decode=False.
    trail_uuid = '12345678123456781234567812345678'
    payload = b'\x00\x01\x02\x00\xff\x00\xff'

    builder = TrailDBConstructor('testtrail', ['field1'])
    builder.add(trail_uuid, 123, [payload])
    db = builder.finalize(decode=False)

    # Only one event was added, so it is the first event of trail 0.
    first_event = list(db[0])[0]
    self.assertEqual(first_event.field1, payload)
def test_items(self):
    # Exercise raw-item access on a two-event trail:
    #   1. item -> value (get_item_value) and (field, value) -> item (get_item)
    #   2. item -> (field id, value id) -> value (get_value)
    uuid = '12345678123456781234567812345678'
    cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
    cons.add(uuid, 123, ['a', 'x' * 2048])
    cons.add(uuid, 124, ['b', 'y' * 2048])
    tdb = cons.finalize()

    # The next() builtin is used instead of the Python 2-only
    # cursor.next() method so the test runs on Python 3 as well.
    cursor = tdb.trail(0, rawitems=True)
    event = next(cursor)
    self.assertEqual(tdb.get_item_value(event.field1), 'a')
    self.assertEqual(tdb.get_item_value(event.field2), 'x' * 2048)
    self.assertEqual(tdb.get_item('field1', 'a'), event.field1)
    self.assertEqual(tdb.get_item('field2', 'x' * 2048), event.field2)
    event = next(cursor)
    self.assertEqual(tdb.get_item_value(event.field1), 'b')
    self.assertEqual(tdb.get_item_value(event.field2), 'y' * 2048)
    self.assertEqual(tdb.get_item('field1', 'b'), event.field1)
    self.assertEqual(tdb.get_item('field2', 'y' * 2048), event.field2)

    # Second pass over the same trail: split each raw item into its
    # field and value parts and resolve them with get_value().
    cursor = tdb.trail(0, rawitems=True)
    event = next(cursor)
    field = tdb_item_field(event.field1)
    val = tdb_item_val(event.field1)
    self.assertEqual(tdb.get_value(field, val), 'a')
    field = tdb_item_field(event.field2)
    val = tdb_item_val(event.field2)
    self.assertEqual(tdb.get_value(field, val), 'x' * 2048)
    event = next(cursor)
    field = tdb_item_field(event.field1)
    val = tdb_item_val(event.field1)
    self.assertEqual(tdb.get_value(field, val), 'b')
    field = tdb_item_field(event.field2)
    val = tdb_item_val(event.field2)
    self.assertEqual(tdb.get_value(field, val), 'y' * 2048)
def test_binarydata(self):
    # A string containing NUL and \xff characters must come back unchanged
    # from a database finalized with the default options.
    trail_uuid = '12345678123456781234567812345678'
    payload = '\x00\x01\x02\x00\xff\x00\xff'

    builder = TrailDBConstructor('testtrail', ['field1'])
    builder.add(trail_uuid, 123, [payload])
    db = builder.finalize()

    # Only one event was added, so it is the first event of trail 0.
    stored = list(db[0])[0].field1
    self.assertEqual(stored, payload)
def test_items(self):
    # Exercise raw-item access on a two-event trail:
    #   1. item -> value (get_item_value) and (field, value) -> item (get_item)
    #   2. item -> (field id, value id) -> value (get_value)
    trail_uuid = '12345678123456781234567812345678'
    long_x = 'x' * 2048
    long_y = 'y' * 2048
    # Expected (field1, field2) values, in event order.
    expected = [('a', long_x), ('b', long_y)]

    builder = TrailDBConstructor('testtrail', ['field1', 'field2'])
    builder.add(trail_uuid, 123, ['a', long_x])
    builder.add(trail_uuid, 124, ['b', long_y])
    db = builder.finalize()

    raw_events = db.trail(0, rawitems=True)
    for want1, want2 in expected:
        ev = next(raw_events)
        self.assertEqual(db.get_item_value(ev.field1), want1)
        self.assertEqual(db.get_item_value(ev.field2), want2)
        self.assertEqual(db.get_item('field1', want1), ev.field1)
        self.assertEqual(db.get_item('field2', want2), ev.field2)

    # Second pass over the same trail: split each raw item into its
    # field and value parts and resolve them with get_value().
    raw_events = db.trail(0, rawitems=True)
    for want1, want2 in expected:
        ev = next(raw_events)
        for item, want in ((ev.field1, want1), (ev.field2, want2)):
            field_id = tdb_item_field(item)
            value_id = tdb_item_val(item)
            self.assertEqual(db.get_value(field_id, value_id), want)
def test_cursor(self):
    # A trail cursor iterates events in order, has no len(), is exhausted
    # after one pass, and unknown UUIDs raise IndexError on lookup.
    trail_uuid = '12345678123456781234567812345678'
    builder = TrailDBConstructor('testtrail', ['field1', 'field2'])
    # Events at times 1..5 with values ('a', '1') .. ('e', '5').
    for timestamp, letter in enumerate('abcde', 1):
        builder.add(trail_uuid, timestamp, [letter, str(timestamp)])
    db = builder.finalize()

    with self.assertRaises(IndexError):
        db.get_trail_id('12345678123456781234567812345679')

    cursor = db.trail(db.get_trail_id(trail_uuid))
    with self.assertRaises(TypeError):
        len(cursor)

    expected = 1
    for ev in cursor:
        self.assertEqual(expected, int(ev.field2))
        self.assertEqual(expected, int(ev.time))
        expected += 1
    # Five events were seen, so the counter ended one past the last.
    self.assertEqual(6, expected)

    # Iterator is empty now
    self.assertEqual([], list(cursor))

    letters = [ev.field1 for ev in db.trail(db.get_trail_id(trail_uuid))]
    self.assertEqual(['a', 'b', 'c', 'd', 'e'], letters)
def setUp(self):
    # Build the 'testtrail' fixture: one trail with five events over
    # three fields, at timestamps 1..5.
    trail_uuid = '12345678123456781234567812345678'
    rows = [
        ['a', '1', 'x'],
        ['b', '2', 'x'],
        ['c', '3', 'y'],
        ['d', '4', 'x'],
        ['e', '5', 'x'],
    ]
    builder = TrailDBConstructor('testtrail', ['field1', 'field2', 'field3'])
    for timestamp, values in enumerate(rows, 1):
        builder.add(trail_uuid, timestamp, values)
    # The finalized handle is not stored on self; presumably the tests
    # reopen 'testtrail' by name -- TODO confirm.
    tdb = builder.finalize()
def setUp(self):
    # Build the 'testtrail' fixture: one trail (self.uuid) with three
    # events ('a', '1'), ('b', '2'), ('c', '3') at timestamps 1..3.
    self.uuid = '12345678123456781234567812345678'
    builder = TrailDBConstructor('testtrail', ['field1', 'field2'])
    for timestamp, letter in enumerate('abc', 1):
        builder.add(self.uuid, timestamp, [letter, str(timestamp)])
    builder.finalize()
def test_apply_blacklist(self):
    # After apply_blacklist(), blacklisted trails iterate as empty while
    # every other trail still yields its three events.
    all_uuids = [
        "02345678123456781234567812345678",
        "12345678123456781234567812345678",
        "22345678123456781234567812345678",
        "32345678123456781234567812345678",
        "42345678123456781234567812345678",
    ]
    builder = TrailDBConstructor('blacklist_testtrail', ['field1', 'field2'])
    for trail_uuid in all_uuids:
        for timestamp, letter in enumerate('abc', 1):
            builder.add(trail_uuid, timestamp, [letter, str(timestamp)])
    builder.finalize()

    db = TrailDB('blacklist_testtrail')
    banned = all_uuids[1:3]
    db.apply_blacklist(banned)

    # All (uuid, cursor) pairs are collected before any cursor is consumed.
    collected = list(db.trails(parsetime=False))
    for trail_uuid, cursor in collected:
        want = 0 if trail_uuid in banned else 3
        events = list(cursor)
        self.assertEqual(len(events), want)
def test_cursor_parsetime(self):
    # With parsetime=True, event times and time_range() come back as
    # datetime objects equal to the ones that were added.
    uuid = '12345678123456781234567812345678'
    cons = TrailDBConstructor('testtrail', ['field1'])
    events = [(datetime.datetime(2016, 1, 1, 1, 1), ['1']),
              (datetime.datetime(2016, 1, 1, 1, 2), ['2']),
              (datetime.datetime(2016, 1, 1, 1, 3), ['3'])]
    # Plain loop: add() is called for its side effect, so there is no
    # reason to build a throwaway list of its return values.
    for time, fields in events:
        cons.add(uuid, time, fields)
    tdb = cons.finalize()

    timestamps = [e.time for e in tdb.trail(0, parsetime=True)]
    self.assertIsInstance(timestamps[0], datetime.datetime)
    self.assertEqual([time for time, _ in events], timestamps)
    # The positional True asks time_range() for parsed datetimes too.
    self.assertEqual(tdb.time_range(True), (events[0][0], events[-1][0]))
def test_append(self):
    # Appending a finalized database to a constructor merges its events
    # into the same trail, ordered by time.
    trail_uuid = '12345678123456781234567812345678'

    builder = TrailDBConstructor('testtrail', ['field1'])
    builder.add(trail_uuid, 123, ['foobarbaz'])
    db = builder.finalize()

    builder = TrailDBConstructor('testtrail2', ['field1'])
    builder.add(trail_uuid, 124, ['barquuxmoo'])
    builder.append(db)
    db = builder.finalize()

    self.assertEqual(2, db.num_events)

    # Both events share one UUID, so the merged data is in the first trail.
    _, cursor = list(db.trails())[0]
    events = list(cursor)
    times = [ev.time for ev in events]
    values = [ev.field1 for ev in events]
    self.assertEqual([123, 124], times)
    self.assertEqual(['foobarbaz', 'barquuxmoo'], values)
def test_cursor_parsetime(self):
    # With parsetime=True, event times and time_range() come back as
    # datetime objects equal to the ones that were added.
    uuid = '12345678123456781234567812345678'
    cons = TrailDBConstructor('testtrail', ['field1'])
    events = [(datetime.datetime(2016, 1, 1, 1, 1), ['1']),
              (datetime.datetime(2016, 1, 1, 1, 2), ['2']),
              (datetime.datetime(2016, 1, 1, 1, 3), ['3'])]
    # Plain loop: add() is called for its side effect, so there is no
    # reason to build a throwaway list of its return values.
    for time, fields in events:
        cons.add(uuid, time, fields)
    tdb = cons.finalize()

    timestamps = [e.time for e in tdb.trail(0, parsetime=True)]
    self.assertIsInstance(timestamps[0], datetime.datetime)
    self.assertEqual([time for time, _ in events], timestamps)
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12). The positional True asks for parsed datetimes.
    self.assertEqual(tdb.time_range(True), (events[0][0], events[-1][0]))
def test_cons(self):
    # End-to-end check of a freshly constructed database: UUID <-> trail id
    # mapping, counters, membership/indexing by UUID, and event contents
    # (including a field that was omitted when the event was added).
    trail_uuid = '12345678123456781234567812345678'
    missing_uuid = '00000000000000000000000000000000'

    builder = TrailDBConstructor('testtrail', ['field1', 'field2'])
    builder.add(trail_uuid, 123, ['a'])
    builder.add(trail_uuid, 124, ['b', 'c'])
    db = builder.finalize()

    self.assertEqual(0, db.get_trail_id(trail_uuid))
    self.assertEqual(trail_uuid, db.get_uuid(0))
    self.assertEqual(1, db.num_trails)
    self.assertEqual(2, db.num_events)
    # Two fields were declared but three are reported.
    self.assertEqual(3, db.num_fields)

    all_trails = list(db.trails())
    self.assertEqual(1, len(all_trails))
    self.assertEqual(trail_uuid, all_trails[0][0])

    self.assertTrue(db[trail_uuid])
    self.assertTrue(trail_uuid in db)
    self.assertFalse(missing_uuid in db)
    with self.assertRaises(IndexError):
        db[missing_uuid]

    first, second = list(all_trails[0][1])[:2]

    self.assertEqual(123, first.time)
    self.assertEqual('a', first.field1)
    self.assertEqual('', first.field2)  # TODO: Should this be None?

    self.assertEqual(124, second.time)
    self.assertEqual('b', second.field1)
    self.assertEqual('c', second.field2)
def test_trails_selected_uuids(self):
    # Every trail yielded by trails(selected_uuids=...) must contain the
    # three events that were added for it.
    all_uuids = [
        "02345678123456781234567812345678",
        "12345678123456781234567812345678",
        "22345678123456781234567812345678",
        "32345678123456781234567812345678",
        "42345678123456781234567812345678",
    ]
    builder = TrailDBConstructor('whitelist_testtrail', ['field1', 'field2'])
    for trail_uuid in all_uuids:
        for timestamp, letter in enumerate('abc', 1):
            builder.add(trail_uuid, timestamp, [letter, str(timestamp)])
    builder.finalize()

    db = TrailDB('whitelist_testtrail')
    wanted = [all_uuids[idx] for idx in (0, 3, 4)]
    events_per_trail = 3
    for _, cursor in db.trails(selected_uuids=wanted):
        events = list(cursor)
        self.assertEqual(len(events), events_per_trail)
#!/usr/bin/python # from __future__ import print_function # from __future__ import unicode_literals # from __future__ import division # from __future__ import absolute_import # from builtins import range from uuid import uuid4 from datetime import datetime from traildb import TrailDBConstructor, TrailDB cons = TrailDBConstructor('txiny', ['username', 'action']) print("h") # for i in range(3): # uuid = uuid4().hex # username = '******' % i # for day, action in enumerate(['open', 'save', 'close']): # cons.add(uuid, datetime(2016, i + 1, day + 1), (username, action)) # cons.finalize() # for uuid, trail in TrailDB('tiny').trails(): # print(uuid, list(trail))
def setUp(self):
    # Build two fixtures that share the same pair of UUIDs:
    #   self.tdb1 ('testtrail1'): three fields, uuid1 at t=1..2, uuid2 at t=1..3
    #   self.tdb2 ('testtrail2'): four fields,  uuid2 at t=4..5, uuid1 at t=3..5
    self.uuid1 = '12345678123456781234567812345678'
    self.uuid2 = '12345678123456781234567812345679'

    builder = TrailDBConstructor('testtrail1',
                                 ['field1', 'field2', 'field3'])
    for owner, timestamp, values in (
            (self.uuid1, 1, ['a', '1', 'x']),
            (self.uuid1, 2, ['b', '2', 'x']),
            (self.uuid2, 1, ['c', '3', 'y']),
            (self.uuid2, 2, ['d', '4', 'x']),
            (self.uuid2, 3, ['e', '5', 'x'])):
        builder.add(owner, timestamp, values)
    self.tdb1 = builder.finalize()

    builder = TrailDBConstructor('testtrail2',
                                 ['field1', 'field2', 'field3', 'field4'])
    for owner, timestamp, values in (
            (self.uuid2, 4, ['a', '1', 'x', 'l']),
            (self.uuid2, 5, ['b', '2', 'x', 'm']),
            (self.uuid1, 3, ['c', '3', 'y', 'n']),
            (self.uuid1, 4, ['d', '4', 'x', 'o']),
            (self.uuid1, 5, ['e', '5', 'x', 'p'])):
        builder.add(owner, timestamp, values)
    self.tdb2 = builder.finalize()
from traildb import TrailDBConstructor, TrailDB
from uuid import uuid4
from datetime import datetime

# Build a small example TrailDB named 'tiny': three random UUIDs, each with
# an 'open', 'save' and 'close' event on consecutive days of its own month.
cons = TrailDBConstructor('tiny', ['username', 'action'])

for i in range(3):
    uuid = uuid4().hex
    # The format string needs a placeholder for i; a literal without one
    # raises "TypeError: not all arguments converted".
    username = 'user%d' % i
    for day, action in enumerate(['open', 'save', 'close']):
        cons.add(uuid, datetime(2016, i + 1, day + 1), (username, action))

cons.finalize()

# Read the database back and dump every trail. A single formatted string
# prints the same "uuid [events]" line on both Python 2 and Python 3.
for uuid, trail in TrailDB('tiny').trails():
    print('%s %s' % (uuid, list(trail)))
from traildb import TrailDBConstructor, TrailDB
from uuid import uuid4
from datetime import datetime
import random

# Build a small example TrailDB named 'tiny': three random UUIDs, each with
# an 'open', 'save' and 'close' event on consecutive days of its own month.
cons = TrailDBConstructor('tiny', ['username', 'action'])

for i in range(3):
    uuid = uuid4().hex
    # The format string needs a placeholder for i; a literal without one
    # raises "TypeError: not all arguments converted".
    username = 'user%d' % i
    for day, action in enumerate(['open', 'save', 'close']):
        # print int(random.random() * 1000)
        cons.add(uuid, datetime(2016, i + 1, day + 1), (username, action))
        # cons.add(int(random.random() * 1000), datetime(2016, i + 1, day + 1), (username, action))

cons.finalize()

# Read the database back and dump every trail. A single formatted string
# prints the same "uuid [events]" line on both Python 2 and Python 3.
for uuid, trail in TrailDB('tiny').trails():
    print('%s %s' % (uuid, list(trail)))
from __future__ import division from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import from past.utils import old_div from random import random import sys from traildb import TrailDB, TrailDBConstructor def extract(tdb, cons, sample_size): for uuid, trail in tdb.trails(): if random() < sample_size: for event in trail: cons.add(uuid, event.time, list(event)[1:]) return cons.finalize() if __name__ == '__main__': if len(sys.argv) < 3: print( 'Usage: extract_sample source_tdb destination_tdb sample_percentage' ) sys.exit(1) tdb = TrailDB(sys.argv[1]) cons = TrailDBConstructor(sys.argv[2], tdb.fields[1:]) num = extract(tdb, cons, old_div(float(sys.argv[3]), 100.)).num_trails print('Extracted %d trails to %s' % (num, sys.argv[2]))