def testGetData(self):
    """Records survive a round-trip through get_data() serialization."""
    expected = [("a", "aa"), ("b", "bb")]
    original = RecordIORecords()
    for entry in expected:
        original.insert(entry)
    restored = RecordIORecords(original.get_data())
    self.assertEqual(list(restored), expected)
def testInsertNotDeleted(self):
    """Replacing a 3-chunk entry with a 2-chunk one leaves stale chunks
    reported by not_deleted() while the serialized data only keeps the
    winning entries."""
    records = RecordIORecords()
    records.insert(("a", "aa"))
    records.insert(("b", 0, 3, 3, "bb"))
    records.insert(("b", 0, 2, 2, "bb"))
    reloaded = RecordIORecords(records.get_data())
    self.assertEqual([("a", "aa"), ("b", 0, 2, 2, "bb")], list(reloaded))
    self.assertEqual([("b", 1, 3, 3), ("b", 2, 3, 3)],
                     list(records.not_deleted()))
def testSplit(self):
    """split() divides five records into a lo half, a hi half, and the
    middle entry that starts the hi half."""
    records = RecordIORecords()
    self.insertABC(records)
    for entry in (("d", "dd"), ("e", "ee")):
        records.insert(entry)
    lo_data, hi_data, middle = records.split()
    self.assertEqual(middle, ("d", "dd"))
    self.assertEqual([("a", "aa"), ("b", "bb"), ("c", "cc")],
                     list(RecordIORecords(lo_data)))
    self.assertEqual([("d", "dd"), ("e", "ee")],
                     list(RecordIORecords(hi_data)))
def testDelete(self):
    """A 1-tuple insert marks a key deleted; deletions survive
    serialization, and deleting a missing key returns False."""
    records = RecordIORecords()
    self.insertABC(records)
    # Delete "b", then re-add it: iteration still shows all three.
    self.assertTrue(records.insert(("b", )))
    records.insert(("b", "bb"))
    self.assertEqual([("a", "aa"), ("b", "bb"), ("c", "cc")],
                     list(records))
    # Delete "b" again; deleting an absent key reports False.
    self.assertTrue(records.insert(("b", )))
    self.assertFalse(records.insert(("d", )))
    reloaded = RecordIORecords(records.get_data())
    self.assertEqual([("a", "aa"), ("c", "cc")], list(reloaded))
def testInsertSplitDataBigToSmall(self):
    """A plain entry replaces an existing 3-chunk split entry for the
    same key."""
    records = RecordIORecords()
    records.insert(("a", "aa"))
    for chunk_index in range(3):
        records.insert(("b", chunk_index, 3, 3, "bb"))
    records.insert(("c", "cc"))
    records.insert(("b", "bb"))
    reloaded = RecordIORecords(records.get_data())
    self.assertEqual([("a", "aa"), ("b", "bb"), ("c", "cc")],
                     list(reloaded))
def commit_to_queue_(self):
    """Adds all pending changes to the task queues for async commits.

    Entries whose shard cannot be found, or whose taskqueue RPC fails,
    are kept in self.updates for a later retry.

    :return: Yields all shard names that need to be updated.
    :raises RecordIOWriterNotCompletedError: if any entries could not be
        enqueued.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
        self.name, self.updates):
        self.db_search += 1
        if shard_name is None:
            # No shard covers these keys yet; keep them for a retry.
            for entry in key_values:
                key_values_not_added.insert(entry)
        else:
            # Enqueue the shard's entries in bounded batches.
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload, method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))
    for rpc, key_values, shard_name in rpcs:
        try:
            rpc.get_result()
            yield shard_name
        except Exception:
            # Was a bare "except:"; narrowed so SystemExit and
            # KeyboardInterrupt are not swallowed. Failed chunks are
            # kept so they can be committed later.
            for entry in key_values:
                key_values_not_added.insert(entry)
    self.updates = key_values_not_added
    if len(self.updates):
        raise RecordIOWriterNotCompletedError(len(self.updates))
def init(self):
    """Lazily initializes the records container and the shard key range.

    Does nothing if the shard has already been initialized.
    """
    if hasattr(self, "records_"):
        return
    factory = RecordIORecordsZipped if self.compressed else RecordIORecords
    self.records_ = factory(self.data)
    self.loHi_ = RecordIOShard.lo_hi_from_key(self.key().name())
def testInsertSplitDataSmallToBig(self):
    """A 3-chunk split entry replaces an existing plain entry for the
    same key."""
    records = RecordIORecords()
    self.insertABC(records)
    for chunk_index in range(3):
        records.insert(("b", chunk_index, 3, 3, "bb"))
    expected = [("a", "aa"),
                ("b", 0, 3, 3, "bb"),
                ("b", 1, 3, 3, "bb"),
                ("b", 2, 3, 3, "bb"),
                ("c", "cc")]
    self.assertEqual(expected, list(records))
def __init__(self, name):
    """Creates a RecordIOWriter.

    :param name: The name of the RecordIO. The urllib quoted name is not
        allowed to be longer than 64 characters.
    :raises ValueError: if the urllib-quoted name exceeds MAX_KEY_LENGTH.
    """
    # Quote once instead of twice (the original recomputed it for the
    # error message).
    quoted_name = urllib.quote(name)
    if len(quoted_name) > MAX_KEY_LENGTH:
        raise ValueError(
            "Max urllib.quote(name) length is %d: len('%s') is %d" %
            (MAX_KEY_LENGTH, name, len(quoted_name)))
    self.name = name
    # Pending key/value changes not yet committed to any shard.
    self.updates = RecordIORecords()
    self.pending_worker_tasks = []
    # Datastore operation counters (search/get/put), for diagnostics.
    self.db_search = 0
    self.db_get = 0
    self.db_put = 0
def testInsertGetAndRead(self):
    """Covers len(), iteration, key lookup, containment, and ranged
    read() on a RecordIORecords instance."""
    records = RecordIORecords()
    self.insertABC(records)
    all_entries = [("a", "aa"), ("b", "bb"), ("c", "cc")]
    self.assertEqual(len(records), 3)
    self.assertEqual(all_entries, list(records))
    # Overwriting an existing key keeps the count and updates the value.
    records.insert(("b", "new"))
    self.assertEqual(len(records), 3)
    self.assertEqual(records["b"], ("b", "new"))
    self.assertTrue("a" in records)
    self.assertFalse("z" in records)
    records.insert(("b", "bb"))
    # read() without bounds, with enclosing bounds, and with a narrow
    # [lo, hi) window.
    self.assertEqual(all_entries, list(records.read()))
    self.assertEqual(all_entries, list(records.read(("", ), ("d", ))))
    self.assertEqual([("b", "bb")], list(records.read(("b", ), ("c", ))))
def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    Entries whose shard vanished mid-commit (split/merged by a
    concurrent writer) are collected and retried on the next attempt.

    :param retries: How many times a commit_sync should be retried in
        case of datastore collisions.
    :param retry_timeout: The amount of second to wait before the next
        retry.
    :raises RecordIOWriterNotCompletedError: if entries remain
        uncommitted after all retries.
    """
    if not len(self.updates):
        return
    for attempt in range(retries + 1):
        # Entries that could not be committed this attempt; becomes the
        # new self.updates for the next retry.
        shard_does_not_exist = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
            self.name, self.updates):
            self.db_search += 1
            if shard_name == None and key_values:
                # No shard currently covers these keys; retry later.
                logging.debug(
                    "RecordIO %s: No shard found for:\n%s -> %s" %
                    (self.name, SPLIT_CHAR.join(RecordIOShard.entry_key(
                        key_values[0])), key_values[0][:-1]))
                for entry in key_values:
                    shard_does_not_exist.insert(entry)
            else:
                # (lo, hi) ranges of the two shards produced by the most
                # recent shard split, if any.
                lo_just_split = None
                hi_just_split = None
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_WRITE_BATCH_SIZE):
                    if lo_just_split and hi_just_split and key_values_chunk:
                        # The previous chunk's commit split the shard;
                        # redirect this chunk to whichever half now
                        # covers its first key.
                        if RecordIORecords.in_range(key_values_chunk[0],
                                                    lo=lo_just_split[0],
                                                    hi=lo_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name, lo=lo_just_split[0],
                                hi=lo_just_split[1])
                        elif RecordIORecords.in_range(key_values_chunk[0],
                                                      lo=hi_just_split[0],
                                                      hi=hi_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name, lo=hi_just_split[0],
                                hi=hi_just_split[1])
                    not_deleted = None
                    try:
                        not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                            shard_name, key_values_chunk)
                    except RecordIOShardDoesNotExistError:
                        # Shard was split/merged by someone else; keep
                        # the chunk for the next attempt.
                        logging.debug("Shard does not exist:\n" + shard_name)
                        lo_just_split = None
                        hi_just_split = None
                        for entry in key_values_chunk:
                            shard_does_not_exist.insert(entry)
                    if not_deleted:
                        # Stale chunks of replaced split entries live in
                        # other shards; issue their deletions too.
                        for to_delete_shard_name, to_delete_key_values in (
                            RecordIOShard.get_shards_for_key_values(
                                self.name, not_deleted)):
                            self.db_search += 1
                            try:
                                self.commit_shard_(to_delete_shard_name,
                                                   to_delete_key_values)
                            except RecordIOShardDoesNotExistError:
                                logging.debug("Shard does not exist:\n" +
                                              shard_name)
                                for entry in to_delete_key_values:
                                    shard_does_not_exist.insert(entry)
        self.updates = shard_does_not_exist
        if len(self.updates):
            if attempt == retries:
                raise RecordIOWriterNotCompletedError(len(self.updates))
            else:
                logging.debug("Commit attempt %d failed" % attempt)
                time.sleep(retry_timeout)
        else:
            return