def testSplit(self):
    """Split a zipped record set and verify the contents of both halves.

    The set is filled through ``self.insertABC`` and then receives two
    more entries, "d" and "e". After ``split()`` the test expects:

    * the middle marker to start with "e",
    * the lower half to hold exactly the keys a, b, c and d,
    * a read of the lower half from ("",) to ("d",) to return only
      a, b and c with the values ("aa",), ("bb",) and ("cc",),
    * the upper half to hold only "e".
    """
    zipped = RecordIORecordsZipped()
    self.insertABC(zipped)
    # "d" gets half a ZIP_CHUNKS worth of data, "e" a full one.
    for key, size in (("d", ZIP_CHUNKS / 2), ("e", ZIP_CHUNKS)):
        zipped.insert((key, test_helper.uncompressableString(size)))

    lower_raw, upper_raw, split_point = zipped.split()
    lower = RecordIORecordsZipped(lower_raw)
    upper = RecordIORecordsZipped(upper_raw)
    self.assertEqual(split_point[0], "e")

    lower_keys = sorted(self.getResult(lower).keys())
    self.assertEqual(["a", "b", "c", "d"], lower_keys)

    expected_below_d = {"a": ("aa",), "b": ("bb",), "c": ("cc",)}
    self.assertEqual(expected_below_d,
                     self.getResult(lower.read(("",), ("d",))))

    self.assertEqual(["e"], self.getResult(upper).keys())
def testTaskQueue(self):
    """Write via the async task queue and read everything back.

    Inserts enough entries of ``MAX_ENTRY_SIZE - 1`` bytes to exceed
    ``MAX_BLOB_SIZE``, commits asynchronously, then replays every task
    found on the "recordio-writer" queue through ``WriteHandler``.
    Afterwards the data must span more than one shard and a
    ``RecordIOReader`` must return every entry unchanged.
    """
    writer = RecordIOWriter("test")
    writer.create(compressed=False)
    test_value = test_helper.uncompressableString(MAX_ENTRY_SIZE - 1)
    # Explicit floor division: the result feeds range(), so it must be an
    # integer regardless of how "/" is configured for this module.
    entries_to_write = MAX_BLOB_SIZE // MAX_ENTRY_SIZE + 1
    for i in range(entries_to_write):
        writer.insert(str(i), test_value)
    writer.commit_async()

    taskq = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
    for task in taskq.GetTasks("recordio-writer"):
        url = task["url"]
        parsed = urlparse.parse_qs(base64.b64decode(task["body"]))
        # parse_qs maps every name to a list of values; the handler is
        # given only the first value for each name.
        args = dict((name, values[0]) for name, values in parsed.items())
        test_helper.requestGet(WriteHandler(), url, args)

    # A real unittest assertion instead of a bare ``assert``: it is not
    # stripped under ``python -O`` and reports the observed count.
    shard_count = sum(1 for _ in RecordIOShard.all())
    self.assertTrue(shard_count > 1,
                    "expected more than one shard, got %d" % shard_count)

    reader = RecordIOReader("test")
    result = dict((key, value) for key, value in reader)
    self.assertEqual(len(result), entries_to_write)
    for i in range(entries_to_write):
        self.assertEqual(result[str(i)], test_value, "Not equal")
def write2MBAndReplace(self, compressed):
    """Write a 2**21-character value, then overwrite it with a short one.

    Args:
        compressed: Forwarded to ``RecordIOWriter.create(compressed=...)``.

    First phase: after a synchronous commit every shard must carry at
    least 1000 bytes of data, there must be more than one shard and more
    than three entries, and the joined entry payloads must equal
    ``STRING + <written value>``.

    Second phase: after replacing the value with "short", the first
    shard must hold exactly one entry with payload ``STRING + "short"``,
    every further shard must be empty, each shard's data must be below
    1000 bytes, and the shard count must be between one and the count
    seen in the first phase.
    """
    payload = test_helper.uncompressableString(2**21)
    writer = RecordIOWriter("test")
    writer.create(compressed=compressed)
    writer.insert("test", payload)
    writer.commit_sync()

    pieces = []
    shards_count = 0
    for shard in RecordIOShard.all():
        self.assertTrue(len(shard.data) >= 1000)
        shards_count += 1
        # The last element of every entry is its payload fragment.
        pieces.extend(entry[-1] for entry in shard)
    entries = len(pieces)
    self.assertTrue(shards_count > 1)
    self.assertTrue(entries > 3)
    self.assertEqual("".join(pieces), STRING + payload, "read != write")

    writer.insert("test", "short")
    writer.commit_sync(retries=0)

    replaced_shards_count = 0
    for position, shard in enumerate(RecordIOShard.all()):
        if position > 0:
            # Only the first shard may still contain data.
            self.assertEqual(0, len(shard))
            for entry in shard:
                self.fail("shouldnt be iterable")
        else:
            self.assertEqual(1, len(shard))
            for entry in shard:
                self.assertEqual(STRING + "short", entry[-1])
        replaced_shards_count = position + 1
        self.assertTrue(len(shard.data) < 1000)
    self.assertTrue(replaced_shards_count > 0)
    self.assertTrue(replaced_shards_count <= shards_count)
def testSplit(self):
    """Verify how ``RecordIORecordsZipped.split()`` divides five entries.

    Entries come from ``self.insertABC`` plus "d" (half of ZIP_CHUNKS in
    size) and "e" (a full ZIP_CHUNKS). The assertions require the middle
    marker to begin with "e", the lower part to contain the keys a-d,
    a range read of the lower part from ("",) to ("d",) to yield a, b
    and c, and the upper part to contain only "e".
    """
    source = RecordIORecordsZipped()
    self.insertABC(source)

    half_chunk = test_helper.uncompressableString(ZIP_CHUNKS / 2)
    source.insert(("d", half_chunk))
    full_chunk = test_helper.uncompressableString(ZIP_CHUNKS)
    source.insert(("e", full_chunk))

    lo_data, hi_data, middle = source.split()
    lo_records = RecordIORecordsZipped(lo_data)
    hi_records = RecordIORecordsZipped(hi_data)

    self.assertEqual(middle[0], "e")

    keys_in_lo = sorted(self.getResult(lo_records).keys())
    self.assertEqual(["a", "b", "c", "d"], keys_in_lo)

    # Reading up to ("d",) is expected to leave "d" itself out.
    expected_range = {"a": ("aa",), "b": ("bb",), "c": ("cc",)}
    range_read = self.getResult(lo_records.read(("",), ("d",)))
    self.assertEqual(expected_range, range_read)

    self.assertEqual(["e"], self.getResult(hi_records).keys())
def testCommitToQueue(self):
    """Commit more than one task-queue batch and inspect the pull tasks.

    Writes ``MAX_TASKQUEUE_BATCH_SIZE / MAX_ENTRY_SIZE + 1`` entries of
    ``MAX_ENTRY_SIZE - 1`` bytes under zero-padded nine-digit keys and
    drains ``commit_to_queue_()``. The test then leases tasks from
    'recordio-queue' and expects exactly two, both tagged with the key
    name of "test", whose unmarshalled payloads together list every key
    in insertion order.
    """
    writer = RecordIOWriter("test")
    writer.create()
    chunk_size = MAX_ENTRY_SIZE - 1
    entries_to_write = MAX_TASKQUEUE_BATCH_SIZE / MAX_ENTRY_SIZE + 1
    expected_keys = [str("%09d" % n) for n in xrange(entries_to_write)]
    for key in expected_keys:
        writer.insert(key, test_helper.uncompressableString(chunk_size))

    # commit_to_queue_() is consumed only for its side effects.
    for _ in writer.commit_to_queue_():
        pass

    queue = taskqueue.Queue('recordio-queue')
    tasks = list(queue.lease_tasks(60, 100))
    self.assertEqual(len(tasks), 2)
    for task in tasks:
        self.assertEqual(task.tag, RecordIOShard.key_name("test"))

    batches = [marshal.loads(task.payload) for task in tasks]
    queued_keys = [update[0] for batch in batches for update in batch]
    self.assertEqual(expected_keys, queued_keys)

    first_value = batches[0][0][1]
    self.assertTrue(
        first_value == STRING + test_helper.uncompressableString(chunk_size))
def testCommitToQueueSplitEntries(self):
    """Commit one entry larger than MAX_ENTRY_SIZE to the pull queue.

    A value of ``MAX_ENTRY_SIZE + 1`` bytes is inserted under the key
    "test". The single leased task must be tagged with the key name of
    "test", and its unmarshalled payload must consist of two updates
    whose leading fields are ('test', 0, 2) and ('test', 1, 2) and whose
    last fields concatenate to ``STRING + <inserted value>``.
    """
    oversized = test_helper.uncompressableString(MAX_ENTRY_SIZE + 1)
    writer = RecordIOWriter("test")
    writer.create()
    writer.insert("test", oversized)

    # Drain commit_to_queue_(); its results are not inspected.
    for _ in writer.commit_to_queue_():
        pass

    queue = taskqueue.Queue('recordio-queue')
    leased = list(queue.lease_tasks(60, 100))
    self.assertEqual(len(leased), 1)
    only_task = leased[0]
    self.assertEqual(only_task.tag, RecordIOShard.key_name("test"))

    updates = marshal.loads(only_task.payload)
    # Drop the last two fields of every update to compare the rest.
    headers = [update[:-2] for update in updates]
    self.assertEqual([('test', 0, 2), ('test', 1, 2)], headers)

    reassembled = "".join(update[-1] for update in updates)
    self.assertEqual(STRING + oversized, reassembled)
def testSplit(self):
    """Split a five-entry shard and check both resulting shards.

    Keys c, a, b, d, e are inserted in that order, each with a
    ZIP_CHUNKS-sized value. After ``split()`` the lower shard must hold
    three entries (a, b, c) and the upper one two (d, e); both keep the
    name "test" and share the boundary ('d', 0, 1, 0) in ``lo_hi()``.
    """
    shard = RecordIOShard.create("test")
    inserted_keys = ["c", "a", "b", "d", "e"]
    for key in inserted_keys:
        value = test_helper.uncompressableString(ZIP_CHUNKS)
        shard.insert((key, value))

    lower, upper = shard.split()
    self.assertEqual(3, len(lower))
    self.assertEqual(2, len(upper))

    # Every inserted key must end up in one of the two shards.
    for key in inserted_keys:
        self.assertTrue(key in lower or key in upper)
    self.assertTrue(max(lower) < min(upper))

    boundary = ('d', 0, 1, 0)

    self.assertEqual("test", lower.name())
    self.assertEqual((None, boundary), lower.lo_hi())
    self.assertEqual(["a", "b", "c"], [entry[0] for entry in lower])

    self.assertEqual("test", upper.name())
    self.assertEqual((boundary, None), upper.lo_hi())
    self.assertEqual(["d", "e"], [entry[0] for entry in upper])
def testShardNamesForKeysSplit(self):
    """Check key-to-shard routing before and after splitting a shard.

    Ten entries keyed "0".."9" (2**16-sized values) are committed to one
    shard; ``get_shards_for_key_values`` must then map the queried pairs
    to that shard's key name. The shard is deleted and split into three
    committed shards, after which the pairs ("0","0").."("9","9") must
    be grouped as 0-4, 5-7 and 8-9 under the respective shard key names.
    """
    original = RecordIOShard.create("test")
    test_strings = [str(number) for number in range(10)]
    for key in test_strings:
        original.insert((key, test_helper.uncompressableString(2**16)))
    original.commit()

    unsplit_expected = {
        RecordIOShard.key_name("test"): [("0", ""), ("1", "")],
    }
    unsplit_actual = self.getResult(RecordIOShard.get_shards_for_key_values(
        "test", [("0", ""), ("1", "")]))
    self.assertEqual(unsplit_expected, unsplit_actual)

    # Replace the single shard by three: split once, then split the
    # upper half again.
    original.delete()
    first, remainder = original.split()
    second, third = remainder.split()
    for shard in (first, second, third):
        shard.commit()

    split_expected = {
        first.key().name(): [('0', '0'), ('1', '1'), ('2', '2'),
                             ('3', '3'), ('4', '4')],
        second.key().name(): [('5', '5'), ('6', '6'), ('7', '7')],
        third.key().name(): [('8', '8'), ('9', '9')],
    }
    split_actual = self.getResult(RecordIOShard.get_shards_for_key_values(
        "test", zip(test_strings, test_strings)))
    self.assertEqual(split_expected, split_actual)