Example #1
0
    def post(self):
        """Run the load test described by the submitted form.

        Reads its parameters from ``self.request``.  When ``delete`` is set,
        the four ``loadtest_*`` RecordIOs are deleted and the page is rendered
        through ``self.handle``.  Otherwise random entries are generated,
        written with ``do_write`` and read back with ``do_read`` for every
        selected mode, and the collected results are passed to
        ``self.handle``.
        """
        want_uncompressed = self.request.get("run_uncompressed")
        want_compressed = self.request.get("run_compressed")

        values = {}
        if want_uncompressed:
            values["run_uncompressed"] = "checked"
        if want_compressed:
            values["run_compressed"] = "checked"

        if self.request.get("delete"):
            for layout in ("single", "combined"):
                for mode in ("compressed", "uncompressed"):
                    RecordIOWriter("loadtest_" + layout + "_" + mode).delete()
            self.handle(values)
            return

        amount = int(self.request.get("entries"))
        entry_size_min = int(self.request.get("entry_size_min"))
        entry_size_max = int(self.request.get("entry_size_max"))
        entry_size_key = int(self.request.get("entry_size_key"))
        gen = StringGenerator(self.request.get("compressable"))
        # Each entry is a (random key, generated value of random length) pair.
        entries = [
            (str(random.randint(0, entry_size_key)),
             gen.next(random.randint(entry_size_min, entry_size_max)))
            for _ in xrange(amount)
        ]
        values["ran"] = True

        # Both modes in one run use the "combined" RecordIO names.
        if want_uncompressed and want_compressed:
            single = "combined"
        else:
            single = "single"

        if want_uncompressed:
            logging.info("Starting uncompressed write loadtest")
            values["write_uncompressed"] = self.do_write(
                single, False, entries)
        if want_compressed:
            logging.info("Starting compressed write loadtest")
            values["write_compressed"] = self.do_write(single, True, entries)

        def run_reads():
            # One full read pass; used for the first attempt and the retry.
            if want_uncompressed:
                logging.info("Starting uncompressed read loadtest")
                values["read_uncompressed"] = self.do_read(
                    single, False, entries)
            if want_compressed:
                logging.info("Starting compressed read loadtest")
                values["read_compressed"] = self.do_read(
                    single, True, entries)

        try:
            run_reads()
        except NotEveryThingWrittenError:
            # Wait once and repeat the whole read pass.
            logging.info("Maybe not ready to read!")
            time.sleep(5)
            run_reads()

        logging.info("Loadtests done")
        self.handle(values)
Example #2
0
 def testTaskQueue(self):
   """Commit asynchronously, run the queued writer tasks and read back.

   Writes MAX_BLOB_SIZE / MAX_ENTRY_SIZE + 1 entries, replays every task of
   the "recordio-writer" queue through WriteHandler, then checks that more
   than one shard exists and that every entry can be read again.
   """
   writer = RecordIOWriter("test")
   writer.create(compressed=False)
   test_value = test_helper.uncompressableString(MAX_ENTRY_SIZE-1)
   entries_to_write = MAX_BLOB_SIZE / MAX_ENTRY_SIZE + 1
   keys = [str(i) for i in range(entries_to_write)]
   for key in keys:
     writer.insert(key, test_value)
   writer.commit_async()

   taskq = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
   for task in taskq.GetTasks("recordio-writer"):
     url = task["url"]
     parsed = urlparse.parse_qs(base64.b64decode(task["body"]))
     # parse_qs returns a list per parameter; keep only the first value.
     args = dict((name, parsed[name][0]) for name in parsed)
     test_helper.requestGet(WriteHandler(), url, args)

   shard_count = sum(1 for _ in RecordIOShard.all())
   assert(shard_count > 1)

   reader = RecordIOReader("test")
   result = {}
   for entry_key, entry_value in reader:
     result[entry_key] = entry_value
   self.assertEqual(len(result), entries_to_write)
   for key in keys:
     self.assertEqual(result[key], test_value, "Not equal")
Example #3
0
 def do_write(self, single, compressed, entries):
     """Write ``entries`` to a load-test RecordIO and time the operation.

     :param single: Middle part of the RecordIO name
         ("loadtest_<single>_<mode>").
     :param compressed: True or False; selects the name suffix and is passed
         on to ``RecordIOWriter.create``.
     :param entries: Iterable of items whose first two elements are the key
         and the value to insert.
     :return: Tuple of (elapsed seconds, ``writer.db_stats()``).
     """
     started_at = time.time()
     mode_names = {True: "compressed", False: "uncompressed"}
     recordio_name = "loadtest_" + single + "_" + mode_names[compressed]
     writer = RecordIOWriter(recordio_name)
     writer.create(compressed=compressed)
     for entry in entries:
         key, value = entry[0], entry[1]
         writer.insert(key, value)
     writer.commit_sync(retries=10)
     elapsed = time.time() - started_at
     return elapsed, writer.db_stats()
 def do_write(self, single, compressed, entries):
   """Time how long it takes to write ``entries`` into a load-test RecordIO.

   :param single: Middle part of the RecordIO name
       ("loadtest_<single>_<mode>").
   :param compressed: True or False; picks the "compressed"/"uncompressed"
       name suffix and is forwarded to ``RecordIOWriter.create``.
   :param entries: Iterable of items indexed as entry[0] (key) and
       entry[1] (value).
   :return: Tuple of (elapsed seconds, ``writer.db_stats()``).
   """
   began = time.time()
   suffix_by_mode = {True: "compressed", False: "uncompressed"}
   target = "loadtest_" + single + "_" + suffix_by_mode[compressed]
   writer = RecordIOWriter(target)
   writer.create(compressed=compressed)
   for item in entries:
     writer.insert(item[0], item[1])
   writer.commit_sync(retries=10)
   duration = time.time() - began
   return duration, writer.db_stats()
 def get(self):
     """Flush queued RecordIO updates, or schedule writers for pending tags.

     With a "taskqueue" request parameter, tasks carrying that tag are leased
     from the 'recordio-queue' queue, grouped into batches and handed to
     ``commit_batch``.  Without it, one task per distinct pending tag is
     added to the 'recordio-writer' queue and a status line is written to
     the response.

     :raises Exception: If any ``commit_batch`` call reported a failure.
     """
     self.pull = taskqueue.Queue('recordio-queue')
     tag = self.request.get("taskqueue")
     max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE

     if not tag:
         pending_tasks = self.pull.lease_tasks(0, max_tasks_to_lease)
         handled_tags = set()
         for pending in pending_tasks:
             tag = pending.tag
             if tag not in handled_tags:
                 handled_tags.add(tag)
                 try:
                     taskqueue.Queue('recordio-writer').add(
                         RecordIOWriter.create_task_(tag, in_past=True))
                     self.response.out.write(
                         "Scheduled write for: %s<br>" % tag)
                 except (taskqueue.DuplicateTaskNameError,
                         taskqueue.TombstonedTaskError,
                         taskqueue.TaskAlreadyExistsError):
                     self.response.out.write(
                         "Already pending write for: %s<br>" % tag)
         # The lease came back full: emit a script that reloads the page
         # after 5000 ms.
         if len(pending_tasks) == max_tasks_to_lease:
             self.response.out.write(
                 "<script type=text/javascript>window.setTimeout(function() {"
                 "document.location.reload();"
                 "}, 5000);</script>")
         return

     batch = []
     batch_size = 0
     success = True
     while True:
         tasks = self.pull.lease_tasks_by_tag(LEASE_TIME_PER_BATCH,
                                              max_tasks_to_lease,
                                              tag=tag)
         for task in tasks:
             if task.was_deleted:
                 # Should never happen
                 continue
             next_key_values = marshal.loads(task.payload)
             next_size = sum(
                 recordio_chunks.size(chunk) for chunk in next_key_values)
             if next_size + batch_size < MAX_WRITE_BATCH_SIZE:
                 batch.append((task, next_key_values))
                 batch_size += next_size
                 continue
             # Adding this task would reach the limit: commit what has been
             # collected and start a new batch with the current task.  After
             # one failure no further commits are attempted.
             if success:
                 success = self.commit_batch(tag, batch)
             batch = [(task, next_key_values)]
             batch_size = next_size
         # A lease that is not full ends the loop.
         if len(tasks) != max_tasks_to_lease:
             break
     if success:
         success = self.commit_batch(tag, batch)
     if not success:
         raise Exception("RecordIO not completed")
Example #6
0
 def testCommitToQueueAndScheduleWrite(self):
   """commit_async() leaves exactly one task in the "recordio-writer" queue.

   The task must target /recordio/write and carry the quoted shard key name
   as its "taskqueue" parameter.
   """
   updater = RecordIOWriter("test")
   updater.create()
   updater.insert("a", "")
   updater.commit_async()

   stub = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
   tasks = stub.GetTasks("recordio-writer")
   self.assertEqual(len(tasks), 1)
   scheduled = tasks[0]
   self.assertEqual(scheduled["url"], "/recordio/write")
   expected_body = "taskqueue=" + urllib.quote(RecordIOShard.key_name("test"))
   self.assertEqual(base64.b64decode(scheduled["body"]), expected_body)
 def get(self):
   """Apply queued updates for one tag, or schedule writes for pending tags.

   If the request has a "taskqueue" parameter, tasks with that tag are leased
   from the 'recordio-queue' queue, collected into batches and passed to
   ``commit_batch``.  Otherwise every distinct tag among the leased pending
   tasks gets a task on the 'recordio-writer' queue and a line in the
   response.

   :raises Exception: If any ``commit_batch`` call reported a failure.
   """
   self.pull = taskqueue.Queue('recordio-queue')
   tag = self.request.get("taskqueue")
   max_tasks_to_lease = MAX_RPC_SIZE / MAX_TASKQUEUE_BATCH_SIZE
   if tag:
     all_committed = True
     collected = []
     collected_size = 0
     leased_full_page = True
     while leased_full_page:
       tasks = self.pull.lease_tasks_by_tag(LEASE_TIME_PER_BATCH,
                                            max_tasks_to_lease, tag=tag)
       for task in tasks:
         if task.was_deleted:
           # Should never happen
           continue
         key_values = marshal.loads(task.payload)
         size = sum(recordio_chunks.size(chunk) for chunk in key_values)
         if size + collected_size >= MAX_WRITE_BATCH_SIZE:
           # Limit reached: commit the collected tasks (unless an earlier
           # commit already failed) and start over with the current task.
           if all_committed:
             all_committed = self.commit_batch(tag, collected)
           collected = [(task, key_values)]
           collected_size = size
         else:
           collected_size += size
           collected.append((task, key_values))
       # Keep leasing only while the lease comes back full.
       leased_full_page = len(tasks) == max_tasks_to_lease
     if all_committed:
       all_committed = self.commit_batch(tag, collected)
     if not all_committed:
       raise Exception("RecordIO not completed")
   else:
     pending_tasks = self.pull.lease_tasks(0, max_tasks_to_lease)
     seen_tags = set()
     for pending in pending_tasks:
       tag = pending.tag
       if tag in seen_tags:
         continue
       seen_tags.add(tag)
       try:
         writer_task = RecordIOWriter.create_task_(tag, in_past=True)
         taskqueue.Queue('recordio-writer').add(writer_task)
         self.response.out.write("Scheduled write for: %s<br>" % tag)
       except (taskqueue.DuplicateTaskNameError,
               taskqueue.TombstonedTaskError,
               taskqueue.TaskAlreadyExistsError):
         self.response.out.write("Already pending write for: %s<br>" % tag)
     # The lease came back full: emit a script that reloads the page after
     # 5000 ms.
     if len(pending_tasks) == max_tasks_to_lease:
       self.response.out.write(
           "<script type=text/javascript>window.setTimeout(function() {"
           "document.location.reload();"
           "}, 5000);</script>")
  def commit_batch(self, tag, batch):
    """Applies a batch of values to a RecordIO and deletes the taskqueue tasks.

    :param tag: The current tag we are working on
    :param batch: A list of (taskqueue_task, key_value_list)
    :return: True on success (an empty batch counts as success), False if
        the synchronous commit did not complete; in that case the lease of
        every task in the batch is reset to 0.
    """
    if batch:
      done_tasks = []
      count = 0
      writer = RecordIOWriter(RecordIOShard.get_name(tag))
      for done_task, key_values in batch:
        done_tasks.append(done_task)
        for entry in key_values:
          writer.insert_entry_(entry)
          count += 1
      try:
        writer.commit_sync(retries=1)
        try:
          self.pull.delete_tasks(done_tasks)
        except taskqueue.BadTaskStateError:
          # The bulk delete failed; retry task by task and skip the ones
          # that are already gone.
          for task in done_tasks:
            if task.was_deleted:
              continue
            try:
              self.pull.delete_tasks(task)
            except taskqueue.BadTaskStateError:
              # Both values are passed as logging arguments.  The previous
              # '"... %s on %s" % task.name, tag' formatted with task.name
              # only and raised TypeError for the two placeholders.
              logging.debug("RecordIO Failed to free task %s on %s",
                            task.name, tag)
        logging.debug("RecordIO wrote %d entries to %s", count, writer.name)
      except RecordIOWriterNotCompletedError:
        logging.debug("RecordIO not completed on: %s", tag)
        for task in done_tasks:
          self.pull.modify_task_lease(task, 0)
        return False
    return True
Example #9
0
 def testCommitToQueueSplitEntries(self):
   """A value of MAX_ENTRY_SIZE + 1 bytes is queued as two chunks.

   Both chunks are expected in a single task on 'recordio-queue', and their
   payloads joined together must equal STRING + the original value.
   """
   chunk_size = MAX_ENTRY_SIZE + 1
   test_string = test_helper.uncompressableString(chunk_size)
   updater = RecordIOWriter("test")
   updater.create()
   updater.insert("test", test_string)
   # Drain the iterable so that everything is pushed to the queue.
   for _ in updater.commit_to_queue_():
     pass

   queue = taskqueue.Queue('recordio-queue')
   leased = list(queue.lease_tasks(60, 100))
   self.assertEqual(len(leased), 1)
   only_task = leased[0]
   self.assertEqual(only_task.tag, RecordIOShard.key_name("test"))

   updates = marshal.loads(only_task.payload)
   chunk_headers = [update[:-2] for update in updates]
   self.assertEqual([('test', 0, 2), ('test', 1, 2)], chunk_headers)
   joined_payload = "".join([update[-1] for update in updates])
   self.assertEqual(STRING + test_string, joined_payload)
Example #10
0
 def testWriteDuringSplit(self):
   """Writing while only the low half of a split is committed must fail.

   Once the high shard is committed as well, a further commit_sync() is
   expected to succeed and distribute the keys over both shards.
   """
   recordio = RecordIOShard.create("test", compressed=False)
   for key in ("1", "2"):
     recordio.insert((key, STRING + key))
   lo_shard, hi_shard = recordio.split()
   lo_shard.commit()

   updater = RecordIOWriter("test")
   updater.insert("3", "3")
   missing_shard_name = hi_shard.key().name()
   self.assertRaises(RecordIOShardDoesNotExistError,
                     updater.commit_shard_,
                     missing_shard_name, updater.updates)
   self.assertRaises(RecordIOWriterNotCompletedError,
                     updater.commit_sync,
                     32, 0)

   hi_shard.commit()
   updater.insert("0", STRING + "0")
   updater.commit_sync()

   shards = [shard for shard in RecordIOShard.all()]
   lo_shard, hi_shard = shards
   lo_keys = [entry[0] for entry in lo_shard]
   self.assertEqual(lo_keys, ["0", "1"])
   hi_keys = [entry[0] for entry in hi_shard]
   self.assertEqual(hi_keys, ["2", "3"])
Example #11
0
 def testCommitToQueue(self):
   """Entries exceeding MAX_TASKQUEUE_BATCH_SIZE are spread over two tasks.

   Both tasks must carry the shard key name as tag, their keys must appear
   in insertion order, and the first payload must be STRING + the value.
   """
   updater = RecordIOWriter("test")
   updater.create()
   chunk_size = MAX_ENTRY_SIZE - 1
   entries_to_write = MAX_TASKQUEUE_BATCH_SIZE / MAX_ENTRY_SIZE + 1
   for index in xrange(entries_to_write):
     key = str("%09d" % index)
     updater.insert(key, test_helper.uncompressableString(chunk_size))
   # Drain the iterable so that everything is pushed to the queue.
   for _ in updater.commit_to_queue_():
     pass

   queue = taskqueue.Queue('recordio-queue')
   leased = list(queue.lease_tasks(60, 100))
   self.assertEqual(len(leased), 2)
   first_task, second_task = leased[0], leased[1]
   self.assertEqual(first_task.tag, RecordIOShard.key_name("test"))
   self.assertEqual(second_task.tag, RecordIOShard.key_name("test"))

   updates_0 = marshal.loads(first_task.payload)
   updates_1 = marshal.loads(second_task.payload)
   expected_keys = [str("%09d" % index) for index in xrange(entries_to_write)]
   queued_keys = ([update[0] for update in updates_0] +
                  [update[0] for update in updates_1])
   self.assertEqual(expected_keys, queued_keys)
   self.assertTrue(updates_0[0][1] ==
                   STRING + test_helper.uncompressableString(chunk_size))
    def commit_batch(self, tag, batch):
        """Applies a batch of values to a RecordIO and deletes the taskqueue tasks.

        :param tag: The current tag we are working on
        :param batch: A list of (taskqueue_task, key_value_list)
        :return: True on success (an empty batch counts as success), False
            if the synchronous commit did not complete; in that case the
            lease of every task in the batch is reset to 0.
        """
        if batch:
            done_tasks = []
            count = 0
            writer = RecordIOWriter(RecordIOShard.get_name(tag))
            for done_task, key_values in batch:
                done_tasks.append(done_task)
                for entry in key_values:
                    writer.insert_entry_(entry)
                    count += 1
            try:
                writer.commit_sync(retries=1)
                try:
                    self.pull.delete_tasks(done_tasks)
                except taskqueue.BadTaskStateError:
                    # The bulk delete failed; retry task by task and skip
                    # the ones that are already gone.
                    for task in done_tasks:
                        if task.was_deleted:
                            continue
                        try:
                            self.pull.delete_tasks(task)
                        except taskqueue.BadTaskStateError:
                            # Both values are passed as logging arguments.
                            # The previous '"... %s on %s" % task.name, tag'
                            # formatted with task.name only and raised
                            # TypeError for the two placeholders.
                            logging.debug(
                                "RecordIO Failed to free task %s on %s",
                                task.name, tag)
                logging.debug("RecordIO wrote %d entries to %s",
                              count, writer.name)
            except RecordIOWriterNotCompletedError:
                logging.debug("RecordIO not completed on: %s", tag)
                for task in done_tasks:
                    self.pull.modify_task_lease(task, 0)
                return False
        return True
Example #13
0
 def writeOneShard(self, compressed):
   """Write, extend and prune a single shard, then verify its contents.

   :param compressed: Passed to ``create``; the stored shard must report the
       same value in its ``compressed`` attribute.
   """
   first_writer = RecordIOWriter("test")
   first_writer.create(compressed=compressed)
   for key, value in (("1", "foo"), ("2", "bar")):
     first_writer.insert(key, value)
   first_writer.commit_sync()

   # A second writer adds "3" and removes "2" again.
   second_writer = RecordIOWriter("test")
   second_writer.insert("3", "win")
   second_writer.remove("2")
   second_writer.commit_sync()

   recordio = RecordIOShard.all().get()
   self.assertEqual(recordio.compressed, compressed)
   stored = [entry for entry in recordio]
   self.assertEqual(stored, [("1", STRING + "foo"), ("3", STRING + "win")])
Example #14
0
 def post(self):
     """Create a RecordIO, change one of its entries, or delete a RecordIO.

     Request parameters:
       name: RecordIO to work on.  With neither ``key`` nor ``value`` the
         RecordIO is created; with only ``key`` that entry is removed;
         otherwise ``value`` is evaluated and inserted under ``key``.
         Afterwards the client is redirected to "?name=...&start=...".
       compressed: Any non-empty value creates the RecordIO compressed.
       delete: Name of a RecordIO to delete; redirects to "/recordio/".
     """
     name = self.request.get("name")
     compressed = bool(self.request.get("compressed"))
     key = self.request.get("key", None)
     value = self.request.get("value", None)
     if name:
         writer = RecordIOWriter(name)
         if key is None and value is None:
             writer.create(compressed)
         elif value is None:
             writer.remove(key)
             writer.commit_sync()
         else:
             # SECURITY: eval() executes arbitrary code taken from the
             # request.  Left in place because callers may rely on entering
             # Python literals; restrict access to this handler or switch to
             # a safe parser such as ast.literal_eval.
             writer.insert(str(key), eval(value))
             writer.commit_sync()
         # urllib.quote() cannot handle None, which used to make the create
         # path (no key) fail with TypeError; use an empty start instead.
         start = str(key) if key else ""
         self.redirect("?name=" + str(urllib.quote(name)) + "&start=" +
                       urllib.quote(start))
     delete = self.request.get("delete")
     if delete:
         writer = RecordIOWriter(delete)
         writer.delete()
         self.redirect("/recordio/")
Example #15
0
 def testWriteStringMarshalPickle(self):
   """Values are stored with a STRING, MARSHAL or CPICKLE prefix.

   A plain string, a dict of lists and an instance of a local class are
   inserted; the shard must return them sorted by key with the matching
   prefix and serialization.
   """
   updater = RecordIOWriter("test")
   updater.create()
   updater.insert("string", "string")
   marshalable = {"a": [1,2,3]}
   updater.insert("marshal", marshalable)
   class AnyClass():
     pass
   pickleable = AnyClass()
   updater.insert("cpickle", pickleable)
   updater.commit_sync()

   recordio = RecordIOShard.all().get()
   stored = [entry for entry in recordio]
   expected = [
       ("cpickle", CPICKLE + cPickle.dumps(pickleable)),
       ("marshal", MARSHAL + marshal.dumps(marshalable)),
       ("string", STRING + "string"),
   ]
   self.assertEqual(stored, expected)
Example #16
0
 def write2MBAndReplace(self, compressed):
   """Write a 2**21 byte value, verify it, then replace it by a short one.

   :param compressed: Passed to ``create`` of the RecordIO under test.

   After the first commit the value must be spread over several shards and
   entries.  After the replacement only the first shard may hold an entry
   and every shard's data must be shorter than 1000.
   """
   test_string = test_helper.uncompressableString(2**21)
   updater = RecordIOWriter("test")
   updater.create(compressed=compressed)
   updater.insert("test", test_string)
   updater.commit_sync()

   output = []
   shards_count = 0
   for recordio in RecordIOShard.all():
     self.assertTrue(len(recordio.data) >= 1000)
     shards_count += 1
     for entry in recordio:
       output.append(entry[-1])
   entries = len(output)
   self.assertTrue(shards_count > 1)
   self.assertTrue(entries > 3)
   self.assertEqual("".join(output), STRING + test_string, "read != write")

   updater.insert("test", "short")
   updater.commit_sync(retries=0)
   replaced_shards_count = 0
   for recordio in RecordIOShard.all():
     is_first_shard = replaced_shards_count == 0
     # Only the first shard keeps an entry; all others must be empty.
     self.assertEqual(1 if is_first_shard else 0, len(recordio))
     for entry in recordio:
       if not is_first_shard:
         self.fail("shouldnt be iterable")
       self.assertEqual(STRING + "short", entry[-1])
     replaced_shards_count += 1
     self.assertTrue(len(recordio.data) < 1000)
   self.assertTrue(replaced_shards_count > 0)
   self.assertTrue(replaced_shards_count <= shards_count)
 def post(self):
   """Create a RecordIO, change one of its entries, or delete a RecordIO.

   Request parameters:
     name: RecordIO to work on.  With neither ``key`` nor ``value`` the
       RecordIO is created; with only ``key`` that entry is removed;
       otherwise ``value`` is evaluated and inserted under ``key``.
       Afterwards the client is redirected to "?name=...&start=...".
     compressed: Any non-empty value creates the RecordIO compressed.
     delete: Name of a RecordIO to delete; redirects to "/recordio/".
   """
   name = self.request.get("name")
   compressed = bool(self.request.get("compressed"))
   key = self.request.get("key", None)
   value = self.request.get("value", None)
   if name:
     writer = RecordIOWriter(name)
     if key is None and value is None:
       writer.create(compressed)
     elif value is None:
       writer.remove(key)
       writer.commit_sync()
     else:
       # SECURITY: eval() executes arbitrary code taken from the request.
       # Left in place because callers may rely on entering Python literals;
       # restrict access to this handler or switch to a safe parser such as
       # ast.literal_eval.
       writer.insert(str(key), eval(value))
       writer.commit_sync()
     # urllib.quote() cannot handle None, which used to make the create path
     # (no key) fail with TypeError; use an empty start instead.
     start = str(key) if key else ""
     self.redirect("?name=" + str(urllib.quote(name)) + "&start=" +
                   urllib.quote(start))
   delete = self.request.get("delete")
   if delete:
     writer = RecordIOWriter(delete)
     writer.delete()
     self.redirect("/recordio/")