예제 #1
0
 def testGetData(self):
     """Entries dumped with get_data() can be loaded into a new instance."""
     expected = [("a", "aa"), ("b", "bb")]
     source = RecordIORecords()
     for entry in expected:
         source.insert(entry)
     # Rebuild a fresh container from the serialized form of the first one.
     restored = RecordIORecords(source.get_data())
     self.assertEqual(list(restored), expected)
예제 #2
0
 def testInsertNotDeleted(self):
     """Checks get_data() and not_deleted() after re-inserting key "b".

     Two differently shaped five-element entries are inserted for "b"; the
     test pins what survives a get_data() round trip and what not_deleted()
     reports on the original container.
     """
     records = RecordIORecords()
     for entry in (("a", "aa"),
                   ("b", 0, 3, 3, "bb"),
                   ("b", 0, 2, 2, "bb")):
         records.insert(entry)
     reloaded = RecordIORecords(records.get_data())
     expected_entries = [("a", "aa"), ("b", 0, 2, 2, "bb")]
     self.assertEqual(expected_entries, list(reloaded))
     expected_not_deleted = [('b', 1, 3, 3), ('b', 2, 3, 3)]
     self.assertEqual(expected_not_deleted, list(records.not_deleted()))
예제 #3
0
 def testSplit(self):
     """split() returns data for a low half, a high half and a middle entry."""
     records = RecordIORecords()
     # insertABC is a helper of this test class; the assertions below expect
     # it to have added the "a", "b" and "c" entries.
     self.insertABC(records)
     for entry in (("d", "dd"), ("e", "ee")):
         records.insert(entry)
     lo_data, hi_data, middle = records.split()
     lower = RecordIORecords(lo_data)
     upper = RecordIORecords(hi_data)
     self.assertEqual(middle, ("d", "dd"))
     self.assertEqual([("a", "aa"), ("b", "bb"), ("c", "cc")], list(lower))
     self.assertEqual([("d", "dd"), ("e", "ee")], list(upper))
예제 #4
0
 def testDelete(self):
     """Inserting a key-only tuple removes that key from the stored data.

     Also pins the return value of insert() for key-only tuples: truthy for
     "b" (present) and falsy for "d" (never inserted by this test).
     """
     abc = [("a", "aa"), ("b", "bb"), ("c", "cc")]
     records = RecordIORecords()
     self.insertABC(records)
     first_delete = records.insert(("b", ))
     self.assertTrue(first_delete)
     # Putting the value back must make the earlier delete invisible.
     records.insert(("b", "bb"))
     self.assertEqual(abc, list(records))
     second_delete = records.insert(("b", ))
     self.assertTrue(second_delete)
     unknown_delete = records.insert(("d", ))
     self.assertFalse(unknown_delete)
     reloaded = RecordIORecords(records.get_data())
     self.assertEqual([("a", "aa"), ("c", "cc")], list(reloaded))
예제 #5
0
 def testInsertSplitDataBigToSmall(self):
     """A plain ("b", "bb") entry replaces earlier five-element "b" entries."""
     inserts = [
         ("a", "aa"),
         ("b", 0, 3, 3, "bb"),
         ("b", 1, 3, 3, "bb"),
         ("b", 2, 3, 3, "bb"),
         ("c", "cc"),
         ("b", "bb"),
     ]
     records = RecordIORecords()
     for entry in inserts:
         records.insert(entry)
     # Only the state after a get_data() round trip is asserted.
     reloaded = RecordIORecords(records.get_data())
     expected = [("a", "aa"), ("b", "bb"), ("c", "cc")]
     self.assertEqual(expected, list(reloaded))
예제 #6
0
    def commit_to_queue_(self):
        """Adds all pending changes to the task queues for async commits.

        Pending entries are grouped by shard, split into chunks of at most
        MAX_TASKQUEUE_BATCH_SIZE and added asynchronously as PULL tasks tagged
        with the shard name. Entries for which no shard name was returned, or
        whose task could not be added, are kept in self.updates.

        This is a generator: self.updates is only replaced, and the error
        below only raised, once the generator has been fully consumed.

        :return: Yields all shard names that need to be updated.
        :raises RecordIOWriterNotCompletedError: If some entries could not be
            added to the queue.
        """
        pull = taskqueue.Queue('recordio-queue')
        rpcs = []
        key_values_not_added = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None:
                for entry in key_values:
                    key_values_not_added.insert(entry)
            else:
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_TASKQUEUE_BATCH_SIZE):
                    payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                    rpc = pull.add_async(
                        taskqueue.Task(payload=payload,
                                       method='PULL',
                                       tag=shard_name))
                    rpcs.append((rpc, key_values_chunk, shard_name))

        for rpc, key_values, shard_name in rpcs:
            try:
                rpc.get_result()
            except Exception:
                # Best effort: remember the chunk so that it stays pending.
                for entry in key_values:
                    key_values_not_added.insert(entry)
            else:
                # The yield must stay outside the try block. Otherwise a
                # GeneratorExit (or any exception thrown into the generator)
                # raised at the yield would be swallowed and a successfully
                # queued chunk would wrongly be recorded as not added.
                yield shard_name
        self.updates = key_values_not_added
        if len(self.updates):
            raise RecordIOWriterNotCompletedError(len(self.updates))
예제 #7
0
 def init(self):
     """Builds records_ and loHi_ unless records_ has already been set.

     records_ wraps self.data, using RecordIORecordsZipped when
     self.compressed is truthy and RecordIORecords otherwise. loHi_ is
     derived from the name of this entity's key.
     """
     if hasattr(self, "records_"):
         return
     records_cls = (RecordIORecordsZipped if self.compressed
                    else RecordIORecords)
     self.records_ = records_cls(self.data)
     key_name = self.key().name()
     self.loHi_ = RecordIOShard.lo_hi_from_key(key_name)
예제 #8
0
 def testInsertSplitDataSmallToBig(self):
     """Five-element "b" entries replace the plain "b" entry when iterating."""
     b_parts = [
         ("b", 0, 3, 3, "bb"),
         ("b", 1, 3, 3, "bb"),
         ("b", 2, 3, 3, "bb"),
     ]
     records = RecordIORecords()
     self.insertABC(records)
     for part in b_parts:
         records.insert(part)
     expected = [("a", "aa")] + b_parts + [("c", "cc")]
     self.assertEqual(expected, list(records))
예제 #9
0
    def __init__(self, name):
        """Creates a RecordIOWriter.

        :param name: The name of the RecordIO. Its urllib quoted form must
                     not be longer than MAX_KEY_LENGTH characters.
        :raises ValueError: If the quoted name exceeds MAX_KEY_LENGTH.
        """
        quoted_length = len(urllib.quote(name))
        if quoted_length > MAX_KEY_LENGTH:
            message = (
                "Max urllib.quote(name) length is %d: len('%s') is %d" %
                (MAX_KEY_LENGTH, name, quoted_length))
            raise ValueError(message)
        self.name = name
        # Changes collected so far that have not been committed yet.
        self.updates = RecordIORecords()
        self.pending_worker_tasks = []
        # Counters, all starting at zero.
        self.db_search = self.db_get = self.db_put = 0
예제 #10
0
 def testInsertGetAndRead(self):
     """Covers len(), iteration, item lookup, membership and read() ranges."""
     abc = [("a", "aa"), ("b", "bb"), ("c", "cc")]
     records = RecordIORecords()
     self.insertABC(records)
     self.assertEqual(len(records), 3)
     self.assertEqual(abc, list(records))

     # Overwriting an existing key must not change the number of entries.
     records.insert(("b", "new"))
     self.assertEqual(len(records), 3)
     self.assertEqual(records["b"], ("b", "new"))
     self.assertTrue("a" in records)
     self.assertFalse("z" in records)

     # Restore the original value, then read with and without bounds.
     records.insert(("b", "bb"))
     self.assertEqual(abc, list(records.read()))
     everything = records.read(("", ), ("d", ))
     self.assertEqual(abc, list(everything))
     only_b = records.read(("b", ), ("c", ))
     self.assertEqual([("b", "bb")], list(only_b))
예제 #11
0
    def commit_sync(self, retries=32, retry_timeout=1):
        """Applies all changes synchronously to the RecordIO.

        Every attempt looks up the shards for the pending entries and commits
        them chunk by chunk (at most MAX_WRITE_BATCH_SIZE entries per chunk).
        Entries whose shard could not be found are collected and become the
        pending updates of the next attempt.

        :param retries: How many times a commit_sync should be retried in case
                        some entries could not be written. Up to retries + 1
                        attempts are made in total.
        :param retry_timeout: The amount of seconds to wait before the next
                              retry.
        :raises RecordIOWriterNotCompletedError: If entries are still pending
            after the last attempt.
        """
        if not len(self.updates):
            return
        for attempt in range(retries + 1):
            shard_does_not_exist = RecordIORecords()
            for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                    self.name, self.updates):
                self.db_search += 1
                if shard_name is None and key_values:
                    logging.debug(
                        "RecordIO %s: No shard found for:\n%s -> %s" %
                        (self.name,
                         SPLIT_CHAR.join(RecordIOShard.entry_key(
                             key_values[0])), key_values[0][:-1]))
                    for entry in key_values:
                        shard_does_not_exist.insert(entry)
                else:
                    # Set from the result of the previous commit_shard_ call.
                    # NOTE(review): presumably the (lo, hi) key ranges of the
                    # two shards created when that commit split the shard --
                    # confirm against commit_shard_.
                    lo_just_split = None
                    hi_just_split = None
                    for key_values_chunk in get_chunks(key_values,
                                                       MAX_WRITE_BATCH_SIZE):
                        if lo_just_split and hi_just_split and key_values_chunk:
                            # Re-target this chunk at whichever of the two
                            # ranges contains its first entry.
                            if RecordIORecords.in_range(key_values_chunk[0],
                                                        lo=lo_just_split[0],
                                                        hi=lo_just_split[1]):
                                shard_name = RecordIOShard.key_name(
                                    self.name,
                                    lo=lo_just_split[0],
                                    hi=lo_just_split[1])
                            elif RecordIORecords.in_range(key_values_chunk[0],
                                                          lo=hi_just_split[0],
                                                          hi=hi_just_split[1]):
                                shard_name = RecordIOShard.key_name(
                                    self.name,
                                    lo=hi_just_split[0],
                                    hi=hi_just_split[1])
                        not_deleted = None
                        try:
                            not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                                shard_name, key_values_chunk)
                        except RecordIOShardDoesNotExistError:
                            logging.debug("Shard does not exist:\n" +
                                          shard_name)
                            lo_just_split = None
                            hi_just_split = None
                            for entry in key_values_chunk:
                                shard_does_not_exist.insert(entry)
                        if not_deleted:
                            # Entries reported back by commit_shard_ are looked
                            # up again and committed to their own shards.
                            for to_delete_shard_name, to_delete_key_values in (
                                    RecordIOShard.get_shards_for_key_values(
                                        self.name, not_deleted)):
                                self.db_search += 1
                                try:
                                    self.commit_shard_(to_delete_shard_name,
                                                       to_delete_key_values)
                                except RecordIOShardDoesNotExistError:
                                    # Log the shard that was actually tried
                                    # here, not the outer loop's shard_name.
                                    # %s formatting also copes with a None
                                    # shard name.
                                    logging.debug("Shard does not exist:\n%s",
                                                  to_delete_shard_name)
                                    for entry in to_delete_key_values:
                                        shard_does_not_exist.insert(entry)
            self.updates = shard_does_not_exist
            if len(self.updates):
                if attempt == retries:
                    raise RecordIOWriterNotCompletedError(len(self.updates))
                else:
                    logging.debug("Commit attempt %d failed" % attempt)
                    time.sleep(retry_timeout)
            else:
                return