Esempio n. 1
0
 def testGetChunks(self):
   """Checks how get_chunks groups entries for different size limits."""
   # A single small entry always ends up alone in one chunk, whether the
   # limit is large (2**16) or as small as 1.
   for size_limit in (2**16, 1):
     self.assertEqual([[("a", "a")]],
                      list(get_chunks([("a", "a")], size_limit)))

   long_value = "a" * 100

   # Two entries with 100-character values and a limit of 150: one entry
   # per chunk is expected.
   expected_singles = [[("a", long_value)], [("a", long_value)]]
   actual_singles = list(
       get_chunks([("a", long_value), ("a", long_value)], 150))
   self.assertEqual(expected_singles, actual_singles)

   # Four such entries with a limit of 300: two chunks of two entries each.
   expected_pairs = [[("a", long_value), ("a", long_value)],
                     [("a", long_value), ("a", long_value)]]
   actual_pairs = list(
       get_chunks([("a", long_value), ("a", long_value),
                   ("a", long_value), ("a", long_value)], 300))
   self.assertEqual(expected_pairs, actual_pairs)
Esempio n. 2
0
 def testGetChunks(self):
     """Verifies the chunk layout produced by get_chunks for several limits."""

     def chunked(entries, size_limit):
         # Materialize the result so it can be compared with a plain list.
         return list(get_chunks(entries, size_limit))

     # One small entry stays in a single chunk for a huge and a tiny limit.
     single_small = [[("a", "a")]]
     self.assertEqual(single_small, chunked([("a", "a")], 2**16))
     self.assertEqual([[("a", "a")]], chunked([("a", "a")], 1))

     value = "a" * 100

     # Limit 150 with two 100-character values: each entry gets its own chunk.
     one_per_chunk = [
         [("a", value)],
         [("a", value)],
     ]
     self.assertEqual(one_per_chunk,
                      chunked([("a", value), ("a", value)], 150))

     # Limit 300 with four 100-character values: two entries per chunk.
     two_per_chunk = [
         [("a", value), ("a", value)],
         [("a", value), ("a", value)],
     ]
     self.assertEqual(
         two_per_chunk,
         chunked([("a", value), ("a", value), ("a", value), ("a", value)],
                 300))
Esempio n. 3
0
    def commit_to_queue_(self):
        """Adds all pending changes to the task queues for async commits.

        The pending updates are grouped by shard, split into chunks of at
        most MAX_TASKQUEUE_BATCH_SIZE and added as marshalled PULL tasks,
        tagged with the shard name, to the 'recordio-queue' queue. Entries
        that have no shard, or whose task could not be added, are kept in
        self.updates.

        :return: Yields all shard names that need to be updated (one value
                 per task that was added successfully).
        :raises RecordIOWriterNotCompletedError: Once all tasks have been
                 processed, if some entries could not be queued.
        """
        pull = taskqueue.Queue('recordio-queue')
        rpcs = []
        key_values_not_added = RecordIORecords()
        for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                self.name, self.updates):
            self.db_search += 1
            if shard_name is None:
                # No shard is known for these entries, keep them pending.
                for entry in key_values:
                    key_values_not_added.insert(entry)
                continue
            for key_values_chunk in get_chunks(key_values,
                                               MAX_TASKQUEUE_BATCH_SIZE):
                payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
                rpc = pull.add_async(
                    taskqueue.Task(payload=payload,
                                   method='PULL',
                                   tag=shard_name))
                rpcs.append((rpc, key_values_chunk, shard_name))

        for rpc, key_values, shard_name in rpcs:
            try:
                rpc.get_result()
            except Exception:
                # Adding the task failed, keep its entries pending.
                for entry in key_values:
                    key_values_not_added.insert(entry)
            else:
                # Yield outside of the try block: a GeneratorExit (or any
                # exception thrown into the generator) raised at the yield
                # must not be mistaken for a failed task, which would
                # re-insert entries that were queued successfully and make
                # the generator yield again while it is being closed.
                yield shard_name
        self.updates = key_values_not_added
        if len(self.updates):
            raise RecordIOWriterNotCompletedError(len(self.updates))
Esempio n. 4
0
  def commit_to_queue_(self):
    """Adds all pending changes to the task queues for async commits.

    The pending updates are grouped by shard, split into chunks of at most
    MAX_TASKQUEUE_BATCH_SIZE and added as marshalled PULL tasks, tagged with
    the shard name, to the 'recordio-queue' queue. Entries that have no shard,
    or whose task could not be added, are kept in self.updates.

    :return: Yields all shard names that need to be updated (one value per
             task that was added successfully).
    :raises RecordIOWriterNotCompletedError: Once all tasks have been
             processed, if some entries could not be queued.
    """
    pull = taskqueue.Queue('recordio-queue')
    rpcs = []
    key_values_not_added = RecordIORecords()
    for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
      self.db_search += 1
      if shard_name is None:
        # No shard is known for these entries, keep them pending.
        for entry in key_values:
          key_values_not_added.insert(entry)
        continue
      for key_values_chunk in get_chunks(key_values, MAX_TASKQUEUE_BATCH_SIZE):
        payload = marshal.dumps(key_values_chunk, MARSHAL_VERSION)
        rpc = pull.add_async(taskqueue.Task(payload=payload, method='PULL',
                                            tag=shard_name))
        rpcs.append((rpc, key_values_chunk, shard_name))

    for rpc, key_values, shard_name in rpcs:
      try:
        rpc.get_result()
      except Exception:
        # Adding the task failed, keep its entries pending.
        for entry in key_values:
          key_values_not_added.insert(entry)
      else:
        # Yield outside of the try block: a GeneratorExit (or any exception
        # thrown into the generator) raised at the yield must not be mistaken
        # for a failed task, which would re-insert entries that were queued
        # successfully and make the generator yield again while being closed.
        yield shard_name
    self.updates = key_values_not_added
    if len(self.updates):
      raise RecordIOWriterNotCompletedError(len(self.updates))
  def get_zipped_chunks_(self, force_repackiging=False):
    """Yields the zipped chunks of this RecordIORecordsZipped.

    When self.records_ is truthy or force_repackiging is set, the entries are
    regrouped with recordio_chunks.get_chunks(self, ZIP_CHUNKS) and every
    group is marshalled and zlib-compressed. Otherwise the chunks stored in
    self.zipped_chunks_ are yielded unchanged.

    :param force_repackiging: If the zip chunks should be unzipped and rezipped
                              to be more even.
    :return: Generator of zipped chunks: (lo_entry, hi_entry, zipped_entries).
             lo_entry and hi_entry ARE INCLUSIVE! They are the first and the
             last entry of the chunk, each without its last element.
    """
    if not (self.records_ or force_repackiging):
      # Nothing to repackage, hand out the chunks as they are stored.
      for zipped_chunk in self.zipped_chunks_:
        yield zipped_chunk
      return

    for chunk in recordio_chunks.get_chunks(self, ZIP_CHUNKS):
      lo_entry = chunk[0][:-1]
      hi_entry = chunk[-1][:-1]
      serialized = marshal.dumps(chunk, MARSHAL_VERSION)
      zipped_entries = zlib.compress(serialized, COMPRESSION_LEVEL)
      yield (lo_entry, hi_entry, zipped_entries)
Esempio n. 6
0
    def get_zipped_chunks_(self, force_repackiging=False):
        """Yields the zipped chunks of this RecordIORecordsZipped.

        When self.records_ is truthy or force_repackiging is set, the entries
        are regrouped with recordio_chunks.get_chunks(self, ZIP_CHUNKS) and
        every group is marshalled and zlib-compressed. Otherwise the chunks
        stored in self.zipped_chunks_ are yielded unchanged.

        :param force_repackiging: If the zip chunks should be unzipped and
                                  rezipped to be more even.
        :return: Generator of zipped chunks:
                 (lo_entry, hi_entry, zipped_entries).
                 lo_entry and hi_entry ARE INCLUSIVE! They are the first and
                 the last entry of the chunk, each without its last element.
        """
        repackage = self.records_ or force_repackiging
        if not repackage:
            # Nothing to repackage, hand out the chunks as they are stored.
            for stored_chunk in self.zipped_chunks_:
                yield stored_chunk
            return

        for group in recordio_chunks.get_chunks(self, ZIP_CHUNKS):
            first_entry = group[0][:-1]
            last_entry = group[-1][:-1]
            compressed = zlib.compress(
                marshal.dumps(group, MARSHAL_VERSION), COMPRESSION_LEVEL)
            yield (first_entry, last_entry, compressed)
Esempio n. 7
0
    def commit_sync(self, retries=32, retry_timeout=1):
        """Applies all changes synchronously to the RecordIO.

        The pending updates are grouped by shard and committed in chunks of
        at most MAX_WRITE_BATCH_SIZE. Entries for which no shard was found,
        or whose shard does not exist anymore, are collected and become the
        pending updates of the next attempt.

        :param retries: How many times a commit_sync should be retried in
                        case of datastore collisions.
        :param retry_timeout: The amount of seconds to wait before the next
                              retry.
        :raises RecordIOWriterNotCompletedError: If there are still pending
                entries after the last attempt.
        """
        if not len(self.updates):
            return
        for attempt in range(retries + 1):
            shard_does_not_exist = RecordIORecords()
            for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
                    self.name, self.updates):
                self.db_search += 1
                if shard_name is None and key_values:
                    logging.debug(
                        "RecordIO %s: No shard found for:\n%s -> %s",
                        self.name,
                        SPLIT_CHAR.join(
                            RecordIOShard.entry_key(key_values[0])),
                        key_values[0][:-1])
                    for entry in key_values:
                        shard_does_not_exist.insert(entry)
                    continue
                # Key ranges returned by the previous commit_shard_ call;
                # presumably the two halves of a shard that was just split.
                lo_just_split = None
                hi_just_split = None
                for key_values_chunk in get_chunks(key_values,
                                                   MAX_WRITE_BATCH_SIZE):
                    if lo_just_split and hi_just_split and key_values_chunk:
                        # Retarget this chunk to whichever of the two ranges
                        # contains its first entry.
                        if RecordIORecords.in_range(key_values_chunk[0],
                                                    lo=lo_just_split[0],
                                                    hi=lo_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=lo_just_split[0],
                                hi=lo_just_split[1])
                        elif RecordIORecords.in_range(key_values_chunk[0],
                                                      lo=hi_just_split[0],
                                                      hi=hi_just_split[1]):
                            shard_name = RecordIOShard.key_name(
                                self.name,
                                lo=hi_just_split[0],
                                hi=hi_just_split[1])
                    not_deleted = None
                    try:
                        not_deleted, lo_just_split, hi_just_split = (
                            self.commit_shard_(shard_name, key_values_chunk))
                    except RecordIOShardDoesNotExistError:
                        logging.debug("Shard does not exist:\n%s", shard_name)
                        lo_just_split = None
                        hi_just_split = None
                        for entry in key_values_chunk:
                            shard_does_not_exist.insert(entry)
                    if not_deleted:
                        # Entries handed back by commit_shard_ are looked up
                        # again and committed to the shards found for them.
                        for to_delete_shard_name, to_delete_key_values in (
                                RecordIOShard.get_shards_for_key_values(
                                    self.name, not_deleted)):
                            self.db_search += 1
                            try:
                                self.commit_shard_(to_delete_shard_name,
                                                   to_delete_key_values)
                            except RecordIOShardDoesNotExistError:
                                # Log the shard that was actually tried here,
                                # not the shard of the enclosing loop.
                                logging.debug("Shard does not exist:\n%s",
                                              to_delete_shard_name)
                                for entry in to_delete_key_values:
                                    shard_does_not_exist.insert(entry)
            self.updates = shard_does_not_exist
            if not len(self.updates):
                return
            if attempt == retries:
                raise RecordIOWriterNotCompletedError(len(self.updates))
            logging.debug("Commit attempt %d failed", attempt)
            time.sleep(retry_timeout)
Esempio n. 8
0
  def commit_sync(self, retries=32, retry_timeout=1):
    """Applies all changes synchronously to the RecordIO.

    The pending updates are grouped by shard and committed in chunks of at
    most MAX_WRITE_BATCH_SIZE. Entries for which no shard was found, or whose
    shard does not exist anymore, are collected and become the pending updates
    of the next attempt.

    :param retries: How many times a commit_sync should be retried in case of
                    datastore collisions.
    :param retry_timeout: The amount of seconds to wait before the next retry.
    :raises RecordIOWriterNotCompletedError: If there are still pending
            entries after the last attempt.
    """
    if not len(self.updates):
      return
    for attempt in range(retries + 1):
      shard_does_not_exist = RecordIORecords()
      for shard_name, key_values in RecordIOShard.get_shards_for_key_values(
          self.name, self.updates):
        self.db_search += 1
        if shard_name is None and key_values:
          logging.debug(
              "RecordIO %s: No shard found for:\n%s -> %s",
              self.name,
              SPLIT_CHAR.join(RecordIOShard.entry_key(key_values[0])),
              key_values[0][:-1])
          for entry in key_values:
            shard_does_not_exist.insert(entry)
          continue
        # Key ranges returned by the previous commit_shard_ call; presumably
        # the two halves of a shard that was just split.
        lo_just_split = None
        hi_just_split = None
        for key_values_chunk in get_chunks(key_values, MAX_WRITE_BATCH_SIZE):
          if lo_just_split and hi_just_split and key_values_chunk:
            # Retarget this chunk to whichever of the two ranges contains its
            # first entry.
            if RecordIORecords.in_range(key_values_chunk[0],
                                        lo=lo_just_split[0],
                                        hi=lo_just_split[1]):
              shard_name = RecordIOShard.key_name(self.name,
                                                  lo=lo_just_split[0],
                                                  hi=lo_just_split[1])
            elif RecordIORecords.in_range(key_values_chunk[0],
                                          lo=hi_just_split[0],
                                          hi=hi_just_split[1]):
              shard_name = RecordIOShard.key_name(self.name,
                                                  lo=hi_just_split[0],
                                                  hi=hi_just_split[1])
          not_deleted = None
          try:
            not_deleted, lo_just_split, hi_just_split = self.commit_shard_(
                shard_name, key_values_chunk)
          except RecordIOShardDoesNotExistError:
            logging.debug("Shard does not exist:\n%s", shard_name)
            lo_just_split = None
            hi_just_split = None
            for entry in key_values_chunk:
              shard_does_not_exist.insert(entry)
          if not_deleted:
            # Entries handed back by commit_shard_ are looked up again and
            # committed to the shards found for them.
            for to_delete_shard_name, to_delete_key_values in (
                 RecordIOShard.get_shards_for_key_values(
                     self.name, not_deleted)):
              self.db_search += 1
              try:
                self.commit_shard_(to_delete_shard_name, to_delete_key_values)
              except RecordIOShardDoesNotExistError:
                # Log the shard that was actually tried here, not the shard
                # of the enclosing loop.
                logging.debug("Shard does not exist:\n%s",
                              to_delete_shard_name)
                for entry in to_delete_key_values:
                  shard_does_not_exist.insert(entry)
      self.updates = shard_does_not_exist
      if not len(self.updates):
        return
      if attempt == retries:
        raise RecordIOWriterNotCompletedError(len(self.updates))
      logging.debug("Commit attempt %d failed", attempt)
      time.sleep(retry_timeout)