def process_entity(self, entity):
    """ Verifies entity, fetches from journal if necessary and calls
    dump_entity.

    Entities whose kind matches PROTECTED_KINDS or PRIVATE_KINDS are skipped
    unless the kind also matches BLOB_CHUNK_REGEX or BLOB_INFO_REGEX, and
    entities whose stored value equals datastore_server.TOMBSTONE are skipped
    as well. Both of these cases return False without taking a lock.

    Args:
      entity: The entity to be backed up. It is indexed as entity[key][column]
        where key is the first key returned by entity.keys().
    Returns:
      True on success, False otherwise.
      NOTE(review): only the early `return False` paths are visible in this
      block; the return that follows the retry loop is not shown here.
    """
    key = entity.keys()[0]
    kind = entity_utils.get_kind_from_entity_key(key)
    # Skip protected and private entities.
    if re.match(self.PROTECTED_KINDS, kind) or\
        re.match(self.PRIVATE_KINDS, kind):
        # Do not skip blob entities.
        if not re.match(self.BLOB_CHUNK_REGEX, kind) and\
            not re.match(self.BLOB_INFO_REGEX, kind):
            logging.debug("Skipping key: {0}".format(key))
            return False
    # First schema column holds the entity value; a tombstone value is not
    # backed up.
    one_entity = entity[key][dbconstants.APP_ENTITY_SCHEMA[0]]
    if one_entity == datastore_server.TOMBSTONE:
        return False
    app_prefix = entity_utils.get_prefix_from_entity_key(key)
    root_key = entity_utils.get_root_key_from_entity_key(key)
    success = True
    while True:
        # Acquire lock.
        txn_id = self.zoo_keeper.get_transaction_id(app_prefix)
        try:
            if self.zoo_keeper.acquire_lock(app_prefix, txn_id, root_key):
                # Second schema column is passed to verify_entity as the
                # version of this entity.
                version = entity[key][dbconstants.APP_ENTITY_SCHEMA[1]]
                if not self.verify_entity(key, version):
                    # Fetch from the journal.
                    entity = entity_utils.fetch_journal_entry(self.db_access, key)
                    if not entity:
                        logging.error("Bad journal entry for key: {0} and result: {1}".
                            format(key, entity))
                        success = False
                    else:
                        one_entity = entity[key][dbconstants.APP_ENTITY_SCHEMA[0]]
                # NOTE(review): the nesting of this check is reconstructed from
                # a collapsed line. As laid out here it also runs after a
                # failed journal fetch, dumping the earlier one_entity and
                # overwriting success -- confirm against the real file.
                if self.dump_entity(one_entity):
                    logging.debug("Backed up key: {0}".format(key))
                    success = True
                else:
                    success = False
            else:
                # Reached when acquire_lock returns a falsy value.
                logging.warn("Entity with key: {0} not found".format(key))
                success = False
        except zk.ZKTransactionException, zk_exception:
            logging.error("Zookeeper exception {0} while requesting entity lock".
                format(zk_exception))
            success = False
        except zk.ZKInternalException, zk_exception:
            logging.error("Zookeeper exception {0} while requesting entity lock".
                format(zk_exception))
            success = False
        # NOTE(review): the visible text of this method ends here. No loop
        # exit, lock release or final return is shown, so this block looks
        # truncated -- confirm against the complete method before relying on it.
def run_backup(self):
    """ Runs the backup process. Loops on the entire dataset and dumps it
    into a file.

    Batches are fetched with get_entity_batch, starting at the key
    '<app_id>\\x00' (inclusive) and then continuing after the last key seen.
    Every entity is handed to process_entity unless its kind matches one of
    the patterns in self.skip_kinds. On a match the rest of the batch is
    dropped, the scan restarts from a key built to lie past that kind, and
    the matched pattern plus all patterns before it are removed from
    self.skip_kinds. The loop ends when a fetch returns no entities.

    A dbconstants.AppScaleDBConnectionError is logged and the fetch is retried
    after sleeping DB_ERROR_PERIOD seconds.

    Returns:
      None.
    """
    logging.info("Backup started")
    # NOTE(review): recorded but never read inside this method; presumably
    # meant for a duration log -- confirm.
    start = time.time()

    first_key = '{0}\x00'.format(self.app_id)
    start_inclusive = True
    # Never refilled in this method, so it is currently always empty.
    entities_remaining = []

    while True:
        try:
            # Fetch batch.
            entities = entities_remaining + self.get_entity_batch(
                first_key, self.BATCH_SIZE, start_inclusive)
            if not entities:
                break
            # Report what was actually fetched, not the requested batch size.
            logging.info("Processing {0} entities".format(len(entities)))

            # Loop through entities retrieved and if not to be skipped,
            # process.
            skip = False
            for entity in entities:
                # list(...) keeps this working whether keys() returns a list
                # or a view.
                first_key = list(entity.keys())[0]
                kind = entity_utils.get_kind_from_entity_key(first_key)
                logging.debug("Processing key: {0}".format(first_key))

                for index, skip_kind in enumerate(self.skip_kinds, 1):
                    if not re.match(skip_kind, kind):
                        continue
                    logging.warning(
                        "Skipping entities of kind: {0}".format(skip_kind))
                    skip = True
                    # Restart the scan from a key placed after this kind.
                    # NOTE(review): skip_kind is matched as a regex above but
                    # located as a literal substring here; this assumes skip
                    # kinds are plain kind names -- confirm.
                    kind_end = first_key.find(skip_kind) + len(skip_kind) + 1
                    first_key = (first_key[:kind_end] +
                                 dbconstants.TERMINATING_STRING)
                    # Drop the matched pattern and every pattern before it.
                    self.skip_kinds = self.skip_kinds[index:]
                    break

                if skip:
                    break
                self.process_entity(entity)

            if not skip:
                first_key = list(entities[-1].keys())[0]
            start_inclusive = False
        except dbconstants.AppScaleDBConnectionError as connection_error:
            logging.error(
                "Error getting a batch: {0}".format(connection_error))
            time.sleep(self.DB_ERROR_PERIOD)
def run_backup(self):
    """ Runs the backup process. Loops on the entire dataset and dumps it
    into a file.

    Batches are fetched with get_entity_batch, starting at the key
    '<app_id>\\x00' (inclusive) and then continuing after the last key seen.
    Every entity is handed to process_entity unless its kind matches one of
    the patterns in self.skip_kinds. On a match the rest of the batch is
    dropped, the scan restarts from a key built to lie past that kind, and
    the matched pattern plus all patterns before it are removed from
    self.skip_kinds. The loop ends when a fetch returns no entities.

    A dbconstants.AppScaleDBConnectionError is logged and the fetch is retried
    after sleeping DB_ERROR_PERIOD seconds.

    Returns:
      None.
    """
    logging.info("Backup started")
    # NOTE(review): recorded but never read inside this method; presumably
    # meant for a duration log -- confirm.
    start = time.time()

    first_key = '{0}\x00'.format(self.app_id)
    start_inclusive = True
    # Never refilled in this method, so it is currently always empty.
    entities_remaining = []

    while True:
        try:
            # Fetch batch.
            entities = entities_remaining + self.get_entity_batch(
                first_key, self.BATCH_SIZE, start_inclusive)
            if not entities:
                break
            # Report what was actually fetched, not the requested batch size.
            logging.info("Processing {0} entities".format(len(entities)))

            # Loop through entities retrieved and if not to be skipped,
            # process.
            skip = False
            for entity in entities:
                # list(...) keeps this working whether keys() returns a list
                # or a view.
                first_key = list(entity.keys())[0]
                kind = entity_utils.get_kind_from_entity_key(first_key)
                logging.debug("Processing key: {0}".format(first_key))

                for index, skip_kind in enumerate(self.skip_kinds, 1):
                    if not re.match(skip_kind, kind):
                        continue
                    logging.warning(
                        "Skipping entities of kind: {0}".format(skip_kind))
                    skip = True
                    # Restart the scan from a key placed after this kind.
                    # NOTE(review): skip_kind is matched as a regex above but
                    # located as a literal substring here; this assumes skip
                    # kinds are plain kind names -- confirm.
                    kind_end = first_key.find(skip_kind) + len(skip_kind) + 1
                    first_key = (first_key[:kind_end] +
                                 dbconstants.TERMINATING_STRING)
                    # Drop the matched pattern and every pattern before it.
                    self.skip_kinds = self.skip_kinds[index:]
                    break

                if skip:
                    break
                self.process_entity(entity)

            if not skip:
                first_key = list(entities[-1].keys())[0]
            start_inclusive = False
        except dbconstants.AppScaleDBConnectionError as connection_error:
            logging.error(
                "Error getting a batch: {0}".format(connection_error))
            time.sleep(self.DB_ERROR_PERIOD)
def test_get_kind_from_entity(self):
    """ Checks that get_kind_from_entity_key returns the third
    \\x00-separated segment of an entity key, whether or not the second
    segment is empty.
    """
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest releases no longer provide.
    # Key whose second segment is populated.
    self.assertEqual(
        "some",
        entity_utils.get_kind_from_entity_key(
            "hi\x00bye\x00some\x00other\x00stuff"))
    # Test empty namespace (very common).
    self.assertEqual(
        "some",
        entity_utils.get_kind_from_entity_key(
            "hi\x00\x00some\x00other\x00stuff"))