class RepositoryContentUnit(AutoRetryDocument):
    """
    Represents the link between a repository and the units associated with it.

    This inherits from mongoengine.Document and defines the schema for the documents
    in the repo_content_units collection.

    :ivar repo_id: string representation of the repository id
    :type repo_id: mongoengine.StringField
    :ivar unit_id: string representation of content unit id
    :type unit_id: mongoengine.StringField
    :ivar unit_type_id: string representation of content unit type
    :type unit_type_id: mongoengine.StringField
    :ivar created: ISO8601 representation of the time the association was created
    :type created: pulp.server.db.fields.ISO8601StringField
    :ivar updated: ISO8601 representation of the last time a copy, sync, or upload
                   ensured that the association existed
    :type updated: pulp.server.db.fields.ISO8601StringField
    :ivar _ns: The namespace field (deprecated)
    :type _ns: mongoengine.StringField
    """
    repo_id = StringField(required=True)
    unit_id = StringField(required=True)
    unit_type_id = StringField(required=True)

    created = ISO8601StringField(
        required=True,
        default=lambda: dateutils.format_iso8601_utc_timestamp(
            dateutils.now_utc_timestamp()))
    updated = ISO8601StringField(
        required=True,
        default=lambda: dateutils.format_iso8601_utc_timestamp(
            dateutils.now_utc_timestamp()))

    # For backward compatibility
    _ns = StringField(default='repo_content_units')

    meta = {
        'collection': 'repo_content_units',
        'allow_inheritance': False,
        'indexes': [
            {
                'fields': ['repo_id', 'unit_type_id', 'unit_id'],
                'unique': True
            },
            {
                # Used for reverse lookup of units to repositories
                'fields': ['unit_id']
            }
        ]
    }

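# A minimal usage sketch for the document above, assuming a mongoengine connection to
# the Pulp database is already configured. It exercises both indexes: the unit_id index
# for reverse lookup of the repositories containing a given unit, and the unique
# compound index when saving an association. The ids ('zoo', 'abc123', 'rpm') are
# hypothetical.

def repos_containing_unit(unit_id):
    """Reverse lookup: ids of all repositories associated with the given unit."""
    return [assoc.repo_id
            for assoc in RepositoryContentUnit.objects(unit_id=unit_id)]


# Saving a second association with the same (repo_id, unit_type_id, unit_id) triple
# would raise mongoengine.NotUniqueError because of the unique compound index.
RepositoryContentUnit(repo_id='zoo', unit_id='abc123', unit_type_id='rpm').save()
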
def on_success(self, retval, task_id, args, kwargs):
    """
    This overrides the success handler run by the worker when the task executes
    successfully. It updates the state, finish_time and result of the relevant
    task status for asynchronous tasks. Skip updating status for synchronous tasks.

    :param retval:  The return value of the task.
    :param task_id: Unique id of the executed task.
    :param args:    Original arguments for the executed task.
    :param kwargs:  Original keyword arguments for the executed task.
    """
    logger.debug("Task successful : [%s]" % task_id)
    if not self.request.called_directly:
        delta = {'state': dispatch_constants.CALL_FINISHED_STATE,
                 'finish_time': dateutils.now_utc_timestamp(),
                 'result': retval}
        if isinstance(retval, TaskResult):
            delta['result'] = retval.return_value
            if retval.error:
                delta['error'] = retval.error.to_dict()
            if retval.spawned_tasks:
                task_list = []
                for spawned_task in retval.spawned_tasks:
                    if isinstance(spawned_task, AsyncResult):
                        task_list.append(spawned_task.task_id)
                    elif isinstance(spawned_task, dict):
                        task_list.append(spawned_task['task_id'])
                delta['spawned_tasks'] = task_list
        if isinstance(retval, AsyncResult):
            delta['spawned_tasks'] = [retval.task_id, ]
            delta['result'] = None
        TaskStatusManager.update_task_status(task_id=task_id, delta=delta)

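# For illustration, derived directly from the branches above: given a TaskResult whose
# return_value is {'updated': 3} and whose spawned_tasks list holds dicts with
# hypothetical ids 'child-1' and 'child-2', on_success() records a delta of this shape
# (finish_time shown as a fixed string for readability):
expected_delta = {
    'state': dispatch_constants.CALL_FINISHED_STATE,
    'finish_time': '2014-01-01T00:00:00Z',
    'result': {'updated': 3},
    'spawned_tasks': ['child-1', 'child-2'],
}
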
def __call__(self, *args, **kwargs):
    """
    This overrides CeleryTask's __call__() method. We use this method
    for task state tracking of Pulp tasks.
    """
    # Add task_id to the task context, so that the agent and plugins have access to
    # the task id. There are a few other attributes in the context, as defined by the
    # old dispatch system, that are unused right now. They should be removed when we
    # clean up the dispatch folder after the migration to celery is complete.
    task_context = dispatch_factory.context()
    task_context.call_request_id = self.request.id
    # Check the task status and skip running the task if its state is 'canceled'.
    task_status = TaskStatusManager.find_by_task_id(task_id=self.request.id)
    if task_status and task_status['state'] == dispatch_constants.CALL_CANCELED_STATE:
        logger.debug("Task cancel received for task-id : [%s]" % self.request.id)
        return
    # Update start_time and set the task state to 'running' for asynchronous tasks.
    # Skip updating status for eagerly executed tasks, since we don't want to track
    # synchronous tasks in our database.
    if not self.request.called_directly:
        # Using 'upsert' to avoid a possible race condition described in the
        # apply_async method above.
        TaskStatus.get_collection().update(
            {'task_id': self.request.id},
            {'$set': {'state': dispatch_constants.CALL_RUNNING_STATE,
                      'start_time': dateutils.now_utc_timestamp()}},
            upsert=True)
    # Run the actual task
    logger.debug("Running task : [%s]" % self.request.id)
    return super(Task, self).__call__(*args, **kwargs)

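# Hedged sketch of how a cancellation could take effect before __call__() runs, based
# only on the check above: if the stored state is already CALL_CANCELED_STATE when the
# worker picks the task up, the early return fires and the task body never executes.
# 'some-task-id' is a hypothetical id.
TaskStatus.get_collection().update(
    {'task_id': 'some-task-id'},
    {'$set': {'state': dispatch_constants.CALL_CANCELED_STATE}},
    upsert=True)
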
def test_update_task_status(self):
    """
    Tests the successful operation of update_task_status().
    """
    task_id = self.get_random_uuid()
    queue = 'special_queue'
    tags = ['test-tag1', 'test-tag2']
    state = 'waiting'
    TaskStatusManager.create_task_status(task_id, queue, tags, state)
    delta = {'start_time': dateutils.now_utc_timestamp(),
             'state': 'running',
             'disregard': 'ignored',
             'progress_report': {'report-id': 'my-progress'}}

    updated = TaskStatusManager.update_task_status(task_id, delta)

    task_status = TaskStatusManager.find_by_task_id(task_id)
    self.assertEqual(task_status['start_time'], delta['start_time'])
    self.assertEqual(task_status['state'], delta['state'])
    self.assertEqual(task_status['progress_report'], delta['progress_report'])
    self.assertEqual(task_status['queue'], queue)
    self.assertEqual(updated['start_time'], delta['start_time'])
    self.assertEqual(updated['state'], delta['state'])
    self.assertEqual(updated['progress_report'], delta['progress_report'])
    # unrecognized keys in the delta should be ignored, not persisted or returned
    self.assertTrue('disregard' not in updated)
    self.assertTrue('disregard' not in task_status)

def set_task_started(task_id):
    """
    Update a task's state to reflect that it has started running.

    :param task_id: The identity of the task to be updated.
    :type  task_id: basestring
    """
    delta = {
        'state': dispatch_constants.CALL_RUNNING_STATE,
        'start_time': dateutils.now_utc_timestamp(),
    }
    TaskStatusManager.update_task_status(task_id=task_id, delta=delta)

def pre_save_signal(cls, sender, document, **kwargs):
    """
    The signal that is triggered before a unit is saved. It is used to support
    the legacy behavior of setting the _last_updated timestamp.

    :param sender:   sender class
    :type  sender:   object
    :param document: document that sent the signal
    :type  document: ContentUnit
    """
    document._last_updated = dateutils.now_utc_timestamp()

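# The handler above follows mongoengine's signal convention; a minimal wiring sketch,
# assuming ContentUnit is the mongoengine Document the handler belongs to (mongoengine
# signals require the blinker package):
from mongoengine import signals

signals.pre_save.connect(ContentUnit.pre_save_signal, sender=ContentUnit)
# Every ContentUnit.save() now refreshes _last_updated just before the write.
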
def __init__(self, repo_id, unit_id, unit_type_id):
    super(RepoContentUnit, self).__init__()

    # mapping identity information
    self.repo_id = repo_id
    self.unit_id = unit_id
    self.unit_type_id = unit_type_id

    # store time in UTC
    utc_timestamp = dateutils.now_utc_timestamp()
    self.created = dateutils.format_iso8601_utc_timestamp(utc_timestamp)
    self.updated = self.created

def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
    """
    Update a content unit's stored metadata.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_id: unique id of content unit
    @type  unit_id: str
    @param unit_metadata_delta: metadata fields that have changed
    @type  unit_metadata_delta: dict
    """
    unit_metadata_delta['_last_updated'] = dateutils.now_utc_timestamp()
    collection = content_types_db.type_units_collection(content_type)
    collection.update({'_id': unit_id}, {'$set': unit_metadata_delta}, safe=True)

def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
    """
    Update a content unit's stored metadata.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_id: unique id of content unit
    @type  unit_id: str
    @param unit_metadata_delta: metadata fields that have changed
    @type  unit_metadata_delta: dict
    """
    unit_metadata_delta['_last_updated'] = dateutils.now_utc_timestamp()
    collection = content_types_db.type_units_collection(content_type)
    collection.update({'_id': unit_id}, {'$set': unit_metadata_delta})

def set_task_failed(task_id, traceback):
    """
    Update a task's state to reflect that it failed.

    :param task_id:   The identity of the task to be updated.
    :type  task_id:   basestring
    :param traceback: A string representation of the traceback resulting from
                      the task execution.
    :type  traceback: basestring
    """
    delta = {
        'state': dispatch_constants.CALL_ERROR_STATE,
        'finish_time': dateutils.now_utc_timestamp(),
        'traceback': traceback
    }
    TaskStatusManager.update_task_status(task_id=task_id, delta=delta)

def set_task_succeeded(task_id, result=None):
    """
    Update a task's state to reflect that it succeeded.

    :param task_id: The identity of the task to be updated.
    :type  task_id: basestring
    :param result:  The optional value returned by the task execution.
    :type  result:  anything
    """
    delta = {
        'state': dispatch_constants.CALL_FINISHED_STATE,
        'finish_time': dateutils.now_utc_timestamp(),
        'result': result
    }
    TaskStatusManager.update_task_status(task_id=task_id, delta=delta)

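# A sketch tying the three state helpers together around a unit of work, assuming
# they are importable as defined above; 'my-task-id' and do_work() are hypothetical.
import traceback as tb

task_id = 'my-task-id'
set_task_started(task_id)
try:
    result = do_work()
except Exception:
    # record the error state along with the formatted traceback string
    set_task_failed(task_id, tb.format_exc())
else:
    set_task_succeeded(task_id, result)
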
def associate_single_unit(repository, unit):
    """
    Associate a single unit with a repository.

    :param repository: The repository to update.
    :type  repository: pulp.server.db.model.Repository
    :param unit:       The unit to associate with the repository.
    :type  unit:       pulp.server.db.model.ContentUnit
    """
    current_timestamp = dateutils.now_utc_timestamp()
    formatted_datetime = dateutils.format_iso8601_utc_timestamp(current_timestamp)
    qs = model.RepositoryContentUnit.objects(
        repo_id=repository.repo_id,
        unit_id=unit.id,
        unit_type_id=unit.unit_type_id)
    qs.update_one(
        set_on_insert__created=formatted_datetime,
        set__updated=formatted_datetime,
        upsert=True)

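# Usage sketch: because associate_single_unit() upserts on the unique
# (repo_id, unit_type_id, unit_id) triple, repeating the call is idempotent.
# 'created' is only set on insert (set_on_insert), while 'updated' is refreshed on
# every call. 'repository' and 'unit' stand in for existing model instances.
associate_single_unit(repository, unit)   # inserts: created == updated
associate_single_unit(repository, unit)   # no new document; refreshes 'updated' only
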
def pre_save_signal(cls, sender, document, **kwargs):
    """
    The signal that is triggered before a unit is saved. It is used to support
    the legacy behavior of generating the unit id and setting the last_updated
    timestamp.

    :param sender:   sender class
    :type  sender:   object
    :param document: document that sent the signal
    :type  document: ContentUnit
    """
    if not document.id:
        document.id = str(uuid.uuid4())
    document.last_updated = dateutils.now_utc_timestamp()

    # If content was set on this unit, copy the content into place
    if document._source_location:
        server_storage_dir = config.config.get('server', 'storage_dir')
        platform_storage_location = os.path.join(
            server_storage_dir, 'units', document.unit_type_id,
            str(document.id)[0], str(document.id)[1:3], str(document.id))
        # If the source is a directory, recursively copy it; otherwise copy the file
        if os.path.isdir(document._source_location):
            shutil.copytree(document._source_location, platform_storage_location)
        else:
            target_file_name = os.path.basename(document._source_location)
            # Make sure the base directory exists
            try:
                os.makedirs(platform_storage_location)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
            # Copy the file
            document_full_storage_location = os.path.join(
                platform_storage_location, target_file_name)
            shutil.copy(document._source_location, document_full_storage_location)
            platform_storage_location = document_full_storage_location
        document.storage_path = platform_storage_location

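# Worked example of the storage layout computed above: the first character of the unit
# id and then characters 1-2 become two levels of sharding directories. The storage_dir
# value and unit id below are hypothetical.
import os

server_storage_dir = '/var/lib/pulp'      # assumed value of the 'storage_dir' setting
unit_type_id, unit_id = 'rpm', 'abc123'   # hypothetical type and id
path = os.path.join(server_storage_dir, 'units', unit_type_id,
                    unit_id[0], unit_id[1:3], unit_id)
assert path == '/var/lib/pulp/units/rpm/a/bc/abc123'
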
def on_failure(self, exc, task_id, args, kwargs, einfo):
    """
    This overrides the error handler run by the worker when the task fails.
    It updates state, finish_time and traceback of the relevant task status
    for asynchronous tasks. Skip updating status for synchronous tasks.

    :param exc:     The exception raised by the task.
    :param task_id: Unique id of the failed task.
    :param args:    Original arguments for the executed task.
    :param kwargs:  Original keyword arguments for the executed task.
    :param einfo:   celery's ExceptionInfo instance, containing serialized traceback.
    """
    logger.debug("Task failed : [%s]" % task_id)
    if not self.request.called_directly:
        delta = {'state': dispatch_constants.CALL_ERROR_STATE,
                 'finish_time': dateutils.now_utc_timestamp(),
                 'traceback': einfo.traceback}
        if not isinstance(exc, PulpException):
            exc = PulpException(str(exc))
        delta['error'] = exc.to_dict()
        TaskStatusManager.update_task_status(task_id=task_id, delta=delta)

def add_content_unit(self, content_type, unit_id, unit_metadata):
    """
    Add a content unit and its metadata to the corresponding pulp db collection.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_id: unique id of content unit, None means to generate id
    @type  unit_id: str or None
    @param unit_metadata: content unit metadata
    @type  unit_metadata: dict
    @return: unit id, useful if it was generated
    @rtype:  str
    """
    collection = content_types_db.type_units_collection(content_type)
    if unit_id is None:
        unit_id = str(uuid.uuid4())
    unit_doc = {
        '_id': unit_id,
        '_content_type_id': content_type,
        '_last_updated': dateutils.now_utc_timestamp()
    }
    unit_doc.update(unit_metadata)
    collection.insert(unit_doc, safe=True)
    return unit_id

def add_content_unit(self, content_type, unit_id, unit_metadata):
    """
    Add a content unit and its metadata to the corresponding pulp db collection.

    @param content_type: unique id of content collection
    @type  content_type: str
    @param unit_id: unique id of content unit, None means to generate id
    @type  unit_id: str or None
    @param unit_metadata: content unit metadata
    @type  unit_metadata: dict
    @return: unit id, useful if it was generated
    @rtype:  str
    """
    collection = content_types_db.type_units_collection(content_type)
    if unit_id is None:
        unit_id = str(uuid.uuid4())
    unit_doc = {
        '_id': unit_id,
        '_content_type_id': content_type,
        '_last_updated': dateutils.now_utc_timestamp()
    }
    unit_doc.update(unit_metadata)
    collection.insert(unit_doc)
    return unit_id

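# Sketch of the add/update round trip using the two manager methods above; the content
# type 'rpm' and the metadata values are hypothetical, and 'manager' stands in for an
# instance of the class these methods are defined on.
unit_id = manager.add_content_unit('rpm', None, {'name': 'zsh', 'version': '5.0'})
# Later, change one field; _last_updated is stamped automatically by the method.
manager.update_content_unit('rpm', unit_id, {'version': '5.1'})
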
def __init__(self, call_request, call_report):
    super(ArchivedCall, self).__init__()
    self.timestamp = dateutils.now_utc_timestamp()
    self.call_request_string = str(call_request)
    self.serialized_call_report = call_report.serialize()

def setUp(self):
    super(RemoveRepoDuplicateNevra, self).setUp()
    # repo_a is based on the test repo defined in TestPurgeBase
    self.repo_a = platform_model.Repository(repo_id=self.repo.id)
    self.repo_a.save()

    # repo_b is a control repo that should be untouched by purge functions
    self.repo_b = platform_model.Repository(repo_id='b')
    self.repo_b.save()

    # create units
    unit_key_base = {
        'epoch': '0',
        'version': '0',
        'release': '23',
        'arch': 'noarch',
        'checksumtype': 'sha256',
        '_last_updated': 0,
    }

    units = []
    self.duplicate_unit_ids = set()
    for unit_type in self.UNIT_TYPES:
        unit_key_dupe = unit_key_base.copy()
        unit_key_uniq = unit_key_base.copy()

        # account for the slightly different unit key field on drpm
        if unit_type is models.DRPM:
            unit_key_dupe['filename'] = 'dupe'
            unit_key_uniq['filename'] = 'uniq'
        else:
            unit_key_dupe['name'] = 'dupe'
            unit_key_uniq['name'] = 'uniq'

        # create units with duplicate nevra for this type;
        # after purging, only one of the three should remain
        for i in range(3):
            unit_dupe = unit_type(**unit_key_dupe)
            # use the unit's python id to guarantee a unique "checksum"
            unit_dupe.checksum = str(id(unit_dupe))
            unit_dupe.save()
            units.append(unit_dupe)
            if i != 0:
                # after the first unit, stash the "extra" duplicates to make it easier
                # to modify the unit association updated timestamps for predictable
                # sorting
                self.duplicate_unit_ids.add(unit_dupe.id)

        # use the incrementing unit count to make the uniq unit's nevra unique
        unit_key_uniq['version'] = str(len(units))

        # create a unit with unique nevra
        unit_uniq = unit_type(**unit_key_uniq)
        unit_uniq.checksum = str(hash(unit_uniq))
        unit_uniq.save()
        units.append(unit_uniq)

    # associate each unit with each repo
    for repo in self.repo_a, self.repo_b:
        for i, unit in enumerate(units):
            repo_controller.associate_single_unit(repo, unit)

    # Sanity check: 3 dupe units and 1 uniq unit for n unit types, for each repo
    expected_rcu_count = 4 * len(self.UNIT_TYPES)
    for repo_id in self.repo_a.repo_id, self.repo_b.repo_id:
        self.assertEqual(
            platform_model.RepositoryContentUnit.objects.filter(
                repo_id=repo_id).count(),
            expected_rcu_count)

    # To ensure the purge mechanism behavior is predictable for testing, go through
    # the duplicate unit IDs and set their updated time to be in the past, since the
    # unit associations were all just created at the same time. The older
    # associations are the ones that should be purged.
    earlier_timestamp = dateutils.now_utc_timestamp() - 3600
    formatted_timestamp = dateutils.format_iso8601_utc_timestamp(earlier_timestamp)
    platform_model.RepositoryContentUnit.objects.filter(
        unit_id__in=self.duplicate_unit_ids).update(set__updated=formatted_timestamp)
