Example #1
class RepositoryContentUnit(AutoRetryDocument):
    """
    Represents the link between a repository and the units associated with it.

    This inherits from AutoRetryDocument (a mongoengine.Document subclass) and
    defines the schema for the documents in the repo_content_units collection.

    :ivar repo_id: string representation of the repository id
    :type repo_id: mongoengine.StringField
    :ivar unit_id: string representation of content unit id
    :type unit_id: mongoengine.StringField
    :ivar unit_type_id: string representation of content unit type
    :type unit_type_id: mongoengine.StringField
    :ivar created: ISO8601 representation of the time the association was created
    :type created: pulp.server.db.fields.ISO8601StringField
    :ivar updated: ISO8601 representation of last time a copy, sync, or upload ensured that
                   the association existed
    :type updated: pulp.server.db.fields.ISO8601StringField
    :ivar _ns: The namespace field (deprecated)
    :type _ns: mongoengine.StringField
    """

    repo_id = StringField(required=True)
    unit_id = StringField(required=True)
    unit_type_id = StringField(required=True)

    created = ISO8601StringField(
        required=True,
        default=lambda: dateutils.format_iso8601_utc_timestamp(
            dateutils.now_utc_timestamp()))
    updated = ISO8601StringField(
        required=True,
        default=lambda: dateutils.format_iso8601_utc_timestamp(
            dateutils.now_utc_timestamp()))

    # For backward compatibility
    _ns = StringField(default='repo_content_units')

    meta = {
        'collection': 'repo_content_units',
        'allow_inheritance': False,
        'indexes': [
            {
                'fields': ['repo_id', 'unit_type_id', 'unit_id'],
                'unique': True
            },
            {
                # Used for reverse lookup of units to repositories
                'fields': ['unit_id']
            }
        ]
    }
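The created and updated defaults above lean on two helpers from pulp.common.dateutils. For orientation, here is a minimal sketch of what that helper pair could look like; the return types and the exact ISO 8601 layout are assumptions, not Pulp's actual implementation:

import datetime
import time


def now_utc_timestamp():
    # Assumed contract: seconds since the epoch for the current UTC time, as a float.
    return time.time()


def format_iso8601_utc_timestamp(timestamp):
    # Assumed contract: render an epoch timestamp as an ISO 8601 string in UTC.
    dt = datetime.datetime.utcfromtimestamp(timestamp)
    return dt.strftime('%Y-%m-%dT%H:%M:%SZ')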
Example #2
    def on_success(self, retval, task_id, args, kwargs):
        """
        This overrides the success handler run by the worker when the task
        executes successfully. It updates the state, finish_time, and result of
        the relevant task status for asynchronous tasks; updating the status is
        skipped for synchronous tasks.

        :param retval:  The return value of the task.
        :param task_id: Unique id of the executed task.
        :param args:    Original arguments for the executed task.
        :param kwargs:  Original keyword arguments for the executed task.
        """
        logger.debug("Task successful : [%s]" % task_id)
        if not self.request.called_directly:
            delta = {'state': dispatch_constants.CALL_FINISHED_STATE,
                     'finish_time': dateutils.now_utc_timestamp(),
                     'result': retval}
            if isinstance(retval, TaskResult):
                delta['result'] = retval.return_value
                if retval.error:
                    delta['error'] = retval.error.to_dict()
                if retval.spawned_tasks:
                    task_list = []
                    for spawned_task in retval.spawned_tasks:
                        if isinstance(spawned_task, AsyncResult):
                            task_list.append(spawned_task.task_id)
                        elif isinstance(spawned_task, dict):
                            task_list.append(spawned_task['task_id'])
                    delta['spawned_tasks'] = task_list
            if isinstance(retval, AsyncResult):
                delta['spawned_tasks'] = [retval.task_id, ]
                delta['result'] = None

            TaskStatusManager.update_task_status(task_id=task_id, delta=delta)
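For orientation, this is the approximate shape of the delta written when the return value is a TaskResult that spawned two child tasks; all values are illustrative, and the state constant is assumed to serialize as a plain string:

delta = {
    'state': 'finished',                           # dispatch_constants.CALL_FINISHED_STATE
    'finish_time': 1400000000.0,                   # dateutils.now_utc_timestamp()
    'result': {'repo_id': 'zoo'},                  # retval.return_value
    'spawned_tasks': ['child-id-1', 'child-id-2'],
}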
Example #3
 def __call__(self, *args, **kwargs):
     """
     This overrides CeleryTask's __call__() method. We use this method
     for task state tracking of Pulp tasks.
     """
     # Add task_id to the task context so that the agent and plugins have access to it.
     # There are a few other attributes in the context, defined by the old dispatch
     # system, that are unused right now; they should be removed when we clean up the
     # dispatch folder after the migration to celery is complete.
     task_context = dispatch_factory.context()
     task_context.call_request_id = self.request.id
     # Check task status and skip running the task if task state is 'canceled'.
     task_status = TaskStatusManager.find_by_task_id(task_id=self.request.id)
     if task_status and task_status['state'] == dispatch_constants.CALL_CANCELED_STATE:
         logger.debug("Task cancel received for task-id : [%s]" % self.request.id)
         return
     # Update start_time and set the task state to 'running' for asynchronous tasks.
     # Skip updating status for eagerly executed tasks, since we don't want to track
     # synchronous tasks in our database.
     if not self.request.called_directly:
         # Using 'upsert' to avoid a possible race condition described in the apply_async method
         # above.
         TaskStatus.get_collection().update(
             {'task_id': self.request.id},
             {'$set': {'state': dispatch_constants.CALL_RUNNING_STATE,
                       'start_time': dateutils.now_utc_timestamp()}},
             upsert=True)
     # Run the actual task
     logger.debug("Running task : [%s]" % self.request.id)
     return super(Task, self).__call__(*args, **kwargs)
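The upsert in __call__ translates to a plain MongoDB update. Below is a hedged equivalent using the pymongo 2.x-era update() call, matching the style used in the snippet; the database and collection names are assumptions:

from pymongo import MongoClient

# Assumed names: the real collection comes from TaskStatus.get_collection().
collection = MongoClient()['pulp_database']['task_status']
collection.update(
    {'task_id': 'some-task-id'},
    {'$set': {'state': 'running', 'start_time': 1400000000.0}},
    # upsert=True inserts the status document if apply_async has not created
    # it yet, which closes the race the comment above refers to.
    upsert=True)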
Example #4
    def test_update_task_status(self):
        """
        Tests the successful operation of update_task_status().
        """
        task_id = self.get_random_uuid()
        queue = 'special_queue'
        tags = ['test-tag1', 'test-tag2']
        state = 'waiting'
        TaskStatusManager.create_task_status(task_id, queue, tags, state)
        delta = {'start_time': dateutils.now_utc_timestamp(),
                 'state': 'running',
                 'disregard': 'ignored',
                 'progress_report': {'report-id': 'my-progress'}}

        updated = TaskStatusManager.update_task_status(task_id, delta)

        task_status = TaskStatusManager.find_by_task_id(task_id)
        self.assertEqual(task_status['start_time'], delta['start_time'])
        self.assertEqual(task_status['state'], delta['state'])
        self.assertEqual(task_status['progress_report'], delta['progress_report'])
        self.assertEqual(task_status['queue'], queue)
        self.assertEqual(updated['start_time'], delta['start_time'])
        self.assertEqual(updated['state'], delta['state'])
        self.assertEqual(updated['progress_report'], delta['progress_report'])
        self.assertTrue('disregard' not in updated)
        self.assertTrue('disregard' not in task_status)
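The 'disregard' assertions imply that update_task_status() whitelists the keys it copies into the stored document. A minimal sketch of such filtering, with an assumed field list, might look like this:

# Assumed whitelist; the real set of accepted fields lives in TaskStatusManager.
ACCEPTED_DELTA_FIELDS = ('start_time', 'finish_time', 'state', 'result',
                         'traceback', 'error', 'spawned_tasks', 'progress_report')


def filter_delta(delta):
    # Drop keys such as 'disregard' that are not part of the task status schema.
    return dict((k, v) for k, v in delta.items() if k in ACCEPTED_DELTA_FIELDS)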
Example #5
 def set_task_started(task_id):
     """
     Update a task's state to reflect that it has started running.
     :param task_id: The identity of the task to be updated.
     :type  task_id: basestring
     """
     delta = {
         'state': dispatch_constants.CALL_RUNNING_STATE,
         'start_time': dateutils.now_utc_timestamp(),
     }
     TaskStatusManager.update_task_status(task_id=task_id, delta=delta)
Example #6
    def pre_save_signal(cls, sender, document, **kwargs):
        """
        The signal that is triggered before a unit is saved. This is used to
        support the legacy behavior of generating the unit id and setting
        the _last_updated timestamp.

        :param sender: sender class
        :type sender: object
        :param document: Document that sent the signal
        :type document: ContentUnit
        """
        document._last_updated = dateutils.now_utc_timestamp()
Example #7
    def __init__(self, repo_id, unit_id, unit_type_id):
        super(RepoContentUnit, self).__init__()

        # Mapping Identity Information
        self.repo_id = repo_id
        self.unit_id = unit_id
        self.unit_type_id = unit_type_id

        # store time in UTC
        utc_timestamp = dateutils.now_utc_timestamp()
        self.created = dateutils.format_iso8601_utc_timestamp(utc_timestamp)
        self.updated = self.created
Example #8
 def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
     """
     Update a content unit's stored metadata.
     @param content_type: unique id of content collection
     @type content_type: str
     @param unit_id: unique id of content unit
     @type unit_id: str
     @param unit_metadata_delta: metadata fields that have changed
     @type unit_metadata_delta: dict
     """
     unit_metadata_delta['_last_updated'] = dateutils.now_utc_timestamp()
     collection = content_types_db.type_units_collection(content_type)
     collection.update({'_id': unit_id}, {'$set': unit_metadata_delta}, safe=True)
Example #9
 def update_content_unit(self, content_type, unit_id, unit_metadata_delta):
     """
     Update a content unit's stored metadata.
     @param content_type: unique id of content collection
     @type content_type: str
     @param unit_id: unique id of content unit
     @type unit_id: str
     @param unit_metadata_delta: metadata fields that have changed
     @type unit_metadata_delta: dict
     """
     unit_metadata_delta['_last_updated'] = dateutils.now_utc_timestamp()
     collection = content_types_db.type_units_collection(content_type)
     collection.update({'_id': unit_id}, {'$set': unit_metadata_delta})
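The only difference between the two update_content_unit() variants is the safe=True flag, which older pymongo used to request an acknowledged write. A hedged sketch of the modern equivalent (pymongo 3+ names; database and collection names assumed):

from pymongo import MongoClient, WriteConcern

# update(..., safe=True) roughly corresponds to an acknowledged update_one():
collection = MongoClient()['pulp_database']['units_rpm']
collection.with_options(write_concern=WriteConcern(w=1)).update_one(
    {'_id': 'some-unit-id'},
    {'$set': {'_last_updated': 1400000000.0}})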
Example #10
 def set_task_failed(task_id, traceback):
     """
     Update a task's state to reflect that it failed.
     :param task_id: The identity of the task to be updated.
     :type  task_id: basestring
     :param traceback: A string representation of the traceback resulting from the task execution.
     :type  traceback: basestring
     """
     delta = {
         'state': dispatch_constants.CALL_ERROR_STATE,
         'finish_time': dateutils.now_utc_timestamp(),
         'traceback': traceback
     }
     TaskStatusManager.update_task_status(task_id=task_id, delta=delta)
Example #11
 def set_task_succeeded(task_id, result=None):
     """
     Update a task's state to reflect that it succeeded.
     :param task_id: The identity of the task to be updated.
     :type  task_id: basestring
     :param result: The optional value returned by the task execution.
     :type result: anything
     """
     delta = {
         'state': dispatch_constants.CALL_FINISHED_STATE,
         'finish_time': dateutils.now_utc_timestamp(),
         'result': result
     }
     TaskStatusManager.update_task_status(task_id=task_id, delta=delta)
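Taken together with set_task_started() and set_task_failed() from the earlier examples, a hypothetical wrapper could drive the status lifecycle like this (the task id and task body are illustrative):

import traceback
import uuid


def do_work():
    # Hypothetical task body; stands in for the real work.
    return 42


task_id = str(uuid.uuid4())
set_task_started(task_id)
try:
    result = do_work()
except Exception:
    set_task_failed(task_id, traceback.format_exc())
else:
    set_task_succeeded(task_id, result)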
Example #12
def associate_single_unit(repository, unit):
    """
    Associate a single unit to a repository.

    :param repository: The repository to update.
    :type repository: pulp.server.db.model.Repository
    :param unit: The unit to associate to the repository.
    :type unit: pulp.server.db.model.ContentUnit
    """
    current_timestamp = dateutils.now_utc_timestamp()
    formatted_datetime = dateutils.format_iso8601_utc_timestamp(current_timestamp)
    qs = model.RepositoryContentUnit.objects(
        repo_id=repository.repo_id, unit_id=unit.id, unit_type_id=unit.unit_type_id
    )
    qs.update_one(set_on_insert__created=formatted_datetime, set__updated=formatted_datetime, upsert=True)
Example #13
def associate_single_unit(repository, unit):
    """
    Associate a single unit to a repository.

    :param repository: The repository to update.
    :type repository: pulp.server.db.model.Repository
    :param unit: The unit to associate to the repository.
    :type unit: pulp.server.db.model.ContentUnit
    """
    current_timestamp = dateutils.now_utc_timestamp()
    formatted_datetime = dateutils.format_iso8601_utc_timestamp(
        current_timestamp)
    qs = model.RepositoryContentUnit.objects(repo_id=repository.repo_id,
                                             unit_id=unit.id,
                                             unit_type_id=unit.unit_type_id)
    qs.update_one(set_on_insert__created=formatted_datetime,
                  set__updated=formatted_datetime,
                  upsert=True)
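The associate_single_unit() examples above rely on mongoengine's update operators; roughly, the update_one() call maps to the raw MongoDB update document below, so created is stamped only when the association is first inserted while updated is refreshed on every call:

formatted_datetime = '2015-06-17T20:08:41Z'  # illustrative ISO 8601 value
raw_update = {
    '$setOnInsert': {'created': formatted_datetime},
    '$set': {'updated': formatted_datetime},
}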
Example #14
    def pre_save_signal(cls, sender, document, **kwargs):
        """
        The signal that is triggered before a unit is saved. This is used to
        support the legacy behavior of generating the unit id and setting
        the last_updated timestamp.

        :param sender: sender class
        :type sender: object
        :param document: Document that sent the signal
        :type document: ContentUnit
        """
        if not document.id:
            document.id = str(uuid.uuid4())
        document.last_updated = dateutils.now_utc_timestamp()

        # If content was set on this unit, copy the content into place
        if document._source_location:
            server_storage_dir = config.config.get('server', 'storage_dir')
            platform_storage_location = os.path.join(server_storage_dir,
                                                     'units',
                                                     document.unit_type_id,
                                                     str(document.id)[0],
                                                     str(document.id)[1:3],
                                                     str(document.id))
            # If the source is a directory, copy it recursively; otherwise copy the file
            if os.path.isdir(document._source_location):
                shutil.copytree(document._source_location,
                                platform_storage_location)
            else:
                target_file_name = os.path.basename(document._source_location)
                # Make sure the base directory exists
                try:
                    os.makedirs(platform_storage_location)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                # Copy the file
                document_full_storage_location = os.path.join(
                    platform_storage_location, target_file_name)
                shutil.copy(document._source_location,
                            document_full_storage_location)
                platform_storage_location = document_full_storage_location
            document.storage_path = platform_storage_location
Example #15
    def pre_save_signal(cls, sender, document, **kwargs):
        """
        The signal that is triggered before a unit is saved. This is used to
        support the legacy behavior of generating the unit id and setting
        the last_updated timestamp.

        :param sender: sender class
        :type sender: object
        :param document: Document that sent the signal
        :type document: ContentUnit
        """
        if not document.id:
            document.id = str(uuid.uuid4())
        document.last_updated = dateutils.now_utc_timestamp()

        # If content was set on this unit, copy the content into place
        if document._source_location:
            server_storage_dir = config.config.get('server', 'storage_dir')
            platform_storage_location = os.path.join(server_storage_dir, 'units',
                                                     document.unit_type_id,
                                                     str(document.id)[0],
                                                     str(document.id)[1:3],
                                                     str(document.id))
            # If the source is a directory, copy it recursively; otherwise copy the file
            if os.path.isdir(document._source_location):
                shutil.copytree(document._source_location, platform_storage_location)
            else:
                target_file_name = os.path.basename(document._source_location)
                # Make sure the base directory exists
                try:
                    os.makedirs(platform_storage_location)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                # Copy the file
                document_full_storage_location = os.path.join(platform_storage_location,
                                                              target_file_name)
                shutil.copy(document._source_location, document_full_storage_location)
                platform_storage_location = document_full_storage_location
            document.storage_path = platform_storage_location
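The path arithmetic in pre_save_signal() shards units by the first three characters of their id. An illustrative computation (all values are made up) shows the resulting layout:

import os

storage_dir = '/var/lib/pulp'      # assumed value of the server storage_dir setting
unit_type_id = 'rpm'               # illustrative unit type
unit_id = '3fa85f642b18455f9c1cb605b3b99a7d'
path = os.path.join(storage_dir, 'units', unit_type_id,
                    unit_id[0], unit_id[1:3], unit_id)
print(path)  # /var/lib/pulp/units/rpm/3/fa/3fa85f642b18455f9c1cb605b3b99a7d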
Example #16
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """
        This overrides the error handler run by the worker when the task fails.
        It updates state, finish_time and traceback of the relevant task status
        for asynchronous tasks. Skip updating status for synchronous tasks.

        :param exc:     The exception raised by the task.
        :param task_id: Unique id of the failed task.
        :param args:    Original arguments for the executed task.
        :param kwargs:  Original keyword arguments for the executed task.
        :param einfo:   celery's ExceptionInfo instance, containing serialized traceback.
        """
        logger.debug("Task failed : [%s]" % task_id)
        if not self.request.called_directly:
            delta = {'state': dispatch_constants.CALL_ERROR_STATE,
                     'finish_time': dateutils.now_utc_timestamp(),
                     'traceback': einfo.traceback}
            if not isinstance(exc, PulpException):
                exc = PulpException(str(exc))
            delta['error'] = exc.to_dict()

            TaskStatusManager.update_task_status(task_id=task_id, delta=delta)
Example #17
 def add_content_unit(self, content_type, unit_id, unit_metadata):
     """
     Add a content unit and its metadata to the corresponding pulp db
     collection.
     @param content_type: unique id of content collection
     @type content_type: str
     @param unit_id: unique id of content unit, None means to generate id
     @type unit_id: str or None
     @param unit_metadata: content unit metadata
     @type unit_metadata: dict
     @return: unit id, useful if it was generated
     @rtype: str
     """
     collection = content_types_db.type_units_collection(content_type)
     if unit_id is None:
         unit_id = str(uuid.uuid4())
     unit_doc = {
         '_id': unit_id,
         '_content_type_id': content_type,
         '_last_updated': dateutils.now_utc_timestamp()
     }
     unit_doc.update(unit_metadata)
     collection.insert(unit_doc, safe=True)
     return unit_id
Example #18
 def add_content_unit(self, content_type, unit_id, unit_metadata):
     """
     Add a content unit and its metadata to the corresponding pulp db
     collection.
     @param content_type: unique id of content collection
     @type content_type: str
     @param unit_id: unique id of content unit, None means to generate id
     @type unit_id: str or None
     @param unit_metadata: content unit metadata
     @type unit_metadata: dict
     @return: unit id, useful if it was generated
     @rtype: str
     """
     collection = content_types_db.type_units_collection(content_type)
     if unit_id is None:
         unit_id = str(uuid.uuid4())
     unit_doc = {
         '_id': unit_id,
         '_content_type_id': content_type,
         '_last_updated': dateutils.now_utc_timestamp()
     }
     unit_doc.update(unit_metadata)
     collection.insert(unit_doc)
     return unit_id
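This variant differs from the previous one only in dropping the safe=True write-concern flag. A hypothetical call, letting add_content_unit() generate the unit id (the manager instance, type id, and metadata are illustrative):

unit_id = manager.add_content_unit(
    'rpm', None, {'name': 'pulp-server', 'version': '2.8.0'})
print(unit_id)  # newly generated uuid4 string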
Example #19
 def __init__(self, call_request, call_report):
     super(ArchivedCall, self).__init__()
     self.timestamp = dateutils.now_utc_timestamp()
     self.call_request_string = str(call_request)
     self.serialized_call_report = call_report.serialize()
Example #20
    def setUp(self):
        super(RemoveRepoDuplicateNevra, self).setUp()

        # repo_a is based on the test repo defined in TestPurgeBase
        self.repo_a = platform_model.Repository(repo_id=self.repo.id)
        self.repo_a.save()

        # repo_b is a control repo that should be untouched by purge functions
        self.repo_b = platform_model.Repository(repo_id='b')
        self.repo_b.save()

        # create units
        unit_key_base = {
            'epoch': '0',
            'version': '0',
            'release': '23',
            'arch': 'noarch',
            'checksumtype': 'sha256',
            '_last_updated': 0,
        }

        units = []
        self.duplicate_unit_ids = set()
        for unit_type in self.UNIT_TYPES:
            unit_key_dupe = unit_key_base.copy()
            unit_key_uniq = unit_key_base.copy()

            # account for slightly different unit key field on drpm
            if unit_type is models.DRPM:
                unit_key_dupe['filename'] = 'dupe'
                unit_key_uniq['filename'] = 'uniq'
            else:
                unit_key_dupe['name'] = 'dupe'
                unit_key_uniq['name'] = 'uniq'

            # create units with duplicate nevra for this type
            # after purging, only one of the three should remain
            for i in range(3):
                unit_dupe = unit_type(**unit_key_dupe)
                # use the unit's python id to guarantee a unique "checksum"
                unit_dupe.checksum = str(id(unit_dupe))
                unit_dupe.save()
                units.append(unit_dupe)
                if i != 0:
                    # after the first unit, stash the "extra" duplicates to make it easier
                    # to modify the unit association updated timestamps for predictable sorting
                    self.duplicate_unit_ids.add(unit_dupe.id)

            # use the incrementing unit count to make the uniq unit's nevra unique
            unit_key_uniq['version'] = str(len(units))

            # create a unit with unique nevra
            unit_uniq = unit_type(**unit_key_uniq)
            unit_uniq.checksum = str(hash(unit_uniq))
            unit_uniq.save()
            units.append(unit_uniq)

        # associate each unit with each repo
        for repo in self.repo_a, self.repo_b:
            for i, unit in enumerate(units):
                repo_controller.associate_single_unit(repo, unit)

        # Sanity check: 3 dupe units and 1 uniq unit for n unit types, for each repo
        expected_rcu_count = 4 * len(self.UNIT_TYPES)
        for repo_id in self.repo_a.repo_id, self.repo_b.repo_id:
            self.assertEqual(
                platform_model.RepositoryContentUnit.objects.filter(
                    repo_id=repo_id).count(), expected_rcu_count)

        # To ensure the purge mechanism behavior is predictable for testing,
        # go through the duplicate unit IDs and set their updated time to be in the past,
        # since unit associations were all just created at the same time.
        # The older associations are the ones that should be purged.
        earlier_timestamp = dateutils.now_utc_timestamp() - 3600
        formatted_timestamp = dateutils.format_iso8601_utc_timestamp(
            earlier_timestamp)
        platform_model.RepositoryContentUnit.objects.filter(unit_id__in=self.duplicate_unit_ids)\
            .update(set__updated=formatted_timestamp)
Example #21
    def setUp(self):
        super(RemoveRepoDuplicateNevra, self).setUp()

        # repo_a is based on the test repo defined in TestPurgeBase
        self.repo_a = platform_model.Repository(repo_id=self.repo.id)
        self.repo_a.save()

        # repo_b is a control repo that should be untouched by purge functions
        self.repo_b = platform_model.Repository(repo_id='b')
        self.repo_b.save()

        # create units
        unit_key_base = {
            'epoch': '0',
            'version': '0',
            'release': '23',
            'arch': 'noarch',
            'checksumtype': 'sha256',
            '_last_updated': 0,
        }

        units = []
        self.duplicate_unit_ids = set()
        for unit_type in self.UNIT_TYPES:
            unit_key_dupe = unit_key_base.copy()
            unit_key_uniq = unit_key_base.copy()

            # account for slightly different unit key field on drpm
            if unit_type is models.DRPM:
                unit_key_dupe['filename'] = 'dupe'
                unit_key_uniq['filename'] = 'uniq'
            else:
                unit_key_dupe['name'] = 'dupe'
                unit_key_uniq['name'] = 'uniq'

            # create units with duplicate nevra for this type
            # after purging, only one of the three should remain
            for i in range(3):
                unit_dupe = unit_type(**unit_key_dupe)
                # use the unit's python id to guarantee a unique "checksum"
                unit_dupe.checksum = str(id(unit_dupe))
                unit_dupe.save()
                units.append(unit_dupe)
                if i != 0:
                    # after the first unit, stash the "extra" duplicates to make it easier
                    # to modify the unit association updated timestamps for predictable sorting
                    self.duplicate_unit_ids.add(unit_dupe.id)

            # use the incrementing unit count to make the uniq unit's nevra unique
            unit_key_uniq['version'] = str(len(units))

            # create a unit with unique nevra
            unit_uniq = unit_type(**unit_key_uniq)
            unit_uniq.checksum = str(hash(unit_uniq))
            unit_uniq.save()
            units.append(unit_uniq)

        # associate each unit with each repo
        for repo in self.repo_a, self.repo_b:
            for i, unit in enumerate(units):
                repo_controller.associate_single_unit(repo, unit)

        # Sanity check: 3 dupe units and 1 uniq unit for n unit types, for each repo
        expected_rcu_count = 4 * len(self.UNIT_TYPES)
        for repo_id in self.repo_a.repo_id, self.repo_b.repo_id:
            self.assertEqual(platform_model.RepositoryContentUnit.objects.filter(
                repo_id=repo_id).count(), expected_rcu_count)

        # To ensure the purge mechanism behavior is predictable for testing,
        # go through the duplicate unit IDs and set their updated time to be in the past,
        # since unit associations were all just created at the same time.
        # The older associations are the ones that should be purged.
        earlier_timestamp = dateutils.now_utc_timestamp() - 3600
        formatted_timestamp = dateutils.format_iso8601_utc_timestamp(earlier_timestamp)
        platform_model.RepositoryContentUnit.objects.filter(unit_id__in=self.duplicate_unit_ids)\
            .update(set__updated=formatted_timestamp)