Ejemplo n.º 1
0
    def test_lock(self):
        """Exercise the full lock / touch / unlock life cycle."""
        task_name = "test"

        # A fresh lock succeeds; a second attempt while held must fail.
        self.assertTrue(lock(task_name, expire=2))
        self.assertFalse(lock(task_name))

        time.sleep(2)

        # Once expired, the lock can be taken again, and touching it
        # pushes the expiry further out.
        self.assertTrue(lock(task_name, expire=1))
        self.assertTrue(touch(task_name, expire=10))

        time.sleep(2)

        # Still held thanks to the touch, so locking fails.
        self.assertFalse(lock(task_name))

        unlock(task_name)

        # Touch only works while the lock is held.
        self.assertFalse(touch(task_name))

        # A second unlock is a harmless no-op.
        unlock(task_name)

        # And the lock is free to take once more.
        self.assertTrue(lock(task_name, expire=1))
Ejemplo n.º 2
0
 def _remove_expired_archived_items(self, now, lock_name):
     """Delete expired items from the archived collection.

     :param datetime now: reference time used to compute expiry
     :param str lock_name: name of the lock to keep alive while working
     """
     # Refresh the lock first; bail out if another worker took over.
     if not touch(lock_name, expire=600):
         logger.warning(
             '{} Lost lock before removing expired items from archived.'.format(
                 self.log_msg))
         return
     expiry_minutes = app.config.get('ARCHIVED_EXPIRY_MINUTES')
     query_limit = app.config.get('MAX_EXPIRY_QUERY_LIMIT', 100)
     # Feature is off unless an expiry window is configured.
     if not expiry_minutes:
         return
     logger.info('%s Starting to remove expired items from archived.',
                 self.log_msg)
     archived_service = get_resource_service('archived')
     expired = list(archived_service.find(
         _get_expired_mongo_ids_query(expiry_minutes, now),
         max_results=query_limit, sort='_id'))
     if expired:
         logger.info('%s Removing %d expired items from archived.',
                     self.log_msg, len(expired))
     else:
         logger.info('%s No items found to expire in archived.',
                     self.log_msg)
     removed = archived_service.delete_docs(expired)
     for item in expired:
         if item['_id'] not in removed:
             logger.error('%s Item was not removed from archived item=%s',
                          self.log_msg, item['item_id'])
             continue
         # Let interested parties know the item left the archived collection.
         signals.archived_item_removed.send(archived_service, item=item)
         # Drop media files only when no legal archive exists and no other
         # archived copy of the same item remains.
         if not app.config.get('LEGAL_ARCHIVE') \
                 and not archived_service.find_one(req=None,
                                                   item_id=item['item_id']):
             remove_media_files(item)
Ejemplo n.º 3
0
    def _remove_expired_items(self, expiry_datetime, lock_name):
        """Remove the expired published items.

        Spiked items are deleted outright; killed/recalled items are only
        purged once confirmed in the legal archive; everything else is moved
        to the archived collection, honouring desks that preserve published
        content and the configured archive content filters.

        :param datetime expiry_datetime: expiry datetime
        :param str lock_name: lock name to touch while processing
        """
        logger.info('{} Starting to remove published expired items.'.format(
            self.log_msg))
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        items_to_remove = set()
        items_to_be_archived = dict()
        items_having_issues = dict()
        # Desks flagged to keep their published content; used as a lookup
        # when deciding whether an item may be removed.
        preserve_published_desks = {
            desk.get(config.ID_FIELD): 1
            for desk in get_resource_service('desks').find(
                where={'preserve_published_content': True})
        }

        # Expired items arrive in batches from the archive service.
        for expired_items in archive_service.get_expired_items(
                expiry_datetime):
            if len(expired_items) == 0:
                logger.info('{} No items found to expire.'.format(
                    self.log_msg))
                return

            # Keep the lock alive per batch; stop if another worker took it.
            if not touch(lock_name, expire=600):
                logger.warning(
                    '{} lost lock while removing expired items.'.format(
                        self.log_msg))
                return

            # delete spiked items
            self.delete_spiked_items(expired_items)

            # get killed items
            killed_items = {
                item.get(config.ID_FIELD): item
                for item in expired_items if item.get(ITEM_STATE) in
                {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}
            }

            # check if killed items imported to legal
            items_having_issues.update(
                self.check_if_items_imported_to_legal_archive(killed_items))

            # filter out the killed items not imported to legal.
            killed_items = {
                item_id: item
                for item_id, item in killed_items.items()
                if item_id not in items_having_issues
            }

            # Get the not killed and spiked items
            not_killed_items = {
                item.get(config.ID_FIELD): item
                for item in expired_items if item.get(ITEM_STATE) not in {
                    CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED,
                    CONTENT_STATE.RECALLED
                }
            }

            # Doubled braces render as literal braces; only the field
            # placeholders are substituted per item below.
            log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                             "'expired_on': {expiry}}}."

            # Processing items to expire
            for item_id, item in not_killed_items.items():
                # defaults guard log_msg_format against missing keys
                item.setdefault(config.VERSION, 1)
                item.setdefault('expiry', expiry_datetime)
                item.setdefault('unique_name', '')
                expiry_msg = log_msg_format.format(**item)
                logger.info('{} Processing expired item. {}'.format(
                    self.log_msg, expiry_msg))

                # presumably filled by _can_remove_item with the item plus
                # related items that must expire with it — verify in helper
                processed_items = dict()
                if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                        self._can_remove_item(item, processed_items, preserve_published_desks):
                    # item can be archived and removed from the database
                    logger.info('{} Removing item. {}'.format(
                        self.log_msg, expiry_msg))
                    logger.info('{} Items to be removed. {}'.format(
                        self.log_msg, processed_items))
                    issues = self.check_if_items_imported_to_legal_archive(
                        processed_items)
                    if issues:
                        items_having_issues.update(processed_items)
                    else:
                        items_to_be_archived.update(processed_items)

            # all items to expire
            items_to_expire = deepcopy(items_to_be_archived)

            # check once again in items imported to legal
            items_having_issues.update(
                self.check_if_items_imported_to_legal_archive(items_to_expire))
            if items_having_issues:
                # remove items not imported to legal
                items_to_expire = {
                    item_id: item
                    for item_id, item in items_to_expire.items()
                    if item_id not in items_having_issues
                }

                # remove items not imported to legal from archived items
                items_to_be_archived = {
                    item_id: item
                    for item_id, item in items_to_be_archived.items()
                    if item_id not in items_having_issues
                }

                # items_to_be_archived might contain killed items
                for item_id, item in items_to_be_archived.items():
                    if item.get(ITEM_STATE) in {
                            CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
                    }:
                        killed_items[item_id] = item

                # remove killed items from the items_to_be_archived
                items_to_be_archived = {
                    item_id: item
                    for item_id, item in items_to_be_archived.items()
                    if item.get(ITEM_STATE) not in
                    {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED}
                }

            # add killed items to items to expire
            items_to_expire.update(killed_items)

            # get the filter conditions
            logger.info('{} Loading filter conditions.'.format(self.log_msg))
            req = ParsedRequest()
            filter_conditions = list(
                get_resource_service('content_filters').get(
                    req=req, lookup={'is_archived_filter': True}))

            # move to archived collection
            logger.info('{} Archiving items.'.format(self.log_msg))
            for _item_id, item in items_to_be_archived.items():
                self._move_to_archived(item, filter_conditions)

            for item_id, item in killed_items.items():
                # delete from the published collection and queue
                msg = log_msg_format.format(**item)
                try:
                    published_service.delete_by_article_id(item_id)
                    logger.info(
                        '{} Deleting killed item from published. {}'.format(
                            self.log_msg, msg))
                    items_to_remove.add(item_id)
                except Exception:
                    logger.exception(
                        '{} Failed to delete killed item from published. {}'.
                        format(self.log_msg, msg))

            if items_to_remove:
                logger.info('{} Deleting articles.: {}'.format(
                    self.log_msg, items_to_remove))
                archive_service.delete_by_article_ids(list(items_to_remove))

            # Items stuck in legal-archive import are flagged rather than
            # removed, so they get retried on a later run.
            for _item_id, item in items_having_issues.items():
                msg = log_msg_format.format(**item)
                try:
                    archive_service.system_update(item.get(config.ID_FIELD),
                                                  {'expiry_status': 'invalid'},
                                                  item)
                    logger.info('{} Setting item expiry status. {}'.format(
                        self.log_msg, msg))
                except Exception:
                    logger.exception(
                        '{} Failed to set expiry status for item. {}'.format(
                            self.log_msg, msg))

            logger.info('{} Deleting killed from archive.'.format(
                self.log_msg))
Ejemplo n.º 4
0
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    :raises IngestFileError: when fetching or ingesting fails.
    """
    lock_name = get_lock_id("ingest", provider["name"],
                            provider[superdesk.config.ID_FIELD])

    # Expire slightly beyond the update TTL so a slow first batch does not
    # lose the lock; touch() below extends it per batch.
    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error("update is already running for %s", provider["name"])
        return

    try:
        feeding_service = get_feeding_service(provider["feeding_service"])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(
                days=9999)  # import everything again

        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            # normalize list results so the send()-driven loop below works
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                # keep the lock alive for every batch; abort if it was lost
                if not touch(lock_name, expire=UPDATE_TTL):
                    logger.warning("lock expired while updating provider %s",
                                   provider[superdesk.config.ID_FIELD])
                    return
                # feed the previous batch's failures back to the service
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service,
                                      rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service(
            "ingest_providers")
        provider = ingest_provider_service.find_one(
            req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(
            provider[superdesk.config.ID_FIELD], update, provider)

        # Warn admins when an otherwise-active provider produced nothing.
        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service(
                "users").get_users_by_user_type("administrator")
            notify_and_add_activity(
                ACTIVITY_EVENT,
                "Provider {{name}} has gone strangely quiet. Last activity was on {{last}}",
                resource="ingest_providers",
                user_list=admins,
                name=provider.get("name"),
                last=provider[LAST_ITEM_UPDATE].replace(
                    tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"),
            )

        logger.info("Provider {0} updated".format(
            provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification("ingest:update",
                              provider_id=str(
                                  provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        # logger.exception records the traceback, unlike logger.error,
        # which made ingest failures hard to diagnose.
        logger.exception("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
Ejemplo n.º 5
0
def sync():
    """Sync archive items on the ultrad todo stage with their ultrad documents.

    For every item on the configured todo stage that carries an ultrad id,
    fetch the ultrad document and, when it is in the "revised" state, copy
    the edited text back into the item and move it to the desk's working
    stage. Guarded by the "ultrad" lock so only one sync runs at a time.
    """
    lock_name = "ultrad"
    if not lock(lock_name):
        # another worker already holds the lock
        logger.info("lock taken %s", lock_name)
        return
    try:
        todo_stages = list(
            get_resource_service("stages").get(
                req=None, lookup={"name": app.config["ULTRAD_TODO_STAGE"]}
            )
        )
        if not len(todo_stages):
            logger.warning(
                "ultrad todo stage not found, name=%s", app.config["ULTRAD_TODO_STAGE"]
            )
            return
        for todo_stage in todo_stages:
            desk = get_resource_service("desks").find_one(
                req=None, _id=todo_stage["desk"]
            )
            if not desk:
                logger.warning(
                    "ultrad desk not found for stage desk=%s", todo_stage["desk"]
                )
                continue
            lookup = {"task.stage": todo_stage["_id"]}
            items = list(get_resource_service("archive").get(req=None, lookup=lookup))
            logger.info(
                "checking %d items on ultrad on desk %s", len(items), desk["name"]
            )
            for item in items:
                # extend the lock per item; stop this desk if it was lost
                if not touch(lock_name, expire=300):
                    logger.warning("lost lock %s", lock_name)
                    break
                # skip items currently being edited by a user
                if item.get("lock_user") and item.get("lock_session"):
                    logger.info("skipping locked item guid=%s", item["guid"])
                    continue
                if item["state"] not in IN_PROGRESS_STATES:
                    logger.info(
                        "ignore item due to state guid=%s state=%s",
                        item["guid"],
                        item["state"],
                    )
                    continue
                # items without an ultrad id were never sent to ultrad
                try:
                    ultrad_id = item["extra"][ULTRAD_ID]
                except KeyError:
                    continue
                # best effort: skip items whose ultrad lookup fails
                try:
                    ultrad_doc = get_document(ultrad_id)
                except UltradException:
                    continue
                if ultrad_doc["state"] == "revised":
                    try:
                        updated = item.copy()
                        updated["body_html"] = ultrad_doc["text"]["edited"]
                    except KeyError:
                        logger.info(
                            "no content in ultrad for item guid=%s ultrad_id=%s",
                            item["guid"],
                            ultrad_id,
                        )
                        continue
                    logger.info(
                        "updating item from ultrad guid=%s ultrad_id=%s",
                        item["guid"],
                        ultrad_id,
                    )
                    # rebuild the editor state so fields_meta matches the
                    # new body_html
                    editor = Editor3Content(updated)
                    editor._create_state_from_html(updated["body_html"])
                    editor.update_item()
                    send_to(
                        updated, desk_id=desk["_id"], stage_id=desk["working_stage"]
                    )
                    updates = {
                        "task": updated["task"],
                        "body_html": updated["body_html"],
                        "fields_meta": updated["fields_meta"],
                    }
                    # don't use patch, it assumes there is a user
                    get_resource_service("archive").update(item["_id"], updates, item)
                    get_resource_service("archive").on_updated(updates, item)
                else:
                    logger.debug(
                        "skip updating item guid=%s ultrad_id=%s state=%s",
                        item["guid"],
                        ultrad_id,
                        ultrad_doc["state"],
                    )
    finally:
        unlock(lock_name)