def enhance_with_archive_items(self, items):
    """Attach archive documents and lock metadata to published items in place.

    Looks up the archive record for every ``item_id`` found in *items* and
    merges the lock fields plus the full archive document into each entry.
    Items with no matching archive record get a minimal version-only stub.
    """
    if not items:
        return

    wanted = {entry.get("item_id") for entry in items if entry.get("item_id")}
    by_id = {}
    if wanted:
        request = ParsedRequest()
        request.max_results = len(wanted)
        # can't access published from elastic due filter on the archive
        # resource hence going to mongo
        lookup = {"$and": [{config.ID_FIELD: {"$in": list(wanted)}}]}
        fetched = list(
            superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
        )
        for record in fetched:
            handle_existing_data(record)
            by_id[record[config.ID_FIELD]] = record

    for entry in items:
        fallback = {config.VERSION: entry.get(config.VERSION, 1)}
        archive_item = by_id.get(entry.get("item_id"), fallback)

        entry.update({
            config.ID_FIELD: entry.get("item_id"),
            "item_id": entry.get(config.ID_FIELD),
            "lock_user": archive_item.get("lock_user", None),
            "lock_time": archive_item.get("lock_time", None),
            "lock_action": archive_item.get("lock_action", None),
            "lock_session": archive_item.get("lock_session", None),
            "archive_item": archive_item if archive_item else None,
        })
        handle_existing_data(entry)
예제 #2
0
    def enhance_with_archive_items(self, items):
        """Attach archive documents, takes info and lock metadata to *items*.

        Fetches the archive record for every ``item_id`` in *items* from
        mongo and merges lock fields plus the full archive document into
        each entry in place.
        """
        if items:
            ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
            archive_lookup = {}
            if ids:
                query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
                archive_req = ParsedRequest()
                archive_req.max_results = len(ids)
                # can't access published from elastic due filter on the archive resource hence going to mongo
                archive_items = list(superdesk.get_resource_service(ARCHIVE)
                                     .get_from_mongo(req=archive_req, lookup=query))

                takes_service = TakesPackageService()
                for item in archive_items:
                    handle_existing_data(item)
                    takes_service.enhance_with_package_info(item)
                    # index by id so the merge below is O(1) per item
                    # instead of a linear scan of archive_items per entry
                    archive_lookup[item[config.ID_FIELD]] = item

            for item in items:
                archive_item = archive_lookup.get(item.get('item_id'),
                                                  {config.VERSION: item.get(config.VERSION, 1)})

                updates = {
                    config.ID_FIELD: item.get('item_id'),
                    'item_id': item.get(config.ID_FIELD),
                    'lock_user': archive_item.get('lock_user', None),
                    'lock_time': archive_item.get('lock_time', None),
                    'lock_session': archive_item.get('lock_session', None),
                    'archive_item': archive_item if archive_item else None
                }

                item.update(updates)
                handle_existing_data(item)
예제 #3
0
    def enhance_with_archive_items(self, items):
        """Merge archive data (takes info, locks, full document) into *items*.

        Archive records are fetched straight from mongo for every
        ``item_id`` present in *items* and merged into each entry in place.
        """
        if not items:
            return

        item_ids = list({entry.get("item_id") for entry in items if entry.get("item_id")})
        indexed = {}
        if item_ids:
            request = ParsedRequest()
            request.max_results = len(item_ids)
            mongo_query = {"$and": [{config.ID_FIELD: {"$in": item_ids}}]}
            # can't access published from elastic due filter on the
            # archive resource hence going to mongo
            fetched = list(
                superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=mongo_query)
            )

            TakesPackageService().enhance_items_with_takes_packages(fetched)
            for record in fetched:
                handle_existing_data(record)
                indexed[record[config.ID_FIELD]] = record

        for entry in items:
            default = {config.VERSION: entry.get(config.VERSION, 1)}
            matched = indexed.get(entry.get("item_id"), default)

            entry.update({
                config.ID_FIELD: entry.get("item_id"),
                "item_id": entry.get(config.ID_FIELD),
                "lock_user": matched.get("lock_user", None),
                "lock_time": matched.get("lock_time", None),
                "lock_session": matched.get("lock_session", None),
                "archive_item": matched if matched else None,
            })
            handle_existing_data(entry)
예제 #4
0
 def get_next_order_sequence(self, blog_id):
     """Return the order number to use for the next post of *blog_id*.

     Atomically bumps the blog's ``posts_order_sequence`` counter
     (presumably reading the pre-increment value — find_and_modify's
     default; TODO confirm).  For legacy blogs without the counter, the
     value is derived from the highest existing post order and persisted.
     Returns 0 when the blog id is missing or unknown.
     """
     if blog_id is None:
         return 0
     # get next order sequence and increment it
     blog = get_resource_service('blogs').find_and_modify(
         query={'_id': blog_id},
         update={'$inc': {
             'posts_order_sequence': 1
         }},
         upsert=False)
     if blog:
         # NOTE(review): `or None` maps a stored 0 to None, routing legacy
         # blogs into the recovery branch below — looks intentional
         order = blog and blog.get('posts_order_sequence') or None
         # support previous LB version when the sequence was not save into the blog
         if order is None:
             # find the highest order in the blog
             req = ParsedRequest()
             req.sort = '-order'
             req.max_results = 1
             post = next(
                 self.get_from_mongo(req=req, lookup={'blog': blog_id}),
                 None)
             if post and post.get('order') is not None:
                 order = post.get('order') + 1
                 # save the order into the blog
                 get_resource_service('blogs').update(
                     blog_id, {'posts_order_sequence': order + 1}, blog)
             else:
                 order = 0
     else:
         order = 0
     return order
예제 #5
0
    def get_expired_items(self, now):
        """Return a mongo cursor over archive items expired as of *now*."""
        lookup = self._get_query_for_expired_items(now)
        request = ParsedRequest()
        # bounded batch so one run never pulls the whole collection
        request.max_results = 100
        return superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
예제 #6
0
    def on_create(self, docs):
        """
        Overriding this to set desk_order and expiry settings. Also, if this stage is defined as either working or
        incoming stage or both then removes the old incoming and working stages.
        """
        for doc in docs:
            desk = doc.get('desk')
            if not desk:
                doc['desk_order'] = 1
                continue

            # zero expiry means "no expiry"
            if doc.get('content_expiry') == 0:
                doc['content_expiry'] = None

            # place the new stage after the desk's current last stage
            stage_req = ParsedRequest()
            stage_req.sort = '-desk_order'
            stage_req.max_results = 1
            current = self.get(req=stage_req, lookup={'desk': doc['desk']})
            doc['desk_order'] = 1 if current.count() == 0 else current[0].get('desk_order', 1) + 1

            # if this new one is default then remove the old default
            if doc.get('working_stage', False):
                self.remove_old_default(desk, 'working_stage')
            if doc.get('default_incoming', False):
                self.remove_old_default(desk, 'default_incoming')
예제 #7
0
 def get_expired_items(self, now):
     """Return a mongo cursor over published items expired as of *now*."""
     logger.info('Get expired content from published')
     lookup = self.get_query_for_expired_items(now)
     request = ParsedRequest()
     request.sort = '_created'    # oldest first
     request.max_results = 100    # bounded batch per run
     return superdesk.get_resource_service('published').get_from_mongo(req=request, lookup=lookup)
    def get_filters(self):
        """Retrieve all available filter conditions and content filters.

        They are reloaded only when not yet retrieved or updated since the
        last load, so the filtering functions do not have to repeatedly
        retrieve the individual filter records.

        :return: None; the result is cached on ``self.filters``
        """

        # Get the most recent update time to the filter conditions and content_filters
        req = ParsedRequest()
        req.sort = '-_updated'
        req.max_results = 1
        mindate = datetime.min.replace(tzinfo=pytz.UTC)
        latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)
        latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)

        # Rebuild when the cache is empty, stale, or either collection is
        # empty (latest == mindate) — never trust a cache built from nothing
        if not self.filters or \
                latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
                latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
            logger.debug('Getting content filters and filter conditions')
            self.filters = dict()
            self.filters['filter_conditions'] = dict()
            self.filters['content_filters'] = dict()
            for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
                self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
                # track the newest _updated seen so the staleness check works
                self.filters['latest_filter_conditions'] = max(
                    fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
            for cf in get_resource_service('content_filters').get(req=None, lookup={}):
                self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
                self.filters['latest_content_filters'] = max(
                    cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
        else:
            logger.debug('Using cached content filters and filter conditions')
    def test_compare_repos(self):
        """CompareRepositories reports elastic-only duplicates.

        Items are bulk inserted into elastic directly and then posted
        through the archive service, so elastic ends up with 198 documents
        against 99 in mongo; the 99 extras must show up as elastic_only.
        """
        with self.app.app_context():
            req = ParsedRequest()
            req.args = {}
            req.max_results = 25

            data = [{
                'headline': 'test {}'.format(i),
                'slugline': 'rebuild {}'.format(i),
                'type': 'text' if (i % 2 == 0) else 'picture'
            } for i in range(1, 100)]
            resolve_document_etag(data, 'archive')
            # seed elastic directly, then post through the service
            superdesk.app.data._search_backend('archive').bulk_insert(
                'archive', data)
            get_resource_service('archive').post(data)
            consistency_init(self.app)

            items = get_resource_service('archive').get(req, {})
            # assertEqual: assertEquals is a deprecated unittest alias
            self.assertEqual(99, items.count())

            consistency_record = CompareRepositories().run(
                'archive', self.app.config['ELASTICSEARCH_URL'],
                self.app.config['ELASTICSEARCH_INDEXES'].get('archive'))
            self.assertEqual(consistency_record['mongo'], 99)
            self.assertEqual(consistency_record['elastic'], 198)
            self.assertEqual(consistency_record['identical'], 99)
            self.assertEqual(consistency_record['mongo_only'], 0)
            self.assertEqual(consistency_record['elastic_only'], 99)
            self.assertEqual(consistency_record['inconsistent'], 0)
    def test_compare_repos(self):
        """CompareRepositories reports elastic-only duplicates.

        Items are bulk inserted into elastic directly and then posted
        through the archive service, so elastic holds 198 documents against
        99 in mongo; the 99 extras must be reported as elastic_only.
        """
        with self.app.app_context():
            req = ParsedRequest()
            req.args = {}
            req.max_results = 25

            data = [{'headline': 'test {}'.format(i), 'slugline': 'rebuild {}'.format(i),
                     'type': 'text' if (i % 2 == 0) else 'picture'} for i in range(1, 100)]
            resolve_document_etag(data, 'archive')
            # seed elastic directly, then post through the service
            superdesk.app.data._search_backend('archive').bulk_insert('archive', data)
            get_resource_service('archive').post(data)
            consistency_init(self.app)

            items = get_resource_service('archive').get(req, {})
            # assertEqual: assertEquals is a deprecated unittest alias
            self.assertEqual(99, items.count())

            consistency_record = CompareRepositories().run('archive',
                                                           self.app.config['ELASTICSEARCH_URL'],
                                                           self.app.config['ELASTICSEARCH_INDEXES'].get('archive'))
            self.assertEqual(consistency_record['mongo'], 99)
            self.assertEqual(consistency_record['elastic'], 198)
            self.assertEqual(consistency_record['identical'], 99)
            self.assertEqual(consistency_record['mongo_only'], 0)
            self.assertEqual(consistency_record['elastic_only'], 99)
            self.assertEqual(consistency_record['inconsistent'], 0)
예제 #11
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Get the expired items.

        Where content state is not scheduled and the item matches given parameters

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True only invalid items
        :return pymongo.cursor: expired non published items.
        """
        # expired, and either assigned to a desk or spiked personal content
        conditions = [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
        ]

        if invalid_only:
            conditions.append({'expiry_status': 'invalid'})
        else:
            conditions.append({'expiry_status': {'$ne': 'invalid'}})

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup={'$and': conditions})
예제 #12
0
    def purge_old_entries(self):
        """
        Purge entries older than the expiry that are not related to archive items
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_date = None

        while True:
            # non-item audit entries older than the configured expiry
            lookup = {
                '$and': [
                    self.not_item_entry_query, {
                        '_updated': {
                            '$lte': date_to_str(self.expiry)
                        }
                    }
                ]
            }
            # resume from the last processed timestamp ($gte, since several
            # entries can share one _updated value)
            if current_date:
                lookup['$and'].append({'_updated': {'$gte': current_date}})
            req = ParsedRequest()
            req.sort = '[("_updated", 1)]'
            # only the fields needed for deletion and pagination
            req.projection = '{"_id": 1, "_updated": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=lookup)
            if audits.count() == 0:
                break
            items = list([(item['_id'], item['_updated']) for item in audits])
            # remember where this batch ended before deleting it
            current_date = items[len(items) - 1][1]
            service.delete({'_id': {'$in': [i[0] for i in items]}})
예제 #13
0
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None

        # Scan the audit collection for items to delete
        while True:
            # item-related audit entries older than the configured expiry
            query = deepcopy(self.item_entry_query)
            query['$and'].append(
                {'_updated': {
                    '$lte': date_to_str(self.expiry)
                }})
            # resume pagination after the last id seen in the previous batch
            if current_id:
                query['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # every field the referenced item id might be stored under
            req.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=query)
            if audits.count() == 0:
                break
            # (audit id, referenced item id) pairs for this batch
            items = list([(item['_id'], self._extract_item_id(item))
                          for item in audits])
            current_id = items[len(items) - 1][0]

            # delete audits whose referenced item no longer exists in archive
            batch_ids = set([i[1] for i in items])
            archive_ids = self._get_archive_ids(batch_ids)
            ids = (batch_ids - archive_ids)
            audit_ids = [i[0] for i in items if i[1] in ids]
            service.delete({'_id': {'$in': audit_ids}})
예제 #14
0
    def get_expired_items(self, expired_date_time, limit=100):
        """Fetch expired articles from the published collection.

        An article qualifies when either:
            1.  its can_be_removed flag is True, or
            2.  its expiry is at or before *expired_date_time*, its state is
                not SCHEDULED, and allow_post_publish_actions is True.

        :param expired_date_time: cutoff datetime
        :param limit: maximum number of articles returned
        :return: expired articles from published collection
        """
        logger.info("Get expired content from published")

        removable = {"can_be_removed": True}
        lapsed = {
            "$and": [
                {"expiry": {"$lte": expired_date_time}},
                {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
                {"allow_post_publish_actions": True},
            ]
        }

        req = ParsedRequest()
        req.sort = "_created"
        req.max_results = limit
        return superdesk.get_resource_service("published").get_from_mongo(
            req=req, lookup={"$or": [removable, lapsed]})
예제 #15
0
    def get_history_items(self, last_id, gte, item_id, chunk_size=0):
        """Yield archive_history records in chunks, ordered by id and version.

        :param last_id: resume after this history id (None starts from the top)
        :param gte: only records created at or after this datetime
        :param item_id: restrict to the history of one item
        :param chunk_size: records per yielded chunk (0 = no explicit limit)
        """
        history_service = get_resource_service('archive_history')

        last_processed_id = last_id

        while True:
            req = ParsedRequest()
            req.sort = '[("_id", 1), ("version", 1)]'

            conditions = []

            if gte:
                conditions.append({'_created': {'$gte': date_to_str(gte)}})

            if item_id:
                conditions.append({'item_id': str(item_id)})

            if last_processed_id:
                conditions.append({'_id': {'$gt': str(last_processed_id)}})

            # mongo rejects an empty $and array, so only apply the filter
            # when at least one condition accumulated
            if conditions:
                req.where = json.dumps({'$and': conditions})

            if chunk_size > 0:
                req.max_results = int(chunk_size)

            items = list(history_service.get(req=req, lookup=None))

            if len(items) < 1:
                break

            # remember the resume point for the next chunk
            last_processed_id = items[-1][config.ID_FIELD]
            yield items
예제 #16
0
 def get_published_items(self):
     """
     Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed.
     """
     not_scheduled = {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}}
     schedule_due = {
         ITEM_STATE: CONTENT_STATE.SCHEDULED,
         "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE): {"$lte": utcnow()},
     }
     lookup = {
         QUEUE_STATE: PUBLISH_STATE.PENDING,
         "$or": [not_scheduled, schedule_due],
     }

     request = ParsedRequest()
     request.sort = "publish_sequence_no"   # keep publish ordering stable
     request.max_results = 200
     return list(get_resource_service(PUBLISHED).get_from_mongo(req=request, lookup=lookup))
    def forwards(self, mongodb_collection, mongodb_database):
        """Backfill ``extra.compliantlifetime`` (first published + 1 year)
        on published items and mirror the value onto the matching archive
        item.  Pages through the published collection 50 items at a time.
        """
        archive_service = get_resource_service('archive')
        published_service = get_resource_service(self.resource)

        req = ParsedRequest()
        req.max_results = 50
        for page in range(1, 200):  # 10k limit
            req.page = page
            items = list(published_service.get(req=req, lookup=None))
            if not items:
                break
            for item in items:
                published_date = item.get('firstpublished')

                if published_date is not None:
                    # compliance lifetime is one year after first publication
                    compliant_lifetime = add_years(published_date, 1)

                    extra = item.get('extra', {})
                    extra['compliantlifetime'] = compliant_lifetime

                    try:
                        published_service.system_update(
                            ObjectId(item['_id']), {'extra': extra}, item)
                    except SuperdeskApiError:
                        # best effort migration: skip items that fail to update
                        continue

                    # keep the archive copy in sync, when it still exists
                    archive_item = archive_service.find_one(
                        req=None, _id=item['item_id'])
                    if archive_item:
                        archive_service.system_update(archive_item['_id'],
                                                      {'extra': extra},
                                                      archive_item)
예제 #18
0
    def _get_combined_view_data(self, items, request):
        """Get list of event and planning for the combined view

        :param items: planning/event items used to seed the id filter
        :param request: object representing the HTTP request
        :return: cursor over the matching ``planning_search`` documents
        """
        ids = set()
        for item in items:
            # don't want related planing items;
            # prefer the parent event id when there is one
            _id = item.get('event_item') or item.get('_id')
            ids.add(_id)

        filters = self._get_date_filters(request)
        page = request.page or 1
        # compute page size once and reuse it instead of calling
        # _get_page_size(request) for every use below
        page_size = self._get_page_size(request)
        req = ParsedRequest()
        req.args = MultiDict()
        req.args['source'] = json.dumps({
            'query': {
                'bool': {
                    'must': [{'terms': {'_id': list(ids)}}],
                }
            },
            'filter': filters,
            'sort': self._get_sort(),
            'size': page_size,
            'from': (page - 1) * page_size
        })
        req.page = page
        req.max_results = page_size
        return get_resource_service('planning_search').get(req=req, lookup=None)
예제 #19
0
def get_expired_items(expired_date_time, limit=100):
    """Fetch the expired articles from the published collection.

    Expiry conditions (either one qualifies):
        1.  can_be_removed flag is True
        2.  expiry is at or before expired_date_time, the item is not
            SCHEDULED, and allow_post_publish_actions is True

    :param expired_date_time: cutoff datetime
    :param limit: maximum number of articles returned
    :return: expired articles from published collection
    """
    logger.info('Get expired content from published')

    lapsed = {'$and': [
        {'expiry': {'$lte': expired_date_time}},
        {ITEM_STATE: {'$ne': CONTENT_STATE.SCHEDULED}},
        {'allow_post_publish_actions': True},
    ]}
    lookup = {'$or': [{'can_be_removed': True}, lapsed]}

    req = ParsedRequest()
    req.sort = '_created'
    req.max_results = limit
    return superdesk.get_resource_service('published').get_from_mongo(req=req, lookup=lookup)
예제 #20
0
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None
        logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))

        # Scan the audit collection for items to delete
        while True:
            # item-related audit entries older than the configured expiry
            query = deepcopy(self.item_entry_query)
            query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
            # resume pagination after the last id seen in the previous batch
            if current_id:
                query['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # only the ids needed for matching and deletion
            req.projection = '{"_id": 1, "audit_id":1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=query)
            # (audit id, referenced item id) pairs for this batch
            items = list([(item['_id'], item['audit_id']) for item in audits])
            if len(items) == 0:
                logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
                return
            logger.info('Found {} orphaned audit items at {}'.format(len(items), utcnow()))
            current_id = items[len(items) - 1][0]

            # delete audits whose referenced item is gone from archive
            batch_ids = set([i[1] for i in items])
            archive_ids = self._get_archive_ids(batch_ids)
            ids = (batch_ids - archive_ids)
            audit_ids = [i[0] for i in items if i[1] in ids]
            logger.info('Deleting {} orphaned audit items at {}'.format(len(audit_ids), utcnow()))
            service.delete_ids_from_mongo(audit_ids)
예제 #21
0
 def get_next_order_sequence(self, blog_id):
     """Return the order number for the next post of *blog_id* (0 when the
     blog id is missing or the blog does not exist)."""
     if blog_id is None:
         return 0
     # get next order sequence and increment it
     blog = get_resource_service('blogs').find_and_modify(
         query={'_id': blog_id},
         update={'$inc': {'posts_order_sequence': 1}},
         upsert=False)
     if not blog:
         return 0
     sequence = blog.get('posts_order_sequence') or None
     if sequence is not None:
         return sequence
     # support previous LB version when the sequence was not saved into the
     # blog: derive it from the highest existing post order and persist it
     req = ParsedRequest()
     req.sort = '-order'
     req.max_results = 1
     newest = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
     if newest is None or newest.get('order') is None:
         return 0
     sequence = newest.get('order') + 1
     get_resource_service('blogs').update(blog_id, {'posts_order_sequence': sequence + 1}, blog)
     return sequence
예제 #22
0
    def purge_old_entries(self):
        """
        Purge entries older than the expiry that are not related to archive items
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None
        logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))

        while True:
            # non-item audit entries older than the configured expiry
            lookup = {'$and': [self.not_item_entry_query, {'_updated': {'$lte': date_to_str(self.expiry)}}]}
            # resume after the last id deleted in the previous batch
            if current_id:
                lookup['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # only ids are needed for deletion
            req.projection = '{"_id": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=lookup)
            items = list(item.get('_id') for item in audits)
            if len(items) == 0:
                logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
                return
            logger.info('Found {} audit items at {}'.format(len(items), utcnow()))
            current_id = items[len(items) - 1]
            logger.info('Deleting {} old audit items'.format(len(items)))
            service.delete_ids_from_mongo(items)
예제 #23
0
    def on_create(self, docs):
        """Runs on stage create.

        Sets ``desk_order`` to one past the desk's current highest order and
        normalises a zero ``content_expiry`` to ``None``.  When the new
        stage is flagged as working and/or incoming stage, the previous
        default stage of that kind is removed.
        """
        for doc in docs:
            desk = doc.get("desk")
            if not desk:
                doc["desk_order"] = 1
                continue

            # zero expiry means "no expiry"
            if doc.get("content_expiry") == 0:
                doc["content_expiry"] = None

            # append after the highest existing stage on this desk
            lookup_req = ParsedRequest()
            lookup_req.sort = "-desk_order"
            lookup_req.max_results = 1
            existing = self.get(req=lookup_req, lookup={"desk": doc["desk"]})
            if existing.count() == 0:
                doc["desk_order"] = 1
            else:
                doc["desk_order"] = existing[0].get("desk_order", 1) + 1

            # if this new one is a default then remove the old default
            for flag in ("working_stage", "default_incoming"):
                if doc.get(flag, False):
                    self.remove_old_default(desk, flag)
예제 #24
0
    def get_filters(self):
        """Retrieve all available filter conditions and content filters.

        They are reloaded only when not yet retrieved or updated since the
        last load, so the filtering functions do not have to repeatedly
        retrieve the individual filter records.

        :return: None; the result is cached on ``self.filters``
        """

        # Get the most recent update time to the filter conditions and content_filters
        req = ParsedRequest()
        req.sort = '-_updated'
        req.max_results = 1
        mindate = datetime.min.replace(tzinfo=pytz.UTC)
        latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)
        latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)

        # Rebuild when the cache is empty, stale, or either collection is
        # empty (latest == mindate) — never trust a cache built from nothing
        if not self.filters or \
                latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
                latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
            logger.debug('Getting content filters and filter conditions')
            self.filters = dict()
            self.filters['filter_conditions'] = dict()
            self.filters['content_filters'] = dict()
            for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
                self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
                # track the newest _updated seen so the staleness check works
                self.filters['latest_filter_conditions'] = max(
                    fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
            for cf in get_resource_service('content_filters').get(req=None, lookup={}):
                self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
                self.filters['latest_content_filters'] = max(
                    cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
        else:
            logger.debug('Using cached content filters and filter conditions')
예제 #25
0
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """Fetch overdue scheduled articles from the given collection.

    An item is overdue when it is still in the 'scheduled' state and its
    publish_schedule is at or before *expired_date_time*.

    :param expired_date_time: DateTime the scheduled state is checked against
    :param resource: Name of the resource to read the data from
    :param limit: Maximum number of items returned
    :return: overdue scheduled articles from the collection
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))

    lookup = {'$and': [
        {'publish_schedule': {'$lte': expired_date_time}},
        {ITEM_STATE: CONTENT_STATE.SCHEDULED},
    ]}

    req = ParsedRequest()
    req.sort = '_modified'
    req.max_results = limit
    return superdesk.get_resource_service(resource).get_from_mongo(req=req, lookup=lookup)
예제 #26
0
def get_queue_items(retries=False):
    """Return transmittable publish queue items.

    When ``retries`` is True, select items in the retrying state whose next
    retry time has passed; otherwise select pending items. Items destined
    for 'pull' delivery are always excluded.
    """
    not_pull = {'destination.delivery_type': {'$ne': 'pull'}}
    if retries:
        conditions = [
            {'state': QueueState.RETRYING.value},
            {'next_retry_attempt_at': {'$lte': utcnow()}},
            not_pull,
        ]
    else:
        conditions = [{'state': QueueState.PENDING.value}, not_pull]

    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup={'$and': conditions})
예제 #27
0
    def _get_changed_items(self, existing_items, package):
        """Returns the added and removed items from existing_items

        :param existing_items: Existing list
        :param package: the published package to compare against
        :return: list of removed items and list of added items
        """
        published_service = get_resource_service('published')
        source = {
            'query': {'filtered': {'filter': {'and': [
                {'terms': {QUEUE_STATE: [PUBLISH_STATE.QUEUED, PUBLISH_STATE.QUEUED_NOT_TRANSMITTED]}},
                {'term': {'item_id': package['item_id']}},
            ]}}},
            'sort': [{'publish_sequence_no': 'desc'}],
        }
        req = ParsedRequest()
        req.args = {'source': json.dumps(source)}
        req.max_results = 1
        previous = published_service.get(req=req, lookup=None)

        if not previous.count():
            return [], []

        last_package = previous[0]
        if 'groups' not in last_package:
            return [], []

        old_items = self.package_service.get_residrefs(last_package)
        added_items = list(set(existing_items) - set(old_items))
        removed_items = list(set(old_items) - set(existing_items))
        return removed_items, added_items
예제 #28
0
    def get_expired_items(self, expiry_datetime):
        """Return expired items where the content state is not scheduled.

        Matches items whose expiry is on or before ``expiry_datetime`` and
        which either belong to a desk, or are spiked without a desk.

        :param datetime expiry_datetime: expiry datetime
        :return pymongo.cursor: expired non published items.
        """
        desk_or_spiked = {
            '$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]
        }
        lookup = {
            '$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                desk_or_spiked,
            ]
        }

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup=lookup)
예제 #29
0
    def enhance_with_archive_items(self, items):
        """Attach the matching archive item (and its lock fields) to each published item."""
        if not items:
            return

        archive_lookup = {}
        ids = list({item['item_id'] for item in items if item.get('item_id')})
        if ids:
            archive_req = ParsedRequest()
            archive_req.max_results = len(ids)
            # can't access published from elastic due filter on the archive resource hence going to mongo
            archive_items = list(
                superdesk.get_resource_service(ARCHIVE).get_from_mongo(
                    req=archive_req, lookup={'$and': [{config.ID_FIELD: {'$in': ids}}]}))

            TakesPackageService().enhance_items_with_takes_packages(archive_items)
            for archive_item in archive_items:
                handle_existing_data(archive_item)
                archive_lookup[archive_item[config.ID_FIELD]] = archive_item

        for item in items:
            fallback = {config.VERSION: item.get(config.VERSION, 1)}
            archive_item = archive_lookup.get(item.get('item_id'), fallback)

            item.update({
                config.ID_FIELD: item.get('item_id'),
                'item_id': item.get(config.ID_FIELD),
                'lock_user': archive_item.get('lock_user'),
                'lock_time': archive_item.get('lock_time'),
                'lock_action': archive_item.get('lock_action'),
                'lock_session': archive_item.get('lock_session'),
                'archive_item': archive_item if archive_item else None,
            })
            handle_existing_data(item)
예제 #30
0
    def on_create(self, docs):
        """
        Overriding this to set desk_order and expiry settings. Also, if this stage is defined as either working or
        incoming stage or both then removes the old incoming and working stages.
        """

        for doc in docs:
            desk = doc.get('desk')
            if not desk:
                doc['desk_order'] = 1
                continue

            if doc.get('content_expiry') == 0:
                doc['content_expiry'] = None

            # find the current highest desk_order on this desk
            req = ParsedRequest()
            req.sort = '-desk_order'
            req.max_results = 1
            prev_stage = self.get(req=req, lookup={'desk': doc['desk']})
            if prev_stage.count():
                doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1
            else:
                doc['desk_order'] = 1

            # if this new one is default then remove the old default
            for flag in ('working_stage', 'default_incoming'):
                if doc.get(flag, False):
                    self.remove_old_default(desk, flag)
    def forwards(self, mongodb_collection, mongodb_database):
        """Drop 'compliantlifetime' from extra of published/archive items not created from the 'article' template."""
        archive_service = get_resource_service('archive')
        published_service = get_resource_service(self.resource)
        templates_service = get_resource_service('content_templates')

        template = templates_service.find_one(req=None,
                                              template_name='article')
        if not template:
            return

        req = ParsedRequest()
        req.max_results = 50
        for page in range(1, 200):
            req.page = page
            batch = list(published_service.get(req=req, lookup=None))
            if not batch:
                break
            for item in batch:
                if ObjectId(item.get('template')) == template.get('_id'):
                    continue
                extra = item.get('extra')
                if extra is None:
                    continue
                extra.pop('compliantlifetime', None)
                published_service.system_update(
                    ObjectId(item['_id']), {'extra': extra}, item)

                # keep the corresponding archive item in sync
                archive_item = archive_service.find_one(
                    req=None, _id=item.get('item_id'))
                if archive_item:
                    archive_service.system_update(
                        archive_item.get('_id'), {'extra': extra},
                        archive_item)
예제 #32
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Get the expired items.

        Where content state is not scheduled and the item matches given parameters

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True only invalid items
        :return pymongo.cursor: expired non published items.
        """
        conditions = [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
        ]

        if invalid_only:
            conditions.append({'expiry_status': 'invalid'})
        else:
            conditions.append({'expiry_status': {'$ne': 'invalid'}})

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup={'$and': conditions})
예제 #33
0
    def _filter_items(self, items):
        """
        Remove events which already exist in the db.

        Events found in the 'ingested' state keep their stored guid so they
        get updated; events in any other state are dropped from the result.

        :param items: dict with events, ntbId used as a key
        :type items: dict
        :return: a list of events
        """

        req = ParsedRequest()
        req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1})
        req.max_results = len(items)

        existing_items = superdesk.get_resource_service('events').get_from_mongo(
            req,
            {'ntb_id': {'$in': list(items.keys())}},
        )
        for existing_item in existing_items:
            if existing_item.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED:
                # update event: reuse the stored guid so the item is updated in place
                items[existing_item['ntb_id']][GUID_FIELD] = existing_item[GUID_FIELD]
            else:
                # remove event when it has a state different from 'ingested'
                del items[existing_item['ntb_id']]

        return list(items.values())
예제 #34
0
    def _get_changed_items(self, existing_items, package):
        """Returns the added and removed items from existing_items

        :param existing_items: Existing list
        :param package: the published package to compare against
        :return: list of removed items and list of added items
        """
        source = {
            'query': {'filtered': {'filter': {'and': [
                {'terms': {QUEUE_STATE: [PUBLISH_STATE.QUEUED, PUBLISH_STATE.QUEUED_NOT_TRANSMITTED]}},
                {'term': {'item_id': package['item_id']}}]}}},
            'sort': [{'publish_sequence_no': 'desc'}]}
        req = ParsedRequest()
        req.args = {'source': json.dumps(source)}
        req.max_results = 1
        packages = get_resource_service('published').get(req=req, lookup=None)

        if packages.count() and 'groups' in packages[0]:
            old_items = set(self.package_service.get_residrefs(packages[0]))
            new_items = set(existing_items)
            # (removed, added)
            return list(old_items - new_items), list(new_items - old_items)
        return [], []
    def _get_events_and_planning(self, request, query, search_filter):
        """Get list of event and planning based on the search criteria

        :param request: object representing the HTTP request
        :param query: elastic query (must contain a 'query' key; 'sort' optional)
        :param search_filter: filter used to determine the page size
        """
        page = request.page or 1
        max_results = self._get_page_size(request, search_filter)
        req = ParsedRequest()
        req.args = MultiDict()
        req.args['source'] = json.dumps({
            'query': query['query'],
            'sort': query['sort'] if query.get('sort') else self._get_sort(),
            # request a window larger than one page of results
            'size': int((5 * max_results) * math.ceil(page / 3)),
        })
        req.args['projections'] = json.dumps(['_id', 'type', 'event_item'])
        req.page = page
        req.max_results = max_results
        req.exec_on_fetched_resource = False  # don't call on_fetched_resource
        return get_resource_service('planning_search').get(req=req,
                                                           lookup=None)
예제 #36
0
 def purge_old_entries(self):
     """
     Purge audit entries older than the expiry, in batches of 1000 ids.
     Gives up after 100 iterations so the task can never loop forever.
     :return:
     """
     service = superdesk.get_resource_service("audit")
     logger.info("Starting to purge audit logs at {}".format(utcnow()))
     lookup = {"$and": [{"_id": {"$lt": ObjectId.from_datetime(self.expiry)}}]}
     for _ in range(100):  # make sure we don't get stuck
         req = ParsedRequest()
         req.sort = '[("_id", 1)]'
         req.projection = '{"_id": 1}'
         req.max_results = 1000
         batch = [audit.get("_id") for audit in service.get_from_mongo(req=req, lookup=lookup)]
         if not batch:
             logger.info("Finished purging audit logs at {}".format(
                 utcnow()))
             return
         logger.info("Found {} audit items at {}".format(
             len(batch), utcnow()))
         service.delete_ids_from_mongo(batch)
     logger.warning("Audit purge didn't finish in 100 iterations.")
예제 #37
0
    def get_packages(self, doc_id, not_package_id=None):
        """
        Retrieves package(s) if an article identified by doc_id is referenced in a package.

        :param str doc_id: identifier of the item in the package
        :param str not_package_id: a package id to exclude from the result
        :return: articles of type composite
        """

        conditions = [
            {ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
            {'groups.refs.residRef': doc_id},
        ]
        if not_package_id:
            conditions.append({config.ID_FIELD: {'$ne': not_package_id}})

        request = ParsedRequest()
        request.max_results = 100
        return get_resource_service(ARCHIVE).get_from_mongo(
            req=request, lookup={'$and': conditions})
    def get_items(self, now):
        """Get the items from the archive collection that have expiry in future
        and state is published, corrected, killed

        Yields the matching items one page at a time, paging by ``unique_id``.

        :param datetime now: current date time
        :return list: list of expired items
        """
        logger.info('Fetching expired items from archive collection.')
        # extend 'now' by the configured number of minutes before querying
        now = now + timedelta(minutes=self.expiry_minutes)

        query = {
            'expiry': {
                '$gte': date_to_str(now)
            },
            ITEM_STATE: {
                '$in': [
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                    CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
                ]
            }
        }

        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        req.where = json.dumps(query)
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req,
                                                              lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, self.default_page_size))
            # unique_id of the first match seeds the keyset pagination below
            unique_id = cursor[0]['unique_id']
            logger.info('Number of items to modify: {}, pages={}'.format(
                count, no_of_pages))
        else:
            logger.info('No items to modify.')

        for page in range(0, no_of_pages):
            logger.info(
                'Fetching items for page number: {} unique_id: {}'.format(
                    (page + 1), unique_id))
            req = ParsedRequest()
            req.sort = '[("unique_id", 1)]'
            # first page includes the seed item ($gte); subsequent pages
            # start strictly after the last unique_id seen ($gt)
            if page == 0:
                query['unique_id'] = {'$gte': unique_id}
            else:
                query['unique_id'] = {'$gt': unique_id}

            req.where = json.dumps(query)
            req.max_results = self.default_page_size
            cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req,
                                                                  lookup=None)
            items = list(cursor)
            if len(items) > 0:
                # remember the cursor position for the next page
                unique_id = items[len(items) - 1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(
                len(items), (page + 1)))
            yield items
예제 #39
0
def get_queue_items(retries=False, subscriber_id=None, priority=None):
    """Return publish queue items matching the retry/priority lookup, optionally for one subscriber."""
    lookup = _get_queue_lookup(retries, priority)
    if subscriber_id:
        lookup['$and'].append({'subscriber_id': subscriber_id})
    request = ParsedRequest()
    request.sort = '[("_created", 1), ("published_seq_num", 1)]'
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 100)  # limit per subscriber now
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
예제 #40
0
def index():
    """Serve the rss_items collection as an RSS feed."""
    req = ParsedRequest()
    req.args = request.args
    req.max_results = 200
    service = superdesk.get_resource_service("rss_items")
    feed = generate_feed(list(service.get(req, {})))
    return flask.Response(feed, mimetype="application/rss+xml")
예제 #41
0
def test_pagination(data_layer):
    """With page size 1, the second page still yields exactly one instrument."""
    req = ParsedRequest()
    req.max_results = 1
    req.page = 2
    found = data_layer.find('instruments', req, None)
    assert len(found) == 1
예제 #42
0
def get_published_items():
    """
    Returns a list of items marked for publishing.
    """
    request = ParsedRequest()
    request.sort = 'publish_sequence_no'
    request.max_results = 100
    lookup = {QUEUE_STATE: PUBLISH_STATE.PENDING}
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=request, lookup=lookup))
예제 #43
0
    def get_expired_items(self, page_size):
        """Yield pages of expired published items that are not moved to legal.

        Pages through the published collection in publish_sequence_no order,
        yielding up to ``page_size`` items per batch.

        :param int page_size: number of items per yielded batch
        """
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "and": [
                            {"range": {"expiry": {"lt": "now"}}},
                            {"term": {"moved_to_legal": False}},
                            {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                        ]
                    }
                }
            }
        }

        service = get_resource_service("published")
        req = ParsedRequest()
        req.args = {"source": json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            # sequence number of the first match seeds the keyset pagination
            sequence_no = cursor[0]["publish_sequence_no"]
        logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info(
                "Fetching published items " "for page number: {} sequence no: {}".format((page + 1), sequence_no)
            )
            req = ParsedRequest()
            page_query = deepcopy(query)
            # first page includes the seed sequence number (gte); later pages
            # start strictly after the last one seen (gt)
            if page == 0:
                sequence_filter = {"range": {"publish_sequence_no": {"gte": sequence_no}}}
            else:
                sequence_filter = {"range": {"publish_sequence_no": {"gt": sequence_no}}}

            page_query["query"]["filtered"]["filter"]["and"].append(sequence_filter)

            req.args = {"source": json.dumps(page_query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items):
                sequence_no = items[len(items) - 1]["publish_sequence_no"]

            logger.info(
                "Fetched No. of Items: {} for page: {} " "For import into legal archive.".format(len(items), (page + 1))
            )
            yield items
예제 #44
0
    def get_expired_items(self, page_size):
        """Yield pages of expired published items that are not moved to legal.

        Pages through the published collection in publish_sequence_no order,
        yielding up to ``page_size`` items per batch.

        :param int page_size: number of items per yielded batch
        """
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [
                            {'range': {'expiry': {'lt': 'now'}}},
                            {'term': {'moved_to_legal': False}},
                            {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                        ]
                    }
                }
            }
        }

        service = get_resource_service('published')
        req = ParsedRequest()
        req.args = {'source': json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            # sequence number of the first match seeds the keyset pagination
            sequence_no = cursor[0]['publish_sequence_no']
        logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info('Fetching published items '
                        'for page number: {} sequence no: {}'. format((page + 1), sequence_no))
            req = ParsedRequest()
            page_query = deepcopy(query)
            # first page includes the seed sequence number (gte); later pages
            # start strictly after the last one seen (gt)
            if page == 0:
                sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
            else:
                sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}

            page_query['query']['filtered']['filter']['and'].append(sequence_filter)

            req.args = {'source': json.dumps(page_query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items):
                sequence_no = items[len(items) - 1]['publish_sequence_no']

            logger.info('Fetched No. of Items: {} for page: {} '
                        'For import into legal archive.'.format(len(items), (page + 1)))
            yield items
예제 #45
0
 def _get_max_date_from_publish_queue(self):
     """
     Get the max _updated date from legal_publish_queue collection
     :return datetime: _updated time, or None when the collection is empty
     """
     req = ParsedRequest()
     req.sort = '[("%s", -1)]' % config.LAST_UPDATED
     req.max_results = 1
     service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)
     newest = list(service.get(req=req, lookup={}))
     if not newest:
         return None
     return newest[0][config.LAST_UPDATED]
    def test_retrieve_items_after_index_rebuilt(self):
        """Archive and ingest should each still return their 10 items after the index rebuild."""
        with self.app.app_context():
            req = ParsedRequest()
            req.args = {}
            req.max_results = 25

            # assertEquals is a deprecated alias; use assertEqual
            items = get_resource_service('archive').get(req, {})
            self.assertEqual(10, items.count())

            items = get_resource_service('ingest').get(req, {})
            self.assertEqual(10, items.count())
예제 #47
0
    def get_packages(self, doc_id):
        """
        Retrieves if an article identified by doc_id is referenced in a package.
        :return: articles of type composite
        """

        request = ParsedRequest()
        request.max_results = 100
        lookup = {'$and': [{'type': 'composite'},
                           {'groups.refs.guid': doc_id}]}
        return get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
예제 #48
0
    def find(self, endpoint_name, where, max_results=0):
        """Find items for given endpoint using mongo query in python dict object.

        It handles request creation here so no need to do this in service.

        :param string endpoint_name
        :param dict where
        :param int max_results
        """
        request = ParsedRequest()
        request.max_results = max_results
        request.where = MongoJSONEncoder().encode(where)
        return self.get_from_mongo(endpoint_name, request, None)
예제 #49
0
    def get_items(self, now):
        """Get the items from the archive collection that have expiry in future
        and state is published, corrected, killed

        Yields the matching items one page at a time, paging by ``unique_id``.

        :param datetime now: current date time
        :return list: list of expired items
        """
        logger.info('Fetching expired items from archive collection.')
        # extend 'now' by the configured number of minutes before querying
        now = now + timedelta(minutes=self.expiry_minutes)

        query = {
            'expiry': {'$gte': date_to_str(now)},
            ITEM_STATE: {'$in': [
                CONTENT_STATE.PUBLISHED,
                CONTENT_STATE.CORRECTED,
                CONTENT_STATE.KILLED,
                CONTENT_STATE.RECALLED
            ]}
        }

        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        req.where = json.dumps(query)
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, self.default_page_size))
            # unique_id of the first match seeds the keyset pagination below
            unique_id = cursor[0]['unique_id']
            logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
        else:
            logger.info('No items to modify.')

        for page in range(0, no_of_pages):
            logger.info('Fetching items for page number: {} unique_id: {}'. format((page + 1), unique_id))
            req = ParsedRequest()
            req.sort = '[("unique_id", 1)]'
            # first page includes the seed item ($gte); subsequent pages
            # start strictly after the last unique_id seen ($gt)
            if page == 0:
                query['unique_id'] = {'$gte': unique_id}
            else:
                query['unique_id'] = {'$gt': unique_id}

            req.where = json.dumps(query)
            req.max_results = self.default_page_size
            cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
            items = list(cursor)
            if len(items) > 0:
                # remember the cursor position for the next page
                unique_id = items[len(items) - 1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), (page + 1)))
            yield items
예제 #50
0
    def get_packages(self, doc_id):
        """
        Retrieves package(s) if an article identified by doc_id is referenced in a package.

        :param: doc_id identifier of the item in the package
        :return: articles of type composite
        """

        request = ParsedRequest()
        request.max_results = 100
        lookup = {'$and': [{ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
                           {'groups.refs.guid': doc_id}]}
        return get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
예제 #51
0
    def _get_publish_queue_items_to_import(self, max_date):
        """
        Get the queue items to import after max_date
        :param datetime max_date: lower bound on the last-updated time, may be None
        :return : list of publish queue items
        """
        lookup = {"$and": [{config.LAST_UPDATED: {"$gte": max_date}}]} if max_date else {}

        req = ParsedRequest()
        req.max_results = 500
        return get_resource_service("publish_queue").get(req=req, lookup=lookup)
예제 #52
0
 def on_create(self, docs):
     """Assign desk_order and default content expiry to new stages."""
     for doc in docs:
         if not doc.get('desk'):
             doc['desk_order'] = 1
             continue
         if doc.get('content_expiry', 0) == 0:
             doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
         # next order after the current highest on this desk
         req = ParsedRequest()
         req.sort = '-desk_order'
         req.max_results = 1
         prev_stage = self.get(req=req, lookup={'desk': doc['desk']})
         if prev_stage.count():
             doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1
         else:
             doc['desk_order'] = 1
예제 #53
0
def get_queue_items():
    """Return pending (or due-for-retry) queue items, excluding pull destinations."""
    due = {
        "$or": [
            {"state": QueueState.PENDING.value},
            {"state": QueueState.RETRYING.value, "next_retry_attempt_at": {"$lte": utcnow()}},
        ]
    }
    lookup = {"$and": [due, {"destination.delivery_type": {"$ne": "pull"}}]}
    request = ParsedRequest()
    request.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
예제 #54
0
def get_queue_items(retries=False):
    """Return queue items ready to transmit: retrying items past their retry time, or pending items."""
    not_pull = {"destination.delivery_type": {"$ne": "pull"}}
    if retries:
        conditions = [
            {"state": QueueState.RETRYING.value},
            {"next_retry_attempt_at": {"$lte": utcnow()}},
            not_pull,
        ]
    else:
        conditions = [{"state": QueueState.PENDING.value}, not_pull]
    request = ParsedRequest()
    request.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup={"$and": conditions})
예제 #55
0
    def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
        """Get the expired items.

        Returns a generator for the list of expired items, sorting by `_id` and returning `max_results` per iteration.

        :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
        :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
        :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
        :param boolean include_children: Include only root item if False, otherwise include the entire item chain
        :return list: expired content_api items
        """

        expiry_datetime = utcnow() if expiry_datetime is None else expiry_datetime
        expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS'] if expiry_days is None else expiry_days
        max_results = app.settings['MAX_EXPIRY_QUERY_LIMIT'] if max_results is None else max_results

        expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
        last_id = None

        while True:
            conditions = [{'_updated': {'$lte': expire_at}}]
            if last_id is not None:
                # keyset pagination: continue after the last item of the previous batch
                conditions.append({'_id': {'$gt': last_id}})
            if not include_children:
                conditions.append({'ancestors': {'$exists': False}})

            req = ParsedRequest()
            req.sort = '_id'
            req.where = json.dumps({'$and': conditions})
            req.max_results = max_results

            batch = list(self.get_from_mongo(req=req, lookup=None))
            if not batch:
                break

            last_id = batch[-1]['_id']
            yield batch
예제 #56
0
def get_published_items():
    """
    Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed.
    """
    schedule_field = "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE)
    lookup = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            # never scheduled at all ...
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            # ... or scheduled and the scheduled time has already passed
            {ITEM_STATE: CONTENT_STATE.SCHEDULED, schedule_field: {"$lte": utcnow()}},
        ],
    }
    req = ParsedRequest()
    req.sort = "publish_sequence_no"
    req.max_results = 200
    cursor = get_resource_service(PUBLISHED).get_from_mongo(req=req, lookup=lookup)
    return list(cursor)
예제 #57
0
    def get_packages(self, doc_id, not_package_id=None):
        """
        Retrieves package(s) if an article identified by doc_id is referenced in a package.

        :param str doc_id: identifier of the item in the package
        :param str not_package_id: not package id
        :return: articles of type composite
        """

        criteria = [
            {ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
            {'groups.refs.residRef': doc_id},
        ]

        if not_package_id:
            # exclude this particular package from the result
            criteria.append({config.ID_FIELD: {'$ne': not_package_id}})

        req = ParsedRequest()
        req.max_results = 100

        return get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup={'$and': criteria})
예제 #58
0
 def on_create(self, docs):
     """Assign `desk_order` and default content expiry to each new stage.

     A stage without a desk gets order 1; otherwise the order is one past the
     highest existing order on that desk. Stages created as the default
     incoming stage clear the previous default on the same desk.
     """
     for doc in docs:
         desk = doc.get('desk')
         if not desk:
             doc['desk_order'] = 1
             continue
         req = ParsedRequest()
         req.sort = '-desk_order'
         req.max_results = 1
         existing = self.get(req=req, lookup={'desk': desk})
         if doc.get('content_expiry', 0) == 0:
             doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
         doc['desk_order'] = 1 if existing.count() == 0 else existing[0].get('desk_order', 1) + 1
         # if this new one is default need to remove the old default
         if doc.get('default_incoming', False):
             self.remove_old_default(desk, 'default_incoming')
    def test_compare_repos(self):
        """Verify CompareRepositories reports mongo/elastic consistency counts.

        The fixture holds 99 items in mongo and 198 in elastic, so 99 should be
        identical, 99 elastic-only, and none inconsistent.
        """
        with self.app.app_context():
            req = ParsedRequest()
            req.args = {}
            req.max_results = 25

            items = get_resource_service('archive').get(req, {})
            # assertEquals is a deprecated alias (removed in Python 3.12); use assertEqual
            self.assertEqual(99, items.count())

            consistency_record = CompareRepositories().run('archive',
                                                           self.app.config['ELASTICSEARCH_URL'],
                                                           self.app.config['ELASTICSEARCH_INDEX'])
            self.assertEqual(consistency_record['mongo'], 99)
            self.assertEqual(consistency_record['elastic'], 198)
            self.assertEqual(consistency_record['identical'], 99)
            self.assertEqual(consistency_record['mongo_only'], 0)
            self.assertEqual(consistency_record['elastic_only'], 99)
            self.assertEqual(consistency_record['inconsistent'], 0)
예제 #60
0
    def get_publish_queue_items(self, page_size, expired_items=None):
        """Get publish queue items that are not moved to legal.

        Items are fetched page by page, keyset-paginated on ``_id``.

        :param int page_size: batch size
        :param list expired_items: restrict to these item ids; when None/empty,
            select items in a terminal state (success/canceled/failed) instead
        :return: generator yielding lists of publish queue items
        """
        # Fix: the previous mutable default argument ([]) was shared across
        # calls; use a None sentinel instead.
        if expired_items is None:
            expired_items = []

        query = {"moved_to_legal": False}

        if expired_items:
            query["item_id"] = {"$in": expired_items}
        else:
            query["state"] = {"$in": [QueueState.SUCCESS.value, QueueState.CANCELED.value, QueueState.FAILED.value]}

        service = get_resource_service("publish_queue")
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.where = json.dumps(query)
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            queue_id = cursor[0][config.ID_FIELD]
        logger.info("Number of items to move to legal archive publish queue: {}, pages={}".format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info(
                "Fetching publish queue items " "for page number: {}. queue_id: {}".format((page + 1), queue_id)
            )
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # NOTE(review): $gte re-includes the boundary item on every page
            # after the first, so consecutive batches can overlap by one item —
            # confirm the downstream import is idempotent before tightening
            # this to $gt.
            query["_id"] = {"$gte": str(queue_id)}
            req.where = json.dumps(query)
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items) > 0:
                queue_id = items[-1][config.ID_FIELD]
            logger.info(
                "Fetched No. of Items: {} for page: {} "
                "For import in to legal archive publish_queue.".format(len(items), (page + 1))
            )
            yield items