def test_query_sort_by_name_case_insensetive(self):
        """Sorting by name with a strength-1 "en" collation ignores case and script."""
        service = get_resource_service("concept_items")
        names = [
            "A Message to Garcia",
            "and then there were none",
            "Bootstrap: Responsive Web Development",
            "Hobbit",
            "Lord of the rings",
            "the Elegance of the Hedgehog",
            "The Little Prince",
            "Гайдамаки",
        ]
        # Same collation for both directions: primary strength == case-insensitive.
        collation_args = ImmutableMultiDict([("collation",
                                              '{"locale": "en", "strength":"1"}')])

        for sort_spec, expected in (("name", names), ("-name", list(reversed(names)))):
            req = ParsedRequest()
            req.sort = sort_spec
            req.args = collation_args
            cursor = service.get_from_mongo(req=req, lookup={})
            self.assertEqual([item["name"] for item in cursor], expected)
    def test_service_use_definition_text_instead_of_definition_html(self):
        """Sorting on definition_html, returned items expose plain definition_text."""
        service = get_resource_service("concept_items")
        definitions = [
            "A Message to Garcia is a widely distributed essay written by Elbert Hubbard in 1899, "
            "expressing the value of individual initiative and conscientiousness in work.",
            "and then there were none dame Agatha Mary Clarissa Christie, Lady Mallowan, DBE "
            "(née Miller; 15 September 1890 – 12 January 1976) was an English writer.",
            "Bootstrap: Responsive Web Development. Discover how easy it is to design killer "
            "interfaces and responsive websites with the Bootstrap framework. ",
            "Hobbit is a children's fantasy novel by English author J. R. R. Tolkien.",
            "Lord of the rings is a children's fantasy novel by English author J. R. R. "
            "Tolkien.",
            "the Elegance of the Hedgehog is a novel about parallels and the concealment of one’s "
            "true passions in life.",
            "The Little Prince is a novella, the most famous work of French aristocrat, writer, "
            "poet, and pioneering aviator Antoine de Saint-Exupéry.",
            "Гайдамаки — історико-героїчна поема Шевченка, перший український історичний роман у "
            "віршах.",
        ]

        # Ascending and descending sorts must both map to definition_text values.
        for sort_spec, expected in (
            ("definition_html", definitions),
            ("-definition_html", list(reversed(definitions))),
        ):
            req = ParsedRequest()
            req.sort = sort_spec
            cursor = service.get(req=req, lookup={})
            self.assertEqual([item["definition_text"] for item in cursor], expected)
    def get_items(self, now):
        """Yield pages of archive items whose expiry is still in the future
        and whose state is published, corrected, killed or recalled.

        :param datetime now: current date time
        :return: generator yielding lists of archive items, one list per page
        """
        logger.info('Fetching expired items from archive collection.')
        now = now + timedelta(minutes=self.expiry_minutes)

        query = {
            'expiry': {'$gte': date_to_str(now)},
            ITEM_STATE: {
                '$in': [
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                    CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED,
                ]
            },
        }

        archive_service = get_resource_service(ARCHIVE)
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        req.where = json.dumps(query)
        cursor = archive_service.get_from_mongo(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, self.default_page_size))
            # Resume cursor: lowest unique_id in the matching set.
            unique_id = cursor[0]['unique_id']
            logger.info('Number of items to modify: {}, pages={}'.format(
                count, no_of_pages))
        else:
            logger.info('No items to modify.')

        for page in range(no_of_pages):
            logger.info(
                'Fetching items for page number: {} unique_id: {}'.format(
                    page + 1, unique_id))
            req = ParsedRequest()
            req.sort = '[("unique_id", 1)]'
            # First page is inclusive of the starting id; later pages resume
            # strictly after the last id seen.
            comparison = '$gte' if page == 0 else '$gt'
            query['unique_id'] = {comparison: unique_id}

            req.where = json.dumps(query)
            req.max_results = self.default_page_size
            items = list(archive_service.get_from_mongo(req=req, lookup=None))
            if items:
                unique_id = items[-1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(
                len(items), page + 1))
            yield items
# Example #4
    def get_expired_items(self, page_size):
        """Yield pages of expired published items not yet moved to legal archive.

        Items are read in ascending publish_sequence_no order; the first page
        starts at the lowest matching sequence number and every later page
        resumes strictly after the last item of the previous one.

        :param int page_size: number of items fetched per page
        :return: generator yielding lists of published items
        """
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "and": [
                            {"range": {"expiry": {"lt": "now"}}},
                            {"term": {"moved_to_legal": False}},
                            {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                        ]
                    }
                }
            }
        }

        service = get_resource_service("published")
        req = ParsedRequest()
        req.args = {"source": json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            sequence_no = cursor[0]["publish_sequence_no"]
        logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info(
                "Fetching published items " "for page number: {} sequence no: {}".format((page + 1), sequence_no)
            )
            req = ParsedRequest()
            page_query = deepcopy(query)
            # Fix: removed the redundant unconditional assignment of
            # sequence_filter that was immediately overwritten (dead code).
            # First page includes the starting sequence number (gte); later
            # pages resume strictly after it (gt) to avoid re-fetching.
            if page == 0:
                sequence_filter = {"range": {"publish_sequence_no": {"gte": sequence_no}}}
            else:
                sequence_filter = {"range": {"publish_sequence_no": {"gt": sequence_no}}}

            page_query["query"]["filtered"]["filter"]["and"].append(sequence_filter)

            req.args = {"source": json.dumps(page_query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items):
                sequence_no = items[len(items) - 1]["publish_sequence_no"]

            logger.info(
                "Fetched No. of Items: {} for page: {} " "For import into legal archive.".format(len(items), (page + 1))
            )
            yield items
# Example #5
    def get_expired_items(self, page_size):
        """Yield pages of expired published items not yet moved to legal archive.

        Items are read in ascending publish_sequence_no order; the first page
        starts at the lowest matching sequence number and every later page
        resumes strictly after the last item of the previous one.

        :param int page_size: number of items fetched per page
        :return: generator yielding lists of published items
        """
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [
                            {'range': {'expiry': {'lt': 'now'}}},
                            {'term': {'moved_to_legal': False}},
                            {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                        ]
                    }
                }
            }
        }

        service = get_resource_service('published')
        req = ParsedRequest()
        req.args = {'source': json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            sequence_no = cursor[0]['publish_sequence_no']
        logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info('Fetching published items '
                        'for page number: {} sequence no: {}'. format((page + 1), sequence_no))
            req = ParsedRequest()
            page_query = deepcopy(query)
            # Fix: removed the redundant unconditional assignment of
            # sequence_filter that was immediately overwritten (dead code).
            # First page is inclusive (gte); later pages exclusive (gt).
            if page == 0:
                sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
            else:
                sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}

            page_query['query']['filtered']['filter']['and'].append(sequence_filter)

            req.args = {'source': json.dumps(page_query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items):
                sequence_no = items[len(items) - 1]['publish_sequence_no']

            logger.info('Fetched No. of Items: {} for page: {} '
                        'For import into legal archive.'.format(len(items), (page + 1)))
            yield items
    def get_expired_items(self, page_size):
        """Yield pages of expired published items not yet moved to legal archive.

        Items are read in ascending publish_sequence_no order; the first page
        starts at the lowest matching sequence number and every later page
        resumes strictly after the last item of the previous one.

        :param int page_size: number of items fetched per page
        :return: generator yielding lists of published items
        """
        query = {
            'query': {
                'filtered': {
                    'filter': {
                        'and': [
                            {'range': {'expiry': {'lt': 'now'}}},
                            {'term': {'moved_to_legal': False}},
                            {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                        ]
                    }
                }
            }
        }

        service = get_resource_service('published')
        req = ParsedRequest()
        req.args = {'source': json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            sequence_no = cursor[0]['publish_sequence_no']
        logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info('Fetching published items '
                        'for page number: {} sequence no: {}'. format((page + 1), sequence_no))
            req = ParsedRequest()
            page_query = deepcopy(query)
            # Fix: removed the redundant unconditional assignment of
            # sequence_filter that was immediately overwritten (dead code).
            # First page is inclusive (gte); later pages exclusive (gt).
            if page == 0:
                sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
            else:
                sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}

            page_query['query']['filtered']['filter']['and'].append(sequence_filter)

            req.args = {'source': json.dumps(page_query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items):
                sequence_no = items[len(items) - 1]['publish_sequence_no']

            logger.info('Fetched No. of Items: {} for page: {} '
                        'For import into legal archive.'.format(len(items), (page + 1)))
            yield items
# Example #7
    def get_publish_queue_items(self, page_size, expired_items=None):
        """Yield pages of publish queue items that are not moved to legal.

        :param int page_size: batch size
        :param list expired_items: restrict the scan to these item ids; when
            empty, select by terminal queue states instead
        :return: generator yielding lists of publish queue items
        """
        if expired_items is None:
            expired_items = []

        query = {"moved_to_legal": False}

        if expired_items:
            query["item_id"] = {"$in": expired_items}
        else:
            query["state"] = {
                "$in": [
                    QueueState.SUCCESS.value, QueueState.CANCELED.value,
                    QueueState.FAILED.value
                ]
            }

        service = get_resource_service("publish_queue")
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.where = json.dumps(query)
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            queue_id = cursor[0][config.ID_FIELD]
        logger.info(
            "Number of items to move to legal archive publish queue: {}, pages={}"
            .format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info("Fetching publish queue items "
                        "for page number: {}. queue_id: {}".format((page + 1),
                                                                   queue_id))
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # Fix: unconditional $gte re-fetched the previous page's last item
            # on every page after the first. Match the sibling paginators:
            # inclusive ($gte) for page 0, exclusive ($gt) afterwards.
            if page == 0:
                query["_id"] = {"$gte": str(queue_id)}
            else:
                query["_id"] = {"$gt": str(queue_id)}
            req.where = json.dumps(query)
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items) > 0:
                queue_id = items[len(items) - 1][config.ID_FIELD]
            logger.info("Fetched No. of Items: {} for page: {} "
                        "For import in to legal archive publish_queue.".format(
                            len(items), (page + 1)))
            yield items
    def get_items(self, now):
        """Get the items from the archive collection that have expiry in future
        and state is published, corrected, killed (or recalled), yielded in
        pages of ``self.default_page_size`` sorted by ``unique_id``.

        :param datetime now: current date time
        :return list: generator yielding lists of archive items, one per page
        """
        logger.info('Fetching expired items from archive collection.')
        # Push the cutoff forward so items expiring within the grace window
        # are still included.
        now = now + timedelta(minutes=self.expiry_minutes)

        query = {
            'expiry': {'$gte': date_to_str(now)},
            ITEM_STATE: {'$in': [
                CONTENT_STATE.PUBLISHED,
                CONTENT_STATE.CORRECTED,
                CONTENT_STATE.KILLED,
                CONTENT_STATE.RECALLED
            ]}
        }

        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        req.where = json.dumps(query)
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, self.default_page_size))
            # Lowest matching unique_id; used as the paging cursor below.
            unique_id = cursor[0]['unique_id']
            logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
        else:
            logger.info('No items to modify.')

        for page in range(0, no_of_pages):
            logger.info('Fetching items for page number: {} unique_id: {}'. format((page + 1), unique_id))
            req = ParsedRequest()
            req.sort = '[("unique_id", 1)]'
            # First page includes the starting id ($gte); later pages resume
            # strictly after the last id of the previous page ($gt).
            if page == 0:
                query['unique_id'] = {'$gte': unique_id}
            else:
                query['unique_id'] = {'$gt': unique_id}

            req.where = json.dumps(query)
            req.max_results = self.default_page_size
            cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
            items = list(cursor)
            if len(items) > 0:
                unique_id = items[len(items) - 1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), (page + 1)))
            yield items
# Example #9
 def get_expired_items(self, now):
     """Return a mongo cursor (max 100 items, oldest first) over expired published items."""
     logger.info('Get expired content from published')
     lookup = self.get_query_for_expired_items(now)
     request = ParsedRequest()
     request.sort = '_created'
     request.max_results = 100
     return superdesk.get_resource_service('published').get_from_mongo(req=request, lookup=lookup)
# Example #10
 def get_next_order_sequence(self, blog_id):
     """Return the next post order for *blog_id* and advance the stored counter.

     :param blog_id: id of the blog, or None
     :return int: the order to use for the next post (0 when blog is missing)
     """
     if blog_id is None:
         return 0
     # Atomically read the current sequence and increment it for the next call.
     blog = get_resource_service('blogs').find_and_modify(
         query={'_id': blog_id},
         update={'$inc': {'posts_order_sequence': 1}},
         upsert=False)
     if not blog:
         return 0
     # Fix: the old `blog and blog.get(...) or None` idiom collapsed a
     # legitimate sequence value of 0 to None, forcing a needless rescan.
     order = blog.get('posts_order_sequence')
     if order is None:
         # Support previous LB versions where the sequence was not saved on
         # the blog: derive it from the highest existing post order.
         req = ParsedRequest()
         req.sort = '-order'
         req.max_results = 1
         post = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
         if post and post.get('order') is not None:
             order = post.get('order') + 1
             # Backfill the counter on the blog for future calls.
             get_resource_service('blogs').update(blog_id, {'posts_order_sequence': order + 1}, blog)
         else:
             order = 0
     return order
# Example #11
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None
        logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))

        # Walk the audit collection in _id order, 1000 records at a time.
        while True:
            query = deepcopy(self.item_entry_query)
            query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
            if current_id:
                query['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            req.projection = '{"_id": 1, "audit_id":1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=query)
            items = [(audit['_id'], audit['audit_id']) for audit in audits]
            if not items:
                logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
                return
            logger.info('Found {} orphaned audit items at {}'.format(len(items), utcnow()))
            current_id = items[-1][0]

            # An audit record is orphaned when its audit_id has no archive entry.
            batch_ids = {audit_id for _, audit_id in items}
            orphaned = batch_ids - self._get_archive_ids(batch_ids)
            to_delete = [entry_id for entry_id, audit_id in items if audit_id in orphaned]
            logger.info('Deleting {} orphaned audit items at {}'.format(len(to_delete), utcnow()))
            service.delete_ids_from_mongo(to_delete)
# Example #12
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Get the expired items.

        Where content state is not scheduled and the item matches given parameters

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True only invalid items
        :return pymongo.cursor: expired non published items.
        """
        # Item must either sit on a desk, or be spiked with no desk.
        desk_or_spiked = {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]}
        query = {'$and': [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            desk_or_spiked,
        ]}

        # Restrict by validation status depending on the caller's intent.
        status_clause = (
            {'expiry_status': 'invalid'}
            if invalid_only
            else {'expiry_status': {'$ne': 'invalid'}}
        )
        query['$and'].append(status_clause)

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup=query)
# Example #13
    def purge_old_entries(self):
        """
        Purge entries older than the expiry that are not related to archive items
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_date = None

        # Delete in batches of 1000, walking forward by _updated timestamp.
        while True:
            lookup = {'$and': [
                self.not_item_entry_query,
                {'_updated': {'$lte': date_to_str(self.expiry)}},
            ]}
            if current_date:
                lookup['$and'].append({'_updated': {'$gte': current_date}})
            req = ParsedRequest()
            req.sort = '[("_updated", 1)]'
            req.projection = '{"_id": 1, "_updated": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=lookup)
            if audits.count() == 0:
                break
            batch = [(audit['_id'], audit['_updated']) for audit in audits]
            # Resume from the newest timestamp seen; deletions guarantee progress.
            current_date = batch[-1][1]
            service.delete({'_id': {'$in': [entry_id for entry_id, _ in batch]}})
# Example #14
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive.

        Scans the audit collection in ``_id`` order, 1000 records per batch, and
        deletes each audit record whose extracted item id is not found in archive.
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None

        # Scan the audit collection for items to delete
        while True:
            query = deepcopy(self.item_entry_query)
            query['$and'].append(
                {'_updated': {
                    '$lte': date_to_str(self.expiry)
                }})
            # Resume after the last _id processed in the previous batch.
            if current_id:
                query['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # Only the fields _extract_item_id needs are projected.
            req.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=query)
            if audits.count() == 0:
                break
            items = list([(item['_id'], self._extract_item_id(item))
                          for item in audits])
            current_id = items[len(items) - 1][0]

            # Orphaned = item ids in this batch with no matching archive entry.
            batch_ids = set([i[1] for i in items])
            archive_ids = self._get_archive_ids(batch_ids)
            ids = (batch_ids - archive_ids)
            audit_ids = [i[0] for i in items if i[1] in ids]
            service.delete({'_id': {'$in': audit_ids}})
# Example #15
    def get_expired_items(self, expired_date_time, limit=100):
        """
        Fetches the expired articles from published collection. Expiry Conditions:
            1.  can_be_removed flag is True
            2.  Item Expiry is less than or equal to expired_date_time, State of the Item is not SCHEDULED and
                allow_post_publish_actions flag is True

        :param expired_date_time:
        :param limit:
        :return: expired articles from published collection
        """

        logger.info("Get expired content from published")
        # Either explicitly removable, or expired + not scheduled + post-publish allowed.
        expired_clause = {
            "$and": [
                {"expiry": {"$lte": expired_date_time}},
                {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
                {"allow_post_publish_actions": True},
            ]
        }
        query = {"$or": [{"can_be_removed": True}, expired_clause]}

        req = ParsedRequest()
        req.sort = "_created"
        req.max_results = limit

        return superdesk.get_resource_service("published").get_from_mongo(req=req, lookup=query)
# Example #16
 def get_next_order_sequence(self, blog_id):
     """Return the next post order for *blog_id* and advance the stored counter.

     :param blog_id: id of the blog, or None
     :return int: the order to use for the next post (0 when blog is missing)
     """
     if blog_id is None:
         return 0
     # Atomically read the current sequence and increment it for the next call.
     blog = get_resource_service('blogs').find_and_modify(
         query={'_id': blog_id},
         update={'$inc': {
             'posts_order_sequence': 1
         }},
         upsert=False)
     if not blog:
         return 0
     # Fix: the old `blog and blog.get(...) or None` idiom collapsed a
     # legitimate sequence value of 0 to None, forcing a needless rescan.
     order = blog.get('posts_order_sequence')
     if order is None:
         # Support previous LB versions where the sequence was not saved on
         # the blog: derive it from the highest existing post order.
         req = ParsedRequest()
         req.sort = '-order'
         req.max_results = 1
         post = next(
             self.get_from_mongo(req=req, lookup={'blog': blog_id}),
             None)
         if post and post.get('order') is not None:
             order = post.get('order') + 1
             # Backfill the counter on the blog for future calls.
             get_resource_service('blogs').update(
                 blog_id, {'posts_order_sequence': order + 1}, blog)
         else:
             order = 0
     return order
# Example #17
    def get_history_items(self, last_id, gte, item_id, chunk_size=0):
        """Yield chunks of archive_history records matching the given filters.

        :param last_id: resume strictly after this history record id (or falsy)
        :param gte: only records created at/after this datetime (or falsy)
        :param item_id: restrict to history of this item (or falsy)
        :param int chunk_size: max records per chunk; 0 means no limit
        """
        history_service = get_resource_service('archive_history')
        last_processed_id = last_id

        while True:
            conditions = []
            if gte:
                conditions.append({'_created': {'$gte': date_to_str(gte)}})
            if item_id:
                conditions.append({'item_id': str(item_id)})
            if last_processed_id:
                conditions.append({'_id': {'$gt': str(last_processed_id)}})

            req = ParsedRequest()
            req.sort = '[("_id", 1), ("version", 1)]'
            req.where = json.dumps({'$and': conditions})
            if chunk_size > 0:
                req.max_results = int(chunk_size)

            items = list(history_service.get(req=req, lookup=None))
            if not items:
                break

            # Next iteration resumes after the last record of this chunk.
            last_processed_id = items[-1][config.ID_FIELD]
            yield items
# Example #18
    def get_filters(self):
        """Retrieve all of the available filter conditions and content filters if they have not yet
        been retrieved or they have been updated. This avoids the filtering functions having to
        repeatedly retrieve the individual filter records.

        :return:
        """

        # Get the most recent update time to the filter conditions and content_filters
        req = ParsedRequest()
        req.sort = '-_updated'
        req.max_results = 1
        mindate = datetime.min.replace(tzinfo=pytz.UTC)
        latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)
        latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)

        # Rebuild the cache when it is empty, stale, or either collection is empty
        # (mindate sentinel), so deletions are picked up too.
        if not self.filters or \
                latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
                latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
            logger.debug('Getting content filters and filter conditions')
            self.filters = {'filter_conditions': {}, 'content_filters': {}}
            for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
                self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
                # Track the newest _updated seen so the next call can detect changes.
                self.filters['latest_filter_conditions'] = max(
                    fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
            for cf in get_resource_service('content_filters').get(req=None, lookup={}):
                self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
                self.filters['latest_content_filters'] = max(
                    cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
        else:
            # Fix: log message typo "chached" -> "cached".
            logger.debug('Using cached content filters and filters conditions')
# Example #19
def get_expired_items(expired_date_time, limit=100):
    """
    Fetches the expired articles from published collection. Expiry Conditions:
        1.  can_be_removed flag is True
        2.  Item Expiry is less than or equal to expired_date_time, State of the Item is not SCHEDULED and
            allow_post_publish_actions flag is True

    :param expired_date_time:
    :param limit:
    :return: expired articles from published collection
    """

    logger.info('Get expired content from published')
    # Either explicitly removable, or expired + not scheduled + post-publish allowed.
    expired_clause = {'$and': [
        {'expiry': {'$lte': expired_date_time}},
        {ITEM_STATE: {'$ne': CONTENT_STATE.SCHEDULED}},
        {'allow_post_publish_actions': True},
    ]}
    query = {'$or': [{'can_be_removed': True}, expired_clause]}

    req = ParsedRequest()
    req.sort = '_created'
    req.max_results = limit

    return superdesk.get_resource_service('published').get_from_mongo(req=req, lookup=query)
# Example #20
    def purge_old_entries(self):
        """
        Purge entries older than the expiry that are not related to archive items
        :return:
        """
        service = superdesk.get_resource_service('audit')
        current_id = None
        logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))

        # Delete in batches of 1000, walking forward by _id.
        while True:
            lookup = {'$and': [self.not_item_entry_query, {'_updated': {'$lte': date_to_str(self.expiry)}}]}
            if current_id:
                lookup['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            req.projection = '{"_id": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=lookup)
            batch_ids = [audit.get('_id') for audit in audits]
            if not batch_ids:
                logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
                return
            logger.info('Found {} audit items at {}'.format(len(batch_ids), utcnow()))
            current_id = batch_ids[-1]
            logger.info('Deleting {} old audit items'.format(len(batch_ids)))
            service.delete_ids_from_mongo(batch_ids)
    def get_filters(self):
        """Retrieve all of the available filter conditions and content filters if they have not yet
        been retrieved or they have been updated. This avoids the filtering functions having to
        repeatedly retrieve the individual filter records.

        :return:
        """

        # Get the most recent update time to the filter conditions and content_filters
        req = ParsedRequest()
        req.sort = '-_updated'
        req.max_results = 1
        mindate = datetime.min.replace(tzinfo=pytz.UTC)
        latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)
        latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                         {}).get('_updated', mindate)

        # Rebuild the cache when it is empty, stale, or either collection is empty
        # (mindate sentinel), so deletions are picked up too.
        if not self.filters or \
                latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
                latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
            logger.debug('Getting content filters and filter conditions')
            self.filters = {'filter_conditions': {}, 'content_filters': {}}
            for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
                self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
                # Track the newest _updated seen so the next call can detect changes.
                self.filters['latest_filter_conditions'] = max(
                    fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
            for cf in get_resource_service('content_filters').get(req=None, lookup={}):
                self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
                self.filters['latest_content_filters'] = max(
                    cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
        else:
            # Fix: log message typo "chached" -> "cached".
            logger.debug('Using cached content filters and filters conditions')
# Example #22
    def get_published_takes(self, takes_package):
        """
        Get all the published takes in the takes packages.
        :param takes_package: takes package
        :return: List of publishes takes.
        """
        refs = self.get_package_refs(takes_package)
        if not refs:
            return []

        take_ids = [ref.get(RESIDREF) for ref in refs]

        # Only takes that have actually been published (or corrected) qualify.
        query = {'$and': [
            {config.ID_FIELD: {'$in': take_ids}},
            {ITEM_STATE: {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}},
        ]}
        request = ParsedRequest()
        request.sort = SEQUENCE
        return list(get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=query))
# Example #23
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """Fetch overdue scheduled articles from the given collection.

    An item is overdue when:
        1. it is in the 'scheduled' state, and
        2. its publish_schedule is less than or equal to *expired_date_time*.

    :param expired_date_time: datetime the publish_schedule is checked against
    :param resource: name of the resource to read the data from
    :param limit: maximum number of items to return
    :return: cursor over overdue scheduled articles
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))

    lookup = {
        '$and': [
            {'publish_schedule': {'$lte': expired_date_time}},
            {ITEM_STATE: CONTENT_STATE.SCHEDULED},
        ]
    }

    request = ParsedRequest()
    request.sort = '_modified'
    request.max_results = limit

    service = superdesk.get_resource_service(resource)
    return service.get_from_mongo(req=request, lookup=lookup)
Exemple #24
0
    def find_articles_to_kill(self, lookup, include_other_takes=True):
        """Finds the article to kill.

        If the article is associated with Digital Story then Digital Story will
        also be fetched. If the Digital Story has more takes then all of them would be fetched.

        :param lookup: query to find the main article to be killed
        :type lookup: dict
        :param include_other_takes: when True also fetch the sibling takes of the package
        :type include_other_takes: bool
        :return: list of articles to be killed (None when nothing matches the lookup)
        :rtype: list
        """

        archived_doc = self.find_one(req=None, **lookup)
        if not archived_doc:
            # nothing matched the lookup -> nothing to kill
            return

        req = ParsedRequest()
        # sort newest version first so index [0] below is the latest revision
        req.sort = '[("%s", -1)]' % config.VERSION
        archived_doc = list(self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
        articles_to_kill = [archived_doc]
        takes_package_id = self._get_take_package_id(archived_doc)
        if takes_package_id:
            # the takes package referencing this take at this version is killed too
            takes_package = self.get_archived_takes_package(takes_package_id,
                                                            archived_doc['item_id'],
                                                            archived_doc['_current_version'],
                                                            include_other_takes)
            articles_to_kill.append(takes_package)

            if include_other_takes:
                # every other take referenced by the package is also killed
                for takes_ref in self._get_package_refs(takes_package):
                    if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                        take = list(self.get(req=req, lookup={'item_id': takes_ref[RESIDREF]}))[0]
                        articles_to_kill.append(take)

        return articles_to_kill
Exemple #25
0
    def on_create(self, docs):
        """Set desk_order and expiry defaults on new stages.

        When the new stage is flagged as the working and/or incoming stage,
        the previous default stage of that kind is removed.
        """

        for stage in docs:
            desk_id = stage.get('desk')

            if not desk_id:
                stage['desk_order'] = 1
                continue

            # look up the stage with the current highest desk_order on this desk
            request = ParsedRequest()
            request.sort = '-desk_order'
            request.max_results = 1
            existing = self.get(req=request, lookup={'desk': stage['desk']})

            # an explicit 0 means "no expiry"
            if stage.get('content_expiry') == 0:
                stage['content_expiry'] = None

            if existing.count() == 0:
                stage['desk_order'] = 1
            else:
                stage['desk_order'] = existing[0].get('desk_order', 1) + 1

            # if this new one is default then remove the old default
            if stage.get('working_stage', False):
                self.remove_old_default(desk_id, 'working_stage')

            if stage.get('default_incoming', False):
                self.remove_old_default(desk_id, 'default_incoming')
Exemple #26
0
    def _find_articles_to_kill(self, lookup):
        """
        Finds the article to kill. If the article is associated with Digital Story then Digital Story will
        also be fetched. If the Digital Story has more takes then all of them would be fetched.

        :param lookup: query to find the main article to be killed
        :type lookup: dict
        :return: list of articles to be killed, or None when nothing matches the lookup
        :rtype: list
        """

        archived_doc = self.find_one(req=None, **lookup)
        if not archived_doc:
            # no matching archived item: avoid subscripting None below
            # (mirrors the guard in find_articles_to_kill)
            return

        req = ParsedRequest()
        # sort newest version first so index [0] is the latest revision
        req.sort = '[("%s", -1)]' % config.VERSION
        archived_doc = list(self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
        articles_to_kill = [archived_doc]
        takes_package_service = TakesPackageService()
        takes_package_id = takes_package_service.get_take_package_id(archived_doc)
        if takes_package_id:
            takes_package = list(self.get(req=req, lookup={'item_id': takes_package_id}))[0]
            articles_to_kill.append(takes_package)

            # every other take referenced by the package is killed as well
            for takes_ref in takes_package_service.get_package_refs(takes_package):
                if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                    take = list(self.get(req=req, lookup={'item_id': takes_ref[RESIDREF]}))[0]
                    articles_to_kill.append(take)

        return articles_to_kill
Exemple #27
0
    def _find_articles_to_kill(self, lookup):
        """
        Finds the article to kill. If the article is associated with Digital Story then Digital Story will
        also be fetched. If the Digital Story has more takes then all of them would be fetched.

        :param lookup: query to find the main article to be killed
        :type lookup: dict
        :return: list of articles to be killed, or None when nothing matches the lookup
        :rtype: list
        """

        archived_doc = self.find_one(req=None, **lookup)
        if not archived_doc:
            # no matching archived item: avoid subscripting None below
            # (mirrors the guard in find_articles_to_kill)
            return

        req = ParsedRequest()
        # sort newest version first so index [0] is the latest revision
        req.sort = '[("%s", -1)]' % config.VERSION
        archived_doc = list(
            self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
        articles_to_kill = [archived_doc]
        takes_package_service = TakesPackageService()
        takes_package_id = takes_package_service.get_take_package_id(
            archived_doc)
        if takes_package_id:
            takes_package = list(
                self.get(req=req, lookup={'item_id': takes_package_id}))[0]
            articles_to_kill.append(takes_package)

            # every other take referenced by the package is killed as well
            for takes_ref in takes_package_service.get_package_refs(
                    takes_package):
                if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                    take = list(
                        self.get(req=req,
                                 lookup={'item_id': takes_ref[RESIDREF]}))[0]
                    articles_to_kill.append(take)

        return articles_to_kill
Exemple #28
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Get the expired items.

        Matches items whose expiry has passed and which either belong to a
        desk or are spiked without a desk; content state scheduled is excluded
        by the expiry_status filter combination.

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: when True return only invalid items
        :return pymongo.cursor: expired non published items
        """
        conditions = [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
        ]

        if invalid_only:
            conditions.append({'expiry_status': 'invalid'})
        else:
            conditions.append({'expiry_status': {'$ne': 'invalid'}})

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup={'$and': conditions})
Exemple #29
0
    def on_create(self, docs):
        """Runs on stage create.

        Sets desk_order and expiry defaults; when the new stage is flagged as
        working and/or incoming, the previous default of that kind is removed.
        """

        for stage_doc in docs:
            desk = stage_doc.get("desk")

            if not desk:
                stage_doc["desk_order"] = 1
                continue

            # fetch the stage with the current highest desk_order on the desk
            request = ParsedRequest()
            request.sort = "-desk_order"
            request.max_results = 1
            highest = self.get(req=request, lookup={"desk": stage_doc["desk"]})

            # an explicit 0 means "no expiry"
            if stage_doc.get("content_expiry") == 0:
                stage_doc["content_expiry"] = None

            if highest.count() == 0:
                stage_doc["desk_order"] = 1
            else:
                stage_doc["desk_order"] = highest[0].get("desk_order", 1) + 1

            # if this new one is default then remove the old default
            if stage_doc.get("working_stage", False):
                self.remove_old_default(desk, "working_stage")

            if stage_doc.get("default_incoming", False):
                self.remove_old_default(desk, "default_incoming")
Exemple #30
0
    def on_create(self, docs):
        """
        Overriding this to set desk_order and expiry settings. Also, if this stage is defined as either working or
        incoming stage or both then removes the old incoming and working stages.
        """

        for doc in docs:
            desk = doc.get('desk')

            # stages without a desk always get order 1
            if not desk:
                doc['desk_order'] = 1
                continue

            # fetch the stage with the current highest desk_order on this desk
            req = ParsedRequest()
            req.sort = '-desk_order'
            req.max_results = 1
            prev_stage = self.get(req=req, lookup={'desk': doc['desk']})

            # an explicit 0 means "no expiry"
            if doc.get('content_expiry') == 0:
                doc['content_expiry'] = None

            # place the new stage after the existing ones
            if prev_stage.count() == 0:
                doc['desk_order'] = 1
            else:
                doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1

            # if this new one is default then remove the old default
            if doc.get('working_stage', False):
                self.remove_old_default(desk, 'working_stage')

            if doc.get('default_incoming', False):
                self.remove_old_default(desk, 'default_incoming')
 def purge_old_entries(self):
     """
     Purge audit entries older than ``self.expiry``.

     Deletes ids in batches of 1000, at most 100 batches per invocation, so a
     single run is bounded.
     :return:
     """
     service = superdesk.get_resource_service("audit")
     logger.info("Starting to purge audit logs at {}".format(utcnow()))
     for _ in range(100):  # make sure we don't get stuck
         # ObjectIds embed a creation timestamp, so comparing _id against
         # ObjectId.from_datetime(self.expiry) selects entries created before it
         lookup = {
             "$and": [{
                 "_id": {
                     "$lt": ObjectId.from_datetime(self.expiry)
                 }
             }]
         }
         req = ParsedRequest()
         req.sort = '[("_id", 1)]'
         # only the ids are needed for deletion
         req.projection = '{"_id": 1}'
         req.max_results = 1000
         audits = service.get_from_mongo(req=req, lookup=lookup)
         items = list(item.get("_id") for item in audits)
         if len(items) == 0:
             logger.info("Finished purging audit logs at {}".format(
                 utcnow()))
             return
         logger.info("Found {} audit items at {}".format(
             len(items), utcnow()))
         service.delete_ids_from_mongo(items)
     logger.warning("Audit purge didn't finish in 100 iterations.")
Exemple #32
0
 def get_published_items(self):
     """
     Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed.

     :return: up to 200 matching items, ordered by publish_sequence_no
     """
     query = {
         QUEUE_STATE:
         PUBLISH_STATE.PENDING,
         "$or": [
             # not scheduled at all ...
             {
                 ITEM_STATE: {
                     "$ne": CONTENT_STATE.SCHEDULED
                 }
             },
             # ... or scheduled with a publish time that has already passed
             {
                 ITEM_STATE: CONTENT_STATE.SCHEDULED,
                 "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE): {
                     "$lte": utcnow()
                 },
             },
         ],
     }
     request = ParsedRequest()
     # keep the original publishing order
     request.sort = "publish_sequence_no"
     request.max_results = 200
     return list(
         get_resource_service(PUBLISHED).get_from_mongo(req=request,
                                                        lookup=query))
Exemple #33
0
    def get_expired_items(self, expiry_datetime):
        """Return expired items where content state is not scheduled.

        An item qualifies when its expiry is on or before *expiry_datetime*
        and it either sits on a desk or is spiked without a desk.

        :param datetime expiry_datetime: expiry datetime
        :return pymongo.cursor: expired non published items
        """
        desk_or_spiked = {
            '$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]
        }
        lookup = {
            '$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                desk_or_spiked,
            ]
        }

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        return self.get_from_mongo(req=req, lookup=lookup)
def get_queue_items(retries=False):
    """Return queue items to transmit for non-pull destinations.

    With retries=True, items in the retrying state whose retry time has
    passed are selected; otherwise pending items are selected.
    """
    if retries:
        lookup = {
            '$and': [
                {'state': QueueState.RETRYING.value},
                {'next_retry_attempt_at': {'$lte': utcnow()}},
                {'destination.delivery_type': {'$ne': 'pull'}},
            ]
        }
    else:
        lookup = {
            '$and': [
                {'state': QueueState.PENDING.value},
                {'destination.delivery_type': {'$ne': 'pull'}},
            ]
        }
    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
def get_queue_items(retries=False, subscriber_id=None, priority=None):
    """Return queue items to transmit, optionally restricted to one subscriber."""
    queue_lookup = _get_queue_lookup(retries, priority)
    if subscriber_id:
        queue_lookup['$and'].append({'subscriber_id': subscriber_id})
    req = ParsedRequest()
    # limit applies per subscriber now
    req.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 100)
    req.sort = '[("_created", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=req, lookup=queue_lookup)
Exemple #36
0
def get_published_items():
    """Return up to 100 items marked for publishing (queue state pending)."""
    pending = {QUEUE_STATE: PUBLISH_STATE.PENDING}
    req = ParsedRequest()
    req.sort = 'publish_sequence_no'
    req.max_results = 100
    cursor = get_resource_service(PUBLISHED).get_from_mongo(req=req, lookup=pending)
    return list(cursor)
Exemple #37
0
    def get_last_published_item(self, item_id):
        """Return the most recently published record of an item.

        :param item_id: id of the planning item or event item
        :return: the record with the highest version, or None
        """
        request = ParsedRequest()
        request.sort = '-version'  # highest version first
        return self.find_one(req=request, item_id=item_id)
Exemple #38
0
    def get_archived_takes_package(self, package_id, take_id, version, include_other_takes=True):
        """Return the archived takes package that references the given take.

        When include_other_takes is False the package must reference the take
        at exactly *version*; returns None when no package qualifies.
        """
        request = ParsedRequest()
        request.sort = '[("%s", -1)]' % config.VERSION
        for package in self.get(req=request, lookup={'item_id': package_id}):
            for ref in self._get_package_refs(package):
                if ref[RESIDREF] != take_id:
                    continue
                if include_other_takes or ref['_current_version'] == version:
                    return package
Exemple #39
0
    def get_publish_queue_items(self, page_size, expired_items=None):
        """Yield pages of publish queue items that are not moved to legal.

        :param int page_size: batch size
        :param list expired_items: optional item ids to restrict the query to
        :return: generator yielding lists of publish queue items
        """
        # a mutable default ([]) would be shared across calls -- use a None sentinel
        if expired_items is None:
            expired_items = []

        query = {"moved_to_legal": False}

        if expired_items:
            query["item_id"] = {"$in": expired_items}
        else:
            query["state"] = {"$in": [QueueState.SUCCESS.value, QueueState.CANCELED.value, QueueState.FAILED.value]}

        service = get_resource_service("publish_queue")
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.where = json.dumps(query)
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = 0
        if count:
            no_of_pages = len(range(0, count, page_size))
            queue_id = cursor[0][config.ID_FIELD]
        logger.info("Number of items to move to legal archive publish queue: {}, pages={}".format(count, no_of_pages))

        for page in range(0, no_of_pages):
            logger.info(
                "Fetching publish queue items " "for page number: {}. queue_id: {}".format((page + 1), queue_id)
            )
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            # NOTE(review): $gte re-includes the boundary id, so the last item of a
            # page is fetched again as the first item of the next page -- confirm
            # downstream consumers tolerate/de-duplicate this
            query["_id"] = {"$gte": str(queue_id)}
            req.where = json.dumps(query)
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            if len(items) > 0:
                queue_id = items[len(items) - 1][config.ID_FIELD]
            logger.info(
                "Fetched No. of Items: {} for page: {} "
                "For import in to legal archive publish_queue.".format(len(items), (page + 1))
            )
            yield items
Exemple #40
0
    def test_query_sort_by_name_case_sensetive(self):
        """Default (case-sensitive) sort: uppercase names collate before lowercase."""
        service = get_resource_service('concept_items')
        expected = [
            'A Message to Garcia', 'Bootstrap: Responsive Web Development',
            'Hobbit', 'Lord of the rings', 'The Little Prince',
            'and then there were none', 'the Elegance of the Hedgehog',
            'Гайдамаки'
        ]

        ascending = ParsedRequest()
        ascending.sort = 'name'
        result = service.get_from_mongo(req=ascending, lookup={})
        self.assertEqual([item['name'] for item in result], expected)

        descending = ParsedRequest()
        descending.sort = '-name'
        result = service.get_from_mongo(req=descending, lookup={})
        self.assertEqual([item['name'] for item in result], list(reversed(expected)))
Exemple #41
0
 def _get_max_date_from_publish_queue(self):
     """
     Get the max _updated date from legal_publish_queue collection
     :return datetime: _updated time of the newest entry, or None when the queue is empty
     """
     legal_publish_queue_service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)
     req = ParsedRequest()
     # sort descending on _updated and fetch a single row -> the maximum
     req.sort = '[("%s", -1)]' % config.LAST_UPDATED
     req.max_results = 1
     queue_item = list(legal_publish_queue_service.get(req=req, lookup={}))
     return queue_item[0][config.LAST_UPDATED] if queue_item else None
Exemple #42
0
def get_published_items():
    """Return a list of items marked for publishing."""
    request = ParsedRequest()
    request.sort = 'publish_sequence_no'
    request.max_results = 100
    service = get_resource_service(PUBLISHED)
    return list(service.get_from_mongo(
        req=request, lookup={QUEUE_STATE: PUBLISH_STATE.PENDING}))
Exemple #43
0
    def get_mongo_items(self, mongo_collection_name, page_size):
        """Generate list of items from given mongo collection per page size.

        :param mongo_collection_name: Name of the collection to get the items
        :param page_size: Size of every list in each iteration
        :return: generator yielding lists of items
        """
        bucket_size = int(page_size) if page_size else self.default_page_size
        print('Indexing data from mongo/{} to elastic/{}'.format(
            mongo_collection_name, mongo_collection_name))

        service = superdesk.get_resource_service(mongo_collection_name)
        req = ParsedRequest()
        # ascending _id sort enables the watermark-based pagination below
        req.sort = '[("%s", 1)]' % config.ID_FIELD
        cursor = service.get_from_mongo(req, {})
        count = cursor.count()
        no_of_buckets = len(range(0, count, bucket_size))
        # NOTE(review): cursor[0] raises IndexError on an empty collection --
        # confirm callers only run this against non-empty collections
        water_mark = cursor[0][config.ID_FIELD]
        print('Number of items to index: {}, pages={}'.format(
            count, no_of_buckets))
        for x in range(0, no_of_buckets):
            print('{} Page : {}'.format(time.strftime('%X %x %Z'), x + 1))
            s = time.time()
            req = ParsedRequest()
            req.sort = '[("%s", 1)]' % config.ID_FIELD
            req.max_results = bucket_size
            # first bucket includes the watermark itself; later buckets start after it
            if x == 0:
                lookup = {config.ID_FIELD: {'$gte': water_mark}}
            else:
                lookup = {config.ID_FIELD: {'$gt': water_mark}}

            cursor = service.get_from_mongo(req, lookup)
            items = list(cursor)
            # advance the watermark to the last id fetched in this bucket
            water_mark = items[len(items) - 1][config.ID_FIELD]
            print('{} Retrieved from Mongo in {:.3f} seconds to {}'.format(
                time.strftime('%X %x %Z'),
                time.time() - s, water_mark))

            yield items
Exemple #44
0
 def _get_max_date_from_publish_queue(self):
     """
     Get the max _updated date from legal_publish_queue collection
     :return datetime: _updated time of the newest entry, or None when the queue is empty
     """
     legal_publish_queue_service = get_resource_service(
         LEGAL_PUBLISH_QUEUE_NAME)
     req = ParsedRequest()
     # sort descending on _updated; the single first row holds the maximum
     req.sort = '[("%s", -1)]' % config.LAST_UPDATED
     req.max_results = 1
     req.page = 1
     queue_item = list(legal_publish_queue_service.get(req=req, lookup={}))
     return queue_item[0][config.LAST_UPDATED] if queue_item else None
Exemple #45
0
    def get_expired_items(self, page_size):
        """
        Get expired items that are not moved to legal, in pages of *page_size*.

        :param page_size: number of items fetched per yielded batch
        :return: generator yielding lists of expired published items
        """
        # elasticsearch query (legacy "filtered" DSL): expired, not yet moved
        # to legal, and not in the scheduled state
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "and": [
                            {"range": {"expiry": {"lt": "now"}}},
                            {"term": {"moved_to_legal": False}},
                            {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                        ]
                    }
                }
            }
        }

        service = get_resource_service("published")
        req = ParsedRequest()
        req.args = {"source": json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        cursor = service.get(req=req, lookup=None)
        count = cursor.count()
        no_of_pages = len(range(0, count, page_size))
        logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))

        # NOTE(review): each iteration re-runs the same query without a skip/after
        # marker; this appears to rely on the result set shrinking as items get
        # marked moved_to_legal between iterations -- confirm
        for page in range(0, no_of_pages):
            req = ParsedRequest()
            req.args = {"source": json.dumps(query)}
            req.sort = '[("publish_sequence_no", 1)]'
            req.max_results = page_size
            cursor = service.get(req=req, lookup=None)
            items = list(cursor)
            logger.info("Fetched No. of Items: {} import in to legal archive.".format(len(items)))
            yield items
    def get_published_takes(self, takes_package):
        """Return the published takes in *takes_package*, sorted by sequence.

        :param takes_package: takes package document
        :return: list of published takes (empty when the package has no refs)
        """
        refs = self.get_package_refs(takes_package)
        if not refs:
            return []

        take_ids = [ref.get(RESIDREF) for ref in refs]
        lookup = self._get_published_items_query(take_ids)
        req = ParsedRequest()
        req.sort = SEQUENCE
        return list(get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=lookup))
    def get_mongo_items(self, mongo_collection_name, page_size):
        """
        Generates list of items from given mongo collection per page size
        :param mongo_collection_name: Name of the collection to get the items
        :param page_size: Size of every list in each iteration
        :return: generator yielding lists of items
        """
        bucket_size = int(page_size) if page_size else self.default_page_size
        print('Indexing data from mongo/{} to elastic/{}'.format(mongo_collection_name, mongo_collection_name))

        service = superdesk.get_resource_service(mongo_collection_name)
        req = ParsedRequest()
        # ascending _id sort enables the watermark-based pagination below
        req.sort = '[("%s", 1)]' % config.ID_FIELD
        cursor = service.get_from_mongo(req, {})
        count = cursor.count()
        no_of_buckets = len(range(0, count, bucket_size))
        # NOTE(review): cursor[0] raises IndexError when the collection is empty -- confirm
        water_mark = cursor[0][config.ID_FIELD]
        print('Number of items to index: {}, pages={}'.format(count, no_of_buckets))
        for x in range(0, no_of_buckets):
            print('{} Page : {}'.format(time.strftime('%X %x %Z'), x + 1))
            s = time.time()
            req = ParsedRequest()
            req.sort = '[("%s", 1)]' % config.ID_FIELD
            req.max_results = bucket_size
            # first bucket includes the watermark itself; later buckets start after it
            if x == 0:
                lookup = {config.ID_FIELD: {'$gte': water_mark}}
            else:
                lookup = {config.ID_FIELD: {'$gt': water_mark}}

            cursor = service.get_from_mongo(req, lookup)
            items = list(cursor)
            # advance the watermark to the last id fetched in this bucket
            water_mark = items[len(items) - 1][config.ID_FIELD]
            print('{} Retrieved from Mongo in {:.3f} seconds to {}'.format(time.strftime('%X %x %Z'), time.time() - s,
                  water_mark))

            yield items
 def on_create(self, docs):
     """Set desk_order and content_expiry defaults for newly created stages."""
     for doc in docs:
         # stages without a desk always get order 1
         if not doc.get('desk'):
             doc['desk_order'] = 1
             continue
         # fetch the stage with the current highest desk_order on this desk
         req = ParsedRequest()
         req.sort = '-desk_order'
         req.max_results = 1
         prev_stage = self.get(req=req, lookup={'desk': doc['desk']})
         # missing or 0 expiry falls back to the app-wide default
         if doc.get('content_expiry', 0) == 0:
             doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
         # place the new stage after the existing ones
         if prev_stage.count() == 0:
             doc['desk_order'] = 1
         else:
             doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1
def get_queue_items(retries=False):
    """Return queue items ready to transmit for non-pull destinations.

    With retries=True, items in the retrying state whose retry time has
    passed are selected; otherwise pending items are selected.
    """
    not_pull = {"destination.delivery_type": {"$ne": "pull"}}
    if retries:
        lookup = {
            "$and": [
                {"state": QueueState.RETRYING.value},
                {"next_retry_attempt_at": {"$lte": utcnow()}},
                not_pull,
            ]
        }
    else:
        lookup = {"$and": [{"state": QueueState.PENDING.value}, not_pull]}
    request = ParsedRequest()
    request.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
Exemple #50
0
    def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
        """Get the expired items.

        Yields batches of expired content_api items, paginating on `_id` with
        `max_results` items per batch.

        :param datetime expiry_datetime: expiry reference time, defaults to `utcnow()`
        :param int expiry_days: number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
        :param int max_results: batch size, defaults to `MAX_EXPIRY_QUERY_LIMIT`
        :param boolean include_children: include only root items if False, otherwise the whole item chain
        :return list: generator of lists of expired content_api items
        """

        expiry_datetime = expiry_datetime if expiry_datetime is not None else utcnow()
        expiry_days = expiry_days if expiry_days is not None else app.settings['CONTENT_API_EXPIRY_DAYS']
        max_results = max_results if max_results is not None else app.settings['MAX_EXPIRY_QUERY_LIMIT']

        expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
        last_id = None

        while True:
            conditions = [{'_updated': {'$lte': expire_at}}]

            if last_id is not None:
                # resume after the last id seen in the previous batch
                conditions.append({'_id': {'$gt': last_id}})

            if not include_children:
                conditions.append({'ancestors': {'$exists': False}})

            req = ParsedRequest()
            req.sort = '_id'
            req.where = json.dumps({'$and': conditions})
            req.max_results = max_results

            batch = list(self.get_from_mongo(req=req, lookup=None))

            if not batch:
                break

            last_id = batch[-1]['_id']
            yield batch
Exemple #51
0
def get_published_items():
    """
    Get all items with queue state "pending" that are not scheduled or whose
    scheduled time has lapsed.
    """
    schedule_field = "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE)
    lookup = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {ITEM_STATE: CONTENT_STATE.SCHEDULED, schedule_field: {"$lte": utcnow()}},
        ],
    }
    req = ParsedRequest()
    req.sort = "publish_sequence_no"
    req.max_results = 200
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=req, lookup=lookup))
Exemple #52
0
 def on_create(self, docs):
     """Set desk_order/expiry defaults for new stages; demote the old default incoming stage."""
     for doc in docs:
         # stages without a desk always get order 1
         if not doc.get('desk'):
             doc['desk_order'] = 1
             continue
         # fetch the stage with the current highest desk_order on this desk
         req = ParsedRequest()
         req.sort = '-desk_order'
         req.max_results = 1
         prev_stage = self.get(req=req, lookup={'desk': doc['desk']})
         # missing or 0 expiry falls back to the app-wide default
         if doc.get('content_expiry', 0) == 0:
             doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
         # place the new stage after the existing ones
         if prev_stage.count() == 0:
             doc['desk_order'] = 1
         else:
             doc['desk_order'] = prev_stage[0].get('desk_order', 1) + 1
         # if this new one is default need to remove the old default
         if doc.get('default_incoming', False):
             self.remove_old_default(doc.get('desk'), 'default_incoming')
    def _get_items(self, resource, query, sort, keys, callback):
        """Fetch all items matching *query* page by page and map them via *callback*.

        :param resource: resource/collection name to read from
        :param query: mongo lookup used to select the items
        :param sort: sort spec applied to the paged queries
        :param keys: item fields to keep for each entry
        :param callback: function computing the dict key for each item
        :return: dict of callback(item) -> {key: item[key] for the keys present}
        """
        req = ParsedRequest()
        # the first query is only used to obtain the total count
        cursor = get_resource_service(resource).get_from_mongo(req=req, lookup=query)
        count = cursor.count()
        no_of_buckets = len(range(0, count, self.default_page_size))
        items = {}
        # sort is applied to the paged (skip/limit) queries below
        req.sort = sort

        for bucket in range(0, no_of_buckets):
            skip = bucket * self.default_page_size
            logger.info('Page : {}, skip: {}'.format(bucket + 1, skip))
            cursor = get_resource_service(resource).get_from_mongo(req=req, lookup=query)
            cursor.skip(skip)
            cursor.limit(self.default_page_size)
            cursor = list(cursor)
            items.update({callback(item): {key: item.get(key)
                         for key in keys if key in item} for item in cursor})
        return items
def get_queue_items(retries=False):
    """Return publish queue items ready for transmission, in publish order.

    :param bool retries: when True, fetch items in retrying state whose
        next retry time has passed instead of pending items
    :return: cursor over the matching publish queue entries
    """
    if retries:
        lookup = {'$and': [
            {'state': QueueState.RETRYING.value},
            {'next_retry_attempt_at': {'$lte': utcnow()}},
        ]}
    else:
        lookup = {'$and': [{'state': QueueState.PENDING.value}]}

    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
    def get_published_takes(self, takes_package):
        """Fetch every published or corrected take of a takes package.

        :param takes_package: takes package document
        :return: list of published takes, sorted by take sequence
        """
        refs = self.get_package_refs(takes_package)
        if not refs:
            return []

        take_ids = [ref.get(RESIDREF) for ref in refs]
        lookup = {'$and': [
            {config.ID_FIELD: {'$in': take_ids}},
            {ITEM_STATE: {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}},
        ]}

        request = ParsedRequest()
        request.sort = SEQUENCE
        return list(get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup))
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Yield batches of expired items where content state is not scheduled.

        Pages through matching items in unique_id order, yielding one
        batch per query until no further items match.

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True only invalid items
        :return pymongo.cursor: expired non published items.
        """
        last_unique_id = 0

        while True:
            criteria = [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                {'$or': [
                    {'task.desk': {'$ne': None}},
                    {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
                ]},
                # resume after the last item of the previous batch
                {'unique_id': {'$gt': last_unique_id}},
            ]

            if invalid_only:
                criteria.append({'expiry_status': 'invalid'})
            else:
                criteria.append({'expiry_status': {'$ne': 'invalid'}})

            req = ParsedRequest()
            req.sort = 'unique_id'
            req.where = json.dumps({'$and': criteria})
            req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
            batch = list(self.get_from_mongo(req=req, lookup=None))

            if not batch:
                break

            last_unique_id = batch[-1]['unique_id']
            yield batch
Example #57
0
    def get_expired_items(self, expiry_datetime):
        """
        Get the expired items where content state is not scheduled.

        :param datetime expiry_datetime: expiry datetime
        :return pymongo.cursor: expired non published items.
        """
        query = {
            '$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                {'$or': [
                    {'task.desk': {'$ne': None}},
                    {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None}
                ]}
            ]
        }

        req = ParsedRequest()
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        req.sort = 'expiry,_created'
        # BUG FIX: the request was built but previously passed as req=None,
        # which silently dropped the result limit and the sort order.
        return self.get_from_mongo(req=req, lookup=query)
Example #58
0
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """Fetch the overdue scheduled articles from the given collection.

    An article is overdue when:
        1. it is in 'scheduled' state, and
        2. its publish_schedule is less than or equal to expired_date_time.

    :param expired_date_time: DateTime that the scheduled state will be checked against
    :param resource: Name of the resource to check the data from
    :param limit: Number of return items
    :return: overdue scheduled articles from published collection
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))

    lookup = {'$and': [
        {'publish_schedule': {'$lte': expired_date_time}},
        {ITEM_STATE: CONTENT_STATE.SCHEDULED},
    ]}

    request = ParsedRequest()
    request.sort = '_modified'
    request.max_results = limit
    return superdesk.get_resource_service(resource).get_from_mongo(req=request, lookup=lookup)