def get_mongo_items(self, consistency_record):
    """Scan mongo for this resource in pages and collect etag information.

    Temporarily disables the resource's datasource projection so that
    ``_etag``/``_updated`` are returned, and restores it afterwards.

    :param consistency_record: record holding ``started_at``; only items
        created at or before that time are scanned.
    :return: tuple ``(mongo_items, updated_mongo_items)`` where
        ``mongo_items`` is a list of ``(_id, _etag)`` pairs and
        ``updated_mongo_items`` lists ids modified after ``started_at``.
    """
    datasource = superdesk.resources[self.resource_name].endpoint_schema['datasource']
    projection = dict(datasource['projection'])
    datasource['projection'] = None
    service = superdesk.get_resource_service(self.resource_name)
    mongo_items = []
    updated_mongo_items = []
    try:
        cursor = service.get_from_mongo(None, {})
        count = cursor.count()
        no_of_buckets = len(range(0, count, self.default_page_size))
        request = ParsedRequest()
        request.projection = json.dumps({'_etag': 1, '_updated': 1})
        for x in range(0, no_of_buckets):
            skip = x * self.default_page_size
            print('Page : {}, skip: {}'.format(x + 1, skip))
            # don't get any new records since the elastic items are retrieved
            cursor = service.get_from_mongo(request, {'_created': {'$lte': consistency_record['started_at']}})
            cursor.skip(skip)
            cursor.limit(self.default_page_size)
            cursor = list(cursor)
            mongo_items.extend([(mongo_item['_id'], mongo_item['_etag']) for mongo_item in cursor])
            updated_mongo_items.extend([mongo_item['_id'] for mongo_item in cursor
                                       if mongo_item['_updated'] > consistency_record['started_at']])
    finally:
        # Restore the projection even if the scan fails part-way through,
        # otherwise the resource keeps serving unprojected documents.
        datasource['projection'] = projection
    return mongo_items, updated_mongo_items
 def on_delete_res_vocabularies(self, doc):
     """Block deleting a vocabulary that any content type schema still uses."""
     req = ParsedRequest()
     req.projection = '{"label": 1}'
     # $type 3 == embedded document: the vocabulary id appears as a schema field
     schema_field = 'schema.' + doc[config.ID_FIELD]
     content_types = self.get(req=req, lookup={schema_field: {'$type': 3}})
     if content_types.count():
         payload = {'content_types': list(map(self._build_hateoas, content_types))}
         message = 'Vocabulary "%s" is used in %d content type(s)' % \
             (doc.get('display_name'), content_types.count())
         raise SuperdeskApiError.badRequestError(message, payload)
Exemple #3
0
    def find(self, resource, lookup, projection, **options):
        """Query *resource*, using the data layer's native ``find`` when it
        exposes one and falling back to ``get`` otherwise."""
        req = ParsedRequest()
        req.args = {}
        req.projection = projection

        data_layer = self.data_layer
        if hasattr(data_layer, 'find'):
            return data_layer.find(resource, req, lookup)
        return data_layer.get(resource, req, lookup)
Exemple #4
0
 def on_delete_res_vocabularies(self, doc):
     """Refuse to delete a vocabulary referenced by any content type schema."""
     req = ParsedRequest()
     req.projection = '{"label": 1}'
     # a content type uses this vocabulary if its schema has a field named
     # after the vocabulary id whose value is an embedded document ($type 3)
     field = 'schema.' + doc[config.ID_FIELD]
     content_types = self.get(req=req, lookup={field: {'$type': 3}})
     if content_types.count():
         hateoas_docs = [self._build_hateoas(ct) for ct in content_types]
         payload = {'content_types': hateoas_docs}
         message = 'Vocabulary "%s" is used in %d content type(s)' % \
             (doc.get('display_name'), content_types.count())
         raise SuperdeskApiError.badRequestError(message, payload)
 def on_delete_res_vocabularies(self, doc):
     """Reject vocabulary deletion while a content type schema references it."""
     req = ParsedRequest()
     req.projection = '{"label": 1}'
     # schema fields holding embedded documents ($type 3) under the
     # vocabulary's id mark the content types that use it
     schema_field = "schema." + doc[config.ID_FIELD]
     content_types = self.get(req=req, lookup={schema_field: {"$type": 3}})
     if content_types.count():
         payload = {"content_types": list(map(self._build_hateoas, content_types))}
         message = _(
             "Vocabulary {vocabulary} is used in {count} content type(s)"
         ).format(vocabulary=doc.get("display_name"), count=content_types.count())
         raise SuperdeskApiError.badRequestError(message, payload)
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive
        :return:
        """
        service = superdesk.get_resource_service('audit')
        logger.info(
            'Starting to purge audit logs of content items not in archive at {}'
            .format(utcnow()))

        # Keyset-paginate the audit collection in _id order, 1000 docs a batch.
        last_seen_id = None
        while True:
            query = deepcopy(self.item_entry_query)
            query['$and'].append(
                {'_updated': {'$lte': date_to_str(self.expiry)}})
            if last_seen_id:
                query['$and'].append({'_id': {'$gt': last_seen_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            req.projection = '{"_id": 1, "audit_id":1}'
            req.max_results = 1000
            batch = [(doc['_id'], doc['audit_id'])
                     for doc in service.get_from_mongo(req=req, lookup=query)]
            if not batch:
                logger.info(
                    'Finished purging audit logs of content items not in archive at {}'
                    .format(utcnow()))
                return
            logger.info('Found {} orphaned audit items at {}'.format(
                len(batch), utcnow()))
            last_seen_id = batch[-1][0]

            # Audit entries whose audit_id has no surviving archive item.
            referenced = set(pair[1] for pair in batch)
            orphaned = referenced - self._get_archive_ids(referenced)
            to_delete = [pair[0] for pair in batch if pair[1] in orphaned]
            logger.info('Deleting {} orphaned audit items at {}'.format(
                len(to_delete), utcnow()))
            service.delete_ids_from_mongo(to_delete)
Exemple #7
0
    def purge_orphaned_item_audits(self):
        """
        Purge the audit items that do not have associated entries existing in archive

        Processes at most 100 batches of 1000 documents per invocation.
        :return:
        """
        service = superdesk.get_resource_service("audit")
        # last _id of the previous batch, used for keyset pagination
        current_id = None
        logger.info(
            "Starting to purge audit logs of content items not in archive at {}"
            .format(utcnow()))

        # Scan the audit collection for items to delete
        for _ in range(100):
            query = deepcopy(self.item_entry_query)
            # ObjectId embeds a creation timestamp, so comparing _id against
            # ObjectId.from_datetime(expiry) selects docs created before expiry
            query["$and"].append(
                {"_id": {
                    "$lte": ObjectId.from_datetime(self.expiry)
                }})
            if current_id:
                # resume after the last processed batch
                query["$and"].append({"_id": {"$gt": current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            req.projection = '{"_id": 1, "audit_id":1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=query)
            items = list([(item["_id"], item["audit_id"]) for item in audits])
            if len(items) == 0:
                # empty batch: no more candidates in range
                logger.info(
                    "Finished purging audit logs of content items not in archive at {}"
                    .format(utcnow()))
                return
            logger.info("Found {} orphaned audit items at {}".format(
                len(items), utcnow()))
            current_id = items[len(items) - 1][0]

            # keep only audit entries whose audit_id has no matching archive item
            batch_ids = set([i[1] for i in items])
            archive_ids = self._get_archive_ids(batch_ids)
            ids = (batch_ids - archive_ids)
            audit_ids = [i[0] for i in items if i[1] in ids]
            logger.info("Deleting {} orphaned audit items at {}".format(
                len(audit_ids), utcnow()))
            service.delete_ids_from_mongo(audit_ids)
Exemple #8
0
    def _prefetch_vocabularies(self):
        """
        Prefetch items from vocabularies.
        """

        # this method is called from `parse`, but it must be executed only once
        if self._vocabularies is not None:
            return

        self._vocabularies = {}
        req = ParsedRequest()
        req.projection = json.dumps({'items': 1})
        # prefetch vocabularies -> anp_genres
        genres = superdesk.get_resource_service('vocabularies').find_one(
            req=req, _id='anp_genres').get('items', [])
        # key the genres by qcode so later parse steps get O(1) lookups
        self._vocabularies['anp_genres'] = {genre['qcode']: genre for genre in genres}
    def _validate_language(self, doc):
        """Ensure ``doc['language']`` is a qcode from the 'languages' CV."""
        req = ParsedRequest()
        req.projection = json.dumps({"items.qcode": 1})

        vocab_service = superdesk.get_resource_service("vocabularies")
        try:
            # find_one returns None when the CV does not exist, making the
            # chained .get raise AttributeError
            languages = vocab_service.find_one(req=req, _id="languages").get("items", [])
        except AttributeError:
            raise SuperdeskApiError.badRequestError(
                message="Request is not valid",
                payload={"language": "Concept items requires 'languages' vocabulary to be set"},
            )

        allowed_qcodes = [lang["qcode"] for lang in languages]

        if doc["language"] not in allowed_qcodes:
            raise SuperdeskApiError.badRequestError(
                message="Request is not valid", payload={"language": "unallowed value '{}'".format(doc["language"])}
            )
    def purge_old_entries(self):
        """
        Purge entries older than the expiry that are not related to archive items
        :return:
        """
        service = superdesk.get_resource_service('audit')
        # last _id of the previous batch, used for keyset pagination
        current_id = None
        logger.info(
            'Starting to purge audit logs of none content items at {}'.format(
                utcnow()))

        while True:
            # non-item audit entries last updated on or before the expiry date
            lookup = {
                '$and': [
                    self.not_item_entry_query, {
                        '_updated': {
                            '$lte': date_to_str(self.expiry)
                        }
                    }
                ]
            }
            if current_id:
                # resume after the last processed batch
                lookup['$and'].append({'_id': {'$gt': current_id}})
            req = ParsedRequest()
            req.sort = '[("_id", 1)]'
            req.projection = '{"_id": 1}'
            req.max_results = 1000
            audits = service.get_from_mongo(req=req, lookup=lookup)
            items = list(item.get('_id') for item in audits)
            if len(items) == 0:
                # empty batch: everything in range has been deleted
                logger.info(
                    'Finished purging audit logs of none content items at {}'.
                    format(utcnow()))
                return
            logger.info('Found {} audit items at {}'.format(
                len(items), utcnow()))
            current_id = items[len(items) - 1]
            logger.info('Deleting {} old audit items'.format(len(items)))
            service.delete_ids_from_mongo(items)
Exemple #11
0
    def get_mongo_items(self, consistency_record):
        """Scan mongo for this resource in pages and collect etag information.

        Temporarily disables the resource's datasource projection so that
        ``_etag``/``_updated`` are returned, and restores it afterwards.

        :param consistency_record: record holding ``started_at``; only items
            created at or before that time are scanned.
        :return: tuple ``(mongo_items, updated_mongo_items)`` where
            ``mongo_items`` is a list of ``(_id, _etag)`` pairs and
            ``updated_mongo_items`` lists ids modified after ``started_at``.
        """
        datasource = superdesk.resources[
            self.resource_name].endpoint_schema['datasource']
        projection = dict(datasource['projection'])
        datasource['projection'] = None
        service = superdesk.get_resource_service(self.resource_name)
        mongo_items = []
        updated_mongo_items = []
        try:
            cursor = service.get_from_mongo(None, {})
            count = cursor.count()
            no_of_buckets = len(range(0, count, self.default_page_size))
            request = ParsedRequest()
            request.projection = json.dumps({'_etag': 1, '_updated': 1})
            for x in range(0, no_of_buckets):
                skip = x * self.default_page_size
                print('Page : {}, skip: {}'.format(x + 1, skip))
                # don't get any new records since the elastic items are retrieved
                cursor = service.get_from_mongo(
                    request,
                    {'_created': {
                        '$lte': consistency_record['started_at']
                    }})
                cursor.skip(skip)
                cursor.limit(self.default_page_size)
                cursor = list(cursor)
                mongo_items.extend([(mongo_item['_id'], mongo_item['_etag'])
                                    for mongo_item in cursor])
                updated_mongo_items.extend([
                    mongo_item['_id'] for mongo_item in cursor
                    if mongo_item['_updated'] > consistency_record['started_at']
                ])
        finally:
            # Restore the projection even if the scan fails part-way through,
            # otherwise the resource keeps serving unprojected documents.
            datasource['projection'] = projection
        return mongo_items, updated_mongo_items
Exemple #12
0
 def find(self, resource, filter, projection, **options):
     """Run a ``find`` on *resource* via the underlying data layer.

     ``filter`` is the lookup dict, ``projection`` limits returned fields;
     extra ``options`` are accepted for interface compatibility.
     """
     request = ParsedRequest()
     request.args = {}
     request.projection = projection
     return self.data_layer.find(resource, request, filter)
Exemple #13
0
 def find_one(self, resource, filter, projection):
     """Fetch a single *resource* document; ``filter`` entries are passed to
     the data layer as keyword arguments."""
     request = ParsedRequest()
     request.args = {}
     request.projection = projection
     return self.data_layer.find_one(resource, request, **filter)
Exemple #14
0
    def _validate_associated_items(self,
                                   original_item,
                                   updates=None,
                                   validation_errors=None):
        """Validates associated items.

        This function will ensure that the unpublished content validates and none of
        the content is locked, also do not allow any killed or recalled or spiked content.
        Recurses into composite items to validate their associations too.

        :param original_item: item whose associations are to be validated
        :param updates: pending updates whose associations override the
            original's (merged on top of them); defaults to no updates
        :param validation_errors: validation errors are appended if there are any.
        """

        if validation_errors is None:
            validation_errors = []

        if updates is None:
            updates = {}

        # merge associations: values from updates win over the original's
        associations = deepcopy(original_item.get(ASSOCIATIONS, {}))
        associations.update(updates.get(ASSOCIATIONS, {}))

        items = [value for value in associations.values()]
        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and \
                self.publish_type == ITEM_PUBLISH:
            # packages being published also validate their residref members
            items.extend(self.package_service.get_residrefs(original_item))

        for item in items:
            # item is either an embedded association dict or a bare item id
            if type(item) == dict and item.get(config.ID_FIELD):
                doc = item
                # enhance doc with lock_user
                req = ParsedRequest()
                req.args = {}
                req.projection = json.dumps({'lock_user': 1})
                try:
                    doc.update({
                        'lock_user':
                        super().find_one(
                            req=req, _id=item[config.ID_FIELD])['lock_user']
                    })
                except (TypeError, KeyError):
                    # item not found or has no lock_user -- nothing to merge
                    pass
            elif item:
                doc = super().find_one(req=None, _id=item)
            else:
                continue

            if not doc:
                continue

            if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
                # recurse so nested package members are validated as well
                self._validate_associated_items(
                    doc, validation_errors=validation_errors)

            # make sure no items are killed or recalled or spiked or scheduled
            doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
            if doc_item_state in {
                    CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED,
                    CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED
            }:
                validation_errors.append(
                    'Item cannot contain associated {} item'.format(
                        doc[ITEM_STATE]))

            if doc.get(EMBARGO):
                validation_errors.append(
                    'Item cannot have associated items with Embargo')

            # don't validate items that already have published
            if doc_item_state not in [
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED
            ]:
                validate_item = {
                    'act': self.publish_type,
                    'type': doc[ITEM_TYPE],
                    'validate': doc
                }
                if type(item) == dict:
                    # mark embedded associations so the validator knows
                    validate_item['embedded'] = True
                errors = get_resource_service('validate').post([validate_item],
                                                               headline=True)
                if errors[0]:
                    pre_errors = [
                        'Associated item %s %s' %
                        (doc.get('slugline', ''), error) for error in errors[0]
                    ]
                    validation_errors.extend(pre_errors)

            if config.PUBLISH_ASSOCIATED_ITEMS:
                # check the locks on the items
                if doc.get('lock_user'):
                    if original_item['lock_user'] != doc['lock_user']:
                        validation_errors.extend([
                            '{}: {}'.format(
                                doc.get('headline', doc['_id']),
                                _('packaged item is locked by another user'))
                        ])
                    elif original_item['lock_user'] == doc['lock_user']:
                        validation_errors.extend([
                            '{}: {}'.format(
                                doc.get('headline', doc['_id']),
                                _('packaged item is locked by you. Unlock it and try again'
                                  ))
                        ])
Exemple #15
0
 def find(self, resource, filter, projection, **options):
     """Delegate a ``find`` query for *resource* to the wrapped data layer."""
     parsed = ParsedRequest()
     parsed.args = {}
     parsed.projection = projection
     return self.data_layer.find(resource, parsed, filter)
Exemple #16
0
 def find_one(self, resource, filter, projection):
     """Return one *resource* document matching ``filter`` (expanded as
     keyword arguments), restricted to ``projection`` fields."""
     parsed = ParsedRequest()
     parsed.args = {}
     parsed.projection = projection
     return self.data_layer.find_one(resource, parsed, **filter)
Exemple #17
0
    def _users_aggregation(self, desk_id: str) -> List[Dict]:
        """Build a per-role workload overview for the members of a desk.

        For every member of the desk (or of all desks when ``desk_id`` is
        ``"all"``), counts their locked items in the ``archive`` index and
        their items in the ``assignments`` index, then groups the members by
        their user ``role`` (mongo aggregation over the users collection).

        :param desk_id: desk id as a string, or ``"all"`` for every desk
        :return: list of ``{"role": ..., "authors": {user_id: {"locked": n,
            "assigned": n}}}`` dicts
        """
        desks_service = superdesk.get_resource_service("desks")

        es_query: Dict[str, Any]
        es_assign_query: Dict[str, Any]
        desk_filter: Dict[str, Any]

        if desk_id == "all":
            desk_filter = {}
            es_query = {}
        else:
            desk_filter = {"_id": ObjectId(desk_id)}
            es_query = {"filter": {"term": {"task.desk": desk_id}}}

        # collect the user ids of all members of the matching desk(s)
        req = ParsedRequest()
        req.projection = json.dumps({"members": 1})
        found = desks_service.get(req, desk_filter)
        members = set()
        for d in found:
            members.update({m["user"] for m in d["members"]})

        # group the member users by role -> {_id: role, authors: [user ids]}
        users_aggregation = app.data.pymongo().db.users.aggregate([
            {
                "$match": {
                    "_id": {
                        "$in": list(members)
                    }
                }
            },
            {
                "$group": {
                    "_id": "$role",
                    "authors": {
                        "$addToSet": "$_id"
                    }
                }
            },
        ])

        # first we check archives for locked items
        # per-author buckets on version_creator, each with a locked sub-count
        # (docs that have a lock_user field)
        es_query["aggs"] = {
            "desk_authors": {
                "filter": {
                    "terms": {
                        "version_creator": [str(m) for m in members]
                    }
                },
                "aggs": {
                    "authors": {
                        "terms": {
                            "field": "version_creator",
                        },
                        "aggs": {
                            "locked": {
                                "filter": {
                                    "exists": {
                                        "field": "lock_user",
                                    }
                                }
                            },
                        },
                    }
                },
            }
        }
        # size 0: we only need the aggregations, not the hits themselves
        docs_agg = app.data.elastic.search(es_query,
                                           "archive",
                                           params={"size": 0})
        stats_by_authors = {}
        for a in docs_agg.hits["aggregations"]["desk_authors"]["authors"][
                "buckets"]:
            stats_by_authors[a["key"]] = {
                "locked": a["locked"]["doc_count"],
                "assigned": 0,
            }

        # then assignments
        if desk_id == "all":
            desk_filter = {}
            es_assign_query = {}
        else:
            desk_filter = {"_id": ObjectId(desk_id)}
            es_assign_query = {
                "filter": {
                    "term": {
                        "assigned_to.desk": desk_id
                    }
                }
            }
        # per-author buckets on assigned_to.user for the same members
        es_assign_query["aggs"] = {
            "desk_authors": {
                "filter": {
                    "terms": {
                        "assigned_to.user": [str(m) for m in members]
                    }
                },
                "aggs": {
                    "authors": {
                        "terms": {
                            "field": "assigned_to.user",
                        },
                    }
                },
            }
        }
        try:
            assign_agg = app.data.elastic.search(es_assign_query,
                                                 "assignments",
                                                 params={"size": 0})
        except KeyError:
            logger.warning(
                'Can\'t access "assignments" collection, planning is probably not installed'
            )
        else:
            for a in assign_agg.hits["aggregations"]["desk_authors"][
                    "authors"]["buckets"]:
                stats_by_authors.setdefault(
                    a["key"], {"locked": 0})["assigned"] = a["doc_count"]

        # merge the per-author stats into the role grouping
        overview = []
        for a in users_aggregation:
            role = a["_id"]
            authors_dict: Dict[str, Any] = {}
            role_dict = {
                "role": role,
                "authors": authors_dict,
            }
            authors = a["authors"]
            for author in authors:
                author = str(author)
                try:
                    authors_dict[author] = stats_by_authors[author]
                except KeyError:
                    # member with no matching archive/assignment docs
                    logger.debug(
                        "No article found for {author}".format(author=author))
                    authors_dict[author] = {"assigned": 0, "locked": 0}
            overview.append(role_dict)

        return overview
Exemple #18
0
 def get_from_mongo(self, req, lookup, projection=None):
     """Read documents straight from mongo, defaulting the request and,
     when the request carries no projection, applying *projection*."""
     request = req if req is not None else ParsedRequest()
     if projection and not request.projection:
         request.projection = json.dumps(projection)
     return self.backend.get_from_mongo(self.datasource, req=request, lookup=lookup)
 def find(self, resource, lookup, projection, **options):
     """Fetch documents for *resource* via the data layer's ``get``."""
     request = ParsedRequest()
     request.args = {}
     request.projection = projection
     return self.data_layer.get(resource, request, lookup)
Exemple #20
0
 def find(self, resource, lookup, projection, **options):
     """Look up *resource* documents through the underlying data layer.

     Extra ``options`` are accepted for signature compatibility only.
     """
     parsed = ParsedRequest()
     parsed.args = {}
     parsed.projection = projection
     return self.data_layer.get(resource, parsed, lookup)