Example #1
    def delete(self, endpoint_name, lookup):
        """Delete documents matching a mongo query.

        :param endpoint_name: name of the endpoint
        :param lookup: mongo query, e.g. ``{'_id': 123}`` or ``{'item_id': {'$in': [123, 234]}}``
        :returns: the mongo remove command response, e.g. ``{'n': 12, 'ok': 1}``
        """
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
        ids = [doc[config.ID_FIELD] for doc in docs]
        removed_ids = ids
        logger.info("total documents to be removed {}".format(len(ids)))
        if search_backend and ids:
            removed_ids = []
            # remove from the search backend first so the documents stop
            # showing up, then remove them from mongo
            for _id in ids:
                try:
                    self.remove_from_search(endpoint_name, _id)
                    removed_ids.append(_id)
                except NotFoundError:
                    logger.warning('item missing from elastic _id=%s' % (_id, ))
                    removed_ids.append(_id)
                except Exception:
                    logger.exception('item cannot be removed from elastic _id=%s' % (_id, ))
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        logger.info("Removed {} documents from {}.".format(len(removed_ids), endpoint_name))
        if not ids:
            logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
Example #3
    def delete_docs(self, endpoint_name, docs):
        """Delete using a list of documents."""
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)
        ids = [doc[config.ID_FIELD] for doc in docs]
        removed_ids = ids
        logger.info("total documents to be removed {}".format(len(ids)))
        if search_backend and ids:
            removed_ids = []
            # remove from the search backend first so the documents stop
            # showing up, then remove them from mongo
            for doc in docs:
                try:
                    self.remove_from_search(endpoint_name, doc)
                    removed_ids.append(doc[config.ID_FIELD])
                except NotFoundError:
                    logger.warning('item missing from elastic _id=%s' %
                                   (doc[config.ID_FIELD], ))
                    removed_ids.append(doc[config.ID_FIELD])
                except Exception:
                    logger.exception(
                        'item cannot be removed from elastic _id=%s' %
                        (doc[config.ID_FIELD], ))
        if removed_ids:
            backend.remove(endpoint_name,
                           {config.ID_FIELD: {'$in': removed_ids}})
            logger.info("Removed %d documents from %s.", len(removed_ids),
                        endpoint_name)
        else:
            logger.warning("No documents for %s resource were deleted.",
                           endpoint_name)
        return removed_ids
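A similar sketch for the list-based variant (``service`` and the lookup are again illustrative; ``ParsedRequest`` comes from ``eve.utils``):

    # Illustrative only: fetch the matching docs first, then delete them as a batch.
    docs = list(service.get_from_mongo('archive', lookup={'state': 'spiked'},
                                       req=ParsedRequest()))
    removed = service.delete_docs('archive', docs)   # -> list of removed ids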
Example #4
def remove_expired_from_elastic(ingest_collection):
    """Remove expired items from elastic which shouldn't be there anymore - expired before previous run."""
    ingest = superdesk.get_resource_service(ingest_collection)
    items = ingest.search({'filter': {'range': {'expiry': {'lt': 'now-5m/m'}}}})
    if items.count():
        logger.warning('there are expired items in elastic (%d)' % (items.count(), ))
        for item in items:
            logger.debug('doc only in elastic item=%s' % (item, ))
            ingest.remove_from_search(item)
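The ``'now-5m/m'`` bound is elastic date math: now minus five minutes, rounded down to the minute, so only items that expired before the previous run are matched. A hedged usage sketch (the collection name is illustrative):

    # Illustrative only: purge leftover expired items for one ingest collection.
    remove_expired_from_elastic('ingest')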
Example #5
def remove_expired_from_elastic(ingest_collection):
    """Remove expired items from elastic which shouldn't be there anymore - expired before previous run."""
    ingest = superdesk.get_resource_service(ingest_collection)
    items = ingest.search({'filter': {'range': {'expiry': {'lt': 'now-5m/m'}}}})
    if items.count():
        logger.warning('there are expired items in elastic (%d)' % (items.count(), ))
        for item in items:
            logger.debug('doc only in elastic item=%s' % (item, ))
            ingest.remove_from_search(item.get('_id'))
Example #6
    def _try_get_lock(self, key, owner, expire):
        """Log warning in case lock is gained after expiry.

        This should not happen in general, locks should be released.
        Consider increasing lock time.
        """
        lock_info = self.get_lock_info(key)
        locked = super()._try_get_lock(key, owner, expire)
        if locked and lock_info and lock_info['locked']:
            logger.warning('Lock %s expired', key)
        return locked
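A hedged sketch of when this warning fires, reusing the ``lock``/``unlock`` helpers seen in Example #7 (the lock name and timing are made up):

    # Illustrative only: worker A holds the lock past its 10s expiry, so when
    # worker B acquires it, _try_get_lock finds stale lock info and logs
    # "Lock ... expired".
    if lock('gen_archive_statistics', expire=10):
        try:
            long_running_task()   # placeholder; assume it runs longer than 10s
        finally:
            unlock('gen_archive_statistics')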
Example #7
    def run(self, max_days=3, item_id=None, chunk_size=1000):
        now_utc = utcnow()

        # If we're generating stats for a single item, then
        # don't set max_days, as we want to process all history records
        # for the provided item
        if item_id is not None:
            max_days = 0

        try:
            max_days = float(max_days)
        except (ValueError, TypeError):
            max_days = 3
        gte = None if max_days <= 0.0 else utcnow() - timedelta(days=max_days)

        try:
            chunk_size = int(chunk_size)
        except (ValueError, TypeError):
            chunk_size = 1000
        chunk_size = None if chunk_size <= 0 else chunk_size

        logger.info(
            'Starting to generate archive statistics: {}. gte={}. item_id={}. chunk_size={}'
            .format(now_utc, gte, item_id, chunk_size))

        lock_name = get_lock_id('analytics', 'gen_archive_statistics')
        if not lock(lock_name, expire=610):
            logger.info('Generate archive statistics task is already running.')
            return

        items_processed = 0
        failed_ids = []
        num_history_items = 0

        try:
            items_processed, failed_ids, num_history_items = self.generate_stats(
                item_id, gte, chunk_size)
        except Exception:
            logger.exception('Failed to generate archive stats')
        finally:
            unlock(lock_name)

        if len(failed_ids) > 0:
            logger.warning('Failed to generate stats for items {}'.format(
                ', '.join(failed_ids)))

        duration = (utcnow() - now_utc).total_seconds()
        logger.info(
            'Finished generating stats for {} items ({} history entries). Duration: {} seconds'
            .format(items_processed, num_history_items, int(duration)))
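A hedged recap of the argument coercion above (``command`` is an illustrative instance of this class):

    # Illustrative only: how run() normalizes its inputs.
    command.run()                    # gte = now - 3 days, chunk_size = 1000
    command.run(max_days='oops')     # unparseable -> falls back to 3 days
    command.run(max_days=0)          # gte = None -> process the full history
    command.run(item_id='abc123')    # forces max_days = 0 for that one item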
Example #8
    def _change_request(self, endpoint_name, id, updates, original):
        backend = self._backend(endpoint_name)
        search_backend = self._lookup_backend(endpoint_name)

        try:
            backend.update(endpoint_name, id, updates, original)
            push_notification("resource:updated",
                              _id=str(id),
                              resource=endpoint_name,
                              fields=get_diff_keys(updates, original))
        except eve.io.base.DataLayer.OriginalChangedError:
            if not backend.find_one(endpoint_name, req=None,
                                    _id=id) and search_backend:
                # item is in elastic, not in mongo - not good
                logger.warning(
                    "Item is missing in mongo resource={} id={}".format(
                        endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            else:
                # item is there, but no change was done - ok
                logger.warning(
                    "Item was not updated in mongo.",
                    extra=dict(
                        id=id,
                        resource=endpoint_name,
                        updates=updates,
                    ),
                )
                return updates

        if search_backend:
            doc = backend.find_one(endpoint_name, req=None, _id=id)
            if not doc:  # there is no doc in mongo, remove it from elastic
                logger.warning(
                    "Item is missing in mongo resource={} id={}".format(
                        endpoint_name, id))
                item = search_backend.find_one(endpoint_name, req=None, _id=id)
                if item:
                    self.remove_from_search(endpoint_name, item)
                raise SuperdeskApiError.notFoundError()
            search_backend.update(endpoint_name, id, doc)

        return updates
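A hedged usage sketch (the resource name, id, and payloads are illustrative):

    # Illustrative only: apply an update; _change_request reconciles mongo and
    # elastic when the stored item no longer matches `original`.
    updated = service._change_request('archive', item_id,
                                      {'headline': 'updated'}, original_doc)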
Example #9
def remove_expired_from_elastic(ingest_collection):
    """Remove expired items from elastic which shouldn't be there anymore - expired before previous run."""
    ingest = superdesk.get_resource_service(ingest_collection)
    items = ingest.search(
        {"filter": {
            "range": {
                "expiry": {
                    "lt": "now-5m/m"
                }
            }
        }})
    if items.count():
        logger.warning("there are expired items in elastic (%d)" %
                       (items.count(), ))
        for item in items:
            logger.debug("doc only in elastic item=%s" % (item, ))
            ingest.remove_from_search(item)
Example #10
def transtype_metadata(doc, original=None):
    """Change the type of metadata coming from the client to match the type expected in the database.

    Some metadata (e.g. custom fields) are sent as plain text while another
    type (e.g. datetime) is expected in the database. This method checks those
    metadata and updates them.

    :param doc: document to be transtyped (will be modified in place)
    :param original: original document in case of an update
    """
    # For now only fields of type "date" in the "extra" dict are handled.
    extra = doc.get("extra")
    if not extra:
        return

    if original is None:
        original = {}

    try:
        profile_id = doc.get("profile") or original["profile"]
    except KeyError:
        # profile may be missing with some items in tests
        logger.warning("`profile` is not available in doc")
        return
    ctypes_service = get_resource_service("content_types")
    profile = ctypes_service.find_one(None, _id=profile_id)
    if profile is None:
        return

    for key, value in extra.items():
        try:
            value_type = profile["schema"][key]["type"]
        except KeyError:
            continue

        if value_type == "date":
            if value and not isinstance(value, datetime):
                try:
                    extra[key] = date_parse(value)
                except Exception as e:
                    logger.warning("Can't parse {key}: {reason}".format(
                        key=key, reason=e))
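A minimal usage sketch (the profile schema is assumed to declare ``event_date`` with type ``"date"``; ``date_parse`` is presumably ``dateutil.parser.parse``):

    # Illustrative only: the client sends the custom date field as a string.
    doc = {'profile': 'news', 'extra': {'event_date': '2023-05-01T10:00:00+0000'}}
    transtype_metadata(doc)
    # doc['extra']['event_date'] is now a datetime (given the assumed schema)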
Example #11
    def process_timelines(self, items, failed_ids):
        statistics_service = get_resource_service('archive_statistics')
        items_to_create = []
        rewrites = []

        for item_id, item in items.items():
            try:
                self.gen_stats_from_timeline(item)
            except Exception:
                logger.exception('Failed to generate stats for item {}'.format(item_id))
                failed_ids.append(item_id)
                continue

            if item['updates'].get('rewrite_of') and \
                    (item['updates'].get('time_to_first_publish') or 0) > 0:
                rewrites.append(item_id)

            if not item['item'].get(config.ID_FIELD):
                item['updates'][config.ID_FIELD] = item_id
                item['updates']['stats_type'] = 'archive'
                items_to_create.append(item['updates'])
            else:
                try:
                    statistics_service.patch(
                        item_id,
                        item['updates']
                    )
                except Exception:
                    logger.exception('Failed to update stats for item {}. updates={}'.format(
                        item_id,
                        item.get('updates')
                    ))
                    failed_ids.append(item_id)

        if len(items_to_create) > 0:
            try:
                statistics_service.post(items_to_create)
            except Exception:
                item_ids = [item.get(config.ID_FIELD) for item in items_to_create]
                logger.exception('Failed to create stat entries for items {}'.format(
                    ', '.join(map(str, item_ids))
                ))
                failed_ids.extend(item_ids)

        for item_id in rewrites:
            item = items[item_id]

            updated_at = item['updates'].get('firstpublished')
            if not updated_at:
                logger.warning('Failed {}, updated_at not defined'.format(item_id))
                continue

            original_id = item['updates'].get('rewrite_of')
            if not original_id:
                logger.warning('Failed {}, original_id not defined'.format(item_id))
                continue

            original = statistics_service.find_one(req=None, _id=original_id)
            if not original:
                logger.warning('Failed {}, original not found'.format(item_id))
                continue

            published_at = original.get('firstpublished')
            if not published_at:
                logger.warning('Failed {}, published_at not defined'.format(original_id))
                continue

            statistics_service.patch(
                original_id,
                {'time_to_next_update_publish': (updated_at - published_at).total_seconds()}
            )
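For illustration, the rewrite timing computed in the final ``patch`` call (the values are made up, but the snippet is self-contained):

    # Illustrative arithmetic for time_to_next_update_publish.
    from datetime import datetime, timezone
    published_at = datetime(2023, 5, 1, 10, 0, tzinfo=timezone.utc)  # original firstpublished
    updated_at = datetime(2023, 5, 1, 12, 30, tzinfo=timezone.utc)   # rewrite firstpublished
    assert (updated_at - published_at).total_seconds() == 9000.0     # 2.5 hours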