def set_if_not_running(pipe):
    """Try to claim the running marker stored under ``key``.

    ``key``, ``now``, ``update_schedule``, ``name`` and ``id`` are not
    parameters of this function; they are read from the surrounding scope.

    :param pipe: object exposing ``get(key)`` and ``set(key, value)``.
    :return: ``True`` when the marker was written with the current time
        (none existed, or the existing one plus ``update_schedule`` lies
        before ``now``); ``False`` when the existing marker is still fresh.
    """
    stored = pipe.get(key)
    if not stored:
        # No marker yet: claim it.
        pipe.set(key, date_to_str(now))
        return True

    last_updated = get_date(str(stored))
    if last_updated + update_schedule < now:
        # The previous marker is stale, so take it over.
        logger.warn('Overwriting running key for {}:{}'.format(name, id))
        pipe.set(key, date_to_str(now))
        return True

    logger.warn('Task {}:{} is already running. last_updated={}'.format(name, id, last_updated))
    return False
    def _get_archive_items(self, start_date, end_date):
        """
        Fetch archive items whose ``_updated`` lies within the given window
        (both bounds inclusive) and whose state is corrected, published or
        killed.

        :param start_date: lower bound for ``_updated``
        :param end_date: upper bound for ``_updated``
        :return: the result of ``self._get_items`` for that query
        """
        updated_window = {'$gte': date_to_str(start_date), '$lte': date_to_str(end_date)}
        wanted_states = [CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED, CONTENT_STATE.KILLED]
        criteria = {
            '$and': [
                {'_updated': updated_window},
                {ITEM_STATE: {'$in': wanted_states}},
            ]
        }
        fields = [config.VERSION, 'versioncreated', 'state']

        return self._get_items(ARCHIVE, criteria, '_created', fields, self.__get_key)
Beispiel #3
0
def _prepare_response(resource, dct, last_modified=None, etag=None, status=200):
    """ Prepares the response object according to the client request and
    available renderers, making sure that all accessory directives (caching,
    etag, last-modified, CORS) are present.

    :param resource: the resource involved. When falsy, the global cache
                     settings from ``config`` are used instead of the
                     per-resource ones.
    :param dct: the dict that should be sent back as a response.
    :param last_modified: Last-Modified header value.
    :param etag: ETag header value.
    :param status: response status.
    :return: the response object, with headers applied.

    .. versionchanged:: 0.0.5
       Support for Cross-Origin Resource Sharing (CORS).

    .. versionadded:: 0.0.4
    """
    if request.method == "OPTIONS":
        resp = app.make_default_options_response()
    else:
        # obtain the best match between client's request and available mime
        # types, along with the corresponding render function.
        mime, renderer = _best_mime()

        # the renderer is looked up by name among this module's globals and
        # invoked with the payload expanded as keyword arguments
        rendered = globals()[renderer](**dct)

        # build the main wsgi response object
        resp = make_response(rendered, status)
        resp.mimetype = mime

    # cache directives (only applied to GET requests)
    if request.method == "GET":
        if resource:
            cache_control = config.DOMAIN[resource]["cache_control"]
            expires = config.DOMAIN[resource]["cache_expires"]
        else:
            cache_control = config.CACHE_CONTROL
            expires = config.CACHE_EXPIRES
        if cache_control:
            resp.headers.add("Cache-Control", cache_control)
        if expires:
            resp.expires = time.time() + expires

    # etag and last-modified
    if etag:
        resp.headers.add("ETag", etag)
    if last_modified:
        resp.headers.add("Last-Modified", date_to_str(last_modified))

    # CORS headers, only when the client sent an Origin and CORS is configured
    if "Origin" in request.headers and config.X_DOMAINS is not None:
        # NOTE(review): `basestring` only exists on Python 2; left untouched
        # because the target interpreter is not visible from this block.
        if isinstance(config.X_DOMAINS, basestring):
            domains = [config.X_DOMAINS]
        else:
            domains = config.X_DOMAINS
        methods = app.make_default_options_response().headers["allow"]
        resp.headers.add("Access-Control-Allow-Origin", ", ".join(domains))
        resp.headers.add("Access-Control-Allow-Methods", methods)
        # The standard preflight-cache header is "Access-Control-Max-Age";
        # "Access-Control-Allow-Max-Age" is not a CORS header and is ignored
        # by browsers.
        resp.headers.add("Access-Control-Max-Age", 21600)

    return resp
Beispiel #4
0
    def xml_dict(cls, data):
        """ Renders a dict as XML, visiting keys in sorted order.

        Datetime values go through ``date_to_str``; other date/time values
        use ``isoformat()``. List values produce one element per entry, dict
        entries are rendered recursively with their links appended, and every
        other entry is passed through ``utils.escape``.

        :param data: the data stream to be rendered as xml.
        :return: the XML fragment as a string.

        .. versionchanged:: 0.5
           Always return ordered items (#441).

        .. versionchanged:: 0.2
           Leaf values are now properly escaped.

        .. versionadded:: 0.0.3
        """
        fragments = []
        for key, val in sorted(data.items()):
            if isinstance(val, datetime.datetime):
                val = date_to_str(val)
            elif isinstance(val, (datetime.time, datetime.date)):
                val = val.isoformat()

            entries = val if isinstance(val, list) else [val]
            for entry in entries:
                if not isinstance(entry, dict):
                    fragments.append("<%s>%s</%s>" % (key, utils.escape(entry), key))
                    continue
                # links are extracted before the nested dict is rendered,
                # keeping the original call order
                links = cls.xml_add_links(entry)
                fragments.append("<%s>" % key)
                fragments.append(cls.xml_dict(entry))
                fragments.append(links)
                fragments.append("</%s>" % key)
        return ''.join(fragments)
Beispiel #5
0
 def set_if_not_running(pipe):
     """Try to claim the running marker stored under ``key`` for a provider.

     ``key``, ``now``, ``update_schedule`` and ``provider`` are not
     parameters of this function; they are read from the surrounding scope.

     :param pipe: object exposing ``get(key)`` and ``set(key, value)``.
     :return: ``True`` when the marker was written with the current time
         (none existed, or the existing one plus ``update_schedule`` lies
         before ``now``); ``False`` when the existing marker is still fresh.
     """
     stored = pipe.get(key)
     if not stored:
         # No marker yet: claim it.
         pipe.set(key, date_to_str(now))
         return True

     last_updated = get_date(str(stored))
     if last_updated + update_schedule < now:
         # The previous marker is stale, so take it over.
         logger.warn('Overwritting running key for provider {0}'.format(provider[superdesk.config.ID_FIELD]))
         pipe.set(key, date_to_str(now))
         return True

     logger.warn('Update ingest already running for provider {0}, last_updated={1}'.
                 format(provider[superdesk.config.ID_FIELD], last_updated))
     return False
Beispiel #6
0
    def purge_old_entries(self):
        """
        Delete, in batches of up to 1000 ids, the audit entries matching
        ``self.not_item_entry_query`` whose ``_updated`` is not later than
        ``self.expiry``.

        Batches are read in ascending ``_id`` order; each round resumes after
        the last ``_id`` of the previous batch.
        :return: None, once a round finds no more entries
        """
        audit_service = superdesk.get_resource_service('audit')
        last_seen_id = None
        logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))

        while True:
            conditions = [self.not_item_entry_query, {'_updated': {'$lte': date_to_str(self.expiry)}}]
            if last_seen_id:
                conditions.append({'_id': {'$gt': last_seen_id}})

            request = ParsedRequest()
            request.sort = '[("_id", 1)]'
            request.projection = '{"_id": 1}'
            request.max_results = 1000

            cursor = audit_service.get_from_mongo(req=request, lookup={'$and': conditions})
            batch = [entry.get('_id') for entry in cursor]
            if not batch:
                logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
                return

            logger.info('Found {} audit items at {}'.format(len(batch), utcnow()))
            last_seen_id = batch[-1]
            logger.info('Deleting {} old audit items'.format(len(batch)))
            audit_service.delete_ids_from_mongo(batch)
Beispiel #7
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Query the items whose expiry is at or before the given datetime.

        An item matches when it either has a desk (``task.desk`` is not None)
        or is spiked without a desk, and its ``expiry_status`` agrees with
        ``invalid_only``.

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True selects only items whose
            ``expiry_status`` is ``'invalid'``; False excludes them
        :return: result of ``self.get_from_mongo``, sorted by expiry then
            creation and limited to ``config.MAX_EXPIRY_QUERY_LIMIT``
        """
        expiry_clause = {'expiry': {'$lte': date_to_str(expiry_datetime)}}
        desk_clause = {
            '$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]
        }
        status_clause = {'expiry_status': 'invalid' if invalid_only else {'$ne': 'invalid'}}

        request = ParsedRequest()
        request.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        request.sort = 'expiry,_created'
        lookup = {'$and': [expiry_clause, desk_clause, status_clause]}
        return self.get_from_mongo(req=request, lookup=lookup)
Beispiel #8
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Look up items that expired at or before ``expiry_datetime``.

        Matching items either belong to a desk (``task.desk`` is not None) or
        are spiked with no desk; ``invalid_only`` decides whether items with
        ``expiry_status == 'invalid'`` are the only ones kept or are excluded.

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: True only invalid items
        :return: result of ``self.get_from_mongo`` for the query, sorted by
            ``expiry,_created`` and capped at ``config.MAX_EXPIRY_QUERY_LIMIT``
        """
        filters = [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
        ]

        wanted_status = 'invalid' if invalid_only else {'$ne': 'invalid'}
        filters.append({'expiry_status': wanted_status})

        parsed = ParsedRequest()
        parsed.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        parsed.sort = 'expiry,_created'
        return self.get_from_mongo(req=parsed, lookup={'$and': filters})
Beispiel #9
0
def prepopulate_data(file_name, default_user=get_default_user()):
    """Create or patch documents described by a JSON fixture file.

    The fixture is read from ``<APP_ABSPATH>/apps/prepopulate/<file_name>``
    and is expected to be a list of entries. For each entry:

    * the user named by ``username`` (or the default user) is logged in;
    * ``data`` has placeholders substituted (``NOW()`` plus any ids
      registered by earlier entries via ``id_name``);
    * with ``id_update`` the document with that id is patched, otherwise
      ``data`` is posted as a new document;
    * when ``data`` carries the configured version field, versioning
      documents are inserted as well.

    NOTE: the ``default_user`` default is evaluated once, when the function
    is defined, not on every call.

    :param file_name: name of the fixture file inside ``apps/prepopulate``.
    :param default_user: dict with ``username`` and ``password`` keys.
    :raises Exception: when a patch or post returns a falsy result.
    """
    placeholders = {'NOW()': date_to_str(utcnow())}
    users = {default_user['username']: default_user['password']}
    default_username = default_user['username']
    fixture_path = os.path.join(superdesk.app.config.get('APP_ABSPATH'), 'apps',
                                'prepopulate', file_name)
    # The file is only needed for parsing; close it before touching services.
    with open(fixture_path, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)

    for item in json_data:
        resource = item.get('resource')
        service = get_resource_service(resource)
        username = item.get('username') or default_username
        set_logged_user(username, users[username])
        id_name = item.get('id_name')
        id_update = item.get('id_update')
        # Round-trip through JSON text so placeholders can be substituted
        # anywhere inside the payload.
        text = apply_placeholders(placeholders, json.dumps(item.get('data')))
        data = json.loads(text)
        if resource:
            app.data.mongo._mongotize(data, resource)
        if resource == 'users':
            # Remember credentials so later entries can log in as this user.
            users.update({data['username']: data['password']})
        if id_update:
            id_update = apply_placeholders(placeholders, id_update)
            res = service.patch(ObjectId(id_update), data)
            if not res:
                raise Exception('Prepopulate: patching {} with id {} failed'.format(resource, id_update))
        else:
            ids = service.post([data])
            if not ids:
                raise Exception('Prepopulate: posting to {} returned no ids'.format(resource))
            if id_name:
                # Expose the new id as a placeholder for later entries.
                placeholders[id_name] = str(ids[0])
        if app.config['VERSION'] in data:
            insert_versioning_documents(resource, data)
Beispiel #10
0
    def purge_old_entries(self):
        """
        Delete, in batches of up to 1000, the audit entries matching
        ``self.not_item_entry_query`` whose ``_updated`` is not later than
        ``self.expiry``.

        Batches are read in ascending ``_updated`` order; each round resumes
        from the ``_updated`` value of the last entry of the previous batch.
        :return: None, once a round's cursor reports a count of zero
        """
        audit_service = superdesk.get_resource_service('audit')
        resume_from = None

        while True:
            clauses = [self.not_item_entry_query, {'_updated': {'$lte': date_to_str(self.expiry)}}]
            if resume_from:
                clauses.append({'_updated': {'$gte': resume_from}})

            request = ParsedRequest()
            request.sort = '[("_updated", 1)]'
            request.projection = '{"_id": 1, "_updated": 1}'
            request.max_results = 1000

            cursor = audit_service.get_from_mongo(req=request, lookup={'$and': clauses})
            if cursor.count() == 0:
                break

            batch = [(doc['_id'], doc['_updated']) for doc in cursor]
            resume_from = batch[-1][1]
            audit_service.delete({'_id': {'$in': [doc_id for doc_id, _ in batch]}})
Beispiel #11
0
    def get_history_items(self, last_id, gte, item_id, chunk_size=0):
        """Yield ``archive_history`` entries batch by batch.

        Each round queries entries sorted by ``_id`` then ``version`` and
        resumes after the ``_id`` of the last entry already yielded.

        :param last_id: id to resume after on the first round (falsy: no bound)
        :param gte: when truthy, only entries with ``_created`` >= this value
        :param item_id: when truthy, only entries for this ``item_id``
        :param chunk_size: when > 0, used as ``max_results`` for each round
        :return: generator of lists of history entries; stops on the first
            empty batch
        """
        history_service = get_resource_service('archive_history')
        resume_after = last_id

        while True:
            request = ParsedRequest()
            request.sort = '[("_id", 1), ("version", 1)]'

            filters = []
            if gte:
                filters.append({'_created': {'$gte': date_to_str(gte)}})
            if item_id:
                filters.append({'item_id': str(item_id)})
            if resume_after:
                filters.append({'_id': {'$gt': str(resume_after)}})
            request.where = json.dumps({'$and': filters})

            if chunk_size > 0:
                request.max_results = int(chunk_size)

            batch = list(history_service.get(req=request, lookup=None))
            if not batch:
                break

            resume_after = batch[-1][config.ID_FIELD]
            yield batch
Beispiel #12
0
    def purge_orphaned_item_audits(self):
        """
        Delete audit entries whose referenced item id is not returned by
        ``self._get_archive_ids``.

        Entries matching ``self.item_entry_query`` with ``_updated`` not later
        than ``self.expiry`` are scanned in ascending ``_id`` order, up to
        1000 per round.
        :return: None, once a round's cursor reports a count of zero
        """
        audit_service = superdesk.get_resource_service('audit')
        last_seen_id = None

        # Walk the audit collection one batch at a time
        while True:
            lookup = deepcopy(self.item_entry_query)
            lookup['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
            if last_seen_id:
                lookup['$and'].append({'_id': {'$gt': last_seen_id}})

            request = ParsedRequest()
            request.sort = '[("_id", 1)]'
            request.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
            request.max_results = 1000

            cursor = audit_service.get_from_mongo(req=request, lookup=lookup)
            if cursor.count() == 0:
                break

            pairs = [(doc['_id'], self._extract_item_id(doc)) for doc in cursor]
            last_seen_id = pairs[-1][0]

            referenced_ids = {item_id for _, item_id in pairs}
            archived_ids = self._get_archive_ids(referenced_ids)
            missing_ids = referenced_ids - archived_ids
            orphaned = [audit_id for audit_id, item_id in pairs if item_id in missing_ids]
            audit_service.delete({'_id': {'$in': orphaned}})
Beispiel #13
0
def xml_dict(data):
    """ Renders a dict as XML.

    Datetime values go through ``date_to_str``; other date/time values use
    ``isoformat()``. List values produce one element per entry and dict
    entries are rendered recursively with their links appended. String leaf
    values are XML-escaped so that ``&``, ``<`` and ``>`` cannot break the
    generated markup; non-string leaves are formatted as they are.

    :param data: the data stream to be rendered as xml.
    :return: the XML fragment as a string.

    .. versionadded:: 0.0.3
    """
    # local import so the block does not depend on a module-level import
    from xml.sax.saxutils import escape

    xml = ''
    for k, v in data.items():
        if isinstance(v, datetime.datetime):
            v = date_to_str(v)
        elif isinstance(v, (datetime.time, datetime.date)):
            v = v.isoformat()
        if not isinstance(v, list):
            v = [v]
        for value in v:
            if isinstance(value, dict):
                # links are extracted before the nested dict is rendered
                links = xml_add_links(value)
                xml += "<%s>" % k
                xml += xml_dict(value)
                xml += links
                xml += "</%s>" % k
            else:
                if isinstance(value, str):
                    value = escape(value)
                xml += "<%s>%s</%s>" % (k, value, k)
    return xml
def prepopulate_data(file_name, default_user=get_default_user()):
    """Create or patch documents described by a JSON fixture file.

    The fixture is read from ``<APP_ABSPATH>/apps/prepopulate/<file_name>``
    and is expected to be a list of entries. For each entry:

    * the user named by ``username`` (or the default user) is logged in;
    * ``data`` has placeholders substituted (``NOW()`` plus any ids
      registered by earlier entries via ``id_name``);
    * with ``id_update`` the document with that id is patched, otherwise
      ``data`` is posted as a new document;
    * when ``data`` carries the configured version field, versioning
      documents are inserted as well.

    NOTE: the ``default_user`` default is evaluated once, when the function
    is defined, not on every call.

    :param file_name: name of the fixture file inside ``apps/prepopulate``.
    :param default_user: dict with ``username`` and ``password`` keys.
    :raises Exception: when a patch or post returns a falsy result.
    """
    placeholders = {'NOW()': date_to_str(utcnow())}
    users = {default_user['username']: default_user['password']}
    default_username = default_user['username']
    fixture_path = os.path.join(superdesk.app.config.get('APP_ABSPATH'), 'apps', 'prepopulate', file_name)
    # The file is only needed for parsing; close it before touching services.
    with open(fixture_path, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)

    for item in json_data:
        resource = item.get('resource')
        service = get_resource_service(resource)
        username = item.get('username') or default_username
        set_logged_user(username, users[username])
        id_name = item.get('id_name')
        id_update = item.get('id_update')
        # Round-trip through JSON text so placeholders can be substituted
        # anywhere inside the payload.
        text = apply_placeholders(placeholders, json.dumps(item.get('data')))
        data = json.loads(text)
        if resource:
            app.data.mongo._mongotize(data, resource)
        if resource == 'users':
            # Remember credentials so later entries can log in as this user.
            users.update({data['username']: data['password']})
        if id_update:
            id_update = apply_placeholders(placeholders, id_update)
            res = service.patch(ObjectId(id_update), data)
            if not res:
                raise Exception('Prepopulate: patching {} with id {} failed'.format(resource, id_update))
        else:
            ids = service.post([data])
            if not ids:
                raise Exception('Prepopulate: posting to {} returned no ids'.format(resource))
            if id_name:
                # Expose the new id as a placeholder for later entries.
                placeholders[id_name] = str(ids[0])
        if app.config['VERSION'] in data:
            insert_versioning_documents(resource, data)
Beispiel #15
0
def xml_dict(data):
    """ Renders a dict as XML, visiting keys in sorted order.

    Datetime values go through ``date_to_str``; other date/time values use
    ``isoformat()``. List values produce one element per entry, dict entries
    are rendered recursively with their links appended, and every other entry
    is passed through ``utils.escape``.

    :param data: the data stream to be rendered as xml.
    :return: the XML fragment as a string.

    .. versionchanged:: 0.5
       Always return ordered items (#441).

    .. versionchanged:: 0.2
       Leaf values are now properly escaped.

    .. versionadded:: 0.0.3
    """
    fragments = []
    for key, val in sorted(data.items()):
        if isinstance(val, datetime.datetime):
            val = date_to_str(val)
        elif isinstance(val, (datetime.time, datetime.date)):
            val = val.isoformat()

        entries = val if isinstance(val, list) else [val]
        for entry in entries:
            if not isinstance(entry, dict):
                fragments.append("<%s>%s</%s>" % (key, utils.escape(entry), key))
                continue
            # links are extracted before the nested dict is rendered,
            # keeping the original call order
            links = xml_add_links(entry)
            fragments.append("<%s>" % key)
            fragments.append(xml_dict(entry))
            fragments.append(links)
            fragments.append("</%s>" % key)
    return ''.join(fragments)
Beispiel #16
0
    def update_overdue_scheduled(self):
        """
        Patch every overdue scheduled item of the archive collection to the
        published state.

        Does nothing when ``is_task_running`` reports the task as running;
        otherwise the task is marked as not running when the work ends,
        whether it succeeded or raised.
        """
        logger.info('Updating overdue scheduled content')

        already_running = is_task_running("archive", "update_overdue_scheduled",
                                          UPDATE_OVERDUE_SCHEDULED_DEFAULT)
        if already_running:
            return

        try:
            now = date_to_str(utcnow())
            overdue = get_overdue_scheduled_items(now, ARCHIVE)
            published_update = {ITEM_STATE: CONTENT_STATE.PUBLISHED}

            for entry in overdue:
                message = 'updating overdue scheduled article with id {} and headline {} -- expired on: {} now: {}'
                logger.info(message.format(entry[config.ID_FIELD], entry['headline'],
                                           entry['publish_schedule'], now))

                archive_service = superdesk.get_resource_service(ARCHIVE)
                archive_service.patch(entry[config.ID_FIELD], published_update)
        finally:
            mark_task_as_not_running("archive", "update_overdue_scheduled")
Beispiel #17
0
def render_xml(**d):
    """ XML render function. This could surely use some further tinkering.

    Every keyword becomes an element. A dict value is rendered recursively
    inside a tag named after the key with trailing ``s`` characters stripped;
    a list value is wrapped in a tag named after the key, with one stripped-
    name element per entry; any other value becomes a single stripped-name
    element. Datetime values go through ``date_to_str``; other date/time
    values use ``isoformat()``. String leaf values are XML-escaped so that
    ``&``, ``<`` and ``>`` cannot break the generated markup.

    :param d: the key/value pairs to render.
    :return: the XML fragment as a string.
    """
    # local import so the block does not depend on a module-level import
    from xml.sax.saxutils import escape

    xml = ''
    for k, v in d.items():
        if isinstance(v, datetime.datetime):
            v = date_to_str(v)
        elif isinstance(v, (datetime.time, datetime.date)):
            v = v.isoformat()
        # exact-type checks are kept on purpose: dict/list subclasses are
        # rendered as plain leaf values, as before
        if type(v) is dict:
            xml += "<%s>" % (k.rstrip('s'))
            xml += render_xml(**v)
            xml += "</%s>" % (k.rstrip('s'))
        else:
            original_list = False
            if type(v) is not list:
                v = [v]
            else:
                original_list = True
                xml += "<%s>" % k
            for value in v:
                if type(value) is dict:
                    xml += "<%s>" % (k.rstrip('s'))
                    xml += render_xml(**value)
                    xml += "</%s>" % (k.rstrip('s'))
                else:
                    if isinstance(value, str):
                        value = escape(value)
                    xml += "<%s>%s</%s>" % (str(k.rstrip('s')), value,
                                            str(k.rstrip('s')))
            if original_list:
                xml += "</%s>" % k
    return xml
Beispiel #18
0
    def purge_orphaned_item_audits(self):
        """
        Delete audit entries whose ``audit_id`` is not returned by
        ``self._get_archive_ids``.

        Entries matching ``self.item_entry_query`` with ``_updated`` not later
        than ``self.expiry`` are scanned in ascending ``_id`` order, up to
        1000 per round.
        :return: None, once a round finds no more entries
        """
        audit_service = superdesk.get_resource_service('audit')
        last_seen_id = None
        logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))

        # Walk the audit collection one batch at a time
        while True:
            lookup = deepcopy(self.item_entry_query)
            lookup['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
            if last_seen_id:
                lookup['$and'].append({'_id': {'$gt': last_seen_id}})

            request = ParsedRequest()
            request.sort = '[("_id", 1)]'
            request.projection = '{"_id": 1, "audit_id":1}'
            request.max_results = 1000

            cursor = audit_service.get_from_mongo(req=request, lookup=lookup)
            pairs = [(doc['_id'], doc['audit_id']) for doc in cursor]
            if not pairs:
                logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
                return

            logger.info('Found {} orphaned audit items at {}'.format(len(pairs), utcnow()))
            last_seen_id = pairs[-1][0]

            referenced_ids = {ref for _, ref in pairs}
            archived_ids = self._get_archive_ids(referenced_ids)
            missing_ids = referenced_ids - archived_ids
            orphaned = [audit_id for audit_id, ref in pairs if ref in missing_ids]
            logger.info('Deleting {} orphaned audit items at {}'.format(len(orphaned), utcnow()))
            audit_service.delete_ids_from_mongo(orphaned)
    def test_query_getting_expired_content(self):
        """Only the spiked items whose expiry is not in the future are returned.

        Seven spiked documents are inserted one by one: five with expiry
        offsets of -10, 0, 10, 20 and 30, one with ``expiry`` None and one
        without an expiry field. Two of them are expected to be reported.
        """
        with self.app.app_context():
            for offset in (-10, 0, 10, 20, 30):
                self.app.data.insert(ARCHIVE, [{
                    'expiry': get_expiry_date(offset),
                    'state': 'spiked'
                }])
            self.app.data.insert(ARCHIVE, [{
                'expiry': None,
                'state': 'spiked'
            }])
            self.app.data.insert(ARCHIVE, [{
                'unique_id': 97,
                'state': 'spiked'
            }])

            now = date_to_str(utcnow())
            expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
            # assertEquals is a deprecated alias (removed in Python 3.12)
            self.assertEqual(2, expired_items.count())
Beispiel #20
0
    def get_expired_items(self, expiry_datetime):
        """
        Look up items that expired at or before ``expiry_datetime`` and that
        either belong to a desk (``task.desk`` is not None) or are spiked
        with no desk.

        :param datetime expiry_datetime: expiry datetime
        :return: result of ``self.get_from_mongo`` for the query, sorted by
            ``expiry,_created`` and capped at ``config.MAX_EXPIRY_QUERY_LIMIT``
        """
        expiry_clause = {'expiry': {'$lte': date_to_str(expiry_datetime)}}
        desk_clause = {
            '$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]
        }

        request = ParsedRequest()
        request.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        request.sort = 'expiry,_created'
        return self.get_from_mongo(req=request, lookup={'$and': [expiry_clause, desk_clause]})
    def test_query_getting_overdue_scheduled_content(self):
        """Exactly one of the inserted archive documents is reported as overdue.

        Four documents with a ``publish_schedule`` (offsets -10, -10, 0, 10
        in states published, scheduled, spiked, scheduled) and one without
        any schedule are inserted one by one.
        """
        with self.app.app_context():
            fixtures = (
                (-10, 'published'),
                (-10, 'scheduled'),
                (0, 'spiked'),
                (10, 'scheduled'),
            )
            for offset, state in fixtures:
                self.app.data.insert(ARCHIVE,
                                     [{
                                         'publish_schedule': get_expiry_date(offset),
                                         'state': state
                                     }])
            self.app.data.insert(ARCHIVE, [{
                'unique_id': 97,
                'state': 'spiked'
            }])

            now = date_to_str(utcnow())
            overdueItems = get_overdue_scheduled_items(now, 'archive')
            # assertEquals is a deprecated alias (removed in Python 3.12)
            self.assertEqual(1, overdueItems.count())
Beispiel #22
0
    def update_overdue_scheduled(self):
        """
        Set every overdue scheduled item of the published collection to the
        published state.

        Does nothing when ``is_task_running`` reports the task as running;
        otherwise the task is marked as not running when the work ends,
        whether it succeeded or raised.
        """
        logger.info("Updating overdue scheduled content")

        already_running = is_task_running("publish", "update_overdue_scheduled", UPDATE_OVERDUE_SCHEDULED_DEFAULT)
        if already_running:
            return

        try:
            now = date_to_str(utcnow())

            for entry in get_overdue_scheduled_items(now, "published"):
                message = "updating overdue scheduled article with id {} and headline {} -- expired on: {} now: {}"
                logger.info(message.format(entry[config.ID_FIELD], entry["headline"], entry["publish_schedule"], now))

                published_service = superdesk.get_resource_service("published")
                published_service.update_published_items(entry["item_id"], ITEM_STATE, CONTENT_STATE.PUBLISHED)
        finally:
            mark_task_as_not_running("publish", "update_overdue_scheduled")
Beispiel #23
0
def xml_dict(data):
    """ Renders a dict as XML.

    Datetime values go through ``date_to_str``; other date/time values use
    ``isoformat()``. List values produce one element per entry and dict
    entries are rendered recursively with their links appended. String leaf
    values are XML-escaped so that ``&``, ``<`` and ``>`` cannot break the
    generated markup; non-string leaves are formatted as they are.

    :param data: the data stream to be rendered as xml.
    :return: the XML fragment as a string.

    .. versionadded:: 0.0.3
    """
    # local import so the block does not depend on a module-level import
    from xml.sax.saxutils import escape

    xml = ''
    for k, v in data.items():
        if isinstance(v, datetime.datetime):
            v = date_to_str(v)
        elif isinstance(v, (datetime.time, datetime.date)):
            v = v.isoformat()
        if not isinstance(v, list):
            v = [v]
        for value in v:
            if isinstance(value, dict):
                # links are extracted before the nested dict is rendered
                links = xml_add_links(value)
                xml += "<%s>" % k
                xml += xml_dict(value)
                xml += links
                xml += "</%s>" % k
            else:
                if isinstance(value, str):
                    value = escape(value)
                xml += "<%s>%s</%s>" % (k, value, k)
    return xml
    def get_items(self, now):
        """Yield, page by page, archive items whose expiry is at or after
        ``now`` plus ``self.expiry_minutes`` and whose state is published,
        corrected, killed or recalled.

        A first query counts the matches and reads the lowest ``unique_id``;
        pages of ``self.default_page_size`` items are then fetched in
        ascending ``unique_id`` order, each one resuming after the last
        ``unique_id`` of the previous page.

        :param datetime now: current date time
        :return: generator yielding one list of items per page
        """
        logger.info('Fetching expired items from archive collection.')
        threshold = now + timedelta(minutes=self.expiry_minutes)

        query = {
            'expiry': {'$gte': date_to_str(threshold)},
            ITEM_STATE: {
                '$in': [
                    CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                    CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
                ]
            }
        }

        probe = ParsedRequest()
        probe.sort = '[("unique_id", 1)]'
        probe.where = json.dumps(query)
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=probe, lookup=None)
        count = cursor.count()
        if count:
            no_of_pages = len(range(0, count, self.default_page_size))
            unique_id = cursor[0]['unique_id']
            logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
        else:
            no_of_pages = 0
            logger.info('No items to modify.')

        for page_number in range(1, no_of_pages + 1):
            logger.info('Fetching items for page number: {} unique_id: {}'.format(page_number, unique_id))
            request = ParsedRequest()
            request.sort = '[("unique_id", 1)]'
            # the first page includes the lowest unique_id itself; later
            # pages start strictly after the last one already returned
            comparison = '$gte' if page_number == 1 else '$gt'
            query['unique_id'] = {comparison: unique_id}

            request.where = json.dumps(query)
            request.max_results = self.default_page_size
            items = list(get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=None))
            if items:
                unique_id = items[-1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), page_number))
            yield items
 def remove_expired_sessions(self):
     """Delete every ``auth`` session whose ``_updated`` is at or before the
     current time minus ``SESSION_EXPIRY_MINUTES``."""
     max_age = timedelta(minutes=app.settings['SESSION_EXPIRY_MINUTES'])
     cutoff = utcnow() - max_age
     logger.info('Deleting session not updated since {}'.format(cutoff))
     lookup = {'_updated': {'$lte': date_to_str(cutoff)}}
     for stale in get_resource_service('auth').get(req=None, lookup=lookup):
         get_resource_service('auth').delete_action({'_id': str(stale['_id'])})
 def remove_expired_sessions(self):
     """Delete every ``auth`` session whose ``_updated`` is at or before the
     current time minus ``SESSION_EXPIRY_MINUTES``."""
     max_age = timedelta(minutes=app.settings["SESSION_EXPIRY_MINUTES"])
     cutoff = utcnow() - max_age
     logger.info("Deleting session not updated since {}".format(cutoff))
     lookup = {"_updated": {"$lte": date_to_str(cutoff)}}
     for stale in get_resource_service("auth").get(req=None, lookup=lookup):
         get_resource_service("auth").delete_action({"_id": str(stale["_id"])})
def get_query_for_expired_items(provider_id, expiration_date):
    """Build the serialized filter matching a provider's expired ingest items.

    :param provider_id: value matched against ``ingest.ingest_provider``
    :param expiration_date: inclusive upper bound for ``ingest.versioncreated``
    :return: the ``and`` filter serialized with ``superdesk.json.dumps``
    """
    provider_clause = {'term': {'ingest.ingest_provider': provider_id}}
    age_clause = {'range': {'ingest.versioncreated': {'lte': date_to_str(expiration_date)}}}
    return superdesk.json.dumps({'and': [provider_clause, age_clause]})
Beispiel #28
0
 def remove_expired_content(self):
     """Delete expired archive items, re-querying until the expired set is empty."""
     logger.info('Removing expired content')
     now = date_to_str(utcnow())

     while True:
         batch = self.get_expired_items(now)
         if not (batch.count() > 0):
             break
         for doc in batch:
             logger.info('deleting {} expiry: {} now:{}'.format(doc['_id'], doc['expiry'], now))
             superdesk.get_resource_service('archive').delete_action({'_id': str(doc['_id'])})
Beispiel #29
0
 def remove_expired_spiked(self):
     """Delete the items returned by ``get_expired_items`` until none remain.

     Each item is logged and then removed through the ``archive`` service.
     """
     logger.info('Expiring spiked content')
     now = date_to_str(utcnow())

     expired = self.get_expired_items(now)
     while expired.count() > 0:
         for entry in expired:
             entry_id, entry_expiry = entry['_id'], entry['expiry']
             logger.info('deleting {} expiry: {} now:{}'.format(entry_id, entry_expiry, now))
             lookup = {'_id': str(entry_id)}
             superdesk.get_resource_service('archive').delete_action(lookup)
         # query again: the loop ends once nothing expired is left
         expired = self.get_expired_items(now)
Beispiel #30
0
 def remove_expired_content(self):
     """Repeatedly fetch and delete expired archive items until a fetch comes back empty."""
     logger.info("Removing expired content")
     now = date_to_str(utcnow())

     pending = self.get_expired_items(now)
     while pending.count() > 0:
         for record in pending:
             message = "deleting {} expiry: {} now:{}".format(record["_id"], record["expiry"], now)
             logger.info(message)
             archive_service = superdesk.get_resource_service("archive")
             archive_service.delete_action({"_id": str(record["_id"])})
         pending = self.get_expired_items(now)
Beispiel #31
0
 def default(self, obj):
     """Serialize date-like objects; defer everything else to ``json.JSONEncoder``.

     :param obj: the object the encoder could not serialize natively
     :return: ``date_to_str(obj)`` for datetimes, ``obj.isoformat()`` for plain
         dates and times, otherwise whatever ``json.JSONEncoder.default`` does
     """
     # datetime has to be tested first because it is a subclass of date
     if isinstance(obj, datetime.datetime):
         return date_to_str(obj)

     # not expected in practice: 'datetime' is the only date-like type
     # supported at domain schema level
     if isinstance(obj, (datetime.time, datetime.date)):
         return obj.isoformat()

     return json.JSONEncoder.default(self, obj)
 def remove_expired_sessions(self):
     """Delete stale auth sessions, then refresh the online-users state.

     A session is stale when its ``_updated`` value is at or before the current
     time minus ``SESSION_EXPIRY_MINUTES`` minutes.
     """
     lifetime = timedelta(minutes=app.settings['SESSION_EXPIRY_MINUTES'])
     threshold = utcnow() - lifetime
     logger.info('Deleting session not updated since {}'.format(threshold))

     stale_sessions = get_resource_service('auth').get(
         req=None, lookup={'_updated': {'$lte': date_to_str(threshold)}})
     for stale in stale_sessions:
         get_resource_service('auth').delete_action({'_id': str(stale['_id'])})

     self._update_online_users()
Beispiel #33
0
 def default(self, obj):
     """Return a JSON-friendly value for date-like objects.

     Anything that is not a datetime, date or time is handed to
     ``json.JSONEncoder.default``.
     """
     if isinstance(obj, (datetime.time, datetime.date)):
         # datetime is a date subclass: it goes through date_to_str, while
         # bare dates/times (not expected at domain schema level) use ISO format
         return date_to_str(obj) if isinstance(obj, datetime.datetime) else obj.isoformat()
     return json.JSONEncoder.default(self, obj)
Beispiel #34
0
 def set_if_not_running(pipe):
     """Claim the task's "running" key unless a fresh claim already exists.

     ``key``, ``now``, ``update_schedule``, ``name`` and ``id`` are free
     variables taken from the enclosing scope.

     :param pipe: object exposing ``get(key)`` and ``set(key, value)``
         (presumably a Redis pipeline -- confirm against the caller)
     :return: True when the key was written with the current time, False when
         the stored timestamp plus ``update_schedule`` has not yet passed ``now``
     """
     last_updated = pipe.get(key)
     if last_updated:
         last_updated = get_date(str(last_updated))
         # the previous claim is stale once a full schedule interval has elapsed
         if last_updated + update_schedule < now:
             # Logger.warn is a deprecated alias; use Logger.warning
             logger.warning('Overwriting running key for {}:{}'.format(name, id))
             pipe.set(key, date_to_str(now))
             return True

         logger.warning(
             'Task {}:{} is already running. last_updated={}'.format(name, id, last_updated))
         return False

     # no previous claim at all: take it
     pipe.set(key, date_to_str(now))
     return True
Beispiel #35
0
    def test_query_getting_overdue_scheduled_content(self):
        """Expect exactly one overdue item among the five archive fixtures inserted here."""
        # presumably only the 'scheduled' fixture dated get_expiry_date(-10) counts as overdue
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'published'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'scheduled'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(0), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(10), 'state': 'scheduled'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        now = date_to_str(utcnow())
        overdue_items = get_overdue_scheduled_items(now, 'archive')
        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(1, overdue_items.count())
Beispiel #36
0
    def remove_expired_items(self):
        """Log and remove, via the ``published`` service, every item from ``get_expired_items``."""
        now = date_to_str(utcnow())
        template = 'deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'

        for expired in self.get_expired_items(now):
            logger.info(template.format(expired['type'], expired[config.ID_FIELD], expired['headline'],
                                        expired['expiry'], now))
            superdesk.get_resource_service('published').remove_expired(expired)
    def _bulk_find(resource, args):
        """Run a paginated, optionally sorted ``find`` on ``resource`` and return it as JSON.

        :param resource: name of the collection looked up on ``app.data.driver.db``
        :param args: mapping of request arguments; the keys read are ``where``,
            ``projection``, ``max_results``, ``page``, ``sort_by`` and ``sort_order``
        :return: a 200 ``Response`` whose JSON body holds ``_items``, ``_meta`` and ``_links``
        """
        find_args = {
            'filter': process_where(args.get('where')),
            'projection': args.get('projection'),
        }

        # a missing, malformed or negative ``max_results`` means "no limit" (0)
        try:
            limit = max(int(args['max_results']), 0)
        except (ValueError, KeyError):
            limit = 0
        if limit:
            find_args['limit'] = limit

        # pages are 1-based; anything missing, malformed or below 1 falls back to page 1
        try:
            page = max(int(args['page']), 1)
        except (ValueError, KeyError):
            page = 1
        if limit and page > 1:
            find_args['skip'] = limit * (page - 1)

        # sorting is optional; only a missing key is expected here, so unrelated
        # errors are no longer swallowed by a bare ``except``
        try:
            find_args['sort'] = [(args['sort_by'], args['sort_order'])]
        except KeyError:
            find_args['sort'] = None

        cursor = app.data.driver.db[resource].find(**find_args)
        # NOTE(review): Cursor.count() is not available in recent PyMongo releases -- confirm driver version
        total = cursor.count()
        # ceiling division; without a limit everything fits on a single page
        pages = ((total + limit - 1) // limit) if limit else 1
        items = list(cursor)

        meta = {'max_results': limit, 'total': total, 'page': page}

        links = {}
        if page > 1:
            links['prev'] = {'page': (page - 1)}
        if page < pages:
            links['next'] = {'page': (page + 1)}
            links['last'] = {'page': pages}

        # NOTE(review): ``iteritems`` and ``unicode`` exist on Python 2 only
        for item in items:
            for k, v in item.iteritems():
                if isinstance(v, datetime):
                    # date_to_str converts a datetime value to the format defined in the
                    #   configuration file
                    item[k] = date_to_str(v)
                if isinstance(v, unicode):
                    item[k] = str(v)

        msg = {'_items': items, '_meta': meta, '_links': links}

        msg_json = json.dumps(msg, default=json_util.default)

        return Response(response=msg_json,
                        status=200,
                        mimetype="application/json")
Beispiel #38
0
 def remove_expired_sessions(self):
     """Delete auth sessions idle past ``SESSION_EXPIRY_MINUTES`` and update online users."""
     auth_service = get_resource_service("auth")
     idle_limit = timedelta(minutes=app.settings["SESSION_EXPIRY_MINUTES"])
     cutoff = utcnow() - idle_limit
     logger.info("Deleting session not updated since {}".format(cutoff))

     lookup = {"_updated": {"$lte": date_to_str(cutoff)}}
     for stale in auth_service.get(req=None, lookup=lookup):
         stale_id = str(stale["_id"])
         auth_service.delete({"_id": stale_id})

     self._update_online_users()
Beispiel #39
0
    def test_query_getting_overdue_scheduled_content(self):
        """Expect exactly one overdue item among the five archive fixtures inserted here."""
        with self.app.app_context():
            # presumably only the "scheduled" fixture dated get_expiry_date(-10) counts as overdue
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "published"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "scheduled"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(0), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(10), "state": "scheduled"}])
            self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

            now = date_to_str(utcnow())
            overdue_items = get_overdue_scheduled_items(now, "archive")
            # assertEquals is a deprecated alias that was removed in Python 3.12
            self.assertEqual(1, overdue_items.count())
Beispiel #40
0
    def test_getitem_ifmatch_disabled_if_mod_since(self):
        """Regression test for issue #239.

        With IF_MATCH disabled, a GET carrying If-Modified-Since used to crash
        when a 304 was the expected outcome; it must answer 304 instead.
        """
        self.app.config['IF_MATCH'] = False

        # the test db has only just been built, so the If-Modified-Since value
        # has to be as recent as possible
        ims_value = date_to_str(datetime.now())
        conditional_headers = [("If-Modified-Since", ims_value)]

        response = self.test_client.get(self.item_id_url, headers=conditional_headers)
        self.assert304(response.status_code)
Beispiel #41
0
    def remove_expired_content(self):
        """Expire items from the ARCHIVE service until ``get_expired_items`` returns nothing."""
        logger.info("Removing expired content if spiked")
        now = date_to_str(utcnow())

        while True:
            batch = self.get_expired_items(now)
            if not (batch.count() > 0):
                break

            for doc in batch:
                logger.info("deleting {} expiry: {} now:{}".format(doc["_id"], doc["expiry"], now))
                superdesk.get_resource_service(ARCHIVE).remove_expired(doc)
Beispiel #42
0
    def remove_expired_items(self):
        """Remove each item returned by ``get_expired_items`` through the ``published`` service."""
        now = date_to_str(utcnow())

        for doc in self.get_expired_items(now):
            details = (doc['type'], doc[config.ID_FIELD], doc['headline'], doc['expiry'], now)
            logger.info(
                'deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'.format(*details))

            published_service = superdesk.get_resource_service('published')
            published_service.remove_expired(doc)
Beispiel #43
0
    def get_expired_items(self, expiry_datetime):
        """Yield, batch by batch, the not-yet-expired-flagged items whose end date has passed.

        The search excludes documents with ``expired`` set to True, keeps those whose
        ``dates.end`` is at or before ``expiry_datetime`` and sorts by ``dates.start``.

        :param expiry_datetime: upper bound applied to ``dates.end``
        :return: generator of lists of documents, one list per search request
        """
        not_flagged_expired = {'bool': {'must_not': [{'term': {'expired': True}}]}}
        ended_before = {'range': {'dates.end': {'lte': date_to_str(expiry_datetime)}}}
        query = {
            'query': not_flagged_expired,
            'filter': ended_before,
            'sort': [{'dates.start': 'asc'}],
            'size': get_max_recurrent_events(),
        }

        offset = 0
        hit_total = None  # unknown until the first search has run

        while True:
            query["from"] = offset
            page = self.search(query)

            # only the first search is asked for the total; stop right away
            # when it reports no hits at all
            if hit_total is None:
                hit_total = page.count()
                if hit_total < 1:
                    return

            # an empty page means everything has been consumed
            page_size = len(page.docs)
            if page_size == 0:
                return

            offset += page_size

            # hand the batch over to the caller for iteration
            yield list(page.docs)
Beispiel #44
0
    def remove_expired_content(self):
        """Expire published items, re-querying until no expired item is returned."""
        logger.info('Removing expired content from published')
        now = date_to_str(utcnow())
        template = 'deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'

        while True:
            batch = self.get_expired_items(now)
            if not (batch.count() > 0):
                break

            for doc in batch:
                logger.info(template.format(doc['type'], doc['_id'], doc['headline'], doc['expiry'], now))
                superdesk.get_resource_service('published').remove_expired(doc)
Beispiel #45
0
    def test_query_getting_expired_content(self):
        """Expect exactly two expired items among the seven spiked fixtures inserted here."""
        with self.app.app_context():
            # presumably the fixtures dated get_expiry_date(-10) and get_expiry_date(0) are the expired ones
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(-10), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(0), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(10), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(20), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(30), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": None, "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

            now = date_to_str(utcnow())
            expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
            # assertEquals is a deprecated alias that was removed in Python 3.12
            self.assertEqual(2, expired_items.count())
Beispiel #46
0
    def remove_expired_items(self):
        """Log and remove, via the ``published`` service, every item from ``get_expired_items``."""
        now = date_to_str(utcnow())
        template = 'Removing article {{id: {}, version: {}, type: {}, headline: {}, expired_on: {} }}'

        for doc in self.get_expired_items(now):
            # the headline is optional; every other field is required
            summary = template.format(doc[config.ID_FIELD], doc[config.VERSION], doc[ITEM_TYPE],
                                      doc.get('headline', ''), doc['expiry'])
            logger.info(summary)

            superdesk.get_resource_service('published').remove_expired(doc)
    def test_query_getting_expired_content(self):
        """Expect exactly two expired items among the seven spiked fixtures inserted here."""
        with self.app.app_context():
            # presumably the fixtures dated get_expiry_date(-10) and get_expiry_date(0) are the expired ones
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(-10), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(0), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(10), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(20), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(30), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': None, 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

            now = date_to_str(utcnow())
            expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
            # assertEquals is a deprecated alias that was removed in Python 3.12
            self.assertEqual(2, expired_items.count())
    def get_items(self, now):
        """Yield, page by page, the archive items whose expiry is at or after a threshold.

        The threshold is ``now`` plus ``self.expiry_minutes`` minutes. Only items in the
        published, corrected, killed or recalled state are selected, and pages are walked
        in ascending ``unique_id`` order.

        :param datetime now: current date time
        :return: generator of lists, one list of items per page
        """
        logger.info('Fetching expired items from archive collection.')
        threshold = now + timedelta(minutes=self.expiry_minutes)

        states = [
            CONTENT_STATE.PUBLISHED,
            CONTENT_STATE.CORRECTED,
            CONTENT_STATE.KILLED,
            CONTENT_STATE.RECALLED
        ]
        query = {'expiry': {'$gte': date_to_str(threshold)}, ITEM_STATE: {'$in': states}}
        sort_spec = '[("unique_id", 1)]'

        # first pass: count the matches and find the smallest unique_id
        probe = ParsedRequest()
        probe.sort = sort_spec
        probe.where = json.dumps(query)
        matches = get_resource_service(ARCHIVE).get_from_mongo(req=probe, lookup=None)
        count = matches.count()
        if not count:
            logger.info('No items to modify.')
            return

        no_of_pages = len(range(0, count, self.default_page_size))
        unique_id = matches[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))

        for page_number in range(1, no_of_pages + 1):
            logger.info('Fetching items for page number: {} unique_id: {}'. format(page_number, unique_id))

            # the first page includes the starting id itself; later pages
            # continue strictly after the last id seen
            operator = '$gte' if page_number == 1 else '$gt'
            query['unique_id'] = {operator: unique_id}

            req = ParsedRequest()
            req.sort = sort_spec
            req.where = json.dumps(query)
            req.max_results = self.default_page_size
            items = list(get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None))
            if items:
                unique_id = items[-1]['unique_id']

            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), page_number))
            yield items
Beispiel #49
0
def get_query_for_expired_items(provider_id, expiration_date):
    """Build the serialized ``bool`` query selecting a provider's expired ingest items.

    :param provider_id: value matched against ``ingest.ingest_provider``
    :param expiration_date: inclusive upper bound for ``ingest._updated``
    :return: the query serialized with ``superdesk.json.dumps``
    """
    updated_before = {'range': {'ingest._updated': {'lte': date_to_str(expiration_date)}}}
    from_provider = {'term': {'ingest.ingest_provider': provider_id}}
    return superdesk.json.dumps({'bool': {'must': [updated_before, from_provider]}})
Beispiel #50
0
    def get_expired_items(self,
                          expiry_datetime=None,
                          expiry_days=None,
                          max_results=None,
                          include_children=True):
        """Yield the expired items in batches.

        Items are sorted by `_id`; each batch is requested with `max_results` as its size and
        the next batch starts after the last `_id` of the previous one.

        :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
        :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
        :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
        :param boolean include_children: Include only root item if False, otherwise include the entire item chain
        :return: generator of lists of expired content_api items
        """
        expiry_datetime = utcnow() if expiry_datetime is None else expiry_datetime
        expiry_days = app.settings["CONTENT_API_EXPIRY_DAYS"] if expiry_days is None else expiry_days
        max_results = app.settings["MAX_EXPIRY_QUERY_LIMIT"] if max_results is None else max_results

        expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
        last_id = None

        while True:
            conditions = [{"_updated": {"$lte": expire_at}}]
            if last_id is not None:
                # resume right after the last item of the previous batch
                conditions.append({"_id": {"$gt": last_id}})
            if not include_children:
                # root items are the ones without an ``ancestors`` field
                conditions.append({"ancestors": {"$exists": False}})

            req = ParsedRequest()
            req.sort = "_id"
            req.where = json.dumps({"$and": conditions})
            req.max_results = max_results

            batch = list(self.get_from_mongo(req=req, lookup=None))
            if not batch:
                return

            last_id = batch[-1]["_id"]
            yield batch
def _normalise_date_param(params: Dict[str, Any], key: str, label: str):
    """Validate the date stored under ``params[key]`` and return it in string form.

    A string value gets ``+0000`` appended (in ``params`` too) when it lacks that
    suffix and is then parsed with ``str_to_date`` purely as validation; a
    ``datetime`` value is converted with ``date_to_str``. Empty values and values
    of any other type are returned untouched.

    :raises: ``SuperdeskApiError.badRequestError`` naming ``label`` on any failure
    """
    try:
        value = params.get(key)
        if value:
            if isinstance(value, str):
                if not value.endswith('+0000'):
                    params[key] += '+0000'
                    value = params[key]
                str_to_date(params[key])  # validating if date can be parsed
            elif isinstance(value, datetime):
                value = date_to_str(value)
    except Exception as e:
        logger.exception(e)
        raise SuperdeskApiError.badRequestError('Invalid value for {}'.format(label))
    return value


def get_date_params(params: Dict[str, Any]):
    """Extract the date-related values from ``params``.

    :param params: request parameters; ``date_filter``, ``start_date`` and ``end_date``
        are read here, and string dates lacking ``+0000`` are updated in place
    :return: tuple ``(date_filter, start_date, end_date, tz_offset)`` where ``date_filter``
        is stripped and lower-cased and ``tz_offset`` comes from ``get_time_zone(params)``
    :raises: ``SuperdeskApiError.badRequestError`` when a date cannot be processed
    """
    raw_filter = params.get('date_filter') or ''
    date_filter = raw_filter.strip().lower()
    tz_offset = get_time_zone(params)

    start_date = _normalise_date_param(params, 'start_date', 'start date')
    end_date = _normalise_date_param(params, 'end_date', 'end date')

    return date_filter, start_date, end_date, tz_offset
Beispiel #52
0
    def get_expired_items(self, expiry_datetime, invalid_only=False):
        """Yield the expired items in batches, paging by ascending ``unique_id``.

        An item matches when its ``expiry`` is at or before ``expiry_datetime`` and it
        either has a non-null ``task.desk`` or is spiked with a null ``task.desk``.

        :param datetime expiry_datetime: expiry datetime
        :param bool invalid_only: when True select only items whose ``expiry_status`` is
            ``invalid``, otherwise only items whose status is not ``invalid``
        :return: generator of lists of items, each requested with
            ``config.MAX_EXPIRY_QUERY_LIMIT`` as ``max_results``
        """
        if invalid_only:
            status_clause = {'expiry_status': 'invalid'}
        else:
            status_clause = {'expiry_status': {'$ne': 'invalid'}}

        last_unique_id = 0

        while True:
            desk_clause = {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]}
            where = {'$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                desk_clause,
                # continue strictly after the last item already handed out
                {'unique_id': {'$gt': last_unique_id}},
                status_clause,
            ]}

            req = ParsedRequest()
            req.sort = 'unique_id'
            req.where = json.dumps(where)
            req.max_results = config.MAX_EXPIRY_QUERY_LIMIT

            batch = list(self.get_from_mongo(req=req, lookup=None))
            if not batch:
                return

            last_unique_id = batch[-1]['unique_id']
            yield batch
Beispiel #53
0
    def remove_expired_items(self):
        """Remove each item returned by ``get_expired_items`` through the ``published`` service."""
        now = date_to_str(utcnow())

        for doc in self.get_expired_items(now):
            # the headline is optional; every other field is required
            fields = (
                doc[config.ID_FIELD],
                doc[config.VERSION],
                doc[ITEM_TYPE],
                doc.get("headline", ""),
                doc["expiry"],
            )
            logger.info(
                "Removing article {{id: {}, version: {}, type: {}, headline: {}, expired_on: {} }}".format(*fields)
            )

            published_service = superdesk.get_resource_service("published")
            published_service.remove_expired(doc)
Beispiel #54
0
    def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
        """Yield the expired items in batches.

        Items are sorted by `_id`; each batch is requested with `max_results` as its size and
        the next batch starts after the last `_id` of the previous one.

        :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
        :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
        :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
        :param boolean include_children: Include only root item if False, otherwise include the entire item chain
        :return: generator of lists of expired content_api items
        """
        if expiry_datetime is None:
            expiry_datetime = utcnow()
        if expiry_days is None:
            expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS']
        if max_results is None:
            max_results = app.settings['MAX_EXPIRY_QUERY_LIMIT']

        expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
        resume_after = None

        while True:
            clauses = [{'_updated': {'$lte': expire_at}}]
            if resume_after is not None:
                # skip everything up to the last item of the previous batch
                clauses.append({'_id': {'$gt': resume_after}})
            if not include_children:
                # root items are the ones without an ``ancestors`` field
                clauses.append({'ancestors': {'$exists': False}})

            req = ParsedRequest()
            req.sort = '_id'
            req.where = json.dumps({'$and': clauses})
            req.max_results = max_results

            found = list(self.get_from_mongo(req=req, lookup=None))
            if not found:
                return

            resume_after = found[-1]['_id']
            yield found
Beispiel #55
0
def _prepare_response(resource, dct, last_modified=None, etag=None,
                      status=200):
    """ Builds the response object for the current request using the best
    matching renderer, and attaches the accessory directives (caching, etag,
    last-modified) when they apply.

    :param resource: the resource involved.
    :param dct: the dict that should be sent back as a response.
    :param last_modified: Last-Modified header value.
    :param etag: ETag header value.
    :param status: response status.

    .. versionadded:: 0.0.4
    """
    # best match between the client's request and the available mime types,
    # together with the name of the matching render function
    mime, renderer = _best_mime()

    # the render function is looked up by name among this module's globals
    render = globals()[renderer]

    # main wsgi response object, built from the rendered item
    resp = make_response(render(**dct), status)
    resp.mimetype = mime

    # cache directives only apply to GET requests
    if request.method == 'GET':
        if resource:
            settings = config.DOMAIN[resource]
            cache_control, expires = settings['cache_control'], settings['cache_expires']
        else:
            cache_control, expires = config.CACHE_CONTROL, config.CACHE_EXPIRES

        if cache_control:
            resp.headers.add('Cache-Control', cache_control)
        if expires:
            # ``expires`` is an offset added to the current time
            resp.expires = time.time() + expires

    # etag and last-modified
    if etag:
        resp.headers.add('ETag', etag)
    if last_modified:
        resp.headers.add('Last-Modified', date_to_str(last_modified))

    return resp
Beispiel #56
0
def prepopulate_data(file_name):
    """Load a prepopulation JSON file and post each of its entries to a resource service.

    The file is read from ``<APP_ABSPATH>/prepopulate/prepopulate-data/<file_name>``.
    Each entry may carry ``resource``, ``id_name`` and ``data`` keys.

    :param file_name: name of the JSON file inside the prepopulate-data directory
    :raises Exception: when posting an entry returns no ids
    """
    # 'NOW()' is replaced by the current time as rendered by date_to_str
    placeholders = {'NOW()': date_to_str(utcnow())}
    file = os.path.join(superdesk.app.config.get('APP_ABSPATH'), 'prepopulate', 'prepopulate-data', file_name)
    with open_with_report(file, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)
        for item in json_data:
            service = get_resource_service(item.get('resource', None))
            id_name = item.get('id_name', None)
            # round-trip the data through JSON text so placeholders can be substituted
            text = json.dumps(item.get('data', None))
            text = apply_placeholders(placeholders, text)
            data = json.loads(text)
            if item.get('resource'):
                app.data.mongo._mongotize(data, item.get('resource'))
            ids = service.post([data])
            if not ids:
                raise Exception()
            # remember the new id so later entries can refer to it through a placeholder
            if id_name:
                placeholders[id_name] = str(ids[0])