Example #1
def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record)
                or is_openaire_dataset(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)
        url = '{}/api/results/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)
        if not res.ok:
            raise OpenAIRERequestError(res.text)
        else:
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        recid = record.get('recid')
        current_cache.set('openaire_direct_index:{}'.format(recid),
                          datetime.now(),
                          timeout=-1)
        openaire_direct_index.retry(exc=exc)
Example #2
def load_restricted_collections():
    restricted_collections = current_cache.get('restricted_collections')
    if restricted_collections:
        return restricted_collections
    else:
        restricted_collections = {
            a.argument for a in ActionUsers.query.filter_by(
                action='view-restricted-collection').all()
        }
        restricted_collections |= {
            a.argument for a in ActionRoles.query.filter_by(
                action='view-restricted-collection').all()
        }
        if restricted_collections:
            current_cache.set(
                'restricted_collections',
                restricted_collections,
                timeout=current_app.config.get(
                    'INSPIRE_COLLECTIONS_RESTRICTED_CACHE_TIMEOUT', 120)
            )
        return restricted_collections
Example #3
 def decorated(*args, **kwargs):
     key = f'{prefix}:{key_func(f, *args, **kwargs)}'
     res = current_cache.get(key)
     if not res:
         res = f(*args, **kwargs)
         current_cache.set(key, res, timeout=timeout)
     return res
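The fragment above is only the inner wrapper: `prefix`, `key_func`, `timeout` and `f` are free variables supplied by an enclosing decorator factory that the listing omits. A minimal sketch of what that factory could look like, assuming those names keep their apparent roles (the factory name `cached_with_key` is hypothetical):

from functools import wraps

from invenio_cache import current_cache


def cached_with_key(prefix, key_func, timeout=300):
    """Hypothetical factory around the `decorated` wrapper shown above."""
    def decorator(f):
        @wraps(f)
        def decorated(*args, **kwargs):
            # Build a namespaced cache key from the decorated call.
            key = f'{prefix}:{key_func(f, *args, **kwargs)}'
            res = current_cache.get(key)
            if not res:
                res = f(*args, **kwargs)
                current_cache.set(key, res, timeout=timeout)
            return res
        return decorated
    return decorator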
Example #4
def _get_from_cache(cache_key, generator, force_update=False):
    """Retrieve specified key from cache.
    If key is missing or is empty or if `force_update` is set to `True`
        update cache with generator and return cached data.

    Args:
        cache_key: name of the key where cache is stored
        generator: partial to generate data for specified key if key is empty
            or when `force_update` is `True`.
        force_update: when `True` updates cache key then return cached data.

    Returns:
        Whatever was under `cache_key` key in cache.

    """
    data = current_cache.get(cache_key)
    if not data or force_update:
        new_data = generator()
        current_cache.set(
            cache_key,
            new_data,
            timeout=current_app.config.get("RT_USERS_CACHE_TIMEOUT", 86400),
        )
        return new_data
    return data
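Per its docstring, `generator` is expected to be a partial. A short usage sketch under that assumption; `fetch_rt_users` is a hypothetical stand-in for whatever actually produces the data:

from functools import partial


def fetch_rt_users():
    # Hypothetical data producer (cf. _get_all_of("user") in Examples #19/#24).
    return {'1': 'alice', '2': 'bob'}


users = _get_from_cache('rt_users', partial(fetch_rt_users))
# Pass force_update=True to refresh the cached value unconditionally.
users = _get_from_cache('rt_users', partial(fetch_rt_users), force_update=True)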
Example #5
def test_cache_ui():
    with_update = request.args.get('update', '0')
    with_delete = request.args.get('delete', '0')
    with_update = bool(int(with_update))
    with_delete = bool(int(with_delete))

    if with_update:
        if with_delete:
            current_cache.delete('test_key')
        current_cache.set('test_key', ObjDict(**{
            'device_id': 12345678,
            'cache_time': datetime.today()
        }), timeout=60)

    test_value = current_cache.get('test_key')
    if not test_value:
        cache_rtn = current_cache.set('test_key', ObjDict(**{
            'device_id': 12345678,
            'cache_time': datetime.today()
        }), timeout=60)
        if not cache_rtn:
            return jsonify({
                'code': -1,
                'msg': 'cache set error'
            })
        test_value = current_cache.get('test_key')
    return jsonify({
        'code': 0,
        'msg': 'success',
        'data': {
            'dev_id': test_value.device_id,
            'store_time': test_value.cache_time.strftime('%Y-%m-%d %H:%M:%S')
        }
    })
Example #6
    def query_cuor_by_uuid(cls, uuid):
        """"""
        try:
            cache = current_cache.get(
                "query_cuor_by_pid:{0}".format(uuid)) or {}
            if "date" not in cache:
                cache["date"] = datetime.now()
            if datetime.now() - cache["date"] < timedelta(
                    days=1) and "org" in cache:
                return cache["org"]

            api_endpoint = current_app.config['CUOR_API_ENDPOINT']
            session = requests.Session()
            url = api_endpoint + '/' + uuid
            response = session.get(url, verify=False)
            result = json.loads(response.text)

            cache["org"] = result
            cache["date"] = datetime.now()
            current_cache.set("query_cuor_by_pid:{0}".format(uuid),
                              cache,
                              timeout=-1)

            return result
        except Exception:
            print(traceback.format_exc())
            return None
Example #7
    def read_all(self, identity, fields, cache=True, **kwargs):
        """Search for records matching the querystring."""
        cache_key = "-".join(fields)
        results = current_cache.get(cache_key)
        es_query = Q("match_all")

        if not results:
            results = self._read_many(
                identity, es_query, fields, **kwargs)
            if cache:
                # The ES DSL Response object is not picklable, so cache its
                # dict form via to_dict(); it is rebuilt into a Response below.
                current_cache.set(cache_key, results.to_dict())

        else:
            search = self.create_search(
                identity=identity,
                record_cls=self.record_cls,
                search_opts=self.config.search,
                permission_action='search',
            ).query(es_query)

            results = Response(search, results)

        return self.result_list(self, identity, results)
Example #8
def schedule_update_datacite_metadata(max_count):
    """Schedule the update of DataCite metadata."""
    task_details = current_cache.get('update_datacite:task_details')

    if task_details is None or 'from_date' not in task_details or 'until_date' not in task_details:
        return

    doi_pids = find_registered_doi_pids(task_details['from_date'],
                                        task_details['until_date'],
                                        current_app.config['ZENODO_LOCAL_DOI_PREFIXES'])
    dois_count = doi_pids.count()

    task_details['left_pids'] = dois_count
    task_details['last_update'] = datetime.utcnow()
    current_cache.set('update_datacite:task_details', task_details, timeout=-1)

    if dois_count == 0:
        if 'finish_date' not in task_details:
            task_details['finish_date'] = datetime.utcnow()
            current_cache.set('update_datacite:task_details', task_details, timeout=-1)
        return

    scheduled_dois_count = max_count if max_count < dois_count else dois_count
    scheduled_dois_pids = doi_pids.limit(scheduled_dois_count)

    for doi_pid in scheduled_dois_pids:
        update_datacite_metadata.delay(doi_pid.pid_value,
                                       str(doi_pid.object_uuid),
                                       task_details['job_id'])
Example #10
def schedule_update_datacite_metadata(max_count):
    """Schedule the update of DataCite metadata."""
    task_details = current_cache.get('update_datacite:task_details')

    if task_details is None or 'from_date' not in task_details or 'until_date' not in task_details:
        return

    pids = find_registered_doi_pids(task_details['from_date'],
                                    task_details['until_date'],
                                    current_app.config['ZENODO_LOCAL_DOI_PREFIXES'])
    pids_count = pids.count()

    task_details['left_pids'] = pids_count
    task_details['last_update'] = datetime.utcnow()
    current_cache.set('update_datacite:task_details', task_details)

    if pids_count == 0:
        if 'finish_date' not in task_details:
            task_details['finish_date'] = datetime.utcnow()
            current_cache.set('update_datacite:task_details', task_details)
        return

    scheduled_pids_count = max_count if max_count < pids_count else pids_count
    scheduled_pids = pids.limit(scheduled_pids_count)

    for pid in scheduled_pids:
        update_datacite_metadata.delay(pid.pid_value, pid.object_uuid, task_details['job_id'])
Example #11
    def read_all(self, identity, fields, type, cache=True, **kwargs):
        """Search for records matching the querystring."""
        cache_key = type + "_" + "-".join(fields)
        results = current_cache.get(cache_key)
        es_query = Q("match_all")

        if not results:
            # If not found, NoResultFound is raised (caught by the resource).
            vocabulary_type = VocabularyType.query.filter_by(id=type).one()
            vocab_id_filter = Q('term', type__id=vocabulary_type.id)

            results = self._read_many(identity,
                                      es_query,
                                      fields,
                                      extra_filter=vocab_id_filter,
                                      **kwargs)
            if cache:
                # The ES DSL Response object is not picklable, so cache its
                # dict form via to_dict(); it is rebuilt into a Response below.
                current_cache.set(cache_key, results.to_dict())

        else:
            search = self.create_search(
                identity=identity,
                record_cls=self.record_cls,
                search_opts=self.config.search,
                permission_action='search',
            ).query(es_query)

            results = Response(search, results)

        return self.result_list(self, identity, results)
Example #12
def get_organization_relationships(uuid):
    """Get a source by any PID received as a argument, including UUID"""
    try:
        rtype = request.args.get('type')
        cache = current_cache.get(
            "get_organization_relationships:{0}{1}".format(uuid, rtype)) or {}
        if "date" not in cache:
            cache["date"] = datetime.datetime.now()
        if datetime.datetime.now() - cache["date"] < datetime.timedelta(
                days=1) and "stats" in cache:
            result = cache["stats"]
            return jsonify(result)
        else:
            result = _get_organization_relationships(uuid, rtype)
            cache["date"] = datetime.datetime.now()
            cache["stats"] = result
            current_cache.set("get_organization_relationships:{0}{1}".format(
                uuid, rtype),
                              cache,
                              timeout=-1)
            return jsonify(result)

    except Exception as e:
        return jsonify({
            'ERROR': str(e),
        })
Example #13
    def query_cuor_by_pid(cls, pid):
        """Request an Organization by Persistent Identifier
            not the CUOR UUID
         """
        try:
            cache = current_cache.get(
                "query_cuor_by_pid:{0}".format(pid)) or {}
            if "date" not in cache:
                cache["date"] = datetime.now()
            if datetime.now() - cache["date"] < timedelta(
                    days=1) and "org" in cache:
                print("USING CACHE ORGANIZATION")
                if 'status' in cache["org"] and cache["org"]['status'] == '404':
                    cache["org"] = None
                    return None
                return cache["org"]

            api_endpoint = current_app.config['CUOR_API_ENDPOINT']
            session = requests.Session()
            url = api_endpoint + '/pid?value=' + pid
            response = session.get(url, verify=False)
            result = json.loads(response.text)
            if 'status' in result and result['status'] == '404':
                return None
            cache["org"] = result
            cache["date"] = datetime.now()
            current_cache.set("query_cuor_by_pid:{0}".format(pid),
                              cache,
                              timeout=-1)
            return result
        except Exception:
            return None
Example #14
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Run export job."""
        if start_date is None:
            bookmark = current_cache.get('piwik_export:bookmark')
            start_date = dateutil_parse(bookmark) if bookmark else None

        time_range = {}
        if start_date is not None:
            time_range['gte'] = start_date.replace(microsecond=0).isoformat()
        if end_date is not None:
            time_range['lte'] = end_date.replace(microsecond=0).isoformat()

        events = Search(using=current_search_client,
                        index='events-stats-*').filter(
                            'range', timestamp=time_range).sort({
                                'timestamp': {
                                    'order': 'asc'
                                }
                            }).params(preserve_order=True).scan()

        url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get(
            'url', None)
        token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
            .get('token_auth', None)
        chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER']\
            .get('chunk_size', 0)

        for event_chunk in chunkify(events, chunk_size):
            query_strings = []
            for event in event_chunk:
                query_string = self._build_query_string(event)
                query_strings.append(query_string)

            payload = {'requests': query_strings, 'token_auth': token_auth}

            res = requests.post(url, json=payload)

            # Failure: not 200 or not "success"
            content = res.json() if res.ok else None
            if res.status_code == 200 and content.get('status') == 'success':
                if content.get('invalid') != 0:
                    msg = 'Invalid events in Piwik export request.'
                    info = {
                        'begin_event_timestamp': event_chunk[0].timestamp,
                        'end_event_timestamp': event_chunk[-1].timestamp,
                        'invalid_events': content.get('invalid')
                    }
                    current_app.logger.warning(msg, extra=info)
                elif update_bookmark is True:
                    current_cache.set('piwik_export:bookmark',
                                      event_chunk[-1].timestamp,
                                      timeout=-1)
            else:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                }
                raise PiwikExportRequestError(msg, export_info=info)
Example #15
 def set_cache_page(self, key, value):
     """Set the page into the cache."""
     # Check if there is a set of keys; if not, create one
     current_key_set = current_cache.get(self.cached_pages_set_key) or set()
     current_key_set.add(key)
     current_cache.set(self.cached_pages_set_key, current_key_set)
     current_cache.set(
         key, value,
         timeout=current_app.config['WEKO_SITEMAP_CACHE_TIMEOUT'])
Example #16
    def update_datacite(self):
        """."""
        form = UpdateDataciteForm()
        cancel_or_new_task_form = FlaskForm()

        is_task_running = False
        time = 0
        task_details = current_cache.get('update_datacite:task_details')

        if task_details:
            is_task_running = True
            if cancel_or_new_task_form.validate_on_submit():
                current_cache.set('update_datacite:task_details', None)
                return redirect(url_for('updatedataciteview.update_datacite'))
        else:
            if form.validate_on_submit():
                from_date = request.form['from_date']
                until_date = request.form['until_date']

                action = request.form['action']
                if action == 'SubmitDates':
                    if from_date > until_date:
                        flash("Error: the 'From' date should precede the 'Until' date.")
                    else:
                        pids_count = find_registered_doi_pids(from_date,
                                                                until_date,
                                                                current_app.config['ZENODO_LOCAL_DOI_PREFIXES']).count()
                        task_details = dict(
                            total_pids=pids_count
                        )
                        time = pids_count/current_app.config['DATACITE_UPDATING_RATE_PER_HOUR']

                elif action == 'Confirm':
                    pids_count = find_registered_doi_pids(from_date,
                                                          until_date,
                                                          current_app.config['ZENODO_LOCAL_DOI_PREFIXES']).count()
                    task_details = dict(
                        start_date=datetime.utcnow(),
                        job_id=str(uuid.uuid4()),
                        from_date=from_date,
                        until_date=until_date,
                        total_pids=pids_count,
                        left_pids=pids_count,
                        last_update=datetime.utcnow()
                    )
                    current_cache.set('update_datacite:task_details',
                                      task_details, timeout=-1)
                    return redirect(url_for('updatedataciteview.update_datacite'))

                elif action == 'Cancel':
                    return redirect(url_for('updatedataciteview.update_datacite'))

        return self.render('zenodo_records/update_datacite.html',
                           form=form,
                           cancel_or_new_task_form=cancel_or_new_task_form,
                           details=task_details,
                           is_task_running=is_task_running, time=time)
Example #17
def get_anonymization_salt(ts):
    """Get the anonymization salt based on the event timestamp's day."""
    salt_key = 'stats:salt:{}'.format(ts.date().isoformat())
    salt = current_cache.get(salt_key)
    if not salt:
        salt_bytes = os.urandom(32)
        salt = b64encode(salt_bytes).decode('utf-8')
        current_cache.set(salt_key, salt, timeout=60 * 60 * 24)
    return salt
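A daily salt like this is typically combined with a hash so that raw identifiers are never stored; the pairing below is an illustrative assumption, not taken from the original module:

import hashlib
from datetime import datetime


def anonymize_id(value, ts=None):
    # Hypothetical helper: hash `value` with the salt of the event's day.
    ts = ts or datetime.utcnow()
    salt = get_anonymization_salt(ts)
    return hashlib.sha256((salt + str(value)).encode('utf-8')).hexdigest()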
Example #18
def get_widget_design_setting(repository_id, current_language, page_id=None):
    """Get widget design setting.

    @param repository_id: The repository identifier
    @param current_language: The current language
    @param page_id: The Page identifier.
    @return: The widget design setting response (possibly gzip-compressed).
    """
    def validate_response():
        """Check the response data can compress with gzip.

        @return: True if the response data can compress with gzip
        """
        is_valid = True
        accept_encoding = request.headers.get('Accept-Encoding', '')
        response = jsonify({})
        if not config.WEKO_GRIDLAYOUT_IS_COMPRESS_WIDGET or \
            response.direct_passthrough or \
            'gzip' not in accept_encoding.lower() or \
                'Content-Encoding' in response.headers:
            is_valid = False
        return is_valid

    def get_widget_response(_page_id):
        """Get widget setting response.

        :param _page_id: The Page identifier.
        @return: The widget setting response
        """
        lang_code = current_language or get_default_language()
        if _page_id:
            from .services import WidgetDesignPageServices
            widget_setting_data = WidgetDesignPageServices\
                .get_widget_design_setting(_page_id, lang_code)
        else:
            from .services import WidgetDesignServices
            widget_setting_data = WidgetDesignServices\
                .get_widget_design_setting(
                    repository_id, lang_code)
        return jsonify(widget_setting_data)

    if validate_response() and current_language:
        if page_id:
            key = (config.WEKO_GRIDLAYOUT_WIDGET_PAGE_CACHE_KEY +
                   repository_id + "_" + page_id + "_" + current_language)
        else:
            key = (config.WEKO_GRIDLAYOUT_WIDGET_CACHE_KEY + repository_id +
                   "_" + current_language)
        if current_cache.get(key) is None:
            data = compress_widget_response(get_widget_response(page_id))
            current_cache.set(key, data)
            return data
        else:
            return current_cache.get(key)
    else:
        return get_widget_response(page_id)
Example #19
def get_users():
    """Returns list of all users as {id, name} dict

    :rtype: dict - with ``name (string)``, ``id (integer)`` properties
    """
    queues = current_cache.get('rt_users')
    if queues:
        return queues
    else:
        queues = _get_all_of("user")
        if queues:
            current_cache.set('rt_users', queues, timeout=current_app.config.get(
                'RT_USERS_CACHE_TIMEOUT', 86400))
        return queues
Example #21
 def wrapper(*args, **kwargs):
     key = key_prefix
     for value in args:
         key += str(value)
     cache_fun = current_cache.cached(
         timeout=timeout,
         key_prefix=key,
         forced_update=is_update_cache,
     )
     if current_cache.get(key) is None:
         data = cache_fun(f)(*args, **kwargs)
         current_cache.set(key, data)
         return data
     else:
         return current_cache.get(key)
Example #22
def calculate_metrics(metric_id, cache=True):
    """Compute the metrics for ``metric_id``, returning a cached copy when allowed."""
    if cache:
        cached_data = current_cache.get(
            'ZENODO_METRICS_CACHE::{}'.format(metric_id))
        if cached_data is not None:
            return cached_data

    result = deepcopy(current_app.config['ZENODO_METRICS_DATA'][metric_id])

    for metric in result:
        metric['value'] = metric['value']()

    current_cache.set(
        'ZENODO_METRICS_CACHE::{}'.format(metric_id),
        result,
        timeout=current_app.config['ZENODO_METRICS_CACHE_TIMEOUT'])

    return result
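The loop above implies that each entry of `ZENODO_METRICS_DATA[metric_id]` is a dict whose 'value' holds a zero-argument callable resolved at call time. A sketch of a config shaped that way; the metric id and fields are hypothetical:

ZENODO_METRICS_DATA = {
    'example-metrics': [
        {
            'name': 'total_records',
            'value': lambda: 42,  # placeholder for a real counter query
        },
    ],
}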
Example #23
def get_queues():
    """Returns list of all queues.

    Returns:
        list (dict): list of all queues as {id, name}.
    """
    queues = current_cache.get("rt_queues")
    if queues:
        return queues
    else:
        queues = _get_all_of("queue")
        if queues:
            current_cache.set(
                "rt_queues",
                queues,
                timeout=current_app.config.get("RT_QUEUES_CACHE_TIMEOUT",
                                               86400),
            )
        return queues
Example #24
def get_users():
    """Get users.

    Returns:
        list (dict): list of all users as {id, name} dict
    """
    queues = current_cache.get("rt_users")
    if queues:
        return queues
    else:
        queues = _get_all_of("user")
        if queues:
            current_cache.set(
                "rt_users",
                queues,
                timeout=current_app.config.get("RT_USERS_CACHE_TIMEOUT",
                                               86400),
            )
        return queues
Example #25
    def save(cls, openid=None, **kwargs):
        """
        Save Wepy user info
        :param openid:
        :param kwargs:
        :return:
        """
        assert openid

        try:
            cache.set(openid, kwargs)
            db_wepy_user = WepyUser()
            wepy_user = db_wepy_user.get_by_openid(openid)
            if wepy_user is None:
                wepy_user = WepyUser.create(openid, kwargs.pop('session'))
            else:
                wepy_user.update(openid, **kwargs)
        except Exception as ex:
            current_app.logger.error('WepyUserApi.save({}) Except: '.format(openid), ex)
        return wepy_user
Example #26
def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record) or
                is_openaire_dataset(record) or
                is_openaire_software(record) or
                is_openaire_other(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)
        url = '{}/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)

        if not res.ok:
            raise OpenAIRERequestError(res.text)

        res_beta = None
        if current_app.config['OPENAIRE_API_URL_BETA']:
            url_beta = '{}/feedObject'.format(
                current_app.config['OPENAIRE_API_URL_BETA'])
            res_beta = req.post(url_beta, data=data)

        if res_beta and not res_beta.ok:
            raise OpenAIRERequestError(res_beta.text)
        else:
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        recid = record.get('recid')
        current_cache.set('openaire_direct_index:{}'.format(recid),
                          datetime.now(), timeout=-1)
        openaire_direct_index.retry(exc=exc)
Example #27
    def set(self, key, value, timeout=7200):
        """Set the key and value.

        Args:
            key (str): a key name.
            value (str): a value.
            timeout (int): a timeout time in seconds.

        Returns:
            bool: if the key is stored.
        """
        return current_cache.set(self._prefix(key), value, timeout=timeout)
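`_prefix` is not shown in this entry; a minimal sketch of a class this method could sit on, assuming `_prefix` simply namespaces the key (the class name and prefix scheme are assumptions):

from invenio_cache import current_cache


class PrefixedCache(object):
    """Hypothetical wrapper namespacing keys in the shared cache."""

    def __init__(self, prefix):
        self.prefix = prefix

    def _prefix(self, key):
        # Namespace the key so unrelated modules cannot collide.
        return '{0}:{1}'.format(self.prefix, key)

    def set(self, key, value, timeout=7200):
        return current_cache.set(self._prefix(key), value, timeout=timeout)

    def get(self, key):
        return current_cache.get(self._prefix(key))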
Example #28
def get_sources_stats():
    """

    """
    try:

        offset = request.args.get('offset') or 3

        # top organization bucket
        org_id = request.args.get('org')

        cache = current_cache.get("get_sources_stats:{0}{1}".format(
            org_id, offset)) or {}
        if "date" not in cache:
            cache["date"] = datetime.datetime.now()
        if datetime.datetime.now() - cache["date"] < datetime.timedelta(
                seconds=300) and "stats" in cache:
            print(datetime.datetime.now())
            print(cache["date"])
            print(datetime.datetime.now() - cache["date"])
            print(datetime.timedelta(seconds=300))
            print("USING CACHE STATS")
            result = cache["stats"]
            return iroko_json_response(IrokoResponseStatus.SUCCESS, 'ok',
                                       'aggr', result)
        else:
            result = _get_sources_stats(org_id, offset)
            cache["date"] = datetime.datetime.now()
            cache["stats"] = result
            current_cache.set("get_sources_stats:{0}{1}".format(
                org_id, offset),
                              cache,
                              timeout=-1)
            return iroko_json_response(IrokoResponseStatus.SUCCESS, 'ok',
                                       'aggr', result)

    except Exception as e:
        return iroko_json_response(IrokoResponseStatus.ERROR, str(e), None,
                                   None)
Example #29
    def set(self, key, value, timeout=7200):
        """Set the key and value.

        Args:
            key (str): a key name.
            value (str): a value.
            timeout (int): a timeout time in seconds.

        Returns:
            bool: if the key is stored.
        """
        return current_cache.set(
            self._prefix(key), value, timeout=timeout)
Example #30
 def set_cache(self, key, value):
     """Set the sitemap cache."""
     current_cache.set(key, value, timeout=-1)
     self.cache_keys.add(key)
Example #31
 def setAccessToken(cls, appid, token, expire_in=7200):
     cache.set('token-' + appid, token, timeout=expire_in)
Example #32
def missing_subformats_report(start_date=None, end_date=None):
    """Send a report of missing subformats to CDS admins."""
    report = []

    def _get_master_video(record):
        """Return master video."""
        master = CDSVideosFilesIterator.get_master_video_file(record)
        if not master:
            # Return empty values so the caller can report the record
            # instead of aborting the whole task.
            return None, None, None, None

        return master, master['tags']['display_aspect_ratio'], \
            int(master['tags']['width']), int(master['tags']['height'])

    def _get_missing_subformats(subformats, ar, w, h):
        """Return missing and transcodable subformats."""
        dones = [
            subformat['tags']['preset_quality'] for subformat in subformats
        ]
        missing = set(get_all_distinct_qualities()) - set(dones)
        transcodables = list(
            filter(lambda q: can_be_transcoded(q, ar, w, h), missing))
        return transcodables

    def _format_report(report):
        """Format the email body for the file integrity report."""
        lines = []
        for entry in report:
            lines.append('Message: {}'.format(entry.get('message')))
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Report number: {}'.format(
                entry.get('report_number')))
            lines.append('Missing subformats: {}'.format(
                entry.get('missing_subformats')))
            lines.append(('-' * 80) + '\n')

        return '\n'.join(lines)

    cache = current_cache.get('task_missing_subformats:details') or {}
    if 'end_date' not in cache:
        # Set the end date to 7 days ago
        cache['end_date'] = datetime.utcnow() - timedelta(days=7)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date, end_date
                                           or cache['end_date'])

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master, ar, w, h = _get_master_video(record)

        if not master:
            report.append({
                'message': 'No master video found for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0]
            })
            continue

        # check missing subformats
        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        missing = _get_missing_subformats(subformats, ar, w, h)
        if missing:
            report.append({
                'message': 'Missing subformats for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0],
                'missing_subformats': missing
            })

        # check bucket ids consistency
        bucket_id = master['bucket_id']
        for f in \
            subformats + CDSVideosFilesIterator.get_video_frames(master) + \
                CDSVideosFilesIterator.get_video_subtitles(record):

            if f['bucket_id'] != bucket_id:
                report.append({
                    'message':
                    'Different buckets in the same record',
                    'recid':
                    record.get('recid'),
                    'report_number':
                    record['report_number'][0],
                    'buckets':
                    'Master: {0} - {1}: {2}'.format(bucket_id, f['key'],
                                                    f['bucket_id'])
                })

    cache['end_date'] = datetime.utcnow()
    current_cache.set('task_missing_subformats:details', cache, timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Missing subformats report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
Example #33
def subformats_integrity_report(start_date=None, end_date=None):
    """Send a report of all corrupted subformats to CDS admins."""
    report = []
    update_cache = True

    def _probe_video_file(obj, record):
        """Run ffmpeg on a video file
        Return a touple containing (report, accessible)
        """
        file_report = {}
        path = obj.file.uri.replace(
            current_app.config['VIDEOS_XROOTD_ENDPOINT'], '')

        # Check if the file exists on disk
        if not os.path.exists(path):
            file_report = {
                'file_name': obj.key,
                'message': 'The file cannot be accessed'
            }

            # Return the file report and the file accessibility
            return (file_report, False)

        try:
            # Expecting the storage to be mounted on the machine
            probe = ff_probe_all(path)

            if not probe.get('streams'):
                file_report = {
                    'file_name': obj.key,
                    'message': 'No video stream'
                }

        except Exception as e:
            file_report = {
                'file_name': obj.key,
                'message': 'Error while running ff_probe_all',
                'error': repr(e)
            }

        # Return the file report and the file accessibility
        return (file_report, True)

    def _format_report(report):
        """Format the email body for the subformats integrity report."""
        lines = []
        for entry in report:
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Message: {}'.format(entry.get('message')))

            if entry.get('report_number'):
                lines.append('Report number: {}'.format(
                    entry.get('report_number')))

            subreports = entry.get('subreports')
            if subreports:
                lines.append(('-' * 10) + '\n')

                for subreport in subreports:
                    lines.append('  File name: {}'.format(
                        subreport.get('file_name')))
                    lines.append('  Message: {}'.format(
                        subreport.get('message')))

                    if subreport.get('error'):
                        lines.append('  Error: {}'.format(
                            subreport.get('error')))

            lines.append(('-' * 80) + '\n')

        return '\n'.join(lines)

    cache = current_cache.get('task_subformats_integrity:details') or {}
    two_days_ago = datetime.utcnow() - timedelta(days=2)
    if 'start_date' not in cache:
        # Set the start date to 4 days ago
        cache['start_date'] = datetime.utcnow() - timedelta(days=4)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date or cache['start_date'],
                                           end_date or two_days_ago)

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master = CDSVideosFilesIterator.get_master_video_file(record)

        if not master:
            report.append({
                'recid': record['recid'],
                'message': 'No master video found for the given record',
                'report_number': record['report_number'][0]
            })
            continue

        master_obj = as_object_version(master['version_id'])
        subreport_master, accessible = _probe_video_file(master_obj, record)

        if not accessible:
            update_cache = False

        if subreport_master:
            report.append({
                'recid': record['recid'],
                'message': 'Master file issue report',
                'report_number': record['report_number'][0],
                'subreports': subreport_master
            })

        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        if not subformats:
            report.append({
                'recid': record['recid'],
                'message': 'No subformats found'
            })
            continue

        subformats_subreport = []
        for subformat in subformats:
            subformat_obj = as_object_version(subformat['version_id'])
            subformat_subreport, accessible = _probe_video_file(
                subformat_obj, record)

            if not accessible:
                update_cache = False

            if subformat_subreport:
                subformats_subreport.append(subformat_subreport)

        if subformats_subreport:
            report.append({
                'recid': record['recid'],
                'message': 'Subformats issues found',
                'report_number': record['report_number'][0],
                'subreports': subformats_subreport
            })

    if update_cache:
        # Set the start date for next time when the task will run
        cache['start_date'] = two_days_ago
        current_cache.set('task_subformats_integrity:details',
                          cache,
                          timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Subformats integrity report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
Example #34
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Run export job."""
        if start_date is None:
            bookmark = current_cache.get('piwik_export:bookmark')
            if bookmark is None:
                msg = 'Bookmark not found, and no start date specified.'
                current_app.logger.warning(msg)
                return
            start_date = dateutil_parse(bookmark) if bookmark else None

        time_range = {}
        if start_date is not None:
            time_range['gte'] = start_date.replace(microsecond=0).isoformat()
        if end_date is not None:
            time_range['lte'] = end_date.replace(microsecond=0).isoformat()

        events = Search(using=current_search_client,
                        index=build_alias_name('events-stats-*')).filter(
                            'range', timestamp=time_range).sort({
                                'timestamp': {
                                    'order': 'asc'
                                }
                            }).params(preserve_order=True).scan()

        url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get(
            'url', None)
        token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
            .get('token_auth', None)
        chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER']\
            .get('chunk_size', 0)

        for event_chunk in chunkify(events, chunk_size):
            query_strings = []
            for event in event_chunk:
                if 'recid' not in event:
                    continue
                try:
                    query_string = self._build_query_string(event)
                    query_strings.append(query_string)
                except PIDDeletedError:
                    pass

            # Check and bail if the bookmark has progressed, e.g. from another
            # duplicate task or manual run of the exporter.
            bookmark = current_cache.get('piwik_export:bookmark')
            if bookmark and event_chunk[-1].timestamp < bookmark:
                return

            payload = {'requests': query_strings, 'token_auth': token_auth}
            res = requests.post(url, json=payload, timeout=60)

            # Failure: not 200 or not "success"
            content = res.json() if res.ok else None
            if res.status_code == 200 and content.get('status') == 'success':
                if content.get('invalid') != 0:
                    msg = 'Invalid events in Piwik export request.'
                    info = {
                        'begin_event_timestamp': event_chunk[0].timestamp,
                        'end_event_timestamp': event_chunk[-1].timestamp,
                        'invalid_events': content.get('invalid')
                    }
                    current_app.logger.warning(msg, extra=info)
                elif update_bookmark is True:
                    current_cache.set('piwik_export:bookmark',
                                      event_chunk[-1].timestamp,
                                      timeout=-1)
            else:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                }
                raise PiwikExportRequestError(msg, export_info=info)
Example #35
    def run(self, start_date=None, end_date=None, update_bookmark=True):
        """Run export job."""
        if start_date is None:
            bookmark = current_cache.get('piwik_export:bookmark')
            if bookmark is None:
                msg = 'Bookmark not found, and no start date specified.'
                current_app.logger.warning(msg)
                return
            start_date = dateutil_parse(bookmark) if bookmark else None

        time_range = {}
        if start_date is not None:
            time_range['gte'] = start_date.replace(microsecond=0).isoformat()
        if end_date is not None:
            time_range['lte'] = end_date.replace(microsecond=0).isoformat()

        events = Search(
            using=current_search_client,
            index='events-stats-*'
        ).filter(
            'range', timestamp=time_range
        ).sort(
            {'timestamp': {'order': 'asc'}}
        ).params(preserve_order=True).scan()

        url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get('url', None)
        token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
            .get('token_auth', None)
        chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER']\
            .get('chunk_size', 0)

        for event_chunk in chunkify(events, chunk_size):
            query_strings = []
            for event in event_chunk:
                if 'recid' not in event:
                    continue
                try:
                    query_string = self._build_query_string(event)
                    query_strings.append(query_string)
                except PIDDeletedError:
                    pass

            payload = {
                'requests': query_strings,
                'token_auth': token_auth
            }

            res = requests.post(url, json=payload)

            # Failure: not 200 or not "success"
            content = res.json() if res.ok else None
            if res.status_code == 200 and content.get('status') == 'success':
                if content.get('invalid') != 0:
                    msg = 'Invalid events in Piwik export request.'
                    info = {
                        'begin_event_timestamp': event_chunk[0].timestamp,
                        'end_event_timestamp': event_chunk[-1].timestamp,
                        'invalid_events': content.get('invalid')
                    }
                    current_app.logger.warning(msg, extra=info)
                elif update_bookmark is True:
                    current_cache.set('piwik_export:bookmark',
                                      event_chunk[-1].timestamp,
                                      timeout=-1)
            else:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                }
                raise PiwikExportRequestError(msg, export_info=info)
Example #36
 def set(self, key: str, value: datetime = None):
     """."""
     return current_cache.set(f'{self.prefix}:{key}',
                              value or datetime.now(),
                              timeout=-1)
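A hedged companion reader for the setter above, assuming the same `self.prefix` scheme; the class name `Bookmark` and its constructor are hypothetical:

from datetime import datetime

from invenio_cache import current_cache


class Bookmark:
    """Hypothetical holder of the set/get pair."""

    def __init__(self, prefix):
        self.prefix = prefix

    def set(self, key: str, value: datetime = None):
        # Mirrors the setter shown above.
        return current_cache.set(f'{self.prefix}:{key}',
                                 value or datetime.now(),
                                 timeout=-1)

    def get(self, key: str):
        # Read back the timestamp stored under the prefixed key.
        return current_cache.get(f'{self.prefix}:{key}')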
Example #37
def test_datacite_update(mocker, db, minimal_record):
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient'
    )

    doi_tags = [
        '<identifier identifierType="DOI">{doi}</identifier>',
        ('<relatedIdentifier relatedIdentifierType="DOI" '
         'relationType="IsVersionOf">{conceptdoi}</relatedIdentifier>'),
    ]

    # Assert calls and content
    def assert_datacite_calls_and_content(record, doi_tags):
        """Datacite client calls assertion helper."""
        assert dc_mock().metadata_post.call_count == 1
        _, doi_args, _ = dc_mock().metadata_post.mock_calls[0]
        assert all([t.format(**record) in doi_args[0] for t in doi_tags])

        assert dc_mock().doi_post.call_count == 1
        dc_mock().doi_post.assert_any_call(
            record['doi'],
            'https://zenodo.org/record/{}'.format(record['recid']))

    def assert_datacite_calls_with_missing_data():
        """Datacite client calls assertion helper."""
        assert dc_mock().metadata_post.call_count == 0
        assert dc_mock().doi_post.call_count == 0

    def create_versioned_record(recid_value, conceptrecid):
        """Utility function for creating versioned records."""
        recid = PersistentIdentifier.create(
            'recid', recid_value, status=PIDStatus.RESERVED)
        pv = PIDVersioning(parent=conceptrecid)
        pv.insert_draft_child(recid)

        record_metadata = deepcopy(minimal_record)
        # Remove the DOI
        del record_metadata['doi']
        record_metadata['conceptrecid'] = conceptrecid.pid_value
        record_metadata['recid'] = int(recid.pid_value)
        record = ZenodoRecord.create(record_metadata)
        zenodo_record_minter(record.id, record)
        record.commit()

        return recid, record

    # Create conceptrecid for the records
    conceptrecid = PersistentIdentifier.create(
        'recid', '100', status=PIDStatus.RESERVED)

    # Create a reserved recid
    recid1, r1 = create_versioned_record('352543', conceptrecid)

    # no registered local DOIs
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()

    doi_pids = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_value == '10.5072/zenodo.352543')
    doi_pids[0].status = PIDStatus.REGISTERED

    db.session.commit()

    update_date = doi_pids[0].updated

    # no task_details on Redis cache
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()
    new_update_date1 = doi_pids[0].updated
    assert update_date == new_update_date1

    task_details = dict(
        job_id=str(uuid.uuid4()),
        from_date=datetime(2015, 1, 1, 13, 33),
        until_date=datetime(2016, 1, 1, 13, 33),
        last_update=datetime.utcnow()
    )
    current_cache.set('update_datacite:task_details', task_details, timeout=-1)

    # no registered local DOIs updated inside the interval
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()
    new_update_date2 = doi_pids[0].updated
    assert update_date == new_update_date2

    task_details = dict(
        job_id=str(uuid.uuid4()),
        from_date=datetime(2015, 1, 1, 13, 33),
        until_date=datetime.utcnow(),
        last_update=datetime.utcnow()
    )
    current_cache.set('update_datacite:task_details', task_details, timeout=-1)

    schedule_update_datacite_metadata(1)
    new_update_date3 = doi_pids[0].updated
    assert update_date < new_update_date3

    assert_datacite_calls_and_content(r1, doi_tags)
Example #39
    def update_datacite(self):
        """."""
        form = UpdateDataciteForm()
        cancel_or_new_task_form = FlaskForm()

        is_task_running = False
        time = 0
        task_details = current_cache.get('update_datacite:task_details')

        if task_details:
            is_task_running = True
            if cancel_or_new_task_form.validate_on_submit():
                current_cache.set('update_datacite:task_details', None)
                return redirect(url_for('updatedataciteview.update_datacite'))
        else:
            if form.validate_on_submit():
                from_date = request.form['from_date']
                until_date = request.form['until_date']

                action = request.form['action']
                if action == 'SubmitDates':
                    if from_date > until_date:
                        flash(
                            "Error: the 'From' date should precede the 'Until' date."
                        )
                    else:
                        pids_count = find_registered_doi_pids(
                            from_date, until_date, current_app.
                            config['ZENODO_LOCAL_DOI_PREFIXES']).count()
                        task_details = dict(total_pids=pids_count)
                        time = pids_count / current_app.config[
                            'DATACITE_UPDATING_RATE_PER_HOUR']

                elif action == 'Confirm':
                    pids_count = find_registered_doi_pids(
                        from_date, until_date,
                        current_app.config['ZENODO_LOCAL_DOI_PREFIXES']).count(
                        )
                    task_details = dict(start_date=datetime.utcnow(),
                                        job_id=str(uuid.uuid4()),
                                        from_date=from_date,
                                        until_date=until_date,
                                        total_pids=pids_count,
                                        left_pids=pids_count,
                                        last_update=datetime.utcnow())
                    current_cache.set('update_datacite:task_details',
                                      task_details,
                                      timeout=-1)
                    return redirect(
                        url_for('updatedataciteview.update_datacite'))

                elif action == 'Cancel':
                    return redirect(
                        url_for('updatedataciteview.update_datacite'))

        return self.render('zenodo_records/update_datacite.html',
                           form=form,
                           cancel_or_new_task_form=cancel_or_new_task_form,
                           details=task_details,
                           is_task_running=is_task_running,
                           time=time)