def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record) or
                is_openaire_dataset(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)

        url = '{}/api/results/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)

        if not res.ok:
            raise OpenAIRERequestError(res.text)
        else:
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        recid = record.get('recid')
        current_cache.set('openaire_direct_index:{}'.format(recid),
                          datetime.now(), timeout=-1)
        openaire_direct_index.retry(exc=exc)
def load_restricted_collections():
    restricted_collections = current_cache.get('restricted_collections')
    if restricted_collections:
        return restricted_collections
    else:
        restricted_collections = set(
            a.argument for a in ActionUsers.query.filter_by(
                action='view-restricted-collection').all()
        )
        restricted_collections = restricted_collections | set(
            a.argument for a in ActionRoles.query.filter_by(
                action='view-restricted-collection').all()
        )
        if restricted_collections:
            current_cache.set(
                'restricted_collections',
                restricted_collections,
                timeout=current_app.config.get(
                    'INSPIRE_COLLECTIONS_RESTRICTED_CACHE_TIMEOUT', 120)
            )
        return restricted_collections
def decorated(*args, **kwargs):
    key = f'{prefix}:{key_func(f, *args, **kwargs)}'
    res = current_cache.get(key)
    if not res:
        res = f(*args, **kwargs)
        current_cache.set(key, res, timeout=timeout)
    return res
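# The ``decorated`` closure above relies on ``prefix``, ``key_func``, ``timeout``
# and the wrapped function ``f`` from an enclosing scope that is not shown.
# A minimal sketch of such a decorator factory follows; the name
# ``memoize_with_cache`` and the default timeout are assumptions, not part of
# the original snippet.
import functools

from invenio_cache import current_cache


def memoize_with_cache(prefix, key_func, timeout=300):
    """Hypothetical decorator factory that produces the ``decorated`` closure."""
    def outer(f):
        @functools.wraps(f)
        def decorated(*args, **kwargs):
            key = f'{prefix}:{key_func(f, *args, **kwargs)}'
            res = current_cache.get(key)
            if not res:
                res = f(*args, **kwargs)
                current_cache.set(key, res, timeout=timeout)
            return res
        return decorated
    return outer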
def _get_from_cache(cache_key, generator, force_update=False):
    """Retrieve the specified key from the cache.

    If the key is missing or empty, or if `force_update` is `True`, update the
    cache with the generator output and return the cached data.

    Args:
        cache_key: name of the key where the cache is stored.
        generator: partial used to generate data for the specified key when the
            key is empty or when `force_update` is `True`.
        force_update: when `True`, update the cache key and then return the
            cached data.

    Returns:
        Whatever was under `cache_key` in the cache.
    """
    data = current_cache.get(cache_key)
    if not data or force_update:
        new_data = generator()
        current_cache.set(
            cache_key,
            new_data,
            timeout=current_app.config.get("RT_USERS_CACHE_TIMEOUT", 86400),
        )
        return new_data
    return data
def test_cache_ui():
    with_update = bool(int(request.args.get('update', '0')))
    with_delete = bool(int(request.args.get('delete', '0')))

    if with_update:
        if with_delete:
            current_cache.delete('test_key')
        current_cache.set('test_key', ObjDict(**{
            'device_id': 12345678,
            'cache_time': datetime.today()
        }), timeout=60)

    test_value = current_cache.get('test_key')
    if not test_value:
        cache_rtn = current_cache.set('test_key', ObjDict(**{
            'device_id': 12345678,
            'cache_time': datetime.today()
        }), timeout=60)
        if not cache_rtn:
            return jsonify({
                'code': -1,
                'msg': 'cache set error'
            })
        test_value = current_cache.get('test_key')

    return jsonify({
        'code': 0,
        'msg': 'success',
        'data': {
            'dev_id': test_value.device_id,
            'store_time': test_value.cache_time.strftime('%Y-%m-%d %H:%M:%S')
        }
    })
def query_cuor_by_uuid(cls, uuid):
    """Request an organization from the CUOR API by its UUID, caching the result."""
    try:
        cache = current_cache.get(
            "query_cuor_by_pid:{0}".format(uuid)) or {}
        if "date" not in cache:
            cache["date"] = datetime.now()

        if datetime.now() - cache["date"] < timedelta(days=1) and "org" in cache:
            return cache["org"]

        api_endpoint = current_app.config['CUOR_API_ENDPOINT']
        session = requests.Session()
        url = api_endpoint + '/' + uuid
        response = session.get(url, verify=False)
        result = json.loads(response.text)

        cache["org"] = result
        cache["date"] = datetime.now()
        current_cache.set("query_cuor_by_pid:{0}".format(uuid), cache,
                          timeout=-1)

        return result
    except Exception:
        print(traceback.format_exc())
        return None
def read_all(self, identity, fields, cache=True, **kwargs):
    """Search for records matching the querystring."""
    cache_key = "-".join(fields)
    results = current_cache.get(cache_key)
    es_query = Q("match_all")

    if not results:
        results = self._read_many(identity, es_query, fields, **kwargs)
        if cache:
            # The ES DSL Response is not picklable; store its to_dict()
            # so it can be serialized into the cache.
            current_cache.set(cache_key, results.to_dict())
    else:
        search = self.create_search(
            identity=identity,
            record_cls=self.record_cls,
            search_opts=self.config.search,
            permission_action='search',
        ).query(es_query)
        results = Response(search, results)

    return self.result_list(self, identity, results)
def schedule_update_datacite_metadata(max_count):
    """Schedule the update of DataCite metadata."""
    task_details = current_cache.get('update_datacite:task_details')

    if task_details is None or 'from_date' not in task_details \
            or 'until_date' not in task_details:
        return

    doi_pids = find_registered_doi_pids(
        task_details['from_date'],
        task_details['until_date'],
        current_app.config['ZENODO_LOCAL_DOI_PREFIXES'])
    dois_count = doi_pids.count()

    task_details['left_pids'] = dois_count
    task_details['last_update'] = datetime.utcnow()
    current_cache.set('update_datacite:task_details', task_details, timeout=-1)

    if dois_count == 0:
        if 'finish_date' not in task_details:
            task_details['finish_date'] = datetime.utcnow()
            current_cache.set('update_datacite:task_details', task_details,
                              timeout=-1)
        return

    scheduled_dois_count = max_count if max_count < dois_count else dois_count
    scheduled_dois_pids = doi_pids.limit(scheduled_dois_count)

    for doi_pid in scheduled_dois_pids:
        update_datacite_metadata.delay(doi_pid.pid_value,
                                       str(doi_pid.object_uuid),
                                       task_details['job_id'])
def schedule_update_datacite_metadata(max_count):
    """Schedule the update of DataCite metadata."""
    task_details = current_cache.get('update_datacite:task_details')

    if task_details is None or 'from_date' not in task_details \
            or 'until_date' not in task_details:
        return

    pids = find_registered_doi_pids(
        task_details['from_date'],
        task_details['until_date'],
        current_app.config['ZENODO_LOCAL_DOI_PREFIXES'])
    pids_count = pids.count()

    task_details['left_pids'] = pids_count
    task_details['last_update'] = datetime.utcnow()
    current_cache.set('update_datacite:task_details', task_details)

    if pids_count == 0:
        if 'finish_date' not in task_details:
            task_details['finish_date'] = datetime.utcnow()
            current_cache.set('update_datacite:task_details', task_details)
        return

    scheduled_pids_count = max_count if max_count < pids_count else pids_count
    scheduled_pids = pids.limit(scheduled_pids_count)

    for pid in scheduled_pids:
        update_datacite_metadata.delay(pid.pid_value, pid.object_uuid,
                                       task_details['job_id'])
def read_all(self, identity, fields, type, cache=True, **kwargs):
    """Search for records matching the querystring."""
    cache_key = type + "_" + "-".join(fields)
    results = current_cache.get(cache_key)
    es_query = Q("match_all")

    if not results:
        # If not found, NoResultFound is raised (caught by the resource).
        vocabulary_type = VocabularyType.query.filter_by(id=type).one()
        vocab_id_filter = Q('term', type__id=vocabulary_type.id)

        results = self._read_many(
            identity, es_query, fields, extra_filter=vocab_id_filter, **kwargs)
        if cache:
            # The ES DSL Response is not picklable; store its to_dict()
            # so it can be serialized into the cache.
            current_cache.set(cache_key, results.to_dict())
    else:
        search = self.create_search(
            identity=identity,
            record_cls=self.record_cls,
            search_opts=self.config.search,
            permission_action='search',
        ).query(es_query)
        results = Response(search, results)

    return self.result_list(self, identity, results)
def get_organization_relationships(uuid):
    """Get the relationships of an organization by any PID received as an argument, including its UUID."""
    try:
        rtype = request.args.get('type') if request.args.get('type') else None
        cache = current_cache.get(
            "get_organization_relationships:{0}{1}".format(uuid, rtype)) or {}
        if "date" not in cache:
            cache["date"] = datetime.datetime.now()

        if datetime.datetime.now() - cache["date"] < datetime.timedelta(days=1) \
                and "stats" in cache:
            result = cache["stats"]
            return jsonify(result)
        else:
            result = _get_organization_relationships(uuid, rtype)
            cache["date"] = datetime.datetime.now()
            cache["stats"] = result
            current_cache.set(
                "get_organization_relationships:{0}{1}".format(uuid, rtype),
                cache, timeout=-1)
            return jsonify(result)
    except Exception as e:
        return jsonify({
            'ERROR': str(e),
        })
def query_cuor_by_pid(cls, pid):
    """Request an organization by a persistent identifier (not the CUOR UUID)."""
    try:
        cache = current_cache.get(
            "query_cuor_by_pid:{0}".format(pid)) or {}
        if "date" not in cache:
            cache["date"] = datetime.now()

        if datetime.now() - cache["date"] < timedelta(days=1) and "org" in cache:
            print("USING CACHE ORGANIZATION")
            if 'status' in cache["org"] and cache["org"]['status'] == '404':
                cache["org"] = None
                return None
            return cache["org"]

        api_endpoint = current_app.config['CUOR_API_ENDPOINT']
        session = requests.Session()
        url = api_endpoint + '/pid?value=' + pid
        response = session.get(url, verify=False)
        result = json.loads(response.text)

        if 'status' in result and result['status'] == '404':
            return None

        cache["org"] = result
        cache["date"] = datetime.now()
        current_cache.set("query_cuor_by_pid:{0}".format(pid), cache,
                          timeout=-1)

        return result
    except Exception:
        return None
def run(self, start_date=None, end_date=None, update_bookmark=True):
    """Run export job."""
    if start_date is None:
        bookmark = current_cache.get('piwik_export:bookmark')
        start_date = dateutil_parse(bookmark) if bookmark else None

    time_range = {}
    if start_date is not None:
        time_range['gte'] = start_date.replace(microsecond=0).isoformat()
    if end_date is not None:
        time_range['lte'] = end_date.replace(microsecond=0).isoformat()

    events = Search(
        using=current_search_client,
        index='events-stats-*'
    ).filter(
        'range', timestamp=time_range
    ).sort(
        {'timestamp': {'order': 'asc'}}
    ).params(preserve_order=True).scan()

    url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get('url', None)
    token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('token_auth', None)
    chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('chunk_size', 0)

    for event_chunk in chunkify(events, chunk_size):
        query_strings = []
        for event in event_chunk:
            query_string = self._build_query_string(event)
            query_strings.append(query_string)

        payload = {'requests': query_strings, 'token_auth': token_auth}

        res = requests.post(url, json=payload)

        # Failure: not 200 or not "success"
        content = res.json() if res.ok else None
        if res.status_code == 200 and content.get('status') == 'success':
            if content.get('invalid') != 0:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                    'invalid_events': content.get('invalid')
                }
                current_app.logger.warning(msg, extra=info)
            elif update_bookmark is True:
                current_cache.set('piwik_export:bookmark',
                                  event_chunk[-1].timestamp, timeout=-1)
        else:
            msg = 'Invalid events in Piwik export request.'
            info = {
                'begin_event_timestamp': event_chunk[0].timestamp,
                'end_event_timestamp': event_chunk[-1].timestamp,
            }
            raise PiwikExportRequestError(msg, export_info=info)
def set_cache_page(self, key, value):
    """Set the page into the cache."""
    # Check if there is a set of keys; if not, create one.
    current_key_set = current_cache.get(self.cached_pages_set_key) or set()
    current_key_set.add(key)
    current_cache.set(self.cached_pages_set_key, current_key_set)
    current_cache.set(
        key, value,
        timeout=current_app.config['WEKO_SITEMAP_CACHE_TIMEOUT'])
def update_datacite(self):
    """Render and handle the DataCite metadata update form."""
    form = UpdateDataciteForm()
    cancel_or_new_task_form = FlaskForm()

    is_task_running = False
    time = 0
    task_details = current_cache.get('update_datacite:task_details')

    if task_details:
        is_task_running = True
        if cancel_or_new_task_form.validate_on_submit():
            current_cache.set('update_datacite:task_details', None)
            return redirect(url_for('updatedataciteview.update_datacite'))
    else:
        if form.validate_on_submit():
            from_date = request.form['from_date']
            until_date = request.form['until_date']
            action = request.form['action']

            if action == 'SubmitDates':
                if from_date > until_date:
                    flash("Error: the 'From' date should precede the"
                          " 'Until' date.")
                else:
                    pids_count = find_registered_doi_pids(
                        from_date, until_date,
                        current_app.config['ZENODO_LOCAL_DOI_PREFIXES']
                    ).count()
                    task_details = dict(total_pids=pids_count)
                    time = pids_count / \
                        current_app.config['DATACITE_UPDATING_RATE_PER_HOUR']

            elif action == 'Confirm':
                pids_count = find_registered_doi_pids(
                    from_date, until_date,
                    current_app.config['ZENODO_LOCAL_DOI_PREFIXES']
                ).count()
                task_details = dict(
                    start_date=datetime.utcnow(),
                    job_id=str(uuid.uuid4()),
                    from_date=from_date,
                    until_date=until_date,
                    total_pids=pids_count,
                    left_pids=pids_count,
                    last_update=datetime.utcnow()
                )
                current_cache.set('update_datacite:task_details',
                                  task_details, timeout=-1)
                return redirect(url_for('updatedataciteview.update_datacite'))

            elif action == 'Cancel':
                return redirect(url_for('updatedataciteview.update_datacite'))

    return self.render('zenodo_records/update_datacite.html',
                       form=form,
                       cancel_or_new_task_form=cancel_or_new_task_form,
                       details=task_details,
                       is_task_running=is_task_running,
                       time=time)
def get_anonymization_salt(ts):
    """Get the anonymization salt based on the event timestamp's day."""
    salt_key = 'stats:salt:{}'.format(ts.date().isoformat())
    salt = current_cache.get(salt_key)
    if not salt:
        salt_bytes = os.urandom(32)
        salt = b64encode(salt_bytes).decode('utf-8')
        current_cache.set(salt_key, salt, timeout=60 * 60 * 24)
    return salt
def get_widget_design_setting(repository_id, current_language, page_id=None):
    """Get widget design setting.

    @param repository_id: The repository identifier.
    @param current_language: The current language.
    @param page_id: The page identifier.
    @return: The widget design setting response.
    """
    def validate_response():
        """Check whether the response data can be compressed with gzip.

        @return: True if the response data can be compressed with gzip.
        """
        is_valid = True
        accept_encoding = request.headers.get('Accept-Encoding', '')
        response = jsonify({})
        if not config.WEKO_GRIDLAYOUT_IS_COMPRESS_WIDGET or \
                response.direct_passthrough or \
                'gzip' not in accept_encoding.lower() or \
                'Content-Encoding' in response.headers:
            is_valid = False
        return is_valid

    def get_widget_response(_page_id):
        """Get the widget setting response.

        :param _page_id: The page identifier.
        @return: The widget setting response.
        """
        lang_code = current_language or get_default_language()
        if _page_id:
            from .services import WidgetDesignPageServices
            widget_setting_data = WidgetDesignPageServices \
                .get_widget_design_setting(_page_id, lang_code)
        else:
            from .services import WidgetDesignServices
            widget_setting_data = WidgetDesignServices \
                .get_widget_design_setting(repository_id, lang_code)
        return jsonify(widget_setting_data)

    if validate_response() and current_language:
        if page_id:
            key = (config.WEKO_GRIDLAYOUT_WIDGET_PAGE_CACHE_KEY
                   + repository_id + "_" + page_id + "_" + current_language)
        else:
            key = (config.WEKO_GRIDLAYOUT_WIDGET_CACHE_KEY
                   + repository_id + "_" + current_language)
        if current_cache.get(key) is None:
            data = compress_widget_response(get_widget_response(page_id))
            current_cache.set(key, data)
            return data
        else:
            return current_cache.get(key)
    else:
        return get_widget_response(page_id)
def get_users():
    """Return the list of all users as {id, name} dicts.

    :rtype: dict - with ``name (string)``, ``id (integer)`` properties
    """
    queues = current_cache.get('rt_users')
    if queues:
        return queues
    else:
        queues = _get_all_of("user")
        if queues:
            current_cache.set(
                'rt_users', queues,
                timeout=current_app.config.get('RT_USERS_CACHE_TIMEOUT', 86400))
        return queues
def wrapper(*args, **kwargs):
    key = key_prefix
    for value in args:
        key += str(value)
    cache_fun = current_cache.cached(
        timeout=timeout,
        key_prefix=key,
        forced_update=is_update_cache,
    )
    if current_cache.get(key) is None:
        data = cache_fun(f)(*args, **kwargs)
        current_cache.set(key, data)
        return data
    else:
        return current_cache.get(key)
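# Similarly, ``wrapper`` above depends on ``key_prefix``, ``timeout``,
# ``is_update_cache`` and the wrapped function ``f`` from an enclosing scope
# that is not shown. A minimal sketch under assumed names follows
# (``cache_result`` is hypothetical; ``forced_update`` is the Flask-Caching
# keyword already used in the snippet).
import functools

from invenio_cache import current_cache


def cache_result(key_prefix, timeout=300, is_update_cache=lambda: False):
    """Hypothetical decorator factory that produces the ``wrapper`` closure."""
    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            key = key_prefix
            for value in args:
                key += str(value)
            cache_fun = current_cache.cached(
                timeout=timeout,
                key_prefix=key,
                forced_update=is_update_cache,
            )
            if current_cache.get(key) is None:
                data = cache_fun(f)(*args, **kwargs)
                current_cache.set(key, data)
                return data
            return current_cache.get(key)
        return wrapper
    return decorator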
def calculate_metrics(metric_id, cache=True):
    if cache:
        cached_data = current_cache.get(
            'ZENODO_METRICS_CACHE::{}'.format(metric_id))
        if cached_data is not None:
            return cached_data

    result = deepcopy(current_app.config['ZENODO_METRICS_DATA'][metric_id])

    for metric in result:
        metric['value'] = metric['value']()

    current_cache.set(
        'ZENODO_METRICS_CACHE::{}'.format(metric_id), result,
        timeout=current_app.config['ZENODO_METRICS_CACHE_TIMEOUT'])

    return result
def get_queues():
    """Return the list of all queues.

    Returns:
        list (dict): list of all queues as {id, name}.
    """
    queues = current_cache.get("rt_queues")
    if queues:
        return queues
    else:
        queues = _get_all_of("queue")
        if queues:
            current_cache.set(
                "rt_queues",
                queues,
                timeout=current_app.config.get("RT_QUEUES_CACHE_TIMEOUT", 86400),
            )
        return queues
def get_users():
    """Get users.

    Returns:
        list (dict): list of all users as {id, name} dicts.
    """
    queues = current_cache.get("rt_users")
    if queues:
        return queues
    else:
        queues = _get_all_of("user")
        if queues:
            current_cache.set(
                "rt_users",
                queues,
                timeout=current_app.config.get("RT_USERS_CACHE_TIMEOUT", 86400),
            )
        return queues
def save(cls, openid=None, **kwargs):
    """Save Wepy user info.

    :param openid:
    :param kwargs:
    :return:
    """
    assert openid
    try:
        cache.set(openid, kwargs)
        db_wepy_user = WepyUser()
        wepy_user = db_wepy_user.get_by_openid(openid)
        if wepy_user is None:
            wepy_user = WepyUser.create(openid, kwargs.pop('session'))
        else:
            wepy_user.update(openid, **kwargs)
    except Exception as ex:
        current_app.logger.error(
            'WepyUserApi.save({}) Except: '.format(openid), ex)
    return wepy_user
def openaire_direct_index(record_uuid):
    """Send record for direct indexing at OpenAIRE.

    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = ZenodoRecord.get_record(record_uuid)

        # Bail out if not an OpenAIRE record.
        if not (is_openaire_publication(record) or
                is_openaire_dataset(record) or
                is_openaire_software(record) or
                is_openaire_other(record)):
            return

        data = openaire_json_v1.serialize(record.pid, record)

        url = '{}/feedObject'.format(
            current_app.config['OPENAIRE_API_URL'])
        req = _openaire_request_factory()
        res = req.post(url, data=data)
        if not res.ok:
            raise OpenAIRERequestError(res.text)

        res_beta = None
        if current_app.config['OPENAIRE_API_URL_BETA']:
            url_beta = '{}/feedObject'.format(
                current_app.config['OPENAIRE_API_URL_BETA'])
            res_beta = req.post(url_beta, data=data)

        if res_beta and not res_beta.ok:
            raise OpenAIRERequestError(res_beta.text)
        else:
            recid = record.get('recid')
            current_cache.delete('openaire_direct_index:{}'.format(recid))
    except Exception as exc:
        recid = record.get('recid')
        current_cache.set('openaire_direct_index:{}'.format(recid),
                          datetime.now(), timeout=-1)
        openaire_direct_index.retry(exc=exc)
def set(self, key, value, timeout=7200):
    """Set the key and value.

    Args:
        key (str): a key name.
        value (str): a value.
        timeout (int): a timeout in seconds.

    Returns:
        bool: whether the key was stored.
    """
    return current_cache.set(self._prefix(key), value, timeout=timeout)
def get_sources_stats():
    """Get sources stats for an organization, cached for 300 seconds."""
    try:
        offset = request.args.get('offset') if request.args.get(
            'offset') else 3  # top organization bucket
        org_id = request.args.get('org') if request.args.get('org') else None

        cache = current_cache.get("get_sources_stats:{0}{1}".format(
            org_id, offset)) or {}
        if "date" not in cache:
            cache["date"] = datetime.datetime.now()

        if datetime.datetime.now() - cache["date"] < datetime.timedelta(seconds=300) \
                and "stats" in cache:
            print(datetime.datetime.now())
            print(cache["date"])
            print(datetime.datetime.now() - cache["date"])
            print(datetime.timedelta(seconds=300))
            print("USING CACHE STATS")
            result = cache["stats"]
            return iroko_json_response(IrokoResponseStatus.SUCCESS,
                                       'ok', 'aggr', result)
        else:
            result = _get_sources_stats(org_id, offset)
            cache["date"] = datetime.datetime.now()
            cache["stats"] = result
            current_cache.set("get_sources_stats:{0}{1}".format(
                org_id, offset), cache, timeout=-1)
            return iroko_json_response(IrokoResponseStatus.SUCCESS,
                                       'ok', 'aggr', result)
    except Exception as e:
        return iroko_json_response(IrokoResponseStatus.ERROR, str(e),
                                   None, None)
def set_cache(self, key, value):
    """Set the sitemap cache."""
    current_cache.set(key, value, timeout=-1)
    self.cache_keys.add(key)
def setAccessToken(cls, appid, token, expire_in=7200):
    cache.set('token-' + appid, token, timeout=expire_in)
def missing_subformats_report(start_date=None, end_date=None):
    """Send a report of missing subformats to CDS admins."""
    report = []

    def _get_master_video(record):
        """Return master video."""
        master = CDSVideosFilesIterator.get_master_video_file(record)
        if not master:
            raise Exception("No master video found for the given record")
        return master, master['tags']['display_aspect_ratio'], \
            int(master['tags']['width']), int(master['tags']['height'])

    def _get_missing_subformats(subformats, ar, w, h):
        """Return missing and transcodable subformats."""
        dones = [
            subformat['tags']['preset_quality'] for subformat in subformats
        ]
        missing = set(get_all_distinct_qualities()) - set(dones)
        transcodables = list(
            filter(lambda q: can_be_transcoded(q, ar, w, h), missing))
        return transcodables

    def _format_report(report):
        """Format the email body for the file integrity report."""
        lines = []
        for entry in report:
            lines.append('Message: {}'.format(entry.get('message')))
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Report number: {}'.format(
                entry.get('report_number')))
            lines.append('Missing subformats: {}'.format(
                entry.get('missing_subformats')))
            lines.append(('-' * 80) + '\n')
        return '\n'.join(lines)

    cache = current_cache.get('task_missing_subformats:details') or {}

    if 'end_date' not in cache:
        # Set the end date to 7 days ago
        cache['end_date'] = datetime.utcnow() - timedelta(days=7)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date,
                                           end_date or cache['end_date'])

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master, ar, w, h = _get_master_video(record)

        if not master:
            report.append({
                'message': 'No master video found for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0]
            })
            continue

        # check missing subformats
        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        missing = _get_missing_subformats(subformats, ar, w, h)
        if missing:
            report.append({
                'message': 'Missing subformats for the given record',
                'recid': record.get('recid'),
                'report_number': record['report_number'][0],
                'missing_subformats': missing
            })

        # check bucket ids consistency
        bucket_id = master['bucket_id']
        for f in (subformats
                  + CDSVideosFilesIterator.get_video_frames(master)
                  + CDSVideosFilesIterator.get_video_subtitles(record)):
            if f['bucket_id'] != bucket_id:
                report.append({
                    'message': 'Different buckets in the same record',
                    'recid': record.get('recid'),
                    'report_number': record['report_number'][0],
                    'buckets': 'Master: {0} - {1}: {2}'.format(
                        bucket_id, f['key'], f['bucket_id'])
                })

    cache['end_date'] = datetime.utcnow()
    current_cache.set('task_missing_subformats:details', cache, timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Missing subformats report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
def subformats_integrity_report(start_date=None, end_date=None):
    """Send a report of all corrupted subformats to CDS admins."""
    report = []
    update_cache = True

    def _probe_video_file(obj, record):
        """Run ffprobe on a video file.

        Return a tuple containing (report, accessible).
        """
        file_report = {}
        path = obj.file.uri.replace(
            current_app.config['VIDEOS_XROOTD_ENDPOINT'], '')

        if not os.path.exists(path):
            # Check if the file exists on disk
            file_report = {
                'file_name': obj.key,
                'message': 'The file cannot be accessed'
            }
            # Return the file report and the file accessibility
            return (file_report, False)

        try:
            # Expecting the storage to be mounted on the machine
            probe = ff_probe_all(path)
            if not probe.get('streams'):
                file_report = {
                    'file_name': obj.key,
                    'message': 'No video stream'
                }
        except Exception as e:
            file_report = {
                'file_name': obj.key,
                'message': 'Error while running ff_probe_all',
                'error': repr(e)
            }

        # Return the file report and the file accessibility
        return (file_report, True)

    def _format_report(report):
        """Format the email body for the subformats integrity report."""
        lines = []
        for entry in report:
            lines.append(u'Record: {}'.format(
                format_pid_link(current_app.config['RECORDS_UI_ENDPOINT'],
                                entry.get('recid'))))
            lines.append('Message: {}'.format(entry.get('message')))

            if entry.get('report_number'):
                lines.append('Report number: {}'.format(
                    entry.get('report_number')))

            subreports = entry.get('subreports')
            if subreports:
                lines.append(('-' * 10) + '\n')
                for subreport in subreports:
                    lines.append(' File name: {}'.format(
                        subreport.get('file_name')))
                    lines.append(' Message: {}'.format(
                        subreport.get('message')))
                    if subreport.get('error'):
                        lines.append(' Error: {}'.format(
                            subreport.get('error')))

            lines.append(('-' * 80) + '\n')
        return '\n'.join(lines)

    cache = current_cache.get('task_subformats_integrity:details') or {}
    two_days_ago = datetime.utcnow() - timedelta(days=2)

    if 'start_date' not in cache:
        # Set the start date to 4 days ago
        cache['start_date'] = datetime.utcnow() - timedelta(days=4)

    record_uuids = _filter_by_last_created(_get_all_records_with_bucket(),
                                           start_date or cache['start_date'],
                                           end_date or two_days_ago)

    for record_uuid in record_uuids:
        record = CDSRecord.get_record(record_uuid.id)
        master = CDSVideosFilesIterator.get_master_video_file(record)

        if not master:
            report.append({
                'recid': record['recid'],
                'message': 'No master video found for the given record',
                'report_number': record['report_number'][0]
            })
            continue

        master_obj = as_object_version(master['version_id'])
        subreport_master, accessible = _probe_video_file(master_obj, record)

        if not accessible:
            update_cache = False

        if subreport_master:
            report.append({
                'recid': record['recid'],
                'message': 'Master file issue report',
                'report_number': record['report_number'][0],
                'subreports': subreport_master
            })

        subformats = CDSVideosFilesIterator.get_video_subformats(master)
        if not subformats:
            report.append({
                'recid': record['recid'],
                'message': 'No subformats found'
            })
            continue

        subformats_subreport = []
        for subformat in subformats:
            subformat_obj = as_object_version(subformat['version_id'])
            subformat_subreport, accessible = _probe_video_file(
                subformat_obj, record)

            if not accessible:
                update_cache = False

            if subformat_subreport:
                subformats_subreport.append(subformat_subreport)

        if subformats_subreport:
            report.append({
                'recid': record['recid'],
                'message': 'Subformats issues found',
                'report_number': record['report_number'][0],
                'subreports': subformats_subreport
            })

    if update_cache:
        # Set the start date for the next time the task will run
        cache['start_date'] = two_days_ago
        current_cache.set('task_subformats_integrity:details', cache,
                          timeout=-1)

    if report:
        # Format and send the email
        subject = u'[CDS Videos] Subformats integrity report [{}]'.format(
            datetime.now())
        body = _format_report(report)
        sender = current_app.config['NOREPLY_EMAIL']
        recipients = [current_app.config['CDS_ADMIN_EMAIL']]
        _send_email(subject, body, sender, recipients)
def run(self, start_date=None, end_date=None, update_bookmark=True):
    """Run export job."""
    if start_date is None:
        bookmark = current_cache.get('piwik_export:bookmark')
        if bookmark is None:
            msg = 'Bookmark not found, and no start date specified.'
            current_app.logger.warning(msg)
            return
        start_date = dateutil_parse(bookmark) if bookmark else None

    time_range = {}
    if start_date is not None:
        time_range['gte'] = start_date.replace(microsecond=0).isoformat()
    if end_date is not None:
        time_range['lte'] = end_date.replace(microsecond=0).isoformat()

    events = Search(
        using=current_search_client,
        index=build_alias_name('events-stats-*')
    ).filter(
        'range', timestamp=time_range
    ).sort(
        {'timestamp': {'order': 'asc'}}
    ).params(preserve_order=True).scan()

    url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get('url', None)
    token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('token_auth', None)
    chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('chunk_size', 0)

    for event_chunk in chunkify(events, chunk_size):
        query_strings = []
        for event in event_chunk:
            if 'recid' not in event:
                continue
            try:
                query_string = self._build_query_string(event)
                query_strings.append(query_string)
            except PIDDeletedError:
                pass

        # Check and bail if the bookmark has progressed, e.g. from another
        # duplicate task or manual run of the exporter.
        bookmark = current_cache.get('piwik_export:bookmark')
        if event_chunk[-1].timestamp < bookmark:
            return

        payload = {'requests': query_strings, 'token_auth': token_auth}

        res = requests.post(url, json=payload, timeout=60)

        # Failure: not 200 or not "success"
        content = res.json() if res.ok else None
        if res.status_code == 200 and content.get('status') == 'success':
            if content.get('invalid') != 0:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                    'invalid_events': content.get('invalid')
                }
                current_app.logger.warning(msg, extra=info)
            elif update_bookmark is True:
                current_cache.set('piwik_export:bookmark',
                                  event_chunk[-1].timestamp, timeout=-1)
        else:
            msg = 'Invalid events in Piwik export request.'
            info = {
                'begin_event_timestamp': event_chunk[0].timestamp,
                'end_event_timestamp': event_chunk[-1].timestamp,
            }
            raise PiwikExportRequestError(msg, export_info=info)
def run(self, start_date=None, end_date=None, update_bookmark=True):
    """Run export job."""
    if start_date is None:
        bookmark = current_cache.get('piwik_export:bookmark')
        if bookmark is None:
            msg = 'Bookmark not found, and no start date specified.'
            current_app.logger.warning(msg)
            return
        start_date = dateutil_parse(bookmark) if bookmark else None

    time_range = {}
    if start_date is not None:
        time_range['gte'] = start_date.replace(microsecond=0).isoformat()
    if end_date is not None:
        time_range['lte'] = end_date.replace(microsecond=0).isoformat()

    events = Search(
        using=current_search_client,
        index='events-stats-*'
    ).filter(
        'range', timestamp=time_range
    ).sort(
        {'timestamp': {'order': 'asc'}}
    ).params(preserve_order=True).scan()

    url = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'].get('url', None)
    token_auth = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('token_auth', None)
    chunk_size = current_app.config['ZENODO_STATS_PIWIK_EXPORTER'] \
        .get('chunk_size', 0)

    for event_chunk in chunkify(events, chunk_size):
        query_strings = []
        for event in event_chunk:
            if 'recid' not in event:
                continue
            try:
                query_string = self._build_query_string(event)
                query_strings.append(query_string)
            except PIDDeletedError:
                pass

        payload = {
            'requests': query_strings,
            'token_auth': token_auth
        }

        res = requests.post(url, json=payload)

        # Failure: not 200 or not "success"
        content = res.json() if res.ok else None
        if res.status_code == 200 and content.get('status') == 'success':
            if content.get('invalid') != 0:
                msg = 'Invalid events in Piwik export request.'
                info = {
                    'begin_event_timestamp': event_chunk[0].timestamp,
                    'end_event_timestamp': event_chunk[-1].timestamp,
                    'invalid_events': content.get('invalid')
                }
                current_app.logger.warning(msg, extra=info)
            elif update_bookmark is True:
                current_cache.set('piwik_export:bookmark',
                                  event_chunk[-1].timestamp, timeout=-1)
        else:
            msg = 'Invalid events in Piwik export request.'
            info = {
                'begin_event_timestamp': event_chunk[0].timestamp,
                'end_event_timestamp': event_chunk[-1].timestamp,
            }
            raise PiwikExportRequestError(msg, export_info=info)
def set(self, key: str, value: datetime = None):
    """Store a value (defaulting to the current time) under the prefixed key."""
    return current_cache.set(f'{self.prefix}:{key}',
                             value or datetime.now(), timeout=-1)
def test_datacite_update(mocker, db, minimal_record):
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient'
    )

    doi_tags = [
        '<identifier identifierType="DOI">{doi}</identifier>',
        ('<relatedIdentifier relatedIdentifierType="DOI" '
         'relationType="IsVersionOf">{conceptdoi}</relatedIdentifier>'),
    ]

    # Assert calls and content
    def assert_datacite_calls_and_content(record, doi_tags):
        """Datacite client calls assertion helper."""
        assert dc_mock().metadata_post.call_count == 1
        _, doi_args, _ = dc_mock().metadata_post.mock_calls[0]
        assert all([t.format(**record) in doi_args[0] for t in doi_tags])

        assert dc_mock().doi_post.call_count == 1
        dc_mock().doi_post.assert_any_call(
            record['doi'],
            'https://zenodo.org/record/{}'.format(record['recid']))

    def assert_datacite_calls_with_missing_data():
        """Datacite client calls assertion helper."""
        assert dc_mock().metadata_post.call_count == 0
        assert dc_mock().doi_post.call_count == 0

    def create_versioned_record(recid_value, conceptrecid):
        """Utility function for creating versioned records."""
        recid = PersistentIdentifier.create(
            'recid', recid_value, status=PIDStatus.RESERVED)
        pv = PIDVersioning(parent=conceptrecid)
        pv.insert_draft_child(recid)

        record_metadata = deepcopy(minimal_record)
        # Remove the DOI
        del record_metadata['doi']
        record_metadata['conceptrecid'] = conceptrecid.pid_value
        record_metadata['recid'] = int(recid.pid_value)

        record = ZenodoRecord.create(record_metadata)
        zenodo_record_minter(record.id, record)
        record.commit()

        return recid, record

    # Create conceptrecid for the records
    conceptrecid = PersistentIdentifier.create(
        'recid', '100', status=PIDStatus.RESERVED)

    # Create a reserved recid
    recid1, r1 = create_versioned_record('352543', conceptrecid)

    # no registered local DOIs
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()

    doi_pids = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_value == '10.5072/zenodo.352543')
    doi_pids[0].status = PIDStatus.REGISTERED
    db.session.commit()
    update_date = doi_pids[0].updated

    # no task_details on Redis cache
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()

    new_update_date1 = doi_pids[0].updated
    assert update_date == new_update_date1

    task_details = dict(
        job_id=str(uuid.uuid4()),
        from_date=datetime(2015, 1, 1, 13, 33),
        until_date=datetime(2016, 1, 1, 13, 33),
        last_update=datetime.utcnow()
    )
    current_cache.set('update_datacite:task_details', task_details,
                      timeout=-1)

    # no registered local DOIs updated inside the interval
    schedule_update_datacite_metadata(1)
    assert_datacite_calls_with_missing_data()

    new_update_date2 = doi_pids[0].updated
    assert update_date == new_update_date2

    task_details = dict(
        job_id=str(uuid.uuid4()),
        from_date=datetime(2015, 1, 1, 13, 33),
        until_date=datetime.utcnow(),
        last_update=datetime.utcnow()
    )
    current_cache.set('update_datacite:task_details', task_details,
                      timeout=-1)

    schedule_update_datacite_metadata(1)

    new_update_date3 = doi_pids[0].updated
    assert update_date < new_update_date3
    assert_datacite_calls_and_content(r1, doi_tags)