def _classify_paper_with_deposit(obj, eng):
    """Classify a deposited paper from its PDF, or from title and abstract.

    Unless ``fast_mode`` is set, the first deposition file whose name
    contains ".pdf" is passed to ``get_keywords_from_local_file``.  When no
    such file is found (or in fast mode), the title and abstract taken from
    the latest SIP metadata are passed to ``get_keywords_from_text`` instead.

    ``fast_mode``, ``taxonomy`` and the other classifier options are free
    variables; they are not defined in this block and are presumably supplied
    by an enclosing scope.

    :param obj: workflow object wrapping the deposition.
    :param eng: workflow engine, passed through to ``classify_paper``.
    """
    from invenio_deposit.models import Deposition

    deposition = Deposition(obj)
    data = None
    callback = get_keywords_from_local_file
    if not fast_mode:
        for f in deposition.files:
            if f.name and ".pdf" in f.name.lower():
                data = f.get_syspath()
                break
    if not data:
        try:
            metadata = deposition.get_latest_sip().metadata
        except AttributeError as err:
            obj.log.error("Error getting data: {0}".format(err))
            # Without a fallback ``metadata`` stays unbound and the lookup
            # below dies with UnboundLocalError right after logging.  An
            # empty mapping yields the same "" defaults the lookups use.
            metadata = {}
        data = [
            metadata.get("titles", {}).get("title", ""),
            metadata.get("abstracts", {}).get("value", ""),
        ]
        callback = get_keywords_from_text
    classify_paper(obj, eng, callback, data, taxonomy, rebuild_cache,
                   no_cache, output_limit, spires, match_mode,
                   with_author_keywords, extract_acronyms, only_core_tags,
                   fast_mode)
def arxiv_fft_get(obj, eng):
    """Get FFT from arXiv, if arXiv ID is provided.

    Reads ``arxiv_id`` from the metadata of the SIP returned by
    ``get_latest_sip(sealed=False)``.  When it is set, the PDF is downloaded
    into a temporary file created under ``CFG_TMPSHAREDDIR`` and then attached
    to the deposition.  Failures while attaching the file are logged on
    ``obj.log`` and not re-raised.

    :param obj: workflow object wrapping the deposition.
    :param eng: workflow engine (unused here).
    """
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=False)
    metadata = sip.metadata

    if not ('arxiv_id' in metadata and metadata['arxiv_id']):
        return

    arxiv_pdf_url = cfg.get("ARXIV_PDF_URL", "http://arxiv.org/pdf/") + \
        "{0}.{1}"

    from invenio.config import CFG_TMPSHAREDDIR
    arxiv_file, arxiv_file_path = mkstemp(
        prefix="%s_" % (metadata['arxiv_id'].replace("/", "_")),
        suffix='.pdf',
        dir=CFG_TMPSHAREDDIR,
    )
    # Only the path is needed; the downloader writes to it by name.
    os.close(arxiv_file)

    download_url(url=arxiv_pdf_url.format(metadata['arxiv_id'], "pdf"),
                 content_type="pdf",
                 download_to_file=arxiv_file_path)

    # To get 1111.2222.pdf as filename.
    filename = "{0}.pdf".format(metadata['arxiv_id'].replace("/", "_"))

    try:
        save_deposition_file(deposition, filename, arxiv_file_path)
    except FilenameAlreadyExists:
        obj.log.error("PDF file not saved: filename already exists.")
    except Exception as e:
        # Format the exception itself: ``e.message`` is deprecated and is
        # missing on many exception types, which would make this handler
        # raise AttributeError instead of logging.
        obj.log.error("PDF file not saved: {}.".format(e))
def _classify_paper_with_deposit(obj, eng):
    """Classify a deposited paper from its PDF, or from title and abstract.

    Unless ``fast_mode`` is set, the first deposition file whose name
    contains ".pdf" is passed to ``get_keywords_from_local_file``.  When no
    such file is found (or in fast mode), ``title.title`` and
    ``abstract.summary`` from the latest SIP metadata are passed to
    ``get_keywords_from_text`` instead.

    ``fast_mode``, ``taxonomy`` and the other classifier options are free
    variables; they are not defined in this block and are presumably supplied
    by an enclosing scope.

    :param obj: workflow object wrapping the deposition.
    :param eng: workflow engine, passed through to ``classify_paper``.
    """
    from invenio_deposit.models import Deposition

    deposition = Deposition(obj)
    data = None
    callback = get_keywords_from_local_file
    if not fast_mode:
        for f in deposition.files:
            if f.name and ".pdf" in f.name.lower():
                data = f.get_syspath()
                break
    if not data:
        try:
            metadata = deposition.get_latest_sip().metadata
        except AttributeError as err:
            obj.log.error("Error getting data: {0}".format(err))
            # Without a fallback ``metadata`` stays unbound and the lookup
            # below dies with UnboundLocalError right after logging.  An
            # empty mapping yields the same "" defaults the lookups use.
            metadata = {}
        data = [
            metadata.get("title", {}).get("title", ""),
            metadata.get("abstract", {}).get("summary", ""),
        ]
        callback = get_keywords_from_text
    classify_paper(obj, eng, callback, data, taxonomy, rebuild_cache,
                   no_cache, output_limit, spires, match_mode,
                   with_author_keywords, extract_acronyms, only_core_tags,
                   fast_mode)
def add_note_entry(obj, eng):
    """Append a public note to the sealed SIP metadata on approval.

    The note reads '*Temporary entry*' when ``obj.extra_data["core"]`` is
    truthy and '*Brief entry*' otherwise.  A ``public_notes`` value that is
    missing or not a list is replaced by a one-element list.
    """
    if obj.extra_data.get("core"):
        note = {'value': '*Temporary entry*'}
    else:
        note = {'value': '*Brief entry*'}

    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=True).metadata

    # ``None`` is not a list either, so one isinstance test covers both the
    # "missing" and the "wrong type" case.
    if isinstance(metadata.get('public_notes'), list):
        metadata['public_notes'].append(note)
    else:
        metadata['public_notes'] = [note]

    deposition.update()
def get_description(bwo):
    """Return the rendered description of a submission workflow object.

    Returns a short plain-text message when the deposition type is invalid
    or when no SIP exists yet; otherwise renders
    ``workflows/styles/submission_record.html`` with the record's
    identifiers, categories and authors.
    """
    from invenio_access.control import acc_get_user_email

    results = bwo.get_tasks_results()
    try:
        deposit_object = Deposition(bwo)
    except InvalidDepositionType:
        return "This submission is disabled: {0}.".format(bwo.workflow.name)

    user_email = acc_get_user_email(deposit_object.workflow_object.id_user)
    sip = deposit_object.get_latest_sip()
    if not sip:
        return "Submitter: {0}".format(user_email)

    record = Record(sip.metadata)

    # Identifiers: non-empty report numbers first, then prefixed DOIs.
    report_numbers = record.get("report_numbers", [])
    dois = record.get("dois.value", [])
    identifiers = []
    for entry in report_numbers or []:
        value = entry.get("value", "")
        if value:
            identifiers.append(value)
    identifiers.extend("doi:{0}".format(doi) for doi in dois or [])

    # Categories: the document type followed by every usable subject term.
    subject_names = []
    for subject in record.get("subject_terms", []) or []:
        if isinstance(subject, string_types):
            subject_names.append(subject)
        elif isinstance(subject, dict):
            term = subject.get("term")
            if term:
                subject_names.append(term)
    categories = [record.get("type_of_doc", "")] + subject_names

    authors = [record.get("_first_author", {})]
    authors.extend(record.get("_additional_authors", []))

    return render_template(
        "workflows/styles/submission_record.html",
        categories=categories,
        authors=authors,
        identifiers=identifiers,
        results=results,
        user_email=user_email,
        object=bwo,
        record=record,
    )
def get_title(bwo):
    """Return the title shown for a submission workflow object.

    Falls back to a fixed message when the deposition type is invalid or
    when the deposition has no SIP yet.
    """
    try:
        deposit_object = Deposition(bwo)
    except InvalidDepositionType:
        return "This submission is disabled: {0}.".format(bwo.workflow.name)

    sip = deposit_object.get_latest_sip()
    if not sip:
        return "User submission in progress"

    # Wrap the metadata to get the SmartJSON object, then take the first
    # title (or the placeholder when the key is absent).
    titles = Record(sip.metadata).get("titles.title", ["No title"])
    return titles[0]
def user_pdf_get(obj, eng):
    """Register the user-supplied PDF URL as an FFT entry, if requested.

    Does nothing unless ``obj.extra_data["pdf_upload"]`` is truthy.
    """
    if not obj.extra_data.get('pdf_upload', False):
        return

    pdf_entry = {
        'url': obj.extra_data.get('submission_data').get('pdf'),
        'docfile_type': 'INSPIRE-PUBLIC',
    }

    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=True).metadata
    if not metadata.get('fft'):
        # Missing or empty: start a fresh list.
        metadata['fft'] = [pdf_entry]
    else:
        metadata['fft'].append(pdf_entry)

    deposition.update()
    obj.log.info("PDF file added to FFT.")
def formatter(bwo, **kwargs):
    """Return the detailed HTML rendering of a submission.

    Returns a plain-text message instead when the deposition type is
    invalid.  ``kwargs`` is accepted but not used.
    """
    try:
        deposit_object = Deposition(bwo)
    except InvalidDepositionType:
        return "This submission is disabled: {0}.".format(bwo.workflow.name)

    latest_sip = deposit_object.get_latest_sip(deposit_object.submitted)
    wrapped_record = Record(latest_sip.metadata)
    return render_template(
        'format/record/Holding_Pen_HTML_detailed.tpl',
        record=wrapped_record,
    )
def add_submission_extra_data(obj, eng):
    """Move submission-only fields from the SIP metadata to the object.

    The keys ``references``, ``extra_comments`` and ``pdf`` are removed from
    the sealed SIP metadata (when present) and stored under
    ``obj.extra_data["submission_data"]``; the deposition is then saved.
    """
    deposition = Deposition(obj)
    metadata = deposition.get_latest_sip(sealed=True).metadata

    submission_data = {}
    for field in ("references", "extra_comments", "pdf"):
        if field in metadata:
            submission_data[field] = metadata[field]
            del metadata[field]

    obj.extra_data["submission_data"] = submission_data
    deposition.save()
def _create_ticket(obj, eng):
    """Build a ticket from the sealed SIP metadata and submit it to RT.

    ``template``, ``queue`` and ``ticket_id_key`` are free variables that
    are not defined in this block; presumably they come from an enclosing
    scope.
    """
    from invenio_access.control import acc_get_user_email

    deposition = Deposition(obj)
    requestors = acc_get_user_email(obj.id_user)
    sip_metadata = deposition.get_latest_sip(sealed=True).metadata

    subject, body = get_ticket_body(
        template, deposition, sip_metadata, requestors, obj
    )
    submit_rt_ticket(obj, queue, subject, body, requestors, ticket_id_key)
def _create_curation_ticket(obj, eng):
    """Submit a curation ticket to RT for records flagged as core.

    Nothing is submitted unless ``obj.extra_data["core"]`` is truthy.
    ``template``, ``queue`` and ``ticket_id_key`` are free variables that
    are not defined in this block; presumably they come from an enclosing
    scope.
    """
    from invenio_access.control import acc_get_user_email

    deposition = Deposition(obj)
    requestors = acc_get_user_email(obj.id_user)
    metadata = deposition.get_latest_sip(sealed=True).metadata

    if not obj.extra_data.get("core"):
        return

    subject, body = get_curation_body(
        template, metadata, requestors, obj.extra_data
    )
    submit_rt_ticket(obj, queue, subject, body, requestors, ticket_id_key)
def migrate_workflow_object(obj_id):
    """Migrate one workflow object to the new data model.

    What happens depends on the name of the object's workflow:

    * ``process_record_arxiv``: string data is ignored; data that already
      has a ``drafts`` key is only re-saved; anything else is converted with
      ``bibfield.do`` and a ``Payload`` is created and saved.
    * ``literature``: the latest SIP (if any) gets converted metadata and a
      regenerated MARC package.
    * ``authornew`` / ``authorupdate``: data is converted with
      ``author_bibfield.do``.
    * anything else: the object is simply re-saved.

    Unless one of the early returns is taken, the workflow state is then
    reset with ``reset_workflow_object_states``.

    :param obj_id: primary key of the ``BibWorkflowObject`` to migrate.
    :raises Exception: any failure is logged, recorded on the object when
        one was loaded, and then re-raised.
    """
    # Bound up front so the error handler can tell whether loading succeeded.
    obj = None
    try:
        obj = BibWorkflowObject.query.get(obj_id)
        rename_object_action(obj)
        if obj.workflow.name == "process_record_arxiv":
            metadata = obj.get_data()
            if isinstance(metadata, string_types):
                # Ignore records that have string as data
                return
            if 'drafts' in metadata:
                # New data model detected, just save and exit
                obj.save()
                return
            if hasattr(metadata, 'dumps'):
                metadata = metadata.dumps(clean=True)
            obj.data = bibfield.do(metadata)
            payload = Payload.create(
                type=obj.workflow.name,
                workflow_object=obj
            )
            payload.save()
        elif obj.workflow.name == "literature":
            d = Deposition(obj)
            sip = d.get_latest_sip()
            if sip:
                sip.metadata = bibfield.do(sip.metadata)
                sip.package = legacy_export_as_marc(hep2marc.do(sip.metadata))
                d.save()
        elif obj.workflow.name in ("authornew", "authorupdate"):
            data = obj.get_data()
            obj.set_data(author_bibfield.do(data))
            obj.save()
        else:
            obj.save()  # To update and trigger indexing
        reset_workflow_object_states(obj)
    except Exception as err:
        current_app.logger.error("Problem migrating record {0}".format(obj_id))
        current_app.logger.exception(err)
        # If the lookup itself failed, or returned nothing, there is no
        # object to flag.  Touching ``obj`` here would raise a second error
        # and hide the one being reported.
        if obj is not None:
            msg = "Error: %r\n%s" % \
                (err, traceback.format_exc())
            obj.set_error_message(msg)
            obj.save(version=ObjectVersion.ERROR)
        raise
def halt_to_render(obj, eng):
    """Store the "completed" render context and halt the workflow.

    The context points at ``deposit/completed.html``; the engine is halted
    with the message "User submission complete." and can be resumed later.
    """
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=False)

    # The default deposition type is represented as ``None`` in the context.
    if deposition.type.is_default():
        type_identifier = None
    else:
        type_identifier = deposition.type.get_identifier()

    render_context = dict(
        template_name_or_list="deposit/completed.html",
        deposition=deposition,
        deposition_type=type_identifier,
        uuid=deposition.id,
        sip=sip,
        my_depositions=Deposition.get_depositions(
            current_user, type=deposition.type
        ),
        format_record=format_record,
    )
    deposition.set_render_context(render_context)

    obj.last_task = "halt_to_render"
    eng.halt("User submission complete.")
def formatter(bwo, **kwargs):
    """Return the submission formatted as MARCXML or detailed HTML.

    :param bwo: workflow object wrapping the deposition.
    :param kwargs: ``of="xm"`` selects the SIP package (MARCXML); any other
        value, or none (default ``"hd"``), selects the HTML rendering.
    :returns: the formatted output, or a plain-text message when the
        deposition type is invalid or the SIP has no ``package`` attribute.
    """
    try:
        deposit_object = Deposition(bwo)
    except InvalidDepositionType:
        return "This submission is disabled: {0}.".format(bwo.workflow.name)

    sip = deposit_object.get_latest_sip(deposit_object.submitted)
    record = sip.metadata

    if not hasattr(sip, "package"):
        return "No data found in submission (no package)."
    marcxml = sip.package

    output_format = kwargs.get("of", "hd")
    if output_format == "xm":
        return marcxml
    return render_template(
        "format/record/Holding_Pen_HTML_detailed.tpl",
        record=Record(record),
    )
def test_agnostic_deposit(self):
    """A deposition still has the same data model."""
    from invenio_deposit.models import Deposition
    from invenio.ext.login.legacy_user import UserInfo

    owner = UserInfo(uid=1)
    deposition = Deposition.create(owner, type='DepositModelTest')
    deposition.save()
    deposition.run_workflow()

    # The first completed workflow object must expose every top-level key
    # of the deposition data model.
    finished = deposition.engine.completed_objects[0]
    expected_keys = ('files', 'sips', 'type', 'drafts', 'title')
    for key in expected_keys:
        self.assertIn(key, finished.data)
def test_create(self):
    """Depositions are created with the default or an explicit type."""
    from invenio_ext.login.legacy_user import UserInfo
    from invenio_deposit.models import Deposition

    owner = UserInfo(uid=1)

    # No type given: the default type is used and survives a reload.
    default_dep = Deposition.create(owner)
    assert default_dep.type == self.DefaultType
    assert Deposition.get(default_dep.id).type == self.DefaultType

    # Explicit type: that type is used and survives a reload.
    typed_dep = Deposition.create(owner, type=self.AnotherType)
    assert typed_dep.type == self.AnotherType
    assert Deposition.get(typed_dep.id).type == self.AnotherType

    # remove the records
    for created in (default_dep, typed_dep):
        Deposition.delete(created)
def test_create(self):
    """Depositions are created with the default or an explicit type."""
    from invenio.ext.login.legacy_user import UserInfo
    from invenio_deposit.models import Deposition

    owner = UserInfo(uid=1)

    # No type given: the default type is used and survives a reload.
    default_dep = Deposition.create(owner)
    assert default_dep.type == self.DefaultType
    assert Deposition.get(default_dep.id).type == self.DefaultType

    # Explicit type: that type is used and survives a reload.
    typed_dep = Deposition.create(owner, type=self.AnotherType)
    assert typed_dep.type == self.AnotherType
    assert Deposition.get(typed_dep.id).type == self.AnotherType

    # remove the records
    for created in (default_dep, typed_dep):
        Deposition.delete(created)
def run_deposition_tasks(self, deposition_id, with_webcoll=True):
    """Run every task id listed in the latest sealed SIP.

    When ``with_webcoll`` is true, additionally request the record's
    metadata page and assert that it answers with HTTP 200.
    """
    from invenio_deposit.models import Deposition

    # Run submitted tasks
    latest_sip = Deposition.get(deposition_id).get_latest_sip(sealed=True)
    for task_id in latest_sip.task_ids:
        self.run_task_id(task_id)

    if not with_webcoll:
        return

    # Check if record is accessible
    record_url = url_for(
        'record.metadata', recid=latest_sip.metadata['recid']
    )
    response = self.client.get(
        record_url,
        base_url=self.app.config['CFG_SITE_SECURE_URL'],
    )
    self.assertStatus(response, 200)
def show_stats(deposition_type):
    """Render the stats page for all submitted depositions of a type.

    Aborts with 404 when at most one deposition type is registered and a
    default type exists.  Charts are grouped by the ``group_by`` query
    argument (default ``type_of_doc``).
    """
    only_default_type = (
        len(DepositionType.keys()) <= 1
        and DepositionType.get_default() is not None
    )
    if only_default_type:
        abort(404)

    form = FilterDateForm()
    deptype = DepositionType.get(deposition_type)

    # Only depositions that have a sealed SIP count as submitted.
    submitted_depositions = [
        deposition
        for deposition in Deposition.get_depositions(type=deptype)
        if deposition.has_sip(sealed=True)
    ]

    grouping = request.args.get('group_by', 'type_of_doc')
    ctx = process_metadata_for_charts(submitted_depositions,
                                      group_by=grouping)
    template_extras = dict(
        deposition_type=deptype,
        depositions=submitted_depositions,
        form=form,
        chart_types=CHART_TYPES
    )
    ctx.update(template_extras)

    return render_template('deposit/stats/all_depositions.html', **ctx)
def stats_api(deposition_type):
    """Get stats JSON for the submitted depositions of a type.

    Query arguments read from the request:

    * ``since_date`` / ``until_date`` (``YYYY-MM-DD``): optional inclusive
      bounds on the deposition creation time.
    * ``group_by``: grouping field, default ``type_of_doc``.
    * ``include_hidden``: any non-empty value is passed on as ``True``.

    :param deposition_type: identifier passed to ``DepositionType.get``.
    :returns: the chart data wrapped by ``jsonify``.
    :raises ValueError: when a date argument does not match ``%Y-%m-%d``.
    """
    deptype = DepositionType.get(deposition_type)
    submitted_depositions = [
        d for d in Deposition.get_depositions(type=deptype)
        if d.has_sip(sealed=True)
    ]

    since_arg = request.args.get('since_date')
    if since_arg is not None:
        # Parsing a bare date already yields midnight of that day.
        since_date = datetime.strptime(since_arg, "%Y-%m-%d")
        submitted_depositions = [
            d for d in submitted_depositions if d.created >= since_date
        ]

    until_arg = request.args.get('until_date')
    if until_arg is not None:
        # Extend to the very end of the day.  Stopping at 23:59:00 silently
        # dropped depositions created during the last minute.
        until_date = datetime.strptime(until_arg, "%Y-%m-%d").replace(
            hour=23, minute=59, second=59, microsecond=999999
        )
        submitted_depositions = [
            d for d in submitted_depositions if d.created <= until_date
        ]

    result = process_metadata_for_charts(
        submitted_depositions,
        request.args.get('group_by', 'type_of_doc'),
        bool(request.args.get('include_hidden', None))
    )
    resp = jsonify(result)
    return resp
def do_upgrade():
    """Migrate stored workflow objects to the new data model.

    Two passes are made:

    1. Every ``BibWorkflowObject`` whose ``module_name`` is "webdeposit" has
       its workflow state reset; when it has a SIP, the SIP metadata is
       converted with ``bibfield.do`` and its MARC package is regenerated.
    2. Every object of the ``process_record_arxiv`` workflow has its state
       reset and its action renamed; unless its data is a string or already
       contains ``drafts``, the data is converted with ``bibfield.do`` and a
       ``Payload`` is created and saved.
    """
    from invenio_workflows.models import (
        BibWorkflowObject,
        Workflow,
        ObjectVersion
    )
    from invenio_workflows.registry import workflows
    from invenio_deposit.models import Deposition
    from inspire.dojson.utils import legacy_export_as_marc
    from inspire.dojson.hep import hep2marc
    from inspire.modules.workflows.dojson import bibfield
    from inspire.modules.workflows.models import Payload

    def rename_object_action(obj):
        """Rename the "arxiv_approval" action to "hep_approval".

        The existing action message is kept; other actions are untouched.
        """
        if obj.get_action() == "arxiv_approval":
            obj.set_action("hep_approval", obj.get_action_message())

    def reset_workflow_object_states(obj):
        """Fix workflow positions and states.

        Old states from Prod/QA:
        {(), (0,), (5, 3, 14), (5, 3, 14, 0), (5, 3, 15), (5, 3, 15, 1)}

        {(), (0,), (5,), (5, 3, 1), (5, 3, 10), (5, 3, 11), (5, 3, 12),
         (5, 3, 14), (5, 3, 14, 0), (6, 3, 4)}

        OLD -> NEW
        5, 3, 14 -> 0 end
        5, 3, 10 -> 14, 0 halted

        Completed objects are saved with the task counter set to the last
        task of their workflow and nothing else is done.  Running objects,
        and objects whose task counter raises ``IndexError``, are marked as
        errors.  For ``process_record_arxiv`` the old position is then
        translated to the new layout before the object is saved.
        """
        pos = obj.get_current_task()
        if obj.version == ObjectVersion.COMPLETED:
            # Index of the last task in this workflow's definition.
            obj.save(task_counter=[len(workflows.get(obj.workflow.name).workflow) - 1])
            return
        elif obj.version == ObjectVersion.RUNNING:
            # Running? Nah that cannot be.
            obj.version = ObjectVersion.ERROR
        try:
            obj.get_current_task_info()
        except IndexError:
            # The current task counter is Invalid
            obj.version = ObjectVersion.ERROR

        if obj.workflow.name == "process_record_arxiv":
            if tuple(pos) in [
                    (5,), (5, 3, 14), (5, 3, 14, 0), (5, 3, 15), (5, 3, 15, 1)]:
                pos = [len(workflows.get(obj.workflow.name).workflow) - 1]  # finished
            elif tuple(pos) in [(5, 3, 10), (5, 3, 11), (5, 3, 12)]:
                pos = [14, 0]  # halted
            elif len(pos) > 1 and pos[0] == 6:
                # We need to update pos from 6 to start of pre_processing part
                pos = [7]
            else:
                pos = [0]  # Nothing here, we go to start
        # For every other workflow the position read above is saved as-is.
        obj.save(task_counter=pos)

    # Special submission handling
    for deposit in BibWorkflowObject.query.filter(
            BibWorkflowObject.module_name == "webdeposit"):
        reset_workflow_object_states(deposit)
        d = Deposition(deposit)
        sip = d.get_latest_sip()
        if sip:
            # Convert the metadata first; the MARC package is derived from
            # the converted form.
            sip.metadata = bibfield.do(sip.metadata)
            sip.package = legacy_export_as_marc(hep2marc.do(sip.metadata))
            d.update()
        deposit.save()

    # Special workflow handling
    workflows_to_fix = ["process_record_arxiv"]
    workflow_objects = []
    for workflow_name in workflows_to_fix:
        workflow_objects += BibWorkflowObject.query.join(
            BibWorkflowObject.workflow).filter(Workflow.name == workflow_name).all()

    for obj in workflow_objects:
        metadata = obj.get_data()
        reset_workflow_object_states(obj)
        rename_object_action(obj)
        # NOTE(review): ``six`` is not imported inside this function;
        # presumably it is imported at module level - confirm.
        if isinstance(metadata, six.string_types):
            # Ignore records that have string as data
            continue
        if 'drafts' in metadata:
            # New data model detected
            continue
        if hasattr(metadata, 'dumps'):
            metadata = metadata.dumps(clean=True)
        obj.data = bibfield.do(metadata)
        payload = Payload.create(
            type=obj.workflow.name,
            workflow_object=obj
        )
        payload.save()