def _save_identifiers_to_kb(obj, eng):
    """Push the record's identifiers into a knowledge base.

    The values stored under ``identifier_key`` in the record extracted from
    ``obj`` (an empty list when the key is absent) are passed to
    ``save_keys_to_kb`` together with ``kb_name`` and ``obj.id``.

    ``kb_name`` and ``identifier_key`` are not defined in this function;
    presumably they are supplied by an enclosing scope -- TODO confirm.

    :param obj: workflow object holding the record.
    :param eng: workflow engine, forwarded to ``get_record_from_obj``.
    """
    from inspirehep.utils.knowledge import save_keys_to_kb

    keys = get_record_from_obj(obj, eng).get(identifier_key, [])
    save_keys_to_kb(kb_name, keys, obj.id)
def exists_in_holding_pen(obj, eng):
    """Check if a record exists in HP by looking in given KB.

    For every ``field -> lookup`` pair in the ``HOLDING_PEN_MATCH_MAPPING``
    application setting, the values found in the record under ``lookup`` are
    turned into exact-match ``field:"value"`` clauses. The clauses are OR-ed
    together and searched for in the Holding Pen; the current object's own id
    is removed from the hits.

    When a search is performed, the matching ids are stored in
    ``obj.extra_data["holdingpen_ids"]`` (an empty list when nothing matched).

    :param obj: workflow object holding the record.
    :param eng: workflow engine, forwarded to ``get_record_from_obj``.
    :return: the set of matching Holding Pen ids (possibly empty) when a
        search was performed, ``False`` when there was nothing to search for.
        Callers should rely on truthiness only.
    """
    from invenio_workflows.search import search as hp_search

    record = get_record_from_obj(obj, eng)

    # ``config.get`` yields None when the setting is absent; treat that as
    # "no matching rules" instead of crashing inside ``six.iteritems``.
    mapping = current_app.config.get("HOLDING_PEN_MATCH_MAPPING") or {}

    identifiers = []
    for field, lookup in six.iteritems(mapping):
        # Add quotes around to make the search exact. ``or []`` covers a
        # lookup path that is present but holds None.
        identifiers.extend(
            '{0}:"{1}"'.format(field, value)
            for value in record.get(lookup, []) or []
        )

    if not identifiers:
        # Nothing to match on, so no search is issued.
        return False

    # Search for any existing record in Holding Pen, exclude self
    hits = hp_search(
        query=" OR ".join(identifiers),
        per_page=10,
        page=1,
    )[0]
    result = set(hits) - {obj.id}

    if result:
        obj.log.info("Record already found in Holding Pen ({0})".format(
            result
        ))
    obj.extra_data["holdingpen_ids"] = list(result)
    return result
def _create_curation_ticket(obj, eng):
    """Open a curation ticket for records flagged as core.

    The requestor e-mail is resolved from ``obj.id_user`` and the record is
    extracted from ``obj`` in every case. Only when ``obj.extra_data["core"]``
    is truthy is the ticket subject/body built with ``get_curation_body`` and
    handed to ``submit_rt_ticket``.

    ``template``, ``queue`` and ``ticket_id_key`` are not defined in this
    function; presumably they are supplied by an enclosing scope -- TODO
    confirm.

    :param obj: workflow object holding the record and its extra data.
    :param eng: workflow engine, forwarded to ``get_record_from_obj``.
    """
    from invenio_access.control import acc_get_user_email

    ticket_requestors = acc_get_user_email(obj.id_user)
    curated_record = get_record_from_obj(obj, eng)

    # Non-core records do not get a curation ticket.
    if not obj.extra_data.get("core"):
        return

    ticket_subject, ticket_body = get_curation_body(
        template, curated_record, ticket_requestors, obj.extra_data
    )
    submit_rt_ticket(
        obj, queue, ticket_subject, ticket_body, ticket_requestors,
        ticket_id_key
    )
def test_harvesting_workflow_accepted(self, search):
    """Run the harvesting fixture and accept the halted record as CORE.

    All HTTP traffic (matching search, arXiv tarball and PDF, robotupload) is
    stubbed with ``responses``. The workflow is started, its first halted
    object is approved and marked core, the workflow is continued, and the
    resulting record must carry "CORE" in ``collections.primary``.

    :param search: mock standing in for the Holding Pen search; presumably
        injected by a patch decorator that is not visible here.
    """
    from invenio_base.globals import cfg
    from invenio_workflows.api import start
    from inspirehep.utils.helpers import (
        get_record_from_obj,
    )

    # Mock Elasticsearch search for Holding Pen check
    search.return_value = ([], 0)

    # Mock matching checks
    no_hits_payload = """{ "hits": { "total": 0, "max_score": null, "hits": [] } }"""
    responses.add(
        responses.GET,
        re.compile(".*record/_search"),
        status=200,
        body=no_hits_payload,
        content_type='application/json'
    )

    # arXiv source tarball
    tarball_bytes = self.arxiv_tarball_accept.read()
    responses.add(
        responses.GET,
        'http://arxiv.org/e-print/1511.01097',
        content_type="application/x-eprint-tar",
        body=tarball_bytes,
        status=200,
        adding_headers={
            "Content-Encoding": 'x-gzip',
        },
    )

    # arXiv PDF
    pdf_bytes = self.arxiv_pdf_accept.read()
    responses.add(
        responses.GET,
        'http://arxiv.org/pdf/1511.01097',
        content_type="application/pdf",
        body=pdf_bytes,
        status=200,
        stream=True,
    )

    # Robotupload endpoint
    upload_base = cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL")
    robotupload_url = os.path.join(
        upload_base,
        "batchuploader/robotupload/insert"
    )
    responses.add(
        responses.POST,
        robotupload_url,
        body="[INFO] bibupload batchupload --insert /dummy/file/path\n",
        status=200,
    )

    started = start(
        'harvesting_fixture',
        data=[self.record_oai_arxiv_accept],
        module_name='unit_tests'
    )

    # Let's get the halted record
    obj = started.halted_objects[0]

    # Now let's resolve it as accepted and continue
    obj.remove_action()
    obj.extra_data["approved"] = True
    obj.extra_data["core"] = True
    obj.set_extra_data(obj.extra_data)
    obj.save()
    resumed = obj.continue_workflow()

    accepted_record = get_record_from_obj(obj, resumed)

    # Now it is CORE
    self.assertIn("CORE", accepted_record.get("collections.primary"))
def test_harvesting_workflow_rejected(self, search):
    """Run the harvesting fixture on a record that ends up rejected.

    HTTP traffic (matching search, arXiv tarball and PDF) is stubbed with
    ``responses``. The workflow is expected to complete without halting. The
    first completed object must be unapproved, carry two attached files,
    have FFTs and publication info on its record, hold an
    ``arxiv_guessing`` task result, and not be in the "CORE" collection.

    :param search: mock standing in for the Holding Pen search; presumably
        injected by a patch decorator that is not visible here.
    """
    from invenio_workflows.api import start
    from inspirehep.utils.helpers import (
        get_record_from_obj,
    )

    # Mock Elasticsearch search for Holding Pen check
    search.return_value = ([], 0)

    # Mock matching checks
    no_hits_payload = """{ "hits": { "total": 0, "max_score": null, "hits": [] } }"""
    responses.add(
        responses.GET,
        re.compile(".*record/_search"),
        status=200,
        body=no_hits_payload,
        content_type='application/json'
    )

    # arXiv source tarball
    tarball_bytes = self.arxiv_tarball.read()
    responses.add(
        responses.GET,
        'http://arxiv.org/e-print/1407.7587',
        content_type="application/x-eprint-tar",
        body=tarball_bytes,
        status=200,
        adding_headers={
            "Content-Encoding": 'x-gzip',
        },
    )

    # arXiv PDF
    pdf_bytes = self.arxiv_pdf.read()
    responses.add(
        responses.GET,
        'http://arxiv.org/pdf/1407.7587',
        content_type="application/pdf",
        body=pdf_bytes,
        status=200,
        stream=True,
    )

    finished = start(
        'harvesting_fixture',
        data=[self.record_oai_arxiv_plots],
        module_name='unit_tests'
    )

    # Let's get the record metadata and check contents
    obj = finished.completed_objects[0]
    rejected_record = get_record_from_obj(obj, finished)

    # This record should be rejected
    self.assertFalse(obj.extra_data["approved"])

    # Files should have been attached (tarball + pdf)
    self.assertEqual(len(obj.data["files"]), 2)

    # Some plots/files should have been added to FFTs
    self.assertTrue(rejected_record.get('fft'))

    # A publication note should have been extracted
    self.assertTrue(rejected_record.get('publication_info'))

    # A prediction should have been made
    task_results = obj.get_tasks_results()
    self.assertTrue(task_results.get("arxiv_guessing"))

    # It is not CORE
    self.assertNotIn("CORE", rejected_record.get("collections.primary"))