def _send_robotupload(obj, eng):
    """Send the MARCXML of the model's latest SIP to the robotupload service.

    ``url``, ``mode``, ``callback_url`` and ``cfg`` are not defined in this
    function; they are read from the enclosing scope.

    :param obj: workflow object; ``obj.id`` is sent as the nonce and
        ``obj.log`` receives the progress messages.
    :param eng: workflow engine; supplies the model class and is asked to
        halt once the upload has been accepted.
    :raises WorkflowError: when the response text carries no ``[INFO]`` marker.
    """
    from invenio_workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml

    # Build the URL with explicit slashes instead of os.path.join: the latter
    # discards the site URL when callback_url starts with "/" and uses the
    # platform path separator, neither of which is right for a URL.
    combined_callback_url = "{0}/{1}".format(
        cfg["CFG_SITE_URL"].rstrip("/"),
        callback_url.lstrip("/"),
    )

    model = eng.workflow_definition.model(obj)
    sip = model.get_latest_sip()
    marcxml = sip.package

    result = make_robotupload_marcxml(
        url=url,
        marcxml=marcxml,
        callback_url=combined_callback_url,
        mode=mode,
        nonce=obj.id
    )

    if "[INFO]" not in result.text:
        if "cannot use the service" in result.text:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(result.text)
        txt = "Error while submitting robotupload: {0}".format(result.text)
        raise WorkflowError(txt, eng.uuid, obj.id)
    else:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        eng.halt("Waiting for robotupload: {0}".format(result.text))
    obj.log.info("end of upload")
def _send_robotupload(obj, eng):
    """Send the MARCXML stored in the object's extra data to robotupload.

    ``mode`` is not defined in this function; it is read from the enclosing
    scope. The target ``url`` is always passed as ``None``.

    :param obj: workflow object; its extra data must hold a ``"marcxml"``
        entry, ``obj.id`` is sent as the nonce.
    :param eng: workflow engine; asked to halt after a successful submission
        unless ``mode`` is ``"holdingpen"``.
    :raises WorkflowError: when no MARCXML is available, or when the response
        text carries no ``[INFO]`` marker.
    """
    from invenio.base.globals import cfg
    from invenio.modules.workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml

    callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                "callback/workflows/robotupload")

    marcxml = obj.get_extra_data().get("marcxml")
    if not marcxml:
        # Stop here instead of posting an empty payload: the original only
        # logged the problem and then submitted ``None`` anyway.
        message = "No MARCXML found in extra data."
        obj.log.error(message)
        raise WorkflowError(message, eng.uuid, obj.id)

    result = make_robotupload_marcxml(
        url=None,
        marcxml=marcxml,
        callback_url=callback_url,
        mode=mode,
        nonce=obj.id
    )

    if "[INFO]" not in result.text:
        if "cannot use the service" in result.text:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(result.text)
        txt = "Error while submitting robotupload: {0}".format(result.text)
        raise WorkflowError(txt, eng.uuid, obj.id)
    else:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        if mode != "holdingpen":
            eng.halt("Waiting for robotupload: {0}".format(result.text))
    obj.log.info("end of upload")
def test_robotupload_success(self):
    """Check that a mocked 200 ``[INFO]`` reply is returned for an insert."""
    from inspire.utils.robotupload import make_robotupload_marcxml

    endpoint = "http://localhost:4000/batchuploader/robotupload/insert"
    reply = "[INFO] bibupload batchupload --insert /dummy/file/path\n"
    httpretty.register_uri(httpretty.POST, endpoint, body=reply, status=200)

    response = make_robotupload_marcxml(
        "http://localhost:4000",
        "<record></record>",
    )

    self.assertEqual(response.status_code, 200)
    self.assertTrue("[INFO] bibupload batchupload" in response.text)
def test_robotupload_bad_xml(self):
    """Check that a mocked 400 ``[ERROR]`` reply is handed back unchanged."""
    from inspire.utils.robotupload import make_robotupload_marcxml

    endpoint = "http://localhost:4000/batchuploader/robotupload/insert"
    reply = "[ERROR] MARCXML is not valid.\n"
    httpretty.register_uri(httpretty.POST, endpoint, body=reply, status=400)

    # Deliberately malformed: the opening "<" is missing.
    response = make_robotupload_marcxml(
        "http://localhost:4000",
        "record></record>",
    )

    self.assertEqual(response.status_code, 400)
    self.assertTrue("not valid" in response.text)
def _send_robotupload_oaiharvest(obj, eng):
    """Export the harvested record as MARCXML and submit it in insert mode.

    ``url`` and ``cfg`` are not defined in this function; they are read from
    the enclosing scope.

    :param obj: workflow object; ``obj.data`` is exported to MARCXML and
        ``obj.id`` is sent as the nonce.
    :param eng: workflow engine; its ``uuid`` names the shared temporary
        directory and it is asked to halt after a successful submission.
    :raises WorkflowError: when the response text carries no ``[INFO]`` marker.
    """
    from invenio.modules.records.api import Record
    from invenio.modules.workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml

    # The original also built a random sequence id and a list of "-I/-N/-P"
    # arguments from the repository settings, but never passed them anywhere;
    # that dead code has been dropped.

    # Directory creation is kept as a side effect. Nothing in this function
    # reads the directory; presumably other tasks do -- TODO confirm.
    extract_path = os.path.join(cfg['CFG_TMPSHAREDDIR'], str(eng.uuid))
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                "callback/workflows/continue")

    marcxml = Record(obj.data.dumps()).legacy_export_as_marc()

    result = make_robotupload_marcxml(
        url=url,
        marcxml=marcxml,
        callback_url=callback_url,
        mode='insert',
        nonce=obj.id
    )

    if "[INFO]" not in result.text:
        if "cannot use the service" in result.text:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(result.text)
        txt = "Error while submitting robotupload: {0}".format(result.text)
        raise WorkflowError(txt, eng.uuid, obj.id)
    else:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        eng.halt("Waiting for robotupload: {0}".format(result.text))
    obj.log.info("end of upload")
def test_robotupload_success_append(self):
    """Check that ``mode="append"`` reaches the append endpoint and succeeds."""
    from inspire.utils.robotupload import make_robotupload_marcxml

    endpoint = "http://localhost:4000/batchuploader/robotupload/append"
    reply = "[INFO] bibupload batchupload --append /dummy/file/path\n"
    httpretty.register_uri(httpretty.POST, endpoint, body=reply, status=200)

    response = make_robotupload_marcxml(
        "http://localhost:4000",
        "<record></record>",
        mode="append",
    )

    self.assertEqual(response.status_code, 200)
    self.assertTrue("[INFO] bibupload batchupload" in response.text)
def test_robotupload_callback_url(self):
    """Check that a reply mentioning the callback URL is returned intact."""
    from inspire.utils.robotupload import make_robotupload_marcxml

    # The trailing space on the first fragment matters: adjacent literals are
    # concatenated as-is, and without it the mocked reply would read
    # "/some/path--callback-url ...".
    body = (
        "[INFO] bibupload batchupload --insert /some/path "
        "--callback-url http://localhost"
    )
    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:4000/batchuploader/robotupload/insert",
        body=body,
        status=200
    )

    valid_marcxml = "<record></record>"
    response = make_robotupload_marcxml(
        "http://localhost:4000",
        valid_marcxml,
        callback_url="http://localhost",
    )

    self.assertEqual(response.status_code, 200)
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn("--callback-url http://localhost", response.text)
def _send_robotupload_deposit(obj, eng):
    """Submit the latest SIP of a deposition to robotupload in insert mode.

    ``url``, ``cfg`` and ``Deposition`` are not defined in this function;
    they are read from the enclosing scope.

    :param obj: workflow object wrapped in a ``Deposition``; ``obj.id`` is
        sent as the nonce.
    :param eng: workflow engine; asked to halt after a successful submission.
    :raises WorkflowError: when the deposition has no SIP, or when the
        response text carries no ``[INFO]`` marker.
    """
    from invenio.modules.workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml

    robotupload_callback = os.path.join(
        cfg["CFG_SITE_URL"], "callback/workflows/robotupload"
    )

    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(deposition.submitted)
    if not sip:
        raise WorkflowError("No sip found", eng.uuid, obj.id)

    # An unsubmitted deposition gets its SIP sealed and persisted first.
    if not deposition.submitted:
        sip.seal()
        deposition.update()

    result = make_robotupload_marcxml(
        url=url,
        marcxml=sip.package,
        callback_url=robotupload_callback,
        mode='insert',
        nonce=obj.id
    )

    if "[INFO]" in result.text:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        eng.halt("Waiting for robotupload: {0}".format(result.text))
        obj.log.info("end of upload")
        return

    # No "[INFO]" marker: the submission was rejected.
    if "cannot use the service" in result.text:
        # IP not in the list
        obj.log.error("Your IP is not in "
                      "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                      "on host")
        obj.log.error(result.text)
    failure = "Error while submitting robotupload: {0}".format(result.text)
    raise WorkflowError(failure, eng.uuid, obj.id)
def _send_robotupload(obj, eng):
    """Submit the latest SIP of a deposition to the robotupload service.

    ``url`` is not defined in this function; it is read from the enclosing
    scope. When it is ``None`` the base URL is taken from the
    ``CFG_ROBOTUPLOAD_SUBMISSION_BASEURL`` configuration entry.

    :param obj: workflow object wrapped in a ``Deposition``; ``obj.id`` is
        sent as the nonce.
    :param eng: workflow engine; asked to halt after a successful submission.
    :raises WorkflowError: when the deposition has no SIP, or when the
        response text carries no ``[INFO]`` marker.
    """
    from invenio.modules.deposit.models import Deposition
    from invenio.modules.workflows.errors import WorkflowError
    from inspire.utils.robotupload import make_robotupload_marcxml
    from invenio.base.globals import cfg

    d = Deposition(obj)
    sip = d.get_latest_sip(d.submitted)
    if not sip:
        raise WorkflowError("No sip found", eng.uuid, obj.id)
    if not d.submitted:
        sip.seal()
        d.update()

    # Always bind base_url: the original only assigned it when url was None,
    # so an explicitly supplied url failed with an unbound local. ``url``
    # itself must not be rebound here, or it would become a local name.
    if url is None:
        base_url = cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL")
    else:
        base_url = url

    callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                "callback/workflows/robotupload")
    obj.log.info("Sending Robotupload to {0} with callback {1}".format(
        base_url, callback_url))

    result = make_robotupload_marcxml(url=base_url,
                                      marcxml=sip.package,
                                      callback_url=callback_url,
                                      nonce=obj.id)

    if "[INFO]" not in result.text:
        if "cannot use the service" in result.text:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(result.text)
        txt = "Error while submitting robotupload: {0}".format(result.text)
        raise WorkflowError(txt, eng.uuid, obj.id)
    else:
        obj.log.info("Robotupload sent!")
        obj.log.info(result.text)
        eng.halt("Waiting for robotupload: {0}".format(result.text))
def _update(obj, eng):
    """Upload, in ``correct`` mode, the fields added relative to the remote record.

    The remote record is fetched as MARCXML from
    ``WORKFLOWS_MATCH_REMOTE_SERVER_URL`` using ``obj.extra_data["recid"]``,
    stripped of its controlfields, and diffed against ``obj.data``. Only
    top-level keys touched by an ``add`` entry of that diff are exported and
    submitted. ``url`` is read from the enclosing scope.

    :param obj: workflow object; needs ``extra_data["recid"]``. Without it an
        error is logged and the function returns without uploading.
    :param eng: workflow engine; asked to halt after a successful submission.
    :raises WorkflowError: when the response text carries no ``[INFO]`` marker.
    """
    import dictdiffer

    from lxml import objectify, etree

    from invenio.base.globals import cfg
    from invenio_workflows.utils import convert_marcxml_to_bibfield
    from invenio_records.api import Record
    from inspire.utils.robotupload import make_robotupload_marcxml

    try:
        recid = obj.extra_data["recid"]
    except KeyError:
        obj.log.error("Cannot locate record ID")
        return

    callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                "callback/workflows/continue")
    search_url = "%s?p=recid:%s&of=xm" % (
        cfg["WORKFLOWS_MATCH_REMOTE_SERVER_URL"], recid)

    prod_data = objectify.parse(search_url)
    # remove controlfields
    root = prod_data.getroot()
    record = root['record']
    while True:
        try:
            record.remove(record['controlfield'])
        except AttributeError:
            # No controlfield child left to look up.
            break

    prod_data = etree.tostring(record)
    prod_data = convert_marcxml_to_bibfield(prod_data, model=["hep"])
    new_data = dict(obj.data.dumps(clean=True))
    prod_data = dict(prod_data.dumps(clean=True))

    updated_keys = []
    diff = dictdiffer.diff(prod_data, new_data)
    for diff_type, new_key, content in diff:
        if diff_type == 'add':
            if new_key:
                if isinstance(new_key, list):
                    # ['subject_terms', 0]
                    updated_keys.append(new_key[0])
                else:
                    # 'subject_terms'
                    updated_keys.append(new_key)
            else:
                # Root-level additions. dictdiffer reports these as
                # (key, value) pairs; take the key so it can match below.
                # Plain keys are still accepted unchanged.
                for entry in content:
                    if isinstance(entry, tuple):
                        updated_keys.append(entry[0])
                    else:
                        updated_keys.append(entry)

    # NOTE(review): ``diff`` was already iterated above. If dictdiffer.diff
    # returns a one-shot iterator, patch() has nothing left to apply and
    # simply yields a copy of new_data -- confirm this is intended.
    updates = dictdiffer.patch(diff, new_data)
    # Iterate over a snapshot of the keys: deleting while iterating the live
    # key view raises RuntimeError on Python 3.
    for key in list(updates):
        if key not in updated_keys:
            del updates[key]

    if updates:
        updates['recid'] = recid
        marcxml = Record(updates).legacy_export_as_marc()
        result = make_robotupload_marcxml(
            url=url,
            marcxml=marcxml,
            callback_url=callback_url,
            mode='correct',
            nonce=obj.id
        )
        if "[INFO]" not in result.text:
            if "cannot use the service" in result.text:
                # IP not in the list
                obj.log.error("Your IP is not in "
                              "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                              "on host")
                obj.log.error(result.text)
            from invenio_workflows.errors import WorkflowError
            txt = "Error while submitting robotupload: {0}".format(
                result.text)
            raise WorkflowError(txt, eng.uuid, obj.id)
        else:
            obj.log.info("Robotupload sent!")
            obj.log.info(result.text)
            eng.halt("Waiting for robotupload: {0}".format(result.text))
        obj.log.info("end of upload")
    else:
        obj.log.info("No updates to do.")