Example #1
0
    def _send_robotupload(obj, eng):
        """Submit the latest SIP package of ``obj`` as a robotupload.

        The MARCXML comes from the latest SIP of the workflow model and the
        callback URL is ``CFG_SITE_URL`` joined with ``callback_url``.
        ``url``, ``mode``, ``callback_url``, ``cfg`` and ``os`` are free
        names resolved in the surrounding scope (not visible here).

        :param obj: workflow object; its ``id`` is sent as the nonce.
        :param eng: workflow engine; ``eng.halt`` is called on success.
        :raises WorkflowError: when the reply text lacks ``[INFO]``.
        """
        from invenio_workflows.errors import WorkflowError
        from inspire.utils.robotupload import make_robotupload_marcxml

        full_callback = os.path.join(cfg["CFG_SITE_URL"], callback_url)
        latest_sip = eng.workflow_definition.model(obj).get_latest_sip()
        package = latest_sip.package
        response = make_robotupload_marcxml(
            url=url,
            marcxml=package,
            callback_url=full_callback,
            mode=mode,
            nonce=obj.id
        )
        reply = response.text

        # Success path first: an "[INFO]" marker means the upload was accepted.
        if "[INFO]" in reply:
            obj.log.info("Robotupload sent!")
            obj.log.info(reply)
            eng.halt("Waiting for robotupload: {0}".format(reply))
            obj.log.info("end of upload")
            return

        if "cannot use the service" in reply:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(reply)
        raise WorkflowError(
            "Error while submitting robotupload: {0}".format(reply),
            eng.uuid,
            obj.id
        )
Example #2
0
    def _send_robotupload(obj, eng):
        """Submit the MARCXML stored in the object's extra data as a robotupload.

        The MARCXML is read from ``obj.get_extra_data()["marcxml"]`` and
        posted with ``url=None``; the callback URL is ``CFG_SITE_URL`` joined
        with ``callback/workflows/robotupload``. ``mode`` and ``os`` are free
        names resolved in the surrounding scope (not visible here).

        On success the workflow is halted to wait for the callback, except
        when ``mode`` is ``"holdingpen"``.

        :param obj: workflow object; its ``id`` is sent as the nonce.
        :param eng: workflow engine running the task.
        :raises WorkflowError: when no MARCXML is available, or when the
            reply text lacks ``[INFO]``.
        """
        from invenio.base.globals import cfg
        from invenio.modules.workflows.errors import WorkflowError
        from inspire.utils.robotupload import make_robotupload_marcxml

        callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                    "callback/workflows/robotupload")

        marcxml = obj.get_extra_data().get("marcxml")

        if not marcxml:
            # There is nothing to upload: stop here instead of posting an
            # empty payload to the remote service.
            obj.log.error("No MARCXML found in extra data.")
            raise WorkflowError("No MARCXML found in extra data.",
                                eng.uuid, obj.id)

        result = make_robotupload_marcxml(
            url=None,
            marcxml=marcxml,
            callback_url=callback_url,
            mode=mode,
            nonce=obj.id
        )
        if "[INFO]" not in result.text:
            if "cannot use the service" in result.text:
                # IP not in the list
                obj.log.error("Your IP is not in "
                              "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                              "on host")
                obj.log.error(result.text)
            txt = "Error while submitting robotupload: {0}".format(result.text)
            raise WorkflowError(txt, eng.uuid, obj.id)
        else:
            obj.log.info("Robotupload sent!")
            obj.log.info(result.text)
            if mode != "holdingpen":
                # Only non-holdingpen uploads wait for the callback.
                eng.halt("Waiting for robotupload: {0}".format(result.text))
        obj.log.info("end of upload")
Example #3
0
 def test_robotupload_success(self):
     """A 200 reply carrying an ``[INFO]`` line is handed back unchanged."""
     from inspire.utils.robotupload import make_robotupload_marcxml

     # Stub the default (insert) robotupload endpoint.
     endpoint = "http://localhost:4000/batchuploader/robotupload/insert"
     reply_body = "[INFO] bibupload batchupload --insert /dummy/file/path\n"
     httpretty.register_uri(
         httpretty.POST,
         endpoint,
         body=reply_body,
         status=200
     )

     marcxml = "<record></record>"
     reply = make_robotupload_marcxml("http://localhost:4000", marcxml)

     self.assertEqual(reply.status_code, 200)
     self.assertTrue("[INFO] bibupload batchupload" in reply.text)
Example #4
0
 def test_robotupload_bad_xml(self):
     """A 400 reply with an ``[ERROR]`` line is handed back unchanged."""
     from inspire.utils.robotupload import make_robotupload_marcxml

     # Stub the insert endpoint so that it rejects the payload.
     endpoint = "http://localhost:4000/batchuploader/robotupload/insert"
     reply_body = "[ERROR] MARCXML is not valid.\n"
     httpretty.register_uri(
         httpretty.POST,
         endpoint,
         body=reply_body,
         status=400
     )

     # Deliberately malformed: the opening "<" is missing.
     broken_marcxml = "record></record>"
     reply = make_robotupload_marcxml("http://localhost:4000", broken_marcxml)

     self.assertEqual(reply.status_code, 400)
     self.assertTrue("not valid" in reply.text)
Example #5
0
    def _send_robotupload_oaiharvest(obj, eng):
        """Export ``obj.data`` as MARC and submit it as an ``insert`` robotupload.

        The callback URL is ``CFG_SITE_URL`` joined with
        ``callback/workflows/continue``. ``url``, ``cfg`` and ``os`` are free
        names resolved in the surrounding scope (not visible here). On
        success the workflow is halted to wait for the callback.

        :param obj: workflow object; its ``id`` is sent as the nonce.
        :param eng: workflow engine; ``eng.uuid`` names the scratch directory.
        :raises WorkflowError: when the reply text lacks ``[INFO]``.
        """
        from invenio.modules.records.api import Record
        from invenio.modules.workflows.errors import WorkflowError
        from inspire.utils.robotupload import make_robotupload_marcxml

        # Make sure the per-workflow scratch directory exists.
        # NOTE(review): nothing in this function reads the directory;
        # presumably another task relies on it - confirm before removing.
        extract_path = os.path.join(
            cfg['CFG_TMPSHAREDDIR'],
            str(eng.uuid)
        )
        if not os.path.exists(extract_path):
            os.makedirs(extract_path)

        callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                    "callback/workflows/continue")

        marcxml = Record(obj.data.dumps()).legacy_export_as_marc()
        result = make_robotupload_marcxml(
            url=url,
            marcxml=marcxml,
            callback_url=callback_url,
            mode='insert',
            nonce=obj.id
        )
        if "[INFO]" not in result.text:
            if "cannot use the service" in result.text:
                # IP not in the list
                obj.log.error("Your IP is not in "
                              "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                              "on host")
                obj.log.error(result.text)
            txt = "Error while submitting robotupload: {0}".format(result.text)
            raise WorkflowError(txt, eng.uuid, obj.id)
        else:
            obj.log.info("Robotupload sent!")
            obj.log.info(result.text)
            eng.halt("Waiting for robotupload: {0}".format(result.text))
        obj.log.info("end of upload")
Example #6
0
 def test_robotupload_success_append(self):
     """Valid MARCXML sent with ``mode="append"`` hits the append endpoint."""
     from inspire.utils.robotupload import make_robotupload_marcxml

     # Only the append endpoint is stubbed for this test.
     endpoint = "http://localhost:4000/batchuploader/robotupload/append"
     reply_body = "[INFO] bibupload batchupload --append /dummy/file/path\n"
     httpretty.register_uri(
         httpretty.POST,
         endpoint,
         body=reply_body,
         status=200
     )

     marcxml = "<record></record>"
     reply = make_robotupload_marcxml(
         "http://localhost:4000",
         marcxml,
         mode="append",
     )

     self.assertEqual(reply.status_code, 200)
     self.assertTrue("[INFO] bibupload batchupload" in reply.text)
Example #7
0
 def test_robotupload_callback_url(self):
     """A reply mentioning the callback URL is handed back unchanged."""
     from inspire.utils.robotupload import make_robotupload_marcxml
     # The two literals are concatenated implicitly, so the separating
     # space must be written out; without it the path and the option run
     # together in the mocked reply.
     body = (
         "[INFO] bibupload batchupload --insert /some/path "
         "--callback-url http://localhost"
     )
     httpretty.register_uri(
         httpretty.POST,
         "http://localhost:4000/batchuploader/robotupload/insert",
         body=body,
         status=200
     )
     valid_marcxml = "<record></record>"
     response = make_robotupload_marcxml(
         "http://localhost:4000",
         valid_marcxml,
         callback_url="http://localhost",
     )
     self.assertEqual(response.status_code, 200)
     self.assertTrue("--callback-url http://localhost" in response.text)
    def _send_robotupload_deposit(obj, eng):
        """Submit the latest SIP of a deposition as an ``insert`` robotupload.

        The SIP is sealed and the deposition updated first when the
        deposition is not yet submitted. The callback URL is ``CFG_SITE_URL``
        joined with ``callback/workflows/robotupload``. ``url``, ``cfg``,
        ``os`` and ``Deposition`` are free names resolved in the surrounding
        scope (not visible here). On success the workflow is halted.

        :param obj: workflow object wrapped by ``Deposition``; its ``id`` is
            sent as the nonce.
        :param eng: workflow engine running the task.
        :raises WorkflowError: when no SIP exists, or when the reply text
            lacks ``[INFO]``.
        """
        from invenio.modules.workflows.errors import WorkflowError
        from inspire.utils.robotupload import make_robotupload_marcxml

        notify_url = os.path.join(cfg["CFG_SITE_URL"],
                                  "callback/workflows/robotupload")

        dep = Deposition(obj)
        latest_sip = dep.get_latest_sip(dep.submitted)
        if not latest_sip:
            raise WorkflowError("No sip found", eng.uuid, obj.id)
        if not dep.submitted:
            latest_sip.seal()
            dep.update()

        package = latest_sip.package
        response = make_robotupload_marcxml(
            url=url,
            marcxml=package,
            callback_url=notify_url,
            mode='insert',
            nonce=obj.id
        )
        reply = response.text

        # Success path first: an "[INFO]" marker means the upload was accepted.
        if "[INFO]" in reply:
            obj.log.info("Robotupload sent!")
            obj.log.info(reply)
            eng.halt("Waiting for robotupload: {0}".format(reply))
            obj.log.info("end of upload")
            return

        if "cannot use the service" in reply:
            # IP not in the list
            obj.log.error("Your IP is not in "
                          "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                          "on host")
            obj.log.error(reply)
        raise WorkflowError(
            "Error while submitting robotupload: {0}".format(reply),
            eng.uuid,
            obj.id
        )
Example #9
0
    def _send_robotupload(obj, eng):
        """Submit the latest SIP of a deposition as a robotupload.

        The SIP is sealed and the deposition updated first when the
        deposition is not yet submitted. The target is the surrounding
        scope's ``url``, or ``CFG_ROBOTUPLOAD_SUBMISSION_BASEURL`` from the
        configuration when ``url`` is ``None``. ``url`` and ``os`` are free
        names resolved in the surrounding scope (not visible here). On
        success the workflow is halted to wait for the callback.

        :param obj: workflow object wrapped by ``Deposition``; its ``id`` is
            sent as the nonce.
        :param eng: workflow engine running the task.
        :raises WorkflowError: when no SIP exists, or when the reply text
            lacks ``[INFO]``.
        """
        from invenio.modules.deposit.models import Deposition
        from invenio.modules.workflows.errors import WorkflowError
        from inspire.utils.robotupload import make_robotupload_marcxml
        from invenio.base.globals import cfg

        d = Deposition(obj)

        sip = d.get_latest_sip(d.submitted)
        if not sip:
            raise WorkflowError("No sip found", eng.uuid, obj.id)
        if not d.submitted:
            sip.seal()
            d.update()

        # Always bind base_url: an explicitly given url wins, and the
        # configured default is only the fallback. Binding it only in the
        # fallback case left it undefined whenever url was supplied.
        base_url = url
        if base_url is None:
            base_url = cfg.get("CFG_ROBOTUPLOAD_SUBMISSION_BASEURL")

        callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                    "callback/workflows/robotupload")
        obj.log.info("Sending Robotupload to {0} with callback {1}".format(
            base_url, callback_url))
        result = make_robotupload_marcxml(url=base_url,
                                          marcxml=sip.package,
                                          callback_url=callback_url,
                                          nonce=obj.id)
        if "[INFO]" not in result.text:
            if "cannot use the service" in result.text:
                # IP not in the list
                obj.log.error("Your IP is not in "
                              "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                              "on host")
                obj.log.error(result.text)
            txt = "Error while submitting robotupload: {0}".format(result.text)
            raise WorkflowError(txt, eng.uuid, obj.id)
        else:
            obj.log.info("Robotupload sent!")
            obj.log.info(result.text)
            eng.halt("Waiting for robotupload: {0}".format(result.text))
Example #10
0
    def _update(obj, eng):
        """Upload the fields of ``obj.data`` that the remote record lacks.

        The record ``extra_data["recid"]`` is fetched as MARCXML from
        ``WORKFLOWS_MATCH_REMOTE_SERVER_URL``, stripped of its controlfields
        and converted with ``convert_marcxml_to_bibfield``. It is then diffed
        against ``obj.data``; top-level keys touched by ``add`` entries of
        the diff are sent as a ``correct`` robotupload. ``url`` and ``os``
        are free names resolved in the surrounding scope (not visible here).

        Returns early (after logging) when no record ID is stored, and only
        logs when there is nothing to upload.

        :param obj: workflow object; its ``id`` is sent as the nonce.
        :param eng: workflow engine; ``eng.halt`` is called on success.
        :raises WorkflowError: when the reply text lacks ``[INFO]``.
        """
        import dictdiffer

        from lxml import objectify, etree

        from invenio.base.globals import cfg
        from invenio_workflows.utils import convert_marcxml_to_bibfield
        from invenio_records.api import Record

        from inspire.utils.robotupload import make_robotupload_marcxml

        try:
            recid = obj.extra_data["recid"]
        except KeyError:
            obj.log.error("Cannot locate record ID")
            return

        callback_url = os.path.join(cfg["CFG_SITE_URL"],
                                    "callback/workflows/continue")

        search_url = "%s?p=recid:%s&of=xm" % (
            cfg["WORKFLOWS_MATCH_REMOTE_SERVER_URL"], recid)

        prod_data = objectify.parse(search_url)
        # remove controlfields
        root = prod_data.getroot()
        record = root['record']
        while True:
            # Loop until looking up another controlfield child fails.
            try:
                record.remove(record['controlfield'])
            except AttributeError:
                break
        prod_data = etree.tostring(record)
        prod_data = convert_marcxml_to_bibfield(prod_data, model=["hep"])
        new_data = dict(obj.data.dumps(clean=True))
        prod_data = dict(prod_data.dumps(clean=True))
        updated_keys = []
        diff = dictdiffer.diff(prod_data, new_data)
        for diff_type, new_key, content in diff:
            if diff_type == 'add':
                if new_key:
                    if isinstance(new_key, list):
                        # ['subject_terms', 0]
                        updated_keys.append(new_key[0])
                    else:
                        # 'subject_terms'
                        updated_keys.append(new_key)
                else:
                    # content must be list of new adds
                    for key in content:
                        updated_keys.append(key)

        # NOTE(review): if dictdiffer.diff returns a one-shot iterator, the
        # loop above has already consumed it, so this call would apply
        # nothing and `updates` would just be a copy of new_data - confirm
        # that this is the intended behaviour.
        updates = dictdiffer.patch(diff, new_data)
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for key in list(updates.keys()):
            if key not in updated_keys:
                del updates[key]
        if updates:
            updates['recid'] = recid
            marcxml = Record(updates).legacy_export_as_marc()
            result = make_robotupload_marcxml(
                url=url,
                marcxml=marcxml,
                callback_url=callback_url,
                mode='correct',
                nonce=obj.id
            )
            if "[INFO]" not in result.text:
                if "cannot use the service" in result.text:
                    # IP not in the list
                    obj.log.error("Your IP is not in "
                                  "CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS "
                                  "on host")
                    obj.log.error(result.text)
                from invenio_workflows.errors import WorkflowError
                txt = "Error while submitting robotupload: {0}".format(result.text)
                raise WorkflowError(txt, eng.uuid, obj.id)
            else:
                obj.log.info("Robotupload sent!")
                obj.log.info(result.text)
                eng.halt("Waiting for robotupload: {0}".format(result.text))
            obj.log.info("end of upload")
        else:
            obj.log.info("No updates to do.")