Beispiel #1
0
def create_oaiharvest_log(task_id, oai_src_id, marcxmlfile):
    """
    Create the harvesting logs for the content of one MARCXML file.

    Reads the whole file at ``marcxmlfile`` and hands its content, together
    with both identifiers, to ``create_oaiharvest_log_str``.

    :param task_id: bibupload task id
    :param oai_src_id: forwarded unchanged to ``create_oaiharvest_log_str``
        (presumably the id of the OAI source -- confirm against callers)
    :param marcxmlfile: path of the file whose content is logged
    """
    # The context manager closes the handle even when read() raises, which
    # the previous explicit open()/close() pair did not guarantee.
    with open(marcxmlfile, "r") as file_fd:
        xml_content = file_fd.read()
    create_oaiharvest_log_str(task_id, oai_src_id, xml_content)
Beispiel #2
0
def create_oaiharvest_log(task_id, oai_src_id, marcxmlfile):
    """
    Create the harvesting logs for the content of one MARCXML file.

    Reads the whole file at ``marcxmlfile`` and hands its content, together
    with both identifiers, to ``create_oaiharvest_log_str``.

    :param task_id: bibupload task id
    :param oai_src_id: forwarded unchanged to ``create_oaiharvest_log_str``
        (presumably the id of the OAI source -- confirm against callers)
    :param marcxmlfile: path of the file whose content is logged
    """
    # The context manager closes the handle even when read() raises, which
    # the previous explicit open()/close() pair did not guarantee.
    with open(marcxmlfile, "r") as file_fd:
        xml_content = file_fd.read()
    create_oaiharvest_log_str(task_id, oai_src_id, xml_content)
Beispiel #3
0
def upload_step(obj, eng):
    """
    Perform the upload step.

    Exports ``obj.data`` as MARCXML, writes it to a file named after
    ``obj.id`` inside a per-engine directory below ``CFG_TMPSHAREDDIR``,
    submits a ``bibupload`` task for that file and records the submission
    with ``create_oaiharvest_log_str``.  When ``CFG_INSPIRE_SITE`` is set, a
    ``bibindex`` task is submitted afterwards.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :return: always ``None``; the function returns early (before the
        ``bibindex`` submission) when submitting the upload task raises
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str

    new_dict_representation = records_api.Record(obj.data)
    marcxml_value = new_dict_representation.legacy_export_as_marc()
    task_id = None
    # Get a random sequence ID that will allow for the tasks to be
    # run in order, regardless if parallel task execution is activated.
    # randrange(1, 60000) never yields 0, so the ID is always passed on.
    sequence_id = random.randrange(1, 60000)
    bibtask.task_sleep_now_if_required()
    extract_path = plotextractor_getter.make_single_directory(cfg['CFG_TMPSHAREDDIR'], eng.uuid)
    # Now we launch BibUpload tasks for the final MARCXML files
    filepath = extract_path + os.sep + str(obj.id)
    # The context manager closes the file even if write() fails.
    with open(filepath, 'w') as file_fd:
        file_fd.write(marcxml_value)

    repository = obj.extra_data["_repository"]
    arguments = repository["arguments"]

    if os.path.exists(filepath):
        try:
            # Built inside the try block on purpose: a malformed
            # ``arguments`` value is reported like any submission failure.
            args = ["-r", "-i", '-I', str(sequence_id)]
            u_name = arguments.get('u_name', "")
            if u_name:
                args.extend(['-N', u_name])
            u_priority = arguments.get('u_priority', 5)
            if u_priority:
                args.extend(['-P', str(u_priority)])
            args.append(filepath)
            task_id = bibtask.task_low_level_submission("bibupload", "oaiharvest", *args)
            create_oaiharvest_log_str(task_id, repository["id"], marcxml_value)
        except Exception as msg:
            eng.log.error("An exception during submitting oaiharvest task occured : %s " % (str(msg)))
            return None
    else:
        eng.log.error("marcxmlfile %s does not exist" % (filepath,))

    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository["name"]))
    else:
        eng.log.info("material harvested from source %s was successfully uploaded" %
                     (repository["name"],))

    if cfg['CFG_INSPIRE_SITE']:
        # Launch BibIndex,Webcoll update task to show uploaded content quickly
        bibindex_params = ['-w', 'collection,reportnumber,global',
                           '-P', '6',
                           '-I', str(sequence_id),
                           '--post-process',
                           'bst_run_bibtask[taskname="webcoll", user="******", P="6", c="HEP"]']
        bibtask.task_low_level_submission("bibindex", "oaiharvest", *bibindex_params)
    eng.log.info("end of upload")
Beispiel #4
0
def upload_step_marcxml(obj, eng):
    """Perform the upload step with MARCXML in obj.data().

    Submits one ``bibupload`` task per upload file and, when the repository
    has an ``id``, records each submission with ``create_oaiharvest_log_str``.

    When ``"f"`` is listed in the repository's ``postprocess`` setting, the
    candidate files are ``<base>.insert.xml``, ``<base>.append.xml``,
    ``<base>.correct.xml`` and ``<base>.holdingpen.xml``; only those that
    exist on disk are submitted, each with its own mode flag.  Otherwise
    ``obj.get_data()`` is written to ``<base>`` and submitted with ``-r -i``.
    ``<base>`` is ``CFG_TMPSHAREDDIR/<eng.uuid>/<obj.id>``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.base.globals import cfg
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    # One sequence ID is shared by every task submitted from this call.
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    default_args = ['-I', str(sequence_id)]
    u_name = arguments.get('u_name', "")
    if u_name:
        default_args.extend(['-N', u_name])
    u_priority = arguments.get('u_priority', 5)
    if u_priority:
        default_args.extend(['-P', str(u_priority)])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record.
        # The context manager closes the file even if write() fails.
        with open(filepath, 'w') as file_fd:
            file_fd.write(obj.get_data())
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if os.path.exists(location):
            try:
                # Submit the file that was just checked (``location``); the
                # un-suffixed base path is not what the filter branch lists.
                args = mode + [location] + default_args
                task_id = task_low_level_submission("bibupload",
                                                    "oaiharvest",
                                                    *args)
                repo_id = repository.get("id")
                if repo_id:
                    create_oaiharvest_log_str(
                        task_id,
                        repo_id,
                        obj.get_data()
                    )
            except Exception as msg:
                eng.log.error(
                    "An exception during submitting OAI harvester task occurred: %s " % (
                        str(msg)))
    # ``task_id`` holds the result of the last successful submission, if any.
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")
Beispiel #5
0
def upload_step(obj, eng):
    """Perform the upload step.

    Submits one ``bibupload`` task per upload file and, when the repository
    has an ``id``, records each submission with ``create_oaiharvest_log_str``.

    When ``"f"`` is listed in the repository's ``postprocess`` setting, the
    candidate files are ``<base>.insert.xml``, ``<base>.append.xml``,
    ``<base>.correct.xml`` and ``<base>.holdingpen.xml``; only those that
    exist on disk are submitted, each with its own mode flag.  Otherwise the
    record in ``obj.data`` is exported as MARCXML, written to ``<base>`` and
    submitted with ``-r -i``.  ``<base>`` is
    ``CFG_TMPSHAREDDIR/<eng.uuid>/<obj.id>``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    # One sequence ID is shared by every task submitted from this call.
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    default_args = ['-I', str(sequence_id)]
    u_name = arguments.get('u_name', "")
    if u_name:
        default_args.extend(['-N', u_name])
    u_priority = arguments.get('u_priority', 5)
    if u_priority:
        default_args.extend(['-P', str(u_priority)])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        # The context manager closes the file even if write() fails.
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if os.path.exists(location):
            try:
                # Submit the file that was just checked (``location``); the
                # un-suffixed base path is not what the filter branch lists.
                args = mode + [location] + default_args
                task_id = task_low_level_submission("bibupload",
                                                    "oaiharvest",
                                                    *args)
                repo_id = repository.get("id")
                if repo_id:
                    # NOTE(review): the log receives obj.get_data(), not the
                    # MARCXML written to disk -- confirm this is intended.
                    create_oaiharvest_log_str(
                        task_id,
                        repo_id,
                        obj.get_data()
                    )
            except Exception as msg:
                eng.log.error(
                    "An exception during submitting oaiharvest task occured : %s " % (
                        str(msg)))
    # ``task_id`` holds the result of the last successful submission, if any.
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")