Exemple #1
0
def make_record(values, is_dump=True):
    """Build a MARC-master ``Record`` from draft values.

    :param values: mapping of field names to values.
    :param is_dump: when true, ``values`` is handed to ``Record`` as its
        ``json`` payload; otherwise an empty record is created and every
        key/value pair is assigned to it one by one.
    :return: the resulting ``Record``.
    """
    if not is_dump:
        # Field-by-field assignment goes through the record's item setter.
        built = Record(master_format='marc')
        for field, content in six.iteritems(values):
            built[field] = content
        return built
    return Record(json=values, master_format='marc')
Exemple #2
0
    def test_marc_export(self):
        """Export the test record to MARCXML and compare it with the fixture.

        On mismatch the assertion is re-raised with a unified diff as its
        message. Afterwards every key of ``test_form_json`` is compared with
        the output of the ``json_for_form`` producer.
        """
        from invenio.modules.records.api import Record
        from invenio.legacy.bibrecord import create_record, record_xml_output

        rec = Record(json=test_record, master_format='marc')

        # MARC generation needs the authors split into these two fields.
        lead_author, other_authors = rec['authors'][0], rec['authors'][1:]
        rec['_first_author'] = lead_author
        rec['_additional_authors'] = other_authors

        legacy_record = create_record(rec.legacy_export_as_marc())[0]
        output_marc = record_xml_output(legacy_record)
        try:
            self.assertEqual(test_marc, output_marc)
        except AssertionError:
            # Replace the default message with a readable line diff.
            import difflib
            delta = difflib.unified_diff(
                test_marc.splitlines(True),
                output_marc.splitlines(True),
            )
            raise AssertionError("".join(delta))

        form_json = rec.produce('json_for_form')
        for key in test_form_json:
            self.assertEqual(form_json[key], test_form_json[key])
Exemple #3
0
    def test_json_for_ld(self):
        """Smoke-test the ``json_for_ld`` producer; nothing is asserted."""
        import copy
        from invenio.modules.records.api import Record

        # The created record is not used further; the call is kept as is.
        Record.create({'title': 'Test'}, 'json')

        marc_record = Record(json=copy.copy(test_record), master_format='marc')
        marc_record.produce('json_for_ld')
Exemple #4
0
 def patch_get_record(self, get_record_patch):
     """Make the patched ``get_record`` return a minimal JSON record.

     The record carries a fixed DOI under the field name configured in
     ``PIDSTORE_DATACITE_RECORD_DOI_FIELD`` and a ``recid`` of 1.

     :param get_record_patch: mock object whose ``return_value`` is set.
     """
     from invenio.modules.records.api import Record

     doi_field = self.app.config['PIDSTORE_DATACITE_RECORD_DOI_FIELD']
     payload = {doi_field: '10.1234/invenio.1234', 'recid': 1}
     get_record_patch.return_value = Record(json=payload,
                                            master_format='json')
Exemple #5
0
def create_records_for_workflow(records, **kwargs):
    """Wrap the JSON payload of every entry in a ``Record``, in place.

    :param records: mutable sequence of entries to process. For each entry
        the first item is kept and the second item is passed to ``Record``
        as its ``json`` argument; the entry is replaced by the resulting
        2-tuple.
    :kwargs: accepted but not used by this function.
    """
    from invenio.modules.records.api import Record

    for position, entry in enumerate(records):
        head = entry[0]
        records[position] = (head, Record(json=entry[1]))
    def formatter(bwo, **kwargs):
        """Return a formatted version of the data.

        :param bwo: object exposing ``get_data()``.
        :param kwargs: optional ``formatter`` (a callable; when given, its
            result on the data is returned directly) and ``format`` (output
            format code handed to ``format_record``; ``'xm'`` pretty-prints
            the XML instead).
        :return: ``''`` for empty data; otherwise the formatted data, or the
            data itself (sets are returned as lists) when no conversion
            applies.
        """
        from invenio.modules.formatter.engine import format_record

        data = bwo.get_data()
        if not data:
            return ''

        # Local names chosen so the builtin ``format`` is not shadowed.
        custom_formatter = kwargs.get("formatter", None)
        output_format = kwargs.get("format", None)
        if custom_formatter:
            # A separate formatter is supplied
            return custom_formatter(data)

        from invenio.modules.records.api import Record
        if isinstance(data, collections.Mapping):
            # Dicts are fine on their own, but it may be a SmartJson (record)
            try:
                data = Record(data.dumps()).legacy_export_as_marc()
            except (AttributeError, TypeError, KeyError):
                # A plain dict has no ``dumps()`` and raises AttributeError;
                # treat it like the other "not a record" cases and hand the
                # mapping back untouched instead of crashing.
                return data

        if isinstance(data, string_types):
            # It is a string type, let us try to convert
            if output_format:
                # If already XML, format_record does not like it.
                if output_format != 'xm':
                    try:
                        return format_record(recID=None,
                                             of=output_format,
                                             xml_record=data)
                    except TypeError:
                        # Wrong kind of type; fall through to the raw string
                        pass
                else:
                    # So, XML then
                    from xml.dom.minidom import parseString

                    try:
                        return parseString(data).toprettyxml()
                    except TypeError:
                        # Probably not a proper XML string then
                        return "Data cannot be parsed: %s" % (data, )
                    except Exception:
                        # Some other parsing error; deliberately best-effort,
                        # fall through to the raw string
                        pass

            # Just return raw string
            return data

        if isinstance(data, set):
            return list(data)
        # Not any of the above types: return unchanged
        return data
Exemple #7
0
    def formatter(bwo, **kwargs):
        """Nicely format the record.

        :param bwo: object exposing ``get_data()``.
        :param kwargs: optional ``formatter`` (a callable; when given, its
            result on the data is returned directly) and ``of`` (output
            format code handed to ``format_record``; when missing or
            ``'xm'`` the string is pretty-printed as XML instead).
        :return: ``''`` for empty data; otherwise the formatted data, falling
            back to the (possibly converted) data itself, with dicts rendered
            through ``pprint.pformat``.
        """
        from pprint import pformat
        from invenio.modules.records.api import Record

        data = bwo.get_data()
        if not data:
            return ''

        custom_formatter = kwargs.get("formatter", None)
        of = kwargs.get("of", None)
        if custom_formatter:
            # A separate formatter is supplied
            return custom_formatter(data)

        # Bound up front: several paths below (non-string data, a swallowed
        # formatting error) never assign it, and the fallback check further
        # down would otherwise raise UnboundLocalError.
        formatted_data = None

        if isinstance(data, collections.Mapping):
            # Dicts are fine on their own, but it may be a SmartJson (record)
            try:
                data = Record(data.dumps()).legacy_export_as_marc()
            except (AttributeError, TypeError, KeyError):
                # A plain dict has no ``dumps()`` (AttributeError); keep the
                # mapping as-is so it is pretty-printed below.
                pass

        if isinstance(data, string_types):
            # If already XML, format_record does not like it.
            if of and of != 'xm':
                try:
                    from invenio.modules.formatter import format_record
                    formatted_data = format_record(recID=None,
                                                   of=of,
                                                   xml_record=data)
                except TypeError:
                    # Wrong kind of type; fall back to the raw data below
                    pass
            else:
                # So, XML then
                from xml.dom.minidom import parseString

                try:
                    formatted_data = parseString(data).toprettyxml()
                except TypeError:
                    # Probably not a proper XML string then
                    return "Data cannot be parsed: %s" % (data, )
                except Exception:
                    # Deliberately best-effort: fall back to the raw string
                    pass

        if not formatted_data:
            formatted_data = data

        if isinstance(formatted_data, dict):
            formatted_data = pformat(formatted_data)
        return formatted_data
Exemple #8
0
    def test_json_for_form(self):
        """Check the ``json_for_form`` and ``json_for_marc`` producers.

        A freshly created JSON record is checked first, then the MARC test
        record is compared key by key against ``test_form_json``.
        """
        import copy
        from invenio.modules.records.api import Record

        created = Record.create({'title': 'Test'}, 'json')
        assert created.produce('json_for_form')['title'] == 'Test'
        assert {'245__a': 'Test'} in created.produce('json_for_marc')

        marc_record = Record(json=copy.copy(test_record), master_format='marc')

        form_json = marc_record.produce('json_for_form')
        for key in test_form_json:
            self.assertEqual(form_json[key], test_form_json[key])
Exemple #9
0
 def get_mocked_record():
     """Return the record cached on ``RecordMock``, building it on first use."""
     from invenio.modules.records.api import Record

     if RecordMock.record is not None:
         return RecordMock.record

     # replace with cfg['files_var_name']
     files_to_upload = [
         ('path1.xls', 'this/is/a/long/path/to/the/file/location/path1.xls'),
         ('path2.csv', 'path2.csv'),
         ('path3.pdf', 'path3.pdf'),
     ]
     payload = {
         'doi': '10.1234/invenio.1234',
         'files_to_upload': files_to_upload,
         'recid': 1,
     }
     RecordMock.record = Record(json=payload, master_format='marc')
     return RecordMock.record
Exemple #10
0
    def get_description(bwo):
        """Get the description (identifiers and categories) from the object data.

        :param bwo: object exposing ``get_data()``.
        :return: the rendered ``workflows/styles/harvesting_record.html``
            template, given the collected ``categories`` and ``identifiers``.
        """
        from invenio.modules.records.api import Record
        from flask import render_template, current_app

        record = bwo.get_data()
        # Must be a list: it is extended below. Starting from a dict made
        # ``extend`` raise AttributeError on the first identifier, which the
        # broad handler swallowed, so real identifiers were never shown.
        final_identifiers = []
        try:
            identifiers = Record(record.dumps()).persistent_identifiers
            for values in identifiers.values():
                final_identifiers.extend([i.get("value") for i in values])
        except Exception:
            # Best-effort: log and fall back to the system control number.
            current_app.logger.exception("Could not get identifiers")
            if hasattr(record, "get"):
                final_identifiers = [
                    record.get("system_control_number",
                               {}).get("value", 'No ids')
                ]
            else:
                final_identifiers = []

        categories = []
        if hasattr(record, "get"):
            # Subjects live under one of two field names.
            if 'subject' in record:
                lookup = ["subject", "term"]
            elif "subject_term" in record:
                lookup = ["subject_term", "term"]
            else:
                lookup = None
            if lookup:
                primary, secondary = lookup
                category_list = record.get(primary, [])
                if isinstance(category_list, dict):
                    # A single subject is stored bare; normalise to a list.
                    category_list = [category_list]
                categories = [subject[secondary] for subject in category_list]

        return render_template('workflows/styles/harvesting_record.html',
                               categories=categories,
                               identifiers=final_identifiers)
Exemple #11
0
def filter_step(obj, eng):
    """Run an external python script.

    The script path is read from the repository arguments stored in
    ``obj.extra_data`` (key ``f_filter-file``). The object's record is
    exported as MARCXML into a per-engine directory under
    ``cfg['CFG_TMPSHAREDDIR']`` and the script is run on that file.

    :param obj: workflow object providing ``extra_data``, ``data``, ``id``
        and ``log``.
    :param eng: workflow engine providing ``uuid``.
    """
    from invenio.modules.records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    if not script_name:
        obj.log.error("No script file found!")
        return

    marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # Write the MARCXML the script will work on; ``with`` guarantees the
    # handle is closed even when the write fails.
    marcxmlfile = os.path.join(extract_path, str(obj.id))
    with open(marcxmlfile, 'w') as file_fd:
        file_fd.write(marcxml_value)

    exitcode, cmd_stdout, cmd_stderr = run_shell_command(
        cmd="%s '%s'",
        args=(str(script_name),
              str(marcxmlfile)))
    # Any output on stderr counts as a failure, even with exit code 0.
    if exitcode != 0 or cmd_stderr != "":
        obj.log.error(
            "Error while running filtering script on %s\nError:%s"
            % (marcxmlfile, cmd_stderr)
        )
    else:
        obj.log.info(cmd_stdout)
    def get_description(bwo):
        """Get the description column part.

        Collects the record's identifiers, a summary of the ``bibclassify``
        task result (when present) and the subject categories whose source
        is ``inspire``, and renders them with the harvesting template.

        :param bwo: object exposing ``get_data()`` and
            ``get_tasks_results()``.
        :return: the rendered ``workflows/styles/harvesting_record.html``
            template.
        """
        from flask import render_template
        from invenio.modules.records.api import Record

        record = bwo.get_data()
        try:
            identifiers = Record(record.dumps()).persistent_identifiers
            # NOTE(review): assumes iterating ``persistent_identifiers``
            # yields mappings with a 'value' key - confirm against Record.
            final_identifiers = [i['value'] for i in identifiers]
        except Exception:
            # Best-effort fallback to the external system number.
            if hasattr(record, "get"):
                final_identifiers = [
                    record.get("system_number_external",
                               {}).get("value", 'No ids')
                ]
            else:
                final_identifiers = [' No ids']

        task_results = bwo.get_tasks_results()
        results = []
        if 'bibclassify' in task_results:
            try:
                result = task_results['bibclassify'][0]['result']
                fast_mode = result.get('fast_mode', False)
                result = result['dict']['complete_output']
                result_string = "<strong></br>Bibclassify result:"\
                                "</br></strong>"\
                                "Number of Core keywords: \t%s</br>"\
                                "PACS: \t%s</br>"\
                                % (len(result['Core keywords']),
                                   len(result['Field codes']))
                if fast_mode:
                    result_string += "(This task run at fast mode"\
                                     " taking into consideration"\
                                     " only the title and the abstract)"
                results.append(result_string)
            except (KeyError, IndexError):
                # Incomplete task result: show no classification summary.
                pass

        categories = []
        if hasattr(record, "get"):
            if 'subject' in record:
                lookup = ["subject", "term"]
            elif "subject_term" in record:
                # Previously a bare string literal (always true), which made
                # the ``else`` branch unreachable.
                lookup = ["subject_term", "term"]
            else:
                lookup = None
            if lookup:
                primary, secondary = lookup
                category_list = record.get(primary, [])
                if isinstance(category_list, dict):
                    # A single subject is stored bare; normalise to a list.
                    category_list = [category_list]
                for subject in category_list:
                    category = subject[secondary]
                    if len(subject) == 2:
                        # Two keys: the one that is not the term holds the
                        # source. Selected by name, so the result does not
                        # depend on key order or on ``keys()`` being
                        # indexable.
                        other_key = [key for key in subject
                                     if key != secondary][0]
                        source_list = subject[other_key]
                    else:
                        try:
                            source_list = subject['source']
                        except KeyError:
                            source_list = ""
                    if source_list.lower() == 'inspire':
                        categories.append(category)

        return render_template('workflows/styles/harvesting_record.html',
                               categories=categories,
                               identifiers=final_identifiers,
                               results=results)
Exemple #13
0
def quick_match_record(obj, eng):
    """Try to find the record id matching the object's record.

    The record's persistent identifiers are stored in
    ``obj.extra_data["persistent_ids"]``. If a ``recid`` identifier is
    present it is used directly; otherwise each known identifier type is
    tried with its bibupload lookup function until one returns a record id.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :return: True when a record id was found and stored under
        ``obj.extra_data["persistent_ids"]["recid"]``, False otherwise.
    """
    from invenio.legacy.bibupload.engine import (find_record_from_recid,
                                                 find_record_from_sysno,
                                                 find_records_from_extoaiid,
                                                 find_record_from_oaiid,
                                                 find_record_from_doi)
    from invenio.modules.records.api import Record

    # Identifier name -> function resolving its value to a record id.
    lookup_functions = {
        'recid': find_record_from_recid,
        'system_number': find_record_from_sysno,
        'oaiid': find_record_from_oaiid,
        'system_control_number': find_records_from_extoaiid,
        'doi': find_record_from_doi
    }

    record = Record(obj.data.dumps())
    try:
        identifiers = record.persistent_identifiers
    except Exception as err:
        # Anything going wrong here means there are no usable identifiers.
        eng.log.error("Problem with getting identifiers: %s\n%s" %
                      (str(err), traceback.format_exc()))
        identifiers = []

    obj.extra_data["persistent_ids"] = identifiers

    # Merge the list of dicts found for every identifier into one dict.
    identifier_dict = {}
    for name, value in identifiers:
        merged = {}
        for part in value:
            merged.update(part)
        identifier_dict[name] = merged

    if "recid" in identifier_dict:
        # An explicit recid settles the matter.
        obj.extra_data["persistent_ids"]["recid"] = identifier_dict["recid"]
        return True

    # No explicit recid: try the other stable identifiers in turn.
    found_recid = False
    for name, finder in lookup_functions.iteritems():
        if name not in identifier_dict:
            continue
        candidate = identifier_dict[name]
        if name in candidate:
            # Shape {"doi": {"doi": val}}
            found_recid = finder(candidate[name])
        elif "value" in candidate:
            # Shape {"doi": {"value": val}}
            found_recid = finder(candidate["value"])

        if found_recid:
            break

    if not found_recid:
        return False
    obj.extra_data["persistent_ids"]["recid"] = found_recid
    return True
Exemple #14
0
def upload_step(obj, eng):
    """Perform the upload step.

    Submits one ``bibupload`` task per existing upload file. When the
    repository's ``postprocess`` setting contains ``"f"`` (a filter ran),
    the candidate files are ``<path>.insert.xml``, ``.append.xml``,
    ``.correct.xml`` and ``.holdingpen.xml``, each with its own mode flag;
    otherwise the record is exported as MARCXML to ``<path>`` and uploaded
    with ``-r -i``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = os.path.join(extract_path, str(obj.id))
    if "f" in repository.get("postprocess", []):
        # We have a filter: upload whichever of its output files exist.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record. ``with`` makes
        # sure the handle is closed even when the write fails.
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if not os.path.exists(location):
            continue
        try:
            # Submit the file that was just checked. The base ``filepath``
            # was submitted here before, so with a filter the per-mode
            # output files were never the ones uploaded.
            args = mode + [location] + default_args
            task_id = task_low_level_submission("bibupload",
                                                "oaiharvest",
                                                *args)
            repo_id = repository.get("id")
            if repo_id:
                create_oaiharvest_log_str(
                    task_id,
                    repo_id,
                    obj.get_data()
                )
        except Exception as msg:
            # Keep going: a failure on one file must not block the others.
            eng.log.error(
                "An exception during submitting oaiharvest task occured : %s " % (
                    str(msg)))
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")