Example #1
0
def main():
    from_base = 'http://openaire.cern.ch'
    to_base = config.CFG_SITE_URL

    # All records
    recids = search_pattern(p="0->Z", f="8564_u")

    print "<collection>"
    for recid in recids:
        # Get record information
        touched = False
        file_links = get_fieldvalues(recid, "8564_u")

        new_file_links = map(replace_link_func(from_base, to_base), file_links)

        # Print correcting to record
        rec = {}
        record_add_field(rec, "001", controlfield_value=str(recid))
        for old_link, new_link in zip(file_links, new_file_links):
            if old_link != new_link:
                touched = True
            record_add_field(rec, '856', ind1='4', subfields=[('u', new_link)])

        if touched:
            print record_xml_output(rec)
    print "</collection>"
Example #2
0
def main():
    # 
    from_base = 'http://openaire.cern.ch/'
    to_base = 'http://localhost:4000/'
    
    # All records
    recids = search_pattern(p="0->Z", f="8564_u")
    
    print "<collection>"
    for recid in recids:
        # Get record information 
        touched = False
        file_links = get_fieldvalues(recid, "8564_u")

        def replace_link(x): 
            if x.startswith(from_base):
                return x.replace(from_base, to_base)
            else:
                return x
        
        new_file_links = map(replace_link, file_links)
        
        # Print correcting to record
        rec = {}
        record_add_field(rec, "001", controlfield_value=str(recid))
        for old_link,new_link in zip(file_links, new_file_links):
            if old_link != new_link:
                touched = True 
            record_add_field(rec, '856', ind1='4', subfields=[('u', new_link)])
        
        if touched:
            print record_xml_output(rec)
    print "</collection>"
Example #3
0
def bibupload(record=None, collection=None, file_prefix="", mode="-c"):
    """
    General purpose function that will write a MARCXML file and call bibupload
    on it.

    @param record: a single record structure to upload (used only when
        ``collection`` is None)
    @param collection: an iterable of record structures; submitted in
        chunks of MAX_RECORDS records per bibupload task
    @param file_prefix: prefix for the temporary MARCXML file(s)
    @param mode: bibupload mode switch passed through to the task
    """
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            file_out.write(record_xml_output(rec))
            tot += 1
            if tot == MAX_RECORDS:
                # Chunk is full: close it, submit it, and start a fresh file.
                file_out.write("</collection>")
                file_out.close()
                logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
                task_low_level_submission('bibupload', 'openaire', mode, filename, '-n')

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        # NOTE(review): the single-record path writes no <collection>
        # wrapper -- presumably bibupload accepts a bare record; confirm.
        tot = 1
        file_out.write(record_xml_output(record))

    file_out.close()
    # Submit whatever remained in the last (possibly partial) chunk.
    if tot > 0:
        logger.debug("Submitting bibupload %s -n %s" % (mode, filename))
        task_low_level_submission('bibupload', 'openaire', mode, filename, '-n')
Example #4
0
def convert_record(record, response_date, request):
    """Convert one harvested OAI-PMH Hindawi record into MARCXML.

    @param record: DOM node of one OAI <record> element
    @param response_date: OAI responseDate of the harvest, stored in 035 $h
    @param request: the OAI request URL, stored in 035 $u
    @return: a (marcxml, new) tuple where marcxml is the converted record
        (None when a record we never had is being deleted) and new tells
        whether the OAI identifier was previously unknown to us
    """
    header = record.getElementsByTagName("header")[0]
    oai_identifier = get_value_in_tag(header, "identifier")
    datestamp = get_value_in_tag(header, "datestamp")
    status = header.getAttribute("status").encode('utf8')
    rec = {}
    # Provenance field: where and when this record was harvested from.
    record_add_field(rec, tag="035", subfields=[('a', oai_identifier),
                                                ('u', request),
                                                ('9', 'Hindawi'),
                                                ('d', datestamp),
                                                ('h', response_date),
                                                ('m', 'marc21'),
                                                ('t', 'false')])
    new = True
    if find_records_from_extoaiid(oai_identifier, 'Hindawi'):
        new = False
    if status == 'deleted':
        if new:
            ## deleting a record we didn't have? Who cares :-)
            return None, True
        else:
            # Flag the already-known record as deleted.
            record_add_field(rec, tag="980", subfields=[('a', 'SCOAP3'), ('b', 'Hindawi'), ('c', 'DELETED')])
            return record_xml_output(rec), False
    # Copy every incoming datafield/subfield verbatim into the record.
    for datafield in record.getElementsByTagName("datafield"):
        tag = datafield.getAttribute("tag").encode('utf-8')
        ind1 = datafield.getAttribute("ind1").encode('utf-8') or ' '
        ind2 = datafield.getAttribute("ind2").encode('utf-8') or ' '
        subfields = []
        for subfield in datafield.getElementsByTagName("subfield"):
            code = subfield.getAttribute("code").encode('utf-8')
            value = xml_to_text(subfield)
            subfields.append((code, value))
        record_add_field(rec, tag=tag, ind1=ind1, ind2=ind2, subfields=subfields)
    return record_xml_output(rec), new
def compare_references(test, a, b):
    """Assert that two reference XML buffers are equivalent.

    Buffer ``a`` is normalized by stripping the Invenio refextract
    signature (999C6) before both buffers are re-serialized and
    compared.
    """
    rec_a = create_record(a)[0]
    rec_b = create_record(b)[0]
    # Remove the refextract signature from the first buffer only,
    # mirroring the original normalization.
    record_delete_field(rec_a, '999', 'C', '6')
    test.assertXmlEqual(record_xml_output(rec_a), record_xml_output(rec_b))
Example #6
0
def _prepare_marcxml(recid_a, rn_a, recid_b, rn_b, what_is_a_for_b, what_is_b_for_a, display_in_a=True, display_in_b=True):
    """Build the MARCXML that links two records in both directions.

    Each record receives one CFG_OTHER_RELATIONSHIP_ENTRY field pointing
    at the other record; ind1 "0" means displayed, "1" means hidden.

    @return: a <collection> string holding both correcting records
    """
    record_a = {}
    record_b = {}
    # (target record, its recid, display flag, relation label, other rn, other recid)
    link_specs = (
        (record_a, recid_a, display_in_a, what_is_b_for_a, rn_b, recid_b),
        (record_b, recid_b, display_in_b, what_is_a_for_b, rn_a, recid_a),
    )
    for rec, recid, shown, relation, other_rn, other_recid in link_specs:
        record_add_field(rec, "001", controlfield_value=str(recid))
        record_add_field(rec, CFG_OTHER_RELATIONSHIP_ENTRY,
                         ind1=shown and "0" or "1",
                         subfields=[('i', relation), ('r', other_rn), ('w', str(other_recid))])
    return "<collection>\n%s\n%s</collection>" % (record_xml_output(record_a), record_xml_output(record_b))
Example #7
0
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param recid: id of the record being saved
    @param uid: id of the user owning the BibEdit cache
    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param task_name: when "bibedit", tag the bibupload task with --name
    @param sequence_id: optional task sequence identifier passed as -I
    @return: True
    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): if the cache is missing, `record` is never bound and
        # the code below raises NameError -- presumably callers guarantee a
        # cache exists when xml_record is empty; confirm.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        # Unique temp file in the BibEdit cache dir for the upload path.
        fd, file_path = tempfile.mkstemp(dir=CFG_BIBEDIT_CACHEDIR,
                                         prefix="%s_" % CFG_BIBEDIT_FILENAME,
                                         suffix="_%s_%s.xml" % (recid, uid))
        f = os.fdopen(fd, 'w')
        f.write(xml_to_write)
        f.close()
    else:
        # Deterministic file name that BibMerge knows how to find.
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        xml_file = open(file_path, 'w')
        xml_file.write(xml_to_write)
        xml_file.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Queue a replace-mode (-r) bibupload task on behalf of the user.
        args = ['bibupload', user_name, '-P', '5', '-r',
                file_path, '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
    def test_for_special_delete_field(self):
        """ BibUpload Revision Verifier - Rev1-100/300, Modified 100 in Rev1-Mod, Deleted 300 in Rev1-Mod (100/300), Patch for DELETE generated"""
        upload_rec = xml_marc_to_records(self.rev1_mod)
        orig_rec = xml_marc_to_records(self.rev1)

        rev_verifier = RevisionVerifier()
        (opt_mode, final_patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_rec[0], \
                                                         orig_rec[0], \
                                                         'replace')
        self.assertEqual('correct', opt_mode)
        # Patch dict ordering is nondeterministic, so either of two
        # serializations is accepted.
        # NOTE(review): the != '' checks pass whenever a comparison reports a
        # difference; compare with the == '' convention used by sibling
        # tests -- confirm intent.
        self.failUnless((compare_xmbuffers(self.patch_1, record_xml_output(final_patch))!='') or \
                        (compare_xmbuffers(self.patch_2, record_xml_output(final_patch))!=''))
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False, spec_name=''):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string in stead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    @param spec_name: when non-empty, passed to bibupload as the task name (-N)
    @return: True
    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
#            record_strip_empty_fields(record) # now performed for every record after removing unfilled volatile fields
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
        # NOTE(review): with no cache, `record` stays unbound and the code
        # below raises NameError -- presumably callers ensure a cache exists
        # when xml_record is empty; confirm.
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
#TP: we don't want this:    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
    xml_file = open(file_path, 'w')
    xml_file.write(xml_to_write)
    xml_file.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        # TP: check whether to add spec name
        if spec_name == '':
            # Pass XML file to BibUpload.
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name)
        else:
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name, '-N', spec_name)
    return True
def apply_hepnames_updates(hepname_updates):
    """Queue append-mode uploads adding external IDs to HepNames records.

    @param hepname_updates: dict mapping recid -> dict of identifier
        entries; only ORCID/ORIGINAL_BAI/INSPIRE/KAKEN keys become 035
        fields (ORIGINAL_BAI is stored under the label "BAI")
    """
    bibupload = ChunkedBibUpload(mode='a', user='******')
    for recid, entry in hepname_updates.iteritems():
        record = {}
        record_add_field(record, '001', controlfield_value=str(recid))
        for key, value in entry.iteritems():
            if key not in ('ORCID', 'ORIGINAL_BAI', 'INSPIRE', 'KAKEN'):
                continue
            # ORIGINAL_BAI identifiers are labelled plain "BAI" in 035 $9.
            label = 'BAI' if key == 'ORIGINAL_BAI' else key
            record_add_field(record, '035', subfields=[('a', value), ('9', label)])
        write_message(record_xml_output(record))
        bibupload.add(record_xml_output(record))
Example #11
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="",
                         checked=True, displayed_records=None):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: the tags to be shown to the user
    @param checked: is the record checked by the user?
    @param displayed_records: records to be displayed on a given page;
        None means no page filtering is applied

    @returns: record formated to be displayed or None
    """
    if update_commands and checked:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}

    # Bug fix: the default displayed_records=None used to raise TypeError
    # on the membership test below; None now means "display every record".
    if displayed_records is not None and record_id not in displayed_records:
        return

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and checked:
            # Show a colored diff between the stored and updated record.
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id)
        else:
            # Plain textmarc listing, optionally restricted to outputTags.
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines():
                if not filter_tags or line.split()[0].replace('_', '') in outputTags:
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands and checked:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                        of=output_format,
                                        xml_record=xml_record,
                                        ln=language)
    return result
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the textmarc ("hm") output to;
        ignored when it contains "All tags" or is empty
    """
    if update_commands:
        # Apply the pending modifications to a copy of the record.
        updated_record = _get_updated_record(record_id, update_commands)

    old_record = search_engine.get_record(recid=record_id)
    xml_record = bibrecord.record_xml_output(old_record)
    if "hm" == output_format:
        result = "<pre>\n"
        if ("All tags" not in outputTags) and outputTags:
            # Show only lines carrying one of the requested tags.
            if update_commands:
                # Diff output prefixes each line, so the tag sits in the
                # second whitespace-separated token.
                marc_record = _get_record_diff(record_id, old_record, updated_record)
                tag_position = 1
            else:
                marc_record = _create_marc(xml_record)
                tag_position = 0
            for line in marc_record.split('\n')[:-1]:
                if line.split()[tag_position][:3] in outputTags:
                    if update_commands:
                        result += line.strip() + '\n'
                    else:
                        result += "%09d " % record_id + line.strip() + '\n'
                elif '<strong' in line:
                    # Highlighted (modified) diff lines embed the tag
                    # inside the fourth token.
                    if line.split()[3][5:8] in outputTags:
                        result += line.strip() + '\n'
        else:
            # No tag filtering: emit the whole diff or the whole record.
            if update_commands:
                result += _get_record_diff(record_id, old_record, updated_record)
            else:
                marc_record = _create_marc(xml_record)
                for line in marc_record.split('\n')[:-1]:
                    result += "%09d " % record_id + line.strip() + '\n'

        result += "</pre>"
        return result

    if update_commands:
        xml_record = bibrecord.record_xml_output(updated_record)
    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
    def test_correcting_del_field_add_field_diff_ind(self):
        """ BibUpload Revision Verifier - Rev3-100/970__/888, Deleted 970__ and Added 970CP in Rev2(100/970__), Patch Generated for 970__/970CP"""
        upload_recs = xml_marc_to_records(self.rev2_mod_del_one_add_one)
        orig_recs = xml_marc_to_records(self.data["rev3"][0])

        rev_verifier = RevisionVerifier()
        (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0], orig_recs[0], "replace")
        # The verifier must downgrade the replace into a correct upload.
        self.assertEqual("correct", opt_mode)
        # NOTE:for multiple fields in patch it is better to compare with different possible patch strings
        # This is due to unsorted key-value pairs of generated patch dictionary
        # self.assertEqual(compare_xmbuffers(record_xml_output(patch), self.patch_del_one_add_one), '')
        self.failUnless(
            (compare_xmbuffers(record_xml_output(patch), self.patch_del_one_add_one) != "")
            or (compare_xmbuffers(record_xml_output(patch), self.patch_del_one_add_one_2) != "")
        )
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the textmarc ("hm") diff output to
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    if "hm" == output_format:
        result = "<pre>\n"
        if "All tags" not in outputTags or not outputTags:
            diff_result = _get_record_diff(record_id, old_record, updated_record)
            # NOTE(review): with an empty outputTags the inner loop never
            # runs and nothing is emitted -- the branch condition may be
            # inverted relative to sibling implementations; confirm.
            for line in diff_result.split('\n')[:-1]:
                for tag in outputTags:
                    if tag in line.split()[1]:
                        result += line.strip() + '\n'
                    elif '<strong' in line:
                        # NOTE(review): a line may be appended once per
                        # matching tag; verify duplicates are intended.
                        if tag in line.split()[3]:
                            result += line.strip() + '\n'
        else:
            result += _get_record_diff(record_id, old_record, updated_record)

        result += "</pre>"
        return result

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Example #15
0
def replace_references(recid):
    """Replace references for a record.

    The record itself is not updated; the MARCXML of the document with
    updated references is returned.

    @param recid: the id of the record
    @return: MARCXML string with the 999 fields replaced, or None when
        reference extraction produced no record
    """
    # Extract and parse the references of this record.
    references_xml = extract_references_from_record_xml(recid)
    parsed = create_record(references_xml.encode("utf-8"))
    # Current record structure.
    record = get_record(recid)

    if not parsed[0]:
        return None

    # Swap every existing 999 field for the freshly extracted ones.
    extracted_fields = record_get_field_instances(parsed[0], tag="999", ind1="%", ind2="%")
    record_delete_fields(record, "999")
    record_add_fields(record, "999", extracted_fields)
    return record_xml_output(record)
Example #16
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the textmarc ("hm") output to
    """
    updated_record = _get_updated_record(record_id, update_commands)

    xml_record = bibrecord.record_xml_output(updated_record)

    if output_format == "hm":
        marc_record = _create_marc(xml_record)
        pieces = ["<pre>\n"]
        if "All tags" not in outputTags or not outputTags:
            # Keep only lines whose leading tag matches a requested tag.
            for line in marc_record.split('\n')[:-1]:
                for tag in outputTags:
                    if tag in line.split()[0]:
                        pieces.append("%09d " % record_id + line.strip() + '\n')
        else:
            for line in marc_record.split('\n')[:-1]:
                pieces.append("%09d " % record_id + line.strip() + '\n')
        pieces.append("</pre>")
        return "".join(pieces)

    return bibformat.format_record(recID=None,
                                   of=output_format,
                                   xml_record=xml_record,
                                   ln=language)
def main():
    """Filter a MARCXML file down to a chosen set of tags.

    Reads the tag list, input path and output path from argv and writes
    a new collection containing only the requested tags of each record.
    """
    usage = """ Usage: $ %s [tags_csv] [marcxml_in] [marcxml_out]
  tags_csv      Tags to preserve as CSVs
  marcxml_in    MARCXML file to read from
  marcxml_out   MARCXML file to write""" % (PROGRAM_NAME,)
    if len(argv) != 4:
        print(usage)
        return
    tags = argv[1].split(',')
    fin, fout = argv[2], argv[3]

    with open(fin) as handle:
        records = create_records(handle.read())

    parts = ['<?xml version="1.0"?>\n'
             '<collection xmlns="http://www.loc.gov/MARC21/slim">\n']
    for record, err, reason in records:
        if err == '0':
            # Parsing failed for this record; report and keep going.
            print('Error: Could not create record\n' + reason)
        else:
            parts.append(record_xml_output(record, tags=tags) + '\n')

    with open(fout, 'w') as handle:
        handle.write(''.join(parts) + '</collection>\n')
Example #18
0
 def get_record(self, xml_file):
     """ Reads a xml file in JATS format and returns
         a xml string in marc format

         @param xml_file: path to the JATS XML article file
         @return: MARCXML string, or "" when a bad character is met """
     self.document = parse(xml_file)
     rec = {}
     # Title -> 245 $a
     title = self._get_title()
     if title:
         record_add_field(rec, '245', subfields=[('a', title)])
     journal, volume, issue, year, start_date, doi,\
         article_id = self._get_publition_information()
     # Publication date -> 260 $c
     if start_date:
         record_add_field(rec, '260', subfields=[('c', start_date)])
     # DOI -> 0247 $a/$2
     if doi:
         record_add_field(rec, '024', ind1='7', subfields=[('a', doi),
                                                           ('2', 'DOI')])
     # First author -> 100, remaining authors -> 700
     authors = self._get_authors()
     first_author = True
     for author in authors:
         subfields = [('a', author[0]), ('v', author[1])]
         if first_author:
             record_add_field(rec, '100', subfields=subfields)
             first_author = False
         else:
             record_add_field(rec, '700', subfields=subfields)
     # Abstract -> 520
     abstract = self._get_abstract()
     if abstract:
         record_add_field(rec, '520', subfields=[('a', abstract),
                                                 ('9', 'APS')])
     copyrightt = self._get_copyright()
     if copyrightt:
         year = ''
         if copyrightt.startswith('©'):
             # NOTE(review): the [2:] slice presumes the copyright sign
             # occupies two bytes (UTF-8 bytestring) -- confirm encoding.
             year = copyrightt[2:].strip()
             year = year.split()[0]
         if year.isdigit():
             # "© <year> <holder>" form: split the year out.
             copyrightt = copyrightt[2:].strip()
             copyrightt = " ".join(copyrightt.split()[1:])
             record_add_field(rec, '542', subfields=[('d', copyrightt),
                                                     ('g', year),
                                                     ('3', 'Article')])
         else:
             # No parsable year in the statement: fall back to the
             # publication year.
             year = start_date[:4]
             record_add_field(rec, '542', subfields=[('f', copyrightt),
                                                     ('g', year),
                                                     ('3', 'Article')])
     # Journal reference -> 773
     record_add_field(rec, '773', subfields=[('p', journal),
                                             ('v', volume),
                                             ('n', issue),
                                             ('y', year),
                                             ('c', article_id)])
     # Collection flags.
     record_add_field(rec, '980', subfields=[('a', 'HEP')])
     record_add_field(rec, '980', subfields=[('a', 'Citeable')])
     record_add_field(rec, '980', subfields=[('a', 'Published')])
     self._add_references(rec)
     try:
         return record_xml_output(rec)
     except UnicodeDecodeError:
         sys.stderr.write("""Found a bad char in the file
                             for the article """ + doi)
         return ""
Example #19
0
def upload_amendments(records, holdingpen):
    """ Upload a modified record """

    if task_get_option("no_upload", False) or len(records) == 0:
        return

    xml = '<collection xmlns="http://www.loc.gov/MARC21/slim">'
    for record in records:
        xml += record_xml_output(record)
    xml += "</collection>"

    tmp_file_fd, tmp_file = mkstemp(
        suffix='.xml',
        prefix="bibcheckfile_%s" % time.strftime("%Y-%m-%d_%H:%M:%S"),
        dir=CFG_TMPSHAREDDIR
    )
    os.write(tmp_file_fd, xml)
    os.close(tmp_file_fd)
    os.chmod(tmp_file, 0644)
    if holdingpen:
        flag = "-o"
    else:
        flag = "-r"
    task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
    write_message("Submitted bibupload task %s" % task)
def _get_formated_record(record_id, output_format, update_commands, language):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    # FIXME: Remove this as soon as the formatting for MARC is
    # implemented in bibformat
    if output_format == "hm":
        return _create_marc(xml_record)

    return bibformat.format_record(recID=None,
                                   of=output_format,
                                   xml_record=xml_record,
                                   ln=language)
def get_keywords_body(keywords, req, recid, argd):
    """Returns the body associated with the keywords.

    @param keywords: list of (keyword, weight) pairs for the record
    @param req: request object; written to directly
    @param recid: id of the record the keywords belong to
    @param argd: request arguments; 'type' selects list/tagcloud/xml
        rendering and 'sort' the ordering
    """
    body = []
    rec = get_record(recid)

    extend_argd(argd)

    if keywords:
        # Weighted display is only possible when no keyword has weight 0.
        weights_available = 0 not in zip(*keywords)[1]
    else:
        # No keywords: offer a form to trigger their generation.
        req.write('There are no keywords associated with this document.<br>' \
            '<form action="" method="get">' \
            '  <input type="hidden" name="generate" value="yes">' \
            '  <input type="submit" value="Generate keywords">' \
            '</form>')
        return

    if argd['type'] == 'tagcloud' and not weights_available:
        # No weight is specified for at least one of the keywords.
        # Display the keywords as a list.
        argd['type'] = 'list'

    if argd['type'] == 'tagcloud':
        body.append('<div style="text-align: center; color: red; '
            'font-size: 80%; margin-top: 15px">Single keywords in grey, '
            'composite keywords in blue.</div>')

    if argd['type'] == 'list':
        # Display keywords as a list.
        body.append(_get_keywords_list(keywords, argd))
    elif argd['type'] == 'tagcloud':
        if argd['sort'] == 'related' and not keywords:
            print 'No similar document was found.'

        # Separate single and composite keywords.
        single_keywords, composite_keywords = [], []
        for keyword in keywords:
            if ': ' in keyword[0]:
                composite_keywords.append(keyword)
            else:
                single_keywords.append(keyword)

        # Display keywords as a tag cloud.
        single_levels = _get_font_levels(single_keywords)
        composite_levels = _get_font_levels(composite_keywords)

        body.append(_get_html_tag_cloud(single_levels +
            composite_levels, argd))
    elif argd['type'] == 'xml':
        # Raw MARCXML of the keyword (653) fields.
        body.append('<pre><code>%s</code></pre>' %
            escape_html(record_xml_output(rec, ['653'])))
    else:
        body = 'Unknown type: ' + argd['type']

    out = ''
    for element in body:
        out += '<br>' + element.encode('utf-8')
    req.write(out)
    return
    def test_correcting_added_field_with_diff_ind(self):
        """ BibUpload Revision Verifier - Rev3-100/970__/888, Added 970CP in Rev2(100/970__), Patch Generated for 970CP"""
        upload_recs = xml_marc_to_records(self.rev2_mod_field_diff_ind)
        orig_recs = xml_marc_to_records(self.data["rev3"][0])

        rev_verifier = RevisionVerifier()
        (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0], orig_recs[0], "replace")
        # The verifier must downgrade the replace into a correct upload
        # and produce exactly the expected patch.
        self.assertEqual("correct", opt_mode)
        self.assertEqual(compare_xmbuffers(record_xml_output(patch), self.patch_diff_ind), "")
    def test_add_new_field(self):
        """ BibUpload Revision Verifier - Rev3-100/970/888, Added 300 to Rev2(100/970), Patch Generated for 300"""
        upload_recs = xml_marc_to_records(self.rev2_add_field)
        orig_recs = xml_marc_to_records(self.data["rev3"][0])

        rev_verifier = RevisionVerifier()
        (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0], orig_recs[0], "replace")
        # The verifier must downgrade the replace into a correct upload
        # and produce exactly the expected patch.
        self.assertEqual("correct", opt_mode)
        self.assertEqual(compare_xmbuffers(record_xml_output(patch), self.patch), "")
Example #24
0
def _prepare_marcxml(recid_a, rn_a, recids_and_rns_b, what_is_a_for_b, what_is_b_for_a, display_in_a=True, display_in_b=True, marc_for_a=None, marc_for_b=None, upload_mode='append', consider_empty_p=False):
    """Build the MARCXML linking record A with each record of side B, and back.

    @param recid_a: record id of side A
    @param rn_a: report number of side A
    @param recids_and_rns_b: list of (recid, report number) tuples for side B
    @param what_is_a_for_b: relationship label stored in B records (None skips that direction)
    @param what_is_b_for_a: relationship label stored in record A (None skips that direction)
    @param display_in_a: whether the link field in A is displayed (ind1 "0") or hidden ("1")
    @param display_in_b: same, for the fields added to B records
    @param marc_for_a: optional explicit MARC (tag/indicators) for A's link field
    @param marc_for_b: optional explicit MARC for B's link fields
    @param upload_mode: 'append' or 'correct'
    @param consider_empty_p: when True, emit empty linking fields so that a
        'correct' upload can drop links removed by the submitter
    @return: a <collection> MARCXML string with all correcting records
    """
    output = '<collection>'
    record_a = {}
    record_b = {}
    if what_is_b_for_a is not None:
        marc_tag_for_a, marc_ind1_for_a, marc_ind2_for_a = \
          _prepare_marc(marc_for_a, CFG_OTHER_RELATIONSHIP_ENTRY, display_in_a and "0" or "1")
        record_add_field(record_a, "001", controlfield_value=str(recid_a))
        if upload_mode == 'correct' and not recids_and_rns_b and consider_empty_p:
            # Add empty field in order to account for cases where all
            # linkings are removed by the submitter
            record_add_field(record_a, marc_tag_for_a, ind1=marc_ind1_for_a, ind2=marc_ind2_for_a)
        for recid_b, rn_b in recids_and_rns_b:
            record_add_field(record_a, marc_tag_for_a, ind1=marc_ind1_for_a, ind2=marc_ind2_for_a,
                             subfields=[('i', what_is_b_for_a), ('r', rn_b), ('w', str(recid_b))])
        output += record_xml_output(record_a)

    if what_is_a_for_b is not None:
        marc_tag_for_b, marc_ind1_for_b, marc_ind2_for_b = \
          _prepare_marc(marc_for_b, CFG_OTHER_RELATIONSHIP_ENTRY, display_in_b and "0" or "1")
        for recid_b, rn_b in recids_and_rns_b:
            record_b = {}
            record_add_field(record_b, "001", controlfield_value=str(recid_b))
            if upload_mode == 'correct':
                # Keep the pre-existing links of B so a correct upload
                # does not wipe them out.
                original_linking_fields = _get_record_linking_fields(recid_b, recid_a, marc_tag_for_b, marc_ind1_for_b, marc_ind2_for_b)
                record_add_fields(record_b, marc_tag_for_b, original_linking_fields)
            record_add_field(record_b, marc_tag_for_b, ind1=marc_ind1_for_b, ind2=marc_ind2_for_b,
                             subfields=[('i', what_is_a_for_b), ('r', rn_a), ('w', str(recid_a))])
            output += record_xml_output(record_b)
        # Remove linking in remote records where adequate
        if consider_empty_p:
            unlinked_recids = get_unlinked_records(recid_a, marc_for_b, display_in_b, upload_mode, recids_and_rns_b)
            for recid_b in unlinked_recids:
                record_b = {}
                record_add_field(record_b, "001", controlfield_value=str(recid_b))
                original_linking_fields = _get_record_linking_fields(recid_b, recid_a, marc_tag_for_b, marc_ind1_for_b, marc_ind2_for_b)
                if not original_linking_fields:
                    # Add empty field in order to account for cases where all
                    # linkings are removed by the submitter
                    record_add_field(record_b, marc_tag_for_b, ind1=marc_ind1_for_b, ind2=marc_ind2_for_b)
                record_add_fields(record_b, marc_tag_for_b, original_linking_fields)
                output += record_xml_output(record_b)
    output += '</collection>'
    return output
def generate_marc_to_append(local, remote):
    """Build MARCXML that appends an 035 remote identifier to a record.

    The produced record carries the local record id in tag 001 plus one
    035 field with the remote instance name ($9) and the remote id ($a).
    """
    record = {}
    record_add_field(record, '001', controlfield_value=str(local))
    position = record_add_field(record, '035')
    # Both subfields go into the same freshly added 035 field.
    for code, value in (('9', REMOTE_INSTANCE), ('a', str(remote))):
        record_add_subfield_into(record, '035', code, value,
                                 field_position_global=position)
    return record_xml_output(record)
    def test_interchanged_fields(self):
        """ BibUpload Revision Verifier - Rev1--100-1/100-2/100-3/970/888, Rev1-Up--100-2/100-3/100-1/970/888, Patch Generated for 100"""

        # Parse the reordered upload and the original revision into bibrecord
        # structures (self.rev1* fixtures are presumably built in setUp).
        upload_recs = xml_marc_to_records(self.rev1_mod)
        orig_recs = xml_marc_to_records(self.rev1)

        rev_verifier = RevisionVerifier()
        # Interchanged-but-identical fields must downgrade the requested
        # "replace" to a "correct" upload with a patch for the affected tag.
        (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0], orig_recs[0], "replace")
        self.assertEqual("correct", opt_mode)
        # compare_xmbuffers returns "" when the generated patch matches the
        # expected fixture exactly.
        self.assertEqual(compare_xmbuffers(record_xml_output(patch), self.patch), "")
Example #27
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="", run_diff=True):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the textmarc display to; no filtering
        is applied when it contains "All tags" or is empty (assumed to be a
        container of tag strings -- TODO confirm against caller)
    @param run_diff: determines if we want to run _get_record_diff function, which sometimes takes too much time
    """
    if update_commands:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    # Options passed to xmlmarc2textmarc: plain text-marc, correction mode.
    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and run_diff:
            # Show a diff between the stored and the updated record.
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id)
        else:
            # Plain textmarc preview of the stored record, optionally
            # restricted to the requested tags.
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines()[:-1]:
                if not filter_tags or line.split()[0].replace('_', '') in outputTags:
                    # Prefix each kept line with the zero-padded record id.
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                        of=output_format,
                                        xml_record=xml_record,
                                        ln=language)
    return result
Example #28
0
def create_xml(recs_to_change, subfields):
    """
    Create xmls for upload.

    @param recs_to_change: affected tagiic in recid
    @type  recs_to_change: dict
    @param subfields: VOLATILE content in tagiic
    @type  subfields: dict
    @return: (string, string) xml's for correct and delete
    """

    xml_correct = ''
    xml_delete = ''
    # Iterate the dict directly; deprecated dict.has_key() replaced by `in`.
    for recid in recs_to_change:
        tags_correct = []
        tags_delete = []
        tags4update = []
        record_old = get_record(recid)
        # Work on a copy so the untouched original is available for deletes.
        record = deepcopy(record_old)
        for tagiic in recs_to_change[recid]:
            tag = tagiic[:3]
            for value in subfields[tagiic]:
                if tag in record:
                    for field_position, field in enumerate(record[tag]):
                        # NOTE(review): subfields are deleted while iterating
                        # field[0]; positions may shift when several matching
                        # subfields share one field -- verify in bibrecord.
                        for subfield_position, subfield in enumerate(field[0]):
                            if subfield[1] == value and subfield[0] == tagiic[5]:
                                record_delete_subfield_from(
                                    record, tag, subfield_position,
                                    field_position_local=field_position)
                                tags4update.append(tag)
        for tag in set(tags4update):
            # A tag that survived the subfield deletions is corrected in
            # place; a tag that vanished entirely must be deleted instead.
            if tag in record:
                tags_correct.append(tag)
            else:
                tags_delete.append(tag)
        if tags_correct:
            xml_correct += record_xml_output(record,
                                             ['001', '005'] + tags_correct) + '\n'
        if tags_delete:
            xml_delete += record_xml_output(record_old,
                                            ['001', '005'] + tags_delete) + '\n'
    return xml_correct, xml_delete
    def test_add_identical_field(self):
        """ BibUpload Revision Verifier - Rev3-100/970/888, Added 100 to Rev2(100/970), Patch Generated for 100"""
        # Upload is rev2 plus an extra 100 field identical to an existing
        # one; original is rev3 (fixtures presumably built in setUp).
        upload_identical_rec = xml_marc_to_records(self.rev2_add_sim_field)
        orig_recs = xml_marc_to_records(self.data['rev3'][0])

        rev_verifier = RevisionVerifier()
        # The verifier must downgrade "replace" to a "correct" upload and
        # produce a patch covering only the 100 tag.
        (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_identical_rec[0], \
                                           orig_recs[0], \
                                           'replace')
        self.assertEqual('correct', opt_mode)
        # compare_xmbuffers returns '' when the patch matches the fixture.
        self.assertEqual(compare_xmbuffers(record_xml_output(patch), self.patch_identical_field), '')
Example #30
0
def bibupload_record(record=None, collection=None,
                     file_prefix="bibuploadutils", mode="-c",
                     alias='bibuploadutils', opts=None):
    """
    General purpose function that will write a MARCXML file and call bibupload
    on it.

    @param record: a single bibrecord structure to upload
    @param collection: an iterable of bibrecord structures, chunked into
        uploads of at most CFG_MAX_RECORDS records each
    @param file_prefix: prefix for the temporary MARCXML file(s)
    @param mode: bibupload mode flag (e.g. "-c")
    @param alias: task alias passed to task_low_level_submission
    @param opts: extra command-line options appended to the bibupload call
    @return: id of the last submitted bibupload task, or None
    """
    # Avoid the shared-mutable-default pitfall: normalize opts per call.
    if opts is None:
        opts = []
    if collection is None and record is None:
        return

    (file_out, filename) = open_temp_file(file_prefix)

    if collection is not None:
        file_out.write("<collection>")
        tot = 0
        for rec in collection:
            file_out.write(record_xml_output(rec))
            tot += 1
            if tot == CFG_MAX_RECORDS:
                # Chunk is full: close and submit it, then start a new file.
                file_out.write("</collection>")
                close_temp_file(file_out, filename)
                task_low_level_submission(
                    'bibupload', alias, mode, filename, *opts
                )

                (file_out, filename) = open_temp_file(file_prefix)
                file_out.write("<collection>")
                tot = 0
        file_out.write("</collection>")
    elif record is not None:
        tot = 1
        file_out.write(record_xml_output(record))

    close_temp_file(file_out, filename)
    # Submit only when the last chunk actually contains records.
    if tot > 0:
        return task_low_level_submission(
            'bibupload', alias, mode, filename, *opts
        )
    return None
Example #31
0
def write_records_to_file(output_dir, name, records, dry_run):
    """Write all non-empty records from *records* (a dict) to a MARCXML file.

    In dry-run mode only a status line is printed; nothing is written.
    """
    if len(records) > 0:
        lines = ["<collection>"]
        for rec in records.itervalues():
            if rec != {}:
                lines.extend(record_xml_output(rec).split('\n'))
        lines.append("</collection>")
        count = str(len(records))
        if dry_run:
            _print_out("DRY: Ready to write " + count + " entries to file.")
        else:
            _print_out("-> Writing " + count + " entries to file...")
            write_list_to_file(output_dir, name, lines)
Example #32
0
def write_record_to_file(filename, record_list):
    """
    Writes a new MARCXML file to specified path from a list of records.

    Duplicate fields are dropped from each record first. No file is created
    when the list is empty or contains only empty records.
    """
    if len(record_list) > 0:
        out = ["<collection>"]
        for record in record_list:
            if record != {}:
                record = record_drop_duplicate_fields(record)
                out.append(record_xml_output(record))
        out.append("</collection>")
        # More than the two wrapper lines means at least one real record.
        if len(out) > 2:
            # Context manager guarantees the handle is closed even if the
            # write raises (the original leaked the descriptor on error).
            with open(filename, 'w') as file_fd:
                file_fd.write("\n".join(out))
Example #33
0
def write_record_to_file(filename, record_list):
    """Writes a new MARCXML file to specified path from BibRecord list.

    Returns True when a file was written; returns None when the list is
    empty or contains only empty records (no file is created).
    """
    from invenio.bibrecord import record_xml_output

    if len(record_list) > 0:
        out = ["<collection>"]
        for record in record_list:
            if record != {}:
                out.append(record_xml_output(record))
        out.append("</collection>")
        # More than the two wrapper lines means at least one real record.
        if len(out) > 2:
            # Context manager guarantees the handle is closed even if the
            # write raises (the original leaked the descriptor on error).
            with open(filename, 'w') as file_fd:
                file_fd.write("\n".join(out))
            return True
def generate_marc_to_append(local, remote):
    """Produce MARCXML adding an 035 remote identifier to record *local*.

    The 035 field holds the remote instance name in $9 and the remote
    record id in $a.
    """
    marc = {}
    record_add_field(marc, '001', controlfield_value=str(local))
    pos = record_add_field(marc, '035')
    # Attach both subfields to the 035 field just created.
    record_add_subfield_into(marc, '035', '9', REMOTE_INSTANCE,
                             field_position_global=pos)
    record_add_subfield_into(marc, '035', 'a', str(remote),
                             field_position_global=pos)
    return record_xml_output(marc)
Example #35
0
def bst_hepdata():
    """Harvest HepData records and cross-link them with INSPIRE records.

    Uploads every dumped HepData record as MARCXML, then appends an
    035 HEPDATA identifier to the INSPIRE records that were referenced
    by the dump but do not carry that identifier yet.
    """
    uploader = ChunkedHepDataUpload()
    dumper = HepDataDumper()
    for record in dumper:
        marcxml_record = hepdata2marcxml(record)
        uploader.add(marcxml_record)
    inspire_ids = dumper.inspire_ids
    # Records already flagged with an 035 $9 HEPDATA identifier.
    current_inspire_ids = intbitset(perform_request_search(p='035__9:HEPDATA'))
    records_to_amend = inspire_ids - current_inspire_ids
    # mode='a': append-only upload of the new 035 fields.
    id_appender = ChunkedBibUpload(mode='a', user='******')
    for recid in records_to_amend:
        rec = {}
        record_add_field(rec, tag="001", controlfield_value=str(recid))
        record_add_field(rec,
                         tag="035",
                         subfields=[('a', 'ins%s' % recid), ('9', 'HEPDATA')])
        id_appender.add(record_xml_output(rec))
Example #36
0
def modify_record_timestamp(revision_xml, last_revision_ts):
    """ Modify tag 005 to add the revision passed as parameter.
    @param revision_xml: marcxml representation of the record to modify
    @type revision_xml: string
    @param last_revision_ts: timestamp to add to 005 tag
    @type last_revision_ts: string

    @return: marcxml with 005 tag modified
    """
    record = create_record(revision_xml)[0]
    # Create the 005 controlfield when missing, otherwise overwrite the
    # first (and only) occurrence in place.
    if "005" not in record:
        record_add_field(record, '005', controlfield_value=last_revision_ts)
    else:
        record_modify_controlfield(record, "005", last_revision_ts,
                                   field_position_local=0)
    return record_xml_output(record)
Example #37
0
    def approve_record(self, recid):
        """ Approve a record to make it publicly available """
        # Make MARCXML to approve record
        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))
        # Adding 980__a OPENAIRE marks the record as part of the collection.
        record_add_field(rec, '980', subfields=[('a', 'OPENAIRE')])
        output = "<collection>%s</collection>" % record_xml_output(rec)

        # Upload MARCXML
        run_sql("TRUNCATE schTASK")  # Ensures we run bibupload
        (hdl, marcxml_path) = mkstemp(suffix=".xml", text=True)
        open(marcxml_path, 'w').write(output)
        task_low_level_submission('bibupload', 'openaire', '-c', marcxml_path,
                                  '-P5')
        task_low_level_submission('bibindex', 'openaire')
        task_low_level_submission('webcoll', 'openaire')
        # Run the queued tasks synchronously; the hard-coded ids 1-3 rely on
        # the schTASK truncate above (presumably a test/demo context).
        os.system("%s/bin/bibupload 1 > /dev/null" % CFG_PREFIX)
        os.system("%s/bin/bibindex 2 > /dev/null" % CFG_PREFIX)
        os.system("%s/bin/webcoll 3 > /dev/null" % CFG_PREFIX)
Example #38
0
def replace_references(recid, uid=None, txt=None, url=None):
    """Replace references for a record

    The record itself is not updated, the marc xml of the document with updated
    references is returned

    Parameters:
    * recid: the id of the record
    * uid: id of the user owning the BibEdit cache entry to read the record from
    * txt: references in text mode
    * url: URL to extract the references from
    """
    # Parse references: prefer explicit text, then a URL, then the record's
    # own attached document.
    if txt is not None:
        references_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        references_xml = extract_references_from_url_xml(url)
    else:
        references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode('utf-8'))

    # Work on the user's cached copy of the record, not the stored version.
    dummy1, dummy2, record, dummy3, dummy4, dummy5, dummy6 = get_cache_file_contents(
        recid, uid)
    out_xml = None

    # 999C5 holds the extracted references, 999C6 the refextract status.
    references_to_add = record_get_field_instances(references[0],
                                                   tag='999',
                                                   ind1='C',
                                                   ind2='5')
    refextract_status = record_get_field_instances(references[0],
                                                   tag='999',
                                                   ind1='C',
                                                   ind2='6')

    if references_to_add:
        # Replace 999 fields
        record_delete_fields(record, '999')
        record_add_fields(record, '999', references_to_add)
        record_add_fields(record, '999', refextract_status)
        # Update record references
        out_xml = record_xml_output(record)

    # None when no references could be extracted (record left untouched).
    return out_xml
Example #39
0
def main():
    """
    TODO: Fix file download of funny URLS ?conf...
    No keywords - check
    presentation pubype not shown for some reason.

    Converts rows of ~/Desktop/emi.csv into MARCXML records written to
    ~/Desktop/emi.xml, skipping the CSV header line.
    """
    f = codecs.open(os.path.expanduser("~/Desktop/emi.csv"), "r", "utf-8")
    fout = open(os.path.expanduser("~/Desktop/emi.xml"), "w")

    # NOTE(review): the closing </collection> tag is never written and the
    # files are never closed in this snippet -- verify against the full
    # original source.
    fout.write("<collection>")
    for (i, row) in enumerate(unicode_csv_reader(f)):
        if i == 0:
            # Skip the CSV header row.
            continue

        print i
        try:
            fout.write(record_xml_output(handle_row(i, row)))
        except Exception, e:
            # Best-effort conversion: report and continue with the next row.
            print e
            print "Couldn't handle row:", i
Example #40
0
def bst_hal():
    """Match INSPIRE records against HAL and append the HAL id (035 $9 HAL).

    For every not-yet-matched exportable record, look up its DOIs and arXiv
    ids in the HAL maps; on a unique match, schedule an append upload of an
    035 field carrying the HAL identifier.

    @return: True on completion
    """
    doi_map, arxiv_map = get_hal_maps()
    matchable_records = get_record_ids_to_export()
    write_message("Total matchable records: %s" % len(matchable_records))
    hal_records = get_hal_records()
    write_message("Already matched records: %s" % len(hal_records))
    bibupload = ChunkedBibUpload(mode='a', notimechange=True, user='******')
    tot_records = matchable_records - hal_records
    write_message("Records to be checked: %s" % len(tot_records))
    for i, recid in enumerate(tot_records):
        if i % 1000 == 0:
            write_message("%s records done out of %s" % (i, len(tot_records)))
            task_sleep_now_if_required()
        dois = get_fieldvalues(recid, tag='0247__a', sort=False)
        arxivs = get_fieldvalues(recid, tag='037__a', sort=False)
        matched_hal = [doi_map[doi] for doi in dois if doi in doi_map]
        matched_hal += [
            arxiv_map[arxiv] for arxiv in arxivs if arxiv in arxiv_map
        ]

        # Let's assert that we matched only one single hal document at most.
        # Deduplicate by object identity before counting, so a document
        # reached via both its DOI and its arXiv id counts once (fix: this
        # set was previously computed but never used in the check below --
        # assumes doi_map/arxiv_map share entry objects, TODO confirm).
        matched_hal_id = set(id(entry) for entry in matched_hal)
        if len(matched_hal_id) > 1:
            write_message(
                "WARNING: record %s matches more than 1 HAL record: %s" %
                (recid, matched_hal),
                stream=sys.stderr)
            continue
        elif not matched_hal:
            continue
        hal_id = matched_hal[0]['halId_s']

        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))
        record_add_field(rec, '035', subfields=[('a', hal_id), ('9', 'HAL')])

        write_message("Record %s matched HAL record %s" % (recid, hal_id))

        bibupload.add(record_xml_output(rec))

    return True
Example #41
0
def bst_labssync():
    """
    Synchronizes from Labs via redis.

    Pops record ids from the CFG_REDIS_KEY set, fetches each record as
    MARCXML from the Labs API and schedules replace uploads; ids that
    fail are reported at the end for a manual re-sync.
    """
    r = redis.StrictRedis.from_url(CFG_REDIS_HOST_LABS)
    user_agent = make_user_agent_string('labssync')
    s = requests.Session()
    s.headers['User-Agent'] = user_agent
    s.headers['Accept'] = 'application/marcxml+xml'

    # The redis set holds the ids of records to pull from Labs.
    tot = r.scard(CFG_REDIS_KEY)
    if tot == 0:
        write_message("Nothing to do")
        return
    else:
        write_message("At least %s records to synchronize from labs" % tot)

    errors = []
    final_total = 0
    # mode='r': full replace of each record on upload.
    uploader = ChunkedBibUpload(mode='r', user='******')
    while True:
        # spop removes the id from the set as it is read, so work is
        # consumed exactly once even across runs.
        elem = r.spop(CFG_REDIS_KEY)
        if not elem:
            break
        final_total += 1
        try:
            record = s.get("https://%s/api/%s" % (CFG_LABS_HOSTNAME, elem)).text

            # Let's strip collection/XML header
            record = record_xml_output(create_record(record)[0])
            uploader.add(record)
            task_sleep_now_if_required()
        except Exception as err:
            # Best-effort: log the failure and continue with the next id.
            register_exception()
            write_message("ERROR: when retrieving %s: %s" % (elem, err), stream=sys.stderr)
            errors.append(elem)

    write_message("Finally synced %s records from labs" % final_total)
    if errors:
        write_message("All those %s records had errors and might need to be resynced: %s" % (len(errors), ', '.join(errors)))
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the "hm" diff display to; no
        filtering is applied when it contains "All tags"
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    if "hm" == output_format:
        result = "<pre>\n"
        if "All tags" not in outputTags or not outputTags:
            diff_result = _get_record_diff(record_id, old_record,
                                           updated_record)
            # Keep only diff lines mentioning a requested tag; lines wrapped
            # in <strong> markup carry the tag at a different split position.
            # NOTE(review): a line can be appended once per matching tag and
            # line.split() indexing assumes a fixed diff format -- verify.
            for line in diff_result.split('\n')[:-1]:
                for tag in outputTags:
                    if tag in line.split()[1]:
                        result += line.strip() + '\n'
                    elif '<strong' in line:
                        if tag in line.split()[3]:
                            result += line.strip() + '\n'
        else:
            # "All tags" requested: show the complete diff unfiltered.
            result += _get_record_diff(record_id, old_record, updated_record)

        result += "</pre>"
        return result

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Example #43
0
def hepdata2marcxml(record):
    """Render a HepData record dict as a MARCXML string.

    Maps DOI to 024, title to 245, abstract to 520, keywords to 695,
    collaborations to 710, the parent paper link to 786 and the DATA
    collection flag to 980.
    """
    out = {}
    record_add_field(out, '024', '7',
                     subfields=[('a', record['doi']), ('2', 'DOI')])
    # Title template depends on whether the dataset has its own title.
    if record.get('title'):
        title = 'Data from {title} from: {paper_title}'
    else:
        title = 'Additional data from: {paper_title}'
    full_title = title.format(title=record.get('title'),
                              paper_title=record['paper_title'])
    record_add_field(out, '245',
                     subfields=[('a', full_title), ('9', 'HEPDATA')])
    record_add_field(out, '336', subfields=[('t', 'DATASET')])
    record_add_field(out, '520',
                     subfields=[('h', record['abstract']), ('9', 'HEPDATA')])
    for keyword in record['keywords']:
        value = keyword['value']
        # Qualify observables/cmenergies values with their keyword name.
        if keyword['name'] in ('observables', 'cmenergies'):
            value = '%s: %s' % (keyword['name'], value)
        record_add_field(out, '695',
                         subfields=[('a', value), ('9', 'HEPDATA')])
    for collaboration in record['collaborations']:
        record_add_field(out, '710', subfields=[('g', collaboration)])
    record_add_field(out, '786',
                     subfields=[('q', str(record['position'])),
                                ('w', str(record['inspire_id']))])
    record_add_field(out, '980', subfields=[('a', 'DATA')])
    return record_xml_output(out)
Example #44
0
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to restrict the "hm" textmarc display to; no
        filtering is applied when it contains "All tags"
    """
    updated_record = _get_updated_record(record_id, update_commands)

    xml_record = bibrecord.record_xml_output(updated_record)

    if "hm" == output_format:
        result = "<pre>\n"
        marc_record = _create_marc(xml_record)
        if "All tags" not in outputTags or not outputTags:
            # Keep only textmarc lines whose tag was requested; each kept
            # line is prefixed with the zero-padded record id.
            # NOTE(review): a line can be appended once per matching tag --
            # verify duplicates are acceptable.
            for line in marc_record.split('\n')[:-1]:
                for tag in outputTags:
                    if tag in line.split()[0]:
                        result += "%09d " % record_id + line.strip() + '\n'
        else:
            # "All tags" requested: show every line unfiltered.
            for line in marc_record.split('\n')[:-1]:
                result += "%09d " % record_id + line.strip() + '\n'

        result += "</pre>"
        return result

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Example #45
0
def upload_amendments(records, holdingpen):
    """ Upload a modified record """

    if task_get_option("no_upload", False) or len(records) == 0:
        return

    xml = '<collection xmlns="http://www.loc.gov/MARC21/slim">'
    for record in records:
        xml += record_xml_output(record)
    xml += "</collection>"

    tmp_file_fd, tmp_file = mkstemp(suffix='.xml',
                                    prefix="bibcheckfile_%s" %
                                    time.strftime("%Y-%m-%d_%H:%M:%S"),
                                    dir=CFG_TMPSHAREDDIR)
    os.write(tmp_file_fd, xml)
    os.close(tmp_file_fd)
    os.chmod(tmp_file, 0644)
    if holdingpen:
        flag = "-o"
    else:
        flag = "-r"
    task = task_low_level_submission('bibupload', 'bibcheck', flag, tmp_file)
    write_message("Submitted bibupload task %s" % task)
Example #46
0
def bst_scoap3_importer():
    """Import from SCOAP3.

    Fetches the SCOAP3 fulltext CSV export, matches each row to a single
    INSPIRE record and schedules bibupload batches: one for brand-new file
    attachments and one for new revisions of already-attached files.
    """
    try:
        request = requests.get(
            'http://repo.scoap3.org/ffts_for_inspire.py/csv')
    except (HTTPError, ConnectionError, Timeout):
        register_exception()
        return
    task_sleep_now_if_required(can_stop_too=True)

    # Two output batches: revisions of existing files vs. new attachments.
    fd_update, name_update = mkstemp(suffix='.xml',
                                     prefix='bibupload_scoap3_',
                                     dir=CFG_TMPSHAREDDIR)

    out_update = fdopen(fd_update, 'w')
    fd_new, name_new = mkstemp(suffix='.xml',
                               prefix='bibupload_scoap3_',
                               dir=CFG_TMPSHAREDDIR)
    out_new = fdopen(fd_new, 'w')

    print >> out_update, "<collection>"
    print >> out_new, "<collection>"

    line_count_new = 0  # to avoid empty bibupload
    line_count_update = 0  # to avoid empty bibupload

    # We strip the first line.
    for line in request.text.split("\n")[1:]:
        if not line.strip():
            continue
        task_sleep_now_if_required(can_stop_too=True)
        # Columns as served by the SCOAP3 CSV export.
        recid, arxiv_id, cr_date, checksum, link, file_format, doi = [
            x.strip() for x in line.split(',')
        ]
        write_message(line.strip())
        if checksum == "None":
            write_message("... no PDF. Skipping")
            continue
        # Locate the matching INSPIRE record via arXiv id and/or DOI.
        if arxiv_id == "None":
            inspire_record = perform_request_search(p="doi:%s" % (doi, ),
                                                    cc="HEP")
        else:
            inspire_record = perform_request_search(p="037:%s or doi:%s" %
                                                    (arxiv_id, doi),
                                                    cc="HEP")
        # Only an unambiguous single match is processed.
        if len(inspire_record) > 1:
            write_message(
                "ERROR: more than one INSPIRE record matched %s and %s for SCOAP3 record %s: %s"
                % (arxiv_id, doi, recid, list(inspire_record)),
                stream=sys.stderr)
            continue
        elif not inspire_record:
            write_message(
                "WARNING: no INSPIRE record matched %s or %s for SCOAP3 record %s"
                % (arxiv_id, doi, recid),
                stream=sys.stderr)
            continue
        action = None  # do nothing
        rec = {}
        inspire_record = inspire_record[0]
        record = BibRecDocs(inspire_record)
        # Compare checksums of the latest attached SCOAP3 files to decide
        # between no-op, revision update, or fresh attachment.
        for doc in record.list_latest_files('SCOAP3'):
            if doc.format == file_format:
                if doc.checksum == checksum:
                    write_message(
                        "... OK: file alredy attached to INSPIRE record %s (doc.checksum=%s, checksum=%s)"
                        % (inspire_record, doc.checksum, checksum))
                else:
                    write_message(
                        "... OK: new revision available for INSPIRE record %s (doc.checksum=%s, checksum=%s)"
                        % (inspire_record, doc.checksum, checksum))
                    action = "UPDATE"
                break
        else:
            # No attached file in this format yet: append it.
            write_message("... OK: need to add new file to INSPIRE record %s" %
                          inspire_record)
            action = "APPEND"
        if action:
            # FFT field instructs bibupload to (re)attach the fulltext.
            if file_format == '.pdf;pdfa':
                record_add_field(rec,
                                 'FFT',
                                 subfields=[('a', link),
                                            ('n', 'scoap3-fulltext'),
                                            ('f', '.pdf;pdfa'),
                                            ('t', 'SCOAP3'),
                                            ('d', 'Article from SCOAP3')])
            else:
                record_add_field(rec,
                                 'FFT',
                                 subfields=[('a', link),
                                            ('n', 'scoap3-fulltext'),
                                            ('t', 'SCOAP3'),
                                            ('d', 'Article from SCOAP3')])

            record_add_field(rec,
                             '001',
                             controlfield_value=str(inspire_record))
        if action == "UPDATE":
            line_count_update += 1
            print >> out_update, record_xml_output(rec)
        elif action == "APPEND":
            line_count_new += 1
            print >> out_new, record_xml_output(rec)
    print >> out_update, "</collection>"
    print >> out_new, "</collection>"
    out_new.close()
    out_update.close()

    if line_count_new:
        # We use correct here instead of append to deal with potential sync issues.
        # Basically BibUpload should handle "new" corrections as "append" if it is not there.
        id = task_low_level_submission("bibupload", "admin", "-N",
                                       "SCOAP3-import", "-c", name_new)
        write_message("Scheduled bibupload --correct %s with ID #%s" %
                      (name_new, id))
    else:
        remove(name_new)
    if line_count_update:
        id = task_low_level_submission("bibupload", "admin", "-N",
                                       "SCOAP3-import", "-c", name_update)
        write_message("Scheduled bibupload --correct %s with ID #%s" %
                      (name_update, id))
    else:
        remove(name_update)
Example #47
0
                         (len(match_results[2]), ))
        sys.stderr.write("\n Fuzzy records       : %d\n" %
                         (len(match_results[3]), ))
        sys.stderr.write("=" * 35)
        sys.stderr.write("\n Total records       : %d\n" % (len(records), ))

    if not noprocess:
        options = {'text-marc': 1, 'aleph-marc': 0}
        for record, results in recs_out:
            if textmarc_output:
                # FIXME: textmarc output does not print matching results
                sysno = get_sysno_from_record(record, options)
                print create_marc_record(record, sysno, options)
            else:
                print results
                print record_xml_output(record)

    if batch_output:
        i = 0
        options = {'text-marc': 1, 'aleph-marc': 0}
        outputs = ['new', 'matched', 'ambiguous', 'fuzzy']
        for result in match_results:
            filename = "%s.%s" % (batch_output, outputs[i])
            file_fd = open(filename, "w")
            for record, results in result:
                out = []
                if textmarc_output:
                    # FIXME: textmarc output does not print matching results
                    sysno = get_sysno_from_record(record, options)
                    out.append(create_marc_record(record, sysno, options))
                else:
Example #48
0
        sys.stderr.write("\n New records         : %d" % len(match_results[0]))
        sys.stderr.write("\n Matched records     : %d" % len(match_results[1]))
        sys.stderr.write("\n Ambiguous records   : %d" % len(match_results[2]))
        sys.stderr.write("\n Fuzzy records       : %d\n" % len(match_results[3]))
        sys.stderr.write("=" * 35)
        sys.stderr.write("\n Total records       : %d\n" % len(records))

    if not noprocess:
        options = {'text-marc':1, 'aleph-marc':0}
        for record in recs_out:
            if textmarc_output:
                sysno = get_sysno_from_record(record[0], options)
                print create_marc_record(record[0], sysno, options)
            else:
                print record[3]
                print record_xml_output(record[0])

    if batch_output:
        i = 0
        options = {'text-marc':1, 'aleph-marc':0}
        for result in match_results:
            filename = "%s.%i" % (batch_output, i)
            file_fd = open(filename,"w")
            for record in result:
                out = ""
                if textmarc_output:
                    sysno = get_sysno_from_record(record[0], options)
                    out += create_marc_record(record[0], sysno, options)
                else:
                    out += record[3]
                    out += record_xml_output(record[0])
Example #49
0
def save_xml_record(recid,
                    uid,
                    xml_record='',
                    to_upload=True,
                    to_merge=False,
                    task_name="bibedit",
                    sequence_id=None):
    """Persist an XML record to disk and optionally hand it to BibUpload.

    By default the record is read from the user's BibEdit cache, the
    unchanged volatile subfields are stripped, and the washed XML is
    written to a file that is then submitted to BibUpload.

    @param xml_record: give XML as string instead of reading the cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use
    """
    if xml_record:
        # Caller supplied the XML directly; parse it into a record structure.
        record = create_record(xml_record)[0]
    else:
        # Pull the record out of the BibEdit cache and clean the cache up.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)

    # Drop volatile subfields that were never filled in, then empty fields.
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # Alphabetical subfield order gives a stable on-disk representation.
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write the XML file: a fixed-name file for BibMerge, otherwise a
    # fresh temporary file in the BibEdit cache directory.
    if to_merge:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        out = open(file_path, 'w')
        out.write(xml_to_write)
        out.close()
    else:
        fd, file_path = tempfile.mkstemp(dir=CFG_BIBEDIT_CACHEDIR,
                                         prefix="%s_" % CFG_BIBEDIT_FILENAME,
                                         suffix="_%s_%s.xml" % (recid, uid))
        out = os.fdopen(fd, 'w')
        out.write(xml_to_write)
        out.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        # Submit a replace-mode (-r) bibupload task on behalf of the user.
        args = ['bibupload', user_name, '-P', '5', '-r', file_path,
                '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
def bst_scoap3_importer():
    """Import SCOAP3 fulltext links into matching INSPIRE records.

    Fetch the CSV feed of SCOAP3 files, match each entry against exactly
    one INSPIRE record (via arXiv id and/or DOI) and schedule bibupload
    tasks: one --append run for records that need the SCOAP3 PDF
    attached and one --correct run for records whose attached PDF has a
    new revision.  Entries matching zero or several INSPIRE records are
    reported on stderr and skipped.
    """
    task_sleep_now_if_required(can_stop_too=True)
    f = urllib.urlopen('http://repo.scoap3.org/ffts_for_inspire.py/csv')

    fd_update, name_update = mkstemp(suffix='.xml',
                                     prefix='bibupload_scoap3_',
                                     dir=CFG_TMPSHAREDDIR)
    out_update = fdopen(fd_update, 'w')
    fd_new, name_new = mkstemp(suffix='.xml',
                               prefix='bibupload_scoap3_',
                               dir=CFG_TMPSHAREDDIR)
    out_new = fdopen(fd_new, 'w')
    out_update.write("<collection>\n")
    out_new.write("<collection>\n")

    line_count_new = 0  # records scheduled for --append; avoids empty bibupload
    line_count_update = 0  # records scheduled for --correct; avoids empty bibupload
    f.readline()  ## Let's strip the header line

    for d in f:
        task_sleep_now_if_required(can_stop_too=True)
        # CSV columns (doc_format was called `type` upstream, renamed so it
        # does not shadow the builtin).
        recid, arxiv_id, cr_date, checksum, link, doc_format, doi = [
            x.strip() for x in d.split(',')
        ]
        write_message(d.strip())
        if checksum == "None":
            write_message("... no PDF. Skipping")
            continue
        # Match against INSPIRE, using the arXiv id as well when available.
        if arxiv_id == "None":
            inspire_record = perform_request_search(p="doi:%s" % (doi, ),
                                                    cc="HEP")
        else:
            inspire_record = perform_request_search(p="037:%s or doi:%s" %
                                                    (arxiv_id, doi),
                                                    cc="HEP")
        if len(inspire_record) > 1:
            write_message(
                "ERROR: more than one INSPIRE record matched %s and %s for SCOAP3 record %s: %s"
                % (arxiv_id, doi, recid, list(inspire_record)),
                stream=sys.stderr)
            continue
        elif not inspire_record:
            write_message(
                "WARNING: no INSPIRE record matched %s or %s for SCOAP3 record %s"
                % (arxiv_id, doi, recid),
                stream=sys.stderr)
            continue
        action = None  # do nothing
        rec = {}
        inspire_record = inspire_record[0]
        record = BibRecDocs(inspire_record)
        # Look for an already-attached SCOAP3 PDF; the for/else APPENDs
        # when no PDF at all triggered the break.
        for doc in record.list_latest_files():
            if doc.format in ('.pdf', '.pdf;pdfa'):
                if doc.bibdoc.doctype == 'SCOAP3':
                    if doc.checksum == checksum:
                        write_message(
                            "... OK: file alredy attached to INSPIRE record %s (doc.checksum=%s, checksum=%s)"
                            % (inspire_record, doc.checksum, checksum))
                    else:
                        write_message(
                            "... OK: new revision available for INSPIRE record %s (doc.checksum=%s, checksum=%s)"
                            % (inspire_record, doc.checksum, checksum))
                        action = "UPDATE"
                    break
        else:
            write_message("... OK: need to add new file to INSPIRE record %s" %
                          inspire_record)
            action = "APPEND"
        if action:
            # FFT field tells bibupload to attach the file found at `link`.
            if doc_format == '.pdf;pdfa':
                record_add_field(rec,
                                 'FFT',
                                 subfields=[('a', link),
                                            ('n', 'scoap3-fulltext'),
                                            ('f', '.pdf;pdfa'),
                                            ('t', 'SCOAP3'),
                                            ('d', 'Article from SCOAP3')])
            else:
                record_add_field(rec,
                                 'FFT',
                                 subfields=[('a', link),
                                            ('n', 'scoap3-fulltext'),
                                            ('t', 'SCOAP3'),
                                            ('d', 'Article from SCOAP3')])

            record_add_field(rec,
                             '001',
                             controlfield_value=str(inspire_record))
        if action == "UPDATE":
            line_count_update += 1
            out_update.write(record_xml_output(rec) + "\n")
        elif action == "APPEND":
            line_count_new += 1
            out_new.write(record_xml_output(rec) + "\n")
    out_update.write("</collection>\n")
    out_new.write("</collection>\n")
    out_new.close()
    out_update.close()

    if line_count_new:
        task_id = task_low_level_submission("bibupload", "admin", "-N",
                                            "SCOAP3-import", "-a", name_new)
        write_message("Scheduled bibupload --append %s with ID #%s" %
                      (name_new, task_id))
    else:
        remove(name_new)
    if line_count_update:
        task_id = task_low_level_submission("bibupload", "admin", "-N",
                                            "SCOAP3-import", "-c", name_update)
        # BUGFIX: report the --correct file (name_update); the original
        # message interpolated name_new here.
        write_message("Scheduled bibupload --correct %s with ID #%s" %
                      (name_update, task_id))
    else:
        remove(name_update)
Example #51
0
def perform_request_record(requestType, uid, data):
    """Handle 'major' record related requests for the merging session.

    Supported request types:
      - 'submit': upload the merged record via BibUpload; when
        data['duplicate'] is set, also mark that second record as
        DELETED and as a duplicate of the first one,
      - 'cancel': drop the cached merging session,
      - 'getRecordCompare' / 'recCopy' / 'recMerge' / 'recMergeNC':
        compare/copy/merge the two records and return an HTML diff.

    @param requestType: one of the request type strings above
    @param uid: id of the user performing the request
    @param data: request payload; always contains 'recID1' and, depending
        on the request, also 'recID2', 'record2Mode', 'duplicate' and
        'additional_data'
    @return: dict with 'resultCode' (0 on success), 'resultText' and,
        for compare/copy/merge requests, 'resultHtml'
    """
    #TODO add checks before submission and cancel, replace get_bibrecord call
    result = {'resultCode': 0, 'resultText': ''}
    recid1 = data["recID1"]
    record1 = _get_record(recid1, uid, result)
    if result[
            'resultCode'] != 0:  #if record not accessible return error information
        return result

    if requestType == 'submit':
        if data.has_key('duplicate'):
            recid2 = data['duplicate']
            record2 = _get_record_slave(recid2, result, 'recid', uid)
            if result['resultCode'] != 0:  #return in case of error
                return result
            # Refuse the merge if it would leave DOIs in a bad state,
            # unless the user explicitly confirmed the submission.
            (errcode, message) = check_doi_status_after_merge(
                data["recID1"],
                data['duplicate'],
                record1,
                record2,
                record2_marked_as_duplicate_p=data.has_key('duplicate'),
                submit_confirmed_p=data.get('additional_data', {
                    'confirmed_submit': False
                }).get('confirmed_submit', False))
            if errcode:
                result['resultCode'] = errcode
                result['resultText'] = message
                return result

            # mark record2 as deleted
            record_add_field(record2, '980', ' ', ' ', '', [('c', 'DELETED')])
            # mark record2 as duplicate of record1
            record_add_field(record2, '970', ' ', ' ', '',
                             [('d', str(recid1))])
            # add recid of deleted record to master record
            record_add_field(record1, '981', ' ', ' ', '',
                             [('a', str(recid2))])

            # To ensure updates happen in order, use a seq id
            sequence_id = str(random.randrange(1, 2147483648))

            # submit record2 to be deleted
            xml_record2 = record_xml_output(record2)
            save_xml_record(recid2,
                            uid,
                            xml_record2,
                            task_name="bibmerge",
                            sequence_id=sequence_id)

            # submit record1
            xml_record1 = record_xml_output(record1)
            save_xml_record(recid1,
                            uid,
                            xml_record1,
                            task_name="bibmerge",
                            sequence_id=sequence_id)

            # Delete cache file if it exists
            if cache_exists(recid1, uid):
                delete_cache(recid1, uid)

            result['resultText'] = 'Records submitted'
            return result

        # Plain (non-duplicate) submission: still verify the DOI status
        # before uploading record1 from the cache.
        (errcode, message) = check_doi_status_after_merge(
            data["recID1"],
            data["recID2"],
            record1,
            None,
            submit_confirmed_p=data.get('additional_data', {
                'confirmed_submit': False
            }).get('confirmed_submit', False))
        if errcode:
            result['resultCode'] = errcode
            result['resultText'] = message
            return result

        #submit record1 from cache
        save_xml_record(recid1, uid, task_name="bibmerge")

        # Delete cache file if it exists
        if cache_exists(recid1, uid):
            delete_cache(recid1, uid)

        result['resultText'] = 'Record submitted'
        return result

    elif requestType == 'cancel':
        delete_cache(recid1, uid)
        result['resultText'] = 'Cancelled'
        return result

    # All remaining request types also need the second record, fetched
    # according to the mode chosen by the user (recid, revision, ...).
    recid2 = data["recID2"]
    mode = data['record2Mode']
    record2 = _get_record_slave(recid2, result, mode, uid)
    if result[
            'resultCode'] != 0:  #if record not accessible return error information
        return result

    if requestType == 'getRecordCompare':
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records compared'

    elif requestType == 'recCopy':
        copy_R2_to_R1(record1, record2)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Record copied'

    elif requestType == 'recMerge':
        merge_record(record1, record2, merge_conflicting_fields=True)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records merged'

    elif requestType == 'recMergeNC':
        # Same as recMerge but conflicting fields are left untouched.
        merge_record(record1, record2, merge_conflicting_fields=False)
        result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
            record1, record2)
        result['resultText'] = 'Records merged'

    else:
        result['resultCode'], result['resultText'] = 1, 'Wrong request type'

    return result
Example #52
0
def add_other_id(other_id=None,
                 doi="",
                 eprint="",
                 recid=None,
                 system_number=None,
                 reportnumbers=None,
                 all_recids=None):
    """Search and match using given identifiers.

    Try, in order of preference, to resolve the other site's record
    (identified by other_id) to a single local recid: an explicitly
    provided recid, then the arXiv eprint, the DOI, the report numbers,
    and finally (on CERN sites) the SPIRES system number.  On a unique
    match, return the MARCXML of a correcting record that stores
    other_id in 035 (or 595 for non-CERN-relevant INSPIRE records).

    @param other_id: record id on the remote site (CFG_OTHER_SITE)
    @param doi: DOI to match on
    @param eprint: arXiv identifier to match on
    @param recid: candidate local record id, if the remote site knows it
    @param system_number: legacy SPIRES system number (CERN sites only)
    @param reportnumbers: list of report numbers to match on
    @param all_recids: set of valid local recids; fetched if None
    @return: MARCXML string on a unique match; a list
        [other_id, matched recids...] on an ambiguous match; None when
        nothing matched or the link is already recorded
    """
    query = ""
    if all_recids is None:
        all_recids = get_all_recids()
    if reportnumbers is None:
        reportnumbers = []
    if recid is not None:
        query = "existing recid"
        try:
            recid = int(recid)
        except ValueError:
            recid = None
        # A recid the other site believes in is only trusted if it
        # actually exists locally.
        if recid and recid not in all_recids:
            write_message(
                "WARNING: %s thought that their record %s had recid %s in %s but this seems wrong"
                % (CFG_OTHER_SITE, other_id, recid, CFG_THIS_SITE),
                stream=sys.stderr)
            recid = None
    if recid is None and eprint:
        # Match by arXiv OAI identifier in 035__a.
        query = 'oai:arXiv.org:%s' % (eprint, )
        arxiv_ids = search_pattern(p=query, f='035__a', m='e') & all_recids
        if len(arxiv_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via %s: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, query, arxiv_ids),
                stream=sys.stderr)
            return [other_id] + list(arxiv_ids)
        elif len(arxiv_ids) == 1:
            recid = arxiv_ids[0]
    if recid is None and doi:
        # Match by DOI.
        query = 'doi:"%s"' % doi
        doi_ids = search_pattern(p=query) & all_recids
        if len(doi_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via %s: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, query, doi_ids),
                stream=sys.stderr)
            return [other_id] + list(doi_ids)
        elif len(doi_ids) == 1:
            recid = doi_ids[0]
    if recid is None and reportnumbers:
        # Match by report numbers; `query` here is only used for the
        # log messages, the actual search is done number by number.
        query = "037__a:" + " OR 037__a:".join(reportnumbers)
        reportnumbers_ids = intbitset()
        for rn in reportnumbers:
            reportnumbers_ids |= search_pattern(p=rn, f='037__a', m='e')
        reportnumbers_ids &= all_recids
        if len(reportnumbers_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via %s: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, query,
                   reportnumbers_ids),
                stream=sys.stderr)
            return [other_id] + list(reportnumbers_ids)
        elif len(reportnumbers_ids) == 1:
            recid = reportnumbers_ids[0]
    if recid is None and system_number and CFG_CERN_SITE:
        # Last resort on CERN sites: the legacy SPIRES system number.
        query = "035:%s 035:SPIRES" % (system_number, )
        system_number_ids = search_pattern(p=query)
        system_number_ids &= all_recids
        if len(system_number_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via %s: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, query,
                   system_number_ids),
                stream=sys.stderr)
            return [other_id] + list(system_number_ids)
        elif len(system_number_ids) == 1:
            recid = system_number_ids[0]

    if recid:
        recid = int(recid)
        record = get_record(recid)
        # Check the existing 035 fields: if the link to the other site
        # is already stored (and consistent) there is nothing to do.
        fields = record_get_field_instances(record, '035')
        for field in fields:
            subfields = dict(field_get_subfield_instances(field))
            if CFG_OTHER_SITE.upper() == subfields.get('9', '').upper():
                stored_recid = subfields.get('a', 0)
                try:
                    stored_recid = int(stored_recid)
                except ValueError:
                    # Not an integer, we move on and add the new ID.
                    continue
                if stored_recid and int(stored_recid) != int(other_id):
                    write_message(
                        "ERROR: %s record %s matches %s record %s which already points back to a different record %s in %s"
                        % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, recid,
                           stored_recid, CFG_OTHER_SITE),
                        stream=sys.stderr)
                if CFG_INSPIRE_SITE and int(other_id) not in CERN_IDS:
                    write_message(
                        "INFO: ID was found in 035 but the record is not core CERN hence it should be moved into 595"
                    )
                else:
                    return

        if CFG_INSPIRE_SITE:
            # On INSPIRE, non-core-CERN links live in 595 instead; apply
            # the symmetric check there.
            fields = record_get_field_instances(record, '595')
            for field in fields:
                subfields = dict(field_get_subfield_instances(field))
                if "CDS" in subfields.get('a', '').upper():
                    stored_recid = subfields.get('a', 0).split("-")[-1]
                    try:
                        stored_recid = int(stored_recid)
                    except ValueError:
                        # Not an integer, we move on and add the new ID.
                        continue
                    if stored_recid and int(stored_recid) != int(other_id):
                        write_message(
                            "ERROR: %s record %s matches %s record %s which already points back to a different record %s in %s"
                            % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, recid,
                               stored_recid, CFG_OTHER_SITE),
                            stream=sys.stderr)
                    if int(other_id) in CERN_IDS:
                        write_message(
                            "INFO: ID was found in 595 but the record is core CERN hence it should be moved into 035"
                        )
                    else:
                        return

        write_message("Matched {1}/{0} to {3}/{2} with {4}".format(
            other_id, CFG_OTHER_URL, recid, CFG_THIS_URL, query))
        # Build a correcting record: keep all 035/595 fields except any
        # stale instance of the link we are about to (re-)add.
        rec = {}
        record_add_field(rec, '001', controlfield_value='%s' % recid)

        # Let's filter out previous values in 035/595
        for field in record_get_field_instances(record, '035'):
            subfields = field_get_subfield_instances(field)
            subfields_dict = dict(subfields)
            if subfields_dict.get('a') != str(other_id) or subfields_dict.get(
                    '9') != CFG_OTHER_SITE:
                record_add_field(rec, '035', subfields=subfields)
        for field in record_get_field_instances(record, '595'):
            subfields = field_get_subfield_instances(field)
            subfields_dict = dict(subfields)
            if subfields_dict.get('a') != "CDS-{0}".format(
                    other_id) or subfields_dict.get('9') != 'CERN':
                record_add_field(rec, '595', subfields=subfields)

        if CFG_INSPIRE_SITE:
            if int(other_id) in CERN_IDS:
                write_message("CERN relevant paper: adding 035")
                record_add_field(rec,
                                 '035',
                                 ind1=' ',
                                 ind2=' ',
                                 subfields=(('9', CFG_OTHER_SITE), ('a',
                                                                    other_id)))
            else:
                write_message("Non-CERN relevant paper: adding 595")
                record_add_field(rec,
                                 '595',
                                 ind1=' ',
                                 ind2=' ',
                                 subfields=(('9', "CERN"),
                                            ('a', "CDS-{0}".format(other_id))))
        else:
            record_add_field(rec,
                             '035',
                             ind1=' ',
                             ind2=' ',
                             subfields=(('9', CFG_OTHER_SITE), ('a',
                                                                other_id)))
        return record_xml_output(rec)
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags="",
                         checked=True,
                         displayed_records=None):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: the tags to be shown to the user
    @param checked: is the record checked by the user?
    @param displayed_records: records to be displayed on a given page

    @returns: record formated to be displayed or None
    """
    if update_commands and checked:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {
        "aleph-marc": 0,
        "correct-mode": 1,
        "append-mode": 0,
        "delete-mode": 0,
        "insert-mode": 0,
        "replace-mode": 0,
        "text-marc": 1
    }

    if record_id not in displayed_records:
        return

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(
        old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and checked:
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(
                updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc,
                                      updated_record_textmarc, outputTags,
                                      record_id)
        else:
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines():
                if not filter_tags or line.split()[0].replace(
                        '_', '') in outputTags:
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands and checked:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                         of=output_format,
                                         xml_record=xml_record,
                                         ln=language)
    return result
def perform_request_record(requestType, uid, data):
    """Handle 'major' record related requests.

    Dispatch on requestType: 'submit' uploads the merged record (also
    deleting a declared duplicate), 'cancel' drops the cached session,
    and the compare/copy/merge request types return an HTML diff of the
    two records in result['resultHtml'].
    """
    #TODO add checks before submission and cancel, replace get_bibrecord call
    result = {'resultCode': 0, 'resultText': ''}
    recid1 = data["recID1"]
    record1 = _get_record(recid1, uid, result)
    if result['resultCode'] != 0:
        # record not accessible -- return the error information as-is
        return result

    if requestType == 'submit':
        if 'duplicate' in data:
            recid2 = data['duplicate']
            record2 = _get_record_slave(recid2, result, 'recid', uid)
            if result['resultCode'] != 0:
                # error while fetching the duplicate record
                return result
            # flag record2 as deleted and as a duplicate of record1
            record_add_field(record2, '980', ' ', ' ', '', [('c', 'DELETED')])
            record_add_field(record2, '970', ' ', ' ', '',
                             [('d', str(recid1))])
            # submit the now-deleted duplicate
            save_xml_record(recid2, uid, record_xml_output(record2))

        # submit record1 from the cache
        save_xml_record(recid1, uid)
        result['resultText'] = 'Record submitted'
        return result

    if requestType == 'cancel':
        delete_cache_file(recid1, uid)
        result['resultText'] = 'Cancelled'
        return result

    # Remaining request types need the second record as well, fetched
    # according to the user-selected mode.
    recid2 = data["recID2"]
    record2 = _get_record_slave(recid2, result, data['record2Mode'], uid)
    if result['resultCode'] != 0:
        # record not accessible -- return the error information as-is
        return result

    if requestType == 'getRecordCompare':
        message = 'Records compared'
    elif requestType == 'recCopy':
        copy_R2_to_R1(record1, record2)
        message = 'Record copied'
    elif requestType == 'recMerge':
        merge_record(record1, record2, merge_conflicting_fields=True)
        message = 'Records merged'
    elif requestType == 'recMergeNC':
        # merge, but leave conflicting fields untouched
        merge_record(record1, record2, merge_conflicting_fields=False)
        message = 'Records merged'
    else:
        result['resultCode'], result['resultText'] = 1, 'Wrong request type'
        return result

    # Every successful compare/copy/merge renders the same HTML diff.
    result['resultHtml'] = bibmerge_templates.BM_html_all_diff(
        record1, record2)
    result['resultText'] = message
    return result
def format_element(bfo, oai=0):
    """Produce MARCXML with enhanced fields.

    Adds 100/700 $x with Record ID of linked HepName,
         701/702 $y with True/False if the signature is claimed
                 $z with Record ID of institution
                 $w with BAI of linked Profile
         371/110 $z with Record ID of institution
         119/502 $z with Record ID of institution
         999C5   $0 with on the fly discovered Record IDs (not for books)
         773     $0 with Record ID of corresponding Book or Proceeding or Report
                 $1 with Record ID of corresponding Journal
                 $2 with Record ID of corresponding Conference
         693/710 $0 with Record ID of corresponding experiment
    """
    can_see_hidden_stuff = not acc_authorize_action(bfo.user_info,
                                                    'runbibedit')[0]
    recid = bfo.recID
    if can_see_hidden_stuff and is_record_deleted(bfo):
        record = salvage_deleted_record_from_history(recid)
    else:
        record = bfo.get_record()

    # Let's filter hidden fields
    if can_see_hidden_stuff:
        # Let's add bibdoc info
        bibrecdocs = BibRecDocs(recid)
        for bibdocfile in bibrecdocs.list_latest_files():
            fft = [
                ('a', bibdocfile.fullpath),
                ('d', bibdocfile.description or ''),
                ('f', bibdocfile.format or ''),
                ('n', bibdocfile.name or ''),
                ('r', bibdocfile.status or ''),
                ('s', bibdocfile.cd.strftime('%Y-%m-%d %H:%M:%S')),
                ('t', bibdocfile.bibdoc.doctype),
                ('v', str(bibdocfile.version)),
                ('z', bibdocfile.comment or ''),
            ]
            for flag in bibdocfile.flags:
                fft.append(('o', flag))
            record_add_field(record, 'FFT', subfields=fft)
    else:
        # not authorized
        for tag in CFG_BIBFORMAT_HIDDEN_TAGS:
            if tag in record:
                del record[tag]

    is_institution = 'INSTITUTION' in [
        collection.upper() for collection in bfo.fields('980__a')
    ]

    signatures = {}
    if '100' in record or '700' in record:
        signatures = dict((
            name, (personid, flag)
        ) for name, personid, flag in run_sql(
            "SELECT name, personid, flag FROM aidPERSONIDPAPERS WHERE bibrec=%s AND flag>-2",
            (recid, )))

    # Let's add signatures
    for field in record_get_field_instances(
            record, '100') + record_get_field_instances(
                record, '700') + record_get_field_instances(
                    record, '701') + record_get_field_instances(record, '702'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'a' in subfield_dict:
            author_name = subfield_dict['a']
            personid, flag = signatures.get(author_name, (None, None))
            bai = get_personid_canonical_id().get(personid)
            if bai:
                subfields.append(('w', bai))
                hepname_id = get_hepname_id(personid)
                if hepname_id:
                    subfields.append(('x', '%i' % hepname_id))
                subfields.append(('y', '%i' % (flag == 2)))

        # And matched affiliations
        if 'u' in subfield_dict:
            for code, value in subfields:
                if code == 'u':
                    ids = get_institution_ids(value)
                    if len(ids) == 1:
                        subfields.append(('z', '%i' % ids[0]))

    # Thesis institution
    for field in record_get_field_instances(record, '502'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'c' in subfield_dict:
            for code, value in subfields:
                if code == 'c':
                    ids = get_institution_ids(value)
                    if len(ids) == 1:
                        subfields.append(('z', '%i' % ids[0]))

    # Related institution
    for field in record_get_field_instances(record, '510'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'a' in subfield_dict and not '0' in subfield_dict:
            ids = get_institution_ids(subfield_dict['a'])
            if len(ids) == 1:
                subfields.append(('0', '%i' % ids[0]))

    # Related journal
    for field in record_get_field_instances(record, '530'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'a' in subfield_dict and not '0' in subfield_dict:
            ids = get_institution_ids(subfield_dict['a'])
            if len(ids) == 1:
                subfields.append(('0', '%i' % ids[0]))

    # Enhance affiliation in Experiments
    for field in record_get_field_instances(record, '119'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'u' in subfield_dict:
            for code, value in subfields:
                if code == 'u':
                    ids = get_institution_ids(value)
                    if len(ids) == 1:
                        subfields.append(('z', '%i' % ids[0]))

    # Enhance affiliation in HepNames and Jobs and Institutions and
    # naked affiliations in HEP
    for field in record_get_field_instances(
            record, '371') + record_get_field_instances(record, '902'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if 'a' in subfield_dict:
            for code, value in subfields:
                if code == 'a':
                    ids = get_institution_ids(value)
                    if len(ids) == 1:
                        subfields.append(('z', '%i' % ids[0]))

    for field in record_get_field_instances(record, '110'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if is_institution:
            # We try to resolve obsolete ICNs
            if 'x' in subfield_dict:
                for code, value in subfields:
                    if code == 'x':
                        ids = get_institution_ids(value)
                        if len(ids) == 1:
                            subfields.append(('z', '%i' % ids[0]))
        else:
            # In other collections institution is in a
            if 'a' in subfield_dict:
                for code, value in subfields:
                    if code == 'a':
                        ids = get_institution_ids(value)
                        if len(ids) == 1:
                            subfields.append(('z', '%i' % ids[0]))

    # Enhance citation
    for field in record_get_field_instances(record, '999', ind1='C', ind2='5'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        if '0' in subfield_dict:
            # Already available recid
            subfields.append(('z', '1'))
        else:
            matched_id = get_matched_id(subfields)
            if matched_id:
                subfields.append(('0', str(matched_id)))

    # Enhance related records
    for field in (
            record_get_field_instances(record, '780', ind1='0', ind2='2') +
            record_get_field_instances(record, '785', ind1='0', ind2='2') +
            record_get_field_instances(record, '787', ind1='0', ind2='8')):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        subfield_citation = []
        if subfield_dict.get('r'):  # Reportnumber
            subfield_citation.append(('r', subfield_dict['r']))
        if subfield_dict.get('z'):  # ISBN
            subfield_citation.append(('i', subfield_dict['z']))
        if 'w' not in subfield_dict and subfield_citation:
            matched_id = get_matched_id(subfield_citation)
            if matched_id:
                subfields.append(('w', str(matched_id)))

    # Enhance CNUMs and Journals
    for field in record_get_field_instances(record, '773'):
        subfields = field_get_subfield_instances(field)
        subfield_dict = dict(subfields)
        for code, value in subfields:
            if code == 'w':
                # Conference CNUMs
                recids = perform_request_search(p='111__g:"%s"' % value,
                                                cc='Conferences')
                if len(recids) == 1:
                    subfields.append(('2', str(recids.pop())))
                if '0' not in subfield_dict:
                    recids = perform_request_search(
                        p='773__w:"%s" 980:PROCEEDINGS' % value)
                    if recid in recids:
                        # We remove this very record, since it can be a proceedings
                        recids.remove(recid)
                    if len(recids) == 1:
                        subfields.append(('0', str(recids.pop())))
            elif code == 'p':
                # Journal title
                recids = perform_request_search(p='711__a:"%s"' % value,
                                                cc='Journals')
                if len(recids) == 1:
                    subfields.append(('1', str(recids.pop())))
            elif code == 'z' and '0' not in subfield_dict:
                # ISBN
                recids = find_isbn({'ISBN': value})
                if len(recids) == 1:
                    subfields.append(('0', str(recids.pop())))
            elif code == 'r' and '0' not in subfield_dict:
                # Report
                recids = perform_request_search(p='reportnumber:"%s"' % value)
                if len(recids) == 1:
                    subfields.append(('0', str(recids.pop())))

    # Enhance Experiments
    for field in record_get_field_instances(record, '693'):
        subfields = field_get_subfield_instances(field)
        for code, value in subfields:
            if code == 'e':
                recids = perform_request_search(p='119__a:"%s"' % value,
                                                cc='Experiments')
                if len(recids) == 1:
                    subfields.append(('0', str(recids.pop())))
            elif code == 'a':
                recids = perform_request_search(p='119__b:"%s"' % value,
                                                cc='Experiments')
                if len(recids) == 1:
                    subfields.append(('0', str(recids.pop())))

    # Enhance Experiments
    for field in record_get_field_instances(record, '710'):
        subfields = field_get_subfield_instances(field)
        for code, value in subfields:
            if code == 'g':
                recids = perform_request_search(p='119__a:"%s"' % value,
                                                cc='Experiments')
                if len(recids) == 1:
                    subfields.append(('0', str(recids.pop())))

    # Add Creation date:
    if '961' in record:
        del record['961']
    creation_date, modification_date = run_sql(
        "SELECT creation_date, modification_date FROM bibrec WHERE id=%s",
        (recid, ))[0]
    record_add_field(record,
                     '961',
                     subfields=[('x', creation_date.strftime('%Y-%m-%d')),
                                ('c', modification_date.strftime('%Y-%m-%d'))])

    formatted_record = record_xml_output(record)
    if oai:
        formatted_record = formatted_record.replace(
            "<record>",
            "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
        )
        formatted_record = formatted_record.replace(
            "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
            "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
        )
        formatted_record = formatted_record.replace("</record",
                                                    "</marc:record")
        formatted_record = formatted_record.replace("<controlfield",
                                                    "<marc:controlfield")
        formatted_record = formatted_record.replace("</controlfield",
                                                    "</marc:controlfield")
        formatted_record = formatted_record.replace("<datafield",
                                                    "<marc:datafield")
        formatted_record = formatted_record.replace("</datafield",
                                                    "</marc:datafield")
        formatted_record = formatted_record.replace("<subfield",
                                                    "<marc:subfield")
        formatted_record = formatted_record.replace("</subfield",
                                                    "</marc:subfield")
    return formatted_record
Example #56
0
def add_other_id(other_id=None,
                 doi="",
                 eprint="",
                 recid=None,
                 reportnumbers=None,
                 all_recids=None):
    """Link a local record to its counterpart on CFG_OTHER_SITE.

    Tries to find the local recid matching the remote record ``other_id``:
    first trusting the recid claimed by the remote site (validated against
    ``all_recids``), then falling back to arXiv eprint, DOI and
    report-number matching.  On a unique, not-yet-linked match, returns a
    MARCXML snippet that adds an 035 field pointing back to the remote
    record; returns None when there is no match, an ambiguous match, or the
    link already exists.
    """
    if all_recids is None:
        all_recids = get_all_recids()
    if reportnumbers is None:
        reportnumbers = []
    # The recid claimed by the remote site may be stale: only trust it if it
    # still exists locally.
    if recid is not None and recid not in all_recids:
        write_message(
            "WARNING: %s thought that their record %s had recid %s in %s but this seems wrong"
            % (CFG_OTHER_SITE, other_id, recid, CFG_THIS_SITE),
            stream=sys.stderr)
        recid = None
    # Fallback 1: match via the arXiv OAI identifier.
    if recid is None and eprint:
        arxiv_ids = search_pattern(
            p='oai:arXiv.org:%s' % (eprint, ), f='035__a', m='e') & all_recids
        if len(arxiv_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via arXiv eprint matching: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, arxiv_ids),
                stream=sys.stderr)
            return
        elif len(arxiv_ids) == 1:
            recid = arxiv_ids[0]
    # Fallback 2: match via DOI.
    if recid is None and doi:
        doi_ids = search_pattern(p='doi:"%s"' % doi) & all_recids
        if len(doi_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via DOI matching: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, doi_ids),
                stream=sys.stderr)
            return
        elif len(doi_ids) == 1:
            recid = doi_ids[0]
    # Fallback 3: match via report numbers (union of all candidates).
    if recid is None and reportnumbers:
        reportnumbers_ids = intbitset()
        for rn in reportnumbers:
            reportnumbers_ids |= search_pattern(p=rn, f='037__a', m='e')
        # BUG FIX: all_recids is an intbitset, not a callable; the original
        # code did `all_recids()`, which raised TypeError whenever this
        # branch was reached.
        reportnumbers_ids &= all_recids
        if len(reportnumbers_ids) > 1:
            write_message(
                "ERROR: %s record %s matches more than one record in %s via reportnumber matching: %s"
                % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, reportnumbers_ids),
                stream=sys.stderr)
            return
        elif len(reportnumbers_ids) == 1:
            recid = reportnumbers_ids[0]
    if recid:
        # Do not add a second link if the record already points to some
        # remote record; complain if it points to a *different* one.
        record = get_record(recid)
        fields = record_get_field_instances(record, '035')
        for field in fields:
            subfields = dict(field_get_subfield_instances(field))
            if CFG_OTHER_SITE.upper() == subfields.get('9', '').upper():
                stored_recid = int(subfields.get('a', 0))
                if stored_recid and stored_recid != other_id:
                    write_message(
                        "ERROR: %s record %s matches %s record %s which already points back to a different record %s in %s"
                        % (CFG_OTHER_SITE, other_id, CFG_THIS_SITE, recid,
                           stored_recid, CFG_OTHER_SITE),
                        stream=sys.stderr)
                return
        rec = {}
        record_add_field(rec, '001', controlfield_value='%s' % recid)
        record_add_field(rec,
                         '035',
                         ind1=' ',
                         ind2=' ',
                         subfields=(('9', CFG_OTHER_SITE), ('a', other_id)))
        return record_xml_output(rec)
Example #57
0
    ## Let's tag this record as a TWEET so that later we can build a collection
    ## out of these records.
    record_add_field(rec, '980', subfields=[('a', 'TWEET'), ('b', query)])

    ## Some smart manipulations: let's parse out URLs and tags from the body
    ## of the Tweet.
    for url in _RE_GET_HTTP.findall(text):
        url = url[0]
        record_add_field(rec, '856', '4', subfields=[('u', url)])

    for tag in _RE_TAGS.findall(text):
        ## And here we add the keywords.
        record_add_field(rec, '653', '1', subfields=[('a', tag), ('9', 'TWITTER')])

    ## Finally we shall serialize everything to MARCXML
    return record_xml_output(rec)

def bst_twitter_fetcher(query):
    """
    Fetch the tweets related to the user and upload them into Invenio.
    @param user: the user
    """
    ## A temporary MARCXML file collects the converted tweets for upload.
    tmp_fd, tmp_name = tempfile.mkstemp(suffix='.xml', prefix='tweets',
                                        dir=CFG_TMPDIR)
    fetched = get_tweets(query)
    if not fetched:
        return
    os.write(tmp_fd, "<collection>\n")
    total = len(fetched)
    for idx, entry in enumerate(fetched):
        ## Convert each tweet to MARCXML and append it to the file.
        task_update_progress('DONE: tweet %s out %s' % (idx, total))
        os.write(tmp_fd, tweet_to_record(entry, query))
def oairepositoryupdater_task():
    """Main business logic code of oai_archive

    Recomputes OAI set membership for every record, assigns OAI identifiers
    to records missing one, writes the corrected metadata to MARCXML chunk
    files, and (unless the no_upload option is set) schedules bibupload
    tasks to apply them.  Returns True on completion.
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    # Report-only mode: print the repository status and stop.
    if report > 1:
        print_repository_status(verbose=report)
        return True

    # Snapshot the set definitions so the whole run works on a stable view.
    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    # Records that already carry an OAI identifier in their metadata.
    recids_with_oaiid = search_unit_in_bibxxx(p='*',
                                              f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    # Records currently exported in any set; those not claimed by a set in
    # the loop below must have their membership cleared.
    all_current_recids = search_unit_in_bibxxx(p='*',
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)),
                  verbose=2)

    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    # For each set, diff the records that SHOULD be in it against the
    # records currently marked as belonging to it.
    for set_spec in all_set_specs():
        if not set_spec:
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec),
                      verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec),
                      verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec),
                      verbose=2)
        all_affected_recids |= affected_recids

    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                  prefix='oairepository_' + \
                                  time.strftime("%Y%m%d_%H%M%S_",
                                                time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")

    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_SET_FIELD[:3],
                                    ind1=CFG_OAI_SET_FIELD[3],
                                    ind2=CFG_OAI_SET_FIELD[4],
                                    code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)),
                      verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                    ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                    ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                    code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message(
            "Record %s currently doesn't belong anymore to these oai_sets: %s"
            % (recid, ", ".join(current_previous_oai_sets)),
            verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set
                               for _set, _recids in recids_for_set.iteritems()
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)),
                      verbose=3)

        # Sets the record is leaving: previously-left sets plus sets it was
        # in but should no longer be in.
        updated_previous_oai_sets = set(
            _set for _set in (current_previous_oai_sets - updated_oai_sets)
            | (current_oai_sets - updated_oai_sets))
        write_message(
            "Record %s now doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(updated_previous_oai_sets)),
            verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid,
                          verbose=3)
            continue  # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid,
                      verbose=3)
        # Rebuild the OAI field: id subfield plus one subfield per current
        # set and per previously-held set.
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record,
                         tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        # Chunk full: close the current MARCXML file, schedule its upload,
        # and start a fresh chunk file.
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                task_low_level_submission('bibupload', 'oairepository', '-c',
                                          filename, '-n')
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                        prefix='oairepository_' + \
                                        time.strftime("%Y%m%d_%H%M%S_",
                                                        time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if not no_upload:
        task_sleep_now_if_required(can_stop_too=True)
        if tot > 0:
            # Upload the final (partial) chunk.
            task_low_level_submission('bibupload', 'oairepository', '-c',
                                      filename, '-n')
        else:
            # Final chunk is empty: drop the file instead of uploading it.
            os.remove(filename)

    return True
Example #59
0
Run as::
  python fix_8560.py > output.xml
  bibupload -c output.xml
"""

from invenio.search_engine import search_pattern, get_fieldvalues
from invenio.bibrecord import record_add_field, record_xml_output
from invenio.webuser import collect_user_info, get_uid_from_email

# All records
recids = search_pattern(p="0->Z", f="8560_f")

print "<collection>"
for recid in recids:
    # Get record information
    email = get_fieldvalues(recid, "8560_f")[0]
    if "<" in email:
        email = email.split()[-1][1:-1].strip()
    user_info = collect_user_info(get_uid_from_email(email))
    name = user_info.get("external_fullname", user_info.get("nickname", ""))
    external_id = user_info.get("external_id", "")

    # Create correction for record
    rec = {}
    record_add_field(rec, "001", controlfield_value=str(recid))
    record_add_field(rec,
                     '856',
                     ind1='0',
                     subfields=[('f', email), ('y', name)])
    print record_xml_output(rec)
print "</collection>"
Example #60
0
def _prepare_marcxml(recid_a,
                     rn_a,
                     recids_and_rns_b,
                     what_is_a_for_b,
                     what_is_b_for_a,
                     display_in_a=True,
                     display_in_b=True,
                     marc_for_a=None,
                     marc_for_b=None,
                     upload_mode='append',
                     consider_empty_p=False):
    """Build the MARCXML that records links between record A and records B.

    Returns a '<collection>...</collection>' string containing a correcting
    record for A (when what_is_b_for_a is given) and one correcting record
    per linked -- and, with consider_empty_p, per unlinked -- record B
    (when what_is_a_for_b is given).
    """
    chunks = ['<collection>']

    if what_is_b_for_a is not None:
        tag_a, ind1_a, ind2_a = _prepare_marc(
            marc_for_a, CFG_OTHER_RELATIONSHIP_ENTRY,
            "0" if display_in_a else "1")
        rec_a = {}
        record_add_field(rec_a, "001", controlfield_value=str(recid_a))
        if upload_mode == 'correct' and not recids_and_rns_b and consider_empty_p:
            # Empty field so that a 'correct' upload wipes all previous
            # linkings when the submitter removed every one of them.
            record_add_field(rec_a, tag_a, ind1=ind1_a, ind2=ind2_a)
        for recid_b, rn_b in recids_and_rns_b:
            record_add_field(rec_a,
                             tag_a,
                             ind1=ind1_a,
                             ind2=ind2_a,
                             subfields=[('i', what_is_b_for_a), ('r', rn_b),
                                        ('w', str(recid_b))])
        chunks.append(record_xml_output(rec_a))

    if what_is_a_for_b is not None:
        tag_b, ind1_b, ind2_b = _prepare_marc(
            marc_for_b, CFG_OTHER_RELATIONSHIP_ENTRY,
            "0" if display_in_b else "1")
        for recid_b, rn_b in recids_and_rns_b:
            rec_b = {}
            record_add_field(rec_b, "001", controlfield_value=str(recid_b))
            if upload_mode == 'correct':
                # Preserve whatever other linking fields record B already
                # had towards record A.
                previous_fields = _get_record_linking_fields(
                    recid_b, recid_a, tag_b, ind1_b, ind2_b)
                record_add_fields(rec_b, tag_b, previous_fields)
            record_add_field(rec_b,
                             tag_b,
                             ind1=ind1_b,
                             ind2=ind2_b,
                             subfields=[('i', what_is_a_for_b), ('r', rn_a),
                                        ('w', str(recid_a))])
            chunks.append(record_xml_output(rec_b))
        # Remove the link from records B that A no longer references.
        if consider_empty_p:
            for recid_b in get_unlinked_records(recid_a, marc_for_b,
                                                display_in_b, upload_mode,
                                                recids_and_rns_b):
                rec_b = {}
                record_add_field(rec_b,
                                 "001",
                                 controlfield_value=str(recid_b))
                previous_fields = _get_record_linking_fields(
                    recid_b, recid_a, tag_b, ind1_b, ind2_b)
                if not previous_fields:
                    # Empty field so the upload clears the last remaining
                    # linking on record B.
                    record_add_field(rec_b, tag_b, ind1=ind1_b, ind2=ind2_b)
                record_add_fields(rec_b, tag_b, previous_fields)
                chunks.append(record_xml_output(rec_b))

    chunks.append('</collection>')
    return ''.join(chunks)