Example #1
0
def crossref_normalize_name(record):
    """
    Normalize the author names stored in fields 100 and 700 of a record.

    For every 100 field instance and then every 700 field instance, the
    value of the first subfield is passed through split_name_parts() and
    create_normalized_name(), the single whitespace character between two
    consecutive initials is removed ("J. R. R." becomes "J.R.R."), and the
    result is written back as subfield 'a' at subfield position 0.

    The record is modified in place; nothing is returned.

    @param record: record structure to update
    """
    # An initial, one whitespace character, another initial.  Raw strings
    # keep the backslashes intact for the regex engine (the non-raw
    # '\g<1>' form relies on an invalid string escape).
    pattern_initials = re.compile(r'([A-Z]\.)\s([A-Z]\.)')
    joined_initials = r'\g<1>\g<2>'

    # Main author (100) first, then the additional authors (700).
    for tag in ('100', '700'):
        for field in record_get_field_instances(record, tag):
            # NOTE(review): assumes the first subfield of the field holds
            # the author name (code 'a') -- confirm against real records.
            author = field[0][0][1]
            new_author = create_normalized_name(split_name_parts(author))
            # Matches cannot overlap, so "A. B. C." only becomes "A.B. C."
            # after one pass; the second pass joins the remaining pair.
            for _ in range(2):
                new_author = pattern_initials.sub(joined_initials, new_author)
            record_modify_subfield(rec=record, tag=tag, subfield_code='a',
                                   value=new_author, subfield_position=0,
                                   field_position_global=field[4])
Example #2
0
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """
    Submit a "Refs for #<recid>" ticket for a record.

    Nothing is submitted when bibcatalog_system or queue is falsy, when no
    980 'a' subfield of the record equals "HEP", or when a 037 'c' subfield
    starts with "astro-ph".  The first 'a' subfield of each 037 field is
    appended to the ticket subject.

    @param recid: id of the record the ticket refers to
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue the ticket is submitted to
    """
    write_message("bibcatalog_system %s" % bibcatalog_system, verbose=1)
    write_message("queue %s" % queue, verbose=1)
    if not (bibcatalog_system and queue):
        return

    record = get_bibrecord(recid)

    # Only create tickets for HEP
    collections = []
    for collection_field in record_get_field_instances(record, "980"):
        collections.extend(field_get_subfield_values(collection_field, "a"))
    if "HEP" not in collections:
        write_message("not in hep", verbose=1)
        return

    subject_parts = ["Refs for #%s" % recid]
    for report_field in record_get_field_instances(record, "037"):
        for category in field_get_subfield_values(report_field, "c"):
            if category.startswith("astro-ph"):
                # We do not curate astro-ph
                write_message("astro-ph", verbose=1)
                return

        # Add the first report number of this field to the subject
        for number in field_get_subfield_values(report_field, "a"):
            subject_parts.append(number)
            break

    bibcatalog_system.ticket_submit(
        subject=" ".join(subject_parts),
        queue=queue,
        text="%s/record/edit/#state=edit&recid=%s" % (CFG_SITE_SECURE_URL,
                                                      recid),
        recordid=recid)
Example #3
0
def check_records(records):
    """
    Fix the INSPIRE id of author "Yang, Yi" in fields 100 and 700.

    A field is considered when its 'a' subfield equals "Yang, Yi" and its
    'u' subfield equals "Beijing, Inst. High Energy Phys.":
        * if the field has no 'i' subfield, ('i', 'INSPIRE-00341324') is
          appended;
        * otherwise every 'i' subfield whose value contains
          INSPIRE-00227069 is replaced by ('i', 'INSPIRE-00341324').
    Each change is reported through record.set_amended().
    """
    wrong_id = 'INSPIRE-00227069'
    right_id = 'INSPIRE-00341324'
    for record in records:
        author_fields = (record_get_field_instances(record, tag="100") +
                         record_get_field_instances(record, "700"))
        for field in author_fields:
            subfields = field_get_subfield_instances(field)
            by_code = dict(subfields)
            if by_code.get('a') != 'Yang, Yi':
                continue
            if by_code.get('u') != 'Beijing, Inst. High Energy Phys.':
                continue

            if 'i' not in by_code:
                subfields.append(('i', right_id))
                record.set_amended('Added INSPIRE-00341324 to Yang, Yi')
                continue

            for index, (code, value) in enumerate(subfields):
                if code == 'i' and wrong_id in value:
                    subfields[index] = ('i', right_id)
                    record.set_amended(
                        'Corrected INSPIRE-00227069 with INSPIRE-00341324 for Yang, Yi'
                    )
Example #4
0
def crossref_normalize_name(record):
    """
    Rewrite the author names of fields 100 and 700 in a unified format.

    The first subfield value of each field instance is normalized with the
    bibauthor_name_utils tools, the space between consecutive initials is
    dropped, and the result is stored as subfield 'a' at position 0.
    The record is changed in place.
    """
    # an initial, one whitespace character, another initial
    initials_gap = '([A-Z]\\.)\\s([A-Z]\\.)'

    def _normalize(raw_name):
        name = create_normalized_name(split_name_parts(raw_name))
        # one pass leaves every other gap untouched, hence two passes
        name = re.sub(initials_gap, r'\g<1>\g<2>', name)
        return re.sub(initials_gap, r'\g<1>\g<2>', name)

    # main author first, additional authors afterwards
    for tag in ('100', '700'):
        for instance in record_get_field_instances(record, tag):
            record_modify_subfield(rec=record,
                                   tag=tag,
                                   subfield_code='a',
                                   value=_normalize(instance[0][0][1]),
                                   subfield_position=0,
                                   field_position_global=instance[4])
Example #5
0
def get_signatures_with_orcid(record):
    """
    Map author names to ORCIDs for the 100 and 700 fields of a record.

    A field contributes an entry when its 'j' subfield starts with
    "ORCID:" (case-insensitive) and it has an 'a' subfield; the key is the
    'a' value and the value is the 'j' value with that prefix removed.
    Fields without an 'a' subfield are skipped instead of raising KeyError.

    @param record: record structure to read
    @return: dict of {author name: orcid}
    """
    prefix = 'ORCID:'
    out = {}
    fields = (record_get_field_instances(record, '100') +
              record_get_field_instances(record, '700'))
    for field in fields:
        subfields = dict(field_get_subfield_instances(field))
        identifier = subfields.get('j', '')
        if not identifier.upper().startswith(prefix):
            continue
        author = subfields.get('a')
        if author is None:
            # An ORCID without an author name cannot be keyed.
            continue
        out[author] = identifier[len(prefix):]
    return out
Example #6
0
def check_records(records):
    """
    Add the INSPIRE id listed in CHANGES to matching authors.

    For each 100/700 field whose 'a' subfield is a key of CHANGES: when the
    field has no 'i' subfield the id from CHANGES is appended and the record
    is flagged as amended; when it has a different 'i' value the record is
    flagged as invalid.
    """
    for record in records:
        author_fields = (record_get_field_instances(record, '100') +
                         record_get_field_instances(record, '700'))
        for field in author_fields:
            subfields = field_get_subfield_instances(field)
            by_code = dict(subfields)
            if 'a' not in by_code:
                continue
            author = by_code['a']
            if author not in CHANGES:
                continue

            if 'i' not in by_code:
                subfields.append(('i', CHANGES[author]))
                record.set_amended("Added INSPIRE ID %s to author %s" % (CHANGES[author], author))
            elif by_code['i'] != CHANGES[author]:
                record.set_invalid("Author %s should have INSPIRE ID %s but has already INSPIRE ID %s" % (author, CHANGES[author], by_code['i']))
def get_ids_from_recid(recid):
    """
    Get all relevant identifiers from metadata of local record.

    @param recid: id of the record passed to get_record()
    @return: list of strings
        [recid, doi, eprint, other_id, system_number] followed by the
        037 'a' report numbers.  Identifiers that are not found are empty
        strings.  In every value '\\n' is replaced by a space and '\\r' is
        removed.
    """
    record = get_record(recid)

    # Retrieving DOI.  The comprehension variable must not be called `doi`:
    # under Python 2 it leaks into this scope and would overwrite the ""
    # default with a raw 024 value when no value starts with '10.'.
    doi = ""
    dois = [value
            for value in record_get_field_values(record, '024', '7', code='a')
            if value.startswith('10.')]
    if len(dois) > 1:
        sys.stderr.write("WARNING: record %s have more than one DOI: %s\n" % (recid, dois))
    if dois:
        doi = dois[0]

    # Retrieving arXiv eprint
    eprint = ""
    eprint_prefix = 'oai:arXiv.org:'
    eprints = [value[len(eprint_prefix):]
               for value in record_get_field_values(record, '035', code='a')
               if value.lower().startswith(eprint_prefix.lower())]
    if len(eprints) > 1:
        sys.stderr.write("WARNING: record %s have more than one arXiv eprint: %s\n" % (recid, eprints))
    if eprints:
        eprint = eprints[0]

    # Retrieving Other service ID: the last matching 035 field wins
    other_id = ''
    for field in record_get_field_instances(record, '035'):
        subfields = dict(field_get_subfield_instances(field))
        if subfields.get('9', '').upper() == CFG_OTHER_SITE.upper() and subfields.get('a'):
            other_id = subfields['a']
    if CFG_INSPIRE_SITE and not other_id:
        for field in record_get_field_instances(record, '595'):
            subfields = dict(field_get_subfield_instances(field))
            if "CDS" in subfields.get('a', '').upper():
                # 'a' is known to exist here; keep what follows the last '-'
                other_id = subfields['a'].split("-")[-1]
                try:
                    int(other_id)
                except ValueError:
                    # Not an integer, we move on
                    other_id = ''
    reportnumbers = record_get_field_values(record, '037', code='a')

    system_number = ""
    if CFG_INSPIRE_SITE:
        for value in record_get_field_values(record, '970',
                                             filter_subfield_code="a",
                                             filter_subfield_value="SPIRES",
                                             filter_subfield_mode="s"):
            system_number = value.split("-")[-1]
            break  # There is typically only one

    out = [str(recid), doi, eprint, other_id, system_number] + reportnumbers
    return [val.replace('\n', ' ').replace('\r', '') for val in out]
Example #8
0
def is_published(record):
    """ Tell whether fields 980 and 773 mark the record as published.

    Parameters:
     * record - dictionary: BibRecord dictionary.
    Returns: True when some 980 field has an 'a' subfield and some 773
    field has a 'p' subfield, else False """
    fields_980 = record_get_field_instances(record, '980')
    fields_773 = record_get_field_instances(record, '773')
    if not any('a' in field_get_subfields(field) for field in fields_980):
        return False
    return any('p' in field_get_subfields(field) for field in fields_773)
def print_essentials(record, tag_list):
    """ Print the control field values listed in tag_list['control'], then
    one formatted line per (tag, ind1, ind2, subs) entry of
    tag_list['datafld'], followed by an empty line """
    # Control values come first
    for control_tag in tag_list['control']:
        for instance in record_get_field_instances(record, tag=control_tag):
            print(" %s: %s" % (control_tag, instance[3]))

    # Datafields: collect the requested subfield values and format them
    for tag, ind1, ind2, subs in tag_list['datafld']:
        instances = record_get_field_instances(record, tag, ind1, ind2)
        line = format_field_vals(get_fields_vals(instances, subs))
        print(" %s:%s" % (tag, line))
    print("")
def is_published(record):
    """ Inspects fields 980 and 773 to decide whether the record has
    already been published.
    Parameters:
     * record - dictionary: BibRecord dictionary.
    Returns: True if a 980 field carries subfield 'a' and a 773 field
    carries subfield 'p', else False """
    collection_fields = record_get_field_instances(record, '980')
    journal_fields = record_get_field_instances(record, '773')

    has_980_a = any('a' in field_get_subfields(instance)
                    for instance in collection_fields)
    if has_980_a:
        for instance in journal_fields:
            if 'p' in field_get_subfields(instance):
                return True
    return False
Example #11
0
def _create_ticket(recid, bibcatalog_system, queue):
    """
    Submit a "Refs for #<recid>" ticket to the given queue.

    When CFG_INSPIRE_SITE is set, no ticket is created for records that
    have a 980 'a' value "arXiv", a 541 'c' value "submission", no 980 'a'
    value "CORE", or a creation date older than 120 days; otherwise the
    first 'a' subfield of each 037 field is appended to the subject.
    """
    subject_parts = ["Refs for #%s" % recid]

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        is_core = False
        for field_980 in record_get_field_instances(record, "980"):
            for name in field_get_subfield_values(field_980, 'a'):
                if name == 'arXiv':
                    # Do not create tickets for arxiv papers
                    # Tickets for arxiv papers are created in bibcatelog
                    write_message("arXiv paper", verbose=1)
                    return
                if name == 'CORE':
                    is_core = True

        # Do not create tickets for user submissions
        for field_541 in record_get_field_instances(record, "541"):
            if "submission" in field_get_subfield_values(field_541, "c"):
                write_message("User submitted paper", verbose=1)
                return

        # Only create tickets for CORE papers
        if not is_core:
            write_message("not in core papers", verbose=1)
            return

        # Do not create tickets for old records
        rows = run_sql(
            """SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])
        oldest_allowed = datetime.now() - timedelta(days=30 * 4)
        if rows[0][0] < oldest_allowed:
            return

        # Add the first report number of each 037 field to the subject
        for field_037 in record_get_field_instances(record, "037"):
            for number in field_get_subfield_values(field_037, 'a'):
                subject_parts.append(number)
                break

    bibcatalog_system.ticket_submit(
        subject=" ".join(subject_parts),
        queue=queue,
        text='%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                      recid),
        recordid=recid)
Example #12
0
def check_record(record, source_field, new_field, subfield_filter):
    """ Move field instances from source_field to new_field.

    source_field and new_field are 5-character strings (tag plus two
    indicators, '_' standing for a blank indicator).  subfield_filter is a
    (code, value) pair: an instance is moved when the filter code is None,
    or when one of its subfields has the filter value and a code matching
    the filter code ('%' matches any code).  Each move is reported through
    record.set_amended(). """
    from invenio.bibrecord import (record_add_field, record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    source_label = source_field.replace("_", " ").replace(" ", "_")
    target_label = new_field.replace("_", " ").replace(" ", "_")
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")

    assert len(subfield_filter) == 2
    wanted_code, wanted_value = subfield_filter

    def filter_passes(subfield_code, result):
        if wanted_code is None:
            return True
        return wanted_code in ('%', subfield_code) and wanted_value == result

    source_tag, source_ind1, source_ind2 = (source_field[:3], source_field[3],
                                            source_field[4])

    # First pass: remove every matching instance, remembering its subfields
    moved = []
    for subfields, ind1, ind2, _, pos in record_get_field_instances(
            record, source_tag, source_ind1, source_ind2):
        if any(filter_passes(*subfield) for subfield in subfields):
            moved.append(subfields)
            record_delete_field(record, source_tag, ind1, ind2, pos)

    # Second pass: re-create the removed instances under the new field
    for subfields in moved:
        record_add_field(record, new_field[:3], new_field[3], new_field[4],
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' %
                           (source_label, target_label, subfields))
def create_xml(recid, IDs, tags):
    """
    Build a correction record in which the listed inspire-ids are dropped.

    For every tag in tags (5 characters: tag plus two indicators) the field
    instances of the record are copied, leaving out the 'i' subfields whose
    value is in IDs.  The result of print_rec() for the new record is
    returned.
    """
    if VERBOSE:
        print("Working on %s" % recid)
    record = get_record(int(recid))
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=recid)
    for tag in tags:
        name, ind1, ind2 = tag[0:3], tag[3], tag[4]
        for instance in record_get_field_instances(record, name, ind1, ind2):
            kept_subfields = []
            for code, value in instance[0]:
                if code == 'i' and value in IDs:
                    if VERBOSE:
                        print("Getting rid of %s from %s!" % (value, recid))
                    continue
                kept_subfields.append((code, value))

            record_add_field(correct_record, name, ind1, ind2,
                             subfields=kept_subfields)
    return print_rec(correct_record)
def create_xml(recid):
    """
    Build a correction record marking Fermilab thesis PDFs as fulltext.

    Every 8564_ field instance of the record is copied; a ('y', 'Fulltext')
    subfield is inserted before each subfield whose value matches
    inspirehep.net/record/<n>/files/fermilab-thesis-*.pdf (case-insensitive).

    @param recid: id of the record passed to get_record()
    @return: print_rec() output of the correction record (001 plus the
        copied 8564_ fields) when at least one subfield matched, else None
    """
    tag = '8564_'
    thesis_url = re.compile(
        r'inspirehep\.net/record/\d+/files/fermilab-thesis-.*?\.pdf',
        flags=re.IGNORECASE)

    record = get_record(recid)
    correct_record = {}
    # The record id belongs on the correction record that is returned, not
    # on the record that was just fetched.
    record_add_field(correct_record, '001', controlfield_value=str(recid))

    flag = False
    for field_instance in record_get_field_instances(record, tag[0:3],
                                                     tag[3], tag[4]):
        correct_subfields = []
        for code, value in field_instance[0]:
            if thesis_url.search(value):
                print('yes')
                flag = True
                correct_subfields.append(('y', 'Fulltext'))
            correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                         subfields=correct_subfields)

    if flag:
        return print_rec(correct_record)
    return None
def create_xml(recid):
    """
    Merge duplicated 773 instances of a record into a single one.

    Only acts when the record has exactly two identical 773__p values.
    The subfields of all 773__ instances are merged without duplicates,
    values equal to 'To appear in the proceedings of' are dropped, and
    only the longest 'c' value is kept (appended last).  Returns the
    print_rec() output of the correction record, or None when the record
    does not qualify.
    """
    tag = '773__'
    journal = get_fieldvalues(recid, tag + 'p')
    if len(journal) != 2 or journal[0] != journal[1]:
        return None

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))

    merged_subfields = []
    longest_c = None
    for instance in record_get_field_instances(record, tag[0:3], tag[3],
                                               tag[4]):
        for code, value in instance[0]:
            if value == 'To appear in the proceedings of':
                continue
            if (code, value) in merged_subfields:
                continue
            if code != 'c':
                merged_subfields.append((code, value))
                continue
            # 'c' values are never stored directly: keep the longest one
            if not longest_c or len(value) > len(longest_c):
                longest_c = value
    if longest_c:
        merged_subfields.append(('c', longest_c))

    record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                     subfields=merged_subfields)
    return print_rec(correct_record)
Example #16
0
def create_xml(recid, tags):
    """Create xml to replace the given field blocks (e.g. 100, 700).

    Each field instance of every tag is copied; before each 'v' subfield,
    one 'u' subfield is inserted per value returned by get_aff() for it.
    get_aff() results are cached in AFFILIATIONS_DONE under the upper-cased
    'v' value with runs of non-word characters replaced by a space.
    Returns the print_rec() output when at least one 'v' subfield was
    processed without a TypeError, else None.
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    changed = False
    for tag in tags:
        name, ind1, ind2 = tag[0:3], tag[3], tag[4]
        for instance in record_get_field_instances(record, name, ind1, ind2):
            new_subfields = []
            for code, value in instance[0]:
                if code == 'v':
                    try:
                        if VERBOSE:
                            print(len(AFFILIATIONS_DONE))
                        cache_key = re.sub(r'\W+', ' ', value).upper()
                        if cache_key not in AFFILIATIONS_DONE:
                            AFFILIATIONS_DONE[cache_key] = get_aff(value)
                        for affiliation in AFFILIATIONS_DONE[cache_key]:
                            new_subfields.append(
                                ('u', affiliation.lstrip(' ')))
                        changed = True
                    except TypeError:
                        # NOTE(review): presumably get_aff() returned a
                        # non-iterable result; the subfield is left as is.
                        pass
                new_subfields.append((code, value))
            record_add_field(correct_record, name, ind1, ind2,
                             subfields=new_subfields)
    if changed:
        return print_rec(correct_record)
    return None
def create_xml(recid):
    """
    Collapse duplicate 773 instances into one and return the new record.

    Applies only to records with exactly two equal 773__p values; any other
    record yields None.  Subfields are gathered from every 773__ instance
    without repeating a (code, value) pair, the placeholder value
    'To appear in the proceedings of' is skipped, and a single 'c'
    subfield holding the longest 'c' value is appended at the end.
    """
    tag = '773__'
    name, ind1, ind2 = tag[0:3], tag[3], tag[4]
    journal = get_fieldvalues(recid, tag + 'p')
    is_duplicated = len(journal) == 2 and journal[0] == journal[1]
    if not is_duplicated:
        return None

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))

    unique_subfields = []
    best_c = None
    for instance in record_get_field_instances(record, name, ind1, ind2):
        for subfield in instance[0]:
            code, value = subfield
            if value == 'To appear in the proceedings of':
                continue
            if (code, value) in unique_subfields:
                continue
            if code == 'c':
                # remember the longest 'c' value seen so far
                if not best_c or len(value) > len(best_c):
                    best_c = value
            else:
                unique_subfields.append((code, value))
    if best_c:
        unique_subfields.append(('c', best_c))

    record_add_field(correct_record, name, ind1, ind2,
                     subfields=unique_subfields)
    return print_rec(correct_record)
Example #18
0
def check_record(record, source_field, new_field, subfield_filter):
    """ Re-file field instances of source_field under new_field.

    Both field arguments are 5 characters long: a 3-character tag and two
    indicators, with '_' meaning a blank indicator.  subfield_filter is a
    (code, value) pair selecting the instances to move: a code of None
    selects every instance, otherwise an instance needs a subfield equal to
    value whose code is the filter code (or any code when the filter code
    is '%').  record.set_amended() is called once per moved instance. """
    from invenio.bibrecord import (record_add_field, record_delete_field,
                                   record_get_field_instances)

    assert len(source_field) == 5
    assert len(new_field) == 5
    source_field = source_field.replace("_", " ")
    new_field = new_field.replace("_", " ")

    assert len(subfield_filter) == 2
    filter_code, filter_value = subfield_filter

    def filter_passes(subfield_code, result):
        code_matches = filter_code in ('%', subfield_code)
        return filter_code is None or (code_matches and
                                       filter_value == result)

    # Delete the matching instances first, keeping their subfields aside
    subfields_list = []
    candidates = record_get_field_instances(record, source_field[:3],
                                            source_field[3], source_field[4])
    for subfields, ind1, ind2, _, pos in candidates:
        if not any(filter_passes(*pair) for pair in subfields):
            continue
        subfields_list.append(subfields)
        record_delete_field(record, source_field[:3], ind1, ind2, pos)

    # Then add them back under the new tag and indicators
    old_name = source_field.replace(" ", "_")
    new_name = new_field.replace(" ", "_")
    for subfields in subfields_list:
        record_add_field(record, new_field[:3], new_field[3], new_field[4],
                         subfields=subfields)
        record.set_amended('move from %s to %s: %s' %
                           (old_name, new_name, subfields))
def get_ids_from_recid(recid):
    """
    Get the identifiers of a local record from its metadata.

    @param recid: id of the record passed to get_record()
    @return: list [recid as string, doi, eprint, other_id] followed by the
        037 'a' report numbers.  doi and eprint are only filled in when
        exactly one candidate exists; a warning is written to stderr when
        there are several.  Missing identifiers are empty strings.
    """
    record = get_record(recid)

    ## Retrieving DOI
    # The comprehension variable must not be named `doi`: under Python 2 it
    # leaks into this scope and would replace the "" default with a raw 024
    # value whenever the DOI is missing or ambiguous.
    doi = ""
    dois = [value
            for value in record_get_field_values(record, "024", "7", code="a")
            if value.startswith("10.")]
    if len(dois) > 1:
        sys.stderr.write("WARNING: record %s have more than one DOI: %s\n" % (recid, dois))
    elif len(dois) == 1:
        doi = dois[0]

    ## Retrieving arXiv eprint
    eprint = ""
    eprint_prefix = "oai:arXiv.org:"
    eprints = [value[len(eprint_prefix):]
               for value in record_get_field_values(record, "035", code="a")
               if value.lower().startswith(eprint_prefix.lower())]
    if len(eprints) > 1:
        sys.stderr.write("WARNING: record %s have more than one arXiv eprint: %s\n" % (recid, eprints))
    elif len(eprints) == 1:
        eprint = eprints[0]

    ## Retrieving Other service ID: the last matching 035 field wins
    other_id = ""
    for field in record_get_field_instances(record, "035"):
        subfields = dict(field_get_subfield_instances(field))
        if subfields.get("9", "").upper() == CFG_OTHER_SITE.upper() and subfields.get("a"):
            other_id = subfields["a"]
    reportnumbers = record_get_field_values(record, "037", code="a")
    return [str(recid), doi, eprint, other_id] + reportnumbers
def create_xmlrefs(recid):
    """
    Build a correction record with renamed journals in the references.

    Every 999C5 field instance of the record is copied.  In 's' subfields
    that contain "<old_journal>,<volume>," for a volume in
    range(vol_change, vol_curr), old_journal is replaced by repl_journal.
    old_journal, repl_journal, vol_change and vol_curr are module-level
    settings.

    @param recid: id of the record passed to get_record()
    @return: print_rec() output of the correction record
    """
    subrefs = [
        '%s,%i,' % (old_journal, x) for x in range(vol_change, vol_curr)
    ]
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    for field_instance in record_get_field_instances(record, '999', 'C', '5'):
        correct_subfields = []
        for code, value in field_instance[0]:
            if code == 's' and any(subref in value for subref in subrefs):
                # The journal name is matched literally above, so replace it
                # literally too: as a regex its '.' characters would match
                # any character and the replacement would be parsed as a
                # template.
                newval = value.replace(old_journal, repl_journal)
                if VERBOSE:
                    print("%s: Replacing %s with %s" % (recid, value, newval))
                correct_subfields.append(('s', newval))
            else:
                correct_subfields.append((code, value))
        record_add_field(correct_record,
                         '999',
                         'C',
                         '5',
                         subfields=correct_subfields)
    return print_rec(correct_record)
Example #21
0
def check_records(records, field):
    """
    Remove the space after a dot ('. ' becomes '.') in the values of field.

    For any field other than '999C5s' the values are amended through
    record.amend_field().  For '999C5s' the 's' subfield of each 999C5
    instance is replaced; when the instance has no '0' subfield and a
    journal search for the new pubnote returns exactly one record, that
    record id is added as subfield '0'.
    """
    for record in records:
        if field != '999C5s':
            for position, value in record.iterfields([field]):
                squeezed = value.replace('. ', '.')
                if squeezed != value:
                    record.amend_field(position, squeezed)
            continue

        for reference in record_get_field_instances(record, '999', 'C', '5'):
            subfields = field_get_subfield_instances(reference)
            by_code = dict(subfields)
            if 's' not in by_code:
                continue
            old_pubnote = by_code['s']
            new_pubnote = old_pubnote.replace('. ', '.')
            if old_pubnote == new_pubnote:
                continue

            subfields.remove(('s', old_pubnote))
            subfields.append(('s', new_pubnote))
            if '0' not in by_code:
                recids = perform_request_search(p=new_pubnote, f='journal')
                if len(recids) == 1:
                    recid = recids.pop()
                    subfields.append(('0', str(recid)))
                    record.set_amended("Pubnote changed from %s to %s and matched a new record %s: Sam is the best, HURRAY!!!" % (old_pubnote, new_pubnote, recid))
                    continue
            record.set_amended("Pubnote changed from %s to %s" % (old_pubnote, new_pubnote))
    def tokenize(self, recID):
        """
        Collect the subfield values of a record selected by self.rules.

        Each rule is a (tag_to_index, necessary_tag, necessary_value)
        triple.  tag_to_index is read as a 3-character tag, two indicator
        characters and one subfield code.  A field instance contributes the
        value of that subfield when all of the following hold:
          * necessary_tag is '' or is a subfield code present in the field;
          * necessary_value is '' or equals the field's necessary_tag
            subfield;
          * the subfield to index is present in the field.

        @param recID: id of the record passed to get_record()
        @return: list of collected values, or [] when a KeyError is raised
            while processing
        """
        phrases = []
        try:
            rec = get_record(recID)

            for tag_to_index, necessary_tag, necessary_value in self.rules:
                core_tag = tag_to_index[0:3]
                ind = tag_to_index[3:5]
                sub_tag = tag_to_index[5]

                for instance in record_get_field_instances(
                        rec, core_tag, ind[0], ind[1]):
                    # Later duplicates of a subfield code override earlier
                    # ones when the pairs are turned into a dict.
                    field = dict(instance[0])
                    tag_condition = necessary_tag == '' or (
                        necessary_tag and necessary_tag in field)
                    value_condition = necessary_value == '' or (
                        necessary_value and
                        field.get(necessary_tag, '') == necessary_value)
                    if tag_condition and sub_tag in field and value_condition:
                        phrases.append(field[sub_tag])
            return phrases
        except KeyError:
            return []
Example #23
0
def replace_references(recid):
    """Return the marc xml of a record with freshly extracted references

    The stored record is not updated.  The references are extracted for
    the record, its 999 fields are replaced by the extracted ones and the
    resulting marc xml is returned.  None is returned when the extracted
    references do not yield a record.

    Parameters:
    * recid: the id of the record
    """
    # Parse references
    references_xml = extract_references_from_record_xml(recid)
    parsed = create_record(references_xml.encode("utf-8"))
    # Record marc xml
    record = get_record(recid)

    references_record = parsed[0]
    if not references_record:
        return None

    # Swap the existing 999 fields for the extracted ones
    new_fields = record_get_field_instances(references_record,
                                            tag="999",
                                            ind1="%",
                                            ind2="%")
    record_delete_fields(record, "999")
    record_add_fields(record, "999", new_fields)
    return record_xml_output(record)
Example #24
0
def generate_ticket(ticket, record):
    """
    Fill in the subject, body and queue of a ticket for the given record
    and return the same ticket object.

    The subject is the first 'a' subfield of each 037 field followed by
    "(#<recid>)"; the body is a link to the record editor with every '%'
    doubled; the queue is "HEP_curation".

    @param ticket: a ticket object as created by BibCatalogTicket() containing
                   the subject, body and queue to create a ticket in.
    @type ticket: record object of BibCatalogTicket.

    @param record: a recstruct object as created by bibrecord.create_record()
    @type record: record object of BibRecord.

    @return: the modified ticket object to create.
    @rtype: BibCatalogTicket
    """
    recid = record_id_from_record(record)

    # Report numbers go first in the subject
    subject_parts = []
    for report_field in record_get_field_instances(record, "037"):
        for number in field_get_subfield_values(report_field, 'a'):
            subject_parts.append(number)
            break
    subject_parts.append("(#%s)" % (recid,))

    body = 'Curate record here: %s/record/edit/#state=edit&recid=%s' % (
        CFG_SITE_SECURE_URL, recid)

    ticket.subject = " ".join(subject_parts)
    ticket.body = body.replace('%', '%%')
    ticket.queue = "HEP_curation"
    return ticket
Example #25
0
def check_records(records):
    """
    Rewrite broken pubnotes in the 999C5 references of the records.

    Applies to references that have an 's' subfield matching
    RE_BROKEN_PUBNOTES and no '0' subfield.  The 's' subfield is replaced
    by "<journal>,<volume>,P<id>" built from the match groups; when the
    citation query for the new pubnote returns exactly one record, its id
    is added as subfield '0'.
    """
    from invenio.bibrank import ConfigParser, CFG_ETCDIR
    from invenio.bibrank_citation_indexer import get_recids_matching_query
    config = ConfigParser.ConfigParser()
    config.read("%s/bibrank/%s.cfg" % (CFG_ETCDIR, "citation"))
    for record in records:
        for reference in record_get_field_instances(record, '999', 'C', '5'):
            subfields = field_get_subfield_instances(reference)
            by_code = dict(subfields)
            if '0' in by_code or 's' not in by_code:
                continue
            old_pubnote = by_code['s']
            match = RE_BROKEN_PUBNOTES.match(old_pubnote)
            if not match:
                continue

            new_pubnote = '%(journal)s,%(volume)s,P%(id)s' % match.groupdict()
            subfields.remove(('s', old_pubnote))
            subfields.append(('s', new_pubnote))
            recids = get_recids_matching_query(p=new_pubnote,
                                               f='journal',
                                               config=config)
            if len(recids) != 1:
                record.set_amended("Pubnote changed from %s to %s" %
                                   (old_pubnote, new_pubnote))
                continue

            recid = recids.pop()
            subfields.append(('0', str(recid)))
            record.set_amended(
                "Pubnote changed from %s to %s and matched a new record %s: Sam is the best, HURRAY!!!"
                % (old_pubnote, new_pubnote, recid))
def check_records(records, field):
    """
    Replace '. ' by '.' in the values of the given field of each record.

    Fields other than '999C5s' are amended with record.amend_field().
    For '999C5s', each 999C5 instance gets its 's' subfield rewritten; if
    the instance has no '0' subfield and the journal search for the new
    pubnote finds exactly one record, a '0' subfield with that record id
    is appended as well.
    """
    for record in records:
        if field != '999C5s':
            for position, value in record.iterfields([field]):
                newval = value.replace('. ', '.')
                if newval != value:
                    record.amend_field(position, newval)
            continue

        for afield in record_get_field_instances(record, '999', 'C', '5'):
            subfields = field_get_subfield_instances(afield)
            lookup = dict(subfields)
            if 's' not in lookup:
                continue
            old_pubnote = lookup['s']
            new_pubnote = old_pubnote.replace('. ', '.')
            if new_pubnote == old_pubnote:
                continue

            subfields.remove(('s', old_pubnote))
            subfields.append(('s', new_pubnote))

            # Try to link the reference when it is not linked yet
            matched_recid = None
            if '0' not in lookup:
                recids = perform_request_search(p=new_pubnote, f='journal')
                if len(recids) == 1:
                    matched_recid = recids.pop()
                    subfields.append(('0', str(matched_recid)))

            if matched_recid is None:
                record.set_amended("Pubnote changed from %s to %s" %
                                   (old_pubnote, new_pubnote))
            else:
                record.set_amended(
                    "Pubnote changed from %s to %s and matched a new record %s: Sam is the best, HURRAY!!!"
                    % (old_pubnote, new_pubnote, matched_recid))
Example #27
0
def translate_fieldvalues_from_latex(record, tag, code='', encoding='utf-8'):
    """
    Translate subfield values of the given tag from LaTeX, in place.

    Every field instance of `tag` is visited. Each subfield whose code
    equals `code` (or every subfield when `code` is the empty string) has
    its value run through translate_latex2unicode(), encoded with
    `encoding` and written back into the record.

    @param record: record to modify, in BibRec style structure
    @type record: dict

    @param tag: tag of fields to modify
    @type tag: string

    @param code: restrict the translation to a given subfield code
    @type code: string

    @param encoding: character encoding for the new value. Defaults to UTF-8.
    @type encoding: string
    """
    for field in record_get_field_instances(record, tag):
        for position, (subfield_code, subfield_value) in enumerate(field[0]):
            if code != '' and subfield_code != code:
                continue
            translated = translate_latex2unicode(subfield_value).encode(encoding)
            record_modify_subfield(record, tag, subfield_code, translated,
                                   position, field_position_global=field[4])
Example #28
0
def translate_fieldvalues_from_latex(record, tag, code='', encoding='utf-8'):
    """
    Translate subfield values of the given tag from LaTeX, in place.

    Every field instance of `tag` is visited. Each subfield whose code
    equals `code` (or every subfield when `code` is the empty string) has
    its value run through translate_latex2unicode(), encoded with
    `encoding` and written back into the record.

    @param record: record to modify, in BibRec style structure
    @type record: dict

    @param tag: tag of fields to modify
    @type tag: string

    @param code: restrict the translation to a given subfield code
    @type code: string

    @param encoding: character encoding for the new value. Defaults to UTF-8.
    @type encoding: string
    """
    for field in record_get_field_instances(record, tag):
        for position, (subfield_code, subfield_value) in enumerate(field[0]):
            wanted = (code == '') or (subfield_code == code)
            if wanted:
                unicode_value = translate_latex2unicode(subfield_value)
                record_modify_subfield(record, tag, subfield_code,
                                       unicode_value.encode(encoding),
                                       position,
                                       field_position_global=field[4])
Example #29
0
def record_get_value_with_provenence(record,
                                     tag,
                                     ind1=" ",
                                     ind2=" ",
                                     value_code="",
                                     provenence_code="9",
                                     provenence_value="arXiv"):
    """
    Collect subfield values from fields carrying a given provenance.

    A field instance qualifies when one of its subfields has code
    `provenence_code` and value `provenence_value`. From every qualifying
    field, the values of all subfields with code `value_code` are returned.

    @return: list of matching subfield values, in field order
    """
    collected = []
    fields = record_get_field_instances(record, tag, ind1, ind2)
    for subfields, _ind1, _ind2, _controlfield, _position in fields:
        has_provenance = any(
            code == provenence_code and value == provenence_value
            for code, value in subfields)
        if not has_provenance:
            # Field comes from another source; ignore it.
            continue
        collected.extend(value for code, value in subfields
                         if code == value_code)
    return collected
Example #30
0
def generate_ticket(ticket, record):
    """
    Fill in subject, body and queue of the given ticket for a record.

    The subject is made of the first $a of every 037 field followed by
    "(#<recid>)". The body is a link to the record editor, with '%'
    doubled. The queue is always "HEP_curation".

    @param ticket: a ticket object as created by BibCatalogTicket() containing
                   the subject, body and queue to create a ticket in.
    @type ticket: record object of BibCatalogTicket.

    @param record: a recstruct object as created by bibrecord.create_record()
    @type record: record object of BibRecord.

    @return: the modified ticket object to create.
    @rtype: BibCatalogTicket
    """
    recid = record_id_from_record(record)

    subject_parts = []
    for report_field in record_get_field_instances(record, "037"):
        # Only the first $a of each 037 field goes into the subject.
        subject_parts.extend(field_get_subfield_values(report_field, 'a')[:1])
    subject_parts.append("(#%s)" % (recid, ))

    body = 'Curate record here: %s/record/edit/#state=edit&recid=%s' % \
           (CFG_SITE_SECURE_URL, recid)

    ticket.subject = " ".join(subject_parts)
    ticket.body = body.replace('%', '%%')
    ticket.queue = "HEP_curation"
    return ticket
Example #31
0
def record_get_value_with_provenence(record, provenence_value, provenence_code,
                                     tag, ind1=" ", ind2=" ", code=""):
    """
    Return the values of subfield `code` from the fields `tag`/`ind1`/`ind2`
    that carry the given provenance code/value pair.

    Example: to extract all subject categories (65017 $a) whose provenance
    in $9 is "arXiv" from

    65017 $ahep-ph$9arXiv
    65017 $ahep-th$9arXiv
    65017 $aMath$9INSPIRE

    the function returns ["hep-ph", "hep-th"].

    @return: list of subfield values
    """
    collected = []
    fields = record_get_field_instances(record, tag, ind1, ind2)
    for subfields, _ind1, _ind2, _controlfield, _position in fields:
        provenance_found = False
        for subfield_code, value in subfields:
            if subfield_code == provenance_code_matches(provenence_code) \
                    and value == provenence_value:
                provenance_found = True
                break
        if provenance_found:
            collected += [value for subfield_code, value in subfields
                          if subfield_code == code]
    return collected


def provenance_code_matches(provenence_code):
    """Return the provenance code unchanged (keeps the comparison readable)."""
    return provenence_code
def create_xml(recid, tags, experiment):
    """
    Build a correction record that adds identifiers to the given tags.

    For every $a subfield of the requested tags,
    convert_search_to_inspire_id() is queried with
    'find a <value> and exp <experiment>'. A non-empty first element of
    its result is added as $i and a non-empty second element as $j
    (prefixed with 'ORCID:'), both ahead of the original $a.

    @param recid: id of the record to read
    @param tags: five-character tag strings (tag + ind1 + ind2)
    @param experiment: experiment name used in the search
    @return: print_rec() output of the new record if anything was added,
             otherwise None
    """
    source_record = get_record(recid)
    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    anything_added = False
    for tag in tags:
        marc_tag, ind1, ind2 = tag[0:3], tag[3], tag[4]
        for field_instance in record_get_field_instances(source_record,
                                                         marc_tag, ind1, ind2):
            new_subfields = []
            for code, value in field_instance[0]:
                if code == 'a':
                    query = 'find a ' + value + ' and exp ' + experiment
                    identifiers = convert_search_to_inspire_id(query)
                    if identifiers[0]:
                        anything_added = True
                        new_subfields.append(('i', identifiers[0]))
                    if identifiers[1]:
                        anything_added = True
                        new_subfields.append(('j', 'ORCID:' + identifiers[1]))
                new_subfields.append((code, value))
            record_add_field(new_record, marc_tag, ind1, ind2,
                             subfields=new_subfields)
    if not anything_added:
        return None
    return print_rec(new_record)
Example #33
0
def format_element(bfo, limit, separator=' ; ', extension='[...]', print_links="yes"):
    """
    Prints the list of editors of a record.

    An editor is a 100 field whose first $e equals "ed."; its first $a is
    the name that gets printed.

    @param limit: the maximum number of editors to display
    @param separator: the separator between editors.
    @param extension: a text printed if more editors than 'limit' exist
    @param print_links: if yes, print the editors as HTML link to their publications
    """
    from urllib import quote
    from invenio.config import CFG_BASE_URL
    from invenio import bibrecord

    editors = []
    for author in bibrecord.record_get_field_instances(bfo.get_record(), '100'):
        roles = bibrecord.field_get_subfield_values(author, "e")
        if roles and roles[0] == "ed.":
            editors.append(bibrecord.field_get_subfield_values(author, 'a')[0])

    if print_links.lower() == "yes":
        def as_link(name):
            return '<a href="' + CFG_BASE_URL + '/search?f=author&p=' + \
                   quote(name) + \
                   '&amp;ln=' + bfo.lang + \
                   '">' + name + '</a>'
        editors = [as_link(name) for name in editors]

    if limit.isdigit():
        max_shown = int(limit)
        if len(editors) > max_shown:
            return separator.join(editors[:max_shown]) + extension

    if editors:
        return separator.join(editors)
    return None
Example #34
0
def check_records(records):
    """
    Rewrite broken pubnotes in 999C5 $s and try to link them to a record.

    Only 999C5 fields that have a $s but no $0 are considered. A $s that
    matches RE_BROKEN_PUBNOTES is replaced by
    '<journal>,<volume>,P<id>'. If the new pubnote matches exactly one
    record in the 'journal' index, that record id is appended as $0.

    @param records: iterable of record objects to check
    """
    from invenio.bibrank import ConfigParser, CFG_ETCDIR
    from invenio.bibrank_citation_indexer import get_recids_matching_query
    config = ConfigParser.ConfigParser()
    config.read("%s/bibrank/%s.cfg" % (CFG_ETCDIR, "citation"))
    for record in records:
        for reference in record_get_field_instances(record, '999', 'C', '5'):
            subfields = field_get_subfield_instances(reference)
            by_code = dict(subfields)
            if '0' in by_code or 's' not in by_code:
                continue
            old_pubnote = by_code['s']
            broken = RE_BROKEN_PUBNOTES.match(old_pubnote)
            if not broken:
                continue
            new_pubnote = '%(journal)s,%(volume)s,P%(id)s' % broken.groupdict()
            subfields.remove(('s', old_pubnote))
            subfields.append(('s', new_pubnote))
            recids = get_recids_matching_query(p=new_pubnote,
                                               f='journal',
                                               config=config)
            if len(recids) != 1:
                record.set_amended("Pubnote changed from %s to %s" % (old_pubnote, new_pubnote))
                continue
            recid = recids.pop()
            subfields.append(('0', str(recid)))
            record.set_amended("Pubnote changed from %s to %s and matched a new record %s: Sam is the best, HURRAY!!!" % (old_pubnote, new_pubnote, recid))
Example #35
0
def create_xml(recid, tags):
    """Create xml file to replace to 100, 700 block."""

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    flag = None
    for tag in tags:
        field_instances = record_get_field_instances(record, tag[0:3], \
                                                     tag[3], tag[4])
        correct_subfields = []
        for field_instance in field_instances:
            correct_subfields = []
            for code, value in field_instance[0]:
                if code == 'v':
                    try:
                        if VERBOSE:
                            print len(AFFILIATIONS_DONE)
                        affiliation_key = re.sub(r'\W+', ' ', value).upper()
                        if not affiliation_key in AFFILIATIONS_DONE:
                            new_values = get_aff(value)
                            AFFILIATIONS_DONE[affiliation_key] = new_values
                        for new_value in AFFILIATIONS_DONE[affiliation_key]:
                            correct_subfields.append(('u', \
                                                     new_value.lstrip(' ')))
                        flag = True
                    except TypeError:
                        pass
                correct_subfields.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                             subfields=correct_subfields)
    if flag:
        return print_rec(correct_record)
Example #36
0
def create_xml(recid, correction_dict):
    """Fix the citations of Fermilab reports."""

    tags = [REF]
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    flag = False
    for (tag, field_instance) in \
            [(tag, field_instance) for tag in tags \
             for field_instance in record_get_field_instances(record, \
             tag[0:3], tag[3], tag[4])]:
        correct_subfields = []
        for code, value in field_instance[0]:
            if code == 'r' and value.upper() in correction_dict:
                print 'Was:', value
                value = correction_dict[value.upper()]
                print 'Now:', value
                flag = True
            correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                         subfields=correct_subfields)
    if flag:
        return print_rec(correct_record)
    else:
        return None
Example #37
0
def replace_references(recid):
    """Replace references for a record

    The stored record is not updated; the MARCXML of the record with its
    999 fields replaced by the freshly extracted ones is returned.

    Parameters:
    * recid: the id of the record

    Returns the new MARCXML, or None when the extracted references could
    not be turned into a record.
    """
    # Extract and parse the references
    extracted_xml = extract_references_from_record_xml(recid)
    references = create_record(extracted_xml.encode('utf-8'))
    # Current record
    record = get_record(recid)

    if not references[0]:
        return None

    new_fields = record_get_field_instances(references[0],
                                            tag='999',
                                            ind1='%',
                                            ind2='%')
    # Swap the old 999 fields for the extracted ones
    record_delete_fields(record, '999')
    record_add_fields(record, '999', new_fields)
    return record_xml_output(record)
def create_xml(recid):
    """
    Build a correction record with the journal name (and volume) replaced.

    Uses `tag`, `repl_journal` and `volume_letter`, which are defined
    outside this function. Every $p becomes `repl_journal`. When
    `volume_letter` is set, every $v is replaced by the record's 773__v
    values, each prefixed with `volume_letter` unless it already starts
    with a letter.

    @param recid: id of the record to read
    @return: print_rec() output of the new record
    """
    source_record = get_record(recid)
    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    marc_tag, ind1, ind2 = tag[0:3], tag[3], tag[4]
    for field_instance in record_get_field_instances(source_record,
                                                     marc_tag, ind1, ind2):
        new_subfields = []
        for code, value in field_instance[0]:
            if code == 'p':
                new_subfields.append(('p', repl_journal))
            elif code == 'v' and volume_letter:
                for volume in get_fieldvalues(recid, '773__v'):
                    if not volume[0].isalpha():
                        volume = volume_letter + volume
                    new_subfields.append(('v', volume))
            else:
                new_subfields.append((code, value))
        record_add_field(new_record, marc_tag, ind1, ind2,
                         subfields=new_subfields)
    return print_rec(new_record)
Example #39
0
def _create_ticket(recid, bibcatalog_system, queue):
    """
    Submit a "Refs for #<recid>" ticket to the given queue.

    When CFG_INSPIRE_SITE is set, no ticket is created for records that
    are in the 'arXiv' collection, are not in 'CORE', were created more
    than 30*4 days ago, or have a 037 $c starting with 'astro-ph'.
    Otherwise the first $a of every 037 field is appended to the subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object providing ticket_submit()
    @param queue: name of the queue to submit the ticket to
    """
    subject = "Refs for #%s" % recid

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        collections = []
        for collection_field in record_get_field_instances(record, "980"):
            collections.extend(field_get_subfield_values(collection_field, 'a'))

        if 'arXiv' in collections:
            # Tickets for arXiv papers are created elsewhere (bibcatalog)
            write_message("arXiv paper", verbose=1)
            return

        if 'CORE' not in collections:
            write_message("not in hep", verbose=1)
            return

        # Old records do not get a ticket
        created_on = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])[0][0]
        if created_on < datetime.now() - timedelta(days=30*4):
            return

        for report_field in record_get_field_instances(record, "037"):
            categories = field_get_subfield_values(report_field, 'c')
            if any(category.startswith('astro-ph') for category in categories):
                # astro-ph is not curated
                write_message("astro-ph", verbose=1)
                return

            report_numbers = field_get_subfield_values(report_field, 'a')
            if report_numbers:
                subject += " " + report_numbers[0]

    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL,
                                                    recid)
    bibcatalog_system.ticket_submit(subject=subject,
                                    queue=queue,
                                    text=text,
                                    recordid=recid)
Example #40
0
def check_existing_pdg_fields(recids, pdg_data, current_records):
    """
    Compare the PDG values stored in 084 with the new PDG data.

    For each recid, a copy of the record's 001 and 084 fields is made.
    084 fields accepted by is_pdg_field() whose first $a is not in
    pdg_data[recid] are deleted from the copy, and a new 084 field
    ($2 PDG, $9 PDG, $a value) is added for every value in
    pdg_data[recid] that is not present yet.

    @param recids: ids of the records to compare
    @param pdg_data: maps recid to the new PDG values
    @param current_records: maps recid to the current record structure
    @return: dict mapping recid to the modified copy, only for records
             that need a change
    """
    _print_out("Comparing new and old PDG data for " + str(len(recids)) +
               " records...")
    to_correct = {}
    for recid in recids:
        current = current_records[recid]
        record_mod = {'001': deepcopy(current['001']),
                      '084': deepcopy(current['084'])}
        fields = record_get_field_instances(record_mod, '084')

        current_set = set(field_get_subfield_values(field, 'a')[0]
                          for field in fields if is_pdg_field(field))
        new_set = set(pdg_data[recid])
        deletions = current_set - new_set
        additions = new_set - current_set

        if not deletions and not additions:
            _print_verbose("Nothing to change for record #" + str(recid))
            continue

        if deletions:
            for field in fields:
                if not is_pdg_field(field):
                    continue
                if field_get_subfield_values(field, 'a')[0] in deletions:
                    record_delete_field(record_mod, '084', ind1=' ', ind2=' ',
                                        field_position_global=field[4])

        for pdg_field in additions:
            position = record_add_field(record_mod, '084', ' ', ' ')
            for code, value in (('2', 'PDG'), ('9', 'PDG'), ('a', pdg_field)):
                record_add_subfield_into(record_mod, '084', code, value,
                                         field_position_global=position)

        to_correct[recid] = record_mod
        _print_verbose("Record #" + str(recid) + ": " +
                       str(len(deletions)) + " deletions and " +
                       str(len(additions)) + " additons.")

    _print_out(str(len(to_correct)) + " records to be corrected.")
    return to_correct
Example #41
0
    def check_arxiv(recid):
        """
        Tell whether any 037 $a of the record starts with 'arXiv'.

        @param recid: id of the record to look up
        @return: True if such a value exists, False otherwise
        """
        report_fields = record_get_field_instances(get_record(recid), "037")
        return any(value.startswith('arXiv')
                   for report_field in report_fields
                   for value in field_get_subfield_values(report_field, 'a'))
Example #42
0
    def check_arxiv(recid):
        """
        Tell whether any 037 $a of the record starts with 'arXiv'.

        @param recid: id of the record to look up
        @return: True if such a value exists, False otherwise
        """
        record = get_record(recid)
        values = []
        for report_field in record_get_field_instances(record, "037"):
            values.extend(field_get_subfield_values(report_field, 'a'))
        return any(value.startswith('arXiv') for value in values)
def create_xml(recid, arxiv_ids):
    """
    Build a record holding only the 856 4_ fields accepted by
    check_arxiv_url().

    @param recid: id of the record to read
    @param arxiv_ids: passed through to check_arxiv_url()
    @return: print_rec() output of a record with 001 and the kept 856 fields
    """
    source_record = get_record(recid)
    kept_fields = []
    for attachment in record_get_field_instances(source_record,
                                                 tag='856', ind1='4'):
        if check_arxiv_url(attachment, arxiv_ids):
            kept_fields.append(attachment)

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '856', kept_fields)
    return print_rec(new_record)
Example #44
0
def create_xml(recid, fname=None, oaff=None):
    """
    Add affiliations as $u to the first 100 field and drop the 700 fields.

    @param recid: id of the record to read
    @param fname: not used by this function
    @param oaff: iterable of affiliation strings to add; None (the
                 default) is treated as "no affiliations" instead of
                 raising TypeError
    @return: print_rec() output of the modified record
    """
    # The default value None is not iterable; fall back to an empty list.
    if oaff is None:
        affiliations = []
    else:
        affiliations = list(oaff)
    record = get_record(recid)
    # Global position of the first 100 field, used to address it below.
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for affiliation in affiliations:
        record_add_subfield_into(record, '100', 'u', affiliation,
                                 field_position_global=auth_location)
    return print_rec(record)
Example #45
0
def record_in_collection(record, collection):
    """
    Tell whether the record belongs to the given collection (980__a).

    The comparison ignores case.

    @return: True if any 980 $a equals `collection`, False otherwise
    """
    return any(value.lower() == collection.lower()
               for collection_field in record_get_field_instances(record, "980")
               for value in field_get_subfield_values(collection_field, 'a'))
Example #46
0
def check_records(records):
    """
    Add the INSPIRE ID listed in CHANGES to matching authors.

    Every 100 and 700 field whose $a is a key of CHANGES is checked: if
    it has no $i, the ID from CHANGES is appended; if it has a different
    $i, the record is flagged as invalid.

    @param records: iterable of record objects to check
    """
    for record in records:
        author_fields = (record_get_field_instances(record, '100') +
                         record_get_field_instances(record, '700'))
        for author_field in author_fields:
            subfields = field_get_subfield_instances(author_field)
            by_code = dict(subfields)
            if 'a' not in by_code:
                continue
            author = by_code['a']
            if author not in CHANGES:
                continue
            expected_id = CHANGES[author]
            if 'i' not in by_code:
                subfields.append(('i', expected_id))
                record.set_amended(
                    "Added INSPIRE ID %s to author %s" %
                    (expected_id, author))
            elif by_code['i'] != expected_id:
                record.set_invalid(
                    "Author %s should have INSPIRE ID %s but has already INSPIRE ID %s"
                    % (author, expected_id, by_code['i']))
Example #47
0
def record_in_collection(record, collection):
    """
    Tell whether the record belongs to the given collection (980__a).

    The comparison ignores case.

    @return: True if any 980 $a equals `collection`, False otherwise
    """
    for collection_field in record_get_field_instances(record, "980"):
        values = field_get_subfield_values(collection_field, 'a')
        if any(value.lower() == collection.lower() for value in values):
            return True
    return False
def rollback_record(recid):
        print 'id', recid
        for rev in get_record_revision_ids(recid):
            old_record = create_record(get_marcxml_of_revision_id(rev))
            fields_to_add = record_get_field_instances(old_record[0], tag='520')
            if fields_to_add:
                print 'reverting to', rev
                return create_our_record(recid, fields_to_add)
        print 'FAILED', recid
Example #49
0
def main():
    matchObj = re.match('^(\s)', journal)
    if matchObj:
        filename = 'tmp_' + matchObj.group(1) + '_' + re.sub(
            '.py', '.html', __file__)
    else:
        filename = 'tmp_' + re.sub('.py', '.html', __file__)
    if TEST:
        print "Testing mode...."
    else:
        print "Checking records in this search: %s" % search
    check_these_records = []
    x = perform_request_search(p=search, cc='HEP')
    if len(x) > 0:
        if VERBOSE:
            print "%i records in search" % len(x)
        output = open(filename, 'w')
        for r in x:
            if VERBOSE:
                print "Working on record %i" % r
            record = get_record(r)
            ptep_field_instances = []
            field_instances = record_get_field_instances(record, \
    tag[0:3], tag[3], tag[4])
            for field_instance in field_instances:
                #                if TEST:
                #                    print "field_instance: ", field_instance
                for (code, value) in field_instance[0]:
                    if journal in value:
                        if TEST:
                            print "suspect field_instance[0]: ", field_instance[
                                0]
                        ptep_field_instances.append(field_instance[0])
            for item in ptep_field_instances:
                if any('r' in code for code in item) or any('0' in code
                                                            for code in item):
                    if TEST:
                        print "'r' or '0' in item:", item
                else:
                    if VERBOSE:
                        print "Found a record that needs checking: %i" % r
                    check_these_records.append(r)
        if check_these_records:
            check_these_records = sorted(set(check_these_records))
            if VERBOSE:
                print "%i records of %i total in search should be checked" % (
                    len(check_these_records), len(x))
            check_these_records = [
                '<a href="https://inspirehep.net/record/edit/?ln=en#state=edit&recid=%i">%i</a><br />'
                % (r, r) for r in check_these_records
            ]
            output.writelines(check_these_records)
        output.close()
    else:
        if VERBOSE:
            print "No results in search"
def create_our_record(recid):
    """
    Build a record with the 980 fields of `recid`, de-duplicated.

    980 fields whose only subfield is $a 'unknown' are dropped; the rest
    are wrapped in OurInstance and collected in a set to remove
    duplicates.

    @param recid: id of the record to read
    @return: print_rec() output of a record with 001 and the kept 980 fields
    """
    old_record = get_record(recid)

    unique_instances = set()
    for instance in record_get_field_instances(old_record, '980'):
        if field_get_subfield_instances(instance) == [('a', 'unknown')]:
            continue
        unique_instances.add(OurInstance(instance))
    new_instances = [wrapped.field for wrapped in unique_instances]

    new_record = {}
    record_add_field(new_record, '001', controlfield_value=str(recid))
    record_add_fields(new_record, '980', new_instances)
    return print_rec(new_record)
def get_rn(revision):
    """
    Collect the 999C5 $r values of a record revision.

    Each value is passed through tag_arxiv_more() before being stored.

    @param revision: revision id understood by get_marcxml_of_revision_id()
    @return: set of the processed $r values
    """
    record = create_record(get_marcxml_of_revision_id(revision))[0]
    references = record_get_field_instances(record, tag='999',
                                            ind1='C', ind2='5')
    return set(tag_arxiv_more(subfield[1])
               for reference in references
               for subfield in field_get_subfield_instances(reference)
               if subfield[0] == 'r')
Example #52
0
def get_photolab_image_caption(record, imageID):
    """
    Get the caption for the given image.

    The first CFG_MA_CAPTION_TAG field whose CFG_MA_CAPTION_SUBFIELD_ID
    subfield equals `imageID` provides the caption, taken from its
    CFG_MA_CAPTION_SUBFIELD_CONTENT subfield.

    @return: the caption, or '' if no field matches or the matching field
             has no content subfield
    """
    for caption_field in record_get_field_instances(record,
                                                    tag=CFG_MA_CAPTION_TAG):
        subfield_map = dict(caption_field[0])
        if subfield_map.get(CFG_MA_CAPTION_SUBFIELD_ID, -1) != imageID:
            continue
        return subfield_map.get(CFG_MA_CAPTION_SUBFIELD_CONTENT, '')
    return ''
Example #53
0
def replace_references(recid, uid=None, txt=None, url=None):
    """Replace references for a record

    The stored record is not updated; the MARCXML of the cached record with
    its 999 fields replaced by the freshly extracted references is returned.

    Parameters:
    * recid: the id of the record
    * uid: passed with recid to get_cache_file_contents() to load the record
    * txt: references in text mode; used when given
    * url: location to extract the references from; used when txt is None

    When neither txt nor url is given, the references are extracted from
    the record itself. Returns None if no 999C5 field was extracted.
    """
    # Pick the extraction source
    if txt is not None:
        references_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        references_xml = extract_references_from_url_xml(url)
    else:
        references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode('utf-8'))

    # Only the third element of the cache contents (the record) is needed
    (_unused1, _unused2, record,
     _unused3, _unused4, _unused5, _unused6) = get_cache_file_contents(recid,
                                                                        uid)

    extracted = references[0]
    references_to_add = record_get_field_instances(extracted, tag='999',
                                                   ind1='C', ind2='5')
    refextract_status = record_get_field_instances(extracted, tag='999',
                                                   ind1='C', ind2='6')

    if not references_to_add:
        return None

    # Swap the old 999 fields for the extracted ones
    record_delete_fields(record, '999')
    record_add_fields(record, '999', references_to_add)
    record_add_fields(record, '999', refextract_status)
    return record_xml_output(record)
Example #54
0
def record_find_matching_fields(key,
                                rec,
                                tag="",
                                ind1=" ",
                                ind2=" ",
                                exact_match=False):
    """
    Look for field values containing the given keyword, or equal to it
    when an exact match is wanted. The found fields are returned as a
    list of field instances per tag. The fields to search can be narrowed
    down to tag/indicator level.

    With exact_match=True only values equal to the key are accepted; there
    is no fall-back to substring matching.

    @param key: keyword to search for
    @type key: string

    @param rec: a record structure as returned by bibrecord.create_record()
    @type rec: dict

    @param tag: a 3 characters long string; all tags are searched if empty
    @type tag: string

    @param ind1: a 1 character long string
    @type ind1: string

    @param ind2: a 1 character long string
    @type ind2: string

    @param exact_match: if True, a value must equal the key to match
    @type exact_match: bool

    @return: a list of found fields in a tuple per tag: (tag, field_instances) where
        field_instances is a list of (Subfields, ind1, ind2, value, field_position_global)
        and subfields is list of (code, value)
    @rtype: list
    """
    if not tag:
        all_field_instances = rec.items()
    else:
        all_field_instances = [
            (tag, record_get_field_instances(rec, tag, ind1, ind2))
        ]
    matching_field_instances = []
    for current_tag, field_instances in all_field_instances:
        found_fields = []
        for field_instance in field_instances:
            # Get values to match: controlfield_value + subfield values
            values_to_match = [field_instance[3]] + \
                              [val for dummy_code, val in field_instance[0]]
            if exact_match:
                matched = key in values_to_match
            else:
                matched = any(key in value for value in values_to_match)
            if matched:
                found_fields.append(field_instance)
        if found_fields:
            matching_field_instances.append((current_tag, found_fields))
    return matching_field_instances
Example #55
0
def handle_tags(recid, tags, d):
    """
    Rebuild the fields of record C{recid} selected by C{tags} into small
    "corrected" records, printing those that look like they need attention.

    For every field instance found for a tag, a fresh C{correct_record} is
    started and one field is added to it per subfield of the instance.
    Email ('m') and affiliation ('u') subfields are moved to a '371__'
    field, '700__' is rewritten to '100__', and author names get an extra
    'q' subfield with the "First Last" form of the name.

    The function relies on C{list_of_emails}, C{list_of_authors} and
    C{verbose}, which are not defined in this block (presumably
    module-level globals -- verify against the rest of the file). The two
    lists are appended to as a side effect.

    @param recid: identifier passed to get_record() to fetch the record

    @param tags: iterable of tag strings; each one is indexed as
        tag[0:3] (tag), tag[3] (ind1) and tag[4] (ind2), so it must be at
        least 5 characters long, e.g. '100__'

    @param d: mapping merged into each C{correct_record} with update();
        when it is falsy nothing is merged, nothing is printed and the
        need_* flags are not reset

    @return: the C{correct_record} built for the last field instance that
        was processed, or an empty dict if no field instance was found
    @rtype: dict
    """
    record = get_record(recid)
    correct_record = {}
    need_email = False
    need_author = False

    for tag in tags:
        # 'tag' is rebound inside the subfield loop, so keep the requested
        # value around to be able to restore it.
        original_tag = tag
        field_instances = \
            record_get_field_instances(record, tag[0:3], tag[3], tag[4])
        correct_subfields = []
        #correct_subfields_aff = []
        for field_instance in field_instances:
            # Each field instance gets its own output record; earlier ones
            # are only printed (see below), never accumulated.
            correct_record = {}
            correct_subfields = []
            # field_instance[0] is iterated as (code, value) subfield pairs.
            for code, value in field_instance[0]:
                if code == 'm' or code == 'u':
                    # Emails and affiliations are written to a 371 field;
                    # an affiliation ('u') becomes subfield 'a' there.
                    tag = '371__'
                    if code == 'u': code = 'a'
                    # Only look up an email the first time it is seen.
                    if code == 'm' and not value in list_of_emails:
                        list_of_emails.append(value)
                        inHepnames_email = get_hepnames_recid_from_email(value)
                        if verbose: print 'inHepnames_email=', inHepnames_email
                        #if not inHepnames_email: need_email = value
                else:
                    tag = original_tag
                # Additional authors are written out as main authors.
                if tag == '700__' : tag = '100__'
                # For code 'v' the subfield list of the previous iteration is
                # kept as is, so that previous content is added once more by
                # the record_add_field() call below.
                if code != 'v':
                    correct_subfields = [(code, value)]
                if  tag == '371__':
                    correct_subfields.append(('z', 'current'))
                # Only handle an author name the first time it is seen.
                if code == 'a' and tag == '100__' and not value in list_of_authors:              
                    list_of_authors.append(value)
                    # Rewrite "X, Y" as "Y X" for the 'q' subfield.
                    nicename = re.sub(r'(.*)\, (.*)',r'\2 \1',value)                    
                    correct_subfields.append(('q', nicename))
                    search = "find a " + value
                    search = search + " or ea " + value
                    inHepnames_author = \
                        perform_request_search(p=search, cc='HepNames')
                    if verbose: print 'inHepnames_author=', inHepnames_author
                    # Flag the author when the HepNames search found nothing,
                    # unless the name contains an apostrophe.
                    if not inHepnames_author: need_author = True
                    if re.search(r"'",value): need_author = False
                # An 'i' subfield clears the flag again.
                if code == 'i' : need_author = False
                # One field is added per subfield of the source instance.
                record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                    subfields=correct_subfields)
            if d:
                correct_record.update(d)
                # need_email is never set to a true value in this function
                # (the assignment above is commented out), so in practice
                # only need_author triggers the output.
                if need_author or need_email:
                    if verbose and inHepnames_author:
                        print "Margaret: This author is already in", \
                               inHepnames_author, need_email
                    print print_rec(correct_record)
                    need_email = False
                    need_author = False
    return correct_record
    def process_record(self, record):
        """@see: BaseFieldCommand.process_record

        Apply this command's subfield commands to every field of the
        record that matches the configured tag and indicators.

        @param record: record structure handed to
            bibrecord.record_get_field_instances()

        @return: None; when the tag is empty or None nothing is done
        """
        # if the tag is empty, we don't make any changes
        if self._tag is None or self._tag == "":
            return

        matching_field_instances = bibrecord.record_get_field_instances(
            record, self._tag, self._ind1, self._ind2)
        for current_field in matching_field_instances:
            # Element 4 of a field instance is its global field position,
            # which identifies the field to the subfield commands.
            self._apply_subfield_commands_to_field(record, current_field[4])
Example #57
0
def create_xml(recid, fname=None, oaff=None):
    """
    Attach the given affiliations to the first 100 field of a record and
    drop its 700 fields.

    @param recid: identifier passed to get_record() to fetch the record

    @param fname: unused; kept so that existing callers keep working

    @param oaff: iterable of values to add as 'u' subfields of the first
        100 field; None (the default) means no subfields are added

    @return: the result of print_rec() on the modified record
        (presumably its MARCXML representation -- verify against print_rec)

    @raise IndexError: if record_get_field_instances() returns no 100
        field with empty indicators for the record
    """
    # The default for oaff is None, which cannot be iterated; treat it as
    # "no affiliations" instead of failing with a TypeError.
    affs = list(oaff) if oaff is not None else []
    record = get_record(recid)
    # Element 4 of a field instance is its global field position.
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for aff in affs:
        record_add_subfield_into(record,
                                 '100',
                                 'u',
                                 aff,
                                 field_position_global=auth_location)
    return print_rec(record)