Example #1
0
def national_authors_list(req, search_country):
    """Stream a CSV report of records whose authors are affiliated with
    *search_country*, one row per matching author.

    The first author row of each record also carries the record-level
    columns (recid, title, creation date, publisher, total author count);
    subsequent author rows leave those columns empty.

    @param req: mod_python-style request object (headers + output stream)
    @param search_country: substring matched against the 'w' (country)
        subfield of 100/700 author fields
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country,))
    req.write("#;RECID;Title;Creation date;Publisher;Total # of authors;Authors name(given country only);Authors country;Authors affiliations\n")

    for number, recid in enumerate(ids):
        # Fetch the record once and reuse it; the original fetched the
        # same record three times per recid.
        rec = get_record(recid)
        title = record_get_field_value(rec, '245', code="a")
        creation_date = get_creation_date(recid)
        publisher = record_get_field_value(rec, '980', code="b")

        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = 0
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            # collect all affiliation subfields, comma-joined
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = 1

                    # only authors matching the requested country are listed
                    if hit:
                        authors.append({'name': name,
                                        'affiliation': aff.replace('\n', ''),
                                        'country': country})

        for i, author in enumerate(authors):
            if i == 0:
                req.write("%s;%s;%s;%s;%s;%s;%s;%s;%s\n" % (number+1, recid, title.replace('\n',''), creation_date, publisher, author_count, author['name'], author['country'], author['affiliation']))
            else:
                req.write(";;;;;;%s;%s;%s\n" % (author['name'], author['country'], author['affiliation']))
def index(req):
    """Stream a CSV summary of every record in every journal collection
    of JOURNALS: recid, journal, dates, title, authors, publication info.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; filename=scoap3_records_info.csv')

    req.write("SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n")
    for key, value in JOURNALS.iteritems():
        recids = perform_request_search(c=value)
        for recid in recids:
            rec = get_record(recid)
            # 245__a: title (first field instance, first subfield)
            title = rec['245'][0][0][0][1].strip()
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            # 100: first author; 700 instances are additional authors
            authors = rec['100'][0][0][0][1]
            if '700' in rec:
                for author in rec['700']:
                    authors += ' / %s' % (author[0][0][1])
            publication_info = ''
            if '733' in rec:
                # assumes the first four 733 subfields appear in order
                # journal/volume/year/pages -- TODO confirm against data
                publication_info += "%s %s (%s) %s" % (rec['733'][0][0][0][1], rec['733'][0][0][1][1], rec['733'][0][0][2][1], rec['733'][0][0][3][1])
            if '024' in rec:
                publication_info += " %s" % (rec['024'][0][0][0][1],)
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1],)

            # mod_python request objects expose write(), not writeline();
            # the rest of this file uses req.write() consistently.
            req.write("%s; %s; %s; %s; %s; %s; %s\n" % (recid,
                                                        value,
                                                        creation_date,
                                                        modification_date,
                                                        title,
                                                        authors,
                                                        publication_info))
Example #3
0
def get_recids_changes(last_recid, max_recs=10000):
    """Return recids changed since the modification date of *last_recid*,
    grouped as {'ADDED': [...], 'CHANGED': [...], 'DELETED': [...]}.

    @param last_recid: recid to start from, or -1 to start at the oldest
        record in the database
    @param max_recs: maximum number of modified records to inspect
    @return: dict of recid lists, or None when no valid start point exists
    """
    search_op = '>'
    if last_recid == -1:
        l = list(dbquery.run_sql("SELECT id FROM bibrec ORDER BY creation_date ASC LIMIT 1"))
        search_op = '>='
    else:
        # let's make sure we have a valid recid (or get the close valid one)
        l = list(dbquery.run_sql("SELECT id FROM bibrec WHERE id >= %s LIMIT 1", (last_recid,)))
    if not len(l):
        # empty bibrec table, or no recid >= last_recid; the original
        # only guarded the second branch and would IndexError on the first
        return
    last_recid = l[0][0]

    # there is no api to get this (at least i haven't found it)
    mod_date = search_engine.get_modification_date(last_recid, fmt="%Y-%m-%d %H:%i:%S")
    if not mod_date:
        return
    modified_records = list(dbquery.run_sql("SELECT id,modification_date, creation_date FROM bibrec "
                    "WHERE modification_date " + search_op + "%s LIMIT %s", (mod_date, max_recs)))

    out = {'DELETED': [], 'CHANGED': [], 'ADDED': []}
    for recid, mod_date, create_date in modified_records:
        if mod_date == create_date:
            # never touched after creation -> new record
            out['ADDED'].append(recid)
        else:
            rec = search_engine.get_record(recid)
            status = bibrecord.record_get_field_value(rec, tag='980', code='c')
            if status == 'DELETED':
                out['DELETED'].append(recid)
            else:
                out['CHANGED'].append(recid)
    return out
def create_xml(recid=None, osti_id=None, doi=None):
    osti_exists = False
    doi_exists = False
    osti_mismatch = False
    mismatches = []
    osti_subfields = [('9', 'OSTI'), ('a', osti_id)]
    record = get_record(recid)
    record_link = '<a href="http://inspirehep.net/record/%s">%s</a>' % (str(recid),str(recid))
    append_record = {}
    additions = False
    errors = None
    for item in BibFormatObject(recid).fields('035__'):
        if item.has_key('9') and item.has_key('a'):
            if item['9'] == 'OSTI' and item['a'] == osti_id:
                osti_exists = True
            elif item['9'] == 'OSTI' and item['a'] != osti_id:
                osti_mismatch = True
                mismatches.append(item['a'])
    for item in BibFormatObject(recid).fields('0247_'):
        if item.has_key('2') and item.has_key('a'):
            if item['2'] == 'DOI' and item['a'] == doi:
                doi_exists = True
    if osti_exists is False and osti_mismatch is True:
        print str(recid), "already has a different OSTI ID"
        errors = "doi %s in record %s should match OSTI ID %s, but the record already contains OSTI ID(s) %s<br />" % (doi, record_link, osti_id, ','.join(mismatches))
        return errors
    if doi_exists is False and osti_exists is True:
        print str(recid), "contains an OSTI ID but no doi"
        no_doi = "%s contains OSTI ID %s but not doi %s<br />"  % (record_link, osti_id, doi)
        return no_doi
    if osti_exists is False and osti_mismatch is False:
        record_add_field(append_record, '001', controlfield_value=str(recid))
        record_add_field(append_record, '035', '', '', subfields=osti_subfields)
        print "%s: added 035__a:%s" % (str(recid), osti_id)
        return print_rec(append_record)
def create_xml(recid):
    correct_record = {}
    tag = '8564_'
    record = get_record(recid)
    flag = None
    record_add_field(record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, tag[0:3], tag[3], tag[4])
    correct_subfields = []
    for field_instance in field_instances:
        correct_subfields = []
#        print field_instance
        for c,v in field_instance[0]:
#            print c,v
            matchObj = re.search(r'inspirehep\.net/record/\d+/files/fermilab-thesis-.*?\.pdf', v, flags=re.IGNORECASE)
            if matchObj:
                print 'yes'
                flag = True
                correct_subfields.append(('y', 'Fulltext'))
            correct_subfields.append((c,v))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
            subfields=correct_subfields)
    if flag:
        return print_rec(correct_record)
    else:
        return None
def create_xml(recid):
    """
    Searches for duplicate instances of 773 and keeps the good one.

    Acts only when the record has exactly two identical 773__p values;
    the two 773 field instances are then merged into a single 773 with
    deduplicated subfields.  Returns the MARCXML of the merged record,
    or None when the record does not match that pattern.
    """
    tag = '773__'
    tag_value = tag + 'p'
    # 773__p values are used to detect the duplicated field
    journal = get_fieldvalues(recid, tag_value)
    if len(journal) == 2 and journal[0] == journal[1]:
        record = get_record(recid)
        correct_record = {}
        record_add_field(correct_record, '001', \
            controlfield_value=str(recid))
        field_instances = record_get_field_instances(record, \
                              tag[0:3], tag[3], tag[4])
        # NOTE: correct_subfields is deliberately shared across BOTH field
        # instances -- the duplicates are merged into one 773 field.
        correct_subfields = []
        c_value = False
        for field_instance in field_instances:
            for code, value in field_instance[0]:
                if value == 'To appear in the proceedings of':
                    # drop this boilerplate subfield entirely
                    pass
                elif (code, value) not in correct_subfields:
                    if code == 'c':
                        # keep only the longest $$c value seen
                        # (assumed to be the fuller page range -- TODO confirm)
                        if c_value:
                            if len(value) > len(c_value):
                                c_value = value
                        else:
                            c_value = value
                    else:
                        correct_subfields.append((code, value))
        if c_value:
            correct_subfields.append(('c', c_value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                    subfields=correct_subfields)
        return print_rec(correct_record)
    return None
def create_xml(recid, tags):
    """Create xml file to replace to 100, 700 block.

    For each author field in *tags*, every $$v (raw affiliation) subfield
    is translated via get_aff() and the results appended as $$u subfields
    (the original $$v is kept).  Translations are memoized in the
    module-level AFFILIATIONS_DONE dict.  Returns the MARCXML only when
    at least one affiliation was translated, else None (implicitly).
    """

    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    flag = None
    for tag in tags:
        field_instances = record_get_field_instances(record, tag[0:3], \
                                                     tag[3], tag[4])
        # NOTE(review): this outer initialisation is dead -- it is
        # re-initialised for every field instance below.
        correct_subfields = []
        for field_instance in field_instances:
            correct_subfields = []
            for code, value in field_instance[0]:
                if code == 'v':
                    try:
                        if VERBOSE:
                            print len(AFFILIATIONS_DONE)
                        # normalise the raw affiliation for use as cache key
                        affiliation_key = re.sub(r'\W+', ' ', value).upper()
                        if not affiliation_key in AFFILIATIONS_DONE:
                            new_values = get_aff(value)
                            AFFILIATIONS_DONE[affiliation_key] = new_values
                        for new_value in AFFILIATIONS_DONE[affiliation_key]:
                            correct_subfields.append(('u', \
                                                     new_value.lstrip(' ')))
                        flag = True
                    except TypeError:
                        # get_aff() result was not iterable -- skip this one
                        pass
                correct_subfields.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                             subfields=correct_subfields)
    if flag:
        return print_rec(correct_record)
Example #8
0
def get_list():
    papers = []
    prev_version = perform_request_search()

    for recid in prev_version:
        rec = get_record(recid)
        doi = None
        arxiv_id = None
        try:
            if ('2', 'DOI') in rec['024'][0][0]:
                for t in rec['024'][0][0]:
                    if 'a' in t:
                        doi = t[1]
                if not doi:
                    print "No DOI for record: %i" % (recid, )
            else:
                print "No DOI for record: %i" % (recid, )
        except:
            print "No DOI for record: %i" % (recid, )

        checksum, url, url_type = get_pdf(recid)

        if '037' in rec.keys():
            if ('9', 'arXiv') in rec.get('037')[0][0]:
                for t in rec.get('037')[0][0]:
                    if 'a' in t:
                        arxiv_id = t[1]

        papers.append((recid, arxiv_id, get_creation_date(recid), checksum, url, url_type, doi))
    return papers
def replace_references(recid):
    """Replace the references (999 fields) of a record.

    The stored record is NOT modified; the MARCXML of the record with
    the freshly extracted references is returned instead.

    Parameters:
    * recid: the id of the record
    """
    # Extract and parse the record's references.
    references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode("utf-8"))

    record = get_record(recid)

    if not references[0]:
        return None

    # Swap the old 999 fields for the newly extracted ones.
    new_fields = record_get_field_instances(references[0], tag="999", ind1="%", ind2="%")
    record_delete_fields(record, "999")
    record_add_fields(record, "999", new_fields)
    return record_xml_output(record)
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to show in the "hm" (MARC) diff view; the
        value "All tags" selects the whole diff.  Presumably an iterable
        of tag strings -- TODO confirm against callers.
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    # "hm" output: render a line-based diff of the MARC representation
    if "hm" == output_format:
        result = "<pre>\n"
        # NOTE(review): when outputTags is empty, this branch is still
        # taken but the inner loop iterates nothing, yielding an empty
        # diff -- the full-diff branch below may have been intended.
        if "All tags" not in outputTags or not outputTags:
            diff_result = _get_record_diff(record_id, old_record, updated_record)
            for line in diff_result.split('\n')[:-1]:
                for tag in outputTags:
                    # token 1 of a plain diff line is assumed to hold the
                    # tag; may IndexError on short lines -- TODO confirm
                    if tag in line.split()[1]:
                        result += line.strip() + '\n'
                    elif '<strong' in line:
                        # changed lines are wrapped in <strong>, shifting
                        # the tag to token 3
                        if tag in line.split()[3]:
                            result += line.strip() + '\n'
        else:
            result += _get_record_diff(record_id, old_record, updated_record)

        result += "</pre>"
        return result

    # any other format: delegate to bibformat on the updated XML
    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    * overwrite: when False, refuse to touch a record that already has
      references (a 999 field)

    Raises RecordHasReferences when the record already has references
    (with overwrite disabled) or carries curated references (999C59).
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, "999"):
            raise RecordHasReferences("Record has references and overwrite " "mode is disabled: %s" % recid)

    # 999C59 values mark curated references -- never overwrite those
    if get_fieldvalues(recid, "999C59"):
        raise RecordHasReferences("Record has been curated: %s" % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME, dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, "w")
    temp_file.write(references_xml.encode("utf-8"))
    temp_file.close()

    # Update record: queue a bibupload correction (-c) task; the temp
    # file is consumed by that task, not deleted here.
    task_low_level_submission("bibupload", "refextract", "-P", "5", "-c", temp_path)
def record_get_keywords(record, main_field=bconfig.CFG_MAIN_FIELD, others=bconfig.CFG_OTHER_FIELDS):
    """Returns a dictionary of keywordToken objects from the marc
    record. Weight is set to (0,0) if no weight can be found.

    This will load keywords from the field 653 and 695__a (which are the
    old 'DESY' keywords)

    @param record: int or marc record; if int, the marc record is loaded
        from the database, otherwise keywords are extracted from the
        record instance passed in
    @param main_field: marc code (or list of codes) of the main keyword field
    @param others: marc code (or list of codes) of additional keyword fields
    @return: tuple (found, keywords, marcrec)
        found - int indicating how many main_field keywords were found
            the other fields are not counted
        keywords - standard dictionary of keywordToken objects
        marcrec - marc record object loaded with data
    """
    keywords = {}

    # accept a single code or a list of codes for both parameters
    if isinstance(main_field, basestring):
        main_field = [main_field]
    if isinstance(others, basestring):
        others = [others]

    if isinstance(record, int):
        rec = get_record(record)
    else:
        rec = record

    found = 0
    for m_field in main_field:
        tag, ind1, ind2 = bibclassify_engine._parse_marc_code(m_field)
        for field in rec.get(tag, []):
            keyword = ""
            weight = 0
            kw_type = ""  # renamed from `type` -- avoid shadowing the builtin

            for subfield in field[0]:
                if subfield[0] == "a":
                    keyword = subfield[1]
                elif subfield[0] == "n":
                    weight = int(subfield[1])
                elif subfield[0] == "9":
                    kw_type = subfield[1]
            if keyword:
                found += 1
                # one (0, 0) placeholder occurrence per unit of weight
                keywords[bor.KeywordToken(keyword, type=kw_type)] = [[(0, 0) for _ in range(weight)]]

    if others:
        for field_no in others:
            tag, ind1, ind2 = bibclassify_engine._parse_marc_code(field_no)
            kw_type = "f%s" % field_no
            for field in rec.get(tag, []):
                # only the first $$a of each field instance is taken
                for subfield in field[0]:
                    if subfield[0] == "a":
                        keywords[bor.KeywordToken(subfield[1], type=kw_type)] = [[(0, 0)]]
                        break

    return found, keywords, rec
def create_xml(recid, IDs, tags):
    """
    Replaces specific inspire-ids in records with nothing
    """
    if VERBOSE:
        print "Working on %s" % recid
    record = get_record(int(recid))
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=recid)
    for tag in tags:
        field_instances = record_get_field_instances(record, \
                                                     tag[0:3], tag[3], tag[4])
        for field_instance in field_instances:
            correct_subfields = []
            for code, value in field_instance[0]:
                if code == 'i':
                    if value in IDs:
                        if VERBOSE:
                            print "Getting rid of %s from %s!" % (value, recid)
                        pass
                    else:
                        correct_subfields.append((code, value))
                else:
                    correct_subfields.append((code, value))

            record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                             subfields=correct_subfields)
    return print_rec(correct_record)
Example #14
0
def late(req):
    """Render an HTML table per journal showing each record's DOI
    registration date versus its arrival in SCOAP3, colour-coded:
    green when it arrived before registration, orange when within a day,
    red when later, grey when no DOI registration date is known."""
    req.content_type = "text/html"
    print >> req, pageheaderonly("Late journals", req=req)
    for journal in CFG_JOURNALS:
        print >> req, "<h2>%s</h2>" % escape(get_coll_i18nname(journal))
        results = get_collection_reclist(journal)
        print >> req, "<table>"
        print >> req, "<tr><th>DOI</th><th>Title</th><th>DOI registration</th><th>Arrival in SCOAP3</th></tr>"
        for recid in results:
            creation_date = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (recid, ))[0][0]
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            title = record_get_field_value(record, '245', code='a')
            doi_date = run_sql("SELECT creation_date FROM doi WHERE doi=%s", (doi, ))
            background = "#eee"
            if doi_date:
                doi_date = doi_date[0][0]
                if (creation_date - doi_date).days < 0:
                    background = "#66FF00"
                elif (creation_date - doi_date).days < 1:
                    background = "#FF6600"
                else:
                    background = "#FF0000"
            else:
                doi_date = ''
            # Fixes over the original markup: the <a> tag is now closed,
            # and the title is escaped like the other interpolated values.
            print >> req, '<tr style="background-color: %s;"><td><a href="http://dx.doi.org/%s" target="_blank">%s</a></td><td>%s</td><td>%s</td><td>%s</td></tr>' % (
                    background,
                    escape(doi, True),
                    escape(doi),
                    escape(title),
                    doi_date,
                    creation_date)
        print >> req, "</table>"
def get_ids_from_recid(recid):
    """Return [recid, doi, arxiv eprint, other-site id] plus any report
    numbers for the given record."""
    record = get_record(recid)

    ## Retrieving DOI
    doi = ""
    dois = [candidate
            for candidate in record_get_field_values(record, "024", "7", code="a")
            if candidate.startswith("10.")]
    if len(dois) > 1:
        print >> sys.stderr, "WARNING: record %s have more than one DOI: %s" % (recid, dois)
    elif len(dois) == 1:
        doi = dois[0]

    ## Retrieving arXiv eprint
    eprint = ""
    oai_prefix = "oai:arXiv.org:"
    eprints = [value[len(oai_prefix):]
               for value in record_get_field_values(record, "035", code="a")
               if value.lower().startswith("oai:arxiv.org:")]
    if len(eprints) > 1:
        print >> sys.stderr, "WARNING: record %s have more than one arXiv eprint: %s" % (recid, eprints)
    elif len(eprints) == 1:
        eprint = eprints[0]

    ## Retrieving Other service ID (last matching 035 wins)
    other_id = ""
    for field in record_get_field_instances(record, "035"):
        subfields = dict(field_get_subfield_instances(field))
        if subfields.get("a") and subfields.get("9", "").upper() == CFG_OTHER_SITE.upper():
            other_id = subfields["a"]

    reportnumbers = record_get_field_values(record, "037", code="a")
    return [str(recid), doi, eprint, other_id] + reportnumbers
Example #16
0
def get_record_checks(req, recids):
    """Return HTML <tr> rows with compliance checks for each recid in the
    comma-separated *recids* string.

    A token that is not a valid record id (or whose checks fail) is
    rendered as a section-title row followed by a column-header row.
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
            </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)), recid,
                        get_creation_date(recid),
                        get_modification_date(recid),
                        doi, doi,
                        has_or_had_format(recid, '.xml'),
                        has_or_had_format(recid, '.pdf'),
                        has_or_had_format(recid, '.pdf;pdfa'),
                        check_complete_rec(record_compl),
                        get_arxiv(rec),
                        is_compliant(recid, "authors"),
                        is_compliant(recid, "cc"),
                        is_compliant(recid, "scoap3"),
                        str([rec_key for rec_key, rec_val
                             in record_compl.iteritems() if not rec_val])))
        except Exception:
            # Broad on purpose: non-numeric tokens act as section titles
            # (int() raises ValueError), and any failing check for a real
            # recid also falls back to a header row.  Was a bare `except:`,
            # which even caught SystemExit/KeyboardInterrupt.
            recid = rid
            return_val.append("""<tr><th colspan="13" align="left">
                               <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>notes</th>
            </tr>""")
    return ''.join(return_val)
Example #17
0
def _strip_bold(markup):
    """Remove a literal '<b>' prefix and '</b>' suffix from *markup*.

    The original code used str.lstrip('<b>') / rstrip('</b>'), which
    strip character *sets*, not prefixes -- e.g. 'both'.lstrip('<b>')
    yields 'oth' -- and could eat legitimate leading/trailing characters.
    """
    text = str(markup)
    if text.startswith('<b>'):
        text = text[3:]
    if text.endswith('</b>'):
        text = text[:-4]
    return text


def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Build a semicolon-separated compliance report for the journals in
    *journal_list*, covering records created/modified in [f_date, t_date].

    @param req: request object (unused here; kept for interface parity)
    @param dictionary: maps journal name -> collection identifier
    @param journal_list: journal names (keys of *dictionary*) to report
    @param f_date: range start (string)
    @param t_date: range end (string)
    @param created_or_modified_date: which date index the range filters on
    @return: the whole report as one CSV string
    """
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        if papers == []:
            continue

        # journal section header followed by the column titles
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'arXiv category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            first_del, first_ab_del, last_mod, doi_reg, pdfa_del = get_delivery_data(recid, doi)

            record_compl = is_complete_record(recid)
            return_val += ';'.join(str(item) for item in [str(recid),
                                   get_creation_date(recid),
                                   get_modification_date(recid),
                                   doi,
                                   _strip_bold(has_or_had_format(recid, '.xml')),
                                   _strip_bold(has_or_had_format(recid, '.pdf')),
                                   _strip_bold(has_or_had_format(recid, '.pdf;pdfa')),
                                   str(check_complete_rec(record_compl)),
                                   _strip_bold(get_arxiv(rec)),
                                   _strip_bold(is_compliant(recid, 'authors')),
                                   _strip_bold(is_compliant(recid, 'cc')),
                                   _strip_bold(is_compliant(recid, 'scoap3')),
                                   _strip_bold(is_compliant(recid, 'category')),
                                   str([rec_key for rec_key, rec_val in record_compl.iteritems() if not rec_val]),
                                   str(first_del),
                                   str(first_ab_del),
                                   str(last_mod),
                                   str(pdfa_del),
                                   str(doi_reg),
                                   check_24h_delivery(first_del, doi_reg),
                                   check_24h_delivery(pdfa_del, doi_reg)
                                   ])
            return_val += '\n'

    return return_val
def create_xml(recid):
    """Rewrite the journal field of *recid*: replace its $$p subfield
    with the module-level `repl_journal` and, when `volume_letter` is
    set, prefix non-alphabetic $$v volume values with it.

    NOTE(review): relies on module-level globals `tag`, `repl_journal`
    and `volume_letter` being defined by the surrounding script --
    confirm they are set before this is called.
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, tag[0:3],
                                                     tag[3], tag[4])
    # NOTE(review): dead initialisation -- re-initialised per instance below
    correct_subfields = []
    for field_instance in field_instances:
        correct_subfields = []
        for code, value in field_instance[0]:
            if volume_letter:
                if code == 'p':
                    # replace the journal title
                    correct_subfields.append(('p', repl_journal))
                elif code == 'v':
                    volume = get_fieldvalues(recid, '773__v')
                    for v in volume:
                        if v[0].isalpha():
                            # volume already starts with a letter -- keep as-is
                            correct_subfields.append(('v', v))
                        else:
                            new_volume = volume_letter + v
                            correct_subfields.append(('v', new_volume))
                else:
                    correct_subfields.append((code, value))
            else:
                if code == 'p':
                    correct_subfields.append(('p', repl_journal))
                else:
                    correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                             subfields=correct_subfields)
    return print_rec(correct_record)
    def test_create_record_and_signatures(self):
        """Test creating record and signatures."""
        import querying
        from invenio.search_engine import get_record
        # Reset the module-level id generator so signature ids are stable.
        querying.consecutive_id = count()

        recid = 123456
        marc_record = get_record(recid)
        signatures = querying.create_signatures(recid, marc_record)
        record = querying.create_record(recid, marc_record)

        expected_record = {
            'publication_id': '123456',
            'title': 'Target mass corrections in QCD',
            'year': '1980',
            'authors': ['Frazer, W R', 'Gunion, J F'],
        }

        expected_signatures = [
            {'publication_id': '123456',
             'signature_id': '123456_Frazer, W R_0',
             'author_affiliation': '',
             'author_name': 'Frazer, W R'},
            {'publication_id': '123456',
             'signature_id': '123456_Gunion, J F_1',
             'author_affiliation': '',
             'author_name': 'Gunion, J F'},
        ]

        self.assertEqual(signatures, expected_signatures)
        self.assertEqual(record, expected_record)
def record_get_keywords(recid, argd):
    """Returns a list of pairs [keyword, weight] contained in the
    record. Weight is set to 0 if no weight can be found.

    When argd['sort'] == 'related', the weight is instead the number of
    records matching the keyword, and zero-weight keywords are dropped.
    """
    keywords = []

    rec = get_record(recid)

    # 653 holds the keywords: $$a keyword text, $$n numeric weight
    for field in rec.get('653', []):
        keyword = ''
        weight = 0
        for subfield in field[0]:
            if subfield[0] == 'a':
                keyword = subfield[1]
            elif subfield[0] == 'n':
                weight = int(subfield[1])
        if argd['sort'] == 'related':
            # Weight = number of records matching this keyword.
            # NOTE(review): the original comment claimed "minus 1 in
            # order to not consider the source document", but nothing is
            # subtracted -- the count includes the source record; confirm
            # which behaviour is intended before changing.
            weight = len(perform_request_search(p='"%s"' % keyword,
                f='keyword'))
            if weight:
                keywords.append([keyword, weight])
        else:
            keywords.append([keyword, weight])

    return keywords
Example #21
0
def load_ticket_templates(recId):
    """
    Loads all enabled ticket plugins and calls them.
    @return dictionary with the following structure:
        key: string: name of queue
        value: dict: a dictionary with 2 keys,
        the template subject and content of the queue
    @rtype dict
    """
    ticket_templates = {}
    all_plugins, error_messages = load_ticket_plugins()
    if error_messages:
        # Broken plugins: alert only for now, return no templates.
        print >>sys.stderr, "\n".join(error_messages)
        return ticket_templates

    record = get_record(recId)
    for name, plugin in all_plugins.get_enabled_plugins().items():
        if not plugin:
            raise BibEditPluginException("Plugin not valid in %s" % (name,))
        queue_data = plugin['get_template_data'](record)
        if queue_data:
            ticket_templates[queue_data[0]] = {'subject': queue_data[1],
                                               'content': queue_data[2]}
    return ticket_templates
def match_all_subfields_for_tag(recID, field_tag, subfields_required=()):
    """
    Tests whether the record with recID has at least one field with 'field_tag'
    where all of the required subfields in subfields_required match a subfield
    in the given field both in code and value

    @param recID: record ID
    @type recID: int

    @param field_tag: a 3 digit code for the field tag code
    @type field_tag: string

    @param subfields_required: a list of subfield code/value tuples
    @type subfields_required: list of tuples of strings.
        same format as in get_record():
            e.g. [('w', 't'),
                  ('4', 'XYZ123')]

    @return: boolean
    """
    # Default is an immutable tuple: the old mutable-default list could be
    # shared and mutated across calls.
    required = set(subfields_required)
    rec = get_record(recID)
    # rec.get(): a record without this tag simply matches nothing instead
    # of raising KeyError as rec[field_tag] did.
    for field in rec.get(field_tag, []):
        subfields_present = field[0]
        # Every required (code, value) pair must appear in this field.
        if required <= set(subfields_present):
            return True
    return False
def create_xml(recid, tags, experiment):
    """Build a MARCXML correction for record `recid`.

    For every field listed in `tags`, each $a subfield is looked up via
    convert_search_to_inspire_id() (scoped to `experiment`); on a hit the
    INSPIRE id ($i) and/or ORCID ($j) subfields are inserted before the
    original subfields.

    @return: MARCXML string when at least one id was found, else None.
    """
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    flag = None
    for tag in tags:
        field_instances = record_get_field_instances(record, tag[0:3],
                                                     tag[3], tag[4])
        for field_instance in field_instances:
            # Rebuilt per field instance (the old pre-loop init was dead).
            correct_subfields = []
            for code, value in field_instance[0]:
                if code == 'a':
                    search = 'find a ' + value + ' and exp ' + experiment
                    new_value = convert_search_to_inspire_id(search)
                    if new_value[0]:
                        flag = True
                        correct_subfields.append(('i', new_value[0]))
                    if new_value[1]:
                        flag = True
                        orcid_value = 'ORCID:' + new_value[1]
                        correct_subfields.append(('j', orcid_value))
                correct_subfields.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                             subfields=correct_subfields)
    # Only emit XML when something was actually added.
    if flag:
        return print_rec(correct_record)
def perform_request_holdingpen(request_type, recId, changeId=None):
    """
    A method performing the holdingPen ajax request. The following types of requests can be made:
       getHoldingPenUpdates - retrieving the holding pen updates pending for a given record

    Unknown request types yield an empty response dictionary.
    """
    response = {}
    if request_type == 'getHoldingPenUpdates':
        # List of (changeset id, timestamp) pairs pending for the record.
        pending = get_related_hp_changesets(recId)
        response["changes"] = [(str(entry[0]), str(entry[1]))
                               for entry in pending]
    elif request_type == 'getHoldingPenUpdateDetails':
        # Returning the list of changes related to the holding pen update;
        # the format is based on what the record-difference xtool returns.
        assert changeId != None
        hp_content = get_hp_update_xml(changeId)
        response['record'] = create_record(hp_content[0], "xm")[0]
        databaseRecord = get_record(hp_content[1])
        response['changeset_number'] = changeId
    elif request_type == 'deleteHoldingPenChangeset':
        assert changeId != None
        delete_hp_change(changeId)
    return response
def get_keywords_body(keywords, req, recid, argd):
    """Returns the body associated with the keywords."""
    body = []
    rec = get_record(recid)

    extend_argd(argd)

    if keywords:
        weights_available = 0 not in zip(*keywords)[1]
    else:
        req.write('There are no keywords associated with this document.<br>' \
            '<form action="" method="get">' \
            '  <input type="hidden" name="generate" value="yes">' \
            '  <input type="submit" value="Generate keywords">' \
            '</form>')
        return

    if argd['type'] == 'tagcloud' and not weights_available:
        # No weight is specified for at least one of the keywords.
        # Display the keywords as a list.
        argd['type'] = 'list'

    if argd['type'] == 'tagcloud':
        body.append('<div style="text-align: center; color: red; '
            'font-size: 80%; margin-top: 15px">Single keywords in grey, '
            'composite keywords in blue.</div>')

    if argd['type'] == 'list':
        # Display keywords as a list.
        body.append(_get_keywords_list(keywords, argd))
    elif argd['type'] == 'tagcloud':
        if argd['sort'] == 'related' and not keywords:
            print 'No similar document was found.'

        # Separate single and composite keywords.
        single_keywords, composite_keywords = [], []
        for keyword in keywords:
            if ': ' in keyword[0]:
                composite_keywords.append(keyword)
            else:
                single_keywords.append(keyword)

        # Display keywords as a tag cloud.
        single_levels = _get_font_levels(single_keywords)
        composite_levels = _get_font_levels(composite_keywords)

        body.append(_get_html_tag_cloud(single_levels +
            composite_levels, argd))
    elif argd['type'] == 'xml':
        body.append('<pre><code>%s</code></pre>' %
            escape_html(record_xml_output(rec, ['653'])))
    else:
        body = 'Unknown type: ' + argd['type']

    out = ''
    for element in body:
        out += '<br>' + element.encode('utf-8')
    req.write(out)
    return
Exemple #26
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping, or None when it does not exist."""
    if not record_exists(recid):
        return
    revision_ids = get_record_revision_ids(recid)
    if not revision_ids:
        # No stored revisions: fall back to the live record.
        return get_record(recid)
    # Parse the MARCXML of the most recent revision.
    return create_record(get_marcxml_of_revision_id(max(revision_ids)))[0]
    def check_arxiv(recid):
        """Return True when the record carries an arXiv category (037 $a)."""
        record = get_record(recid)
        return any(
            category.startswith('arXiv')
            for report_tag in record_get_field_instances(record, "037")
            for category in field_get_subfield_values(report_tag, 'a'))
def create_xml(recid, arxiv_ids):
    """Return MARCXML keeping only the 856 4_ fields of `recid` whose URL
    matches one of `arxiv_ids` (per check_arxiv_url)."""
    source = get_record(recid)
    attachments = record_get_field_instances(source, tag='856', ind1='4')
    keep = [field for field in attachments
            if check_arxiv_url(field, arxiv_ids)]
    result = {}
    record_add_field(result, '001', controlfield_value=str(recid))
    record_add_fields(result, '856', keep)
    return print_rec(result)
def create_xml(recid, fname=None, oaff=None):
    """Append each affiliation in `oaff` as a 100 $u subfield of the first
    author field, drop all 700 fields, and return the record as MARCXML.

    @param fname: unused; kept for interface compatibility.
    @param oaff: iterable of affiliation strings, or None for none.
    """
    # BUGFIX: oaff defaults to None; iterating it unguarded raised
    # TypeError whenever the caller omitted it.
    affs = list(oaff) if oaff is not None else []
    record = get_record(recid)
    # Global position of the first 100 field, where the $u subfields go.
    auth_location = record_get_field_instances(record, '100', '', '')[0][4]
    record_delete_field(record, '700', '', '')
    for aff in affs:
        record_add_subfield_into(record, '100', 'u', aff,
                                 field_position_global=auth_location)
    return print_rec(record)
def create_our_record(recid):
    """Rebuild the 980 fields of `recid`, dropping duplicates (via the
    OurInstance wrapper) and any instance whose only subfield is
    ('a', 'unknown'); return the result as MARCXML."""
    source = get_record(recid)
    deduplicated = set(
        OurInstance(instance)
        for instance in record_get_field_instances(source, '980')
        if field_get_subfield_instances(instance) != [('a', 'unknown')])

    result = {}
    record_add_field(result, '001', controlfield_value=str(recid))
    record_add_fields(result, '980',
                      [wrapper.field for wrapper in deduplicated])
    return print_rec(result)
def tarballs_by_recids(recids, sdir):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids.

    @param: recids (string): the record id or ids; comma-separated, each
            item either a single id or a 'low-high' range
    @param: sdir (string): where the tarballs should live

    @return: tarballs ([string, string, ...]): locations of tarballs
    """
    list_of_ids = []

    if ',' in recids:
        for recid in recids.split(','):
            if '-' in recid:
                low, high = recid.split('-')
                # NOTE(review): range() excludes `high`; confirm whether
                # ranges are meant to be inclusive.
                list_of_ids.extend(range(int(low), int(high)))
            else:
                list_of_ids.append(int(recid))
    else:
        if '-' in recids:
            # BUGFIX: this branch referenced the undefined name `recid`
            # (NameError); it must parse `recids`.
            low, high = recids.split('-')
            list_of_ids = range(int(low), int(high))
        else:
            # BUGFIX: a single id must still live in a list so the loop
            # below can iterate it (was a bare int).
            list_of_ids = [int(recids)]

    arXiv_ids = []

    for recid in list_of_ids:
        rec = get_record(recid)
        for afieldinstance in record_get_field_instances(rec, tag='037'):
            # Guard: 037 fields without a $9 subfield used to raise
            # IndexError on the unconditional [0].
            sources = field_get_subfield_values(afieldinstance, '9')
            if sources and sources[0] == 'arXiv':
                arXiv_id = field_get_subfield_values(afieldinstance, 'a')[0]
                arXiv_ids.append(arXiv_id)

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Exemple #32
0
def usa_papers_csv(req):
    """Stream a CSV listing, per selected US affiliation and per journal,
    each record title with a link to its SCOAP3 page."""
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=usa_papers.csv')

    li = "%s; https://repo.scoap3.org/record/%s"

    ## print the list of links to the articles
    for university in CFG_SELECTED_AFF:
        print >> req, university
        search = create_search_from_affiliation(university)
        for collection in CFG_JOURNALS:
            res = perform_request_search(p='(%s)' % (search,), c=collection)
            if len(res):
                print >> req, collection
                for rec_id in res:
                    rec = get_record(rec_id)
                    # BUGFIX: the template uses %-style placeholders, so it
                    # must be filled with the % operator; str.format() on it
                    # returned the template text verbatim.
                    line = li % (str(rec['245'][0][0][0][1]), str(rec_id))
                    print >> req, line
                print >> req, ""
        print >> req, ""
        print >> req, ""
Exemple #33
0
    def tokenize(self, recID):
        """Collect the subfield values selected by self.rules from record
        `recID`.

        Each rule is (tag_to_index, necessary_tag, necessary_value): the
        subfield tag_to_index[5] is collected from every matching field,
        optionally constrained to fields where necessary_tag is present
        and/or equals necessary_value (empty string means no constraint).
        Returns [] when the record lookup raises KeyError.
        """
        phrases = []
        try:
            rec = get_record(recID)

            for rule in self.rules:
                tag_to_index, necessary_tag, necessary_value = rule
                core_tag = tag_to_index[0:3]
                ind = tag_to_index[3:5]
                sub_tag = tag_to_index[5]

                fields = [dict(instance[0]) for instance in
                          record_get_field_instances(rec, core_tag,
                                                     ind[0], ind[1])]
                for field in fields:
                    # `in` replaces the deprecated dict.has_key().
                    tag_condition = necessary_tag and necessary_tag in field \
                                    or necessary_tag == ''
                    value_condition = necessary_value and \
                        field.get(necessary_tag, '') == necessary_value or \
                        necessary_value == ''
                    if tag_condition and sub_tag in field and value_condition:
                        phrases.append(field[sub_tag])
            return phrases
        except KeyError:
            return []
        # BUGFIX: removed an unreachable trailing `return phrases` -- both
        # the try and the except branch already return.
Exemple #34
0
def impact_articles(req, year):
    """Stream a CSV of authors (name, ORCID, affiliations, countries) for
    every record created in `year`.  Years before 2014 are rejected with
    HTTP 400."""
    try:
        year = int(year)
        assert 2014 <= year
    # Narrowed from a bare except: only the expected validation failures.
    except (ValueError, TypeError, AssertionError):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)

    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=impact_articles.csv')

    ids = perform_request_search(p="datecreated:{year}-01-01->{year}-12-31".format(year=year))
    counter = 0
    print >> req, "#;recid;journal;author;orcid;affiliation;countries"
    for i in ids:
        counter += 1
        try:
            rec = get_record(i)
        except Exception:
            print >> req, "{c},{recid},Can't load metadata".format(c=counter, recid=i)
            continue
        journal = record_get_field_value(rec, tag='773', code='p')
        for field in ['100', '700']:
            if field in rec:
                for author in rec[field]:
                    name = ""
                    orcid = ""
                    aff = ""
                    country = ""
                    for key, val in author[0]:
                        # BUGFIX: compare subfield codes with ==, not `is`;
                        # identity on string literals is an interning
                        # accident and not guaranteed to hold.
                        if key == 'a':
                            name = unicode(val, 'UTF-8').replace('\n', ' ').strip()
                        if key == 'j':
                            orcid = unicode(val, 'UTF-8').replace('\n', ' ').strip()
                        if key in ['v', 'u']:
                            aff += unicode(val, 'UTF-8').replace('\n', ' ').strip() + " | "
                        if key == 'w':
                            country += unicode(val, 'UTF-8').replace('\n', ' ').strip() + ";"
                    print >> req, "{c};{recid};{journal};{name};{orcid};{aff};{country}".format(c=counter, recid=i, journal=journal, name=name, orcid=orcid, aff=aff, country=country)
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags=""):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags used to filter the "hm" diff output; may
        contain the literal entry "All tags" to disable filtering.
        NOTE(review): despite the "" default this is iterated like a
        sequence of tag strings -- confirm callers always pass one.
    """
    # Apply the pending update commands, then serialise the new version.
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    if "hm" == output_format:
        # MARC ("hm") output: render a diff of old vs updated record,
        # optionally restricted to the requested tags.
        result = "<pre>\n"
        if "All tags" not in outputTags or not outputTags:
            diff_result = _get_record_diff(record_id, old_record,
                                           updated_record)
            # Keep only diff lines whose tag column matches an outputTag.
            for line in diff_result.split('\n')[:-1]:
                for tag in outputTags:
                    if tag in line.split()[1]:
                        result += line.strip() + '\n'
                    elif '<strong' in line:
                        # Highlighted (changed) lines carry extra markup,
                        # so the tag sits in a later column.
                        if tag in line.split()[3]:
                            result += line.strip() + '\n'
        else:
            # "All tags" requested: emit the unfiltered diff.
            result += _get_record_diff(record_id, old_record, updated_record)

        result += "</pre>"
        return result

    # Any other format: delegate to BibFormat on the updated XML.
    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Exemple #36
0
def generate_mediaexport_basket(basket_id):
    """
    Exports the content of a basket. Takes each record from a basket and
    calls either generate_mediaexport_album or generate_mediaexport.

    :param str basket_id: The basket id.
    :returns: JSON string with an 'entries' list.
    """
    records = get_basket_content(basket_id, format='')
    recids = [record[0] for record in records]

    output = {}
    output['entries'] = []
    for record_id in recids:
        # For each record_id return metadata
        record = get_record(record_id)
        if not record:
            # There is no record, for example when the record_id < 0 (external
            # resource). Skip it.
            continue
        report_number = record_get_field_value(record, *('037', ' ', ' ', 'a'))
        album_dict = generate_mediaexport_album(record_id, report_number, False)
        album_entries = album_dict.get('entries', None)
        if album_entries:
            output['entries'].append(album_entries)
        else:
            # If it's not an album, check if it's an image
            is_image = False
            collections = record_get_field_values(record, *('980', ' ', ' ', 'a'))
            # BUGFIX: extend, not append -- append inserted a list object
            # into the list of strings, so the "PHOTO" substring test below
            # never matched values coming from 980 $b.
            collections.extend(record_get_field_values(record, *('980', ' ', ' ', 'b')))
            for collection in collections:
                if "PHOTO" in collection:
                    is_image = True
                    break
            tirage = report_number.rsplit("-", 1)[-1]
            media_dict = generate_mediaexport(record_id, is_image, report_number, tirage, False, False)
            if media_dict:
                output['entries'].append(media_dict)

    return json.dumps(output)
Exemple #37
0
def marcxml_filter_out_tags(recid, fields):
    """
    Returns the fields of record 'recid' that share the same tag and
    indicators as those specified in 'fields', but for which the
    subfield is different. This is nice to emulate a bibupload -c that
    corrects only specific subfields.

    Parameters:
           recid - *int* the id of the record to process

          fields - *list(str)* the list of fields that we want to filter
                   out. Eg ['909COp', '909COo']
    """
    record = get_record(recid)

    # First strip the subfields that are going to be replaced.
    for field in fields:
        record_delete_subfield(record,
                               tag=field[0:3],
                               ind1=field[3:4],
                               ind2=field[4:5],
                               subfield_code=field[5:6])

    # Then serialise the datafields sharing tag + indicators, making sure
    # each tag/indicator combination is processed only once.
    out = ''
    seen = []
    for field in fields:
        tag_and_ind = field[0:5]
        if tag_and_ind in seen:
            continue
        seen.append(tag_and_ind)
        for datafield in record.get(field[0:3], []):
            ind1_matches = datafield[1] == field[3:4].replace('_', ' ')
            ind2_matches = datafield[2] == field[4:5].replace('_', ' ')
            if ind1_matches and ind2_matches and datafield[0]:
                out += field_xml_output(datafield, field[0:3]) + '\n'

    return out
Exemple #38
0
def late(req):
    """Render an HTML table per journal comparing each record's DOI
    registration date with its arrival date in SCOAP3, colour-coding
    the delay."""
    req.content_type = "text/html"
    print >> req, pageheaderonly("Late journals", req=req)

    # Table header and row templates.
    th = ("<tr><th>DOI</th><th>Title</th><th>DOI registration</th>"
          "<th>Arrival in SCOAP3</th></tr>")
    tr = ("<tr style='background-color: {0};'><td>"
          "<a href='http://dx.doi.org/{1}' target='_blank'>{2}</td>"
          "<td>{3}</td><td>{4}</td><td>{5}</td></tr>")

    sql_bibrec = "SELECT creation_date FROM bibrec WHERE id=%s"
    sql_doi = "SELECT creation_date FROM doi WHERE doi=%s"

    for journal in CFG_JOURNALS:
        print >> req, "<h2>%s</h2>" % escape(get_coll_i18nname(journal))
        results = get_collection_reclist(journal)
        print >> req, "<table>"
        print >> req, th
        for recid in results:
            creation_date = run_sql(sql_bibrec, (recid, ))[0][0]
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            title = record_get_field_value(record, '245', code='a')
            doi_date = run_sql(sql_doi, (doi, ))
            # Colour by delay: grey when the DOI date is unknown, green
            # when the record arrived before the DOI, orange for same-day,
            # red for later arrivals.
            background = "#eee"
            if doi_date:
                doi_date = doi_date[0][0]
                if (creation_date - doi_date).days < 0:
                    background = "#66FF00"
                elif (creation_date - doi_date).days < 1:
                    background = "#FF6600"
                else:
                    background = "#FF0000"
            else:
                doi_date = ''
            print >> req, tr.format(background, escape(doi, True), escape(doi),
                                    title, doi_date, creation_date)
        print >> req, "</table>"
def index(req):
    """Stream a semicolon-separated CSV summarising every record of every
    configured journal (dates, title, authors, publication info)."""
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = (
        'attachment; filename=scoap3_records_info.csv')

    req.write(
        "SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n"
    )
    for journal_key, journal in JOURNALS.iteritems():
        for recid in perform_request_search(c=journal):
            rec = get_record(recid)
            title = rec['245'][0][0][0][1].strip() if '245' in rec else ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            # First author from 100, additional authors appended from 700.
            authors = rec['100'][0][0][0][1] if '100' in rec else ""
            for extra_author in rec.get('700', []):
                authors += ' / %s' % (extra_author[0][0][1])
            # Journal reference (733) plus DOI (024) and report number (037).
            publication_info = ''
            if '733' in rec:
                publication_info += "%s %s (%s) %s" % (
                    rec['733'][0][0][0][1], rec['733'][0][0][1][1],
                    rec['733'][0][0][2][1], rec['733'][0][0][3][1])
            if '024' in rec:
                publication_info += " %s" % (rec['024'][0][0][0][1], )
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1], )

            req.write("%s; %s; %s; %s; %s; %s; %s\n" %
                      (recid, journal, creation_date, modification_date,
                       title, authors, publication_info))
def create_xml773(recid):
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, '773', '', '')
    correct_subfields = []
    for field_instance in field_instances:
        correct_subfields = []
        #        print field_instance[0]
        for code, value in field_instance[0]:
            if code == 'p' and value == old_journal:
                correct_subfields.append(('p', repl_journal))
                if VERBOSE:
                    print "%s: Replacing 773__p %s with %s" % (recid, value,
                                                               repl_journal)
            else:
                correct_subfields.append((code, value))
        record_add_field(correct_record,
                         '773',
                         '',
                         '',
                         subfields=correct_subfields)
    return print_rec(correct_record)
Exemple #41
0
def create_xml(recid, experiment):
    """Build a minimal correction record for `recid` that adds a single
    693 field with $e set to `experiment`; return it as MARCXML.

    Dead code removed: an unused get_record() lookup, an always-empty
    `common_tags` dict whose loop was a no-op, and a commented-out
    copy loop.
    """
    correct_record = {}
    record_add_field(correct_record, '693', '_', '_',
                     subfields=[('e', experiment)])
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    return print_rec(correct_record)
Exemple #42
0
def create_xml(recid, old_aff=None, new_aff=None, skip_aff=None):
    record = get_record(recid)
    correct_record = {}
    tags = ('100__', '700__')
    record_add_field(correct_record, '001', controlfield_value=recid)
    for tag in tags:
        field_instances = record_get_field_instances(record, \
                                                     tag[0:3], tag[3], tag[4])
        for field_instance in field_instances:
            correct_subfields = []
            skip_aff_exists = False
            for aff in skip_aff:
                if any(val for code, val in field_instance[0] if aff in val):
                    skip_aff_exists = True
                    if VERBOSE:
                        print "%s exists, deleting %s" % (aff, old_aff)
            if skip_aff_exists:
                for code, value in field_instance[0]:
                    if code == 'u':
                        if value != old_aff:
                            correct_subfields.append((code, value))
                    else:
                        correct_subfields.append((code, value))
            else:
                for code, value in field_instance[0]:
                    if code == 'u':
                        if value == old_aff:
                            correct_subfields.append((code, new_aff))
                            if VERBOSE:
                                print "Changing %s to %s" % (old_aff, new_aff)
                        else:
                            correct_subfields.append((code, value))
                    else:
                        correct_subfields.append((code, value))
            record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                             subfields=correct_subfields)
    return print_rec(correct_record)
Exemple #43
0
def get_ids_from_recid(recid):
    """Return [recid, doi, arXiv eprint, other-service id] followed by all
    report numbers for the given record."""
    record = get_record(recid)

    ## DOI: 024 7_ $a values starting with '10.'
    doi = ""
    dois = [d for d in record_get_field_values(record, '024', '7', code='a')
            if d.startswith('10.')]
    if len(dois) > 1:
        print >> sys.stderr, "WARNING: record %s have more than one DOI: %s" % (
            recid, dois)
    elif len(dois) == 1:
        doi = dois[0]

    ## arXiv eprint: 035 $a of the form oai:arXiv.org:XXXX
    eprint = ""
    eprints = [e[len('oai:arXiv.org:'):]
               for e in record_get_field_values(record, '035', code='a')
               if e.lower().startswith('oai:arxiv.org:')]
    if len(eprints) > 1:
        print >> sys.stderr, "WARNING: record %s have more than one arXiv eprint: %s" % (
            recid, eprints)
    elif len(eprints) == 1:
        eprint = eprints[0]

    ## Identifier on the partner site: 035 with $9 == CFG_OTHER_SITE
    other_id = ''
    for field in record_get_field_instances(record, '035'):
        subfields = dict(field_get_subfield_instances(field))
        if subfields.get('9', '').upper() == CFG_OTHER_SITE.upper() \
                and subfields.get('a'):
            other_id = subfields['a']
    reportnumbers = record_get_field_values(record, '037', code='a')
    return [str(recid), doi, eprint, other_id] + reportnumbers
Exemple #44
0
def usa_papers(req):
    """Render an HTML page listing, per selected US affiliation and per
    journal, links to the matching SCOAP3 records."""
    req.content_type = "text/html"
    print >> req, pageheaderonly("USA papers for selected affiliations",
                                 req=req)

    li = "<li><a href='https://repo.scoap3.org/record/{0}'>{1}</a></li>"

    for university in CFG_SELECTED_AFF:
        print >> req, "<h2>%s</h2>" % (str(university),)
        search = create_search_from_affiliation(university)
        for collection in CFG_JOURNALS:
            hits = perform_request_search(p='/%s/' % (search,), c=collection)
            if not len(hits):
                continue
            print >> req, "<h3>%s (%i)</h3>" % (str(collection), len(hits))
            print >> req, "<ul>"
            for rec_id in hits:
                rec = get_record(rec_id)
                print >> req, li.format(str(rec_id),
                                        str(rec['245'][0][0][0][1]))
            print >> req, "</ul>"

    req.write(pagefooteronly(req=req))
    return ""
Exemple #45
0
 def lazy_parser(collection, left_tags, right_tags, volume_subfield):
     """Lazily yield (left value, right value) pairs for every record of
     `collection`.

     Every non-empty right-tag value is first yielded paired with itself;
     then each matching left-tag subfield is yielded against it, with the
     volume subfield appended as 'right;volume' when present.
     NOTE(review): the exact pairing semantics are inferred from the code;
     confirm against the caller.
     """
     for recid in get_collection_reclist(collection):
         record = get_record(recid)
         for right_tag in right_tags:
             for right_value in record_get_field_values(
                     record, right_tag[:3], right_tag[3], right_tag[4],
                     right_tag[5]):
                 if not right_value:
                     continue  # Empty metadata
                 # The right value always maps to itself.
                 yield right_value, right_value
                 for left_tag in left_tags:
                     for left_field in record_get_field_instances(
                             record, left_tag[:3], left_tag[3],
                             left_tag[4]):
                         left_subfields = dict(
                             field_get_subfield_instances(left_field))
                         if left_tag[5] not in left_subfields:
                             continue  # Empty field
                         if volume_subfield in left_subfields:
                             # Attach the volume to the right value.
                             yield left_subfields[left_tag[5]], '%s;%s' % (
                                 right_value,
                                 left_subfields[volume_subfield])
                         else:
                             yield left_subfields[left_tag[5]], right_value
def oairepositoryupdater_task():
    """Main business logic code of oai_archive.

    Recomputes which records belong to which OAI sets, diffs that against
    the OAI id/set fields currently stored in the records' metadata, and
    writes MARCXML correction chunks that are submitted to bibupload.
    Honours the task options ``report`` (print repository status and exit)
    and ``no_upload`` (build the MARCXML files but do not submit them).
    Returns True on completion.
    """
    no_upload = task_get_option("no_upload")
    report = task_get_option("report")

    # Report-only mode: print the repository status and stop.
    if report > 1:
        print_repository_status(verbose=report)
        return True

    # Snapshot the current set definitions, mainly for debug logging.
    initial_snapshot = {}
    for set_spec in all_set_specs():
        initial_snapshot[set_spec] = get_set_definitions(set_spec)
    write_message("Initial set snapshot: %s" % pformat(initial_snapshot),
                  verbose=2)

    task_update_progress("Fetching records to process")

    # Records that already carry an OAI identifier in their metadata.
    recids_with_oaiid = search_unit_in_bibxxx(p='*',
                                              f=CFG_OAI_ID_FIELD,
                                              type='e')
    write_message("%s recids have an OAI ID" % len(recids_with_oaiid),
                  verbose=2)

    # Records currently exported (member of at least one OAI set).
    all_current_recids = search_unit_in_bibxxx(p='*',
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
    no_more_exported_recids = intbitset(all_current_recids)
    write_message("%s recids are currently exported" %
                  (len(all_current_recids)),
                  verbose=2)

    # For every set, compute the records that SHOULD be in it and diff
    # against the records whose metadata say they currently ARE in it.
    all_affected_recids = intbitset()
    all_should_recids = intbitset()
    recids_for_set = {}
    for set_spec in all_set_specs():
        if not set_spec:
            # An empty setSpec stands for the global set.
            set_spec = CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC
        should_recids = get_recids_for_set_spec(set_spec)
        recids_for_set[set_spec] = should_recids
        # Whatever should still be exported is not an "unexport" candidate.
        no_more_exported_recids -= should_recids
        all_should_recids |= should_recids
        current_recids = search_unit_in_bibxxx(p=set_spec,
                                               f=CFG_OAI_SET_FIELD,
                                               type='e')
        write_message(
            "%s recids should be in %s. Currently %s are in %s" %
            (len(should_recids), set_spec, len(current_recids), set_spec),
            verbose=2)
        to_add = should_recids - current_recids
        write_message("%s recids should be added to %s" %
                      (len(to_add), set_spec),
                      verbose=2)
        to_remove = current_recids - should_recids
        write_message("%s recids should be removed from %s" %
                      (len(to_remove), set_spec),
                      verbose=2)
        affected_recids = to_add | to_remove
        write_message("%s recids should be hence updated for %s" %
                      (len(affected_recids), set_spec),
                      verbose=2)
        all_affected_recids |= affected_recids

    # Records that should be exported but lack an OAI id must be touched too.
    missing_oaiid = all_should_recids - recids_with_oaiid
    write_message("%s recids are missing an oaiid" % len(missing_oaiid))
    write_message("%s recids should no longer be exported" %
                  len(no_more_exported_recids))

    ## Let's add records with missing OAI ID
    all_affected_recids |= missing_oaiid | no_more_exported_recids
    write_message("%s recids should updated" % (len(all_affected_recids)),
                  verbose=2)

    if not all_affected_recids:
        write_message("Nothing to do!")
        return True

    # Prepare to save results in a tmp file
    (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                  prefix='oairepository_' + \
                                  time.strftime("%Y%m%d_%H%M%S_",
                                                time.localtime()))
    oai_out = os.fdopen(fd, "w")
    oai_out.write("<collection>")

    # tot counts records written into the current MARCXML chunk.
    tot = 0
    # Iterate over the recids
    for i, recid in enumerate(all_affected_recids):
        task_sleep_now_if_required(can_stop_too=True)
        task_update_progress("Done %s out of %s records." % \
                             (i, len(all_affected_recids)))

        write_message("Elaborating recid %s" % recid, verbose=3)
        record = get_record(recid)
        if not record:
            write_message("Record %s seems empty. Let's skip it." % recid,
                          verbose=3)
            continue
        # new_record will hold only the correction fields (001 + OAI field).
        new_record = {}

        # Check if an OAI identifier is already in the record or
        # not.
        assign_oai_id_entry = False
        oai_id_entry = record_get_field_value(record,
                                              tag=CFG_OAI_ID_FIELD[:3],
                                              ind1=CFG_OAI_ID_FIELD[3],
                                              ind2=CFG_OAI_ID_FIELD[4],
                                              code=CFG_OAI_ID_FIELD[5])
        if not oai_id_entry:
            assign_oai_id_entry = True
            oai_id_entry = "oai:%s:%s" % (CFG_OAI_ID_PREFIX, recid)
            write_message("Setting new oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)
        else:
            write_message("Already existing oai_id %s for record %s" %
                          (oai_id_entry, recid),
                          verbose=3)

        # Get the sets to which this record already belongs according
        # to the metadata
        current_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_SET_FIELD[:3],
                                    ind1=CFG_OAI_SET_FIELD[3],
                                    ind2=CFG_OAI_SET_FIELD[4],
                                    code=CFG_OAI_SET_FIELD[5]))
        write_message("Record %s currently belongs to these oai_sets: %s" %
                      (recid, ", ".join(current_oai_sets)),
                      verbose=3)

        current_previous_oai_sets = set(
            record_get_field_values(record,
                                    tag=CFG_OAI_PREVIOUS_SET_FIELD[:3],
                                    ind1=CFG_OAI_PREVIOUS_SET_FIELD[3],
                                    ind2=CFG_OAI_PREVIOUS_SET_FIELD[4],
                                    code=CFG_OAI_PREVIOUS_SET_FIELD[5]))
        write_message(
            "Record %s currently doesn't belong anymore to these oai_sets: %s"
            % (recid, ", ".join(current_previous_oai_sets)),
            verbose=3)

        # Get the sets that should be in this record according to
        # settings
        updated_oai_sets = set(_set
                               for _set, _recids in recids_for_set.iteritems()
                               if recid in _recids)
        write_message("Record %s now belongs to these oai_sets: %s" %
                      (recid, ", ".join(updated_oai_sets)),
                      verbose=3)

        # Former memberships: sets the record used to be in but is no more.
        updated_previous_oai_sets = set(
            _set for _set in (current_previous_oai_sets - updated_oai_sets)
            | (current_oai_sets - updated_oai_sets))
        write_message(
            "Record %s now doesn't belong anymore to these oai_sets: %s" %
            (recid, ", ".join(updated_previous_oai_sets)),
            verbose=3)

        # Ok, we have the old sets and the new sets. If they are equal
        # and oai ID does not need to be added, then great, nothing to
        # change . Otherwise apply the new sets.
        if current_oai_sets == updated_oai_sets and not assign_oai_id_entry:
            write_message("Nothing has changed for record %s, let's move on!" %
                          recid,
                          verbose=3)
            continue  # Jump to next recid

        write_message("Something has changed for record %s, let's update it!" %
                      recid,
                      verbose=3)
        # Build the replacement OAI field: id + current sets + former sets.
        subfields = [(CFG_OAI_ID_FIELD[5], oai_id_entry)]
        for oai_set in updated_oai_sets:
            subfields.append((CFG_OAI_SET_FIELD[5], oai_set))
        for oai_set in updated_previous_oai_sets:
            subfields.append((CFG_OAI_PREVIOUS_SET_FIELD[5], oai_set))

        record_add_field(new_record, tag="001", controlfield_value=str(recid))
        record_add_field(new_record,
                         tag=CFG_OAI_ID_FIELD[:3],
                         ind1=CFG_OAI_ID_FIELD[3],
                         ind2=CFG_OAI_ID_FIELD[4],
                         subfields=subfields)
        oai_out.write(record_xml_output(new_record))
        tot += 1
        # Chunking: once the chunk is full, close/submit it and open a new
        # temporary MARCXML file so uploads stay reasonably sized.
        if tot == CFG_OAI_REPOSITORY_MARCXML_SIZE:
            oai_out.write("</collection>")
            oai_out.close()
            write_message("Wrote to file %s" % filename)
            if not no_upload:
                task_low_level_submission('bibupload', 'oairepository', '-c',
                                          filename, '-n')
            # Prepare to save results in a tmp file
            (fd, filename) = mkstemp(dir=CFG_TMPDIR,
                                        prefix='oairepository_' + \
                                        time.strftime("%Y%m%d_%H%M%S_",
                                                        time.localtime()))
            oai_out = os.fdopen(fd, "w")
            oai_out.write("<collection>")
            tot = 0
            task_sleep_now_if_required(can_stop_too=True)

    # Close (and possibly submit) the last, partially filled chunk.
    oai_out.write("</collection>")
    oai_out.close()
    write_message("Wrote to file %s" % filename)

    if not no_upload:
        task_sleep_now_if_required(can_stop_too=True)
        if tot > 0:
            task_low_level_submission('bibupload', 'oairepository', '-c',
                                      filename, '-n')
        else:
            # Empty final chunk: nothing to upload, discard the file.
            os.remove(filename)

    return True
Exemple #47
0
def institutions_list(req, country, year=None):
    from copy import deepcopy
    def find_nations(affiliation):
        NATIONS_DEFAULT_MAP['European Organization for Nuclear Research'] = 'CERN'
        NATIONS_DEFAULT_MAP['Centre Europeen de Recherches Nucleaires'] = 'CERN'
        NATIONS_DEFAULT_MAP['High Energy Accelerator Research Organization'] = 'KEK'
        NATIONS_DEFAULT_MAP['KEK'] = 'KEK'
        NATIONS_DEFAULT_MAP['FNAL'] = 'FNAL'
        NATIONS_DEFAULT_MAP['Fermilab'] = 'FNAL'
        NATIONS_DEFAULT_MAP['Fermi National'] = 'FNAL'
        NATIONS_DEFAULT_MAP['SLAC'] = 'SLAC'
        NATIONS_DEFAULT_MAP['DESY'] = 'DESY'
        NATIONS_DEFAULT_MAP['Deutsches Elektronen-Synchrotron'] = 'DESY'
        NATIONS_DEFAULT_MAP['JINR'] = 'JINR'
        NATIONS_DEFAULT_MAP['JOINT INSTITUTE FOR NUCLEAR RESEARCH'] = 'JINR'

        possible_affs = []
        def _sublistExists(list1, list2):
            return ''.join(map(str, list2)) in ''.join(map(str, list1))
        values = set([y.lower().strip() for y in re.findall(ur"[\w']+", affiliation.replace('.','').decode("UTF-8"), re.UNICODE)])

        for key, val in NATIONS_DEFAULT_MAP.iteritems():
            key = unicode(key)
            key_parts = set(key.lower().decode('utf-8').split())
            if key_parts.issubset(values):
                possible_affs.append(val)
                values = values.difference(key_parts)

        if not possible_affs:
            possible_affs = ['HUMAN CHECK']
        if 'CERN' in possible_affs and 'Switzerland' in possible_affs:
            # Don't use remove in case of multiple Switzerlands
            possible_affs = [x for x in possible_affs if x != 'Switzerland']
        if 'KEK' in possible_affs and 'Japan' in possible_affs:
            possible_affs = [x for x in possible_affs if x != 'Japan']
        if 'FNAL' in possible_affs and 'USA' in possible_affs:
            possible_affs = [x for x in possible_affs if x != 'USA']
        if 'SLAC' in possible_affs and 'USA' in possible_affs:
            possible_affs = [x for x in possible_affs if x != 'USA']
        if 'DESY' in possible_affs and 'Germany' in possible_affs:
            possible_affs = [x for x in possible_affs if x != 'Germany']
        if 'JINR' in possible_affs and 'Russia' in possible_affs:
            possible_affs = [x for x in possible_affs if x != 'Russia']
        return sorted(list(set(possible_affs)))[0]
        
    publisher_dict = {'New J. Phys.':0,
                      'Acta Physica Polonica B':0,
                      'Advances in High Energy Physics':0,
                      'Chinese Phys. C':0,
                      'EPJC':0,
                      'JCAP':0,
                      'JHEP':0,
                      'Nuclear Physics B':0,
                      'Physics letters B':0,
                      'PTEP':0}
    if(year):
        recids = perform_request_search(p='country:"%s" year:%s' % (country,year))
    else:
      recids = perform_request_search(p='country:"%s"' % (country,))

    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=%s_institutions_list.csv' % (country,))

    req.write("recid|authors #|title|country|New J. Phys.|Acta Physica Polonica B|Advances in High Energy Physics|Chinese Phys. C|EPJC|JCAP|JHEP|Nuclear Physics B|Physics letters B|PTEP\n")

    for recid in recids:
            rec = get_record(recid)
            global_affs = {}
            author_count = 0
            if '100' in rec:
                    author_count += len(rec['100'])
            if '700' in rec:
                    author_count += len(rec['700'])

            journal = record_get_field_value(rec, '773', ind1="%", ind2="%", code='p')
            affs = []
            affs.extend(record_get_field_values(rec, '100', ind1="%", ind2="%", code='v'))
            affs.extend(record_get_field_values(rec, '700', ind1="%", ind2="%", code='v'))
            for aff in affs:
                    if aff not in global_affs:
                            global_affs[aff] = deepcopy(publisher_dict)
                    global_affs[aff][journal] += 1

            for aff, j in global_affs.iteritems():
                req.write("%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n" % (recid, author_count, aff.replace('\n', ' ').replace('\r', ''), find_nations(aff), j['New J. Phys.'],j['Acta Physica Polonica B'],j['Advances in High Energy Physics'],j['Chinese Phys. C'],j['EPJC'],j['JCAP'],j['JHEP'],j['Nuclear Physics B'],j['Physics letters B'],j['PTEP']))
def csu(req):
    """Stream a CSV of records affiliated with California State University
    campuses.

    For each affiliation search pattern, every matching record produces one
    line: record id, matched pattern, creation/modification dates, title,
    authors whose affiliation matched (with those affiliations) and
    publication info.

    @param req: mod_python request object used for output
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = (
        'attachment; filename=csu_records_info.csv')

    search_patterns = [
        "California Polytechnic State University", "Carson", "Dominguez Hills",
        "Fresno", "California State University Fullerton",
        "California State University Long Beach",
        "California State University, Los Angeles", "Northridge",
        "California State University, Sacramento",
        "San Diego State University", "sfsu"
    ]

    def special_aff(author, pattern):
        """Format "Name(aff, ..., ), " for 'v' subfields containing *pattern*.

        Returns the empty string when no affiliation subfield matches.  The
        pattern is now an explicit parameter; the old code silently read the
        caller's leaked loop variable ``value``.
        """
        affs = [subval for subcode, subval in author
                if subcode == 'v' and pattern in subval]
        name = ""
        for subcode, subval in author:
            if subcode == 'a':
                name = subval
        if not affs:
            return ""
        out = name + '('
        for aff in affs:
            out += aff + ', '
        return out + '), '

    req.write(
        "SCOAP3 record id; Journal; Creation date; Modification date; Title; Authors; Publication info\n"
    )
    for value in search_patterns:
        recids = perform_request_search(p="affiliation:'%s'" % (value, ))
        for recid in recids:
            rec = get_record(recid)
            if '245' in rec:
                title = rec['245'][0][0][0][1].strip()
            else:
                title = ""
            creation_date = get_creation_date(recid)
            modification_date = get_modification_date(recid)
            authors = ""
            if '100' in rec:
                authors += special_aff(rec['100'][0][0], value)
            if '700' in rec:
                for author in rec['700']:
                    authors += special_aff(author[0], value)
            publication_info = ''
            if '773' in rec:
                # Last 'p' subfield wins, matching the previous behaviour.
                for p in rec['773'][0][0]:
                    if p[0] == 'p':
                        publication_info = p[1]
                # The 024 field is not guaranteed to exist; the old code
                # accessed it unconditionally and raised KeyError.
                if '024' in rec:
                    publication_info += " %s" % (rec['024'][0][0][0][1], )
            if '037' in rec:
                publication_info += " %s" % (rec['037'][0][0][0][1], )

            req.write("%s; %s; %s; %s; %s; %s; %s\n" %
                      (recid, value, creation_date, modification_date, title,
                       authors, publication_info))
def tarballs_by_recids(recids,
                       sdir,
                       docname=None,
                       doctype=None,
                       docformat=None):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids. By default look for files with names matching
    the report number and with source field 'arXiv'. This can be changed
    with C{docname}, C{doctype}, C{docformat}

    @param: recids (string): the record id or ids, e.g. '18', '18,20' or
        '18-22' (ranges now include both endpoints)
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: tarballs ([string, string, ...]): locations of tarballs
    """
    if not recids:
        return []

    # Expand the "id[,id|lo-hi]..." syntax into a flat list of ints.  The
    # comma and no-comma cases previously duplicated this logic; splitting
    # on ',' handles a single token just as well.
    list_of_ids = []
    for token in recids.split(','):
        if '-' in token:
            low, high = token.split('-')
            # Inclusive range: '5-8' means records 5, 6, 7 and 8
            # (range(low, high) used to drop the upper bound).
            list_of_ids.extend(range(int(low), int(high) + 1))
        else:
            list_of_ids.append(int(token))

    arXiv_ids = []
    local_files = []
    for recid in list_of_ids:
        rec = get_record(recid)
        if not doctype and not docname and not docformat:
            # Default mode: collect arXiv report numbers from 037 fields.
            for afieldinstance in record_get_field_instances(rec, tag='037'):
                sources = field_get_subfield_values(afieldinstance, '9')
                if sources and sources[0] == 'arXiv':
                    arXiv_id = field_get_subfield_values(
                        afieldinstance, 'a')[0]
                    arXiv_ids.append(arXiv_id)
        else:
            # Filtered mode: look at the latest attached files instead.
            bibarchive = BibRecDocs(recid)
            all_files = bibarchive.list_latest_files()
            if doctype:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_type() == doctype
                ]
            if docname:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_name() == docname
                ]
            if docformat:
                all_files = [
                    docfile for docfile in all_files
                    if docfile.get_format() == docformat
                ]
            local_files.extend([(docfile.get_path(), recid)
                                for docfile in all_files])

    if doctype or docname or docformat:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Exemple #50
0
def get_field_values_on_condition(bibrecid,
                                  get_table="",
                                  get_tag="",
                                  condition_tag="",
                                  condition_value="",
                                  condition="==",
                                  source="MEM"):
    '''
    Method to fetch data from a record in the database.
    It is possible to specify a condition in order to get
    only certain fields if condition holds.

    Examples:

    In [2]: bibauthorid_utils.get_field_values_on_condition
        (742535, [100, 700], 'u', 'a', 'Mathieu, Vincent')
    Out[2]: set(['UMH, Mons'])

    In [3]: bibauthorid_utils.get_field_values_on_condition
        (742535, [100, 700], 'u', 'a')
    Out[3]: set(['LPSC, Grenoble', 'UMH, Mons'])

    In [9]: bibauthorid_utils.get_field_values_on_condition
        (742535, [100,700], 'a', 'u', 'UMH, Mons')
    Out[9]: set(['Semay, Claude', 'Mathieu, Vincent'])

    In [4]: bibauthorid_utils.get_field_values_on_condition
        (742535, [100, 700], 'u')
    Out[4]: set(['LPSC, Grenoble', 'UMH, Mons'])

    In [5]: bibauthorid_utils.get_field_values_on_condition
        (742535, [100, 700])
    Out[5]:
    {'100': [([('a', 'Mathieu, Vincent'), ('u', 'UMH, Mons'), ('i', '4286')],
          ' ',
          ' ',
          '',
          3)],
     '700': [([('a', 'Semay, Claude'), ('u', 'UMH, Mons'), ('i', '4286')],
          ' ',
          ' ',
          '',
          4),
         ([('a', 'Silvestre-Brac, Bernard'),
           ('u', 'LPSC, Grenoble'),
           ('i', '2240')],
          ' ',
          ' ',
          '',
          5)]}
    In [6]: bibauthorid_utils.get_field_values_on_condition(1)
    Out[6]: the whole record structure of record 1.

    @param bibrecid: The id of the record (bibrec) to get
    @type bibrecid: int
    @param get_table: List of one or more tables to look at
    @type get_table: list or string or int or long
    @param get_tag: The value of this tag shall be returned
    @type get_tag: string
    @param condition_tag: First part of the condition. Provide a tag to look up
    @type condition_tag: string
    @param condition_value: Second pard of the condition. Provide a value
        that has to be matched
    @type condition_value: string
    @param condition: Optional value to describe the condition.
        Defaults to "==" and may be any comparison.  WARNING: this string is
        interpolated into an eval() expression below -- callers must never
        pass untrusted input here.
    @param source: "MEM" reads from dat.RELEVANT_RECORDS, "API" loads the
        record via get_record; any other value yields an empty result.

    @return: set of found values, empty set if no value found.
    @rtype: set or dictionary
        (if get_tag, condition_tag and condition_value are empty)

    '''
    rec = None
    if source == "MEM":
        rec = dat.RELEVANT_RECORDS.get(bibrecid)
    elif source == "API":
        rec = get_record(bibrecid)

    if condition_value and isinstance(condition_value, str):
        # Normalize to unicode so comparisons against decoded field
        # values behave consistently.
        condition_value = condition_value.decode('utf-8')

    returnset = set()

    if not rec:
        return set()

    if get_table:
        # Coerce the table selector into a list of table names.
        if not isinstance(get_table, list):
            if isinstance(get_table, str):
                get_table = [get_table]
            elif isinstance(get_table, (int, long)):
                get_table = [str(get_table)]
            else:
                sys.stderr.write(
                    'Error: Wrong table for table selection. ' +
                    'Allowed are list of strings, string or int/long values\n')

        for table in get_table:
            if str(table) in rec:
                if table in ["cites", "cited-by"]:
                    # Citation info is stored as a plain list: return it
                    # directly instead of scanning MARC field structures.
                    return rec[str(table)]

                for recordentries in rec[str(table)]:
                    is_condition = True
                    is_skip_entry = False

                    for field in recordentries[0]:
                        if condition_tag and condition_value:
                            if field[0] == condition_tag:
                                condition_holds = False
                                # SECURITY NOTE: eval() builds the comparison
                                # from the `condition` argument.  Kept for
                                # backward compatibility; never feed it
                                # user-controlled strings.
                                try:
                                    condition_holds = not eval(
                                        ("field[1].decode('utf-8') %s" +
                                         " condition_value") % (condition))
                                except (TypeError, NameError, IndexError):
                                    condition_holds = False

                                if condition_holds:
                                    # Condition failed: skip this entry.
                                    is_skip_entry = True
                                    is_condition = False
                                    break
                        elif get_tag:
                            if get_tag == field[0]:
                                returnset.add(field[1].decode('utf-8'))
                        else:
                            # Neither tag nor condition requested: return the
                            # raw field structures of all requested tables.
                            retlist = {}

                            for table in get_table:
                                try:
                                    retlist[str(table)] = rec[str(table)]
                                except KeyError:
                                    pass

                            return retlist

                    if is_condition and not is_skip_entry:
                        # The condition held for this entry: collect the
                        # values of the requested tag.
                        for field in recordentries[0]:
                            if field[0] == get_tag:
                                returnset.add(field[1].decode('utf-8'))

        # (A redundant "reset empty set to empty set" block was removed.)
        return returnset
    else:
        return rec
Exemple #51
0
def _get_orcid_dictionaries(papers, personid, old_external_ids, orcid):
    """Return list of dictionaries which can be used in ORCID library.

    Yields orcid list of ORCID_SINGLE_REQUEST_WORKS works of given person.

    @param papers: list of papers' records ids.
    @type papers: list (tuple(int,))
    @param personid: personid of person who is requesting orcid dictionary of
        his works
    @type personid: int
    @param orcid: orcid of the author
    @type orcid: string
    """
    orcid_list = []

    for recid in papers:

        work_dict = {
            'work_title': {}
        }

        recstruct = get_record(recid)

        url = CFG_SITE_URL + ('/record/%d' % recid)

        try:
            external_ids = _get_external_ids(recid, url,
                                             recstruct, old_external_ids,
                                             orcid)
        except OrcidRecordExisting:
            # We will not push this record, skip it.
            continue

        # There always will be some external identifiers.
        work_dict['work_external_identifiers'] = list(external_ids)

        work_dict['work_title']['title'] = \
            encode_for_jinja_and_xml(record_get_field_value(
                recstruct, '245', '', '', 'a'))

        short_descriptions = \
            record_get_field_values(recstruct, '520', '', '', 'a')
        if short_descriptions:
            work_dict['short_description'] = encode_for_jinja_and_xml(
                short_descriptions[0])[
                    :MAX_DESCRIPTION_LENGTH
                ].rsplit(' ', 1)[0]

        journal_title = record_get_field_value(recstruct, '773', '', '', 'p')
        if journal_title:
            work_dict['journal-title'] = encode_for_jinja_and_xml(
                journal_title)

        citation = _get_citation(recid)
        if citation:
            work_dict['work_citation'] = citation

        work_dict['work_type'] = _get_work_type(recstruct)

        publication_date = _get_publication_date(recstruct)
        if publication_date:
            work_dict['publication_date'] = publication_date

        work_dict['url'] = url

        work_contributors = _get_work_contributors(recid, personid)
        if len(work_contributors) > 0:
            work_dict['work_contributors'] = work_contributors

        work_source = record_get_field_value(recstruct, '359', '', '', '9')
        if work_source:
            work_dict['work_source']['work-source'] = \
                encode_for_jinja_and_xml(work_source)

        language = record_get_field_value(recstruct, '041', '', '', 'a')
        if language:
            # If we understand the language we map it to ISO 639-2
            language = LANGUAGE_MAP.get(language.lower().strip())
            if language:
                work_dict['language_code'] = encode_for_jinja_and_xml(language)
        else:
            work_dict['language_code'] = 'en'
        work_dict['visibility'] = 'public'
        orcid_list.append(work_dict)

        bibtask.write_message("Pushing " + str(recid))

        if len(orcid_list) == ORCID_SINGLE_REQUEST_WORKS:
            bibtask.write_message("I will push " +
                                  str(ORCID_SINGLE_REQUEST_WORKS) +
                                  " records to ORCID.")
            yield orcid_list
            orcid_list = []

    if len(orcid_list) > 0:
        # empty message might be invalid
        bibtask.write_message("I will push last " + str(len(orcid_list)) +
                              " records to ORCID.")
        yield orcid_list
Exemple #52
0
def countries_by_publishers(req):
    """Render an HTML page of per-country article counts for each journal.

    Two tables are produced: one counting articles with at least one author
    from the country, and one counting articles whose authors are *all*
    from the country.
    """
    req.content_type = "text/html"
    print >> req, pageheaderonly("Countries/publishers", req=req)

    ############
    ## PART 1 ##
    req.write("<h1>Number of articles per country per journal</h1>")
    req.write("<h2>Minimum one author from the country</h2>")
    req.flush()
    req.write("<table>\n")
    req.write("<tr><th rowspan=2>Country</th><th colspan=10>Journals</th><th>Other</th></tr>")
    req.write("""<tr>
<td>Acta</td>
<td>Advances in High Energy Physics</td>
<td>Chinese Physics C</td>
<td>European Physical Journal C</td>
<td>Journal of Cosmology and Astroparticle Physics</td>
<td>Journal of High Energy Physics</td>
<td>New Journal of Physics</td>
<td>Nuclear Physics B</td>
<td>Physics Letters B</td>
<td>Progress of Theoretical and Experimental Physics</td>
<td>older_than_2014</td></tr>""")

    for country in sorted(set(NATIONS_DEFAULT_MAP.itervalues())):
        req.write("<tr><td>%s</td>" % (country,))
        for pub in CFG_JOURNALS + ["older_than_2014"]:
            # Write the number of hits; the raw hit list itself was
            # previously dumped into the cell by mistake.
            req.write("<td>%s</td>" % len(perform_request_search(
                p="country:%s" % (country,), cc=pub)))
        req.write("</tr>")

    req.write('</table>')

    ############
    ## PART 2 ##
    # Count only articles in which every author country is the same.
    hitcount = {}
    for pub in CFG_JOURNALS + ["older_than_2014"]:
        ids = perform_request_search(cc=pub)
        hitcount[pub] = {}
        for country in sorted(set(NATIONS_DEFAULT_MAP.itervalues())):
            hitcount[pub][country] = 0

        for recid in ids:
            record = get_record(recid)
            countries = set(record_get_field_values(record, '700', '%', '%', 'w') + record_get_field_values(record, '100', '%', '%', 'w'))
            if len(countries) == 1:
                c = countries.pop()
                if c in set(NATIONS_DEFAULT_MAP.itervalues()):
                    # BUGFIX: was hitcount[pub][countries[0]] += 1, which
                    # indexed a set (TypeError) and referenced the value
                    # already removed by pop().
                    hitcount[pub][c] += 1

    req.write("<h1>Number of articles per country per journal</h1>")
    req.write("<h2>All author from the country</h2>")
    req.flush()
    req.write("<table>\n")
    req.write("<tr><th rowspan=2>Country</th><th colspan=10>Journals</th><th>Other</th></tr>")
    req.write("""<tr>
<td>Acta</td>
<td>Advances in High Energy Physics</td>
<td>Chinese Physics C</td>
<td>European Physical Journal C</td>
<td>Journal of Cosmology and Astroparticle Physics</td>
<td>Journal of High Energy Physics</td>
<td>New Journal of Physics</td>
<td>Nuclear Physics B</td>
<td>Physics Letters B</td>
<td>Progress of Theoretical and Experimental Physics</td>
<td>older_than_2014</td></tr>""")

    for country in sorted(set(NATIONS_DEFAULT_MAP.itervalues())):
        req.write("<tr><td>%s</td>" % (country,))
        for pub in CFG_JOURNALS + ["older_than_2014"]:
            req.write("<td>%s</td>" % hitcount[pub][country])
        req.write("</tr>")

    req.write('</table>')
    req.write(pagefooteronly(req=req))
    return ""
def record_get_keywords(record,
                        main_field=bconfig.CFG_MAIN_FIELD,
                        others=bconfig.CFG_OTHER_FIELDS):
    """Extract keywordToken objects from a MARC record.

    Keywords are read from the main field (field 653 by default) and
    from the additional fields (e.g. 695__a, the old 'DESY' keywords).
    Weight is set to (0, 0) if no weight can be found.

    @var record: int or marc record; an int is resolved by loading the
        marc record from the database, otherwise keywords are extracted
        from the passed record instance
    @return: tuple (found, keywords, marcrec)
        found - int, number of main_field keywords encountered (the
            other fields are not counted)
        keywords - standard dictionary of keywordToken objects
        marcrec - the marc record object that was processed
    """
    keywords = {}

    # Accept either a single field spec or a list of them.
    if isinstance(main_field, basestring):
        main_field = [main_field]
    if isinstance(others, basestring):
        others = [others]

    # Resolve a record id to the actual record structure.
    rec = get_record(record) if isinstance(record, int) else record

    found = 0
    for spec in main_field:
        tag, ind1, ind2 = bibclassify_engine._parse_marc_code(spec)
        for field in rec.get(tag, []):
            keyword = ''
            weight = 0
            kw_type = ''

            # Last occurrence of each subfield code wins.
            for code, value in field[0]:
                if code == 'a':
                    keyword = value
                elif code == 'n':
                    weight = int(value)
                elif code == '9':
                    kw_type = value

            if keyword:
                found += 1
                token = bor.KeywordToken(keyword, type=kw_type)
                # One inner list holding `weight` zero pairs.
                keywords[token] = [[(0, 0)] * weight]

    if others:
        for field_no in others:
            tag, ind1, ind2 = bibclassify_engine._parse_marc_code(field_no)
            kw_type = 'f%s' % field_no
            for field in rec.get(tag, []):
                # Only the first $a of each field instance is used.
                for code, value in field[0]:
                    if code == 'a':
                        keywords[bor.KeywordToken(value,
                                                  type=kw_type)] = [[(0, 0)]]
                        break

    return found, keywords, rec
Exemple #54
0
def write_csv(req, dictionary, journal_list, f_date, t_date,
              created_or_modified_date):
    """Build a per-journal CSV compliance report for papers in a date range.

    @param req: request object (unused here, kept for interface
        compatibility with the other report writers)
    @param dictionary: mapping of journal key -> collection to search in
    @param journal_list: journal keys, in output order
    @param f_date: start of the date range
    @param t_date: end of the date range
    @param created_or_modified_date: which date index the range applies
        to (interpolated into the search pattern, e.g. 'created')
    @return: the complete CSV document as one string
    """
    return_val = ''

    for key in journal_list:
        val = dictionary[key]
        papers = perform_request_search(p="date%s:%s->%s"
                                        % (created_or_modified_date,
                                           f_date, t_date),
                                        c=val)

        # Nothing published in this journal during the requested period.
        if not papers:
            continue

        # Journal name on its own line, then the column header row.
        # Fixes: the header used to be fused directly onto the journal
        # name (no newline after `key`), and the 'Category' column was
        # missing even though a category compliance value is written for
        # every data row below (cf. the HTML header in get_record_checks).
        return_val += key + '\n'
        return_val += ';'.join(['recid', 'cr. date', 'mod. date', 'DOI',
                                'XML', 'PDF', 'PDF/A', 'Complete record?',
                                'arXiv number', 'Copyright: authors', 'CC-BY',
                                'Funded by SCOAP3', 'Category', 'notes',
                                'First delivery', 'First AB delivery',
                                'Last modification', 'PDF/A upload',
                                'DOI registration', 'Delivery diff',
                                'PDF/A diff']) + '\n'

        for recid in papers:
            rec = get_record(recid)
            doi = get_doi(rec)
            # Delivery/registration milestones; remain None when no
            # package has been delivered for this DOI yet.
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            delivery_data = run_sql("SELECT doi.creation_date AS 'doi_reg', package.name AS 'pkg_name', package.delivery_date AS 'pkg_delivery' FROM doi_package LEFT JOIN doi ON doi_package.doi=doi.doi LEFT JOIN package ON package.id=doi_package.package_id WHERE doi_package.doi=%s ORDER BY package.delivery_date ASC", (doi,), with_dict=True)
            if delivery_data:
                # Rows are ordered by delivery date, so [0]/[-1] are the
                # first and most recent deliveries.
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)

            record_compl = is_complete_record(recid)
            # The check helpers return '<b>...</b>'-wrapped strings; strip
            # the markup for CSV output.
            # NOTE(review): lstrip/rstrip strip *character sets*, not exact
            # prefixes, so any leading '<', 'b', '>' etc. in the value is
            # also removed — kept as-is to preserve existing output.
            return_val += ';'.join(str(item) for item in [str(recid),
                                   get_creation_date(recid),
                                   get_modification_date(recid),
                                   doi,
                                   has_or_had_format(recid, '.xml').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf').lstrip('<b>').rstrip('</b>'),
                                   has_or_had_format(recid, '.pdf;pdfa').lstrip('<b>').rstrip('</b>'),
                                   str(check_complete_rec(record_compl)),
                                   get_arxiv(rec).lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'authors').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'cc').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'scoap3').lstrip('<b>').rstrip('</b>'),
                                   is_compliant(recid, 'category').lstrip('<b>').rstrip('</b>'),
                                   str([rec_key for rec_key, rec_val in record_compl.iteritems() if not rec_val]),
                                   str(first_del),
                                   str(first_ab_del),
                                   str(last_mod),
                                   str(pdfa_del),
                                   str(doi_reg),
                                   check_24h_delivery(first_ab_del, doi_reg),
                                   check_24h_delivery(pdfa_del, doi_reg)
                                   ])
            return_val += '\n'

    return return_val
Exemple #55
0
def get_record_checks(req, recids):
    """Render one HTML table row per record id with its delivery and
    compliance checks.

    @param req: request object (unused here, kept for interface
        compatibility)
    @param recids: comma-separated list of record ids; an empty string
        yields an empty result. A non-integer token in the list produces
        a section-heading row plus a column-header row instead of a data
        row (via the except branch below).
    @return: the concatenated HTML rows as a string
    """
    if recids == '':
        return ''

    recids = recids.split(',')
    return_val = []
    for rid in recids:
        try:
            recid = int(rid)
            rec = get_record(recid)
            doi = get_doi(rec)
            delivery_data = run_sql("SELECT doi.creation_date AS 'doi_reg', package.name AS 'pkg_name', package.delivery_date AS 'pkg_delivery' FROM doi_package LEFT JOIN doi ON doi_package.doi=doi.doi LEFT JOIN package ON package.id=doi_package.package_id WHERE doi_package.doi=%s ORDER BY package.delivery_date ASC",
                                    (doi,),
                                    with_dict=True)
            # Delivery/registration milestones; remain None when no
            # package has been delivered for this DOI yet.
            first_del = None
            first_ab_del = None
            last_mod = None
            doi_reg = None
            pdfa_del = None
            if delivery_data:
                # Rows are ordered by delivery date, so [0]/[-1] are the
                # first and most recent deliveries.
                first_del = delivery_data[0]['pkg_delivery']
                first_ab_del = get_delivery_of_firts_ab_package(delivery_data)
                last_mod = delivery_data[-1]['pkg_delivery']
                doi_reg = delivery_data[0]['doi_reg']
                pdfa_del = get_delivery_of_firts_pdfa(delivery_data)
            record_compl = is_complete_record(recid)
            return_val.append("""<tr>
                <td><a href="%s">%i</a></td>
                <td>%s</td>
                <td>%s</td>
                <td><a href="http://dx.doi.org/%s">%s</a></td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td>%s</td>
                <td %s>%s</td>
                <td %s>%s</td>
            </tr>""" % (join(CFG_SITE_URL, 'record', str(recid)), recid,
                        get_creation_date(recid),
                        get_modification_date(recid),
                        doi, doi,
                        has_or_had_format(recid, '.xml'),
                        has_or_had_format(recid, '.pdf'),
                        has_or_had_format(recid, '.pdf;pdfa'),
                        check_complete_rec(record_compl),
                        get_arxiv(rec),
                        is_compliant(recid, "authors"),
                        is_compliant(recid, "cc"),
                        is_compliant(recid, "scoap3"),
                        # Fixed: was `is_compliant(recid. "category")` --
                        # a dot instead of a comma (SyntaxError).
                        is_compliant(recid, "category"),
                        str([rec_key for rec_key, rec_val
                             in record_compl.iteritems() if not rec_val]),
                        str(first_del),
                        str(first_ab_del),
                        str(last_mod),
                        str(pdfa_del),
                        str(doi_reg),
                        format_24h_delivery(check_24h_delivery(first_del, doi_reg)),
                        check_24h_delivery(first_del, doi_reg),
                        format_24h_delivery(check_24h_delivery(pdfa_del, doi_reg)),
                        check_24h_delivery(pdfa_del, doi_reg)))
        except Exception:
            # A token that is not an int (or any lookup failure) emits a
            # heading row with the raw token plus the column headers.
            register_exception()
            recid = rid
            return_val.append("""<tr><th colspan="13" align="left">
                               <h2>%s</h2></th></tr>""" % (recid,))
            return_val.append("""<tr>
                <th>recid</th>
                <th>cr. date</th>
                <th>mod. date</th>
                <th>DOI</th>
                <th>XML</th>
                <th>PDF</th>
                <th>PDF/A</th>
                <th>Complete record?</th>
                <th>arXiv number</th>
                <th>Copyright: authors</th>
                <th>CC-BY</th>
                <th>Funded by SCOAP3</th>
                <th>Category</th>
                <th>notes</th>
                <th>First delivery</th>
                <th>First AB delivery</th>
                <th>Last modification</th>
                <th>PDF/A upload</th>
                <th>DOI registration</th>
                <th>Delivery diff</th>
                <th>PDF/A diff</th>
            </tr>""")
    return ''.join(return_val)
Exemple #56
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record as rows of an HTML table.

    Each 999C5 reference is probed against the database (by report
    number, journal, then $a / $0); if exactly one record matches, that
    record is rendered with the 'hs' format, otherwise the reference's
    own subfields are rendered verbatim.

    @param reference_prefix a prefix displayed before each reference
    @param reference_suffix a suffix displayed after each reference
    """
    references = bfo.fields("999C5", escape=1, repeatable_subfields_p=True)

    out = ""
    # Last displayed $o value; used to suppress consecutive duplicates.
    last_o = ""

    if not references:
        return out

    out += "<table>"
    for reference in references:
        ref_out = []
        ref_out.append('<tr><td valign="top">')

        display_journal = ''
        display_report = ''
        clean_report = ''
        clean_journal = ''
        hits = []
        # $o (reference label): show only when it differs from the
        # previous one; dots are removed and brackets added if absent.
        if reference.has_key('o') and not reference['o'][0] == last_o:
            temp_ref = reference['o'][0].replace('.', '')
            if '[' in temp_ref and ']' in temp_ref:
                ref_out.append("<small>" + temp_ref + "</small> ")
            else:
                ref_out.append("<small>[" + temp_ref + "] </small> ")
            last_o = temp_ref
        ref_out.append("</td><td>")

        if reference_prefix:
            ref_out.append(reference_prefix)

        # $s: journal pubnote; $r: report number (any trailing
        # '[...]' suffix is cut off before searching).
        if reference.has_key('s'):
            display_journal = reference['s'][0]
            clean_journal = reference['s'][0]
        if reference.has_key('r'):
            if "[" in reference['r'][0] and "]" in reference['r'][0]:
                breaknum = reference['r'][0].find('[')
                newreference = reference['r'][0][:breaknum].strip()
                display_report = newreference
                clean_report = newreference
            else:
                display_report = reference['r'][0]
                clean_report = reference['r'][0]
        # Probe for the cited record: report number first, then
        # journal, then $a (rendered below as a DOI link), then the
        # explicit record id in $0. Each later probe runs only while
        # there is no unique hit yet.
        if clean_report:
            hits = search_unit(f='reportnumber', p=clean_report)
        if clean_journal and len(hits) != 1:
            hits = search_unit(f='journal', p=clean_journal)
        if reference.has_key('a') and len(hits) != 1:
            hits = search_unit(p=reference['a'][0])
        if reference.has_key('0') and len(hits) != 1:
            # check if the record exists in the database
            try:
                recID = int(reference['0'][0])
                if get_record(recID):
                    # since we already have a recID, we can assign it directly
                    # to the "hits" variable, so it will be handled in the last if statement
                    hits = [recID]
            except ValueError:
                pass
        if len(hits) == 1:
            # Unique match: render the matched record itself.
            ref_out.append('<small>' + format_record(list(hits)[0], 'hs') +
                           '</small>')
        else:
            # No unique match: render the raw subfields ($h, $t, $y,
            # $p, $m; $a as a dx.doi.org link; $u as a plain link;
            # each $i as a link to a 020__a search).
            if reference.has_key('h'):
                ref_out.append("<small> " + reference['h'][0] + ".</small>")
            if reference.has_key('t'):
                ref_out.append("<small> " + reference['t'][0] + "</small> -")
            if reference.has_key('y'):
                ref_out.append("<small> " + reference['y'][0] + ".</small>")
            if reference.has_key('p'):
                ref_out.append("<small> " + reference['p'][0] + ".</small>")
            if reference.has_key('m'):
                ref_out.append("<small> " +
                               reference['m'][0].replace(']]', ']') +
                               ".</small>")
            if reference.has_key('a'):
                ref_out.append("<small> <a href=\"http://dx.doi.org/" + \
                reference['a'][0] + "\">" + reference['a'][0]+ "</a></small>")
            if reference.has_key('u'):
                ref_out.append("<small> <a href=" + reference['u'][0] + ">" + \
                reference['u'][0]+ "</a></small>")
            if reference.has_key('i'):
                for r in reference['i']:
                    ref_out.append(
                        "<small> <a href=\"/search?ln=en&amp;p=020__a%3A" + r +
                        "\">" + r + "</a></small>")

            ref_out.append('<small>')
            if display_journal:
                ref_out.append(display_journal)
            if display_report:
                ref_out.append(' ' + display_report)
            ref_out.append("</small>")

        if reference_suffix:
            ref_out.append(reference_suffix)

        ref_out.append("</td></tr>")
        out += ' '.join(ref_out)

    return out + "</table>"
def main(input_file, dry_run, output_dir):
    """Synchronise PDG (084) data on records against a new PDG input file.

    Fetches all records currently carrying PDG data, parses the input
    file, and writes append/correct/delete record files (plus report
    files for missing/ambiguous/invalid lines) into output_dir.

    @param input_file: path of the new PDG data file
    @param dry_run: passed through to write_records_to_file
    @param output_dir: directory receiving the output/report files
    """
    # Ensure we have data to update first
    _print_out("--------------- Fetching current data ---------------")
    current_record_ids = perform_request_search(p=SEARCH_TERM)
    _print_out(
        str(len(current_record_ids)) +
        " records found matching search term \"" + SEARCH_TERM + "\"")
    _print_verbose("Record IDs found: " + str(current_record_ids))

    current_records = {}  # Struct {'recid': (record)}
    bad_record_ids = []
    # We don't need the records for new PDG data, they are appended
    for recid in current_record_ids:
        record = get_record(recid)
        if '084' not in record:
            bad_record_ids.append(str(recid))
            _print_out("WARNING: No 084 in fetched record %s" % (str(recid), ))
        else:
            current_records[recid] = record

    if len(bad_record_ids) > 0:
        _print_out("WARNING: Bad record IDs found! Printing to file")
        write_list_to_file(output_dir, "bad_record_ids", bad_record_ids)

    _print_out("--------------- Input Parsing ---------------")
    new_lines = get_lines_from_file(input_file)
    new_pdg_data = {}  # Struct {'recid': [pdg_data]}
    lines_missing = []
    lines_ambiguous = []
    lines_invalid = []
    _print_out("Finding records from input file")
    for i, line in enumerate(new_lines):
        status, r_id, data = parse_pdg_line(line)
        if status is ParseResult.Success:
            new_pdg_data[r_id] = data
            _print_verbose("line #" + str(i) + ": Success! Record ID " +
                           str(r_id) + " found for line " + line)
        elif status is ParseResult.Invalid:
            lines_invalid.append(line)
            _print_verbose("line #" + str(i) + ": Invalid line: " + line)
        elif status is ParseResult.Missing:
            lines_missing.append(line)
            _print_verbose("line #" + str(i) + ": Missing line: " + line)
        elif status is ParseResult.Ambiguous:
            lines_ambiguous.append(line)
            _print_verbose("line #" + str(i) + ": Ambiguous line: " + line)

    _print_out("--------------- Matching records ---------------")
    _print_out("Records matched to PDG data (valid): " +
               str(len(new_pdg_data)))
    _print_out("Missing records (not found): " + str(len(lines_missing)))
    _print_out("Ambiguous (multiple results): " + str(len(lines_ambiguous)))
    _print_out("Invalid lines (Dodgy data): " + str(len(lines_invalid)))

    # Fixed: these used `len(x) is not 0`, an *identity* comparison with
    # an int literal that only works by CPython's small-int caching.
    if lines_missing:
        write_list_to_file(output_dir, "missing-records.txt", lines_missing)
    if lines_ambiguous:
        write_list_to_file(output_dir, "ambiguous-records.txt",
                           lines_ambiguous)
    if lines_invalid:
        write_list_to_file(output_dir, "invalid-lines.txt", lines_invalid)

    # These lists contain record IDs of records to have PDG data either:
    #  - add, the PDG data should be appended (record was added to PDG)
    #  - compare, the PDG data should be compared for possible correction
    #  - delete, the PDG data should be removed (record was removed from PDG)
    ids_add = list(set(new_pdg_data.keys()) - set(current_record_ids))
    ids_compare = list(set(current_record_ids) & set(new_pdg_data.keys()))
    ids_delete = list(set(current_record_ids) - set(new_pdg_data.keys()))
    # At this point all rec IDs should be valid!

    _print_out("--------------- Update ---------------")

    appends, updates, deletions = None, None, None

    # Now, cycle through the lists
    if len(ids_add) > 0:
        appends = create_new_pdg_fields(ids_add, new_pdg_data)
    else:
        _print_out("No new fields to append.")
    if len(ids_compare) > 0:
        updates = check_existing_pdg_fields(ids_compare, new_pdg_data,
                                            current_records)
    else:
        _print_out("No duplicate records to compare.")
    if len(ids_delete) > 0:
        deletions = remove_pdg_fields(ids_delete, current_records)
    else:
        _print_out("No fields in records to be deleted.")

    _print_out("--------------- Writing Changes ---------------")
    if appends is not None:
        write_records_to_file(output_dir, "append.xml", appends, dry_run)
    else:
        _print_out("No records to append to.")

    # Fixed: was `len(updates) > 0`, which raised TypeError when no
    # records were compared and `updates` was still None.
    if updates:
        write_records_to_file(output_dir, "correct.xml", updates, dry_run)
    else:
        _print_out("No records to correct.")

    if deletions is not None:
        write_records_to_file(output_dir, "delete.xml", deletions, dry_run)
    else:
        _print_out("No records to delete from.")
Exemple #58
0
def task_run_core(name=NAME):
    """Performs a search to find records without a texkey, generates a new
    one and uploads the changes in chunks.

    Record ids can also be supplied via the 'recids' task parameter, in
    which case the last-updated bookkeeping is not touched.

    @param name: task name used for the last-updated bookkeeping
    @return: True on completion
    """
    recids = task_get_task_param('recids')
    if recids:
        # Explicit recids from the command line: don't record a run date.
        start_date = None
        write_message("processing recids from commandline")
    else:
        start_date = datetime.now()
        recids = intbitset()
        recids |= intbitset(
            perform_request_search(p='-035:spirestex -035:inspiretex',
                                   cc='HEP'))

        if task_get_task_param('all'):
            write_message("processing all records without texkey")
        else:
            _, last_date = fetch_last_updated(name)
            recids = recids & fetch_records_modified_since(last_date)
            write_message("processing records modified since: %s" % last_date)

    write_message("Found %s records to assign texkeys" % len(recids))
    processed_recids = []
    xml_to_process = []
    for count, recid in enumerate(recids):
        write_message("processing recid %s" % recid)

        # Check that the record does not have already a texkey
        has_texkey = False
        recstruct = get_record(recid)
        for instance in record_get_field_instances(recstruct,
                                                   tag="035",
                                                   ind1="",
                                                   ind2=""):
            try:
                provenance = field_get_subfield_values(instance, "9")[0]
            except IndexError:
                provenance = ""
            try:
                value = field_get_subfield_values(instance, "a")[0]
            except IndexError:
                value = ""
            provenances = ["SPIRESTeX", "INSPIRETeX"]
            if provenance in provenances and value:
                has_texkey = True
                write_message("INFO: Record %s has already texkey %s" %
                              (recid, value))

        if not has_texkey:
            # Renamed local (was `TexKeySeq`, easily confused with the
            # TexkeySeq class itself).
            texkey_seq = TexkeySeq()
            new_texkey = ""
            try:
                new_texkey = texkey_seq.next_value(recid)
            except TexkeyNoAuthorError:
                write_message(
                    "WARNING: Record %s has no first author or collaboration" %
                    recid)
                continue
            except TexkeyNoYearError:
                write_message("WARNING: Record %s has no year" % recid)
                continue
            write_message("Created texkey %s for record %d" %
                          (new_texkey, recid))
            xml = create_xml(recid, new_texkey)
            processed_recids.append(recid)
            xml_to_process.append(xml)

        # Fixed off-by-one: enumerate() is 0-based, so report count + 1
        # completed items (previously ended at "Done n-1 out of n").
        task_update_progress("Done %d out of %d." % (count + 1, len(recids)))
        task_sleep_now_if_required()

    # sequence ID to be used in all subsequent tasks
    sequence_id = str(random.randrange(1, 4294967296))
    if xml_to_process:
        process_chunk(xml_to_process, sequence_id)

    # Finally, index all the records processed
    # FIXME: Waiting for sequence id to be fixed
    # if processed_recids:
    #     submit_bibindex_task(processed_recids, sequence_id)

    if start_date:
        store_last_updated(0, start_date, name)

    return True
Exemple #59
0
                    if extract.get('year', False):
                        subfields.append(('y', str(extract['year'])))
                    if extract.get('page', False):
                        subfields.append(('c', str(extract['page'])))
                    new_field = create_field(subfields,
                                             global_position=field[4])
                    record_replace_field(record, '773', new_field, field[4])
                    break

        if not recid or recid == -1:
            # Record (probably) does not exist, flag for inserting into database
            # FIXME: Add some automatic deny/accept parameters, perhaps also bibmatch call
            insert_records.append(record)
        else:
            # Record exists, fetch existing record
            existing_record = get_record(recid)
            if existing_record is None:
                # Did not find existing record in database
                holdingpen_records.append(record)
                continue

            # We remove 500 field temporary/brief entry from revision if record already exists
            fields_500 = record_get_field_instances(record,
                                                    '500',
                                                    ind1="%",
                                                    ind2="%")
            if fields_500 is not None:
                field_positions = []
                for field in fields_500:
                    subfields = field_get_subfield_instances(field)
                    for subfield in subfields:
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags="",
                         checked=True,
                         displayed_records=None):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: the tags to be shown to the user
    @param checked: is the record checked by the user?
    @param displayed_records: records to be displayed on a given page,
        or None to apply no page filtering

    @returns: record formatted to be displayed or None
    """
    if update_commands and checked:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    # Options for xmlmarc2textmarc: produce plain text-marc output.
    textmarc_options = {
        "aleph-marc": 0,
        "correct-mode": 1,
        "append-mode": 0,
        "delete-mode": 0,
        "insert-mode": 0,
        "replace-mode": 0,
        "text-marc": 1
    }

    # Fixed: with the default displayed_records=None the original
    # evaluated `record_id not in None` and raised TypeError. None now
    # means "no page filtering".
    if displayed_records is not None and record_id not in displayed_records:
        return

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(
        old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and checked:
            # Text-marc diff between the stored and the updated record.
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(
                updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc,
                                      updated_record_textmarc, outputTags,
                                      record_id)
        else:
            # Plain listing, optionally restricted to the chosen tags.
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines():
                if not filter_tags or line.split()[0].replace(
                        '_', '') in outputTags:
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands and checked:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                         of=output_format,
                                         xml_record=xml_record,
                                         ln=language)
    return result