def check_records(records):
    """
    Align the 980__c value of Hindawi records with the <subject> found in
    the XML file attached to each record.

    Records whose 980__b is not "Hindawi" are skipped.  For the special
    article types (editorial, erratum, ...) the upper-cased subject is
    written to the record: the first 980__c is amended when it already
    holds one of the special values, otherwise a 'c' subfield is added to
    the first 980 field.

    @param records: iterable of bibcheck record objects.
    @raise Exception: when the subject is neither a special nor a regular
        article type.
    """
    special_subjects = ["Editorial", "Erratum", "Corrigendum", "Addendum",
                        "Letter to the Editor"]
    regular_subjects = ["Review Article", "Research Article", "Retraction"]
    amendable_values = ['ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
                        'LETTER TO THE EDITOR']
    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue
        record.warn("Working on this record")
        recdoc = BibRecDocs(int(record.record_id))
        docnames = recdoc.get_bibdoc_names()
        if not docnames:
            # No attached document at all: report it instead of crashing
            # with an IndexError on docnames[0].
            record.warn("No document can be found")
            continue
        doc = recdoc.get_bibdoc(docnames[0])
        try:
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            # Best effort: a missing/unreadable XML format is only a warning.
            record.warn("No document can be found")
            continue
        try:
            xml_content = xml_file.read()
        finally:
            # Always release the file handle, even if reading fails.
            xml_file.close()
        xml2 = xml.dom.minidom.parseString(xml_content)
        subject = get_value_in_tag(xml2, "subject")
        if subject in special_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field in amendable_values:
                # Only the first matching subfield is rewritten.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # Empty or unrelated 980__c: add a new 'c' subfield to the
                # first 980 field instead of overwriting anything.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in regular_subjects:
            raise Exception("This subject: %s does not exit in SCOAP3 system" % (subject,))
Beispiel #2
0
def late(req):
    """
    Write to req an HTML report listing, per journal in CFG_JOURNALS, every
    record with its DOI, title, DOI registration date and creation date.

    Row colour depends on (record creation date - DOI registration date):
    green when negative, orange when below one day, red otherwise, and grey
    when the DOI has no row in the doi table.

    @param req: request object; its content type is set to text/html and
        the report is printed to it.
    """
    req.content_type = "text/html"
    print >> req, pageheaderonly("Late journals", req=req)
    for journal in CFG_JOURNALS:
        print >> req, "<h2>%s</h2>" % escape(get_coll_i18nname(journal))
        results = get_collection_reclist(journal)
        print >> req, "<table>"
        print >> req, "<tr><th>DOI</th><th>Title</th><th>DOI registration</th><th>Arrival in SCOAP3</th></tr>"
        for recid in results:
            creation_date = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (recid, ))[0][0]
            record = get_record(recid)
            doi = record_get_field_value(record, '024', '7', code='a')
            # NOTE(review): the title is emitted unescaped, unlike the DOI --
            # presumably it may carry inline markup; confirm this is intended.
            title = record_get_field_value(record, '245', code='a')
            doi_rows = run_sql("SELECT creation_date FROM doi WHERE doi=%s", (doi, ))
            background = "#eee"
            doi_date = ''
            if doi_rows:
                doi_date = doi_rows[0][0]
                delay_days = (creation_date - doi_date).days
                if delay_days < 0:
                    background = "#66FF00"
                elif delay_days < 1:
                    background = "#FF6600"
                else:
                    background = "#FF0000"
            # The anchor is now closed with </a>; it used to be left open.
            print >> req, '<tr style="background-color: %s;"><td><a href="http://dx.doi.org/%s" target="_blank">%s</a></td><td>%s</td><td>%s</td><td>%s</td></tr>' % (
                    background,
                    escape(doi, True),
                    escape(doi),
                    title,
                    doi_date,
                    creation_date)
        print >> req, "</table>"
Beispiel #3
0
def check_records(records):
    """
    Add INSPIRE ID if missing

    For every record without an 'INSPIRE' value in 035__9, the INSPIRE id
    is looked up by DOI in the doi2inspireid table; on a miss, inspirehep.net
    is searched by DOI (and arXiv number when present) and a unique hit is
    stored in doi2inspireid.  A warning is issued on the record when the
    search returns no hit, several hits, or an id already bound to another
    DOI.

    @param records: iterable of bibcheck record objects.
    """
    _init_db()
    for record in records:
        if 'INSPIRE' in record_get_field_values(record, '035', code='9'):
            ## Has already the link. Good! Let's go on.
            continue
        doi = record_get_field_value(record, '024', ind1='7', code='a')
        arxiv = record_get_field_value(record, '037', code='a')
        query = 'doi:"%s"' % doi
        if arxiv:
            query += ' or %s' % arxiv
        inspireid = run_sql("SELECT inspireid FROM doi2inspireid WHERE doi=%s", (doi,))
        if inspireid:
            inspireid = inspireid[0][0]
        else:
            # Throttle the requests sent to the remote service.
            sleep(2)
            search_url = create_url("http://inspirehep.net/search",
                                    {'cc': 'HEP', 'of': 'id', 'p': query})
            response = urlopen(search_url)
            try:
                answer = response.read().strip()
            finally:
                response.close()
            # The answer looks like "[1, 2, 3]": drop the brackets and parse.
            inspireid = [int(elem.strip()) for elem in answer[1:-1].split(',')
                         if elem.strip()]
            if len(inspireid) == 1:
                inspireid = inspireid[0]
                try:
                    run_sql("INSERT INTO doi2inspireid(doi, inspireid, creation_date) VALUES(%s, %s, NOW())", (doi, inspireid))
                except IntegrityError:
                    other_doi = run_sql("SELECT doi FROM doi2inspireid WHERE inspireid=%s", (inspireid, ))[0][0]
                    record.warn("This record with doi %s is connected with INSPIRE id %s which is already connected to doi %s" % (doi, inspireid, other_doi))
                    continue
            elif not inspireid:
                # An empty result used to be reported as "More than one".
                record.warn("No inspire ID matches this record")
                continue
            else:
                record.warn("More than one inspire ID matches this record: %s" % inspireid)
                continue
Beispiel #4
0
 def _get_approximate_address(record):
     """
     Return the non-empty address parts of a record: the 371__a lines that
     do not already contain the zip code, followed by city (371__b),
     zip code (371__e) and country (371__d).
     """
     city = record_get_field_value(record, '371', code="b")
     zipcode = record_get_field_value(record, '371', code="e")
     country = record_get_field_value(record, '371', code="d")
     address = list(record_get_field_values(record, '371', code='a'))
     if zipcode:
         # Only filter when a zip code exists: '' is a substring of every
         # string, so an empty zip code used to discard all address lines.
         address = [value for value in address if zipcode not in value]
     address.extend([city, zipcode, country])
     return [elem for elem in address if elem]
    def test_get_legacy_recstruct(self):
        """BibField - legacy functions"""
        from invenio.search_engine import get_record as search_engine_get_record
        from invenio.bibrecord import record_get_field_value

        # Record 8 converted from BibField versus the search_engine structure.
        converted = get_record(8).legacy_create_recstruct()
        reference = search_engine_get_record(8)

        # First author (100__a) must be identical in both structures.
        converted_author = record_get_field_value(converted, '100', code='a')
        reference_author = record_get_field_value(reference, '100', code='a')
        self.assertEqual(converted_author, reference_author)

        # Both structures must hold the same number of 999 fields.
        converted_count = len(converted['999'])
        reference_count = len(reference['999'])
        self.assertEqual(converted_count, reference_count)
    def test_get_legacy_recstruct(self):
        """BibField - legacy functions"""
        from invenio.search_engine import get_record as search_engine_get_record
        from invenio.bibrecord import record_get_field_value

        # Legacy structure built by BibField, then the search_engine one.
        legacy_struct = get_record(8).legacy_create_recstruct()
        engine_struct = search_engine_get_record(8)

        # Compare the 100__a value of both representations.
        legacy_author = record_get_field_value(legacy_struct, '100', code='a')
        engine_author = record_get_field_value(engine_struct, '100', code='a')
        self.assertEqual(legacy_author, engine_author)

        # Compare how many 999 fields each representation carries.
        legacy_total = len(legacy_struct['999'])
        engine_total = len(engine_struct['999'])
        self.assertEqual(legacy_total, engine_total)
Beispiel #7
0
def national_authors_list(req, search_country):
    """
    Write to req a delimited listing of the authors affiliated with
    search_country, for every record matching country:'<search_country>'.

    The first author row of a record carries the record columns (running
    number, recid, title, creation date, publisher, total author count);
    further authors of the same record get rows with those columns empty.
    Only authors having a 'w' subfield containing search_country are listed.

    @param req: request object; response headers are set for a CSV
        attachment and rows are written to it.
    @param search_country: country name to search and filter on.
    """
    req.content_type = 'text/csv; charset=utf-8'
    req.headers_out['content-disposition'] = ('attachment; '
                                              'filename=national_authors_list.csv')
    ids = perform_request_search(p="country:'%s'" % (search_country,))
    # NOTE(review): the header is ';'-separated while data rows use '|' --
    # confirm with the consumers of this export which delimiter is wanted.
    req.write("#;RECID;Title;Creation date;Publisher;Total # of authors;Authors name(given country only);Authors country;Authors affiliations\n")

    for number, recid in enumerate(ids):
        # Fetch the record once instead of once per extracted field.
        rec = get_record(recid)
        title = record_get_field_value(rec, '245', code="a")
        del_date = get_creation_date(recid)
        publisher = record_get_field_value(rec, '980', code="b")
        if not publisher:
            publisher = record_get_field_value(rec, '541', code="a")

        authors = []
        author_count = 0
        for f in ['100', '700']:
            if f in rec:
                for auth in rec[f]:
                    author_count += 1
                    aff = ''
                    name = ''
                    country = ''
                    hit = False
                    for subfield, value in auth[0]:
                        if subfield == 'a':
                            name = value
                        if subfield in ['v', 'u']:
                            if aff:
                                aff += ', ' + value
                            else:
                                aff = value
                        if subfield == 'w':
                            if country:
                                country += ', ' + value
                            else:
                                country = value
                            if search_country in value:
                                hit = True

                    if hit:
                        authors.append({'name': name,
                                        'affiliation': aff.replace('\n', ''),
                                        'country': country})

        for i, author in enumerate(authors):
            if i == 0:
                # Nine placeholders for nine values, matching the nine header
                # columns (the former eleven placeholders raised TypeError).
                req.write("%s|%s|%s|%s|%s|%s|%s|%s|%s\n" % (number+1, recid, title.replace('\n', ''), del_date, publisher, author_count, author['name'], author['country'], author['affiliation']))
            else:
                # Six empty record columns, then the three author columns.
                req.write("||||||%s|%s|%s\n" % (author['name'], author['country'], author['affiliation']))
Beispiel #8
0
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Tell whether the record carries the given recid, OAI id or sysno."""
    # Control field 001 is compared with the stringified record id.
    if (record_has_field(record, "001")
            and record_get_field_value(record, "001", "%", "%") == str(recid)):
        return True

    # OAI identifier: OAIID_TAG is split into tag, indicators and code.
    oai_field = OAIID_TAG[0:3]
    if (record_has_field(record, oai_field)
            and record_get_field_value(record, oai_field, OAIID_TAG[3],
                                       OAIID_TAG[4],
                                       OAIID_TAG[5]) == rec_oaiid):
        return True

    # System number: SYSNO_TAG is split the same way.
    sysno_field = SYSNO_TAG[0:3]
    if (record_has_field(record, sysno_field)
            and record_get_field_value(record, sysno_field, SYSNO_TAG[3],
                                       SYSNO_TAG[4],
                                       SYSNO_TAG[5]) == rec_sysno):
        return True

    return False
    def get_short_affiliation_inspire(self, short_aff_rec):
        """
        Return the country of an affiliation record (371__d, else 371__g),
        replaced by an upper-cased laboratory acronym when the 110__g value
        contains one of the known laboratory names.
        """
        country = (record_get_field_value(short_aff_rec, tag='371', code='d')
                   or record_get_field_value(short_aff_rec, tag='371', code='g'))

        # When a 510 field exists, the institute is read from the record
        # referenced by 510__0 instead.
        if '510' in short_aff_rec:
            linked_id = record_get_field_value(short_aff_rec, tag='510', code='0')
            short_aff_rec = get_record(linked_id)

        institute = record_get_field_value(short_aff_rec, tag='110', code='g').lower()
        # No break on purpose: the last matching acronym wins.
        for lab in ('cern', 'kek', 'fnal', 'slac', 'desy', 'jinr'):
            if lab in institute:
                country = lab.upper()

        return country
def main():
    for journal in CFG_JOURNALS:
        name = get_coll_i18nname(journal)
        reclist = get_collection_reclist(journal)
        print "<h2>%s</h2>" % escape(name)
        if not reclist:
            print "<p>None yet.</p>"
            continue
        print "<p><ul>"
        for recid in reclist:
            record = get_record(recid)
            title = remove_html_markup(record_get_field_value(record, '245', code='a'), remove_escaped_chars_p=False).strip()
            doi = record_get_field_value(record, '024', '7', code='a')
            print '<li><a href="http://dx.doi.org/%s" target="_blank">%s</a>: %s</li>' % (escape(doi, True), escape(doi), title)
        print "</ul></p>"
Beispiel #11
0
def check_records(records):
    """
    Pass the title (245__a) and abstract (520__a) of Springer records
    through strip_latex and write the results back to every matching
    subfield.
    """
    for record in records:
        if record_get_field_value(record, '980', code='b') != 'Springer':
            # Other publishers are not touched.
            continue

        raw_title = record_get_field_value(record, '245', code='a')
        raw_abstract = record_get_field_value(record, '520', code='a')
        cleaned = {
            '245__a': strip_latex(raw_title),
            '520__a': strip_latex(raw_abstract),
        }

        # Title first, then abstract, as every occurrence gets amended.
        for tag in ('245__a', '520__a'):
            for position, _unused in record.iterfield(tag):
                record.amend_field(position, cleaned[tag])
Beispiel #12
0
def get_recids_changes(last_recid, max_recs=10000):
    """
    Return the records modified after (or, on the first run, from) the
    modification date of a reference record, classified by kind of change.

    @param last_recid: id of the reference record; -1 means "start from the
        oldest record", in which case that record itself is included.
    @param max_recs: maximum number of rows fetched from bibrec.
    @return: dict with keys 'DELETED', 'CHANGED' and 'ADDED', each a list
        of recids; None when no reference record or modification date can
        be found.
    """
    search_op = '>'

    if last_recid == -1:
        l = list(dbquery.run_sql("SELECT id FROM bibrec ORDER BY creation_date ASC LIMIT 1"))
        search_op = '>='
    else:
        # let's make sure we have a valid recid (or get the close valid one)
        # ORDER BY makes "closest" deterministic instead of engine-dependent.
        l = list(dbquery.run_sql("SELECT id FROM bibrec WHERE id >= %s ORDER BY id ASC LIMIT 1", (last_recid,)))
    if not l:
        # Also covers an empty bibrec table on the very first run, which
        # used to raise IndexError below.
        return
    last_recid = l[0][0]

    # there is not api to get this (at least i haven't found it)
    # NOTE(review): '%i' looks like the MySQL DATE_FORMAT minutes specifier;
    # confirm get_modification_date forwards fmt to the database.
    mod_date = search_engine.get_modification_date(last_recid, fmt="%Y-%m-%d %H:%i:%S")
    if not mod_date:
        return
    # Ordered so that LIMIT keeps the oldest changes rather than an
    # arbitrary subset of the matching rows.
    modified_records = list(dbquery.run_sql("SELECT id,modification_date, creation_date FROM bibrec "
                    "WHERE modification_date " + search_op + "%s "
                    "ORDER BY modification_date ASC, id ASC LIMIT %s", (mod_date, max_recs)))

    out = {'DELETED': [], 'CHANGED': [], 'ADDED': []}
    for recid, mod_date, create_date in modified_records:
        if mod_date == create_date:
            # Never modified since creation: a new record.
            out['ADDED'].append(recid)
        else:
            rec = search_engine.get_record(recid)
            status = bibrecord.record_get_field_value(rec, tag='980', code='c')
            if status == 'DELETED':
                out['DELETED'].append(recid)
            else:
                out['CHANGED'].append(recid)
    return out
def check_records(records):
    """
    Add INSPIRE ID if missing

    Records already carrying 'INSPIRE' in 035__9 are skipped.  Otherwise the
    INSPIRE id is read from the doi2inspireid table by DOI; when absent,
    inspirehep.net is queried by DOI (plus arXiv number if any) and a single
    hit is inserted into doi2inspireid.  The record gets a warning when the
    search yields no hit, several hits, or an id already tied to another
    DOI.

    @param records: iterable of bibcheck record objects.
    """
    _init_db()
    for record in records:
        if 'INSPIRE' in record_get_field_values(record, '035', code='9'):
            ## Has already the link. Good! Let's go on.
            continue
        doi = record_get_field_value(record, '024', ind1='7', code='a')
        arxiv = record_get_field_value(record, '037', code='a')
        query = 'doi:"%s"' % doi
        if arxiv:
            query += ' or %s' % arxiv
        inspireid = run_sql("SELECT inspireid FROM doi2inspireid WHERE doi=%s",
                            (doi, ))
        if inspireid:
            inspireid = inspireid[0][0]
        else:
            # Be gentle with the remote service between two requests.
            sleep(2)
            search_url = create_url("http://inspirehep.net/search", {
                'cc': 'HEP',
                'of': 'id',
                'p': query
            })
            response = urlopen(search_url)
            try:
                answer = response.read().strip()
            finally:
                response.close()
            # Strip the surrounding brackets of "[1, 2]" and parse the ids.
            inspireid = [
                int(elem.strip()) for elem in answer[1:-1].split(',')
                if elem.strip()
            ]
            if len(inspireid) == 1:
                inspireid = inspireid[0]
                try:
                    run_sql(
                        "INSERT INTO doi2inspireid(doi, inspireid, creation_date) VALUES(%s, %s, NOW())",
                        (doi, inspireid))
                except IntegrityError:
                    other_doi = run_sql(
                        "SELECT doi FROM doi2inspireid WHERE inspireid=%s",
                        (inspireid, ))[0][0]
                    record.warn(
                        "This record with doi %s is connected with INSPIRE id %s which is already connected to doi %s"
                        % (doi, inspireid, other_doi))
                    continue
            elif not inspireid:
                # Zero hits were previously reported as "More than one".
                record.warn("No inspire ID matches this record")
                continue
            else:
                record.warn(
                    "More than one inspire ID matches this record: %s" %
                    inspireid)
                continue
Beispiel #14
0
    def test_BibUpload_revision_verifier(self):
        """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios"""

        # The uploads below are order-dependent: each step reuses the recid
        # and the 005 value produced by the previous one.
        recs = xml_marc_to_records(self.rev1)
        # --> Revision 1 submitted
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='insert')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        # Field 005 of the freshly inserted record.
        rev = record_get_field_value(record, '005', '', '')
        # NOTE(review): this parse result is overwritten before being used.
        recs = xml_marc_to_records(self.rev1)
        # Replace the placeholder recid ('123456789') and placeholder 005
        # value ('20110101000000.0') in the fixtures with the real ones.
        self.rev2 = self.rev2.replace('123456789', str(self.recid))
        self.rev2 = self.rev2.replace('20110101000000.0', rev)
        self.rev1_modified = self.rev1_modified.replace(
            '123456789', str(self.recid))
        self.rev1_modified = self.rev1_modified.replace(
            '20110101000000.0', rev)
        self.final_xm = self.final_xm.replace('123456789', str(self.recid))

        # NOTE(review): the first of these two parses is immediately
        # overwritten by the second.
        recs = xml_marc_to_records(self.rev1)
        recs = xml_marc_to_records(self.rev2)
        # --> Revision 2 submitted
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        # Swap the previous 005 value in rev2 for the one just stored, and
        # fill the placeholders of rev2_modified.
        self.rev2 = self.rev2.replace(
            rev, record_get_field_value(record, '005', '', ''))
        self.rev2_modified = self.rev2_modified.replace(
            '123456789', str(self.recid))
        self.rev2_modified = self.rev2_modified.replace(
            '20110101000000.0', record_get_field_value(record, '005', '', ''))
        # --> Revision 1 modified submitted
        recs = xml_marc_to_records(self.rev1_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, '005', '', '')
        self.final_xm = self.final_xm.replace('20110101000000.0', rev)
        # The stored record must match the expected final MARCXML; an empty
        # string from compare_xmbuffers is the success value asserted here.
        self.assertEqual(
            compare_xmbuffers(self.final_xm, print_record(self.recid, 'xm')),
            '')
        # --> Revision 2 modified submitted
        recs = xml_marc_to_records(self.rev2_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        # This last upload must return error code 2 (presumably the conflict
        # scenario named in the docstring -- confirm against bibupload).
        self.assertEquals(error, 2)
Beispiel #15
0
def check_records(records, empty=False):
    """
    Adds ISSN to records.

    The journal name in 773__p is matched case-insensitively against the
    keys of CFG_ISSN_MAP.  On a match, the first 022__a is amended when its
    value differs from the mapped ISSN, or a new 022 field is added when the
    record has none.

    @param records: iterable of bibcheck record objects.
    @param empty: not used by this function.
    """
    for record in records:
        journal = record_get_field_value(record, '773', code='p')
        record.warn(journal)
        journal_key = journal.lower()
        for journal_name in CFG_ISSN_MAP:
            if journal_name.lower() != journal_key:
                continue
            issn = CFG_ISSN_MAP[journal_name]
            if '022' in record:
                # Compare by value: the former identity test ("is not")
                # was true for equal strings stored in distinct objects,
                # causing needless amendments.
                if issn != record_get_field_value(record, '022', code='a'):
                    for position, value in record.iterfield('022__a'):
                        record.amend_field(position, issn)
                        record.warn("Amending")
                        break
            else:
                record.add_field('022__', value='', subfields=[('a', issn)])
                record.warn("Adding")
Beispiel #16
0
def articles(req, i, mode='html'):
    """
    List the articles written by authors from the i-th entry of
    _AFFILIATIONS, grouped by journal.

    @param req: request object the response is written to.
    @param i: index into _AFFILIATIONS (anything int() accepts).
    @param mode: 'text' sends a plain-text attachment with one DOI URL per
        article; any other value returns an HTML page.
    @return: the HTML page, or "" in text mode.
    @raise SERVER_RETURN: HTTP_BAD_REQUEST when i is not a valid index.
    """
    # Explicit validation: an assert would be stripped under "python -O"
    # and the bare except used to hide unrelated errors.
    try:
        i = int(i)
    except (TypeError, ValueError):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)
    if not 0 <= i < len(_AFFILIATIONS):
        raise SERVER_RETURN(HTTP_BAD_REQUEST)
    nation = _AFFILIATIONS[i]
    ret = []
    page_title = "SCOAP3 Articles by authors from %s" % nation
    if mode == 'text':
        req.content_type = "text/plain; charset=utf8"
        req.headers_out['content-disposition'] = ('attachment; filename=%s.txt'
                                                  % nation)
    else:
        req.content_type = "text/html"
    if mode == 'text':
        print >> req, page_title
        print >> req, "-" * len(page_title)
    query = _build_query(nation)
    for journal in CFG_JOURNALS:
        results = perform_request_search(p=query, cc=journal, of='intbitset')
        if not results:
            continue
        # Closing tag fixed: it used to be emitted as "</h2" without ">".
        ret.append("<h2>%s (%s)</h2>" % (escape(get_coll_i18nname(journal)),
                                         len(results)))
        ret.append("<p><ul>")
        if mode == 'text':
            print >> req, ""
            print >> req, get_coll_i18nname(journal)
        for recid in results:
            record = get_record(recid)
            title = record_get_field_value(record, '245', code='a')
            doi = record_get_field_value(record, '024', '7', code='a')
            if mode == 'text':
                print >> req, "http://dx.doi.org/%s" % doi

            li = ("<li><a href='http://dx.doi.org/{0}' "
                  "target='_blank'>{1}</a>: {2}</li>")
            ret.append(li.format(escape(doi, True), escape(doi), title))
        ret.append("</ul></p>")
    body = '\n'.join(ret)
    if mode == 'text':
        # Everything was already printed to req above.
        return ""
    return page(req=req, title=page_title, body=body)
Beispiel #17
0
def get_template_data(record):
    """
    Build the (queue, subject, content) template triple for the "HEP_ref"
    queue from a record.

    @param record: record structure; field 001 and the 037__a values are
        read from it.
    @return: tuple (queue, subject, content) of strings.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value, record_get_field_values

    recid = record_get_field_value(record, '001', '', '', '')
    # The record has to be the first argument; it was missing, so the tag
    # string '037' was being passed as the record.
    report_numbers = record_get_field_values(record, '037', '_', '_', 'a')
    queue = "HEP_ref"
    subject = "Refs for #%s %s" % (recid, ' '.join(report_numbers))
    content = "%s/record/edit/#state=edit&recid=%s" % (CFG_SITE_URL, recid)
    return (queue, subject, content)
def get_template_data(record):
    """
    Build the (queue, subject, content) template triple for the
    "AUTHORS_long_list" queue from a record.

    @param record: record structure; field 001 and the 037__a values are
        read from it.
    @return: tuple (queue, subject, content) of strings.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value, record_get_field_values

    recid = record_get_field_value(record, '001', '', '', '')
    # Pass the record explicitly: the call used to omit it, shifting every
    # argument by one position.
    report_numbers = record_get_field_values(record, '037', '_', '_', 'a')
    queue = "AUTHORS_long_list"
    subject = "long author list in #%s %s" % (recid, ' '.join(report_numbers))
    content = "Please update the authors in %s/record/edit/%s" % (CFG_SITE_URL, recid)
    return (queue, subject, content)
    def _get_approximate_address(record):
        """
        Return the address parts of a record as a list.

        A record with a 111__c value is treated as a conference and that
        value is split on ', '.  Otherwise city, zip code and country come
        from 371 b/e/d; the 371__a lines are added when allfields is set or
        when one of the three is missing.
        """
        # Conference case: 111__c holds the whole place description.
        if record_get_field_values(record, '111', code='c'):
            place = record_get_field_value(record, '111', code='c')
            return place.split(', ')

        city, zipcode, country = [
            record_get_field_value(record, '371', code=subfield) or None
            for subfield in ('b', 'e', 'd')
        ]
        address = [city, zipcode, country]
        if not allfields and None not in address:
            # Complete short address and no request for every field.
            return address

        addresses = record_get_field_values(record, '371', code='a')
        if zipcode:
            # Skip free-text lines that already mention the zip code.
            addresses = [el for el in addresses if zipcode not in el]
        addresses.extend(address)
        return [el for el in addresses if el]
def get_template_data(record):
    """
    Build the (queue, subject, content) template triple for the
    "INST_add+cor" queue from a record.

    @param record: record structure; field 001 is read from it.
    @return: tuple (queue, subject, content) of strings.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value

    recid = record_get_field_value(record, '001', '', '', '')
    edit_info = {'site': CFG_SITE_URL, 'recid': recid}
    # The tab characters reproduce the text exactly as it was laid out in
    # the former backslash-continued literal.
    template = (
        "The record %(site)s/record/edit/%(recid)s has an unknown affiliation. The information given is:\n\n"
        "\tPlease create an Institutions record, if not done in the meantime, and update the paper at"
        "\t %(site)s/record/edit/%(recid)s with the correct inst short name."
    )
    return ("INST_add+cor", "new inst", template % edit_info)
def retrieve_field_values(curdir,
                          field_name,
                          separator=None,
                          system_number_file='SN',
                          tag=None):
    """
    This is a handy function to retrieve values either from the current
    submission directory, when a form has been just submitted, or from
    an existing record (e.g. during MBI action).

    @param curdir: is the current submission directory.
    @type curdir: string
    @param field_name: is the form field name that might exists on disk.
    @type field_name: string
    @param separator: is an optional separator. If it exists, it will be used
        to retrieve multiple values contained in the field.
    @type separator: string
    @param system_number_file: is the name of the file on disk in curdir, that
        is supposed to contain the record id.
    @type system_number_file: string
    @param tag: is the full MARC tag (tag+ind1+ind2+code) that should
        contain values. If not specified, only values in curdir will
        be retrieved.
    @type tag: 6-chars
    @return: the field value(s); an empty list when nothing is found.
    @rtype: list of strings.

    @note: if field_name exists in curdir it will take precedence over
        retrieving the values from the record.
    """
    field_file = os.path.join(curdir, field_name)
    if os.path.exists(field_file):
        # Context manager so the handle is released right after reading.
        with open(field_file) as field_fd:
            field_value = field_fd.read()
        if separator is not None:
            # Blank chunks between separators are dropped.
            return [
                value.strip() for value in field_value.split(separator)
                if value.strip()
            ]
        return [field_value.strip()]
    if tag is not None:
        system_number_path = os.path.join(curdir, system_number_file)
        if os.path.exists(system_number_path):
            with open(system_number_path) as system_number_fd:
                recid = int(system_number_fd.read().strip())
            record = get_record(recid)
            if separator:
                # A separator means the caller expects multiple values.
                return record_get_field_values(record, tag[:3], tag[3], tag[4],
                                               tag[5])
            return [
                record_get_field_value(record, tag[:3], tag[3], tag[4],
                                       tag[5])
            ]
    return []
Beispiel #22
0
def check_records(records):
    """
    Fill in 260__b (publisher) on records lacking it, deriving the value
    from the journal name in 773__p via CFG_JOURNAL_TO_PUBLISHER_MAP.
    Records of unknown journals only receive a warning.
    """
    for record in records:
        journal = record_get_field_value(record, '773', code='p')
        if record_get_field_value(record, '260', code='b'):
            # Publisher already present: nothing to do.
            continue
        if journal not in CFG_JOURNAL_TO_PUBLISHER_MAP:
            record.warn("Unknown journal: %s" % journal)
            continue
        publisher = CFG_JOURNAL_TO_PUBLISHER_MAP[journal]

        # Look at the first existing 260 field only, if there is one.
        first_260 = next(iter(record.iterfield('260__%')), None)
        if first_260 is None:
            ## The field does not already exist. Let's add a whole field
            record.add_field('260__', value='', subfields=[('b', publisher)])
        else:
            ## A field 260 already exist. let's add a subfield.
            position, _unused = first_260
            record.add_subfield(position, 'b', publisher)
Beispiel #23
0
def get_xml_from_textmarc(recid, textmarc_record, uid=None):
    """
    Convert textmarc to marcxml and return the result of the conversion

    @param recid: id of the record that is being converted
    @type: int

    @param textmarc_record: record content in textmarc format
    @type: string

    @param uid: user id used to look up the cache file, whose controlfields
        (00x except 001) are prepended to the input when the cache exists

    @return: dictionary with the following keys:
            * resultMsg: message describing conversion status
            * resultXML: xml resulting from conversion
            * parse_error: in case of error, a description of it
    @rtype: dict
    """
    response = {}
    # Let's remove empty lines
    textmarc_record = os.linesep.join([s for s in textmarc_record.splitlines() if s])

    # Create temp file with textmarc to be converted by textmarc2xmlmarc
    (file_descriptor, file_name) = tempfile.mkstemp()
    try:
        f = os.fdopen(file_descriptor, "w")
        try:
            # If there is a cache file, add the controlfields
            if cache_exists(recid, uid):
                record = get_cache_contents(recid, uid)[2]
                for tag in record:
                    if tag.startswith("00") and tag != "001":  # It is a controlfield
                        f.write("%09d %s %s\n" % (recid, tag + "__", record_get_field_value(record, tag)))

            # Write content appending sysno at beginning
            for line in textmarc_record.splitlines():
                f.write("%09d %s\n" % (recid, re.sub(r"\s+", " ", line.strip())))
        finally:
            # Close (and flush) even when a write fails.
            f.close()

        old_stdout = sys.stdout
        try:
            # Redirect output, transform, restore old references
            new_stdout = StringIO()
            sys.stdout = new_stdout
            try:
                transform_file(file_name)
                response['resultMsg'] = 'textmarc_parsing_success'
                response['resultXML'] = new_stdout.getvalue()
            except ParseError as e:
                # Something went wrong, notify user
                response['resultXML'] = ""
                response['resultMsg'] = 'textmarc_parsing_error'
                response['parse_error'] = [e.lineno, " ".join(e.linecontent.split()[1:]), e.message]
        finally:
            sys.stdout = old_stdout
    finally:
        # mkstemp leaves deletion to the caller; the file used to be left
        # behind on every call.
        os.remove(file_name)

    return response
def check_records(records):
    """
    Align the 980__c value of Hindawi records with the <subject> found in
    the XML file attached to each record.

    Records whose 980__b is not "Hindawi" are skipped.  For the special
    article types (editorial, erratum, ...) the upper-cased subject is
    written to the record: the first 980__c is amended when it already
    holds one of the special values, otherwise a 'c' subfield is added to
    the first 980 field.

    @param records: iterable of bibcheck record objects.
    @raise Exception: when the subject is neither a special nor a regular
        article type.
    """
    special_subjects = [
        "Editorial", "Erratum", "Corrigendum", "Addendum",
        "Letter to the Editor"
    ]
    regular_subjects = ["Review Article", "Research Article", "Retraction"]
    amendable_values = [
        'ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
        'LETTER TO THE EDITOR'
    ]
    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue
        record.warn("Working on this record")
        recdoc = BibRecDocs(int(record.record_id))
        docnames = recdoc.get_bibdoc_names()
        if not docnames:
            # A record without any attached document is reported rather
            # than failing with an IndexError on docnames[0].
            record.warn("No document can be found")
            continue
        doc = recdoc.get_bibdoc(docnames[0])
        try:
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            # Best effort: a missing/unreadable XML format is only a warning.
            record.warn("No document can be found")
            continue
        try:
            xml_content = xml_file.read()
        finally:
            # The handle is closed whatever happens while reading.
            xml_file.close()
        xml2 = xml.dom.minidom.parseString(xml_content)
        subject = get_value_in_tag(xml2, "subject")
        if subject in special_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field in amendable_values:
                # Rewrite the first existing 980__c only.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # Empty or unrelated 980__c: add a 'c' subfield to the
                # first 980 field.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in regular_subjects:
            raise Exception(
                "This subject: %s does not exit in SCOAP3 system" %
                (subject, ))
Beispiel #25
0
def get_template_data(record):
    """
    Build the (queue, subject, content) template triple for the "Exp"
    queue from a record.

    @param record: record structure; field 001 and the 037__a values are
        read from it.
    @return: tuple (queue, subject, content) of strings.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value, record_get_field_values

    recid = record_get_field_value(record, '001', '', '', '')
    # The record argument was missing from this call, so '037' was taken
    # as the record and the remaining arguments were shifted.
    report_numbers = record_get_field_values(record, '037', '_', '_', 'a')
    queue = "Exp"
    subject = "unknown experiment in #%s %s" % (recid, ' '.join(report_numbers))
    # Explicit "\t" escapes keep the text identical to the former
    # backslash-continued literal, whose continuation lines began with tabs.
    content = ("This unknown experiment: \n\n"
               "\thas appeared in this paper. Please create a record in Experiments and update the paper at"
               "\t %s/record/edit/%s") % (CFG_SITE_URL, recid)
    return (queue, subject, content)
Beispiel #26
0
def generate_mediaexport_album(recid, resource_id, json_format=True):
    """Build the media-export description of a record as an album.

    The album entry carries the titles read from the record (245 and 246
    subfield a), some bookkeeping values and one image entry per non-deleted
    document attached to the record.

    :param recid: The record id.
    :param resource_id: The report number; stored as the album ``id`` and
        forwarded to ``generate_mediaexport`` for every image.
    :param bool json_format: If true, returns JSON dump, otherwise a dictionary
    :return: ``''`` when ``CFG_JSON_AVAILABLE`` is false; otherwise the
        structure ``{'entries': [{'entry': ...}]}`` as a JSON string or as a
        dictionary, depending on ``json_format``.
    """
    # Fields that are required
    MEDIA_CONFIG = {
        'title_en': ('245', ' ', ' ', 'a'),
        'title_fr': ('246', ' ', '1', 'a'),
    }
    bibarchive = BibRecDocs(recid)
    bibarchive_with_deleted = BibRecDocs(recid, deleted_too=True)
    bibdocs = bibarchive.list_bibdocs()
    # Deleted documents are kept in the numbering so that the position of a
    # document stays the same after one of its siblings is deleted.
    doc_numbers = [
        (bibdoc.get_id(), bibdoc.get_docname(), bibdoc)
        for bibdoc in bibarchive_with_deleted.list_bibdocs()
    ]
    doc_numbers.sort()
    # Get the record
    record = get_record(recid)

    # Build the response
    entry = {}
    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])

    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['entry_date'] = get_creation_date(recid)
    # Only the non-deleted documents count towards the total.
    entry['total'] = len(bibdocs)
    entry['type'] = 'album'
    entry['images'] = []

    # For each non-deleted doc create the corresponding image entry. The
    # 1-based position comes straight from enumerate() instead of searching
    # the list again for every document.
    for bibdoc_number, (_docid, _docname, bibdoc) in enumerate(doc_numbers, 1):
        if bibdoc.deleted_p():
            continue
        image = generate_mediaexport(recid, True, resource_id, bibdoc_number, False)
        image['tirage_id'] = bibdoc_number
        image['id'] = '{0}-{1}'.format(image['id'], bibdoc_number)
        entry['images'].append(image)

    final = {'entries': [{'entry': entry}]}

    # NOTE(review): the empty string is returned even when a dictionary was
    # requested; kept as-is because callers may rely on it.
    if not CFG_JSON_AVAILABLE:
        return ''

    if json_format:
        return json.dumps(final)
    return final
    def test_BibUpload_revision_verifier(self):
        """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios"""

        # --> Revision 1 submitted
        recs = xml_marc_to_records(self.rev1)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="insert")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, "005", "", "")

        # Replace the placeholder record id and the placeholder 005 value in
        # the fixtures with the values of the record that was just inserted.
        self.rev2 = self.rev2.replace("123456789", str(self.recid))
        self.rev2 = self.rev2.replace("20110101000000.0", rev)
        self.rev1_modified = self.rev1_modified.replace("123456789", str(self.recid))
        self.rev1_modified = self.rev1_modified.replace("20110101000000.0", rev)
        self.final_xm = self.final_xm.replace("123456789", str(self.recid))

        # --> Revision 2 submitted
        recs = xml_marc_to_records(self.rev2)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        # 005 value of the record after the second upload.
        rev2_stamp = record_get_field_value(record, "005", "", "")
        self.rev2 = self.rev2.replace(rev, rev2_stamp)
        self.rev2_modified = self.rev2_modified.replace("123456789", str(self.recid))
        self.rev2_modified = self.rev2_modified.replace("20110101000000.0", rev2_stamp)

        # --> Revision 1 modified submitted
        recs = xml_marc_to_records(self.rev1_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, "005", "", "")
        self.final_xm = self.final_xm.replace("20110101000000.0", rev)
        self.assertEqual(compare_xmbuffers(self.final_xm, print_record(self.recid, "xm")), "")

        # --> Revision 2 modified submitted
        recs = xml_marc_to_records(self.rev2_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        # The last upload is expected to report error code 2.
        self.assertEqual(error, 2)
def record_get_recid(record):
    """
    Returns the recid of the given record.

    When the record carries a 001 field its value is returned directly.
    Otherwise an arXiv OAI identifier is extracted from the record (first from
    the tag configured in CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, then from 035__z)
    and searched for in the database.

    @param record: bibrecord structure

    @return: the recid as a string when tag 001 is present or when one of the
             searches returns exactly one hit; 0 when no arXiv OAI id is
             found in the record; -1 when CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG is
             too short to be split into tag, indicators and code; None when
             no search returns exactly one hit.
    """
    if record_has_field(record, "001"):
        return str(record_get_field_value(record, tag="001"))

    # FIXME: CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG is not set correctly for inspire
    # When OAI config is OK, use bibrecord.record_get_oaiid
    old_oaiid_tag = "035__z"
    try:
        tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3]
        ind1 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3]
        ind2 = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]
        code = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]
    except IndexError:
        sys.stderr.write("Invalid CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG")
        return -1

    oai_id = _first_arxiv_oai_id(
        record_get_field_values(record, tag, ind1, ind2, code))
    if oai_id is None:
        # Fall back to the legacy location of the OAI identifier.
        oai_id = _first_arxiv_oai_id(
            record_get_field_values(record, old_oaiid_tag[:3],
                                    old_oaiid_tag[3], old_oaiid_tag[4],
                                    old_oaiid_tag[5]))
        if oai_id is None:
            sys.stderr.write("No oai id found for record")
            return 0

    # Try different patterns, from the configured tag to the report number.
    queries = [
        "%s__%s:%s" % (tag, code, oai_id),
        "%s__%s:%s" % (old_oaiid_tag[:3], old_oaiid_tag[5], oai_id),
        "reportnumber:arXiv:%s" % (oai_id.split(":")[-1],),
    ]
    for query in queries:
        hits = search_pattern(p=query).tolist()
        # Only an unambiguous match is accepted.
        if len(hits) == 1:
            return str(hits[0])
    return None


def _first_arxiv_oai_id(fieldvalues):
    """Return the first value starting with "oai:arXiv.org:", or None."""
    for fieldvalue in fieldvalues:
        if fieldvalue.startswith("oai:arXiv.org:"):
            return fieldvalue
    return None
Beispiel #29
0
def populate_cnums():
    """
    Populates table seqSTORE with the cnums present in CONFERENCE records

    Every record of the "Conferences" collection matching 111__g:C* is
    visited; a cnum that is not known yet is inserted and reported on stdout.
    """
    matching_recids = perform_request_search(cc="Conferences", p="111__g:C*", rg=0)

    for recid in matching_recids:
        conference = get_bibrecord(recid)
        cnum = record_get_field_value(conference, tag="111", ind1="", ind2="", code="g")
        # Nothing to do for records without a cnum or with a known one.
        if not cnum or _cnum_exists(cnum):
            continue
        _insert_cnum(cnum)
        print("cnum %s from record %s inserted" % (cnum, recid))
Beispiel #30
0
def get_template_data(record):
    """
    Build the ticket template asking for curation of a record.

    @param record: bibrecord structure of the record to curate.

    @return: tuple (queue, subject, content) where queue is "HEP_cor",
        subject lists the 037__a values followed by the record id, and
        content is the record editor URL for the record.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value, record_get_field_values

    recid = record_get_field_value(record, '001', '', '', '')
    # The record must be the first argument; without it the tag string was
    # taken as the record and the lookup could never return report numbers.
    report_numbers = record_get_field_values(record, '037', '_', '_', 'a')
    # Separate the report numbers from the record id only when there are any.
    postfix = ' ' if report_numbers else ''
    queue = "HEP_cor"
    subject = "%s%s(#%s)" % (' '.join(report_numbers), postfix, recid)
    content = "Curate record here: %s/record/edit/#state=edit&recid=%s" % (CFG_SITE_URL, recid)
    return (queue, subject, content)
Beispiel #31
0
def check_records(records):
    """
    Set the 980 subfield c of records according to phrases in their title.

    For every trigger phrase contained in the lower-cased 245 subfield a of a
    record, the matching collection name is written to the record: the first
    980__c is amended when the current 980 subfield c already holds one of
    the known collection names, otherwise a subfield c is added to the first
    980 field.
    """
    phrase_to_collection = {'errat': 'ERRATUM',
                            'addend': 'ADDENDUM',
                            'editor': 'EDITORIAL'}
    known_collections = ['ERRATUM', 'ADDENDUM', 'EDITORIAL']

    for record in records:
        title = record_get_field_value(record, '245', code='a').lower()
        for phrase, collection in phrase_to_collection.items():
            if phrase not in title:
                continue
            current = record_get_field_value(record, '980', code='c')
            replace_existing = current in known_collections
            field_spec = '980__c' if replace_existing else '980__%'
            # Only the first matching position is touched.
            for position, _value in record.iterfield(field_spec):
                if replace_existing:
                    record.amend_field(position, collection)
                else:
                    record.add_subfield(position, 'c', collection)
                break
Beispiel #32
0
def perform_get_holdings_information(recid, req, action="borrowal", ln=CFG_SITE_LANG):
    """
    Display all the copies of an item. If the parameter action is 'proposal', display
    appropriate information to the user.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @param req: request object, handed over to the holdings template.

    @param action: Specifies whether the current record is put up to solicit acquisition
    proposals(if "proposal") or not("borrowal").
    @type action: string

    @param ln: language used for the messages and handed over to the templates.

    @return body(html)
    """
    _ = gettext_set_language(ln)

    if action == "proposal":
        tag = AMZ_BOOK_PUBLICATION_DATE_TAG
        publication_date = record_get_field_value(get_record(recid), tag[:3],
                                                  ind1=tag[3], ind2=tag[4],
                                                  code=tag[5])
        msg = ''
        if publication_date:
            # Only the date parsing is best-effort: a value that cannot be
            # parsed is treated like a publication date that is not in the
            # future. KeyboardInterrupt/SystemExit are no longer swallowed.
            try:
                parsed = time.strptime(publication_date, '%d %b %Y')
                pub_date = datetime.date(parsed[0], parsed[1], parsed[2])
            except Exception:
                pub_date = None

            if pub_date is not None and datetime.date.today() < pub_date:
                msg += _("The publication date of this book is %s.") % (publication_date)
                msg += "<br /><br />"
            else:
                msg += _("This book has no copies in the library. ")

        # The literal below is a translation key: it is kept byte-identical,
        # including the whitespace embedded by the line continuations.
        msg += _("If you think this book is interesting, suggest it and tell us why you consider this \
                  book is important. The library will consider your opinion and if we decide to buy the \
                  book, we will issue a loan for you as soon as it arrives and send it by internal mail.")
        # Was "<br \><br \>", which is not a valid line break tag.
        msg += "<br /><br />"
        msg += _("In case we decide not to buy the book, we will offer you an interlibrary loan")

        body = bc_templates.tmpl_book_proposal_information(recid, msg, ln=ln)
    else:
        holdings_information = db.get_holdings_information(recid, False)
        body = bc_templates.tmpl_holdings_information(recid=recid,
                                            req=req,
                                            holdings_info=holdings_information,
                                            ln=ln)

    return body
Beispiel #33
0
def get_template_data(record):
    """
    Build the template of a sample ticket.

    @param record: recstruct the ticket is about; its 001 value is quoted in
        the ticket content.

    @return: tuple (queue, subject, content)
    """
    record_id = record_get_field_value(record, tag="001")
    return (
        "Test",
        "Test ticket",
        "This is a test ticket for record %s." % record_id,
    )
Beispiel #34
0
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Returns the next cnum for the given recid

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
        @rtype: string

        @raises ConferenceNoStartDateError: No date information found in the
        given recid
        """
        bibrecord = None
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif recid is not None:
            bibrecord = get_bibrecord(recid)

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(bibrecord,
                                                tag="111",
                                                ind1="",
                                                ind2="",
                                                code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            new_cnum = base_cnum
        elif len(record_cnums) == 1:
            new_cnum = base_cnum + '.' + '1'
        else:
            # Get the max current revision, cnums are in format Cyy-mm-dd,
            # Cyy-mm-dd.1, Cyy-mm-dd.2
            highest_revision = max([int(rev[0].split('.')[1]) for rev in record_cnums[1:]])
            new_cnum = base_cnum + '.' + str(highest_revision + 1)

        return new_cnum
Beispiel #35
0
def get_template_data(record):
    """
    Build the ticket template for a paper mentioning an unknown experiment.

    @param record: bibrecord structure of the paper.

    @return: tuple (queue, subject, content) where queue is "Exp", subject
        names the record id and its 037__a values, and content points at the
        record editor URL for the record.
    """
    from invenio.config import CFG_SITE_URL
    from invenio.bibrecord import record_get_field_value, record_get_field_values

    recid = record_get_field_value(record, "001", "", "", "")
    # The record must be the first argument; without it the tag string was
    # taken as the record and the lookup could never return report numbers.
    report_numbers = record_get_field_values(record, "037", "_", "_", "a")
    queue = "Exp"
    subject = "unknown experiment in #%s %s" % (recid, " ".join(report_numbers))
    # The tab characters reproduce the leading tabs that the original
    # backslash-continued literal embedded in the message text.
    content = (
        "This unknown experiment: \n\n"
        "\thas appeared in this paper. Please create a record in "
        "Experiments and update the paper at"
        "\t %s/record/edit/%s"
    ) % (CFG_SITE_URL, recid)
    return (queue, subject, content)
def output_record(data, tag_list, url=""):
    """
    Render the requested tags of a record as text, one "tag: value" line each.

    Each entry of tag_list holds the tag in its first three characters,
    optionally followed by the two indicators. Tags starting with "00" are
    read as a single value, all others as the list of values of every
    subfield. When url is given, the line for tag 001 also carries the export
    link of the record. The text ends with an empty line.
    """
    lines = []
    for spec in tag_list:
        tag, ind1, ind2 = spec[:3], spec[3:4], spec[4:5]
        if tag.startswith("00"):
            values = record_get_field_value(data, tag)
        else:
            values = record_get_field_values(data, tag, ind1=ind1, ind2=ind2, code="%")

        if url != '' and tag == '001':
            line = "%s: %s (%s/record/%s/export/hm)\n" % (tag, str(values), url, values)
        else:
            line = "%s: %s\n" % (tag, str(values))
        lines.append(line)
    lines.append("\n")
    return "".join(lines)
def parse_noresultfile(data, recid_patterns=(re_original_id,), sysno_patterns=None):
    """
    This function will look for the original recid in 001 and any matching recids
    from given regular expression patterns in the textmarc format of given record.

    @param data: iterable of record sources, each one handed to create_records.
    @param recid_patterns: compiled patterns searched for matching recids.
    @param sysno_patterns: compiled patterns searched for matching sysnos;
        None means that no sysno is looked for.

    @return: list of tuples (original recid, matching recids, matching sysnos)
        where the last two items are lists holding at most one value.
    """
    # The default used to be iterated as-is, raising TypeError for None.
    if sysno_patterns is None:
        sysno_patterns = ()

    record_pairs = []
    sysno_gen = get_sysno_generator()
    options = {'text-marc': 1, 'aleph-marc': 0}
    for raw_record in data:
        original_record_bibrec = create_records(raw_record)[0][0]
        rec_id = record_get_field_value(original_record_bibrec, '001')
        sysno = next(sysno_gen)
        original_record_marc = create_marc_record(original_record_bibrec, sysno, options)
        matching_result_recids = _find_pattern_result(recid_patterns, original_record_marc)
        matching_result_sysnos = _find_pattern_result(sysno_patterns, original_record_marc)
        record_pairs.append((rec_id, matching_result_recids, matching_result_sysnos))
    return record_pairs


def _find_pattern_result(patterns, marc_text):
    """Return the value found by the first pattern yielding one, as a list."""
    found = []
    for pattern in patterns:
        for hit in pattern.findall(marc_text):
            if isinstance(hit, tuple):
                # Several groups: keep the first non-empty one and go on with
                # the next hit, exactly as the inline loops did before.
                for group in hit:
                    if group != "":
                        found = [group]
                        break
            elif isinstance(hit, str):
                found = [hit]
                break
        if len(found) > 0:
            break
    return found