def get_as_text(record_id=0, xml_record=None, ln=CFG_SITE_LANG):
    """Return the record in a textual format

    The record is rendered with format_record() in the "hb" output format
    and the resulting HTML is reduced to plain text.

    @param record_id: ID of the record to render; used when different from 0
    @param xml_record: XML of the record; only used when record_id is 0
    @param ln: language handed to gettext_set_language() to translate the
        labels that are stripped from the text
    @return: the text of the record, or an empty string when neither
        record_id nor xml_record is given
    """
    _ = gettext_set_language(ln)
    if record_id != 0:
        rec_in_hb = format_record(record_id, of="hb")
    elif xml_record:
        rec_in_hb = format_record(0, of="hb", xml_record=xml_record)
    else:
        # Nothing to format. Without this branch rec_in_hb was unbound and
        # the next statement raised UnboundLocalError.
        return ""
    rec_in_hb = rec_in_hb.replace('\n', ' ')
    htparser = RecordHTMLParser()
    try:
        htparser.feed(rec_in_hb)
        htparser.close()
        out = htparser.result
    except Exception:
        # Best effort: fall back to plain markup removal when the parser
        # fails, but let KeyboardInterrupt/SystemExit propagate.
        out = remove_html_markup(rec_in_hb)

    # Remove trailing whitespace and linefeeds
    out = out.strip('\n').strip()
    # Merge consecutive whitespaces. Must be done here, once all HTML
    # tags have been removed
    out = whitespaces_pattern.sub(' ', out)
    # Now consider non-breakable spaces
    # NOTE(review): as this line reads, both literals are a plain space,
    # which makes it a no-op. If the first literal is meant to be a
    # non-breaking space (or the '&nbsp;' entity), confirm and restore it.
    out = out.replace(' ', ' ')
    # The labels are written as literal _() calls so that message
    # extraction still finds them. They are escaped because a translation
    # may contain regular expression metacharacters.
    labels = (_("Detailed record"), _("Similar records"), _("Cited by"))
    for label in labels:
        out = re.sub(r"[\-:]?\s*%s\s*[\-:]?" % re.escape(label), "", out)
    return out.strip()
def _get_hepnames_data_fallback(bibauthorid_data, person_id):
    '''
    Build the HepNames data for a person.

    A HepNames search is run on the canonical ID, or on the stringified
    person ID when no canonical ID is set. Exactly one hit is formatted as
    the person's record. Zero or several hits are extended with a search on
    the person's database names and returned as a list of choices.

    @param bibauthorid_data: dict with 'is_baid':bool, 'cid':canonicalID, 'pid':personid
    @param person_id: ID of the person, stored under 'pid' and used to look
        up the person's names
    @return: dict with the keys 'cid', 'pid', 'bd', 'HaveHep', 'HaveChoices'
        and 'heprecord', plus 'HepChoices' in the choices case
    '''
    fallback_cid = str(person_id)
    cid = bibauthorid_data['cid'] or fallback_cid
    matches = perform_request_search(rg=0, cc='HepNames', p=cid)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]

    hepdict = {'cid': cid, 'pid': person_id}

    if len(matches) == 1:
        # A single hit: show the record we just found.
        hepdict['HaveHep'] = True
        hepdict['HaveChoices'] = False
        hepdict['heprecord'] = format_record(matches[0], 'hd')
        hepdict['bd'] = bibauthorid_data
        return hepdict

    # No hit or several hits: offer alternatives found through the names
    # known for this person.
    db_names = get_person_names_dicts(person_id)[0]['db_names_dict'].keys()
    names_query = ' or '.join(['"%s"' % str(name) for name in db_names])
    by_name = perform_request_search(rg=0, cc='HepNames', p=names_query)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]
    matches += by_name

    hepdict['HaveHep'] = False
    hepdict['HaveChoices'] = bool(matches)
    hepdict['HepChoices'] = [(format_record(recid, 'hb'), recid) for recid in matches]
    hepdict['heprecord'] = matches
    hepdict['bd'] = bibauthorid_data
    return hepdict
def _get_hepnames_data_fallback(bibauthorid_data, person_id):
    '''
    Build the HepNames data for a person.

    A HepNames search is run on the canonical ID, or on the stringified
    person ID when no canonical ID is set. Exactly one hit is formatted as
    the person's record. Zero or several hits are extended with a search on
    the person's database names and returned as a list of choices.

    @param bibauthorid_data: dict with 'is_baid':bool, 'cid':canonicalID, 'pid':personid
    @param person_id: ID of the person, stored under 'pid' and used to look
        up the person's names
    @return: dict with the keys 'cid', 'pid', 'bd', 'HaveHep', 'HaveChoices'
        and 'heprecord', plus 'HepChoices' in the choices case
    '''
    fallback_cid = str(person_id)
    cid = bibauthorid_data['cid'] or fallback_cid
    matches = perform_request_search(rg=0, cc='HepNames', p=cid)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]

    hepdict = {'cid': cid, 'pid': person_id}

    if len(matches) == 1:
        # A single hit: show the record we just found.
        hepdict['HaveHep'] = True
        hepdict['HaveChoices'] = False
        hepdict['heprecord'] = format_record(matches[0], 'hd')
        hepdict['bd'] = bibauthorid_data
        return hepdict

    # No hit or several hits: offer alternatives found through the names
    # known for this person.
    db_names = get_person_names_dicts(person_id)[0]['db_names_dict'].keys()
    names_query = ' or '.join(['"%s"' % str(name) for name in db_names])
    by_name = perform_request_search(rg=0, cc='HepNames', p=names_query)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]
    matches += by_name

    hepdict['HaveHep'] = False
    hepdict['HaveChoices'] = bool(matches)
    hepdict['HepChoices'] = [(format_record(recid, 'hb'), recid) for recid in matches]
    hepdict['heprecord'] = matches
    hepdict['bd'] = bibauthorid_data
    return hepdict
Example #4
0
def get_as_text(record_id=0, xml_record=None, ln=CFG_SITE_LANG):
    """Return the record in a textual format

    The record is rendered with format_record() in the "hb" output format
    and the resulting HTML is reduced to plain text.

    @param record_id: ID of the record to render; used when different from 0
    @param xml_record: XML of the record; only used when record_id is 0
    @param ln: language handed to gettext_set_language() to translate the
        labels that are stripped from the text
    @return: the text of the record, or an empty string when neither
        record_id nor xml_record is given
    """
    _ = gettext_set_language(ln)
    if record_id != 0:
        rec_in_hb = format_record(record_id, of="hb")
    elif xml_record:
        rec_in_hb = format_record(0, of="hb", xml_record=xml_record)
    else:
        # Nothing to format. Without this branch rec_in_hb was unbound and
        # the next statement raised UnboundLocalError.
        return ""
    rec_in_hb = rec_in_hb.replace("\n", " ")
    htparser = RecordHTMLParser()
    try:
        htparser.feed(rec_in_hb)
        htparser.close()
        out = htparser.result
    except Exception:
        # Best effort: fall back to plain markup removal when the parser
        # fails, but let KeyboardInterrupt/SystemExit propagate.
        out = remove_html_markup(rec_in_hb)

    # Remove trailing whitespace and linefeeds
    out = out.strip("\n").strip()
    # Merge consecutive whitespaces. Must be done here, once all HTML
    # tags have been removed
    out = whitespaces_pattern.sub(" ", out)
    # Now consider non-breakable spaces
    # NOTE(review): as this line reads, both literals are a plain space,
    # which makes it a no-op. If the first literal is meant to be a
    # non-breaking space (or the '&nbsp;' entity), confirm and restore it.
    out = out.replace(" ", " ")
    # The labels are written as literal _() calls so that message
    # extraction still finds them. They are escaped because a translation
    # may contain regular expression metacharacters.
    labels = (_("Detailed record"), _("Similar records"), _("Cited by"))
    for label in labels:
        out = re.sub(r"[\-:]?\s*%s\s*[\-:]?" % re.escape(label), "", out)
    return out.strip()
Example #5
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    For the "hm" format the record is turned into MARC lines, each prefixed
    with the zero-padded record ID and wrapped in a <pre> element. Every
    other format is delegated to bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" output. A line is kept when
        one of the tags occurs in its first whitespace-separated token. When
        empty, or when it contains "All tags", every line is kept.
    @return: the formatted record as a string
    """
    updated_record = _get_updated_record(record_id, update_commands)

    xml_record = bibrecord.record_xml_output(updated_record)

    if "hm" == output_format:
        marc_record = _create_marc(xml_record)
        # Filter only when specific tags were requested. An empty selection
        # used to take the filtering path and silently produced no lines.
        filter_tags = bool(outputTags) and "All tags" not in outputTags
        prefix = "%09d " % record_id
        lines = ["<pre>"]
        for line in marc_record.split('\n')[:-1]:
            if filter_tags:
                tokens = line.split()
                # any() emits a line once even when several requested tags
                # match it; a line without tokens cannot match any tag.
                if not tokens or not any(tag in tokens[0] for tag in outputTags):
                    continue
            lines.append(prefix + line.strip())
        lines.append("</pre>")
        return '\n'.join(lines)

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
    def tmpl_get_latest_linkbacks(self, latest_linkbacks, ln):
        """
        Display approved latest added linkbacks to display

        @param latest_linkbacks: a list of lists of linkbacks, one inner
            list per heading. Each linkback is an indexable row read as
            [1] URL, [2] record ID, [4] link type and [6] date; the date of
            the first row of a group is used for the group heading.
        @param ln: language passed to format_record()
        @return: the HTML as a string, empty when there is nothing to show
        """
        chunks = []

        for day_group in latest_linkbacks:
            if not day_group:
                # An empty group has no first row to take the heading date
                # from; skip it instead of raising IndexError.
                continue

            date = day_group[0][6]
            date_day_month = convert_datetext_to_dategui(str(date))[:6]

            chunks.append(self.tmpl_heading(date_day_month))
            for current_linkback in day_group:
                link_type = current_linkback[4]
                url = str(current_linkback[1])
                recordid = current_linkback[2]
                chunks.append('<font class="rankscoreinfo"><a>(%s)&nbsp;</a></font>' % link_type)
                chunks.append('<small>')
                # The URL is placed inside a double-quoted attribute, so the
                # quote character has to be escaped as well (quote=True).
                chunks.append('<a href="%s">%s</a> links to ' % (
                    cgi.escape(url, True), cgi.escape(get_url_title(url))))
                chunks.append(format_record(recID=recordid, of='hs', ln=ln))
                chunks.append('</small>')
                chunks.append('<br>')
            chunks.append('<br>')
        return ''.join(chunks)
    def tmpl_get_latest_linkbacks(self, latest_linkbacks, ln):
        """
        Display approved latest added linkbacks to display

        @param latest_linkbacks: a list of lists of linkbacks, one inner
            list per heading. Each linkback is an indexable row read as
            [1] URL, [2] record ID, [4] link type and [6] date; the date of
            the first row of a group is used for the group heading.
        @param ln: language passed to format_record()
        @return: the HTML as a string, empty when there is nothing to show
        """
        chunks = []

        for day_group in latest_linkbacks:
            if not day_group:
                # An empty group has no first row to take the heading date
                # from; skip it instead of raising IndexError.
                continue

            date = day_group[0][6]
            date_day_month = convert_datetext_to_dategui(str(date))[:6]

            chunks.append(self.tmpl_heading(date_day_month))
            for current_linkback in day_group:
                link_type = current_linkback[4]
                url = str(current_linkback[1])
                recordid = current_linkback[2]
                chunks.append('<font class="rankscoreinfo"><a>(%s)&nbsp;</a></font>' % link_type)
                chunks.append('<small>')
                # The URL is placed inside a double-quoted attribute, so the
                # quote character has to be escaped as well (quote=True).
                chunks.append('<a href="%s">%s</a> links to ' % (
                    cgi.escape(url, True), cgi.escape(get_url_title(url))))
                chunks.append(format_record(recID=recordid, of='hs', ln=ln))
                chunks.append('</small>')
                chunks.append('<br>')
            chunks.append('<br>')
        return ''.join(chunks)
    def test_validate_xml_against_xsd(self):
        """
        Validate generated DataCite XML for all public records

        Walks the record list of the 'zenodo' collection. A record fails
        when one of its related identifiers has a scheme that is not lower
        case. Records with a DOI are formatted as 'dcite' and validated
        against self.schema. On failure the record ID and, when available,
        the generated XML are printed before the error is re-raised.
        """
        from invenio.websearch_model import Collection
        from invenio.bibformat import format_record
        from invenio.bibfield import get_record

        etree.clear_error_log()

        for recid in Collection.query.filter_by(name='zenodo').first().reclist:
            xml = None
            try:
                record = get_record(recid)
                for identifier in record.get('related_identifiers', []):
                    scheme = identifier['scheme']
                    if scheme != scheme.lower():
                        raise Exception("Record %s has problem with upper-case scheme %s" % (recid, scheme))
                if record.get('doi', None):
                    xml = StringIO(format_record(recid, 'dcite'))
                    xml_doc = etree.parse(xml)
                    self.schema.assertValid(xml_doc)
            except Exception:
                # Dump the failing record for diagnosis. The single-argument
                # print(...) form behaves the same as a print statement.
                print(recid)
                if xml is not None:
                    print(xml.getvalue())
                # A bare raise keeps the original traceback; "raise e"
                # would restart it from this line.
                raise
Example #9
0
def openaire_register_doi(recid):
    """
    Register a DOI for new publication

    The DOI is read from field 0247_a of the record. Nothing is done when
    the DOI is not locally managed, or when its identifier is neither new
    nor reserved. A failed registration is logged and, unless the task runs
    eagerly, retried through openaire_register_doi.retry(). The original
    note states a retry every 10 minutes for 1 hour; that schedule is not
    configured inside this function.

    @param recid: ID of the record whose DOI is registered
    @raise Exception: when the DOI is not assigned to the record
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if pid:
        logger.debug("DOI locally managed.")
    else:
        logger.debug("DOI not locally managed.")
        return

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." % (doi_val, recid))

    if not (pid.is_new() or pid.is_reserved()):
        # Only new or reserved identifiers are registered.
        return

    logger.info("Registering DOI %s for record %s" % (doi_val, recid))

    url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
    doc = format_record(recid, 'dcite')

    registered = pid.register(url=url, doc=doc)
    if registered:
        logger.info("Successfully registered DOI %s." % doi_val)
        return

    m = "Failed to register DOI %s" % doi_val
    logger.error(m + "\n%s\n%s" % (url, doc))
    if not openaire_register_doi.request.is_eager:
        raise openaire_register_doi.retry(exc=Exception(m))
Example #10
0
    def test_validate_xml_against_xsd(self):
        """
        Validate generated DataCite XML for all public records

        Walks the record list of the 'zenodo' collection. A record fails
        when one of its related identifiers has a scheme that is not lower
        case. Records with a DOI are formatted as 'dcite' and validated
        against self.schema. On failure the record ID and, when available,
        the generated XML are printed before the error is re-raised.
        """
        from invenio.websearch_model import Collection
        from invenio.bibformat import format_record
        from invenio.bibfield import get_record

        etree.clear_error_log()

        for recid in Collection.query.filter_by(name='zenodo').first().reclist:
            xml = None
            try:
                record = get_record(recid)
                for identifier in record.get('related_identifiers', []):
                    scheme = identifier['scheme']
                    if scheme != scheme.lower():
                        raise Exception(
                            "Record %s has problem with upper-case scheme %s" %
                            (recid, scheme))
                if record.get('doi', None):
                    xml = StringIO(format_record(recid, 'dcite'))
                    xml_doc = etree.parse(xml)
                    self.schema.assertValid(xml_doc)
            except Exception:
                # Dump the failing record for diagnosis. The single-argument
                # print(...) form behaves the same as a print statement.
                print(recid)
                if xml is not None:
                    print(xml.getvalue())
                # A bare raise keeps the original traceback; "raise e"
                # would restart it from this line.
                raise
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    For the "hm" format the result is the diff between the stored record
    and the record with the update commands applied, wrapped in a <pre>
    element. Every other format is delegated to bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" output. A diff line is kept
        when one of the tags occurs in its second whitespace-separated
        token or, for lines containing '<strong', in its fourth token. When
        empty, or when it contains "All tags", the whole diff is returned.
    @return: the formatted record as a string
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    if "hm" == output_format:
        diff_result = _get_record_diff(record_id, old_record, updated_record)
        # Filter only when specific tags were requested. An empty selection
        # used to take the filtering path and silently produced no lines.
        filter_tags = bool(outputTags) and "All tags" not in outputTags
        if not filter_tags:
            return "<pre>\n" + diff_result + "</pre>"

        lines = ["<pre>"]
        for line in diff_result.split('\n')[:-1]:
            tokens = line.split()
            # Slices instead of indexes: a line with too few tokens simply
            # offers no candidate instead of raising IndexError.
            candidates = tokens[1:2]
            if '<strong' in line:
                candidates = candidates + tokens[3:4]
            # any() emits a line once even when several tags match it.
            if any(tag in field for field in candidates for tag in outputTags):
                lines.append(line.strip())
        lines.append("</pre>")
        return '\n'.join(lines)

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Example #12
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Returns a record in a given format

    For the "hm" format the result is the diff between the stored record
    and the record with the update commands applied, wrapped in a <pre>
    element. Every other format is delegated to bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" output. A diff line is kept
        when one of the tags occurs in its second whitespace-separated
        token. When empty, or when it contains "All tags", the whole diff
        is returned.
    @return: the formatted record as a string
    """
    updated_record = _get_updated_record(record_id, update_commands)
    xml_record = bibrecord.record_xml_output(updated_record)

    old_record = search_engine.get_record(recid=record_id)
    if "hm" == output_format:
        diff_result = _get_record_diff(record_id, old_record, updated_record)
        # Filter only when specific tags were requested. An empty selection
        # used to take the filtering path and silently produced no lines.
        filter_tags = bool(outputTags) and "All tags" not in outputTags
        if not filter_tags:
            return "<pre>\n" + diff_result + "</pre>"

        lines = ["<pre>"]
        for line in diff_result.split('\n')[:-1]:
            # A slice instead of an index: a line with fewer than two tokens
            # offers no candidate instead of raising IndexError.
            candidates = line.split()[1:2]
            # any() emits a line once even when several tags match it.
            if any(tag in field for field in candidates for tag in outputTags):
                lines.append(line.strip())
        lines.append("</pre>")
        return '\n'.join(lines)

    result = bibformat.format_record(recID=None,
                                     of=output_format,
                                     xml_record=xml_record,
                                     ln=language)
    return result
Example #13
0
def main(argv):
    """Edit one record as text and optionally save it back.

    Options read from argv:
      -i <recID>  ID of the record to edit (0 when not given)
      -d          verbose mode: print the intermediate results, wait for
                  confirmation and dump the edited data to /tmp/debug

    The record is fetched in the 'xm' format, converted with the
    marcxmltoplain.xsl template, passed through convert_edit() and
    to_marc(), and uploaded in 'replace' mode when the user answers Y.

    @param argv: list of command line arguments, without the program name
    """
    recID = 0
    verbose = False
    opts, _unused_args = getopt.getopt(argv, 'di:')
    for opt, arg in opts:
        if opt == '-i':
            recID = arg
        if opt == '-d':
            verbose = True

    result = format_record(recID=recID, of='xm')
    if not result:
        return

    if verbose:
        print(result)
        raw_input("go on?")
    # change the result to MARC by applying a template
    result = bibconvert_xslt_engine.convert(result, "marcxmltoplain.xsl")
    # call a sub that changes the stuff to editable form, calls editor,
    # returns a string
    new = convert_edit(result)
    newr = to_marc(new)
    if verbose:
        # debug
        f = open('/tmp/debug', 'w')
        try:
            f.write(new)
            f.write(newr)
        finally:
            # Close the dump even when one of the writes fails.
            f.close()
        print(newr)
    if upper(raw_input("Save to DB Y/N:")) == 'Y':
        recs = xml_marc_to_records(''.join(newr))
        response = bibupload(recs[0], opt_mode='replace')
        if response[0]:
            print("Error updating record: " + response[0])
Example #14
0
def openaire_register_doi(recid):
    """
    Register a DOI for new publication

    The DOI is read from field 0247_a of the record. Nothing is done when
    the DOI is not locally managed, or when its identifier is neither new
    nor reserved. A failed registration is logged and, unless the task runs
    eagerly, retried through openaire_register_doi.retry(). The original
    note states a retry every 10 minutes for 1 hour; that schedule is not
    configured inside this function.

    @param recid: ID of the record whose DOI is registered
    @raise Exception: when the DOI is not assigned to the record
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if pid:
        logger.debug("DOI locally managed.")
    else:
        logger.debug("DOI not locally managed.")
        return

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." %
                        (doi_val, recid))

    if not (pid.is_new() or pid.is_reserved()):
        # Only new or reserved identifiers are registered.
        return

    logger.info("Registering DOI %s for record %s" % (doi_val, recid))

    url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
    doc = format_record(recid, 'dcite')

    registered = pid.register(url=url, doc=doc)
    if registered:
        logger.info("Successfully registered DOI %s." % doi_val)
        return

    m = "Failed to register DOI %s" % doi_val
    logger.error(m + "\n%s\n%s" % (url, doc))
    if not openaire_register_doi.request.is_eager:
        raise openaire_register_doi.retry(exc=Exception(m))
Example #15
0
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Fill self.latest_additions_info with the last 'rg' records of the
        record list, last one first. The result is used by
        create_instant_browse() later.

        Each entry is a dict with the keys 'id', 'format' (the record
        formatted as "hb" in language 'ln') and 'date' (its creation date).
        The list is left empty when self.nbrecs or self.reclist is empty.
        """
        entries = self.latest_additions_info = []
        if not (self.nbrecs and self.reclist):
            return

        # firstly, get last 'rg' records:
        recIDs = list(self.reclist)

        # FIXME: temporary hack in order to display tweaked latest
        # additions box for some CERN collections:
        if CFG_CERN_SITE:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ('CERN Yellow Reports',):
                # keep recIDs only from this and past year:
                last_year = str(int(this_year) - 1)
                year_query = 'year:%s or year:%s' % (this_year, last_year)
                recIDs = list(self.reclist & search_pattern(p=year_query))
            elif self.name in ('Videos',):
                # keep recIDs only from this year:
                year_query = 'year:%s' % this_year
                recIDs = list(self.reclist & search_pattern(p=year_query))

        total = len(recIDs)
        to_display = min(rg, total)

        # Walk the tail of the list backwards, newest entry first.
        for recid in reversed(recIDs[total - to_display:]):
            entries.append({
                'id': recid,
                'format': format_record(recid, "hb", ln=ln),
                'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i"),
            })
Example #16
0
def main(argv):
    """Edit one record as text and optionally save it back.

    Options read from argv:
      -i <recID>  ID of the record to edit (0 when not given)
      -d          verbose mode: print the intermediate results, wait for
                  confirmation and dump the edited data to /tmp/debug

    The record is fetched in the 'xm' format, converted with the
    marcxmltoplain.xsl template, passed through convert_edit() and
    to_marc(), and uploaded in 'replace' mode when the user answers Y.

    @param argv: list of command line arguments, without the program name
    """
    recID = 0
    verbose = False
    opts, _unused_args = getopt.getopt(argv, 'di:')
    for opt, arg in opts:
        if opt == '-i':
            recID = arg
        if opt == '-d':
            verbose = True

    result = format_record(recID=recID, of='xm')
    if not result:
        return

    if verbose:
        print(result)
        raw_input("go on?")
    # change the result to MARC by applying a template
    result = bibconvert_xslt_engine.convert(result, "marcxmltoplain.xsl")
    # call a sub that changes the stuff to editable form, calls editor,
    # returns a string
    new = convert_edit(result)
    newr = to_marc(new)
    if verbose:
        # debug
        f = open('/tmp/debug', 'w')
        try:
            f.write(new)
            f.write(newr)
        finally:
            # Close the dump even when one of the writes fails.
            f.close()
        print(newr)
    if upper(raw_input("Save to DB Y/N:")) == 'Y':
        recs = xml_marc_to_records(''.join(newr))
        response = bibupload(recs[0], opt_mode='replace')
        if response[0]:
            print("Error updating record: " + response[0])
def _get_formated_record(record_id, output_format, update_commands, language):
    """Format a record after applying the update commands to it.

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @return: the output of _create_marc() for the "hm" format, otherwise
        the output of bibformat.format_record()
    """
    record_xml = bibrecord.record_xml_output(
        _get_updated_record(record_id, update_commands))

    # FIXME: Remove this as soon as the formatting for MARC is
    # implemented in bibformat
    if output_format == "hm":
        return _create_marc(record_xml)

    return bibformat.format_record(recID=None,
                                   of=output_format,
                                   xml_record=record_xml,
                                   ln=language)
def _get_person_names_dicts_fallback(person_id):
    '''
    Build a minimal names dictionary from an exactauthor search.

    @param person_id: the value searched with exactauthor. It is handled as
        a string here: lower-cased, measured with len() and interpolated
        into the query.
    @return: dict with the keys 'longest', 'names_dict', 'db_names_dict' and
        'names_to_records'. The name used as key is the spelling found in
        the first matching record when it can be located there, otherwise
        person_id as given.
    '''
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % person_id)
    hits = len(recids)
    if recids:
        record_text = format_record(recids[0], 'XM')
        try:
            # Case-insensitive lookup, then take the slice from the record
            # itself so that its capitalisation is kept.
            start = record_text.lower().index(person_id.lower())
            person_id = record_text[start:start + len(person_id)]
        except (IndexError, ValueError):
            pass

    names = {}
    names['longest'] = person_id
    names['names_dict'] = {person_id: hits}
    names['db_names_dict'] = {person_id: hits}
    names['names_to_records'] = {person_id: recids}
    return names
Example #19
0
def iterate_over_new(list, fmt):
    """
    Format every record of a list and store the compressed output in the
    bibfmt table.

    @param list: the list of record IDs to format
    @param fmt: the output format to use
    @return: tuple (total number of records, time spent formatting and
        storing, 0). The third element is never updated in this function.
    """
    # Same call as before; its value is superseded by the per-record
    # timestamp taken inside the loop.
    task_get_task_param('task_starting_time')

    total = len(list)
    format_time = 0     # time accumulated over format + store of each record
    upload_time = 0     # kept for the shape of the return value
    done = 0
    for done, recID in enumerate(list, 1):
        began = os.times()[4]
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        blob = zlib.compress(format_record(recID, fmt, on_the_fly=True))
        run_sql('REPLACE LOW_PRIORITY INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)',
                (recID, fmt, stamp, blob))
        format_time += (os.times()[4] - began)
        if done % 100 == 0:
            # Report progress and give the task a chance to sleep or stop.
            write_message("   ... formatted %s records out of %s" % (done, total))
            task_update_progress('Formatted %s out of %s' % (done, total))
            task_sleep_now_if_required(can_stop_too=True)
    if total % 100 != 0:
        # The last batch was not reported inside the loop.
        write_message("   ... formatted %s records out of %s" % (done, total))
    return (total, format_time, upload_time)
Example #20
0
    def parse_and_extract_records(self, of='hb'):
        """Split self.buffer into records and format each of them.

        Returns a tuple (recids, records): the list of record IDs in the
        order found, and a dictionary mapping each record ID to the record
        in the selected output format. For 'xm' the XML of the record is
        stored as is. An empty 'of' means 'hb'. An AttributeError raised
        while parsing gives ([], {})."""
        flags = re.DOTALL | re.MULTILINE | re.IGNORECASE
        # separates the records from one another
        record_re = re.compile(r'(<record.*?>.*?</record>)', flags)
        # extracts the recid
        recid_re = re.compile(
            r'<controlfield tag="001">([0-9]+?)</controlfield>', flags)

        output_format = of or 'hb'
        keep_xml = output_format == 'xm'

        found_ids = []
        by_id = {}
        try:
            for match in record_re.finditer(self.buffer):
                record_xml = match.group(1)
                # search() returns None for a record without a 001 field;
                # the resulting AttributeError is handled below.
                recid = recid_re.search(record_xml).group(1)
                found_ids.append(recid)
                if keep_xml:
                    by_id[recid] = record_xml
                else:
                    by_id[recid] = format_record(None,
                                                 output_format,
                                                 xml_record=record_xml)
        except AttributeError:
            return ([], {})
        return (found_ids, by_id)
Example #21
0
def iterate_over_new(list, fmt):
    """
    Iterate over list of IDs, format each record and store the compressed
    output in the bibfmt table, updating an existing row or inserting one.

    @param list: the list of record IDs to format
    @param fmt: the output format to use
    @return: tuple (total number of records, time spent formatting and
        storing, 0). The third element is never updated in this function.
    """
    # Same call as before; its value is superseded by the per-record
    # timestamp taken inside the loop.
    task_get_task_param('task_starting_time')

    total = len(list)
    format_time = 0     # time accumulated over format + store of each record
    upload_time = 0     # kept for the shape of the return value
    done = 0
    for done, recID in enumerate(list, 1):
        began = os.times()[4]
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        blob = zlib.compress(format_record(recID, fmt, on_the_fly=True))
        already_stored = run_sql('SELECT id FROM bibfmt WHERE id_bibrec=%s AND format=%s', (recID, fmt))
        if already_stored:
            run_sql('UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s', (stamp, blob, recID, fmt))
        else:
            run_sql('INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, %s, %s, %s)', (recID, fmt, stamp, blob))
        format_time += (os.times()[4] - began)
        if done % 100 == 0:
            # Report progress and give the task a chance to sleep or stop.
            write_message("   ... formatted %s records out of %s" % (done, total))
            task_update_progress('Formatted %s out of %s' % (done, total))
            task_sleep_now_if_required(can_stop_too=True)
    if total % 100 != 0:
        # The last batch was not reported inside the loop.
        write_message("   ... formatted %s records out of %s" % (done, total))
    return (total, format_time, upload_time)
    def parse_and_extract_records(self, of='hb'):
        """Split self.buffer into records and format each of them.

        Returns a tuple (recids, records): the list of record IDs in the
        order found, and a dictionary mapping each record ID to the record
        in the selected output format. For 'xm' the XML of the record is
        stored as is. An empty 'of' means 'hb'. An AttributeError raised
        while parsing gives ([], {})."""
        flags = re.DOTALL | re.MULTILINE | re.IGNORECASE
        # separates the records from one another
        record_re = re.compile(r'(<record.*?>.*?</record>)', flags)
        # extracts the recid
        recid_re = re.compile(r'<controlfield tag="001">([0-9]+?)</controlfield>', flags)

        output_format = of or 'hb'
        keep_xml = output_format == 'xm'

        found_ids = []
        by_id = {}
        try:
            for match in record_re.finditer(self.buffer):
                record_xml = match.group(1)
                # search() returns None for a record without a 001 field;
                # the resulting AttributeError is handled below.
                recid = recid_re.search(record_xml).group(1)
                found_ids.append(recid)
                if keep_xml:
                    by_id[recid] = record_xml
                else:
                    by_id[recid] = format_record(None, output_format, xml_record=record_xml)
        except AttributeError:
            return ([], {})
        return (found_ids, by_id)
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Fill self.latest_additions_info with the last 'rg' records of the
        record list, last one first. The result is used by
        create_instant_browse() later.

        Each entry is a dict with the keys 'id', 'format' (the record
        formatted in language 'ln') and 'date' (its creation date). The
        list is left empty when self.nbrecs or self.reclist is empty. On a
        CERN site the record list, its order and the output format are
        adjusted for some collections.
        """
        entries = self.latest_additions_info = []
        if not (self.nbrecs and self.reclist):
            return

        # firstly, get last 'rg' records:
        recIDs = list(self.reclist)
        of = 'hb'
        # CERN hack begins: tweak latest additions for selected collections:
        if CFG_CERN_SITE:
            # alter recIDs list for some CERN collections:
            this_year = time.strftime("%Y", time.localtime())
            if self.name in ('CERN Yellow Reports', 'Videos'):
                # keep recIDs only from this and past year:
                last_year = str(int(this_year) - 1)
                year_query = 'year:%s or year:%s' % (this_year, last_year)
                recIDs = list(self.reclist &
                              search_pattern_parenthesised(p=year_query))
            elif self.name in ('VideosXXX',):
                # keep recIDs only from this year:
                year_query = 'year:%s' % this_year
                recIDs = list(self.reclist &
                              search_pattern_parenthesised(p=year_query))
            elif self.name == 'CMS Physics Analysis Summaries' and \
                     1281585 in self.reclist:
                # REALLY, REALLY temporary hack
                recIDs = list(self.reclist)
                recIDs.remove(1281585)
            # apply special filters:
            if self.name in ('Videos',):
                # select only videos with movies:
                recIDs = list(intbitset(recIDs) &
                              search_pattern_parenthesised(p='collection:"PUBLVIDEOMOVIE"'))
                of = 'hvp'
            # sort some CERN collections specially:
            sorted_collections = (
                'Videos',
                'Video Clips',
                'Video Movies',
                'Video News',
                'Video Rushes',
                'Webcast',
                'ATLAS Videos',
                'Restricted Video Movies',
                'Restricted Video Rushes',
                'LHC First Beam Videos',
                'CERN openlab Videos',
            )
            if self.name in sorted_collections:
                recIDs = sort_records(None, recIDs, '269__c')
        # CERN hack ends.

        total = len(recIDs)
        to_display = min(rg, total)

        # Walk the tail of the list backwards, last entry first.
        for recid in reversed(recIDs[total - to_display:]):
            entries.append({
                'id': recid,
                'format': format_record(recid, of, ln=ln),
                'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i"),
            })
Example #24
0
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags="",
                         run_diff=True,
                         checked=True):
    """Render one record in the requested output format.

    For the "hm" format the result is either the diff between the stored
    record and its updated version, or the textmarc lines of the stored
    record wrapped in <pre> tags.  Every other format is delegated to
    bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags kept in the plain "hm" listing; nothing is
        filtered when it is empty or contains "All tags"
    @param run_diff: when false, _get_record_diff (which sometimes takes too
        much time) is skipped and the stored record is listed instead
    @param checked: when false, update_commands are not applied
    @return: the formatted record
    """
    apply_updates = update_commands and checked
    if apply_updates:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {"aleph-marc": 0, "correct-mode": 1,
                        "append-mode": 0, "delete-mode": 0,
                        "insert-mode": 0, "replace-mode": 0,
                        "text-marc": 1}

    old_record = search_engine.get_record(recid=record_id)
    # The textmarc of the stored record is built whatever the output format.
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(
        old_record, sysno="", options=textmarc_options)

    if output_format != "hm":
        # No coloring of modifications in this case
        if apply_updates:
            record_to_format = updated_record
        else:
            record_to_format = old_record
        xml_record = bibrecord.record_xml_output(record_to_format)
        return bibformat.format_record(recID=None,
                                       of=output_format,
                                       xml_record=xml_record,
                                       ln=language)

    if apply_updates and run_diff:
        updated_record_textmarc = xmlmarc2textmarc.create_marc_record(
            updated_record, sysno="", options=textmarc_options)
        return _get_record_diff(old_record_textmarc,
                                updated_record_textmarc,
                                outputTags,
                                record_id)

    # Plain listing: every kept line is prefixed with the zero-padded record
    # ID; the last textmarc line is left out.
    filter_tags = "All tags" not in outputTags and outputTags
    listing = ['<pre>']
    listing.extend("%09d " % record_id + row.strip()
                   for row in old_record_textmarc.splitlines()[:-1]
                   if not filter_tags
                   or row.split()[0].replace('_', '') in outputTags)
    listing.append('</pre>')
    return '\n'.join(listing)
    def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG):
        """
        Fill self.latest_additions_info with the data that
        create_instant_browse() will use later.

        Each stored entry is a dict with the record 'id', its 'format'
        (the "hb" rendering in language ln) and its creation 'date'.  At
        most rg entries are stored, walking the record list from its end.
        """
        self.latest_additions_info = []
        if not (self.nbrecs and self.reclist):
            return

        # firstly, get last 'rg' records:
        candidates = list(self.reclist)

        # CERN hack begins: tweak latest additions for selected collections:
        if CFG_CERN_SITE:
            specially_sorted = ('Videos',
                                'Video Clips',
                                'Video Movies',
                                'Video News',
                                'Video Rushes',
                                'Webcast',
                                'ATLAS Videos',
                                'Restricted Video Movies',
                                'Restricted Video Rushes',
                                'LHC First Beam Videos',
                                'CERN openlab Videos')
            # alter the candidate list for some CERN collections:
            current_year = time.strftime("%Y", time.localtime())
            if self.name in ('CERN Yellow Reports', 'Videos'):
                # keep records only from this and past year:
                previous_year = str(int(current_year) - 1)
                year_query = 'year:%s or year:%s' % (current_year, previous_year)
                candidates = list(self.reclist & search_pattern(p=year_query))
            elif self.name == 'VideosXXX':
                # keep records only from this year:
                year_query = 'year:%s' % current_year
                candidates = list(self.reclist & search_pattern(p=year_query))
            elif self.name == 'CMS Physics Analysis Summaries' and \
                     1281585 in self.reclist:
                # REALLY, REALLY temporary hack
                candidates = list(self.reclist)
                candidates.remove(1281585)
            # apply special filters:
            if self.name == 'Videos':
                # select only videos with movies:
                with_movies = search_pattern(p='collection:"PUBLVIDEOMOVIE"')
                candidates = list(intbitset(candidates) & with_movies)
            # sort some CERN collections specially:
            if self.name in specially_sorted:
                candidates = sort_records(None, candidates, '269__c')
        # CERN hack ends.

        total = len(candidates)
        to_display = min(rg, total)

        # Walk backwards from the last candidate.
        for offset in range(to_display):
            recid = candidates[total - 1 - offset]
            entry = {'id': recid,
                     'format': format_record(recid, "hb", ln=ln),
                     'date': get_creation_date(recid, fmt="%Y-%m-%d<br />%H:%i")}
            self.latest_additions_info.append(entry)
        return
Example #26
0
def _get_formated_record(record_id,
                         output_format,
                         update_commands,
                         language,
                         outputTags=""):
    """Render one record in the requested output format.

    For the "hm" format the result is a <pre> block holding either the
    diff produced by _get_record_diff (when update_commands is given) or
    the MARC lines of the stored record, optionally restricted to
    outputTags.  Every other format is delegated to
    bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" output; nothing is
        filtered when it is empty or contains "All tags"
    """
    if update_commands:
        updated_record = _get_updated_record(record_id, update_commands)

    old_record = search_engine.get_record(recid=record_id)
    xml_record = bibrecord.record_xml_output(old_record)

    if output_format != "hm":
        if update_commands:
            xml_record = bibrecord.record_xml_output(updated_record)
        return bibformat.format_record(recID=None,
                                       of=output_format,
                                       xml_record=xml_record,
                                       ln=language)

    chunks = ["<pre>\n"]
    if ("All tags" not in outputTags) and outputTags:
        # In diff output the tag is the second whitespace-separated token,
        # in plain MARC output it is the first one.
        if update_commands:
            marc_text = _get_record_diff(record_id, old_record,
                                         updated_record)
            tag_index = 1
        else:
            marc_text = _create_marc(xml_record)
            tag_index = 0
        for row in marc_text.split('\n')[:-1]:
            if row.split()[tag_index][:3] in outputTags:
                if update_commands:
                    chunks.append(row.strip() + '\n')
                else:
                    chunks.append("%09d " % record_id + row.strip() + '\n')
            elif '<strong' in row and row.split()[3][5:8] in outputTags:
                # Line carrying <strong> markup: the tag is read from
                # inside the fourth token.
                chunks.append(row.strip() + '\n')
    elif update_commands:
        chunks.append(_get_record_diff(record_id, old_record,
                                       updated_record))
    else:
        for row in _create_marc(xml_record).split('\n')[:-1]:
            chunks.append("%09d " % record_id + row.strip() + '\n')

    chunks.append("</pre>")
    return "".join(chunks)
Example #27
0
def _get_record_linking_fields(recid_b, recid_a, tag, ind1, ind2):
    """
    Collect, from the record recid_b, the field instances selected by
    tag, ind1 and ind2 whose subfields do not contain a 'w' subfield
    equal to recid_a, i.e. the fields not linking to that other record.
    """
    marcxml = format_record(recid_b, "xm")
    rec = create_record(marcxml)[0]
    link_subfield = ('w', str(recid_a))
    candidates = record_get_field_instances(rec, tag=tag, ind1=ind1, ind2=ind2)
    # field_instance[0] is the subfield list that is searched for the link.
    return [field_instance for field_instance in candidates
            if link_subfield not in field_instance[0]]
Example #28
0
def _entry_data_preview(data, of='default'):
    if format == 'hd' or format == 'xm':
        from invenio.bibformat import format_record
        try:
            data['record'] = format_record(recID=None, of=of,
                                           xml_record=data['record'])
        except:
            print "This is not a XML string"
    try:
        return data['record']
    except:
        return data
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata.

    For every article of every category of the issue, the record is
    exported as 'xm', its metadata is updated, and, if the keyword is
    still present in the resulting XML, it is stripped, the XML is written
    to a file under CFG_TMPDIR and 'bibupload -c' and 'bibindex -i' tasks
    are submitted.  Finally one 'webcoll' task is submitted per collection
    to refresh.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are moved
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    # Dictionary used as a set of collection names.
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        # Only the record IDs matter here, not the order keys.
        for recids in articles.values():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(
                    CFG_TMPDIR, 'webjournal_publish_' + str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record = create_record(record_xml)[0]
                new_record = update_draft_record_metadata(
                    record, protected_datafields, keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if keyword_to_remove not in new_record_xml:
                    continue
                new_record_xml = new_record_xml.replace(
                    keyword_to_remove, '')
                # Write to file; the handle is closed even if the write
                # fails.
                with open(new_record_xml_path, 'w') as new_record_xml_file:
                    new_record_xml_file.write(new_record_xml)
                # Submit
                task_low_level_submission('bibupload', 'WebJournal', '-c',
                                          new_record_xml_path)
                task_low_level_submission('bibindex', 'WebJournal', '-i',
                                          str(recid))
                for collection in get_all_collections_of_a_record(recid):
                    collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update([
        (c, '')
        for c in get_journal_collection_to_refresh_on_release(journal_name)
    ])
    for collection in collections_to_refresh:
        task_low_level_submission('webcoll', 'WebJournal', '-f', '-p', '2',
                                  '-c', collection)
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata.

    Each article record of the issue is exported as 'xm' and its metadata
    updated; when the keyword still appears in the new XML it is removed,
    the XML is saved under CFG_TMPDIR and 'bibupload -c' / 'bibindex -i'
    tasks are submitted.  A 'webcoll' task is then submitted for every
    collection to refresh.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are moved
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    # Dictionary used as a set of collection names.
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        # The order keys are not needed, only the record IDs.
        for recids in articles.values():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR,
                                                   'webjournal_publish_' + \
                                                   str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(record,
                                                          protected_datafields,
                                                          keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if keyword_to_remove in new_record_xml:
                    new_record_xml = new_record_xml.replace(keyword_to_remove, '')
                    # Write to file; `with` closes the handle even when
                    # the write raises.
                    with open(new_record_xml_path, 'w') as new_record_xml_file:
                        new_record_xml_file.write(new_record_xml)
                    # Submit
                    task_low_level_submission('bibupload',
                                              'WebJournal',
                                              '-c', new_record_xml_path)
                    task_low_level_submission('bibindex',
                                              'WebJournal',
                                              '-i', str(recid))
                    for collection in get_all_collections_of_a_record(recid):
                        collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update([(c, '') for c in get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh:
        task_low_level_submission('webcoll',
                                  'WebJournal',
                                  '-f', '-p', '2', '-c', collection)
Example #31
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Each 999C5 field is rendered as a one-row <table> holding the
    reference ordinal, an "add row" image button, a text input pre-filled
    with the value chosen by _first_nonempty() among report number,
    journal and DOI, and either the 'hs' rendering of the record found by
    _get_unique_recid_for() or the raw h, m, DOI and journal values.  The
    output of _get_json_dump_of_codens() is appended at the end.

    @param reference_prefix a prefix displayed before each reference
    @param reference_suffix a suffix displayed after each reference
    """

    if reference_prefix is None:
        reference_prefix = ''
    if reference_suffix is None:
        reference_suffix = ''

    out = ""
    tableid = 0
    # NOTE(review): subfield values are fetched with escape=0 and inserted
    # into HTML as is -- confirm that this is intended.
    for reference in bfo.fields("999C5", escape=0):
        tableid += 1

        ordinal = reference.get('o', '')
        clean_report = reference.get('r', '')
        clean_journal = reference.get('s', '')
        clean_doi = reference.get('a', '')
        h_key = reference.get('h', '')
        m_key = reference.get('m', '')
        inputid = 'c' + str(tableid)

        # the onfocusout chgcite() js is called in the format_template referenceinp
        ref_out = '<td><input type="text" name="cite" size="35" value="%s" class="cite_search_box" id="%s" onChange="chgcite(this.id)"></td>' % (
            _first_nonempty([clean_report, clean_journal, clean_doi]), inputid)

        recid = _get_unique_recid_for(clean_journal, clean_report, clean_doi)
        if recid:
            ref_out += '<td><small>' + format_record(recid,
                                                     'hs') + '</small></td>'
        else:
            ref_out += '<td><small>%s %s <a href="http://dx.doi.org/%s">%s</a> %s</small></td>' % (
                h_key, m_key, clean_doi, clean_doi, clean_journal)

        # The prefix used to be overwritten by the table markup and was
        # therefore never displayed; it is now kept in front of it.
        format_line = reference_prefix
        #<input id="t%(tableid)s" type="button" onclick="insRow(this.id)" value = "V"> (the previous button for safekeeping)
        format_line += """<table id="t%(tableid)s" ><tr id="tr%(tableid)s"><td>%(ordinal)s</td><td><input id="t%(tableid)s" type="image"  src="/img/add.png" onclick="insertRowAfter(%(tableid)s); return false;" value = "+"></td>%(ref_out)s</tr></table>""" % {
            'tableid': str(tableid),
            'ref_out': ref_out,
            'ordinal': ordinal
        }
        format_line += reference_suffix

        out += format_line

    # In our BFT we will want to have an onSubmit() handler which substitutes
    # every short title for a coden; this makes the data for that available
    #out += '\n<script type="text/javascript">gCODENS = %s</script>\n' % (get_kb_mappings_json('CODEN_MAP'), )
    out += _get_json_dump_of_codens()

    return out
    def test_basic_formatting(self):
        """bibformat - Checking BibFormat API"""
        # Format record 73 through the API, generating it on the fly ...
        expected_text = format_record(73,
                                      'hx',
                                      ln=CFG_SITE_LANG,
                                      verbose=0,
                                      search_pattern=[],
                                      xml_record=None,
                                      user_info=None,
                                      on_the_fly=True)

        # ... and look for that output in the corresponding web page.
        pageurl = '%s/%s/73?of=hx' % (CFG_SITE_URL, CFG_SITE_RECORD)
        # NOTE(review): the value returned by test_web_page_content() is
        # never asserted on -- confirm whether a check is missing here.
        page_check = test_web_page_content(pageurl, expected_text=expected_text)
Example #33
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="",
                         checked=True, displayed_records=None):
    """Returns a record in a given format

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: the tags to be shown to the user
    @param checked: is the record checked by the user?
    @param displayed_records: records to be displayed on a given page

    @returns: record formated to be displayed or None
    """
    if update_commands and checked:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}

    if record_id not in displayed_records:
        return

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options)
    if "hm" == output_format:
        if update_commands and checked:
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options)
            result = _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id)
        else:
            filter_tags = "All tags" not in outputTags and outputTags
            result = ['<pre>']
            for line in old_record_textmarc.splitlines():
                if not filter_tags or line.split()[0].replace('_', '') in outputTags:
                    result.append("%09d " % record_id + line.strip())
            result.append('</pre>')
            result = '\n'.join(result)
    else:
        if update_commands and checked:
            # No coloring of modifications in this case
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        result = bibformat.format_record(recID=None,
                                        of=output_format,
                                        xml_record=xml_record,
                                        ln=language)
    return result
Example #34
0
def _get_record_linking_fields(recid_b, recid_a, tag, ind1, ind2):
    """
    Return the field instances (selected by tag, ind1, ind2) of the
    record recid_b that carry no 'w' subfield equal to recid_a, that is
    the fields which do not link to that other record.
    """
    rec = create_record(format_record(recid_b, "xm"))[0]
    wanted_link = ('w', str(recid_a))

    def links_elsewhere(field_instance):
        # field_instance[0] is the subfield list that is searched.
        return wanted_link not in field_instance[0]

    instances = record_get_field_instances(rec,
                                           tag=tag,
                                           ind1=ind1,
                                           ind2=ind2)
    return [instance for instance in instances if links_elsewhere(instance)]
    def test_basic_formatting(self):
        """bibformat - Checking BibFormat API"""
        # Arguments of the API call: record 73 in 'hx', formatted on the fly.
        api_arguments = dict(recID=73,
                             of='hx',
                             ln=CFG_SITE_LANG,
                             verbose=0,
                             search_pattern=[],
                             xml_record=None,
                             user_info=None,
                             on_the_fly=True)
        expected_text = format_record(**api_arguments)

        # The same record and format as served by the web interface.
        pageurl = CFG_SITE_URL + '/record/73?of=hx'
        # NOTE(review): the value returned by test_web_page_content() is
        # never asserted on -- confirm whether a check is missing here.
        page_check = test_web_page_content(pageurl,
                                           expected_text=expected_text)
def _get_formated_record(record_id, output_format, update_commands, language, outputTags=""):
    """Return a record rendered in the given format.

    The "hm" format yields a <pre> block: the output of _get_record_diff
    when update_commands is given, otherwise the MARC lines of the stored
    record; both can be restricted to outputTags.  Other formats go
    through bibformat.format_record().

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" output; no filtering
        when it is empty or contains "All tags"
    """
    if update_commands:
        updated_record = _get_updated_record(record_id, update_commands)

    old_record = search_engine.get_record(recid=record_id)
    xml_record = bibrecord.record_xml_output(old_record)

    if output_format != "hm":
        if update_commands:
            xml_record = bibrecord.record_xml_output(updated_record)
        return bibformat.format_record(recID=None,
                                       of=output_format,
                                       xml_record=xml_record,
                                       ln=language)

    body = []
    restrict_to_tags = ("All tags" not in outputTags) and outputTags
    if restrict_to_tags:
        # The tag is the second token of a diff line and the first token
        # of a plain MARC line.
        if update_commands:
            marc_text = _get_record_diff(record_id, old_record, updated_record)
            tag_index = 1
        else:
            marc_text = _create_marc(xml_record)
            tag_index = 0
        for row in marc_text.split('\n')[:-1]:
            if row.split()[tag_index][:3] in outputTags:
                if update_commands:
                    body.append(row.strip() + '\n')
                else:
                    body.append("%09d " % record_id + row.strip() + '\n')
            elif '<strong' in row and row.split()[3][5:8] in outputTags:
                # Line with <strong> markup: the tag sits inside the
                # fourth token.
                body.append(row.strip() + '\n')
    elif update_commands:
        body.append(_get_record_diff(record_id, old_record, updated_record))
    else:
        for row in _create_marc(xml_record).split('\n')[:-1]:
            body.append("%09d " % record_id + row.strip() + '\n')

    return "<pre>\n" + "".join(body) + "</pre>"
def perform_request_display_linkbacks(status, return_code, ln=CFG_SITE_LANG):
    """
    Build the admin box listing linkbacks together with moderation links.

    @param status: of CFG_WEBLINKBACK_STATUS, currently only CFG_WEBLINKBACK_STATUS['PENDING'] is supported
    @param return_code: of CFG_WEBLINKBACK_ACTION_RETURN_CODE; anything
        other than 'OK' adds an error message above the listing
    @param ln: language of the interface
    @return: HTML of the admin box, or a short notice for any other status
    """
    _ = gettext_set_language(ln)

    if status != CFG_WEBLINKBACK_STATUS['PENDING']:
        return "<i>%s</i>" % _('Currently only pending linkbacks are supported.')

    linkbacks = get_all_linkbacks(status=status, order=CFG_WEBLINKBACK_ORDER_BY_INSERTION_TIME['DESC'])

    approve_action = CFG_WEBLINKBACK_ADMIN_MODERATION_ACTION['APPROVE']
    reject_action = CFG_WEBLINKBACK_ADMIN_MODERATION_ACTION['REJECT']

    entries = []
    for (linkbackid, origin_url, recid, additional_properties, linkback_type, linkback_status, insert_time) in linkbacks: # pylint: disable=W0612
        # Link template; action and label are filled in below.
        moderation_prefix = '<a href="moderatelinkback?action=%%s&linkbackid=%s&ln=%s">%%s</a>' % (linkbackid, ln)
        record_html = format_record(recID=recid, of='hs', ln=ln)
        origin_link = '<a href="%s">%s</a>' % (cgi.escape(origin_url), cgi.escape(get_url_title(origin_url)))
        submitted_on = convert_datetext_to_dategui(str(insert_time))
        approve_link = moderation_prefix % (approve_action, 'Approve')
        reject_link = moderation_prefix % (reject_action, 'Reject')
        entries.append((linkback_type,
                        record_html,
                        origin_link,
                        submitted_on,
                        approve_link + " / " + reject_link))

    header = ['Linkback type', 'Record', 'Origin', 'Submitted on', '']

    # Translate the return code of the previous action into a message.
    if return_code == CFG_WEBLINKBACK_ACTION_RETURN_CODE['OK']:
        error_message = ""
    elif return_code == CFG_WEBLINKBACK_ACTION_RETURN_CODE['INVALID_ACTION']:
        error_message = _("Invalid action")
    else:
        error_message = _("Unknown error")

    if error_message != "":
        error_message_html = "<dt><b><font color=red>" + error_message + "</font></b></dt>" + "<br>"
    else:
        error_message_html = ""

    # The leading whitespace belongs to the emitted markup.
    description_template = ("\n"
                            "        <dl>\n"
                            "        %(error_message)s\n"
                            "        <dt>%(heading)s</dt>\n"
                            "        <dd>%(description)s</dd>\n"
                            "        </dl>\n"
                            "        ")
    out = description_template % {
        'heading': _("Pending linkbacks"),
        'description': _("these linkbacks are not visible to users, they must be approved or rejected."),
        'error_message': error_message_html}

    if entries:
        out += tupletotable(header=header, tuple=entries, highlight_rows_p=True,
                            alternate_row_colors_p=True)
    else:
        out += "<i>There are no %s linkbacks.</i>" % status.lower()

    box_title = '<b>%s</b>' % _("Reduce the amount of currently pending linkback requests")
    return addadminbox(box_title, [out])
 def record_context():
     """
     Return a dict exposing the enclosing scope's record data (recid,
     record, user, tabs, title, collection) together with two helpers:
     get_mini_reviews, whose result is decoded from UTF-8, and
     format_record with a fixed verbosity, an empty search pattern and
     on-the-fly formatting switched off.
     """
     def decoded_mini_reviews(*args, **kwargs):
         return get_mini_reviews(*args, **kwargs).decode('utf8')

     # The default language is read from g.ln when this context is built.
     def preset_format_record(recID, of='hb', ln=g.ln):
         return format_record(recID,
                              of=of,
                              ln=ln,
                              verbose=0,
                              search_pattern='',
                              on_the_fly=False)

     return {'recid': recid,
             'record': record,
             'user': user,
             'tabs': tabs,
             'title': title,
             'get_mini_reviews': decoded_mini_reviews,
             'collection': collection,
             'format_record': preset_format_record}
def _get_person_names_dicts_fallback(person_id):
    '''
    Build the names dictionary of a person from an exactauthor search.

    When the search finds records, the first one is formatted as 'XM' and
    person_id is replaced by its spelling (same text, original case) as
    found in that output, if it occurs there.

    @param person_id: identifier searched as exactauthor; it is used as a
        string below (lower(), len()) -- NOTE(review): confirm callers
        never pass an int
    @return: dict with keys 'longest', 'names_dict' and 'db_names_dict';
        both inner dicts map person_id to the number of records found
    '''
    matches = perform_request_search(rg=0, p='exactauthor:"%s"' % person_id)
    pcount = len(matches)
    if matches:
        marcxml = format_record(matches[0], 'XM')
        try:
            # Case-insensitive lookup of the identifier in the record.
            start = marcxml.lower().index(person_id.lower())
        except (IndexError, ValueError):
            pass
        else:
            person_id = marcxml[start:start + len(person_id)]
    names_dict = {person_id: pcount}
    db_names_dict = {person_id: pcount}
    return {'longest': person_id,
            'names_dict': names_dict,
            'db_names_dict': db_names_dict}
Example #40
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Every 999C5 field becomes a one-row <table> with the reference
    ordinal, an "add row" image button, a text input pre-filled with the
    value chosen by _first_nonempty() among report number, journal and
    DOI, and either the 'hs' rendering of the record found by
    _get_unique_recid_for() or the raw h, m, DOI and journal values.  The
    output of _get_json_dump_of_codens() is appended at the end.

    @param reference_prefix a prefix displayed before each reference
    @param reference_suffix a suffix displayed after each reference
    """

    if reference_prefix is None:
        reference_prefix = ''
    if reference_suffix is None:
        reference_suffix = ''

    out = ""
    tableid = 0
    # NOTE(review): subfield values are fetched with escape=0 and inserted
    # into HTML as is -- confirm that this is intended.
    for reference in bfo.fields("999C5", escape=0):
        tableid += 1

        ordinal       = reference.get('o', '')
        clean_report  = reference.get('r', '')
        clean_journal = reference.get('s', '')
        clean_doi     = reference.get('a', '')
        h_key         = reference.get('h', '')
        m_key         = reference.get('m', '')
        inputid = 'c' + str(tableid)

        # the onfocusout chgcite() js is called in the format_template referenceinp
        ref_out = '<td><input type="text" name="cite" size="35" value="%s" class="cite_search_box" id="%s" onChange="chgcite(this.id)"></td>' % (_first_nonempty([clean_report, clean_journal, clean_doi]),inputid)

        recid = _get_unique_recid_for(clean_journal, clean_report, clean_doi)
        if recid:
            ref_out += '<td><small>' + format_record(recid, 'hs') + '</small></td>'
        else:
            ref_out += '<td><small>%s %s <a href="http://dx.doi.org/%s">%s</a> %s</small></td>' % (h_key, m_key, clean_doi, clean_doi, clean_journal)

        # The prefix was previously overwritten by the table markup, so it
        # never reached the output; it now precedes the table.
        format_line = reference_prefix
        #<input id="t%(tableid)s" type="button" onclick="insRow(this.id)" value = "V"> (the previous button for safekeeping)
        format_line += """<table id="t%(tableid)s" ><tr id="tr%(tableid)s"><td>%(ordinal)s</td><td><input id="t%(tableid)s" type="image"  src="/img/add.png" onclick="insertRowAfter(%(tableid)s); return false;" value = "+"></td>%(ref_out)s</tr></table>""" % {'tableid': str(tableid), 'ref_out': ref_out, 'ordinal': ordinal}
        format_line += reference_suffix

        out += format_line

    # In our BFT we will want to have an onSubmit() handler which substitutes
    # every short title for a coden; this makes the data for that available
    #out += '\n<script type="text/javascript">gCODENS = %s</script>\n' % (get_kb_mappings_json('CODEN_MAP'), )
    out += _get_json_dump_of_codens()

    return out
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata.

    Every article record of the issue is exported as "xm" and its metadata
    updated; if the keyword is still found in the new XML it is removed,
    the XML is stored under CFG_TMPDIR and "bibupload -c" / "bibindex -i"
    tasks are submitted.  One "webcoll" task per collection to refresh is
    submitted at the end.

    @param journal_name: name of the journal
    @param issue: issue whose draft articles are moved
    """
    protected_datafields = ["100", "245", "246", "520", "590", "700"]
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    # Dictionary used as a set of collection names.
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        # The order keys are irrelevant here; only record IDs are used.
        for recids in articles.values():
            for recid in recids:
                record_xml = format_record(recid, of="xm")
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR, "webjournal_publish_" + str(recid) + ".xml")
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(record, protected_datafields, keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if keyword_to_remove in new_record_xml:
                    new_record_xml = new_record_xml.replace(keyword_to_remove, "")
                    # Write to file; the context manager closes the handle
                    # even if the write fails.
                    with open(new_record_xml_path, "w") as new_record_xml_file:
                        new_record_xml_file.write(new_record_xml)
                    # Submit
                    task_low_level_submission("bibupload", "WebJournal", "-c", new_record_xml_path)
                    task_low_level_submission("bibindex", "WebJournal", "-i", str(recid))
                    for collection in get_all_collections_of_a_record(recid):
                        collections_to_refresh[collection] = ""

    # Refresh collections
    collections_to_refresh.update([(c, "") for c in get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh:
        task_low_level_submission("webcoll", "WebJournal", "-f", "-p", "2", "-c", collection)
Example #42
0
    def extract(self, req, form):
        """References extraction page

        This page can be used for authors to test their pdfs against our
        references extraction process.

        The references are extracted from the first non-empty form field
        among 'pdf', 'arxiv', 'url' and 'txt'.  When nothing was submitted,
        or when a remote document could not be fetched, the upload form is
        shown instead of the results.

        @param req: the request object
        @param form: the submitted form fields
        @return: the rendered page (including header and footer)
        """
        user_info = collect_user_info(req)

        # Failures while fetching a remote document are not fatal: the
        # upload form is simply displayed again.
        fetch_errors = (FullTextNotAvailable, ConnectionError, HTTPError, Timeout)

        # Handle the 4 POST parameters
        if 'pdf' in form and form['pdf'].value:
            pdf = form['pdf'].value.strip()
            references_xml = extract_from_pdf_string(pdf)
        elif 'arxiv' in form and form['arxiv'].value:
            url = make_arxiv_url(arxiv_id=form['arxiv'].value.strip())
            # Same protection as for a plain URL below; this fetch used to
            # propagate network errors as unhandled exceptions.
            try:
                references_xml = extract_references_from_url_xml(url)
            except fetch_errors:
                references_xml = None
        elif 'url' in form and form['url'].value:
            url = form['url'].value.strip()
            try:
                references_xml = extract_references_from_url_xml(url)
            except fetch_errors:
                references_xml = None
        elif 'txt' in form and form['txt'].value:
            txt = form['txt'].value.decode('utf-8', 'ignore')
            references_xml = extract_references_from_string_xml(txt)
        else:
            references_xml = None

        # If we have not uploaded anything yet
        # Display the form that allows us to do so
        if not references_xml:
            out = docextract_templates.tmpl_web_form()
        else:
            references_html = format_record(0,
                                            'hdref',
                                            xml_record=references_xml,
                                            user_info=user_info)
            out = docextract_templates.tmpl_web_result(references_html)

        # Render the page (including header, footer)
        return page(title='References Extractor',
                    body=out,
                    uid=user_info['uid'],
                    req=req)
Example #43
0
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="", run_diff=True):
    """Returns a record in a given format

    For the "hm" output format the record is rendered as text MARC: either
    as a diff between the stored and the updated record (when there are
    update commands and run_diff is set), or as the stored record wrapped
    in <pre> tags with every line prefixed by the zero-padded record ID.
    For any other output format the (possibly updated) record is serialized
    to XML and handed to bibformat.

    @param record_id: the ID of record to format
    @param output_format: an output format code (or short identifier for the output format)
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the plain "hm" listing; no filtering
        is done when it is empty or contains "All tags". It is also passed
        on to _get_record_diff
    @param run_diff: determines if we want to run _get_record_diff function, which sometimes takes too much time
    @return: the formatted record
    """
    updated_record = None
    if update_commands:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    old_record = search_engine.get_record(recid=record_id)

    if "hm" != output_format:
        # No coloring of modifications in this case
        if update_commands:
            xml_record = bibrecord.record_xml_output(updated_record)
        else:
            xml_record = bibrecord.record_xml_output(old_record)
        return bibformat.format_record(recID=None,
                                       of=output_format,
                                       xml_record=xml_record,
                                       ln=language)

    # The text MARC conversion is only needed for "hm", so it is done here
    # rather than unconditionally for every output format.
    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options)

    if update_commands and run_diff:
        updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options)
        return _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id)

    filter_tags = "All tags" not in outputTags and outputTags
    result = ['<pre>']
    # The last line of the text MARC output is deliberately left out
    for line in old_record_textmarc.splitlines()[:-1]:
        if filter_tags:
            tokens = line.split()
            # A blank line carries no tag: it cannot match the filter (and
            # must not raise IndexError on tokens[0]).
            if not tokens or tokens[0].replace('_', '') not in outputTags:
                continue
        result.append("%09d " % record_id + line.strip())
    result.append('</pre>')
    return '\n'.join(result)
Example #44
0
def send_request_notification_to_all_linkback_moderators(recid, origin_url, linkback_type, ln):
    """
    Send notification emails to all linkback moderators for linkback request

    One email is sent from CFG_SITE_ADMIN_EMAIL to every address authorized
    for the 'moderatelinkbacks' action on the primary collection of the
    record. Each email carries a plain-text and an HTML body.

    @param recid: ID of the record the linkback request refers to
    @param origin_url: URL of the requestor. It is supplied by the remote
        site, so it is HTML-escaped before being placed in the HTML body
    @param linkback_type: of CFG_WEBLINKBACK_LIST_TYPE
    @param ln: language, used for the record URL and the record rendering
    """
    # Function-scope import so the block works on both Python 3 (html) and
    # Python 2 (cgi) without touching the module's import section.
    try:
        from html import escape as _escape_html
    except ImportError:
        from cgi import escape as _escape_html

    record_url = generate_redirect_url(recid, ln)
    # quote=True: the value is used inside href="..." attributes as well
    html_origin_url = _escape_html(origin_url, True)

    content = """There is a new %(linkback_type)s request for %(recordURL)s from %(origin_url)s which you should approve or reject.
              """ % {'linkback_type': linkback_type,
                     'recordURL': record_url,
                     'origin_url': origin_url}

    html_content = """There is a new %(linkback_type)s request for %(record)s (<a href="%(recordURL)s">%(recordURL)s</a>) from <a href="%(origin_url)s">%(title)s</a> (<a href="%(origin_url)s">%(origin_url)s</a>) which you should approve or reject.
                   """ % {'linkback_type': linkback_type,
                          'record': format_record(recID=recid, of='hs', ln=ln),
                          'recordURL': record_url,
                          'origin_url': html_origin_url,
                          'title': html_origin_url}

    for email in acc_get_authorized_emails('moderatelinkbacks', collection = guess_primary_collection_of_a_record(recid)):
        send_email(CFG_SITE_ADMIN_EMAIL, email, 'New ' + linkback_type + ' request', content, html_content)
    def extract(self, req, form):
        """References extraction page.

        Lets authors test their PDFs against our references extraction
        process.  The first non-empty field among 'pdf', 'arxiv', 'url' and
        'txt' is used as input; when none yields references, the submission
        form is displayed.

        @param req: request object, handed to collect_user_info() and page()
        @param form: submitted fields; each field read here exposes its
            content through a ``value`` attribute
        @return: the rendered page (including header and footer)
        """
        user_info = collect_user_info(req)

        def filled(field):
            # Truthy when the field was posted with a non-empty value
            return field in form and form[field].value

        # Pick the extraction routine matching the submitted parameter
        if filled('pdf'):
            references_xml = extract_from_pdf_string(form['pdf'].value)
        elif filled('arxiv'):
            arxiv_url = make_arxiv_url(arxiv_id=form['arxiv'].value)
            references_xml = extract_references_from_url_xml(arxiv_url)
        elif filled('url'):
            references_xml = extract_references_from_url_xml(form['url'].value)
        elif filled('txt'):
            decoded = form['txt'].value.decode('utf-8', 'ignore')
            references_xml = extract_references_from_string_xml(decoded)
        else:
            references_xml = None

        if references_xml:
            # Something was extracted: show the formatted references
            rendered = format_record(0,
                                     'hdref',
                                     xml_record=references_xml,
                                     user_info=user_info)
            body = docextract_templates.tmpl_web_result(rendered)
        else:
            # Nothing uploaded (or nothing found): show the upload form
            body = docextract_templates.tmpl_web_form()

        # Wrap the body in the site layout (header, footer)
        return page(title='References Extractor',
                    body=body,
                    uid=user_info['uid'],
                    req=req)
Example #46
0
def iterate_over_new(list, fmt):
    """Format every record ID of `list` in `fmt` and store it in bibfmt.

    Each record is formatted on the fly, zlib-compressed and written to the
    bibfmt table (UPDATE when a row for the record/format pair already
    exists, INSERT otherwise).  Progress is reported every 100 records, at
    which point the task may also sleep or stop.

    @param list: the record IDs to format
    @param fmt: the output format code
    @return: tuple (number of records, time spent formatting, time spent
        uploading); the upload time is never incremented here and is
        always 0
    """
    global total_rec

    upload_time = 0  # time taken up by external call (never updated here)
    format_time = 0  # time taken up by external call
    # Overwritten with the current time for every record below
    start_date = task_get_task_param('task_starting_time')

    total = len(list)
    done = 0
    for done, recID in enumerate(list, 1):
        clock_before = os.times()[4]
        start_date = time.strftime('%Y-%m-%d %H:%M:%S')
        compressed = zlib.compress(format_record(recID, fmt, on_the_fly=True))
        already_stored = run_sql(
            'SELECT id FROM bibfmt WHERE id_bibrec=%s AND format=%s',
            (recID, fmt))
        if already_stored:
            run_sql(
                'UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s',
                (start_date, compressed, recID, fmt))
        else:
            run_sql(
                'INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, %s, %s, %s)',
                (recID, fmt, start_date, compressed))
        format_time += (os.times()[4] - clock_before)

        if done % 100 != 0:
            continue
        # Every 100 records: report progress and give the scheduler a chance
        write_message("   ... formatted %s records out of %s" % (done, total))
        task_update_progress('Formatted %s out of %s' % (done, total))
        task_sleep_now_if_required(can_stop_too=True)

    # Final report, unless the last batch report already covered everything
    if total % 100 != 0:
        write_message("   ... formatted %s records out of %s" % (done, total))
    return (total, format_time, upload_time)
Example #47
0
def process_references(references, output_format):
    """
    Process a list of references and convert them to a
    given output_format

    Each reference is classified by its shape (texkey, eprint, journal
    or report number) and looked up with perform_request_search; the first
    matching record is formatted.  References matching none of the known
    shapes are skipped and contribute nothing to the output.

    @param references: iterable of reference strings
    @param output_format: 'hlxu', 'hlxe' or 'hx' to format through
        format_record; any other value formats the record with
        bfe_INSPIRE_bibtex
    @return: the concatenated formatted records, with a
        '*** Not Found: ...' line for each reference whose lookup returned
        no record
    """

    chunks = []  # pieces of the result, joined once at the end
    for ref in references:
        index = None
        if re.search(r'.*\:\d{4}\w\w\w?', ref):
            index = 'texkey'
        elif re.search(r'.*\/\d{7}', ref):
            index = 'eprint'
        elif re.search(r'\d{4}\.\d{4}', ref):
            index = 'eprint'
        elif re.search(r'\w\.\w+\.\w', ref):
            index = 'j'
            # journal references are searched with commas instead of dots
            ref = re.sub(r'\.', ',', ref)
        elif re.search(r'\w\-\w', ref):
            index = 'r'
        if not index:
            continue

        # hack to match more records
        if index == 'texkey':
            p_to_find = '035__z:' + ref
            recid_list = perform_request_search(p=p_to_find)
            if not recid_list:
                #try 035__a
                p_to_find = '035__a:' + ref
                recid_list = perform_request_search(p=p_to_find)
        else:
            p_to_find = 'find ' + index + ' ' + ref
            recid_list = perform_request_search(p=p_to_find)

        if not recid_list:
            chunks.append('*** Not Found: ' + ref + ' ' + p_to_find + '\n\n')
            continue

        recid = recid_list[0]
        if output_format not in ('hlxu', 'hlxe', 'hx'):
            # The BibFormatObject is only needed on this path
            bfo = BibFormatObject(recid)
            chunks.append(bfe_INSPIRE_bibtex.format_element(bfo) + '\n')
            continue

        formated_rec = format_record(recid, output_format, 'en')
        # update bibitem and cite if they don't match.
        # ref is user-supplied text: it is escaped so that regex
        # metacharacters in it are matched literally.
        if not re.search(r'bibitem\{' + re.escape(ref) + r'\}', formated_rec):
            ref = re.sub(',', '.', ref)
            # In a replacement template only the backslash is special
            repl_ref = ref.replace('\\', '\\\\')
            if output_format != 'hx':
                #laTeX
                formated_rec = re.sub(r'bibitem\{(.*)\}',
                                      'bibitem{' + repl_ref + '}',
                                      formated_rec)
                formated_rec = re.sub(r'cite\{(.*)\}',
                                      'cite{' + repl_ref + '}',
                                      formated_rec)
            elif not re.search(r'@article\{' + re.escape(ref) + r'\}',
                               formated_rec):
                #bibtex
                formated_rec = re.sub(r'@article\{(.*),',
                                      '@article{' + repl_ref + ',',
                                      formated_rec)
        chunks.append(formated_rec + '\n')

    return ''.join(chunks)
Example #48
0
def process_references(references, output_format):
    """
    Process a list of references and convert them to a
    given output_format

    A reference is recognised as a texkey, an eprint, a journal reference
    or a report number from its shape, then searched for with
    perform_request_search.  Only the first record found is used.
    References of an unrecognised shape are ignored.

    @param references: iterable of reference strings
    @param output_format: 'hlxu', 'hlxe' or 'hx' to go through
        format_record; anything else is rendered by bfe_INSPIRE_bibtex
    @return: string made of the formatted records, plus one
        '*** Not Found: ...' entry per reference whose search returned
        nothing
    """

    btxt_str = '' # result string
    record_formats = ('hlxu', 'hlxe', 'hx')
    for ref in references:
        index = None
        if re.search(r'.*\:\d{4}\w\w\w?', ref):
            index = 'texkey'
        elif re.search(r'.*\/\d{7}', ref):
            index = 'eprint'
        elif re.search(r'\d{4}\.\d{4}', ref):
            index = 'eprint'
        elif re.search(r'\w\.\w+\.\w', ref):
            index = 'j'
            # dots become commas for the journal search
            ref = re.sub(r'\.', ',', ref)
        elif re.search(r'\w\-\w', ref):
            index = 'r'
        if index:
            # hack to match more records
            if index == 'texkey':
                p_to_find = '035__z:' + ref
                recid_list = perform_request_search(p=p_to_find)
                if not recid_list:
                    #try 035__a
                    p_to_find = '035__a:' + ref
                    recid_list = perform_request_search(p=p_to_find)
            else:
                p_to_find = 'find ' + index + ' ' + ref
                recid_list = perform_request_search(p=p_to_find)

            if recid_list:
                if output_format in record_formats:
                    formated_rec = format_record(recid_list[0],
                                                 output_format, 'en')
                    # update bibitem and cite if they don't match.
                    # The reference comes from the caller, so it is
                    # escaped before being embedded in a pattern.
                    key_pattern = r'bibitem\{' + re.escape(ref) + r'\}'
                    if not re.search(key_pattern, formated_rec):
                        ref = re.sub(',', '.', ref)
                        # Backslash is the only special character in a
                        # re.sub replacement template
                        literal_ref = ref.replace('\\', '\\\\')
                        if output_format != 'hx':
                            #laTeX
                            formated_rec = re.sub(
                                r'bibitem\{(.*)\}',
                                'bibitem{' + literal_ref + '}',
                                formated_rec)
                            formated_rec = re.sub(
                                r'cite\{(.*)\}',
                                'cite{' + literal_ref + '}',
                                formated_rec)
                        else:
                            #bibtex
                            article_pattern = (r'@article\{' +
                                               re.escape(ref) + r'\}')
                            if not re.search(article_pattern, formated_rec):
                                formated_rec = re.sub(
                                    r'@article\{(.*),',
                                    '@article{' + literal_ref + ',',
                                    formated_rec)
                    btxt_str = btxt_str + formated_rec + '\n'
                else:
                    # Built here because no other path uses it
                    bfo = BibFormatObject(recid_list[0])
                    btxt_str = (btxt_str +
                                bfe_INSPIRE_bibtex.format_element(bfo) + '\n')
            else:
                btxt_str = (btxt_str + '*** Not Found: ' + ref + ' ' +
                            p_to_find + '\n\n')

    return btxt_str
Example #49
0
    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    header_body += X.datestamp()(get_modification_date(recid))
    for set_spec in get_field(recid, CFG_OAI_SET_FIELD):
        if set_spec and set_spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            # Print only if field not empty
            header_body += X.setSpec()(set_spec)

    header = X.header(status=status)(header_body)

    if verb == 'ListIdentifiers':
        return header
    else:
        if record_exists_result:
            metadata_body = format_record(recid,
                                          CFG_OAI_METADATA_FORMATS[prefix][0])
            metadata = X.metadata(body=metadata_body)
            provenance_body = get_record_provenance(recid)
            if provenance_body:
                provenance = X.about(body=provenance_body)
            else:
                provenance = ''
            rights_body = get_record_rights(recid)
            if rights_body:
                rights = X.about(body=rights_body)
            else:
                rights = ''
        else:
            metadata = ''
            provenance = ''
            rights = ''
Example #50
0
def process_references(references, output_format):
    """
    Turn a list of references into a string in the given output_format.

    Every reference is classified by shape (texkey, eprint, journal or
    report number) and searched for with perform_request_search; the first
    record found is formatted.  A reference that matches no known shape, or
    whose search returns nothing, yields a <div> block (a @MISC entry for
    'hx', a \\bibitem otherwise) explaining why.

    @param references: iterable of reference strings
    @param output_format: 'hlxu', 'hlxe' or 'hx' to use format_record;
        any other value is rendered by bfe_INSPIRE_bibtex
    @return: the concatenation of all the produced entries
    """
    not_found_note = '*** Not Found with lookup:'
    non_standard_note = '*** Non-standard form, no INSPIRE lookup performed ***'

    # (pattern, search index) pairs, tried in order; first match wins
    shapes = (
        (r'.*\:\d{4}\w\w\w?', 'texkey'),
        (r'.*\/\d{7}', 'eprint'),
        (r'\d{4}\.\d{4,5}', 'eprint'),
        (r'\w\.\w+\.\w', 'j'),
        (r'\w\-\w', 'r'),
    )
    is_bibtex = output_format == 'hx'

    entries = []  # produced pieces, joined at the end
    for ref in references:
        index = None
        for pattern, candidate in shapes:
            if re.search(pattern, ref):
                index = candidate
                break
        if index == 'j':
            # journal references are searched with commas instead of dots
            ref = re.sub(r'\.', ',', ref)

        if not index:
            if is_bibtex:
                layout = '<div class="%s">\n@MISC{%s,\n\t%s\n}\n</div>\n'
            else:
                layout = '<div class="%s">\\bibitem{%s}\n\t%s\n</div>\n'
            entries.append(layout % ('nonstandard', ref, non_standard_note,))
            continue

        # hack to match more records
        if index == 'texkey':
            p_to_find = '035__z:' + ref
            recid_list = perform_request_search(p=p_to_find)
            if not recid_list:
                # second chance: try 035__a
                p_to_find = '035__a:' + ref
                recid_list = perform_request_search(p=p_to_find)
        else:
            p_to_find = 'find ' + index + ' ' + ref
            recid_list = perform_request_search(p=p_to_find)

        if not recid_list:
            if is_bibtex:
                layout = '<div class="%s">\n@MISC{%s,\n\t%s \'%s\'\n}\n</div>\n'
            else:
                layout = '<div class="%s">\\bibitem{%s}\n\t%s \'%s\'\n</div>\n'
            entries.append(layout % ('notfound', ref, not_found_note, p_to_find,))
            continue

        bfo = BibFormatObject(recid_list[0])
        if output_format not in ('hlxu', 'hlxe', 'hx'):
            entries.append(bfe_INSPIRE_bibtex.format_element(bfo) + '\n')
            continue

        formated_rec = format_record(recid_list[0], output_format, 'en')
        # rewrite the citation key when it differs from the requested one
        if not re.search('bibitem{' + re.escape(ref) + '}', formated_rec):
            ref = re.sub(',', '.', ref)
            if not is_bibtex:
                # LaTeX
                for command in ('bibitem', 'cite'):
                    formated_rec = re.sub(command + '{(.*)}',
                                          command + '{' + ref + '}',
                                          formated_rec)
            elif not re.search(r'\@article\{' + re.escape(ref) + '}',
                               formated_rec):
                # BibTeX
                formated_rec = re.sub(r'\@article\{(.*)\,',
                                      r'@article{' + ref + ',',
                                      formated_rec)
        entries.append(formated_rec + '\n')

    return ''.join(entries)
Example #51
0
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None, set_last_updated=None):
    """Return record 'recid' formatted according to 'prefix'.

    - a record is reported as 'deleted' when record_exists() does not
      return 1 for it, or when 'set_spec' is given and the record belongs
      neither to that set nor to one of its subsets ("<set_spec>:...").

    - a 'deleted' record yields an empty string unless
      CFG_OAI_DELETED_POLICY is 'persistent' or 'transient', in which case
      only its header (with status 'deleted') is filled in.

    - a record without any OAI identifier yields an empty string.

    @param recid: the record ID
    @param prefix: key into CFG_OAI_METADATA_FORMATS selecting the format
    @param verb: 'ListIdentifiers' returns the header only; any other
        value returns the whole record element
    @param set_spec: optional set the record is required to belong to
    @param set_last_updated: optional date; when given, the datestamp is
        the later of it and the record modification date
    """
    available = record_exists(recid) == 1
    if available:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and set_spec not in sets:
            subset_prefix = "%s:" % set_spec
            if not any(set_.startswith(subset_prefix) for set_ in sets):
                ## the record is not in the requested set, and is not
                ## in any subset
                available = False

    status = None if available else 'deleted'

    if not available and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return ""

    ## FIXME: Move these checks in a bibtask (a record is expected to have
    ## exactly one OAI ID; only the first one is considered here)
    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        return ""
    ident = idents[0]

    # Header: identifier, datestamp, then one setSpec per non-global set
    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    if set_last_updated:
        header_body += X.datestamp()(max(get_modification_date(recid), set_last_updated))
    else:
        header_body += X.datestamp()(get_modification_date(recid))
    for record_set in get_field(recid, CFG_OAI_SET_FIELD):
        # Print only if field not empty
        if record_set and record_set != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            header_body += X.setSpec()(record_set)

    header = X.header(status=status)(header_body)

    if verb == 'ListIdentifiers':
        return header

    metadata = ''
    provenance = ''
    rights = ''
    if available:
        metadata = X.metadata(body=format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0]))
        provenance_body = get_record_provenance(recid)
        if provenance_body:
            provenance = X.about(body=provenance_body)
        rights_body = get_record_rights(recid)
        if rights_body:
            rights = X.about(body=rights_body)
    return X.record()(header, metadata, provenance, rights)
Example #52
0
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Return record 'sysno' as an OAI <record> element, formatted
    according to 'format'.

    - if record does not exist (record_exists() is falsy), return None.

    - if record has been deleted (record_exists() is -1) and
      CFG_OAI_DELETED_POLICY is 'persistent' or 'transient', then return
      only the header, with status 'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY has any other
      value, then return None.

    Formats 'dc' and 'oai_dc' are rendered through
    format_record(sysno, 'xoaidc'); 'marcxml' is taken from the
    preformatted 'xm' output when available and built from the bibXXx
    tables otherwise.  For any other format the <metadata> element is left
    empty.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that function
    again if already done.)
    """

    out = ""

    # reuse the caller's record_exists() result when one was supplied
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)

    # unknown record: bare return, i.e. None (not an empty string)
    if not _record_exists:
        return

    # both Dublin Core spellings map to the internal 'xd' code
    if (format == "dc") or (format == "oai_dc"):
        format = "xd"

    # print record opening tags:

    out = out + "  <record>\n"

    if _record_exists == -1:  # Deleted?
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + "    <header status=\"deleted\">\n"
        else:
            # deleted records are not advertised under this policy: None
            return
    else:
        out = out + "   <header>\n"

    # header content: every OAI identifier, the datestamp, every set
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s    <identifier>%s</identifier>\n" % (out,
                                                       escape_space(ident))
    out = "%s    <datestamp>%s</datestamp>\n" % (out,
                                                 get_modification_date(sysno))
    # NOTE(review): 'set' shadows the builtin for the rest of the function
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set:
            # Print only if field not empty
            out = "%s    <setSpec>%s</setSpec>\n" % (out, set)
    out = out + "   </header>\n"

    if _record_exists == -1:  # Deleted?
        # deleted records carry a header only, no <metadata>
        pass
    else:
        out = out + "   <metadata>\n"

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                # Infoscience modification :
                # Added custom validator from Swiss librarians
                # The opening <record> tag (with or without the default
                # namespace) becomes a namespaced <marc:record> followed by
                # a fixed <marc:leader>.
                formatted_record = formatted_record.replace(
                    "<record>",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                formatted_record = formatted_record.replace(
                    "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                # Every remaining element gets the marc: prefix, opening
                # and closing tags alike.
                formatted_record = formatted_record.replace(
                    "</record", "</marc:record")
                formatted_record = formatted_record.replace(
                    "<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace(
                    "</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace(
                    "<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace(
                    "</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace(
                    "<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace(
                    "</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                # Infoscience modification
                # NOTE(review): unlike the preformatted path above, no
                # newline is emitted after the opening <marc:record> tag
                # nor after the <marc:leader> element.
                out = out + "    <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                # controlfield 001 holds the record ID itself
                out = "%s     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (
                    out, int(sysno))

                # Walk the hundred table pairs bib00x/bibrec_bib00x ...
                # bib99x/bibrec_bib99x, one per two-digit tag prefix.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        # Only the table names are interpolated into the
                        # query; %%s leaves placeholders for run_sql, which
                        # receives the record ID and tag prefix as values.
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query,
                                      (sysno, '%d%d%%' % (digit1, digit2)))
                        # -999 means "no field opened yet" in this table
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            # 'field' is read as: 3-char tag, the two
                            # indicators, then the subfield code; "_" stands
                            # for a blank indicator.
                            ind1, ind2 = field[3], field[4]
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            # A new element starts when the field number
                            # changes or when tag+indicators differ from the
                            # previous row (subfield code ignored).
                            if field_number != field_number_old or field[:
                                                                         -1] != field_old[:
                                                                                          -1]:
                                # always true here: this whole branch is
                                # under format == "marcxml"
                                if format == "marcxml":

                                    # close the previously opened element
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + "     </marc:controlfield>\n"
                                        else:
                                            out = out + "     </marc:datafield>\n"

                                    # tags starting with "00" are control
                                    # fields, everything else a datafield
                                    if field[0:2] == "00":
                                        out = "%s     <marc:controlfield tag=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]),
                                            encode_for_xml(ind1).lower(),
                                            encode_for_xml(ind2).lower())

                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)

                                if (field[0:2] == "00"):
                                    # control fields hold the bare value
                                    out = "%s      %s\n" % (out, value)
                                else:
                                    out = "%s      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                                        out, encode_for_xml(field[-1:]), value)

                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + "     </marc:controlfield>\n"
                            else:
                                out = out + "     </marc:datafield>\n"

                out = out + "    </marc:record>\n"

        elif format == "xd":
            # Dublin Core is delegated to the formatting engine
            out += format_record(sysno, 'xoaidc')

        # print record closing tags:

        out = out + "   </metadata>\n"

    out = out + "  </record>\n"

    return out
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Return record 'sysno' as an OAI <record> element, formatted
    according to 'format'.

    - if record does not exist, return None.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'persistent' or 'transient', then return only the header, with
      status 'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY has any
      other value (e.g. 'no'), then return None.

    @param sysno: ID of the record to print
    @param format: 'marcxml' for MARCXML metadata; 'dc', 'oai_dc' or
        'xd' for the output of format_record(sysno, 'xoaidc').  Any
        other value yields an empty <metadata> element.
    @param record_exists_result: optional result of the
        record_exists(sysno) function, in order not to call that
        function again if already done.
    @return: the XML fragment as a string, or None (see above)
    """
    # sanity check:
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)

    if not _record_exists:
        return None

    is_deleted = (_record_exists == -1)
    if is_deleted and CFG_OAI_DELETED_POLICY not in ("persistent", "transient"):
        # Deleted records are not advertised under this policy
        return None

    if format in ("dc", "oai_dc"):
        format = "xd"

    marc_record_open = ("<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" "
                        "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
                        "xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim "
                        "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" "
                        "type=\"Bibliographic\">")
    marc_leader = "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"

    # The fragments are collected in a list and joined once at the end.
    # print record opening tags:
    parts = ["  <record>\n"]
    if is_deleted:
        parts.append("    <header status=\"deleted\">\n")
    else:
        parts.append("   <header>\n")

    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        parts.append("    <identifier>%s</identifier>\n" % (escape_space(ident),))
    parts.append("    <datestamp>%s</datestamp>\n" % (get_modification_date(sysno),))
    for set_spec in get_field(sysno, CFG_OAI_SET_FIELD):
        if set_spec:
            # Print only if field not empty
            parts.append("    <setSpec>%s</setSpec>\n" % (set_spec,))
    parts.append("   </header>\n")

    # Deleted records carry a header only, no metadata
    if not is_deleted:
        parts.append("   <metadata>\n")

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed:
                ## add the marc: namespace prefix and the leader.  The
                ## order of the substitutions matters.
                marc_record_header = marc_record_open + "\n" + marc_leader
                substitutions = (
                    ("<record>", marc_record_header),
                    ("<record xmlns=\"http://www.loc.gov/MARC21/slim\">", marc_record_header),
                    ("</record", "</marc:record"),
                    ("<controlfield", "<marc:controlfield"),
                    ("</controlfield", "</marc:controlfield"),
                    ("<datafield", "<marc:datafield"),
                    ("</datafield", "</marc:datafield"),
                    ("<subfield", "<marc:subfield"),
                    ("</subfield", "</marc:subfield"),
                )
                for plain, prefixed in substitutions:
                    formatted_record = formatted_record.replace(plain, prefixed)
                parts.append(formatted_record)
            else:
                ## MARCXML is not formatted in the database, so produce it.
                # NOTE: opening tag and leader are emitted without a
                # trailing newline; the output is kept as it always was.
                parts.append("    " + marc_record_open)
                parts.append(marc_leader)
                parts.append("     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % int(sysno))

                # One table pair per two-digit tag prefix: bib00x .. bib99x
                for tag_prefix in range(100):
                    bibbx = "bib%02dx" % tag_prefix
                    bibx = "bibrec_bib%02dx" % tag_prefix
                    query = ("SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "
                             "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "
                             "ORDER BY bb.field_number, b.tag ASC") % (bibbx, bibx)
                    res = run_sql(query, (sysno, "%02d%%" % tag_prefix))
                    field_number_old = -999  # sentinel: no field opened yet
                    field_old = ""
                    for row in res:
                        field, value, field_number = row[0], row[1], row[2]
                        ind1, ind2 = field[3], field[4]
                        if ind1 == "_":
                            ind1 = " "
                        if ind2 == "_":
                            ind2 = " "
                        is_controlfield = (field[0:2] == "00")
                        # print field tag
                        if field_number != field_number_old or field[:-1] != field_old[:-1]:
                            # a new field starts: close the previous one, if any
                            if field_number_old != -999:
                                if field_old[0:2] == "00":
                                    parts.append("     </marc:controlfield>\n")
                                else:
                                    parts.append("     </marc:datafield>\n")

                            if is_controlfield:
                                parts.append("     <marc:controlfield tag=\"%s\">\n" % (
                                    encode_for_xml(field[0:3]),))
                            else:
                                parts.append("     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                    encode_for_xml(field[0:3]),
                                    encode_for_xml(ind1).lower(),
                                    encode_for_xml(ind2).lower()))

                            field_number_old = field_number
                            field_old = field
                        # print subfield value
                        value = encode_for_xml(value)
                        if is_controlfield:
                            parts.append("      %s\n" % (value,))
                        else:
                            parts.append("      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                                encode_for_xml(field[-1:]), value))

                    # all fields/subfields printed in this run, so close the tag:
                    if field_number_old != -999:
                        if field_old[0:2] == "00":
                            parts.append("     </marc:controlfield>\n")
                        else:
                            parts.append("     </marc:datafield>\n")

                parts.append("    </marc:record>\n")

        elif format == "xd":
            parts.append(format_record(sysno, 'xoaidc'))

        parts.append("   </metadata>\n")

    # print record closing tags:
    parts.append("  </record>\n")

    return "".join(parts)
Example #54
0
 def test_format_2_passes(self):
     # Format the XML fixture held in self.xml_text with output format
     # "test6" and compare against the exact expected text.
     expected = "helloworld\n"
     self.assertEqual(
         format_record(recID=None, of="test6", xml_record=self.xml_text),
         expected)
    def detailed_record_container_top(self,
                                      recid,
                                      tabs,
                                      ln=CFG_SITE_LANG,
                                      show_similar_rec_p=True,
                                      creationdate=None,
                                      modificationdate=None,
                                      show_short_rec_p=True,
                                      citationnum=-1,
                                      referencenum=-1):
        """Build the opening HTML of the box shown on detailed record pages,
        with the tab bar at its top.

        Returns an empty string when fewer than two tabs are given.
        The returned markup leaves the 'detailedrecordbox',
        'detailedrecordboxcontent' and 'inside' div elements open.

           Parameters:

        @param recid: int - the id of the displayed record
        @param tabs: iterable of (label, url, selected, enabled) tuples -
                     the tabs displayed at the top of the box.
        @param ln: *string* - the language of the page in which the box is displayed
        @param show_similar_rec_p: *bool* - not used by this implementation
        @param creationdate: *string* - not used by this implementation
        @param modificationdate: *string* - not used by this implementation
        @param show_short_rec_p: *boolean* - prints a very short version of the record as reminder.
        @param citationnum: show (this) number of citations in the citations tab
        @param referencenum: show (this) number of references in the references tab
        """
        # Fewer than two tabs: nothing to draw
        if len(tabs) <= 1:
            return ''

        # load the right message language
        _ = gettext_set_language(ln)

        # One <li> per tab; only the very first one gets the 'first' class
        tab_items = []
        for position, (label, url, selected, enabled) in enumerate(tabs):
            # Optional counter shown after the label; when both URL
            # markers match, the references count wins
            counter = ""
            if citationnum > -1 and url.count("/citation") == 1:
                counter = "(" + str(citationnum) + ")"
            if referencenum > -1 and url.count("/references") == 1:
                counter = "(" + str(referencenum) + ")"

            class_names = []
            if selected:
                class_names.append('on')
            if position == 0:
                class_names.append('first')
            if not enabled:
                class_names.append('disabled')
            class_attribute = ' class="%s"' % ' '.join(class_names)

            if enabled:
                tab_items.append(
                    '<li%(class)s><a href="%(url)s">%(label)s %(addnum)s </a></li>' % \
                    {'class': class_attribute,
                     'url': url,
                     'label': label,
                     'addnum': counter})
            else:
                # Disabled tabs are rendered without a link target
                tab_items.append(
                    '<li%(class)s><a>%(label)s %(addnum)s</a></li>' % \
                    {'class': class_attribute,
                     'label': label,
                     'addnum': counter})

        out_tabs = ''.join(tab_items)
        if out_tabs != '':
            tabs_wrapper = '''        <div class="detailedrecordtabs">
            <div>
                <ul class="detailedrecordtabs">%s</ul>
            <div id="tabsSpacer" style="clear:both;height:0px">&nbsp;</div></div>
        </div>'''
            out_tabs = tabs_wrapper % out_tabs

        # Add the clip icon and the brief record reminder if necessary
        record_brief = ''
        if show_short_rec_p:
            short_record = format_record(recID=recid, of='hs', ln=ln)
            record_brief = '''<div id="detailedrecordshortreminder">
                             <div id="clip">&nbsp;</div>
                             <div id="HB">
                                 %(record_brief)s
                             </div>
                         </div>
                         <div style="clear:both;height:1px">&nbsp;</div>
                         ''' % {'record_brief': short_record}

        # Assemble the box
        return """
    <div class="detailedrecordbox">
        %(tabs)s
        <div class="detailedrecordboxcontent">
            <div class="top-left-folded"></div>
            <div class="top-right-folded"></div>
            <div class="inside">
                <!--<div style="height:0.1em;">&nbsp;</div>
                <p class="notopgap">&nbsp;</p>-->
                %(record_brief)s
                """ % {'tabs': out_tabs,
                       'record_brief': record_brief}
Example #56
0
    def detailed_record_container_top(self, recid, tabs, ln=CFG_SITE_LANG,
                                      show_similar_rec_p=True,
                                      creationdate=None,
                                      modificationdate=None,
                                      earliestdate=None,
                                      show_short_rec_p=True,
                                      citationnum=-1, referencenum=-1, discussionnum=-1,
                                      include_jquery=False, include_mathjax=False):
        """Build the opening HTML of the box shown on detailed record pages,
        with the tab bar at its top.

        When fewer than two tabs are given, only the restriction flag
        (possibly an empty string) is returned.  Otherwise the returned
        markup leaves the 'detailedrecordbox', 'detailedrecordboxcontent'
        and 'inside' div elements open.

           Parameters:

        @param recid: int - the id of the displayed record
        @param tabs: iterable of (label, url, selected, enabled) tuples -
                     the tabs displayed at the top of the box.
        @param ln: *string* - the language of the page in which the box is displayed
        @param show_similar_rec_p: *bool* - not used by this implementation
        @param creationdate: *string* - not used by this implementation
        @param modificationdate: *string* - not used by this implementation
        @param earliestdate: *string* - not used by this implementation
        @param show_short_rec_p: *boolean* - prints a very short version of the record as reminder.
        @param citationnum: show (this) number of citations in the citations tab
        @param referencenum: show (this) number of references in the references tab
        @param discussionnum: show (this) number of comments/reviews in the discussion tab
        @param include_jquery: *bool* - emit a script tag loading
                     CFG_BASE_URL/js/jquery.min.js before the box
        @param include_mathjax: *bool* - emit the output of
                     get_mathjax_header() before the box
        """
        from invenio.search_engine import \
             get_restricted_collections_for_recid, \
             is_record_in_any_collection

        # load the right message language
        _ = gettext_set_language(ln)

        # Prepare restriction flag
        restriction_flag = ''
        if get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False):
            restriction_flag = '<div class="restrictedflag"><span>%s</span></div>' % _("Restricted")
        elif not is_record_in_any_collection(recid, recreate_cache_if_needed=False):
            restriction_flag = '<div class="restrictedflag restrictedflag-pending"><span>%s</span></div>' % _("Restricted (Processing Record)")

        # If no tabs, returns nothing (excepted if restricted)
        if len(tabs) <= 1:
            return restriction_flag

        # Build the tabs at the top of the page.  At this point there are
        # at least two tabs, so no further length check is needed.
        tab_items = []
        for position, (label, url, selected, enabled) in enumerate(tabs):
            # Optional counter shown after the label; when several URL
            # markers match, the last one in this list wins
            addnum = ""
            for count, marker in ((citationnum, "/citation"),
                                  (referencenum, "/references"),
                                  (discussionnum, "/comments")):
                if count > -1 and url.count(marker) == 1:
                    addnum = "(" + str(count) + ")"

            class_names = []
            if selected:
                class_names.append('on')
            if position == 0:
                class_names.append('first')
            if not enabled:
                class_names.append('disabled')
            class_attribute = ' class="%s"' % ' '.join(class_names)

            if not enabled:
                # Disabled tabs are rendered without a link target
                tab_items.append('<li%(class)s><a>%(label)s %(addnum)s</a></li>' % \
                                 {'class': class_attribute,
                                  'label': label,
                                  'addnum': addnum})
            else:
                tab_items.append('<li%(class)s><a href="%(url)s">%(label)s %(addnum)s </a></li>' % \
                                 {'class': class_attribute,
                                  'url': url,
                                  'label': label,
                                  'addnum': addnum})

        out_tabs = ''.join(tab_items)
        if out_tabs != '':
            out_tabs = '''        <div class="detailedrecordtabs">
            <div>
                <ul class="detailedrecordtabs">%s</ul>
            <div id="tabsSpacer" style="clear:both;height:0px">&nbsp;</div></div>
        </div>''' % out_tabs

        # Add the clip icon and the brief record reminder if necessary
        record_brief = ''
        if show_short_rec_p:
            record_brief = format_record(recID=recid, of='hs', ln=ln)
            record_brief = '''<div id="detailedrecordshortreminder">
                             <div id="clip">&nbsp;</div>
                             <div id="HB">
                                 %(record_brief)s
                             </div>
                         </div>
                         <div style="clear:both;height:1px">&nbsp;</div>
                         ''' % {'record_brief': record_brief}

        additional_scripts = ""
        if include_jquery:
            # The script element is intentionally empty: the previous
            # literal leaked stray quote characters into its content.
            additional_scripts += '<script type="text/javascript" src="%s/js/jquery.min.js"></script>\n' % (CFG_BASE_URL, )
        if include_mathjax:
            additional_scripts += get_mathjax_header()

        # Print the content
        out = """
        %(additional_scripts)s<div class="detailedrecordbox">
        %(tabs)s
        <div class="detailedrecordboxcontent">
            <div class="top-left-folded"></div>
            <div class="top-right-folded"></div>
            <div class="inside">
                <!--<div style="height:0.1em;">&nbsp;</div>
                <p class="notopgap">&nbsp;</p>-->
                %(record_brief)s
                """ % {'additional_scripts': additional_scripts,
                       'tabs': out_tabs,
                       'record_brief': record_brief}

        return restriction_flag + out
Example #57
0
    def detailed_record_container_top(
        self,
        recid,
        tabs,
        ln=CFG_SITE_LANG,
        show_similar_rec_p=True,
        creationdate=None,
        modificationdate=None,
        show_short_rec_p=True,
        citationnum=-1,
        referencenum=-1,
        discussionnum=-1,
    ):
        """Build the opening HTML of the box shown on detailed record pages,
        with the tab bar at its top.

        When fewer than two tabs are given, only the restriction flag
        (possibly an empty string) is returned.  Otherwise the returned
        markup leaves the 'detailedrecordbox', 'detailedrecordboxcontent'
        and 'inside' div elements open.

           Parameters:

        @param recid: int - the id of the displayed record
        @param tabs: iterable of (label, url, selected, enabled) tuples -
                     the tabs displayed at the top of the box.
        @param ln: *string* - the language of the page in which the box is displayed
        @param show_similar_rec_p: *bool* - not used by this implementation
        @param creationdate: *string* - not used by this implementation
        @param modificationdate: *string* - not used by this implementation
        @param show_short_rec_p: *boolean* - prints a very short version of the record as reminder.
        @param citationnum: show (this) number of citations in the citations tab
        @param referencenum: show (this) number of references in the references tab
        @param discussionnum: show (this) number of comments/reviews in the discussion tab
        """
        from invenio.search_engine import record_public_p

        # load the right message language
        _ = gettext_set_language(ln)

        # Flag shown when record_public_p() reports the record as not public
        if record_public_p(recid):
            restriction_flag = ""
        else:
            restriction_flag = '<div class="restrictedflag"><span>%s</span></div>' % _("Restricted")

        # With fewer than two tabs only the flag (if any) is returned
        if len(tabs) <= 1:
            return restriction_flag

        # (count, URL marker) pairs checked in order; the last match wins
        counters = (
            (citationnum, "/citation"),
            (referencenum, "/references"),
            (discussionnum, "/comments"),
        )

        # One <li> per tab; only the very first one gets the "first" class
        tab_items = []
        for position, (label, url, selected, enabled) in enumerate(tabs):
            addnum = ""
            for count, marker in counters:
                if count > -1 and url.count(marker) == 1:
                    addnum = "(" + str(count) + ")"

            class_names = []
            if selected:
                class_names.append("on")
            if position == 0:
                class_names.append("first")
            if not enabled:
                class_names.append("disabled")
            class_attribute = ' class="%s"' % " ".join(class_names)

            if enabled:
                item = '<li%(class)s><a href="%(url)s">%(label)s %(addnum)s </a></li>' % {
                    "class": class_attribute,
                    "url": url,
                    "label": label,
                    "addnum": addnum,
                }
            else:
                # Disabled tabs are rendered without a link target
                item = "<li%(class)s><a>%(label)s %(addnum)s</a></li>" % {
                    "class": class_attribute,
                    "label": label,
                    "addnum": addnum,
                }
            tab_items.append(item)

        out_tabs = "".join(tab_items)
        if out_tabs != "":
            tabs_wrapper = """        <div class="detailedrecordtabs">
            <div>
                <ul class="detailedrecordtabs">%s</ul>
            <div id="tabsSpacer" style="clear:both;height:0px">&nbsp;</div></div>
        </div>"""
            out_tabs = tabs_wrapper % out_tabs

        # Add the clip icon and the brief record reminder if necessary
        record_brief = ""
        if show_short_rec_p:
            short_record = format_record(recID=recid, of="hs", ln=ln)
            record_brief = """<div id="detailedrecordshortreminder">
                             <div id="clip">&nbsp;</div>
                             <div id="HB">
                                 %(record_brief)s
                             </div>
                         </div>
                         <div style="clear:both;height:1px">&nbsp;</div>
                         """ % {"record_brief": short_record}

        # Assemble the box, preceded by the restriction flag
        box = """
    <div class="detailedrecordbox">
        %(tabs)s
        <div class="detailedrecordboxcontent">
            <div class="top-left-folded"></div>
            <div class="top-right-folded"></div>
            <div class="inside">
                <!--<div style="height:0.1em;">&nbsp;</div>
                <p class="notopgap">&nbsp;</p>-->
                %(record_brief)s
                """ % {"tabs": out_tabs, "record_brief": record_brief}

        return restriction_flag + box
Example #58
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record as an HTML table, one row per
    "999C5" field.

    For each reference a single matching record is looked up, in turn by
    report number ('r'), journal ('s'), subfield 'a' and record ID ('0').
    If exactly one hit is found it is rendered with format_record(.., 'hs');
    otherwise the raw subfields of the reference are printed.

    @param bfo: object whose fields() method returns the references as
                dictionaries mapping subfield codes to lists of values
    @param reference_prefix a prefix displayed before each reference
    @param reference_suffix a suffix displayed after each reference
    @return the HTML table, or an empty string if there are no references
    """
    references = bfo.fields("999C5", escape=1, repeatable_subfields_p=True)
    if not references:
        return ""

    rows = []
    last_o = ""  # raw label of the last reference whose label was printed

    for reference in references:
        ref_out = ['<tr><td valign="top">']
        journal = ''
        report = ''
        hits = []

        # Print the label only when it differs from the previous one.
        # The raw value is remembered so that the comparison also works
        # for labels containing dots, which are stripped for display.
        if 'o' in reference and reference['o'][0] != last_o:
            label = reference['o'][0].replace('.', '')
            if '[' in label and ']' in label:
                ref_out.append("<small>" + label + "</small> ")
            else:
                ref_out.append("<small>[" + label + "] </small> ")
            last_o = reference['o'][0]
        ref_out.append("</td><td>")

        if reference_prefix:
            ref_out.append(reference_prefix)

        if 's' in reference:
            journal = reference['s'][0]
        if 'r' in reference:
            report = reference['r'][0]
            if "[" in report and "]" in report:
                # keep only the part before the bracketed suffix
                report = report[:report.find('[')].strip()

        # Try to resolve the reference to exactly one record
        if report:
            hits = search_unit(f='reportnumber', p=report)
        if journal and len(hits) != 1:
            hits = search_unit(f='journal', p=journal)
        if 'a' in reference and len(hits) != 1:
            hits = search_unit(p=reference['a'][0])
        if '0' in reference and len(hits) != 1:
            # check if the record exists in the database
            try:
                recID = int(reference['0'][0])
                if get_record(recID):
                    # since we already have a recID, we can assign it directly
                    # to the "hits" variable, so it will be handled in the last if statement
                    hits = [recID]
            except ValueError:
                pass

        if len(hits) == 1:
            ref_out.append('<small>' + format_record(list(hits)[0], 'hs') +
                           '</small>')
        else:
            if 'h' in reference:
                ref_out.append("<small> " + reference['h'][0] + ".</small>")
            if 't' in reference:
                ref_out.append("<small> " + reference['t'][0] + "</small> -")
            if 'y' in reference:
                ref_out.append("<small> " + reference['y'][0] + ".</small>")
            if 'p' in reference:
                ref_out.append("<small> " + reference['p'][0] + ".</small>")
            if 'm' in reference:
                ref_out.append("<small> " +
                               reference['m'][0].replace(']]', ']') +
                               ".</small>")
            if 'a' in reference:
                ref_out.append("<small> <a href=\"http://dx.doi.org/" +
                               reference['a'][0] + "\">" +
                               reference['a'][0] + "</a></small>")
            if 'u' in reference:
                # the attribute value is quoted so that URLs containing
                # spaces or '>' do not break the markup
                ref_out.append("<small> <a href=\"" + reference['u'][0] +
                               "\">" + reference['u'][0] + "</a></small>")
            if 'i' in reference:
                for r in reference['i']:
                    ref_out.append(
                        "<small> <a href=\"/search?ln=en&amp;p=020__a%3A" + r +
                        "\">" + r + "</a></small>")

            ref_out.append('<small>')
            if journal:
                ref_out.append(journal)
            if report:
                ref_out.append(' ' + report)
            ref_out.append("</small>")

        if reference_suffix:
            ref_out.append(reference_suffix)

        ref_out.append("</td></tr>")
        rows.append(' '.join(ref_out))

    return "<table>" + "".join(rows) + "</table>"