Beispiel #1
0
def invenio_search_xml(kwargs):
    """Fetch records from Invenio and return them joined by newlines.

    Only queries of the following shape are understood:
    p=recid:1->50 OR recid:50 OR recid:....

    Each clause is either a single record ID or an inclusive
    ``first->last`` range of record IDs. Records are rendered one by one
    with ``search_engine.print_record``: 'print_records' is not used
    because (for strange reasons) it creates a range out of the recIDs,
    and bibformat is not used because it does not work nicely with
    strings (it is slower).

    ``kwargs`` must hold the query under 'p' and may hold the output
    format under 'of' (default 'xm'). For the 'xm' format the records are
    wrapped in an XML declaration and a MARC21 <collection> element.
    """
    query = kwargs['p']
    output_format = kwargs.get('of', 'xm')
    wrap_in_collection = (output_format == 'xm')

    pieces = []
    if wrap_in_collection:
        pieces.append('<?xml version="1.0" encoding="UTF-8"?>')
        pieces.append('<collection xmlns="http://www.loc.gov/MARC21/slim">')

    for clause in query.split(' OR '):
        spec = clause.replace('recid:', '')
        if '->' not in spec:
            # Single record ID.
            pieces.append(
                search_engine.print_record(int(spec), format=output_format))
            continue
        # Inclusive range "first->last".
        bounds = spec.split('->')
        first, last = int(bounds[0]), int(bounds[1])
        for recid in xrange(first, last + 1):
            pieces.append(
                search_engine.print_record(recid, format=output_format))

    if wrap_in_collection:
        pieces.append('</collection>')

    return '\n'.join(pieces)
Beispiel #2
0
def format_element(bfo):
    """
    Displays the latest posts on a blog and it also offers
    a link to see all the posts of the corresponding blog

    The three newest posts are always visible. Older posts are put in
    the 'all_posts' span, which the inline script hides when the page
    loads and which the 'see_all_link' anchor toggles.

    @param bfo: object describing the blog record; its control field
                '001' and its 'lang' attribute are read
    @return: HTML string, empty when the blog has no posts
    """

    this_recid = bfo.control_field('001')
    current_language = bfo.lang
    blog_posts_recids = get_posts(this_recid, newest_first=True)
    if not blog_posts_recids:
        return ""

    # Only a missing translation should trigger the fallback; other
    # errors must not be silently swallowed.
    try:
        title = cfg_messages["in_issue"][current_language]
    except KeyError: # in english by default
        title = cfg_messages["in_issue"]['en']

    # let's print just the 3 latest posts
    latest_posts = "".join([print_record(post_recid, format='hb') + "<br />"
                            for post_recid in blog_posts_recids[:3]])
    # everything older goes into the collapsible section
    all_posts = "".join([print_record(post_recid, format='hb') + "<br />"
                         for post_recid in blog_posts_recids[3:]])

    out = "<h4>%s</h4>" % title
    out += latest_posts

    out += """
            <script type="text/javascript">
            function displayAllPosts(){
                var all_posts = document.getElementById('all_posts');
                var see_all_link = document.getElementById('see_all_link');
                if (all_posts.style.display == 'none'){
                    all_posts.style.display = '';
                    see_all_link.innerHTML = "Show less posts"
                } else {
                    all_posts.style.display = 'none';
                    see_all_link.innerHTML = "Show all posts"
                }
            }
            </script>
            """

    out += '<span id="all_posts" style="">' + all_posts + '</span>'
    # The onclick attribute used to be followed by a stray second
    # double quote, which produced malformed HTML.
    out += ('<a class="moreinfo" id="see_all_link" '
            'href="javascript:void(0)" onclick="displayAllPosts()"></a>')
    # Initial call collapses the older posts and sets the link label.
    out += '<script type="text/javascript">displayAllPosts()</script>'

    return out
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search for records matching
        data['query'], or "searchRevisions" to list the revisions of the
        record data['recID1']
    @param data: dictionary holding the request parameters
    @return: dictionary with 'resultCode' (0 on success, 1 on failure),
        'resultText' and, on success, 'results': a list of parallel lists,
        [recids, captions, alternative_titles] for candidates or
        [revisions, captions] for revisions
    """
    max_results = 999
    result = {'resultCode': 0, 'resultText': ''}

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            result['resultCode'] = 1
            result['resultText'] = 'Too many results'
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [
            remove_html_markup(print_record(x, "hs")) for x in recids
        ]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # An unrecognised request type used to fall through and crash
        # with an UnboundLocalError on 'search_results'; report it
        # through the regular error channel instead.
        result['resultCode'] = 1
        result['resultText'] = 'Invalid request type: %s' % (requestType, )
        return result

    result['results'] = search_results
    result['resultText'] = '%s results' % len(search_results[0])
    return result
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search for records matching
        data["query"], or "searchRevisions" to list the revisions of the
        record data["recID1"]
    @param data: dictionary holding the request parameters
    @return: dictionary with "resultCode" (0 on success, 1 on failure),
        "resultText" and, on success, "results": a list of parallel lists,
        [recids, captions, alternative_titles] for candidates or
        [revisions, captions] for revisions
    """
    max_results = 999
    result = {"resultCode": 0, "resultText": ""}

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data["query"])
        if len(recids) > max_results:
            result["resultCode"] = 1
            result["resultText"] = "Too many results"
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs")) for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data["recID1"])
        captions = [split_revid(x, "datetext")[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # An unrecognised request type used to fall through and crash
        # with an UnboundLocalError on "search_results"; report it
        # through the regular error channel instead.
        result["resultCode"] = 1
        result["resultText"] = "Invalid request type: %s" % (requestType,)
        return result

    result["results"] = search_results
    result["resultText"] = "%s results" % len(search_results[0])
    return result
Beispiel #5
0
def Get_Field(fieldname,bibrec):
    """
    This function returns the value of the specified field
    from the specified document

    @param fieldname: field to output; handed to print_record as a
                      one-element list
    @param bibrec: record identifier, converted with int()
    @return: the 'tm' output of print_record with leading and trailing
             whitespace removed
    """
    # str.strip() replaces the deprecated string.strip() module function,
    # which no longer exists in Python 3.
    return print_record(int(bibrec), 'tm', [fieldname]).strip()
def cached_format_record(recIDs, of, ln='', verbose=0,
                         search_pattern=None, xml_records=None, user_info=None,
                         record_prefix=None, record_separator=None,
                         record_suffix=None, prologue="", epilogue="",
                         req=None, on_the_fly=False):
    """Format record(s) by delegating to print_record.

    Only ``recIDs``, ``of``, ``ln`` and ``verbose`` are forwarded, and
    ``brief_links`` is always switched off. Every other parameter is
    accepted but ignored by this implementation.

    @return: whatever print_record returns for the given arguments
    """
    formatted = print_record(
        recIDs,
        of,
        ln=ln,
        verbose=verbose,
        brief_links=False,
    )
    return formatted
Beispiel #7
0
def Get_Field(fieldname, bibrec):
    """
    This function returns the value of the specified field
    from the specified document

    @param fieldname: field to output; handed to print_record as a
                      one-element list
    @param bibrec: record identifier, converted with int()
    @return: the 'tm' output of print_record with leading and trailing
             whitespace removed
    """
    # str.strip() replaces the deprecated string.strip() module function,
    # which no longer exists in Python 3.
    return print_record(int(bibrec), 'tm', [fieldname]).strip()
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search for records matching
        data['query'], or "searchRevisions" to list the revisions of the
        record data['recID1']
    @param data: dictionary holding the request parameters
    @return: dictionary with 'resultCode' (0 on success, 1 on failure),
        'resultText' and, on success, 'results': a list of parallel lists,
        [recids, captions, alternative_titles] for candidates or
        [revisions, captions] for revisions
    """
    max_results = 999
    result = {
        'resultCode': 0,
        'resultText': ''
        }

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            result['resultCode'] = 1
            result['resultText'] = 'Too many results'
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs")) for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # An unrecognised request type used to fall through and crash
        # with an UnboundLocalError on 'search_results'; report it
        # through the regular error channel instead.
        result['resultCode'] = 1
        result['resultText'] = 'Invalid request type: %s' % (requestType, )
        return result

    result['results'] = search_results
    result['resultText'] = '%s results' % len(search_results[0])
    return result
def format_element(bfo):
    """
    Displays comments on a post

    The two newest comments are always visible. Older comments are put
    in the 'all_comments' span, which the inline script hides when the
    page loads and which the 'see_all_link' anchor toggles.

    @param bfo: object describing the post record; its control field
                '001' and its 'lang' attribute are read
    @return: HTML string, empty when the post has no comments
    """

    this_recid = bfo.control_field('001')
    current_language = bfo.lang
    post_comments_recids = get_comments(this_recid, newest_first=True)
    if not post_comments_recids:
        return ""

    # Fall back to English when the heading has no translation for the
    # current language instead of failing with a KeyError.
    try:
        title = cfg_messages["in_issue"][current_language]
    except KeyError:
        title = cfg_messages["in_issue"]['en']

    # let's print just the 2 latest comments
    latest_comments = "".join(
        [print_record(comment_recid, format='hb') + "<br />"
         for comment_recid in post_comments_recids[:2]])
    # everything older goes into the collapsible section
    all_comments = "".join(
        [print_record(comment_recid, format='hb') + "<br />"
         for comment_recid in post_comments_recids[2:]])

    out = "<h4>%s</h4>" % title
    out += latest_comments

    out += """
            <script type="text/javascript">
            function displayAllComments(){
                var all_comments = document.getElementById('all_comments');
                var see_all_link = document.getElementById('see_all_link');
                if (all_comments.style.display == 'none'){
                    all_comments.style.display = '';
                    see_all_link.innerHTML = "Show less comments"
                } else {
                    all_comments.style.display = 'none';
                    see_all_link.innerHTML = "Show all comments"
                }
            }
            </script>
            """

    out += '<span id="all_comments" style="">' + all_comments + '</span>'
    # The onclick attribute used to be followed by a stray second
    # double quote, which produced malformed HTML.
    out += ('<a class="moreinfo" id="see_all_link" '
            'href="javascript:void(0)" onclick="displayAllComments()"></a>')
    # Initial call collapses the older comments and sets the link label.
    out += '<script type="text/javascript">displayAllComments()</script>'

    return out
Beispiel #10
0
 def _format_record(recid, of='hd', user_info=current_user, *args, **kwargs):
     """Format a single record by delegating to print_record.

     ``of`` is forwarded as the ``format`` keyword and ``user_info`` as
     the ``user_info`` keyword; any extra positional and keyword
     arguments are passed through unchanged.

     NOTE(review): the ``user_info`` default is evaluated once, when the
     function is defined -- confirm that ``current_user`` is meant to be
     captured at that point.
     """
     formatted = print_record(
         recid, format=of, user_info=user_info, *args, **kwargs)
     return formatted
Beispiel #11
0
def get_record(recid, reset_cache=False, fields=()):
    """
    Record factory, it retrieves the record from bibfmt table if it is there,
    if not, or reset_cache is set to True, it searches for the appropriate
    reader to create the representation of the record.

    @param recid: identifier of the record to load
    @param reset_cache: when True the cached 'recjson' version is ignored
    and the record is rebuilt and stored again
    @param fields: optional keys; when non-empty only these keys are
    returned, collected in a SmartDict

    @return: Bibfield object representing the record or None if the recid is not
    present in the system (None is also returned when the MARCXML of the
    record cannot be produced)
    """
    record = None
    #Search for recjson
    if not reset_cache:
        res = run_sql(
            "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format='recjson'",
            (recid, ))
        if res:
            try:
                record = Record(msgpack.loads(res[0][0]))
            except Exception:
                #Maybe the cached version is broken
                record = None

    #There is no version cached or we want to renew it
    #Then retrieve information and blob
    if not record or reset_cache:
        try:
            # Doubles as an existence check: indexing an empty result
            # raises and makes the function return None.
            master_format = run_sql(
                "SELECT master_format FROM bibrec WHERE id=%s",
                (recid, ))[0][0]
        except Exception:
            return None
        schema = 'xml'
        # NOTE(review): the value read from bibrec is overridden here, so
        # the 'marc' reader is always used -- confirm this is intended.
        master_format = 'marc'
        try:
            from invenio.search_engine import print_record
            blob = print_record(recid, format='xm')
        except Exception:
            return None

        reader = CFG_BIBFIELD_READERS['bibfield_%sreader.py' %
                                      (master_format, )](blob, schema=schema)
        record = Record(reader.translate())
        #Update bibfmt for future uses
        run_sql(
            "REPLACE INTO bibfmt(id_bibrec, format, last_updated, value) VALUES (%s, 'recjson', NOW(), %s)",
            (recid, msgpack.dumps(record.dumps())))

    if fields:
        chunk = SmartDict()
        for key in fields:
            chunk[key] = record.get(key)
        record = chunk
    return record
Beispiel #12
0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result[
                'resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result[
                'resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result[
                'resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  #check if file doesn't exist
            result['resultCode'], result[
                'resultText'] = 1, 'Temporary file doesnt exist'
        else:  #open file
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result[
                    'resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result[
                'resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result[
            'resultText'] = 1, 'Invalid record mode for record2'
    return record
Beispiel #13
0
def main():
    """Send the New Jobs mailout for postings added since the last run.

    State is kept in a log file named after this script ("tmp_" + script
    name with ".py" replaced by "_log"), which must already exist. The
    log is scanned for the most recent record ID and the date of the last
    run; when either is missing the date is asked for interactively.
    Records of the "Jobs" collection found from that date on, and newer
    than the logged record ID, are formatted with print_record and handed
    to send_jobs_mail. A new entry is then appended to the log.
    """
    recid_pattern = re.compile(r'most recent recid = (\d+)')
    date_pattern = re.compile(r'date last run = (.*)')
    date_last_run = None
    latest_recid = None
    results = []  # stays empty when no date is available

    # Escape the dot and anchor the pattern so only the extension changes.
    filename = re.sub(r"\.py$", "_log", "tmp_" + __file__)
    stored_file = open(filename, 'r+')
    try:
        lines = stored_file.readlines()
        for line in lines:
            recid_match = recid_pattern.match(line)
            if recid_match:
                # group(1) is the captured number; group(0) would be the
                # whole matched text, which never compares as an integer.
                latest_recid = int(recid_match.group(1))
            date_match = date_pattern.match(line)
            if date_match:
                date_last_run = date_match.group(1).strip()

        if not (date_last_run and latest_recid is not None):
            date_last_run = raw_input(
                """Couldn't find the date of the most recently sent New Jobs Mailout.
Send jobs posted on and after this date (yyyy-mm-dd): """)
        if date_last_run:
            results = perform_request_search(p="fin da >= %s" % date_last_run,
                                             cc="Jobs")

        recids = list(results)
        if recids:
            print(recids)
            if latest_recid is not None:
                recids = [x for x in recids if x > latest_recid]
            print(recids)
        if not recids:
            print("No postings since %s." % date_last_run)
            return

        # Position explicitly at the end before switching from reading
        # to writing on the same handle.
        stored_file.seek(0, 2)
        if lines and not lines[-1].endswith("\n"):
            # Older entries were written without a trailing newline.
            stored_file.write("\n")
        # One entry per line, otherwise the patterns above (anchored at
        # the start of a line) cannot find them on the next run.
        stored_file.write("most recent recid = %s\n" % max(recids))
        stored_file.write("date last run = %s\n" % datetime.date.today())
        stored_file.write("records in last mailout = %s\n" %
                          ', '.join(str(x) for x in sorted(recids)))

        content = ''.join(
            [print_record(r, ot=[], format='he') for r in recids])
        send_jobs_mail(content)
    finally:
        stored_file.close()
def get_pubtype_info(doctype):
    """call output format for publication types and return it as dictionary (json)

    The 'PubTypes' collection is searched for '3367_:' + doctype and the
    first hit is rendered in the 'js' output format, washed with
    washJSONinput and parsed as JSON.

    @param doctype: string appended to the '3367_:' search pattern
    @return: the parsed JSON of the first matching record, or an empty
        dictionary when nothing matches
    """
    # directly call the backend...
    query = '3367_:' + doctype
    res = perform_request_search(p=query, cc='PubTypes')

    # and return the first rec in JS for further processing
    # (truthiness also covers empty results that are not plain lists)
    if not res:
        return {}

    text = print_record(res[0], 'js')

    jsontext = washJSONinput(text)
    return json.loads(jsontext, 'utf8')
Beispiel #15
0
def get_record(recid, reset_cache=False, fields=()):
    """
    Record factory, it retrieves the record from bibfmt table if it is there,
    if not, or reset_cache is set to True, it searches for the appropriate
    reader to create the representation of the record.

    @param recid: identifier of the record to load
    @param reset_cache: when True the cached 'recjson' version is ignored
    and the record is rebuilt and stored again
    @param fields: optional keys; when non-empty only these keys are
    returned, collected in a SmartDict

    @return: Bibfield object representing the record or None if the recid is not
    present in the system (None is also returned when the MARCXML of the
    record cannot be produced)
    """
    record = None
    #Search for recjson
    if not reset_cache:
        res = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND format='recjson'",
                      (recid,))
        if res:
            try:
                record = Record(msgpack.loads(res[0][0]))
            except Exception:
                #Maybe the cached version is broken
                record = None

    #There is no version cached or we want to renew it
    #Then retrieve information and blob
    if not record or reset_cache:
        try:
            # Doubles as an existence check: indexing an empty result
            # raises and makes the function return None.
            master_format = run_sql("SELECT master_format FROM bibrec WHERE id=%s", (recid,))[0][0]
        except Exception:
            return None
        schema = 'xml'
        # NOTE(review): the value read from bibrec is overridden here, so
        # the 'marc' reader is always used -- confirm this is intended.
        master_format = 'marc'
        try:
            from invenio.search_engine import print_record
            blob = print_record(recid, format='xm')
        except Exception:
            return None

        reader = CFG_BIBFIELD_READERS['bibfield_%sreader.py' % (master_format,)](blob, schema=schema)
        record = Record(reader.translate())
        #Update bibfmt for future uses
        run_sql("REPLACE INTO bibfmt(id_bibrec, format, last_updated, value) VALUES (%s, 'recjson', NOW(), %s)",
                (recid, msgpack.dumps(record.dumps())))

    if fields:
        chunk = SmartDict()
        for key in fields:
            chunk[key] = record.get(key)
        record = chunk
    return record
Beispiel #16
0
    def answer(self, req, user_info, of, cc, colls_to_search, p, f,
               search_units, ln):
        """Answer question given by context.
        Return (relevance, html_string) where relevance is integer
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
        for details), and html_string being a formatted answer.
        """
        from invenio.refextract_api import search_from_reference

        _ = gettext_set_language(ln)

        if f or not self.seems_a_journal_reference(p):
            return (0, "")

        (field, pattern) = search_from_reference(p.decode('utf-8'))

        if field is not "journal":
            return (0, "")

        recids = perform_request_search(req=req,
                                        p=pattern,
                                        f=field,
                                        cc=cc,
                                        c=colls_to_search)

        if not recids:
            return (0, "")

        if len(recids) == 1:
            recid = recids.pop()
            user_info = collect_user_info(req)
            return (100, """\
<p><span class="journalhint">%s</span></p>
<table style="padding: 5px; border: 2px solid #ccc; margin: 20px"><tr><td>
%s
</td></tr></table>""" % (escape(_("Were you looking for this paper?")),
                         print_record(recid, ln=ln, user_info=user_info)))

        query = "find rawref \"" + p + "\""
        query_link = CFG_SITE_URL + '/search?' + urlencode({'p': query})
        return (
            80, '<span class="journalhint">%s</span>' %
            (_("Were you looking for a journal reference? Try: %(x_href)s") % {
                "x_href":
                '<a href="{0}">{1}</a>'.format(escape(query_link, True),
                                               escape(query))
            }, ))
def main():
    """Send the New Jobs mailout for postings added since the last run.

    State is kept in a log file named after this script ("tmp_" + script
    name with ".py" replaced by "_log"), which must already exist. The
    log is scanned for the most recent record ID and the date of the last
    run; when either is missing the date is asked for interactively.
    Records of the "Jobs" collection found from that date on, and newer
    than the logged record ID, are formatted with print_record and handed
    to send_jobs_mail. A new entry is then appended to the log.
    """
    recid_pattern = re.compile(r'most recent recid = (\d+)')
    date_pattern = re.compile(r'date last run = (.*)')
    date_last_run = None
    latest_recid = None
    results = []  # stays empty when no date is available

    # Escape the dot and anchor the pattern so only the extension changes.
    filename = re.sub(r"\.py$", "_log", "tmp_"+__file__)
    stored_file = open(filename, 'r+')
    try:
        lines = stored_file.readlines()
        for line in lines:
            recid_match = recid_pattern.match(line)
            if recid_match:
                # group(1) is the captured number; group(0) would be the
                # whole matched text, which never compares as an integer.
                latest_recid = int(recid_match.group(1))
            date_match = date_pattern.match(line)
            if date_match:
                date_last_run = date_match.group(1).strip()

        if not (date_last_run and latest_recid is not None):
            date_last_run = raw_input("""Couldn't find the date of the most recently sent New Jobs Mailout.
Send jobs posted on and after this date (yyyy-mm-dd): """)
        if date_last_run:
            results = perform_request_search(p="fin da >= %s" % date_last_run, cc="Jobs")

        recids = list(results)
        if recids:
            print(recids)
            if latest_recid is not None:
                recids = [x for x in recids if x > latest_recid]
            print(recids)
        if not recids:
            print("No postings since %s." % date_last_run)
            return

        # Position explicitly at the end before switching from reading
        # to writing on the same handle.
        stored_file.seek(0, 2)
        if lines and not lines[-1].endswith("\n"):
            # Older entries were written without a trailing newline.
            stored_file.write("\n")
        # One entry per line, otherwise the patterns above (anchored at
        # the start of a line) cannot find them on the next run.
        stored_file.write("most recent recid = %s\n" % max(recids))
        stored_file.write("date last run = %s\n" % datetime.date.today())
        stored_file.write("records in last mailout = %s\n" % ', '.join(str(x) for x in sorted(recids)))

        content = ''.join([print_record(r, ot=[], format='he') for r in recids])
        send_jobs_mail(content)
    finally:
        stored_file.close()
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record( print_record(recid, 'xm') )[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                       CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path): #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
        else: #open file
            tmpfile = open(file_path, 'r')
            record = create_record( tmpfile.read() )[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
    def _create_records_xml(self, record_IDs):
        """Creates XML containing all the information
        for the records with the given identifiers

        @param record_IDs: list of identifiers of records

        @return: MARC XML containing all the information about the records
        """
        output_xml = "<collection>"

        for record_id in record_IDs:
            record_xml = search_engine.print_record(recID = record_id, format = "xm")
            output_xml += record_xml

        output_xml += "</collection>"

        return output_xml
Beispiel #20
0
    def test_BibUpload_revision_verifier(self):
        """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios

        Uploads revision 1, replaces it with revision 2, then uploads the
        modified revision 1 and checks the resulting record against
        self.final_xm; finally uploads the modified revision 2 and
        expects bibupload to return error code 2.
        """

        recs = xml_marc_to_records(self.rev1)
        # --> Revision 1 submitted
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='insert')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        # Value of field 005 of the record just inserted.
        rev = record_get_field_value(record, '005', '', '')
        recs = xml_marc_to_records(self.rev1)
        # Replace the placeholder record ID and 005 value in the fixtures
        # with the real ones.
        self.rev2 = self.rev2.replace('123456789', str(self.recid))
        self.rev2 = self.rev2.replace('20110101000000.0', rev)
        self.rev1_modified = self.rev1_modified.replace(
            '123456789', str(self.recid))
        self.rev1_modified = self.rev1_modified.replace(
            '20110101000000.0', rev)
        self.final_xm = self.final_xm.replace('123456789', str(self.recid))

        # NOTE(review): the result of the next line is overwritten
        # immediately -- it looks redundant; confirm before removing.
        recs = xml_marc_to_records(self.rev1)
        recs = xml_marc_to_records(self.rev2)
        # --> Revision 2 submitted
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        # Update the fixtures with the 005 value of the record after the
        # replace.
        self.rev2 = self.rev2.replace(
            rev, record_get_field_value(record, '005', '', ''))
        self.rev2_modified = self.rev2_modified.replace(
            '123456789', str(self.recid))
        self.rev2_modified = self.rev2_modified.replace(
            '20110101000000.0', record_get_field_value(record, '005', '', ''))
        # --> Revision 1 modified submitted
        recs = xml_marc_to_records(self.rev1_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, '005', '', '')
        self.final_xm = self.final_xm.replace('20110101000000.0', rev)
        # compare_xmbuffers is expected to return an empty string here,
        # i.e. the stored record must match final_xm.
        self.assertEqual(
            compare_xmbuffers(self.final_xm, print_record(self.recid, 'xm')),
            '')
        # --> Revision 2 modified submitted
        recs = xml_marc_to_records(self.rev2_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
        self.check_record_consistency(self.recid)
        # This upload is expected to be reported with error code 2.
        self.assertEquals(error, 2)
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == "none":
        mode = "none"
    if mode == "recid":
        record_status = record_exists(recid)
        # check for errors
        if record_status == 0:
            result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
        elif record_status == -1:
            result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
        elif record_locked_by_queue(recid):
            result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
        else:
            record = create_record(print_record(recid, "xm"))[0]

    elif mode == "tmpfile":
        file_path = "%s_%s.xml" % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  # check if file doesn't exist
            result["resultCode"], result["resultText"] = 1, "Temporary file doesnt exist"
        else:  # open file
            tmpfile = open(file_path, "r")
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == "revision":
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result["resultCode"], result["resultText"] = 1, "The specified revision does not exist"
        else:
            result["resultCode"], result["resultText"] = 1, "Invalid revision id"

    elif mode == "none":
        return {}

    else:
        result["resultCode"], result["resultText"] = 1, "Invalid record mode for record2"
    return record
def main():
    """Dump cleaned-up MARCXML of the matching records to a file.

    The first 500 records found for SEARCH in the collection SUBFILE are
    printed in 'xm' format, restricted to the tags '001' and MARC_FIELD,
    passed through a few regular-expression clean-ups and written,
    wrapped in a <collection> element, to a file named after this script
    ("tmp_" + script name with ".py" replaced by "_correct.out").
    """
    file_name = 'tmp_' + __file__
    # Escape the dot and anchor the pattern so only the extension changes.
    file_name = re.sub(r'\.py$', '_correct.out', file_name)
    # 'with' guarantees the output file is closed even if a search or a
    # substitution fails half-way through.
    with open(file_name,'w') as output:
        output.write('<collection>')
        result = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = result[:500]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', MARC_FIELD], format='xm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            # keep only the youtube token of a subfield value
            info = re.sub(r'>.* (\S+youtube\S+).*<', r'>\1<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            info = re.sub(r'.*Waseda U., Cosmic Ray Div..*', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
    def answer(self, req, user_info, of, cc,
               colls_to_search, p, f, search_units, ln):
        """Answer question given by context.

        Return (relevance, html_string) where relevance is integer
        from 0 to 100 indicating how relevant to the question the
        answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE}
        for details), and html_string being a formatted answer.
        """
        from invenio.refextract_api import search_from_reference

        _ = gettext_set_language(ln)

        if f or not self.seems_a_journal_reference(p):
            return (0, "")

        (field, pattern) = search_from_reference(p.decode('utf-8'))

        if field is not "journal":
            return (0, "")

        recids = perform_request_search(
            req=req, p=pattern, f=field, cc=cc, c=colls_to_search)

        if not recids:
            return (0, "")

        if len(recids) == 1:
            recid = recids.pop()
            user_info = collect_user_info(req)
            return (100, """\
<p><span class="journalhint">%s</span></p>
<table style="padding: 5px; border: 2px solid #ccc; margin: 20px"><tr><td>
%s
</td></tr></table>""" % (escape(_("Were you looking for this paper?")),
             print_record(recid, ln=ln, user_info=user_info)))

        query = "find rawref \"" + p + "\""
        query_link = CFG_SITE_URL + '/search?' + urlencode({'p': query})
        return (80, '<span class="journalhint">%s</span>' % (
                _("Were you looking for a journal reference? Try: %(x_href)s") %
                {"x_href": '<a href="{0}">{1}</a>'.format(
                 escape(query_link, True), escape(query))}, ))
Beispiel #24
0
def main():
    """Dump cleaned-up MARCXML of the matching records to a file.

    The first 500 records found for SEARCH in the collection SUBFILE are
    printed in 'xm' format, restricted to the tags '001' and MARC_FIELD,
    passed through a few regular-expression clean-ups and written,
    wrapped in a <collection> element, to a file named after this script
    ("tmp_" + script name with ".py" replaced by "_correct.out").
    """
    file_name = 'tmp_' + __file__
    # Escape the dot and anchor the pattern so only the extension changes.
    file_name = re.sub(r'\.py$', '_correct.out', file_name)
    # 'with' guarantees the output file is closed even if a search or a
    # substitution fails half-way through.
    with open(file_name, 'w') as output:
        output.write('<collection>')
        result = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = result[:500]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', MARC_FIELD], format='xm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            # keep only the youtube token of a subfield value
            info = re.sub(r'>.* (\S+youtube\S+).*<', r'>\1<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            info = re.sub(r'.*Waseda U., Cosmic Ray Div..*', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
def main():
    """Dump the first 5 D0-PRELIMINARY-NOTE records as cleaned-up text MARC.

    Looks up records whose 980* field matches "*D0-PRELIMINARY-NOTE*",
    renders fields 001/100/700/980 of each in 'hm' format, strips the
    <pre> wrapper and writes the result, wrapped in a <collection>
    element, to a file named after this script
    ('tmp_<script>_correct.out').
    """
    # The dot is escaped and the pattern anchored so that only the script
    # extension is replaced; a bare '.py' would also rewrite e.g. "opy"
    # in the middle of the file name.
    file_name = re.sub(r'\.py[co]?$', '_correct.out', 'tmp_' + __file__)
    # 'with' guarantees the output file is closed even if a search or a
    # record rendering fails half-way through.
    with open(file_name, 'w') as output:
        output.write('<collection>')
        # NOTE(review): result_hep is never used below - confirm whether
        # this search can be dropped.
        result_hep = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = search_unit(p="*D0-PRELIMINARY-NOTE*", m='a', f='980*')
        result = result[:5]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', '100', '700', '980'], format='hm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            #info = re.sub(r'>\w+tion [fF]*or [Tt]he (\w+)<', r'>\1 Collaboration<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            # Drop the opening/closing <pre> tags around the 'hm' output.
            info = re.sub(r'<\/?pre[^\>]*>', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
    def test_BibUpload_revision_verifier(self):
        """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios"""

        # --> Revision 1 submitted
        recs = xml_marc_to_records(self.rev1)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="insert")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, "005", "", "")

        # Patch the fixtures with the record id and the 005 value that the
        # insert actually produced.
        recid_str = str(self.recid)
        self.rev2 = self.rev2.replace("123456789", recid_str)
        self.rev2 = self.rev2.replace("20110101000000.0", rev)
        self.rev1_modified = self.rev1_modified.replace("123456789", recid_str)
        self.rev1_modified = self.rev1_modified.replace("20110101000000.0", rev)
        self.final_xm = self.final_xm.replace("123456789", recid_str)

        # --> Revision 2 submitted
        recs = xml_marc_to_records(self.rev2)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev2_stamp = record_get_field_value(record, "005", "", "")
        self.rev2 = self.rev2.replace(rev, rev2_stamp)
        self.rev2_modified = self.rev2_modified.replace("123456789", str(self.recid))
        self.rev2_modified = self.rev2_modified.replace("20110101000000.0", rev2_stamp)

        # --> Revision 1 modified submitted
        recs = xml_marc_to_records(self.rev1_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        record = get_record(self.recid)
        rev = record_get_field_value(record, "005", "", "")
        self.final_xm = self.final_xm.replace("20110101000000.0", rev)
        self.assertEqual(compare_xmbuffers(self.final_xm, print_record(self.recid, "xm")), "")

        # --> Revision 2 modified submitted
        recs = xml_marc_to_records(self.rev2_modified)
        error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
        self.check_record_consistency(self.recid)
        # This last upload is expected to report error code 2.
        self.assertEqual(error, 2)
def main():
    """Dump the first 5 D0-PRELIMINARY-NOTE records as cleaned-up text MARC.

    Looks up records whose 980* field matches "*D0-PRELIMINARY-NOTE*",
    renders fields 001/100/700/980 of each in 'hm' format, strips the
    <pre> wrapper and writes the result, wrapped in a <collection>
    element, to a file named after this script
    ('tmp_<script>_correct.out').
    """
    # The dot is escaped and the pattern anchored so that only the script
    # extension is replaced; a bare '.py' would also rewrite e.g. "opy"
    # in the middle of the file name.
    file_name = re.sub(r'\.py[co]?$', '_correct.out', 'tmp_' + __file__)
    # 'with' guarantees the output file is closed even if a search or a
    # record rendering fails half-way through.
    with open(file_name, 'w') as output:
        output.write('<collection>')
        # NOTE(review): result_hep is never used below - confirm whether
        # this search can be dropped.
        result_hep = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = search_unit(p="*D0-PRELIMINARY-NOTE*", m='a', f='980*')
        result = result[:5]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid,
                                ot=['001', '100', '700', '980'],
                                format='hm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            #info = re.sub(r'>\w+tion [fF]*or [Tt]he (\w+)<', r'>\1 Collaboration<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            # Drop the opening/closing <pre> tags around the 'hm' output.
            info = re.sub(r'<\/?pre[^\>]*>', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
Beispiel #28
0
    def _getAuthority(self, prog, simulation):
        """Fetch the authority record for ``prog`` as a dictionary.

        The search string is taken from ``self._pofsearchdict[prog]``.
        When the search yields exactly one record, its 'js' rendering is
        washed, parsed as JSON and returned without its 'label' key.
        For any other number of hits nothing is returned (None).
        """
        # NOTE(review): the argument is unconditionally overridden here, so
        # the simulation branch below can never run - confirm whether this
        # is a leftover debugging switch.
        simulation = False
        if simulation != False:
            # simulation mode no connection to Invenio
            print('Simulation ist True')
            return self._getPOF('Krebsforschung')

        print('Simulation ist False')
        import simplejson as json
        from invenio.websubmit_functions.Websubmit_Helpers_hgf import washJSONinput
        from invenio.search_engine import perform_request_search, print_record

        query = self._pofsearchdict[prog]
        hits = perform_request_search(p=query)
        if len(hits) != 1:
            # Zero or ambiguous matches: no authority data available.
            return None

        washed = washJSONinput(print_record(hits[0], format='js'))
        authority = json.loads(washed, 'utf8')
        if 'label' in authority:
            del authority['label']
        return authority
 def _get_record_NLM_XML(self, record):
     """Render ``record`` as National Library of Medicine XML.

     Delegates to print_record with the 'xn' output format.
     """
     nlm_xml = print_record(record, format='xn')
     return nlm_xml
 def _get_record_MARCXML(self, record):
     """Render ``record`` as MARCXML.

     Delegates to print_record with the 'xm' output format.
     """
     marcxml = print_record(record, format='xm')
     return marcxml
Beispiel #31
0
def _read_curdir_file(curdir, filename, kind, check_exists=True):
    """Return the stripped content of curdir/filename, or "" if unavailable.

    @param kind: word used in the registered error message ("comments" or
        "decision").
    @param check_exists: when True a missing file silently yields "";
        when False the open is attempted anyway, so that a missing file is
        reported through register_exception like any other IOError.
    """
    path = "%s/%s" % (curdir, filename)
    if check_exists and not os.path.exists(path):
        return ""
    try:
        # 'with' closes the handle even when read() itself fails.
        with open(path, "r") as fh_in:
            content = fh_in.read()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "Send_APP_Mail. Tried to open %s " \
                           "file [%s] but was unable to." % (kind, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def Send_APP_Mail (parameters, curdir, form, user_info=None):
    """
    This function send an email informing the original submitter of a
    document that the referee has approved/ rejected the document. The
    email is also sent to the referee for checking.

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). this parameter may contain
         the <CATEG> string. In which case the variable computed from
         the [categformatAPP] parameter replaces this string.

       * categformatAPP contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * newrnin: Name of the file containing the 2nd reference of the
                  approved document (if any).

       * edsrn: Name of the file containing the reference of the
                approved document.

       * decision_file (optional): Name of the file in curdir holding the
         referee's decision. When missing, empty or "NULL", the file
         'decision' is used.

       * comments_file (optional): Name of the file in curdir holding the
         referee's comments. When missing, empty or "NULL", the file
         'COM' is used.

    @return: always the empty string.
    """
    global titlevalue,authorvalue, emailvalue,sysno,rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL)
    sequence_id = bibtask_allocate_sequenceid(curdir)
    doctype = form['doctype']
    # Flatten multi-line title/author values so they fit on one mail line.
    titlevalue = titlevalue.replace("\n"," ")
    authorvalue = authorvalue.replace("\n","; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## Get the name of the comments file:
    try:
        comments_filename = parameters['comments_file']
    except KeyError:
        comments_filename = ""

    ## Read the referee's comments. Without a configured name, fall back
    ## to the file called 'COM' in curdir (backward compatibility):
    if comments_filename in (None, "", "NULL"):
        comment = _read_curdir_file(curdir, "COM", "comments")
    else:
        comment = _read_curdir_file(curdir, comments_filename, "comments")

    ## Read the referee's decision. Without a configured name, fall back
    ## to the file called 'decision' in curdir (backward compatibility):
    if decision_filename in (None, "", "NULL"):
        decision = _read_curdir_file(curdir, "decision", "decision")
    else:
        ## An explicitly configured decision file that cannot be opened
        ## (even because it is missing) is registered as an error:
        decision = _read_curdir_file(curdir, decision_filename, "decision",
                                     check_exists=False)

    ## Second reference number of the approved document, if any:
    newrn_file = "%s/%s" % (curdir, newrnpath)
    if os.path.exists(newrn_file):
        with open(newrn_file, "r") as fh_newrn:
            newrn = fh_newrn.read()
    else:
        newrn = ""
    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,))
    docname = res[0][0]
    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and m_categ_search.groups():
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"
    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## List names take the format
        ## "service-cds-referee-<doctype>[-<category>]@cern.ch"
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        addresses = referee_listname
    else:
        # Referees of this doctype/category, followed by the general
        # referees of the doctype (role "referee_<doctype>_*"):
        referee_emails = [user[1] for user in \
                          acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype,category)))]
        referee_emails += [user[1] for user in \
                           acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype))]
        refereeaddress = ",".join(referee_emails)
        # Creation of the mail for the referee
        otheraddresses = otheraddresses.replace("<CATEG>",category)
        addresses = ",".join([part for part in (refereeaddress, otheraddresses) \
                              if part != ""])
    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = [owner.lower().strip() \
                              for owner in record_owners.split("\n")]
    else:
        #if the record owner can not be retrieved from the metadata
        #(in case the record has not been inserted yet),
        #try to use the global variable emailvalue
        try:
            record_owners_list = [emailvalue]
        except NameError:
            record_owners_list = []
    record_owners = ",".join(record_owners_list)
    if record_owners != "":
        # Only insert the separator when there is something to separate,
        # otherwise the recipient list would start with a stray comma.
        if addresses != "":
            addresses += ",%s" % record_owners
        else:
            addresses = record_owners

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname,rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (CFG_SITE_URL,CFG_SITE_RECORD,sysno)
    else:
        # Anything other than an explicit "approve" is reported as rejected.
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname,rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, addresses, mailtitle, mailbody,
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
                             other_bibtasklet_arguments=['-I', str(sequence_id)])
    return ""
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.
       @parameters: None.
       @return: None.
       @Exceptions raise: InvenioWebSubmitFunctionError if problems are
        encountered (missing or non-numeric step, invalid or unknown
        record id);
        InvenioWebSubmitFunctionStop in order to display the details of the
        record and the confirmation message.
    """
    global sysno

    ## Make sure that we know the current step. A missing field, a None
    ## value and a non-numeric string are all reported the same way:
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    newstep = current_step + 1

    ## Make sure that the sysno is valid:
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    def _as_html_lines(text):
        """Escape each line of text and terminate it with '<br />'."""
        return "".join(["%s<br />\n" % cgi.escape(line.strip()) \
                        for line in text.split("\n")])

    ## Retrieve the details to be displayed:
    ##
    ## Author(s): first author (100__a) followed by the others (700__a);
    ## empty values contribute nothing.
    rec_first_author  = print_record(working_recid, 'tm', "100__a")
    rec_other_authors = print_record(working_recid, 'tm', "700__a")
    rec_authors = "".join([_as_html_lines(authors) for authors in \
                           (rec_first_author, rec_other_authors) \
                           if authors != ""])

    ## Title:
    rec_title = _as_html_lines(print_record(working_recid, 'tm', "245__a"))

    ## Report numbers: primary (037__a) followed by the others (088__a);
    ## empty values contribute nothing.
    rec_reportnum        = print_record(working_recid, 'tm', "037__a")
    rec_other_reportnums = print_record(working_recid, 'tm', "088__a")
    rec_reportnums = "".join([_as_html_lines(repnums) for repnums in \
                              (rec_reportnum, rec_other_reportnums) \
                              if repnums != ""])

    ## Stopping here is how the confirmation page gets displayed:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
Beispiel #33
0
def get_bibrecord(recid):
    """Return the record ``recid`` as a BibRecord structure.

    The record is rendered as MARCXML ('xm') and parsed with
    create_record; the first element of its result is returned.
    Returns None when record_exists() is false for ``recid``.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, 'xm')
    return create_record(marcxml)[0]
Beispiel #34
0
def process_references(book):
    '''
    Look through reference list to find the cited reference and clean it up.

    book is a sequence (author, title, isbn, recid_book[, date]).
    HEP records whose 999C5 references match both author and title (and do
    not already refer to recid_book) are fetched in 'hm' format. Every
    reference line matching author, title and - when given - date gets the
    ISBN/report subfield, a $$0 link to recid_book and $$z1 appended.

    Returns a list with one entry per modified record, each entry being
    the list of that record's lines (newline terminated). Processing stops
    once more than COUNTER_MAX lines have been tagged.

    NOTE(review): author, title and date are used as regular expressions
    as-is - confirm that callers never pass regex metacharacters they
    mean literally.
    '''

    print(book)
    date = None
    counter = 1
    author = book[0]
    title = book[1].lower()
    isbn = book[2]
    recid_book = book[3]
    if len(book) == 5:
        date = book[4]
    # A value containing a dash is tagged as $$r, otherwise as $$i.
    isbn_tag = "r" if "-" in isbn else "i"
    link_subfields = "$$" + isbn_tag + isbn + "$$0" + str(recid_book) + "$$z1"

    search_author = '999C5:/' + author + '/'
    search_title  = '999C5:/' + title + '/ -refersto:recid:' + recid_book + \
                    ' -999C50:' + recid_book
    x_author = perform_request_search(p=search_author, cc='HEP')
    x_title = perform_request_search(p=search_title, cc='HEP')
    result = list(intbitset(x_author) & intbitset(x_title))

    records = [print_record(recid, ot=['999C5'], format='hm')
               for recid in result]
    new_records = []
    for record in records:
        if counter > COUNTER_MAX:
            # counter never decreases, so no later record can be processed.
            break
        new_record = []
        reference_flag = False
        for i in record.split('\n'):
            i = re.sub(r'</?pre>', '', i)
            i = re.sub(r'<pre style="margin: 1em 0px;">', '', i)
            if re.search(author, i):
                j = i.lower()
                # Skip lines that already carry a $$0 link. This must be a
                # plain substring test: as a regex, '$$0' can never match
                # because '$' is the end-of-string anchor.
                if re.search(title, j) and '$$0' not in j:
                    if not date or re.search(date, j):
                        i = i + link_subfields
                        reference_flag = True
                    if not re.search(r'CURATOR', i):
                        i = i + "$$9CURATOR"
                    if reference_flag:
                        counter += 1
            new_record.append(i + '\n')
        if reference_flag:
            new_records.append(new_record)
    return new_records
Beispiel #35
0
def check_doi_status_after_merge(original_recid1, original_recid2, final_record1, final_record_2, record2_marked_as_duplicate_p=False, submit_confirmed_p=False):
    """
    Check that the result of the merge does not remove DOIs managed
    by the system, and that no duplicate DOI would be
    created. Returns a tuple(error_code, message).

    error_code is 0 and message is empty when the merge may proceed;
    otherwise error_code is 1 and message holds an HTML/JavaScript
    snippet explaining the problem (or asking for confirmation).

    @param original_recid1: the record ID of the original record 1 (master)
    @type original_recid1: int
    @param original_recid2: the record ID of the original record 2 (slave)
    @type original_recid2: int
    @param final_record1: the resulting merged record
    @type final_record1: BibRecord object
    @param final_record_2: the resulting slave "merged" record (optional when record2_marked_as_duplicate_p is False)
    @type final_record_2: BibRecord object
    @param record2_marked_as_duplicate_p: True if the record 2 will be marked as duplicate (and deleted)
    @type record2_marked_as_duplicate_p: bool
    @param submit_confirmed_p: if the user has already confirmed to proceed with submission, according to previous messages displayed. If True, do not ask again confirmation and proceed if all tests pass.
    @type submit_confirmed_p: bool
    """
    errcode = 0
    message = ''
    new_record1_dois = get_dois(final_record1)
    new_record1_managed_dois = get_dois(final_record1, internal_only_p=True)
    original_record1_managed_dois = get_dois(
        create_record(print_record(original_recid1, 'xm'))[0],
        internal_only_p=True)
    # Fetch and parse the original record 2 only once; it is needed for
    # both DOI lookups below.
    original_record2 = create_record(print_record(original_recid2, 'xm'))[0]
    original_record2_dois = get_dois(original_record2)

    # Are there any DOI from record 1 (master) lost in the merging?
    lost_dois_in_record1 = [doi for doi in original_record1_managed_dois
                            if doi not in new_record1_managed_dois]

    # Enough to check for duplicate DOI creation in this record,
    # not whole DB
    duplicate_dois_after_merge = [doi for doi in new_record1_dois
                                  if new_record1_dois.count(doi) > 1]

    if record2_marked_as_duplicate_p:
        original_record2_managed_dois = get_dois(original_record2,
                                                 internal_only_p=True)
        # Are there any DOI from record 2 (slave) lost in the merging?
        # Record 2 is going away, so its managed DOIs must have moved
        # to the merged record 1.
        lost_dois_in_record2 = [doi for doi in original_record2_managed_dois
                                if doi not in new_record1_managed_dois]
    else:
        lost_dois_in_record2 = []
        # Record 2 stays, so any DOI it shares with the merged record 1
        # would exist twice.
        duplicate_dois_after_merge += [doi for doi in new_record1_dois
                                       if doi in original_record2_dois]

    if ((lost_dois_in_record1 or lost_dois_in_record2) and \
        CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL > 0) or \
        duplicate_dois_after_merge:

        # The pieces are concatenated directly instead of being
        # %-formatted afterwards: a '%' inside a DOI would otherwise break
        # the formatting of the whole message.
        if record2_marked_as_duplicate_p:
            check_duplicate_box = '$(\'#bibMergeDupeCheckbox\').attr(\'checked\', true);'
        else:
            check_duplicate_box = ''
        script_open = '<script type="text/javascript">' + check_duplicate_box
        # DOIs are separated by a JavaScript "\n" escape inside the dialogs.
        lost_list = '\\n'.join(lost_dois_in_record1)
        duplicate_list = '\\n'.join(duplicate_dois_after_merge)

        if CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and \
               not duplicate_dois_after_merge and \
               not submit_confirmed_p:
            # Protection level 1: losing DOIs is allowed after confirmation.
            errcode = 1
            message = ('The resulting merged record misses DOI(s) managed by the system.' +
                       script_open +
                       "if (confirm('The resulting merged record will lose DOI(s) managed by the system.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nAre you sure that you want to submit the merged records without the DOI(s)?')) "
                       "{onclickSubmitButton(confirm_p=false, additional_data={'confirmed_submit': true})}</script>")
        elif duplicate_dois_after_merge and lost_dois_in_record1:
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and/or (b) will create duplicate DOIs.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and (b) will create duplicate DOIs.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nThe following DOI(s) would be duplicate after merge:\\n" +
                       duplicate_list +
                       "\\nMake sure that the mentionned DOI(s) are included in the final merged record and/or no duplicate DOIs are created (suggestion: merge in the other way around).');</script>")
        elif duplicate_dois_after_merge:
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record will create a duplicate DOI.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record will create a duplicate DOI.\\n"
                       "The following DOI(s) would be duplicate after merge:\\n" +
                       duplicate_list +
                       "\\nMake sure that the mentionned DOI(s) are not duplicated (suggestion: merge in the other way around).');</script>")
        elif not (CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and submit_confirmed_p):
            # lost DOIs after merge
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record misses DOI(s) managed by the system.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record misses the DOI(s) managed by the system.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nMake sure that the mentionned DOI(s) are included in the final merged record.');</script>")

    return (errcode, message)
Beispiel #36
0
def check_doi_status_after_merge(original_recid1,
                                 original_recid2,
                                 final_record1,
                                 final_record_2,
                                 record2_marked_as_duplicate_p=False,
                                 submit_confirmed_p=False):
    """
    Check that the result of the merge does not remove DOIs managed
    by the system, and that no duplicate DOI would be
    created. Returns a tuple(error_code, message).

    error_code is 0 and message is empty when the merge may proceed;
    otherwise error_code is 1 and message holds an HTML/JavaScript
    snippet explaining the problem (or asking for confirmation).

    @param original_recid1: the record ID of the original record 1 (master)
    @type original_recid1: int
    @param original_recid2: the record ID of the original record 2 (slave)
    @type original_recid2: int
    @param final_record1: the resulting merged record
    @type final_record1: BibRecord object
    @param final_record_2: the resulting slave "merged" record (optional when record2_marked_as_duplicate_p is False)
    @type final_record_2: BibRecord object
    @param record2_marked_as_duplicate_p: True if the record 2 will be marked as duplicate (and deleted)
    @type record2_marked_as_duplicate_p: bool
    @param submit_confirmed_p: if the user has already confirmed to proceed with submission, according to previous messages displayed. If True, do not ask again confirmation and proceed if all tests pass.
    @type submit_confirmed_p: bool
    """
    errcode = 0
    message = ''
    new_record1_dois = get_dois(final_record1)
    new_record1_managed_dois = get_dois(final_record1, internal_only_p=True)
    original_record1_managed_dois = get_dois(
        create_record(print_record(original_recid1, 'xm'))[0],
        internal_only_p=True)
    # Fetch and parse the original record 2 only once; it is needed for
    # both DOI lookups below.
    original_record2 = create_record(print_record(original_recid2, 'xm'))[0]
    original_record2_dois = get_dois(original_record2)

    # Are there any DOI from record 1 (master) lost in the merging?
    lost_dois_in_record1 = [
        doi for doi in original_record1_managed_dois
        if doi not in new_record1_managed_dois
    ]

    # Enough to check for duplicate DOI creation in this record,
    # not whole DB
    duplicate_dois_after_merge = [
        doi for doi in new_record1_dois if new_record1_dois.count(doi) > 1
    ]

    if record2_marked_as_duplicate_p:
        original_record2_managed_dois = get_dois(original_record2,
                                                 internal_only_p=True)
        # Are there any DOI from record 2 (slave) lost in the merging?
        # Record 2 is going away, so its managed DOIs must have moved
        # to the merged record 1.
        lost_dois_in_record2 = [
            doi for doi in original_record2_managed_dois
            if doi not in new_record1_managed_dois
        ]
    else:
        lost_dois_in_record2 = []
        # Record 2 stays, so any DOI it shares with the merged record 1
        # would exist twice.
        duplicate_dois_after_merge += [
            doi for doi in new_record1_dois if doi in original_record2_dois
        ]

    if ((lost_dois_in_record1 or lost_dois_in_record2) and \
        CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL > 0) or \
        duplicate_dois_after_merge:

        # The pieces are concatenated directly instead of being
        # %-formatted afterwards: a '%' inside a DOI would otherwise break
        # the formatting of the whole message.
        if record2_marked_as_duplicate_p:
            check_duplicate_box = '$(\'#bibMergeDupeCheckbox\').attr(\'checked\', true);'
        else:
            check_duplicate_box = ''
        script_open = '<script type="text/javascript">' + check_duplicate_box
        # DOIs are separated by a JavaScript "\n" escape inside the dialogs.
        lost_list = '\\n'.join(lost_dois_in_record1)
        duplicate_list = '\\n'.join(duplicate_dois_after_merge)

        if CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and \
               not duplicate_dois_after_merge and \
               not submit_confirmed_p:
            # Protection level 1: losing DOIs is allowed after confirmation.
            errcode = 1
            message = ('The resulting merged record misses DOI(s) managed by the system.' +
                       script_open +
                       "if (confirm('The resulting merged record will lose DOI(s) managed by the system.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nAre you sure that you want to submit the merged records without the DOI(s)?')) "
                       "{onclickSubmitButton(confirm_p=false, additional_data={'confirmed_submit': true})}</script>")
        elif duplicate_dois_after_merge and lost_dois_in_record1:
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and/or (b) will create duplicate DOIs.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and (b) will create duplicate DOIs.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nThe following DOI(s) would be duplicate after merge:\\n" +
                       duplicate_list +
                       "\\nMake sure that the mentionned DOI(s) are included in the final merged record and/or no duplicate DOIs are created (suggestion: merge in the other way around).');</script>")
        elif duplicate_dois_after_merge:
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record will create a duplicate DOI.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record will create a duplicate DOI.\\n"
                       "The following DOI(s) would be duplicate after merge:\\n" +
                       duplicate_list +
                       "\\nMake sure that the mentionned DOI(s) are not duplicated (suggestion: merge in the other way around).');</script>")
        elif not (CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1
                  and submit_confirmed_p):
            # lost DOIs after merge
            errcode = 1
            message = ('The changes cannot be submitted because the resulting merged record misses DOI(s) managed by the system.' +
                       script_open +
                       "alert('The changes cannot be submitted because the resulting merged record misses the DOI(s) managed by the system.\\n"
                       "The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n" +
                       lost_list +
                       "\\nMake sure that the mentionned DOI(s) are included in the final merged record.');</script>")

    return (errcode, message)
Beispiel #37
0
"""
Provides text CV output of papers with current citation counts
"""
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
from invenio.search_engine import perform_request_search
from invenio.search_engine import print_record

# Patterns used to squeeze the formatted CV entry: literal <br/> tags and
# any run of two or more whitespace characters are removed.
br_tag = re.compile(r'<br/>')
whitespace_run = re.compile(r'\s\s+')

# Ask for the search pattern first, then look up the matching HEP records.
pattern = raw_input('Search: ')
report_name = 'IHEP-pub-report.doc'
recids = perform_request_search(p=pattern, cc="HEP")
with open(report_name, 'w') as report:
    for recid in recids:
        entry = print_record(recid, format='htcv')
        entry = whitespace_run.sub('', br_tag.sub('', entry))
        # The citation count is the number of HEP records referring to
        # this record.
        citing = perform_request_search(p="refersto:recid:%i" % recid,
                                        cc="HEP")
        entry += '\n' + str(len(citing)) + ' citations'
        # Each entry is echoed to stdout and appended to the report file.
        paragraph = entry + '\n\n'
        print(paragraph)
        report.write(paragraph)
def _apo_read_curdir_file(curdir, filename, description, report_missing):
    """
    Return the stripped content of a file of the submission directory.

    @param curdir: (string) - path of the submission's working directory.
    @param filename: (string) - name of the file, relative to curdir.
    @param description: (string) - word naming the file in the registered
          error message (e.g. "decision").
    @param report_missing: (boolean) - when false, a file that does not
          exist silently yields ""; when true no existence check is made
          and the failed open is registered like any other read error.
    @return: (string) - the stripped content, or "" when the file could
          not be read.
    """
    path = "%s/%s" % (curdir, filename)
    if not report_missing and not os.path.exists(path):
        return ""
    try:
        # The context manager closes the handle even when read() fails.
        with open(path, "r") as fh_file:
            content = fh_file.read()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "APO_Mail_Final_Decision_to_User. Tried to open " \
                           "%s file [%s] but was unable to." \
                           % (description, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def APO_Mail_Final_Decision_to_User(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the user informing him/her about
    the decision taken by the referee on his/her proposition.
    This email is also sent to the referee for checking.

    The recipients are the referees of the document's category, the
    general referees of the document type, the record owner(s) stored in
    the record and the user whose address is stored in the 'SuE' file of
    the submission directory. Nothing is sent when no decision could be
    read.

    Parameters:

       * categformatAPP contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * decision_file (optional): name of the file of the submission
         directory containing the decision. When it is missing, empty or
         "NULL", the file called 'decision' is read instead.

    Always returns an empty string.
    """

    global sysno, rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,
                                              CFG_SITE_SUPPORT_EMAIL)
    doctype = form['doctype']
    categformat = parameters['categformatAPP']
    sequence_id = bibtask_allocate_sequenceid(curdir)

    ## Read the decision, either from the configured file or (for backward
    ## compatibility reasons) from a file called 'decision' in curdir:
    decision_filename = parameters.get('decision_file', "")
    if decision_filename in (None, "", "NULL"):
        decision = _apo_read_curdir_file(curdir, "decision", "decision",
                                         False)
    else:
        decision = _apo_read_curdir_file(curdir, decision_filename,
                                         "decision", True)

    if decision == "":
        ## Without a decision there is neither a subject nor a body to
        ## send, so no email is scheduled at all.
        return ""

    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"

    ## Recipients are collected in a list and joined once at the end, so
    ## the final string never contains empty entries or stray commas.
    recipients = []

    # Referees of this category first, then the general referees:
    for role_name in ("referee_%s_%s" % (doctype, category),
                      "referee_%s_*" % doctype):
        for user in acc_get_role_users(acc_get_role_id(role_name)):
            recipients.append(user[1])

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        recipients.extend([email.lower().strip() \
                           for email in record_owners.split("\n")])

    # Add "SuE" (the user who performed the action) to the recipients. The
    # line is stripped so that its trailing newline does not end up in the
    # address list:
    try:
        with open("%s/SuE" % curdir, "r") as fp_sue:
            recipients.append(fp_sue.readline().strip())
    except IOError:
        ## No readable SuE file: this recipient is simply left out.
        pass

    addresses = ",".join([address for address in recipients if address])

    post_title = "".join([title.strip() for title in \
                          get_fieldvalues(int(sysno), "245__a")])

    post_url = "".join([url.strip() for url in \
                        get_fieldvalues(int(sysno), "520__u")])

    # The subject identifies the post by its title, or by its URL when the
    # record has no title:
    post_id = post_title or post_url

    if decision == "approve":
        mailtitle = "Post record deletion approved: [%(id)s]" % {'id': post_id}
        mailbody = "\nThe deletion of the post record with URL [%s] and title '%s' has been approved.\n" % (post_url, post_title)
        mailbody += "\nThis post record will be no longer available in the repository.\n"
    else:
        mailtitle = "Post record deletion has been rejected: [%(id)s]" % {'id': post_id}
        mailbody = "\nThe deletion of the post record with URL [%s] and title '%s' has been rejected.\n" % (post_url, post_title)

    # Send mail if there are any recipients or a copy goes to the admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, addresses, mailtitle, mailbody, \
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN, \
                             other_bibtasklet_arguments=['-I', str(sequence_id)])

    return ""
def _jobsubmit_read_curdir_file(curdir, filename, description, report_missing):
    """
    Return the stripped content of a file of the submission directory.

    @param curdir: (string) - path of the submission's working directory.
    @param filename: (string) - name of the file, relative to curdir.
    @param description: (string) - word naming the file in the registered
          error message ("comments" or "decision").
    @param report_missing: (boolean) - when false, a file that does not
          exist silently yields ""; when true no existence check is made
          and the failed open is registered like any other read error.
    @return: (string) - the stripped content, or "" when the file could
          not be read.
    """
    path = "%s/%s" % (curdir, filename)
    if not report_missing and not os.path.exists(path):
        return ""
    try:
        # The context manager closes the handle even when read() fails.
        with open(path, "r") as fh_file:
            content = fh_file.read()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "JOBSUBMIT_Send_APP_Mail. Tried to open " \
                           "%s file [%s] but was unable to." \
                           % (description, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def JOBSUBMIT_Send_APP_Mail(parameters, curdir, form, user_info=None):
    """
    This function send an email informing the original submitter of a
    document that the referee has approved/ rejected the document.

    As a side effect the globals titlevalue and authorvalue are rewritten
    with their newlines replaced (by " " and "; " respectively).

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). this parameter may contain
         the <CATEG> string. In which case the variable computed from
         the [categformatAPP] parameter replaces this string.
         eg.: "<CATEG>-referees@example.org"

       * categformatAPP contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * emailFile: Name of the file containing the email of the
                    submitter of the document

       * newrnin: Name of the file containing the 2nd reference of the
                  document (if any).

       * decision_file: Name of the file containing the decision of the
                document. When missing, empty or "NULL", the file called
                'decision' is read instead.

       * comments_file: Name of the file containing the comments of the
                document. When missing, empty or "NULL", the file called
                'COM' is read instead.

       * edsrn: Name of the file containing the reference of the
                document.

    Always returns an empty string.
    """
    global titlevalue, authorvalue, sysno, rn
    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n", " ")
    authorvalue = authorvalue.replace("\n", "; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    decision_filename = parameters.get('decision_file', "")
    comments_filename = parameters.get('comments_file', "")

    ## Read the referee's comments. Without a configured name, fall back
    ## (for backward compatibility reasons) to a file called 'COM':
    if comments_filename in (None, "", "NULL"):
        comments_filename = "COM"
    comment = _jobsubmit_read_curdir_file(curdir, comments_filename,
                                          "comments", False)

    ## Read the decision. Without a configured name, fall back (for
    ## backward compatibility reasons) to a file called 'decision':
    if decision_filename in (None, "", "NULL"):
        decision = _jobsubmit_read_curdir_file(curdir, "decision",
                                               "decision", False)
    else:
        decision = _jobsubmit_read_curdir_file(curdir, decision_filename,
                                               "decision", True)

    ## Read the new reference number, if any. isfile() rather than exists()
    ## so that an empty 'newrnin' (which designates curdir itself) is
    ## treated as "no file" instead of failing in open():
    newrn_file = "%s/%s" % (curdir, newrnpath)
    if os.path.isfile(newrn_file):
        with open(newrn_file, "r") as fp:
            newrn = fp.read()
    else:
        newrn = ""

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",
                  (doctype, ))
    docname = res[0][0]

    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"

    ## Recipients are collected in a list and joined once at the end, so
    ## that every address is separated from the next by exactly one comma.
    recipients = otheraddresses.replace("<CATEG>", category).split(",")

    ## Add the record's submitter(s) into the list of recipients:
    # The submitters email address is read from the file specified by
    # 'emailFile'. This is best effort: a missing parameter or an
    # unreadable file just leaves the submitter out.
    try:
        with open("%s/%s" % (curdir, parameters['emailFile']), "r") as fp:
            recipients.append(fp.read().replace("\n", " "))
    except (IOError, KeyError):
        pass

    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        recipients.extend([email.lower() \
                           for email in record_owners.split("\n")])

    addresses = ",".join([address.strip() for address in recipients \
                          if address.strip()])

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The submitted job listing with reference number %s has been fully approved." % (
            rn, )
        mailbody += "\n\nIt will soon become visible in the INSPIRE-HEP Jobs database - <%s/Jobs>" % (
            CFG_SITE_URL, )
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname, rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,
                                                        authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send the mail (unconditionally, even when no recipient was found)
    send_email(fromaddr=CFG_WEBSUBMIT_JOBS_FROMADDR, toaddr=addresses, subject=mailtitle, \
               content=mailbody, footer=job_email_footer(), copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    return ""
Beispiel #40
0
def _send_app_mail_read_curdir_file(curdir, filename, description,
                                    report_missing):
    """
    Return the stripped content of a file of the submission directory.

    @param curdir: (string) - path of the submission's working directory.
    @param filename: (string) - name of the file, relative to curdir.
    @param description: (string) - word naming the file in the registered
          error message ("comments" or "decision").
    @param report_missing: (boolean) - when false, a file that does not
          exist silently yields ""; when true no existence check is made
          and the failed open is registered like any other read error.
    @return: (string) - the stripped content, or "" when the file could
          not be read.
    """
    path = "%s/%s" % (curdir, filename)
    if not report_missing and not os.path.exists(path):
        return ""
    try:
        # The context manager closes the handle even when read() fails.
        with open(path, "r") as fh_file:
            content = fh_file.read()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "Send_APP_Mail. Tried to open " \
                           "%s file [%s] but was unable to." \
                           % (description, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def Send_APP_Mail(parameters, curdir, form, user_info=None):
    """
    This function send an email informing the original submitter of a
    document that the referee has approved/ rejected the document. The
    email is also sent to the referee for checking.

    As a side effect the globals titlevalue and authorvalue are rewritten
    with their newlines replaced (by " " and "; " respectively).

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). this parameter may contain
         the <CATEG> string. In which case the variable computed from
         the [categformatAPP] parameter replaces this string.
         eg.: "<CATEG>-referees@example.org"
         It is not used when CFG_CERN_SITE is set.

       * categformatAPP contains a regular expression used to compute
         the category of the document given the reference of the
         document.
         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and the reference
         of the document is "TEST-CATEGORY1-2001-001", then the computed
         category equals "CATEGORY1"

       * newrnin: Name of the file containing the 2nd reference of the
                  approved document (if any).

       * decision_file (optional): Name of the file containing the
                decision. When missing, empty or "NULL", the file called
                'decision' is read instead.

       * comments_file (optional): Name of the file containing the
                referee's comments. When missing, empty or "NULL", the
                file called 'COM' is read instead.

       * edsrn: Name of the file containing the reference of the
                approved document.

    Always returns an empty string.
    """
    global titlevalue, authorvalue, sysno, rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,
                                              CFG_SITE_SUPPORT_EMAIL)
    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n", " ")
    authorvalue = authorvalue.replace("\n", "; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    decision_filename = parameters.get('decision_file', "")
    comments_filename = parameters.get('comments_file', "")

    ## Read the referee's comments. Without a configured name, fall back
    ## (for backward compatibility reasons) to a file called 'COM':
    if comments_filename in (None, "", "NULL"):
        comments_filename = "COM"
    comment = _send_app_mail_read_curdir_file(curdir, comments_filename,
                                              "comments", False)

    ## Read the decision. Without a configured name, fall back (for
    ## backward compatibility reasons) to a file called 'decision':
    if decision_filename in (None, "", "NULL"):
        decision = _send_app_mail_read_curdir_file(curdir, "decision",
                                                   "decision", False)
    else:
        decision = _send_app_mail_read_curdir_file(curdir, decision_filename,
                                                   "decision", True)

    ## Read the new reference number, if any. isfile() rather than exists()
    ## so that an empty 'newrnin' (which designates curdir itself) is
    ## treated as "no file" instead of failing in open():
    newrn_file = "%s/%s" % (curdir, newrnpath)
    if os.path.isfile(newrn_file):
        with open(newrn_file, "r") as fp:
            newrn = fp.read()
    else:
        newrn = ""

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",
                  (doctype, ))
    docname = res[0][0]

    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"

    ## Recipients are collected in a list and joined once at the end, so
    ## the final string never starts with a comma nor contains empty
    ## entries.
    recipients = []

    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## The list name is built from the document type and the category.
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        recipients.append(referee_listname)
    else:
        # Referees of this category first, then the general referees:
        for role_name in ("referee_%s_%s" % (doctype, category),
                          "referee_%s_*" % doctype):
            for user in acc_get_role_users(acc_get_role_id(role_name)):
                recipients.append(user[1])
        # Then the configured additional addresses:
        otheraddresses = otheraddresses.replace("<CATEG>", category)
        recipients.extend(otheraddresses.split(","))

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        recipients.extend([email.lower() \
                           for email in record_owners.split("\n")])

    addresses = ",".join([address.strip() for address in recipients \
                          if address.strip()])

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname, rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (
            CFG_SITE_URL, CFG_SITE_RECORD, sysno)
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname, rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,
                                                        authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        send_email(FROMADDR,
                   addresses,
                   mailtitle,
                   mailbody,
                   copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    return ""
Beispiel #41
0
def _bibreformat_process_chunk(xml_content, fmt, allow_task_sleep):
    """
    Run the external bibformat command on a chunk of MARCXML and submit
    the formatted result to bibupload.

    The chunk is written to CFG_TMPDIR/bibreformat.xml, formatted into a
    timestamped CFG_TMPDIR/rec_fmt_*.xml file, and that file is handed to
    a 'bibupload -f' task.

    @param xml_content: (string) - the concatenated records to format.
    @param fmt: (string) - output format; passed upper-cased to bibformat.
    @param allow_task_sleep: (boolean) - whether to call
          task_sleep_now_if_required() before and after the bibformat call.
    @return: (tuple) - (time spent in bibformat, time spent submitting the
          bibupload task), both measured with os.times()[4].
    """
    finalfilename = "%s/rec_fmt_%s.xml" % (CFG_TMPDIR,
                                           time.strftime('%Y%m%d_%H%M%S'))
    filename = "%s/bibreformat.xml" % CFG_TMPDIR
    # The context manager closes the file even when the write fails.
    with open(filename, "w") as filehandle:
        filehandle.write(xml_content)

    ### bibformat external call
    ###
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    t11 = os.times()[4]
    write_message("START bibformat external call", verbose=9)
    # NOTE(review): fmt is interpolated into a shell command line; this
    # presumes callers only pass trusted format names - confirm.
    command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (
        CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
    os.system(command)
    t22 = os.times()[4]
    write_message("END bibformat external call (time elapsed:%2f)" % (
        t22 - t11), verbose=9)
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    bibformat_time = t22 - t11

    ### bibupload external call
    ###
    t11 = os.times()[4]
    write_message("START bibupload external call", verbose=9)
    task_id = task_low_level_submission('bibupload', 'bibreformat', '-f',
                                        finalfilename)
    write_message("Task #%s submitted" % task_id)
    t22 = os.times()[4]
    write_message("END bibupload external call (time elapsed:%2f)" % (
        t22 - t11), verbose=9)
    bibupload_time = t22 - t11

    return (bibformat_time, bibupload_time)


def iterate_over_old(list, fmt):
    """
    Iterate over list of IDs, reformatting the records chunk by chunk.

    Each record is fetched as MARCXML with print_record(). Every 10000
    records, and once more for the remaining ones, the accumulated XML is
    formatted by the external bibformat command and submitted to
    bibupload.

    A record whose MARCXML comes back empty is fetched again up to 10
    times, waiting 10 seconds before each retry; if it is still empty the
    process is terminated through sys.exit(0).

    @param list: iterable of record IDs (integers). The name shadows the
          builtin but is kept for callers passing it by keyword.
    @param fmt: (string) - output format handed to bibformat.
    @return: (tuple) - (number of records processed, total time spent in
          bibformat, total time spent submitting bibupload tasks).
    """
    n_max = 10000  # number of records per chunk
    max_retries = 10
    n_rec = 0  # records accumulated in the current chunk
    total_rec = 0  # Number of formatted records
    tbibformat = 0  # time taken up by external call
    tbibupload = 0  # time taken up by external call
    # XML of the current chunk; joined once per chunk instead of growing
    # a string record by record.
    chunk_parts = []

    for record in list:
        n_rec = n_rec + 1
        total_rec = total_rec + 1

        write_message("Processing record: %d" % (record), verbose=9)

        contents = print_record(record, 'xm')
        count = 0
        while (contents == "") and (count < max_retries):
            # Wait before trying again, never after a successful fetch.
            time.sleep(10)
            contents = print_record(record, 'xm')
            count = count + 1
        if contents == "":
            # Give up only when the record is still empty: a fetch that
            # succeeds on the last retry must not terminate the run.
            query = "id=%d&of=xm" % (record)
            sys.stderr.write(
                "Failed to download %s from %s after 10 attempts... terminating"
                % (query, CFG_SITE_URL))
            sys.exit(0)

        chunk_parts.append(contents)

        if n_rec >= n_max:
            elapsed = _bibreformat_process_chunk("".join(chunk_parts), fmt,
                                                 True)
            tbibformat = tbibformat + elapsed[0]
            tbibupload = tbibupload + elapsed[1]
            n_rec = 0
            chunk_parts = []

    ### Process the last re-formatted chunk
    ###
    if n_rec > 0:
        write_message("Processing last record set (%d)" % n_rec, verbose=9)
        elapsed = _bibreformat_process_chunk("".join(chunk_parts), fmt,
                                             False)
        tbibformat = tbibformat + elapsed[0]
        tbibupload = tbibupload + elapsed[1]

    return (total_rec, tbibformat, tbibupload)
def Mail_Approval_Request_to_Referee(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the referee of a document informing
    him/her that a request for its approval has been submitted by the
    user.

    The report number and the record ID are taken from the "rn" and
    "sysno" globals; the document type is taken from form['doctype'].
    The values described below are read from the "parameters" dictionary.

    @param categ_file_appreq: (string) - some document types are
          separated into different categories, each of which has its own
          referee(s).
          In such document types, it's necessary to know the document-
          type's category in order to choose the referee.
          This parameter provides a means by which the category information
          can be extracted from a file in the current submission's working
          directory. It should therefore be a filename.
          It must not be given together with categ_rnseek_appreq.

    @param categ_rnseek_appreq: (string) - some document types are
          separated into different categories, each of which has its own
          referee(s).
          In such document types, it's necessary to know the document-
          type's category in order to choose the referee.
          This parameter provides a means by which the category information
          can be extracted from the document's reference number.
          It is in fact a string that will be compiled into a regexp and
          an attempt will be made to match it against the document's
          reference number starting from the left-most position.
          The only pre-requisite is that the segment in which the category is
          sought should be indicated with <CATEG>.
          Thus, an example might be as follows:
             ATL(-COM)?-<CATEG>-.+

          This would allow "PHYS" in the following reference number to be
          recognised as the category:
             ATL-COM-PHYS-2008-001

    @param edsrn: (string) - the name of the field in which the report
          number should be placed when the referee visits the form for making
          a decision. Mandatory.

    @return: (string) - empty string.
    @Exceptions raised: InvenioWebSubmitFunctionError when "edsrn" is
          missing or empty, when both category parameters are given, when
          the category cannot be recovered, or (CERN site, doctype ATN,
          action RPR) when the note type is not recognised.
    """
    ## Get the reference number (as global rn - sorry!) and the document type:
    global sysno, rn
    doctype = form['doctype']

    def _one_per_line(text):
        """Strip every line of text and terminate each one with a newline."""
        return "".join(["%s\n" % item.strip() for item in text.split("\n")])

    ########
    ## Get the name of the report-number file:
    ########
    try:
        edsrn_file = parameters["edsrn"]
    except KeyError:
        ## No value given for the edsrn file:
        msg = "Error in Mail_Approval_Request_to_Referee function: unable " \
              "to determine the name of the file in which the document's " \
              "report number should be stored."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        ## Only the base name is kept: any directory part is discarded.
        edsrn_file = os.path.basename(str(edsrn_file)).strip()
        if edsrn_file == "":
            msg = "Error in Mail_Approval_Request_to_Referee function: " \
                  "unable to determine the name of the file in which " \
                  "the document's report number should be stored."
            raise InvenioWebSubmitFunctionError(msg)
    ########
    ## Get the name of the category file (optional):
    ########
    try:
        category_file = parameters["categ_file_appreq"]
    except KeyError:
        ## No value given for the category file:
        category_file = None
    else:
        if category_file is not None:
            category_file = os.path.basename(str(category_file)).strip()
            if category_file == "":
                category_file = None
    ########
    ## Get the regexp that is used to find the category in the report number
    ## (optional):
    ########
    try:
        category_rn_regexp = parameters["categ_rnseek_appreq"]
    except KeyError:
        ## No value given for the category regexp:
        category_rn_regexp = None
    else:
        if category_rn_regexp is not None:
            category_rn_regexp = str(category_rn_regexp).strip()
        if category_rn_regexp == "":
            category_rn_regexp = None
    #######
    ## Resolve the document type's category:
    ##
    ## The category is extracted either from a file in curdir, or from the
    ## report number (using the admin-supplied regular expression).
    ## NOTE(review): the messages below name "Register_Approval_Request";
    ## they are kept unchanged here - confirm whether they should name this
    ## function instead.
    ##
    if category_file is not None and category_rn_regexp is not None:
        ## It is not valid to have both a category file and a pattern
        ## describing how to extract the category from a report number.
        msg = "Error in Register_Approval_Request function: received " \
              "instructions to search for the document's category in " \
              "both its report number AND in a category file. Could " \
              "not determine which to use - please notify the " \
              "administrator."
        raise InvenioWebSubmitFunctionError(msg)
    elif category_file is not None:
        ## Attempt to recover the category information from a file in the
        ## current submission's working directory:
        category = ParamFromFile("%s/%s" % (curdir, category_file))
        if category is not None:
            category = category.strip()
        if category in (None, ""):
            ## The category cannot be resolved.
            msg = "Error in Register_Approval_Request function: received " \
                  "instructions to search for the document's category in " \
                  "a category file, but could not recover the category " \
                  "from that file. An approval request therefore cannot " \
                  "be registered for the document."
            raise InvenioWebSubmitFunctionError(msg)
    elif category_rn_regexp is not None:
        ## Attempt to recover the category information from the document's
        ## reference number using the regexp in category_rn_regexp.
        ##
        ## The "<CATEG>" marker is mandatory:
        if "<CATEG>" not in category_rn_regexp:
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression (no marker for " \
                  "the category was present.) Since the document's category " \
                  "therefore cannot be retrieved, an approval request cannot " \
                  "be registered for it. Please report this problem to the " \
                  "administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        ## Replace the first "<CATEG>" with "(?P<category>.+?)".
        ## For example, this:
        ##    ATL(-COM)?-<CATEG>-
        ## Will be transformed into this:
        ##    ATL(-COM)?-(?P<category>.+?)-
        category_rn_final_regexp = \
            category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)
        try:
            ## Attempt to compile the regexp for finding the category:
            re_categ_from_rn = re.compile(category_rn_final_regexp)
        except sre_constants.error:
            ## The expression passed to this function could not be compiled
            ## into a regexp. Register this exception and raise an
            ## InvenioWebSubmitFunctionError:
            exception_prefix = "Error in Register_Approval_Request function: " \
                               "The [%(doctype)s] submission has been " \
                               "configured to search for the document type's " \
                               "category in its reference number, using the " \
                               "following regexp: /%(regexp)s/. This regexp, " \
                               "however, could not be compiled correctly " \
                               "(created it from %(categ-search-term)s.)" \
                               % { 'doctype'       : doctype, \
                                   'regexp'        : category_rn_final_regexp, \
                                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression. Since the " \
                  "document's category therefore cannot be retrieved, an " \
                  "approval request cannot be registered for it. Please " \
                  "report this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        ## Now attempt to recover the category from the RN string
        ## (match() anchors the search at the start of rn):
        m_categ_from_rn = re_categ_from_rn.match(rn)
        if m_categ_from_rn is None:
            ## No match. Cannot find the category and therefore cannot
            ## continue:
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to " \
                  "search for the document type's category in its " \
                  "reference number, but no match was made. The request " \
                  "for approval cannot be registered. Please report " \
                  "this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        ## The pattern matched in the string.
        ## Extract the category from the match:
        try:
            category = m_categ_from_rn.group("category")
        except IndexError:
            ## There was no "category" group. That group is mandatory.
            exception_prefix = \
               "Error in Register_Approval_Request function: The " \
               "[%(doctype)s] submission has been configured to " \
               "search for the document type's category in its " \
               "reference number using the following regexp: " \
               "/%(regexp)s/. The search produced a match, but " \
               "there was no \"category\" group in the match " \
               "object although this group is mandatory. The " \
               "regexp was compiled from the following string: " \
               "[%(categ-search-term)s]." \
               % { 'doctype'           : doctype, \
                   'regexp'            : category_rn_final_regexp, \
                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to " \
                  "search for the document type's category in its " \
                  "reference number, using a poorly formed search " \
                  "expression (there was no category marker). Since " \
                  "the document's category therefore cannot be " \
                  "retrieved, an approval request cannot be " \
                  "registered for it. Please report this problem to " \
                  "the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
        category = category.strip()
        if category == "":
            msg = "Error in Register_Approval_Request function: " \
                  "The [%(doctype)s] submission has been " \
                  "configured to search for the document type's " \
                  "category in its reference number, but no " \
                  "category was found. The request for approval " \
                  "cannot be registered. Please report this " \
                  "problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
    else:
        ## The document type has no category.
        category = ""
    ##
    ## End of category recovery
    #######
    #######
    ## Get the title and author(s) from the record:
    #######
    recid = int(sysno)
    ## Author(s): first author (100__a) followed by the others (700__a).
    rec_authors = ""
    rec_first_author = print_record(recid, 'tm', "100__a")
    rec_other_authors = print_record(recid, 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += _one_per_line(rec_first_author)
    if rec_other_authors != "":
        rec_authors += _one_per_line(rec_other_authors)
    ## Title:
    rec_title = _one_per_line(print_record(recid, 'tm', "245__a"))
    ##
    #######
    ## the normal approval action
    approve_act = 'APP'
    ## Get notes about the approval request:
    approval_notes = get_approval_request_notes(doctype, rn)
    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## List names should take the format
        ## "*****@*****.**"
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = None
        if doctype == 'ATN':  ## Special case of 'RPR' action for doctype ATN
            action = ParamFromFile("%s/%s" % (curdir,'act')).strip()
            if action == 'RPR':
                notetype = ParamFromFile("%s/%s" % (curdir,'ATN_NOTETYPE')).strip()
                if notetype not in ('SLIDE','PROC'):
                    raise InvenioWebSubmitFunctionError('ERROR function Mail_Approval_Request_to_Referee:: do not recognize notetype ' + notetype)
                if notetype == 'PROC':
                    approve_act = 'APR'  # RPR PROC requires APR action to approve
                    referee_listname = "*****@*****.**"
                else:  ## SLIDES approval (only remaining accepted value)
                    approve_act = 'APS'  # RPR SLIDE requires APS action to approve
                    referee_listname = "*****@*****.**"
        if referee_listname is None:
            ## Default list name. This is computed for every doctype, so that
            ## mailto_addresses is always defined before the mail is sent
            ## (previously it was only set for doctype 'ATN').
            referee_listname = "service-cds-referee-%s" % doctype.lower()
            if category != "":
                referee_listname += "-%s" % category.lower()
        mailto_addresses = referee_listname + "@cern.ch"
        if category == 'CDSTEST':
            ## The CDSTEST category always goes to the per-doctype list, even
            ## when a special ATN/RPR list was selected above.
            referee_listname = "service-cds-referee-%s" % doctype.lower()
            referee_listname += "-%s" % category.lower()
            mailto_addresses = referee_listname + "@cern.ch"
    else:
        ## Retrieve the referees' emails from the referee roles: first the
        ## category-specific role, then the general role of the doctype.
        ## user[1] is used as the email address of each role member.
        referee_emails = \
            [user[1] for user in \
             acc_get_role_users(acc_get_role_id("referee_%s_%s" \
                                                % (doctype, category)))]
        referee_emails += \
            [user[1] for user in \
             acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype))]
        referee_address = ",".join(referee_emails)
        # Creation of the mail for the referee. The trailing comma after a
        # non-empty address list is kept exactly as before.
        mailto_addresses = ""
        if referee_address != "":
            mailto_addresses = referee_address + ","
    ##
    ## Send the email:
    mail_subj = "Request for approval of [%s]" % rn
    mail_body = CFG_MAIL_BODY % \
                { 'site-name'               : CFG_SITE_NAME,
                  'report-number-fieldname' : edsrn_file,
                  'report-number'           : rn,
                  'title'                   : rec_title,
                  'authors'                 : rec_authors,
                  'site-url'                : CFG_SITE_URL,
                  'record-id'               : sysno,
                  'approval-action'         : approve_act,
                  'doctype'                 : doctype,
                  'notes'                   : approval_notes,
                  'category'                : category,
                }
    send_email(CFG_SITE_SUPPORT_EMAIL,
               mailto_addresses,
               mail_subj,
               mail_body,
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    ##
    return ""
 def _get_record_MARCXML(self, record):
     """Return what print_record gives for `record` in the 'xm' (MARCXML) format."""
     marcxml = print_record(record, format='xm')
     return marcxml
Beispiel #44
0
["E40","K.Miwa","Tohoku U","Measurement of the cross sections of Σp scatterings"]]

# Search pattern on field 371__u; in the visible part of this script it is
# only referenced by the commented-out perform_request_search call below.
search = "371__u:/a/ or 371__u:/e/ or 371__u:/i/ or 371__u:/o/ or 371__u:/u/"



# Disabled experiment: run the search in the 'HepNames' collection and keep
# the first five hits.
#x = perform_request_search(p=search,cc='HepNames')
#x = x[:5]
#print len(x)

# Scratch output file. NOTE(review): the handle is not closed anywhere in the
# visible part of this script.
fileName = 'tmp_junk.out'
output = open(fileName,'w')

# Enabled branch: write record 355574, restricted to tags 001 and 700, to the
# output file in the 'xm' format.
if True:
    recid = 355574 
    output.write(print_record(recid, ot=['001','700'],format='xm'))

# Disabled branch (Python 2 print statements): for each listed record, print
# its 001 and 980 tags in the 'xm' format, followed by a literal 980 datafield
# whose subfield "a" is HEP.
if False:
  result = [1317852, 1319472, 1324458, 1325164, 1326367, 1327466, 1328450, 1328598, 1328943, 1333186, 1333470]
  for r in result:
    #print r
    print print_record(r,ot=['001','980'],format='xm')
    print \
'''<datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">HEP</subfield>
  </datafield>
'''


# Disabled branch; the rest of its body is not part of this snippet.
if False:
  all_refs = []
Beispiel #45
0
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.

       This function never returns normally: it always ends by raising one
       of the exceptions below.

       @parameters: None.
       @return: None.
       @Exceptions raise: InvenioWebSubmitFunctionError if problems are
        encountered (the step or the record ID is missing or is not an
        integer, or the record does not exist);
        InvenioWebSubmitFunctionStop in order to display the details of the
        record and the confirmation message.
    """
    global sysno

    def _html_lines(raw_value):
        """HTML-escape each line of raw_value and end it with '<br />'."""
        return "".join(["%s<br />\n" % cgi.escape(line.strip()) \
                        for line in raw_value.split("\n")])

    ## Make sure that we know the current step. A missing field (KeyError)
    ## or a non-numeric value (ValueError) is reported in the same way as a
    ## value of the wrong type (TypeError):
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    newstep = current_step + 1

    ## Make sure that the sysno is valid (a non-numeric string raises
    ## ValueError, None raises TypeError):
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    ## Retrieve the details to be displayed:
    ##
    ## Author(s): first author (100__a), then the others (700__a). Empty
    ## values are skipped so that no blank line is displayed for them.
    rec_authors = ""
    rec_first_author = print_record(working_recid, 'tm', "100__a")
    rec_other_authors = print_record(working_recid, 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += _html_lines(rec_first_author)
    if rec_other_authors != "":
        rec_authors += _html_lines(rec_other_authors)

    ## Title (always formatted, even when empty):
    rec_title = _html_lines(print_record(working_recid, 'tm', "245__a"))

    ## Report numbers: 037__a, then 088__a.
    rec_reportnums = ""
    rec_reportnum = print_record(working_recid, 'tm', "037__a")
    rec_other_reportnums = print_record(working_recid, 'tm', "088__a")
    if rec_reportnum != "":
        rec_reportnums += _html_lines(rec_reportnum)
    if rec_other_reportnums != "":
        rec_reportnums += _html_lines(rec_other_reportnums)

    ## Stop the submission here in order to show the confirmation page:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
Beispiel #46
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Every "999C5" field of the record is turned into an HTML fragment built
    from its subfields: 'o' opens a new list item, 'm' is printed as is,
    'r' becomes either a formatted record (ADS sites) or a report-number
    search link, and 't' (with 'v', 'y' and 'p') becomes the journal
    reference, linked when the title is known to the "ejournals"
    knowledge base.

    @param bfo: the object giving access to the record being formatted
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: (string) the HTML of all references, or an empty string when
        nothing was printed
    """
    from invenio.config import CFG_BASE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)
    out = ""

    for reference in references:
        ref_out = ''

        # The "in" operator is used instead of the deprecated has_key().
        if 'o' in reference:
            # Close the previous list item, if anything was printed before:
            if out != "":
                ref_out = '</li>'
            ref_out += "<li><small>" + reference['o'] + "</small> "

        if 'm' in reference:
            ref_out += "<small>" + reference['m'] + "</small> "

        if 'r' in reference:
            report_number = reference['r']
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = \
                    get_mysql_recid_from_aleph_sysno(report_number)
                if recid_to_display:
                    ref_out += print_record(recid_to_display, 'hs')
                else:
                    ref_out += '<small>' + report_number + \
                               ' (not in ADS)</small>'
            else:
                ref_out += '<small> [<a href="' + CFG_BASE_URL + \
                           '/search?f=reportnumber&amp;p=' + report_number + \
                           '&amp;ln=' + bfo.lang + \
                           '">' + report_number + "</a>] </small> <br />"

        if 't' in reference:
            title = reference['t']
            volume = reference.get('v', "")
            year = reference.get('y', "")
            pages = reference.get('p', "")
            ejournal = bfo.kb("ejournals", title)
            if ejournal != "":
                # Only the first page of a "first-last" range goes in the URL:
                ref_out += ' <small> <a href="https://cds.cern.ch/ejournals.py?publication=' + \
                           title.replace(" ", "+") + \
                           "&amp;volume=" + volume + \
                           "&amp;year=" + year + \
                           "&amp;page=" + pages.split("-")[0] + '">'
                ref_out += title + ": " + volume + " (" + year + ") "
                ref_out += pages + "</a> </small> <br />"
            else:
                ref_out += " <small> " + title + volume + year + pages + \
                           " </small> <br />"

        # Prefix and suffix are only added around non-empty references:
        if reference_prefix is not None and ref_out != '':
            ref_out = reference_prefix + ref_out
        if reference_suffix is not None and ref_out != '':
            ref_out += reference_suffix

        out += ref_out

    if out != '':
        out += '</li>'

    return out
def User_is_Record_Owner_or_Curator(parameters, curdir, form, user_info=None):
    """
    Allow the submission to continue only for an owner or a curator.

    The user passes the check when either:
      - her email address (compared in lower case) is one of the values
        printed for the CFG_WEBSUBMIT_RECORD_OWNER_EMAIL field of the
        record identified by the "sysno" global, and is neither empty nor
        "guest"; or
      - WebAccess authorizes her for the "submit" action with the
        doctype and act taken from the form.

    WARNING: wherever this function is used, any user authorized via
    WebAccess for the action can modify any record that goes through
    the workflow, whether or not she is listed in the record.

    @parameters: None.
    @return: Empty string.
    @Exceptions raised: InvenioWebSubmitFunctionStop when user is denied
                permission to work with the record.
    """
    global sysno
    ## Document type and action are needed for the WebAccess check below:
    doctype = form['doctype']
    act = form['act']
    ## E-mail address of the current user:
    user_email = user_info["email"].lower()
    ## E-mail address(es) of the owner(s)/submitter(s), one per line:
    owners_text = print_record(sysno, 'tm', \
                               [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    record_owners_list = []
    if owners_text != "":
        for owner_email in owners_text.split("\n"):
            record_owners_list.append(owner_email.lower().strip())
    ## First chance: the user is listed in the record itself.
    is_anonymous = user_email in ("", "guest")
    if not is_anonymous and user_email in record_owners_list:
        return ""
    ## Second chance: WebAccess says the user is a "curator" for this
    ## submission (an authorization code of 0 means "authorized"):
    (auth_code, dummy_msg) = acc_authorize_action(user_info, \
                                                  "submit", \
                                                  verbose=0, \
                                                  doctype=doctype, \
                                                  act=act)
    if auth_code == 0:
        return ""
    ## Neither owner nor curator: send the user back to the form.
    raise InvenioWebSubmitFunctionStop(CFG_MSG_USER_NOT_AUTHORIZED)
Beispiel #48
0
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Each "999C5" field is rendered from its subfields: 'o' starts a new
    "<li>" item, 'm' is printed as is, 'r' is shown as a formatted record
    (ADS sites) or as a link to a report-number search, and 't' (together
    with 'v', 'y' and 'p') is shown as a journal reference, which is a
    link when the "ejournals" knowledge base knows the title.

    @param bfo: the object giving access to the record being formatted
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: (string) the concatenated HTML of the references; empty when
        no reference produced any output
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    ## FIXME: store external sysno into 999 $e, not into 999 $r
    # On ADS sites field values are not escaped for now, because of things
    # like A&A in 999 $r that are going to be resolved further down:
    if CFG_ADS_SITE:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    out = ""
    for reference in references:
        ref_out = ''

        # Membership is tested with "in" rather than the deprecated
        # dict.has_key().
        if 'o' in reference:
            if out != "":
                # Something was already printed: close the open item first.
                ref_out = '</li>'
            ref_out += "<li><small>" + reference['o'] + "</small> "

        if 'm' in reference:
            ref_out += "<small>" + reference['m'] + "</small> "

        if 'r' in reference:
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(
                    reference['r'])
                if recid_to_display:
                    ref_out += print_record(recid_to_display, 'hs')
                else:
                    ref_out += '<small>' + reference['r'] + \
                               ' (not in ADS)</small>'
            else:
                ref_out += '<small> [<a href="' + CFG_SITE_URL + \
                           '/search?f=reportnumber&amp;p=' + \
                           reference['r'] + \
                           '&amp;ln=' + bfo.lang + \
                           '">' + reference['r'] + "</a>] </small> <br />"

        if 't' in reference:
            journal = reference['t']
            vol = reference.get('v', "")
            yr = reference.get('y', "")
            page_range = reference.get('p', "")
            if bfo.kb("ejournals", journal) != "":
                # The link targets the first page of the page range only:
                first_page = page_range.split("-")[0]
                ref_out += ' <small> <a href="https://cdsweb.cern.ch/ejournals.py?publication=' + \
                           journal.replace(" ", "+") + \
                           "&amp;volume=" + vol + \
                           "&amp;year=" + yr + \
                           "&amp;page=" + first_page + '">'
                ref_out += journal + ": " + vol + " (" + yr + ") "
                ref_out += page_range + "</a> </small> <br />"
            else:
                ref_out += " <small> " + journal + vol + yr + page_range + \
                           " </small> <br />"

        # References that produced no output get neither prefix nor suffix:
        if ref_out != '':
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix

        out += ref_out

    if out != '':
        out += '</li>'

    return out
def User_is_Record_Owner_or_Curator(parameters, curdir, form, user_info=None):
    """
    WebSubmit function: only let record owners or curators carry on.

    The current user may work with the record identified by the global
    ``sysno`` when at least one of the following holds:

      - the user's e-mail address is listed in the record field(s) named
        by CFG_WEBSUBMIT_RECORD_OWNER_EMAIL (addresses are compared in
        lower case; the empty address and "guest" never match);
      - WebAccess authorizes the user for the "submit" action on the
        doctype/action pair taken from the form.

    In either case the function ends silently. Otherwise it raises an
    InvenioWebSubmitFunctionStop carrying CFG_MSG_USER_NOT_AUTHORIZED.

    @param parameters: not read by this function.
    @param curdir: not read by this function.
    @param form: submission form; its 'doctype' and 'act' entries are read.
    @param user_info: user description; its "email" entry is read and the
        whole object is passed on to acc_authorize_action.
    @return: Empty string.
    @Exceptions raised: InvenioWebSubmitFunctionStop when user is denied
                permission to work with the record.
    """
    global sysno
    ## Doctype and action are needed later for the WebAccess check.
    doctype = form['doctype']
    act = form['act']
    current_email = user_info["email"].lower()

    ## Owner addresses come back from the record one per line.
    owner_field = print_record(sysno, 'tm',
                               [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    owner_emails = []
    if owner_field != "":
        for owner_line in owner_field.split("\n"):
            owner_emails.append(owner_line.lower().strip())

    ## First chance: the user is listed in the record itself.
    if current_email not in ("", "guest") and current_email in owner_emails:
        return ""

    ## Second chance: WebAccess knows the user as a curator for this
    ## submission (auth code 0 means "authorized").
    (auth_code, dummy) = acc_authorize_action(user_info,
                                              "submit",
                                              verbose=0,
                                              doctype=doctype,
                                              act=act)
    if auth_code == 0:
        return ""

    ## Neither owner nor curator: send the user back to the form.
    raise InvenioWebSubmitFunctionStop(CFG_MSG_USER_NOT_AUTHORIZED)
def _iterate_over_old_flush(xml_content, fmt, allow_task_sleep):
    """Format one chunk of MARCXML with bibformat and queue its upload.

    The chunk is written to CFG_TMPDIR/bibreformat.xml, the external
    ``bibformat`` binary is run on it (output goes to a timestamped
    rec_fmt_*.xml file, stderr to bibreformat.err) and the result is
    submitted as a ``bibupload`` task.

    @param xml_content: MARCXML text of all records in the chunk.
    @param fmt: output format name; upper-cased for the bibformat call.
    @param allow_task_sleep: when true, task_sleep_now_if_required() is
        called before and after the bibformat run.
    @return: tuple (bibformat_time, bibupload_time), both differences of
        os.times()[4] taken around the respective step.
    """
    finalfilename = "%s/rec_fmt_%s.xml" % (CFG_TMPDIR, time.strftime('%Y%m%d_%H%M%S'))
    filename = "%s/bibreformat.xml" % CFG_TMPDIR
    # 'with' guarantees the handle is closed even if the write fails.
    with open(filename, "w") as filehandle:
        filehandle.write(xml_content)

    ### bibformat external call
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    t11 = os.times()[4]
    write_message("START bibformat external call", verbose=9)
    command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
    os.system(command)
    t22 = os.times()[4]
    write_message("END bibformat external call (time elapsed:%2f)" % (t22 - t11), verbose=9)
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    bibformat_time = t22 - t11

    ### bibupload external call
    t11 = os.times()[4]
    write_message("START bibupload external call", verbose=9)
    task_id = task_low_level_submission('bibupload', 'bibreformat', '-f', finalfilename)
    write_message("Task #%s submitted" % task_id)
    t22 = os.times()[4]
    write_message("END bibupload external call (time elapsed:%2f)" % (t22 - t11), verbose=9)
    bibupload_time = t22 - t11

    return (bibformat_time, bibupload_time)


def iterate_over_old(list, fmt):
    """Iterate over list of IDs, reformatting the records in chunks.

    Every record is fetched as MARCXML with print_record(). The XML is
    collected and, each time 10000 records have been gathered (and once
    more for the remainder), handed to bibformat and submitted to
    bibupload.

    An empty answer from print_record() is retried up to 10 times with a
    10 second pause before each retry. If the record is still empty, an
    error is written to stderr and the process exits.

    @param list: iterable of integer record IDs (the name shadows the
        builtin but is kept because it is part of the signature).
    @param fmt: output format name passed to bibformat.
    @return: tuple (total_rec, tbibformat, tbibupload): number of records
        processed and the accumulated time spent in the bibformat and
        bibupload steps.
    """
    n_max = 10000       # records per bibformat/bibupload chunk
    max_retries = 10    # extra print_record() attempts per record
    retry_delay = 10    # seconds to wait before each retry

    total_rec = 0       # Number of formatted records
    tbibformat = 0      # time taken up by external call
    tbibupload = 0      # time taken up by external call
    chunk = []          # XML of the records gathered for the current chunk

    for record in list:
        total_rec += 1
        write_message("Processing record: %d" % (record), verbose=9)

        contents = print_record(record, 'xm')
        retries = 0
        # Wait *before* retrying, so a successful fetch is never followed
        # by a pointless sleep.
        while contents == "" and retries < max_retries:
            time.sleep(retry_delay)
            contents = print_record(record, 'xm')
            retries += 1

        # Give up only if the record is really still empty; a fetch that
        # succeeded on the last retry must not abort the run.
        if contents == "":
            query = "id=%d&of=xm" % (record)
            sys.stderr.write("Failed to download %s from %s after 10 attempts... terminating" % (query, CFG_SITE_URL))
            # NOTE(review): exits with status 0 although this is a failure
            # path; kept unchanged in case callers rely on it -- confirm.
            sys.exit(0)

        chunk.append(contents)

        if len(chunk) >= n_max:
            (fmt_time, upload_time) = _iterate_over_old_flush("".join(chunk), fmt, True)
            tbibformat = tbibformat + fmt_time
            tbibupload = tbibupload + upload_time
            chunk = []

    ### Process the last re-formated chunk
    if chunk:
        write_message("Processing last record set (%d)" % len(chunk), verbose=9)
        (fmt_time, upload_time) = _iterate_over_old_flush("".join(chunk), fmt, False)
        tbibformat = tbibformat + fmt_time
        tbibupload = tbibupload + upload_time

    return (total_rec, tbibformat, tbibupload)
"""
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
from invenio.search_engine import perform_request_search
from invenio.search_engine import get_fieldvalues
from invenio.search_engine import print_record

date = raw_input('date: ')
file = 'theory_pubs_' + date + '.doc'
output = open(file, 'w')
x = perform_request_search(p="find r fermilab pub t and de %s" % date)
#if False:
#x = [1416470]
for r in x:
    olivia = print_record(r, format='htcv')
    olivia = re.sub(r'<br/>', '', olivia)
    olivia = re.sub(r'\s\s+', '', olivia)
    reports = get_fieldvalues(r, '037__a')
    pages = get_fieldvalues(r, '300__a')
    print '\n' + olivia
    output.write('\n\n' + olivia + '\n')
    for page in pages:
        print page + ' pp.'
        output.write(page + ' pp.\n')
    for report in reports:
        if re.search(r'FERMILAB', report):
            print report
            output.write(report)
output.close()
Beispiel #52
0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    The record is fetched as MARCXML ('xm') and parsed with
    create_record(); the first element of that result is returned.
    Returns None when record_exists(recid) is false.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, "xm")
    parsed = create_record(marcxml)
    return parsed[0]
Beispiel #53
0
def main(search):
    """Write the OSTI harvest files for Fermilab-related HEP records.

    Runs ``search`` (or a built-in default when ``search`` is empty)
    against the HEP collection and keeps only the hits that also appear
    in the ``ok`` set computed below. Those records are dumped in 'xsti'
    format to ``osti.out``, which is then re-read line by line,
    post-processed and written to ``osti2.out``.

    Relies on names defined outside this function: VERBOSE, get_url,
    checkURL, get_fieldvalues, perform_request_search, print_record,
    intbitset, re and cgi.

    :param search: query string for perform_request_search.
    :return: None; results go to the two files and to stdout.
    """
    if not search:
        search = "find r fermilab and dadd 2014"
    # 'search' is reused for per-record queries inside the loop below,
    # so keep the original query for the final report line.
    search_original = search
    x = intbitset(perform_request_search(p=search, cc='HEP'))
    print search, ':', len(x)
    # Record sets selected by the label of their 8564 links, plus the
    # SCOAP3, open-access and CMS sets.
    fermilab         = intbitset(perform_request_search(p="8564_y:fermilab*", cc='HEP'))
    fermilabtoday   = intbitset(perform_request_search(p="8564_y:fermilabtoday", cc='HEP'))
    fermilabpub     = intbitset(perform_request_search(p="8564_y:fermilabpub", cc='HEP'))
    fermilabthesis  = intbitset(perform_request_search(p="8564_y:fermilabthesis", cc='HEP'))
    fermilabconf    = intbitset(perform_request_search(p="8564_y:fermilabconf", cc='HEP'))
    fermilabtm      = intbitset(perform_request_search(p="8564_y:fermilabtm", cc='HEP'))
    scoap           = intbitset(perform_request_search(p="8564_y:'Article from SCOAP3'", cc='HEP'))
    oa              = intbitset(perform_request_search(p="8564_z:postprint or 8564_z:openaccess", cc='HEP'))
    cms = intbitset(perform_request_search(p="find r fermilab and cn cms", cc='HEP'))
    # '-' binds tighter than '|': fermilabtoday is removed from 'fermilab'
    # only, and the remaining sets are then united with that difference.
    ok = fermilab - fermilabtoday | fermilabpub | fermilabthesis | fermilabconf | fermilabtm | scoap | cms | oa
    print 'Total number of Fermilab links:', len(ok)
    x = x & ok
    print 'Intersection:', len(x), x

    # First pass: dump every selected record in 'xsti' format.
    fileName    = 'osti.out'
    fileName2 = 'osti2.out'
    output = open(fileName, 'w')
    output.write("<harvest-site>\n")
    for r in x:
            output.write(print_record(r, format='xsti'))
    output.write("</harvest-site>\n")
    output.close()


    # Second pass: rewrite the dump line by line into osti2.out.
    output2 = open(fileName2, 'w')
    #noUrl = False
    #arXiv_flag = False
    # These two flags are never reset inside the loop, so only the first
    # <subj_category> / <subj_keywords> line of the whole file is kept.
    subj_category_flag = False
    subj_keywords_flag = False
    # The second assignment wins: URL checking via checkURL is disabled.
    url_check_flag = True
    url_check_flag = False
    url_oa = False
    counter = 1

    # 'i' is the current line of osti.out; it is edited in place and
    # written to osti2.out at the end of the iteration if still non-empty.
    for i in open(fileName, 'r'):
        issue = None
        # Make find_paper.pl links end in '.pdf', then undo the doubled
        # or misplaced suffixes this can produce.
        i = re.sub(r'(find_paper\.pl\?[\w\-]+)', r'\1.pdf', i)
        i = re.sub(r'pdf\.pdf', 'pdf', i)
        i = re.sub(r'shtml\.pdf', 'shtml', i)

        # An accession_num line sets the per-record state (recid, url_oa,
        # accepted, doctype_flag) that the later lines of the record use.
        if re.search(r'accession_num', i):
            matchObj = re.match(r'.*<accession_num>(\d+)</accession_num>.*', i)
            if matchObj:
                doctype_flag = False
                accepted = False
                accession_num = matchObj.group(1)
                search = "find recid " + accession_num + " or irn " + accession_num + " and r fermilab"
                y = perform_request_search(p=search, cc='HEP')
                # NOTE(review): when the search does not return exactly
                # one hit, recid keeps the value of the previous record
                # (or is undefined for the first one).
                if len(y) == 1 : recid = y[0]
                if VERBOSE:
                    print("{0} {1} {2}".format(counter, accession_num, recid))
                counter += 1
                url_oa = False
                # Open-access URL derived from the DOI; any error (for
                # instance a record without DOI) is silently ignored.
                try:
                    doi = get_fieldvalues(recid, '0247_a')[0]
                    search_oa = 'find recid ' + str(recid) + ' and exp cern-lhc-cms'                    
                    if VERBOSE:
                        print "doi =", doi
                        print "search_oa =", search_oa
                    if re.search(r'PhysRevSTAB', doi):
                        url_oa = 'http://journals.aps.org/prstab/pdf/' + doi
                    #elif perform_request_search(p=search_oa, cc='HEP'):
                    #    if re.search(r'PhysRevD', doi):
                    #        url_oa = 'http://journals.aps.org/prd/pdf/10.1103/' + doi
                    #    elif re.search(r'PhysRevLett', doi):
                    #        url_oa = 'http://journals.aps.org/prl/pdf/10.1103/' + doi
                    #    if VERBOSE:
                    #        print url_oa
                except:
                    pass
                # Look for a full-text URL among the record's links.
                # NOTE(review): the get_url() call after the loop replaces
                # both url_oa and accepted, so the loop's result only
                # survives if get_url raises; errors are silently ignored.
                try:
                    accepted = get_fieldvalues(recid, '8564_3')
                    urls = get_fieldvalues(recid, '8564_u')
                    for url in urls:
                        if re.search('scoap3-fulltext.pdf', url):
                            url_oa = url
                            accepted = True
                        elif re.search(r'record/\d+/files/arXiv', url) and recid in cms and not url_oa:
                            #This is to catch the CMS papers
                            url_oa = url 
                    [url_oa, accepted] = get_url(recid)
                except:
                    pass
                # Extra elements are appended to the accession_num line.
                if url_oa:
                    i += "  <url>" + url_oa + "</url>\n"
                if accepted:
                    i += "  <journal_type>AM</journal_type>\n"
                else:
                    i += "  <journal_type>FT</journal_type>\n"
                # With more than 9 entries in 700__a, emit the 100__a
                # author as "<name>; et al." plus the 710__g collaboration.
                authors = get_fieldvalues(recid, '700__a')
                if len(authors) > 9 :
                    author = get_fieldvalues(recid, '100__a')[0]
                    author = "    <author>" + author + "; et al.</author>\n"
                    i = i + author
                    collaboration = get_fieldvalues(recid, '710__g')
                    if collaboration:
                        collaboration = cgi.escape(collaboration[0])
                        collaboration = "    <contributor_organizations>" + collaboration + "</contributor_organizations>\n"
                        i = i + collaboration

                #search = "001:" + str(recid) + " 8564_y:FERMILAB*"
                ##search = "001:" + str(recid) + " 037__9:arXiv"
                #z = perform_request_search(p=search, cc='HEP')
                #if len(z) < 1 :
                #  noUrl = True
                #  print 'No url for ', recid
                #  break
                #if len(z) == 1 : arXiv_flag = True
                #else : arXiv_flag = False

                # When 269__c is empty, add a <date> taken from 502__d
                # or, failing that, from 260__c.
                phd_date = get_fieldvalues(recid, '502__d')
                normal_date = get_fieldvalues(recid, '269__c')
                try:
                    published_date = get_fieldvalues(recid, '260__c')[0]
                except IndexError:
                    published_date = False
                if phd_date and not normal_date:
                    phd_date = "    <date>" + phd_date[0] + "</date>\n"
                    i = i + phd_date
                elif published_date and not normal_date:
                    published_date = "    <date>" + published_date + "</date>\n"
                    i = i + published_date
            if VERBOSE:
                print i
        #if arXiv_flag and re.search("<availability>http://arXiv.org", i) :
        #    url = i
        #    url = re.sub(r'availability', 'url', url)
        #    url = re.sub(r'arXiv.org/abs', 'arXiv.org/pdf', url)
        #    i = i + url
        #    #noUrl = False

        # <url> lines: dropped when an open-access URL was already added
        # for this record, when they point to www.fnal.gov/pub/today, or
        # when they contain no 'fnal' at all.
        elif re.search("<url>", i):
            if url_oa: 
                i = ''
            if re.search("www.fnal.gov/pub/today", i):
                i = ''
            elif re.search("<url>.*fnal", i) :
                matchObj = re.match(r'.*<url>(.*fnal.*)</url>.*', i)
                if matchObj:
                    url_to_check =    matchObj.group(1)
                    if re.search("shtml", url_to_check) :
                        # Turn a fermilab-*.shtml page into the matching
                        # find_paper.pl PDF link.
                        url_to_check = re.sub(r'.*fermilab\-(.*)\.shtml', r'http://lss.fnal.gov/cgi-bin/find_paper.pl?\1.pdf', url_to_check)
                        if url_check_flag:
                            if not checkURL(url_to_check) :
                                error_message = "Something wrong with " + url_to_check
                                print error_message
                                break
                            else : print "No problem with url: ", url_to_check
                    i = "  <url>" + url_to_check + "</url>\n"
            
            else : i = ''

        # Replace the title line by the escaped 245__a value.
        if re.search("<title>", i) :
            title = get_fieldvalues(recid, '245__a')[0]
            title = cgi.escape(title)
            i = "  <title>" + title + "</title>\n"

        # Rewrite dates as MM/DD/YYYY, using 01 for a missing day or
        # month. An unrecognised date stops processing of all remaining
        # lines (break). The escaped 520__a abstract is appended after
        # the date line.
        if re.search("<date>", i) :
            if re.search(">\d\d\d\d\-\d\d\-\d\d<", i) :
              i = re.sub(r'>(\d\d\d\d)\-(\d\d)\-(\d\d)<', r'>\2/\3/\1<', i)
            elif re.search(">\d\d\d\d\-\d\d<", i) :
              i = re.sub(r'>(\d\d\d\d)\-(\d\d)<', r'>\2/01/\1<', i)
            elif re.search(">\d\d\d\d<", i) :
              i = re.sub(r'>(\d\d\d\d)<', r'>01/01/\1<', i)
            else :
                print "Bad date: ", recid, i
                break
            abstract = get_fieldvalues(recid, '520__a')
            if abstract :
                abstract = abstract[0]
                abstract = cgi.escape(abstract)
                abstract = "  <abstract>" + abstract + "</abstract>\n"
                i = i + abstract

        # Remember that the current record already carries a doctype.
        if re.search("<doctype>", i) :
            doctype_flag = True

        # For an arXiv eprint line seen before any <doctype> line, add
        # the 037__z report number and a JA doctype; errors (such as a
        # missing 037__z) are silently ignored.
        if re.search("<arXiv_eprint>", i) :
            if not doctype_flag:
                try:
                    report = get_fieldvalues(recid, '037__z')[0]
                    report = "  <report_number>" + report + "</report_number>\n"
                    i = i + report
                    i = i + "  <doctype>JA</doctype>\n"
                except:
                    pass

        # Split <journal_info> into name, volume and an empty issue
        # element, then fill the issue from 773__n when present.
        if re.search("journal", i) :
            i = re.sub(r'<journal_info>(.*[ \.])(\S+)\:(\S+)\,(\d+)</journal_info>', r'<journal_name>\1</journal_name>\n    <journal_volume>\2</journal_volume>\n    <journal_issue></journal_issue>', i)
            issue = get_fieldvalues(recid, '773__n')
            if issue :
                issue = issue[0]
                issue = "<journal_issue>" + str(issue) + "</journal_issue>"
                i = re.sub(r'<journal_issue></journal_issue>', issue, i)

        if re.search("<sponsor_org>", i) :
            i = re.sub(r'DOE Office of Science', r'USDOE Office of Science (SC), High Energy Physics (HEP) (SC-25)', i)

        # Keep only the first subj_category / subj_keywords line (see the
        # note where the flags are initialised).
        if re.search("<subj_category>", i) :
            if subj_category_flag : i = ''
            subj_category_flag = True

        if re.search("<subj_keywords>", i) :
            if subj_keywords_flag : i = ''
            subj_keywords_flag = True

        # Lines blanked above are dropped from the output.
        if i: 
            #print i
            output2.write(i)

    output2.close()
    print search_original