def invenio_search_xml(kwargs):
    """Fetch records from Invenio and return them as one string.

    Only queries of the form ``p=recid:1->50 OR recid:50 OR recid:...``
    are understood: the pattern is split on ``' OR '`` and every clause is
    either a single record id or an inclusive ``first->last`` range.

    'print_records' is not used because (for strange reasons) it creates a
    range out of the recIDs, and bibformat is avoided because it does not
    work nicely with strings (it is slower).

    @param kwargs: mapping holding the query under ``'p'`` and, optionally,
        the output format under ``'of'`` (``'xm'`` when absent)
    @return: the formatted records joined with newlines; for the ``'xm'``
        format they are wrapped in an XML prolog and a ``<collection>``
        element
    """
    pattern = kwargs['p']
    of = kwargs['of'] if 'of' in kwargs else 'xm'
    wrap_in_collection = (of == 'xm')

    chunks = []
    if wrap_in_collection:
        chunks.append('<?xml version="1.0" encoding="UTF-8"?>')
        chunks.append('<collection xmlns="http://www.loc.gov/MARC21/slim">')

    for clause in pattern.split(' OR '):
        clause = clause.replace('recid:', '')
        if '->' in clause:
            bounds = clause.split('->')
            # the upper bound of the range is inclusive
            recids = xrange(int(bounds[0]), int(bounds[1]) + 1)
        else:
            recids = [int(clause)]
        for recid in recids:
            chunks.append(search_engine.print_record(recid, format=of))

    if wrap_in_collection:
        chunks.append('</collection>')
    return '\n'.join(chunks)
def format_element(bfo):
    """
    Displays the latest posts on a blog and it also offers
    a link to see all the posts of the corresponding blog

    @param bfo: format object of the blog record; its control field '001'
        and its ``lang`` attribute are read
    @return: HTML with the 3 newest posts (format 'hb'), the remaining
        posts inside a toggleable ``<span>`` and the JavaScript driving the
        toggle; an empty string when the blog has no posts
    """
    this_recid = bfo.control_field('001')
    current_language = bfo.lang
    blog_posts_recids = get_posts(this_recid, newest_first=True)
    if not blog_posts_recids:
        return ""

    try:
        title = cfg_messages["in_issue"][current_language]
    except KeyError:
        # in english by default
        title = cfg_messages["in_issue"]['en']

    # Collect the fragments and join once instead of growing a string.
    parts = ["<h4>%s</h4>" % title]

    # let's print just the 3 latest posts
    for post_recid in blog_posts_recids[:3]:
        parts.append(print_record(post_recid, format='hb'))
        parts.append("<br />")

    # every older post goes into the block toggled by the link
    older_posts = []
    for post_recid in blog_posts_recids[3:]:
        older_posts.append(print_record(post_recid, format='hb'))
        older_posts.append("<br />")
    all_posts = "".join(older_posts)

    parts.append("""
    <script type="text/javascript">
    function displayAllPosts(){
        var all_posts = document.getElementById('all_posts');
        var see_all_link = document.getElementById('see_all_link');
        if (all_posts.style.display == 'none'){
            all_posts.style.display = '';
            see_all_link.innerHTML = "Show less posts"
        } else {
            all_posts.style.display = 'none';
            see_all_link.innerHTML = "Show all posts"
        }
    }
    </script>
    """)
    parts.append('<span id="all_posts" style="">' + all_posts + '</span>')
    # The onclick attribute used to end with a doubled quote, which produced
    # malformed HTML.
    parts.append('<a class="moreinfo" id="see_all_link" '
                 'href="javascript:void(0)" onclick="displayAllPosts()"></a>')
    # The span starts visible, so this first call hides it and labels the link.
    parts.append('<script type="text/javascript">displayAllPosts()</script>')
    return "".join(parts)
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search records with the
        pattern in data['query'], or "searchRevisions" to list the
        revisions of the record data['recID1']
    @param data: dictionary holding the request parameters
    @return: dictionary with 'resultCode' (0 on success, 1 on error) and
        'resultText'; on success also 'results', a list of parallel lists
        (ids, captions and, for candidate searches, alternative titles)
    """
    max_results = 999
    result = {'resultCode': 0, 'resultText': ''}

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            result['resultCode'] = 1
            result['resultText'] = 'Too many results'
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs"))
                              for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # Previously an unknown request type fell through and failed with an
        # UnboundLocalError on search_results; report it like other errors.
        result['resultCode'] = 1
        result['resultText'] = 'Unknown request type: %s' % requestType
        return result

    result['results'] = search_results
    result['resultText'] = '%s results' % len(search_results[0])
    return result
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search records with the
        pattern in data["query"], or "searchRevisions" to list the
        revisions of the record data["recID1"]
    @param data: dictionary holding the request parameters
    @return: dictionary with "resultCode" (0 on success, 1 on error) and
        "resultText"; on success also "results", a list of parallel lists
        (ids, captions and, for candidate searches, alternative titles)
    """
    max_results = 999
    result = {"resultCode": 0, "resultText": ""}

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data["query"])
        if len(recids) > max_results:
            result["resultCode"] = 1
            result["resultText"] = "Too many results"
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs"))
                              for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data["recID1"])
        captions = [split_revid(x, "datetext")[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # Previously an unknown request type fell through and failed with an
        # UnboundLocalError on search_results; report it like other errors.
        result["resultCode"] = 1
        result["resultText"] = "Unknown request type: %s" % requestType
        return result

    result["results"] = search_results
    result["resultText"] = "%s results" % len(search_results[0])
    return result
def Get_Field(fieldname,bibrec):
    """
    This function returns the value of the specified field
    from the specified document

    @param fieldname: name of the field to extract; handed to print_record
        as a one-element list (third positional argument)
    @param bibrec: identifier of the document; converted with int()
    @return: the 'tm' output of print_record with leading and trailing
        whitespace removed
    """
    # str.strip() replaces the deprecated string.strip() module function.
    return print_record(int(bibrec), 'tm', [fieldname]).strip()
def cached_format_record(recIDs, of, ln='', verbose=0,
                         search_pattern=None, xml_records=None,
                         user_info=None, record_prefix=None,
                         record_separator=None, record_suffix=None,
                         prologue="", epilogue="", req=None,
                         on_the_fly=False):
    """Format record(s) by delegating to print_record.

    Only ``recIDs``, ``of``, ``ln`` and ``verbose`` are forwarded, and
    ``brief_links`` is always passed as False.  All remaining parameters
    are accepted but not used by this function.

    @return: whatever print_record returns for the given arguments
    """
    formatted = print_record(recIDs, of, ln=ln, verbose=verbose,
                             brief_links=False)
    return formatted
def Get_Field(fieldname, bibrec):
    """
    This function returns the value of the specified field
    from the specified document

    @param fieldname: name of the field to extract; handed to print_record
        as a one-element list (third positional argument)
    @param bibrec: identifier of the document; converted with int()
    @return: the 'tm' output of print_record with leading and trailing
        whitespace removed
    """
    # Use the str method: the string.strip() module function is deprecated.
    raw_value = print_record(int(bibrec), 'tm', [fieldname])
    return raw_value.strip()
def perform_candidate_record_search(requestType, data):
    """Handle search requests.

    @param requestType: "searchCandidates" to search records with the
        pattern in data['query'], or "searchRevisions" to list the
        revisions of the record data['recID1']
    @param data: dictionary holding the request parameters
    @return: dictionary with 'resultCode' (0 on success, 1 on error) and
        'resultText'; on success also 'results', a list of parallel lists
        (ids, captions and, for candidate searches, alternative titles)
    """
    max_results = 999
    result = {
        'resultCode': 0,
        'resultText': ''
        }

    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            result['resultCode'] = 1
            result['resultText'] = 'Too many results'
            return result
        captions = [search_result_info(x) for x in recids]
        alternative_titles = [remove_html_markup(print_record(x, "hs"))
                              for x in recids]
        search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]
    else:
        # Previously an unknown request type fell through and failed with an
        # UnboundLocalError on search_results; report it like other errors.
        result['resultCode'] = 1
        result['resultText'] = 'Unknown request type: %s' % requestType
        return result

    result['results'] = search_results
    result['resultText'] = '%s results' % len(search_results[0])
    return result
def format_element(bfo):
    """
    Displays comments on a post

    @param bfo: format object of the post record; its control field '001'
        and its ``lang`` attribute are read
    @return: HTML with the 2 newest comments (format 'hb'), the remaining
        comments inside a toggleable ``<span>`` and the JavaScript driving
        the toggle; an empty string when the post has no comments
    """
    this_recid = bfo.control_field('001')
    current_language = bfo.lang
    post_comments_recids = get_comments(this_recid, newest_first=True)
    if not post_comments_recids:
        return ""

    try:
        title = cfg_messages["in_issue"][current_language]
    except KeyError:
        # No title for this language: use English instead of failing.
        title = cfg_messages["in_issue"]['en']

    # Collect the fragments and join once instead of growing a string.
    parts = ["<h4>%s</h4>" % title]

    # let's print just the 2 latest comments
    for comment_recid in post_comments_recids[:2]:
        parts.append(print_record(comment_recid, format='hb'))
        parts.append("<br />")

    # every older comment goes into the block toggled by the link
    older_comments = []
    for comment_recid in post_comments_recids[2:]:
        older_comments.append(print_record(comment_recid, format='hb'))
        older_comments.append("<br />")
    all_comments = "".join(older_comments)

    parts.append("""
    <script type="text/javascript">
    function displayAllComments(){
        var all_comments = document.getElementById('all_comments');
        var see_all_link = document.getElementById('see_all_link');
        if (all_comments.style.display == 'none'){
            all_comments.style.display = '';
            see_all_link.innerHTML = "Show less comments"
        } else {
            all_comments.style.display = 'none';
            see_all_link.innerHTML = "Show all comments"
        }
    }
    </script>
    """)
    parts.append('<span id="all_comments" style="">' + all_comments + '</span>')
    # The onclick attribute used to end with a doubled quote, which produced
    # malformed HTML.
    parts.append('<a class="moreinfo" id="see_all_link" '
                 'href="javascript:void(0)" onclick="displayAllComments()"></a>')
    # The span starts visible, so this first call hides it and labels the link.
    parts.append('<script type="text/javascript">displayAllComments()</script>')
    return "".join(parts)
def _format_record(recid, of='hd', user_info=current_user, *args, **kwargs):
    """Format one record by delegating to print_record.

    @param recid: identifier of the record
    @param of: output format, forwarded as ``format``
    @param user_info: forwarded as ``user_info``
    @return: whatever print_record returns
    """
    # NOTE(review): the ``current_user`` default is bound once, when this
    # function is defined -- presumably it is a proxy object; confirm.
    formatted = print_record(recid, format=of, user_info=user_info,
                             *args, **kwargs)
    return formatted
def get_record(recid, reset_cache=False, fields=()):
    """
    Record factory, it retrieves the record from bibfmt table if it is there,
    if not, or reset_cache is set to True, it searches for the appropriate
    reader to create the representation of the record.

    @param recid: identifier of the record
    @param reset_cache: when True the cached 'recjson' value is ignored and
        rebuilt from the MARCXML of the record
    @param fields: optional keys; when given, a SmartDict holding only those
        keys of the record is returned instead of the record itself
    @return: Bibfield object representing the record or None if the recid is
        not present in the system
    """
    record = None

    # Search for recjson
    if not reset_cache:
        res = run_sql(
            "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format='recjson'",
            (recid, ))
        if res:
            try:
                record = Record(msgpack.loads(res[0][0]))
            except Exception:
                # Maybe the cached version is broken
                record = None

    # There is no version cached or we want to renew it
    # Then retrieve information and blob
    if not record or reset_cache:
        try:
            # Doubles as an existence check: indexing fails when the query
            # returns no row for this recid.
            master_format = run_sql(
                "SELECT master_format FROM bibrec WHERE id=%s",
                (recid, ))[0][0]
        except Exception:
            return None
        schema = 'xml'
        # The stored master format is overridden: the blob fetched below is
        # always the 'xm' output of print_record.
        master_format = 'marc'
        try:
            from invenio.search_engine import print_record
            blob = print_record(recid, format='xm')
        except Exception:
            return None

        reader = CFG_BIBFIELD_READERS['bibfield_%sreader.py' %
                                      (master_format, )](blob, schema=schema)
        record = Record(reader.translate())
        # Update bibfmt for future uses
        run_sql(
            "REPLACE INTO bibfmt(id_bibrec, format, last_updated, value) VALUES (%s, 'recjson', NOW(), %s)",
            (recid, msgpack.dumps(record.dumps())))

    if fields:
        chunk = SmartDict()
        for key in fields:
            chunk[key] = record.get(key)
        record = chunk
    return record
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None and stores the error in
       result['resultCode'] / result['resultText'].
       If mode=='revision' then recid parameter is considered as revid.
       If recid is 'none' (or mode is 'none') an empty dictionary is
       returned."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        # check for errors
        if record_status == 0:
            result['resultCode'], result[
                'resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result[
                'resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result[
                'resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):
            # check if file doesn't exist
            result['resultCode'], result[
                'resultText'] = 1, 'Temporary file doesnt exist'
        else:
            # the context manager closes the file even when reading fails
            with open(file_path, 'r') as tmpfile:
                marcxml = tmpfile.read()
            record = create_record(marcxml)[0]

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result[
                    'resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result[
                'resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result[
            'resultText'] = 1, 'Invalid record mode for record2'
    return record
def main():
    """Mail out the job postings added since the previous run.

    The state of the previous run is read from a log file named after this
    script ("tmp_<script>_log"): the most recent record id that was mailed
    and the date of that run.  When either is missing, the operator is
    asked for a start date.  Records of the "Jobs" collection matching
    "fin da >= <date>" and newer than the logged record id are formatted
    with print_record (format 'he'), concatenated and handed to
    send_jobs_mail.  The new state is appended to the log just before the
    mail is sent.

    The log file must already exist: it is opened in 'r+' mode.
    """
    date_last_run = False
    latest_recid = False
    latest = False
    results = []  # stays empty when the operator enters no date

    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    filename = re.sub(r"\.pyc?$", "_log", "tmp_" + __file__)
    with open(filename, 'r+') as stored_file:
        log_lines = stored_file.readlines()
        for line in log_lines:
            recid_match = re.match(r'most recent recid = (\d+)', line)
            if recid_match:
                # group(1) is the number alone (group(0) would be the whole
                # matched text); convert it so it can be compared numerically
                # -- assumes the search returns integer record ids.
                latest_recid = int(recid_match.group(1))
            date_match = re.match(r'date last run = (.*)', line)
            if date_match:
                date_last_run = date_match.group(1).strip()

        if not (date_last_run and latest_recid):
            date_last_run = raw_input(
                "Couldn't find the date of the most recently sent "
                "New Jobs Mailout.\n"
                "Send jobs posted on and after this date (yyyy-mm-dd): ")
        if date_last_run:
            results = perform_request_search(p="fin da >= %s" % date_last_run,
                                             cc="Jobs")

        recids = list(results)
        if recids:
            print(recids)
            if latest_recid:
                recids = [x for x in recids if x > latest_recid]
                print(recids)
        if recids:
            latest = max(recids)
        else:
            print("No postings since %s." % date_last_run)

        if latest:
            # Position explicitly at the end before switching from reading
            # to writing.
            stored_file.seek(0, 2)
            if log_lines and not log_lines[-1].endswith("\n"):
                stored_file.write("\n")
            # One entry per line, otherwise the next run cannot parse them.
            stored_file.write("most recent recid = %s\n" % latest)
            stored_file.write("date last run = %s\n" % datetime.date.today())
            stored_file.write("records in last mailout = %s\n"
                              % ', '.join(str(x) for x in sorted(recids)))
            content = ''.join(print_record(r, ot=[], format='he')
                              for r in recids)
            send_jobs_mail(content)
def get_pubtype_info(doctype):
    """call output format for publication types and return it as
    dictionary (json)

    @param doctype: value searched for in field '3367_' of the 'PubTypes'
        collection
    @return: dictionary decoded from the 'js' output of the first matching
        record, or an empty dictionary when nothing matches
    """
    # directly call the backend...
    query = '3367_:' + doctype
    res = perform_request_search(p=query, cc='PubTypes')
    # Truthiness also covers empty results that are not plain lists, which
    # a comparison with [] would let through to the res[0] below.
    if not res:
        return {}
    # and return the first rec in JS for further processing
    text = print_record(res[0], 'js')
    jsontext = washJSONinput(text)
    return json.loads(jsontext, 'utf8')
def get_record(recid, reset_cache=False, fields=()):
    """
    Record factory, it retrieves the record from bibfmt table if it is there,
    if not, or reset_cache is set to True, it searches for the appropriate
    reader to create the representation of the record.

    @param recid: identifier of the record
    @param reset_cache: when True the cached 'recjson' value is ignored and
        rebuilt from the MARCXML of the record
    @param fields: optional keys; when given, a SmartDict holding only those
        keys of the record is returned instead of the record itself
    @return: Bibfield object representing the record or None if the recid is
        not present in the system
    """
    record = None

    # Search for recjson
    if not reset_cache:
        res = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND format='recjson'",
                      (recid,))
        if res:
            try:
                record = Record(msgpack.loads(res[0][0]))
            except Exception:
                # Maybe the cached version is broken
                record = None

    # There is no version cached or we want to renew it
    # Then retrieve information and blob
    if not record or reset_cache:
        try:
            # Doubles as an existence check: indexing fails when the query
            # returns no row for this recid.
            master_format = run_sql("SELECT master_format FROM bibrec WHERE id=%s",
                                    (recid,))[0][0]
        except Exception:
            return None
        schema = 'xml'
        # The stored master format is overridden: the blob fetched below is
        # always the 'xm' output of print_record.
        master_format = 'marc'
        try:
            from invenio.search_engine import print_record
            blob = print_record(recid, format='xm')
        except Exception:
            return None

        reader = CFG_BIBFIELD_READERS['bibfield_%sreader.py' % (master_format,)](blob, schema=schema)
        record = Record(reader.translate())
        # Update bibfmt for future uses
        run_sql("REPLACE INTO bibfmt(id_bibrec, format, last_updated, value) VALUES (%s, 'recjson', NOW(), %s)",
                (recid, msgpack.dumps(record.dumps())))

    if fields:
        chunk = SmartDict()
        for key in fields:
            chunk[key] = record.get(key)
        record = chunk
    return record
def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
    """Answer question given by context.

    Return (relevance, html_string) where relevance is integer
    from 0 to 100 indicating how relevant to the question the
    answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details),
    and html_string being a formatted answer.

    Relevance is 0 (with an empty string) when a field C{f} is given, when
    C{p} does not look like a journal reference, when the reference is not
    resolved to the "journal" field or when no record matches; 100 when
    exactly one record matches (the record is displayed); 80 otherwise (a
    "find rawref" search link is suggested).
    """
    from invenio.refextract_api import search_from_reference

    _ = gettext_set_language(ln)

    if f or not self.seems_a_journal_reference(p):
        return (0, "")

    (field, pattern) = search_from_reference(p.decode('utf-8'))

    # Compare by value: an identity test against a string literal only
    # succeeds when both strings happen to be the same object.
    if field != "journal":
        return (0, "")

    recids = perform_request_search(req=req, p=pattern, f=field, cc=cc,
                                    c=colls_to_search)

    if not recids:
        return (0, "")

    if len(recids) == 1:
        recid = recids.pop()
        # the user info is always recomputed from the request here
        user_info = collect_user_info(req)
        return (100, """\
<p><span class="journalhint">%s</span></p>
<table style="padding: 5px; border: 2px solid #ccc; margin: 20px"><tr><td>
%s
</td></tr></table>""" % (escape(_("Were you looking for this paper?")),
                         print_record(recid, ln=ln, user_info=user_info)))

    query = "find rawref \"" + p + "\""
    query_link = CFG_SITE_URL + '/search?' + urlencode({'p': query})
    return (
        80,
        '<span class="journalhint">%s</span>' %
        (_("Were you looking for a journal reference? Try: %(x_href)s") % {
            "x_href":
            '<a href="{0}">{1}</a>'.format(escape(query_link, True),
                                           escape(query))
        }, ))
def main():
    """Mail out the job postings added since the previous run.

    The state of the previous run is read from a log file named after this
    script ("tmp_<script>_log"): the most recent record id that was mailed
    and the date of that run.  When either is missing, the operator is
    asked for a start date.  Records of the "Jobs" collection matching
    "fin da >= <date>" and newer than the logged record id are formatted
    with print_record (format 'he'), concatenated and handed to
    send_jobs_mail.  The new state is appended to the log just before the
    mail is sent.

    The log file must already exist: it is opened in 'r+' mode.
    """
    date_last_run = False
    latest_recid = False
    latest = False
    results = []  # stays empty when the operator enters no date

    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    filename = re.sub(r"\.pyc?$", "_log", "tmp_"+__file__)
    with open(filename, 'r+') as stored_file:
        log_lines = stored_file.readlines()
        for line in log_lines:
            matchObj1 = re.match(r'most recent recid = (\d+)', line)
            if matchObj1:
                # group(1) is the number alone (group(0) would be the whole
                # matched text); convert it so it can be compared numerically
                # -- assumes the search returns integer record ids.
                latest_recid = int(matchObj1.group(1))
            matchObj2 = re.match(r'date last run = (.*)', line)
            if matchObj2:
                date_last_run = matchObj2.group(1).strip()

        if not (date_last_run and latest_recid):
            date_last_run = raw_input(
                "Couldn't find the date of the most recently sent "
                "New Jobs Mailout.\n"
                "Send jobs posted on and after this date (yyyy-mm-dd): ")
        if date_last_run:
            results = perform_request_search(p="fin da >= %s" % date_last_run,
                                             cc="Jobs")

        recids = list(results)
        if recids:
            print(recids)
            if latest_recid:
                recids = [x for x in recids if x > latest_recid]
                print(recids)
        if recids:
            latest = max(recids)
        else:
            print("No postings since %s." % date_last_run)

        if latest:
            # Position explicitly at the end before switching from reading
            # to writing.
            stored_file.seek(0, 2)
            if log_lines and not log_lines[-1].endswith("\n"):
                stored_file.write("\n")
            # One entry per line, otherwise the next run cannot parse them.
            stored_file.write("most recent recid = %s\n" % latest)
            stored_file.write("date last run = %s\n" % datetime.date.today())
            stored_file.write("records in last mailout = %s\n"
                              % ', '.join(str(x) for x in sorted(recids)))
            content = ''.join(print_record(r, ot=[], format='he')
                              for r in recids)
            send_jobs_mail(content)
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None and stores the error in
       result['resultCode'] / result['resultText'].
       If mode=='revision' then recid parameter is considered as revid.
       If recid is 'none' (or mode is 'none') an empty dictionary is
       returned."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record( print_record(recid, 'xm') )[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path): #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
        else:
            # the context manager closes the file even when reading fails
            with open(file_path, 'r') as tmpfile:
                marcxml = tmpfile.read()
            record = create_record(marcxml)[0]

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
def _create_records_xml(self, record_IDs):
    """Creates XML containing all the information for
    the records with the given identifiers

    @param record_IDs: list of identifiers of records

    @return: MARC XML containing all the information about the records
    """
    # Join once instead of growing the string record by record.
    records_xml = "".join(
        search_engine.print_record(recID=record_id, format="xm")
        for record_id in record_IDs)
    return "<collection>" + records_xml + "</collection>"
def test_BibUpload_revision_verifier(self):
    """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios

    Four uploads are performed in sequence: revision 1 (insert), revision 2
    (replace), a modified revision 1 (replace, expected to produce
    self.final_xm) and a modified revision 2 (replace, expected to return
    error code 2).  The placeholder record id '123456789' and the
    placeholder 005 value '20110101000000.0' in the fixtures are replaced
    with the real values as they become known.
    """
    # --> Revision 1 submitted
    recs = xml_marc_to_records(self.rev1)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='insert')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, '005', '', '')

    # fill the new record id and the 005 value of revision 1 into the fixtures
    recid_text = str(self.recid)
    self.rev2 = self.rev2.replace('123456789', recid_text)
    self.rev2 = self.rev2.replace('20110101000000.0', rev)
    self.rev1_modified = self.rev1_modified.replace('123456789', recid_text)
    self.rev1_modified = self.rev1_modified.replace('20110101000000.0', rev)
    self.final_xm = self.final_xm.replace('123456789', recid_text)

    # --> Revision 2 submitted
    recs = xml_marc_to_records(self.rev2)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev2_stamp = record_get_field_value(record, '005', '', '')
    self.rev2 = self.rev2.replace(rev, rev2_stamp)
    self.rev2_modified = self.rev2_modified.replace('123456789',
                                                    str(self.recid))
    self.rev2_modified = self.rev2_modified.replace('20110101000000.0',
                                                    rev2_stamp)

    # --> Revision 1 modified submitted
    recs = xml_marc_to_records(self.rev1_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, '005', '', '')
    self.final_xm = self.final_xm.replace('20110101000000.0', rev)
    self.assertEqual(
        compare_xmbuffers(self.final_xm, print_record(self.recid, 'xm')),
        '')

    # --> Revision 2 modified submitted
    recs = xml_marc_to_records(self.rev2_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(error, 2)
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None and stores the error in
       result["resultCode"] / result["resultText"].
       If mode=='revision' then recid parameter is considered as revid.
       If recid is "none" (or mode is "none") an empty dictionary is
       returned."""
    record = None
    if recid == "none":
        mode = "none"
    if mode == "recid":
        record_status = record_exists(recid)
        # check for errors
        if record_status == 0:
            result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
        elif record_status == -1:
            result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
        elif record_locked_by_queue(recid):
            result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
        else:
            record = create_record(print_record(recid, "xm"))[0]

    elif mode == "tmpfile":
        file_path = "%s_%s.xml" % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  # check if file doesn't exist
            result["resultCode"], result["resultText"] = 1, "Temporary file doesnt exist"
        else:
            # the context manager closes the file even when reading fails
            with open(file_path, "r") as tmpfile:
                marcxml = tmpfile.read()
            record = create_record(marcxml)[0]

    elif mode == "revision":
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result["resultCode"], result["resultText"] = 1, "The specified revision does not exist"
        else:
            result["resultCode"], result["resultText"] = 1, "Invalid revision id"

    elif mode == "none":
        return {}

    else:
        result["resultCode"], result["resultText"] = 1, "Invalid record mode for record2"
    return record
def main():
    """Write corrected MARCXML for the records matching SEARCH in SUBFILE.

    At most the first 500 matching records are processed.  For each one
    the fields '001' and MARC_FIELD are exported in 'xm' format, cleaned
    up with a few regular expressions and written, wrapped in a
    <collection> element, to "tmp_<script>_correct.out".
    """
    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    file_name = re.sub(r'\.pyc?$', '_correct.out', 'tmp_' + __file__)
    # The context manager closes the file even if formatting a record fails.
    with open(file_name,'w') as output:
        output.write('<collection>')
        result = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = result[:500]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', MARC_FIELD], format='xm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            # keep only the youtube link found in a subfield value
            info = re.sub(r'>.* (\S+youtube\S+).*<', r'>\1<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            info = re.sub(r'.*Waseda U., Cosmic Ray Div..*', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
def answer(self, req, user_info, of, cc, colls_to_search, p, f, search_units, ln):
    """Answer question given by context.

    Return (relevance, html_string) where relevance is integer
    from 0 to 100 indicating how relevant to the question the
    answer is (see C{CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE} for details),
    and html_string being a formatted answer.

    Relevance is 0 (with an empty string) when a field C{f} is given, when
    C{p} does not look like a journal reference, when the reference is not
    resolved to the "journal" field or when no record matches; 100 when
    exactly one record matches (the record is displayed); 80 otherwise (a
    "find rawref" search link is suggested).
    """
    from invenio.refextract_api import search_from_reference

    _ = gettext_set_language(ln)

    if f or not self.seems_a_journal_reference(p):
        return (0, "")

    (field, pattern) = search_from_reference(p.decode('utf-8'))

    # Value comparison: "is not" tests object identity, which is not a
    # reliable way to compare a string with a literal.
    if field != "journal":
        return (0, "")

    recids = perform_request_search(
        req=req, p=pattern, f=field, cc=cc, c=colls_to_search)

    if not recids:
        return (0, "")

    if len(recids) == 1:
        recid = recids.pop()
        # the user info is always recomputed from the request here
        user_info = collect_user_info(req)
        return (100, """\
<p><span class="journalhint">%s</span></p>
<table style="padding: 5px; border: 2px solid #ccc; margin: 20px"><tr><td>
%s
</td></tr></table>""" % (escape(_("Were you looking for this paper?")),
                         print_record(recid, ln=ln, user_info=user_info)))

    query = "find rawref \"" + p + "\""
    query_link = CFG_SITE_URL + '/search?' + urlencode({'p': query})
    return (80, '<span class="journalhint">%s</span>' % (
        _("Were you looking for a journal reference? Try: %(x_href)s") %
        {"x_href": '<a href="{0}">{1}</a>'.format(
            escape(query_link, True), escape(query))}, ))
def main():
    """Write corrected MARCXML for the records matching SEARCH in SUBFILE.

    At most the first 500 matching records are processed.  For each one
    the fields '001' and MARC_FIELD are exported in 'xm' format, cleaned
    up with a few regular expressions and written, wrapped in a
    <collection> element, to "tmp_<script>_correct.out".
    """
    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    file_name = re.sub(r'\.pyc?$', '_correct.out', 'tmp_' + __file__)
    # The context manager closes the file even if formatting a record fails.
    with open(file_name, 'w') as output:
        output.write('<collection>')
        result = perform_request_search(p=SEARCH, cc=SUBFILE)
        result = result[:500]
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', MARC_FIELD], format='xm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            # keep only the youtube link found in a subfield value
            info = re.sub(r'>.* (\S+youtube\S+).*<', r'>\1<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            info = re.sub(r'.*Waseda U., Cosmic Ray Div..*', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
def main():
    """Dump selected fields of the D0 preliminary note records.

    The first 5 records returned by search_unit for
    "*D0-PRELIMINARY-NOTE*" in field '980*' are exported in 'hm' format
    (fields '001', '100', '700' and '980'), stripped of their <pre> tags
    and written, wrapped in a <collection> element, to
    "tmp_<script>_correct.out".
    """
    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    file_name = re.sub(r'\.pyc?$', '_correct.out', 'tmp_' + __file__)
    # The SEARCH/SUBFILE query that used to run here was dropped: its
    # result was never used.
    result = search_unit(p="*D0-PRELIMINARY-NOTE*",m='a',f='980*')
    result = result[:5]
    # The context manager closes the file even if formatting a record fails.
    with open(file_name,'w') as output:
        output.write('<collection>')
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', '100', '700', '980'], format='hm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            #info = re.sub(r'>\w+tion [fF]*or [Tt]he (\w+)<', r'>\1 Collaboration<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            # drop the <pre> wrapper from the 'hm' output
            info = re.sub(r'<\/?pre[^\>]*>', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
def test_BibUpload_revision_verifier(self):
    """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios

    Four uploads are performed in sequence: revision 1 (insert), revision 2
    (replace), a modified revision 1 (replace, expected to produce
    self.final_xm) and a modified revision 2 (replace, expected to return
    error code 2).  The placeholder record id "123456789" and the
    placeholder 005 value "20110101000000.0" in the fixtures are replaced
    with the real values as they become known.
    """
    # --> Revision 1 submitted
    recs = xml_marc_to_records(self.rev1)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="insert")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, "005", "", "")

    # fill the new record id and the 005 value of revision 1 into the fixtures
    recid_text = str(self.recid)
    self.rev2 = self.rev2.replace("123456789", recid_text)
    self.rev2 = self.rev2.replace("20110101000000.0", rev)
    self.rev1_modified = self.rev1_modified.replace("123456789", recid_text)
    self.rev1_modified = self.rev1_modified.replace("20110101000000.0", rev)
    self.final_xm = self.final_xm.replace("123456789", recid_text)

    # --> Revision 2 submitted
    recs = xml_marc_to_records(self.rev2)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev2_stamp = record_get_field_value(record, "005", "", "")
    self.rev2 = self.rev2.replace(rev, rev2_stamp)
    self.rev2_modified = self.rev2_modified.replace("123456789", str(self.recid))
    self.rev2_modified = self.rev2_modified.replace("20110101000000.0", rev2_stamp)

    # --> Revision 1 modified submitted
    recs = xml_marc_to_records(self.rev1_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, "005", "", "")
    self.final_xm = self.final_xm.replace("20110101000000.0", rev)
    self.assertEqual(compare_xmbuffers(self.final_xm, print_record(self.recid, "xm")), "")

    # --> Revision 2 modified submitted
    recs = xml_marc_to_records(self.rev2_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(error, 2)
def main():
    """Dump selected fields of the D0 preliminary note records.

    The first 5 records returned by search_unit for
    "*D0-PRELIMINARY-NOTE*" in field '980*' are exported in 'hm' format
    (fields '001', '100', '700' and '980'), stripped of their <pre> tags
    and written, wrapped in a <collection> element, to
    "tmp_<script>_correct.out".
    """
    # Escape the dot and anchor the pattern so that only the extension is
    # replaced (".py" as a regex matches any character followed by "py").
    file_name = re.sub(r'\.pyc?$', '_correct.out', 'tmp_' + __file__)
    # The SEARCH/SUBFILE query that used to run here was dropped: its
    # result was never used.
    result = search_unit(p="*D0-PRELIMINARY-NOTE*", m='a', f='980*')
    result = result[:5]
    # The context manager closes the file even if formatting a record fails.
    with open(file_name, 'w') as output:
        output.write('<collection>')
        for recid in result:
            #info = print_record(recid, ot=['001','037'], format='xm')
            #info = re.sub(r'code="a">FERMILAB', r'code="z">FERMILAB', info)
            info = print_record(recid, ot=['001', '100', '700', '980'],
                                format='hm')
            #info = re.sub(r'>.*[fF]*o[rf] the (\w+) [Cc]oll.*<', r'>\1 Collaboration<', info)
            #info = re.sub(r'>\w+tion [fF]*or [Tt]he (\w+)<', r'>\1 Collaboration<', info)
            info = re.sub(r'code="e">FERMILAB-TEV-', r'', info)
            # drop the <pre> wrapper from the 'hm' output
            info = re.sub(r'<\/?pre[^\>]*>', r'', info)
            info += '\n\n'
            output.write(info)
        output.write('</collection>')
def _getAuthority(self, prog, simulation):
    """Look up the authority record for ``prog`` and return it as a dict.

    The search string is taken from ``self._pofsearchdict[prog]``.  When
    exactly one record matches, its 'js' output is washed, decoded as JSON
    and returned without its 'label' key; otherwise None is returned.

    @param prog: key into ``self._pofsearchdict``
    @param simulation: currently ignored (see the note below)
    """
    # NOTE(review): the caller-supplied flag is overwritten here, so the
    # simulation branch below can never run -- confirm this is intended.
    simulation = False
    if simulation:
        # simulation mode no connection to Invenio
        print('Simulation ist True')
        return self._getPOF('Krebsforschung')

    print('Simulation ist False')
    import simplejson as json
    from invenio.websubmit_functions.Websubmit_Helpers_hgf import washJSONinput
    from invenio.search_engine import perform_request_search, print_record

    search_str = self._pofsearchdict[prog]
    matches = perform_request_search(p=search_str)
    if len(matches) != 1:
        return None

    washed = washJSONinput(print_record(matches[0], format='js'))
    authority = json.loads(washed, 'utf8')
    authority.pop('label', None)
    return authority
def _get_record_NLM_XML(self, record):
    """Returns the record in National Library of Medicine XML format.

    @param record: passed to print_record as the record to format
    @return: the 'xn' output of print_record
    """
    nlm_xml = print_record(record, format='xn')
    return nlm_xml
def _get_record_MARCXML(self, record):
    """Returns the record in MARCXML format.

    @param record: passed to print_record as the record to format
    @return: the 'xm' output of print_record
    """
    marcxml = print_record(record, format='xm')
    return marcxml
def _send_app_mail_read_file(curdir, filename, description, check_exists=True):
    """Return the stripped content of ``curdir/filename`` ("" if unavailable).

    With *check_exists* a missing file silently yields "".  A file that
    cannot be opened or read is registered as an exception (naming it
    as the *description* file) and also yields "".
    """
    path = "%s/%s" % (curdir, filename)
    if check_exists and not os.path.exists(path):
        return ""
    try:
        fh = open(path, "r")
        try:
            content = fh.read()
        finally:
            # close the handle even when read() fails
            fh.close()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "Send_APP_Mail. Tried to open " \
                           "%s file [%s] but was " \
                           "unable to." % (description, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def Send_APP_Mail (parameters, curdir, form, user_info=None):
    """
    This function sends an email informing the original submitter of a
    document that the referee has approved/rejected the document. The
    email is also sent to the referee for checking.

    Parameters:

       * addressesAPP: email addresses of the people who will receive
         this email (comma separated list). This parameter may contain
         the <CATEG> string, which is replaced by the category computed
         from the [categformatAPP] parameter.

       * categformatAPP: regular expression used to compute the category
         of the document given its reference. <CATEG> marks the category
         part, e.g. with "TEST-<CATEG>-.*" and the reference
         "TEST-CATEGORY1-2001-001" the category is "CATEGORY1".

       * newrnin: name of the file containing the 2nd reference of the
         approved document (if any).

       * decision_file (optional): name of the file containing the
         decision; falls back to a file called 'decision' in curdir.

       * comments_file (optional): name of the file containing the
         referee's comments; falls back to a file called 'COM' in curdir.

    Relies on the globals titlevalue, authorvalue, sysno and rn (and on
    emailvalue when the record holds no owner email).

    @return: empty string.
    """
    global titlevalue,authorvalue, emailvalue,sysno,rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL)
    sequence_id = bibtask_allocate_sequenceid(curdir)
    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n"," ")
    authorvalue = authorvalue.replace("\n","; ")
    # variables declaration
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## Get the name of the comments file:
    try:
        comments_filename = parameters['comments_file']
    except KeyError:
        comments_filename = ""

    ## Read the comments. Without a file name, fall back (for backward
    ## compatibility) to a file called 'COM' in curdir:
    if comments_filename in (None, "", "NULL"):
        comments_filename = "COM"
    comment = _send_app_mail_read_file(curdir, comments_filename, "comments")

    ## Read the decision. Without a file name, fall back (for backward
    ## compatibility) to a file called 'decision' in curdir. An
    ## explicitly named decision file is expected to exist, so its
    ## absence is registered as an exception rather than ignored:
    if decision_filename in (None, "", "NULL"):
        decision = _send_app_mail_read_file(curdir, "decision", "decision")
    else:
        decision = _send_app_mail_read_file(curdir, decision_filename,
                                            "decision", check_exists=False)

    ## Read the new reference number (kept verbatim, not stripped):
    newrn = ""
    newrn_fullpath = "%s/%s" % (curdir,newrnpath)
    if os.path.exists(newrn_fullpath):
        fp = open(newrn_fullpath, "r")
        try:
            newrn = fp.read()
        finally:
            fp.close()

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,))
    docname = res[0][0]

    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"

    ## Collect the recipients in a list and join them once at the end,
    ## so that empty parts can never produce leading/doubled commas.
    recipients = []
    if CFG_CERN_SITE:
        ## The referees system in CERN now works with listbox membership.
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        recipients.append(referee_listname)
    else:
        # Referees of this category, from the referee's database
        for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype,category))):
            if user[1]:
                recipients.append(user[1])
        # And if there is a general referee
        for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
            if user[1]:
                recipients.append(user[1])
        otheraddresses = otheraddresses.replace("<CATEG>",category)
        if otheraddresses != "":
            recipients.append(otheraddresses)

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = [email.lower().strip() \
                              for email in record_owners.split("\n")]
    else:
        # if the record owner can not be retrieved from the metadata
        # (in case the record has not been inserted yet),
        # try to use the global variable emailvalue
        try:
            record_owners_list = [emailvalue]
        except NameError:
            record_owners_list = []
    record_owners = ",".join(record_owners_list)
    if record_owners != "":
        recipients.append(record_owners)
    addresses = ",".join(recipients)

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname,rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (CFG_SITE_URL,CFG_SITE_RECORD,sysno)
    else:
        # any decision other than "approve" is reported as a rejection
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname,rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue,authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment
    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, addresses, mailtitle, mailbody,
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
                             other_bibtasklet_arguments=['-I', str(sequence_id)])
    return ""
def _confirmation_values_as_html(raw_values):
    """HTML-escape each newline-separated value and end it with "<br />"."""
    return "".join(["%s<br />\n" % cgi.escape(value.strip())
                    for value in raw_values.split("\n")])


def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.
       @parameters: None.
       @return: None.
       @Exceptions raise: InvenioWebSubmitFunctionError if the step or the
           record id cannot be interpreted as an integer, or if the record
           does not exist;
           InvenioWebSubmitFunctionStop in order to display the details of the
           record and the confirmation message.
    """
    global sysno

    ## Make sure that we know the current step.  A missing "step" field
    ## (KeyError) or a non-numeric value (ValueError) is just as fatal
    ## as a value of the wrong type:
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    newstep = current_step + 1

    ## Make sure that the sysno is valid:
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    ## Retrieve the details to be displayed:
    ##
    ## Author(s):
    rec_authors = ""
    for author_tag in ("100__a", "700__a"):
        tag_authors = print_record(working_recid, 'tm', author_tag)
        if tag_authors != "":
            rec_authors += _confirmation_values_as_html(tag_authors)

    ## Title (formatted even when empty):
    rec_title = _confirmation_values_as_html(
        print_record(working_recid, 'tm', "245__a"))

    ## Report numbers:
    rec_reportnums = ""
    for reportnum_tag in ("037__a", "088__a"):
        tag_reportnums = print_record(working_recid, 'tm', reportnum_tag)
        if tag_reportnums != "":
            rec_reportnums += _confirmation_values_as_html(tag_reportnums)

    ## Stop the submission flow here and show the confirmation page:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
def get_bibrecord(recid):
    """Return the record *recid* in BibRecord wrapping.

    The record is printed as MARCXML and parsed with create_record;
    None is returned when record_exists() reports a false value.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, 'xm')
    parsed = create_record(marcxml)
    return parsed[0]
def process_references(book):
    '''
    Look through reference list to find the cited reference and clean it up.

    *book* is a sequence (author, title, isbn, recid[, date]).  HEP
    records whose 999C5 references match both the author and the title
    (and do not already refer to the book) are fetched; every reference
    line matching author, title and (when given) date gets the ISBN,
    a "$$0" link to the book record and "$$z1" appended.

    Returns a list with one entry per modified record, each entry being
    the list of that record's 999C5 lines (newline-terminated).
    '''
    print(book)
    author = book[0]
    title = book[1].lower()
    isbn = book[2]
    recid_book = book[3]
    date = None
    if len(book) == 5:
        date = book[4]
    # An ISBN containing a dash goes to subfield "r", otherwise to "i".
    isbn_tag = "i"
    if re.search(r"\-", isbn):
        isbn_tag = "r"

    search_author = '999C5:/' + author + '/'
    search_title = '999C5:/' + title + '/ -refersto:recid:' + recid_book + \
                   ' -999C50:' + recid_book
    x_author = perform_request_search(p=search_author, cc='HEP')
    x_title = perform_request_search(p=search_title, cc='HEP')
    result = list(intbitset(x_author) & intbitset(x_title))

    link_subfields = "$$" + isbn_tag + isbn + "$$0" + \
                     str(recid_book) + "$$z1"
    counter = 1
    new_records = []
    for recid in result:
        if counter > COUNTER_MAX:
            # The counter never decreases, so no later record can be
            # processed either; stop before fetching it.
            break
        record = print_record(recid, ot=['999C5'], format='hm')
        new_record = []
        reference_flag = False
        for i in record.split('\n'):
            i = re.sub(r'</?pre>', '', i)
            i = re.sub(r'<pre style="margin: 1em 0px;">', '', i)
            # author and title are deliberately used as regular
            # expressions, as in the search queries above.
            if re.search(author, i):
                j = i.lower()
                # Plain substring test: in a regular expression "$" is
                # an end-of-string anchor, so r'$$0' could never match
                # a literal "$$0" and already-linked lines slipped
                # through.
                if re.search(title, j) and '$$0' not in j:
                    if not date or re.search(date, j):
                        i = i + link_subfields
                        reference_flag = True
                    # NOTE(review): the nesting of the two statements
                    # below was reconstructed from a whitespace-mangled
                    # source -- confirm against the original file.
                    if not re.search(r'CURATOR', i):
                        i = i + "$$9CURATOR"
                    if reference_flag:
                        counter += 1
            new_record.append(i + '\n')
        if reference_flag:
            new_records.append(new_record)
    return new_records
def check_doi_status_after_merge(original_recid1, original_recid2, final_record1, final_record_2, record2_marked_as_duplicate_p=False, submit_confirmed_p=False):
    """
    Check that the result of the merge does not remove DOIs managed by
    the system, and that no duplicate DOI would be created.
    Returns a tuple(error_code, message); error_code is 0 and message is
    empty when the merge may proceed.

    @param original_recid1: the record ID of the original record 1 (master)
    @type original_recid1: int
    @param original_recid2: the record ID of the original record 2 (slave)
    @type original_recid2: int
    @param final_record1: the resulting merged record
    @type final_record1: BibRecord object
    @param final_record_2: the resulting slave "merged" record (optional when record2_marked_as_duplicate_p is False)
    @type final_record_2: BibRecord object
    @param record2_marked_as_duplicate_p: True if the record 2 will be marked as duplicate (and deleted)
    @type record2_marked_as_duplicate_p: bool
    @param submit_confirmed_p: if the user has already confirmed to proceed with submission, according to previous messages displayed. If True, do not ask again confirmation and proceed if all tests pass.
    @type submit_confirmed_p: bool
    """
    errcode = 0
    message = ''

    # JavaScript re-ticking the "duplicate" checkbox.  It is substituted
    # into the fixed part of each message *before* any DOI is appended,
    # so that a "%" inside a DOI is never taken for a format directive.
    if record2_marked_as_duplicate_p:
        check_duplicate_box = '$(\'#bibMergeDupeCheckbox\').attr(\'checked\', true);'
    else:
        check_duplicate_box = ''
    substitutions = {'check_duplicate_box': check_duplicate_box}

    new_record1_dois = get_dois(final_record1)
    new_record1_managed_dois = get_dois(final_record1, internal_only_p=True)
    original_record1 = create_record(print_record(original_recid1, 'xm'))[0]
    original_record1_managed_dois = get_dois(original_record1, internal_only_p=True)
    # Original record 2 is fetched and parsed once for both uses below.
    original_record2 = create_record(print_record(original_recid2, 'xm'))[0]

    # Are there any DOI from record 1 (master) lost in the merging?
    lost_dois_in_record1 = [doi for doi in original_record1_managed_dois \
                            if doi not in new_record1_managed_dois]

    # Enough to check for duplicate DOI creation in this record,
    # not whole DB
    duplicate_dois_after_merge = [doi for doi in new_record1_dois if new_record1_dois.count(doi) > 1]

    if record2_marked_as_duplicate_p:
        original_record2_managed_dois = get_dois(original_record2, internal_only_p=True)
        # Are there any DOI from record 2 (slave) lost in the merging?
        # (record 2 goes away, so its DOIs must live on in record 1)
        lost_dois_in_record2 = [doi for doi in original_record2_managed_dois \
                                if doi not in new_record1_managed_dois]
    else:
        lost_dois_in_record2 = []
        # Record 2 stays: any DOI it shares with the merged record 1
        # would exist twice.
        original_record2_dois = get_dois(original_record2)
        duplicate_dois_after_merge += [doi for doi in new_record1_dois if doi in original_record2_dois]

    if ((lost_dois_in_record1 or lost_dois_in_record2) and \
        CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL > 0) or \
        duplicate_dois_after_merge:

        if CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and \
               not duplicate_dois_after_merge and \
               not submit_confirmed_p:
            # Protection level 1: losing DOIs is allowed after confirmation.
            errcode = 1
            message = 'The resulting merged record misses DOI(s) managed by the system.<script type="text/javascript">%(check_duplicate_box)sif (confirm(\'The resulting merged record will lose DOI(s) managed by the system.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nAre you sure that you want to submit the merged records without the DOI(s)?\')) {onclickSubmitButton(confirm_p=false, additional_data={\'confirmed_submit\': true})}</script>'
        elif duplicate_dois_after_merge and lost_dois_in_record1:
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and/or (b) will create duplicate DOIs.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and (b) will create duplicate DOIs.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nThe following DOI(s) would be duplicate after merge:\\n' + '\\n'.join(duplicate_dois_after_merge) + \
                      '\\nMake sure that the mentionned DOI(s) are included in the final merged record and/or no duplicate DOIs are created (suggestion: merge in the other way around).\');</script>'
        elif duplicate_dois_after_merge:
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record will create a duplicate DOI.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record will create a duplicate DOI.\\n' % substitutions + \
                      'The following DOI(s) would be duplicate after merge:\\n' + '\\n'.join(duplicate_dois_after_merge) + \
                      '\\nMake sure that the mentionned DOI(s) are not duplicated (suggestion: merge in the other way around).\');</script>'
        elif not (CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and submit_confirmed_p):
            # lost DOIs after merge
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record misses DOI(s) managed by the system.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record misses the DOI(s) managed by the system.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nMake sure that the mentionned DOI(s) are included in the final merged record.\');</script>'

    return (errcode, message)
def check_doi_status_after_merge(original_recid1,
                                 original_recid2,
                                 final_record1,
                                 final_record_2,
                                 record2_marked_as_duplicate_p=False,
                                 submit_confirmed_p=False):
    """
    Check that the result of the merge does not remove DOIs managed by
    the system, and that no duplicate DOI would be created.
    Returns a tuple(error_code, message); error_code is 0 and message is
    empty when the merge may proceed.

    @param original_recid1: the record ID of the original record 1 (master)
    @type original_recid1: int
    @param original_recid2: the record ID of the original record 2 (slave)
    @type original_recid2: int
    @param final_record1: the resulting merged record
    @type final_record1: BibRecord object
    @param final_record_2: the resulting slave "merged" record (optional when record2_marked_as_duplicate_p is False)
    @type final_record_2: BibRecord object
    @param record2_marked_as_duplicate_p: True if the record 2 will be marked as duplicate (and deleted)
    @type record2_marked_as_duplicate_p: bool
    @param submit_confirmed_p: if the user has already confirmed to proceed with submission, according to previous messages displayed. If True, do not ask again confirmation and proceed if all tests pass.
    @type submit_confirmed_p: bool
    """
    errcode = 0
    message = ''

    # JavaScript re-ticking the "duplicate" checkbox.  It is substituted
    # into the fixed part of each message *before* any DOI is appended,
    # so that a "%" inside a DOI is never taken for a format directive.
    if record2_marked_as_duplicate_p:
        check_duplicate_box = '$(\'#bibMergeDupeCheckbox\').attr(\'checked\', true);'
    else:
        check_duplicate_box = ''
    substitutions = {'check_duplicate_box': check_duplicate_box}

    new_record1_dois = get_dois(final_record1)
    new_record1_managed_dois = get_dois(final_record1, internal_only_p=True)
    original_record1 = create_record(
        print_record(original_recid1, 'xm'))[0]
    original_record1_managed_dois = get_dois(original_record1,
                                             internal_only_p=True)
    # Original record 2 is fetched and parsed once for both uses below.
    original_record2 = create_record(
        print_record(original_recid2, 'xm'))[0]

    # Are there any DOI from record 1 (master) lost in the merging?
    lost_dois_in_record1 = [doi for doi in original_record1_managed_dois \
                            if doi not in new_record1_managed_dois]

    # Enough to check for duplicate DOI creation in this record,
    # not whole DB
    duplicate_dois_after_merge = [
        doi for doi in new_record1_dois if new_record1_dois.count(doi) > 1
    ]

    if record2_marked_as_duplicate_p:
        original_record2_managed_dois = get_dois(original_record2,
                                                 internal_only_p=True)
        # Are there any DOI from record 2 (slave) lost in the merging?
        # (record 2 goes away, so its DOIs must live on in record 1)
        lost_dois_in_record2 = [doi for doi in original_record2_managed_dois \
                                if doi not in new_record1_managed_dois]
    else:
        lost_dois_in_record2 = []
        # Record 2 stays: any DOI it shares with the merged record 1
        # would exist twice.
        original_record2_dois = get_dois(original_record2)
        duplicate_dois_after_merge += [
            doi for doi in new_record1_dois if doi in original_record2_dois
        ]

    if ((lost_dois_in_record1 or lost_dois_in_record2) and \
        CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL > 0) or \
        duplicate_dois_after_merge:

        if CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1 and \
               not duplicate_dois_after_merge and \
               not submit_confirmed_p:
            # Protection level 1: losing DOIs is allowed after confirmation.
            errcode = 1
            message = 'The resulting merged record misses DOI(s) managed by the system.<script type="text/javascript">%(check_duplicate_box)sif (confirm(\'The resulting merged record will lose DOI(s) managed by the system.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nAre you sure that you want to submit the merged records without the DOI(s)?\')) {onclickSubmitButton(confirm_p=false, additional_data={\'confirmed_submit\': true})}</script>'
        elif duplicate_dois_after_merge and lost_dois_in_record1:
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and/or (b) will create duplicate DOIs.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record (a) misses DOI(s) managed by the system and (b) will create duplicate DOIs.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nThe following DOI(s) would be duplicate after merge:\\n' + '\\n'.join(duplicate_dois_after_merge) + \
                      '\\nMake sure that the mentionned DOI(s) are included in the final merged record and/or no duplicate DOIs are created (suggestion: merge in the other way around).\');</script>'
        elif duplicate_dois_after_merge:
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record will create a duplicate DOI.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record will create a duplicate DOI.\\n' % substitutions + \
                      'The following DOI(s) would be duplicate after merge:\\n' + '\\n'.join(duplicate_dois_after_merge) + \
                      '\\nMake sure that the mentionned DOI(s) are not duplicated (suggestion: merge in the other way around).\');</script>'
        elif not (CFG_BIBEDIT_INTERNAL_DOI_PROTECTION_LEVEL == 1
                  and submit_confirmed_p):
            # lost DOIs after merge
            errcode = 1
            message = 'The changes cannot be submitted because the resulting merged record misses DOI(s) managed by the system.<script type="text/javascript">%(check_duplicate_box)salert(\'The changes cannot be submitted because the resulting merged record misses the DOI(s) managed by the system.\\n' % substitutions + \
                      'The following DOI(s) were in the original record (#1) but are not in the final merged one:\\n' + '\\n'.join(lost_dois_in_record1) + \
                      '\\nMake sure that the mentionned DOI(s) are included in the final merged record.\');</script>'

    return (errcode, message)
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Provides text CV output of papers with current citation counts

Prompts for a search, runs it against the HEP collection and, for each
record found, prints its 'htcv' output followed by the number of HEP
records citing it.  The same text is written to IHEP-pub-report.doc.
"""
# The shebang and the encoding declaration are only honoured on the
# first two lines of a file, hence their position above the docstring.

import re

from invenio.search_engine import perform_request_search
from invenio.search_engine import print_record

search = raw_input('Search: ')
# not called "file": that would shadow the builtin of the same name
output_path = 'IHEP-pub-report.doc'

recids = perform_request_search(p=search, cc="HEP")
with open(output_path, 'w') as output:
    for recid in recids:
        cv = print_record(recid, format='htcv')
        cv = cv.replace('<br/>', '')
        # drop every run of two or more whitespace characters
        cv = re.sub(r'\s\s+', '', cv)
        citesearch = perform_request_search(p="refersto:recid:%i" % recid,
                                            cc="HEP")
        cv = cv + '\n' + str(len(citesearch)) + ' citations'
        print(cv + '\n\n')
        output.write(cv + '\n\n')
def _apo_mail_read_file(curdir, filename, description, check_exists=True):
    """Return the stripped content of ``curdir/filename`` ("" if unavailable).

    With *check_exists* a missing file silently yields "".  A file that
    cannot be opened or read is registered as an exception (naming it
    as the *description* file) and also yields "".
    """
    path = "%s/%s" % (curdir, filename)
    if check_exists and not os.path.exists(path):
        return ""
    try:
        fh = open(path, "r")
        try:
            content = fh.read()
        finally:
            # close the handle even when read() fails
            fh.close()
    except IOError:
        exception_prefix = "Error in WebSubmit function " \
                           "APO_Mail_Final_Decision_to_User. Tried to open " \
                           "%s file [%s] but was " \
                           "unable to." % (description, path)
        register_exception(prefix=exception_prefix)
        return ""
    return content.strip()


def APO_Mail_Final_Decision_to_User (parameters, curdir, form, user_info=None):
    """
    This function sends an email to the user informing him/her about
    the decision taken by the referee on his/her proposition (the
    deletion of a post record). This email is also sent to the referee
    for checking.  No email is sent when no decision could be read.

    Parameters:

       * categformatAPP: regular expression used to compute the category
         of the document given its reference. <CATEG> marks the category
         part, e.g. with "TEST-<CATEG>-.*" and the reference
         "TEST-CATEGORY1-2001-001" the category is "CATEGORY1".

       * decision_file (optional): name of the file containing the
         decision; falls back to a file called 'decision' in curdir.

       * comments_file (optional): name of the file containing the
         referee's comments; falls back to a file called 'COM' in curdir.

    Relies on the globals sysno and rn.

    @return: empty string.
    """
    global sysno, rn
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL)
    doctype = form['doctype']
    # variables declaration
    categformat = parameters['categformatAPP']
    sequence_id = bibtask_allocate_sequenceid(curdir)
    ## Get the name of the decision file:
    try:
        decision_filename = parameters['decision_file']
    except KeyError:
        decision_filename = ""
    ## Get the name of the comments file:
    try:
        comments_filename = parameters['comments_file']
    except KeyError:
        comments_filename = ""

    ## Read the comments (they are not part of the mail, but a file
    ## that cannot be read is still registered as an exception).
    ## Without a file name, fall back (for backward compatibility) to a
    ## file called 'COM' in curdir:
    if comments_filename in (None, "", "NULL"):
        comments_filename = "COM"
    comment = _apo_mail_read_file(curdir, comments_filename, "comments")

    ## Read the decision. Without a file name, fall back (for backward
    ## compatibility) to a file called 'decision' in curdir. An
    ## explicitly named decision file is expected to exist, so its
    ## absence is registered as an exception rather than ignored:
    if decision_filename in (None, "", "NULL"):
        decision = _apo_mail_read_file(curdir, "decision", "decision")
    else:
        decision = _apo_mail_read_file(curdir, decision_filename,
                                       "decision", check_exists=False)

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s", (doctype,))
    docname = res[0][0]

    # retrieve category
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        ## Found a match for the category of this document. Get it:
        category = m_categ_search.group(1)
    else:
        ## This document has no category.
        category = "unknown"

    ## Collect the recipients in a list and join them once at the end,
    ## so that empty parts can never produce leading/doubled commas.
    recipients = []
    # Referees of this category, from the referee's database
    for user in acc_get_role_users(acc_get_role_id("referee_%s_%s" % (doctype,category))):
        if user[1]:
            recipients.append(user[1])
    # And if there is a general referee
    for user in acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype)):
        if user[1]:
            recipients.append(user[1])

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = [email.lower().strip() \
                              for email in record_owners.split("\n")]
        recipients.append(",".join(record_owners_list))

    # Add "SuE" (user who triggered the action) into the list of addresses:
    try:
        fp_sue = open("%s/SuE" % curdir, "r")
        try:
            # strip the line terminator, which must not end up in the
            # recipient list
            sue = fp_sue.readline().strip()
        finally:
            fp_sue.close()
    except IOError:
        sue = ""
    if sue != "":
        recipients.append(sue)
    addresses = ",".join(recipients)

    post_title = "".join(["%s" % title.strip() for title in \
                          get_fieldvalues(int(sysno), "245__a")])
    post_url = "".join(["%s" % url.strip() for url in \
                        get_fieldvalues(int(sysno), "520__u")])

    if decision == "":
        # No decision could be read: there is nothing to report (and no
        # mail title/body to send).
        return ""

    # The post is identified by its title, or by its URL when it has none.
    post_id = post_title or post_url
    if decision == "approve":
        mailtitle = "Post record deletion approved: [%(id)s]" % {'id': post_id}
        mailbody = "\nThe deletion of the post record with URL [%s] and title '%s' has been approved.\n" % (post_url, post_title)
        mailbody += "\nThis post record will be no longer available in the repository.\n"
    else:
        # any decision other than "approve" is reported as a rejection
        mailtitle = "Post record deletion has been rejected: [%(id)s]" % {'id': post_id}
        mailbody = "\nThe deletion of the post record with URL [%s] and title '%s' has been rejected.\n" % (post_url, post_title)

    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR,addresses,mailtitle,mailbody, \
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN, \
                             other_bibtasklet_arguments=['-I', str(sequence_id)])
    return ""
def JOBSUBMIT_Send_APP_Mail(parameters, curdir, form, user_info=None):
    """
    Send an email informing the original submitter of a job listing that
    the referee has approved or rejected it.

    Parameters (keys of `parameters`):

       * addressesAPP: email addresses of the people who will receive
                       this email (comma separated list). This parameter
                       may contain the <CATEG> string, in which case the
                       category computed from [categformatAPP] replaces
                       it. eg.: "<CATEG>-referee@example.org"

       * categformatAPP: regular expression used to compute the category
                         of the document from its reference number.
                         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and
                         the reference of the document is
                         "TEST-CATEGORY1-2001-001", then the computed
                         category equals "CATEGORY1"

       * emailFile: name of the file (in curdir) containing the email of
                    the submitter of the document. Optional: a missing
                    key or unreadable file is ignored.

       * newrnin: name of the file (in curdir) containing the 2nd
                  reference of the document (if any).

       * decision_file: name of the file (in curdir) containing the
                        decision. When absent, empty or "NULL", the
                        legacy file "decision" is tried instead.

       * comments_file: name of the file (in curdir) containing the
                        referee's comments. When absent, empty or "NULL",
                        the legacy file "COM" is tried instead.

       * edsrn: name of the file containing the reference of the
                document (not read by this function).

    Reads the module globals titlevalue, authorvalue, sysno and rn, and
    rewrites titlevalue/authorvalue with their newlines flattened.

    @return: (string) - always the empty string.
    """
    global titlevalue, authorvalue, sysno, rn

    def _read_file(path):
        """Return the whole content of `path`; the handle is always closed."""
        handle = open(path, "r")
        try:
            return handle.read()
        finally:
            handle.close()

    def _read_optional(path, kind, must_exist=True):
        """Return the stripped content of `path`, or "" if unavailable.

        When `must_exist` is true a missing file is silently treated as
        empty; otherwise the open is attempted anyway and the resulting
        IOError is registered, exactly like any other read failure.
        """
        if must_exist and not os.path.exists(path):
            return ""
        try:
            content = _read_file(path)
        except IOError:
            exception_prefix = "Error in WebSubmit function " \
                               "Send_APP_Mail. Tried to open %s " \
                               "file [%s] but was unable to." % (kind, path)
            register_exception(prefix=exception_prefix)
            return ""
        return content.strip()

    def _join_recipients(chunks):
        """Merge comma-separated address chunks, dropping empty entries."""
        cleaned = []
        for chunk in chunks:
            for address in chunk.split(","):
                address = address.strip()
                if address:
                    cleaned.append(address)
        return ",".join(cleaned)

    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n", " ")
    authorvalue = authorvalue.replace("\n", "; ")

    # Mandatory configuration (a missing key raises KeyError, as before)
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    # Optional configuration
    decision_filename = parameters.get('decision_file', "")
    comments_filename = parameters.get('comments_file', "")

    ## Referee's comments: named file, or the legacy 'COM' file.
    if comments_filename in (None, "", "NULL"):
        comment = _read_optional("%s/COM" % curdir, "comments")
    else:
        comment = _read_optional("%s/%s" % (curdir, comments_filename),
                                 "comments")

    ## Referee's decision: named file, or the legacy 'decision' file.
    if decision_filename in (None, "", "NULL"):
        decision = _read_optional("%s/decision" % curdir, "decision")
    else:
        ## A configured-but-missing decision file is an error worth
        ## registering, hence no existence check here.
        decision = _read_optional("%s/%s" % (curdir, decision_filename),
                                  "decision", must_exist=False)

    ## Second reference number, if any. Stripped so that a trailing
    ## newline in the file does not make it look different from rn.
    newrn_path = "%s/%s" % (curdir, newrnpath)
    if os.path.exists(newrn_path):
        newrn = _read_file(newrn_path).strip()
    else:
        newrn = ""

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",
                  (doctype, ))
    docname = res[0][0]

    # Retrieve category from the reference number
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    category = "unknown"
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        category = m_categ_search.group(1)
        if category is None:
            ## The group exists but took no part in the match.
            category = "unknown"

    ## Collect the recipients: configured addresses, the submitter and
    ## the record owner(s).
    recipients = [otheraddresses.replace("<CATEG>", category)]

    # The submitter's email address is read from the file specified by
    # 'emailFile'. This is best-effort: no file simply means no address.
    try:
        submitter = _read_file("%s/%s" % (curdir, parameters['emailFile']))
    except (KeyError, IOError, OSError):
        submitter = ""
    recipients.append(submitter.replace("\n", " "))

    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        recipients.extend([email.lower().strip() \
                           for email in record_owners.split("\n")])

    ## Joining cleaned entries guarantees exactly one comma between
    ## addresses and no leading/trailing comma.
    addresses = _join_recipients(recipients)

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The submitted job listing with reference number %s has been fully approved." % (rn, )
        mailbody += "\n\nIt will soon become visible in the INSPIRE-HEP Jobs database - <%s/Jobs>" % (CFG_SITE_URL, )
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname, rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue, authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment

    # Send the mail
    send_email(fromaddr=CFG_WEBSUBMIT_JOBS_FROMADDR, toaddr=addresses,
               subject=mailtitle, content=mailbody,
               footer=job_email_footer(),
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    return ""
def Send_APP_Mail(parameters, curdir, form, user_info=None):
    """
    Send an email informing the original submitter of a document that
    the referee has approved or rejected the document. The email is also
    sent to the referee for checking.

    Parameters (keys of `parameters`):

       * addressesAPP: email addresses of the people who will receive
                       this email (comma separated list). This parameter
                       may contain the <CATEG> string, in which case the
                       category computed from [categformatAPP] replaces
                       it. eg.: "<CATEG>-referee@example.org"
                       (Not used when CFG_CERN_SITE is set.)

       * categformatAPP: regular expression used to compute the category
                         of the document from its reference number.
                         eg.: if [categformatAPP]="TEST-<CATEG>-.*" and
                         the reference of the document is
                         "TEST-CATEGORY1-2001-001", then the computed
                         category equals "CATEGORY1"

       * newrnin: name of the file (in curdir) containing the 2nd
                  reference of the approved document (if any).

       * decision_file: name of the file (in curdir) containing the
                        decision. When absent, empty or "NULL", the
                        legacy file "decision" is tried instead.

       * comments_file: name of the file (in curdir) containing the
                        referee's comments. When absent, empty or "NULL",
                        the legacy file "COM" is tried instead.

       * edsrn: name of the file containing the reference of the
                approved document (not read by this function).

    Reads the module globals titlevalue, authorvalue, sysno and rn, and
    rewrites titlevalue/authorvalue with their newlines flattened.

    @return: (string) - always the empty string.
    """
    global titlevalue, authorvalue, sysno, rn

    def _read_file(path):
        """Return the whole content of `path`; the handle is always closed."""
        handle = open(path, "r")
        try:
            return handle.read()
        finally:
            handle.close()

    def _read_optional(path, kind, must_exist=True):
        """Return the stripped content of `path`, or "" if unavailable.

        When `must_exist` is true a missing file is silently treated as
        empty; otherwise the open is attempted anyway and the resulting
        IOError is registered, exactly like any other read failure.
        """
        if must_exist and not os.path.exists(path):
            return ""
        try:
            content = _read_file(path)
        except IOError:
            exception_prefix = "Error in WebSubmit function " \
                               "Send_APP_Mail. Tried to open %s " \
                               "file [%s] but was unable to." % (kind, path)
            register_exception(prefix=exception_prefix)
            return ""
        return content.strip()

    def _join_recipients(chunks):
        """Merge comma-separated address chunks, dropping empty entries."""
        cleaned = []
        for chunk in chunks:
            for address in chunk.split(","):
                address = address.strip()
                if address:
                    cleaned.append(address)
        return ",".join(cleaned)

    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,
                                              CFG_SITE_SUPPORT_EMAIL)
    doctype = form['doctype']
    titlevalue = titlevalue.replace("\n", " ")
    authorvalue = authorvalue.replace("\n", "; ")

    # Mandatory configuration (a missing key raises KeyError, as before)
    categformat = parameters['categformatAPP']
    otheraddresses = parameters['addressesAPP']
    newrnpath = parameters['newrnin']
    # Optional configuration
    decision_filename = parameters.get('decision_file', "")
    comments_filename = parameters.get('comments_file', "")

    ## Referee's comments: named file, or the legacy 'COM' file.
    if comments_filename in (None, "", "NULL"):
        comment = _read_optional("%s/COM" % curdir, "comments")
    else:
        comment = _read_optional("%s/%s" % (curdir, comments_filename),
                                 "comments")

    ## Referee's decision: named file, or the legacy 'decision' file.
    if decision_filename in (None, "", "NULL"):
        decision = _read_optional("%s/decision" % curdir, "decision")
    else:
        ## A configured-but-missing decision file is an error worth
        ## registering, hence no existence check here.
        decision = _read_optional("%s/%s" % (curdir, decision_filename),
                                  "decision", must_exist=False)

    ## Second reference number, if any. Stripped so that a trailing
    ## newline in the file does not make it look different from rn.
    newrn_path = "%s/%s" % (curdir, newrnpath)
    if os.path.exists(newrn_path):
        newrn = _read_file(newrn_path).strip()
    else:
        newrn = ""

    # Document name
    res = run_sql("SELECT ldocname FROM sbmDOCTYPE WHERE sdocname=%s",
                  (doctype, ))
    docname = res[0][0]

    # Retrieve category from the reference number
    categformat = categformat.replace("<CATEG>", "([^-]*)")
    m_categ_search = re.match(categformat, rn)
    category = "unknown"
    if m_categ_search is not None and len(m_categ_search.groups()) > 0:
        category = m_categ_search.group(1)
        if category is None:
            ## The group exists but took no part in the match.
            category = "unknown"

    ## Get the referee email address(es):
    recipients = []
    if CFG_CERN_SITE:
        ## The referees system in CERN works with listbox membership.
        ## The list name built below has the form
        ## "service-cds-referee-<doctype>[-<category>]@cern.ch".
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        referee_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            referee_listname += "-%s" % category.lower()
        referee_listname += "@cern.ch"
        recipients.append(referee_listname)
    else:
        # Referees registered for this doctype/category...
        for user in acc_get_role_users(
                acc_get_role_id("referee_%s_%s" % (doctype, category))):
            recipients.append(user[1])
        # ...and the general referees of the doctype, if any
        for user in acc_get_role_users(
                acc_get_role_id("referee_%s_*" % doctype)):
            recipients.append(user[1])
        # Extra addresses configured for the submission
        recipients.append(otheraddresses.replace("<CATEG>", category))

    ## Add the record's submitter(s) into the list of recipients:
    ## Get the email address(es) of the record submitter(s)/owner(s) from
    ## the record itself:
    record_owners = print_record(sysno, 'tm', \
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        recipients.extend([email.lower().strip() \
                           for email in record_owners.split("\n")])

    ## Joining cleaned entries guarantees exactly one comma between
    ## addresses and no leading/trailing comma.
    addresses = _join_recipients(recipients)

    if decision == "approve":
        mailtitle = "%s has been approved" % rn
        mailbody = "The %s %s has been approved." % (docname, rn)
        mailbody += "\nIt will soon be accessible here:\n\n<%s/%s/%s>" % (
            CFG_SITE_URL, CFG_SITE_RECORD, sysno)
    else:
        mailtitle = "%s has been rejected" % rn
        mailbody = "The %s %s has been rejected." % (docname, rn)
    if rn != newrn and decision == "approve" and newrn != "":
        mailbody += "\n\nIts new reference number is: %s" % newrn
    mailbody += "\n\nTitle: %s\n\nAuthor(s): %s\n\n" % (titlevalue, authorvalue)
    if comment != "":
        mailbody += "Comments from the referee:\n%s\n" % comment

    # Send mail to referee if any recipients or copy to admin
    if addresses or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        send_email(FROMADDR, addresses, mailtitle, mailbody,
                   copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    return ""
def iterate_over_old(list, fmt):
    """Reformat the records whose IDs are in `list` via external tools.

    The MARCXML of each record is fetched with print_record() and
    accumulated; every 10000 records (and once more for the remainder)
    the accumulated XML is written to CFG_TMPDIR/bibreformat.xml, piped
    through the external `bibformat` binary and the result is submitted
    to bibupload.

    @param list: iterable of record IDs (integers).
    @param fmt: output format code handed to bibformat (upper-cased).
    @return: tuple (total_rec, tbibformat, tbibupload) - number of
        records processed and the time spent in the bibformat call(s)
        and in the bibupload submission(s), as measured by os.times().

    If a record still yields empty XML after the initial attempt and 10
    retries (10 seconds apart), an error is written to stderr and the
    process terminates through sys.exit(0).
    """
    n_max = 10000        # records per chunk
    max_retries = 10
    retry_delay = 10     # seconds to wait before each retry

    def _format_and_upload(xml_chunks, allow_sleep):
        """Run bibformat on the chunk, submit bibupload; return timings."""
        finalfilename = "%s/rec_fmt_%s.xml" % (
            CFG_TMPDIR, time.strftime('%Y%m%d_%H%M%S'))
        filename = "%s/bibreformat.xml" % CFG_TMPDIR
        filehandle = open(filename, "w")
        try:
            filehandle.write("".join(xml_chunks))
        finally:
            filehandle.close()

        ### bibformat external call
        if allow_sleep:
            task_sleep_now_if_required(can_stop_too=True)
        started = os.times()[4]
        write_message("START bibformat external call", verbose=9)
        # NOTE(review): fmt is interpolated into a shell command line;
        # it must come from a trusted source.
        command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (
            CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
        os.system(command)
        format_time = os.times()[4] - started
        write_message("END bibformat external call (time elapsed:%2f)" %
                      format_time, verbose=9)
        if allow_sleep:
            task_sleep_now_if_required(can_stop_too=True)

        ### bibupload external call
        started = os.times()[4]
        write_message("START bibupload external call", verbose=9)
        task_id = task_low_level_submission('bibupload', 'bibreformat',
                                            '-f', finalfilename)
        write_message("Task #%s submitted" % task_id)
        upload_time = os.times()[4] - started
        write_message("END bibupload external call (time elapsed:%2f)" %
                      upload_time, verbose=9)
        return format_time, upload_time

    n_rec = 0            # records in the current chunk
    total_rec = 0        # number of formatted records
    tbibformat = 0       # time taken up by external call
    tbibupload = 0       # time taken up by external call
    xml_chunks = []      # XML of the current chunk, joined when flushed

    for record in list:
        n_rec += 1
        total_rec += 1
        write_message("Processing record: %d" % (record), verbose=9)
        query = "id=%d&of=xm" % (record)

        contents = print_record(record, 'xm')
        retries = 0
        # Wait *before* each retry, and judge failure on the content
        # itself so that a successful last retry is not discarded.
        while contents == "" and retries < max_retries:
            time.sleep(retry_delay)
            contents = print_record(record, 'xm')
            retries += 1
        if contents == "":
            sys.stderr.write(
                "Failed to download %s from %s after 10 attempts... terminating"
                % (query, CFG_SITE_URL))
            # NOTE(review): exit status 0 on failure is kept as-is since
            # callers may rely on it; consider a non-zero status.
            sys.exit(0)
        xml_chunks.append(contents)

        if n_rec >= n_max:
            elapsed = _format_and_upload(xml_chunks, allow_sleep=True)
            tbibformat += elapsed[0]
            tbibupload += elapsed[1]
            n_rec = 0
            xml_chunks = []

    ### Process the last re-formated chunk ###
    if n_rec > 0:
        write_message("Processing last record set (%d)" % n_rec, verbose=9)
        elapsed = _format_and_upload(xml_chunks, allow_sleep=False)
        tbibformat += elapsed[0]
        tbibupload += elapsed[1]

    return (total_rec, tbibformat, tbibupload)
def Mail_Approval_Request_to_Referee(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the referee of a document informing
    him/her that a request for its approval has been submitted by the
    user.

    @param categ_file_appreq: (string) - some document types are
          separated into different categories, each of which has its own
          referee(s).
          In such document types, it's necessary to know the document-
          type's category in order to choose the referee.
          This parameter provides a means by which the category
          information can be extracted from a file in the current
          submission's working directory. It should therefore be a
          filename.

    @param categ_rnseek_appreq: (string) - some document types are
          separated into different categories, each of which has its own
          referee(s).
          In such document types, it's necessary to know the document-
          type's category in order to choose the referee.
          This parameter provides a means by which the category
          information can be extracted from the document's reference
          number.
          It is in fact a string that will be compiled into a regexp and
          an attempt will be made to match it against the document's
          reference number starting from the left-most position.
          The only pre-requisite is that the segment in which the
          category is sought should be indicated with <CATEG>.
          Thus, an example might be as follows:
             ATL(-COM)?-<CATEG>-.+
          This would allow "PHYS" in the following reference number to be
          recognised as the category:
             ATL-COM-PHYS-2008-001

    @param edsrn: (string) - the name of the field in which the report
          number should be placed when the referee visits the form for
          making a decision.

    Only one of categ_file_appreq / categ_rnseek_appreq may be given;
    when neither is given the document has no category ("").

    @return: (string) - empty string.
    @raise InvenioWebSubmitFunctionError: when edsrn is missing or empty,
          when the category cannot be resolved from the configured
          source, or when an unknown ATN note type is met.
    """
    ## Get the reference number (as global rn - sorry!) and the document
    ## type:
    global sysno, rn
    doctype = form['doctype']

    ########
    ## Get the parameters from the list:

    ########
    ## Get the name of the report-number file:
    ########
    edsrn_file = parameters.get("edsrn")
    if edsrn_file is not None or "edsrn" in parameters:
        ## Keep only the file's base name (str(None) -> "None", as before)
        edsrn_file = os.path.basename(str(edsrn_file)).strip()
    else:
        edsrn_file = ""
    if edsrn_file == "":
        msg = "Error in Mail_Approval_Request_to_Referee function: unable " \
              "to determine the name of the file in which the document's " \
              "report number should be stored."
        raise InvenioWebSubmitFunctionError(msg)

    ########
    ## Get the name of the category file (None when not configured):
    ########
    category_file = parameters.get("categ_file_appreq")
    if category_file is not None:
        category_file = os.path.basename(str(category_file)).strip()
        if category_file == "":
            category_file = None

    ########
    ## Get the regexp that is used to find the category in the report
    ## number (None when not configured):
    ########
    category_rn_regexp = parameters.get("categ_rnseek_appreq")
    if category_rn_regexp is not None:
        category_rn_regexp = str(category_rn_regexp).strip()
        if category_rn_regexp == "":
            category_rn_regexp = None

    #######
    ## Resolve the document type's category:
    ##
    ## The category is extracted either from a file in curdir, or from
    ## the report number. If it's taken from the report number, the admin
    ## must configure the function with a regular expression that is used
    ## to find the category in the report number.
    ##
    if category_file is not None and category_rn_regexp is not None:
        ## It is not valid to have both a category file and a pattern
        ## describing how to extract the category from a report number.
        msg = "Error in Register_Approval_Request function: received " \
              "instructions to search for the document's category in " \
              "both its report number AND in a category file. Could " \
              "not determine which to use - please notify the " \
              "administrator."
        raise InvenioWebSubmitFunctionError(msg)
    elif category_file is not None:
        ## Attempt to recover the category information from a file in the
        ## current submission's working directory:
        category = ParamFromFile("%s/%s" % (curdir, category_file))
        if category is not None:
            category = category.strip()
        if category in (None, ""):
            ## The category cannot be resolved.
            msg = "Error in Register_Approval_Request function: received " \
                  "instructions to search for the document's category in " \
                  "a category file, but could not recover the category " \
                  "from that file. An approval request therefore cannot " \
                  "be registered for the document."
            raise InvenioWebSubmitFunctionError(msg)
    elif category_rn_regexp is not None:
        ## Attempt to recover the category information from the document's
        ## reference number using the regexp in category_rn_regexp.
        if category_rn_regexp.find("<CATEG>") == -1:
            ## The regexp for category didn't contain "<CATEG>", but this
            ## is mandatory.
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression (no marker for " \
                  "the category was present.) Since the document's category " \
                  "therefore cannot be retrieved, an approval request cannot " \
                  "be registered for it. Please report this problem to the " \
                  "administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)

        ## Replace the first "<CATEG>" with "(?P<category>.+?)".
        ## For example, this:
        ##    ATL(-COM)?-<CATEG>-
        ## Will be transformed into this:
        ##    ATL(-COM)?-(?P<category>.+?)-
        category_rn_final_regexp = \
            category_rn_regexp.replace("<CATEG>", r"(?P<category>.+?)", 1)

        try:
            ## Attempt to compile the regexp for finding the category:
            re_categ_from_rn = re.compile(category_rn_final_regexp)
        except re.error:
            ## The expression passed to this function could not be
            ## compiled into a regexp. Register this exception and raise
            ## an InvenioWebSubmitFunctionError:
            exception_prefix = "Error in Register_Approval_Request function: " \
                               "The [%(doctype)s] submission has been " \
                               "configured to search for the document type's " \
                               "category in its reference number, using the " \
                               "following regexp: /%(regexp)s/. This regexp, " \
                               "however, could not be compiled correctly " \
                               "(created it from %(categ-search-term)s.)" \
                               % { 'doctype'       : doctype, \
                                   'regexp'        : category_rn_final_regexp, \
                                   'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to search " \
                  "for the document type's category in its reference number, " \
                  "using a poorly formed search expression. Since the " \
                  "document's category therefore cannot be retrieved, an " \
                  "approval request cannot be registered for it. Please " \
                  "report this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)

        ## Now attempt to recover the category from the RN string:
        m_categ_from_rn = re_categ_from_rn.match(rn)
        if m_categ_from_rn is None:
            ## No match. Cannot find the category and therefore cannot
            ## continue:
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to " \
                  "search for the document type's category in its " \
                  "reference number, but no match was made. The request " \
                  "for approval cannot be registered. Please report " \
                  "this problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)

        ## The pattern matched in the string.
        ## Extract the category from the match:
        try:
            category = m_categ_from_rn.group("category")
        except IndexError:
            ## There was no "category" group. That group is mandatory.
            exception_prefix = \
                   "Error in Register_Approval_Request function: The " \
                   "[%(doctype)s] submission has been configured to " \
                   "search for the document type's category in its " \
                   "reference number using the following regexp: " \
                   "/%(regexp)s/. The search produced a match, but " \
                   "there was no \"category\" group in the match " \
                   "object although this group is mandatory. The " \
                   "regexp was compiled from the following string: " \
                   "[%(categ-search-term)s]." \
                   % { 'doctype'           : doctype, \
                       'regexp'            : category_rn_final_regexp, \
                       'categ-search-term' : category_rn_regexp, }
            register_exception(prefix=exception_prefix)
            msg = "Error in Register_Approval_Request function: The " \
                  "[%(doctype)s] submission has been configured to " \
                  "search for the document type's category in its " \
                  "reference number, using a poorly formed search " \
                  "expression (there was no category marker). Since " \
                  "the document's category therefore cannot be " \
                  "retrieved, an approval request cannot be " \
                  "registered for it. Please report this problem to " \
                  "the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)

        ## A group that took no part in the match yields None; treat it
        ## like an empty category instead of failing on None.strip().
        category = (category or "").strip()
        if category == "":
            msg = "Error in Register_Approval_Request function: " \
                  "The [%(doctype)s] submission has been " \
                  "configured to search for the document type's " \
                  "category in its reference number, but no " \
                  "category was found. The request for approval " \
                  "cannot be registered. Please report this " \
                  "problem to the administrator." \
                  % { 'doctype' : doctype, }
            raise InvenioWebSubmitFunctionError(msg)
    else:
        ## The document type has no category.
        category = ""
    ##
    ## End of category recovery
    #######

    #######
    ## Get the title and author(s) from the record:
    #######
    ## Author(s):
    rec_authors = ""
    rec_first_author = print_record(int(sysno), 'tm', "100__a")
    rec_other_authors = print_record(int(sysno), 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += "".join(["%s\n" % author.strip() for \
                                author in rec_first_author.split("\n")])
    if rec_other_authors != "":
        rec_authors += "".join(["%s\n" % author.strip() for \
                                author in rec_other_authors.split("\n")])
    ## Title:
    rec_title = "".join(["%s\n" % title.strip() for title in \
                         print_record(int(sysno), 'tm', "245__a").split("\n")])
    ##
    #######

    ## the normal approval action
    approve_act = 'APP'
    ## Get notes about the approval request:
    approval_notes = get_approval_request_notes(doctype, rn)

    ## Get the referee email address:
    if CFG_CERN_SITE:
        ## The referees system in CERN works with listbox membership.
        ## The default list name built below has the form
        ## "service-cds-referee-<doctype>[-<category>]@cern.ch".
        ## Make sure that your list exists!
        ## FIXME - to be replaced by a mailing alias in webaccess in the
        ## future.
        default_listname = "service-cds-referee-%s" % doctype.lower()
        if category != "":
            default_listname += "-%s" % category.lower()

        ## Every case that is not special-cased below uses the default
        ## list. This includes doctype ATN with an action other than
        ## 'RPR', which would otherwise have no list name at all.
        referee_listname = default_listname
        if doctype == 'ATN':
            ## Special case of 'RPR' action for doctype ATN
            action = ParamFromFile("%s/%s" % (curdir,'act')).strip()
            if action == 'RPR':
                notetype = ParamFromFile("%s/%s" % (curdir,'ATN_NOTETYPE')).strip()
                if notetype == 'PROC':
                    approve_act = 'APR'    # RPR PROC requires APR action to approve
                    referee_listname = "*****@*****.**"
                elif notetype == 'SLIDE':  ## SLIDES approval
                    approve_act = 'APS'    # RPR SLIDE requires APS action to approve
                    referee_listname = "*****@*****.**"
                else:
                    raise InvenioWebSubmitFunctionError('ERROR function Mail_Approval_Request_to_Referee:: do not recognize notetype ' + notetype)

        ## Only qualify bare list names; a name that already is a full
        ## address must not get a second domain appended.
        if "@" in referee_listname:
            mailto_addresses = referee_listname
        else:
            mailto_addresses = referee_listname + "@cern.ch"

        if category == 'CDSTEST':
            ## Test category: always mail the doctype's CDSTEST list.
            referee_listname = "service-cds-referee-%s" % doctype.lower()
            referee_listname += "-%s" % category.lower()
            mailto_addresses = referee_listname + "@cern.ch"
    else:
        ## Referees registered for this doctype/category in the access
        ## control roles...
        referee_emails = [user[1] for user in \
                          acc_get_role_users(acc_get_role_id("referee_%s_%s" \
                                                             % (doctype, category)))]
        ## ...and the general referees of the doctype, if any:
        referee_emails += [user[1] for user in \
                           acc_get_role_users(acc_get_role_id("referee_%s_*" % doctype))]
        ## Comma-separated, without a trailing comma:
        mailto_addresses = ",".join(referee_emails)
    ##
    ## Send the email:
    mail_subj = "Request for approval of [%s]" % rn
    mail_body = CFG_MAIL_BODY % \
                { 'site-name'               : CFG_SITE_NAME,
                  'report-number-fieldname' : edsrn_file,
                  'report-number'           : rn,
                  'title'                   : rec_title,
                  'authors'                 : rec_authors,
                  'site-url'                : CFG_SITE_URL,
                  'record-id'               : sysno,
                  'approval-action'         : approve_act,
                  'doctype'                 : doctype,
                  'notes'                   : approval_notes,
                  'category'                : category,
                }
    send_email(CFG_SITE_SUPPORT_EMAIL,
               mailto_addresses,
               mail_subj,
               mail_body,
               copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN)
    ##
    return ""
["E40","K.Miwa","Tohoku U","Measurement of the cross sections of Σp scatterings"]] search = "371__u:/a/ or 371__u:/e/ or 371__u:/i/ or 371__u:/o/ or 371__u:/u/" #x = perform_request_search(p=search,cc='HepNames') #x = x[:5] #print len(x) fileName = 'tmp_junk.out' output = open(fileName,'w') if True: recid = 355574 output.write(print_record(recid, ot=['001','700'],format='xm')) if False: result = [1317852, 1319472, 1324458, 1325164, 1326367, 1327466, 1328450, 1328598, 1328943, 1333186, 1333470] for r in result: #print r print print_record(r,ot=['001','980'],format='xm') print \ '''<datafield tag="980" ind1=" " ind2=" "> <subfield code="a">HEP</subfield> </datafield> ''' if False: all_refs = []
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Every 999C5 field is rendered from its subfields: $o opens a new <li>
    item, $m is printed as-is, $r becomes either an embedded record (ADS
    sites) or a report-number search link, and $t (with the optional $v,
    $y and $p) becomes a journal citation, which is turned into a link
    when bfo.kb("ejournals", ...) returns a non-empty value for the title.

    @param bfo: BibFormat object of the record being formatted
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: HTML for all the references; empty string if nothing is printed
    """
    from invenio.config import CFG_BASE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    # Non-empty HTML fragments, one per reference that produced output.
    rendered = []

    for reference in references:
        parts = []

        # `key in dict` replaces the deprecated dict.has_key().
        if 'o' in reference:
            # A new numbered item closes the previous <li>, but only if
            # something has already been written.
            if rendered:
                parts.append('</li>')
            parts.append("<li><small>" + reference['o'] + "</small> ")

        if 'm' in reference:
            parts.append("<small>" + reference['m'] + "</small> ")

        if 'r' in reference:
            report = reference['r']
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(report)
                if recid_to_display:
                    parts.append(print_record(recid_to_display, 'hs'))
                else:
                    parts.append('<small>' + report + ' (not in ADS)</small>')
            else:
                parts.append('<small> [<a href="' + CFG_BASE_URL +
                             '/search?f=reportnumber&p=' + report +
                             '&ln=' + bfo.lang +
                             '">' + report + "</a>] </small> <br />")

        if 't' in reference:
            title = reference['t']
            volume = reference.get('v', "")
            year = reference.get('y', "")
            pages = reference.get('p', "")
            ejournal = bfo.kb("ejournals", reference.get('t', ""))
            if ejournal != "":
                # Only the first page of a "first-last" range goes in the URL.
                parts.append(' <small> <a href="https://cds.cern.ch/ejournals.py?publication='
                             + title.replace(" ", "+")
                             + "&volume=" + volume + "&year=" + year
                             + "&page=" + pages.split("-")[0] + '">')
                parts.append(title + ": " + volume + " (" + year + ") ")
                parts.append(pages + "</a> </small> <br />")
            else:
                parts.append(" <small> " + title + volume + year + pages +
                             " </small> <br />")

        ref_out = ''.join(parts)
        if ref_out != '':
            # Prefix and suffix are only applied to references that
            # actually produced some output.
            if reference_prefix is not None:
                ref_out = reference_prefix + ref_out
            if reference_suffix is not None:
                ref_out += reference_suffix
            rendered.append(ref_out)

    if rendered:
        rendered.append('</li>')

    return ''.join(rendered)
def User_is_Record_Owner_or_Curator(parameters, curdir, form, user_info=None):
    """
    Check that user is either the original submitter, or that it has
    been granted access to carry out the action via Webaccess. This
    enables collaborative editing of records, so that collections can
    be curated by a group of people in addition to the original
    submitter.

    If the user has permission, the function ends silently. If not, it
    will raise an InvenioWebSubmitFunctionStop, informing the user that
    they don't have rights and sending them back to the submission web
    form.

    WARNING: you have to understand that wherever you use this
    function, any user authorized via WebAccess for this action will be
    able to modify any records that can go through this workflow. E.g.
    when using this function in a DEMOPIC submission, in a 'MBI'
    action, it is enough that a user is connected to the 'submit'
    action with the 'DEMOPIC/MBI' parameters to modify any record.

    @parameters: None.
    @return: Empty string.
    @Exceptions raised: InvenioWebSubmitFunctionStop when user is denied
        permission to work with the record.
    """
    global sysno

    ## The document type and action from the form are what WebAccess is
    ## asked about further down.
    doctype = form['doctype']
    act = form['act']

    ## E-mail address of the current user, lower-cased for comparison:
    user_email = user_info["email"].lower()

    ## E-mail address(es) of the submitter(s)/owner(s), one per line, as
    ## printed from the record itself:
    record_owners = print_record(sysno, 'tm',
                                 [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    if record_owners != "":
        record_owners_list = [owner.lower().strip()
                              for owner in record_owners.split("\n")]
    else:
        record_owners_list = []

    ## A non-guest user whose address is listed in the record may work
    ## with it:
    if user_email not in ("", "guest") and user_email in record_owners_list:
        return ""

    ## Otherwise ask WebAccess whether she is a "curator" for this
    ## submission:
    (auth_code, dummy) = acc_authorize_action(user_info,
                                              "submit",
                                              verbose=0,
                                              doctype=doctype,
                                              act=act)
    if auth_code == 0:
        return ""

    ## Neither owner nor curator: send the user back to the form.
    raise InvenioWebSubmitFunctionStop(CFG_MSG_USER_NOT_AUTHORIZED)
def format_element(bfo, reference_prefix, reference_suffix):
    """
    Prints the references of this record

    Each 999C5 field contributes one HTML fragment made from its
    subfields ($o, $m, $r, $t and the optional $v, $y, $p). A $o subfield
    starts a new <li> item; the last open item is closed at the end.

    @param bfo: BibFormat object of the record being formatted
    @param reference_prefix: a prefix displayed before each reference
    @param reference_suffix: a suffix displayed after each reference
    @return: HTML for all the references; empty string if nothing is printed
    """
    from invenio.config import CFG_SITE_URL, CFG_ADS_SITE
    from invenio.search_engine import get_mysql_recid_from_aleph_sysno, \
         print_record

    if CFG_ADS_SITE:
        ## FIXME: store external sysno into 999 $e, not into 999 $r
        # do not escape field values for now because of things like A&A in
        # 999 $r that are going to be resolved further down:
        references = bfo.fields("999C5", escape=0)
    else:
        references = bfo.fields("999C5", escape=1)

    # Fragments already emitted; empty means nothing has been printed yet.
    emitted = []

    for reference in references:
        pieces = []

        # Membership tests use `in`; dict.has_key() is deprecated.
        if 'o' in reference:
            if emitted:
                # Close the list item opened by an earlier reference.
                pieces.append('</li>')
            pieces.append("<li><small>" + reference['o'] + "</small> ")

        if 'm' in reference:
            pieces.append("<small>" + reference['m'] + "</small> ")

        if 'r' in reference:
            report_number = reference['r']
            if CFG_ADS_SITE:
                # 999 $r contains external sysno to be resolved:
                recid_to_display = get_mysql_recid_from_aleph_sysno(
                    report_number)
                if recid_to_display:
                    pieces.append(print_record(recid_to_display, 'hs'))
                else:
                    pieces.append('<small>' + report_number +
                                  ' (not in ADS)</small>')
            else:
                pieces.append('<small> [<a href="' + CFG_SITE_URL +
                              '/search?f=reportnumber&p=' + report_number +
                              '&ln=' + bfo.lang +
                              '">' + report_number + "</a>] </small> <br />")

        if 't' in reference:
            journal = reference['t']
            volume = reference.get('v', "")
            year = reference.get('y', "")
            pages = reference.get('p', "")
            ejournal = bfo.kb("ejournals", reference.get('t', ""))
            if ejournal != "":
                # The link targets the first page of a "first-last" range.
                pieces.append(' <small> <a href="https://cdsweb.cern.ch/ejournals.py?publication='
                              + journal.replace(" ", "+")
                              + "&volume=" + volume + "&year=" + year
                              + "&page=" + pages.split("-")[0] + '">')
                pieces.append(journal + ": " + volume + " (" + year + ") ")
                pieces.append(pages + "</a> </small> <br />")
            else:
                pieces.append(" <small> " + journal + volume + year + pages +
                              " </small> <br />")

        ref_out = ''.join(pieces)
        if ref_out == '':
            # Nothing printable in this reference: no prefix/suffix either.
            continue
        if reference_prefix is not None:
            ref_out = reference_prefix + ref_out
        if reference_suffix is not None:
            ref_out += reference_suffix
        emitted.append(ref_out)

    if emitted:
        emitted.append('</li>')

    return ''.join(emitted)
def User_is_Record_Owner_or_Curator(parameters, curdir, form, user_info=None):
    """
    In certain actions of some WebSubmit submissions, it may not be
    desirable to allow all users to have a carte blanche to modify
    records in a collection as they see fit. For example, we may say
    that a user is allowed to modify a document ONLY if they are listed
    in the record as an "owner"/"submitter", or if listed in WebAccess
    as a "curator" for the given document type collection.

    This function therefore checks whether the user has the rights to
    carry out a given action on a document type either by being listed
    in the record or explicitly via WebAccess.

    If the user has permission, the function ends silently. If not, it
    will raise an InvenioWebSubmitFunctionStop, informing the user that
    they don't have rights and sending them back to the submission web
    form.

    @parameters: None.
    @return: Empty string.
    @Exceptions raised: InvenioWebSubmitFunctionStop when user is denied
        permission to work with the record.
    """
    global sysno

    ## Document type and action come from the form; they are passed to
    ## WebAccess below.
    doctype = form['doctype']
    act = form['act']

    ## The current user's e-mail address, lower-cased:
    user_email = user_info["email"].lower()

    ## The owner/submitter e-mail address(es) printed from the record,
    ## one per line:
    owners_text = print_record(sysno, 'tm',
                               [CFG_WEBSUBMIT_RECORD_OWNER_EMAIL]).strip()
    owner_emails = []
    if owners_text != "":
        for address in owners_text.split("\n"):
            owner_emails.append(address.lower().strip())

    ## Listed in the record (and not an anonymous/guest user): allowed.
    is_real_user = user_email not in ("", "guest")
    if is_real_user and user_email in owner_emails:
        return ""

    ## Not listed in the record: test via WebAccess whether she is a
    ## "curator" for this submission.
    (auth_code, dummy) = acc_authorize_action(user_info,
                                              "submit",
                                              verbose=0,
                                              doctype=doctype,
                                              act=act)
    if auth_code != 0:
        ## No permission at all: stop and send the user back to the form.
        raise InvenioWebSubmitFunctionStop(CFG_MSG_USER_NOT_AUTHORIZED)

    return ""
def _reformat_and_upload_chunk(xml_content, fmt, allow_task_sleep):
    """Run bibformat on one chunk of record XML and submit it to bibupload.

    @param xml_content: concatenated XML of the records in the chunk
    @param fmt: output format code; upper-cased before being passed on
    @param allow_task_sleep: if true, call task_sleep_now_if_required()
        right before and right after the bibformat call
    @return: tuple (time spent in bibformat, time spent submitting bibupload)
    """
    finalfilename = "%s/rec_fmt_%s.xml" % (CFG_TMPDIR,
                                           time.strftime('%Y%m%d_%H%M%S'))
    filename = "%s/bibreformat.xml" % CFG_TMPDIR
    filehandle = open(filename, "w")
    try:
        filehandle.write(xml_content)
    finally:
        # Close the handle even if the write fails.
        filehandle.close()

    ### bibformat external call ###
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    # os.times()[4] is the elapsed real time.
    t11 = os.times()[4]
    write_message("START bibformat external call", verbose=9)
    command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (CFG_BINDIR, fmt.upper(), CFG_TMPDIR, finalfilename, CFG_TMPDIR)
    os.system(command)
    t22 = os.times()[4]
    write_message("END bibformat external call (time elapsed:%2f)" % (t22 - t11),
                  verbose=9)
    if allow_task_sleep:
        task_sleep_now_if_required(can_stop_too=True)
    bibformat_time = t22 - t11

    ### bibupload external call ###
    t11 = os.times()[4]
    write_message("START bibupload external call", verbose=9)
    task_id = task_low_level_submission('bibupload', 'bibreformat', '-f',
                                        finalfilename)
    write_message("Task #%s submitted" % task_id)
    t22 = os.times()[4]
    write_message("END bibupload external call (time elapsed:%2f)" % (t22 - t11),
                  verbose=9)
    bibupload_time = t22 - t11

    return (bibformat_time, bibupload_time)


def iterate_over_old(list, fmt):
    """Iterate over list of IDs

    Fetches the 'xm' output of every record, and every 10000 records (plus
    once more for the remainder) hands the collected XML to bibformat and
    submits the result to bibupload.

    @param list: iterable of record IDs (the name shadows the builtin but
        is kept for callers passing it by keyword)
    @param fmt: output format code passed to bibformat
    @return: tuple (number of records processed, total bibformat time,
        total bibupload submission time)

    If a record still prints as an empty string after 10 retries, an error
    is written to stderr and the process exits.
    """
    n_rec = 0
    n_max = 10000
    xml_chunks = []         # XML of the records in the current chunk
    tbibformat = 0          # time taken up by external call
    tbibupload = 0          # time taken up by external call
    total_rec = 0           # Number of formatted records

    for record in list:
        n_rec += 1
        total_rec += 1

        write_message("Processing record: %d" % (record), verbose=9)

        query = "id=%d&of=xm" % (record)

        contents = print_record(record, 'xm')
        retries = 0
        while contents == "" and retries < 10:
            # Wait *before* retrying, so a successful retry is not
            # followed by a pointless 10 second pause.
            time.sleep(10)
            contents = print_record(record, 'xm')
            retries += 1
        if contents == "":
            # Test the result rather than the retry counter: the last
            # retry may have succeeded.
            sys.stderr.write("Failed to download %s from %s after 10 attempts... terminating" % (query, CFG_SITE_URL))
            # NOTE: exit status 0 is kept as in the original behaviour.
            sys.exit(0)

        xml_chunks.append(contents)

        if n_rec >= n_max:
            (t_format, t_upload) = _reformat_and_upload_chunk(
                ''.join(xml_chunks), fmt, True)
            tbibformat = tbibformat + t_format
            tbibupload = tbibupload + t_upload
            n_rec = 0
            xml_chunks = []

    ### Process the last re-formated chunk ###
    if n_rec > 0:
        write_message("Processing last record set (%d)" % n_rec, verbose=9)
        (t_format, t_upload) = _reformat_and_upload_chunk(
            ''.join(xml_chunks), fmt, False)
        tbibformat = tbibformat + t_format
        tbibupload = tbibupload + t_upload

    return (total_rec, tbibformat, tbibupload)
""" #!/usr/bin/python # -*- coding: UTF-8 -*- import re from invenio.search_engine import perform_request_search from invenio.search_engine import get_fieldvalues from invenio.search_engine import print_record date = raw_input('date: ') file = 'theory_pubs_' + date + '.doc' output = open(file, 'w') x = perform_request_search(p="find r fermilab pub t and de %s" % date) #if False: #x = [1416470] for r in x: olivia = print_record(r, format='htcv') olivia = re.sub(r'<br/>', '', olivia) olivia = re.sub(r'\s\s+', '', olivia) reports = get_fieldvalues(r, '037__a') pages = get_fieldvalues(r, '300__a') print '\n' + olivia output.write('\n\n' + olivia + '\n') for page in pages: print page + ' pp.' output.write(page + ' pp.\n') for report in reports: if re.search(r'FERMILAB', report): print report output.write(report) output.close()
def get_bibrecord(recid):
    """Return record in BibRecord wrapping.

    @param recid: ID of the record to load
    @return: first element of what create_record() returns for the
        record's 'xm' output, or None when record_exists(recid) is false
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, "xm")
    return create_record(marcxml)[0]
def main(search):
    """Write the 'xsti' output of the matching records and post-process it.

    The records found by `search` (restricted to those carrying one of the
    link/collection markers queried below) are printed in 'xsti' format
    into osti.out, wrapped in <harvest-site> tags. That file is then read
    back line by line; lines are rewritten or extended with extra elements
    built from the record's fields, and the result is written to osti2.out.

    @param search: search pattern; a default query is used when it is empty
    @return: None. Progress and problems are printed to stdout.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm it against the original file,
    # especially the places marked with further NOTE(review) comments.
    if not search:
        search = "find r fermilab and dadd 2014"
    search_original = search
    x = intbitset(perform_request_search(p=search, cc='HEP'))
    print search, ':', len(x)
    fermilab = intbitset(perform_request_search(p="8564_y:fermilab*", cc='HEP'))
    fermilabtoday = intbitset(perform_request_search(p="8564_y:fermilabtoday", cc='HEP'))
    fermilabpub = intbitset(perform_request_search(p="8564_y:fermilabpub", cc='HEP'))
    fermilabthesis = intbitset(perform_request_search(p="8564_y:fermilabthesis", cc='HEP'))
    fermilabconf = intbitset(perform_request_search(p="8564_y:fermilabconf", cc='HEP'))
    fermilabtm = intbitset(perform_request_search(p="8564_y:fermilabtm", cc='HEP'))
    scoap = intbitset(perform_request_search(p="8564_y:'Article from SCOAP3'", cc='HEP'))
    oa = intbitset(perform_request_search(p="8564_z:postprint or 8564_z:openaccess", cc='HEP'))
    cms = intbitset(perform_request_search(p="find r fermilab and cn cms", cc='HEP'))
    # `-` binds tighter than `|`: this is (fermilab - fermilabtoday) united
    # with all the other sets.
    ok = fermilab - fermilabtoday | fermilabpub | fermilabthesis | fermilabconf | fermilabtm | scoap | cms | oa
    print 'Total number of Fermilab links:', len(ok)
    x = x & ok
    print 'Intersection:', len(x), x
    fileName = 'osti.out'
    fileName2 = 'osti2.out'
    # First pass: dump every selected record in 'xsti' format.
    output = open(fileName, 'w')
    output.write("<harvest-site>\n")
    for r in x:
        output.write(print_record(r, format='xsti'))
    output.write("</harvest-site>\n")
    output.close()
    # Second pass: rewrite osti.out line by line into osti2.out.
    output2 = open(fileName2, 'w')
    #noUrl = False
    #arXiv_flag = False
    subj_category_flag = False
    subj_keywords_flag = False
    # The second assignment wins, so checkURL() below is never called.
    url_check_flag = True
    url_check_flag = False
    url_oa = False
    counter = 1
    for i in open(fileName, 'r'):
        issue = None
        i = re.sub(r'(find_paper\.pl\?[\w\-]+)', r'\1.pdf', i)
        i = re.sub(r'pdf\.pdf', 'pdf', i)
        i = re.sub(r'shtml\.pdf', 'shtml', i)
        if re.search(r'accession_num', i):
            matchObj = re.match(r'.*<accession_num>(\d+)</accession_num>.*', i)
            if matchObj:
                doctype_flag = False
                accepted = False
                accession_num = matchObj.group(1)
                search = "find recid " + accession_num + " or irn " + accession_num + " and r fermilab"
                y = perform_request_search(p=search, cc='HEP')
                # `recid` is only (re)assigned when exactly one record
                # matches; later branches reuse whatever value it last had.
                if len(y) == 1 :
                    recid = y[0]
                    if VERBOSE:
                        print("{0} {1} {2}".format(counter, accession_num, recid))
                    counter += 1
                    url_oa = False
                    try:
                        doi = get_fieldvalues(recid, '0247_a')[0]
                        search_oa = 'find recid ' + str(recid) + ' and exp cern-lhc-cms'
                        if VERBOSE:
                            print "doi =", doi
                            print "search_oa =", search_oa
                        if re.search(r'PhysRevSTAB', doi):
                            url_oa = 'http://journals.aps.org/prstab/pdf/' + doi
                        #elif perform_request_search(p=search_oa, cc='HEP'):
                        #    if re.search(r'PhysRevD', doi):
                        #        url_oa = 'http://journals.aps.org/prd/pdf/10.1103/' + doi
                        #    elif re.search(r'PhysRevLett', doi):
                        #        url_oa = 'http://journals.aps.org/prl/pdf/10.1103/' + doi
                        #    if VERBOSE:
                        #        print url_oa
                    except:
                        # Any failure here (e.g. no DOI) is silently ignored.
                        pass
                    try:
                        accepted = get_fieldvalues(recid, '8564_3')
                        urls = get_fieldvalues(recid, '8564_u')
                        for url in urls:
                            if re.search('scoap3-fulltext.pdf', url):
                                url_oa = url
                                accepted = True
                            elif re.search(r'record/\d+/files/arXiv', url) and recid in cms and not url_oa:
                                #This is to catch the CMS papers
                                url_oa = url
                        # NOTE(review): presumably runs after the loop, in
                        # which case it replaces the values found above
                        # unless get_url() raises - confirm.
                        [url_oa, accepted] = get_url(recid)
                    except:
                        pass
                    if url_oa:
                        i += " <url>" + url_oa + "</url>\n"
                        # NOTE(review): journal_type is assumed to be
                        # emitted only together with a url - confirm.
                        if accepted:
                            i += " <journal_type>AM</journal_type>\n"
                        else:
                            i += " <journal_type>FT</journal_type>\n"
                    authors = get_fieldvalues(recid, '700__a')
                    if len(authors) > 9 :
                        author = get_fieldvalues(recid, '100__a')[0]
                        author = " <author>" + author + "; et al.</author>\n"
                        i = i + author
                    collaboration = get_fieldvalues(recid, '710__g')
                    if collaboration:
                        collaboration = cgi.escape(collaboration[0])
                        collaboration = " <contributor_organizations>" + collaboration + "</contributor_organizations>\n"
                        i = i + collaboration
                    #search = "001:" + str(recid) + " 8564_y:FERMILAB*"
                    ##search = "001:" + str(recid) + " 037__9:arXiv"
                    #z = perform_request_search(p=search, cc='HEP')
                    #if len(z) < 1 :
                    #    noUrl = True
                    #    print 'No url for ', recid
                    #    break
                    #if len(z) == 1 : arXiv_flag = True
                    #else : arXiv_flag = False
                    phd_date = get_fieldvalues(recid, '502__d')
                    normal_date = get_fieldvalues(recid, '269__c')
                    try:
                        published_date = get_fieldvalues(recid, '260__c')[0]
                    except IndexError:
                        published_date = False
                    # A <date> is only added here when 269__c is empty.
                    if phd_date and not normal_date:
                        phd_date = " <date>" + phd_date[0] + "</date>\n"
                        i = i + phd_date
                    elif published_date and not normal_date:
                        published_date = " <date>" + published_date + "</date>\n"
                        i = i + published_date
                    if VERBOSE:
                        print i
        #if arXiv_flag and re.search("<availability>http://arXiv.org", i) :
        #    url = i
        #    url = re.sub(r'availability', 'url', url)
        #    url = re.sub(r'arXiv.org/abs', 'arXiv.org/pdf', url)
        #    i = i + url
        #    #noUrl = False
        elif re.search("<url>", i):
            # A url line is dropped when a url was already added above.
            if url_oa:
                i = ''
            if re.search("www.fnal.gov/pub/today", i):
                i = ''
            elif re.search("<url>.*fnal", i) :
                matchObj = re.match(r'.*<url>(.*fnal.*)</url>.*', i)
                if matchObj:
                    url_to_check = matchObj.group(1)
                    if re.search("shtml", url_to_check) :
                        url_to_check = re.sub(r'.*fermilab\-(.*)\.shtml', r'http://lss.fnal.gov/cgi-bin/find_paper.pl?\1.pdf', url_to_check)
                    if url_check_flag:
                        if not checkURL(url_to_check) :
                            error_message = "Something wrong with " + url_to_check
                            print error_message
                            break
                        else :
                            print "No problem with url: ", url_to_check
                    i = " <url>" + url_to_check + "</url>\n"
            # NOTE(review): this else is assumed to belong to the
            # today/fnal chain (any other url line is dropped) - confirm.
            else :
                i = ''
        if re.search("<title>", i) :
            title = get_fieldvalues(recid, '245__a')[0]
            title = cgi.escape(title)
            i = " <title>" + title + "</title>\n"
        if re.search("<date>", i) :
            # Rewrite YYYY-MM-DD, YYYY-MM or YYYY as MM/DD/YYYY, filling
            # missing parts with 01.
            if re.search(">\d\d\d\d\-\d\d\-\d\d<", i) :
                i = re.sub(r'>(\d\d\d\d)\-(\d\d)\-(\d\d)<', r'>\2/\3/\1<', i)
            elif re.search(">\d\d\d\d\-\d\d<", i) :
                i = re.sub(r'>(\d\d\d\d)\-(\d\d)<', r'>\2/01/\1<', i)
            elif re.search(">\d\d\d\d<", i) :
                i = re.sub(r'>(\d\d\d\d)<', r'>01/01/\1<', i)
            else :
                print "Bad date: ", recid, i
                break
            # NOTE(review): the abstract is assumed to be appended only
            # after a <date> line - confirm.
            abstract = get_fieldvalues(recid, '520__a')
            if abstract :
                abstract = abstract[0]
                abstract = cgi.escape(abstract)
                abstract = " <abstract>" + abstract + "</abstract>\n"
                i = i + abstract
        if re.search("<doctype>", i) :
            doctype_flag = True
        if re.search("<arXiv_eprint>", i) :
            if not doctype_flag:
                try:
                    report = get_fieldvalues(recid, '037__z')[0]
                    report = " <report_number>" + report + "</report_number>\n"
                    i = i + report
                    i = i + " <doctype>JA</doctype>\n"
                except:
                    pass
        if re.search("journal", i) :
            i = re.sub(r'<journal_info>(.*[ \.])(\S+)\:(\S+)\,(\d+)</journal_info>', r'<journal_name>\1</journal_name>\n <journal_volume>\2</journal_volume>\n <journal_issue></journal_issue>', i)
            issue = get_fieldvalues(recid, '773__n')
            if issue :
                issue = issue[0]
                issue = "<journal_issue>" + str(issue) + "</journal_issue>"
                i = re.sub(r'<journal_issue></journal_issue>', issue, i)
        if re.search("<sponsor_org>", i) :
            i = re.sub(r'DOE Office of Science', r'USDOE Office of Science (SC), High Energy Physics (HEP) (SC-25)', i)
        # The two flags are never reset, so only the first subj_category
        # and the first subj_keywords line of the whole file are kept.
        if re.search("<subj_category>", i) :
            if subj_category_flag :
                i = ''
            subj_category_flag = True
        if re.search("<subj_keywords>", i) :
            if subj_keywords_flag :
                i = ''
            subj_keywords_flag = True
        if i:
            #print i
            output2.write(i)
    output2.close()
    print search_original