def generate_list_to_send(search):
    '''Generate a list to send to MSNET.'''
    filename = 'tmp_' + __file__
    filename = re.sub(r'\.py', '_send.txt', filename)
    output = open(filename, 'w')
    recids_nomatch = find_recids_nomatch()
    print search
    result_m = perform_request_search(p=search, cc='HEP')
    print search, len(result_m)
    search = "035__9:msnet"
    result_i = perform_request_search(p=search, cc='HEP')
    search = "0247_2:doi"
    result_d = perform_request_search(p=search, cc='HEP')
    result = intbitset(result_m) & intbitset(result_d) - intbitset(result_i)
    result = result - intbitset(recids_nomatch)
    for recid in result:
        try:
            doi = get_fieldvalues(recid, '0247_a')[0]
        except IndexError:
            print 'Problem with:', recid
            break
        output.write(str(recid) + ',' + doi + '\n')
    output.close()
    print filename
def main():
    counter = 0
    filename = 'ADS_eprints_missing_in_INSPIRE.csv'
    mismatch_filename = 'ADS_eprints_missing_in_INSPIRE_mismatch.csv'
    output = open(filename, 'w')
    mismatch_output = open(mismatch_filename, 'w')
    records = collections.defaultdict(dict)
    search = '0247_2:doi -037__9:arxiv'
    results = perform_request_search(p=search, cc='HEP')
    for r in results:
        doi = get_fieldvalues(r, '0247_a')
        if doi:
            records[r]['doi'] = doi
    eprints = []
    eprint_search = perform_request_search(p='037__9:arxiv', cc='HEP')
    for e in eprint_search:
        eprint = get_eprint_id(e)
        if eprint:
            eprint = eprint.replace('arxiv:', '')
            eprints.append(eprint)
    tree = ET.parse(DOCUMENT)
    root = tree.getroot()
    for child in root:
        if counter < 10:
            if 'doi' in child.attrib and 'preprint_id' in child.attrib:
                found_eprint = check_doi(child.attrib, records, eprints)
                if found_eprint:
                    if found_eprint[0] is True:
                        counter += 1
                        output.write('%s,%s,%s\n' % (found_eprint[0],
                                                     found_eprint[1],
                                                     found_eprint[2]))
                    else:
                        mismatch_output.write('%s,%s,%s\n' % (found_eprint[0],
                                                              found_eprint[1],
                                                              found_eprint[2]))
    output.close()
    mismatch_output.close()
    print counter
def calculate_index(author):
    '''Calculate the author's h-index and g-index.'''
    search = "find ea " + author
    result = perform_request_search(p=search, cc='HEP')
    if len(result) == 0:
        print author, 'has no citations.'
        return None
    citation_list = []
    for recid in result:
        search = 'refersto:recid:' + str(recid)
        citation_list.append(len(perform_request_search(p=search, cc='HEP')))
    citation_list.sort(reverse=True)
    total_citations = 0
    h_index = False
    g_index = False
    for index, value in enumerate(citation_list, 1):
        total_citations += value
        #print '{0:3d} {1:6d} {2:6d} {3:6d}'.format(index, value,
        #                                           total_citations,
        #                                           index*index)
        if index > value and h_index == False:
            h_index = index - 1
        if total_citations > index*index:
            g_index = index
    print '{0:20s} {1:7d} {2:7d}'.format(author, h_index, g_index)
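# Hedged usage sketch (not part of the original module): assumes the same
# Invenio environment where calculate_index() above is available, and that
# the author strings are valid "find ea" values. Illustrative only.
def example_author_indexes():
    """Print a header plus h- and g-index for a few sample authors."""
    print '{0:20s} {1:7s} {2:7s}'.format('author', 'h-index', 'g-index')
    for author in ('ellis, john richard', 'witten, edward'):
        calculate_index(author)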
def main(search):
    """This module returns a Google-like result showing the most highly
    cited papers from a given result."""
    all_refs = []
    if not search:
        # Default searches used for testing; the last assignment wins.
        search = 'standard model'
        search = '"dark matter"'
        search = 'qcd sum rules'
    print 'Your search is', search
    result = perform_request_search(p=search, cc='HEP')
    print 'The result is', len(result)
    for recid in result:
        try:
            search = 'citedby:recid:' + str(recid)
            refs = perform_request_search(p=search, cc='HEP')
            all_refs += refs
        except:
            print 'problem with', recid
    all_refs.sort()
    counted_all_refs = Counter(all_refs)
    sorted_count = sorted(counted_all_refs.items(), key=operator.itemgetter(1))
    for recid_count, count in sorted_count[-10:]:
        url = 'http://inspirehep.net/record/' + str(recid_count)
        print count, url
        title = get_fieldvalues(recid_count, '245__a')[0]
        try:
            author = get_fieldvalues(recid_count, '710__g')[0]
        except:
            try:
                author = get_fieldvalues(recid_count, '100__a')[0]
            except:
                author = 'No Author'
        print ' ', author, ':', title
def eprints():
    total = 0
    fermilab = get_collection_reclist('Fermilab')
    print '{0:4s} {1:3s} {2:3s} {3:3s}'.format('Date', 'All', 'FNA', '%')
    #date_range = ['1901', '1902', '1903']
    date_range = range(1, 20)
    for yymm in date_range:
        yymm = str(yymm)
        if len(yymm) == 1:
            yymm = '0' + yymm
        search_f = '037__a:fermilab* 037__c:physics.acc-ph 037__a:"arXiv:' + \
                   yymm + '*"'
        search = '037__c:physics.acc-ph 037__a:"arXiv:' + yymm + '*"'
        x = perform_request_search(p=search, cc='HEP')
        search = '037__c:acc-phys 037__a:"acc-phys/' + yymm + '*"'
        y = perform_request_search(p=search, cc='HEP')
        x_f = intbitset(x) & fermilab
        y_f = intbitset(y) & fermilab
        length = len(x) + len(y)
        length_f = len(x_f) + len(y_f)
        try:
            ratio = float(length_f)/float(length)*100.0
        except ZeroDivisionError:
            ratio = 0
        print '{0:4s} {1:3d} {2:3d} {3:3f}'.format(yymm, length, length_f, ratio)
        total += length
    print "Total =", total
def _get_coauthors_fallback(personid, collabs):
    # python 2.4 does not support max() with key argument.
    # Please remove this function when python 2.6 is supported.
    def max_key(iterable, key):
        try:
            ret = iterable[0]
        except IndexError:
            return None
        for i in iterable[1:]:
            if key(i) > key(ret):
                ret = i
        return ret

    if collabs:
        query = 'exactauthor:"%s" and (%s)' % (personid,
                ' or '.join([('collaboration:"%s"' % x) for x in zip(*collabs)[0]]))
        exclude_recs = perform_request_search(rg=0, p=query)
    else:
        exclude_recs = []
    recids = perform_request_search(rg=0, p='exactauthor:"%s"' % str(personid))
    recids = list(set(recids) - set(exclude_recs))
    a = format_records(recids, 'WAPAFF')
    a = [pickle.loads(p) for p in a.split('!---THEDELIMITER---!') if p]
    coauthors = {}
    for rec, affs in a:
        keys = affs.keys()
        for n in keys:
            try:
                coauthors[n].add(rec)
            except KeyError:
                coauthors[n] = set([rec])
    coauthors = [(x, x, len(coauthors[x])) for x in coauthors
                 if x.lower() != personid.lower()]
    return coauthors
def _get_hepnames_data_fallback(bibauthorid_data, person_id):
    '''
    Returns hepnames data
    @param bibauthorid_data: dict with 'is_baid':bool, 'cid':canonicalID, 'pid':personid
    '''
    cid = str(person_id)
    hepdict = {}
    if bibauthorid_data['cid']:
        cid = bibauthorid_data['cid']
    hepRecord = perform_request_search(rg=0, cc='HepNames', p=cid)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]

    hepdict['cid'] = cid
    hepdict['pid'] = person_id

    if not hepRecord or len(hepRecord) > 1:
        #present choice dialog with alternatives?
        names_dict = get_person_names_dicts(person_id)
        dbnames = names_dict[0]['db_names_dict'].keys()
        query = ' or '.join(['"%s"' % str(n) for n in dbnames])
        additional_records = perform_request_search(rg=0, cc='HepNames', p=query)[:CFG_WEBAUTHORPROFILE_MAX_HEP_CHOICES]
        hepRecord += additional_records
        hepdict['HaveHep'] = False
        hepdict['HaveChoices'] = bool(hepRecord)
        #limits possible choices!
        hepdict['HepChoices'] = [(format_record(x, 'hb'), x) for x in hepRecord]
        hepdict['heprecord'] = hepRecord
        hepdict['bd'] = bibauthorid_data
    else:
        #show the heprecord we just found.
        hepdict['HaveHep'] = True
        hepdict['HaveChoices'] = False
        hepdict['heprecord'] = format_record(hepRecord[0], 'hd')
        hepdict['bd'] = bibauthorid_data
    return hepdict
def find_records():
    '''Looks for candidate records.'''
    search = "find fc g not fc m not fc t and tc p and jy " + str(YEAR)
    result_m = perform_request_search(p=search, cc='HEP')
    search = "035__9:msnet"
    result_i = perform_request_search(p=search, cc='HEP')
    result = intbitset(result_m) - intbitset(result_i)
    return result
def get_institution_ids(text):
    # HACK: I know... I am sorry for that. It's for a good cause
    # FIXME: use redis
    global INSTITUTION_CACHE
    if text not in INSTITUTION_CACHE:
        INSTITUTION_CACHE[text] = intbitset(perform_request_search(cc='Institutions', p='110__u:"%s"' % text)) or \
                                  intbitset(perform_request_search(cc='Institutions', p='110__t:"%s"' % text))
    return INSTITUTION_CACHE[text]
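# Hedged usage sketch (not in the original source): assumes INSTITUTION_CACHE
# is the module-level dict used above and that an 'Institutions' collection
# exists. Shows that repeated lookups for the same text hit the in-memory cache.
def example_institution_lookup():
    for affiliation in ('CERN', 'Fermilab', 'CERN'):
        recids = get_institution_ids(affiliation)
        print affiliation, len(recids), 'matching institution records'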
def goto(type, document='', number=0, lang='en', modif=0):
    today = time.strftime('%Y-%m-%d')
    if type == 'SSR':
        ## We would like a CERN Staff Rules and Regulations
        recids = perform_request_search(cc='Staff Rules and Regulations',
                                        f="925__a:1996-01-01->%s 925__b:%s->9999-99-99" % (today, today))
        recid = recids[-1]
        reportnumber = get_fieldvalues(recid, '037__a')[0]
        edition = int(reportnumber[-2:])  ## e.g. CERN-STAFF-RULES-ED08
        return BibRecDocs(recid).get_bibdoc(make_cern_ssr_docname(lang, edition, modif)).get_file('.pdf').get_url()
    elif type == "OPER-CIRC":
        recids = perform_request_search(cc="Operational Circulars",
                                        p="reportnumber=\"CERN-OPER-CIRC-%s-*\"" % number,
                                        sf="925__a")
        recid = recids[-1]
        documents = {}
        bibrecdocs = BibRecDocs(recid)
        for docname in bibrecdocs.get_bibdoc_names():
            ldocname = docname.lower()
            if 'implementation' in ldocname:
                _register_document(documents, docname, 'implementation_en')
            elif 'application' in ldocname:
                _register_document(documents, docname, 'implementation_fr')
            elif 'archiving' in ldocname:
                _register_document(documents, docname, 'archiving_en')
            elif 'archivage' in ldocname:
                _register_document(documents, docname, 'archiving_fr')
            elif 'annexe' in ldocname or 'annexes_fr' in ldocname:
                _register_document(documents, docname, 'annex_fr')
            elif 'annexes_en' in ldocname or 'annex' in ldocname:
                _register_document(documents, docname, 'annex_en')
            elif '_en_' in ldocname or '_eng_' in ldocname or '_angl_' in ldocname:
                _register_document(documents, docname, 'en')
            elif '_fr_' in ldocname:
                _register_document(documents, docname, 'fr')
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()
    elif type == 'ADMIN-CIRC':
        recids = perform_request_search(cc="Administrative Circulars",
                                        p="reportnumber=\"CERN-ADMIN-CIRC-%s-*\"" % number,
                                        sf="925__a")
        recid = recids[-1]
        documents = {}
        bibrecdocs = BibRecDocs(recid)
        for docname in bibrecdocs.get_bibdoc_names():
            ldocname = docname.lower()
            if 'implementation' in ldocname:
                _register_document(documents, docname, 'implementation-en')
            elif 'application' in ldocname:
                _register_document(documents, docname, 'implementation-fr')
            elif 'archiving' in ldocname:
                _register_document(documents, docname, 'archiving-en')
            elif 'archivage' in ldocname:
                _register_document(documents, docname, 'archiving-fr')
            elif 'annexe' in ldocname or 'annexes_fr' in ldocname:
                _register_document(documents, docname, 'annex-fr')
            elif 'annexes_en' in ldocname or 'annex' in ldocname:
                _register_document(documents, docname, 'annex-en')
            elif '_en_' in ldocname or '_eng_' in ldocname or '_angl_' in ldocname:
                _register_document(documents, docname, 'en')
            elif '_fr_' in ldocname:
                _register_document(documents, docname, 'fr')
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()
def test_fin_to_find_trans(self):
    """SPIRES search syntax - fin a ellis, j == find a ellis, j"""
    fin_search = "fin a ellis, j"
    fin_result = perform_request_search(p=fin_search)
    find_search = "find a ellis, j"
    find_result = perform_request_search(p=find_search)
    # We don't care if results are [], as long as they're the same
    # Uncovered corner case: parsing could be broken and also happen to
    # return [] twice. Unlikely though.
    self.assertEqual(fin_result, find_result)
def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    @param kbname:     name of the knowledge base
    @param searchwith: a term to search with
    """
    from invenio import search_engine

    #first check that the kb in question is dynamic
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    kbtype = bibknowledge_dblayer.get_kb_type(kbid)
    if not kbtype:
        return []
    if kbtype != 'd':
        return []

    #get the configuration so that we see what the field is
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict:
        return []
    if 'field' not in confdict:
        return []
    field = confdict['field']
    expression = confdict['expression']
    collection = ""
    if 'collection' in confdict:
        collection = confdict['collection']
    reclist = []  # return this
    if searchwith and expression:
        if (expression.count('%') > 0):
            expression = expression.replace("%", searchwith)
            reclist = search_engine.perform_request_search(p=expression,
                                                           cc=collection)
        else:
            #no %.. just make a combination
            expression = expression + " and " + searchwith
            reclist = search_engine.perform_request_search(p=expression,
                                                           cc=collection)
    else:
        # either no expr or no searchwith.. but never mind about searchwith
        if expression:
            # in this case: only expression
            reclist = search_engine.perform_request_search(p=expression,
                                                           cc=collection)
        else:
            #make a fake expression so that only records that have this field
            #will be returned
            fake_exp = "/.*/"
            if searchwith:
                fake_exp = searchwith
            reclist = search_engine.perform_request_search(f=field, p=fake_exp,
                                                           cc=collection)
    if reclist:
        return [val for (val, dummy) in
                search_engine.get_most_popular_field_values(reclist, field)]
    return []  # in case nothing worked
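# Hedged usage sketch (not part of the original module): the knowledge base
# name 'JOURNALS' and the search term are made-up examples; a real dynamic
# kb name from your installation is needed for this to return anything.
def example_kbd_lookup():
    values = get_kbd_values('JOURNALS', searchwith='Phys')
    for value in values[:10]:
        print value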
def test_irn_processing(self):
    """SPIRES search syntax - find irn 1360337 == find irn SPIRES-1360337"""
    # Added for trac-130
    with_spires = "fin irn SPIRES-1360337"
    with_result = perform_request_search(p=with_spires)
    without_spires = "fin irn 1360337"
    without_result = perform_request_search(p=without_spires)
    # We don't care if results are [], as long as they're the same
    # Uncovered corner case: parsing could be broken and also happen to
    # return [] twice. Unlikely though.
    self.assertEqual(with_result, without_result)
def match(self, query=None, **kwargs):
    """Try to match the current record to the database."""
    from invenio.search_engine import perform_request_search
    if not query:
        # We use default setup
        recid = self.record["001"][0][3]
        return perform_request_search(p="035:%s" % (recid,),
                                      of="id")
    else:
        if "recid" not in kwargs:
            kwargs["recid"] = self.record["001"][0][3]
        return perform_request_search(p=query % kwargs,
                                      of="id")
def job_stats():
    grand_total = 0
    print "{0:20s} {1:5s} {2:5s} {3:5s} {4:5s}".format('search', 'open',
                                                       'closed', 'total',
                                                       'grand')
    for month in range(1, 4):
        if month < 10:
            month = '0' + str(month)
        search = 'dadd:2019-' + str(month)
        x = perform_request_search(p=search, cc='Jobs')
        y = perform_request_search(p=search, cc='Jobs Hidden')
        total = len(x + y)
        grand_total += total
        print "{0:20s} {1:5d} {2:5d} {3:5d} {4:5d}".format(search, len(x),
                                                           len(y), total,
                                                           grand_total)
def get_reference_number(tarball):
    '''
    Attempts to determine the reference number of the file by searching.

    @param: tarball (string): the name of the tarball as downloaded from
        arXiv
    @return: refno (string): the reference number of the paper
    '''
    # we just need the name of the file
    tarball = os.path.split(tarball)[1]

    # the name right now looks like arXiv:hep-ph_9703009
    # or arXiv:0910.0476
    if tarball.startswith(ARXIV_HEADER):
        tarball = tarball.split(':')[1]
        if len(tarball.split('_')) > 1:
            arXiv_record = tarball.replace('_', '/')
        else:
            arXiv_record = tarball
        result = perform_request_search(p=arXiv_record, f='reportnumber')
        if len(result) == 0:
            return tarball
        return str(result[0])

    arXiv_record = re.findall('(([a-zA-Z\\-]+/\\d+)|(\\d+\\.\\d+))', tarball)
    if len(arXiv_record) > 1:
        arXiv_record = arXiv_record[0]
        result = perform_request_search(p=arXiv_record, f='reportnumber')
        if len(result) > 0:
            return str(result[0])

    tarball_mod = tarball.replace('_', '/')
    arXiv_record = re.findall('(([a-zA-Z\\-]+/\\d+)|(\\d+\\.\\d+))',
                              tarball_mod)
    if len(arXiv_record) > 1:
        arXiv_record = arXiv_record[0]
        result = perform_request_search(p=arXiv_record, f='reportnumber')
        if len(result) > 0:
            return str(result[0])
    return tarball
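# Hedged usage sketch (not from the original source): the tarball paths are
# invented examples of the two filename shapes handled above (old-style
# 'arXiv:hep-ph_9703009' and new-style 'arXiv:0910.0476').
def example_reference_numbers():
    for tarball in ('/tmp/arXiv:hep-ph_9703009', '/tmp/arXiv:0910.0476'):
        print tarball, '->', get_reference_number(tarball)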
def get_list():
    papers = []
    prev_version = perform_request_search()
    for recid in prev_version:
        rec = get_record(recid)
        doi = None
        arxiv_id = None
        try:
            if ('2', 'DOI') in rec['024'][0][0]:
                for t in rec['024'][0][0]:
                    if 'a' in t:
                        doi = t[1]
                if not doi:
                    print "No DOI for record: %i" % (recid, )
            else:
                print "No DOI for record: %i" % (recid, )
        except:
            print "No DOI for record: %i" % (recid, )
        checksum, url, url_type = get_pdf(recid)
        if '037' in rec.keys():
            if ('9', 'arXiv') in rec.get('037')[0][0]:
                for t in rec.get('037')[0][0]:
                    if 'a' in t:
                        arxiv_id = t[1]
        papers.append((recid, arxiv_id, get_creation_date(recid), checksum,
                       url, url_type, doi))
    return papers
def find_records_containing_email():
    """
    Searches for HEP records with emails
    """
    #emails = set()
    #recids = set()
    #for email in EMAILS_HEP:
    #    if email not in EMAILS_HEPNAMES:
    #        continue
    #    if email.startswith('email'):
    #        continue
    #    emails.add(email)
    #    search = "100__m:{0} or 700__m:{0}".format(email)
    #    result = perform_request_search(p=search, cc='HEP')
    #    if len(result) > 1:
    #        recids.update(result)
    #print recids
    #quit()

    search = r'100__m:/\@/ or 700__m:/\@/ - 100__m:email* - 700__m:email*'
    search = r'100__m:/\w/ or 700__m:/\w/'
    if SEARCH:
        search = SEARCH
    result = perform_request_search(p=search, cc='HEP')
    print "Checking", len(result), "records"
    return sorted(result, reverse=True)
def get_matched_id(subfields):
    citation_element = reference2citation_element(subfields)

    if "doi_string" in citation_element:
        recids = find_doi(citation_element)
        if len(recids) == 1:
            return recids.pop()
    if "journal_title" in citation_element and "year" in citation_element:
        recids = find_journal(citation_element)
        if len(recids) == 1:
            return recids.pop()
    if "pubnote" in citation_element:
        recids = perform_request_search(p=citation_element["pubnote"],
                                        f="journal")
        if len(recids) == 1:
            return recids.pop()
    if "report_num" in citation_element:
        recids = find_reportnumber(citation_element)
        if len(recids) == 1:
            return recids.pop()
    if "ISBN" in citation_element:
        recids = find_isbn(citation_element)
        if len(recids) == 1:
            return recids.pop()
    # if 'title' in citation_element:
    #     recids = find_book(citation_element)
    #     if len(recids) == 1:
    #         return recids.pop()
    return None
def _submit_changes_to_bibupload(search_criteria, update_commands, upload_mode,
                                 tag_list, collection, req):
    """This method takes care of submitting the changes to the server
    through bibupload.

    @param search_criteria: the search criteria used for filtering the
        records. The changes will be applied to all the records matching
        the criteria

    @param update_commands: the commands defining the changes. These
        commands perform the necessary changes before the records are
        submitted
    """
    if collection == "Any collection":
        collection = ""
    record_IDs = search_engine.perform_request_search(p=search_criteria,
                                                      c=collection)
    num_records = len(record_IDs)

    updated_records = []
    for current_id in record_IDs:
        current_updated_record = _get_updated_record(current_id, update_commands)
        updated_records.append(current_updated_record)

    file_path = _get_file_path_for_bibupload()
    _save_records_xml(updated_records, file_path, upload_mode, tag_list)
    return _upload_file_with_bibupload(file_path, upload_mode, num_records, req)
def run(query_file):
    fi = open(query_file, 'r')
    queries = filter(len, map(lambda x: x.strip(), fi.readlines()))
    fi.close()

    success = failure = error = 0
    for q in queries:
        print '---'
        print q
        inv_res = len(search_engine.perform_request_search(None, p=q))
        msg = 'NO'
        inv_query = '\t\t'
        try:
            (solr_res, inv_query) = ask_solr(q)
        except Exception, e:
            solr_res = None
            #print e
            msg = 'ER'
            error += 1
            failure -= 1
        print inv_query
        if inv_res == solr_res:
            success += 1
            msg = 'OK'
        else:
            failure += 1
        print "%s invenio=%s montysolr=%s" % (msg, inv_res, solr_res)
def test_get_record(self):
    for recid in perform_request_search(p=""):
        # Our bibrecord we want to test
        record = self.records_cache[recid]
        # Reference implementation
        original_record = get_record_original(recid)
        self.assertXmlEqual(record.to_xml(), print_rec(original_record))
def perform_candidate_record_search(requestType, data):
    """Handle search requests."""
    max_results = 999
    too_many = False
    result = {
        'resultCode': 0,
        'resultText': ''
    }
    if requestType == "searchCandidates":
        recids = perform_request_search(p=data['query'])
        if len(recids) > max_results:
            too_many = True
        else:
            captions = [search_result_info(x) for x in recids]
            alternative_titles = [remove_html_markup(print_record(x, "hs"))
                                  for x in recids]
            search_results = [recids, captions, alternative_titles]
    elif requestType == "searchRevisions":
        revisions = get_record_revision_ids(data['recID1'])
        captions = [split_revid(x, 'datetext')[1] for x in revisions]
        search_results = [revisions, captions]

    if too_many:
        result['resultCode'] = 1
        result['resultText'] = 'Too many results'
    else:
        result['results'] = search_results
        result['resultText'] = '%s results' % len(search_results[0])
    return result
def get_recids_for_set_spec(set_spec):
    """
    Returns the list (as intbitset) of recids belonging to 'set'

    Parameters:

      set_spec - *str* the set_spec for which we would like to get the
                 recids
    """
    recids = intbitset()

    for set_def in get_set_definitions(set_spec):
        new_recids = perform_request_search(c=[coll.strip()
                                               for coll in set_def['c'].split(',')],
                                            p1=set_def['p1'],
                                            f1=set_def['f1'],
                                            m1=set_def['m1'],
                                            op1=set_def['op1'],
                                            p2=set_def['p2'],
                                            f2=set_def['f2'],
                                            m2=set_def['m2'],
                                            op2=set_def['op2'],
                                            p3=set_def['p3'],
                                            f3=set_def['f3'],
                                            m3=set_def['m3'],
                                            ap=0)
        recids |= intbitset(new_recids)

    return recids
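# Hedged usage sketch (not in the original module): 'cern:experiment' is a
# made-up set_spec; any set_spec configured in the OAI repository would do.
# Illustrates that the return value behaves like an intbitset.
def example_set_size():
    recids = get_recids_for_set_spec('cern:experiment')
    print 'Set contains %d records' % len(recids)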
def fetch_xml_files(folder, els, new_files):
    """Recursively gets the downloaded xml files
    converts them to marc xml format and stores them
    in the same directory with the name "upload.xml"."""
    if exists(folder):
        for subfolder in listdir(folder):
            subfolder = join(folder, subfolder).lstrip()
            if isfile(subfolder):
                if not subfolder.endswith('upload.xml'):
                    folders = subfolder.split('/')
                    folders[-1] = 'upload.xml'
                    file_loc = "/".join(folders)
                    if not exists(file_loc):
                        xmlFile = open(subfolder, "r")
                        xmlString = xmlFile.read()
                        xmlFile.close()
                        dom_xml = xml.dom.minidom.parseString(xmlString)
                        doi = els.get_publication_information(dom_xml)[-1]
                        write_message("DOI in record: %s" % (doi,))
                        res = perform_request_search(p="doi:%s" % (doi,),
                                                     of="id")
                        if not res:
                            write_message("DOI not found")
                            doctype = els.get_doctype(dom_xml).lower()
                            #ignore index pages
                            if doctype in INTERESTING_DOCTYPES:
                                marcfile = open(file_loc, 'w')
                                marcfile.write(els.get_record(subfolder))
                                marcfile.close()
                                new_files.append(file_loc)
                                task_sleep_now_if_required(can_stop_too=False)
                        else:
                            write_message("DOI found: %s" % (res,))
            else:
                fetch_xml_files(subfolder, els, new_files)
def build_issns_from_local_site():
    """
    Retrieves the ISSNs from the local database.
    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """
    rec_id_list = perform_request_search(cc='Periodicals', of='id')
    built_issns = {}
    #built_issns = issns  # Uncomment this to extend existing issns dict
                          # (e.g. in case of manual addition)

    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0]  # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()
                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
def oaigetsysnolist(set="", fromdate="", untildate=""):
    "Returns list of system numbers for the OAI set 'set', modified from 'fromdate' until 'untildate'."
    from invenio.oai_repository_updater import get_set_definitions

    if fromdate != "":
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    if untildate != "":
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()

    collections = []
    for set_definition in get_set_definitions(set):
        collections.extend(coll.strip() for coll in set_definition['c'].split(','))

    recids = perform_request_search(f1=CFG_OAI_ID_FIELD, p1="oai:*", m1="e",
                                    op1='a',
                                    f2=((set and CFG_OAI_SET_FIELD) or ""),
                                    p2=set, m2="e",
                                    d1=utc_to_localtime(fromdate),
                                    d2=utc_to_localtime(untildate),
                                    c=collections, dt='m', ap=0)
    ## Let's discard non public records
    return list(intbitset(recids) - get_all_restricted_recids())
def get_hepnames_recid_from_email(email):
    """
    Find the HEPNames recid based on email
    """
    if email not in EMAILS_HEPNAMES:
        if VERBOSE:
            print "WARNING: no hepnames record found for %s: " % (email)
        return None
    emailsearch = '371__m:%s or 371__o:%s'
    reclist = perform_request_search(p=emailsearch % (email, email),
                                     cc='HepNames')
    hidden_m = search_unit(email, f='595__m', m='a')
    hidden_o = search_unit(email, f='595__o', m='a')
    reclist_hidden = hidden_m or hidden_o & HN
    reclist = intbitset(reclist) or reclist_hidden
    if len(reclist) == 1:
        return reclist[0]
    elif len(reclist) > 1:
        if VERBOSE:
            print "WARNING: more than one hepnames record found for %s: " \
                % (email)
            print '\t' + ', '.join([str(r) for r in reclist])
        return [r for r in reclist]
    else:
        if VERBOSE:
            print "WARNING: no hepnames record found for %s: " % (email)
        return None
def get_recid_from_id(id_number):
    search = '035__a:' + id_number
    result = perform_request_search(p=search, cc='HepNames')
    if len(result) == 1:
        return result[0]
    else:
        return None
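# Hedged usage sketch (not in the original source): the identifier is a
# made-up 035__a value; get_recid_from_id() returns None unless exactly one
# HepNames record matches.
def example_recid_lookup():
    recid = get_recid_from_id('ORCID:0000-0002-1825-0097')
    if recid:
        print 'HepNames recid:', recid
    else:
        print 'No unique HepNames match.'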
def check_records(records, amend_case=False):
    for record in records:
        for position, value in record.iterfields(['100__a', '700__a']):
            value = value.decode('utf8')
            new_value = NAME_CACHE.get(value)
            if new_value is None:
                search_value = value
                if ',' in value:
                    splitted_values = search_value.split(',', 1)
                    search_value = u"%s %s" % (splitted_values[1].strip(),
                                               splitted_values[0].strip())
                original_family_name = value.split(',')[0].strip()
                search_value = RE_SPACES.sub(' ', search_value).strip()
                if len(search_value.split()) < 3:
                    # Simple name
                    continue
                i = perform_request_search(p=u'author:"%s"' % search_value,
                                           cc='HepNames')
                possible_values = get_fieldvalues(i, '100__a', sort=False) + \
                    get_fieldvalues(i, '400__a', sort=False)
                for correct_value in possible_values:
                    correct_value = correct_value.decode('utf8')
                    if search_value.lower().endswith(" " + correct_value.lower().split(',')[0]):
                        family_name = correct_value.split(',')[0].strip()
                        if len(family_name) < len(original_family_name):
                            continue
                        first_name = search_value[:-(len(family_name) + 1)].strip()
                        new_value = u'%s, %s' % (family_name, first_name)
                        NAME_CACHE[value] = new_value
                        break
                else:
                    NAME_CACHE[value] = value
            if new_value:
                if amend_case and new_value == value:
                    continue
                elif new_value.lower() == value.lower():
                    continue
                record.amend_field(position, new_value.encode('utf8'))
def __call__(self, req, form):
    argd = wash_search_urlargd(form)
    argd['recid'] = self.recid
    argd['tab'] = self.tab

    if self.format is not None:
        argd['of'] = self.format
    req.argd = argd
    uid = getUid(req)
    if uid == -1:
        return page_not_authorized(req, "../",
            text="You are not authorized to view this record.",
            navmenuid='search')
    elif uid > 0:
        pref = get_user_preferences(uid)
        try:
            if not form.has_key('rg'):
                # fetch user rg preference only if not overridden via URL
                argd['rg'] = int(pref['websearch_group_records'])
        except (KeyError, ValueError):
            pass

    user_info = collect_user_info(req)
    (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

    if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
        argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

    #check if the user has rights to set a high wildcard limit
    #if not, reduce the limit set by user, with the default one
    if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
        if acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

    # only superadmins can use verbose parameter for obtaining debug information
    if not isUserSuperAdmin(user_info):
        argd['verbose'] = 0

    if auth_code and user_info['email'] == 'guest':
        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {
            'collection': guess_primary_collection_of_a_record(self.recid)})
        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
            make_canonical_urlargd({'action': cookie, 'ln': argd['ln'],
                'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
        return redirect_to_url(req, target, norobot=True)
    elif auth_code:
        return page_not_authorized(req, "../",
                                   text=auth_msg,
                                   navmenuid='search')

    from invenio.search_engine import record_exists, get_merged_recid
    # check if the current record has been deleted
    # and has been merged, case in which the deleted record
    # will be redirect to the new one
    record_status = record_exists(argd['recid'])
    merged_recid = get_merged_recid(argd['recid'])
    if record_status == -1 and merged_recid:
        url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
        url %= (str(merged_recid), argd['ln'])
        redirect_to_url(req, url)
    elif record_status == -1:
        req.status = apache.HTTP_GONE  ## The record is gone!

    # mod_python does not like to return [] in case when of=id:
    out = perform_request_search(req, **argd)
    if out == []:
        return str(out)
    else:
        return out
def __call__(self, req, form):
    argd = wash_search_urlargd(form)
    argd['recid'] = self.recid
    if self.format is not None:
        argd['of'] = self.format

    req.argd = argd

    uid = getUid(req)
    user_info = collect_user_info(req)
    if uid == -1:
        return page_not_authorized(req, "../",
            text="You are not authorized to view this record.",
            navmenuid='search')
    elif uid > 0:
        pref = get_user_preferences(uid)
        try:
            if not form.has_key('rg'):
                # fetch user rg preference only if not overridden via URL
                argd['rg'] = int(pref['websearch_group_records'])
        except (KeyError, ValueError):
            pass

    if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
        argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

    #check if the user has rights to set a high wildcard limit
    #if not, reduce the limit set by user, with the default one
    if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
        if acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

    # only superadmins can use verbose parameter for obtaining debug information
    if not isUserSuperAdmin(user_info):
        argd['verbose'] = 0

    record_primary_collection = guess_primary_collection_of_a_record(self.recid)

    if collection_restricted_p(record_primary_collection):
        (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL,
                                                  collection=record_primary_collection)
        if auth_code:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')

    # Keep all the arguments, they might be reused in the
    # record page itself to derivate other queries
    req.argd = argd

    # mod_python does not like to return [] in case when of=id:
    out = perform_request_search(req, **argd)
    if out == []:
        return str(out)
    else:
        return out
def find_records_to_check():
    search = '100__m:/\@/ or 700__m:/\@/ 980:CORE'
    #search = 'find tc t and date 2000->2009'
    #search = 'find cat jkemp or cleggm1 and primarch hep-ex and ac 1'
    #search = 'find fc g or fc l or fc n and cat luba and date 2010->2014'
    return perform_request_search(p=search, cc='HEP')
def __call__(self, req, form):
    """ Perform a search. """
    argd = wash_search_urlargd(form)

    _ = gettext_set_language(argd['ln'])

    if req.method == 'POST':
        raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED

    uid = getUid(req)
    user_info = collect_user_info(req)
    if uid == -1:
        return page_not_authorized(req, "../",
            text=_("You are not authorized to view this area."),
            navmenuid='search')
    elif uid > 0:
        pref = get_user_preferences(uid)
        try:
            if not form.has_key('rg'):
                # fetch user rg preference only if not overridden via URL
                argd['rg'] = int(pref['websearch_group_records'])
        except (KeyError, ValueError):
            pass

    if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
        argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

    involved_collections = set()
    involved_collections.update(argd['c'])
    involved_collections.add(argd['cc'])

    if argd['id'] > 0:
        argd['recid'] = argd['id']
    if argd['idb'] > 0:
        argd['recidb'] = argd['idb']
    if argd['sysno']:
        tmp_recid = find_record_from_sysno(argd['sysno'])
        if tmp_recid:
            argd['recid'] = tmp_recid
    if argd['sysnb']:
        tmp_recid = find_record_from_sysno(argd['sysnb'])
        if tmp_recid:
            argd['recidb'] = tmp_recid

    if argd['recid'] > 0:
        if argd['recidb'] > argd['recid']:
            # Hack to check if among the restricted collections
            # at least a record of the range is there and
            # then if the user is not authorized for that
            # collection.
            recids = intbitset(xrange(argd['recid'], argd['recidb']))
            restricted_collection_cache.recreate_cache_if_needed()
            for collname in restricted_collection_cache.cache:
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL,
                                                             collection=collname)
                if auth_code and user_info['email'] == 'guest':
                    coll_recids = get_collection(collname).reclist
                    if coll_recids & recids:
                        cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL,
                                                                     {'collection': collname})
                        target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                            make_canonical_urlargd({'action': cookie, 'ln': argd['ln'],
                                'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                        return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../",
                                               text=auth_msg,
                                               navmenuid='search')
        else:
            involved_collections.add(guess_primary_collection_of_a_record(argd['recid']))

    # If any of the collection requires authentication, redirect
    # to the authentication form.
    for coll in involved_collections:
        if collection_restricted_p(coll):
            (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL,
                                                         collection=coll)
            if auth_code and user_info['email'] == 'guest':
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL,
                                                             {'collection': coll})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd({'action': cookie, 'ln': argd['ln'],
                        'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../",
                                           text=auth_msg,
                                           navmenuid='search')

    #check if the user has rights to set a high wildcard limit
    #if not, reduce the limit set by user, with the default one
    if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
        auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
        if auth_code != 0:
            argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

    # only superadmins can use verbose parameter for obtaining debug information
    if not isUserSuperAdmin(user_info):
        argd['verbose'] = 0

    # Keep all the arguments, they might be reused in the
    # search_engine itself to derivate other queries
    req.argd = argd

    # mod_python does not like to return [] in case when of=id:
    out = perform_request_search(req, **argd)
    if out == []:
        return str(out)
    else:
        return out
def __call__(self, req, form):
    """RSS 2.0 feed service."""

    # Keep only interesting parameters for the search
    default_params = websearch_templates.rss_default_urlargd
    # We need to keep 'jrec' and 'rg' here in order to have
    # 'multi-page' RSS. These parameters are not kept be default
    # as we don't want to consider them when building RSS links
    # from search and browse pages.
    default_params.update({'jrec': (int, 1),
                           'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
    argd = wash_urlargd(form, default_params)
    user_info = collect_user_info(req)

    for coll in argd['c'] + [argd['cc']]:
        if collection_restricted_p(coll):
            (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL,
                                                         collection=coll)
            if auth_code and user_info['email'] == 'guest':
                cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL,
                                                             {'collection': coll})
                target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd({'action': cookie, 'ln': argd['ln'],
                        'referer': CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../",
                                           text=auth_msg,
                                           navmenuid='search')

    # Create a standard filename with these parameters
    current_url = websearch_templates.build_rss_url(argd)
    cache_filename = current_url.split('/')[-1]

    # In the same way as previously, add 'jrec' & 'rg'

    req.content_type = "application/rss+xml"
    req.send_http_header()
    try:
        # Try to read from cache
        path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        # Check if cache needs refresh
        filedesc = open(path, "r")
        last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
        assert(datetime.datetime.now() < last_update_time +
               datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
        c_rss = filedesc.read()
        filedesc.close()
        req.write(c_rss)
        return
    except Exception, e:
        # do it live and cache

        previous_url = None
        if argd['jrec'] > 1:
            prev_jrec = argd['jrec'] - argd['rg']
            if prev_jrec < 1:
                prev_jrec = 1
            previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec)

        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        req.argd = argd
        recIDs = perform_request_search(req, of="id",
                                        c=argd['c'], cc=argd['cc'],
                                        p=argd['p'], f=argd['f'],
                                        p1=argd['p1'], f1=argd['f1'],
                                        m1=argd['m1'], op1=argd['op1'],
                                        p2=argd['p2'], f2=argd['f2'],
                                        m2=argd['m2'], op2=argd['op2'],
                                        p3=argd['p3'], f3=argd['f3'],
                                        m3=argd['m3'], wl=argd['wl'])
        nb_found = len(recIDs)
        next_url = None
        if len(recIDs) >= argd['jrec'] + argd['rg']:
            next_url = websearch_templates.build_rss_url(argd,
                                                         jrec=(argd['jrec'] + argd['rg']))

        first_url = websearch_templates.build_rss_url(argd, jrec=1)
        last_url = websearch_templates.build_rss_url(argd,
                                                     jrec=nb_found - argd['rg'] + 1)

        recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

        rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
            websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                      previous_url=previous_url,
                                                      next_url=next_url,
                                                      first_url=first_url,
                                                      last_url=last_url,
                                                      nb_found=nb_found,
                                                      jrec=argd['jrec'],
                                                      rg=argd['rg'],
                                                      cc=argd['cc']) + '\n'
        req.write(rss_prologue)
        rss_body = format_records(recIDs,
                                  of='xr',
                                  ln=argd['ln'],
                                  user_info=user_info,
                                  record_separator="\n",
                                  req=req, epilogue="\n")
        rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
        req.write(rss_epilogue)

        # update cache
        dirname = "%s/rss" % (CFG_CACHEDIR)
        mymkdir(dirname)
        fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
        try:
            # Remove the file just in case it already existed
            # so that a bit of space is created
            os.remove(fullfilename)
        except OSError:
            pass

        # Check if there's enough space to cache the request.
        if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
            try:
                os.umask(022)
                f = open(fullfilename, "w")
                f.write(rss_prologue + rss_body + rss_epilogue)
                f.close()
            except IOError, v:
                if v[0] == 36:
                    # URL was too long. Never mind, don't cache
                    pass
                else:
                    raise repr(v)
def query_get_comments(uid, cmtID, recID, reviews, ln, abuse=False,
                       user_collections='', collection=''):
    """
    private function
    @param user_collections: allowed collections for the user
    @param collection: collection to display
    @return tuple of comment where comment is
        tuple (nickname, uid, date_creation, body, id, status) if ranking disabled or
        tuple (nickname, uid, date_creation, body, nb_votes_yes, nb_votes_total,
               star_score, title, id, status)
    """
    qdict = {'id': 0, 'id_bibrec': 1, 'uid': 2, 'date_creation': 3,
             'body': 4, 'status': 5, 'nb_abuse_reports': 6,
             'nb_votes_yes': 7, 'nb_votes_total': 8,
             'star_score': 9, 'title': 10, 'email': -2, 'nickname': -1}
    query = """SELECT c.id, c.id_bibrec, c.id_user,
                      DATE_FORMAT(c.date_creation, '%%Y-%%m-%%d %%H:%%i:%%S'),
                      c.body, c.status, c.nb_abuse_reports,
                      %s
                      u.email, u.nickname
               FROM cmtRECORDCOMMENT c LEFT JOIN user u
                                       ON c.id_user = u.id
               %s
               ORDER BY c.nb_abuse_reports DESC, c.nb_votes_yes DESC,
                        c.date_creation
            """
    select_fields = reviews and 'c.nb_votes_yes, c.nb_votes_total, c.star_score, c.title,' or ''
    where_clause = "WHERE " + (reviews and 'c.star_score>0' or 'c.star_score=0')
    if uid:
        where_clause += ' AND c.id_user=%i' % uid
    if recID:
        where_clause += ' AND c.id_bibrec=%i' % recID
    if cmtID:
        where_clause += ' AND c.id=%i' % cmtID
    if abuse:
        where_clause += ' AND c.nb_abuse_reports>0'

    res = run_sql(query % (select_fields, where_clause))
    collection_records = []
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.extend(perform_request_search(cc=collection_name))
    else:
        collection_records.extend(perform_request_search(cc=collection))
    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            nickname = qtuple[qdict['nickname']] or get_user_info(qtuple[qdict['uid']], ln)[2]
            if reviews:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['nb_votes_yes']],
                                 qtuple[qdict['nb_votes_total']],
                                 qtuple[qdict['star_score']],
                                 qtuple[qdict['title']],
                                 qtuple[qdict['id']],
                                 qtuple[qdict['status']])
            else:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['id']],
                                 qtuple[qdict['status']])
            general_infos_tuple = (nickname,
                                   qtuple[qdict['uid']],
                                   qtuple[qdict['email']],
                                   qtuple[qdict['id']],
                                   qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['nb_abuse_reports']])
            out_tuple = (comment_tuple, general_infos_tuple)
            output.append(out_tuple)
    return tuple(output)
def query_get_latest(comments, ln, top, user_collections, collection):
    """
    private function
    @param comments: boolean indicating if we want to retrieve comments or reviews
    @param ln: language
    @param top: number of results to display
    @param user_collections: allowed collections for the user
    @param collection: collection to display
    @return tuple of comment where comment is
        tuple (nickname, uid, date_creation, body, id) if latest comments or
        tuple (nickname, uid, date_creation, body, star_score, id) if latest reviews
    """
    qdict = {'id': 0, 'id_bibrec': 1, 'uid': 2, 'date_creation': 3,
             'body': 4, 'nb_abuse_reports': 5, 'star_score': 6,
             'nickname': -1}
    query = """SELECT c.id, c.id_bibrec, c.id_user,
                      DATE_FORMAT(c.date_creation, '%%Y-%%m-%%d %%H:%%i:%%S'),
                      c.body, c.nb_abuse_reports,
                      %s
                      u.nickname
               FROM cmtRECORDCOMMENT c LEFT JOIN user u
                                       ON c.id_user = u.id
               %s
               ORDER BY c.date_creation DESC
               LIMIT %s
            """
    select_fields = not comments and 'c.star_score, ' or ''
    where_clause = "WHERE " + (comments and 'c.star_score=0' or 'c.star_score>0') + \
        ' AND c.status="ok" AND c.nb_abuse_reports < %s' % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN

    res = run_sql(query % (select_fields, where_clause, top))

    collection_records = []
    if collection == 'Show all':
        for collection_name in user_collections:
            collection_records.extend(perform_request_search(cc=collection_name))
    else:
        collection_records.extend(perform_request_search(cc=collection))
    output = []
    for qtuple in res:
        if qtuple[qdict['id_bibrec']] in collection_records:
            nickname = qtuple[qdict['nickname']] or get_user_info(qtuple[qdict['uid']], ln)[2]
            if not comments:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['star_score']],
                                 qtuple[qdict['id']])
            else:
                comment_tuple = (nickname,
                                 qtuple[qdict['uid']],
                                 qtuple[qdict['date_creation']],
                                 qtuple[qdict['body']],
                                 qtuple[qdict['id']])
            general_infos_tuple = (nickname,
                                   qtuple[qdict['uid']],
                                   qtuple[qdict['id']],
                                   qtuple[qdict['id_bibrec']],
                                   qtuple[qdict['nb_abuse_reports']])
            out_tuple = (comment_tuple, general_infos_tuple)
            output.append(out_tuple)
    return tuple(output)
def bst_fermilab():
    write_message('cd /afs/fnal.gov/files/expwww/bss/html/techpubs')
    for series in SERIES1:
        reports = []
        authorId = False
        search = "find r fermilab-" + series + "-*"
        #search = "find recid 1261432"
        #print search
        result = perform_request_search(p=search, cc='HEP')
        for recid in result:
            #print recid
            reportValues = get_fieldvalues(recid, '037__a')
            author = get_fieldvalues(recid, '100__a')
            authorId = get_fieldvalues(recid, '100__i')
            authorAff = get_fieldvalues(recid, '100__u')
            title = get_fieldvalues(recid, '245__a')
            experiment = get_fieldvalues(recid, '693__e')
            if author:
                author = author[0]
            else:
                author = ''
            if title:
                title = '<i>' + title[0][:100] + '</i>'
            else:
                title = ''
            if experiment:
                experiment = experiment[0]
            else:
                experiment = ''
            if authorAff:
                authorAff = authorAff[0]
            else:
                authorAff = ''
            #print "author = ", author
            #print "title = ", title
            #print "authorId = ", authorId
            #print "experiment = ", experiment
            if authorId:
                authorId = authorId[0]
            for report in reportValues:
                if re.match('FERMILAB-' + series, report, re.IGNORECASE):
                    y = [report, str(recid), author, title, authorId,
                         experiment, authorAff]
                    #print "y = ", y
                    reports.append(y)
        reports.sort(reverse=True)
        filename = os.path.join(CFG_FERMILAB_PATH,
                                'fermilab-reports-' + series + '.html')
        output = open(filename, 'w')
        output.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n')
        output.write('  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
        output.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
        output.write('<head>\n')
        output.write('<title>Fermilab Technical Publications: ')
        output.write(escape(series))
        output.write('</title>\n')
        output.write('<meta http-equiv="content-type" content="text/html;charset=utf-8" />\n')
        output.write('</head>\n')
        output.write('<body>\n')
        output.write('<a href="http://bss.fnal.gov/techpubs/fermilab_spires.html">Fermilab Technical Publications</a>\n')
        output.write('<br /><br />')
        dateTimeStamp = '<i>Updated ' + \
            chicago_timezone.fromutc(datetime.datetime.utcnow()).strftime('%Y-%m-%d %H:%M:%S') + \
            '</i>\n'
        output.write(dateTimeStamp)
        output.write('<br />\n<table>\n')
        for report in reports:
            #print "report =", report
            if report[4]:
                search2 = '035__a:' + report[4]
                #print "search2 =", search2
                result = perform_request_search(p=search2, cc='HepNames')
                #print report[4], result
                report[2] = '<a href="http://inspirehep.net/record/' + \
                            str(result[0]) + '">' + report[2] + '</a>'
            line = '<tr><td><a href="http://inspirehep.net/record/' + report[1] + '">' + \
                   report[0] + '</a></td><td>' + report[2] + '</td><td>' + \
                   report[3] + '</td></tr>\n'
            if re.search(r'THESIS', report[0]):
                if report[5]:
                    search2 = '119__a:' + report[5]
                    result = perform_request_search(p=search2, cc='Experiments')
                    if result:
                        result = result[0]
                    collaboration = get_fieldvalues(result, '710__g')
                    if collaboration:
                        collaboration = collaboration[0]
                        collaboration = collaboration.replace(' Collaboration', '')
                        report[5] = report[5] + ' (' + collaboration + ')'
                    if result:
                        report[5] = '<a href="http://inspirehep.net/record/' + \
                                    str(result) + '">' + report[5] + '</a>'
                line = '<tr><td><a href="http://inspirehep.net/record/' + report[1] + '">' + \
                       report[0] + '</a></td><td>' + report[2] + '</td><td>' + \
                       report[5] + '</td><td>' + report[6] + '</td><td>' + \
                       report[3] + '</td></tr>\n'
            output.write(line)
        output.write('</table>\n')
        output.write('</body>\n')
        output.write('</html>\n')
        output.close()
        write_message('\\rm fermilab-reports-' + series + '.html')
        write_message('cp %s .' % filename)

    reports = []
    currentyear = time.strftime('%Y')
    for series in SERIES2:
        #print series
        for year in range(1970, time.localtime()[0] + 1):
            #print year
            dd = str(year)
            dd = re.sub(r"19", "", dd)
            dd = re.sub(r"20", "", dd)
            search = "find r fermilab-" + series + "-" + dd + "*"
            #print search
            result = perform_request_search(p=search, cc='HEP')
            for recid in result:
                reportValues = get_fieldvalues(recid, '037__a')
                author = get_fieldvalues(recid, '100__a')
                title = get_fieldvalues(recid, '245__a')
                if author:
                    author = author[0]
                else:
                    author = ''
                if title:
                    title = title[0][:100]
                else:
                    title = ''
                for report in reportValues:
                    #print 'report = ' + report
                    #print 'FERMILAB-' + series
                    if re.match('FERMILAB-' + series, report, re.IGNORECASE):
                        number = re.sub("FERMILAB-" + series + "-", "", report)
                        y = [year, number, report, str(recid), author, title]
                        #print 'y = ', y
                        reports.append(y)
    reports.sort(reverse=True)
    #print reports
    filename = os.path.join(CFG_FERMILAB_PATH, 'fermilab-reports-preprints.html')
    output = open(filename, 'w')
    output.write('<html>\n')
    output.write('<header>\n')
    output.write('<title>Fermilab Technical Publications: ')
    output.write('preprints')
    output.write('</title>\n')
    output.write('</header>\n')
    output.write('<body>\n')
    output.write('<a href="http://bss.fnal.gov/techpubs/fermilab_spires.html">Fermilab Technical Publications</a>\n')
    output.write('<br /><br />')
    dateTimeStamp = '<i>Updated ' + \
        chicago_timezone.fromutc(datetime.datetime.utcnow()).strftime('%Y-%m-%d %H:%M:%S') + \
        '</i>\n'
    output.write(dateTimeStamp)
    output.write('<br />\n<table>\n')
    for report in reports:
        line = '<tr><td><a href="http://inspirehep.net/record/' + report[3] + '">' + \
               report[2] + '</a></td><td>' + report[4] + '</td><td>' + \
               report[5] + '</td></tr>\n'
        output.write(line)
    output.write('</table>\n')
    output.write('</body>\n')
    output.write('</html>\n')
    output.close()
    write_message('cd /afs/fnal.gov/files/expwww/bss/html/techpubs')
    write_message('\\rm fermilab-reports-preprints.html')
    write_message('cp %s .' % filename)
def arxiv_login(req, picked_profile=None):
    '''
    Log in through arXiv. If the user is already associated to a personid,
    returns the personid. If the user has no pid, tries to guess which
    personid to associate based on surname and papers from arXiv. If no
    compatible person is found, creates a new person. At the end of the
    process opens a ticket for the user claiming the papers from arXiv.
    !!! the user will find the open ticket, which will require him to go
    through the final review before getting committed.

    @param req: Apache request object
    @type req: Apache request object

    @return: Returns the pid resulting in the process
    @rtype: int
    '''
    def session_bareinit(req):
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.dirty = True

    session_bareinit(req)
    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
            nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''

    session.dirty = True

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    try:
        found_bibrecs = set(reduce(add, [perform_request_search(p='037:' + str(arx), of='id', rg=0)
                                         for arx in arxiv_p_ids]))
    except (IndexError, TypeError):
        found_bibrecs = set()
    #found_bibrecs = [567700, 567744]

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if pid_found:
        pid = pid[0]
    else:
        if picked_profile == None:
            top5_list = dbapi.find_top5_personid_for_new_arXiv_user(
                found_bibrecs,
                nameapi.create_normalized_name(nameapi.split_name_parts(surname + ', ' + name)))
            return ("top5_list", top5_list)
        else:
            pid = dbapi.check_personids_availability(picked_profile, uid)

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs
    #present_bibrecs = found_bibrecs.intersection(pid_bibrecs)

    #assert len(found_bibrecs) == len(missing_bibrecs) + len(present_bibrecs)

    tempticket = []
    #now we have to open the tickets...
    #person_papers contains the papers which are already assigned to the person and came from arXiv,
    #they can be claimed regardless
    for bibrec in missing_bibrecs:
        tempticket.append({'pid': pid, 'bibref': str(bibrec), 'action': 'confirm'})

    #check if ticket targets (bibref for pid) are already in ticket
    for t in list(tempticket):
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)
    session.dirty = True

    if picked_profile != None and picked_profile != pid and picked_profile != -1:
        return ("chosen pid not available", pid)
    elif picked_profile != None and picked_profile == pid and picked_profile != -1:
        return ("pid assigned by user", pid)
    else:
        return ("pid", pid)
def tmpl_papers_with_self_papers_box(self, pubs, self_pubs, bibauthorid_data,
                                     num_downloads,
                                     ln, add_box=True, loading=False):
    _ = gettext_set_language(ln)
    if not loading:
        ib_pubs = intbitset(pubs)
        ib_self_pubs = intbitset(self_pubs)

        if bibauthorid_data["cid"]:
            baid_query = 'exactauthor:%s' % wrap_author_name_in_quotes_if_needed(bibauthorid_data["cid"])
        else:
            baid_query = 'exactauthor:%s' % wrap_author_name_in_quotes_if_needed(bibauthorid_data["pid"])
        baid_query = baid_query + " "

        rec_query = baid_query
        self_rec_query = baid_query + " authorcount:1 "
        descstr = ['', "<strong>" + "All papers" + "</strong>"]
        searchstr = [" All papers "]
        self_searchstr = [" Single authored "]
        if pubs:
            searchstr.append(("" +
                              create_html_link(websearch_templates.build_search_url(p=rec_query),
                                               {}, str(len(pubs)),) + ""))
        else:
            searchstr.append(("0"))
        if self_pubs:
            self_searchstr.append(("" +
                                   create_html_link(websearch_templates.build_search_url(p=self_rec_query),
                                                    {}, str(len(self_pubs)),) + ""))
        else:
            self_searchstr.append(("0"))

        psummary = searchstr
        self_psummary = self_searchstr

        if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and num_downloads:
            psummary[0] += " <br> (" + _("downloaded") + " "
            psummary[0] += str(num_downloads) + " " + _("times") + ")"

        if CFG_INSPIRE_SITE:
            CFG_COLLS = ['Book',
                         'ConferencePaper',
                         'Introductory',
                         'Lectures',
                         'Published',
                         'Review',
                         'Thesis',
                         'Proceedings']
        else:
            CFG_COLLS = ['Article',
                         'Book',
                         'Preprint', ]
        collsd = {}
        self_collsd = {}

        for coll in CFG_COLLS:
            search_result = intbitset(perform_request_search(rg=0, f="collection", p=coll))
            collsd[coll] = list(ib_pubs & search_result)
            self_collsd[coll] = list(ib_self_pubs & search_result)

        for coll in CFG_COLLS:
            rec_query = baid_query + 'collection:' + wrap_author_name_in_quotes_if_needed(coll)
            self_rec_query = baid_query + 'collection:' + wrap_author_name_in_quotes_if_needed(coll) + ' authorcount:1 '
            descstr.append("%s" % coll)
            if collsd[coll]:
                psummary.append(("" +
                                 create_html_link(websearch_templates.build_search_url(p=rec_query),
                                                  {}, str(len(collsd[coll])),) + ''))
            else:
                psummary.append(("0"))
            if self_collsd[coll]:
                self_psummary.append(("" +
                                      create_html_link(websearch_templates.build_search_url(p=self_rec_query),
                                                       {}, str(len(self_collsd[coll])),) + ''))
            else:
                self_psummary.append(("0"))
        tp = "<tr><td> %s </td> <td align='right'> %s </td> <td align='right'> %s </td></tr>"
        line2 = "<table > %s </table>"
        line2 = line2 % ''.join(tp % (x, y, z)
                                for x, y, z in zip(*(descstr, psummary, self_psummary)))

    else:
        line2 = self.loading_html()

    if not add_box:
        return line2
    line1 = "<strong>" + _("Papers") + "</strong>"
    papers_box = self.tmpl_print_searchresultbox("combined_papers", line1, line2)
    return papers_box
import re
import sys
import datetime
import subprocess

from invenio.search_engine import perform_request_search

VERBOSE = True
VERBOSE = False

search = raw_input("Run bibrank on this search: ")
x = perform_request_search(p=search, cc="HEP")
if len(x) > 0:
    mylist = [str(r) for r in x]
else:
    print "No records found."
    sys.exit()
today = str(datetime.date.today())
newfile = 'tmp_loss_from_search__' + today + '.txt'
output = open(newfile, 'w')
amount = str(len(mylist))
mystring = ','.join(mylist)
mystring2 = 'sudo -u apache /opt/cds-invenio/bin/bibrank -u cleggm1 ' \
            '--disable-citation-losses-check -i ' + mystring
if len(mylist) > 1000:
    print "There are %s records that will be touched" % (amount)
    chunks = [mylist[x:x + 500] for x in xrange(0, len(mylist), 500)]
    time_inter = 0
    for x in chunks:
        mystring = ','.join(x)
        time_inter += 1