def get_new_accepteds():
    '''
    Find the accepted manuscripts that have not been sent yet.

    Every query in SEARCH_ACCEPTED is run with SEARCH_ACCEPTED_END
    appended. The recids already listed by retrieve_accepteds() (first
    element of each entry) are removed from the hits, a summary is
    printed, and get_result() is called once per remaining recid.

    Returns the combined get_result() output for the new recids, or None
    when there are no new recids.
    '''

    # Collect the hits of all searches, dropping duplicates.
    result_recids = set()
    for query in SEARCH_ACCEPTED:
        result_recids.update(get_result_ids(query + SEARCH_ACCEPTED_END))

    already_sent = retrieve_accepteds()
    sent_accepteds_recids = {already_sent[osti][0] for osti in already_sent}

    new_accepteds_recids = list(result_recids - sent_accepteds_recids)
    summary = f''' Accepteds {len(result_recids)} Already sent {len(sent_accepteds_recids)} New accepteds {len(new_accepteds_recids)} new_accepteds_recids {new_accepteds_recids} '''
    print(summary)

    if not new_accepteds_recids:
        return None

    records = []
    for new_recid in new_accepteds_recids:
        records.extend(get_result(new_recid))
    return records
def find_result(search_input=None):
    '''
    Finds records to send email to.

    Args:
        search_input: Complete search string, passed to get_result()
            unchanged. When it is empty or None the user is prompted;
            the answer is lower-cased and extended with SEARCH_FNAL,
            SEARCH_DATE and, if EXCLUDE is set, a negated SEARCH_OSTI.

    Returns:
        The non-empty result of get_result(), or None when the typed
        search is three characters or shorter or when nothing is found.
        Every successful search is appended to LOGFILE with a timestamp
        and the number of hits.
    '''

    if search_input:
        search = search_input
    else:
        search_input = input('Your search? ').lower()
        if len(search_input) <= 3:
            print('Badly formed search.')
            return None
        search = f'{search_input} {SEARCH_FNAL}'
        search += f' {SEARCH_DATE}'
        if EXCLUDE:
            search += f' -{SEARCH_OSTI}'

    print(search)
    result = get_result(search)
    hits = len(result)
    if VERBOSE:
        print(hits)

    if hits == 0:
        print('No result found')
        return None

    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # The context manager closes the log even if the write fails.
    with open(LOGFILE, 'a') as log:
        log.write(f'{stamp} {search} : {hits}\n')
    return result
def main(reports):
    """
    Print, for each Fermilab report number, where its PDF stands at INSPIRE.

    For every report the record is looked up with 'find r <report>'.
    If the record has urls, 'DONE' is printed as soon as a url whose
    description starts with 'fermilab library' is seen; until then every
    url value ending in 'pdf' is printed. When no such description is
    found, or the record has no urls, the arXiv eprint is printed, or
    failing that the url of the first attached document.

    The program exits if a url lacks a 'description' or 'value' key.
    """

    def _required(link, key, report):
        # A url without the expected key stops the whole run.
        try:
            return link[key]
        except KeyError:
            print('Problem with:', report)
            print(link)
            sys.exit()

    logging.basicConfig(level="INFO")
    wanted_fields = ('arxiv_eprints', 'documents', 'urls')

    for report in reports:
        result = get_result('find r ' + report, wanted_fields)

        try:
            links = result[0]['urls']
        except (IndexError, KeyError):
            # No record or no urls: try the eprint, then a document
            # whose PDF can actually be fetched.
            try:
                eprint = result[0]['arxiv_eprints'][0]['value']
                print(report, eprint)
            except (IndexError, KeyError):
                try:
                    document_url = result[0]['documents'][0]['url']
                    if get_pdf_from_url(document_url, message=False):
                        print(report, document_url)
                except (IndexError, KeyError):
                    pass
            continue

        already_done = False
        for link in links:
            description = _required(link, 'description', report)
            if description.lower().startswith('fermilab library'):
                print(report, 'DONE')
                already_done = True
                break
            value = _required(link, 'value', report)
            if value.lower().endswith('pdf'):
                print(report, value)
        if already_done:
            continue

        try:
            eprint = result[0]['arxiv_eprints'][0]['value']
            print(report, eprint)
            continue
        except (IndexError, KeyError):
            pass

        try:
            document_url = result[0]['documents'][0]['url']
            print(report, document_url)
        except (IndexError, KeyError):
            pass
def get_conference(jrec_hep):
    '''
    Get conference information.

    The CNUM on the first publication_info entry of the HEP record is
    searched with get_result(). From the single matching conference
    record a note is assembled: title, then for each address its first
    city, state and country code, then the opening date as mm/dd and the
    closing date as mm/dd/YYYY, e.g. "Title, City, ST, US, 07/01-07/05/2021".

    Args:
        jrec_hep: HEP record, a dict with a 'metadata' key.

    Returns:
        The note, or None when the record has no CNUM, the CNUM search
        does not return exactly one conference, or the note is empty.

    Raises:
        ValueError: if a date on the conference record is not in
            YYYY-MM-DD form.
    '''

    # An empty publication_info list is the same as a missing CNUM.
    try:
        cnum = jrec_hep['metadata']['publication_info'][0]['cnum']
    except (IndexError, KeyError):
        return None

    jrec_confs = get_result('cnum:' + cnum, fields=None)
    if len(jrec_confs) != 1:
        return None

    try:
        metadata = jrec_confs[0]['metadata']
    except KeyError:
        # Without metadata no part of the note can be built.
        return None

    try:
        conference_note = metadata['titles'][0]['title']
    except (IndexError, KeyError):
        conference_note = ''

    for address in metadata.get('address', []):
        # Skip an empty cities list instead of failing on [0].
        if 'cities' in address and address['cities']:
            conference_note += ', ' + address['cities'][0]
        if 'state' in address:
            conference_note += ', ' + address['state']
        if 'country_code' in address:
            conference_note += ', ' + address['country_code']

    date_parts = (
        ('opening_date', ', ', '%m/%d'),
        ('closing_date', '-', '%m/%d/%Y'),
    )
    for key, separator, out_format in date_parts:
        if key in metadata:
            date_object = datetime.datetime.strptime(metadata[key], '%Y-%m-%d')
            conference_note += separator + date_object.strftime(out_format)

    return conference_note or None
def create_xml(osti_id, recid):
    """
    The function checks if the OSTI ID should be added to INSPIRE.
    If so, it builds up that information.

    Args:
        osti_id: OSTI identifier; converted to str.
        recid: INSPIRE record id; converted to str, with a leading
            'oai:inspirehep.net:' prefix removed.

    Returns:
        None when the record is not in the Fermilab collection or the
        OSTI ID is already on a record, False when the recid does not
        match exactly one record lacking an OSTI identifier, otherwise
        the return value of print_rec(osti_id, recid).

    create_osti_id_pdf() is called for every record found, before the
    checks on the OSTI ID.
    """

    osti_id = str(osti_id)
    recid = str(recid).replace('oai:inspirehep.net:', '')

    search = f'_collections:Fermilab recid:{recid}'
    result = get_result(search)
    if not result:
        print(f'No such INSPIRE Fermilab record {recid}')
        return None
    jrec = result[0]

    report = get_fermilab_report(recid)[0]
    doi = get_pubnote(jrec)[4]
    create_osti_id_pdf(jrec, recid, osti_id, doi, report)

    search = '_collections:Fermilab '
    search += f'external_system_identifiers.value:{osti_id} '
    search += 'external_system_identifiers.schema:osti'
    result_osti = get_result_ids(search)
    if len(result_osti) == 1:
        # Compare the single hit, not the whole result, with recid: the
        # result is a sequence and never equals a string. Only report
        # the OSTI ID when it sits on a different record.
        if str(result_osti[0]) != recid:
            print(f'OSTI ID {osti_id} already on {result_osti[0]}')
        return None

    search = f'recid:{recid} -external_system_identifiers.schema:osti'
    if TEST:
        print(search)
    result = get_result_ids(search)
    if len(result) != 1:
        print(f'Problem with {recid} {osti_id}')
        print(f' {search} {result}')
        return False
    if TEST:
        print(result)
    return print_rec(osti_id, recid)
def get_fermilab_report(recid):
    '''
    Get the Fermilab report number.

    Args:
        recid: INSPIRE record id used in a 'recid:<recid>' search.

    Returns:
        A tuple (fermilab_report, accepted). fermilab_report is the
        upper-cased report number starting with 'FERMILAB' (the last one
        if there are several) or None; accepted is True when the record
        carries the report number 'OSTI_ACCEPTED'. (None, False) is
        returned when the search finds no record or the record has no
        report numbers.
    '''

    accepted = False
    fermilab_report = None
    result = get_result(search=f'recid:{recid}', fields=('report_numbers', ))
    try:
        reports = result[0]['report_numbers']
    except (IndexError, KeyError):
        # IndexError: the search returned nothing for this recid.
        return (fermilab_report, accepted)
    for report in reports:
        number = report['value'].upper()
        if number.startswith('FERMILAB'):
            fermilab_report = number
        elif number == 'OSTI_ACCEPTED':
            accepted = True
    return (fermilab_report, accepted)