def tmpl_your_tickets(self, uid, ln=CFG_SITE_LANG, start=1):
    """Render a paged HTML table of the BibCatalog tickets owned by a user.

    :param uid: id of the user whose tickets are listed
    :param ln: interface language code (washed before use)
    :param start: 1-based index of the first ticket to show on this page
    :return: HTML fragment, or a localized error message string
    """
    import cgi  # local import: only needed here, for HTML-escaping ticket fields

    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    if BIBCATALOG_SYSTEM is None:
        return _("Error: No BibCatalog system configured.")
    # errors? tell what happened and get out
    bibcat_probs = BIBCATALOG_SYSTEM.check_system(uid)
    if bibcat_probs:
        return _("Error") + " " + bibcat_probs

    tickets = BIBCATALOG_SYSTEM.ticket_search(uid, owner=uid)  # get ticket id's
    lines = ""  # put result here
    i = 1
    lines += (_("You have %i tickets.") % len(tickets)) + "<br/>"
    # make a prev link if needed
    if start > 1:
        newstart = max(start - self.SHOW_MAX_TICKETS, 1)
        lines += '<a href="/yourtickets/display?start=' + str(
            newstart) + '">' + _("Previous") + '</a>'
    lines += """<table border="1">"""
    lastshown = len(tickets)  # what was the number of the last shown ticket?
    for ticket in tickets:
        # get info and show only for those that are within the show range
        if (i >= start) and (i < start + self.SHOW_MAX_TICKETS):
            ticket_info = BIBCATALOG_SYSTEM.ticket_get_info(uid, ticket)
            # escape ticket fields before embedding them in HTML: subject,
            # status and text originate from user input (XSS hardening)
            subject = cgi.escape(ticket_info['subject'])
            status = cgi.escape(ticket_info['status'])
            text = ""
            if 'text' in ticket_info:
                text = cgi.escape(ticket_info['text'])
            display = '<a href="' + ticket_info['url_display'] + '">' + _(
                "show") + '</a>'
            close = '<a href="' + ticket_info['url_close'] + '">' + _(
                "close") + '</a>'
            lines += ("<tr><td>" + str(ticket) + "</td><td>" + subject + " " +
                      text + "</td><td>" + status + "</td><td>" + display +
                      "</td><td>" + close + "</td></tr>\n")
            lastshown = i
        i = i + 1
    lines += "</table>"
    # make next link if needed
    if len(tickets) > lastshown:
        newstart = lastshown + 1
        lines += '<a href="/yourtickets/display?start=' + str(
            newstart) + '">' + _("Next") + '</a>'
    return lines
def open_rt_ticket(e, debug_log=False, queue='Test'):
    """Take an exception e and, if allowed by the configuration, open a
    ticket for that exception.

    Arguments:
    e -- the exception to be reported
    debug_log -- when True, append a dump of the exception's repr and
                 attributes to the ticket body
    queue -- name of the RT queue the ticket is filed in
    """
    global ticket_hashes
    ticket_hash = e.hash()
    subject = e.get_message_subject() + ' ' + ticket_hash
    body = e.get_message_body()
    if debug_log:
        # repr(e) replaces the non-idiomatic e.__repr__() call (same result)
        debug = "\n Debugging information: \n" + repr(e) + '\n' + \
            '\n'.join([str(key) + " " + str(value)
                       for key, value in vars(e).iteritems()])
    else:
        debug = ''
    if rt_ticket_report:
        # plain dict membership is O(1); iterating iterkeys() was an O(n) scan
        if ticket_hash not in ticket_hashes:
            ticket_id = BIBCATALOG_SYSTEM.ticket_submit(uid=None,
                                                        subject=subject,
                                                        recordid=e.recid,
                                                        text=body + debug,
                                                        queue=queue,
                                                        priority="",
                                                        owner="",
                                                        requestor="")
            HooverStats.tickets_raised += 1
            ticket_data = BIBCATALOG_SYSTEM.ticket_get_info(None, ticket_id)
            # cache as (data, id, touched-this-run)
            ticket_hashes[ticket_hash] = ticket_data, ticket_id, True
        else:
            # mark the cached ticket as touched during this run
            ticket_hashes[ticket_hash] = ticket_hashes[ticket_hash][:2] + \
                (True,)
            # If the ticket is already there check its status. In case it is
            # marked as somehow solved -- i.e. resolved, deleted or rejected --
            # reopen it.
            if ticket_hashes[ticket_hash][0]['status'] in [
                    'resolved', 'deleted', 'rejected']:
                BIBCATALOG_SYSTEM.ticket_set_attribute(
                    None, ticket_hashes[ticket_hash][1], 'status', 'open')
                HooverStats.tickets_raised += 1
    else:
        write_message('sub: ' + subject + '\nbody:\n' + body +
                      '\ndbg:\n' + debug, verbose=9)
def tmpl_your_tickets(self, uid, ln=CFG_SITE_LANG, start=1):
    """Build the HTML listing of the tickets owned by the given user,
    paged in chunks of self.SHOW_MAX_TICKETS with Previous/Next links.
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    if BIBCATALOG_SYSTEM is None:
        return _("Error: No BibCatalog system configured.")
    # bail out early, reporting any problem with the ticketing backend
    problems = BIBCATALOG_SYSTEM.check_system(uid)
    if problems:
        return "%s %s" % (_("Error"), problems)

    ticket_ids = BIBCATALOG_SYSTEM.ticket_search(uid, owner=uid)
    out = ["%s<br/>" % (_("You have %i tickets.") % len(ticket_ids))]
    # paging: link back to the previous page when not on the first one
    if start > 1:
        prev_start = max(1, start - self.SHOW_MAX_TICKETS)
        out.append('<a href="/yourtickets/display?start=%s">%s</a>'
                   % (prev_start, _("Previous")))
    out.append("""<table border="1">""")
    last_shown = len(ticket_ids)  # position of the last ticket rendered
    for position, ticket in enumerate(ticket_ids, 1):
        # render only the tickets falling inside the requested page
        if start <= position < start + self.SHOW_MAX_TICKETS:
            info = BIBCATALOG_SYSTEM.ticket_get_info(uid, ticket)
            body_text = info.get('text', "")
            show_link = '<a href="%s">%s</a>' % (info['url_display'], _("show"))
            close_link = '<a href="%s">%s</a>' % (info['url_close'], _("close"))
            out.append(
                "<tr><td>%s</td><td>%s %s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n"
                % (ticket, info['subject'], body_text, info['status'],
                   show_link, close_link))
            last_shown = position
    out.append("</table>")
    # paging: link forward when tickets remain beyond this page
    if len(ticket_ids) > last_shown:
        out.append('<a href="/yourtickets/display?start=%s">%s</a>'
                   % (last_shown + 1, _("Next")))
    return "".join(out)
def hoover(authors=None, check_db_consistency=False, dry_run=False,
           packet_size=1000, dry_hepnames_run=False, open_tickets=False,
           queue='Test'):
    """The actions that hoover performs are the following:
    1. Find out the identifiers that belong to the authors(pids) in the
       database
    2. Find and pull all the signatures that have the same identifier as the
       author to the author
    3. Connect the profile of the author with the hepnames collection entry
    (optional) check the database to see if it is in a consistent state

    Keyword arguments:
    authors -- an iterable of authors to be hoovered
    check_db_consistency -- perform checks for the consistency of the database
    dry_run -- do not alter the database tables
    packet_size -- squeeze together the marcxml. This there are fewer
                   bibupload processes for the bibsched to run.
    dry_hepnames_run -- do not alter the hepnames collection
    open_tickets -- open RT tickets for the problems found
    queue -- the name of the queue to be used in the rt system for the tickets
    """
    global rt_ticket_report
    rt_ticket_report = open_tickets
    write_message("Packet size {0}".format(packet_size), verbose=1)
    write_message("Initializing hoover", verbose=1)
    write_message("Selecting records with identifiers...", verbose=1)
    # collect every record carrying an author identifier subfield
    recs = get_records_with_tag('100__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('100__j')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__j')
    task_sleep_now_if_required(can_stop_too=True)

    write_message("Found {0} records".format(len(set(recs))), verbose=2)
    # keep only the records BibAuthorID already knows about
    recs = set(recs) & set(
        run_sql("select DISTINCT(bibrec) from aidPERSONIDPAPERS"))
    write_message(" out of which {0} are in BibAuthorID".format(len(recs)),
                  verbose=2)
    task_sleep_now_if_required(can_stop_too=True)

    records_with_id = set(rec[0] for rec in recs)

    destroy_partial_marc_caches()
    populate_partial_marc_caches(records_with_id, create_inverted_dicts=True)

    if rt_ticket_report:
        global ticket_hashes
        write_message("Ticketing system rt is used", verbose=9)
        write_message("Building hash cache for tickets for queue %s" % queue,
                      verbose=9)
        ticket_ids = BIBCATALOG_SYSTEM.ticket_search(None, subject='[Hoover]',
                                                     queue=queue)
        write_message("Found %s existing tickets" % len(ticket_ids), verbose=9)
        for ticket_id in ticket_ids:
            task_sleep_now_if_required(can_stop_too=True)
            try:
                ticket_data = BIBCATALOG_SYSTEM.ticket_get_info(
                    None, ticket_id)
                # the exception hash is the last word of the ticket subject;
                # cache as (data, id, touched-this-run)
                ticket_hashes[ticket_data['subject'].split()
                              [-1]] = ticket_data, ticket_id, False
            except IndexError:
                write_message(
                    "Problem in subject of ticket {0}".format(ticket_id),
                    verbose=5)
        write_message("Found {0} tickets".format(len(ticket_hashes)),
                      verbose=2)
    task_sleep_now_if_required(can_stop_too=True)

    # per-identifier-type toolbox: getters split into reliable/unreliable,
    # plus the signature fetcher, hepnames connector and result mappings
    fdict_id_getters = {
        "INSPIREID": {
            'reliable': [
                get_inspire_id_of_author,
                get_inspireID_from_hepnames,
                lambda pid: get_inspireID_from_claimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'unreliable': [
                lambda pid: get_inspireID_from_unclaimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'signatures_getter': get_signatures_with_inspireID,
            'connection': dict_entry_for_hepnames_connector,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        },
        "ORCID": {
            'reliable': [
                # get_orcid_id_of_author,
                # get_inspireID_from_hepnames,
                # lambda pid: get_inspireID_from_claimed_papers(pid,
                # intersection_set=records_with_id)]
            ],
            'unreliable': [
                # get_inspireID_from_hepnames,
                # lambda pid: get_inspireID_from_claimed_papers(pid,
                # intersection_set=records_with_id)]
            ],
            'signatures_getter': lambda x: list(),
            'connection': lambda pid, _id: None,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        }
    }

    if not authors:
        authors = get_existing_authors()

    write_message("Running on {0}".format(len(authors)), verbose=2)

    unclaimed_authors = defaultdict(set)
    hep_connector = HepnamesConnector(packet_size=packet_size,
                                      dry_hepnames_run=dry_hepnames_run)

    # pass 1: gather reliable identifiers for every author
    for index, pid in enumerate(authors):
        task_sleep_now_if_required(can_stop_too=True)
        write_message("Searching for reliable ids of person {0}".format(pid),
                      verbose=2)
        for identifier_type, functions in fdict_id_getters.iteritems():
            write_message(" Type: {0}".format(identifier_type, ), verbose=9)
            try:
                G = (func(pid) for func in functions['reliable'])
                if check_db_consistency:
                    results = filter(None, (func for func in G if func))
                    try:
                        # check if this is redundant
                        if len(results) == 1:
                            consistent_db = True
                        else:
                            consistent_db = len(set(results)) <= 1
                        res = results[0]
                    except IndexError:
                        res = None
                    else:
                        if not consistent_db:
                            res = None
                            raise InconsistentIdentifiersException(
                                'Inconsistent database', pid, identifier_type,
                                set(results))
                else:
                    # first truthy result wins
                    res = next((func for func in G if func), None)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except BrokenHepNamesRecordException as e:
                continue
            except InconsistentIdentifiersException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            else:
                if res:
                    HooverStats.new_ids_found += 1
                    write_message(" Found reliable id {0}".format(res, ),
                                  verbose=9)
                    fdict_id_getters[identifier_type]['data_dicts'][
                        'pid_mapping'][pid].add(res)
                    fdict_id_getters[identifier_type]['data_dicts'][
                        'id_mapping'][res].add(pid)
                else:
                    write_message(" No reliable id found", verbose=9)
                    unclaimed_authors[identifier_type].add(pid)

    # pass 2: vacuum signatures for the reliable identifiers found above
    write_message("Vacuuming reliable ids...", verbose=2)
    for identifier_type, data in fdict_id_getters.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = fdict_id_getters[
            identifier_type]['connection']
        for pid, identifiers in data['data_dicts']['pid_mapping'].iteritems():
            write_message(
                " Person {0} has reliable identifier(s) {1} ".format(
                    str(pid), str(identifiers)), verbose=9)
            try:
                if len(identifiers) == 1:
                    identifier = list(identifiers)[0]
                    write_message(" Considering {0}".format(identifier),
                                  verbose=9)
                    if len(data['data_dicts']['id_mapping'][identifier]) == 1:
                        if not dry_run:
                            rowenta = Vacuumer(pid)
                            signatures = data['signatures_getter'](identifier)
                            write_message(
                                " Vacuuming {0} signatures! ".format(
                                    str(len(signatures))), verbose=4)
                            for sig in signatures:
                                try:
                                    rowenta.vacuum_signature(sig)
                                except DuplicateClaimedPaperException as e:
                                    open_rt_ticket(e, queue=queue)
                                except DuplicateUnclaimedPaperException as e:
                                    unclaimed_authors[identifier_type].add(
                                        e.pid)
                            write_message(
                                " Adding inspireid {0} to pid {1}".format(
                                    identifier, pid), verbose=3)
                            add_external_id_to_author(pid, identifier_type,
                                                      identifier)
                            hep_connector.add_connection(pid, identifier)
                    else:
                        raise MultipleAuthorsWithSameIdException(
                            "More than one authors with the same identifier",
                            data['data_dicts']['id_mapping'][identifier],
                            identifier)
                else:
                    raise MultipleIdsOnSingleAuthorException(
                        "More than one identifier on a single author ", pid,
                        'INSPIREID', identifiers)
            except MultipleAuthorsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            write_message(" Done with {0}".format(pid, ), verbose=3)

    # pass 3: fall back to unreliable identifiers for still-unclaimed authors
    write_message("Vacuuming unreliable ids...", verbose=2)
    for identifier_type, functions in fdict_id_getters.iteritems():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = fdict_id_getters[
            identifier_type]['connection']
        for index, pid in enumerate(unclaimed_authors[identifier_type]):
            write_message(
                "Searching for unreliable ids of person {0}".format(pid),
                verbose=9)
            try:
                G = (func(pid) for func in functions['unreliable'])
                res = next((func for func in G if func), None)
                if res is None:
                    continue
            except MultipleIdsOnSingleAuthorException as e:
                continue
            except BrokenHepNamesRecordException as e:
                continue
            except MultipleHepnamesRecordsWithSameIdException as e:
                # NOTE(review): no 'continue' here, so 'res' may be unbound
                # below if this fires on the first iteration -- confirm intent
                open_rt_ticket(e, queue=queue)
            HooverStats.new_ids_found += 1
            write_message(
                " Person {0} has unreliable identifier {1} ".format(
                    str(pid), str(res)), verbose=9)
            if res in fdict_id_getters[identifier_type]['data_dicts'][
                    'id_mapping']:
                write_message(
                    " Id {0} is already assigned to another person, skipping person {1} "
                    .format(str(res), pid))
                continue
            if not dry_run:
                rowenta = Vacuumer(pid)
                signatures = functions['signatures_getter'](res)
                for sig in signatures:
                    try:
                        rowenta.vacuum_signature(sig)
                    except DuplicateClaimedPaperException as e:
                        open_rt_ticket(e, queue=queue)
                    except DuplicateUnclaimedPaperException as e:
                        pass
                write_message(" Adding inspireid {0} to pid {1}".format(
                    res, pid), verbose=3)
                add_external_id_to_author(pid, identifier_type, res)
                hep_connector.add_connection(pid, res)
            write_message(" Done with {0}".format(pid), verbose=3)
    hep_connector.execute_connection()
    # BUGFIX: the original iterated the dict's *keys* (hash strings), so
    # ticket[2]/ticket[1] indexed characters and no ticket was ever resolved.
    # Iterate the cached (data, id, touched) triples instead and resolve
    # every ticket that was not touched during this run.
    for ticket_data, ticket_id, touched in ticket_hashes.itervalues():
        if not touched:
            BIBCATALOG_SYSTEM.ticket_set_attribute(None, ticket_id,
                                                   'status', 'resolved')
    HooverStats.report_results()
    write_message("Terminating hoover", verbose=1)