Exemple #1
0
def get_new_ticket_RT_info(uid, recId):
    """Collect the data needed to fill in a "new ticket" form.

    The returned dict always carries ``resultCode`` (0 unless everything
    below succeeded, then 1).  On success it also carries ``users``
    (registered users that have a ``bibcatalog_username`` preference),
    ``queues``, ``email`` and ``ticketTemplates``.  When no ticket system is
    configured, or the system check reports a problem, ``description``
    holds a short message instead.
    """
    response = {'resultCode': 0}

    if BIBCATALOG_SYSTEM is None:
        response['description'] = "<!--No ticket system configured-->"
        return response
    if not (BIBCATALOG_SYSTEM and uid):
        # Nothing more can be said without a usable system and a user id.
        return response

    system_error = BIBCATALOG_SYSTEM.check_system(uid)
    if system_error != "":
        # put something in the tickets container, for debug
        response['description'] = ("Error connecting to RT<!--" +
                                   system_error + "-->")
        return response

    # Available owners: users lacking the preference are skipped.
    owners = []
    for registered in list_registered_users():
        try:
            owner = {
                'username':
                get_user_preferences(registered[0])['bibcatalog_username'],
                'id': registered[0],
            }
        except KeyError:
            continue
        owners.append(owner)
    response['users'] = owners
    response['queues'] = BIBCATALOG_SYSTEM.get_queues(uid)
    response['email'] = get_email(uid)
    # TODO try catch
    response['ticketTemplates'] = load_ticket_templates(recId)
    response['resultCode'] = 1
    return response
Exemple #2
0
def check_records(records, doi_field="0247_a", extra_subfields=(("2", "DOI"), ("9", "bibcheck")), create_ticket=False):
    """
    Look up the DOI of each record (crossref, according to the original
    documentation) and store it in ``doi_field``.

    Records that already have a ``0247_2`` subfield equal to "doi"
    (case-insensitive) are not looked up.  When the DOI found is already
    attached to another record, a warning is put on the record instead of
    the field and, if ``create_ticket`` is true, a ticket is submitted to
    the "Bibcheck" queue.
    """
    conflict_template = """
                        DOI %s to be added to record %s already exists in record/s %s

                        Record with conflict: %s
                        Record with original DOI: %s

                        Merge both records: %s
                    """

    # Keep only the records that carry no DOI yet, keyed by record id.
    records_to_check = {}
    for record in records:
        already_has_doi = any(value.lower() == "doi"
                              for _, value in record.iterfield("0247_2"))
        if not already_has_doi:
            records_to_check[record.record_id] = record

    dois = get_doi_for_records(records_to_check.values())
    for record_id, doi in dois.iteritems():
        record = records_to_check[record_id]
        dup_doi_recid = find_record_from_doi(doi)

        if not dup_doi_recid:
            # No conflict: the DOI can be written to the record.
            subfields = [(doi_field[5], doi.encode("utf-8"))]
            subfields = subfields + [tuple(extra) for extra in extra_subfields]
            record_add_field(record, tag=doi_field[:3], ind1=doi_field[3],
                             ind2=doi_field[4], subfields=subfields)
            record.set_amended("Added DOI in field %s" % doi_field)
            continue

        record.warn("DOI %s to be added to record %s already exists in record/s %s" % (doi, record_id, dup_doi_recid))
        if not create_ticket:
            continue

        subject = "DOI conflict record #%s" % str(record_id)
        res = BIBCATALOG_SYSTEM.ticket_submit(
            subject=subject,
            recordid=record_id,
            text=subject,
            queue="Bibcheck"
        )
        if not (res > 0):
            continue

        original_recid = int(dup_doi_recid)
        conflict_recid = int(record_id)
        msg = conflict_template % (
            doi,
            conflict_recid,
            original_recid,
            "%s/record/%s" % (CFG_SITE_URL, conflict_recid),
            "%s/record/%s" % (CFG_SITE_URL, original_recid),
            "%s/record/merge/?#recid1=%s&recid2=%s" % (CFG_SITE_URL, min(original_recid, conflict_recid), max(original_recid, conflict_recid))
        )
        if isinstance(msg, unicode):
            msg = msg.encode("utf-8")
        # The details go into a comment; the ticket text is just the subject.
        BIBCATALOG_SYSTEM.ticket_comment(None, res, msg)
Exemple #3
0
    def tmpl_your_tickets(self, uid, ln=CFG_SITE_LANG, start=1):
        """Build the HTML listing of the tickets owned by user ``uid``.

        Only ``self.SHOW_MAX_TICKETS`` tickets are rendered, beginning with
        the ``start``-th one (1-based); "Previous"/"Next" links are added
        when there are tickets before/after the rendered window.  An error
        message is returned instead when no ticket system is configured or
        its self-check reports a problem.
        """
        ln = wash_language(ln)
        _ = gettext_set_language(ln)
        if BIBCATALOG_SYSTEM is None:
            return _("Error: No BibCatalog system configured.")
        # errors? tell what happened and get out
        bibcat_probs = BIBCATALOG_SYSTEM.check_system(uid)
        if bibcat_probs:
            return _("Error") + " " + bibcat_probs

        # ids of the tickets owned by the user
        tickets = BIBCATALOG_SYSTEM.ticket_search(uid, owner=uid)
        parts = [(_("You have %i tickets.") % len(tickets)) + "<br/>"]

        # link to the previous page, if any
        if start > 1:
            previous_start = max(start - self.SHOW_MAX_TICKETS, 1)
            parts.append('<a href="/yourtickets/display?start=' +
                         str(previous_start) + '">' + _("Previous") + '</a>')

        parts.append("""<table border="1">""")
        # position of the last rendered ticket; stays at len(tickets) when
        # nothing falls inside the window, so that no "Next" link is shown
        last_shown = len(tickets)
        for position, ticket in enumerate(tickets, 1):
            in_window = start <= position < start + self.SHOW_MAX_TICKETS
            if not in_window:
                continue
            ticket_info = BIBCATALOG_SYSTEM.ticket_get_info(uid, ticket)
            subject = ticket_info['subject']
            status = ticket_info['status']
            text = ticket_info['text'] if 'text' in ticket_info else ""
            display = ('<a href="' + ticket_info['url_display'] + '">' +
                       _("show") + '</a>')
            close = ('<a href="' + ticket_info['url_close'] + '">' +
                     _("close") + '</a>')
            parts.append("<tr><td>" + str(ticket) + "</td><td>" + subject +
                         " " + text + "</td><td>" + status + "</td><td>" +
                         display + "</td><td>" + close + "</td></tr>\n")
            last_shown = position
        parts.append("</table>")

        # link to the next page, if any
        if len(tickets) > last_shown:
            parts.append('<a href="/yourtickets/display?start=' +
                         str(last_shown + 1) + '">' + _("Next") + '</a>')
        return "".join(parts)
Exemple #4
0
def submit_ticket(msg, subject, record_id, queue="Test", email=""):
    """
    Open a ticket in the ticketing system and attach ``msg`` as a comment.

    The ticket itself is created with ``subject`` used both as subject and
    as text; ``msg`` (UTF-8 encoded first when it is a unicode object) is
    added as a comment only when the submission returned a positive id.
    """
    body = msg.encode("utf-8") if isinstance(msg, unicode) else msg

    ticket_fields = {
        'subject': subject,
        'recordid': record_id,
        'text': subject,
        'queue': queue,
        'requestor': email,
    }
    ticket_id = BIBCATALOG_SYSTEM.ticket_submit(**ticket_fields)
    if ticket_id > 0:
        BIBCATALOG_SYSTEM.ticket_comment(None, ticket_id, body)
Exemple #5
0
    def submit_ticket(msg_subject, msg, record_id):
        """Submit one ticket and repeat its message as a comment.

        ``recordid`` is only passed to the ticket system when ``record_id``
        is not None.  The target queue comes from the task option "queue"
        (default "Bibcheck").
        """
        if isinstance(msg, unicode):
            msg = msg.encode("utf-8")

        ticket_fields = {
            'subject': msg_subject,
            'text': msg,
            'queue': task_get_option("queue", "Bibcheck"),
        }
        if record_id is not None:
            ticket_fields['recordid'] = record_id

        res = BIBCATALOG_SYSTEM.ticket_submit(**ticket_fields)
        write_message("Bibcatalog returned %s" % res)
        if not (res > 0):
            return
        BIBCATALOG_SYSTEM.ticket_comment(None, res, msg)
Exemple #6
0
def submit_ticket(msg, subject, record_id, queue="Test", email=""):
    """
    Create a ticket and post ``msg`` on it as a comment.

    ``subject`` doubles as the ticket text.  Nothing is commented when the
    submission returns a false value.
    """
    if isinstance(msg, unicode):
        comment = msg.encode("utf-8")
    else:
        comment = msg

    new_ticket = BIBCATALOG_SYSTEM.ticket_submit(subject=subject,
                                                 recordid=record_id,
                                                 text=subject,
                                                 queue=queue,
                                                 requestor=email)
    if not new_ticket:
        return
    BIBCATALOG_SYSTEM.ticket_comment(None, new_ticket, comment)
Exemple #7
0
def open_rt_ticket(e, debug_log=False, queue='Test'):
    """Take an exception e and, if allowed by the configuration,
    open a ticket for that exception.

    When the module-level flag ``rt_ticket_report`` is false the subject,
    body and debug text are only logged (verbose level 9).  Otherwise the
    module-level ``ticket_hashes`` cache, keyed by ``e.hash()``, decides
    what happens: an unknown hash gets a brand new ticket, a known one is
    flagged as seen and reopened if its cached status is 'resolved',
    'deleted' or 'rejected'.

    Arguments:
    e -- the exception to be reported
    debug_log -- append repr(e) and the attributes of e to the ticket text
    queue -- the queue in which a new ticket is created
    """
    global ticket_hashes
    ticket_hash = e.hash()
    subject = e.get_message_subject() + ' ' + ticket_hash
    body = e.get_message_body()
    if debug_log:
        attributes = '\n'.join(
            str(key) + " " + str(value) for key, value in vars(e).items())
        debug = "\n Debugging information: \n" + repr(e) + '\n' + attributes
    else:
        debug = ''

    if not rt_ticket_report:
        write_message('sub: ' + subject + '\nbody:\n' + body + '\ndbg:\n' +
                      debug,
                      verbose=9)
        return

    # Direct dict membership test (hash lookup) instead of scanning the keys.
    if ticket_hash not in ticket_hashes:
        ticket_id = BIBCATALOG_SYSTEM.ticket_submit(uid=None,
                                                    subject=subject,
                                                    recordid=e.recid,
                                                    text=body + debug,
                                                    queue=queue,
                                                    priority="",
                                                    owner="",
                                                    requestor="")
        # NOTE(review): the id returned by ticket_submit is not checked
        # before it is used -- confirm what it returns on failure.
        HooverStats.tickets_raised += 1
        ticket_data = BIBCATALOG_SYSTEM.ticket_get_info(None, ticket_id)
        ticket_hashes[ticket_hash] = ticket_data, ticket_id, True
        return

    # Known problem: keep the cached data and id, flag the entry as seen.
    ticket_data, ticket_id = ticket_hashes[ticket_hash][:2]
    ticket_hashes[ticket_hash] = (ticket_data, ticket_id, True)
    # If the ticket is already there check its status.  In case it is
    # marked as somehow solved -- i.e. resolved, deleted or rejected --
    # reopen it.
    if ticket_data['status'] in ['resolved', 'deleted', 'rejected']:
        BIBCATALOG_SYSTEM.ticket_set_attribute(None, ticket_id, 'status',
                                               'open')
        HooverStats.tickets_raised += 1
    def tmpl_your_tickets(self, uid, ln=CFG_SITE_LANG, start=1):
        """Return an HTML table with the tickets that belong to ``uid``.

        ``start`` is the 1-based position of the first ticket to render and
        at most ``self.SHOW_MAX_TICKETS`` rows are produced.  Paging links
        ("Previous"/"Next") surround the table when applicable.  If no
        ticket system is configured, or its check fails, the corresponding
        error text is returned instead.
        """

        def link(href, label):
            # Anchor markup shared by the paging and per-ticket links.
            return '<a href="' + href + '">' + label + '</a>'

        def page_url(first):
            return '/yourtickets/display?start=' + str(first)

        ln = wash_language(ln)
        _ = gettext_set_language(ln)
        if BIBCATALOG_SYSTEM is None:
            return _("Error: No BibCatalog system configured.")
        # errors? tell what happened and get out
        bibcat_probs = BIBCATALOG_SYSTEM.check_system(uid)
        if bibcat_probs:
            return _("Error")+" "+bibcat_probs

        tickets = BIBCATALOG_SYSTEM.ticket_search(uid, owner=uid)  # ticket ids
        lines = ""
        lines += (_("You have %i tickets.") % len(tickets)) + "<br/>"

        # "Previous" link, clamped to the first ticket
        if start > 1:
            newstart = start - self.SHOW_MAX_TICKETS
            if newstart < 1:
                newstart = 1
            lines += link(page_url(newstart), _("Previous"))

        lines += """<table border="1">"""
        lastshown = len(tickets)  # position of the last ticket rendered
        for position, ticket in enumerate(tickets, 1):
            # details are fetched only for tickets inside the shown range
            if position < start or position >= start + self.SHOW_MAX_TICKETS:
                continue
            ticket_info = BIBCATALOG_SYSTEM.ticket_get_info(uid, ticket)
            subject = ticket_info['subject']
            status = ticket_info['status']
            text = ""
            if 'text' in ticket_info:
                text = ticket_info['text']
            display = link(ticket_info['url_display'], _("show"))
            close = link(ticket_info['url_close'], _("close"))
            cells = [str(ticket), subject + " " + text, status, display, close]
            row = "<tr>"
            for cell in cells:
                row = row + "<td>" + cell + "</td>"
            lines += row + "</tr>\n"
            lastshown = position
        lines += "</table>"

        # "Next" link when tickets remain after the shown range
        if len(tickets) > lastshown:
            lines += link(page_url(lastshown + 1), _("Next"))
        return lines
Exemple #9
0
    def exists(self):
        """
        Search the ticket system for a ticket matching this one.

        The search is restricted to this object's record id, queue and
        subject.

        @return: the search result, which evaluates to True if a matching
            ticket exists and to False otherwise.
        """
        return BIBCATALOG_SYSTEM.ticket_search(None,
                                               recordid=self.recid,
                                               queue=self.queue,
                                               subject=self.subject)
    def exists(self):
        """
        Tell whether the ticket system already holds this ticket.

        @return: whatever the ticket search yields for this record id,
            queue and subject; truthy when at least one ticket matched.
        """
        criteria = {
            'recordid': self.recid,
            'queue': self.queue,
            'subject': self.subject,
        }
        matches = BIBCATALOG_SYSTEM.ticket_search(None, **criteria)
        return matches
Exemple #11
0
def submit_ticket(record, record_id):
    """Report the problems found on a record to bibcatalog.

    Does nothing when the task option "no_tickets" is set.  Otherwise a
    ticket is created in the queue named by the task option "queue"
    (default "Bibcheck") and the errors, amendments and warnings of the
    record are attached to it as a comment.
    """
    if task_get_option("no_tickets", False):
        return

    report_template = """
Bibcheck found some problems with the record with id %s:

Errors:
%s

Amendments:
%s

Warnings:
%s

Edit this record: %s
"""
    errors_text = "\n".join(record.errors)
    amendments_text = "\n".join(record.amendments)
    warnings_text = "\n".join(record.warnings)
    edit_url = "%s/record/%s/edit" % (CFG_SITE_URL, record_id)
    report = report_template % (record_id, errors_text, amendments_text,
                                warnings_text, edit_url)
    if isinstance(report, unicode):
        report = report.encode("utf-8")

    subject = "Bibcheck rule failed in record %s" % record_id

    # The ticket text is just the subject; the report goes into a comment.
    ticket_id = BIBCATALOG_SYSTEM.ticket_submit(
        subject=subject,
        recordid=record_id,
        text=subject,
        queue=task_get_option("queue", "Bibcheck")
    )
    write_message("Bibcatalog returned %s" % ticket_id)
    if not ticket_id:
        return
    BIBCATALOG_SYSTEM.ticket_comment(None, ticket_id, report)
Exemple #12
0
def submit_ticket(record, record_id):
    """Send the bibcheck findings for one record to bibcatalog.

    Skipped entirely when the "no_tickets" task option is set.  The ticket
    is opened with the subject as its text; the full report (errors,
    amendments, warnings and an edit link) is then added as a comment when
    a ticket id came back.
    """
    if task_get_option("no_tickets", False):
        return

    msg = """
Bibcheck found some problems with the record with id %s:

Errors:
%s

Amendments:
%s

Warnings:
%s

Edit this record: %s
"""
    sections = [
        "\n".join(findings)
        for findings in (record.errors, record.amendments, record.warnings)
    ]
    values = (record_id, sections[0], sections[1], sections[2],
              "%s/record/%s/edit" % (CFG_SITE_URL, record_id))
    msg = msg % values
    msg = msg.encode("utf-8") if isinstance(msg, unicode) else msg

    subject = "Bibcheck rule failed in record %s" % record_id
    target_queue = task_get_option("queue", "Bibcheck")

    ticket_id = BIBCATALOG_SYSTEM.ticket_submit(subject=subject,
                                                recordid=record_id,
                                                text=subject,
                                                queue=target_queue)
    write_message("Bibcatalog returned %s" % ticket_id)
    if ticket_id:
        BIBCATALOG_SYSTEM.ticket_comment(None, ticket_id, msg)
def get_new_ticket_RT_info(uid, recId):
    """Gather what the "new ticket" dialog needs for user ``uid``.

    Returns a dict whose ``resultCode`` is 1 when the ticket system check
    passed and the ``users``, ``queues``, ``email`` and ``ticketTemplates``
    entries were filled in, and 0 otherwise.  In the failure cases that
    can be explained, ``description`` carries the explanation.
    """
    response = dict(resultCode=0)
    if BIBCATALOG_SYSTEM is None:
        response['description'] = "<!--No ticket system configured-->"
    elif BIBCATALOG_SYSTEM and uid:
        rt_status = BIBCATALOG_SYSTEM.check_system(uid)
        if rt_status == "":
            # possible owners: registered users with an RT user name set
            candidate_owners = []
            for row in list_registered_users():
                try:
                    entry = dict(
                        username=get_user_preferences(
                            row[0])['bibcatalog_username'],
                        id=row[0])
                except KeyError:
                    continue
                candidate_owners.append(entry)
            # TODO try catch (around the template loading)
            response.update(
                users=candidate_owners,
                queues=BIBCATALOG_SYSTEM.get_queues(uid),
                email=get_email(uid),
                ticketTemplates=load_ticket_templates(recId),
                resultCode=1)
        else:
            # put something in the tickets container, for debug
            response['description'] = "".join(
                ["Error connecting to RT<!--", rt_status, "-->"])
    return response
Exemple #14
0
    def submit(self):
        """
        Create the ticket through the BibCatalog API unless it exists.

        The id returned by the ticket system is stored in ``self.ticketid``.
        NOTE(review): that id is not inspected here, so a failed submission
        is only signalled if ``ticket_submit`` itself raises.

        @return bool: True if a submission was made, False if a matching
            ticket already existed.
        """
        if self.exists():
            return False
        self.ticketid = BIBCATALOG_SYSTEM.ticket_submit(
            subject=self.subject,
            queue=self.queue,
            text=self.body,
            recordid=self.recid)
        return True
    def submit(self):
        """
        Submit this ticket via BibCatalog when no matching ticket exists.

        On submission ``self.ticketid`` receives the value returned by the
        ticket system (NOTE(review): the value is stored unchecked).

        @return bool: True when the ticket was submitted, False when it was
            skipped because it already exists.
        """
        already_there = self.exists()
        if not already_there:
            fields = dict(subject=self.subject,
                          queue=self.queue,
                          text=self.body,
                          recordid=self.recid)
            self.ticketid = BIBCATALOG_SYSTEM.ticket_submit(**fields)
            return True
        return False
Exemple #16
0
def task_check_options():
    """ Reimplement this method for having the possibility to check options
    before submitting the task, in order for example to provide default
    values. It must return False if there are errors in the options.

    Checks performed here: at least one record selection option ('new',
    'modified', 'recids', 'collections' or 'reportnumbers') is set; every
    requested ticket plugin is enabled (or 'all-tickets' is given); and a
    cataloging system is defined.  On success the 'tickets' task option is
    replaced by a mapping of the selected plugins.
    """
    record_selectors = ('new', 'modified', 'recids', 'collections',
                        'reportnumbers')
    if not any(task_get_option(selector) for selector in record_selectors):
        print >>sys.stderr, 'Error: No records specified, you need' \
            ' to specify which records to run on'
        return False

    ticket_plugins = {}
    all_plugins, error_messages = load_ticket_plugins()

    if error_messages:
        # We got broken plugins. We alert only for now.
        print >>sys.stderr, "\n".join(error_messages)

    requested_tickets = task_get_option('tickets')
    if requested_tickets:
        # Tickets specified; look the enabled plugins up once, not per ticket
        enabled_plugins = all_plugins.get_enabled_plugins()
        for ticket in requested_tickets:
            if ticket not in enabled_plugins:
                # The plugin name is now part of the message (the format
                # argument used to be missing and the name was printed
                # separately on stdout).
                print >>sys.stderr, \
                    'Error: plugin %s is broken or does not exist' % (ticket,)
                return False
            ticket_plugins[ticket] = all_plugins[ticket]
    elif task_get_option('all-tickets'):
        ticket_plugins = all_plugins.get_enabled_plugins()
    else:
        print >>sys.stderr, 'Error: No tickets specified, you need' \
            ' to specify at least one ticket type to create'
        return False

    task_set_option('tickets', ticket_plugins)

    if not BIBCATALOG_SYSTEM:
        print >>sys.stderr, 'Error: no cataloging system defined'
        return False

    res = BIBCATALOG_SYSTEM.check_system()
    if res:
        # NOTE(review): a failing system check is reported but does not make
        # the option check fail -- confirm this is intended.
        print >>sys.stderr, 'Error while checking cataloging system: %s' % \
            (res,)
    return True
def task_check_options():
    """ Reimplement this method for having the possibility to check options
    before submitting the task, in order for example to provide default
    values. It must return False if there are errors in the options.

    The task needs a record selection ('new', 'modified', 'recids',
    'collections' or 'reportnumbers'), a ticket selection ('tickets' naming
    enabled plugins, or 'all-tickets') and a defined cataloging system.
    When all of this holds, the 'tickets' option is overwritten with the
    mapping of selected plugins and True is returned.
    """
    if not task_get_option('new') \
            and not task_get_option('modified') \
            and not task_get_option('recids') \
            and not task_get_option('collections')\
            and not task_get_option('reportnumbers'):
        print >>sys.stderr, 'Error: No records specified, you need' \
            ' to specify which records to run on'
        return False

    ticket_plugins = {}
    all_plugins, error_messages = load_ticket_plugins()

    if error_messages:
        # We got broken plugins. We alert only for now.
        print >>sys.stderr, "\n".join(error_messages)

    wanted = task_get_option('tickets')
    if wanted:
        # Tickets specified: each one must be among the enabled plugins.
        enabled = all_plugins.get_enabled_plugins()
        for ticket in wanted:
            if ticket in enabled:
                ticket_plugins[ticket] = all_plugins[ticket]
                continue
            # Name the offending plugin in the error itself; previously the
            # '%s' was left unformatted and the name went to stdout.
            print >>sys.stderr, \
                'Error: plugin %s is broken or does not exist' % (ticket,)
            return False
    elif task_get_option('all-tickets'):
        ticket_plugins = all_plugins.get_enabled_plugins()
    else:
        print >>sys.stderr, 'Error: No tickets specified, you need' \
            ' to specify at least one ticket type to create'
        return False

    task_set_option('tickets', ticket_plugins)

    if not BIBCATALOG_SYSTEM:
        print >>sys.stderr, 'Error: no cataloging system defined'
        return False

    res = BIBCATALOG_SYSTEM.check_system()
    if res:
        # NOTE(review): the problem is only printed; the function still
        # returns True afterwards -- confirm this is intended.
        print >>sys.stderr, 'Error while checking cataloging system: %s' % \
            (res,)
    return True
Exemple #18
0
def hoover(authors=None,
           check_db_consistency=False,
           dry_run=False,
           packet_size=1000,
           dry_hepnames_run=False,
           open_tickets=False,
           queue='Test'):
    """The actions that hoover performs are the following:
    1. Find out the identifiers that belong to the authors(pids) in the database
    2. Find and pull all the signatures that have the same identifier as the author to the author
    3. Connect the profile of the author with the hepnames collection entry
    (optional) check the database to see if it is in a consistent state

    Keyword arguments:
    authors -- an iterable of authors to be hoovered; when empty or None
               the result of get_existing_authors() is used
    check_db_consistency -- perform checks for the consistency of the database
    dry_run -- do not alter the database tables
    packet_size -- squeeze together the marcxml. This there are fewer bibupload
                   processes for the bibsched to run.
    dry_hepnames_run -- do not alter the hepnames collection
    open_tickets -- stored in the module-level rt_ticket_report flag; when
                    true the existing '[Hoover]' tickets of the queue are
                    cached in ticket_hashes and, at the end of the run, the
                    cached tickets that were not flagged as seen again are
                    set to 'resolved'
    queue -- the name of the queue to be used in the rt system for the tickets
    """
    global rt_ticket_report
    global ticket_hashes
    rt_ticket_report = open_tickets
    write_message("Packet size {0}".format(packet_size), verbose=1)
    write_message("Initializing hoover", verbose=1)
    write_message("Selecting records with identifiers...", verbose=1)
    recs = get_records_with_tag('100__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('100__j')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__i')
    task_sleep_now_if_required(can_stop_too=True)
    recs += get_records_with_tag('700__j')
    task_sleep_now_if_required(can_stop_too=True)
    write_message("Found {0} records".format(len(set(recs))), verbose=2)
    # Keep only the records that are also present in aidPERSONIDPAPERS.
    recs = set(recs) & set(
        run_sql("select DISTINCT(bibrec) from aidPERSONIDPAPERS"))
    write_message("   out of which {0} are in BibAuthorID".format(len(recs)),
                  verbose=2)
    task_sleep_now_if_required(can_stop_too=True)

    records_with_id = set(rec[0] for rec in recs)

    destroy_partial_marc_caches()
    populate_partial_marc_caches(records_with_id, create_inverted_dicts=True)

    if rt_ticket_report:
        write_message("Ticketing system rt is used", verbose=9)
        write_message("Building hash cache for tickets for queue %s" % queue,
                      verbose=9)
        ticket_ids = BIBCATALOG_SYSTEM.ticket_search(None,
                                                     subject='[Hoover]',
                                                     queue=queue)
        write_message("Found %s existing tickets" % len(ticket_ids), verbose=9)
        for ticket_id in ticket_ids:
            task_sleep_now_if_required(can_stop_too=True)
            try:
                ticket_data = BIBCATALOG_SYSTEM.ticket_get_info(
                    None, ticket_id)
                # The last word of the subject is the hash of the problem.
                # The trailing False means "not seen again in this run";
                # open_rt_ticket flips it to True.
                subject_hash = ticket_data['subject'].split()[-1]
                ticket_hashes[subject_hash] = ticket_data, ticket_id, False
            except IndexError:
                write_message(
                    "Problem in subject of ticket {0}".format(ticket_id),
                    verbose=5)
        write_message("Found {0} tickets".format(len(ticket_hashes)),
                      verbose=2)

    task_sleep_now_if_required(can_stop_too=True)
    # Per identifier type: the getters to try (reliable first, unreliable as
    # a fallback), how to fetch the matching signatures, how to build the
    # hepnames connection entry, and the pid <-> id mappings collected below.
    fdict_id_getters = {
        "INSPIREID": {
            'reliable': [
                get_inspire_id_of_author, get_inspireID_from_hepnames,
                lambda pid: get_inspireID_from_claimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'unreliable': [
                lambda pid: get_inspireID_from_unclaimed_papers(
                    pid, intersection_set=records_with_id, queue=queue)
            ],
            'signatures_getter':
            get_signatures_with_inspireID,
            'connection':
            dict_entry_for_hepnames_connector,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        },
        "ORCID": {
            # No ORCID getters are wired up: both lists are empty.
            'reliable': [],
            'unreliable': [],
            'signatures_getter': lambda x: list(),
            'connection': lambda pid, _id: None,
            'data_dicts': {
                'pid_mapping': defaultdict(set),
                'id_mapping': defaultdict(set)
            }
        }
    }

    if not authors:
        authors = get_existing_authors()

    write_message("Running on {0}".format(len(authors)), verbose=2)

    unclaimed_authors = defaultdict(set)
    hep_connector = HepnamesConnector(packet_size=packet_size,
                                      dry_hepnames_run=dry_hepnames_run)

    # Phase 1: look for a reliable identifier of every author.
    for pid in authors:
        task_sleep_now_if_required(can_stop_too=True)
        write_message("Searching for reliable ids of person {0}".format(pid),
                      verbose=2)
        for identifier_type, functions in fdict_id_getters.items():
            write_message("    Type: {0}".format(identifier_type, ), verbose=9)

            try:
                # Lazy: each getter only runs when the previous ones found
                # nothing (or all of them run for the consistency check).
                candidates = (func(pid) for func in functions['reliable'])
                if check_db_consistency:
                    results = [found for found in candidates if found]
                    if len(set(results)) > 1:
                        raise InconsistentIdentifiersException(
                            'Inconsistent database', pid, identifier_type,
                            set(results))
                    res = results[0] if results else None
                else:
                    res = next((found for found in candidates if found), None)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except BrokenHepNamesRecordException:
                continue
            except InconsistentIdentifiersException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            else:
                if res:
                    HooverStats.new_ids_found += 1
                    write_message("   Found reliable id {0}".format(res, ),
                                  verbose=9)
                    mappings = fdict_id_getters[identifier_type]['data_dicts']
                    mappings['pid_mapping'][pid].add(res)
                    mappings['id_mapping'][res].add(pid)
                else:
                    write_message("   No reliable id found", verbose=9)
                    unclaimed_authors[identifier_type].add(pid)

    write_message("Vacuuming reliable ids...", verbose=2)

    # Phase 2: pull the signatures of every unambiguous reliable identifier.
    for identifier_type, data in fdict_id_getters.items():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = data['connection']
        for pid, identifiers in data['data_dicts']['pid_mapping'].items():
            write_message(
                "   Person {0} has reliable identifier(s) {1} ".format(
                    str(pid), str(identifiers)),
                verbose=9)
            try:
                if len(identifiers) != 1:
                    # Report the identifier type actually being processed
                    # (it used to be hard-coded to 'INSPIREID').
                    raise MultipleIdsOnSingleAuthorException(
                        "More than one identifier on a single author ", pid,
                        identifier_type, identifiers)

                identifier = list(identifiers)[0]
                write_message(
                    "        Considering  {0}".format(identifier),
                    verbose=9)

                owners = data['data_dicts']['id_mapping'][identifier]
                if len(owners) != 1:
                    raise MultipleAuthorsWithSameIdException(
                        "More than one authors with the same identifier",
                        owners, identifier)

                if not dry_run:
                    rowenta = Vacuumer(pid)
                    signatures = data['signatures_getter'](identifier)
                    write_message(
                        "        Vacuuming {0} signatures! ".format(
                            str(len(signatures))),
                        verbose=4)
                    for sig in signatures:
                        try:
                            rowenta.vacuum_signature(sig)
                        except DuplicateClaimedPaperException as e:
                            open_rt_ticket(e, queue=queue)
                        except DuplicateUnclaimedPaperException as e:
                            unclaimed_authors[identifier_type].add(e.pid)
                    write_message(
                        "        Adding inspireid {0} to pid {1}".format(
                            identifier, pid),
                        verbose=3)
                    add_external_id_to_author(pid, identifier_type,
                                              identifier)
                    hep_connector.add_connection(pid, identifier)

            except MultipleAuthorsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleIdsOnSingleAuthorException as e:
                open_rt_ticket(e, queue=queue)
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
            write_message("   Done with {0}".format(pid, ), verbose=3)

    write_message("Vacuuming unreliable ids...", verbose=2)

    # Phase 3: fall back to the unreliable getters for the remaining authors.
    for identifier_type, functions in fdict_id_getters.items():
        task_sleep_now_if_required(can_stop_too=True)
        hep_connector.produce_connection_entry = functions['connection']
        for pid in unclaimed_authors[identifier_type]:
            write_message(
                "Searching for unreliable ids of person {0}".format(pid),
                verbose=9)
            try:
                candidates = (func(pid) for func in functions['unreliable'])
                res = next((found for found in candidates if found), None)
            except MultipleIdsOnSingleAuthorException:
                continue
            except BrokenHepNamesRecordException:
                continue
            except MultipleHepnamesRecordsWithSameIdException as e:
                open_rt_ticket(e, queue=queue)
                # No identifier was obtained for this person: move on rather
                # than fall through with an unbound or stale `res`.
                continue
            if res is None:
                continue

            HooverStats.new_ids_found += 1
            write_message(
                "   Person {0} has unreliable identifier {1} ".format(
                    str(pid), str(res)),
                verbose=9)

            if res in functions['data_dicts']['id_mapping']:
                write_message(
                    "        Id {0} is already assigned to another person, skipping person {1} "
                    .format(str(res), pid))
                continue

            if not dry_run:
                rowenta = Vacuumer(pid)
                signatures = functions['signatures_getter'](res)
                for sig in signatures:
                    try:
                        rowenta.vacuum_signature(sig)
                    except DuplicateClaimedPaperException as e:
                        open_rt_ticket(e, queue=queue)
                    except DuplicateUnclaimedPaperException:
                        pass

                write_message("     Adding inspireid {0} to pid {1}".format(
                    res, pid),
                              verbose=3)
                add_external_id_to_author(pid, identifier_type, res)
                hep_connector.add_connection(pid, res)
            write_message("   Done with {0}".format(pid), verbose=3)
    hep_connector.execute_connection()

    # Resolve the cached tickets whose problem was not reported again in this
    # run.  The cache values are (ticket_data, ticket_id, seen_again) tuples;
    # iterating the dict itself yields only the hash strings, which is why
    # this clean-up never matched anything before.  It is limited to runs
    # with ticket reporting enabled, the only mode that fills the cache here.
    if rt_ticket_report:
        for _ticket_data, ticket_id, seen_again in ticket_hashes.values():
            if not seen_again:
                BIBCATALOG_SYSTEM.ticket_set_attribute(None, ticket_id,
                                                       'status', 'resolved')

    HooverStats.report_results()
    write_message("Terminating hoover", verbose=1)
Exemple #19
0
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# pylint: disable=C0103
"""BibEdit Templates."""

__revision__ = "$Id$"

from invenio.config import CFG_SITE_URL, CFG_INSPIRE_SITE
from invenio.messages import gettext_set_language

from invenio.bibcatalog import BIBCATALOG_SYSTEM

# Probe the ticket backend once, at import time, to find out whether ticket
# searching is available.  BIBCATALOG_SYSTEM may be None when no ticket system
# is configured; treat that as "cannot search" instead of letting the probe
# fail with an AttributeError while this module is being imported.
if BIBCATALOG_SYSTEM is None:
    CFG_CAN_SEARCH_FOR_TICKET = False
else:
    try:
        BIBCATALOG_SYSTEM.ticket_search(0)
    except NotImplementedError:
        # The backend does not implement ticket searching.
        CFG_CAN_SEARCH_FOR_TICKET = False
    else:
        CFG_CAN_SEARCH_FOR_TICKET = True


class Template:
    """BibEdit Templates Class."""
    def __init__(self):
        """Initialize."""
        pass

    def menu(self):
        """Create the menu."""

        recordmenu = '<div class="bibEditMenuSectionHeader">\n' \
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# pylint: disable=C0103

"""BibEdit Templates."""

__revision__ = "$Id$"

from invenio.config import CFG_SITE_URL, CFG_INSPIRE_SITE
from invenio.messages import gettext_set_language

from invenio.bibcatalog import BIBCATALOG_SYSTEM

# Determine at import time whether the ticket backend can search tickets.
# When no ticket system is configured BIBCATALOG_SYSTEM may be None; in that
# case searching is simply unavailable, and the probe must not be attempted
# (it would raise AttributeError and break the import of this module).
if BIBCATALOG_SYSTEM is None:
    CFG_CAN_SEARCH_FOR_TICKET = False
else:
    try:
        BIBCATALOG_SYSTEM.ticket_search(0)
    except NotImplementedError:
        # Searching is not implemented by this backend.
        CFG_CAN_SEARCH_FOR_TICKET = False
    else:
        CFG_CAN_SEARCH_FOR_TICKET = True

class Template:
    """Class grouping the BibEdit templates."""

    def __init__(self):
        """Create the instance; no initialization is performed."""

    def menu(self):
        """Create the menu."""
Exemple #21
0
def check_records(records,
                  doi_field="0247_a",
                  extra_subfields=(("2", "DOI"), ("9", "bibcheck")),
                  create_ticket=False):
    """
    Look up missing DOIs (via crossref) and store them in the records.

    A record is left alone when it already carries a DOI (a "0247_2"
    subfield equal to "doi", case-insensitively) or when it belongs to the
    proceedings collection (a "980__a" subfield equal to "proceedings").

    @param records: records to inspect and amend in place
    @param doi_field: six characters describing where the DOI is written:
        tag (3 chars), first indicator, second indicator, subfield code
    @param extra_subfields: (code, value) pairs appended to the new field
    @param create_ticket: if true, submit a ticket to the "Bibcheck" queue
        whenever the DOI found is already attached to another record
    """
    # Select the records that still need a DOI, keyed by record id.
    candidates = {}
    for rec in records:
        already_has_doi = any(
            value.lower() == "doi"
            for _pos, value in rec.iterfield("0247_2"))
        # Records in the proceedings collection are never considered.
        in_proceedings = any(
            value.lower() == "proceedings"
            for _pos, value in rec.iterfield("980__a"))
        if already_has_doi or in_proceedings:
            continue
        candidates[rec.record_id] = rec

    found_dois = get_doi_for_records(candidates.values())
    for record_id, doi in found_dois.iteritems():
        record = candidates[record_id]
        conflicting_recid = find_record_from_doi(doi)

        if not conflicting_recid:
            # No other record owns this DOI: add it and flag the amendment.
            subfields = [(doi_field[5], doi.encode("utf-8"))]
            subfields.extend(tuple(pair) for pair in extra_subfields)
            record_add_field(record,
                             tag=doi_field[:3],
                             ind1=doi_field[3],
                             ind2=doi_field[4],
                             subfields=subfields)
            record.set_amended("Added DOI in field %s" % doi_field)
            continue

        # The DOI is already in use elsewhere: warn, optionally open a
        # ticket, and leave the record unmodified.
        record.warn(
            "DOI %s to be added to record %s already exists in record/s %s"
            % (doi, record_id, conflicting_recid))
        if create_ticket:
            subject = "DOI conflict record #%s" % str(record_id)
            ticket_id = BIBCATALOG_SYSTEM.ticket_submit(subject=subject,
                                                        recordid=record_id,
                                                        text=subject,
                                                        queue="Bibcheck")
            if ticket_id:
                template = """
                        DOI %s to be added to record %s already exists in record/s %s

                        Record with conflict: %s
                        Record with original DOI: %s

                        Merge both records: %s
                    """
                conflicting_recid = int(conflicting_recid)
                record_id = int(record_id)
                conflict_url = "%s/record/%s" % (CFG_SITE_URL, record_id)
                original_url = "%s/record/%s" % (CFG_SITE_URL,
                                                 conflicting_recid)
                # The merge link always lists the lower record id first.
                merge_url = "%s/record/merge/?#recid1=%s&recid2=%s" % (
                    CFG_SITE_URL,
                    min(conflicting_recid, record_id),
                    max(conflicting_recid, record_id))
                comment = template % (doi, record_id, conflicting_recid,
                                      conflict_url, original_url, merge_url)
                if isinstance(comment, unicode):
                    comment = comment.encode("utf-8")
                BIBCATALOG_SYSTEM.ticket_comment(None, ticket_id, comment)