def check_paper(self):
            '''
            Removes from the personid table the rows of the current paper
            whose bibref/bibrec pair is no longer attached to the record,
            then refreshes the names of the persons found on the paper.

            self.paper is read as (bibrec, rows); every row is indexed as
            (id, personid, tag, data, flag, lcul), the same labels used in
            the debug output below.
            '''
            bibrec = self.paper[0]
            rows = self.paper[1]

            if bconfig.TABLES_UTILS_DEBUG:
                print(" -> processing paper = %s" % (bibrec,))

            bibrefs100 = dbinter.get_authors_from_paper(bibrec)
            bibrefs700 = dbinter.get_coauthors_from_paper(bibrec)
            # Every "tag:bibref,bibrec" string built from the author (100) and
            # coauthor (700) lookups for this paper.
            bibrecreflist = frozenset(
                ["100:%s,%s" % (str(ref[0]), bibrec) for ref in bibrefs100]
                + ["700:%s,%s" % (str(ref[0]), bibrec) for ref in bibrefs700]
            )
            # Fetched on first use only. None is the "not fetched yet" marker, so
            # an empty result is remembered instead of being queried again for
            # every stale row.
            pid_rows_lazy = None

            # Finally, if a bibrec/ref pair is in the authornames table but not in
            # this list, that name of that paper no longer exists and must be
            # removed from the table. The new one will be added by the update
            # procedure in the future; keeping this entry would be risky because
            # the garbage collector may decide to kill the bibref in the bibX0x
            # table.
            for row in rows:
                if row[3] not in bibrecreflist:
                    if pid_rows_lazy is None:
                        pid_rows_lazy = dbinter.collect_personid_papers(paper=(bibrec,), person=personid_q)

                    # ids of the other rows that carry the same personid with a
                    # different data field
                    other_bibrefs = [b[0] for b in pid_rows_lazy if b[1] == row[1] and b[3] != row[3]]
                    dbinter.delete_personid_by_id(int(row[0]))
                    if bconfig.TABLES_UTILS_DEBUG:
                        # the leading blanks of the continuation line are part of
                        # the emitted message and are kept as they were
                        print("*   deleting record with missing bibref: \
                               id = %s, personid = %s, tag = %s, data = %s, flag = %s, lcul = %s" % row)
                        print("found %d other records with the same personid and bibrec" % len(other_bibrefs))
                    if len(other_bibrefs) == 1:
                        # We have one and only one substitute, so the flag and
                        # lcul of the deleted row are moved onto it.
                        dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
                        if bconfig.TABLES_UTILS_DEBUG:
                            print("updating id=%d with flag=%d,lcul=%d" % (other_bibrefs[0], row[4], row[5]))

            # one-element tuples, one per distinct personid seen on this paper
            persons_to_update = set((row[1],) for row in rows)
            dbinter.update_personID_canonical_names(persons_to_update)
            dbinter.update_personID_names_string_set(persons_to_update, single_threaded=True, wait_finished=True)
            close_connection()
def personid_remove_automatically_assigned_papers(pids=None):
    '''
    Part of the person repair facility.
    Walks over the given persons and, for each of them:
      - if claimed papers exist, hands the pid tuple to
        del_person_not_manually_claimed_papers;
      - if there are no claimed papers, no rejected papers and no RT
        tickets, calls delete_personid_by_id with the person id;
      - in every other case leaves the person untouched.
    Falls back to get_all_person_ids() when pids is None or empty.
    @param pids: List of tuples of person IDs
    @type pids: list of tuples
    '''
    if not pids:
        pids = get_all_person_ids()

    for pid in pids:
        person_id = pid[0]
        tickets = get_person_rt_tickets(person_id)
        pclaims = get_person_claimed_papers(person_id)
        nclaims = get_person_rejected_papers(person_id)

        has_claims = len(pclaims) > 0
        no_interaction = len(tickets) == 0 and len(nclaims) == 0

        if has_claims:
            del_person_not_manually_claimed_papers(pid)
        elif no_interaction:
            # NOTE(review): dbinter.delete_personid_by_id is called elsewhere in
            # this file with personid-table row ids; confirm that a person id is
            # the intended argument here.
            delete_personid_by_id(person_id)
        def check_paper(self):
            '''
            Removes from the personid table the rows of the current paper
            whose bibref/bibrec pair is no longer attached to the record,
            then refreshes the names of the persons found on the paper.

            self.paper is read as (bibrec, rows); every row is indexed as
            (id, personid, tag, data, flag, lcul), the same labels used in
            the debug output below.
            '''
            bibrec = self.paper[0]
            rows = self.paper[1]

            if bconfig.TABLES_UTILS_DEBUG:
                print(" -> processing paper = %s" % (bibrec,))

            bibrefs100 = dbinter.get_authors_from_paper(bibrec)
            bibrefs700 = dbinter.get_coauthors_from_paper(bibrec)
            # Every "tag:bibref,bibrec" string built from the author (100) and
            # coauthor (700) lookups for this paper.
            bibrecreflist = frozenset(["100:%s,%s" % (str(ref[0]), bibrec) for ref in bibrefs100] +
                                      ["700:%s,%s" % (str(ref[0]), bibrec) for ref in bibrefs700])
            # Fetched on first use only. None is the "not fetched yet" marker, so
            # an empty result is remembered instead of being queried again for
            # every stale row.
            pid_rows_lazy = None

            # Finally, if a bibrec/ref pair is in the authornames table but not in
            # this list, that name of that paper no longer exists and must be
            # removed from the table. The new one will be added by the update
            # procedure in the future; keeping this entry would be risky because
            # the garbage collector may decide to kill the bibref in the bibX0x
            # table.
            for row in rows:
                if row[3] not in bibrecreflist:
                    if pid_rows_lazy is None:
                        pid_rows_lazy = dbinter.collect_personid_papers(paper=(bibrec,),
                                                                        person=personid_q)

                    # ids of the other rows that carry the same personid with a
                    # different data field
                    other_bibrefs = [b[0] for b in pid_rows_lazy if b[1] == row[1] and b[3] != row[3]]
                    dbinter.delete_personid_by_id(int(row[0]))
                    if bconfig.TABLES_UTILS_DEBUG:
                        # the leading blanks of the continuation line are part of
                        # the emitted message and are kept as they were
                        print("*   deleting record with missing bibref: \
                               id = %s, personid = %s, tag = %s, data = %s, flag = %s, lcul = %s" % row)
                        print("found %d other records with the same personid and bibrec" % len(other_bibrefs))
                    if len(other_bibrefs) == 1:
                        # We have one and only one substitute, so the flag and
                        # lcul of the deleted row are moved onto it.
                        dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
                        if bconfig.TABLES_UTILS_DEBUG:
                            print("updating id=%d with flag=%d,lcul=%d" % (other_bibrefs[0], row[4], row[5]))

            # one-element tuples, one per distinct personid seen on this paper
            persons_to_update = set((row[1],) for row in rows)
            dbinter.update_personID_canonical_names(persons_to_update)
            dbinter.update_personID_names_string_set(persons_to_update, single_threaded=True, wait_finished=True)
            close_connection()
def personid_remove_automatically_assigned_papers(pids=None):
    '''
    Part of the person repair facility.
    Removes every person entity that has no prior human interaction.
    Will run on all person entities if pids == None
    @param pids: List of tuples of person IDs
    @type pids: list of tuples
    '''
    if not pids:
        pids = get_all_person_ids()

    for pid in pids:
        tickets = get_person_rt_tickets(pid[0])
        pclaims = get_person_claimed_papers(pid[0])
        nclaims = get_person_rejected_papers(pid[0])

        if len(tickets) > 0 and len(pclaims) == 0 and len(nclaims) == 0:
            continue
        elif len(tickets) == 0 and len(pclaims) == 0 and len(nclaims) == 0:
            delete_personid_by_id(pid[0])
        elif len(pclaims) > 0:
            del_person_not_manually_claimed_papers(pid)
        elif len(nclaims) > 0:
            continue
        else:
            end_loop = True

        papers_data = tuple((extract_bibrec(p[3]), p) for p in papers_data)
        to_remove = set()
        jobs = dict()
        for p in papers_data:
            if int(p[0]) in deleted_recs:
                to_remove.add(p[1][0])
            elif not papers_list or int(p[0]) in papers_list:
                jobs[p[0]] = jobs.get(p[0], []) + [p[1]]
        del(papers_data)

        if len(to_remove) > 0:
            task_sleep_now_if_required(True)
            delta = dbinter.delete_personid_by_id(to_remove)
            counter -= delta
            if bconfig.TABLES_UTILS_DEBUG:
                print "*   deleting %d papers, from %d, marked as deleted" % (delta, len(to_remove))

        jobslist = Queue()
        for p in jobs.items():
            jobslist.put(p)
        del(jobs)

        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        while not jobslist.empty():
            workers = []
            checker = status_checker()
            for i in range(max_processes):
                w = Worker(jobslist, checker)
        else:
            end_loop = True

        papers_data = tuple((extract_bibrec(p[3]), p) for p in papers_data)
        to_remove = set()
        jobs = dict()
        for p in papers_data:
            if int(p[0]) in deleted_recs:
                to_remove.add(p[1][0])
            elif not papers_list or int(p[0]) in papers_list:
                jobs[p[0]] = jobs.get(p[0], []) + [p[1]]
        del(papers_data)

        if len(to_remove) > 0:
            task_sleep_now_if_required(can_stop_too=False)
            delta = dbinter.delete_personid_by_id(to_remove)
            counter -= delta
            if bconfig.TABLES_UTILS_DEBUG:
                print "*   deleting %d papers, from %d, marked as deleted" % (delta, len(to_remove))

        jobslist = Queue()
        for p in jobs.items():
            jobslist.put(p)
        del(jobs)

        max_processes = bconfig.CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
        while not jobslist.empty():
            workers = []
            checker = status_checker()
            for i in range(max_processes):
                w = Worker(jobslist, checker)