def run(self):
    """
    Consume items from ``self.p_q`` until it is empty or a stop is requested.

    Before each dequeue the module-level ``checker`` is asked whether work
    should stop. Every item taken from the queue is passed, together with
    ``self.i``, ``self.atul`` and ``self.personid_new_id_lock``, to
    ``pfap_assign_paper_iteration``.
    """
    while not checker.should_stop():
        try:
            next_bibrec = self.p_q.get_nowait()
        except Empty:
            # Nothing left in the queue: this worker is done.
            break

        # NOTE(review): presumably drops the current DB connection so the
        # iteration below opens its own -- confirm against close_connection.
        close_connection()
        pfap_assign_paper_iteration(self.i, next_bibrec, self.atul,
                                    self.personid_new_id_lock)
def check_paper(self):
    """
    Delete the personid rows of one paper whose bibref no longer exists.

    ``self.paper`` is read as a pair:

    * ``self.paper[0]`` -- the paper identifier, used to query the current
      authors/coauthors and to build the ``"<tag>:<ref>,<paper>"`` strings;
    * ``self.paper[1]`` -- the rows to check. Going by the debug message
      below each row is ``(id, personid, tag, data, flag, lcul)``, and
      ``row[3]`` (data) is compared against the strings built here.

    For every row whose data string is not among the paper's current
    100/700 bibrefs, the row is deleted. If exactly one other row with the
    same personid and a different data string is found for the paper, the
    deleted row's flag and lcul are copied onto that row. Finally the
    canonical names and name string sets of all persons appearing in
    ``self.paper[1]`` are refreshed and ``close_connection()`` is called.

    ``personid_q`` is not defined in this method and must be provided by
    an enclosing scope.

    ``print(...)`` is always called with one parenthesised argument so the
    output is the same under the Python 2 print statement and the Python 3
    print function.
    """
    paper_id = self.paper[0]
    paper_rows = self.paper[1]

    if bconfig.TABLES_UTILS_DEBUG:
        print(" -> processing paper = %s" % (paper_id,))

    bibrefs100 = dbinter.get_authors_from_paper(paper_id)
    bibrefs700 = dbinter.get_coauthors_from_paper(paper_id)
    bibrecreflist = frozenset(
        ["100:%s,%s" % (str(ref[0]), paper_id) for ref in bibrefs100] +
        ["700:%s,%s" % (str(ref[0]), paper_id) for ref in bibrefs700])

    # Fetched on first use only; None means "not fetched yet". An empty
    # result is a valid cached value and must not trigger another query.
    pid_rows_lazy = None

    # Finally, if a bibrec/ref pair is in the authornames table but not in
    # this list, that name of that paper no longer exists and must be
    # removed from the table. The new one will be added by the update
    # procedure in the future; keeping this entry would be risky because
    # the garbage collector may decide to kill the bibref in the bibX0x
    # table.
    for row in paper_rows:
        if row[3] in bibrecreflist:
            continue

        if pid_rows_lazy is None:
            pid_rows_lazy = dbinter.collect_personid_papers(
                paper=(paper_id,), person=personid_q)

        # Ids of rows with the same personid but a different data string.
        other_bibrefs = [b[0] for b in pid_rows_lazy
                         if b[1] == row[1] and b[3] != row[3]]
        dbinter.delete_personid_by_id(int(row[0]))

        if bconfig.TABLES_UTILS_DEBUG:
            # Adjacent literals instead of a backslash continuation inside
            # the string, so no stray backslash/indentation is printed.
            print(("* deleting record with missing bibref: "
                   "id = %s, personid = %s, tag = %s, data = %s, "
                   "flag = %s, lcul = %s") % row)
            print("found %d other records with the same personid and bibrec"
                  % len(other_bibrefs))

        if len(other_bibrefs) == 1:
            # We have one and only one substitute, we can switch them!
            dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
            if bconfig.TABLES_UTILS_DEBUG:
                print("updating id=%d with flag=%d,lcul=%d"
                      % (other_bibrefs[0], row[4], row[5]))

    # One-element tuples, as in the original call to the dbinter helpers.
    persons_to_update = set((p[1],) for p in paper_rows)
    dbinter.update_personID_canonical_names(persons_to_update)
    dbinter.update_personID_names_string_set(persons_to_update,
                                             single_threaded=True,
                                             wait_finished=True)

    close_connection()
def check_paper(self):
    """
    Delete the personid rows of one paper whose bibref no longer exists.

    ``self.paper`` is read as a pair:

    * ``self.paper[0]`` -- the paper identifier, used to query the current
      authors/coauthors and to build the ``"<tag>:<ref>,<paper>"`` strings;
    * ``self.paper[1]`` -- the rows to check. Going by the debug message
      below each row is ``(id, personid, tag, data, flag, lcul)``, and
      ``row[3]`` (data) is compared against the strings built here.

    For every row whose data string is not among the paper's current
    100/700 bibrefs, the row is deleted. If exactly one other row with the
    same personid and a different data string is found for the paper, the
    deleted row's flag and lcul are copied onto that row. Finally the
    canonical names and name string sets of all persons appearing in
    ``self.paper[1]`` are refreshed and ``close_connection()`` is called.

    ``personid_q`` is not defined in this method and must be provided by
    an enclosing scope.

    ``print(...)`` is always called with one parenthesised argument so the
    output is the same under the Python 2 print statement and the Python 3
    print function.
    """
    paper_id = self.paper[0]
    paper_rows = self.paper[1]

    if bconfig.TABLES_UTILS_DEBUG:
        print(" -> processing paper = %s" % (paper_id,))

    bibrefs100 = dbinter.get_authors_from_paper(paper_id)
    bibrefs700 = dbinter.get_coauthors_from_paper(paper_id)
    bibrecreflist = frozenset(
        ["100:%s,%s" % (str(ref[0]), paper_id) for ref in bibrefs100] +
        ["700:%s,%s" % (str(ref[0]), paper_id) for ref in bibrefs700])

    # Fetched on first use only; None means "not fetched yet". An empty
    # result is a valid cached value and must not trigger another query.
    pid_rows_lazy = None

    # Finally, if a bibrec/ref pair is in the authornames table but not in
    # this list, that name of that paper no longer exists and must be
    # removed from the table. The new one will be added by the update
    # procedure in the future; keeping this entry would be risky because
    # the garbage collector may decide to kill the bibref in the bibX0x
    # table.
    for row in paper_rows:
        if row[3] in bibrecreflist:
            continue

        if pid_rows_lazy is None:
            pid_rows_lazy = dbinter.collect_personid_papers(
                paper=(paper_id,), person=personid_q)

        # Ids of rows with the same personid but a different data string.
        other_bibrefs = [b[0] for b in pid_rows_lazy
                         if b[1] == row[1] and b[3] != row[3]]
        dbinter.delete_personid_by_id(int(row[0]))

        if bconfig.TABLES_UTILS_DEBUG:
            # Adjacent literals instead of a backslash continuation inside
            # the string, so no stray backslash/indentation is printed.
            print(("* deleting record with missing bibref: "
                   "id = %s, personid = %s, tag = %s, data = %s, "
                   "flag = %s, lcul = %s") % row)
            print("found %d other records with the same personid and bibrec"
                  % len(other_bibrefs))

        if len(other_bibrefs) == 1:
            # We have one and only one substitute, we can switch them!
            dbinter.update_flags_in_personid(row[4], row[5], other_bibrefs[0])
            if bconfig.TABLES_UTILS_DEBUG:
                print("updating id=%d with flag=%d,lcul=%d"
                      % (other_bibrefs[0], row[4], row[5]))

    # One-element tuples, as in the original call to the dbinter helpers.
    persons_to_update = set((p[1],) for p in paper_rows)
    dbinter.update_personID_canonical_names(persons_to_update)
    dbinter.update_personID_names_string_set(persons_to_update,
                                             single_threaded=True,
                                             wait_finished=True)

    close_connection()