def recalculate(self, cluster_set):
    '''
    Rebuilds the probability matrix for the given cluster set.

    The matrix held so far is kept aside as a source of already computed
    values. A stored value is reused for a pair of bibs only when both
    bibs are in the set returned by __get_up_to_date_bibs() and the stored
    value is truthy; every other pair is compared again with
    compare_bibrefrecs().

    @param cluster_set: A cluster set object, used to initialize the
        matrix.
    '''
    # The up-to-date bibs must be read before the matrix is replaced.
    previous_matrix = self._bib_matrix
    fresh_bibs = self.__get_up_to_date_bibs()
    reuse_possible = bool(fresh_bibs)
    self._bib_matrix = Bib_matrix(cluster_set)

    bib_count = cluster_set.num_all_bibs
    # n * (n - 1) / 2 over all bibs; only the denominator of the progress
    # fraction reported below.
    expected = ((bib_count * (bib_count - 1)) / 2)
    if expected == 0:
        # Keep the progress fraction well defined.
        expected = 1

    computed = 0
    reused = 0
    last_report = 0
    last_flush = 0

    for cl1 in cluster_set.clusters:
        # Report progress once more than 100000 pairs were handled since
        # the previous report.
        if computed + reused - last_report > 100000:
            update_status(
                (float(reused) + computed) / expected,
                "Prob matrix: calc %d, opti %d." % (computed, reused))
            last_report = computed + reused

        # Drop the comparison caches after every 2000000 fresh comparisons.
        if computed - last_flush > 2000000:
            clear_comparison_caches()
            last_flush = computed

        for cl2 in cluster_set.clusters:
            # The id() ordering makes each unordered pair of clusters be
            # visited once.
            if id(cl1) >= id(cl2):
                continue
            if cl1.hates(cl2):
                continue

            for bib1 in cl1.bibs:
                for bib2 in cl2.bibs:
                    pair_is_fresh = (reuse_possible
                                     and bib1 in fresh_bibs
                                     and bib2 in fresh_bibs)
                    val = previous_matrix[bib1, bib2] if pair_is_fresh else None

                    if val:
                        reused += 1
                        if bconfig.DEBUG_CHECKS:
                            assert _debug_is_eq_v(
                                val, compare_bibrefrecs(bib1, bib2))
                    else:
                        # Either nothing reusable is stored or the stored
                        # value is falsy: compare from scratch.
                        computed += 1
                        val = compare_bibrefrecs(bib1, bib2)

                    self._bib_matrix[bib1, bib2] = val

    clear_comparison_caches()
    update_status_final("Matrix done. %d calc, %d opt." % (computed, reused))
def recalculate(self, cluster_set):
    '''
    Constructs a new probability matrix for cluster_set.

    Values from the matrix held before the call are reused when both bibs
    of a pair are reported by __get_up_to_date_bibs() and the old matrix
    holds a truthy value for them. All remaining pairs are computed with
    compare_bibrefrecs().

    @param cluster_set: A cluster set object, used to initialize the
        matrix.
    '''
    stale_matrix = self._bib_matrix
    # Must be computed while self._bib_matrix is still the old matrix.
    trusted = self.__get_up_to_date_bibs()
    nothing_trusted = not bool(trusted)
    self._bib_matrix = Bib_matrix(cluster_set)

    n_bibs = cluster_set.num_all_bibs
    expected = ((n_bibs * (n_bibs - 1)) / 2)
    if expected == 0:
        # Used as a divisor in the progress report below.
        expected = 1

    n_calc, n_opti = 0, 0
    reported_at, cleaned_at = 0, 0

    for cl1 in cluster_set.clusters:
        handled = n_calc + n_opti
        if handled - reported_at > 100000:
            message = "Prob matrix: calc %d, opti %d." % (n_calc, n_opti)
            update_status((float(n_opti) + n_calc) / expected, message)
            reported_at = handled

        # Periodically release the comparison caches.
        if n_calc - cleaned_at > 2000000:
            clear_comparison_caches()
            cleaned_at = n_calc

        for cl2 in cluster_set.clusters:
            if id(cl1) < id(cl2) and not cl1.hates(cl2):
                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        if (nothing_trusted
                                or bib1 not in trusted
                                or bib2 not in trusted):
                            # No reusable value for this pair.
                            n_calc += 1
                            val = compare_bibrefrecs(bib1, bib2)
                        else:
                            val = stale_matrix[bib1, bib2]
                            if val:
                                n_opti += 1
                                if bconfig.DEBUG_CHECKS:
                                    assert _debug_is_eq_v(
                                        val, compare_bibrefrecs(bib1, bib2))
                            else:
                                # Old matrix holds nothing usable.
                                n_calc += 1
                                val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

    clear_comparison_caches()
    update_status_final("Matrix done. %d calc, %d opt." % (n_calc, n_opti))
def __init__(self):
    '''
    Starts with a Bib_matrix constructed without arguments.
    '''
    matrix = Bib_matrix()
    self._bib_matrix = matrix
class ProbabilityMatrix(object):
    '''
    Keeps the pairwise comparison values between bibs in a Bib_matrix.
    Loading and storing are delegated to that Bib_matrix, and recalculate()
    rebuilds the values for a cluster set.
    '''

    def __init__(self):
        '''
        Starts with a Bib_matrix constructed without arguments.
        '''
        matrix = Bib_matrix()
        self._bib_matrix = matrix

    def load(self, lname, load_map=True, load_matrix=True):
        '''
        Loads the underlying Bib_matrix, reporting status before and after.

        @param lname: passed through to Bib_matrix.load.
        @param load_map: passed through to Bib_matrix.load.
        @param load_matrix: passed through to Bib_matrix.load.
        '''
        update_status(0., "Loading probability matrix...")
        matrix = self._bib_matrix
        matrix.load(lname, load_map, load_matrix)
        update_status_final("Probability matrix loaded.")

    def store(self, name):
        '''
        Stores the underlying Bib_matrix, reporting status before and after.

        @param name: passed through to Bib_matrix.store.
        '''
        update_status(0., "Saving probability matrix...")
        matrix = self._bib_matrix
        matrix.store(name)
        update_status_final("Probability matrix saved.")

    def __getitem__(self, bibs):
        '''
        Returns the stored value for the pair (bibs[0], bibs[1]).

        @param bibs: an indexable whose first two items form the pair.
        '''
        first = bibs[0]
        second = bibs[1]
        return self._bib_matrix[first, second]

    def __get_up_to_date_bibs(self):
        '''
        Returns a frozenset of what filter_modified_record_ids yields for
        the matrix keys and the matrix creation time.
        '''
        matrix = self._bib_matrix
        keys = matrix.get_keys()
        created = matrix.creation_time
        return frozenset(filter_modified_record_ids(keys, created))

    def is_up_to_date(self, cluster_set):
        '''
        Tells whether every bib of cluster_set is among the up-to-date bibs.

        @param cluster_set: object providing all_bibs().
        '''
        fresh_bibs = self.__get_up_to_date_bibs()
        wanted_bibs = frozenset(cluster_set.all_bibs())
        return fresh_bibs >= wanted_bibs

    def recalculate(self, cluster_set):
        '''
        Rebuilds the probability matrix for the given cluster set.

        The matrix held so far is kept aside as a source of already
        computed values. A stored value is reused for a pair of bibs only
        when both bibs are in the set returned by __get_up_to_date_bibs()
        and the stored value is truthy; every other pair is compared again
        with compare_bibrefrecs().

        @param cluster_set: A cluster set object, used to initialize the
            matrix.
        '''
        # The up-to-date bibs must be read before the matrix is replaced.
        previous_matrix = self._bib_matrix
        fresh_bibs = self.__get_up_to_date_bibs()
        reuse_possible = bool(fresh_bibs)
        self._bib_matrix = Bib_matrix(cluster_set)

        bib_count = cluster_set.num_all_bibs
        # n * (n - 1) / 2 over all bibs; only the denominator of the
        # progress fraction reported below.
        expected = ((bib_count * (bib_count - 1)) / 2)
        if expected == 0:
            # Keep the progress fraction well defined.
            expected = 1

        computed = 0
        reused = 0
        last_report = 0
        last_flush = 0

        for cl1 in cluster_set.clusters:
            # Report progress once more than 100000 pairs were handled
            # since the previous report.
            if computed + reused - last_report > 100000:
                update_status(
                    (float(reused) + computed) / expected,
                    "Prob matrix: calc %d, opti %d." % (computed, reused))
                last_report = computed + reused

            # Drop the comparison caches after every 2000000 fresh
            # comparisons.
            if computed - last_flush > 2000000:
                clear_comparison_caches()
                last_flush = computed

            for cl2 in cluster_set.clusters:
                # The id() ordering makes each unordered pair of clusters
                # be visited once.
                if id(cl1) >= id(cl2):
                    continue
                if cl1.hates(cl2):
                    continue

                for bib1 in cl1.bibs:
                    for bib2 in cl2.bibs:
                        pair_is_fresh = (reuse_possible
                                         and bib1 in fresh_bibs
                                         and bib2 in fresh_bibs)
                        if pair_is_fresh:
                            val = previous_matrix[bib1, bib2]
                        else:
                            val = None

                        if val:
                            reused += 1
                            if bconfig.DEBUG_CHECKS:
                                assert _debug_is_eq_v(
                                    val, compare_bibrefrecs(bib1, bib2))
                        else:
                            # Either nothing reusable is stored or the
                            # stored value is falsy: compare from scratch.
                            computed += 1
                            val = compare_bibrefrecs(bib1, bib2)

                        self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final(
            "Matrix done. %d calc, %d opt." % (computed, reused))
class ProbabilityMatrix(object):
    '''
    Wraps a Bib_matrix holding the comparison values between pairs of
    bibs. It can load and store that matrix, answer lookups for a pair,
    and recalculate the values for a cluster set.
    '''

    def __init__(self):
        '''
        Creates the wrapper around a Bib_matrix built without arguments.
        '''
        self._bib_matrix = Bib_matrix()

    def load(self, lname, load_map=True, load_matrix=True):
        '''
        Reports status, loads the wrapped matrix and reports completion.

        @param lname: forwarded to Bib_matrix.load.
        @param load_map: forwarded to Bib_matrix.load.
        @param load_matrix: forwarded to Bib_matrix.load.
        '''
        update_status(0., "Loading probability matrix...")
        self._bib_matrix.load(lname, load_map, load_matrix)
        update_status_final("Probability matrix loaded.")

    def store(self, name):
        '''
        Reports status, stores the wrapped matrix and reports completion.

        @param name: forwarded to Bib_matrix.store.
        '''
        update_status(0., "Saving probability matrix...")
        self._bib_matrix.store(name)
        update_status_final("Probability matrix saved.")

    def __getitem__(self, bibs):
        '''
        Looks up the value stored for the first two items of bibs.

        @param bibs: an indexable; items 0 and 1 form the lookup key.
        '''
        key = (bibs[0], bibs[1])
        return self._bib_matrix[key]

    def __get_up_to_date_bibs(self):
        '''
        Passes the matrix keys and creation time through
        filter_modified_record_ids and freezes the result.
        '''
        candidates = self._bib_matrix.get_keys()
        since = self._bib_matrix.creation_time
        kept = filter_modified_record_ids(candidates, since)
        return frozenset(kept)

    def is_up_to_date(self, cluster_set):
        '''
        True when the up-to-date bibs are a superset of the bibs of
        cluster_set.

        @param cluster_set: object providing all_bibs().
        '''
        trusted = self.__get_up_to_date_bibs()
        required = frozenset(cluster_set.all_bibs())
        return trusted >= required

    def recalculate(self, cluster_set):
        '''
        Constructs a new probability matrix for cluster_set.

        Values from the matrix held before the call are reused when both
        bibs of a pair are reported by __get_up_to_date_bibs() and the old
        matrix holds a truthy value for them. All remaining pairs are
        computed with compare_bibrefrecs().

        @param cluster_set: A cluster set object, used to initialize the
            matrix.
        '''
        stale_matrix = self._bib_matrix
        # Must be computed while self._bib_matrix is still the old matrix.
        trusted = self.__get_up_to_date_bibs()
        nothing_trusted = not bool(trusted)
        self._bib_matrix = Bib_matrix(cluster_set)

        n_bibs = cluster_set.num_all_bibs
        expected = ((n_bibs * (n_bibs - 1)) / 2)
        if expected == 0:
            # Used as a divisor in the progress report below.
            expected = 1

        n_calc, n_opti = 0, 0
        reported_at, cleaned_at = 0, 0

        for cl1 in cluster_set.clusters:
            handled = n_calc + n_opti
            if handled - reported_at > 100000:
                message = "Prob matrix: calc %d, opti %d." % (n_calc, n_opti)
                update_status((float(n_opti) + n_calc) / expected, message)
                reported_at = handled

            # Periodically release the comparison caches.
            if n_calc - cleaned_at > 2000000:
                clear_comparison_caches()
                cleaned_at = n_calc

            for cl2 in cluster_set.clusters:
                if id(cl1) < id(cl2) and not cl1.hates(cl2):
                    for bib1 in cl1.bibs:
                        for bib2 in cl2.bibs:
                            if (nothing_trusted
                                    or bib1 not in trusted
                                    or bib2 not in trusted):
                                # No reusable value for this pair.
                                n_calc += 1
                                val = compare_bibrefrecs(bib1, bib2)
                            else:
                                val = stale_matrix[bib1, bib2]
                                if val:
                                    n_opti += 1
                                    if bconfig.DEBUG_CHECKS:
                                        assert _debug_is_eq_v(
                                            val,
                                            compare_bibrefrecs(bib1, bib2))
                                else:
                                    # Old matrix holds nothing usable.
                                    n_calc += 1
                                    val = compare_bibrefrecs(bib1, bib2)

                            self._bib_matrix[bib1, bib2] = val

        clear_comparison_caches()
        update_status_final(
            "Matrix done. %d calc, %d opt." % (n_calc, n_opti))