def wedge_and_store(cluster_set):
    """Wedge *cluster_set* and store the result.

    Steps, in order:
      1. print a progress line with the set's last name, its number of
         bibs and the maximum number of pairwise comparisons;
      2. run ``wedge`` on the set;
      3. call ``remove_result_cluster`` with the set's last name;
      4. call ``cluster_set.store()``.

    @param cluster_set: object exposing ``num_all_bibs``, ``last_name``
        and ``store()``.
    @return: always True.
    """
    total_bibs = cluster_set.num_all_bibs
    # Number of unordered pairs that can be formed from total_bibs items.
    max_comparisons = total_bibs * (total_bibs - 1) / 2

    progress_line = ("Start working on %s. Total number of bibs: %d, "
                     "maximum number of comparisons: %d"
                     % (cluster_set.last_name, total_bibs, max_comparisons))
    bibauthor_print(progress_line)

    wedge(cluster_set)
    # NOTE(review): presumably clears earlier results for this last name
    # before the fresh ones are stored -- confirm against the helper.
    remove_result_cluster(cluster_set.last_name)
    cluster_set.store()

    return True
def wedge_and_store(cluster_set, wedge_threshold=None):
    """Wedge *cluster_set* and store the result.

    Logs a progress line (last name, number of bibs, maximum number of
    pairwise comparisons), runs ``wedge`` on the set, calls
    ``remove_clusters_by_name`` with the set's last name and finally calls
    ``cluster_set.store()``.

    @param cluster_set: object exposing ``num_all_bibs``, ``last_name``
        and ``store()``.
    @param wedge_threshold: forwarded unchanged to ``wedge`` as its
        ``force_wedge_thrsh`` keyword argument (None by default).
    @return: always True.
    """
    bib_count = cluster_set.num_all_bibs
    # Number of unordered pairs that can be formed from bib_count items.
    pair_count = bib_count * (bib_count - 1) / 2

    progress_line = ("Start working on %s. Total number of bibs: %d, "
                     "maximum number of comparisons: %d"
                     % (cluster_set.last_name, bib_count, pair_count))
    logger.log(progress_line)

    wedge(cluster_set, force_wedge_thrsh=wedge_threshold)
    # NOTE(review): presumably clears earlier clusters for this last name
    # before the fresh ones are stored -- confirm against the helper.
    remove_clusters_by_name(cluster_set.last_name)
    cluster_set.store()

    return True
def _collect_statistics_lname_coeff(params):
    """Build the cluster set for one last name and wedge it.

    The cluster set for the requested last name is obtained from
    ``delayed_cluster_sets_from_marktables``, its matrix is created with
    ``create_matrix(cluster_set, False)``, ``wedge`` is run with the given
    coefficient and ``remove_result_cluster`` is called with the set's
    last name.  Progress is reported through ``bibauthor_print``.

    @param params: indexable whose item 0 is the last name and item 1 is
        the coefficient handed to ``wedge`` as its third positional
        argument.
    @return: None.

    A failed lookup of the last name in the returned names (``.index``)
    is not handled here and propagates to the caller.
    """
    lname, coeff = params[0], params[1]

    factories, found_names, bib_counts = \
        delayed_cluster_sets_from_marktables([lname])

    # The three returned sequences are read at the same position.
    position = found_names.index(lname)
    make_cluster_set = factories[position]
    found_size = bib_counts[position]
    bibauthor_print("Found, %s. Total number of bibs: %d."
                    % (lname, found_size))

    # The entry is a callable that yields the actual cluster set.
    cluster_set = make_cluster_set()
    create_matrix(cluster_set, False)

    total_bibs = cluster_set.num_all_bibs
    # Number of unordered pairs that can be formed from total_bibs items.
    max_comparisons = total_bibs * (total_bibs - 1) / 2
    bibauthor_print("Start working on %s. Total number of bibs: %d, "
                    "maximum number of comparisons: %d"
                    % (cluster_set.last_name, total_bibs, max_comparisons))

    wedge(cluster_set, True, coeff)
    remove_result_cluster(cluster_set.last_name)
def _collect_statistics_lname_coeff(params):
    """Build the cluster set for one last name and wedge it.

    The cluster set for the requested last name is obtained from
    ``delayed_cluster_sets_from_marktables``, its matrix is created with
    ``create_matrix(cluster_set, False)``, ``wedge`` is run with the given
    coefficient and ``remove_clusters_by_name`` is called with the set's
    last name.  Progress is reported through ``logger.log``.

    If the last name cannot be located in the returned sequences, a
    "not found" message is logged and the function returns without doing
    any further work.

    @param params: indexable whose item 0 is the last name and item 1 is
        the coefficient handed to ``wedge`` as its third positional
        argument.
    @return: None.
    """
    lname = params[0]
    coeff = params[1]

    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    # Guard only the lookup.  Wrapping the whole computation would report
    # any IndexError/ValueError raised inside create_matrix() or wedge()
    # as "name not found" and hide the real failure.
    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters,"
                   % (lname,))
        return

    logger.log("Found, %s. Total number of bibs: %d." % (lname, size))

    # The entry is a callable that yields the actual cluster set.
    cluster_set = cluster()
    create_matrix(cluster_set, False)

    bibs = cluster_set.num_all_bibs
    # Number of unordered pairs that can be formed from `bibs` items.
    expected = bibs * (bibs - 1) / 2
    logger.log("Start working on %s. Total number of bibs: %d, "
               "maximum number of comparisons: %d"
               % (cluster_set.last_name, bibs, expected))

    wedge(cluster_set, True, coeff)
    remove_clusters_by_name(cluster_set.last_name)