def tortoise_from_scratch():
    """Run the full pipeline: build every matrix, then disambiguate.

    The delayed cluster sets are fetched from the mark tables twice: once
    for the matrix pass and once more, after
    ``empty_tortoise_results_table()`` has been called, for the ``wedge``
    pass. Both passes are handed to ``schedule_workers``.
    """
    def _force_matrix(delayed_set):
        # Same call the workers always received: matrix creation with force=True.
        return force_create_matrix(delayed_set, force=True)

    # Pass 1: matrices.
    bibauthor_print("Preparing cluster sets.")
    matrix_jobs, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    schedule_workers(_force_matrix, matrix_jobs)

    empty_tortoise_results_table()

    # Pass 2: disambiguation on a freshly fetched list of cluster sets.
    bibauthor_print("Preparing cluster sets.")
    wedge_jobs, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge, wedge_jobs)
def tortoise_from_scratch():
    """Run the full pipeline: build every matrix, then disambiguate.

    The delayed cluster sets are fetched from the mark tables twice: once
    for the matrix pass and once more, after
    ``empty_tortoise_results_table()`` has been called, for the ``wedge``
    pass. Both passes are handed to ``schedule_workers``.
    """
    def _force_matrix(delayed_set):
        # Same call the workers always received: matrix creation with force=True.
        return force_create_matrix(delayed_set, force=True)

    # Pass 1: matrices.
    bibauthor_print("Preparing cluster sets.")
    matrix_jobs, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    schedule_workers(_force_matrix, matrix_jobs)

    empty_tortoise_results_table()

    # Pass 2: disambiguation on a freshly fetched list of cluster sets.
    bibauthor_print("Preparing cluster sets.")
    wedge_jobs, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge, wedge_jobs)
def tortoise_from_scratch():
    """Build all matrices, empty the results table, then wedge and store.

    Each job passed to ``schedule_workers`` is a 1-tuple. In the matrix
    pass the tuple holds the delayed cluster set itself (not yet called);
    in the disambiguation pass it holds the result of calling it.
    """
    # Pass 1: matrices, built from the still-delayed cluster sets.
    logger.log("Preparing cluster sets.")
    delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    logger.log("Building all matrices.")
    matrix_args = [(delayed,) for delayed in delayed_sets]
    schedule_workers(lambda x: force_create_matrix(x, force=True), matrix_args)

    empty_tortoise_results_table()

    # Pass 2: disambiguation, on cluster sets evaluated up front.
    logger.log("Preparing cluster sets.")
    delayed_sets, _lnames, _sizes = delayed_cluster_sets_from_marktables()
    wedge_args = [(delayed(),) for delayed in delayed_sets]
    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_args)
def tortoise_from_scratch():
    """Build all matrices, empty the results table, then wedge and store.

    After each scheduling step the returned exit statuses are asserted to
    be one per cluster set and all equal to ``os.EX_OK``.

    Raises:
        AssertionError: if a scheduling step returns the wrong number of
            exit statuses or any status differs from ``os.EX_OK``.
    """
    def _assert_all_ok(statuses, jobs):
        # One status per scheduled job, and every one of them successful.
        assert len(statuses) == len(jobs)
        assert all(stat == os.EX_OK for stat in statuses)

    # Pass 1: matrices.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _lnames, sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    matrix_statuses = schedule_create_matrix(cluster_sets, sizes, force=True)
    _assert_all_ok(matrix_statuses, cluster_sets)

    empty_results_table()

    # Pass 2: disambiguation on a freshly fetched list of cluster sets.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _lnames, sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(cluster_sets, sizes)
    _assert_all_ok(wedge_statuses, cluster_sets)
def tortoise_from_scratch():
    """Build all matrices, empty the results table, then wedge and store.

    After each scheduling step the returned exit statuses are asserted to
    be one per cluster set and all equal to ``os.EX_OK``.

    Raises:
        AssertionError: if a scheduling step returns the wrong number of
            exit statuses or any status differs from ``os.EX_OK``.
    """
    def _assert_all_ok(statuses, jobs):
        # One status per scheduled job, and every one of them successful.
        assert len(statuses) == len(jobs)
        assert all(stat == os.EX_OK for stat in statuses)

    # Pass 1: matrices.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _lnames, sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    matrix_statuses = schedule_create_matrix(cluster_sets, sizes, force=True)
    _assert_all_ok(matrix_statuses, cluster_sets)

    empty_results_table()

    # Pass 2: disambiguation on a freshly fetched list of cluster sets.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _lnames, sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(cluster_sets, sizes)
    _assert_all_ok(wedge_statuses, cluster_sets)
def tortoise_last_name(name, wedge_threshold=None, from_mark=True, pure=False):
    """Create the matrix for one last name, then wedge and store it.

    Args:
        name: name from which the last-name cluster string is generated.
        wedge_threshold: accepted for interface compatibility; this body
            does not read it.
        from_mark: when true, load the delayed cluster sets from the mark
            tables (restricted to the generated last name); otherwise load
            them via ``delayed_cluster_sets_from_personid(pure)``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``; must not
            be combined with ``from_mark``.

    An ``IndexError`` or ``ValueError`` raised anywhere in the
    lookup/processing section is logged as "not found" instead of
    propagating.
    """
    logger.log('Start working on %s' % name)
    assert not from_mark or not pure
    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        logger.log(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        logger.log(' ... delayed pure done!')
    else:
        logger.log(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        logger.log(' ... delayed done')

    try:
        position = lnames.index(lname)
        build_cluster_set = clusters[position]
        bib_total = sizes[position]
        cluster_set = build_cluster_set()
        found_message = "Found, %s(%s). Total number of bibs: %d." % (name, lname, bib_total)
        logger.log(found_message)
        create_matrix(cluster_set, False)
        wedge_and_store(cluster_set)
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters" % lname)
def _create_matrix(lname):
    """Create and store the matrix for the cluster set of ``lname``.

    The delayed cluster sets are loaded from the mark tables for this one
    last name. Lookup errors are not handled here: an exception raised while
    locating ``lname`` in the returned names propagates to the caller.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]
    bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

    cluster_set = build_cluster_set()
    create_matrix(cluster_set, True)

    # Number of unordered pairs among all bibs: n * (n - 1) / 2.
    bib_count = cluster_set.num_all_bibs
    max_comparisons = bib_count * (bib_count - 1) / 2
    progress_template = ("Start working on %s. Total number of bibs: %d, "
                         "maximum number of comparisons: %d")
    bibauthor_print(progress_template
                    % (cluster_set.last_name, bib_count, max_comparisons))

    cluster_set.store()
def _create_matrix(lname):
    """Create and store the matrix for the cluster set of ``lname``.

    The delayed cluster sets are loaded from the mark tables for this one
    last name. Lookup errors are not handled here: an exception raised while
    locating ``lname`` in the returned names propagates to the caller.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]
    bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

    cluster_set = build_cluster_set()
    create_matrix(cluster_set, True)

    # Number of unordered pairs among all bibs: n * (n - 1) / 2.
    bib_count = cluster_set.num_all_bibs
    max_comparisons = bib_count * (bib_count - 1) / 2
    progress_template = ("Start working on %s. Total number of bibs: %d, "
                         "maximum number of comparisons: %d")
    bibauthor_print(progress_template
                    % (cluster_set.last_name, bib_count, max_comparisons))

    cluster_set.store()
def _create_matrix(lname):
    """Create and store the matrix for the cluster set of ``lname``.

    The delayed cluster sets are loaded from the mark tables for this one
    last name. An ``IndexError`` or ``ValueError`` raised anywhere in the
    lookup/processing section is reported as "not found" and swallowed.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    try:
        position = lnames.index(lname)
        build_cluster_set = clusters[position]
        bib_total = sizes[position]
        bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

        cluster_set = build_cluster_set()
        create_matrix(cluster_set, False)

        # Number of unordered pairs among all bibs: n * (n - 1) / 2.
        bib_count = cluster_set.num_all_bibs
        max_comparisons = bib_count * (bib_count - 1) / 2
        progress_template = ("Start working on %s. Total number of bibs: %d, "
                             "maximum number of comparisons: %d")
        bibauthor_print(progress_template
                        % (cluster_set.last_name, bib_count, max_comparisons))

        cluster_set.store()
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s not found in the last name clusters, not creating matrix" % lname)
def _create_matrix(lname):
    """Create and store the matrix for the cluster set of ``lname``.

    The delayed cluster sets are loaded from the mark tables for this one
    last name. An ``IndexError`` or ``ValueError`` raised anywhere in the
    lookup/processing section is reported as "not found" and swallowed.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    try:
        position = lnames.index(lname)
        build_cluster_set = clusters[position]
        bib_total = sizes[position]
        bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

        cluster_set = build_cluster_set()
        create_matrix(cluster_set, False)

        # Number of unordered pairs among all bibs: n * (n - 1) / 2.
        bib_count = cluster_set.num_all_bibs
        max_comparisons = bib_count * (bib_count - 1) / 2
        progress_template = ("Start working on %s. Total number of bibs: %d, "
                             "maximum number of comparisons: %d")
        bibauthor_print(progress_template
                        % (cluster_set.last_name, bib_count, max_comparisons))

        cluster_set.store()
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s not found in the last name clusters, not creating matrix" % lname)
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` for one (last name, coefficient) pair, then clean up.

    Args:
        params: indexable whose element 0 is the last-name cluster string
            and whose element 1 is the coefficient passed to ``wedge``.

    Lookup errors are not handled here: an exception raised while locating
    the last name in the returned names propagates to the caller.
    """
    lname, coeff = params[0], params[1]
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]
    bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

    cluster_set = build_cluster_set()
    create_matrix(cluster_set, False)

    # Number of unordered pairs among all bibs: n * (n - 1) / 2.
    bib_count = cluster_set.num_all_bibs
    max_comparisons = bib_count * (bib_count - 1) / 2
    progress_template = ("Start working on %s. Total number of bibs: %d, "
                         "maximum number of comparisons: %d")
    bibauthor_print(progress_template
                    % (cluster_set.last_name, bib_count, max_comparisons))

    wedge(cluster_set, True, coeff)
    remove_result_cluster(cluster_set.last_name)
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` for one (last name, coefficient) pair, then clean up.

    Args:
        params: indexable whose element 0 is the last-name cluster string
            and whose element 1 is the coefficient passed to ``wedge``.

    Lookup errors are not handled here: an exception raised while locating
    the last name in the returned names propagates to the caller.
    """
    lname, coeff = params[0], params[1]
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]
    bibauthor_print("Found, %s. Total number of bibs: %d." % (lname, bib_total))

    cluster_set = build_cluster_set()
    create_matrix(cluster_set, False)

    # Number of unordered pairs among all bibs: n * (n - 1) / 2.
    bib_count = cluster_set.num_all_bibs
    max_comparisons = bib_count * (bib_count - 1) / 2
    progress_template = ("Start working on %s. Total number of bibs: %d, "
                         "maximum number of comparisons: %d")
    bibauthor_print(progress_template
                    % (cluster_set.last_name, bib_count, max_comparisons))

    wedge(cluster_set, True, coeff)
    remove_result_cluster(cluster_set.last_name)
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for one last name, then wedge and store it.

    Args:
        name: name from which the last-name cluster string is generated.
        from_mark: when true, load the delayed cluster sets from the mark
            tables; otherwise load them via
            ``delayed_cluster_sets_from_personid(pure)``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``; must not
            be combined with ``from_mark``.

    If the generated last name is not among the loaded names, a "Sorry ...
    not found" message is printed and the function returns without doing
    any work.
    """
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if from_mark:
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables()
    else:
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)

    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
        bibauthor_print("Found, %s(%s). Total number of bibs: %d." % (name, lname, size))
        cluster_set = cluster()
        create_matrix(cluster_set, True)
        wedge_and_store(cluster_set)
    except (IndexError, ValueError):
        # A sequence's .index() reports a missing item with ValueError, not
        # IndexError; catching only IndexError let the "not found" case
        # escape as an unhandled exception instead of printing this message.
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters" % (name, lname))
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for one last name, then wedge and store it.

    Args:
        name: name from which the last-name cluster string is generated.
        from_mark: when true, load the delayed cluster sets from the mark
            tables (restricted to the generated last name); otherwise load
            them via ``delayed_cluster_sets_from_personid(pure)``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``; must not
            be combined with ``from_mark``.

    Lookup errors are deliberately not caught in this version: an exception
    raised while locating the last name propagates to the caller.
    """
    bibauthor_print('Start working on %s' % name)
    assert not from_mark or not pure
    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure')
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]

    # The cluster set is materialised before the "Found" message is printed.
    cluster_set = build_cluster_set()
    found_message = "Found, %s(%s). Total number of bibs: %d." % (name, lname, bib_total)
    bibauthor_print(found_message)

    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
def tortoise_last_name(name, from_mark=True, pure=False):
    """Create the matrix for one last name, then wedge and store it.

    Args:
        name: name from which the last-name cluster string is generated.
        from_mark: when true (the default), load the delayed cluster sets
            from the mark tables (restricted to the generated last name);
            otherwise load them via
            ``delayed_cluster_sets_from_personid(pure)``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``; must not
            be combined with ``from_mark``.

    Lookup errors are deliberately not caught in this version: an exception
    raised while locating the last name propagates to the caller.
    """
    bibauthor_print('Start working on %s' % name)
    assert not from_mark or not pure
    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    position = lnames.index(lname)
    build_cluster_set = clusters[position]
    bib_total = sizes[position]

    # The cluster set is materialised before the "Found" message is printed.
    cluster_set = build_cluster_set()
    found_message = "Found, %s(%s). Total number of bibs: %d." % (name, lname, bib_total)
    bibauthor_print(found_message)

    create_matrix(cluster_set, False)
    wedge_and_store(cluster_set)
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` for one (last name, coefficient) pair, then clean up.

    Args:
        params: indexable whose element 0 is the last-name cluster string
            and whose element 1 is the coefficient passed to ``wedge``.

    An ``IndexError`` or ``ValueError`` raised anywhere in the
    lookup/processing section is logged as "not found" and swallowed.
    """
    lname, coeff = params[0], params[1]
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])

    try:
        position = lnames.index(lname)
        build_cluster_set = clusters[position]
        bib_total = sizes[position]
        logger.log("Found, %s. Total number of bibs: %d." % (lname, bib_total))

        cluster_set = build_cluster_set()
        create_matrix(cluster_set, False)

        # Number of unordered pairs among all bibs: n * (n - 1) / 2.
        bib_count = cluster_set.num_all_bibs
        max_comparisons = bib_count * (bib_count - 1) / 2
        progress_template = ("Start working on %s. Total number of bibs: %d, "
                             "maximum number of comparisons: %d")
        logger.log(progress_template
                   % (cluster_set.last_name, bib_count, max_comparisons))

        wedge(cluster_set, True, coeff)
        remove_clusters_by_name(cluster_set.last_name)
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters," % lname)