def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Build all matrices (unless skipped), then schedule wedge-and-store.

    Args:
        pure: forwarded to ``delayed_cluster_sets_from_personid``; a truthy
            value also forces matrix creation.
        force_matrix_creation: passed (or-ed with ``pure``) as ``force=`` to
            ``schedule_create_matrix``.
        skip_matrix_creation: when truthy, the matrix-building phase is
            skipped entirely.
        last_run: forwarded to ``delayed_cluster_sets_from_personid``.

    Raises:
        AssertionError: if forcing and skipping are requested together, or
            if a scheduler returns a number of exit statuses different from
            the number of clusters, or any status other than ``os.EX_OK``.
    """

    def _prepare_cluster_sets():
        # The cluster sets are prepared from scratch before each phase.
        bibauthor_print("Preparing cluster sets.")
        prepared, _names, prepared_sizes = delayed_cluster_sets_from_personid(
            pure, last_run)
        return prepared, prepared_sizes

    def _check_exit_statuses(statuses, scheduled):
        # One status per scheduled cluster, and every one must be EX_OK.
        assert len(statuses) == len(scheduled)
        assert all(code == os.EX_OK for code in statuses)

    # Forcing and skipping the matrix creation are mutually exclusive.
    assert not (force_matrix_creation and skip_matrix_creation)

    # Pure results require the matrices to be recomputed.
    must_force = force_matrix_creation or pure

    if not skip_matrix_creation:
        cluster_sets, cluster_sizes = _prepare_cluster_sets()
        bibauthor_print("Building all matrices.")
        matrix_statuses = schedule_create_matrix(cluster_sets, cluster_sizes,
                                                 force=must_force)
        _check_exit_statuses(matrix_statuses, cluster_sets)

    cluster_sets, cluster_sizes = _prepare_cluster_sets()
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(cluster_sets, cluster_sizes)
    _check_exit_statuses(wedge_statuses, cluster_sets)
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Run matrix creation (optional) followed by wedge-and-store.

    ``pure`` and ``last_run`` are handed to
    ``delayed_cluster_sets_from_personid`` before each phase. Matrix
    creation is forced when either ``force_matrix_creation`` or ``pure`` is
    truthy, and is skipped altogether when ``skip_matrix_creation`` is
    truthy.

    Raises:
        AssertionError: when both ``force_matrix_creation`` and
            ``skip_matrix_creation`` are set, when a scheduler hands back a
            different number of exit statuses than there are clusters, or
            when any exit status differs from ``os.EX_OK``.
    """
    # Asking to force and to skip at the same time makes no sense.
    assert not (force_matrix_creation and skip_matrix_creation)

    # Computing pure results implies recomputing the matrices.
    rebuild = force_matrix_creation or pure

    if not skip_matrix_creation:
        bibauthor_print("Preparing cluster sets.")
        cluster_sets, _, cluster_sizes = delayed_cluster_sets_from_personid(
            pure, last_run)

        bibauthor_print("Building all matrices.")
        matrix_codes = schedule_create_matrix(cluster_sets, cluster_sizes,
                                              force=rebuild)
        assert len(matrix_codes) == len(cluster_sets)
        for code in matrix_codes:
            assert code == os.EX_OK

    # The cluster sets are prepared again for the disambiguation phase.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _, cluster_sizes = delayed_cluster_sets_from_personid(
        pure, last_run)

    bibauthor_print("Starting disambiguation.")
    wedge_codes = schedule_wedge_and_store(cluster_sets, cluster_sizes)
    assert len(wedge_codes) == len(cluster_sets)
    for code in wedge_codes:
        assert code == os.EX_OK
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Schedule matrix creation (optional) and then wedge-and-store workers.

    Args:
        pure: forwarded to ``delayed_cluster_sets_from_personid``; a truthy
            value also forces matrix creation.
        force_matrix_creation: or-ed with ``pure`` and passed as ``force=``
            to ``force_create_matrix``.
        skip_matrix_creation: when truthy, no matrix workers are scheduled.
        last_run: forwarded to ``delayed_cluster_sets_from_personid``.

    Raises:
        AssertionError: if forcing and skipping are requested together.
    """
    # Forcing and skipping the matrix creation are mutually exclusive.
    assert not (force_matrix_creation and skip_matrix_creation)

    # Pure results require the matrices to be recomputed.
    rebuild = force_matrix_creation or pure

    def _create_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=rebuild)

    if not skip_matrix_creation:
        logger.log("Preparing cluster sets.")
        delayed_sets, _names, _sizes = delayed_cluster_sets_from_personid(
            pure, last_run)
        logger.log("Building all matrices.")
        # Each worker argument list is a 1-tuple holding the item as returned
        # (it is not called here).
        schedule_workers(_create_matrix,
                         [(delayed,) for delayed in delayed_sets])

    logger.log("Preparing cluster sets.")
    delayed_sets, _names, _sizes = delayed_cluster_sets_from_personid(
        pure, last_run)
    # Every delayed item is called here, before the workers are scheduled.
    wedge_args = [(delayed(),) for delayed in delayed_sets]
    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_args)
def tortoise_last_name(name, wedge_threshold=None, from_mark=True, pure=False):
    """Create the matrix and run wedge-and-store for one last-name cluster.

    Args:
        name: name whose cluster string is obtained from
            ``generate_last_name_cluster_str``.
        wedge_threshold: accepted but not used anywhere in this function.
        from_mark: when truthy, cluster sets come from
            ``delayed_cluster_sets_from_marktables``; otherwise from
            ``delayed_cluster_sets_from_personid``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``.

    Raises:
        AssertionError: if ``from_mark`` and ``pure`` are both truthy.

    An ``IndexError`` or ``ValueError`` raised anywhere in the lookup or
    processing steps is caught and reported as "not found" in the log.
    """
    logger.log('Start working on %s' % name)
    assert not from_mark or not pure

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        logger.log(' ... from pid, pure=%s' % str(pure))
        cluster_sets, known_lnames, bib_counts = \
            delayed_cluster_sets_from_personid(pure)
        logger.log(' ... delayed pure done!')
    else:
        logger.log(' ... from mark!')
        cluster_sets, known_lnames, bib_counts = \
            delayed_cluster_sets_from_marktables([lname])
        logger.log(' ... delayed done')

    try:
        position = known_lnames.index(lname)
        delayed = cluster_sets[position]
        bib_count = bib_counts[position]
        cluster_set = delayed()
        logger.log("Found, %s(%s). Total number of bibs: %d."
                   % (name, lname, bib_count))
        create_matrix(cluster_set, False)
        wedge_and_store(cluster_set)
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters" % (lname))
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Schedule matrix creation (optional), then wedge-and-store workers.

    ``pure`` and ``last_run`` are handed to
    ``delayed_cluster_sets_from_personid`` before each phase. The clusters it
    returns are passed to ``schedule_workers`` unchanged. Matrix creation is
    forced when ``force_matrix_creation`` or ``pure`` is truthy and skipped
    when ``skip_matrix_creation`` is truthy.

    Raises:
        AssertionError: if forcing and skipping are requested together.
    """

    def _fresh_cluster_sets():
        # The cluster sets are prepared from scratch before each phase.
        bibauthor_print("Preparing cluster sets.")
        cluster_sets, _names, _sizes = delayed_cluster_sets_from_personid(
            pure, last_run)
        return cluster_sets

    # Forcing and skipping the matrix creation are mutually exclusive.
    assert not (force_matrix_creation and skip_matrix_creation)

    # Pure results require the matrices to be recomputed.
    rebuild = force_matrix_creation or pure

    if not skip_matrix_creation:
        matrix_targets = _fresh_cluster_sets()
        bibauthor_print("Building all matrices.")
        schedule_workers(lambda x: force_create_matrix(x, force=rebuild),
                         matrix_targets)

    wedge_targets = _fresh_cluster_sets()
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_targets)
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix and run wedge-and-store for one last-name cluster.

    Args:
        name: name whose cluster string is obtained from
            ``generate_last_name_cluster_str``.
        from_mark: when truthy, cluster sets come from
            ``delayed_cluster_sets_from_marktables``; otherwise from
            ``delayed_cluster_sets_from_personid``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``.

    Raises:
        AssertionError: if ``from_mark`` and ``pure`` are both truthy.

    When the last name is not among the returned cluster names, a "Sorry"
    message is printed and nothing else is done.
    """
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if from_mark:
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables()
    else:
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)

    # Only the lookup is guarded. A missing name makes ``index`` raise
    # ValueError (not IndexError) on a list, so both are caught here; errors
    # raised later by the matrix/wedge steps are deliberately left to
    # propagate instead of being misreported as "not found".
    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters"
                        % (name, lname))
        return

    bibauthor_print("Found, %s(%s). Total number of bibs: %d."
                    % (name, lname, size))
    cluster_set = cluster()
    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix and run wedge-and-store for one last-name cluster.

    Args:
        name: name whose cluster string is obtained from
            ``generate_last_name_cluster_str``.
        from_mark: when truthy, cluster sets come from
            ``delayed_cluster_sets_from_marktables``; otherwise from
            ``delayed_cluster_sets_from_personid``.
        pure: forwarded to ``delayed_cluster_sets_from_personid``.

    Raises:
        AssertionError: if ``from_mark`` and ``pure`` are both truthy.

    The lookup of the last name is not guarded: whatever ``lnames.index``
    or the subsequent indexing raises propagates to the caller.
    """
    bibauthor_print('Start working on %s' % name)
    assert not from_mark or not pure

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure')
        cluster_sets, known_lnames, bib_counts = \
            delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        cluster_sets, known_lnames, bib_counts = \
            delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    position = known_lnames.index(lname)
    delayed = cluster_sets[position]
    bib_count = bib_counts[position]

    # The delayed cluster set is called only after both lookups succeeded.
    cluster_set = delayed()
    bibauthor_print("Found, %s(%s). Total number of bibs: %d."
                    % (name, lname, bib_count))
    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
def tortoise_last_name(name, from_mark=True, pure=False):
    """Process a single last-name cluster: create its matrix, then wedge it.

    The cluster sets are taken from ``delayed_cluster_sets_from_marktables``
    (restricted to the generated last-name string) when ``from_mark`` is
    truthy, and from ``delayed_cluster_sets_from_personid(pure)`` otherwise.

    Raises:
        AssertionError: if ``from_mark`` and ``pure`` are both truthy.

    There is no handling for a last name missing from the returned names:
    errors from ``lnames.index`` or from the indexing propagate unchanged.
    """
    bibauthor_print('Start working on %s' % name)

    # Marktable input and pure mode cannot be combined.
    both_requested = from_mark and pure
    assert not both_requested

    lname = generate_last_name_cluster_str(name)

    if from_mark:
        bibauthor_print(' ... from mark!')
        source = delayed_cluster_sets_from_marktables([lname])
        done_message = ' ... delayed done'
    else:
        bibauthor_print(' ... from pid, pure=%s' % str(pure))
        source = delayed_cluster_sets_from_personid(pure)
        done_message = ' ... delayed pure done!'
    delayed_sets, cluster_names, bib_totals = source
    bibauthor_print(done_message)

    where = cluster_names.index(lname)
    delayed = delayed_sets[where]
    total = bib_totals[where]
    cluster_set = delayed()
    bibauthor_print("Found, %s(%s). Total number of bibs: %d."
                    % (name, lname, total))
    create_matrix(cluster_set, False)
    wedge_and_store(cluster_set)