Ejemplo n.º 1
0
def tortoise_from_scratch():
    """Force-build every matrix, empty the results table, then disambiguate.

    The work is done in two passes.  In the first pass every cluster set
    returned by ``delayed_cluster_sets_from_marktables`` is handed, through
    ``schedule_workers``, to ``force_create_matrix`` with ``force=True``.
    Once ``empty_tortoise_results_table`` has been called, the cluster sets
    are requested a second time and handed to ``wedge``.
    """
    def _rebuild_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=True)

    # Pass 1: matrices.  Only the cluster sets of the returned triple are used.
    bibauthor_print("Preparing cluster sets.")
    matrix_jobs, _, _ = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    schedule_workers(_rebuild_matrix, matrix_jobs)

    empty_tortoise_results_table()

    # Pass 2: the cluster sets are fetched afresh rather than reused.
    bibauthor_print("Preparing cluster sets.")
    wedge_jobs, _, _ = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge, wedge_jobs)
Ejemplo n.º 2
0
def tortoise_from_scratch():
    """Force-build every matrix, empty the results table, then disambiguate.

    The work is done in two passes.  In the first pass every cluster set
    returned by ``delayed_cluster_sets_from_marktables`` is handed, through
    ``schedule_workers``, to ``force_create_matrix`` with ``force=True``.
    Once ``empty_tortoise_results_table`` has been called, the cluster sets
    are requested a second time and handed to ``wedge``.
    """
    def _rebuild_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=True)

    # Pass 1: matrices.  Only the cluster sets of the returned triple are used.
    bibauthor_print("Preparing cluster sets.")
    matrix_jobs, _, _ = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    schedule_workers(_rebuild_matrix, matrix_jobs)

    empty_tortoise_results_table()

    # Pass 2: the cluster sets are fetched afresh rather than reused.
    bibauthor_print("Preparing cluster sets.")
    wedge_jobs, _, _ = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge, wedge_jobs)
Ejemplo n.º 3
0
def tortoise_from_scratch():
    """Force-build every matrix, empty the results table, then disambiguate.

    Both passes wrap each job in a one-element tuple before passing the list
    to ``schedule_workers``.  In the matrix pass the tuple holds the delayed
    cluster set as returned by ``delayed_cluster_sets_from_marktables``; in
    the disambiguation pass the delayed set is called first and the tuple
    holds its result, which is then handed to ``wedge_and_store``.
    """
    def _rebuild_matrix(cluster_set):
        return force_create_matrix(cluster_set, force=True)

    logger.log("Preparing cluster sets.")
    delayed_sets, _, _ = delayed_cluster_sets_from_marktables()
    logger.log("Building all matrices.")
    # The delayed sets are passed on uncalled in this pass.
    matrix_jobs = [(delayed, ) for delayed in delayed_sets]
    schedule_workers(_rebuild_matrix, matrix_jobs)

    empty_tortoise_results_table()

    logger.log("Preparing cluster sets.")
    delayed_sets, _, _ = delayed_cluster_sets_from_marktables()
    # Here every delayed set is called before anything is scheduled.
    wedge_jobs = [(delayed(), ) for delayed in delayed_sets]
    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_jobs)
Ejemplo n.º 4
0
def tortoise_from_scratch():
    """Force-build every matrix, empty the results table, then disambiguate.

    Each pass fetches the cluster sets and their sizes from
    ``delayed_cluster_sets_from_marktables`` and passes them to a scheduler
    (``schedule_create_matrix`` with ``force=True``, then
    ``schedule_wedge_and_store``).  After each pass the returned exit
    statuses are checked: there must be one per cluster set and each must
    equal ``os.EX_OK``.

    NOTE: the checks are ``assert`` statements, so they raise
    ``AssertionError`` on failure and are skipped when Python runs with
    ``-O``.
    """
    bibauthor_print("Preparing cluster sets.")
    matrix_sets, _, matrix_sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    matrix_statuses = schedule_create_matrix(matrix_sets, matrix_sizes,
                                             force=True)
    assert len(matrix_statuses) == len(matrix_sets)
    assert all(status == os.EX_OK for status in matrix_statuses)

    empty_results_table()

    # The cluster sets are fetched again instead of reusing the first batch.
    bibauthor_print("Preparing cluster sets.")
    wedge_sets, _, wedge_sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(wedge_sets, wedge_sizes)
    assert len(wedge_statuses) == len(wedge_sets)
    assert all(status == os.EX_OK for status in wedge_statuses)
Ejemplo n.º 5
0
def tortoise_from_scratch():
    """Force-build every matrix, empty the results table, then disambiguate.

    Each pass fetches the cluster sets and their sizes from
    ``delayed_cluster_sets_from_marktables`` and passes them to a scheduler
    (``schedule_create_matrix`` with ``force=True``, then
    ``schedule_wedge_and_store``).  After each pass the returned exit
    statuses are checked: there must be one per cluster set and each must
    equal ``os.EX_OK``.

    NOTE: the checks are ``assert`` statements, so they raise
    ``AssertionError`` on failure and are skipped when Python runs with
    ``-O``.
    """
    bibauthor_print("Preparing cluster sets.")
    matrix_sets, _, matrix_sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Building all matrices.")
    matrix_statuses = schedule_create_matrix(matrix_sets, matrix_sizes,
                                             force=True)
    assert len(matrix_statuses) == len(matrix_sets)
    assert all(status == os.EX_OK for status in matrix_statuses)

    empty_results_table()

    # The cluster sets are fetched again instead of reusing the first batch.
    bibauthor_print("Preparing cluster sets.")
    wedge_sets, _, wedge_sizes = delayed_cluster_sets_from_marktables()
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(wedge_sets, wedge_sizes)
    assert len(wedge_statuses) == len(wedge_sets)
    assert all(status == os.EX_OK for status in wedge_statuses)
Ejemplo n.º 6
0
def tortoise_last_name(name, wedge_threshold=None, from_mark=True, pure=False):
    """Create the matrix for one last-name cluster, then wedge and store it.

    :param name: name from which the last-name cluster string is generated.
    :param wedge_threshold: accepted but not used anywhere in this function.
    :param from_mark: when true, the cluster sets are taken from
        ``delayed_cluster_sets_from_marktables`` restricted to the generated
        cluster string; otherwise from ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.  It
        must not be true together with ``from_mark`` (checked by an assert).

    An ``IndexError`` or ``ValueError`` raised anywhere in the lookup, the
    matrix creation or the wedge step is logged as "not found" and is not
    re-raised.
    """
    logger.log('Start working on %s' % name)
    assert not from_mark or not pure
    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        logger.log(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        logger.log(' ... delayed pure done!')
    else:
        logger.log(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        logger.log(' ... delayed done')

    try:
        position = lnames.index(lname)
        delayed = clusters[position]
        total_bibs = sizes[position]
        # The delayed cluster is called before the "Found" message is logged.
        cluster_set = delayed()
        found_message = "Found, %s(%s). Total number of bibs: %d."
        logger.log(found_message % (name, lname, total_bibs))
        create_matrix(cluster_set, False)
        wedge_and_store(cluster_set)
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters" % lname)
Ejemplo n.º 7
0
def _create_matrix(lname):
    """Create and store the matrix for the last-name cluster ``lname``.

    The cluster is looked up among the sets that
    ``delayed_cluster_sets_from_marktables`` returns for ``[lname]``, called
    to obtain the cluster set, and passed to ``create_matrix`` with ``True``
    as second argument.  The cluster set is stored at the end.

    A failed lookup is not handled here; whatever ``lnames.index`` raises
    propagates to the caller.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    position = lnames.index(lname)
    delayed = clusters[position]
    bibauthor_print("Found, %s. Total number of bibs: %d."
                    % (lname, sizes[position]))
    cluster_set = delayed()
    create_matrix(cluster_set, True)

    # Upper bound on pairwise comparisons: n * (n - 1) / 2.
    bibs = cluster_set.num_all_bibs
    max_comparisons = bibs * (bibs - 1) / 2
    summary = ("Start working on %s. Total number of bibs: %d, "
               "maximum number of comparisons: %d")
    bibauthor_print(summary % (cluster_set.last_name, bibs, max_comparisons))
    cluster_set.store()
Ejemplo n.º 8
0
def _create_matrix(lname):
    """Create and store the matrix for the last-name cluster ``lname``.

    The cluster is looked up among the sets that
    ``delayed_cluster_sets_from_marktables`` returns for ``[lname]``, called
    to obtain the cluster set, and passed to ``create_matrix`` with ``True``
    as second argument.  The cluster set is stored at the end.

    A failed lookup is not handled here; whatever ``lnames.index`` raises
    propagates to the caller.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    position = lnames.index(lname)
    delayed = clusters[position]
    bibauthor_print("Found, %s. Total number of bibs: %d."
                    % (lname, sizes[position]))
    cluster_set = delayed()
    create_matrix(cluster_set, True)

    # Upper bound on pairwise comparisons: n * (n - 1) / 2.
    bibs = cluster_set.num_all_bibs
    max_comparisons = bibs * (bibs - 1) / 2
    summary = ("Start working on %s. Total number of bibs: %d, "
               "maximum number of comparisons: %d")
    bibauthor_print(summary % (cluster_set.last_name, bibs, max_comparisons))
    cluster_set.store()
Ejemplo n.º 9
0
def _create_matrix(lname):
    """Create and store the matrix for the last-name cluster ``lname``.

    The cluster is looked up among the sets that
    ``delayed_cluster_sets_from_marktables`` returns for ``[lname]``, called
    to obtain the cluster set, and passed to ``create_matrix`` with ``False``
    as second argument.  The cluster set is stored at the end.

    An ``IndexError`` or ``ValueError`` raised anywhere after the cluster
    sets have been fetched is reported as "not found" and is not re-raised.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    try:
        position = lnames.index(lname)
        delayed = clusters[position]
        bibauthor_print("Found, %s. Total number of bibs: %d."
                        % (lname, sizes[position]))
        cluster_set = delayed()
        create_matrix(cluster_set, False)

        # Upper bound on pairwise comparisons: n * (n - 1) / 2.
        bibs = cluster_set.num_all_bibs
        max_comparisons = bibs * (bibs - 1) / 2
        summary = ("Start working on %s. Total number of bibs: %d, "
                   "maximum number of comparisons: %d")
        bibauthor_print(summary
                        % (cluster_set.last_name, bibs, max_comparisons))
        cluster_set.store()
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s not found in the last name clusters, "
                        "not creating matrix" % lname)
Ejemplo n.º 10
0
def _create_matrix(lname):
    """Create and store the matrix for the last-name cluster ``lname``.

    The cluster is looked up among the sets that
    ``delayed_cluster_sets_from_marktables`` returns for ``[lname]``, called
    to obtain the cluster set, and passed to ``create_matrix`` with ``False``
    as second argument.  The cluster set is stored at the end.

    An ``IndexError`` or ``ValueError`` raised anywhere after the cluster
    sets have been fetched is reported as "not found" and is not re-raised.
    """
    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    try:
        position = lnames.index(lname)
        delayed = clusters[position]
        bibauthor_print("Found, %s. Total number of bibs: %d."
                        % (lname, sizes[position]))
        cluster_set = delayed()
        create_matrix(cluster_set, False)

        # Upper bound on pairwise comparisons: n * (n - 1) / 2.
        bibs = cluster_set.num_all_bibs
        max_comparisons = bibs * (bibs - 1) / 2
        summary = ("Start working on %s. Total number of bibs: %d, "
                   "maximum number of comparisons: %d")
        bibauthor_print(summary
                        % (cluster_set.last_name, bibs, max_comparisons))
        cluster_set.store()
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s not found in the last name clusters, "
                        "not creating matrix" % lname)
Ejemplo n.º 11
0
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` on one last-name cluster with a given coefficient.

    :param params: indexable whose item 0 is the last-name cluster string
        and whose item 1 is the coefficient passed to ``wedge``.

    The cluster's matrix is created with ``create_matrix(cluster_set,
    False)``, ``wedge`` is called as ``wedge(cluster_set, True, coeff)``, and
    finally ``remove_result_cluster`` is called with the cluster set's last
    name.  A failed lookup is not handled here and propagates to the caller.
    """
    lname, coeff = params[0], params[1]

    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    position = lnames.index(lname)
    delayed = clusters[position]
    bibauthor_print("Found, %s. Total number of bibs: %d."
                    % (lname, sizes[position]))
    cluster_set = delayed()
    create_matrix(cluster_set, False)

    # Upper bound on pairwise comparisons: n * (n - 1) / 2.
    bibs = cluster_set.num_all_bibs
    max_comparisons = bibs * (bibs - 1) / 2
    summary = ("Start working on %s. Total number of bibs: %d, "
               "maximum number of comparisons: %d")
    bibauthor_print(summary % (cluster_set.last_name, bibs, max_comparisons))

    wedge(cluster_set, True, coeff)
    remove_result_cluster(cluster_set.last_name)
Ejemplo n.º 12
0
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` on one last-name cluster with a given coefficient.

    :param params: indexable whose item 0 is the last-name cluster string
        and whose item 1 is the coefficient passed to ``wedge``.

    The cluster's matrix is created with ``create_matrix(cluster_set,
    False)``, ``wedge`` is called as ``wedge(cluster_set, True, coeff)``, and
    finally ``remove_result_cluster`` is called with the cluster set's last
    name.  A failed lookup is not handled here and propagates to the caller.
    """
    lname, coeff = params[0], params[1]

    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    position = lnames.index(lname)
    delayed = clusters[position]
    bibauthor_print("Found, %s. Total number of bibs: %d."
                    % (lname, sizes[position]))
    cluster_set = delayed()
    create_matrix(cluster_set, False)

    # Upper bound on pairwise comparisons: n * (n - 1) / 2.
    bibs = cluster_set.num_all_bibs
    max_comparisons = bibs * (bibs - 1) / 2
    summary = ("Start working on %s. Total number of bibs: %d, "
               "maximum number of comparisons: %d")
    bibauthor_print(summary % (cluster_set.last_name, bibs, max_comparisons))

    wedge(cluster_set, True, coeff)
    remove_result_cluster(cluster_set.last_name)
Ejemplo n.º 13
0
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for one last-name cluster, then wedge and store it.

    :param name: name from which the last-name cluster string is generated.
    :param from_mark: when true, the cluster sets are taken from
        ``delayed_cluster_sets_from_marktables``; otherwise from
        ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.  It
        must not be true together with ``from_mark`` (checked by an assert).

    When the generated cluster string is not among the returned last names,
    a "not found" message is printed and the function returns normally.  The
    handler covers the whole processing step, so an ``IndexError`` or
    ``ValueError`` raised by the matrix creation or the wedge step is
    reported the same way.
    """
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if from_mark:
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables()
    else:
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)

    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
        bibauthor_print("Found, %s(%s). Total number of bibs: %d." % (name, lname, size))
        cluster_set = cluster()
        create_matrix(cluster_set, True)
        wedge_and_store(cluster_set)
    except (IndexError, ValueError):
        # A missing name makes ``index`` raise ValueError, not IndexError, so
        # both must be caught for the "not found" message to be reachable.
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters" % (name, lname))
Ejemplo n.º 14
0
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for one last-name cluster, then wedge and store it.

    :param name: name from which the last-name cluster string is generated.
    :param from_mark: when true, the cluster sets are taken from
        ``delayed_cluster_sets_from_marktables`` restricted to the generated
        cluster string; otherwise from ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.  It
        must not be true together with ``from_mark`` (checked by an assert).

    A failed lookup is not handled here; whatever ``lnames.index`` raises
    propagates to the caller.
    """
    bibauthor_print('Start working on %s' % name)
    assert not from_mark or not pure

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure')
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    # No try/except around the lookup: errors propagate to the caller.
    position = lnames.index(lname)
    delayed = clusters[position]
    total_bibs = sizes[position]
    # The delayed cluster is called before the "Found" message is printed.
    cluster_set = delayed()
    found_message = "Found, %s(%s). Total number of bibs: %d."
    bibauthor_print(found_message % (name, lname, total_bibs))
    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
Ejemplo n.º 15
0
def tortoise_last_name(name, from_mark=True, pure=False):
    """Create the matrix for one last-name cluster, then wedge and store it.

    :param name: name from which the last-name cluster string is generated.
    :param from_mark: when true, the cluster sets are taken from
        ``delayed_cluster_sets_from_marktables`` restricted to the generated
        cluster string; otherwise from ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.  It
        must not be true together with ``from_mark`` (checked by an assert).

    A failed lookup is not handled here; whatever ``lnames.index`` raises
    propagates to the caller.
    """
    bibauthor_print('Start working on %s' % name)
    assert not from_mark or not pure

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    # No try/except around the lookup: errors propagate to the caller.
    position = lnames.index(lname)
    delayed = clusters[position]
    total_bibs = sizes[position]
    # The delayed cluster is called before the "Found" message is printed.
    cluster_set = delayed()
    found_message = "Found, %s(%s). Total number of bibs: %d."
    bibauthor_print(found_message % (name, lname, total_bibs))
    create_matrix(cluster_set, False)
    wedge_and_store(cluster_set)
Ejemplo n.º 16
0
def _collect_statistics_lname_coeff(params):
    """Run ``wedge`` on one last-name cluster with a given coefficient.

    :param params: indexable whose item 0 is the last-name cluster string
        and whose item 1 is the coefficient passed to ``wedge``.

    The cluster's matrix is created with ``create_matrix(cluster_set,
    False)``, ``wedge`` is called as ``wedge(cluster_set, True, coeff)``, and
    finally ``remove_clusters_by_name`` is called with the cluster set's last
    name.  An ``IndexError`` or ``ValueError`` raised anywhere after the
    cluster sets have been fetched is logged as "not found" and is not
    re-raised.
    """
    lname, coeff = params[0], params[1]

    clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
    try:
        position = lnames.index(lname)
        delayed = clusters[position]
        logger.log("Found, %s. Total number of bibs: %d."
                   % (lname, sizes[position]))
        cluster_set = delayed()
        create_matrix(cluster_set, False)

        # Upper bound on pairwise comparisons: n * (n - 1) / 2.
        bibs = cluster_set.num_all_bibs
        max_comparisons = bibs * (bibs - 1) / 2
        summary = ("Start working on %s. Total number of bibs: %d, "
                   "maximum number of comparisons: %d")
        logger.log(summary % (cluster_set.last_name, bibs, max_comparisons))

        wedge(cluster_set, True, coeff)
        remove_clusters_by_name(cluster_set.last_name)
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters," % lname)