Esempio n. 1
0
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Build all matrices (unless skipped), then run the disambiguation.

    The cluster sets are requested from
    ``delayed_cluster_sets_from_personid`` once per phase.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``; a
        truthy value also switches on ``force`` for the matrix phase.
    :param force_matrix_creation: passed as ``force`` to
        ``schedule_create_matrix``.
    :param skip_matrix_creation: when truthy the matrix phase is not run.
        Must not be combined with ``force_matrix_creation``.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: if both ``force_matrix_creation`` and
        ``skip_matrix_creation`` are truthy, if the number of exit statuses
        differs from the number of clusters, or if any exit status is not
        ``os.EX_OK``.  These are plain ``assert`` statements, so they are
        not evaluated when Python runs with ``-O``.
    """
    assert not (force_matrix_creation and skip_matrix_creation)
    # Pure results require the computation to be forced.
    if not force_matrix_creation:
        force_matrix_creation = pure

    def _check(statuses, jobs):
        # One exit status per scheduled job, every one of them EX_OK.
        assert len(statuses) == len(jobs)
        assert all(status == os.EX_OK for status in statuses)

    if not skip_matrix_creation:
        bibauthor_print("Preparing cluster sets.")
        matrix_jobs, _, matrix_sizes = delayed_cluster_sets_from_personid(
            pure, last_run)
        bibauthor_print("Building all matrices.")
        matrix_statuses = schedule_create_matrix(
            matrix_jobs, matrix_sizes, force=force_matrix_creation)
        _check(matrix_statuses, matrix_jobs)

    bibauthor_print("Preparing cluster sets.")
    wedge_jobs, _, wedge_sizes = delayed_cluster_sets_from_personid(
        pure, last_run)
    bibauthor_print("Starting disambiguation.")
    wedge_statuses = schedule_wedge_and_store(wedge_jobs, wedge_sizes)
    _check(wedge_statuses, wedge_jobs)
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Run the two disambiguation phases: matrix creation, then wedging.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``; when
        truthy the matrix phase is run with ``force`` enabled.
    :param force_matrix_creation: passed as ``force`` to
        ``schedule_create_matrix``.
    :param skip_matrix_creation: when truthy, only the wedge-and-store
        phase runs.  Mutually exclusive with ``force_matrix_creation``.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: on conflicting flags, on a mismatch between
        the number of exit statuses and clusters, or on any exit status
        other than ``os.EX_OK`` (checks are ``assert`` statements and are
        therefore skipped under ``python -O``).
    """
    both_requested = force_matrix_creation and skip_matrix_creation
    assert not both_requested
    # Pure results require the computation to be forced.
    if not force_matrix_creation:
        force_matrix_creation = pure

    if not skip_matrix_creation:
        bibauthor_print("Preparing cluster sets.")
        cluster_sets, _, set_sizes = delayed_cluster_sets_from_personid(
            pure, last_run)
        bibauthor_print("Building all matrices.")
        statuses = schedule_create_matrix(cluster_sets, set_sizes,
                                          force=force_matrix_creation)
        assert len(statuses) == len(cluster_sets)
        assert all(code == os.EX_OK for code in statuses)

    # The cluster sets are requested again for the second phase.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _, set_sizes = delayed_cluster_sets_from_personid(
        pure, last_run)
    bibauthor_print("Starting disambiguation.")
    statuses = schedule_wedge_and_store(cluster_sets, set_sizes)
    assert len(statuses) == len(cluster_sets)
    assert all(code == os.EX_OK for code in statuses)
Esempio n. 3
0
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Build all matrices (unless skipped), then wedge and store.

    Both phases hand their work to ``schedule_workers`` as a list of
    1-tuples, one per cluster set.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``; a
        truthy value also enables ``force`` for matrix creation.
    :param force_matrix_creation: passed as ``force`` to
        ``force_create_matrix``.
    :param skip_matrix_creation: when truthy the matrix phase is not run.
        Must not be combined with ``force_matrix_creation``.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: if both ``force_matrix_creation`` and
        ``skip_matrix_creation`` are truthy.
    """
    assert not (force_matrix_creation and skip_matrix_creation)
    # Pure results require the computation to be forced.
    if not force_matrix_creation:
        force_matrix_creation = pure

    if not skip_matrix_creation:
        logger.log("Preparing cluster sets.")
        delayed_sets, _, _ = delayed_cluster_sets_from_personid(
            pure, last_run)
        logger.log("Building all matrices.")
        # In this phase the objects are passed on as returned (not called).
        matrix_args = [(delayed, ) for delayed in delayed_sets]

        def _build_matrix(cluster_arg):
            return force_create_matrix(cluster_arg,
                                       force=force_matrix_creation)

        schedule_workers(_build_matrix, matrix_args)

    logger.log("Preparing cluster sets.")
    delayed_sets, _, _ = delayed_cluster_sets_from_personid(
        pure, last_run)
    # Here every delayed object is called before the workers are scheduled.
    wedge_args = [(make_set(), ) for make_set in delayed_sets]
    logger.log("Starting disambiguation.")
    schedule_workers(wedge_and_store, wedge_args)
Esempio n. 4
0
def tortoise_last_name(name, wedge_threshold=None, from_mark=True, pure=False):
    """Create the matrix for, and disambiguate, one last-name cluster.

    :param name: name whose last-name cluster is processed; it is converted
        with ``generate_last_name_cluster_str`` before the lookup.
    :param wedge_threshold: currently unused by this function.
    :param from_mark: when truthy the cluster sets come from
        ``delayed_cluster_sets_from_marktables``; otherwise from
        ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.
        Must not be combined with ``from_mark``.
    :raises AssertionError: if both ``from_mark`` and ``pure`` are truthy.
    """
    logger.log('Start working on %s' % name)
    assert not (from_mark and pure)
    lname = generate_last_name_cluster_str(name)

    if from_mark:
        logger.log(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        logger.log(' ... delayed done')
    else:
        logger.log(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        logger.log(' ... delayed pure done!')

    # Only the lookup is guarded: an IndexError/ValueError raised later by
    # matrix creation or wedging must not be reported as "name not found".
    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
    except (IndexError, ValueError):
        logger.log("Sorry, %s not found in the last name clusters" % (lname))
        return

    cluster_set = cluster()
    logger.log("Found, %s(%s). Total number of bibs: %d." %
               (name, lname, size))
    create_matrix(cluster_set, False)
    wedge_and_store(cluster_set)
def tortoise(pure=False,
             force_matrix_creation=False,
             skip_matrix_creation=False,
             last_run=None):
    """Build all matrices (unless skipped), then wedge and store.

    Both phases pass the cluster sets obtained from
    ``delayed_cluster_sets_from_personid`` straight to ``schedule_workers``.

    :param pure: forwarded to ``delayed_cluster_sets_from_personid``; a
        truthy value also enables ``force`` for matrix creation.
    :param force_matrix_creation: passed as ``force`` to
        ``force_create_matrix``.
    :param skip_matrix_creation: when truthy the matrix phase is not run.
        Must not be combined with ``force_matrix_creation``.
    :param last_run: forwarded to ``delayed_cluster_sets_from_personid``.
    :raises AssertionError: if both ``force_matrix_creation`` and
        ``skip_matrix_creation`` are truthy.
    """
    assert not (force_matrix_creation and skip_matrix_creation)
    # Pure results require the computation to be forced.
    if not force_matrix_creation:
        force_matrix_creation = pure

    if not skip_matrix_creation:
        bibauthor_print("Preparing cluster sets.")
        cluster_sets, _, _ = delayed_cluster_sets_from_personid(pure, last_run)
        bibauthor_print("Building all matrices.")

        def _build_matrix(cluster_arg):
            return force_create_matrix(cluster_arg,
                                       force=force_matrix_creation)

        schedule_workers(_build_matrix, cluster_sets)

    # The cluster sets are requested again for the second phase.
    bibauthor_print("Preparing cluster sets.")
    cluster_sets, _, _ = delayed_cluster_sets_from_personid(pure, last_run)
    bibauthor_print("Starting disambiguation.")
    schedule_workers(wedge_and_store, cluster_sets)
Esempio n. 6
0
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for, and disambiguate, one last-name cluster.

    :param name: name whose last-name cluster is processed; it is converted
        with ``generate_last_name_cluster_str`` before the lookup.
    :param from_mark: when truthy the cluster sets come from
        ``delayed_cluster_sets_from_marktables``; otherwise from
        ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.
        Must not be combined with ``from_mark``.
    :raises AssertionError: if both ``from_mark`` and ``pure`` are truthy.
    """
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if from_mark:
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables()
    else:
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)

    # Only the lookup is guarded.  ``.index()`` on a list reports a missing
    # item with ValueError (not IndexError), so both are caught here;
    # errors raised later by matrix creation or wedging propagate.
    try:
        idx = lnames.index(lname)
        cluster = clusters[idx]
        size = sizes[idx]
    except (IndexError, ValueError):
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters" % (name, lname))
        return

    bibauthor_print("Found, %s(%s). Total number of bibs: %d." % (name, lname, size))
    cluster_set = cluster()
    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
def tortoise_last_name(name, from_mark=False, pure=False):
    """Create the matrix for, and disambiguate, one last-name cluster.

    A last name missing from the cluster list is not handled here:
    whatever ``lnames.index`` raises propagates to the caller.

    :param name: name whose last-name cluster is processed; it is converted
        with ``generate_last_name_cluster_str`` before the lookup.
    :param from_mark: when truthy the cluster sets come from
        ``delayed_cluster_sets_from_marktables``; otherwise from
        ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.
        Must not be combined with ``from_mark``.
    :raises AssertionError: if both ``from_mark`` and ``pure`` are truthy.
    """
    bibauthor_print('Start working on %s' % name)
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure')
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    position = lnames.index(lname)
    make_cluster_set, bib_count = clusters[position], sizes[position]
    cluster_set = make_cluster_set()
    found_message = "Found, %s(%s). Total number of bibs: %d." % (
        name, lname, bib_count)
    bibauthor_print(found_message)
    create_matrix(cluster_set, True)
    wedge_and_store(cluster_set)
def tortoise_last_name(name, from_mark=True, pure=False):
    """Create the matrix for, and disambiguate, one last-name cluster.

    A last name missing from the cluster list is not handled here:
    whatever ``lnames.index`` raises propagates to the caller.

    :param name: name whose last-name cluster is processed; it is converted
        with ``generate_last_name_cluster_str`` before the lookup.
    :param from_mark: when truthy (the default) the cluster sets come from
        ``delayed_cluster_sets_from_marktables``; otherwise from
        ``delayed_cluster_sets_from_personid``.
    :param pure: forwarded to ``delayed_cluster_sets_from_personid``.
        Must not be combined with ``from_mark``.
    :raises AssertionError: if both ``from_mark`` and ``pure`` are truthy.
    """
    bibauthor_print('Start working on %s' % name)
    assert not (from_mark and pure)

    lname = generate_last_name_cluster_str(name)

    if not from_mark:
        bibauthor_print(' ... from pid, pure=%s' % str(pure))
        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
        bibauthor_print(' ... delayed pure done!')
    else:
        bibauthor_print(' ... from mark!')
        clusters, lnames, sizes = delayed_cluster_sets_from_marktables([lname])
        bibauthor_print(' ... delayed done')

    position = lnames.index(lname)
    make_cluster_set, bib_count = clusters[position], sizes[position]
    cluster_set = make_cluster_set()
    found_message = "Found, %s(%s). Total number of bibs: %d." % (
        name, lname, bib_count)
    bibauthor_print(found_message)
    create_matrix(cluster_set, False)
    wedge_and_store(cluster_set)