예제 #1
0
def bio_to_brat_parallel_wrapper(file_names, n_cores):
    """Run article_list_bio_to_brat over a pool of workers.

    The input is divided with ``chunk_list(file_names, n_cores)`` and every
    resulting segment is handed to ``article_list_bio_to_brat`` through
    ``Pool.map``. The mapped results are discarded; nothing is returned.

    Args:
        file_names (list of lists): each element is
            [PosixPath, PosixPath, PosixPath, PosixPath] -- paths to the
            text, the labels, the output text and the output annotation.
        n_cores (int): size of the worker pool (multiprocessing package);
            also forwarded to chunk_list as its second argument.
    """
    segments = chunk_list(file_names, n_cores)
    with Pool(n_cores) as pool:
        # map blocks until every segment has been processed
        pool.map(article_list_bio_to_brat, segments)
예제 #2
0
def parse_article_list_parallel_wrapper(in_list, n_cores=4):
    """Run parse_article_list over a pool of workers and total its results.

    The input is divided with ``chunk_list(in_list, n_cores)``; each segment
    is processed by ``parse_article_list`` via ``Pool.map``.

    Args:
        in_list ([in_path, out_path]): path to the input JATS and location
            for the output plain txt.
        n_cores (int, optional): number of parallel python processes to
            spawn (multiprocessing package). Defaults to 4.

    Returns:
        Sum of the per-segment values returned by parse_article_list
        (presumably error counts -- confirm against parse_article_list).
    """
    segments = chunk_list(in_list, n_cores)
    with Pool(n_cores) as pool:
        # map returns a fully materialised list, so summing here is safe
        return sum(pool.map(parse_article_list, segments))
예제 #3
0
def parse_file_list_parallel_wrapper(in_list, out_path='.', n_cores=4):
    """Run parse_file_list over a pool of workers and total its results.

    The input is divided with ``chunk_list(in_list, n_cores)``; each segment
    is processed by ``parse_file_list`` with ``out_path`` bound as a keyword
    argument.

    Args:
        in_list (list of PosixPaths): input files.
        out_path (str, optional): directory in which to write the outputs.
            Defaults to '.'.
        n_cores (int, optional): number of pool workers to use.
            Defaults to 4.

    Returns:
        int: number of articles extracted, i.e. the sum of the values
        returned by parse_file_list for each segment.
    """
    # bind out_path up front so Pool.map only has to supply the segment
    worker = partial(parse_file_list, out_path=out_path)
    segments = chunk_list(in_list, n_cores)
    with Pool(n_cores) as pool:
        return sum(pool.map(worker, segments))
예제 #4
0
def preprocess_articles_parallel_wrapper(file_list,
                                         n_cores,
                                         process_unicode=True,
                                         replace_math=True,
                                         correct=True,
                                         corr_cite=True):
    """Run preprocess_articles over a pool of workers.

    The input is divided with ``chunk_list(file_list, n_cores)``; each
    segment is processed by ``preprocess_articles`` with the four option
    flags bound as keyword arguments. Nothing is returned.

    Args:
        file_list ([input filename, output filename]): pair of file names
            to read and to write.
        n_cores (int): number of python processes to use (multiprocessing
            package).
        process_unicode (bool, optional): replace unicodes. Defaults to True.
        replace_math (bool, optional): replace math equations.
            Defaults to True.
        correct (bool, optional): replace string errors. Defaults to True.
        corr_cite (bool, optional): correct citation errors.
            Defaults to True.
    """
    # options forwarded unchanged to every preprocess_articles call
    options = {
        'process_unicode': process_unicode,
        'replace_math': replace_math,
        'correct': correct,
        'corr_cite': corr_cite,
    }
    worker = partial(preprocess_articles, **options)
    segments = chunk_list(file_list, n_cores)
    with Pool(n_cores) as pool:
        pool.map(worker, segments)
예제 #5
0
def brat_to_bio_parallel_wrapper(file_names,
                                 n_cores,
                                 process_unicode=True,
                                 replace_math=True,
                                 correct=True,
                                 corr_cite=True):
    """Run article_list_brat_to_bio over a pool of workers.

    The input is divided with ``chunk_list(file_names, n_cores)``; each
    segment is processed by ``article_list_brat_to_bio`` with the four
    option flags bound as keyword arguments. Nothing is returned.

    Args:
        file_names (list of lists): each element is
            [PosixPath, PosixPath, PosixPath] -- paths to the text, the
            annotation and the output base path.
        n_cores (int): number of python processes to use (multiprocessing
            package).
        process_unicode (bool, optional): replace unicodes. Defaults to True.
        replace_math (bool, optional): replace math equations.
            Defaults to True.
        correct (bool, optional): replace string errors. Defaults to True.
        corr_cite (bool, optional): correct citation errors.
            Defaults to True.
    """
    # options forwarded unchanged to every article_list_brat_to_bio call
    options = {
        'process_unicode': process_unicode,
        'replace_math': replace_math,
        'correct': correct,
        'corr_cite': corr_cite,
    }
    worker = partial(article_list_brat_to_bio, **options)
    segments = chunk_list(file_names, n_cores)
    with Pool(n_cores) as pool:
        pool.map(worker, segments)