def find_and_save_cc_net_nmf_clusters_parallel(network_mat, spreadsheet_mat,
                                               lap_diag, lap_pos,
                                               run_parameters,
                                               local_parallelism):
    """ central loop: compute components for the consensus matrix from the input
        network and spreadsheet matrices and save them to temp files.

    Args:
        network_mat: genes x genes symmetric matrix.
        spreadsheet_mat: genes x samples matrix.
        lap_diag: laplacian matrix component, L = lap_diag - lap_pos.
        lap_pos: laplacian matrix component, L = lap_diag - lap_pos.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of processes to run in parallel.
    """
    import knpackage.distributed_computing_utils as dstutil

    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(network_mat, spreadsheet_mat,
                                              lap_diag, lap_pos,
                                              run_parameters, jobs_id)

    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_net_nmf_clusters_worker,
                                          zipped_arguments, parallelism)
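
# A minimal, self-contained sketch (an assumption, not knpackage code) of the
# fan-out pattern used above: the shared arguments are paired once with each
# job id and dispatched to a process pool, mirroring what
# dstutil.zip_parameters and dstutil.parallelize_processes_locally appear to
# do at these call sites. _toy_worker is a hypothetical stand-in for
# run_cc_net_nmf_clusters_worker.
import itertools
import multiprocessing


def _toy_worker(shared_mat, run_parameters, job_id):
    """Stand-in worker: one bootstrap/clustering job per job_id."""
    return job_id, len(shared_mat) * run_parameters.get('scale', 1)


if __name__ == '__main__':
    shared_mat = [[0, 1], [1, 0]]
    run_parameters = {'scale': 2}
    jobs_id = range(4)

    # Pair the shared arguments with every job id, one tuple per process.
    zipped_arguments = list(zip(itertools.repeat(shared_mat),
                                itertools.repeat(run_parameters),
                                jobs_id))

    with multiprocessing.Pool(processes=2) as pool:
        print(pool.starmap(_toy_worker, zipped_arguments))
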
def find_and_save_cc_similarity_parallel(expression_df, signature_df,
                                         run_parameters, local_parallelism):
    """ central loop: compute components for the similarity matrix by

    Args:
        expression_df    : genes x samples
        signature_df     : genes x samples
        run_parameters   : dictionary of run-time parameters
        local_parallelism: number of processes to run in parallel.
    """
    import knpackage.distributed_computing_utils as dstutil

    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(expression_df, signature_df,
                                              run_parameters, jobs_id)

    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_similarity_signature_worker,
                                          zipped_arguments, parallelism)
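
# A hedged sketch of the branch above: when run_parameters carries a
# 'parallelism' key it caps the requested process count, otherwise the local
# request is used directly. determine_parallelism_locally's exact policy is
# knpackage internal; the min()/cpu_count() logic here is an assumption for
# illustration only.
import os


def _determine_parallelism_sketch(requested, user_cap=None):
    """Hypothetical stand-in for dstutil.determine_parallelism_locally."""
    cap = os.cpu_count() or 1
    if user_cap is not None:
        cap = min(cap, user_cap)
    return max(1, min(requested, cap))


print(_determine_parallelism_sketch(8))              # bounded by the machine
print(_determine_parallelism_sketch(8, user_cap=2))  # bounded by run_parameters
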
def find_and_save_cc_link_hclust_clusters_parallel(spreadsheet_mat,
                                                   run_parameters,
                                                   local_parallelism):
    """ central loop: compute components for the consensus matrix by hclust.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of processes to run in parallel.
    """

    import knpackage.distributed_computing_utils as dstutil

    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(spreadsheet_mat, run_parameters,
                                              jobs_id)

    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_link_hclust_clusters_worker,
                                          zipped_arguments, parallelism)
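
# What a single hclust job plausibly computes (a sketch under assumptions;
# the real run_cc_link_hclust_clusters_worker lives in knpackage and is not
# shown here): bootstrap a subset of samples, link them hierarchically, and
# cut the tree into k clusters for the consensus tally.
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage

_rng = np.random.default_rng(0)
_spreadsheet_mat = _rng.random((50, 20))           # genes x samples
_cols = _rng.choice(20, size=16, replace=False)    # one bootstrap of samples

_linkage_mat = linkage(_spreadsheet_mat[:, _cols].T, method='ward')
_labels = fcluster(_linkage_mat, t=3, criterion='maxclust')
print(_cols, _labels)                              # sampled columns -> clusters
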
def find_and_save_cc_nmf_clusters_parallel(spreadsheet_mat, run_parameters,
                                           local_parallelism):
    """ central loop: compute components for the consensus matrix by
        non-negative matrix factorization.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters.
        local_parallelism: number of processes to run in parallel.
    """
    import knpackage.distributed_computing_utils as dstutil

    jobs_id = range(0, local_parallelism)
    zipped_arguments = dstutil.zip_parameters(spreadsheet_mat, run_parameters,
                                              jobs_id)

    if 'parallelism' in run_parameters:
        parallelism = dstutil.determine_parallelism_locally(
            local_parallelism, run_parameters['parallelism'])
    else:
        parallelism = dstutil.determine_parallelism_locally(local_parallelism)

    dstutil.parallelize_processes_locally(run_cc_nmf_clusters_worker,
                                          zipped_arguments, parallelism)
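
# One NMF job, sketched (an assumption; the real run_cc_nmf_clusters_worker
# is knpackage internal): factor the non-negative spreadsheet and assign each
# sample to its dominant component, the usual NMF clustering rule.
import numpy as np
from sklearn.decomposition import NMF

_rng = np.random.default_rng(0)
_spreadsheet = _rng.random((50, 20))               # genes x samples, >= 0

_h = NMF(n_components=3, init='random', random_state=0,
         max_iter=500).fit_transform(_spreadsheet.T)   # samples x components
print(_h.argmax(axis=1))                           # cluster label per sample
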
def get_fisher_exact_test(prop_gene_network_sparse, sparse_dict, spreadsheet_df, max_cpu):
    """ central loop: compute components for fisher exact test.

    Args:
        prop_gene_network_sparse: sparse matrix of network gene set.
        sparse_dict: look up table of sparse matrix.
        spreadsheet_df: the dataframe of user gene set.
        max_cpu: the maximum number of processors to use.

    Returns:
        fisher_contingency_pval: list of seven-item lists.
    """
    import sys
    import itertools
    import multiprocessing

    import numpy as np

    import knpackage.distributed_computing_utils as dstutil

    universe_count = spreadsheet_df.shape[0]
    overlap_count  = prop_gene_network_sparse.T.dot(spreadsheet_df.values)
    user_count     = np.sum(spreadsheet_df.values, axis=0)
    gene_count     = prop_gene_network_sparse.sum(axis=0)
    set_list       = spreadsheet_df.columns.values

    dimension      = [range(overlap_count.shape[0]), range(overlap_count.shape[1])]
    combinations   = list(itertools.product(*dimension))
    parallelism    = dstutil.determine_parallelism_locally(min(max_cpu, len(combinations)))

    try:
        p = multiprocessing.Pool(processes=parallelism)
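        # callback_extend_list presumably extends the module-level result
        # list fisher_contingency_pval_parallel_insertion (defined elsewhere
        # in this module) with each worker's seven-item rows.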
        p.starmap_async(fisher_exact_worker,
                        zip(itertools.repeat(sparse_dict),
                            itertools.repeat(overlap_count),
                            itertools.repeat(user_count),
                            itertools.repeat(gene_count),
                            itertools.repeat(universe_count),
                            itertools.repeat(set_list),
                            combinations),
                        callback=callback_extend_list)
        p.close()
        p.join()

        return fisher_contingency_pval_parallel_insertion
    except Exception:
        raise OSError("Failed running parallel processing: {}".format(sys.exc_info()))
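
# A hedged reconstruction of the per-(user set, network set) test dispatched
# above: SciPy's Fisher exact test on the 2x2 contingency table implied by
# the four counts this function computes. The exact table layout inside
# fisher_exact_worker is knpackage internal; this is the standard enrichment
# construction.
from scipy.stats import fisher_exact

_universe, _user, _gene, _overlap = 2000, 120, 85, 20
_table = [[_overlap, _user - _overlap],
          [_gene - _overlap, _universe - _user - _gene + _overlap]]
_odds_ratio, _p_value = fisher_exact(_table, alternative='greater')
print(_odds_ratio, _p_value)
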