Exemple #1
0
def find_adjacencies_command(args):
    """
    Infer co-expression modules.

    Loads the expression matrix and the list of transcription factor (TF)
    names, runs ``grnboost2`` on them and writes the resulting network to
    ``args.output`` as a tab-separated table without the index.

    The process exits with status 1 when the expression matrix has no
    columns (genes). A warning is logged when fewer than 80% of the supplied
    TFs occur among the gene names of the expression matrix.

    :param args: Parsed command line arguments. This function reads
        ``tfs_fname``, ``client_or_address``, ``num_workers`` and ``output``;
        ``args`` is also handed as a whole to ``_load_expression_matrix``.
    """
    LOGGER.info("Loading expression matrix.")
    ex_mtx = _load_expression_matrix(args)
    tf_names = load_tf_names(args.tfs_fname.name)

    n_total_genes = len(ex_mtx.columns)
    if n_total_genes == 0:
        LOGGER.error(
            "The expression matrix supplied does not contain any genes. Make sure the extension of the file matches the format (tab separation for TSV and comma sepatration for CSV)."
        )
        sys.exit(1)

    # Count the distinct supplied TFs that are present as genes in the matrix.
    # Taking len() of the boolean mask returned by ``isin`` would just give the
    # number of genes again, so the check below could never trigger.
    unique_tfs = set(tf_names)
    n_matching_tfs = len(unique_tfs.intersection(ex_mtx.columns))
    # Skip the coverage check when no TFs were supplied (avoids dividing by 0).
    if unique_tfs and float(n_matching_tfs) / len(unique_tfs) < 0.80:
        LOGGER.warning(
            "Expression data is available for less than 80% of the supplied transcription factors."
        )

    LOGGER.info("Inferring regulatory networks.")
    client, shutdown_callback = _prepare_client(args.client_or_address,
                                                num_workers=args.num_workers)
    try:
        network = grnboost2(expression_data=ex_mtx,
                            tf_names=tf_names,
                            verbose=True,
                            client_or_address=client)
    finally:
        # Always release the client, even when inference fails.
        shutdown_callback(False)

    LOGGER.info("Writing results to file.")
    network.to_csv(args.output, index=False, sep='\t')
Exemple #2
0
def find_adjacencies_command(args):
    """
    Infer co-expression modules.

    Loads the expression matrix and the list of transcription factor (TF)
    names, runs the selected inference method and writes the resulting
    network to the output file. The column separator of the output is derived
    from the suffixes of the output file name via ``suffixes_to_separator``.

    The process exits with status 1 when loading the expression matrix raises
    ``ValueError`` or when the matrix contains no genes. A warning is logged
    when fewer than 80% of the supplied TFs occur among the gene names of the
    expression matrix.

    :param args: Parsed command line arguments. This function reads
        ``expression_mtx_fname``, ``transpose``, ``sparse``,
        ``cell_id_attribute``, ``gene_attribute``, ``tfs_fname``,
        ``client_or_address``, ``num_workers``, ``method``, ``seed`` and
        ``output``.
    """
    LOGGER.info("Loading expression matrix.")
    try:
        ex_mtx = load_exp_matrix(
            args.expression_mtx_fname.name,
            (args.transpose == 'yes'),
            args.sparse,
            args.cell_id_attribute,
            args.gene_attribute,
        )
    except ValueError as e:
        LOGGER.error(e)
        sys.exit(1)

    tf_names = load_tf_names(args.tfs_fname.name)

    # In sparse mode ``ex_mtx`` is indexed: element 0 is passed on as the
    # expression data and element 1 as the gene names (see the call below).
    # Otherwise the gene names are the columns of the matrix.
    gene_names = ex_mtx[1] if args.sparse else ex_mtx.columns

    n_total_genes = len(gene_names)
    if n_total_genes == 0:
        LOGGER.error(
            "The expression matrix supplied does not contain any genes. "
            "Make sure the extension of the file matches the format (tab separation for TSV and "
            "comma sepatration for CSV)."
        )
        sys.exit(1)

    # Count the distinct supplied TFs that are present among the gene names.
    # Taking len() of the boolean mask returned by ``isin`` would just give the
    # number of genes again, so the check below could never trigger.
    unique_tfs = set(tf_names)
    n_matching_tfs = len(unique_tfs.intersection(gene_names))
    # Skip the coverage check when no TFs were supplied (avoids dividing by 0).
    if unique_tfs and float(n_matching_tfs) / len(unique_tfs) < 0.80:
        LOGGER.warning("Expression data is available for less than 80% of the supplied transcription factors.")

    LOGGER.info("Inferring regulatory networks.")
    client, shutdown_callback = _prepare_client(args.client_or_address, num_workers=args.num_workers)
    # Any method value other than 'grnboost2' selects genie3.
    method = grnboost2 if args.method == 'grnboost2' else genie3
    try:
        if args.sparse:
            network = method(
                expression_data=ex_mtx[0],
                gene_names=ex_mtx[1],
                tf_names=tf_names,
                verbose=True,
                client_or_address=client,
                seed=args.seed,
            )
        else:
            network = method(
                expression_data=ex_mtx, tf_names=tf_names, verbose=True, client_or_address=client, seed=args.seed
            )
    finally:
        # Always release the client, even when inference fails.
        shutdown_callback(False)

    LOGGER.info("Writing results to file.")

    extension = PurePath(args.output.name).suffixes
    network.to_csv(args.output.name, index=False, sep=suffixes_to_separator(extension))