Esempio n. 1
0
def between_correls(args):
    """Correlate the observations of two tables and write a cross-table network.

    Both tables are sorted before use so that their samples are in the same
    order; the run aborts if the sorted ids of the two tables differ.

    Files written to the working directory (``args.output`` when given):
    ``correls.txt`` (tab separated correlations) and ``crossnet.gml`` (the
    network). A log is emitted through ``logger.output_log()``.

    Parameters
    ----------
    args : namespace-like object
        Must expose ``table1``, ``table2``, ``correl_method`` ('spearman' or
        'pearson'), ``p_adjust`` ('bh', 'bon' or None), ``output``,
        ``min_sample``, ``min_p`` and ``min_r``.

    Raises
    ------
    KeyError
        If ``args.correl_method`` or ``args.p_adjust`` is not a known key.
    ValueError
        If the two tables do not contain the same ids after sorting.
    """
    logger = general.Logger("SCNIC_log.txt")
    logger["SCNIC analysis type"] = "between"

    # correlation and p-value adjustment methods
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr}
    p_methods = {'bh': general.bh_adjust, 'bon': general.bonferroni_adjust}
    correl_method = correl_methods[args.correl_method]
    # p_adjust stays None when no adjustment method was requested
    p_adjust = p_methods[args.p_adjust] if args.p_adjust is not None else None

    # load tables
    table1 = load_table(args.table1)
    table2 = load_table(args.table2)
    logger["input table 1"] = args.table1
    # distinct key so the second path does not overwrite the first one
    logger["input table 2"] = args.table2

    table1 = table1.sort()
    table2 = table2.sort()

    if not np.array_equal(table1.ids(), table2.ids()):
        raise ValueError("Tables have different sets of samples present")

    # make new output directory and change to it
    if args.output is not None:
        os.makedirs(args.output)
        os.chdir(args.output)
        logger["output directory"] = args.output

    # filter tables (optional), then collect metadata from both of them
    if args.min_sample is not None:
        table1 = general.filter_table(table1, args.min_sample)
        table2 = general.filter_table(table2, args.min_sample)
    metadata = general.get_metadata_from_table(table1)
    metadata.update(general.get_metadata_from_table(table2))

    # make correlations
    logger["correlation metric"] = args.correl_method
    logger["p adjustment method"] = args.p_adjust
    correls = between_correls_from_tables(table1, table2, correl_method)
    correls.sort_values(correls.columns[-1], inplace=True)
    # written before 'p_adj' is added, so the file holds the unadjusted columns;
    # the context manager guarantees the handle is flushed and closed
    with open('correls.txt', 'w') as correls_file:
        correls.to_csv(correls_file, sep='\t', index=False)

    # adjust p-values only when an adjustment method was selected
    if p_adjust is not None:
        correls['p_adj'] = p_adjust(correls['p'])

    # make network
    net = general.correls_to_net(correls, metadata=metadata, min_p=args.min_p, min_r=args.min_r)
    logger["number of nodes"] = net.number_of_nodes()
    logger["number of edges"] = net.number_of_edges()
    nx.write_gml(net, 'crossnet.gml')

    logger.output_log()
    # BEL character; parenthesised form is valid on both Python 2 and 3
    print('\a')
def test_between_correls_from_tables_too_many_procs(biom_table1, biom_table2):
    """With nprocs=1000 exactly one warning is raised and all pairs are returned."""
    with warnings.catch_warnings(record=True) as caught_warnings:
        # make sure no warning is hidden by an existing filter
        warnings.simplefilter('always')
        result = between_correls_from_tables(biom_table1, biom_table2, nprocs=1000)
        n_warnings = len(caught_warnings)
        assert n_warnings == 1
    assert isinstance(result, pd.DataFrame)
    # one row is expected for every pairing of a table1 row with a table2 row
    expected_rows = biom_table1.shape[0] * biom_table2.shape[0]
    assert result.shape[0] == expected_rows
Esempio n. 3
0
def between_correls(args):
    """Correlate the observations of two tables and write a cross-table network.

    Both tables are sorted before use so that their samples are in the same
    order; the run aborts if the ids of the two (filtered) tables differ.

    Files written to the working directory (``args.output`` when given):
    ``correls.txt`` (tab separated correlations including ``p_adj``) and
    ``crossnet.gml`` (the network). A log is emitted through
    ``logger.output_log()``.

    Parameters
    ----------
    args : namespace-like object
        Must expose ``table1``, ``table2``, ``correl_method`` ('spearman' or
        'pearson'), ``p_adjust``, ``output``, ``force``, ``sparcc_filter``,
        ``min_sample``, ``procs``, ``min_p`` and ``min_r``.

    Raises
    ------
    KeyError
        If ``args.correl_method`` is not a known correlation method.
    ValueError
        If the two tables do not contain the same ids after filtering.
    """
    logger = general.Logger("SCNIC_log.txt")
    logger["SCNIC analysis type"] = "between"

    # correlation and p-value adjustment methods
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr}
    correl_method = correl_methods[args.correl_method]

    # load tables
    table1 = load_table(args.table1)
    table2 = load_table(args.table2)
    logger["input table 1"] = args.table1
    # distinct key so the second path does not overwrite the first one
    logger["input table 2"] = args.table2

    table1 = table1.sort()
    table2 = table2.sort()

    # make new output directory and change to it
    if args.output is not None:
        if args.force:
            # remove a previous output directory; a missing one is not an error
            shutil.rmtree(args.output, ignore_errors=True)
        os.makedirs(args.output)
        os.chdir(args.output)
        logger["output directory"] = args.output

    # filter tables
    if args.sparcc_filter is True:
        table1 = general.sparcc_paper_filter(table1)
        table2 = general.sparcc_paper_filter(table2)
        print("Table 1 filtered: %s observations" % str(table1.shape[0]))
        print("Table 2 filtered: %s observations" % str(table2.shape[0]))
        logger["sparcc paper filter"] = True
        logger["number of observations present in table 1 after filter"] = table1.shape[0]
        logger["number of observations present in table 2 after filter"] = table2.shape[0]
    if args.min_sample is not None:
        table1 = general.filter_table(table1, args.min_sample)
        table2 = general.filter_table(table2, args.min_sample)

    if not np.array_equal(table1.ids(), table2.ids()):
        raise ValueError("Tables have different sets of samples present")

    metadata = general.get_metadata_from_table(table1)
    metadata.update(general.get_metadata_from_table(table2))

    # make correlations
    logger["correlation metric"] = args.correl_method
    # NOTE(review): args.p_adjust is logged, but general.p_adjust below is
    # called without it -- confirm the logged method matches the one applied.
    logger["p adjustment method"] = args.p_adjust
    correls = ca.between_correls_from_tables(table1, table2, correl_method, nprocs=args.procs)
    correls.sort_values(correls.columns[-1], inplace=True)
    correls['p_adj'] = general.p_adjust(correls['p'])
    # the context manager guarantees the handle is flushed and closed
    with open('correls.txt', 'w') as correls_file:
        correls.to_csv(correls_file, sep='\t', index=True)

    # make network
    correls_filt = general.filter_correls(correls, min_p=args.min_p, min_r=args.min_r)
    net = general.correls_to_net(correls_filt, metadata=metadata)
    logger["number of nodes"] = net.number_of_nodes()
    logger["number of edges"] = net.number_of_edges()
    nx.write_gml(net, 'crossnet.gml')

    logger.output_log()
def test_between_correls_from_tables_multi(biom_table1, biom_table2):
    """With nprocs=2 the result is a DataFrame holding one row per cross-table pair."""
    result = between_correls_from_tables(biom_table1, biom_table2, nprocs=2)
    assert isinstance(result, pd.DataFrame)
    # one row is expected for every pairing of a table1 row with a table2 row
    expected_rows = biom_table1.shape[0] * biom_table2.shape[0]
    assert result.shape[0] == expected_rows
Esempio n. 5
0
def test_between_correls_from_tables(biom_table1, biom_table2):
    """With default arguments the result is a DataFrame holding one row per cross-table pair."""
    correls = between_correls_from_tables(biom_table1, biom_table2)
    # isinstance (not an exact type comparison) to match the sibling tests
    assert isinstance(correls, pd.DataFrame)
    # one row is expected for every pairing of a table1 row with a table2 row
    assert correls.shape[0] == biom_table1.shape[0] * biom_table2.shape[0]