# Imports assumed for self-containment (the *_iter graph methods imply
# networkx 1.x); names such as LOGGER, RELEASE, FIS_ID, HNS_ID,
# TranscriptionFactor, to_simple, rewire, switch, version_from_path, pyorg,
# and pyreg are expected to come from the surrounding project modules.
import itertools
import multiprocessing
import os
from glob import glob

import networkx as nx
import numpy as np
import pandas as pd
from networkx import triadic_census
from numpy import mean
from progressbar import Bar, ETA, Percentage, ProgressBar, SimpleProgress, Timer


def trn_stats(genes, trn, t_factors, version):
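    """Compute summary statistics for a transcriptional regulatory network.

    The TRN is projected onto a simple gene regulatory network (GRN) and a
    pair of pandas data frames is returned: one-row global statistics and
    per-node degree/betweenness distributions.
    """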
    LOGGER.info("Computing TRN statistics")
    # project the TRN onto a simple GRN, keeping the node -> integer ID map
    (grn, node2id) = to_simple(trn.to_grn(), return_map=True)
    # invert the map so integer IDs can be translated back to node objects
    id2node = {i: n for (n, i) in node2id.items()}
    nodes = sorted(grn.nodes_iter())
    regulating = {node for (node, deg) in grn.out_degree_iter() if deg > 0}
    regulated = set(nodes) - regulating
    components = sorted(nx.weakly_connected_components(grn), key=len,
            reverse=True)
    data = dict()
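    # degree assortativity for all four in/out degree pairings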
    for (a, b) in itertools.product(("in", "out"), repeat=2):
        data["{a}_{b}_ass".format(a=a, b=b)] = nx.degree_assortativity_coefficient(
                grn, x=a, y=b)
    census = triadic_census(grn)
    forward = census["030T"]
    feedback = census["030C"]
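    # count directed cycles longer than mutual-regulation pairs (len > 2)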
    num_cycles = sum(1 for cyc in nx.simple_cycles(grn) if len(cyc) > 2)
    in_deg = [grn.in_degree(node) for node in regulated]
    out_deg = [grn.out_degree(node) for node in regulating]
    data["version"] = version,
    data["release"] = pd.to_datetime(RELEASE[version]),
    data["num_genes"] = len(genes),
    data["num_tf"] = len(t_factors),
    data["num_nodes"] = len(nodes),
    data["num_regulating"] = len(regulating),
    data["num_regulated"] = len(regulated),
    data["num_links"] = grn.size(),
    data["density"] = nx.density(grn),
    data["num_components"] = len(components),
    data["largest_component"] = len(components[0]),
    data["feed_forward"] = forward,
    data["feedback"] = feedback,
    data["fis_out"] = trn.out_degree(TranscriptionFactor[FIS_ID, version]),
    data["hns_out"] = trn.out_degree(TranscriptionFactor[HNS_ID, version]),
    data["cycles"] = num_cycles,
    data["regulated_in_deg"] = mean(in_deg),
    data["regulating_out_deg"] = mean(out_deg),
    data["hub_out_deg"] = max(out_deg)
    stats = pd.DataFrame(data, index=[1])
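    # per-node distributions are computed over *all* nodes of the projection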
    in_deg = [grn.in_degree(node) for node in nodes]
    out_deg = [grn.out_degree(node) for node in nodes]
    bc = nx.betweenness_centrality(grn)
    bc = [bc[node] for node in nodes]
    dists = pd.DataFrame({
            "version": version,
            "release": [pd.to_datetime(RELEASE[version])] * len(nodes),
            "node": [id2node[node].unique_id for node in nodes],
            "regulated_in_degree": in_deg,
            "regulating_out_degree": out_deg,
            "betweenness": bc
        })
    return (stats, dists)
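
# A minimal usage sketch; the pickle file names other than "trn.pkl" are
# hypothetical, and objects would normally be unpickled from one version
# directory of the data set:
#
#     trn = pyorg.read_pickle(os.path.join(path, "trn.pkl"))
#     genes = pyorg.read_pickle(os.path.join(path, "genes.pkl"))
#     t_factors = pyorg.read_pickle(os.path.join(path, "t_factors.pkl"))
#     (stats, dists) = trn_stats(genes, trn, t_factors, version_from_path(path))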
def main_random(args):
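    """Generate randomized null models (rewired and/or switch-randomized)
    for every "trn.pkl" found under the input path and store them, per
    version, under the output path.
    """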
    locations = sorted(glob(os.path.join(args.in_path, args.glob)))
    locations = [os.path.abspath(loc) for loc in locations]
    pool = multiprocessing.Pool(args.nproc)
    bar = ProgressBar(maxval=args.rnd_num, widgets=[Timer(), " ",
        SimpleProgress(), " ", Percentage(), " ", Bar(), " ", ETA()])
    rewire_name = "grn_rewired_{0:.1f}.pkl".format(args.prob)
    switch_name = "grn_switched_{0:d}.pkl".format(args.flip_num)
    for path in locations:
        filename = os.path.join(path, "trn.pkl")
        if not os.path.exists(filename):
            continue
        ver = version_from_path(path)
        base_path = os.path.join(args.out_path, ver)
        if not os.path.isdir(base_path):
            os.makedirs(base_path)
        LOGGER.info(ver)
        trn = pyorg.read_pickle(filename)
        # we consider null models on the projected level since that is the
        # level on which we evaluate topological quantities
        net = pyreg.to_simple(trn.to_grn())
        if args.run_rewire:
            LOGGER.info("Rewiring with probability %.1f", args.prob)
            tasks = [(net, args.prob)] * args.rnd_num
            res_it = pool.imap_unordered(rewire, tasks)
            rands = list()
            bar.start()
            for rnd in res_it:
                rands.append(rnd)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, rewire_name))
        if args.run_switch:
            LOGGER.info("Switch-randomizing each edge %d times", args.flip_num)
            tasks = [(net, args.flip_num)] * args.rnd_num
            res_it = pool.imap_unordered(switch, tasks)
            success = list()
            rands = list()
            bar.start()
            for (rnd, rate) in res_it:
                rands.append(rnd)
                success.append(rate)
                bar += 1
            bar.finish()
            pyorg.write_pickle(rands, os.path.join(base_path, switch_name))
            LOGGER.info("mean flip success rate: %.3G +/- %.3G", np.mean(success),
                    np.std(success))
    pool.close()
    pool.join()
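
# A minimal invocation sketch, assuming the argparse wiring lives elsewhere;
# the attribute names are exactly those accessed by main_random, while the
# values are placeholders:
#
#     import argparse
#     args = argparse.Namespace(in_path=".", glob="*", out_path="out",
#             nproc=4, rnd_num=100, prob=0.1, flip_num=100,
#             run_rewire=True, run_switch=True)
#     main_random(args)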