Beispiel #1
0
def main(args):
    """Evaluate flows for batches of source/destination modules and save them.

    Reads the SPM from ``args.spm_file`` and the module lists from
    ``args.srcmodules_file`` / ``args.destmodules_file`` (split on
    ``args.sep``), runs ``flower.evaluate_flows_batch_nx`` with
    ``args.n_threads`` threads and writes the result to ``args.output_file``
    as a tab-separated table.
    """
    print("Loading SPM data...")
    spm = flower.load_spm(args.spm_file)
    node_ids = spm['nodelist']

    srcmodules = utils.load_nodeset_file(args.srcmodules_file, sep=args.sep)
    destmodules = utils.load_nodeset_file(args.destmodules_file, sep=args.sep)

    # When the SPM node IDs are not strings, cast every module member to int
    # (presumably so the loaded IDs compare equal to the SPM's own IDs).
    if not isinstance(node_ids[0], str):
        srcmodules = [[int(node) for node in module] for module in srcmodules]
        destmodules = [[int(node) for node in module]
                       for module in destmodules]

    print("Evaluating flows...")
    flows_batch = flower.evaluate_flows_batch_nx(
        srcmodules,
        destmodules,
        spm,
        n_threads=args.n_threads,
    )
    print("Completed.")

    print("Writing to file...")
    flows_batch.to_csv(args.output_file, sep='\t')
    print("Completed.")
Beispiel #2
0
def main(args):
    """Evaluate the flow centrality between two node sets and save it.

    Reads the SPM from ``args.spm_file`` and the node sets from
    ``args.srcnodes_file`` / ``args.destnodes_file`` (parsed with
    ``args.sep`` and ``args.as_col``), calls
    ``flower.eval_flow_centrality_nx`` and writes the resulting table to
    ``args.output_file`` as tab-separated text. The sizes of both node sets
    and the first ten result rows are printed along the way.
    """
    print("Loading SPM data...")
    spm_data = flower.load_spm(args.spm_file)

    # Both node files are parsed with the same options.
    loader_options = {'sep': args.sep, 'as_column': args.as_col}
    srcnodes = utils.load_nodeset_file(args.srcnodes_file, **loader_options)
    destnodes = utils.load_nodeset_file(args.destnodes_file, **loader_options)

    print(len(srcnodes), len(destnodes))

    # When the SPM node IDs are not strings, cast the loaded IDs to int
    # (presumably so they compare equal to the SPM's own IDs).
    first_node = spm_data['nodelist'][0]
    if not isinstance(first_node, str):
        srcnodes = [int(node) for node in srcnodes]
        destnodes = [int(node) for node in destnodes]

    print("Evaluating flows...")
    flows = flower.eval_flow_centrality_nx(srcnodes, destnodes, spm=spm_data)
    print("Completed.")

    preview = flows.iloc[:10]
    print(preview)

    print("Writing to file...")
    flows.to_csv(args.output_file, sep='\t')
    print("Completed.")
Beispiel #3
0
def main(args):
    """Evaluate flow statistics for a source/destination node pair and save them.

    Two modes are supported, selected by ``args.rdm_fc_path``:

    * ``rdm_fc_path`` is ``None``: the random source/destination modules are
      read from ``args.rdm_src_path`` / ``args.rdm_dest_path`` and passed,
      together with the real node sets, to ``flower.evaluate_fcs_nx``.
    * ``rdm_fc_path`` is set: a precomputed table is read from that path
      (tab-separated, first column as index), the real flow centrality is
      computed with ``flower.eval_flow_centrality_nx``, and the two are
      combined via ``flower.evaluate_flows_statistics_from_batch_nx`` and
      ``flower.enrich_flows_table_nx``.

    The resulting table is written to ``args.output_file`` using ``args.sep``
    as separator.

    Raises:
        ValueError: if ``rdm_fc_path`` is missing and ``rdm_src_path`` or
            ``rdm_dest_path`` is missing as well.
    """
    if args.rdm_fc_path is None and (args.rdm_src_path is None
                                     or args.rdm_dest_path is None):
        raise ValueError(
            'Either rdm_fc_path or (rdm_src_path, rdm_dest_path) have to be specified!'
        )

    print("Loading SPM data...")

    spm = flower.load_spm(args.spm_file)

    nodelist = spm['nodelist']
    srcnodes = utils.load_nodeset_file(args.srcnodes_file,
                                       sep=args.sep,
                                       as_column=args.as_col)
    destnodes = utils.load_nodeset_file(args.destnodes_file,
                                        sep=args.sep,
                                        as_column=args.as_col)

    # When the SPM node IDs are not strings, the loaded IDs are cast to int
    # (presumably so they compare equal to the SPM's own IDs).
    int_node_ids = not isinstance(nodelist[0], str)
    if int_node_ids:
        srcnodes = [int(node) for node in srcnodes]
        destnodes = [int(node) for node in destnodes]

    print("Evaluating flows...")

    if args.rdm_fc_path is None:
        rdm_srcmodules = utils.load_nodeset_file(args.rdm_src_path,
                                                 sep=args.sep)
        rdm_destmodules = utils.load_nodeset_file(args.rdm_dest_path,
                                                  sep=args.sep)
        if int_node_ids:
            # Build real nested lists: on Python 3 a bare nested map() yields
            # one-shot iterators that have no len() and are empty on a second
            # pass.
            rdm_srcmodules = [[int(node) for node in module]
                              for module in rdm_srcmodules]
            rdm_destmodules = [[int(node) for node in module]
                               for module in rdm_destmodules]

        fcs = flower.evaluate_fcs_nx(srcnodes,
                                     destnodes,
                                     rdm_srcmodules,
                                     rdm_destmodules,
                                     spm,
                                     fill_na=args.fill_na,
                                     n_threads=args.n_threads)

    else:
        rdm_fc = pd.read_csv(args.rdm_fc_path, sep='\t', index_col=0)
        fc = flower.eval_flow_centrality_nx(srcnodes, destnodes, spm)
        fcs = flower.evaluate_flows_statistics_from_batch_nx(rdm_fc, fc)
        fcs = flower.enrich_flows_table_nx(srcnodes,
                                           destnodes,
                                           fcs,
                                           spm,
                                           fill_na=args.fill_na)

    print("Completed.")

    print("Writing to file...")
    fcs.to_csv(args.output_file, sep=args.sep)
    print("Completed.")
def main(args):
    """Evaluate flow statistics for batches of modules and write them to a file.

    Loads the SPM from ``args.spm_file``, reads the source and destination
    modules from ``args.srcmodules_file`` / ``args.destmodules_file`` with
    ``pd.read_csv`` (separator ``args.sep``), calls
    ``flower.evaluate_flows_statistics_nx`` and writes the result to
    ``args.output_file`` as tab-separated text.

    FIXME: this function does not compile as written -- the ``true_flows=``
    keyword in the ``evaluate_flows_statistics_nx`` call has no value.
    """

    print ("Loading SPM data...")

    spm = flower.load_spm(args.spm_file)

    nodelist = spm['nodelist']

    # Each CSV row becomes one module (a list of that row's cell values).
    # NOTE(review): read_csv is called with its default header handling, so
    # the first line of each file is treated as column names, not as a
    # module -- confirm that the input files carry a header row.
    srcmodules = pd.read_csv(args.srcmodules_file, sep=args.sep).values.tolist()
    destmodules = pd.read_csv(args.destmodules_file, sep=args.sep).values.tolist()

    # Cast module members to int only when the first SPM node ID is exactly
    # a built-in int (an exact type check: int subclasses or NumPy integer
    # scalars do not match).
    # NOTE(review): on Python 3 these nested map() calls produce lazy,
    # one-shot iterators rather than lists -- verify the callee accepts that.
    if type(nodelist[0]) is int:
        srcmodules = map(lambda x: map(int,x), srcmodules)
        destmodules = map(lambda x: map(int,x), destmodules)

    print ("Evaluating flows...")

    # FIXME: `true_flows=` is missing its value, which is a SyntaxError. The
    # intended argument is not visible in this function and must be supplied
    # before the module can be imported.
    flows_batch = flower.evaluate_flows_statistics_nx(srcmodules, destmodules, spm, true_flows=, n_threads=args.n_threads)

    print ("Completed.")

    print ("Writing to file...")
    flows_batch.to_csv(args.output_file, sep='\t')
    print ("Completed.")
Beispiel #5
0
# Standardise each observed flow value against the Flow_mean / Flow_std columns.
flows['FCScore'] = (flows.Flow_value.values - flows.Flow_mean.values) / flows.Flow_std.values

# Blank out undefined scores (NaN or +/-inf, e.g. produced by a zero Flow_std).
# The assignment has to go through .loc: `flows[mask].FCScore = ...` writes to
# a temporary copy and leaves `flows` itself untouched.
undefined_score = np.isnan(flows.FCScore) | np.isinf(flows.FCScore)
flows.loc[undefined_score, 'FCScore'] = np.nan

print("Creating rich columns...")

# NOTE: this operation cannot be parallelized when gene symbols have to be
# retrieved from the mygene server; probably the socket connection to the
# mygene server does not work in parallel.

flows['Degree'] = flows.apply(lambda row: nx.degree(ppi, row.NodeID), axis=1)

# Membership flags for the destination / source node sets.
flows['in_DestModule'] = flows.NodeID.isin(destnodes)
flows['in_SrcModule'] = flows.NodeID.isin(srcnodes)

print("Calculating distance statistics...")

spm = flower.load_spm(args.spm_file)

# Translate node IDs through the mapper built from the SPM node list before
# they are handed to the distance helpers together with spm['path_lengths'].
gm = utils.GIDMapper(spm['nodelist'])

destnodes_ids = gm.gid2id(destnodes)
srcnodes_ids = gm.gid2id(srcnodes)
ids = gm.gid2id(flows.NodeID.values)

flows['min_dist_dest'] = flower.eval_min_distance(spm['path_lengths'], destnodes_ids, ids)
flows['min_dist_src'] = flower.eval_min_distance(spm['path_lengths'], srcnodes_ids, ids)

flows['avg_dist_dest'] = flower.eval_avg_distance(spm['path_lengths'], destnodes_ids, ids)
flows['avg_dist_src'] = flower.eval_avg_distance(spm['path_lengths'], srcnodes_ids, ids)

print("Sorting...")
Beispiel #6
0
                    default=1,
                    help='Number of parallel processes')
parser.add_argument('--int_nodeid',
                    action='store_true',
                    help='Cast the node IDs to integer')
parser.add_argument('--dry', '--dry_run', action='store_true', help='Dry run')
args = parser.parse_args()

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Importing network...")

# Node IDs are parsed as int when --int_nodeid is given, otherwise as str.
nodetype = int if args.int_nodeid else str
ppi = nx.read_edgelist(args.network_file, nodetype=nodetype)

print("Loading SPM data...")

spm_data = flower.load_spm(args.spm_file)

nodelist = spm_data['nodelist']

# Each non-blank line has the form "<src ids>|<dest ids>", the IDs on either
# side being separated by single spaces. Blank lines are skipped, so a
# trailing newline or an empty file gives empty module lists instead of an
# opaque unpacking error. A line without exactly one '|' still raises
# ValueError.
srcmodules = []
destmodules = []
with open(args.srcdestmodules_file, 'r') as f:
    for line in f:
        line = line.rstrip()
        if not line:
            continue
        srcrow, destrow = line.split('|')
        srcmodules.append([nodetype(node) for node in srcrow.split(' ')])
        destmodules.append([nodetype(node) for node in destrow.split(' ')])

gm = utils.GIDMapper(nodelist=nodelist)

# Real lists rather than map(): on Python 3 map() is lazy and single-use.
nodes_dest_list = [gm.gid2id(module) for module in destmodules]
nodes_src_list = [gm.gid2id(module) for module in srcmodules]