Example #1
0
def main():
    """Prepare GUILD inputs, build cross-validation files and run NetScore.

    All paths are hard-coded relative to the tutorial data directory. The
    steps run in this order: create the scoring input files, load the
    network and the seeds, write the node-score and edge-score
    cross-validation files, then launch the scoring run.
    """
    # Locations of the inputs, the working folder and the scoring binary
    data_dir = "../../DATA/guild_tutorial/"
    scoring_folder = data_dir + "test/"
    network_file = data_dir + "interactions.sif"
    seed_file = data_dir + "seeds.txt"
    executable_path = "../guild/scoreN"

    # Values that are repeated across the calls below
    background_score = 0.01
    n_fold = 3
    random_seed = 123

    # Create input files for scoring
    guild_utilities.prepare_scoring(
        network_file,
        seed_file,
        scoring_folder,
        non_seed_score=background_score,
        seed_score=1.0,
        edge_score=1.0,
        n_sample=100,
        delim=" ")

    # Output names of the cross validation files
    node_scores_file = scoring_folder + "node_scores.sif"
    edge_scores_file = scoring_folder + "edge_scores_netshort.sif"

    # Collect nodes, edges and their (uniform) scores from the inputs
    graph = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=True)
    seeds = guild_utilities.get_nodes(seed_file)
    nodes = graph.nodes()
    edges = graph.edges()
    seed_to_score = dict.fromkeys(seeds, 1)
    edge_to_score = {(u, v): 1 for u, v in edges}

    # Generate cross validation files
    guild_utilities.generate_cross_validation_node_score_files(
        nodes,
        seed_to_score,
        node_scores_file,
        xval=n_fold,
        default_score=background_score,
        replicable=random_seed)

    guild_utilities.generate_cross_validation_edge_score_as_node_score_files(
        edges,
        seed_to_score,
        edge_to_score,
        edge_scores_file,
        xval=n_fold,
        default_score=background_score,
        replicable=random_seed)

    # Run NetScore on these cross validation files
    netscore_parameters = {"n_iteration": 2, "n_repetition": 3}
    guild_utilities.run_scoring(
        scoring_folder,
        executable_path,
        scoring_type="netscore",
        parameters=netscore_parameters,
        qname=None,
        calculate_pvalue=True,
        xval=n_fold)
Example #2
0
def score_mcl(node_scores_file, network_file, output_scores_file, module_file, default_non_seed_score):
    """Score nodes by how many seed neighbors they have inside their module.

    For each node of each module read from ``module_file``, the score is the
    number of seeds among the node's network neighbors that lie in the same
    module (the node itself excluded), divided by the module size. If a node
    occurs in several modules, the score from the last one wins.

    One ``node<TAB>score`` line is written to ``output_scores_file`` for every
    node returned by ``get_seeds_from_node_scores_file``; nodes that belong to
    no module get ``0.0``.

    Args:
        node_scores_file: Passed to ``get_seeds_from_node_scores_file`` to
            obtain the seeds and the full node list.
        network_file: SIF file the network is loaded from.
        output_scores_file: Path of the score file to write.
        module_file: Passed to ``get_modules_from_file``.
        default_non_seed_score: Passed to ``get_seeds_from_node_scores_file``.
    """
    g = network_utilities.create_network_from_sif_file(network_file, use_edge_data=True)
    # If edge weight based clustering is desired, modules could instead come
    # from: get_modules_of_graph(g, "mcl", inflation=2)
    seeds, nodes = get_seeds_from_node_scores_file(node_scores_file, default_non_seed_score)
    modules = get_modules_from_file(module_file)

    node_to_score = {}
    for module in modules:
        module = set(module)
        for node in module:
            common = set(g.neighbors(node)) & module
            # A self-loop must not let the node count as its own neighbor.
            common.discard(node)
            node_to_score[node] = float(len(common & seeds)) / len(module)

    # Open the output only once all scores exist, so a failure above does not
    # leave an empty file behind; the context manager closes it on any exit.
    with open(output_scores_file, 'w') as f:
        for node in nodes:
            if node in node_to_score:
                f.write("%s\t%f\n" % (node, node_to_score[node]))
            else:
                f.write("%s\t0.0\n" % node)
    return
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """Run the proximity calculation for a range of numbered parameter files.

    Parameter files are looked up at ``parameter_file_prefix + "<i>.txt"`` for
    every ``i`` in ``range(i_start, i_end)``. The network and its degree bins
    are built once, from the file at ``i_start``, and reused for all indices.
    NOTE(review): this presumes every parameter file refers to the same
    network and bin size -- confirm against how the files are generated.

    Processing stops at the first index whose parameter file is missing;
    indices whose output file already exists are skipped. For each remaining
    index a single line ``"z d m s"`` is written to that index's ``out_file``,
    unless ``wrappers.calculate_proximity`` returns ``None``.

    Args:
        parameter_file_prefix: Path prefix of the parameter files.
        i_start: First index to process (inclusive).
        i_end: End of the index range (exclusive).
    """
    (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
     n_seed) = get_parameters_from_file(parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file,
        use_edge_data=False,
        delim=None,
        include_unconnected=True)
    bins = network_utilities.get_degree_binning(network,
                                                min_bin_size,
                                                lengths=None)
    for i in range(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            print("File does not exists for index (aborting): %s" % i)
            break
        (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
         n_seed) = get_parameters_from_file(parameter_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % i)
            continue
        print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to,
                                        n_random, min_bin_size, n_seed,
                                        out_file))
        values = wrappers.calculate_proximity(network,
                                              nodes_from=nodes_from,
                                              nodes_to=nodes_to,
                                              bins=bins,
                                              n_random=n_random,
                                              min_bin_size=min_bin_size,
                                              seed=n_seed)
        if values is not None:  # None is returned when nodes are not in network
            d, z, (m, s) = values
            # Close the handle explicitly instead of relying on garbage
            # collection to flush the result.
            with open(out_file, 'w') as f:
                f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Example #4
0
def get_network(network_file, only_lcc):
    """Load a network from a SIF file, optionally reduced to one component.

    Args:
        network_file: Path of the SIF file to load.
        only_lcc: When true, return only the subgraph induced by the first
            component reported by ``get_connected_components``.
            NOTE(review): presumably that list is ordered largest-first, so
            this is the largest connected component -- confirm in
            ``network_utilities``.

    Returns:
        The loaded network, or the component subgraph when ``only_lcc`` is set.
    """
    full_graph = network_utilities.create_network_from_sif_file(
        network_file,
        use_edge_data=False,
        delim=None,
        include_unconnected=True)
    if not only_lcc:
        return full_graph
    component_list = network_utilities.get_connected_components(full_graph, False)
    return network_utilities.get_subgraph(full_graph, component_list[0])
Example #5
0
def get_network(network_file, only_lcc):
    """Load a network from a SIF file, optionally reduced to one component.

    When ``only_lcc`` is set, the network is replaced by the subgraph induced
    by the first component reported by ``get_connected_components``, and its
    edges are saved to ``network_file + ".lcc"`` (one ``"u 1 v"`` line per
    edge) unless that file already exists.
    NOTE(review): presumably the component list is ordered largest-first, so
    this is the largest connected component -- confirm in
    ``network_utilities``.

    Args:
        network_file: Path of the SIF file to load.
        only_lcc: Whether to shrink the network to that component.

    Returns:
        The loaded (and possibly shrunk) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if only_lcc:
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # The context manager closes the file even if a write fails.
            with open(network_lcc_file, 'w') as f:
                f.writelines("%s 1 %s\n" % (u, v) for u, v in network.edges())
    return network
Example #6
0
def get_network(network_file, only_lcc):
    """Load a network from a SIF file, optionally reduced to one component.

    When ``only_lcc`` is set and ``network_file`` does not already end in
    ``".lcc"``, the network is replaced by the subgraph induced by the first
    component reported by ``get_connected_components``. The node and edge
    counts are printed before and after, and the edges are saved to
    ``network_file + ".lcc"`` (one ``"u 1 v"`` line per edge) unless that file
    already exists.
    NOTE(review): presumably the component list is ordered largest-first, so
    this is the largest connected component -- confirm in
    ``network_utilities``.

    Args:
        network_file: Path of the SIF file to load.
        only_lcc: Whether to shrink the network to that component.

    Returns:
        The loaded (and possibly shrunk) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    # A file that already carries the ".lcc" suffix is used as loaded.
    if only_lcc and not network_file.endswith(".lcc"):
        print("Shrinking network to its LCC %s %s"
              % (len(network.nodes()), len(network.edges())))
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        print("Final shape: %s %s"
              % (len(network.nodes()), len(network.edges())))
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # The context manager closes the file even if a write fails.
            with open(network_lcc_file, 'w') as f:
                f.writelines("%s 1 %s\n" % (u, v) for u, v in network.edges())
    return network
Example #7
0
def main():
    """
    Get nodes that are top scoring w.r.t. GUILD scores.

    Assumes that GUILD scores have been calculated already (i.e. python
    hello_world.py). Nodes with p-value <= 0.05 are selected, the subnetwork
    they induce in the interaction network is extracted, and every edge of it
    is written to ``subnetwork.sif`` as ``u<TAB>score<TAB>v``, where the score
    is the mean of the two end nodes' z-scores.
    """
    # Set the network and score files
    data_dir = "../../DATA/guild_tutorial/"
    network_file = data_dir + "interactions.sif"
    scoring_folder = data_dir + "test/"
    pvalue_file = scoring_folder + "output_scores.sif.netcombo.pval"
    subnetwork_file = scoring_folder + "subnetwork.sif"

    # Get GUILD scores; each value unpacks as (score, p-value)
    node_to_vals = guild_utilities.get_values_from_pvalue_file(pvalue_file)

    # Get top scoring, i.e. nodes that have p-value <= 0.05
    top_nodes = set()
    for node, (_score, pval) in node_to_vals.items():
        if pval <= 0.05:
            top_nodes.add(node)

    # Load interaction network
    g = network_utilities.create_network_from_sif_file(network_file,
                                                       use_edge_data=True)

    # Get subnetwork induced by top scoring nodes
    g_sub = network_utilities.get_subgraph(g, top_nodes)

    # Output subnetwork along with the z-scores converted from the end nodes'
    # p-values; the context manager closes the file even if a write fails.
    with open(subnetwork_file, 'w') as f:
        for u, v in g_sub.edges():
            zscore_u = stat_utilities.convert_p_values_to_z_scores(
                [node_to_vals[u][1]])[0]
            zscore_v = stat_utilities.convert_p_values_to_z_scores(
                [node_to_vals[v][1]])[0]
            score = (zscore_u + zscore_v) / 2
            f.write("%s\t%f\t%s\n" % (u, score, v))
    return
Example #8
0
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """Run the proximity calculation for a range of numbered parameter files.

    Parameter files are looked up at ``parameter_file_prefix + "<i>.txt"`` for
    every ``i`` in ``range(i_start, i_end)``. The network and its degree bins
    are built once, from the file at ``i_start``, and reused for all indices.
    NOTE(review): this presumes every parameter file refers to the same
    network and bin size -- confirm against how the files are generated.

    Processing stops at the first index whose parameter file is missing;
    indices whose output file already exists are skipped. For each remaining
    index a single line ``"z d m s"`` is written to that index's ``out_file``,
    unless ``wrappers.calculate_proximity`` returns ``None``.

    Args:
        parameter_file_prefix: Path prefix of the parameter files.
        i_start: First index to process (inclusive).
        i_end: End of the index range (exclusive).
    """
    (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
     n_seed) = get_parameters_from_file(parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in range(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            print("File does not exists for index (aborting): %s" % i)
            break
        (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
         n_seed) = get_parameters_from_file(parameter_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % i)
            continue
        print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to,
                                        n_random, min_bin_size, n_seed,
                                        out_file))
        values = wrappers.calculate_proximity(
            network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins,
            n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is not None:  # None is returned when nodes are not in network
            d, z, (m, s) = values
            # Close the handle explicitly instead of relying on garbage
            # collection to flush the result.
            with open(out_file, 'w') as f:
                f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Example #9
0
def main():
    """Command-line entry point for the proximity calculation.

    Three modes, checked in this order:

    * ``-p`` with ``-i`` and ``-j``: batch mode over numbered parameter files,
      delegated to ``calculate_proximity_multiple``.
    * ``-f``: all settings are read from a single parameter file.
    * otherwise: settings come from ``-e``, ``-s``, ``-t`` and ``-o`` (plus
      the optional ``-n``, ``-m`` and ``-x``); ``-s`` and ``-t`` are
      comma-separated node lists.

    In the last two modes one line ``"z d m s"`` is written to ``out_file``
    unless ``wrappers.calculate_proximity`` returns ``None``. Missing
    arguments are reported through ``parser.error``.
    """
    parser = argparse.ArgumentParser()
    # -e/-s/-t/-o are only needed in single-run mode, so they are validated
    # below rather than declared required here.
    parser.add_argument('-e', '--network_file')
    parser.add_argument('-s', '--nodes_from')
    parser.add_argument('-t', '--nodes_to')
    parser.add_argument('-o', '--out_file')
    parser.add_argument('-n', '--n_random', type=int, default=1000)
    parser.add_argument('-m', '--min_bin_size', type=int, default=100)
    parser.add_argument('-x', '--n_seed', type=int, default=452456)
    parser.add_argument('-f', '--parameter_file', type=str, default=None)
    parser.add_argument('-p', '--parameter_file_prefix', type=str, default=None)
    parser.add_argument('-i', '--parameter_file_start_index', type=int, default=None)
    parser.add_argument('-j', '--parameter_file_end_index', type=int, default=None)
    args = parser.parse_args()

    # Run more than once for given input files
    if args.parameter_file_prefix is not None:
        i_start = args.parameter_file_start_index
        i_end = args.parameter_file_end_index
        if i_start is None or i_end is None:
            parser.error("-p/--parameter_file_prefix requires both "
                         "-i/--parameter_file_start_index and "
                         "-j/--parameter_file_end_index")
        calculate_proximity_multiple(args.parameter_file_prefix, i_start, i_end)
        return

    if args.parameter_file is not None:
        # Run from input parameter file
        (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
         n_seed) = get_parameters_from_file(args.parameter_file)
    else:
        # Run once with provided arguments
        required = (("-e/--network_file", args.network_file),
                    ("-s/--nodes_from", args.nodes_from),
                    ("-t/--nodes_to", args.nodes_to),
                    ("-o/--out_file", args.out_file))
        missing = [flag for flag, value in required if value is None]
        if missing:
            parser.error("missing required argument(s): %s" % ", ".join(missing))
        nodes_from = args.nodes_from.split(",")
        nodes_to = args.nodes_to.split(",")
        network_file = args.network_file
        n_random = args.n_random
        min_bin_size = args.min_bin_size
        n_seed = args.n_seed
        out_file = args.out_file

    # The network is needed in both single-run modes, so load it after the
    # settings have been resolved.
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to,
                                    n_random, min_bin_size, n_seed, out_file))
    values = wrappers.calculate_proximity(
        network, nodes_from=nodes_from, nodes_to=nodes_to, n_random=n_random,
        min_bin_size=min_bin_size, seed=n_seed)
    if values is not None:  # None is returned when nodes are not in network
        d, z, (m, s) = values
        with open(out_file, 'w') as f:
            f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Example #10
0
def main():
    """Command-line entry point for the proximity calculation.

    Three modes, checked in this order:

    * ``-p`` with ``-i`` and ``-j``: batch mode over numbered parameter files,
      delegated to ``calculate_proximity_multiple``.
    * ``-f``: all settings are read from a single parameter file.
    * otherwise: settings come from ``-e``, ``-s``, ``-t`` and ``-o`` (plus
      the optional ``-n``, ``-m`` and ``-x``); ``-s`` and ``-t`` are
      comma-separated node lists.

    In the last two modes one line ``"z d m s"`` is written to ``out_file``
    unless ``wrappers.calculate_proximity`` returns ``None``. Missing
    arguments are reported through ``parser.error``.
    """
    parser = argparse.ArgumentParser()
    # -e/-s/-t/-o are only needed in single-run mode, so they are validated
    # below rather than declared required here.
    parser.add_argument('-e', '--network_file')
    parser.add_argument('-s', '--nodes_from')
    parser.add_argument('-t', '--nodes_to')
    parser.add_argument('-o', '--out_file')
    parser.add_argument('-n', '--n_random', type=int, default=1000)
    parser.add_argument('-m', '--min_bin_size', type=int, default=100)
    parser.add_argument('-x', '--n_seed', type=int, default=452456)
    parser.add_argument('-f', '--parameter_file', type=str, default=None)
    parser.add_argument('-p', '--parameter_file_prefix', type=str, default=None)
    parser.add_argument('-i', '--parameter_file_start_index', type=int, default=None)
    parser.add_argument('-j', '--parameter_file_end_index', type=int, default=None)
    args = parser.parse_args()

    # Run more than once for given input files
    if args.parameter_file_prefix is not None:
        i_start = args.parameter_file_start_index
        i_end = args.parameter_file_end_index
        if i_start is None or i_end is None:
            parser.error("-p/--parameter_file_prefix requires both "
                         "-i/--parameter_file_start_index and "
                         "-j/--parameter_file_end_index")
        calculate_proximity_multiple(args.parameter_file_prefix, i_start, i_end)
        return

    if args.parameter_file is not None:
        # Run from input parameter file
        (network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random,
         n_seed) = get_parameters_from_file(args.parameter_file)
    else:
        # Run once with provided arguments
        required = (("-e/--network_file", args.network_file),
                    ("-s/--nodes_from", args.nodes_from),
                    ("-t/--nodes_to", args.nodes_to),
                    ("-o/--out_file", args.out_file))
        missing = [flag for flag, value in required if value is None]
        if missing:
            parser.error("missing required argument(s): %s" % ", ".join(missing))
        nodes_from = args.nodes_from.split(",")
        nodes_to = args.nodes_to.split(",")
        network_file = args.network_file
        n_random = args.n_random
        min_bin_size = args.min_bin_size
        n_seed = args.n_seed
        out_file = args.out_file

    # The network is needed in both single-run modes, so load it after the
    # settings have been resolved.
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to,
                                    n_random, min_bin_size, n_seed, out_file))
    values = wrappers.calculate_proximity(
        network, nodes_from=nodes_from, nodes_to=nodes_to, n_random=n_random,
        min_bin_size=min_bin_size, seed=n_seed)
    if values is not None:  # None is returned when nodes are not in network
        d, z, (m, s) = values
        with open(out_file, 'w') as f:
            f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Example #11
0
def score_mcl(node_scores_file, network_file, output_scores_file, module_file,
              default_non_seed_score):
    """Score nodes by how many seed neighbors they have inside their module.

    For each node of each module read from ``module_file``, the score is the
    number of seeds among the node's network neighbors that lie in the same
    module (the node itself excluded), divided by the module size. If a node
    occurs in several modules, the score from the last one wins.

    One ``node<TAB>score`` line is written to ``output_scores_file`` for every
    node returned by ``get_seeds_from_node_scores_file``; nodes that belong to
    no module get ``0.0``.

    Args:
        node_scores_file: Passed to ``get_seeds_from_node_scores_file`` to
            obtain the seeds and the full node list.
        network_file: SIF file the network is loaded from.
        output_scores_file: Path of the score file to write.
        module_file: Passed to ``get_modules_from_file``.
        default_non_seed_score: Passed to ``get_seeds_from_node_scores_file``.
    """
    g = network_utilities.create_network_from_sif_file(network_file,
                                                       use_edge_data=True)
    # If edge weight based clustering is desired, modules could instead come
    # from: get_modules_of_graph(g, "mcl", inflation=2)
    seeds, nodes = get_seeds_from_node_scores_file(node_scores_file,
                                                   default_non_seed_score)
    modules = get_modules_from_file(module_file)

    node_to_score = {}
    for module in modules:
        module = set(module)
        for node in module:
            common = set(g.neighbors(node)) & module
            # A self-loop must not let the node count as its own neighbor.
            common.discard(node)
            node_to_score[node] = float(len(common & seeds)) / len(module)

    # Open the output only once all scores exist, so a failure above does not
    # leave an empty file behind; the context manager closes it on any exit.
    with open(output_scores_file, 'w') as f:
        for node in nodes:
            if node in node_to_score:
                f.write("%s\t%f\n" % (node, node_to_score[node]))
            else:
                f.write("%s\t0.0\n" % node)
    return
Example #12
0
def create_network_from_sif_file(network_file, **kwargs):
    """Load a network from a SIF file.

    Thin pass-through to ``network_utilities.create_network_from_sif_file``;
    all keyword arguments are forwarded unchanged.
    """
    network = network_utilities.create_network_from_sif_file(network_file,
                                                             **kwargs)
    return network
Example #13
0
def create_network_from_sif_file(network_file, **kwargs):
    """Load a network from a SIF file.

    Thin pass-through to ``network_utilities.create_network_from_sif_file``;
    all keyword arguments are forwarded unchanged.
    """
    loader = network_utilities.create_network_from_sif_file
    return loader(network_file, **kwargs)