예제 #1
0
def get_nodes(file_name):
    """
    Read the nodes listed in a SIF-like file (e.g., a seed file).

    file_name: path of the file handed to the SIF parser
    Returns the first element of the 4-tuple produced by
    network_utilities.get_nodes_and_edges_from_sif_file.
    """
    parsed = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=file_name, store_edge_type=False)
    # Only the first field is needed; the remaining three are discarded.
    node_collection, _edges, _node_data, _edge_data = parsed
    return node_collection
예제 #2
0
def get_node_to_score(score_file):
    """
    Load the node-to-score mapping from a scoring file created by GUILD.

    score_file: path of the scoring file handed to the SIF parser
    Returns the third element of the 4-tuple produced by
    network_utilities.get_nodes_and_edges_from_sif_file.
    """
    parsed = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=score_file, store_edge_type=False)
    # Only the third field (per-node data) is of interest here.
    _nodes, _edges, scores_by_node, _edge_data = parsed
    return scores_by_node
예제 #3
0
def get_nodes(file_name):
    """
    Extract the nodes found in the given file (for instance a seed file).

    file_name: path of the file to be parsed as a SIF-like file
    Returns whatever network_utilities.get_nodes_and_edges_from_sif_file
    yields as the first of its four results.
    """
    sif_contents = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=file_name, store_edge_type=False)
    # The parser returns four values; everything but the first is ignored.
    found_nodes, _unused_a, _unused_b, _unused_c = sif_contents
    return found_nodes
예제 #4
0
def get_node_to_score(score_file):
    """
    Fetch the per-node scores stored in a scoring file created by GUILD.

    score_file: path of the scoring file to be parsed as a SIF-like file
    Returns whatever network_utilities.get_nodes_and_edges_from_sif_file
    yields as the third of its four results.
    """
    sif_contents = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=score_file, store_edge_type=False)
    # The parser returns four values; only the third one is kept.
    _unused_a, _unused_b, node_scores, _unused_c = sif_contents
    return node_scores
예제 #5
0
def prepare_scoring(network_file, seed_file, scoring_folder="./", non_seed_score=0.01, seed_score=1.0, edge_score=1.0, n_sample=100, delim=" ", name=None):
    """
    Creates input files required by GUILD executable.

    network_file: network in sif-like format where edge type is edge score (e.g., "A 0.5 B" or "A pp B")
    seed_file: seeds in text format where nodes and their scores are given (e.g., "A 0.1" or "A")
    scoring_folder: path to directory where the input/output files will be created
                    (a trailing path separator is optional)
    non_seed_score: initial scores of non-seeds (0.01, by default)
    seed_score: initial scores of seeds (1.0, by default)
    edge_score: weight of edges, in case the values in network_file are not convertible to float (1.0, by default)
    n_sample: number of randomly generated graphs for netzcore (100, by default)
    delim: delimiter that separates columns in input/output files (" ", by default)
    name: optional name defining the phenotype, the scoring files will be created under this dir (in case of multiple phenotype analysis)

    Returns None. The edge score file is written directly under scoring_folder,
    the node/seed/background/netshort files under scoring_folder[/name], and the
    sampled graphs in the parent directory of scoring_folder as "sampled_graph.<i>".
    """
    if not os.path.exists(scoring_folder):
        os.mkdir(scoring_folder)
    # Phenotype specific files live in an optional sub-directory. Paths are
    # built with os.path.join so that scoring_folder works with or without a
    # trailing separator.
    if name is not None:
        phenotype_folder = os.path.join(scoring_folder, name)
        if not os.path.exists(phenotype_folder):
            os.mkdir(phenotype_folder)
    else:
        phenotype_folder = scoring_folder

    # Read node info from network file (use network file as edge file)
    print("Creating edge score file")
    edge_score_file = os.path.join(scoring_folder, "edge_scores.sif")
    if os.path.exists(edge_score_file):
        print("\tEdge score file exists, overwriting!")
    nodes, edges, dummy, edge_to_data = network_utilities.get_nodes_and_edges_from_sif_file(network_file, store_edge_type=True, delim=delim, data_to_float=False)
    edge_to_weight = create_edge_score_file(edge_score_file, edges, edge_to_data, edge_score, delim)

    # Create node file (ignore seeds that are not in the network and assign non-seed scores)
    print("Creating node score file")
    node_score_file = os.path.join(phenotype_folder, "node_scores.sif")
    seed_score_file = os.path.join(phenotype_folder, "seed_scores.sif")
    seeds, dummy, seed_to_data, dummy = network_utilities.get_nodes_and_edges_from_sif_file(seed_file, store_edge_type=False, delim=delim, data_to_float=True)
    if seed_to_data is None:
        # No per-seed data came back (presumably the seed file has no score
        # column -- confirm against the parser): give every seed the default.
        seed_to_data = {}
        for seed in seeds:
            seed_to_data[seed] = seed_score
    node_to_data = create_node_score_file(node_score_file, seed_score_file, nodes, seeds, seed_to_data, non_seed_score, seed_score, delim)

    # Create background node file (selects k non-seeds randomly where k is the number of seeds)
    print("Creating background node score file")
    bg_node_file = os.path.join(phenotype_folder, "node_scores_background.sif")
    bg_seed_file = os.path.join(phenotype_folder, "seed_scores_background.sif")
    create_background_score_file(bg_node_file, bg_seed_file, nodes, seeds, seed_to_data, non_seed_score, delim)

    # Create modified edge file using node scores for netshort
    print("Creating node score converted edge file (for netshort)")
    nd_edge_file = os.path.join(phenotype_folder, "edge_scores_netshort.sif")
    create_node_score_converted_edge_score_file(nd_edge_file, edges, edge_to_weight, node_to_data, delim)

    # Create random network files for netzcore
    print("Creating random networks (for netzcore)")
    sampling_prefix = os.path.join(scoring_folder, "..", "sampled_graph.")
    # The existence of the last sample is taken as proof that all were generated.
    if os.path.exists(sampling_prefix + "%s" % n_sample):
        print("\tSampled networks exists, skipping this step!")
    else:
        g = network_utilities.create_network_from_sif_file(network_file_in_sif=edge_score_file, use_edge_data=True, delim=delim)
        # range() instead of xrange() keeps the loop valid on Python 2 and 3
        for i in range(1, n_sample + 1):
            g_sampled = network_utilities.randomize_graph(graph=g, randomization_type="preserve_topology_and_node_degree")
            network_utilities.output_network_in_sif(g_sampled, sampling_prefix + "%s" % i)
    return
예제 #6
0
def prepare_scoring(network_file,
                    seed_file,
                    scoring_folder="./",
                    non_seed_score=0.01,
                    seed_score=1.0,
                    edge_score=1.0,
                    n_sample=100,
                    delim=" ",
                    name=None):
    """
    Creates input files required by GUILD executable.

    network_file: network in sif-like format where edge type is edge score (e.g., "A 0.5 B" or "A pp B")
    seed_file: seeds in text format where nodes and their scores are given (e.g., "A 0.1" or "A")
    scoring_folder: path to directory where the input/output files will be created
                    (with or without a trailing path separator)
    non_seed_score: initial scores of non-seeds (0.01, by default)
    seed_score: initial scores of seeds (1.0, by default)
    edge_score: weight of edges, in case the values in network_file are not convertible to float (1.0, by default)
    n_sample: number of randomly generated graphs for netzcore (100, by default)
    delim: delimiter that separates columns in input/output files (" ", by default)
    name: optional name defining the phenotype, the scoring files will be created under this dir (in case of multiple phenotype analysis)

    Returns None. All results are files: "edge_scores.sif" in scoring_folder,
    node/seed/background/netshort score files in scoring_folder[/name], and
    "sampled_graph.<i>" files in the parent directory of scoring_folder.
    """
    if not os.path.exists(scoring_folder):
        os.mkdir(scoring_folder)

    # Directory receiving the phenotype specific files. os.path.join is used
    # for every path so a scoring_folder lacking a trailing separator no
    # longer yields names such as "outedge_scores.sif".
    output_folder = scoring_folder
    if name is not None:
        output_folder = os.path.join(scoring_folder, name)
        if not os.path.exists(output_folder):
            os.mkdir(output_folder)

    # Read node info from network file (use network file as edge file)
    print("Creating edge score file")
    edge_score_file = os.path.join(scoring_folder, "edge_scores.sif")
    if os.path.exists(edge_score_file):
        print("\tEdge score file exists, overwriting!")
    nodes, edges, dummy, edge_to_data = network_utilities.get_nodes_and_edges_from_sif_file(
        network_file, store_edge_type=True, delim=delim, data_to_float=False)
    edge_to_weight = create_edge_score_file(edge_score_file, edges,
                                            edge_to_data, edge_score, delim)

    # Create node file (ignore seeds that are not in the network and assign non-seed scores)
    print("Creating node score file")
    node_score_file = os.path.join(output_folder, "node_scores.sif")
    seed_score_file = os.path.join(output_folder, "seed_scores.sif")
    seeds, dummy, seed_to_data, dummy = network_utilities.get_nodes_and_edges_from_sif_file(
        seed_file, store_edge_type=False, delim=delim, data_to_float=True)
    if seed_to_data is None:
        # No per-seed data was returned (presumably a seed file without a
        # score column -- verify against the parser): use the default score.
        seed_to_data = dict((seed, seed_score) for seed in seeds)
    node_to_data = create_node_score_file(node_score_file, seed_score_file,
                                          nodes, seeds, seed_to_data,
                                          non_seed_score, seed_score, delim)

    # Create background node file (selects k non-seeds randomly where k is the number of seeds)
    print("Creating background node score file")
    bg_node_file = os.path.join(output_folder, "node_scores_background.sif")
    bg_seed_file = os.path.join(output_folder, "seed_scores_background.sif")
    create_background_score_file(bg_node_file, bg_seed_file, nodes, seeds,
                                 seed_to_data, non_seed_score, delim)

    # Create modified edge file using node scores for netshort
    print("Creating node score converted edge file (for netshort)")
    nd_edge_file = os.path.join(output_folder, "edge_scores_netshort.sif")
    create_node_score_converted_edge_score_file(nd_edge_file, edges,
                                                edge_to_weight, node_to_data,
                                                delim)

    # Create random network files for netzcore
    print("Creating random networks (for netzcore)")
    sampling_prefix = os.path.join(scoring_folder, "..", "sampled_graph.")
    # Presence of the highest-numbered sample is treated as "already done".
    if os.path.exists(sampling_prefix + "%s" % n_sample):
        print("\tSampled networks exists, skipping this step!")
    else:
        g = network_utilities.create_network_from_sif_file(
            network_file_in_sif=edge_score_file,
            use_edge_data=True,
            delim=delim)
        # range() rather than xrange() so the loop runs on Python 2 and 3 alike
        for i in range(1, n_sample + 1):
            g_sampled = network_utilities.randomize_graph(
                graph=g,
                randomization_type="preserve_topology_and_node_degree")
            network_utilities.output_network_in_sif(g_sampled,
                                                    sampling_prefix + "%s" % i)
    return