def get_nodes(file_name):
    """
    Parses nodes from a given file (e.g., seed file).

    file_name: path of the file handed to network_utilities.get_nodes_and_edges_from_sif_file
               (read with store_edge_type=False)

    Returns the first element (the nodes) of the 4-tuple produced by that parser.
    """
    parsed = network_utilities.get_nodes_and_edges_from_sif_file(file_name=file_name, store_edge_type=False)
    # Only the node collection is of interest here; the remaining three entries are discarded
    nodes, _, _, _ = parsed
    return nodes
def get_node_to_score(score_file):
    """
    Parses scores from a scoring file created by GUILD.

    score_file: path of the scoring file handed to
                network_utilities.get_nodes_and_edges_from_sif_file (read with store_edge_type=False)

    Returns the third element (node to score mapping) of the 4-tuple produced by that parser.
    """
    # The parser returns (nodes, edges, node data, edge data); only the node data is needed here
    _, _, node_to_score, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=score_file, store_edge_type=False)
    return node_to_score
def prepare_scoring(network_file, seed_file, scoring_folder="./", non_seed_score=0.01, seed_score=1.0, edge_score=1.0, n_sample=100, delim=" ", name=None):
    """
    Creates input files required by GUILD executable.

    network_file: network in sif-like format where edge type is edge score (e.g., "A 0.5 B" or "A pp B")
    seed_file: seeds in text format where nodes and their scores are given (e.g., "A 0.1" or "A")
    scoring_folder: path to directory where the input/output files will be created; file names are
                    appended to it by plain string concatenation, so it has to end with a path
                    separator ("./", by default). Missing directories are created.
    non_seed_score: initial scores of non-seeds (0.01, by default)
    seed_score: initial scores of seeds (1.0, by default), used when the seed file provides no scores
    edge_score: weight of edges, in case the values in network_file are not convertible to float (1.0, by default)
    n_sample: number of randomly generated graphs for netzcore (100, by default)
    delim: delimiter that separates columns in input/output files (" ", by default)
    name: optional name defining the phenotype, the scoring files will be created under this dir
          (in case of multiple phenotype analysis)

    File names handed to the helper functions:
        scoring_folder: edge_scores.sif
        scoring_folder + name: node_scores.sif, seed_scores.sif, node_scores_background.sif,
                               seed_scores_background.sif, edge_scores_netshort.sif
        scoring_folder + "../": sampled_graph.1 ... sampled_graph.<n_sample>

    Returns None.
    """
    # makedirs (instead of mkdir) so that a nested scoring folder with a missing parent works as well
    if not os.path.exists(scoring_folder):
        os.makedirs(scoring_folder)
    if name is not None:
        if not os.path.exists(scoring_folder + name):
            os.makedirs(scoring_folder + name)
        name += os.sep
    else:
        name = ""
    # Phenotype specific files go under this prefix (equals scoring_folder when no name is given)
    phenotype_prefix = scoring_folder + name

    # Read node info from network file (use network file as edge file)
    # The edge score file is not phenotype specific, it is placed directly under scoring_folder
    print("Creating edge score file")
    edge_score_file = scoring_folder + "edge_scores.sif"
    if os.path.exists(edge_score_file):
        print("\tEdge score file exists, overwriting!")
    nodes, edges, _, edge_to_data = network_utilities.get_nodes_and_edges_from_sif_file(
        network_file, store_edge_type=True, delim=delim, data_to_float=False)
    edge_to_weight = create_edge_score_file(edge_score_file, edges, edge_to_data, edge_score, delim)

    # Create node file (ignore seeds that are not in the network and assign non-seed scores)
    print("Creating node score file")
    node_score_file = phenotype_prefix + "node_scores.sif"
    seed_score_file = phenotype_prefix + "seed_scores.sif"
    seeds, _, seed_to_data, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        seed_file, store_edge_type=False, delim=delim, data_to_float=True)
    if seed_to_data is None:
        # No scores were parsed from the seed file: every seed gets the default seed score
        seed_to_data = dict.fromkeys(seeds, seed_score)
    node_to_data = create_node_score_file(node_score_file, seed_score_file, nodes, seeds,
                                          seed_to_data, non_seed_score, seed_score, delim)

    # Create background node file (selects k non-seeds randomly where k is the number of seeds)
    print("Creating background node score file")
    bg_node_file = phenotype_prefix + "node_scores_background.sif"
    bg_seed_file = phenotype_prefix + "seed_scores_background.sif"
    create_background_score_file(bg_node_file, bg_seed_file, nodes, seeds, seed_to_data,
                                 non_seed_score, delim)

    # Create modified edge file using node scores for netshort
    print("Creating node score converted edge file (for netshort)")
    nd_edge_file = phenotype_prefix + "edge_scores_netshort.sif"
    create_node_score_converted_edge_score_file(nd_edge_file, edges, edge_to_weight, node_to_data, delim)

    # Create random network files for netzcore
    # Sampled graphs are written one level above scoring_folder
    print("Creating random networks (for netzcore)")
    sampling_prefix = scoring_folder + "../" + "sampled_graph."
    # Only the existence of the last sample (number n_sample) is checked to decide on skipping
    if os.path.exists(sampling_prefix + "%s" % n_sample):
        print("\tSampled networks exists, skipping this step!")
    else:
        g = network_utilities.create_network_from_sif_file(
            network_file_in_sif=edge_score_file, use_edge_data=True, delim=delim)
        for i in range(1, n_sample + 1):
            g_sampled = network_utilities.randomize_graph(
                graph=g, randomization_type="preserve_topology_and_node_degree")
            network_utilities.output_network_in_sif(g_sampled, sampling_prefix + "%s" % i)