def run_and_assess_performance_of_folds(k, edge_file_weights, edge_file_scores_prefix, node_file_scores, node_file_scores_prefix, result_file_prefix):
    setNode, setDummy, dictNode, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name=node_file_scores[:-3] + "sif")
    dictNodeToListScore = {}
    for i in xrange(k):
        setNodeTrain, setDummy, dictNodeTrain, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name=node_file_scores_prefix + "_%i.sif" % i)
        seeds_test = set()
        seeds = set()
        for u, s in dictNode.iteritems():
            if dictNodeTrain[u] != s: # score differs in the training file, so the node was held out in this fold
                seeds_test.add(u)
            if float(s) > 0: # positively scored node, i.e. a seed
                seeds.add(u)
        node_to_scores = run_and_save_results(edge_file_weights=edge_file_weights, edge_file_scores=edge_file_scores_prefix + "_%i.sif" % i, dump_file=result_file_prefix + "_%i.dump" % i)
        for u, s in node_to_scores.iteritems():
            if u in seeds:
                label = 1
            else:
                label = 0
            if u not in seeds_test: # only seeds held out in this fold count as positives
                label = 0
            dictNodeToListScore.setdefault(u, []).append((s, label))
    #print dictNodeToListScore
    createROCRPredictionsData(dictNodeToListScore, result_file_prefix + "_predictions.txt", result_file_prefix + "_labels.txt")
    return

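# Usage sketch (file names below are hypothetical): assess k-fold
# cross-validation performance, assuming the per-fold files
# "<prefix>_<i>.sif" referenced in the loop above already exist for
# each fold i in [0, k).
#
#   run_and_assess_performance_of_folds(5,
#       edge_file_weights = "data/edge_weights.eda",
#       edge_file_scores_prefix = "data/edge_scores",
#       node_file_scores = "data/node_scores.eda",
#       node_file_scores_prefix = "data/node_scores",
#       result_file_prefix = "output/fold")
#
# The resulting "output/fold_predictions.txt" and "output/fold_labels.txt"
# are intended for R's ROCR package (see createROCRPredictionsData).
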
def old_create_edge_file_from_weight_and_score_files(edge_file_weights, edge_file_scores, out_file):
    setNode, setEdge, dictNode, dictEdgeWeight = network_utilities.get_nodes_and_edges_from_sif_file(file_name = edge_file_weights[:-3] + "sif", store_edge_type = True, data_to_float = False)
    setNode, setEdge, dictNode, dictEdge = network_utilities.get_nodes_and_edges_from_sif_file(file_name = edge_file_scores[:-3] + "sif", store_edge_type = True, data_to_float = False)
    f = open(out_file, "w")
    for e, s in dictEdge.iteritems():
        u, v = e
        s = float(s)
        w = float(dictEdgeWeight[e]) # values are strings since data_to_float is False
        s = s * 100 + 1
        #if s == 0:
        #    s = 0.1
        w /= s
        #print w
        f.write("%s %s %s\n" % (u, w, v))
    f.close()
    return

def score_by_random_model(file_node_scores, file_edge_scores, file_output_scores, default_score=0, max_score=1):
    from random import uniform
    f = open(file_output_scores, "w")
    setNode, setDummy, dictDummy, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name=file_node_scores, store_edge_type=False)
    for id in setNode:
        f.write("%s\t%f\n" % (id, uniform(default_score, max_score)))
    f.close()
    return

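# Usage sketch (hypothetical paths): write a uniform random score in
# [default_score, max_score] for every node listed in the given node score
# file, e.g. as a null-model baseline. Note that file_edge_scores appears in
# the signature but is not used by the current implementation.
#
#   score_by_random_model("data/node_scores.sif", None,
#                         "output/random_node_scores.txt")
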
def get_node_association_score_mapping(network_file, network_file_identifier_type, node_description_file, association_scores_file, association_scores_file_identifier_type, log_file = None, default_seed_score=1.0):
    """
    Maps genes and their scores to nodes in the network using the given
    association_scores_file and the identifier correspondences in
    node_description_file.
    """
    g = network_utilities.create_network_from_sif_file(network_file)
    nodes = g.nodes()
    setNode, setDummy, dictNode, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name = association_scores_file, store_edge_type = False)
    if dictNode is None: # scores file contains ids only, assign the default seed score
        dictNode = dict([ (v, default_seed_score) for v in setNode ])
    node_to_genes, gene_to_nodes = biana_output_converter.get_attribute_to_attribute_mapping(node_description_file, network_file_identifier_type, association_scores_file_identifier_type, keys_to_include=set(nodes))
    covered_genes = set()
    seeds = set()
    seed_to_score = {}
    if log_file is not None:
        log_fd = open(log_file, "a")
    else:
        log_fd = None
    for v in nodes:
        gene_with_score = setNode & node_to_genes[v]
        covered_genes |= gene_with_score
        if len(gene_with_score) > 0:
            seeds.add(v)
            if len(gene_with_score) > 1:
                #print "More than one gene:", gene_with_score, "for", v
                if log_fd is not None:
                    log_fd.write("More than one gene: %s for %s\n" % (gene_with_score, v))
            # average the scores of the genes mapped onto this node
            i = 0
            score = 0
            for gene in gene_with_score:
                i += 1
                score += float(dictNode[gene])
            score /= i
            if score <= 0:
                #print "non-positive seed score", v, score, "genes:", node_to_genes[v]
                if log_fd is not None:
                    log_fd.write("non-positive seed score %s %s genes: %s\n" % (v, score, node_to_genes[v]))
            seed_to_score[v] = score
            #else:
            #    score = default_score
            #node_to_score[v] = score
    #print "Covered genes (seed genes):", len(covered_genes), "among", len(setNode)
    #print "Covered gene products (seed nodes):", len(seeds), "among", g.number_of_nodes()
    if log_fd is not None:
        log_fd.write("Covered genes (seed genes): %s among %s\n" % (len(covered_genes), len(setNode)))
        log_fd.write("Covered gene products (seed nodes): %s among %s\n" % (len(seeds), g.number_of_nodes()))
        log_fd.close()
    return seed_to_score

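# Usage sketch (paths and identifier types below are hypothetical; the valid
# identifier types depend on the columns of node_description_file):
#
#   seed_to_score = get_node_association_score_mapping(
#       network_file = "data/network.sif",
#       network_file_identifier_type = "user entity id",
#       node_description_file = "data/node_descriptions.tsv",
#       association_scores_file = "data/association_scores.txt",
#       association_scores_file_identifier_type = "genesymbol",
#       log_file = "output/mapping.log")
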
def create_network_from_weight_and_score_files(edge_file_weights, edge_file_scores):
    g = network_utilities.create_network_from_sif_file(network_file=edge_file_weights[:-3] + "sif", weighted=True)
    setNode, setEdge, dictNode, dictEdge = network_utilities.get_nodes_and_edges_from_sif_file(file_name=edge_file_scores[:-3] + "sif", store_edge_type=True)
    for e, s in dictEdge.iteritems():
        u, v = e
        s = float(s)
        w = g.get_edge(u, v)
        s = s * 100 + 1
        #if s == 0:
        #    s = 0.1
        w /= s
        #print w
        g.add_edge(u, v, w)
    return g

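# Design note: the transformation above maps an edge score s in [0, 1] to a
# divisor 100*s + 1, so an unscored edge (s = 0) keeps its original weight
# while a fully scored edge (s = 1) has its weight shrunk by a factor of 101,
# making high-relevance edges "short" for shortest-path-based scoring.
# A minimal standalone sketch of the same arithmetic (the helper name is
# ours, not part of this module):
#
#   def rescale_edge_weight(w, s):
#       return w / (s * 100 + 1)
#
#   rescale_edge_weight(1.0, 0.0)  # -> 1.0 (unchanged)
#   rescale_edge_weight(1.0, 0.5)  # -> ~0.0196
#   rescale_edge_weight(1.0, 1.0)  # -> ~0.0099
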
def old_create_edge_scores_as_node_scores_file(network_file, node_scores_file, edge_scores_file, ignored_nodes = None, default_score = 0):
    """
    Creates an edge score file from node association scores, intended for
    comparing NetShort with other algorithms without using a separate edge
    reliability/relevance score. Each edge gets the mean of its end nodes'
    scores.
    """
    g = network_utilities.create_network_from_sif_file(network_file)
    setNode, setDummy, dictNode, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name = node_scores_file, store_edge_type = False)
    f = open(edge_scores_file, 'w')
    for u, v in g.edges_iter():
        if ignored_nodes is not None and u in ignored_nodes:
            score_u = default_score
        else:
            score_u = dictNode[u]
        if ignored_nodes is not None and v in ignored_nodes:
            score_v = default_score
        else:
            score_v = dictNode[v]
        f.write("%s %f %s\n" % (u, (score_u + score_v) / 2.0, v))
    f.close()
    return

def get_nodes_from_nodes_file(node_scores_file):
    nodes, set_dummy, dict_dummy, dict_dummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name = node_scores_file, store_edge_type = False)
    return nodes

def convert_file_using_new_id_mapping(file_to_be_converted, node_description_file, from_id_type, to_id_type, new_file, id_to_id_mapping=False, intermediate_mapping_file=None, intermediate_mapping_id_type=None):
    """
    Maps nodes given as from_id_type to their counterparts in to_id_type
    using node_description_file.
    Can convert a node / network file in SIF format (a node file with data,
    or a network file with the data in the middle column).
    id_to_id_mapping: output the id-to-id mapping as a TSV file
    """
    nodes, edges, node_to_data, edge_to_data = network_utilities.get_nodes_and_edges_from_sif_file(file_name = file_to_be_converted, store_edge_type = True, data_to_float=False)
    if intermediate_mapping_file is not None and intermediate_mapping_id_type is not None:
        reader = TsvReader.TsvReader(node_description_file, inner_delim = ",")
        columns, node_id_to_intermediate_ids = reader.read(fields_to_include = [from_id_type, intermediate_mapping_id_type], keys_to_include=nodes, merge_inner_values = True)
        reader = TsvReader.TsvReader(intermediate_mapping_file, inner_delim = ",")
        vals = reduce(lambda x, y: x + y, node_id_to_intermediate_ids.values())
        vals = reduce(lambda x, y: x + y, vals)
        columns, node_intermediate_id_to_new_ids = reader.read(fields_to_include = [intermediate_mapping_id_type, to_id_type], keys_to_include=vals, merge_inner_values = True)
        node_id_to_new_ids = {}
        for id in nodes:
            vals = reduce(lambda x, y: x + y, node_id_to_intermediate_ids[id])
            for val in vals:
                if val == "-":
                    in_val = ["-"]
                else:
                    in_val = node_intermediate_id_to_new_ids[val]
                node_id_to_new_ids.setdefault(id, []).extend(in_val)
    else:
        #node_id_to_new_ids, dummy = biana_output_converter.get_attribute_to_attribute_mapping(node_description_file, from_id_type, to_id_type, keys_to_include=nodes, include_inverse_mapping = False)
        reader = TsvReader.TsvReader(node_description_file, inner_delim = ",")
        columns, node_id_to_new_ids = reader.read(fields_to_include = [from_id_type, to_id_type], keys_to_include=nodes, merge_inner_values = True)
    f = open(new_file, 'w')
    if id_to_id_mapping:
        f.write("%s\t%s\n" % (from_id_type, to_id_type))
    if edges is None: # node file
        for v in nodes:
            if node_to_data.has_key(v):
                if node_id_to_new_ids.has_key(v):
                    vals = reduce(lambda x, y: x + y, node_id_to_new_ids[v])
                    for id in vals:
                        if id_to_id_mapping:
                            id = id.strip()
                            if id != "":
                                f.write("%s\t%s\n" % (v, id))
                        else:
                            f.write("%s %s\n" % (id, node_to_data[v]))
            else:
                if node_id_to_new_ids.has_key(v):
                    vals = reduce(lambda x, y: x + y, node_id_to_new_ids[v])
                    for id in vals:
                        if id_to_id_mapping:
                            id = id.strip()
                            if id != "":
                                f.write("%s\t%s\n" % (v, id))
                        else:
                            f.write("%s\n" % id)
    else: # network file
        for e in edges:
            u, v = e
            if edge_to_data.has_key(e):
                if node_id_to_new_ids.has_key(u):
                    vals = reduce(lambda x, y: x + y, node_id_to_new_ids[u])
                    for id in vals:
                        if node_id_to_new_ids.has_key(v):
                            vals2 = reduce(lambda x, y: x + y, node_id_to_new_ids[v])
                            for id2 in vals2:
                                f.write("%s %s %s\n" % (id, edge_to_data[e], id2))
    f.close()
    return

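# Usage sketch (paths and attribute names below are hypothetical; the
# available id types depend on the columns of node_description_file):
#
#   # Rewrite a node score file from BIANA user entity ids to gene symbols
#   convert_file_using_new_id_mapping("output/node_scores.sif",
#       "data/node_descriptions.tsv", "user entity id", "genesymbol",
#       "output/node_scores.genesymbol.sif")
#
#   # Dump the raw id-to-id correspondence as a two-column TSV instead
#   convert_file_using_new_id_mapping("output/node_scores.sif",
#       "data/node_descriptions.tsv", "user entity id", "genesymbol",
#       "output/id_mapping.tsv", id_to_id_mapping=True)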