# Example 1
def run_and_assess_performance_of_folds(k, edge_file_weights,
                                        edge_file_scores_prefix,
                                        node_file_scores,
                                        node_file_scores_prefix,
                                        result_file_prefix):
    """Run scoring on each of k cross-validation folds and gather labels.

    For fold i the training node file is compared with the full node file:
    nodes whose value differs are that fold's held-out test seeds.  A node
    gets label 1 only when it is a positive seed (value > 0) AND held out
    in the current fold; otherwise 0.  The per-node (score, label) pairs
    are written out through createROCRPredictionsData.
    """
    all_nodes, _s, node_to_value, _d = \
        network_utilities.get_nodes_and_edges_from_sif_file(
            file_name=node_file_scores[:-3] + "sif")
    node_to_pairs = {}
    for fold in xrange(k):
        _n, _e, train_node_to_value, _d2 = \
            network_utilities.get_nodes_and_edges_from_sif_file(
                file_name=node_file_scores_prefix + "_%i.sif" % fold)
        held_out = set()
        positive_seeds = set()
        for node, value in node_to_value.iteritems():
            # Differing value between full and training file => held out here
            if train_node_to_value[node] != value:
                held_out.add(node)
            if float(value) > 0:
                positive_seeds.add(node)
        fold_scores = run_and_save_results(
            edge_file_weights=edge_file_weights,
            edge_file_scores=edge_file_scores_prefix + "_%i.sif" % fold,
            dump_file=result_file_prefix + "_%i.dump" % fold)
        for node, score in fold_scores.iteritems():
            # Positive label only for held-out positive seeds of this fold.
            label = 1 if (node in positive_seeds and node in held_out) else 0
            node_to_pairs.setdefault(node, []).append((score, label))
    createROCRPredictionsData(node_to_pairs,
                              result_file_prefix + "_predictions.txt",
                              result_file_prefix + "_labels.txt")
    return
# Example 2
def old_create_edge_file_from_weight_and_score_files(edge_file_weights, edge_file_scores, out_file):
    """Write an edge file combining weights and scores: weight / (score*100 + 1).

    edge_file_weights: path whose ".sif" sibling holds edge weights
    edge_file_scores: path whose ".sif" sibling holds edge scores
    out_file: output path; each line is "u new_weight v"
    """
    setNode, setEdge, dictNode, dictEdgeWeight = network_utilities.get_nodes_and_edges_from_sif_file(file_name = edge_file_weights[:-3]+"sif", store_edge_type = True, data_to_float=False)
    setNode, setEdge, dictNode, dictEdge = network_utilities.get_nodes_and_edges_from_sif_file(file_name = edge_file_scores[:-3]+"sif", store_edge_type = True, data_to_float=False)
    f = open(out_file, "w")
    try:
        for e, s in dictEdge.iteritems():
            u, v = e
            s = float(s)
            # BUG FIX: data_to_float=False keeps values as strings, so the
            # weight must be converted before the division below (the
            # sibling create_network_from_weight_and_score_files gets a
            # numeric weight from the graph and did not have this problem).
            w = float(dictEdgeWeight[e])
            # Shift the score away from zero so the division is always defined.
            s = s * 100 + 1
            w /= s
            f.write("%s %s %s\n" % (u, w, v))
    finally:
        # BUG FIX: the file handle was never closed.
        f.close()
    return
# Example 3
def score_by_random_model(file_node_scores,
                          file_edge_scores,
                          file_output_scores,
                          default_score=0,
                          max_score=1):
    """Assign every node a uniform random score in [default_score, max_score].

    file_node_scores: sif-style file listing the nodes to score
    file_edge_scores: accepted for signature compatibility; unused here
    file_output_scores: output path, one "node<TAB>score" line per node
    """
    from random import uniform
    # Parse the input first so a parse failure does not truncate the output
    # file (previously the output was opened before this call).  Also avoid
    # shadowing the builtin `id` and the duplicate `dictDummy` unpack target.
    setNode, setDummy, dictDummy, dictDummy2 = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=file_node_scores, store_edge_type=False)
    f = open(file_output_scores, "w")
    try:
        for node in setNode:
            f.write("%s\t%f\n" % (node, uniform(default_score, max_score)))
    finally:
        f.close()
    return
# Example 4
def get_node_association_score_mapping(network_file, network_file_identifier_type, node_description_file, association_scores_file, association_scores_file_identifier_type, log_file = None, default_seed_score=1.0):
    """
    Maps genes and their scores to nodes in the network using given association_scores_file, correspondance identifiers

    Returns a dict mapping each seed node to the mean score of the scored
    genes associated with that node.  Diagnostics are appended to log_file
    when it is given.
    """
    g = network_utilities.create_network_from_sif_file(network_file)
    nodes = g.nodes()
    setNode, setDummy, dictNode, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name = association_scores_file, store_edge_type = False)
    if dictNode is None:
        # Score file listed genes without values; fall back to a uniform score.
        dictNode = dict([ (v, default_seed_score) for v in setNode ])
    node_to_genes, gene_to_nodes = biana_output_converter.get_attribute_to_attribute_mapping(node_description_file, network_file_identifier_type, association_scores_file_identifier_type, keys_to_include=set(nodes))
    covered_genes = set()
    seeds = set()
    seed_to_score = {}
    if log_file is not None:
        log_fd = open(log_file, "a")
    else:
        log_fd = None
    for v in nodes:
        gene_with_score = setNode & node_to_genes[v]
        covered_genes |= gene_with_score
        if len(gene_with_score) > 0:
            seeds.add(v)
            if len(gene_with_score) > 1:
                if log_fd is not None:
                    log_fd.write("More than one gene: %s for %s\n" % (gene_with_score, v))
            # BUG FIX: average over this node's own scored genes.  The old
            # loop iterated `covered_genes` (the accumulated set over ALL
            # nodes processed so far), which mixed other nodes' gene scores
            # into this node's score and made results depend on iteration
            # order.
            score = 0.0
            for gene in gene_with_score:
                score += float(dictNode[gene])
            score /= len(gene_with_score)
            if score <= 0:
                if log_fd is not None:
                    log_fd.write("non-positive seed score %s %s genes: %s\n" % (v, score, node_to_genes[v]))
            seed_to_score[v] = score
    if log_fd is not None:
        log_fd.write("Covered genes (seed genes): %s among %s\n" % (len(covered_genes), len(setNode)))
        log_fd.write("Covered gene products (seed nodes): %s among %s\n" % (len(seeds), g.number_of_nodes()))
        log_fd.close()
    return seed_to_score
# Example 5
def create_network_from_weight_and_score_files(edge_file_weights,
                                               edge_file_scores):
    """Build a weighted network whose edge weights are the original weights
    divided by (score * 100 + 1), combining a weight file with a score file.
    """
    graph = network_utilities.create_network_from_sif_file(
        network_file=edge_file_weights[:-3] + "sif", weighted=True)
    _nodes, _edges, _node_data, edge_to_score = \
        network_utilities.get_nodes_and_edges_from_sif_file(
            file_name=edge_file_scores[:-3] + "sif", store_edge_type=True)
    for (node_a, node_b), raw_score in edge_to_score.iteritems():
        weight = graph.get_edge(node_a, node_b)
        # Shift the score away from zero so the division is always defined.
        divisor = float(raw_score) * 100 + 1
        graph.add_edge(node_a, node_b, weight / divisor)
    return graph
# Example 6
def old_create_edge_scores_as_node_scores_file(edges, node_to_score, edge_scores_file, ignored_nodes = None, default_score = 0):
    """
    Creates edge score file from node association scores, intended comparing netshort with other algorithms without using other edge reliability/relevance score

    edges: iterable of (u, v) node pairs
    node_to_score: dict mapping node -> numeric score
    edge_scores_file: output path; each line is "u avg_score v"
    ignored_nodes: optional collection; members get default_score instead
    default_score: score substituted for ignored nodes
    """
    # BUG FIX: the old body referenced undefined globals `network_file` and
    # `node_scores_file` and never used the `edges` / `node_to_score`
    # parameters, so every call raised NameError.  Implement the documented
    # behavior from the parameters instead.
    f = open(edge_scores_file, 'w')
    try:
        for u, v in edges:
            if ignored_nodes is not None and u in ignored_nodes:
                score_u = default_score
            else:
                score_u = node_to_score[u]
            if ignored_nodes is not None and v in ignored_nodes:
                score_v = default_score
            else:
                score_v = node_to_score[v]
            # 2.0 keeps the average exact even for integer scores.
            f.write("%s %f %s\n" % (u, (score_u + score_v) / 2.0, v))
    finally:
        f.close()
    return
# Example 7
def get_nodes_from_nodes_file(node_scores_file):
    """Return the set of node identifiers listed in a node scores file."""
    parsed = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=node_scores_file, store_edge_type=False)
    # Only the first element (the node set) is needed.
    return parsed[0]
# Example 8
def convert_file_using_new_id_mapping(file_to_be_converted, node_description_file, from_id_type, to_id_type, new_file, id_to_id_mapping=False, intermediate_mapping_file=None, intermediate_mapping_id_type=None):
    """
    Maps nodes given as from_id_type to their correspondants in to_id_type using node_description_file
    Can convert node / network file in sif format (node file with data, network file with data in the middle)
    id_to_id_mapping: Output id to id mapping as TSV file
    """
    nodes, edges, node_to_data, edge_to_data = network_utilities.get_nodes_and_edges_from_sif_file(file_name = file_to_be_converted, store_edge_type = True, data_to_float=False)

    if intermediate_mapping_file is not None and intermediate_mapping_id_type is not None:
        # Two-hop mapping: from_id -> intermediate id -> to_id
        reader = TsvReader.TsvReader(node_description_file, inner_delim = ",")
        columns, node_id_to_intermediate_ids = reader.read(fields_to_include = [from_id_type, intermediate_mapping_id_type], keys_to_include=nodes, merge_inner_values = True)
        reader = TsvReader.TsvReader(intermediate_mapping_file, inner_delim = ",")
        vals = reduce(lambda x,y: x+y, node_id_to_intermediate_ids.values())
        vals = reduce(lambda x,y: x+y, vals)
        columns, node_intermediate_id_to_new_ids = reader.read(fields_to_include = [intermediate_mapping_id_type, to_id_type], keys_to_include=vals, merge_inner_values = True)
        node_id_to_new_ids = {}
        for id in nodes:
            vals = reduce(lambda x,y: x+y, node_id_to_intermediate_ids[id])
            for val in vals:
                if val == "-":
                    # "-" marks a missing intermediate id; propagate it as-is.
                    in_val = ["-"]
                else:
                    in_val = node_intermediate_id_to_new_ids[val]
                node_id_to_new_ids.setdefault(id, []).extend(in_val)
    else:
        # Direct mapping: from_id -> to_id
        reader = TsvReader.TsvReader(node_description_file, inner_delim = ",")
        columns, node_id_to_new_ids = reader.read(fields_to_include = [from_id_type, to_id_type], keys_to_include=nodes, merge_inner_values = True)

    f = open(new_file, 'w')
    if id_to_id_mapping:
        f.write("%s\t%s\n" % (from_id_type, to_id_type))

    if edges is None:
        # Node file: one output line per mapped id (with node data when present).
        for v in nodes:
            if node_to_data.has_key(v):
                if node_id_to_new_ids.has_key(v):
                    vals = reduce(lambda x,y: x+y, node_id_to_new_ids[v])
                    for id in vals:
                        if id_to_id_mapping:
                            id = id.strip()
                            if id != "":
                                f.write("%s\t%s\n" % (v, id))
                        else:
                            f.write("%s %s\n" % (id, node_to_data[v]))
            else:
                if node_id_to_new_ids.has_key(v):
                    vals = reduce(lambda x,y: x+y, node_id_to_new_ids[v])
                    for id in vals:
                        if id_to_id_mapping:
                            id = id.strip()
                            if id != "":
                                f.write("%s\t%s\n" % (v, id))
                        else:
                            # BUG FIX: was f.write("%s\n", id) -- the stray
                            # comma passed `id` as a second positional
                            # argument and raised TypeError; %-format it.
                            f.write("%s\n" % id)
    else:
        # Network file: emit every combination of mapped endpoint ids.
        for e in edges:
            u,v = e
            if edge_to_data.has_key(e):
                if node_id_to_new_ids.has_key(u):
                    vals = reduce(lambda x,y: x+y, node_id_to_new_ids[u])
                    for id in vals:
                        if node_id_to_new_ids.has_key(v):
                            vals2 = reduce(lambda x,y: x+y, node_id_to_new_ids[v])
                            for id2 in vals2:
                                f.write("%s %s %s\n" % (id, edge_to_data[e], id2))
    f.close()
    return
    return