def get_seeds_from_node_scores_file(node_scores_file, default_non_seed_score):
    """
    Read a node scores file and pick out the seed nodes.

    The file is parsed with network_utilities.get_nodes_and_edges_from_sif_file.
    A node is considered a seed when its score is strictly greater than
    default_non_seed_score.

    Returns a (seeds, nodes) tuple, where seeds is a set of the seed nodes and
    nodes is the first value returned by the parser.
    """
    all_nodes, _, score_by_node, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=node_scores_file, store_edge_type=False)
    # Keep only the nodes scoring above the non-seed baseline.
    seeds = set(
        candidate for candidate in score_by_node
        if score_by_node[candidate] > default_non_seed_score)
    return seeds, all_nodes
def convert_ids_using_mapping_file(input_file, mapping_file, output_file, one_gene_per_node=True, delim="\t"):
    """
    Write the mapped ids of the nodes found in input_file to output_file.

    Nodes are read from input_file with
    network_utilities.get_nodes_and_edges_from_sif_file and translated using
    the mapping returned by get_id_to_mapped_id_mapping(mapping_file, delim).
    Nodes that are absent from the mapping are skipped. The mapped ids are
    written one per line, sorted in descending order.

    Args:
        input_file: file whose nodes are to be converted.
        mapping_file: file passed to get_id_to_mapped_id_mapping.
        output_file: path of the file to write (opened in 'w' mode).
        one_gene_per_node: if True only the first mapped id of each node is
            kept, otherwise all of its mapped ids are written.
        delim: delimiter forwarded to get_id_to_mapped_id_mapping.

    Returns:
        None
    """
    nodes, _, _, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=input_file, store_edge_type=False)
    id_to_mapped_ids = get_id_to_mapped_id_mapping(mapping_file, delim=delim)
    values = []
    for node in nodes:
        if node not in id_to_mapped_ids:
            continue
        mapped_ids = id_to_mapped_ids[node]
        if one_gene_per_node:
            values.append(mapped_ids[0])
        else:
            values.extend(mapped_ids)
    values.sort(reverse=True)
    # The context manager closes the file even if a write fails part-way.
    with open(output_file, 'w') as f:
        for gene in values:
            f.write("%s\n" % (gene))
def get_seeds_from_node_scores_file(node_scores_file, default_non_seed_score):
    """
    Collect the seed nodes listed in a node scores file.

    The file is parsed with network_utilities.get_nodes_and_edges_from_sif_file;
    every node whose score is strictly greater than default_non_seed_score is
    treated as a seed.

    Returns a (seeds, nodes) tuple: the set of seed nodes and the first value
    returned by the parser.
    """
    parsed_nodes, _, node_scores, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=node_scores_file, store_edge_type=False)
    seed_nodes = set()
    for current in node_scores:
        # Skip anything that does not exceed the non-seed baseline.
        if not node_scores[current] > default_non_seed_score:
            continue
        seed_nodes.add(current)
    return seed_nodes, parsed_nodes
def get_scores(score_file):
    """
    Load the scores from a scoring file created by GUILD.

    Each line of the file holds a node and its score separated by whitespace.
    The file is parsed with network_utilities.get_nodes_and_edges_from_sif_file
    (delim=None, data_to_float=True), and the resulting node-to-score
    dictionary, whose values are floats, is returned.
    """
    _, _, scores_by_node, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=score_file,
        store_edge_type=False,
        delim=None,
        data_to_float=True)
    return scores_by_node
def output_mapped_node_id_scores(output_scores_file, node_mapping_file, one_gene_per_node=True, output_file=None):
    """
    Output mapped ids of nodes together with their scores.

    Node scores are read from output_scores_file with
    network_utilities.get_nodes_and_edges_from_sif_file and node ids are
    translated using get_id_to_mapped_id_mapping(node_mapping_file). Nodes
    missing from the mapping are skipped. The (score, mapped id) pairs are
    ordered from highest to lowest.

    Args:
        output_scores_file: file containing the node scores.
        node_mapping_file: file passed to get_id_to_mapped_id_mapping.
        one_gene_per_node: if True only the first mapped id of each node is
            used, otherwise every mapped id receives the node's score.
        output_file: if given, three files are written:
            output_file             - "id<TAB>score" for every pair,
            output_file + ".ranks"  - "id<TAB>rank" (1-based position),
            output_file + ".unique" - "id<TAB>score" for the first
                                      (highest ranked) occurrence of each id.
            If None, every "id<TAB>score" pair is printed to stdout.

    Returns:
        None
    """
    _, _, node_to_score, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=output_scores_file, store_edge_type=False)
    id_to_mapped_ids = get_id_to_mapped_id_mapping(node_mapping_file)
    values = []
    for node, score in node_to_score.items():
        if node not in id_to_mapped_ids:
            continue
        if one_gene_per_node:
            genes = [id_to_mapped_ids[node][0]]
        else:
            genes = id_to_mapped_ids[node]
        for gene in genes:
            values.append((score, gene))
    values.sort(reverse=True)
    if output_file is not None:
        included = set()
        # Context managers close all three files even if a write fails.
        with open(output_file, 'w') as f:
            with open(output_file + ".ranks", 'w') as f2:
                with open(output_file + ".unique", 'w') as f3:
                    for rank, (score, gene) in enumerate(values, 1):
                        f.write("%s\t%s\n" % (gene, str(score)))
                        f2.write("%s\t%d\n" % (gene, rank))
                        if gene not in included:
                            f3.write("%s\t%s\n" % (gene, str(score)))
                            included.add(gene)
    else:
        # Print every ranked pair. Previously a single print ran outside any
        # loop and relied on leftover loop variables, so it emitted at most
        # one line and raised NameError when nothing had been mapped.
        # NOTE(review): "%f" requires numeric scores - confirm the parser's
        # default converts the score column to float.
        for score, gene in values:
            print("%s\t%f" % (gene, score))
def output_mapped_node_id_scores(output_scores_file, node_mapping_file, one_gene_per_node=True, output_file=None):
    """
    Output mapped ids of nodes together with their scores.

    Node scores are read from output_scores_file with
    network_utilities.get_nodes_and_edges_from_sif_file and node ids are
    translated using get_id_to_mapped_id_mapping(node_mapping_file). Nodes
    missing from the mapping are skipped. The (score, mapped id) pairs are
    ordered from highest to lowest.

    Args:
        output_scores_file: file containing the node scores.
        node_mapping_file: file passed to get_id_to_mapped_id_mapping.
        one_gene_per_node: if True only the first mapped id of each node is
            used, otherwise every mapped id receives the node's score.
        output_file: if given, three files are written:
            output_file             - "id<TAB>score" for every pair,
            output_file + ".ranks"  - "id<TAB>rank" (1-based position),
            output_file + ".unique" - "id<TAB>score" for the first
                                      (highest ranked) occurrence of each id.
            If None, every "id<TAB>score" pair is printed to stdout.

    Returns:
        None
    """
    _, _, node_to_score, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=output_scores_file, store_edge_type=False)
    id_to_mapped_ids = get_id_to_mapped_id_mapping(node_mapping_file)
    values = []
    for node, score in node_to_score.items():
        if node not in id_to_mapped_ids:
            continue
        if one_gene_per_node:
            genes = [id_to_mapped_ids[node][0]]
        else:
            genes = id_to_mapped_ids[node]
        for gene in genes:
            values.append((score, gene))
    values.sort(reverse=True)
    if output_file is None:
        # Print every ranked pair. Previously a single print ran outside any
        # loop and relied on leftover loop variables, so it emitted at most
        # one line and raised NameError when nothing had been mapped.
        # NOTE(review): "%f" requires numeric scores - confirm the parser's
        # default converts the score column to float.
        for score, gene in values:
            print("%s\t%f" % (gene, score))
        return
    included = set()
    # Context managers close all three files even if a write fails.
    with open(output_file, 'w') as f:
        with open(output_file + ".ranks", 'w') as f2:
            with open(output_file + ".unique", 'w') as f3:
                for rank, (score, gene) in enumerate(values, 1):
                    f.write("%s\t%s\n" % (gene, str(score)))
                    f2.write("%s\t%d\n" % (gene, rank))
                    if gene not in included:
                        f3.write("%s\t%s\n" % (gene, str(score)))
                        included.add(gene)
def convert_ids_using_mapping_file(input_file, mapping_file, output_file, one_gene_per_node=True, delim="\t"):
    """
    Write the mapped ids of the nodes found in input_file to output_file.

    Nodes are read from input_file with
    network_utilities.get_nodes_and_edges_from_sif_file and translated using
    the mapping returned by get_id_to_mapped_id_mapping(mapping_file, delim).
    Nodes that are absent from the mapping are skipped. The mapped ids are
    written one per line, sorted in descending order.

    Args:
        input_file: file whose nodes are to be converted.
        mapping_file: file passed to get_id_to_mapped_id_mapping.
        output_file: path of the file to write (opened in 'w' mode).
        one_gene_per_node: if True only the first mapped id of each node is
            kept, otherwise all of its mapped ids are written.
        delim: delimiter forwarded to get_id_to_mapped_id_mapping.

    Returns:
        None
    """
    nodes, _, _, _ = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=input_file, store_edge_type=False)
    id_to_mapped_ids = get_id_to_mapped_id_mapping(mapping_file, delim=delim)
    values = []
    for node in nodes:
        if node not in id_to_mapped_ids:
            continue
        if one_gene_per_node:
            genes = [id_to_mapped_ids[node][0]]
        else:
            genes = id_to_mapped_ids[node]
        for gene in genes:
            values.append(gene)
    values.sort(reverse=True)
    # The context manager closes the file even if a write fails part-way.
    with open(output_file, 'w') as f:
        for gene in values:
            f.write("%s\n" % (gene))
def get_scores(score_file):
    """
    Parse a scoring file created by GUILD (node <whitespace> score).

    Returns the node-to-score dictionary built by
    network_utilities.get_nodes_and_edges_from_sif_file, where the values are
    floats.
    """
    # Whitespace-delimited input (delim=None) with scores converted to float.
    parser_options = {
        "file_name": score_file,
        "store_edge_type": False,
        "delim": None,
        "data_to_float": True,
    }
    _, _, node_scores, _ = network_utilities.get_nodes_and_edges_from_sif_file(**parser_options)
    return node_scores