def output_cluster_bs_data(self, communicator, parameter_list, data):
    """
    12-20-05
        Unpickle a batch of cluster rows and write one darwin-format record
        per row to the output handle.

        communicator is accepted but not used by this method.
        parameter_list unpacks to (outf, gene_id2symbol, dataset_no2desc).
        data is a cPickle string; every unpickled row unpacks to
            (id, core_vertex_ls, on_dataset_index_ls, ls_to_return)
        and every entry of ls_to_return unpacks to 8 fields, of which the
        score, bs_no_list and target_gene_no_list are written out.

        Record layout (the enclosing "out:=[ ... ]:" is not written here):
            [id, [core gene symbols],
                {[[TF symbols], [TF target symbols], score], ...},
                {[dataset_no, description], ...}],

        Gene ids are translated with dict_map(gene_id2symbol, ...); dict_map
        is defined elsewhere (presumably a per-element dictionary lookup).
    """
    outf, gene_id2symbol, dataset_no2desc = parameter_list
    for cluster_row in cPickle.loads(data):
        cluster_id, core_vertex_ls, on_dataset_index_ls, ls_to_return = cluster_row

        # dataset_no2desc is looked up with dataset_index+1
        dataset_no_desc_ls = [[dataset_index + 1, dataset_no2desc[dataset_index + 1]]
            for dataset_index in on_dataset_index_ls]

        # one [TF symbols, target symbols, score] triple per binding-site row
        tfbs_row_darwin_ls = []
        for tfbs_row in ls_to_return:
            (score, score_type, bs_no_list, target_gene_no_list,
                global_ratio, local_ratio, expected_ratio, unknown_ratio) = tfbs_row
            tf_symbol_ls = dict_map(gene_id2symbol, bs_no_list)
            target_symbol_ls = dict_map(gene_id2symbol, target_gene_no_list)
            tfbs_row_darwin_ls.append([tf_symbol_ls, target_symbol_ls, score])

        core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls)

        # [1:-1] strips the outer [] of repr() so the content can sit inside {}
        record_fields = (cluster_id, repr(core_vertex_symbol_ls),
            repr(tfbs_row_darwin_ls)[1:-1], repr(dataset_no_desc_ls)[1:-1])
        outf.write('[%s, %s,{%s},{%s}],\n'%record_fields)
def output_function_group1(self, curs, writer, function_struc_dict, gene_no2gene_id, go_no2go_id, go_no2name, go_no2accuracy,
        go_no2accuracy_pair, prediction_pair2lca_list=None, gene_no=None):
    """
    03-15-05
        copied from output_function_group()

    Write one row per (go_no, function_struc) pair of function_struc_dict
    through writer.writerow().

    Without prediction_pair2lca_list the row is
        [go_id, go_name, p-values joined by ';', number of clusters, accuracy ratio]
    With it, two columns are inserted after go_name:
        is_correct_lca and the lca go_ids joined by ';'
    (the lca list is empty unless is_correct_lca equals 1).

    Exits with status 2 when a prediction is marked correct by lca but
    prediction_pair2lca_list holds no (non-empty) list for (gene_no, go_no).
    A warning goes to stderr when dict_map() returns fewer go_ids than there
    were go_nos.

    curs, gene_no2gene_id and go_no2accuracy are not used by this method.
    """
    for go_no, function_struc in function_struc_dict.iteritems():
        # everything that gets joined below must be a string
        p_value_str_ls = [repr(p_value) for p_value in function_struc.p_value_list]
        mcl_id_str_ls = [repr(mcl_id) for mcl_id in function_struc.cluster_array]

        if not prediction_pair2lca_list:
            writer.writerow([go_no2go_id[go_no], go_no2name[go_no],
                ';'.join(p_value_str_ls), len(mcl_id_str_ls), go_no2accuracy_pair[go_no].ratio])
            continue

        # prediction_pair2lca_list tells whether the gene is known or unknown
        lca_id_ls = []
        if function_struc.is_correct_lca==1:
            lca_id_ls = prediction_pair2lca_list.get((gene_no,go_no))
            if not lca_id_ls:
                sys.stderr.write("Error: prediction pair gene=%s, go_no=%s is correct by lca, but no lca_list.\n"%
                    (gene_no, go_no))
                sys.exit(2)
            no_of_lca_go_nos = len(lca_id_ls)
            lca_id_ls = dict_map(go_no2go_id, lca_id_ls)
            if no_of_lca_go_nos!=len(lca_id_ls):
                sys.stderr.write("Warning: some go_nos in lca_list have no go_ids for gene: %s and go_no: %s.\n\tList shrinked from %s to %s\n"%
                    (gene_no, go_no, no_of_lca_go_nos, len(lca_id_ls)))

        writer.writerow([go_no2go_id[go_no], go_no2name[go_no], function_struc.is_correct_lca, ';'.join(lca_id_ls),
            ';'.join(p_value_str_ls), len(mcl_id_str_ls), go_no2accuracy_pair[go_no].ratio])
def _pattern_darwin_format(self, curs, pattern_table, gene_no2id, go_no2id, output_fname, mcl_id_set=None):
    """
    2007-01-07
        add edge_set

    Dump the rows of pattern_table into output_fname in darwin format:
        r:=[
        [mcl_id, vertex_set, edge_set, recurrence_array, recurrence, connectivity, unknown_ratio],
        [...],
        ...
        []]:

    curs: database cursor; rows are pulled 5000 at a time through a
        server-side cursor named "crs".
    pattern_table: table name, interpolated into the SQL as is.
    gene_no2id: handed to dict_map(..., type=2) to translate the members of
        each vertex set and each edge (dict_map is defined elsewhere).
    go_no2id: not used by this method.
    output_fname: file to (over)write.
    mcl_id_set: if non-empty, only patterns whose id is in it are written.

    The output file is closed even when the dump is interrupted by an error,
    and the "crs" cursor is closed once the dump is complete, as the sibling
    methods that declare "crs" do, so the name can be declared again.
    """
    sys.stderr.write("pattern...\n")
    of = open(output_fname, 'w')
    try:
        of.write('r:=[\n')
        curs.execute("DECLARE crs CURSOR FOR select id, vertex_set, edge_set, recurrence_array, recurrence, "
            "connectivity, unknown_gene_ratio from %s"%pattern_table)
        curs.execute("fetch 5000 from crs")
        rows = curs.fetchall()
        while rows:
            for row in rows:
                mcl_id, vertex_set, edge_set, recurrence_array, recurrence, connectivity, unknown_ratio = row
                if mcl_id_set and mcl_id not in mcl_id_set:
                    continue
                # vertex_set arrives as text like "{1,2,3}"
                vertex_set = [int(gene_no) for gene_no in vertex_set[1:-1].split(',')]
                vertex_set = dict_map(gene_no2id, vertex_set, type=2)
                # edge_set arrives as text like "{{1,2},{2,3}}"
                edge_ls = []
                for edge in edge_set[2:-2].split('},{'):
                    edge = [int(gene_no) for gene_no in edge.split(',')]
                    edge_ls.append(dict_map(gene_no2id, edge, type=2))
                # swap the surrounding {} for []
                recurrence_array = '[' + recurrence_array[1:-1] + ']'
                of.write('[%s, %s, %s, %s, %s, %s, %s],\n'%(mcl_id, repr(vertex_set), repr(edge_ls), recurrence_array,
                    recurrence, connectivity, unknown_ratio))
            curs.execute("fetch 5000 from crs")
            rows = curs.fetchall()
        of.write('[]]:\n')    #add the last blank list
    finally:
        of.close()
    curs.execute("close crs")
    sys.stderr.write("pattern darwin format done.\n")
def _prediction_darwin_format(self, curs, p_gene_table, gene_p_table, gene_no2id, go_no2id, output_fname):
    """
    12-01-05
        deal with lca_list={}
    03-01-06
        add no_of_distinct_funcitons_from_gene_p_table in the output
    2006-09-25
        now defunct

    Dump the predictions joined from p_gene_table and gene_p_table into
    output_fname in darwin format:
        r:=[
        [gene_id, go_id, is_correct_lca, p_value, mcl_id, lca_list, no_of_distinct_funcitons_from_gene_p_table],
        [...],
        []]:

    curs: database cursor; two server-side cursors ("crs_1", then "crs")
        are declared, read 5000 rows at a time and closed.
    p_gene_table, gene_p_table: table names, interpolated into the SQL as is.
    gene_no2id: gene_no -> gene id; the gene_no itself is written when the
        lookup is missing or false-valued.
    go_no2id: go_no -> go id; also handed to dict_map(..., type=2) for the
        lca list (dict_map is defined elsewhere).
    output_fname: file to (over)write; it is closed even when the dump is
        interrupted by an error.

    Raises KeyError if a gene_no of the second query did not appear in the
    first one.
    """
    sys.stderr.write("prediction...\n")
    #03-01-06 first collect, for every gene_no, its set of p_gene_id_src
    curs.execute("DECLARE crs_1 CURSOR FOR SELECT p.gene_no, g.p_gene_id_src from %s p, %s g "
        "where p.p_gene_id=g.p_gene_id_src"%(p_gene_table, gene_p_table))
    curs.execute("fetch 5000 from crs_1")
    rows = curs.fetchall()
    gene_no2p_gene_id_src_set = {}
    while rows:
        for gene_no, p_gene_id_src in rows:
            if gene_no not in gene_no2p_gene_id_src_set:
                gene_no2p_gene_id_src_set[gene_no] = Set()
            gene_no2p_gene_id_src_set[gene_no].add(p_gene_id_src)
        curs.execute("fetch 5000 from crs_1")
        rows = curs.fetchall()
    curs.execute("close crs_1")

    of = open(output_fname, 'w')
    try:
        of.write('r:=[\n')
        curs.execute("DECLARE crs CURSOR FOR select p.gene_no, p.go_no, p.is_correct_lca, p.avg_p_value, p.mcl_id, p.lca_list "
            "from %s p, %s g where g.p_gene_id = p.p_gene_id"%(p_gene_table, gene_p_table))
        curs.execute("fetch 5000 from crs")
        rows = curs.fetchall()
        while rows:
            for row in rows:
                gene_no, go_no, is_correct_lca, p_value, mcl_id, lca_list = row
                if lca_list and len(lca_list)>2:
                    # text like "{1,2}"
                    lca_list = [int(lca_go_no) for lca_go_no in lca_list[1:-1].split(',')]
                    lca_list = dict_map(go_no2id, lca_list, type=2)
                else:
                    #12-01-05 NULL, or "{}" which is just blank
                    lca_list = []
                of.write("['%s', '%s', %s, %s, %s, %s, %s],\n"%(gene_no2id.get(gene_no) or gene_no, go_no2id[go_no], is_correct_lca,
                    p_value, mcl_id, repr(lca_list), len(gene_no2p_gene_id_src_set[gene_no])))    #03-01-06
            curs.execute("fetch 5000 from crs")
            rows = curs.fetchall()
        of.write('[]]:\n')    #add the last blank list
    finally:
        of.close()
    curs.execute("close crs")
    sys.stderr.write("prediction darwin format done.\n")
def run(self):
    """
    2007-08-30
        add label_type 3

    Connect to MySQL, fetch the position/size entry of every population,
    pick the labels according to self.label_type and draw the clustered
    strain locations; optionally draw the site network as well.

    label_type:
        1: the population ids
        2: the second element of each position/size entry (diameter_ls)
        3: int(round(selfing rate*1000)) per population; the string '0'
            when the population has no selfing rate or a false-valued one
    Exits with status 3 when label_type is 3 but self.selfing_rate_table
    is None.
    """
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()

    popid2pos_size = self.get_popid2pos_size(curs, self.popid2ecotypeid_table)
    popid_ls = popid2pos_size.keys()
    pos_size_ls = dict_map(popid2pos_size, popid_ls)
    weighted_pos_ls = [pos_size[0] for pos_size in pos_size_ls]
    diameter_ls = [pos_size[1] for pos_size in pos_size_ls]

    if self.label_type == 1:
        label_ls = popid_ls
    elif self.label_type == 2:
        label_ls = diameter_ls
    elif self.label_type == 3:
        if self.selfing_rate_table is None:
            sys.stderr.write("Label type is 3(selfing rate), but no selfing_rate_table specified\n")
            sys.exit(3)
        popid2selfing_rate = self.get_popid2selfing_rate(curs, self.selfing_rate_table, self.which_method)
        label_ls = []
        for popid in popid_ls:
            # missing and NULL (false-valued) rates both fall back to '0'
            if popid in popid2selfing_rate and popid2selfing_rate[popid]:
                label_ls.append(int(round(popid2selfing_rate[popid] * 1000)))
            else:
                label_ls.append('0')

    self.draw_clustered_strain_location(label_ls, weighted_pos_ls, diameter_ls, self.label_type,
        self.label_type2label_name, pic_area=self.pic_area, output_fname_prefix=self.output_fname_prefix)

    if self.draw_site_network:
        from CreatePopulation import CreatePopulation
        population_creator = CreatePopulation()
        lat_lon_ls, pos2ecotypeid_ls = population_creator.get_pos2ecotypeid_ls(curs, self.strain_info_table)
        g, node_label2pos_counts = population_creator.divide_data_by_geography(lat_lon_ls, self.max_dist)
        self.DrawSiteNetwork(g, node_label2pos_counts, self.pic_area, self.output_fname_prefix)
def get_mcl_id2vertex_edge_recurrence(self, curs, pattern_table, gene_no2id, go_no2id, mcl_id_set):
    """
    2007-02-08

    Read pattern_table through a server-side cursor ("crs", 5000 rows per
    fetch, closed at the end) and return a dictionary
        mcl_id -> [vertex_set, edge_set, recurrence positions]
    restricted to the ids found in mcl_id_set.

    vertex_set and every edge are integer lists parsed from the database
    text and passed through dict_map(gene_no2id, ..., type=2) (dict_map is
    defined elsewhere). The recurrence positions are the 1-based indices of
    the recurrence_array entries that are >= 0.8.

    go_no2id is not used by this method.
    """
    sys.stderr.write("Getting mcl_id2vertex_edge_recurrence ...\n")
    mcl_id2vertex_edge_recurrence = {}
    curs.execute("DECLARE crs CURSOR FOR select id, vertex_set, edge_set, recurrence_array from %s"%pattern_table)
    curs.execute("fetch 5000 from crs")
    batch = curs.fetchall()
    while batch:
        for mcl_id, vertex_text, edge_text, recurrence_text in batch:
            if mcl_id not in mcl_id_set:
                continue
            # "{1,2,3}" -> translated vertex list
            vertex_ls = [int(gene_no) for gene_no in vertex_text[1:-1].split(',')]
            vertex_ls = dict_map(gene_no2id, vertex_ls, type=2)
            # "{{1,2},{2,3}}" -> list of translated edges
            edge_ls = []
            for edge_str in edge_text[2:-2].split('},{'):
                gene_no_pair = [int(gene_no) for gene_no in edge_str.split(',')]
                edge_ls.append(dict_map(gene_no2id, gene_no_pair, type=2))
            # keep the 1-based position of every dataset with recurrence >= 0.8
            on_position_ls = []
            for position, recurrence in enumerate(recurrence_text[1:-1].split(',')):
                if float(recurrence)>=0.8:
                    on_position_ls.append(position+1)
            mcl_id2vertex_edge_recurrence[mcl_id] = [vertex_ls, edge_ls, on_position_ls]
        curs.execute("fetch 5000 from crs")
        batch = curs.fetchall()
    curs.execute("close crs")
    sys.stderr.write("done.\n")
    return mcl_id2vertex_edge_recurrence
def _cluster_darwin_format(self, curs, good_cluster_table, gene_no2id, go_no2id, output_fname):
    """
    Dump the rows of good_cluster_table into output_fname in darwin format:
        r:=[
        [mcl_id, vertex_set, recurrence_array, recurrence, connectivity, unknown_ratio, size, go_id_list, p_value_list],
        [...],
        ...
        []]:

    curs: database cursor; rows are pulled 5000 at a time through a
        server-side cursor named "crs".
    good_cluster_table: table name, interpolated into the SQL as is.
    gene_no2id, go_no2id: handed to dict_map(..., type=2) to translate the
        vertex set and the go_no list (dict_map is defined elsewhere).
    output_fname: file to (over)write.

    The output file is closed even when the dump is interrupted by an error,
    and the "crs" cursor is closed once the dump is complete, as the sibling
    methods that declare "crs" do, so the name can be declared again.
    """
    sys.stderr.write("cluster...\n")
    of = open(output_fname, 'w')
    try:
        of.write('r:=[\n')
        curs.execute("DECLARE crs CURSOR FOR select mcl_id, vertex_set, recurrence_array, recurrence, "
            "connectivity, unknown_ratio, size, go_no_list, p_value_list from %s"%good_cluster_table)
        curs.execute("fetch 5000 from crs")
        rows = curs.fetchall()
        while rows:
            for row in rows:
                (mcl_id, vertex_set, recurrence_array, recurrence, connectivity, unknown_ratio,
                    size, go_no_list, p_value_list) = row
                # vertex_set and go_no_list arrive as text like "{1,2,3}"
                vertex_set = [int(gene_no) for gene_no in vertex_set[1:-1].split(',')]
                vertex_set = dict_map(gene_no2id, vertex_set, type=2)
                go_no_list = [int(go_no) for go_no in go_no_list[1:-1].split(',')]
                go_id_list = dict_map(go_no2id, go_no_list, type=2)
                # the numeric arrays only need their {} swapped for []
                recurrence_array = '[' + recurrence_array[1:-1] + ']'
                p_value_list = '[' + p_value_list[1:-1] + ']'
                of.write('[%s, %s, %s, %s, %s, %s, %s, %s, %s],\n'%(mcl_id, repr(vertex_set), recurrence_array,
                    recurrence, connectivity, unknown_ratio, size, repr(go_id_list), p_value_list))
            curs.execute("fetch 5000 from crs")
            rows = curs.fetchall()
        of.write('[]]:\n')    #add the last blank list
    finally:
        of.close()
    curs.execute("close crs")
    sys.stderr.write("cluster darwin format done.\n")
def run(self):
    """
    12-28-05

    Export several gene_id-keyed dictionaries to self.output_fname through
    self.dict2darwin().

    self.running_bit selects what is exported: position i (1-based) being
    the character '1' turns section i on; a running_bit shorter than i
    leaves section i off.
        1: go_bp (biological_process GO terms)
        2: go_cc (cellular_component GO terms)
        3: as (number of events from graph.ensembl2no_of_events)
        4: dp (number of promoters)
        5: gene_age (sorted list of what
            tg_tax_id2ca_depth_tax_id_short_org maps the ortholog tax ids to)
        6: gene_tissue
        7: gene_family_size
        8: gnf_gene_tissue

    The output file is closed even when one of the lookups raises.
    """
    conn, curs = db_connect(self.hostname, self.dbname, self.schema)
    organism = get_org_from_tax_id(curs, self.tax_id)
    #get the key_map
    gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id)

    def section_is_on(position):
        # position is 1-based
        return len(self.running_bit)>=position and self.running_bit[position-1]=='1'

    #open output here
    outf = open(self.output_fname, 'w')
    try:
        if section_is_on(1):
            gene_id2go_bp_term = get_gene_id2go_term(curs, term_type='biological_process', organism=organism)
            self.dict2darwin(gene_id2go_bp_term, 'go_bp', gene_id2symbol, outf)
        if section_is_on(2):
            gene_id2go_cc_term = get_gene_id2go_term(curs, term_type='cellular_component', organism=organism)
            self.dict2darwin(gene_id2go_cc_term, 'go_cc', gene_id2symbol, outf)
        if section_is_on(3):
            gene_id2no_of_events = get_gene_id2no_of_events(curs, self.tax_id, ensembl2no_of_events_table='graph.ensembl2no_of_events')
            self.dict2darwin(gene_id2no_of_events, 'as', gene_id2symbol, outf)
        if section_is_on(4):
            gene_id2no_of_promoters = get_gene_id2no_of_promoters(curs, self.tax_id)
            self.dict2darwin(gene_id2no_of_promoters, 'dp', gene_id2symbol, outf)
        if section_is_on(5):
            tg_tax_id2ca_depth_tax_id_short_org = get_tg_tax_id2ca_depth_tax_id_short_org(curs, self.tax_id)
            gene_id2ortholog_tax_id_set = get_gene_id2ortholog_tax_id_set(curs, self.tax_id, homologene_table='homologene.homologene')
            #convert gene_id2ortholog_tax_id_set to gene_id2ca_depth_tax_id_short_org_list
            gene_id2ca_depth_tax_id_short_org_list = {}
            for gene_id, ortholog_tax_id_set in gene_id2ortholog_tax_id_set.iteritems():
                ca_depth_tax_id_short_org_list = dict_map(tg_tax_id2ca_depth_tax_id_short_org, list(ortholog_tax_id_set))
                ca_depth_tax_id_short_org_list.sort()
                gene_id2ca_depth_tax_id_short_org_list[gene_id] = ca_depth_tax_id_short_org_list
            self.dict2darwin(gene_id2ca_depth_tax_id_short_org_list, 'gene_age', gene_id2symbol, outf)
        if section_is_on(6):
            gene_id2tissue_list = get_gene_id2tissue_list(curs, self.tax_id)
            self.dict2darwin(gene_id2tissue_list, 'gene_tissue', gene_id2symbol, outf)
        if section_is_on(7):
            gene_id2family_size = get_gene_id2family_size(curs, self.tax_id)
            self.dict2darwin(gene_id2family_size, 'gene_family_size', gene_id2symbol, outf)
        if section_is_on(8):
            gnf_gene_id2tissue = get_gnf_gene_id2tissue_list(curs, self.tax_id)
            self.dict2darwin(gnf_gene_id2tissue, 'gnf_gene_tissue', gene_id2symbol, outf)
    finally:
        #close output
        outf.close()
def run(self):
    """
    2007-08-30
        add label_type 3

    Open a MySQL connection, gather one (position, size) entry per
    population, derive the label list from self.label_type and hand
    everything to draw_clustered_strain_location(). When
    self.draw_site_network is set, the site network is drawn too.

    Labels by label_type:
        1: population ids
        2: diameter_ls (second element of each position/size entry)
        3: selfing rate scaled by 1000 and rounded to an int; populations
            without a rate, or with a false-valued one, get the string '0'
    With label_type 3 and no selfing_rate_table, a message is written to
    stderr and the program exits with status 3.
    """
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname,host=self.hostname)
    curs = conn.cursor()

    popid2pos_size = self.get_popid2pos_size(curs, self.popid2ecotypeid_table)
    popid_ls = popid2pos_size.keys()
    pos_size_ls = dict_map(popid2pos_size, popid_ls)
    weighted_pos_ls = [entry[0] for entry in pos_size_ls]
    diameter_ls = [entry[1] for entry in pos_size_ls]

    if self.label_type == 1:
        label_ls = popid_ls
    elif self.label_type == 2:
        label_ls = diameter_ls
    elif self.label_type == 3:
        if self.selfing_rate_table is None:
            sys.stderr.write("Label type is 3(selfing rate), but no selfing_rate_table specified\n")
            sys.exit(3)
        popid2selfing_rate = self.get_popid2selfing_rate(curs, self.selfing_rate_table, self.which_method)
        label_ls = []
        for popid in popid_ls:
            label = '0'
            has_rate = popid in popid2selfing_rate and popid2selfing_rate[popid]    #present and not NULL
            if has_rate:
                label = int(round(popid2selfing_rate[popid]*1000))
            label_ls.append(label)

    self.draw_clustered_strain_location(label_ls, weighted_pos_ls, diameter_ls, self.label_type, self.label_type2label_name,
        pic_area=self.pic_area, output_fname_prefix=self.output_fname_prefix)

    if not self.draw_site_network:
        return
    from CreatePopulation import CreatePopulation
    site_helper = CreatePopulation()
    lat_lon_ls, pos2ecotypeid_ls = site_helper.get_pos2ecotypeid_ls(curs, self.strain_info_table)
    g, node_label2pos_counts = site_helper.divide_data_by_geography(lat_lon_ls, self.max_dist)
    self.DrawSiteNetwork(g, node_label2pos_counts, self.pic_area, self.output_fname_prefix)
def _tf_darwin_format(self, curs, good_cluster_table, output_fname, gene_no2id, mcl_id2tf_set):
    """
    2006-09-25
        change good_cluster_table to be pattern_table

    Dump, for every cluster that has an entry in mcl_id2tf_set, its vertex
    set and its transcription-factor records into output_fname in darwin
    format:
        r:=[
        [mcl_id, [gene1, gene2, ...], [ [TF1], [hyper_p_value] ], [ [TF2, TF3], [hyper_p_value] ], ... ],
        [...],
        []]:

    curs: database cursor; rows are pulled 5000 at a time through a
        server-side cursor named "crs".
    good_cluster_table: table name, interpolated into the SQL as is; the
        columns read are id and vertex_set.
    output_fname: file to (over)write.
    gene_no2id: handed to dict_map(..., type=2) to translate the vertex set
        (dict_map is defined elsewhere).
    mcl_id2tf_set: mcl_id -> iterable of (tf_name_tuple, ratio_tuple).

    The output file is closed even when the dump is interrupted by an error,
    and the "crs" cursor is closed once the dump is complete, as the sibling
    methods that declare "crs" do, so the name can be declared again.
    """
    sys.stderr.write("TF...\n")
    of = open(output_fname, 'w')
    try:
        of.write('r:=[\n')
        curs.execute("DECLARE crs CURSOR FOR select id, vertex_set from %s"%good_cluster_table)    #2006-09-25
        curs.execute("fetch 5000 from crs")
        rows = curs.fetchall()
        while rows:
            for mcl_id, vertex_set in rows:
                if mcl_id not in mcl_id2tf_set:
                    continue
                # vertex_set arrives as text like "{1,2,3}"
                vertex_set = [int(gene_no) for gene_no in vertex_set[1:-1].split(',')]
                vertex_set = dict_map(gene_no2id, vertex_set, type=2)
                # each record is (tf_name_tuple, ratio_tuple); turn the record and
                # both of its members into lists so that repr() prints [] throughout
                tf_list = [repr([list(member) for member in tf_record]) for tf_record in mcl_id2tf_set[mcl_id]]
                fields = [repr(mcl_id), repr(vertex_set)] + tf_list
                of.write('[%s],\n'%(','.join(fields)))
            curs.execute("fetch 5000 from crs")
            rows = curs.fetchall()
        of.write('[]]:\n')    #add the last blank list
    finally:
        of.close()
    curs.execute("close crs")
    sys.stderr.write("TF darwin format done.\n")
def output(self, curs, gene_no2go_id_set_list, go_id_set_list, support, prefix, gene_no2id, go_id2name, schema_list): """ 07-06-05 """ sys.stderr.write("Outputing...") #get the total set total_gene_no_set = Set() total_go_id_set = Set() for i in range(len(gene_no2go_id_set_list)): total_gene_no_set |= Set(gene_no2go_id_set_list[i].keys()) total_go_id_set |= go_id_set_list[i] print "the total number of genes is ",len(total_gene_no_set) gene_ofname = '%s.gene'%prefix function_ofname = '%s.function'%prefix gene_writer = csv.writer(open(gene_ofname,'w'), delimiter='\t') function_writer = csv.writer(open(function_ofname, 'w'), delimiter='\t') gene_writer.writerow(['']+schema_list) function_writer.writerow([''] + schema_list) from gene_p_map_redundancy import gene_p_map_redundancy node_distance_class = gene_p_map_redundancy() go_id2term_id = get_go_id2term_id(curs) go_term_id2depth = get_go_term_id2depth(curs) #output the gene-oriented information for gene_no in total_gene_no_set: freq = 0 p_go_id_set_list = [] for i in range(len(gene_no2go_id_set_list)): if gene_no in gene_no2go_id_set_list[i]: p_go_id_set_list.append(gene_no2go_id_set_list[i][gene_no]) freq += 1 if freq == support: if self.p_go_id_set_list_distinct(curs, p_go_id_set_list, node_distance_class, go_term_id2depth, go_id2term_id): row = [gene_no2id[gene_no]] for i in range(len(gene_no2go_id_set_list)): if gene_no in gene_no2go_id_set_list[i]: go_id_set = gene_no2go_id_set_list[i][gene_no] go_name_list = dict_map(go_id2name, go_id_set) row.append(';'.join(go_name_list)) else: row.append('') gene_writer.writerow(row) #output the function_oriented information for go_id in total_go_id_set: freq = 0 for i in range(len(go_id_set_list)): if go_id in go_id_set_list[i]: freq += 1 if freq == support: row = ['%s(%s)'%(go_id2name[go_id],go_id)] for i in range(len(go_id_set_list)): if go_id in go_id_set_list[i]: row.append('1') else: row.append('0') function_writer.writerow(row) sys.stderr.write("Done.\n")
def draw_pattern(self, figure_no, old_g, pos, sub_label_map, title_map, go_id_or_mt_no_struct, go_id_or_mt_no2gene_id_set, \
    output_fname_prefix, is_go_function=0, prot_interaction_graph=None):
    """
    Draw a copy of old_g once per key of go_id_or_mt_no_struct, colouring the
    node labels by how each node relates to that key, and save every figure
    as '<output_fname_prefix>_<key>' in svg, eps and png. If
    go_id_or_mt_no_struct equals [], draw one plain network instead and save
    it as '<output_fname_prefix>' in the same three formats.

    figure_no is incremented once per figure and returned.
    pos: node positions handed to the networkx drawing calls.
    sub_label_map: node -> label; the label length is used as node size.
    title_map: key -> figure title.
    is_go_function: when true, nodes are split four ways (in
        go_id_or_mt_no_struct[key] only: green, in both: yellow, in
        go_id_or_mt_no2gene_id_set[key] only: red, neither: black);
        otherwise only red (in go_id_or_mt_no2gene_id_set[key]) and black.
    prot_interaction_graph: when true-valued, edges are split into
        overlapping (in both graphs, magenta, width 5), pure interaction
        (magenta) and pure co-expression (default colour).

    dict_map(..., type=3) is defined elsewhere; its result is passed as the
    labels argument (presumably a node -> label dictionary).

    2006-11-20
        add prot_interaction_graph
    2006-12-27
        just draw the labels, ignore the circle nodes
    2007-01-10
        edges overlapping between interaction and co-expression are separated from interaction_edge_list
            and they were widened with color 'magenta', the pure-interaction edges are just colored in 'magenta'
    2007-01-25
        turn off the axis
        add codes to draw a pure network
        change png format to svg, eps, png
    """
    g = old_g.copy()
    """
    2007-01-30
        at font_size=16
            char_width=12 and char_height=20 is good for svg and eps
            char_width=50 and char_height=80 is good for png
    """
    char_width = 12
    char_height = 20
    for key in go_id_or_mt_no_struct:
        figure_no += 1
        pylab.figure()
        pylab.axis("off")
        pylab.title(title_map[key])
        # sort the nodes into the label colour groups
        standout_gene_id_list = []
        standout_and_associated_gene_id_list = []
        associated_gene_id_list = []
        other_gene_id_list = []
        for v in g:
            if is_go_function:
                if v in go_id_or_mt_no_struct[key] and v in go_id_or_mt_no2gene_id_set[key]:
                    standout_and_associated_gene_id_list.append(v)
                elif v in go_id_or_mt_no_struct[key] and v not in go_id_or_mt_no2gene_id_set[key]:
                    standout_gene_id_list.append(v)
                elif v in go_id_or_mt_no2gene_id_set[key]:
                    associated_gene_id_list.append(v)
                else:
                    other_gene_id_list.append(v)
            else:
                if v in go_id_or_mt_no2gene_id_set[key]:
                    associated_gene_id_list.append(v)
                else:
                    other_gene_id_list.append(v)
        if prot_interaction_graph:
            # only the interactions among the nodes of g matter
            nodes_of_g = g.nodes()
            sub_prot_graph = prot_interaction_graph.subgraph(nodes_of_g)
            interaction_edge_list = []    #pure interaction
            non_interaction_edge_list = []    #pure co-expression
            overlapping_edge_list = []    #2007-01-10 in both graphs
            for (u, v) in g.edges():
                if not sub_prot_graph.has_edge(u,v):
                    non_interaction_edge_list.append((u,v))
                else:    #2007-01-10
                    overlapping_edge_list.append((u, v))
            # edges of sub_prot_graph are unpacked as 3-tuples; the third field is not used
            for (u, v, interaction_type_id) in sub_prot_graph.edges():
                if not g.has_edge(u, v):    #2007-01-10
                    interaction_edge_list.append((u,v))
            #2007-01-10 overlapping
            nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', width=5, edgelist=overlapping_edge_list)
            #pure interaction
            nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', edgelist=interaction_edge_list)
            #pure co-expression
            nx.draw_networkx_edges(g, pos, alpha=1.0, edgelist=non_interaction_edge_list)
        else:
            nx.draw_networkx_edges(g, pos, alpha=1.0)
        # white boxes behind the labels, sized by label length
        node_size_list = []
        for v in g:
            node_size_list.append(len(sub_label_map[v]))    #2007-01-29, it's gonna extend along the x-axis
        nx.draw_networkx_nodes(g, pos, node_color='w', node_size=node_size_list, node_shape=None, alpha=1,
            verts=[[-char_width/2, -char_height/2], [-char_width/2, char_height/2], [char_width/2, char_height/2], [char_width/2, -char_height/2]])
        # 2006-12-27 only the labels are drawn, one colour per group
        if standout_gene_id_list:
            nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, standout_gene_id_list, type=3), \
                font_color='g', alpha=0.4, font_size=16)
        if standout_and_associated_gene_id_list:
            nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, standout_and_associated_gene_id_list, type=3), \
                font_color='y', alpha=0.4, font_size=16)
        if associated_gene_id_list:
            nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, associated_gene_id_list, type=3), \
                font_color='r', alpha=0.4, font_size=16)
        if other_gene_id_list:
            nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, other_gene_id_list, type=3), \
                font_color='k', alpha=0.4, font_size=16)
        pylab.savefig('%s_%s.svg'%(output_fname_prefix, key), dpi=300)
        pylab.savefig('%s_%s.eps'%(output_fname_prefix, key), dpi=300)
        pylab.savefig('%s_%s.png'%(output_fname_prefix, key), dpi=300)
        pylab.clf()
    if go_id_or_mt_no_struct == []:    #2007-01-25 nothing to highlight, draw the pure network
        figure_no += 1
        pylab.figure()
        pylab.axis("off")
        nx.draw_networkx_edges(g, pos, alpha=0.8)
        #2007-01-30
        node_size_list = []
        for v in g:
            node_size_list.append(len(sub_label_map[v]))    #2007-01-29, it's gonna extend along the x-axis
        #node_shape has to be set to None, otherwise verts won't work
        nx.draw_networkx_nodes(g, pos, node_color='w', node_size=node_size_list, node_shape=None, alpha=1.0,
            verts=[[-char_width/2, -char_height/2], [-char_width/2, char_height/2], [char_width/2, char_height/2], [char_width/2, -char_height/2]])
        nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, g.nodes(), type=3), alpha=0.4, font_size=16)
        pylab.savefig('%s.svg'%(output_fname_prefix), dpi=300)
        pylab.savefig('%s.eps'%(output_fname_prefix), dpi=300)
        pylab.savefig('%s.png'%(output_fname_prefix), dpi=300)
        pylab.clf()
    return figure_no
def draw_augmented_PI_graph(self, old_g, prot_interaction_graph, old_sub_label_map, gene_id2gene_symbol, output_fname_prefix):
    """
    Draw old_g augmented by prot_interaction_graph and save the picture as
    '<output_fname_prefix>' in svg, eps and png.

    For every pair of old_g nodes that are both in prot_interaction_graph,
    the edges along nx.shortest_path() between them are added to a new
    graph. The edges of old_g missing from it are then added too, the layout
    is computed with nx.spring_layout() on the result, and edges and labels
    are drawn in groups:
        pure co-expression edges (only in old_g): default colour
        pure interaction edges: magenta
        overlapping edges (on a shortest path and in old_g): magenta, width 5
        labels of old_g nodes: black
        labels of nodes brought in by the interaction graph: magenta
    Labels of the new nodes come from gene_id2gene_symbol, or repr(node)
    when the node has no symbol; old_sub_label_map is copied, not modified.

    dict_map(..., type=3) is defined elsewhere; its result is passed as the
    labels argument (presumably a node -> label dictionary).

    2006-12-16
        draw a graph based on old_g but augmented by prot_interaction_graph
        augment the old graph by node-pairwise searching against the interaction graph
    2006-12-27
        just draw the labels, ignore the circle nodes
    2006-12-29, some protein interaction genes are new to table gene.gene
    2007-01-10
        add the pure co-expression edges to the interaction graph and re-position
    2007-01-25
        turn off the axis
        change png format to svg, eps, png
    """
    g = nx.XGraph()
    pylab.axis("off")
    node_list = old_g.nodes()
    no_of_nodes = len(node_list)
    overlapping_edge_list = []
    non_standout_edge_list = []
    # visit every unordered pair of old_g nodes
    for m in range(no_of_nodes):
        for n in range(m+1, no_of_nodes):
            u = node_list[m]
            v = node_list[n]
            if prot_interaction_graph.has_node(u) and prot_interaction_graph.has_node(v):
                shortest_path_list = nx.shortest_path(prot_interaction_graph, u, v)
                if shortest_path_list:
                    #check the whole shortest path
                    for i in range(len(shortest_path_list)-1):
                        j = i+1
                        # NOTE(review): the source text lost its indentation; the classification
                        # below is assumed to run only for edges newly added to g - confirm
                        if not g.has_edge(shortest_path_list[i], shortest_path_list[j]):
                            g.add_edge(shortest_path_list[i], shortest_path_list[j], 1)
                            if old_g.has_edge(shortest_path_list[i], shortest_path_list[j]):
                                overlapping_edge_list.append((shortest_path_list[i], shortest_path_list[j]))
                            else:
                                non_standout_edge_list.append((shortest_path_list[i], shortest_path_list[j]))
    # whatever old_g edge is still missing is a pure co-expression edge
    standout_edge_list = []
    for (u,v) in old_g.edges():
        if not g.has_edge(u,v):
            standout_edge_list.append((u,v))
            g.add_edge(u, v, 1)    #2007-01-10
    sub_label_map = old_sub_label_map.copy()
    standout_node_list = []
    non_standout_node_list = []
    # nodes that only came in through the interaction graph need a label
    for v in g:
        if v not in old_g:
            if v in gene_id2gene_symbol:    #2006-12-29, some protein interaction genes are new to table gene.gene
                sub_label_map[v] = gene_id2gene_symbol[v]
            else:
                sub_label_map[v] = repr(v)
            non_standout_node_list.append(v)
    # make sure every old_g node is in g before the layout is computed
    for v in old_g:
        standout_node_list.append(v)
        if v not in g:
            g.add_node(v)
    pos = nx.spring_layout(g)    #layout computed on the augmented graph
    if standout_edge_list:    #pure co-expression edges
        nx.draw_networkx_edges(g, pos, alpha=0.4, edgelist=standout_edge_list)
    if non_standout_edge_list:    #pure interaction edges
        nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', edgelist=non_standout_edge_list)
    if overlapping_edge_list:    #overlapping
        nx.draw_networkx_edges(g, pos, alpha=0.4, edge_color='m', width=5, edgelist=overlapping_edge_list)

    if standout_node_list:    #in co-expression network
        nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, standout_node_list, type=3), \
            font_color='k', alpha=0.4, font_size=10)
    if non_standout_node_list:    #pure protein interaction nodes
        nx.draw_networkx_labels(g, pos, labels=dict_map(sub_label_map, non_standout_node_list, type=3), \
            font_color='m', alpha=0.4, font_size=10)
    pylab.savefig('%s.svg'%(output_fname_prefix), dpi=300)
    pylab.savefig('%s.eps'%(output_fname_prefix), dpi=300)
    pylab.savefig('%s.png'%(output_fname_prefix), dpi=300)
    pylab.clf()
from TF_functions import cluster_bs_analysis ls_to_return = cluster_bs_analysis(core_vertex_ls, gene_no2bs_no_set, bs_no2gene_no_set, ratio_cutoff, \ top_number, p_value_cut_off) gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id) dataset_no2desc = get_dataset_no2desc(curs) dataset_no_desc_ls = [] for dataset_index in recurrent_and_on_datasets_ls: dataset_no = dataset_index +1 dataset_no_desc_ls.append([dataset_no, dataset_no2desc[dataset_no]]) outf = open(output_file, 'w') outf.write("out:=[\n") for i in range(len(ls_to_return)): row = ls_to_return[i] score, score_type, bs_no_list, target_gene_no_list, global_ratio, local_ratio, expected_ratio, unknown_ratio = row core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls) bs_no_symbol_list = dict_map(gene_id2symbol, bs_no_list) if i == len(ls_to_return)-1: outf.write('[{%s},{%s},{%s}]\n'%(repr(core_vertex_symbol_ls)[1:-1], repr(bs_no_symbol_list)[1:-1], repr(dataset_no_desc_ls)[1:-1])) else: outf.write('[{%s},{%s},{%s}],\n'%(repr(core_vertex_symbol_ls)[1:-1], repr(bs_no_symbol_list)[1:-1], repr(dataset_no_desc_ls)[1:-1])) outf.write(']:\n') else: print __doc__ sys.exit(2)