def dstruc_loadin(self, curs):
    """
    03-09-05
        get the context from mcl_table via linking through mcl_id of p_gene_table
        context_dict is set

    Loads the lookup dictionaries from codense.common onto self
    (known_genes_dict, go_no2go_id, go_no2go_name, gene_no2gene_id) and then
    fills self.gene_prediction_dict with one function_struc per
    (gene_no, go_no) row returned by the query below.

    curs: database cursor; handed to the codense.common helpers and used to
        run the prediction query.

    self.gene_prediction_dict is only added to here, so it has to exist
    before this method is called.
    self.type selects which genes are kept: 2 keeps only genes present in
    known_genes_dict, 3 keeps only genes absent from it; any other value
    keeps every gene.
    """
    from codense.common import get_known_genes_dict, get_go_no2go_id, \
        get_go_no2name, get_gene_no2gene_id

    self.known_genes_dict = get_known_genes_dict(curs)
    self.go_no2go_id = get_go_no2go_id(curs)
    self.go_no2go_name = get_go_no2name(curs)
    self.gene_no2gene_id = get_gene_no2gene_id(curs)

    sys.stderr.write("Setting up gene_prediction_dict...")
    # setup self.gene_prediction_dict
    # The statement is assembled from adjacent literals so that no backslash
    # continuation can end up inside the SQL text.
    query = (
        "select p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, p.is_correct_lca, m.vertex_set"
        " from %s p, %s g, %s m where g.p_gene_id=p.p_gene_id and m.mcl_id=p.mcl_id"
    ) % (self.gene_table, self.table, self.mcl_table)
    curs.execute(query)

    for row in curs.fetchall():
        gene_no = row[0]
        if self.type == 2 and gene_no not in self.known_genes_dict:
            # I only want the known genes, but this gene is unknown
            continue
        if self.type == 3 and gene_no in self.known_genes_dict:
            # I only want the unknown genes, but this gene is known
            continue
        go_no = row[1]

        # vertex_set arrives as text; the first and last characters are
        # stripped before splitting on commas (presumably a '{...}' array
        # literal -- TODO confirm against the mcl_table schema).
        vertex_set = [int(vertex) for vertex in row[5][1:-1].split(",")]

        item = function_struc()
        item.is_correct = row[2]
        item.is_correct_l1 = row[3]
        item.is_correct_lca = row[4]
        # context_dict is a set
        item.context_dict = Set(vertex_set)

        # Create the per-gene container on first sight, then record the
        # prediction for this go_no (a later row for the same pair overwrites
        # the earlier one).
        if gene_no not in self.gene_prediction_dict:
            self.gene_prediction_dict[gene_no] = gene_prediction()
        self.gene_prediction_dict[gene_no].p_functions_struc_dict[go_no] = item
    sys.stderr.write("Done\n")
def dstruc_loadin(self, curs):
    """
    03-14-05
        remove the distance loading part

    Fills six lookup attributes on self by calling the matching
    codense.common helper with curs:
        known_genes_dict, go_no2go_id, go_no2term_id, go_no2depth,
        go_term_id2go_no, go_term_id2depth

    curs: database cursor passed unchanged to every helper.
    Progress messages are written to stderr before and after loading.
    """
    sys.stderr.write("Loading Data STructure...\n")
    from codense.common import get_known_genes_dict, get_go_no2go_id, \
        get_go_no2term_id, get_go_no2depth, get_go_term_id2go_no, \
        get_go_term_id2depth

    # (attribute name, loader) pairs, listed in the order the loaders are run
    attr_loader_pairs = [
        ('known_genes_dict', get_known_genes_dict),
        ('go_no2go_id', get_go_no2go_id),
        ('go_no2term_id', get_go_no2term_id),
        ('go_no2depth', get_go_no2depth),
        ('go_term_id2go_no', get_go_term_id2go_no),
        ('go_term_id2depth', get_go_term_id2depth),
    ]
    for attr_name, loader in attr_loader_pairs:
        setattr(self, attr_name, loader(curs))
    sys.stderr.write("Done\n")
def parse_cluster_fname(self, curs, cluster_fname, gim_inputfname, cluster_id_set, schema_instance):
    """
    01-24-06
        a lot of analogy to codense2db.py's run()

    Reads the tab-delimited file cluster_fname and, for every line whose
    1-based line number is in cluster_id_set, parses the line into clusters,
    submits each cluster to schema_instance.pattern_table and records its
    properties.

    curs: database cursor, passed to the codense helpers.
    cluster_fname: path of the cluster file; the line number is the cluster_id.
    gim_inputfname: passed to get_gene_no2incidence_array() together with the
        gene_id -> gene_no mapping.
    cluster_id_set: container of the cluster ids (line numbers) to keep.
    schema_instance: supplies the splat_table, mcl_table and pattern_table
        names; the three tables are created before parsing starts.

    Returns cluster_id2properties, a dict mapping
        cluster_id -> [connectivity, unknown_gene_ratio, vertex_set]
    When self.report is true, "counter/real_counter" progress is written to
    stderr every 2000 lines and once more after the loop.
    """
    sys.stderr.write("Parsing cluster_fname: %s ...\n"%os.path.basename(cluster_fname))
    codense2db_instance = codense2db()
    codense2db_instance.create_tables(curs, schema_instance.splat_table, \
        schema_instance.mcl_table, schema_instance.pattern_table)
    gene_id2gene_no = get_gene_id2gene_no(curs)
    gene_no2incidence_array = get_gene_no2incidence_array(gim_inputfname, gene_id2gene_no)
    known_gene_no2go_no_set = get_known_genes_dict(curs)

    counter = 0
    real_counter = 0
    cluster_id2properties = {}    #additional properties for prediction_pair2instance
    no_of_wanted_clusters = len(cluster_id_set)    #loop-invariant, computed once

    inf = open(cluster_fname, 'r')
    try:
        # try/finally guarantees the file handle is released even when a
        # parser or database call raises part-way through the file.
        reader = csv.reader(inf, delimiter='\t')
        for row in reader:
            counter += 1
            #only those who are in cluster_id_set
            if counter in cluster_id_set:    #cluster_id starts from 1
                cluster_list = codense2db_instance.fimbfs_parser(row, gene_no2incidence_array, curs)
                for cluster in cluster_list:
                    real_counter += 1
                    cluster.unknown_gene_ratio = codense2db_instance.calculate_unknown_gene_ratio(\
                        cluster.vertex_set, known_gene_no2go_no_set)
                    cluster.cluster_id = counter    #line number is the cluster_id
                    codense2db_instance.db_submit(curs, cluster, schema_instance.pattern_table)
                    cluster_id2properties[cluster.cluster_id] = [cluster.connectivity, \
                        cluster.unknown_gene_ratio, cluster.vertex_set]
            # NOTE(review): real_counter counts clusters, not matched lines, so
            # this early exit assumes fimbfs_parser() yields one cluster per
            # line -- confirm against codense2db.
            if real_counter==no_of_wanted_clusters:
                #all relevant clusters have been got, ignore remaining clusters
                break
            if self.report and counter%2000==0:
                # 20 backspaces move the cursor back over the previous progress text
                sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
    finally:
        inf.close()
    if self.report:
        sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
    sys.stderr.write("Done.\n")
    return cluster_id2properties