Beispiel #1
0
    def dstruc_loadin(self, curs):
        """
        Load the lookup tables and populate self.gene_prediction_dict.

        curs is the database cursor handed to the codense.common getters and
        used to run the prediction query over self.gene_table, self.table and
        self.mcl_table.

        Rows are filtered by self.type: 2 keeps only genes present in
        self.known_genes_dict, 3 keeps only genes absent from it; any other
        value keeps every row.

        For each kept row a function_struc is stored under
        self.gene_prediction_dict[gene_no].p_functions_struc_dict[go_no].

        03-09-05
            get the context from mcl_table via linking through mcl_id of p_gene_table
            context_dict is set
        """
        from codense.common import get_known_genes_dict, get_go_no2go_id, get_go_no2name, get_gene_no2gene_id

        self.known_genes_dict = get_known_genes_dict(curs)
        self.go_no2go_id = get_go_no2go_id(curs)
        self.go_no2go_name = get_go_no2name(curs)
        self.gene_no2gene_id = get_gene_no2gene_id(curs)

        sys.stderr.write("Setting up gene_prediction_dict...")
        # p = self.gene_table, g = self.table, m = self.mcl_table
        query_template = (
            "select p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, p.is_correct_lca, m.vertex_set"
            "\t\t\tfrom %s p, %s g, %s m where g.p_gene_id=p.p_gene_id and m.mcl_id=p.mcl_id"
        )
        curs.execute(query_template % (self.gene_table, self.table, self.mcl_table))

        for record in curs.fetchall():
            gene_no, go_no, correct, correct_l1, correct_lca, vertex_text = record

            is_known = gene_no in self.known_genes_dict
            # type 2 wants known genes only, type 3 wants unknown genes only
            if (self.type == 2 and not is_known) or (self.type == 3 and is_known):
                continue

            prediction = function_struc()
            prediction.is_correct = correct
            prediction.is_correct_l1 = correct_l1
            prediction.is_correct_lca = correct_lca
            # drop the first and last character of the column text, then read
            # the comma-separated integers (presumably a '{1,2,3}' style value)
            # context_dict is a set
            prediction.context_dict = Set([int(vertex) for vertex in vertex_text[1:-1].split(",")])

            if gene_no not in self.gene_prediction_dict:
                self.gene_prediction_dict[gene_no] = gene_prediction()
            self.gene_prediction_dict[gene_no].p_functions_struc_dict[go_no] = prediction

        sys.stderr.write("Done\n")

        """
Beispiel #2
0
	def dstruc_loadin(self, curs):
		"""
		Fill the gene/GO lookup attributes of this instance from the database.

		curs is passed unchanged to each codense.common getter; the result of
		every getter is stored on self under the attribute name listed below.

		03-14-05
			remove the distance loading part
		"""
		sys.stderr.write("Loading Data STructure...\n")
		from codense.common import get_known_genes_dict, get_go_no2go_id, get_go_no2term_id
		from codense.common import get_go_no2depth, get_go_term_id2go_no, get_go_term_id2depth

		#(attribute name, getter) pairs; getters are called in this order
		attribute_loaders = [
			('known_genes_dict', get_known_genes_dict),
			('go_no2go_id', get_go_no2go_id),
			('go_no2term_id', get_go_no2term_id),
			('go_no2depth', get_go_no2depth),
			('go_term_id2go_no', get_go_term_id2go_no),
			('go_term_id2depth', get_go_term_id2depth),
		]
		for attribute_name, getter in attribute_loaders:
			setattr(self, attribute_name, getter(curs))

		sys.stderr.write("Done\n")
	def parse_cluster_fname(self, curs, cluster_fname, gim_inputfname, cluster_id_set, schema_instance):
		"""
		Parse the requested rows of cluster_fname, submit their clusters to the
		database and return their properties.

		Parameters:
			curs: database cursor, handed to the codense.common getters and to
				the codense2db instance.
			cluster_fname: tab-delimited cluster file; the 1-based line number
				of a row is used as its cluster_id.
			gim_inputfname: file passed to get_gene_no2incidence_array().
			cluster_id_set: the line numbers (cluster ids) to parse; all other
				rows are skipped.
			schema_instance: supplies splat_table, mcl_table and pattern_table.

		Returns:
			dict mapping cluster_id to
			[connectivity, unknown_gene_ratio, vertex_set]. When one row yields
			several clusters they share a cluster_id, so the last one wins.

		The input file is closed even if parsing or submission raises, and the
		scan stops once every requested row has been seen.

		01-24-06
			a lot of analogy to codense2db.py's run()
		"""
		sys.stderr.write("Parsing cluster_fname: %s ...\n"%os.path.basename(cluster_fname))
		codense2db_instance  = codense2db()
		codense2db_instance.create_tables(curs, schema_instance.splat_table, \
			schema_instance.mcl_table, schema_instance.pattern_table)
		gene_id2gene_no = get_gene_id2gene_no(curs)
		gene_no2incidence_array = get_gene_no2incidence_array(gim_inputfname, gene_id2gene_no)
		known_gene_no2go_no_set = get_known_genes_dict(curs)
		no_of_wanted_rows = len(cluster_id_set)
		counter = 0	#line number of the current row, starts from 1
		real_counter = 0	#number of clusters submitted so far (progress output only)
		no_of_matched_rows = 0	#number of rows whose line number is in cluster_id_set
		cluster_id2properties = {}	#additional properties for prediction_pair2instance
		inf = open(cluster_fname, 'r')
		try:
			reader = csv.reader(inf, delimiter='\t')
			for row in reader:
				counter += 1
				#only those who are in cluster_id_set
				if counter in cluster_id_set:	#cluster_id starts from 1
					no_of_matched_rows += 1
					cluster_list = codense2db_instance.fimbfs_parser(row, gene_no2incidence_array, curs)
					for cluster in cluster_list:
						real_counter += 1
						cluster.unknown_gene_ratio = codense2db_instance.calculate_unknown_gene_ratio(cluster.vertex_set, \
							known_gene_no2go_no_set)
						cluster.cluster_id = counter	#line number is the cluster_id
						codense2db_instance.db_submit(curs, cluster, schema_instance.pattern_table)

						cluster_id2properties[cluster.cluster_id] = [cluster.connectivity, cluster.unknown_gene_ratio, cluster.vertex_set]
				#Stop on matched rows, not on parsed clusters: fimbfs_parser returns a
				#list, so a single row may contribute several (or no) clusters and the
				#cluster count could hit the target before all wanted rows were read.
				if no_of_matched_rows==no_of_wanted_rows:
					#all relevant clusters have been got, ignore remaining clusters
					break
				if self.report and counter%2000==0:
					sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
		finally:
			#release the file handle no matter how the loop ended
			inf.close()
		if self.report:
			sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
		sys.stderr.write("Done.\n")
		return cluster_id2properties