예제 #1
0
	def get_basic_cluster_dstructure(self, curs, mcl_id, splat_table, mcl_table):
		"""
		04-06-05
			Look up one cluster, joined with its splat row through splat_id, and
			return it as a cluster_dstructure.
		06-20-05
			connectivity_original is not read from the database; the query selects
			a literal 0 in its place.

		Parameters:
			curs: cursor object; only execute() and fetchall() are called on it.
			mcl_id: value compared against m.mcl_id.
			splat_table: table aliased as s (supplies edge_set and connectivity).
			mcl_table: table aliased as m (supplies the cluster columns).

		Returns:
			The populated cluster_dstructure, or None when the query yields no row.
			vertex_set is a list of int and recurrence_array a list of float.

		NOTE(review): the table names and mcl_id are interpolated directly into the
			SQL text, so they must come from trusted input.
		"""
		sys.stderr.write("Getting the basic information of cluster no.%s..."%mcl_id)
		curs.execute("select m.mcl_id, m.vertex_set, m.connectivity, 0,\
			m.recurrence_array, s.edge_set, s.connectivity from %s m, %s s where m.splat_id=s.splat_id and \
			m.mcl_id=%s"\
			%(mcl_table, splat_table, mcl_id))	#06-20-05	connectivity_original faked to be 0
		rows = curs.fetchall()
		if not rows:
			#nothing to build, so no cluster_dstructure is created at all
			sys.stderr.write("Cluster: %s not found.\n"%mcl_id)
			return None
		unit = cluster_dstructure()
		#if several rows match, each one overwrites the previous, so the last row wins
		for row in rows:
			unit.cluster_id = row[0]
			#[1:-1] drops the first and last character of the column text
			#(presumably the braces of a database array literal -- TODO confirm)
			#real lists (not map objects) so the values can be reused and measured with len()
			unit.vertex_set = [int(vertex) for vertex in row[1][1:-1].split(',')]
			unit.connectivity = row[2]
			unit.connectivity_original = row[3]
			unit.recurrence_array = [float(recurrence) for recurrence in row[4][1:-1].split(',')]
			unit.edge_set = parse_splat_table_edge_set(row[5])
			unit.splat_connectivity = row[6]
		sys.stderr.write("Done.\n")
		return unit
예제 #2
0
    def copath_parser(self, row, writer, argument=None, argument2=None):
        """
        04-12-05
            copied from codense2db.py, changed a lot
        04-29-05
            cc module come into play to get the connected components

        Parse the edge column of one row, split the edges into connected
        components with cc_from_edge_list and pass one cluster per component
        to self.output().

        Parameters:
            row: indexable record. row[0] is matched against
                self.p_cooccurrent_cluster_id and row[3] holds the edge text.
                row[2] (the vertex text) is not consulted; every cluster's
                vertex_set is derived from its own edges instead.
            writer: forwarded unchanged to self.output().
            argument, argument2: unused here.

        Raises:
            AttributeError: if the pattern does not match row[0].
            ValueError: if an edge token is not an integer.
        """
        cooccurrent_cluster_id = self.p_cooccurrent_cluster_id.match(row[0]).group()
        # [2:-4] strips the first two and the last four characters of the
        # edge text; the remainder is split on " );(" into "a,b" tokens.
        edge_set = []
        for edge in row[3][2:-4].split(" );("):
            # build a real list so that it can be sorted in place
            edge = [int(vertex) for vertex in edge.split(",")]
            # in ascending order
            edge.sort()
            edge_set.append(edge)
        instance = cc_from_edge_list()
        instance.run(edge_set)
        for cc_edge_list in instance.cc_list:
            cluster = cluster_dstructure()
            cluster.cooccurrent_cluster_id = cooccurrent_cluster_id  # it's not used in the output()
            cluster.vertex_set = self.vertex_set_from_cc_edge_list(cc_edge_list)
            cluster.edge_set = cc_edge_list
            self.output(writer, cluster)
예제 #3
0
	def data_fetch(self, curs, splat_table, mcl_table, crs_no=0, output_fname=None):
		"""
		04-17-05
			fetch cluster_dstructures for all clusters(Jasmine's request)
		04-19-05
			1. return a mcl_id2cluster_dstructure
			2. crs_no
		08-31-05
			output clusters directly to output_fname
		09-01-05
			add the last []

		Reads all clusters through a server-side cursor named crs<crs_no>, 5000
		rows per fetch, and writes the string form of each one to output_fname.

		Parameters:
			curs: cursor object; execute() and fetchall() are called on it and it
				is passed on to the helper functions/methods.
			splat_table: table aliased as s in the query.
			mcl_table: table aliased as m in the query.
			crs_no: suffix used to name the declared cursor.
			output_fname: path of the file to write. Despite the None default it
				is required, because the file is opened unconditionally.

		Returns:
			mcl_id2cluster_dstructure. It is always an empty dictionary, because
			the clusters are written to the file and not kept in memory.

		The file content is "r:=[" followed by "<cluster>," for every cluster and
		a final "[]]:"; the empty [] follows the last trailing comma.

		NOTE(review): the table names and crs_no are interpolated directly into
			the SQL text, so they must come from trusted input.
		"""
		declare_statement = "DECLARE crs%s CURSOR FOR select m.mcl_id, m.vertex_set, m.connectivity, 0,\
			m.recurrence_array, s.edge_set, s.connectivity, m.cooccurrent_cluster_id from %s m, %s s where \
			m.splat_id=s.splat_id"\
			%(crs_no, mcl_table, splat_table)	#06-20-05	connectivity_original faked to be 0
		fetch_statement = "fetch 5000 from crs%s"%crs_no

		gene_no2gene_id = get_gene_no2gene_id(curs)	#08-31-05
		outf = open(output_fname, 'w')	#08-31-05
		#try/finally guarantees the file is flushed and closed even when a query or a helper fails
		try:
			outf.write("r:=[")	#08-31-05

			mcl_id2cluster_dstructure = {}	#intentionally left empty, see the docstring
			no_of_total_genes = get_no_of_total_genes(curs)
			sys.stderr.write("Getting the basic information for all clusters...\n")
			curs.execute(declare_statement)
			curs.execute(fetch_statement)
			rows = curs.fetchall()
			while rows:
				for row in rows:
					unit = cluster_dstructure()
					unit.cluster_id = row[0]
					#[1:-1] drops the first and last character of the column text
					#(presumably the braces of a database array literal -- TODO confirm)
					#a real list is required: len(unit.vertex_set) is taken below
					unit.vertex_set = [int(vertex) for vertex in row[1][1:-1].split(',')]
					unit.connectivity = row[2]
					unit.connectivity_original = row[3]
					unit.recurrence_array = [float(recurrence) for recurrence in row[4][1:-1].split(',')]
					unit.edge_set = parse_splat_table_edge_set(row[5])
					unit.splat_connectivity = row[6]
					unit.cooccurrent_cluster_id = row[7]
					unit.go_no2association_genes = self.get_go_functions_of_this_gene_set(curs, unit.vertex_set)
					unit.go_no2information = self.get_information_of_go_functions(curs, \
						unit.go_no2association_genes, len(unit.vertex_set), no_of_total_genes, p_value_cut_off=0.05)	#jasmine wants to cut some go-nos.
					unit.edge_cor_2d_list, unit.edge_sig_2d_list = self.get_cor_sig_2d_list(curs, unit.edge_set)

					str_tmp = self.return_string_form_of_cluster_dstructure(unit, gene_no2gene_id)	#08-31-05
					outf.write("%s,"%str_tmp)
				curs.execute(fetch_statement)
				rows = curs.fetchall()
			outf.write("[]]:")	#08-31-05, 09-01-05 add the last blank []
		finally:
			outf.close()
		sys.stderr.write("Done.\n")
		return mcl_id2cluster_dstructure
	def copath_parser(self, row, argument=None, argument2=None):
		"""
		04-12-05
			copied from codense2db.py, changed a lot

		Parse the vertex and edge columns of one row and append them to the
		cluster stored under the row's cooccurrent_cluster_id in
		self.cooccurrent_cluster_id2cluster, creating that cluster on first use.

		Parameters:
			row: indexable record. row[0] is matched against
				self.p_cooccurrent_cluster_id, row[2] holds the ';'-separated
				vertex text and row[3] the edge text.
			argument, argument2: unused here.

		Returns:
			None; the result is accumulated in self.cooccurrent_cluster_id2cluster.

		Raises:
			AttributeError: if the pattern does not match row[0].
			ValueError: if a vertex or edge token is not an integer.
		"""
		cooccurrent_cluster_id = self.p_cooccurrent_cluster_id.match(row[0]).group()
		#[1:-2] strips the first and the last two characters of the vertex text
		#real lists (not map objects) so they can be sorted and appended repeatedly
		vertex_set = [int(vertex) for vertex in row[2][1:-2].split(';')]
		#[2:-4] strips the first two and the last four characters of the edge text
		edge_set = []
		for edge in row[3][2:-4].split(' );('):
			edge = [int(vertex) for vertex in edge.split(',')]
			#in ascending order
			edge.sort()
			edge_set.append(edge)

		if cooccurrent_cluster_id not in self.cooccurrent_cluster_id2cluster:
			cluster = cluster_dstructure()
			cluster.cooccurrent_cluster_id = cooccurrent_cluster_id	#it's not used in the output()
			#initialize two sets
			cluster.vertex_set = []
			cluster.edge_set = []
			self.cooccurrent_cluster_id2cluster[cooccurrent_cluster_id] = cluster

		#pass it to ease programming
		cluster = self.cooccurrent_cluster_id2cluster[cooccurrent_cluster_id]
		cluster.vertex_set += vertex_set
		cluster.edge_set += edge_set