コード例 #1
0
	def patternFormation(self, communicator, signature2pattern, of, no_cc, \
		edge_sig_matrix, no_of_datasets, min_cluster_size, debug):
		"""
		Assign every edge to the signatures it satisfies and flush completed patterns.

		Each row of edge_sig_matrix holds the edge in its first two entries and
		the already bit-encoded occurrence in the third. An edge belongs to a
		signature when every bit of the signature is also set in the edge's
		occurrence. signature2pattern maps a signature to a list whose first
		entry is the expected number of edges ("frequency") followed by the
		edges collected so far; the lists are extended in place.

		As soon as a signature has collected `frequency` edges, its edge list is
		handed to outputCcFromEdgeList() together with `of`, min_cluster_size and
		no_cc, and the signature is removed from signature2pattern.

		no_of_datasets and debug are accepted but not used by this body.

		History:
		08-24-05	edge_sig_matrix replaces the intermediateFile
		01-04-06	1st part split out to be readin_signature2pattern()
		01-07-06	back to edge_sig_matrix, add min_cluster_size
		"""
		sys.stderr.write("node %s starts patternFormation ...\n"%(communicator.rank))
		codense2db_instance = codense2db()
		for row in edge_sig_matrix:
			edge = row[:2]
			encoded_occurrence = row[2]	#already encoded when edge_sig_matrix is filled in
			completed_signatures = []
			for signature in signature2pattern:
				pattern = signature2pattern[signature]
				frequency = pattern[0]
				is_member = (encoded_occurrence&signature)==signature
				if not is_member:
					continue
				pattern.append(edge)
				#the 1st entry is the frequency, hence the +1
				if len(pattern) == frequency+1:
					completed_signatures.append(signature)
			#deletion is postponed until the dictionary is no longer being iterated
			for signature in completed_signatures:
				edge_list = signature2pattern[signature][1:]
				outputCcFromEdgeList(of, signature, edge_list, codense2db_instance, min_cluster_size, no_cc)
				del signature2pattern[signature]
		sys.stderr.write("node %s patternFormation done.\n"%(communicator.rank))
コード例 #2
0
ファイル: CrackSplat.py プロジェクト: polyactis/annot
	def init(self):
		"""
		Set up helper instances, the working directory and the derived table names.

		- instantiates splat_to_db, clustering_test and codense2db
		- creates self.dir_files when it is missing, otherwise only warns
		- sets the temporary 'input'/'output' file names inside that directory
		- builds crack_dict / argument1_dict, both keyed by 1 and 2
		- derives splat_table (self.table + 's') and mcl_table ('splat'
		  replaced by 'mcl'); exits with status 2 when both names coincide
		"""
		from splat_to_db import splat_to_db
		from visualize.clustering_test import clustering_test
		from codense.codense2db import codense2db
		self.splat_to_db_instance = splat_to_db()
		self.clustering_test_instance = clustering_test()
		self.codense2db_instance = codense2db()

		work_dir = self.dir_files
		if os.path.isdir(work_dir):
			sys.stderr.write("Warning, directory %s already exists.\n"%(work_dir))
		else:
			os.makedirs(work_dir)
		self.tmpinfname = os.path.join(work_dir, 'input')
		self.tmpoutfname = os.path.join(work_dir, 'output')

		#key 1 pairs crack_by_modes with clustering_test, key 2 pairs crack_by_splat with splat_to_db
		by_modes = crack_by_modes(self.debug)
		by_splat = crack_by_splat(self.debug)
		self.crack_dict = {1: by_modes, 2: by_splat}
		self.argument1_dict = {
			1: self.clustering_test_instance,
			2: self.splat_to_db_instance}

		#two derived tables
		splat_name = '%ss'%self.table
		self.splat_table = splat_name
		mcl_name = splat_name.replace('splat','mcl')
		self.mcl_table = mcl_name
		if mcl_name == splat_name:
			#the replacement changed nothing, so both tables would collide
			sys.stderr.write("Error: new splat and mcl tables have the same name, %s\n"%splat_name)
			sys.exit(2)
コード例 #3
0
ファイル: MpiCrackSplat.py プロジェクト: polyactis/annot
    def run(self):
        """
        Drive the MPI pipeline: feed clusters, compute, collect output, then sort.

        Roles by rank, as assigned below:
            rank 0             -- input_node(); after the final synchronisation
                                  it runs uniqueSort() on the intermediate file
            ranks 1 .. size-2  -- computing_node()
            rank size-1        -- fill_edge2encodedOccurrence(), then output_node()
                                  writing tab-delimited rows to "<outputfile>.unsorted"

        The intermediate file is closed explicitly before the second
        mpi_synchronize() call so that its content is flushed before rank 0
        reads it in uniqueSort().

        History:
        09-05-05
            Watch: when sending via MPI, tag 0 means from node 0, tag 1 means
            goes to the last node.
        10-21-05
            replace output_node() with the one from codense.common for better
            scheduling
        """
        communicator = MPI.world.duplicate()
        node_rank = communicator.rank
        intermediateFile = "%s.unsorted" % self.outputfile  # intermediateFile to store concatenated results
        if communicator.rank == (communicator.size - 1):
            # only the output node needs the edge -> encoded occurrence mapping
            edge2encodedOccurrence = {}
            no_of_datasets = self.fill_edge2encodedOccurrence(
                self.hostname, self.dbname, self.schema, edge2encodedOccurrence, self.min_sup, self.max_sup
            )

        mpi_synchronize(communicator)

        if node_rank == 0:
            self.input_node(
                communicator, self.inputfile, self.min_size, self.cluster_block_size, self.cluster_block_edges
            )
        elif node_rank <= communicator.size - 2:  # exclude the last node
            self.computing_node(communicator, self.cluster_block_size, self.min_size, self.min_con)
        elif node_rank == communicator.size - 1:
            codense2db_instance = codense2db()
            free_computing_nodes = range(1, communicator.size - 1)
            # keep a reference to the file object so that it can be closed
            # deterministically instead of relying on garbage collection
            intermediate_handle = open(intermediateFile, "w")
            try:
                writer = csv.writer(intermediate_handle, delimiter="\t")
                parameter_list = [writer, codense2db_instance, edge2encodedOccurrence, no_of_datasets]
                output_node(
                    communicator,
                    free_computing_nodes,
                    parameter_list,
                    self.output_cluster,
                    report=self.report,
                    type=Numeric.Int,
                )
                del writer
            finally:
                intermediate_handle.close()

        mpi_synchronize(communicator)
        # collecting
        if node_rank == 0:
            MpiFromDatasetSignatureToPattern_instance = MpiFromDatasetSignatureToPattern()
            MpiFromDatasetSignatureToPattern_instance.uniqueSort(intermediateFile, self.outputfile)
コード例 #4
0
	def parse_cluster_fname(self, curs, cluster_fname, gim_inputfname, cluster_id_set, schema_instance):
		"""
		Parse the requested rows of cluster_fname and submit their clusters to the database.

		The cluster_id of a row is its 1-based line number in cluster_fname.
		Only rows whose line number is in cluster_id_set are parsed (through
		codense2db's fimbfs_parser); every resulting cluster gets its
		unknown_gene_ratio and cluster_id set and is submitted to
		schema_instance.pattern_table.

		Reading stops as soon as every requested row has been seen. The count
		is kept per matched row rather than per cluster, because fimbfs_parser
		returns a list of clusters for one row and a per-cluster count could
		reach len(cluster_id_set) before all requested rows were read.

		Arguments:
			curs: database cursor handed to the codense2db helpers
			cluster_fname: tab-delimited cluster file
			gim_inputfname: input for get_gene_no2incidence_array()
			cluster_id_set: collection of 1-based line numbers to keep
			schema_instance: supplies splat_table, mcl_table and pattern_table

		Returns:
			dict mapping cluster_id -> [connectivity, unknown_gene_ratio, vertex_set]

		01-24-06
			a lot of analogy to codense2db.py's run()
		"""
		sys.stderr.write("Parsing cluster_fname: %s ...\n"%os.path.basename(cluster_fname))
		codense2db_instance  = codense2db()
		codense2db_instance.create_tables(curs, schema_instance.splat_table, \
			schema_instance.mcl_table, schema_instance.pattern_table)
		gene_id2gene_no = get_gene_id2gene_no(curs)
		gene_no2incidence_array = get_gene_no2incidence_array(gim_inputfname, gene_id2gene_no)
		known_gene_no2go_no_set = get_known_genes_dict(curs)
		counter = 0	#rows read so far
		real_counter = 0	#clusters submitted so far (reported only)
		matched_rows = 0	#rows whose line number was requested
		no_of_wanted_rows = len(cluster_id_set)
		cluster_id2properties = {}	#additional properties for prediction_pair2instance
		inf = open(cluster_fname, 'r')
		try:
			reader = csv.reader(inf, delimiter='\t')
			for row in reader:
				counter += 1
				#only those who are in cluster_id_set
				if counter in cluster_id_set:	#cluster_id starts from 1
					matched_rows += 1
					cluster_list = codense2db_instance.fimbfs_parser(row, gene_no2incidence_array, curs)
					for cluster in cluster_list:
						real_counter += 1
						cluster.unknown_gene_ratio = codense2db_instance.calculate_unknown_gene_ratio(cluster.vertex_set, \
							known_gene_no2go_no_set)
						cluster.cluster_id = counter	#line number is the cluster_id
						codense2db_instance.db_submit(curs, cluster, schema_instance.pattern_table)
						#NOTE(review): several clusters from one row share a cluster_id, so the last one wins here
						cluster_id2properties[cluster.cluster_id] = [cluster.connectivity, cluster.unknown_gene_ratio, cluster.vertex_set]
				if matched_rows==no_of_wanted_rows:
					#all relevant rows have been seen, ignore remaining clusters
					break
				if self.report and counter%2000==0:
					sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
		finally:
			#close the input even when parsing or the database submission fails
			inf.close()
		if self.report:
			sys.stderr.write("%s%s/%s"%('\x08'*20, counter, real_counter))
		sys.stderr.write("Done.\n")
		return cluster_id2properties
コード例 #5
0
ファイル: cluster_info.py プロジェクト: polyactis/annot
	def __init__(self, hostname='zhoudb', dbname='graphdb', schema=None, table=None, mcl_table=None, \
			gene_p_table=None, gene_table=None, function=0, functioncolor='green', centralnode=1, mcl_id=1, \
			type=1, output_fname=None, plot_type="dot", label=1):
		"""
		Store the settings and create the empty lookup structures.

		function, centralnode, mcl_id, type and label are converted with int();
		all other arguments are stored as passed.
		"""
		#database location and table names
		self.hostname, self.dbname, self.schema = hostname, dbname, schema
		self.table, self.mcl_table = table, mcl_table
		self.gene_p_table, self.gene_table = gene_p_table, gene_table

		#plotting/selection options
		self.function = int(function)
		self.functioncolor = functioncolor
		for option_name, raw_value in (('centralnode', centralnode), ('mcl_id', mcl_id), ('type', type)):
			setattr(self, option_name, int(raw_value))
		self.output_fname, self.plot_type = output_fname, plot_type
		self.label = int(label)

		#04-06-05 other initializations
		self.edge_table = 'edge_cor_vector'	#the table for edge_correlation_vector
		self.go_no2go_id = {}	#mapping between go_no and go_id
		self.go_no2go_name = {}	#mapping between go_no and go's name
		#mappings between gene_no and gene_id, one per direction
		self.gene_no2gene_id = {}
		self.gene_id2gene_no = {}
		self.global_gene_to_go_dict = {}
		self.label_dict = {}

		self.order_1st_id2all_clusters = {}
		self.codense2db_instance = codense2db()
コード例 #6
0
	def patternFormation(self, signature2pattern, node_outputfile, no_cc, \
		edge_sig_vector_queue, no_of_datasets, debug):
		"""
		Consume edge rows from a queue and flush every signature whose pattern is complete.

		Rows are taken from edge_sig_vector_queue until the sentinel -1 is
		received. In each row the first two entries are the edge and the third
		is its already bit-encoded occurrence. An edge is appended to every
		signature whose bits are all set in that occurrence. Once a signature
		has collected as many edges as its frequency (the first entry of its
		list in signature2pattern), the edge list is passed to
		outputCcFromEdgeList() and the signature is deleted.

		The output file is closed in a finally clause, so it does not stay
		open when the loop raises.

		Arguments:
			signature2pattern: dict, signature -> [frequency, edge, edge, ...];
				modified in place
			node_outputfile: name of the file opened for writing and handed to
				outputCcFromEdgeList()
			no_cc: passed through to outputCcFromEdgeList()
			edge_sig_vector_queue: object whose get() returns the next row or -1
			no_of_datasets: only used to decode the occurrence for debug output
			debug: when true, write details of every match to stderr

		History:
		08-24-05	edge_sig_matrix replaces the intermediateFile
		01-04-06	1st part split out to be readin_signature2pattern()
		"""
		sys.stderr.write("Thread of node %s starts patternFormation ...\n"%(self.rank))
		of = open(node_outputfile, 'w')
		try:
			codense2db_instance = codense2db()
			edge_occurrenceBinaryForm_row = edge_sig_vector_queue.get()
			while edge_occurrenceBinaryForm_row!= -1:
				edge = edge_occurrenceBinaryForm_row[:2]
				occurrenceBinaryForm = edge_occurrenceBinaryForm_row[2]	#08-24-05	already encoded when edge_sig_matrix is filled in
				signatureToBeDeleted = []
				for signature in signature2pattern:
					frequency = signature2pattern[signature][0]
					if (occurrenceBinaryForm&signature)==signature:
						signature2pattern[signature].append(edge)
						if debug:
							sys.stderr.write("the occurrence_vector of edge %s is %s\n"%(repr(edge), \
								repr(decodeOccurrenceToBv(occurrenceBinaryForm, no_of_datasets))))
							sys.stderr.write("occurrence_vector's binary form is %s, signature is %s\n"%(occurrenceBinaryForm, signature))
						if len(signature2pattern[signature]) == frequency+1:	#the 1st entry is frequency
							signatureToBeDeleted.append(signature)
							if debug:
								sys.stderr.write("signature %s to be deleted, its pattern is %s\n"%(signature, repr(signature2pattern[signature])))
				#deletion is postponed until the dictionary is no longer being iterated
				for signature in signatureToBeDeleted:
					edge_list = signature2pattern[signature][1:]
					outputCcFromEdgeList(of, signature, edge_list, codense2db_instance, no_cc)
					del signature2pattern[signature]
				edge_occurrenceBinaryForm_row = edge_sig_vector_queue.get()
			#NOTE(review): the threshold is >1, so a single leftover signature is not reported -- confirm this is intended
			if len(signature2pattern)>1:
				sys.stderr.write('Weird %s signatures are still available\n'%len(signature2pattern))
				if debug:
					sys.stderr.write('%s\n'%repr(signature2pattern))
		finally:
			of.close()
		sys.stderr.write("Thread of node %s patternFormation done.\n"%(self.rank))