def run(self): """ 09-05-05 2006-09-21 add fuzzyDense_flag 2006-11-02 add tfbs_association_type 2006-11-02 differentiate good_cluster_table as pattern_xxx or good_xxx for pattern id --db_connect() --get_gene_no2bs_no_block() --construct_two_dicts() --input_node() --fetch_cluster_block() --computing_node() --node_fire() --cluster_bs_analysis() --create_cluster_bs_table() --output_node() --submit_cluster_bs_table() """ communicator = MPI.world.duplicate() node_rank = communicator.rank free_computing_nodes = range(1,communicator.size-1) print self.tfbs_association_type if node_rank == 0: (conn, curs) = db_connect(self.hostname, self.dbname, self.schema) if self.tfbs_association_type==1: #2006-11-02 gene_no2bs_no_block = self.get_gene_no2bs_no_block(curs) elif self.tfbs_association_type==2: gene_no2bs_no_block = get_gene_no2bs_no_block_from_expt_tf_mapping(curs) for node in range(1, communicator.size-1): #send it to the computing_node communicator.send(gene_no2bs_no_block, node, 0) if self.fuzzyDense_flag: #2006-09-21 add fuzzyDense_flag #12-18-05 get edge2encodedOccurrence MpiCrackSplat_instance = MpiCrackSplat() edge2encodedOccurrence = {} min_sup = 5 #need to expose them max_sup = 40 total_vertex_set = self.return_total_vertex_set(curs, self.good_cluster_table) edge2encodedOccurrence, no_of_datasets = self.fill_edge2encodedOccurrence(\ self.sig_vector_fname, min_sup, max_sup, total_vertex_set) edge2encodedOccurrence_pickle = cPickle.dumps(edge2encodedOccurrence, -1) for node in free_computing_nodes: #send it to the computing_node communicator.send(edge2encodedOccurrence_pickle, node, 0) elif node_rank>0 and node_rank<communicator.size-1: data, source, tag, count = communicator.receive(Numeric.Int, 0, 0) gene_no2bs_no_set, bs_no2gene_no_set = self.construct_two_dicts(node_rank, data) if self.fuzzyDense_flag: #2006-09-21 #12-18-05 data, source, tag = communicator.receiveString(0, 0) edge2encodedOccurrence = cPickle.loads(data) elif node_rank==communicator.size-1: #establish connection before pursuing (conn, curs) = db_connect(self.hostname, self.dbname, self.schema) #12-20-05 for darwin output gene_id2symbol = get_gene_id2gene_symbol(curs, self.tax_id) dataset_no2desc = get_dataset_no2desc(curs) mpi_synchronize(communicator) if node_rank == 0: if self.good_cluster_table.find('pattern')!=-1: #2006-11-02 it's pattern_xxx table, use id as pattern_id curs.execute("DECLARE crs CURSOR FOR select distinct id, vertex_set, recurrence_array\ from %s "%(self.good_cluster_table)) else: #2006-11-02 it's good_xxx table, use mcl_id as pattern_id curs.execute("DECLARE crs CURSOR FOR select distinct mcl_id, vertex_set, recurrence_array\ from %s "%(self.good_cluster_table)) input_node(communicator, curs, free_computing_nodes, self.size, self.report) curs.execute("close crs") elif node_rank<=communicator.size-2: #exclude the last node if self.fuzzyDense_flag: #2006-09-21 fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence) else: fuzzyDense_instance = None parameter_list = [gene_no2bs_no_set, bs_no2gene_no_set, self.ratio_cutoff, \ self.top_number, self.p_value_cut_off, fuzzyDense_instance, self.degree_cut_off, self.fuzzyDense_flag] computing_node(communicator, parameter_list, self.computing_node_handler, report=self.report) elif node_rank==communicator.size-1: #12-20-05 comment out if self.new_table: self.create_cluster_bs_table(curs, self.cluster_bs_table) parameter_list = [curs, self.cluster_bs_table] output_node(communicator, free_computing_nodes, parameter_list, self.submit_cluster_bs_table, report=self.report) if self.commit: curs.execute("end") """
recurrence_array = recurrence_array[1:-1].split(',') recurrence_array = map(float, recurrence_array) fuzzyDense_instance = fuzzyDense(edge2encodedOccurrence, debug) core_vertex_ls, recurrent_and_on_datasets_ls = fuzzyDense_instance.get_core_vertex_set(vertex_list, recurrence_array, degree_cut_off) from MpiClusterBsStat import MpiClusterBsStat MpiClusterBsStat_instance = MpiClusterBsStat() gene_no2bs_no_block = MpiClusterBsStat_instance.get_gene_no2bs_no_block(curs) gene_no2bs_no_set, bs_no2gene_no_set = MpiClusterBsStat_instance.construct_two_dicts(0, gene_no2bs_no_block) from TF_functions import cluster_bs_analysis ls_to_return = cluster_bs_analysis(core_vertex_ls, gene_no2bs_no_set, bs_no2gene_no_set, ratio_cutoff, \ top_number, p_value_cut_off) gene_id2symbol = get_gene_id2gene_symbol(curs, tax_id) dataset_no2desc = get_dataset_no2desc(curs) dataset_no_desc_ls = [] for dataset_index in recurrent_and_on_datasets_ls: dataset_no = dataset_index +1 dataset_no_desc_ls.append([dataset_no, dataset_no2desc[dataset_no]]) outf = open(output_file, 'w') outf.write("out:=[\n") for i in range(len(ls_to_return)): row = ls_to_return[i] score, score_type, bs_no_list, target_gene_no_list, global_ratio, local_ratio, expected_ratio, unknown_ratio = row core_vertex_symbol_ls = dict_map(gene_id2symbol, core_vertex_ls) bs_no_symbol_list = dict_map(gene_id2symbol, bs_no_list) if i == len(ls_to_return)-1: