def run(self):
    """
    Run the MPI prediction pipeline; every rank takes exactly one role.

    History:
        09-05-05
        10-23-05 create views from old schema; result goes to the new
            schema's p_gene_table
        01-02-06 input and output are all directed to plain tab-delimited
            files (self.input_fname is read, self.jnput_fname is written);
            the schema/view set-up is disabled

    Rank layout (size = communicator.size):
        0            input node: loads the three lookup structures from the
                     database, ships them to the computing nodes, then
                     feeds the rows of self.input_fname to them
        1 .. size-3  computing nodes: receive the lookup structures from
                     rank 0 and go_no2edge_counter_list from rank size-1,
                     then run GradientScorePrediction
        size-2       judge node: runs self.judge_node()
        size-1       output node: loads or computes
                     go_no2edge_counter_list, ships it to the computing
                     nodes, then writes results to self.jnput_fname

    Call outline (original notes; parts of it predate 01-02-06):
        (input_node)
            --db_connect()
            --form_schema_tables()
            --form_schema_tables()
            --get_gene_no2go_no_set()
            --get_go_no2depth()
            (pass data to computing_node)
        (computing_node)
            (take data from other nodes, 0 and size-1)
        (judge_node)
            --gene_stat()
            --db_connect()
            --gene_p_map_redundancy()
        (output_node)
            --db_connect()
            --form_schema_tables()
            --form_schema_tables()
            --MpiPredictionFilter()
            --MpiPredictionFilter_instance.createGeneTable()
            --get_go_no2edge_counter_list()(if necessary)
            (pass go_no2edge_counter_list to computing_node)

        (input_node)
            --fetch_cluster_block()
        (computing_node)
            --get_no_of_unknown_genes()
            --node_fire_handler()
            --cleanup_handler()
        --judge_node()
            --gene_stat_instance.(match functions)
        --output_node()
            --output_node_handler()
            --MpiPredictionFilter_instance.submit_to_p_gene_table()

    Every file opened here is closed explicitly (try/finally) instead of
    being left to garbage collection, so the output file is flushed and
    closed even if a node function raises.
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank

    # ---- phase 1: per-role set-up and distribution of shared data ----
    if node_rank == 0:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # 01-02-06 disabled:
        #   old_schema_instance = form_schema_tables(self.input_fname)
        #   new_schema_instance = form_schema_tables(self.jnput_fname)
        gene_no2go_no = get_gene_no2go_no_set(curs)
        gene_no2go_no_pickle = cPickle.dumps(gene_no2go_no, -1)  # -1 means use the highest protocol
        go_no2depth = get_go_no2depth(curs)
        go_no2depth_pickle = cPickle.dumps(go_no2depth, -1)
        go_no2gene_no_set = get_go_no2gene_no_set(curs)
        go_no2gene_no_set_pickle = cPickle.dumps(go_no2gene_no_set, -1)
        # The send order below must match the receive order on the computing nodes.
        for node in range(1, communicator.size-2):  # send it to the computing_node
            communicator.send(gene_no2go_no_pickle, node, 0)
            communicator.send(go_no2depth_pickle, node, 0)
            communicator.send(go_no2gene_no_set_pickle, node, 0)
    elif node_rank <= communicator.size-3:  # WATCH: last 2 nodes are not here.
        data, source, tag = communicator.receiveString(0, 0)
        gene_no2go_no = cPickle.loads(data)  # take the data
        data, source, tag = communicator.receiveString(0, 0)
        go_no2depth = cPickle.loads(data)
        data, source, tag = communicator.receiveString(0, 0)
        go_no2gene_no_set = cPickle.loads(data)
        data, source, tag = communicator.receiveString(communicator.size-1, 0)  # from the last node
        go_no2edge_counter_list = cPickle.loads(data)
        # choose a functor for recurrence_array
        functor_dict = {0: None,
            1: lambda x: int(x >= self.recurrence_x),
            2: lambda x: math.pow(x, self.recurrence_x)}
        functor = functor_dict[self.recurrence_x_type]
    elif node_rank == communicator.size-2:  # judge node
        gene_stat_instance = gene_stat(depth_cut_off=self.depth)
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        gene_stat_instance.dstruc_loadin(curs)
        from gene_p_map_redundancy import gene_p_map_redundancy
        node_distance_class = gene_p_map_redundancy()
    elif node_rank == communicator.size-1:
        # establish connection before pursuing
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        # 01-02-06 disabled, input and output are all directed to files:
        #   old_schema_instance = form_schema_tables(self.input_fname)
        #   new_schema_instance = form_schema_tables(self.jnput_fname)
        #   MpiPredictionFilter_instance = MpiPredictionFilter()
        #   MpiPredictionFilter_instance.view_from_table(curs, old_schema_instance.splat_table, new_schema_instance.splat_table)
        #   MpiPredictionFilter_instance.view_from_table(curs, old_schema_instance.mcl_table, new_schema_instance.mcl_table)
        #   MpiPredictionFilter_instance.view_from_table(curs, old_schema_instance.pattern_table, new_schema_instance.pattern_table)
        #   if self.new_table:
        #       MpiPredictionFilter_instance.createGeneTable(curs, new_schema_instance.p_gene_table)
        if self.go_no2edge_counter_list_fname:
            # Binary mode: pickles written with a binary protocol must not go
            # through text-mode newline translation. Close the handle
            # deterministically instead of leaking it.
            # NOTE(review): this unpickles a file named by configuration; only
            # point it at trusted files.
            pickle_file = open(self.go_no2edge_counter_list_fname, 'rb')
            try:
                go_no2edge_counter_list = cPickle.load(pickle_file)
            finally:
                pickle_file.close()
        elif self.eg_d_type == 2:
            go_no2edge_counter_list = None
        else:
            gene_no2go_no = get_gene_no2go_no_set(curs)
            go_no2edge_counter_list = get_go_no2edge_counter_list(curs, gene_no2go_no, self.edge_type2index)
        go_no2edge_counter_list_pickle = cPickle.dumps(go_no2edge_counter_list, -1)
        for node in range(1, communicator.size-2):  # send it to the computing_node
            communicator.send(go_no2edge_counter_list_pickle, node, 0)

    mpi_synchronize(communicator)

    # ---- phase 2: the actual work ----
    free_computing_nodes = range(1, communicator.size-2)  # exclude the last node
    if node_rank == 0:
        # Disabled database input, kept for reference:
        #   curs.execute("DECLARE crs CURSOR FOR SELECT id, vertex_set, edge_set, no_of_edges,\
        #   connectivity, unknown_gene_ratio, recurrence_array, d_matrix from %s"%(old_schema_instance.pattern_table))
        self.counter = 0  # 01-02-06 counter is used as id
        input_file = open(self.input_fname, 'r')
        try:
            reader = csv.reader(input_file, delimiter='\t')
            parameter_list = [reader]
            input_node(communicator, parameter_list, free_computing_nodes, self.message_size,
                self.report, input_handler=self.input_handler)
            del reader
        finally:
            input_file.close()
    elif node_rank in free_computing_nodes:
        no_of_unknown_genes = get_no_of_unknown_genes(gene_no2go_no)
        GradientScorePrediction_instance = GradientScorePrediction(gene_no2go_no, go_no2gene_no_set, go_no2depth,
            go_no2edge_counter_list, no_of_unknown_genes, self.depth, self.min_layer1_associated_genes,
            self.min_layer1_ratio, self.min_layer2_associated_genes, self.min_layer2_ratio, self.exponent,
            self.score_list, self.max_layer, self.norm_exp, self.eg_d_type, self.debug)
        parameter_list = [GradientScorePrediction_instance, functor]
        computing_node(communicator, parameter_list, self.node_fire_handler, self.cleanup_handler, self.report)
    elif node_rank == communicator.size-2:
        self.judge_node(communicator, curs, gene_stat_instance, node_distance_class)
    elif node_rank == communicator.size-1:
        # 01-02-06 output goes to plain file, not database
        output_file = open(self.jnput_fname, 'w')
        try:
            writer = csv.writer(output_file, delimiter='\t')
            parameter_list = [writer]
            output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler, self.report)
            del writer
        finally:
            # Closing flushes buffered rows even when output_node() raises.
            output_file.close()
def run(self):
    """
    Run the MPI prediction filter; every rank takes exactly one role.

    History:
        10-05-05
        10-12-05 use max_layer to control whether to turn on the gradient
            or not
        10-16-05 transformed to MPI version

    Rank layout (size = communicator.size):
        0            input node: builds the cursor statement, optionally
                     computes mcl_id2accuracy, ships gene_no2go and
                     mcl_id2accuracy to the computing nodes, then runs
                     self.input_node()
        1 .. size-2  computing nodes: receive both structures from rank 0
                     and run self.computing_node()
        size-1       output node: creates the views and the gene table in
                     the new schema, runs output_node(), and issues "end"
                     when self.commit is set

    Call outline (original notes):
        if node_rank==0
            --db_connect()
            --form_schema_tables()
            --form_schema_tables()
            --get_gene_no2go_no_set()
            --get_mcl_id2accuracy()
        elif computing_node:
            (prepare data)
        elif output_node:
            --db_connect()
            --form_schema_tables()
            --form_schema_tables()
            --view_from_table()
            --view_from_table()
            --view_from_table()
            --createGeneTable()

        --mpi_synchronize()

        if input_node:
            --input_node()
                --fetch_predictions()
        elif computing_node:
            --computing_node()
                --node_fire()
                    --gradient_class()
        elif output_node:
            --output_node()
                --output_node_handler()
                    --submit_to_p_gene_table()

    The p_gene column list shared by both cursor statements is written
    once, so the two variants cannot drift apart, and the SQL no longer
    carries source-file indentation inside the string.
    """
    communicator = MPI.world.duplicate()
    node_rank = communicator.rank

    # ---- phase 1: per-role set-up and distribution of shared data ----
    if node_rank == 0:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        old_schema_instance = form_schema_tables(self.input_fname)
        # NOTE(review): new_schema_instance is not used on this rank; kept
        # because form_schema_tables() is opaque from here.
        new_schema_instance = form_schema_tables(self.jnput_fname)
        gene_no2go = get_gene_no2go_no_set(curs)
        gene_no2go_pickle = cPickle.dumps(gene_no2go, -1)  # -1 means use the highest protocol

        # Columns read from the p_gene table in both variants of the cursor.
        p_gene_columns = (
            'p.p_gene_id, p.gene_no, p.go_no, p.is_correct, p.is_correct_l1, '
            'p.is_correct_lca, p.avg_p_value, p.no_of_clusters, p.cluster_array, '
            'p.p_value_cut_off, p.recurrence_cut_off, p.connectivity_cut_off, '
            'p.cluster_size_cut_off, p.unknown_cut_off, p.depth_cut_off, p.mcl_id, '
            'p.lca_list, p.vertex_gradient, p.edge_gradient')
        if self.max_layer:
            # Gradient on: join with the pattern table for the real pattern data.
            crs_sentence = ('DECLARE crs CURSOR FOR SELECT %s, p2.vertex_set, p2.edge_set, '
                'p2.d_matrix, p2.recurrence_array from %s p, %s p2 where p.mcl_id=p2.id'
                % (p_gene_columns, old_schema_instance.p_gene_table, old_schema_instance.pattern_table))
        else:
            # some placeholders 'vertex_set', 'edge_set', 'd_matrix' for prediction_attributes()
            crs_sentence = ("DECLARE crs CURSOR FOR SELECT %s, 'vertex_set', 'edge_set', "
                "'d_matrix', 'recurrence_array' from %s p"
                % (p_gene_columns, old_schema_instance.p_gene_table))

        if self.acc_cut_off:
            mcl_id2accuracy = self.get_mcl_id2accuracy(curs, old_schema_instance.p_gene_table,
                crs_sentence, self.is_correct_type)
        else:
            mcl_id2accuracy = None
        mcl_id2accuracy_pickle = cPickle.dumps(mcl_id2accuracy, -1)  # -1 means use the highest protocol
        # Receivers expect gene_no2go first, then mcl_id2accuracy.
        for node in range(1, communicator.size-1):  # send it to the computing_node
            communicator.send(gene_no2go_pickle, node, 0)
        for node in range(1, communicator.size-1):  # send it to the computing_node
            communicator.send(mcl_id2accuracy_pickle, node, 0)
    elif node_rank <= communicator.size-2:  # exclude the last node
        data, source, tag = communicator.receiveString(0, 0)
        gene_no2go = cPickle.loads(data)  # take the data
        data, source, tag = communicator.receiveString(0, 0)
        mcl_id2accuracy = cPickle.loads(data)  # take the data
        # choose a functor for recurrence_array
        functor_dict = {0: None,
            1: lambda x: int(x >= self.recurrence_x),
            2: lambda x: math.pow(x, self.recurrence_x)}
        functor = functor_dict[self.recurrence_x_type]
    elif node_rank == communicator.size-1:
        (conn, curs) = db_connect(self.hostname, self.dbname, self.schema)
        old_schema_instance = form_schema_tables(self.input_fname)
        new_schema_instance = form_schema_tables(self.jnput_fname)
        self.view_from_table(curs, old_schema_instance.splat_table, new_schema_instance.splat_table)
        self.view_from_table(curs, old_schema_instance.mcl_table, new_schema_instance.mcl_table)
        self.view_from_table(curs, old_schema_instance.pattern_table, new_schema_instance.pattern_table)
        self.createGeneTable(curs, new_schema_instance.p_gene_table)

    mpi_synchronize(communicator)

    # ---- phase 2: the actual work ----
    if node_rank == 0:
        self.input_node(communicator, curs, old_schema_instance, crs_sentence, self.size)
    elif node_rank <= communicator.size-2:  # exclude the last node
        self.computing_node(communicator, gene_no2go, self.exponent, self.score_list,
            self.max_layer, self.norm_exp, self.eg_d_type, mcl_id2accuracy, self.acc_cut_off, functor)
    elif node_rank == communicator.size-1:
        parameter_list = [curs, new_schema_instance.p_gene_table]
        free_computing_nodes = range(1, communicator.size-1)
        output_node(communicator, free_computing_nodes, parameter_list, self.output_node_handler)
        # NOTE(review): the flattened source does not show the nesting of this
        # statement; it is kept in the output-node branch because computing
        # ranks never bind curs.
        if self.commit:
            curs.execute("end")