def run(self):
    """
    Split the MPI ranks into three roles: rank 0 feeds work, the last rank writes output,
    and every rank in between is a computing node.
    """
    self.communicator = MPI.world.duplicate()
    node_rank = self.communicator.rank
    free_computing_nodes = range(1, self.communicator.size-1)    #exclude the 1st and last node
    free_computing_node_set = Set(free_computing_nodes)
    output_node_rank = self.communicator.size-1

    db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user, password=self.db_passwd,
                                hostname=self.hostname, database=self.dbname, schema=self.schema)
    db.setup(create_tables=False)
    session = db.session
    session.begin()

    if node_rank == 0:
        snps_context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest)
        if not self.results_id_ls:
            pdata = PassingData(call_method_id=self.call_method_id, analysis_method_id=self.analysis_method_id)
            self.results_id_ls = self.getResultsMethodIDLs(pdata)

        snps_context_wrapper_pickle = cPickle.dumps(snps_context_wrapper, -1)
        for node in free_computing_nodes:    #send it to each computing node
            sys.stderr.write("passing initial data to nodes from %s to %s ... "%(node_rank, node))
            self.communicator.send(snps_context_wrapper_pickle, node, 0)
            sys.stderr.write(".\n")
        del snps_context_wrapper_pickle, snps_context_wrapper
    elif node_rank in free_computing_node_set:
        data, source, tag = self.communicator.receiveString(0, 0)
        snps_context_wrapper = cPickle.loads(data)
        del data
    else:
        pass

    self.synchronize()
    if node_rank == 0:
        param_obj = PassingData(params_ls=self.results_id_ls, output_node_rank=output_node_rank, report=self.report, counter=0)
        self.input_node(param_obj, free_computing_nodes, input_handler=self.input_fetch_handler, message_size=self.message_size)
    elif node_rank in free_computing_node_set:
        param_data = PassingData(session=session)
        param_data.results_directory = self.input_db_directory
        param_data.default_output_db_directory = self.default_output_db_directory
        param_data.output_db_directory = self.output_db_directory
        param_data.commit = self.commit
        param_data.min_MAF = self.min_MAF
        param_data.min_distance = self.min_distance
        param_data.get_closest = self.get_closest
        param_data.snps_context_wrapper = snps_context_wrapper
        self.computing_node(param_data, self.computing_node_handler)
    else:
        param_obj = PassingData()
        self.output_node(free_computing_nodes, param_obj, self.output_node_handler)
    self.synchronize()    #to avoid some nodes exiting early
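# Aside (not part of the original class): run() above ships the SNP-context object to every
# computing node as a byte string built with cPickle.dumps(obj, -1), where -1 selects the
# highest pickle protocol available (binary, smaller and faster than the default protocol 0).
# A minimal, self-contained sketch of that round-trip, using only the Python 2 standard
# library and a hypothetical payload dict standing in for snps_context_wrapper:
import cPickle

def _pickle_round_trip_demo():
    payload = {'chr1': [(1000, 'gene_a'), (2000, 'gene_b')]}    #hypothetical stand-in object
    pickled = cPickle.dumps(payload, -1)    #-1 => highest protocol, same call as in run() above
    # ...the pickled string would travel over MPI as a plain string message...
    restored = cPickle.loads(pickled)    #receiving side rebuilds the original object
    assert restored == payload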
def run(self): """ 2008-07-17 """ if self.debug: import pdb pdb.set_trace() db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user, password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema) db.setup(create_tables=False) session = db.session session.begin() snps_context_wrapper = self.dealWithSnpsContextWrapper( self.snps_context_picklef, self.min_distance, self.get_closest) param_data = PassingData() param_data.results_directory = self.input_db_directory param_data.default_output_db_directory = self.default_output_db_directory param_data.output_db_directory = self.output_db_directory param_data.commit = self.commit param_data.min_MAF = self.min_MAF param_data.min_distance = self.min_distance param_data.get_closest = self.get_closest param_data.snps_context_wrapper = snps_context_wrapper if not self.results_id_ls: pdata = PassingData(call_method_id=self.call_method_id, analysis_method_id=self.analysis_method_id, \ phenotype_method_id_ls=self.phenotype_method_id_ls) self.results_id_ls = self.getResultsMethodIDLs(pdata) for results_method_id in self.results_id_ls: rm = Stock_250kDB.ResultsMethod.get(results_method_id) if not rm: sys.stderr.write( "No results method available for results_method_id=%s.\n" % results_method_id) continue self.saveResultsByGene(session, rm, param_data) if self.commit: session.commit() session.clear() else: session.rollback()
def run(self): """ 2008-07-17 """ if self.debug: import pdb pdb.set_trace() db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user, password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema) db.setup(create_tables=False) session = db.session session.begin() snps_context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest) param_data = PassingData() param_data.results_directory = self.input_db_directory param_data.default_output_db_directory = self.default_output_db_directory param_data.output_db_directory = self.output_db_directory param_data.commit = self.commit param_data.min_MAF = self.min_MAF param_data.min_distance = self.min_distance param_data.get_closest = self.get_closest param_data.snps_context_wrapper = snps_context_wrapper if not self.results_id_ls: pdata = PassingData(call_method_id=self.call_method_id, analysis_method_id=self.analysis_method_id) self.results_id_ls = self.getResultsMethodIDLs(pdata) for results_method_id in self.results_id_ls: rm = Stock_250kDB.ResultsMethod.get(results_method_id) if not rm: sys.stderr.write("No results method available for results_method_id=%s.\n"%results_method_id) continue self.saveResultsByGene(session, rm, param_data) if self.commit: session.commit() session.clear() else: session.rollback()
def computing_node_handler(self, communicator, data, comp_param_obj):
    """
    2009-1-22
        deal with option self.store_null_data
    2008-11-12
        bring runHGTest() back to life
        turn off runEnrichmentTestToGetNullData()
    2008-10-31
        runEnrichmentTestToGetNullData() will get data at all the different no_of_top_snps or min_score values
    2008-10-26
        handle (min_score, rank_gap, stop_rank)
        handle the scenario that rank_gap is negative and so the parameters tried are descending.
    2008-08-20
    """
    node_rank = communicator.rank
    sys.stderr.write("Node no.%s working...\n"%node_rank)
    data = cPickle.loads(data)
    result_ls = []
    null_data_ls = []
    pd = PassingData(snps_context_wrapper=comp_param_obj.snps_context_wrapper,
                    no_of_total_genes=comp_param_obj.no_of_total_genes,
                    results_directory=comp_param_obj.results_directory,
                    min_MAF=comp_param_obj.min_MAF,
                    get_closest=self.get_closest, min_distance=self.min_distance,
                    no_of_top_snps=self.no_of_top_snps,    #2008-10-25 no_of_top_snps is useless. overwritten later.
                    min_sample_size=self.min_sample_size, test_type_id=self.test_type_id,
                    results_type=self.results_type, no_of_permutations=self.no_of_permutations,
                    no_of_min_breaks=self.no_of_min_breaks, type_id=comp_param_obj.type_id,
                    null_distribution_type_id=self.null_distribution_type_id,
                    allow_two_sample_overlapping=self.allow_two_sample_overlapping,
                    total_gene_id_ls=comp_param_obj.total_gene_id_ls,
                    min_score=self.min_score, commit=self.commit)    #2008-10-25 min_score is useless. overwritten later.

    #2008-10-25
    #if rank_gap is negative, stop_marker means the minimum cutoff
    #if rank_gap is positive, stop_marker means the maximum cutoff
    #both signs have to be swapped in the case of negative rank_gap
    """
    if self.rank_gap<0:
        stop_marker = -self.stop_rank
    else:
        stop_marker = self.stop_rank
    for results_id, list_type_id in data:
        if self.debug:
            sys.stderr.write("working on results_id=%s, list_type_id=%s, type_id=%s .\n"%(results_id, list_type_id, pd.type_id))
        i = 0    #reset it to zero!!
        if self.rank_gap<0:    #has to be less than -self.stop_rank in order to pass the first round, because stop_marker=-stop_rank when rank_gap<0.
            current_marker = stop_marker - 1
        else:
            current_marker = stop_marker - 1
        while current_marker<stop_marker:    #add one more layer to look at certain top genes
            if self.min_score is not None:
                current_marker = self.min_score + i*self.rank_gap
                pd.min_score = current_marker
            else:
                current_marker = self.no_of_top_snps + i*self.rank_gap
                pd.no_of_top_snps = current_marker
            if self.rank_gap<0:
                current_marker = -current_marker
            else:
                current_marker = current_marker
            pd.results_id = results_id
            pd.list_type_id = list_type_id
            if self.debug:
                sys.stderr.write("working on results_id=%s, list_type_id=%s, current_marker=%s.\n"%\
                                (pd.results_id, pd.list_type_id, current_marker))
            i += 1
            result = self.runHGTest(pd)
            if result is not None:
                result_ls.append(result)
    """

    pd.commit = 0    #commit once afterwards; committing inside the loop would trigger 'Lock wait timeout exceeded; try restarting transaction'
    for results_id, list_type_id, cutoff in data:
        if self.debug:
            sys.stderr.write("working on results_id=%s, list_type_id=%s, type_id=%s, cutoff %s.\n"%(results_id, list_type_id, pd.type_id, cutoff))
        pd.results_id = results_id
        pd.list_type_id = list_type_id
        if self.min_score:
            pd.min_score_ls = [cutoff]
            pd.min_score = cutoff
        else:
            pd.no_of_top_snps_ls = [cutoff]
            pd.no_of_top_snps = cutoff
        if self.store_null_data:
            return_data = self.runEnrichmentTestToGetNullData(comp_param_obj.session, pd)
        else:
            return_data = self.runHGTest(pd)
        if return_data:
            result_ls += return_data.result_ls
            null_data_ls += return_data.null_data_ls
        #if self.commit:
        #    comp_param_obj.session.flush()
    sys.stderr.write("Node no.%s done with %s results.\n"%(node_rank, len(result_ls)))
    return_data = PassingData(result_ls=result_ls, null_data_ls=null_data_ls)
    return return_data
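# Aside (not part of the original handler): pd.commit is forced to 0 above so nothing is
# committed while the worker loops over cutoffs; the expensive commit happens once later
# rather than per result, which avoids the MySQL 'Lock wait timeout exceeded' error noted
# in the comment. A minimal sketch of that deferred-commit pattern; 'session' and
# 'save_all_then_commit' are hypothetical names, not from this module:
def save_all_then_commit(session, results):
    for result in results:
        session.add(result)    #stage each result; no commit inside the loop
    session.commit()    #single commit => one short lock window instead of many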