Beispiel #1
0
	def run(self):
		"""
		Drive this MPI process through its role in the pipeline.

		The role is picked from the process's rank in a duplicate of MPI.world:
			rank 0			loads the SNP context wrapper, fills self.results_id_ls
							when it is empty, sends the pickled wrapper to every
							computing rank and then runs self.input_node()
			ranks 1..size-2	receive and unpickle the wrapper from rank 0, then
							run self.computing_node()
			rank size-1		runs self.output_node()
		Every rank opens its own database session and begins a transaction first.
		self.synchronize() is called between the hand-out of the wrapper and the
		main work, and once more at the very end.
		"""
		self.communicator = MPI.world.duplicate()
		comm = self.communicator
		my_rank = comm.rank
		last_rank = comm.size-1	#the last rank is reserved for output
		worker_ranks = range(1, last_rank)	#neither the 1st nor the last rank computes
		worker_rank_set = Set(worker_ranks)
		
		stock_db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		stock_db.setup(create_tables=False)
		session = stock_db.session
		session.begin()
		
		is_master = my_rank == 0
		is_worker = (not is_master) and my_rank in worker_rank_set
		
		#phase 1: rank 0 hands the SNP context wrapper to every computing rank
		if is_master:
			snps_context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest)
			if not self.results_id_ls:
				query_params = PassingData(call_method_id=self.call_method_id, analysis_method_id=self.analysis_method_id)
				self.results_id_ls = self.getResultsMethodIDLs(query_params)
			
			pickled_wrapper = cPickle.dumps(snps_context_wrapper, -1)
			for worker in worker_ranks:
				sys.stderr.write("passing initial data to nodes from %s to %s ... "%(my_rank, worker))
				comm.send(pickled_wrapper, worker, 0)
				sys.stderr.write(".\n")
			#rank 0 does not need the wrapper any more once it has been sent
			del pickled_wrapper, snps_context_wrapper
		elif is_worker:
			payload, _source, _tag = comm.receiveString(0, 0)
			snps_context_wrapper = cPickle.loads(payload)
			del payload
		
		self.synchronize()
		
		#phase 2: each rank runs the loop that belongs to its role
		if is_master:
			master_params = PassingData(params_ls=self.results_id_ls, output_node_rank=last_rank, report=self.report, counter=0)
			self.input_node(master_params, worker_ranks, input_handler=self.input_fetch_handler, message_size=self.message_size)
		elif is_worker:
			worker_params = PassingData(session=session,
									results_directory=self.input_db_directory,
									default_output_db_directory=self.default_output_db_directory,
									output_db_directory=self.output_db_directory,
									commit=self.commit,
									min_MAF=self.min_MAF,
									min_distance=self.min_distance,
									get_closest=self.get_closest,
									snps_context_wrapper=snps_context_wrapper)
			self.computing_node(worker_params, self.computing_node_handler)
		else:
			output_params = PassingData()
			self.output_node(worker_ranks, output_params, self.output_node_handler)
		
		self.synchronize()	#keeps any rank from exiting before the others are done
Beispiel #2
0
    def run(self):
        """
        Call self.saveResultsByGene() for every requested results-method id.

        Opens a Stock_250kDB session and begins a transaction, prepares the SNP
        context wrapper and the shared parameter object, then walks
        self.results_id_ls. When that list is empty it is first filled through
        self.getResultsMethodIDLs(). Ids for which no ResultsMethod entry is
        found are reported on stderr and skipped. The transaction is committed
        (and the session cleared) when self.commit is true, rolled back
        otherwise.

        History:
            2008-07-17
        """
        if self.debug:
            import pdb
            pdb.set_trace()

        stock_db = Stock_250kDB.Stock_250kDB(
            drivername=self.drivername, username=self.db_user,
            password=self.db_passwd, hostname=self.hostname,
            database=self.dbname, schema=self.schema)
        stock_db.setup(create_tables=False)
        session = stock_db.session
        session.begin()

        context_wrapper = self.dealWithSnpsContextWrapper(
            self.snps_context_picklef, self.min_distance, self.get_closest)
        gene_params = PassingData(
            results_directory=self.input_db_directory,
            default_output_db_directory=self.default_output_db_directory,
            output_db_directory=self.output_db_directory,
            commit=self.commit,
            min_MAF=self.min_MAF,
            min_distance=self.min_distance,
            get_closest=self.get_closest,
            snps_context_wrapper=context_wrapper)

        # no explicit ids given: derive them from the method ids
        if not self.results_id_ls:
            query_params = PassingData(
                call_method_id=self.call_method_id,
                analysis_method_id=self.analysis_method_id,
                phenotype_method_id_ls=self.phenotype_method_id_ls)
            self.results_id_ls = self.getResultsMethodIDLs(query_params)

        for results_id in self.results_id_ls:
            results_method = Stock_250kDB.ResultsMethod.get(results_id)
            if results_method:
                self.saveResultsByGene(session, results_method, gene_params)
            else:
                sys.stderr.write(
                    "No results method available for results_method_id=%s.\n" %
                    results_id)

        if not self.commit:
            session.rollback()
            return
        session.commit()
        session.clear()
Beispiel #3
0
	def run(self):
		"""
		Call self.saveResultsByGene() for every requested results-method id.

		Opens a Stock_250kDB session and begins a transaction, prepares the SNP
		context wrapper and the shared parameter object, then walks
		self.results_id_ls. When that list is empty it is first filled through
		self.getResultsMethodIDLs(). Ids for which no ResultsMethod entry is
		found are reported on stderr and skipped. The transaction is committed
		(and the session cleared) when self.commit is true, rolled back otherwise.

		History:
			2008-07-17
		"""
		if self.debug:
			import pdb
			pdb.set_trace()
		
		stock_db = Stock_250kDB.Stock_250kDB(drivername=self.drivername, username=self.db_user,
				   password=self.db_passwd, hostname=self.hostname, database=self.dbname, schema=self.schema)
		stock_db.setup(create_tables=False)
		session = stock_db.session
		session.begin()
		
		context_wrapper = self.dealWithSnpsContextWrapper(self.snps_context_picklef, self.min_distance, self.get_closest)
		gene_params = PassingData(results_directory=self.input_db_directory,
								default_output_db_directory=self.default_output_db_directory,
								output_db_directory=self.output_db_directory,
								commit=self.commit,
								min_MAF=self.min_MAF,
								min_distance=self.min_distance,
								get_closest=self.get_closest,
								snps_context_wrapper=context_wrapper)
		
		#no explicit ids given: derive them from the method ids
		if not self.results_id_ls:
			query_params = PassingData(call_method_id=self.call_method_id, analysis_method_id=self.analysis_method_id)
			self.results_id_ls = self.getResultsMethodIDLs(query_params)
		
		for results_id in self.results_id_ls:
			results_method = Stock_250kDB.ResultsMethod.get(results_id)
			if results_method:
				self.saveResultsByGene(session, results_method, gene_params)
			else:
				sys.stderr.write("No results method available for results_method_id=%s.\n"%results_id)
		
		if not self.commit:
			session.rollback()
			return
		session.commit()
		session.clear()
Beispiel #4
0
	def computing_node_handler(self, communicator, data, comp_param_obj):
		"""
		Run one test per (results_id, list_type_id, cutoff) triple handed to this node.

		communicator: only its rank is read, for the progress messages.
		data: pickled iterable of (results_id, list_type_id, cutoff) triples.
		comp_param_obj: supplies snps_context_wrapper, no_of_total_genes,
			results_directory, min_MAF, type_id, total_gene_id_ls and session.

		For every triple, self.runEnrichmentTestToGetNullData() is called when
		self.store_null_data is set, self.runHGTest() otherwise. Returns a
		PassingData whose result_ls and null_data_ls are the concatenation of
		the lists of every non-empty return.

		History:
		2009-1-22
			deal with option self.store_null_data
		2008-11-12
			turn runHGTest() back into life
			turn off runEnrichmentTestToGetNullData()
		2008-10-31
			runEnrichmentTestToGetNullData() is gonna get data at all different no_of_top_snps's or min_score's
		2008-10-26
			handle (min_score, rank_gap, stop_rank)
			handle scenario that rank_gap is negative and so the parameters tried are descending.
		2008-08-20
		"""
		node_rank = communicator.rank
		sys.stderr.write("Node no.%s working...\n"%node_rank)
		#NOTE(review): unpickles whatever it is handed; presumably the payload only ever comes from a peer node -- confirm
		task_ls = cPickle.loads(data)
		result_ls = []
		null_data_ls = []
		#commit is forced to 0: committing while the tests run would render
		#'Lock wait timeout exceeded; try restarting transaction'. commit once afterwards.
		#no_of_top_snps and min_score are only initial values; the one in use is overwritten for every triple below.
		test_param = PassingData(snps_context_wrapper=comp_param_obj.snps_context_wrapper,
							no_of_total_genes=comp_param_obj.no_of_total_genes,
							results_directory=comp_param_obj.results_directory,
							min_MAF=comp_param_obj.min_MAF,
							get_closest=self.get_closest,
							min_distance=self.min_distance,
							no_of_top_snps=self.no_of_top_snps,
							min_sample_size=self.min_sample_size,
							test_type_id=self.test_type_id,
							results_type=self.results_type,
							no_of_permutations=self.no_of_permutations,
							no_of_min_breaks=self.no_of_min_breaks,
							type_id=comp_param_obj.type_id,
							null_distribution_type_id=self.null_distribution_type_id,
							allow_two_sample_overlapping=self.allow_two_sample_overlapping,
							total_gene_id_ls=comp_param_obj.total_gene_id_ls,
							min_score=self.min_score,
							commit=0)
		
		for results_id, list_type_id, cutoff in task_ls:
			if self.debug:
				sys.stderr.write("working on results_id=%s, list_type_id=%s, type_id=%s, cutoff %s.\n"%(results_id, list_type_id, test_param.type_id, cutoff))
			test_param.results_id = results_id
			test_param.list_type_id = list_type_id
			#NOTE(review): a min_score of 0 counts as "not set" here, so the cutoff is then taken as no_of_top_snps -- confirm this is intended
			if self.min_score:
				test_param.min_score_ls = [cutoff]
				test_param.min_score = cutoff
			else:
				test_param.no_of_top_snps_ls = [cutoff]
				test_param.no_of_top_snps = cutoff
			if self.store_null_data:
				outcome = self.runEnrichmentTestToGetNullData(comp_param_obj.session, test_param)
			else:
				outcome = self.runHGTest(test_param)
			if outcome:
				result_ls.extend(outcome.result_ls)
				null_data_ls.extend(outcome.null_data_ls)
		
		sys.stderr.write("Node no.%s done with %s results.\n"%(node_rank, len(result_ls)))
		return PassingData(result_ls=result_ls, null_data_ls=null_data_ls)
Beispiel #5
0
    def computing_node_handler(self, communicator, data, comp_param_obj):
        """
        Run one test per (results_id, list_type_id, cutoff) triple handed to this node.

        communicator: only its rank is read, for the progress messages.
        data: pickled iterable of (results_id, list_type_id, cutoff) triples.
        comp_param_obj: supplies snps_context_wrapper, no_of_total_genes,
            results_directory, min_MAF, type_id, total_gene_id_ls and session.

        For every triple, self.runEnrichmentTestToGetNullData() is called when
        self.store_null_data is set, self.runHGTest() otherwise. Returns a
        PassingData whose result_ls and null_data_ls are the concatenation of
        the lists of every non-empty return.

        History:
        2009-1-22
            deal with option self.store_null_data
        2008-11-12
            turn runHGTest() back into life
            turn off runEnrichmentTestToGetNullData()
        2008-10-31
            runEnrichmentTestToGetNullData() is gonna get data at all different no_of_top_snps's or min_score's
        2008-10-26
            handle (min_score, rank_gap, stop_rank)
            handle scenario that rank_gap is negative and so the parameters tried are descending.
        2008-08-20
        """
        node_rank = communicator.rank
        sys.stderr.write("Node no.%s working...\n" % node_rank)
        # NOTE(review): unpickles whatever it is handed; presumably the payload
        # only ever comes from a peer node -- confirm
        task_ls = cPickle.loads(data)
        result_ls = []
        null_data_ls = []
        # commit is forced to 0: committing while the tests run would render
        # 'Lock wait timeout exceeded; try restarting transaction'. commit once
        # afterwards.
        # no_of_top_snps and min_score are only initial values; the one in use
        # is overwritten for every triple below.
        test_param = PassingData(
            snps_context_wrapper=comp_param_obj.snps_context_wrapper,
            no_of_total_genes=comp_param_obj.no_of_total_genes,
            results_directory=comp_param_obj.results_directory,
            min_MAF=comp_param_obj.min_MAF,
            get_closest=self.get_closest,
            min_distance=self.min_distance,
            no_of_top_snps=self.no_of_top_snps,
            min_sample_size=self.min_sample_size,
            test_type_id=self.test_type_id,
            results_type=self.results_type,
            no_of_permutations=self.no_of_permutations,
            no_of_min_breaks=self.no_of_min_breaks,
            type_id=comp_param_obj.type_id,
            null_distribution_type_id=self.null_distribution_type_id,
            allow_two_sample_overlapping=self.allow_two_sample_overlapping,
            total_gene_id_ls=comp_param_obj.total_gene_id_ls,
            min_score=self.min_score,
            commit=0)

        for results_id, list_type_id, cutoff in task_ls:
            if self.debug:
                sys.stderr.write(
                    "working on results_id=%s, list_type_id=%s, type_id=%s, cutoff %s.\n"
                    % (results_id, list_type_id, test_param.type_id, cutoff))
            test_param.results_id = results_id
            test_param.list_type_id = list_type_id
            # NOTE(review): a min_score of 0 counts as "not set" here, so the
            # cutoff is then taken as no_of_top_snps -- confirm this is intended
            if self.min_score:
                test_param.min_score_ls = [cutoff]
                test_param.min_score = cutoff
            else:
                test_param.no_of_top_snps_ls = [cutoff]
                test_param.no_of_top_snps = cutoff
            if self.store_null_data:
                outcome = self.runEnrichmentTestToGetNullData(
                    comp_param_obj.session, test_param)
            else:
                outcome = self.runHGTest(test_param)
            if outcome:
                result_ls.extend(outcome.result_ls)
                null_data_ls.extend(outcome.null_data_ls)

        sys.stderr.write("Node no.%s done with %s results.\n" %
                         (node_rank, len(result_ls)))
        return PassingData(result_ls=result_ls, null_data_ls=null_data_ls)