Exemple #1
0
    def run(self):
        """
        2009-2-2
            Read principal-component data and eigenvalues from
            self.eigen_vector_fname / self.eigen_value_fname and hand them,
            together with each eigenvalue's share of the eigenvalue sum, to
            putPCDataIntoDB() and putEigenValuesIntoDB() for
            self.call_method_id.

            Drops into pdb first when self.debug is set. The session is
            flushed and committed only when self.commit is set.
        """
        if self.debug:
            import pdb
            pdb.set_trace()

        connection_kwargs = dict(drivername=self.drivername,
                                 username=self.db_user,
                                 password=self.db_passwd,
                                 hostname=self.hostname,
                                 database=self.dbname,
                                 schema=self.schema)
        db = Stock_250kDB.Stock_250kDB(**connection_kwargs)
        db.setup(create_tables=False)

        session = db.session
        session.begin()

        PC_data = Association.getPCFromFile(self.eigen_vector_fname)
        eigen_values = numpy.array(
            Association.getEigenValueFromFile(self.eigen_value_fname))
        # fraction of the eigenvalue total carried by each eigenvalue
        variance_fraction = eigen_values / numpy.sum(eigen_values)

        self.putPCDataIntoDB(db, PC_data, self.call_method_id)
        self.putEigenValuesIntoDB(db, eigen_values, variance_fraction,
                                  self.call_method_id)

        if not self.commit:
            return
        session.flush()
        session.commit()
Exemple #2
0
def make_association_to_part(part_no, filename):
    """Build the Association object for the given part number.

    Part 1 uses SentenceAssociationStrategy, part 2 uses
    WindowAssociationStrategy with arg=2, and every other value falls back
    to DependencyEdgeAssocaition. `filename` is passed through unchanged.
    """
    if part_no == 1:
        return Association(SentenceAssociationStrategy, filename)
    if part_no == 2:
        return Association(WindowAssociationStrategy, filename, arg=2)
    return Association(DependencyEdgeAssocaition, filename)
Exemple #3
0
    def IntLMOnTwoSNPs(cls, snp1_id, gene1_id, genotype_ls1, snp2_id, gene2_id,
                       genotype_ls2, phenotype_index, phenotype_ls,
                       min_data_point=3):
        """
        2009-2-8
            Interaction-detection linear model for a pair of SNPs:
                y = b + SNP1xSNP2 + SNP1 + SNP2 + e
            The interaction term is the first predictor column, so the
            pvalue reported by Association.linear_model() is the one for
            that term.

            Returns a one-element list holding a PassingData record, or an
            empty list when Association.linear_model() returns a false value.
        """
        # Turn each genotype list into an (n, 1) column.
        snp1_column = numpy.resize(genotype_ls1, [len(genotype_ls1), 1])
        snp2_column = numpy.resize(genotype_ls2, [len(genotype_ls2), 1])

        # Predictor columns: interaction first, then the two SNPs.
        predictors = numpy.hstack(
            (snp1_column * snp2_column, snp1_column, snp2_column))

        lm_result = Association.linear_model(predictors,
                                             phenotype_ls,
                                             min_data_point,
                                             snp_index=snp1_id + snp2_id)
        if not lm_result:
            return []

        interaction_record = PassingData(
            snp1_id=snp1_id,
            gene1_id=gene1_id,
            snp2_id=snp2_id,
            gene2_id=gene2_id,
            pvalue=lm_result.pvalue,
            count1=lm_result.count_ls[0],
            count2=lm_result.count_ls[1],
            bool_type=None,
            phenotype_index=phenotype_index,
            var_perc=lm_result.var_perc,
            coeff_list=lm_result.coeff_list,
            coeff_p_value_list=lm_result.coeff_p_value_list)
        return [interaction_record]
    def registerCustomExecutables(self, workflow=None):
        """
        2012.6.5
            Let AbstractVariationWorkflow register its executables, then
            add the Association, Results2DB_250k and OutputPhenotype scripts
            found under self.variationSrcPath.
        """
        AbstractVariationWorkflow.registerCustomExecutables(self,
                                                            workflow=workflow)

        # Settings shared by every executable created below.
        shared_kwargs = dict(namespace=self.namespace,
                             version=self.version,
                             os=self.operatingSystem,
                             arch=self.architecture,
                             installed=True)
        clusters_size = self.clusters_size
        site_handler = self.site_handler

        # (executable name, script path relative to self.variationSrcPath)
        script_table = (
            ("Association", "association/Association.py"),
            ("Results2DB_250k", "db/Results2DB_250k.py"),
            ("OutputPhenotype", "db/output/OutputPhenotype.py"),
        )

        # 2012.8.7 each cell is a tuple of
        # (executable, clusterSizeMultiplier); 0 means no clustering needed.
        executableClusterSizeMultiplierList = []
        for executable_name, relative_path in script_table:
            executable = Executable(name=executable_name, **shared_kwargs)
            script_url = "file://" + os.path.join(self.variationSrcPath,
                                                  relative_path)
            executable.addPFN(PFN(script_url, site_handler))
            executableClusterSizeMultiplierList.append((executable, 0))

        self.addExecutableAndAssignProperClusterSize(
            executableClusterSizeMultiplierList,
            defaultClustersSize=clusters_size)
Exemple #5
0
	def IntLMOnTwoSNPs(cls, snp1_id, gene1_id, genotype_ls1, snp2_id, gene2_id,
					genotype_ls2, phenotype_index, phenotype_ls,
					min_data_point=3):
		"""
		2009-2-8
			Interaction-detection linear model for a pair of SNPs:
				y = b + SNP1xSNP2 + SNP1 + SNP2 + e
			The interaction term is the first predictor column, so the
			pvalue reported by Association.linear_model() is the one for
			that term.

			Returns a one-element list holding a PassingData record, or an
			empty list when Association.linear_model() returns a false value.
		"""
		# Turn each genotype list into an (n, 1) column.
		snp1_column = numpy.resize(genotype_ls1, [len(genotype_ls1), 1])
		snp2_column = numpy.resize(genotype_ls2, [len(genotype_ls2), 1])

		# Predictor columns: interaction first, then the two SNPs.
		predictors = numpy.hstack(
			(snp1_column * snp2_column, snp1_column, snp2_column))

		lm_result = Association.linear_model(
			predictors, phenotype_ls, min_data_point,
			snp_index=snp1_id + snp2_id)
		if not lm_result:
			return []

		interaction_record = PassingData(
			snp1_id=snp1_id,
			gene1_id=gene1_id,
			snp2_id=snp2_id,
			gene2_id=gene2_id,
			pvalue=lm_result.pvalue,
			count1=lm_result.count_ls[0],
			count2=lm_result.count_ls[1],
			bool_type=None,
			phenotype_index=phenotype_index,
			var_perc=lm_result.var_perc,
			coeff_list=lm_result.coeff_list,
			coeff_p_value_list=lm_result.coeff_p_value_list)
		return [interaction_record]
Exemple #6
0
	def subplotLatLonPhenVsPC(self, ecotype_info, StrainID2PCAPosInfo, phenData, phenotype_col_index, \
							phenotype_cmap, phenotype_norm, which_figure=0, sub_title='',\
							ydata='latitude', which_PC_index=0, no_of_rows=2, dot_size=10, alpha=0.7):
		"""
		2008-12-08
			one single subplot in plotLatLonPhenVsPC()

			Scatters one dot per strain (x picked by which_PC_index, y picked
			by ydata), coloured through phenotype_cmap(phenotype_norm(...)),
			then overlays the line fitted by Association.pure_linear_model()
			and writes -log10(pvalue) and the slope into the x-axis label.

			which_PC_index: 0, 1, 'longitude' or 'latitude'; anything else
				raises ValueError.
			ydata: name of an attribute of the ecotype object, or 'phenotype'
				to use the phenotype value itself.
			sub_title: accepted but not used by this method.

			Strains absent from ecotype_info (a warning goes to stderr) and
			strains whose y value is None or NaN are skipped. When no strain
			is left, a warning goes to stderr and no regression is attempted.
		"""
		# Resolve the x-axis label once; it does not depend on the strain.
		# Doing it up front also guarantees xlabel is bound for the final
		# pylab.xlabel() call and rejects unsupported selectors early.
		if which_PC_index in (0, 1):
			xlabel = 'PC%s'%(which_PC_index+1)
		elif which_PC_index in ('longitude', 'latitude'):
			xlabel = which_PC_index
		else:
			raise ValueError("Unsupported which_PC_index: %r"%(which_PC_index,))

		ax = pylab.subplot(no_of_rows,2,which_figure, frameon=False)
		pylab.ylabel(ydata)
		pylab.grid(True, alpha=0.3)

		x_ls = []
		y_ls = []
		for strain_id in StrainID2PCAPosInfo.strain_id_ls:
			ecotype_id = int(strain_id)
			ecotype_obj = ecotype_info.ecotype_id2ecotype_obj.get(ecotype_id)
			if not ecotype_obj:
				sys.stderr.write("Warning: Ecotype %s not in ecotype_info (fetched from stock db).\n"%ecotype_id)
				continue

			# NOTE(review): index 0 reads strain_id2pca_y and index 1 reads
			# strain_id2pca_x while the labels are PC1/PC2 -- kept as in the
			# original; confirm the mapping is intended.
			if which_PC_index==1:
				x_value = StrainID2PCAPosInfo.strain_id2pca_x[strain_id]
			elif which_PC_index==0:
				x_value = StrainID2PCAPosInfo.strain_id2pca_y[strain_id]
			elif which_PC_index=='longitude':
				x_value = ecotype_obj.longitude
			else:	#'latitude', guaranteed by the validation above
				x_value = ecotype_obj.latitude

			#strain color according to phenotype
			phenotype_row_index = phenData.row_id2row_index[strain_id]
			phenotype = phenData.data_matrix[phenotype_row_index][phenotype_col_index]
			strain_fc = phenotype_cmap(phenotype_norm(phenotype))
			if numpy.isnan(phenotype):
				linewidth = 0.5
				strain_fc = 'w'
				_alpha = 0	#facecolor gets very transparent
			else:
				linewidth = 0
				_alpha = alpha

			if ydata=='phenotype':
				y_value = phenotype
			else:
				y_value = getattr(ecotype_obj, ydata, None)
			if y_value is None or numpy.isnan(y_value):	#can't do regression or plot
				continue

			pylab.scatter([x_value],[y_value], s=dot_size, linewidth=linewidth, facecolor=strain_fc, alpha=_alpha, zorder=10)
			x_ls.append(x_value)
			y_ls.append(y_value)

		if not x_ls:
			# Nothing to regress on: min()/max() and the model fit below
			# would fail on empty lists.
			sys.stderr.write("Warning: no data points to plot for %s vs %s.\n"%(ydata, xlabel))
			pylab.xlabel(xlabel)
			return

		# Remember the limits set by the scatter so the trend line drawn
		# below does not rescale the axes.
		xlim = ax.get_xlim()
		ylim = ax.get_ylim()

		lm_result = Association.pure_linear_model(x_ls, y_ls)
		try:
			pvalue = '%.2f'%-math.log10(lm_result.pvalue)
		except (ValueError, TypeError):
			# log10 is undefined for 0/negative values (ValueError) and for
			# non-numeric values such as None (TypeError); show the raw value.
			pvalue = '%s'%lm_result.pvalue
		beta0 = lm_result.coeff_list[0]
		beta = lm_result.coeff_list[1]
		#draw a line showing the trend
		x_lm_ls = [min(x_ls), max(x_ls)]
		# A real list, not map(): on Python 3 map() returns a lazy iterator.
		y_lm_ls = [beta0 + beta*x for x in x_lm_ls]
		ax.plot(x_lm_ls, y_lm_ls, alpha=0.5)
		ax.set_xlim(xlim)
		ax.set_ylim(ylim)

		pylab.xlabel('%s, pvalue=%s, beta=%.3f'%(xlabel, pvalue, beta))
    def __init__(self, **keywords):
        """
        2009-3-20
            Initialise the instance by forwarding every keyword argument,
            unchanged, to Association.__init__().
        """
        Association.__init__(self, **keywords)
Exemple #8
0
	def __init__(self, **keywords):
		"""
		2009-3-20
			Initialise the instance by forwarding every keyword argument,
			unchanged, to Association.__init__().
		"""
		Association.__init__(self, **keywords)