Beispiel #1
0
    def __init__(self, inputFnameLs=None, **keywords):
        """
        Set up the walker, reset the axis limits and legend containers, and
        push any class-level plotting defaults into yh_matplotlib.

        Arguments:
            inputFnameLs: forwarded unchanged to AbstractMatrixFileWalker.__init__().
            **keywords: forwarded unchanged to AbstractMatrixFileWalker.__init__().

        Per the original author's notes, the parent constructor calls
        self.connectDB() and creates self.invariantPData (a PassingData), which
        can hold data that must stay visible while different files are read.
        """
        AbstractMatrixFileWalker.__init__(self, inputFnameLs=inputFnameLs, **keywords)

        # 2012.11.22 axis limits, unset until something fills them in
        self.xMin = self.xMax = self.yMin = self.yMax = None

        # 2012.11.25 plotted objects and their labels, kept for the legend
        self.plotObjectLs = []
        self.plotObjectLegendLs = []

        # 2013.07.01 apply each default only if this class (or a descendant)
        # defines a truthy value for it
        fontLabelSize = getattr(self, 'defaultFontLabelSize', None)
        if fontLabelSize:
            yh_matplotlib.setFontAndLabelSize(fontLabelSize)

        figureWidth = getattr(self, "defaultFigureWidth", None)
        if figureWidth:
            figureHeight = getattr(self, "defaultFigureHeight", None)
            if figureHeight:
                yh_matplotlib.setDefaultFigureSize((figureWidth, figureHeight))

        # NOTE(review): only plotLeft and plotBottom are checked; plotRight and
        # plotTop are read unconditionally and raise AttributeError if absent.
        # A margin of 0 also counts as "not set" here -- confirm both are intended.
        leftMargin = getattr(self, "plotLeft", None)
        if leftMargin:
            bottomMargin = getattr(self, "plotBottom", None)
            if bottomMargin:
                yh_matplotlib.setPlotDimension(
                    left=leftMargin,
                    right=self.plotRight,
                    bottom=bottomMargin,
                    top=self.plotTop,
                )
Beispiel #2
0
    def __init__(self, inputFnameLs=None, **keywords):
        """
        Initialise the parent mapper, derive the designated monkey pair and
        apply this class's font and figure-size defaults to yh_matplotlib.

        Arguments:
            inputFnameLs: forwarded unchanged to AbstractVervetMapper.__init__().
            **keywords: forwarded unchanged to AbstractVervetMapper.__init__().

        After construction self.monkeyPairDesignated is either a sorted
        2-tuple (monkey1ID, monkey2ID) or None when either ID is falsy.
        """
        AbstractVervetMapper.__init__(self, inputFnameLs=inputFnameLs, **keywords)

        # Sorting makes the pair order-independent; the tuple form is hashable.
        self.monkeyPairDesignated = (
            tuple(sorted([self.monkey1ID, self.monkey2ID]))
            if self.monkey1ID and self.monkey2ID
            else None
        )

        yh_matplotlib.setFontAndLabelSize(self.defaultFontLabelSize)
        figureSize = (self.defaultFigureWidth, self.defaultFigureHeight)
        yh_matplotlib.setDefaultFigureSize(figureSize)
	def run(self,):
		"""
		Aggregate pairwise mismatch data, cluster the samples and plot a dendrogram.

		Steps, in order:
		  1. read self.inputFnameLs through self.readInput(), which yields
		     sampleId2index and samplePair2data;
		  2. fill a symmetric sample-by-sample matrix with the mismatch
		     fraction no_of_mismatches/no_of_total_non_NA of every pair;
		  3. if self.outputFname is set, hand the data to self.outputMismatchData();
		  4. call self.runPCAOnDistanceMatrix() with '<figureFnamePrefix>_PCA.tsv'
		     as its output filename;
		  5. single-linkage cluster the matrix and save the dendrogram as
		     '<figureFnamePrefix>.svg' and '<figureFnamePrefix>.png'.

		This method does not return: it terminates the process with sys.exit(0).

		NOTE(review): a pair whose no_of_total_non_NA is 0 is not handled
		specially; with plain Python numbers the division raises
		ZeroDivisionError. Decide what distance such a pair should get.
		"""
		if self.debug:	# 2010-4-18 enter debug mode "~/.../variation/misc.py -b"
			import pdb
			pdb.set_trace()
		sampleId2index, samplePair2data = self.readInput(self.inputFnameLs)

		sys.stderr.write("Calculating distance matrix for aggregated data ...")
		noOfSamples = len(sampleId2index)
		distanceMatrix = numpy.zeros([noOfSamples, noOfSamples])
		# .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
		for samplePair, data in samplePair2data.items():
			no_of_mismatches, no_of_total_non_NA = data[:2]
			# float() forces true division; with two integer counts Python 2
			# would otherwise truncate the fraction to 0 or 1.
			distance = float(no_of_mismatches)/no_of_total_non_NA
			sample1Id, sample2Id = samplePair[:2]
			sample1Index = sampleId2index[sample1Id]
			sample2Index = sampleId2index[sample2Id]
			# the distance is symmetric, so fill both triangles
			distanceMatrix[sample1Index, sample2Index] = distance
			distanceMatrix[sample2Index, sample1Index] = distance
		sys.stderr.write("Done.\n")

		# Invert sampleId2index into a list ordered by matrix index.
		# A pre-sized list is used because dict.keys() is not indexable on Python 3.
		sampleIdLs = [None]*noOfSamples
		for sampleId, list_index in sampleId2index.items():
			sampleIdLs[list_index] = sampleId

		if self.outputFname:
			self.outputMismatchData(self.outputFname, samplePair2data, distanceMatrix, sampleId2index, sampleIdLs)

		massagedSampleIDLs = self.massageSampleId(sampleIdLs)

		#2012.9-6 stop massaging sample IDs for PCA output. mapper/AppendInfo2SmartPCAOutput.py could be applied to this.
		self.runPCAOnDistanceMatrix(distanceMatrix, col_id_ls=sampleIdLs, outputFname='%s_PCA.tsv'%(self.figureFnamePrefix))

		# plotting libraries are imported lazily, only when run() is executed
		import pylab
		from hcluster import linkage, dendrogram
		pylab.clf()
		# NOTE(review): linkage() implementations commonly treat a 2-D array as
		# observation vectors, not as a square distance matrix -- confirm against
		# the hcluster docs; a condensed matrix (squareform) may be what is wanted.
		Z = linkage(distanceMatrix, 'single')
		yh_matplotlib.setFontAndLabelSize(base_size=3)
		dendrogram(Z, color_threshold=0.001, labels=massagedSampleIDLs, orientation='right', leaf_font_size=None)	#leaf_font_size=1 or 5 has no effect
		pylab.savefig('%s.svg'%self.figureFnamePrefix, dpi=200)
		pylab.savefig('%s.png'%self.figureFnamePrefix, dpi=300)
		sys.exit(0)