def __init__(self, inputFnameLs=None, **keywords):
    """
    Initialise the parent walker, reset plot bookkeeping and apply any
    matplotlib defaults that this class (or a descendant) defines.

    Arguments:
        inputFnameLs: passed through unchanged to
            AbstractMatrixFileWalker.__init__().
        **keywords: passed through unchanged to
            AbstractMatrixFileWalker.__init__().
    """
    AbstractMatrixFileWalker.__init__(
        self, inputFnameLs=inputFnameLs, **keywords)
    # Notes carried over from the original author (not verifiable from this
    # method alone):
    #  - self.connectDB() is called within the parent's __init__().
    #  - to preserve data in a structure that is visible throughout reading
    #    different files, use self.invariantPData; the parent is said to
    #    initialise it as PassingData().

    def _allSet(*attributeNames):
        # True only if every named attribute exists and is truthy; stops at
        # the first one that is missing or falsy.
        return all(getattr(self, attributeName, None)
                   for attributeName in attributeNames)

    #2012.11.22 axis limits start out unset
    for limitName in ('xMin', 'xMax', 'yMin', 'yMax'):
        setattr(self, limitName, None)

    #2012.11.25 for legend
    self.plotObjectLs = []
    self.plotObjectLegendLs = []

    #2013.07.01 apply these settings only if the class or its descendants
    # provide valid (truthy) values
    if _allSet('defaultFontLabelSize'):
        yh_matplotlib.setFontAndLabelSize(self.defaultFontLabelSize)

    if _allSet("defaultFigureWidth", "defaultFigureHeight"):
        figureSize = (self.defaultFigureWidth, self.defaultFigureHeight)
        yh_matplotlib.setDefaultFigureSize(figureSize)

    # NOTE(review): only plotLeft and plotBottom are checked here, yet
    # plotRight and plotTop are read as well - confirm they are always
    # defined together.
    if _allSet("plotLeft", "plotBottom"):
        yh_matplotlib.setPlotDimension(
            left=self.plotLeft,
            right=self.plotRight,
            bottom=self.plotBottom,
            top=self.plotTop)
def __init__(self, inputFnameLs=None, **keywords):
    """
    Initialise the parent mapper, record the designated monkey pair (if
    any) and apply this class's matplotlib font and figure-size defaults.

    Arguments:
        inputFnameLs: passed through unchanged to
            AbstractVervetMapper.__init__().
        **keywords: passed through unchanged to
            AbstractVervetMapper.__init__().

    After construction self.monkeyPairDesignated is either a sorted
    2-tuple (monkey1ID, monkey2ID) or None when either ID is falsy.
    """
    AbstractVervetMapper.__init__(self, inputFnameLs=inputFnameLs, **keywords)

    if not (self.monkey1ID and self.monkey2ID):
        # at least one ID is missing, so no pair is designated
        self.monkeyPairDesignated = None
    else:
        # sorted so the pair is independent of the order the IDs were given
        self.monkeyPairDesignated = tuple(
            sorted([self.monkey1ID, self.monkey2ID]))

    yh_matplotlib.setFontAndLabelSize(self.defaultFontLabelSize)
    figureSize = (self.defaultFigureWidth, self.defaultFigureHeight)
    yh_matplotlib.setDefaultFigureSize(figureSize)
def run(self):
    """
    Aggregate pairwise mismatch data into a distance matrix, optionally
    write it out, run PCA on it and draw a dendrogram.

    Steps:
        1. self.readInput(self.inputFnameLs) supplies sampleId2index and
           samplePair2data.
        2. For every sample pair, distance = mismatches / non-NA total
           (the first two items of the pair's data), stored symmetrically
           in a square numpy matrix.
        3. If self.outputFname is set, self.outputMismatchData() is called.
        4. self.runPCAOnDistanceMatrix() writes '<figureFnamePrefix>_PCA.tsv'.
        5. A dendrogram is saved as '<figureFnamePrefix>.svg' and '.png'.

    The method never returns: it finishes with sys.exit(0).

    Raises:
        ZeroDivisionError: if a pair's non-NA total is a plain Python
            zero (behaviour for other numeric types is not verified here).
    """
    if self.debug:
        # 2010-4-18 enter debug mode "~/.../variation/misc.py -b"
        import pdb
        pdb.set_trace()

    sampleId2index, samplePair2data = self.readInput(self.inputFnameLs)

    sys.stderr.write("Calculating distance matrix for aggregated data ...")
    noOfSamples = len(sampleId2index)
    distanceMatrix = numpy.zeros([noOfSamples, noOfSamples])
    # .items() (rather than .iteritems()) works on both Python 2 and 3
    for samplePair, data in samplePair2data.items():
        no_of_mismatches, no_of_total_non_NA = data[:2]
        # float() forces true division; with two ints under Python 2 the
        # bare "/" floor-divides and would collapse most distances to 0.
        distance = float(no_of_mismatches) / no_of_total_non_NA
        sample1Id, sample2Id = samplePair[:2]
        sample1Index = sampleId2index[sample1Id]
        sample2Index = sampleId2index[sample2Id]
        # the matrix is symmetric: fill both triangles
        distanceMatrix[sample1Index, sample2Index] = distance
        distanceMatrix[sample2Index, sample1Index] = distance
    sys.stderr.write("Done.\n")

    # Build the list of sample IDs ordered by matrix index. A pre-sized list
    # is used because dict.keys() is not index-assignable on Python 3.
    sampleIdLs = [None] * noOfSamples
    for sampleId, list_index in sampleId2index.items():
        sampleIdLs[list_index] = sampleId

    if self.outputFname:
        self.outputMismatchData(self.outputFname, samplePair2data,
                                distanceMatrix, sampleId2index, sampleIdLs)

    massagedSampleIDLs = self.massageSampleId(sampleIdLs)
    #2012.9-6 stop massaging sample IDs for PCA output.
    # mapper/AppendInfo2SmartPCAOutput.py could be applied to this.
    self.runPCAOnDistanceMatrix(
        distanceMatrix, col_id_ls=sampleIdLs,
        outputFname='%s_PCA.tsv' % (self.figureFnamePrefix))

    import pylab
    from hcluster import linkage, dendrogram
    pylab.clf()
    # NOTE(review): linkage() is given the full square matrix; confirm that
    # hcluster treats it as distances and not as observation vectors.
    Z = linkage(distanceMatrix, 'single')
    yh_matplotlib.setFontAndLabelSize(base_size=3)
    #leaf_font_size=1 or 5 has no effect
    dendrogram(Z, color_threshold=0.001, labels=massagedSampleIDLs,
               orientation='right', leaf_font_size=None)
    pylab.savefig('%s.svg' % self.figureFnamePrefix, dpi=200)
    pylab.savefig('%s.png' % self.figureFnamePrefix, dpi=300)
    sys.exit(0)