def __init__(self, dbFileName, force=False, scaleFactor=1000):
    """Set up all data fields backed by the GroopM database.

    Parameters:
      dbFileName  - path to the HDF database containing all parsed data
      force       - if True, silently overwrite existing values
      scaleFactor - dimension everything in the transformed data is scaled to
    """
    # data
    self.dataManager = GMDataManager()   # most data is saved to hdf
    self.dbFileName = dbFileName         # db containing all the data we'd like to use
    self.condition = ""                  # condition will be supplied at loading time

    # --> NOTE: ALL of the arrays in this section are in sync
    # --> each one holds information for an individual contig
    self.indices = np_array([])          # indices into the data structure based on condition
    self.covProfiles = np_array([])      # coverage based coordinates
    self.transformedCP = np_array([])    # the munged data points
    self.corners = np_array([])          # the corners of the transformed space
    self.TCentre = 0.                    # the centre of the coverage space
    self.transRadius = 0.                # distance from corner to centre of transformed space
    self.averageCoverages = np_array([]) # average coverage across all stoits
    self.normCoverages = np_array([])    # norm of the raw coverage vectors
    self.kmerSigs = np_array([])         # raw kmer signatures
    self.kmerNormPC1 = np_array([])      # first PC of kmer sigs normalised to [0, 1]
    self.kmerPCs = np_array([])          # PCs of kmer sigs capturing specified variance
    self.kmerVarPC = np_array([])        # variance of each PC
    self.stoitColNames = np_array([])
    self.contigNames = np_array([])
    self.contigLengths = np_array([])
    self.contigGCs = np_array([])
    self.colorMapGC = None               # color map, built later from GC values
    self.binIds = np_array([])           # list of bin IDs
    # --> end section

    # meta
    self.validBinIds = {}                # valid bin ids -> numMembers
    self.isLikelyChimeric = {}           # indicates if a bin is likely to be chimeric
    self.binnedRowIndices = {}           # dictionary of those indices which belong to some bin
    self.restrictedRowIndices = {}       # dictionary of those indices which can not be binned yet
    self.numContigs = 0                  # this depends on the condition given
    self.numStoits = 0                   # this depends on the data which was parsed

    # contig links
    self.links = {}

    # misc
    self.forceWriting = force            # overwrite existing values silently?
    self.scaleFactor = scaleFactor       # scale everything in the transformed data to this dimension
def __init__(self, dbFileName, force=False, scaleFactor=1000):
    """Set up all data fields backed by the GroopM database (legacy field set).

    Parameters:
      dbFileName  - path to the HDF database containing all parsed data
      force       - if True, silently overwrite existing values
      scaleFactor - dimension everything in the transformed data is scaled to
    """
    # data
    self.dataManager = GMDataManager()   # most data is saved to hdf
    self.dbFileName = dbFileName         # db containing all the data we'd like to use
    self.condition = ""                  # condition will be supplied at loading time

    # --> NOTE: ALL of the arrays in this section are in sync
    # --> each one holds information for an individual contig
    self.indices = np_array([])          # indices into the data structure based on condition
    self.covProfiles = np_array([])      # coverage based coordinates
    self.transformedCP = np_array([])    # the munged data points
    self.averageCoverages = np_array([]) # average coverage across all stoits
    self.kmerSigs = np_array([])         # raw kmer signatures
    self.kmerVals = np_array([])         # PCA'd kmer sigs
    self.contigNames = np_array([])
    self.contigLengths = np_array([])
    self.contigColours = np_array([])    # calculated from kmerVals
    self.binIds = np_array([])           # list of bin IDs
    # --> end section

    # meta
    # NOTE(review): the "Indicies" spellings below are kept as-is — other code
    # in this version references these attribute names; renaming would break it.
    self.validBinIds = {}                # valid bin ids -> numMembers
    self.binnedRowIndicies = {}          # dictionary of those indices which belong to some bin
    self.restrictedRowIndicies = {}      # dictionary of those indices which can not be binned yet
    self.numContigs = 0                  # this depends on the condition given
    self.numStoits = 0                   # this depends on the data which was parsed

    # contig links
    self.links = {}

    # misc
    self.forceWriting = force            # overwrite existing values silently?
    self.scaleFactor = scaleFactor       # scale everything in the transformed data to this dimension
def __init__(self, dbFileName, force=False, scaleFactor=1000):
    """Set up all data fields backed by the GroopM database.

    Parameters:
      dbFileName  - path to the HDF database containing all parsed data
      force       - if True, silently overwrite existing values
      scaleFactor - dimension everything in the transformed data is scaled to
    """
    # data
    self.dataManager = GMDataManager()   # most data is saved to hdf
    self.dbFileName = dbFileName         # db containing all the data we'd like to use
    self.condition = ""                  # condition will be supplied at loading time

    # --> NOTE: ALL of the arrays in this section are in sync
    # --> each one holds information for an individual contig
    self.indices = np_array([])          # indices into the data structure based on condition
    self.covProfiles = np_array([])      # coverage based coordinates
    self.transformedCP = np_array([])    # the munged data points
    self.corners = np_array([])          # the corners of the transformed space
    self.TCentre = 0.                    # the centre of the coverage space
    self.transRadius = 0.                # distance from corner to centre of transformed space
    self.averageCoverages = np_array([]) # average coverage across all stoits
    self.normCoverages = np_array([])    # norm of the raw coverage vectors
    self.kmerSigs = np_array([])         # raw kmer signatures
    self.kmerNormPC1 = np_array([])      # first PC of kmer sigs normalised to [0, 1]
    self.kmerPCs = np_array([])          # PCs of kmer sigs capturing specified variance
    self.kmerVarPC = np_array([])        # variance of each PC
    self.stoitColNames = np_array([])
    self.contigNames = np_array([])
    self.contigLengths = np_array([])
    self.contigGCs = np_array([])
    self.colorMapGC = None               # color map, built later from GC values
    self.binIds = np_array([])           # list of bin IDs
    # --> end section

    # meta
    self.validBinIds = {}                # valid bin ids -> numMembers
    self.isLikelyChimeric = {}           # indicates if a bin is likely to be chimeric
    self.binnedRowIndices = {}           # dictionary of those indices which belong to some bin
    self.restrictedRowIndices = {}       # dictionary of those indices which can not be binned yet
    self.numContigs = 0                  # this depends on the condition given
    self.numStoits = 0                   # this depends on the data which was parsed

    # contig links
    self.links = {}

    # misc
    self.forceWriting = force            # overwrite existing values silently?
    self.scaleFactor = scaleFactor       # scale everything in the transformed data to this dimension
def parseOptions(self, options ): timer = gtime.TimeKeeper() if(options.subparser_name == 'parse'): # parse raw input print "*******************************************************************************" print " [[GroopM %s]] Running in data parsing mode..." % self.GMVersion print "*******************************************************************************" # check this here: if len(options.bamfiles) < 3: print "Sorry, You must supply at least 3 bamFiles to use GroopM. (You supplied %d)\n Exiting..." % len(options.bamfiles) return GMdata = mstore.GMDataManager() success = GMdata.createDB(options.bamfiles, options.reference, options.dbname, options.cutoff, timer, force=options.force, threads=options.threads) if not success: print options.dbname,"not updated" elif(options.subparser_name == 'core'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in core creation mode..." % self.GMVersion print "*******************************************************************************" CE = cluster.ClusterEngine(options.dbname, timer, force=options.force, finalPlot=options.plot, plot=options.multiplot, minSize=options.size, minVol=options.bp) if options.graphfile is None: gf = "" else: gf=options.graphfile CE.makeCores(coreCut=options.cutoff, gf=gf) elif(options.subparser_name == 'refine'): # refine bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in core refining mode..." 
% self.GMVersion print "*******************************************************************************" bids = [] #if options.bids is not None: # bids = options.bids auto = options.auto transform=True^options.no_transform RE = refine.RefineEngine(timer, dbFileName=options.dbname, transform=transform, bids=bids, loadContigNames=True) if options.plot: pfx="REFINED" else: pfx="" print "Refine bins" RE.refineBins(timer, auto=auto, saveBins=True, plotFinal=pfx) elif(options.subparser_name == 'recruit'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin expansion mode..." % self.GMVersion print "*******************************************************************************" RE = refine.RefineEngine(timer, dbFileName=options.dbname, getUnbinned=True, loadContigNames=False, cutOff=options.cutoff) RE.recruitWrapper(timer, inclusivity=options.inclusivity, step=options.step, saveBins=True) elif(options.subparser_name == 'extract'): # Extract data print "*******************************************************************************" print " [[GroopM %s]] Running in '%s' extraction mode..." 
% (self.GMVersion, options.mode) print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BX = groopmUtils.GMExtractor(options.dbname, bids=bids, folder=options.out_folder ) if(options.mode=='contigs'): BX.extractContigs(timer, fasta=options.data, prefix=options.prefix, cutoff=options.cutoff) elif(options.mode=='reads'): BX.extractReads(timer, bams=options.data, prefix=options.prefix, mixBams=options.mix_bams, mixGroups=options.mix_groups, mixReads=options.mix_reads, interleaved=options.interleave, bigFile=options.no_gzip, headersOnly=options.headers_only, minMapQual=options.mapping_quality, maxMisMatches=options.max_distance, useSuppAlignments=options.use_supplementary, useSecondaryAlignments=options.use_secondary, verbose=options.verbose, threads=options.threads) else: raise ExtractModeNotAppropriateException("mode: "+ options.mode + " is unknown") elif(options.subparser_name == 'merge'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin merging mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=False) BM.merge(options.bids, options.force, saveBins=True) elif(options.subparser_name == 'split'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin splitting mode..." 
% self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=False) BM.split(options.bid, options.parts, mode=options.mode, saveBins=True, auto=options.force) elif(options.subparser_name == 'delete'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin deleting mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=True)#, bids=options.bids) BM.deleteBins(options.bids, force=options.force, saveBins=True, freeBinnedRowIndices=True) elif(options.subparser_name == 'plot'): print "*******************************************************************************" print " [[GroopM %s]] Running in bin plotting mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) if options.bids is None: bids = [] else: bids = options.bids BM.loadBins(timer, makeBins=True, silent=False, bids=bids, loadContigNames=False) BM.setColorMap(options.cm) BM.plotBins(FNPrefix=options.tag, plotEllipsoid=True, ignoreContigLengths=options.points, folder=options.folder) elif(options.subparser_name == 'explore'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin '%s' explorer mode..." 
% (self.GMVersion, options.mode) print "*******************************************************************************" transform=True^options.no_transform bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, transform=transform, cmstring=options.cm, ignoreContigLengths=options.points) if(options.mode == 'binpoints'): BE.plotPoints(timer) elif(options.mode == 'binids'): BE.plotIds(timer) elif(options.mode == 'allcontigs'): BE.plotContigs(timer, coreCut=options.cutoff, all=True) elif(options.mode == 'unbinnedcontigs'): BE.plotUnbinned(timer, coreCut=options.cutoff) elif(options.mode == 'binnedcontigs'): BE.plotContigs(timer, coreCut=options.cutoff) elif(options.mode == 'binassignments'): BE.plotBinAssignents(timer, coreCut=options.cutoff) elif(options.mode == 'compare'): BE.plotCompare(timer, coreCut=options.cutoff) elif (options.mode == 'together'): BE.plotTogether(timer, coreCut=options.cutoff, doMers=options.kmers) elif (options.mode == 'sidebyside'): BE.plotSideBySide(timer, coreCut=options.cutoff) else: print "**Error: unknown mode:",options.mode elif(options.subparser_name == 'flyover'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Making a flyover..." 
% self.GMVersion print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, transform=True, ignoreContigLengths=options.points) BE.plotFlyOver(timer, fps=options.fps, totalTime=options.totalTime, percentFade=options.firstFade, prefix=options.prefix, showColorbar=options.colorbar, title=options.title, coreCut=options.cutoff, format=options.format) elif(options.subparser_name == 'highlight'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in highlighter mode..." % self.GMVersion print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, binLabelsFile = options.binlabels, contigColorsFile = options.contigcolors, ignoreContigLengths=options.points) BE.plotHighlights(timer, options.elevation, options.azimuth, options.file, options.filetype, options.dpi, drawRadius=options.radius, show=options.show, coreCut=options.cutoff, testing=options.place ) elif(options.subparser_name == 'print'): BM = binManager.BinManager(dbFileName=options.dbname) bids = [] if options.bids is not None: bids = options.bids BM.loadBins(timer, getUnbinned=options.unbinned, makeBins=True, silent=True, bids=bids) BM.printBins(options.format, fileName=options.outfile) elif(options.subparser_name == 'dump'): print "*******************************************************************************" print " [[GroopM %s]] Running in data dumping mode..." % self.GMVersion print "*******************************************************************************" # prep fields. Do this first cause users are mot likely to # mess this part up! 
allowable_fields = ['names', 'mers', 'gc', 'coverage', 'tcoverage', 'ncoverage', 'lengths', 'bins', 'all'] fields = options.fields.split(',') for field in fields: if field not in allowable_fields: print "ERROR: field '%s' not recognised. Allowable fields are:" % field print '\t',",".join(allowable_fields) return if options.separator == '\\t': separator = '\t' else: separator = options.separator DM = GMDataManager() DM.dumpData(options.dbname, fields, options.outfile, separator, not options.no_headers) return 0
def parseOptions(self, options): timer = gtime.TimeKeeper() if (options.subparser_name == 'parse'): # parse raw input print "*******************************************************************************" print " [[GroopM %s]] Running in data parsing mode..." % self.GMVersion print "*******************************************************************************" # check this here: if len(options.bamfiles) < 3: print "Sorry, You must supply at least 3 bamFiles to use GroopM. (You supplied %d)\n Exiting..." % len( options.bamfiles) return GMdata = mstore.GMDataManager() success = GMdata.createDB(options.bamfiles, options.reference, options.dbname, options.cutoff, timer, force=options.force, threads=options.threads) if not success: print options.dbname, "not updated" elif (options.subparser_name == 'core'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in core creation mode..." % self.GMVersion print "*******************************************************************************" CE = cluster.ClusterEngine(options.dbname, timer, force=options.force, finalPlot=options.plot, plot=options.multiplot, minSize=options.size, minVol=options.bp) if options.graphfile is None: gf = "" else: gf = options.graphfile CE.makeCores(coreCut=options.cutoff, gf=gf) elif (options.subparser_name == 'refine'): # refine bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in core refining mode..." 
% self.GMVersion print "*******************************************************************************" bids = [] #if options.bids is not None: # bids = options.bids auto = options.auto transform = True ^ options.no_transform RE = refine.RefineEngine(timer, dbFileName=options.dbname, transform=transform, bids=bids, loadContigNames=True) if options.plot: pfx = "REFINED" else: pfx = "" print "Refine bins" RE.refineBins(timer, auto=auto, saveBins=True, plotFinal=pfx) elif (options.subparser_name == 'recruit'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin expansion mode..." % self.GMVersion print "*******************************************************************************" RE = refine.RefineEngine(timer, dbFileName=options.dbname, getUnbinned=True, loadContigNames=False, cutOff=options.cutoff) RE.recruitWrapper(timer, inclusivity=options.inclusivity, step=options.step, saveBins=True) elif (options.subparser_name == 'extract'): # Extract data print "*******************************************************************************" print " [[GroopM %s]] Running in '%s' extraction mode..." 
% ( self.GMVersion, options.mode) print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BX = groopmUtils.GMExtractor(options.dbname, bids=bids, folder=options.out_folder) if (options.mode == 'contigs'): BX.extractContigs(timer, fasta=options.data, prefix=options.prefix, cutoff=options.cutoff) elif (options.mode == 'reads'): BX.extractReads(timer, bams=options.data, prefix=options.prefix, mixBams=options.mix_bams, mixGroups=options.mix_groups, mixReads=options.mix_reads, interleaved=options.interleave, bigFile=options.no_gzip, headersOnly=options.headers_only, minMapQual=options.mapping_quality, maxMisMatches=options.max_distance, useSuppAlignments=options.use_supplementary, useSecondaryAlignments=options.use_secondary, verbose=options.verbose, threads=options.threads) else: raise ExtractModeNotAppropriateException("mode: " + options.mode + " is unknown") elif (options.subparser_name == 'merge'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin merging mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=False) BM.merge(options.bids, options.force, saveBins=True) elif (options.subparser_name == 'split'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin splitting mode..." 
% self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=False) BM.split(options.bid, options.parts, mode=options.mode, saveBins=True, auto=options.force) elif (options.subparser_name == 'delete'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin deleting mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) BM.loadBins(timer, makeBins=True, silent=True) #, bids=options.bids) BM.deleteBins(options.bids, force=options.force, saveBins=True, freeBinnedRowIndices=True) elif (options.subparser_name == 'plot'): print "*******************************************************************************" print " [[GroopM %s]] Running in bin plotting mode..." % self.GMVersion print "*******************************************************************************" BM = binManager.BinManager(dbFileName=options.dbname) if options.bids is None: bids = [] else: bids = options.bids BM.loadBins(timer, makeBins=True, silent=False, bids=bids, loadContigNames=False) BM.setColorMap(options.cm) BM.plotBins(FNPrefix=options.tag, plotEllipsoid=True, ignoreContigLengths=options.points, folder=options.folder) elif (options.subparser_name == 'explore'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in bin '%s' explorer mode..." 
% ( self.GMVersion, options.mode) print "*******************************************************************************" transform = True ^ options.no_transform bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, transform=transform, cmstring=options.cm, ignoreContigLengths=options.points) if (options.mode == 'binpoints'): BE.plotPoints(timer) elif (options.mode == 'binids'): BE.plotIds(timer) elif (options.mode == 'allcontigs'): BE.plotContigs(timer, coreCut=options.cutoff, all=True) elif (options.mode == 'unbinnedcontigs'): BE.plotUnbinned(timer, coreCut=options.cutoff) elif (options.mode == 'binnedcontigs'): BE.plotContigs(timer, coreCut=options.cutoff) elif (options.mode == 'binassignments'): BE.plotBinAssignents(timer, coreCut=options.cutoff) elif (options.mode == 'compare'): BE.plotCompare(timer, coreCut=options.cutoff) elif (options.mode == 'together'): BE.plotTogether(timer, coreCut=options.cutoff, doMers=options.kmers) elif (options.mode == 'sidebyside'): BE.plotSideBySide(timer, coreCut=options.cutoff) else: print "**Error: unknown mode:", options.mode elif (options.subparser_name == 'flyover'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Making a flyover..." 
% self.GMVersion print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, transform=True, ignoreContigLengths=options.points) BE.plotFlyOver(timer, fps=options.fps, totalTime=options.totalTime, percentFade=options.firstFade, prefix=options.prefix, showColorbar=options.colorbar, title=options.title, coreCut=options.cutoff, format=options.format) elif (options.subparser_name == 'highlight'): # make bin cores print "*******************************************************************************" print " [[GroopM %s]] Running in highlighter mode..." % self.GMVersion print "*******************************************************************************" bids = [] if options.bids is not None: bids = options.bids BE = groopmUtils.BinExplorer(options.dbname, bids=bids, binLabelsFile=options.binlabels, contigColorsFile=options.contigcolors, ignoreContigLengths=options.points) BE.plotHighlights(timer, options.elevation, options.azimuth, options.file, options.filetype, options.dpi, drawRadius=options.radius, show=options.show, coreCut=options.cutoff, testing=options.place) elif (options.subparser_name == 'print'): BM = binManager.BinManager(dbFileName=options.dbname) bids = [] if options.bids is not None: bids = options.bids BM.loadBins(timer, getUnbinned=options.unbinned, makeBins=True, silent=True, bids=bids) BM.printBins(options.format, fileName=options.outfile) elif (options.subparser_name == 'dump'): print "*******************************************************************************" print " [[GroopM %s]] Running in data dumping mode..." % self.GMVersion print "*******************************************************************************" # prep fields. Do this first cause users are mot likely to # mess this part up! 
allowable_fields = [ 'names', 'mers', 'gc', 'coverage', 'tcoverage', 'ncoverage', 'lengths', 'bins', 'all' ] fields = options.fields.split(',') for field in fields: if field not in allowable_fields: print "ERROR: field '%s' not recognised. Allowable fields are:" % field print '\t', ",".join(allowable_fields) return if options.separator == '\\t': separator = '\t' else: separator = options.separator DM = GMDataManager() DM.dumpData(options.dbname, fields, options.outfile, separator, not options.no_headers) return 0
class ProfileManager: """Interacts with the groopm DataManager and local data fields Mostly a wrapper around a group of numpy arrays and a pytables quagmire """ def __init__(self, dbFileName, force=False, scaleFactor=1000): # data self.dataManager = GMDataManager() # most data is saved to hdf self.dbFileName = dbFileName # db containing all the data we'd like to use self.condition = "" # condition will be supplied at loading time # --> NOTE: ALL of the arrays in this section are in sync # --> each one holds information for an individual contig self.indices = np_array( []) # indices into the data structure based on condition self.covProfiles = np_array([]) # coverage based coordinates self.transformedCP = np_array([]) # the munged data points self.corners = np_array([]) # the corners of the tranformed space self.TCentre = 0. # the centre of the coverage space self.transRadius = 0. # distance from corner to centre of transformed space self.averageCoverages = np_array( []) # average coverage across all stoits self.normCoverages = np_array([]) # norm of the raw coverage vectors self.kmerSigs = np_array([]) # raw kmer signatures self.kmerNormPC1 = np_array( []) # First PC of kmer sigs normalized to [0, 1] self.kmerPCs = np_array( []) # PCs of kmer sigs capturing specified variance self.kmerVarPC = np_array([]) # variance of each PC self.stoitColNames = np_array([]) self.contigNames = np_array([]) self.contigLengths = np_array([]) self.contigGCs = np_array([]) self.colorMapGC = None self.binIds = np_array([]) # list of bin IDs # --> end section # meta self.validBinIds = {} # valid bin ids -> numMembers self.isLikelyChimeric = { } # indicates if a bin is likely to be chimeric self.binnedRowIndices = { } # dictionary of those indices which belong to some bin self.restrictedRowIndices = { } # dictionary of those indices which can not be binned yet self.numContigs = 0 # this depends on the condition given self.numStoits = 0 # this depends on the data which was parsed # 
contig links self.links = {} # misc self.forceWriting = force # overwrite existng values silently? self.scaleFactor = scaleFactor # scale every thing in the transformed data to this dimension def loadData( self, timer, condition, # condition as set by another function bids=[], # if this is set then only load those contigs with these bin ids verbose=True, # many to some output messages silent=False, # some to no output messages loadCovProfiles=True, loadKmerPCs=True, loadKmerVarPC=True, loadRawKmers=False, makeColors=True, loadContigNames=True, loadContigLengths=True, loadContigGCs=True, loadBins=False, loadLinks=False): """Load pre-parsed data""" timer.getTimeStamp() if (silent): verbose = False if verbose: print("Loading data from:", self.dbFileName) try: self.numStoits = self.getNumStoits() self.condition = condition self.indices = self.dataManager.getConditionalIndices( self.dbFileName, condition=condition, silent=silent) if (verbose): print(" Loaded indices with condition:", condition) self.numContigs = len(self.indices) if self.numContigs == 0: print(" ERROR: No contigs loaded using condition:", condition) return if (not silent): print(" Working with: %d contigs" % self.numContigs) if (loadCovProfiles): if (verbose): print(" Loading coverage profiles") self.covProfiles = self.dataManager.getCoverageProfiles( self.dbFileName, indices=self.indices) self.normCoverages = self.dataManager.getNormalisedCoverageProfiles( self.dbFileName, indices=self.indices) # work out average coverages self.averageCoverages = np_array( [sum(i) / self.numStoits for i in self.covProfiles]) if loadRawKmers: if (verbose): print(" Loading RAW kmer sigs") self.kmerSigs = self.dataManager.getKmerSigs( self.dbFileName, indices=self.indices) if (loadKmerPCs): self.kmerPCs = self.dataManager.getKmerPCAs( self.dbFileName, indices=self.indices) if (verbose): print(" Loading PCA kmer sigs (" + str(len(self.kmerPCs[0])) + " dimensional space)") self.kmerNormPC1 = np_copy(self.kmerPCs[:, 0]) 
self.kmerNormPC1 -= np_min(self.kmerNormPC1) self.kmerNormPC1 /= np_max(self.kmerNormPC1) if (loadKmerVarPC): self.kmerVarPC = self.dataManager.getKmerVarPC( self.dbFileName, indices=self.indices) if (verbose): print( " Loading PCA kmer variance (total variance: %.2f" % np_sum(self.kmerVarPC) + ")") if (loadContigNames): if (verbose): print(" Loading contig names") self.contigNames = self.dataManager.getContigNames( self.dbFileName, indices=self.indices) if (loadContigLengths): self.contigLengths = self.dataManager.getContigLengths( self.dbFileName, indices=self.indices) if (verbose): print(" Loading contig lengths (Total: %d BP)" % (sum(self.contigLengths))) if (loadContigGCs): self.contigGCs = self.dataManager.getContigGCs( self.dbFileName, indices=self.indices) if (verbose): print(" Loading contig GC ratios (Average GC: %0.3f)" % (np_mean(self.contigGCs))) if (makeColors): if (verbose): print(" Creating color map") # use HSV to RGB to generate colors S = 1 # SAT and VAL remain fixed at 1. Reduce to make V = 1 # Pastels if that's your preference... self.colorMapGC = self.createColorMapHSV() if (loadBins): if (verbose): print(" Loading bin assignments") self.binIds = self.dataManager.getBins(self.dbFileName, indices=self.indices) if len( bids ) != 0: # need to make sure we're not restricted in terms of bins bin_stats = self.getBinStats() for bid in bids: try: self.validBinIds[bid] = bin_stats[bid][0] self.isLikelyChimeric[bid] = bin_stats[bid][1] except KeyError: self.validBinIds[bid] = 0 self.isLikelyChimeric[bid] = False else: bin_stats = self.getBinStats() for bid in bin_stats: self.validBinIds[bid] = bin_stats[bid][0] self.isLikelyChimeric[bid] = bin_stats[bid][1] # fix the binned indices self.binnedRowIndices = {} for i in range(len(self.indices)): if (self.binIds[i] != 0): self.binnedRowIndices[i] = True else: # we need zeros as bin indicies then... 
self.binIds = np_zeros(len(self.indices)) if (loadLinks): self.loadLinks() self.stoitColNames = self.getStoitColNames() except: print("Error loading DB:", self.dbFileName, exc_info()[0]) raise def reduceIndices(self, deadRowIndices): """purge indices from the data structures Be sure that deadRowIndices are sorted ascending """ # strip out the other values self.indices = np_delete(self.indices, deadRowIndices, axis=0) self.covProfiles = np_delete(self.covProfiles, deadRowIndices, axis=0) self.transformedCP = np_delete(self.transformedCP, deadRowIndices, axis=0) self.contigNames = np_delete(self.contigNames, deadRowIndices, axis=0) self.contigLengths = np_delete(self.contigLengths, deadRowIndices, axis=0) self.contigGCs = np_delete(self.contigGCs, deadRowIndices, axis=0) #self.kmerSigs = np_delete(self.kmerSigs, deadRowIndices, axis=0) self.kmerPCs = np_delete(self.kmerPCs, deadRowIndices, axis=0) self.binIds = np_delete(self.binIds, deadRowIndices, axis=0) #------------------------------------------------------------------------------ # GET / SET def getNumStoits(self): """return the value of numStoits in the metadata tables""" return self.dataManager.getNumStoits(self.dbFileName) def getMerColNames(self): """return the value of merColNames in the metadata tables""" return self.dataManager.getMerColNames(self.dbFileName) def getMerSize(self): """return the value of merSize in the metadata tables""" return self.dataManager.getMerSize(self.dbFileName) def getNumMers(self): """return the value of numMers in the metadata tables""" return self.dataManager.getNumMers(self.dbFileName) ### USE the member vars instead! 
# def getNumCons(self): # """return the value of numCons in the metadata tables""" # return self.dataManager.getNumCons(self.dbFileName) def getNumBins(self): """return the value of numBins in the metadata tables""" return self.dataManager.getNumBins(self.dbFileName) def setNumBins(self, numBins): """set the number of bins""" self.dataManager.setNumBins(self.dbFileName, numBins) def getStoitColNames(self): """return the value of stoitColNames in the metadata tables""" return np_array( self.dataManager.getStoitColNames(self.dbFileName).split(",")) def isClustered(self): """Has the data been clustered already""" return self.dataManager.isClustered(self.dbFileName) def setClustered(self): """Save that the db has been clustered""" self.dataManager.setClustered(self.dbFileName, True) def isComplete(self): """Has the data been *completely* clustered already""" return self.dataManager.isComplete(self.dbFileName) def setComplete(self): """Save that the db has been completely clustered""" self.dataManager.setComplete(self.dbFileName, True) def getBinStats(self): """Go through all the "bins" array and make a list of unique bin ids vs number of contigs""" return self.dataManager.getBinStats(self.dbFileName) def setBinStats(self, binStats): """Store the valid bin Ids and number of members binStats is a list of tuples which looks like: [ (bid, numMembers, isLikelyChimeric) ] Note that this call effectively nukes the existing table """ self.dataManager.setBinStats(self.dbFileName, binStats) self.setNumBins(len(binStats)) def setBinAssignments(self, assignments, nuke=False): """Save our bins into the DB""" self.dataManager.setBinAssignments(self.dbFileName, assignments, nuke=nuke) def loadLinks(self): """Extra wrapper 'cause I am dumb""" self.links = self.getLinks() def getLinks(self): """Get contig links""" # first we get the absolute links absolute_links = self.dataManager.restoreLinks(self.dbFileName, self.indices) # now convert this into plain old row_indices 
reverse_index_lookup = {} for i in range(len(self.indices)): reverse_index_lookup[self.indices[i]] = i # now convert the absolute links to local ones relative_links = {} for cid in self.indices: local_cid = reverse_index_lookup[cid] relative_links[local_cid] = [] try: for link in absolute_links[cid]: relative_links[local_cid].append([ reverse_index_lookup[link[0]], link[1], link[2], link[3] ]) except KeyError: # not everyone is linked pass return relative_links #------------------------------------------------------------------------------ # DATA TRANSFORMATIONS def getAverageCoverage(self, rowIndex): """Return the average coverage for this contig across all stoits""" return sum(self.transformedCP[rowIndex]) / self.numStoits def shuffleBAMs(self): """Make the data transformation deterministic by reordering the bams""" # first we should make a subset of the total data # we'd like to take it down to about 1500 or so RI's # but we'd like to do this in a repeatable way ideal_contig_num = 1500 sub_cons = range(len(self.indices)) while len(sub_cons) > ideal_contig_num: # select every second contig when sorted by norm cov cov_sorted = np_argsort(self.normCoverages[sub_cons]) sub_cons = np_array([ sub_cons[cov_sorted[i * 2]] for i in np_arange(int(len(sub_cons) / 2)) ]) if len(sub_cons) > ideal_contig_num: # select every second contig when sorted by mer PC1 mer_sorted = np_argsort(self.kmerNormPC1[sub_cons]) sub_cons = np_array([ sub_cons[mer_sorted[i * 2]] for i in np_arange(int(len(sub_cons) / 2)) ]) # now that we have a subset, calculate the distance between each of the untransformed vectors num_sc = len(sub_cons) # log shift the coverages towards the origin sub_covs = np_transpose([ self.covProfiles[i] * (np_log10(self.normCoverages[i]) / self.normCoverages[i]) for i in sub_cons ]) sq_dists = cdist(sub_covs, sub_covs, 'cityblock') dists = squareform(sq_dists) # initialise a list of left, right neighbours lr_dict = {} for i in range(self.numStoits): lr_dict[i] = [] 
too_big = 10000 while True: closest = np_argmin(dists) if dists[closest] == too_big: break (i, j) = self.small2indices(closest, self.numStoits - 1) lr_dict[j].append(i) lr_dict[i].append(j) # mark these guys as neighbours if len(lr_dict[i]) == 2: # no more than 2 neighbours sq_dists[i, :] = too_big sq_dists[:, i] = too_big sq_dists[i, i] = 0.0 if len(lr_dict[j]) == 2: # no more than 2 neighbours sq_dists[j, :] = too_big sq_dists[:, j] = too_big sq_dists[j, j] = 0.0 # fix the dist matrix sq_dists[j, i] = too_big sq_dists[i, j] = too_big dists = squareform(sq_dists) # now make the ordering ordering = [0, lr_dict[0][0]] done = 2 while done < self.numStoits: last = ordering[done - 1] if lr_dict[last][0] == ordering[done - 2]: ordering.append(lr_dict[last][1]) last = lr_dict[last][1] else: ordering.append(lr_dict[last][0]) last = lr_dict[last][0] done += 1 # reshuffle the contig order! # yay for bubble sort! working = np_arange(self.numStoits) for i in range(1, self.numStoits): # where is this guy in the list loc = list(working).index(ordering[i]) if loc != i: # swap the columns self.covProfiles[:, [i, loc]] = self.covProfiles[:, [loc, i]] self.stoitColNames[[i, loc]] = self.stoitColNames[[loc, i]] working[[i, loc]] = working[[loc, i]] def transformCP(self, timer, silent=False, nolog=False): """Do the main transformation on the coverage profile data""" if (not silent): print(" Reticulating splines") self.transformedCP = self.dataManager.getTransformedCoverageProfiles( self.dbFileName, indices=self.indices) self.corners = self.dataManager.getTransformedCoverageCorners( self.dbFileName) self.TCentre = np_mean(self.corners, axis=0) self.transRadius = np_norm(self.corners[0] - self.TCentre) #------------------------------------------------------------------------------ # DEBUG CRUFT def rewriteBins(self): """rewrite the bins table in hdf5 based on numbers in meta-contigs""" bins = self.dataManager.getBins(self.dbFileName) bin_store = {} for c in bins: if c != 0: try: 
bin_store[c] += 1 except KeyError: bin_store[c] = 1 bin_stats = [] for bid in bin_store: # [(bid, size, likelyChimeric)] bin_stats.append((bid, bin_store[bid], False)) self.setBinStats(bin_stats) #------------------------------------------------------------------------------ # IO and IMAGE RENDERING def createColorMapHSV(self): S = 1.0 V = 1.0 return LinearSegmentedColormap.from_list('GC', [ htr((1.0 + np_sin(np_pi * (val / 1000.0) - np_pi / 2)) / 2., S, V) for val in xrange(0, 1000) ], N=1000) def setColorMap(self, colorMapStr): if colorMapStr == 'HSV': S = 1 V = 1 self.colorMapGC = self.createColorMapHSV() elif colorMapStr == 'Accent': self.colorMapGC = get_cmap('Accent') elif colorMapStr == 'Blues': self.colorMapGC = get_cmap('Blues') elif colorMapStr == 'Spectral': self.colorMapGC = get_cmap('spectral') elif colorMapStr == 'Grayscale': self.colorMapGC = get_cmap('gist_yarg') elif colorMapStr == 'Discrete': discrete_map = [(0, 0, 0)] discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((141 / 255.0, 211 / 255.0, 199 / 255.0)) discrete_map.append((255 / 255.0, 255 / 255.0, 179 / 255.0)) discrete_map.append((190 / 255.0, 186 / 255.0, 218 / 255.0)) discrete_map.append((251 / 255.0, 128 / 255.0, 114 / 255.0)) discrete_map.append((128 / 255.0, 177 / 255.0, 211 / 255.0)) discrete_map.append((253 / 255.0, 180 / 255.0, 98 / 255.0)) discrete_map.append((179 / 255.0, 222 / 255.0, 105 / 255.0)) discrete_map.append((252 / 255.0, 205 / 255.0, 229 / 255.0)) discrete_map.append((217 / 255.0, 217 / 255.0, 217 / 255.0)) discrete_map.append((188 / 255.0, 128 / 255.0, 189 / 255.0)) discrete_map.append((204 / 255.0, 235 / 255.0, 197 / 255.0)) discrete_map.append((255 / 255.0, 237 / 255.0, 111 / 255.0)) discrete_map.append((1, 1, 1)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) self.colorMapGC = LinearSegmentedColormap.from_list('GC_DISCRETE', discrete_map, N=20) elif 
colorMapStr == 'DiscretePaired': discrete_map = [(0, 0, 0)] discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((166 / 255.0, 206 / 255.0, 227 / 255.0)) discrete_map.append((31 / 255.0, 120 / 255.0, 180 / 255.0)) discrete_map.append((178 / 255.0, 223 / 255.0, 138 / 255.0)) discrete_map.append((51 / 255.0, 160 / 255.0, 44 / 255.0)) discrete_map.append((251 / 255.0, 154 / 255.0, 153 / 255.0)) discrete_map.append((227 / 255.0, 26 / 255.0, 28 / 255.0)) discrete_map.append((253 / 255.0, 191 / 255.0, 111 / 255.0)) discrete_map.append((255 / 255.0, 127 / 255.0, 0 / 255.0)) discrete_map.append((202 / 255.0, 178 / 255.0, 214 / 255.0)) discrete_map.append((106 / 255.0, 61 / 255.0, 154 / 255.0)) discrete_map.append((255 / 255.0, 255 / 255.0, 179 / 255.0)) discrete_map.append((217 / 255.0, 95 / 255.0, 2 / 255.0)) discrete_map.append((1, 1, 1)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) discrete_map.append((0, 0, 0)) self.colorMapGC = LinearSegmentedColormap.from_list('GC_DISCRETE', discrete_map, N=20) def plotStoitNames(self, ax): """Plot stoit names on an existing axes""" outer_index = 0 for corner in self.corners: ax.text(corner[0], corner[1], corner[2], self.stoitColNames[outer_index], color='#000000') outer_index += 1 def plotUnbinned(self, timer, coreCut, transform=True, ignoreContigLengths=False): """Plot all contigs over a certain length which are unbinned""" self.loadData(timer, "((length >= " + str(coreCut) + ") & (bid == 0))") if transform: self.transformCP(timer) else: if self.numStoits == 3: self.transformedCP = self.covProfiles else: print("Number of stoits != 3. 
You need to transform") self.transformCP(timer) fig = plt.figure() ax1 = fig.add_subplot(111, projection='3d') if ignoreContigLengths: sc = ax1.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10, marker='.') else: sc = ax1.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=np_sqrt(self.contigLengths), marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect self.plotStoitNames(ax1) try: plt.show() plt.close(fig) except: print("Error showing image", exc_info()[0]) raise del fig def plotAll(self, timer, coreCut, transform=True, ignoreContigLengths=False): """Plot all contigs over a certain length which are unbinned""" self.loadData(timer, "((length >= " + str(coreCut) + "))") if transform: self.transformCP(timer) else: if self.numStoits == 3: self.transformedCP = self.covProfiles else: print("Number of stoits != 3. You need to transform") self.transformCP(timer) fig = plt.figure() ax1 = fig.add_subplot(111, projection='3d') if ignoreContigLengths: sc = ax1.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.', s=10.) 
else: sc = ax1.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.', s=np_sqrt(self.contigLengths)) sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect self.plotStoitNames(ax1) cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) #import IPython; IPython.embed() cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) try: plt.show() plt.close(fig) except: print("Error showing image", exc_info()[0]) raise del fig def plotTransViews(self, tag="fordens"): """Plot top, side and front views of the transformed data""" self.renderTransData(tag + "_top.png", azim=0, elev=90) self.renderTransData(tag + "_front.png", azim=0, elev=0) self.renderTransData(tag + "_side.png", azim=90, elev=0) def renderTransCPData(self, fileName="", show=True, elev=45, azim=45, all=False, showAxis=False, primaryWidth=12, primarySpace=3, dpi=300, format='png', fig=None, highlight=None, restrictedBids=[], alpha=1, ignoreContigLengths=False): """Plot transformed data in 3D""" del_fig = False if (fig is None): fig = plt.figure() del_fig = True else: plt.clf() if (all): myAXINFO = { 'x': { 'i': 0, 'tickdir': 1, 'juggled': (1, 0, 2), 'color': (0, 0, 0, 0, 0) }, 'y': { 'i': 1, 'tickdir': 0, 'juggled': (0, 1, 2), 'color': (0, 0, 0, 0, 0) }, 'z': { 'i': 2, 'tickdir': 0, 'juggled': (0, 2, 1), 'color': (0, 0, 0, 0, 0) }, } ax = fig.add_subplot(131, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 0 ax.elev = 0 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) 
ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(132, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 90 ax.elev = 0 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO ax = fig.add_subplot(133, projection='3d') sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect ax.azim = 0 ax.elev = 90 ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis: for elt in axis.get_ticklines() + axis.get_ticklabels(): elt.set_visible(False) ax.w_xaxis._AXINFO = myAXINFO ax.w_yaxis._AXINFO = myAXINFO ax.w_zaxis._AXINFO = myAXINFO else: ax = fig.add_subplot(111, projection='3d') if len(restrictedBids) == 
0: if highlight is None: print("BF:", np_shape(self.transformedCP)) if ignoreContigLengths: sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') else: sc = ax.scatter(self.transformedCP[:, 0], self.transformedCP[:, 1], self.transformedCP[:, 2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=np_sqrt(self.contigLengths), marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect else: #draw the opaque guys first """ sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect """ # now replot the highlighted guys disp_vals = np_array([]) disp_GCs = np_array([]) thrower = {} hide_vals = np_array([]) hide_GCs = np_array([]) num_points = 0 for bin in highlight: for row_index in bin.rowIndices: num_points += 1 disp_vals = np_append( disp_vals, self.transformedCP[row_index]) disp_GCs = np_append(disp_GCs, self.contigGCs[row_index]) thrower[row_index] = False # reshape disp_vals = np_reshape(disp_vals, (num_points, 3)) num_points = 0 for i in range(len(self.indices)): try: thrower[i] except KeyError: num_points += 1 hide_vals = np_append(hide_vals, self.transformedCP[i]) hide_GCs = np_append(hide_GCs, self.contigGCs[i]) # reshape hide_vals = np_reshape(hide_vals, (num_points, 3)) sc = ax.scatter(hide_vals[:, 0], hide_vals[:, 1], hide_vals[:, 2], edgecolors='none', c=hide_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect sc = ax.scatter(disp_vals[:, 0], disp_vals[:, 1], disp_vals[:, 2], 
edgecolors='none', c=disp_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10., marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect print(np_shape(disp_vals), np_shape(hide_vals), np_shape(self.transformedCP)) # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) else: r_trans = np_array([]) r_cols = np_array([]) num_added = 0 for i in range(len(self.indices)): if self.binIds[i] not in restrictedBids: r_trans = np_append(r_trans, self.transformedCP[i]) r_cols = np_append(r_cols, self.contigGCs[i]) num_added += 1 r_trans = np_reshape(r_trans, (num_added, 3)) print(np_shape(r_trans)) #r_cols = np_reshape(r_cols, (num_added,3)) sc = ax.scatter(r_trans[:, 0], r_trans[:, 1], r_trans[:, 2], edgecolors='none', c=r_cols, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect # render color bar cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) ax.azim = azim ax.elev = elev ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) if (not showAxis): ax.set_axis_off() if (fileName != ""): try: if (all): fig.set_size_inches(3 * primaryWidth + 2 * primarySpace, primaryWidth) else: fig.set_size_inches(primaryWidth, primaryWidth) plt.savefig(fileName, dpi=dpi, format=format) except: print("Error saving image", fileName, exc_info()[0]) raise elif (show): try: plt.show() except: print("Error showing image", exc_info()[0]) raise if del_fig: plt.close(fig) del fig 
############################################################################### ############################################################################### ############################################################################### ############################################################################### def r2nderTransCPData( self, fig, alphaIndices=[], visibleIndices=[], alpha=1, ignoreContigLengths=False, elev=45, azim=45, fileName="", dpi=300, format='png', primaryWidth=6, title="", showAxis=False, showColorbar=True, ): """Plot transformed data in 3D""" # clear any existing plot plt.clf() ax = fig.add_subplot(111, projection='3d') # work out the coords an colours based on indices alpha_coords = self.transformedCP[alphaIndices] alpha_GCs = self.contigGCs[alphaIndices] visible_coords = self.transformedCP[visibleIndices] visible_GCs = self.contigGCs[visibleIndices] # lengths if needed if not ignoreContigLengths: alpha_lengths = self.contigLengths[alphaIndices] visible_lengths = self.contigLengths[visibleIndices] else: alpha_lengths = 10. visible_lengths = 10. 
# first plot alpha points if len(alpha_GCs) > 0: sc = ax.scatter(alpha_coords[:, 0], alpha_coords[:, 1], alpha_coords[:, 2], edgecolors='none', c=alpha_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=alpha_lengths, marker='.', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect # then plot full visible points if len(visible_GCs) > 0: sc = ax.scatter(visible_coords[:, 0], visible_coords[:, 1], visible_coords[:, 2], edgecolors='none', c=visible_GCs, cmap=self.colorMapGC, s=visible_lengths, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args: None # disable depth transparency effect # render color bar if showColorbar: cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) # set aspect ax.azim = azim ax.elev = elev # make it purdy ax.set_xlim3d(0, self.scaleFactor) ax.set_ylim3d(0, self.scaleFactor) ax.set_zlim3d(0, self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) plt.tight_layout() if title != "": plt.title(title) if (not showAxis): ax.set_axis_off() if (fileName != ""): try: fig.set_size_inches(primaryWidth, primaryWidth) plt.savefig(fileName, dpi=dpi, format=format) except: print("Error saving image", fileName, exc_info()[0]) raise else: try: plt.show() except: print("Error showing image", exc_info()[0]) raise
class ProfileManager: """Interacts with the groopm DataManager and local data fields Mostly a wrapper around a group of numpy arrays and a pytables quagmire """ def __init__(self, dbFileName, force=False, scaleFactor=1000): # data self.dataManager = GMDataManager() # most data is saved to hdf self.dbFileName = dbFileName # db containing all the data we'd like to use self.condition = "" # condition will be supplied at loading time # --> NOTE: ALL of the arrays in this section are in sync # --> each one holds information for an individual contig self.indices = np_array([]) # indices into the data structure based on condition self.covProfiles = np_array([]) # coverage based coordinates self.transformedCP = np_array([]) # the munged data points self.corners = np_array([]) # the corners of the tranformed space self.TCentre = 0. # the centre of the coverage space self.transRadius = 0. # distance from corner to centre of transformed space self.averageCoverages = np_array([])# average coverage across all stoits self.normCoverages = np_array([]) # norm of the raw coverage vectors self.kmerSigs = np_array([]) # raw kmer signatures self.kmerNormPC1 = np_array([]) # First PC of kmer sigs normalized to [0, 1] self.kmerPCs = np_array([]) # PCs of kmer sigs capturing specified variance self.kmerVarPC = np_array([]) # variance of each PC self.stoitColNames = np_array([]) self.contigNames = np_array([]) self.contigLengths = np_array([]) self.contigGCs = np_array([]) self.colorMapGC = None self.binIds = np_array([]) # list of bin IDs # --> end section # meta self.validBinIds = {} # valid bin ids -> numMembers self.isLikelyChimeric = {} # indicates if a bin is likely to be chimeric self.binnedRowIndices = {} # dictionary of those indices which belong to some bin self.restrictedRowIndices = {} # dictionary of those indices which can not be binned yet self.numContigs = 0 # this depends on the condition given self.numStoits = 0 # this depends on the data which was parsed # contig links 
self.links = {} # misc self.forceWriting = force # overwrite existng values silently? self.scaleFactor = scaleFactor # scale every thing in the transformed data to this dimension def loadData(self, timer, condition, # condition as set by another function bids=[], # if this is set then only load those contigs with these bin ids verbose=True, # many to some output messages silent=False, # some to no output messages loadCovProfiles=True, loadKmerPCs=True, loadKmerVarPC=True, loadRawKmers=False, makeColors=True, loadContigNames=True, loadContigLengths=True, loadContigGCs=True, loadBins=False, loadLinks=False): """Load pre-parsed data""" timer.getTimeStamp() if(silent): verbose=False if verbose: print "Loading data from:", self.dbFileName try: self.numStoits = self.getNumStoits() self.condition = condition self.indices = self.dataManager.getConditionalIndices(self.dbFileName, condition=condition, silent=silent) if(verbose): print " Loaded indices with condition:", condition self.numContigs = len(self.indices) if self.numContigs == 0: print " ERROR: No contigs loaded using condition:", condition return if(not silent): print " Working with: %d contigs" % self.numContigs if(loadCovProfiles): if(verbose): print " Loading coverage profiles" self.covProfiles = self.dataManager.getCoverageProfiles(self.dbFileName, indices=self.indices) self.normCoverages = self.dataManager.getNormalisedCoverageProfiles(self.dbFileName, indices=self.indices) # work out average coverages self.averageCoverages = np_array([sum(i)/self.numStoits for i in self.covProfiles]) if loadRawKmers: if(verbose): print " Loading RAW kmer sigs" self.kmerSigs = self.dataManager.getKmerSigs(self.dbFileName, indices=self.indices) if(loadKmerPCs): self.kmerPCs = self.dataManager.getKmerPCAs(self.dbFileName, indices=self.indices) if(verbose): print " Loading PCA kmer sigs (" + str(len(self.kmerPCs[0])) + " dimensional space)" self.kmerNormPC1 = np_copy(self.kmerPCs[:,0]) self.kmerNormPC1 -= np_min(self.kmerNormPC1) 
self.kmerNormPC1 /= np_max(self.kmerNormPC1) if(loadKmerVarPC): self.kmerVarPC = self.dataManager.getKmerVarPC(self.dbFileName, indices=self.indices) if(verbose): print " Loading PCA kmer variance (total variance: %.2f" % np_sum(self.kmerVarPC) + ")" if(loadContigNames): if(verbose): print " Loading contig names" self.contigNames = self.dataManager.getContigNames(self.dbFileName, indices=self.indices) if(loadContigLengths): self.contigLengths = self.dataManager.getContigLengths(self.dbFileName, indices=self.indices) if(verbose): print " Loading contig lengths (Total: %d BP)" % ( sum(self.contigLengths) ) if(loadContigGCs): self.contigGCs = self.dataManager.getContigGCs(self.dbFileName, indices=self.indices) if(verbose): print " Loading contig GC ratios (Average GC: %0.3f)" % ( np_mean(self.contigGCs) ) if(makeColors): if(verbose): print " Creating color map" # use HSV to RGB to generate colors S = 1 # SAT and VAL remain fixed at 1. Reduce to make V = 1 # Pastels if that's your preference... self.colorMapGC = self.createColorMapHSV() if(loadBins): if(verbose): print " Loading bin assignments" self.binIds = self.dataManager.getBins(self.dbFileName, indices=self.indices) if len(bids) != 0: # need to make sure we're not restricted in terms of bins bin_stats = self.getBinStats() for bid in bids: try: self.validBinIds[bid] = bin_stats[bid][0] self.isLikelyChimeric[bid]= bin_stats[bid][1] except KeyError: self.validBinIds[bid] = 0 self.isLikelyChimeric[bid]= False else: bin_stats = self.getBinStats() for bid in bin_stats: self.validBinIds[bid] = bin_stats[bid][0] self.isLikelyChimeric[bid] = bin_stats[bid][1] # fix the binned indices self.binnedRowIndices = {} for i in range(len(self.indices)): if(self.binIds[i] != 0): self.binnedRowIndices[i] = True else: # we need zeros as bin indicies then... 
self.binIds = np_zeros(len(self.indices)) if(loadLinks): self.loadLinks() self.stoitColNames = self.getStoitColNames() except: print "Error loading DB:", self.dbFileName, exc_info()[0] raise def reduceIndices(self, deadRowIndices): """purge indices from the data structures Be sure that deadRowIndices are sorted ascending """ # strip out the other values self.indices = np_delete(self.indices, deadRowIndices, axis=0) self.covProfiles = np_delete(self.covProfiles, deadRowIndices, axis=0) self.transformedCP = np_delete(self.transformedCP, deadRowIndices, axis=0) self.contigNames = np_delete(self.contigNames, deadRowIndices, axis=0) self.contigLengths = np_delete(self.contigLengths, deadRowIndices, axis=0) self.contigGCs = np_delete(self.contigGCs, deadRowIndices, axis=0) #self.kmerSigs = np_delete(self.kmerSigs, deadRowIndices, axis=0) self.kmerPCs = np_delete(self.kmerPCs, deadRowIndices, axis=0) self.binIds = np_delete(self.binIds, deadRowIndices, axis=0) #------------------------------------------------------------------------------ # GET / SET def getNumStoits(self): """return the value of numStoits in the metadata tables""" return self.dataManager.getNumStoits(self.dbFileName) def getMerColNames(self): """return the value of merColNames in the metadata tables""" return self.dataManager.getMerColNames(self.dbFileName) def getMerSize(self): """return the value of merSize in the metadata tables""" return self.dataManager.getMerSize(self.dbFileName) def getNumMers(self): """return the value of numMers in the metadata tables""" return self.dataManager.getNumMers(self.dbFileName) ### USE the member vars instead! 
# def getNumCons(self): # """return the value of numCons in the metadata tables""" # return self.dataManager.getNumCons(self.dbFileName) def getNumBins(self): """return the value of numBins in the metadata tables""" return self.dataManager.getNumBins(self.dbFileName) def setNumBins(self, numBins): """set the number of bins""" self.dataManager.setNumBins(self.dbFileName, numBins) def getStoitColNames(self): """return the value of stoitColNames in the metadata tables""" return np_array(self.dataManager.getStoitColNames(self.dbFileName).split(",")) def isClustered(self): """Has the data been clustered already""" return self.dataManager.isClustered(self.dbFileName) def setClustered(self): """Save that the db has been clustered""" self.dataManager.setClustered(self.dbFileName, True) def isComplete(self): """Has the data been *completely* clustered already""" return self.dataManager.isComplete(self.dbFileName) def setComplete(self): """Save that the db has been completely clustered""" self.dataManager.setComplete(self.dbFileName, True) def getBinStats(self): """Go through all the "bins" array and make a list of unique bin ids vs number of contigs""" return self.dataManager.getBinStats(self.dbFileName) def setBinStats(self, binStats): """Store the valid bin Ids and number of members binStats is a list of tuples which looks like: [ (bid, numMembers, isLikelyChimeric) ] Note that this call effectively nukes the existing table """ self.dataManager.setBinStats(self.dbFileName, binStats) self.setNumBins(len(binStats)) def setBinAssignments(self, assignments, nuke=False): """Save our bins into the DB""" self.dataManager.setBinAssignments(self.dbFileName, assignments, nuke=nuke) def loadLinks(self): """Extra wrapper 'cause I am dumb""" self.links = self.getLinks() def getLinks(self): """Get contig links""" # first we get the absolute links absolute_links = self.dataManager.restoreLinks(self.dbFileName, self.indices) # now convert this into plain old row_indices 
        # --- tail of getLinks(): its `def` line is above this chunk ---
        # Build a map from absolute (DB) contig id -> local row index.
        reverse_index_lookup = {}
        for i in range(len(self.indices)):
            reverse_index_lookup[self.indices[i]] = i

        # now convert the absolute links to local ones
        relative_links = {}
        for cid in self.indices:
            local_cid = reverse_index_lookup[cid]
            relative_links[local_cid] = []
            try:
                for link in absolute_links[cid]:
                    relative_links[local_cid].append([reverse_index_lookup[link[0]], link[1], link[2], link[3]])
            except KeyError: # not everyone is linked
                pass

        return relative_links

#------------------------------------------------------------------------------
# DATA TRANSFORMATIONS

    def getAverageCoverage(self, rowIndex):
        """Return the average coverage for this contig across all stoits

        NOTE(review): this averages self.transformedCP, not self.covProfiles.
        The transformed points are plot coordinates, not raw coverages --
        confirm self.covProfiles was not intended here.
        """
        return sum(self.transformedCP[rowIndex])/self.numStoits

    def shuffleBAMs(self):
        """Make the data transformation deterministic by reordering the bams"""
        # first we should make a subset of the total data
        # we'd like to take it down to about 1500 or so RI's
        # but we'd like to do this in a repeatable way
        ideal_contig_num = 1500
        sub_cons = range(len(self.indices))
        while len(sub_cons) > ideal_contig_num:
            # select every second contig when sorted by norm cov
            cov_sorted = np_argsort(self.normCoverages[sub_cons])
            sub_cons = np_array([sub_cons[cov_sorted[i*2]] for i in np_arange(int(len(sub_cons)/2))])

            if len(sub_cons) > ideal_contig_num:
                # select every second contig when sorted by mer PC1
                mer_sorted = np_argsort(self.kmerNormPC1[sub_cons])
                sub_cons = np_array([sub_cons[mer_sorted[i*2]] for i in np_arange(int(len(sub_cons)/2))])

        # now that we have a subset, calculate the distance between each of the untransformed vectors
        num_sc = len(sub_cons)

        # log shift the coverages towards the origin
        sub_covs = np_transpose([self.covProfiles[i]*(np_log10(self.normCoverages[i])/self.normCoverages[i]) for i in sub_cons])
        sq_dists = cdist(sub_covs,sub_covs,'cityblock')
        dists = squareform(sq_dists)

        # initialise a list of left, right neighbours
        lr_dict = {}
        for i in range(self.numStoits):
            lr_dict[i] = []

        too_big = 10000
        while True:
            # closest is an index into the condensed distance matrix;
            # small2indices (defined elsewhere in this class -- presumably)
            # maps it back to a (row, col) pair. TODO confirm.
            closest = np_argmin(dists)
            if dists[closest] == too_big:
                break
            (i,j) = self.small2indices(closest, self.numStoits-1)
            lr_dict[j].append(i)
            lr_dict[i].append(j)

            # mark these guys as neighbours
            if len(lr_dict[i]) == 2:
                # no more than 2 neighbours
                sq_dists[i,:] = too_big
                sq_dists[:,i] = too_big
                sq_dists[i,i] = 0.0
            if len(lr_dict[j]) == 2:
                # no more than 2 neighbours
                sq_dists[j,:] = too_big
                sq_dists[:,j] = too_big
                sq_dists[j,j] = 0.0

            # fix the dist matrix
            sq_dists[j,i] = too_big
            sq_dists[i,j] = too_big
            dists = squareform(sq_dists)

        # now make the ordering: walk the chain of neighbours starting at 0
        ordering = [0, lr_dict[0][0]]
        done = 2
        while done < self.numStoits:
            last = ordering[done-1]
            if lr_dict[last][0] == ordering[done-2]:
                ordering.append(lr_dict[last][1])
                last = lr_dict[last][1]
            else:
                ordering.append(lr_dict[last][0])
                last = lr_dict[last][0]
            done+=1

        # reshuffle the contig order!
        # yay for bubble sort!
        working = np_arange(self.numStoits)
        for i in range(1, self.numStoits):
            # where is this guy in the list
            loc = list(working).index(ordering[i])
            if loc != i:
                # swap the columns
                self.covProfiles[:,[i,loc]] = self.covProfiles[:,[loc,i]]
                self.stoitColNames[[i,loc]] = self.stoitColNames[[loc,i]]
                working[[i,loc]] = working[[loc,i]]

    def transformCP(self, timer, silent=False, nolog=False):
        """Do the main transformation on the coverage profile data

        Loads pre-computed transformed profiles and corners from the DB and
        derives the centre / radius of the transformed space.

        NOTE(review): `timer` and `nolog` are accepted but never used in this
        body -- presumably kept for interface compatibility; confirm.
        """
        if(not silent):
            print " Reticulating splines"
        self.transformedCP = self.dataManager.getTransformedCoverageProfiles(self.dbFileName, indices=self.indices)
        self.corners = self.dataManager.getTransformedCoverageCorners(self.dbFileName)
        self.TCentre = np_mean(self.corners, axis=0)
        self.transRadius = np_norm(self.corners[0] - self.TCentre)

#------------------------------------------------------------------------------
# DEBUG CRUFT

    def rewriteBins(self):
        """rewrite the bins table in hdf5 based on numbers in meta-contigs"""
        bins = self.dataManager.getBins(self.dbFileName)
        # count members per bin id, skipping the "unbinned" id 0
        bin_store = {}
        for c in bins:
            if c != 0:
                try:
                    bin_store[c] += 1
                except KeyError:
                    bin_store[c] = 1
        bin_stats = []
        for bid in bin_store:
            # [(bid, size, likelyChimeric)]
            bin_stats.append((bid, bin_store[bid], False))
        self.setBinStats(bin_stats)

#------------------------------------------------------------------------------
# IO and IMAGE RENDERING

    def createColorMapHSV(self):
        # Full-saturation, full-value HSV sweep over 1000 hue steps,
        # shaped by a sine ramp so hues change smoothly across [0, 1].
        S = 1.0
        V = 1.0
        return LinearSegmentedColormap.from_list('GC', [htr((1.0 + np_sin(np_pi * (val/1000.0) - np_pi/2))/2., S, V) for val in xrange(0, 1000)], N=1000)

    def setColorMap(self, colorMapStr):
        # Select the colour map used for GC colouring of contigs.
        if colorMapStr == 'HSV':
            S = 1
            V = 1
            self.colorMapGC = self.createColorMapHSV()
        elif colorMapStr == 'Accent':
            self.colorMapGC = get_cmap('Accent')
        elif colorMapStr == 'Blues':
            self.colorMapGC = get_cmap('Blues')
        elif colorMapStr == 'Spectral':
            # NOTE(review): lowercase 'spectral' is the legacy matplotlib name
            # (removed in matplotlib >= 2.2); 'Spectral' is the modern one.
            self.colorMapGC = get_cmap('spectral')
        elif colorMapStr == 'Grayscale':
            self.colorMapGC = get_cmap('gist_yarg')
        elif colorMapStr == 'Discrete':
            # ColorBrewer-style discrete palette padded with black / white
            # sentinels at the extremes of the GC range.
            discrete_map = [(0,0,0)]
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((141/255.0,211/255.0,199/255.0))
            discrete_map.append((255/255.0,255/255.0,179/255.0))
            discrete_map.append((190/255.0,186/255.0,218/255.0))
            discrete_map.append((251/255.0,128/255.0,114/255.0))
            discrete_map.append((128/255.0,177/255.0,211/255.0))
            discrete_map.append((253/255.0,180/255.0,98/255.0))
            discrete_map.append((179/255.0,222/255.0,105/255.0))
            discrete_map.append((252/255.0,205/255.0,229/255.0))
            discrete_map.append((217/255.0,217/255.0,217/255.0))
            discrete_map.append((188/255.0,128/255.0,189/255.0))
            discrete_map.append((204/255.0,235/255.0,197/255.0))
            discrete_map.append((255/255.0,237/255.0,111/255.0))
            discrete_map.append((1,1,1))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            self.colorMapGC = LinearSegmentedColormap.from_list('GC_DISCRETE', discrete_map, N=20)
        elif colorMapStr == 'DiscretePaired':
            # Paired-style discrete palette, same black/white padding scheme.
            discrete_map = [(0,0,0)]
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((166/255.0,206/255.0,227/255.0))
            discrete_map.append((31/255.0,120/255.0,180/255.0))
            discrete_map.append((178/255.0,223/255.0,138/255.0))
            discrete_map.append((51/255.0,160/255.0,44/255.0))
            discrete_map.append((251/255.0,154/255.0,153/255.0))
            discrete_map.append((227/255.0,26/255.0,28/255.0))
            discrete_map.append((253/255.0,191/255.0,111/255.0))
            discrete_map.append((255/255.0,127/255.0,0/255.0))
            discrete_map.append((202/255.0,178/255.0,214/255.0))
            discrete_map.append((106/255.0,61/255.0,154/255.0))
            discrete_map.append((255/255.0,255/255.0,179/255.0))
            discrete_map.append((217/255.0,95/255.0,2/255.0))
            discrete_map.append((1,1,1))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            discrete_map.append((0,0,0))
            self.colorMapGC = LinearSegmentedColormap.from_list('GC_DISCRETE', discrete_map, N=20)

    def plotStoitNames(self, ax):
        """Plot stoit names on an existing axes"""
        outer_index = 0
        for corner in self.corners:
            ax.text(corner[0], corner[1], corner[2], self.stoitColNames[outer_index], color='#000000' )
            outer_index += 1

    def plotUnbinned(self, timer, coreCut, transform=True, ignoreContigLengths=False):
        """Plot all contigs over a certain length which are unbinned"""
        self.loadData(timer, "((length >= "+str(coreCut)+") & (bid == 0))")

        if transform:
            self.transformCP(timer)
        else:
            if self.numStoits == 3:
                self.transformedCP = self.covProfiles
            else:
                print "Number of stoits != 3. You need to transform"
                self.transformCP(timer)

        fig = plt.figure()
        ax1 = fig.add_subplot(111, projection='3d')
        if ignoreContigLengths:
            sc = ax1.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10, marker='.')
        else:
            sc = ax1.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=np_sqrt(self.contigLengths), marker='.')
        sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
        self.plotStoitNames(ax1)

        try:
            plt.show()
            plt.close(fig)
        except:
            print "Error showing image", exc_info()[0]
            raise
        del fig

    def plotAll(self, timer, coreCut, transform=True, ignoreContigLengths=False):
        """Plot all contigs over a certain length (binned or not)"""
        self.loadData(timer, "((length >= "+str(coreCut)+"))")
        if transform:
            self.transformCP(timer)
        else:
            if self.numStoits == 3:
                self.transformedCP = self.covProfiles
            else:
                print "Number of stoits != 3. You need to transform"
                self.transformCP(timer)

        fig = plt.figure()
        ax1 = fig.add_subplot(111, projection='3d')
        if ignoreContigLengths:
            sc = ax1.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.', s=10. )
        else:
            sc = ax1.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.', s=np_sqrt(self.contigLengths) )
        sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
        self.plotStoitNames(ax1)

        cbar = plt.colorbar(sc, shrink=0.5)
        cbar.ax.tick_params()
        cbar.ax.set_title("% GC", size=10)
        cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
        #import IPython; IPython.embed()
        cbar.ax.set_ylim([0.15, 0.85])
        mungeCbar(cbar)

        try:
            plt.show()
            plt.close(fig)
        except:
            print "Error showing image", exc_info()[0]
            raise
        del fig

    def plotTransViews(self, tag="fordens"):
        """Plot top, side and front views of the transformed data

        NOTE(review): renderTransData is not visible on this class --
        renderTransCPData was probably intended; calling this as-is would
        raise AttributeError. Verify before use.
        """
        self.renderTransData(tag+"_top.png",azim = 0, elev = 90)
        self.renderTransData(tag+"_front.png",azim = 0, elev = 0)
        self.renderTransData(tag+"_side.png",azim = 90, elev = 0)

    def renderTransCPData(self, fileName="", show=True, elev=45, azim=45, all=False, showAxis=False, primaryWidth=12, primarySpace=3, dpi=300, format='png', fig=None, highlight=None, restrictedBids=[], alpha=1, ignoreContigLengths=False):
        """Plot transformed data in 3D

        NOTE(review): mutable default `restrictedBids=[]` -- safe only while
        the list is never mutated in here (it is not); also `all` and
        `format` shadow builtins. Kept for interface compatibility.
        """
        del_fig = False
        if(fig is None):
            fig = plt.figure()
            del_fig = True
        else:
            plt.clf()
        if(all):
            # three fixed orthogonal views side by side
            myAXINFO = {
                'x': {'i': 0, 'tickdir': 1, 'juggled': (1, 0, 2),
                'color': (0, 0, 0, 0, 0)},
                'y': {'i': 1, 'tickdir': 0, 'juggled': (0, 1, 2),
                'color': (0, 0, 0, 0, 0)},
                'z': {'i': 2, 'tickdir': 0, 'juggled': (0, 2, 1),
                'color': (0, 0, 0, 0, 0)},
            }

            ax = fig.add_subplot(131, projection='3d')
            sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
            ax.azim = 0
            ax.elev = 0
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(132, projection='3d')
            sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
            ax.azim = 90
            ax.elev = 0
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(133, projection='3d')
            sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='k', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, marker='.')
            sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
            ax.azim = 0
            ax.elev = 90
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO
        else:
            ax = fig.add_subplot(111, projection='3d')
            if len(restrictedBids) == 0:
                if highlight is None:
                    print "BF:", np_shape(self.transformedCP)
                    if ignoreContigLengths:
                        sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.')
                    else:
                        sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=np_sqrt(self.contigLengths), marker='.')
                    sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                else:
                    #draw the opaque guys first
                    """
                    sc = ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigGCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha)
                    sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                    """
                    # now replot the highlighted guys
                    disp_vals = np_array([])
                    disp_GCs = np_array([])
                    thrower = {}
                    hide_vals = np_array([])
                    hide_GCs = np_array([])
                    num_points = 0
                    for bin in highlight:
                        for row_index in bin.rowIndices:
                            num_points += 1
                            disp_vals = np_append(disp_vals, self.transformedCP[row_index])
                            disp_GCs = np_append(disp_GCs, self.contigGCs[row_index])
                            thrower[row_index] = False
                    # reshape
                    disp_vals = np_reshape(disp_vals, (num_points, 3))

                    num_points = 0
                    for i in range(len(self.indices)):
                        try:
                            thrower[i]
                        except KeyError:
                            num_points += 1
                            hide_vals = np_append(hide_vals, self.transformedCP[i])
                            hide_GCs = np_append(hide_GCs, self.contigGCs[i])
                    # reshape
                    hide_vals = np_reshape(hide_vals, (num_points, 3))

                    sc = ax.scatter(hide_vals[:,0], hide_vals[:,1], hide_vals[:,2], edgecolors='none', c=hide_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=100., marker='s', alpha=alpha)
                    sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                    sc = ax.scatter(disp_vals[:,0], disp_vals[:,1], disp_vals[:,2], edgecolors='none', c=disp_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=10., marker='.')
                    sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                    print np_shape(disp_vals), np_shape(hide_vals), np_shape(self.transformedCP)

                # render color bar
                cbar = plt.colorbar(sc, shrink=0.5)
                cbar.ax.tick_params()
                cbar.ax.set_title("% GC", size=10)
                cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
                cbar.ax.set_ylim([0.15, 0.85])
                mungeCbar(cbar)
            else:
                # plot only contigs whose bin is NOT in restrictedBids
                r_trans = np_array([])
                r_cols=np_array([])
                num_added = 0
                for i in range(len(self.indices)):
                    if self.binIds[i] not in restrictedBids:
                        r_trans = np_append(r_trans, self.transformedCP[i])
                        r_cols = np_append(r_cols, self.contigGCs[i])
                        num_added += 1
                r_trans = np_reshape(r_trans, (num_added,3))
                print np_shape(r_trans)
                #r_cols = np_reshape(r_cols, (num_added,3))
                sc = ax.scatter(r_trans[:,0], r_trans[:,1], r_trans[:,2], edgecolors='none', c=r_cols, cmap=self.colorMapGC, s=10., vmin=0.0, vmax=1.0, marker='.')
                sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect
                # render color bar
                cbar = plt.colorbar(sc, shrink=0.5)
                cbar.ax.tick_params()
                cbar.ax.set_title("% GC", size=10)
                cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])
                cbar.ax.set_ylim([0.15, 0.85])
                mungeCbar(cbar)
            ax.azim = azim
            ax.elev = elev
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
        if(not showAxis):
            ax.set_axis_off()

        if(fileName != ""):
            try:
                if(all):
                    fig.set_size_inches(3*primaryWidth+2*primarySpace,primaryWidth)
                else:
                    fig.set_size_inches(primaryWidth,primaryWidth)
                plt.savefig(fileName,dpi=dpi,format=format)
            except:
                print "Error saving image",fileName, exc_info()[0]
                raise
        elif(show):
            try:
                plt.show()
            except:
                print "Error showing image", exc_info()[0]
                raise
        if del_fig:
            plt.close(fig)
            del fig
############################################################################### ############################################################################### ############################################################################### ############################################################################### def r2nderTransCPData(self, fig, alphaIndices=[], visibleIndices=[], alpha=1, ignoreContigLengths=False, elev=45, azim=45, fileName="", dpi=300, format='png', primaryWidth=6, title="", showAxis=False, showColorbar=True,): """Plot transformed data in 3D""" # clear any existing plot plt.clf() ax = fig.add_subplot(111, projection='3d') # work out the coords an colours based on indices alpha_coords = self.transformedCP[alphaIndices] alpha_GCs = self.contigGCs[alphaIndices] visible_coords = self.transformedCP[visibleIndices] visible_GCs = self.contigGCs[visibleIndices] # lengths if needed if not ignoreContigLengths: alpha_lengths = self.contigLengths[alphaIndices] visible_lengths = self.contigLengths[visibleIndices] else: alpha_lengths = 10. visible_lengths = 10. 
# first plot alpha points if len(alpha_GCs) > 0: sc = ax.scatter(alpha_coords[:,0], alpha_coords[:,1], alpha_coords[:,2], edgecolors='none', c=alpha_GCs, cmap=self.colorMapGC, vmin=0.0, vmax=1.0, s=alpha_lengths, marker='.', alpha=alpha) sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect # then plot full visible points if len(visible_GCs) > 0: sc = ax.scatter(visible_coords[:,0], visible_coords[:,1], visible_coords[:,2], edgecolors='none', c=visible_GCs, cmap=self.colorMapGC, s=visible_lengths, vmin=0.0, vmax=1.0, marker='.') sc.set_edgecolors = sc.set_facecolors = lambda *args:None # disable depth transparency effect # render color bar if showColorbar: cbar = plt.colorbar(sc, shrink=0.5) cbar.ax.tick_params() cbar.ax.set_title("% GC", size=10) cbar.set_ticks([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) cbar.ax.set_ylim([0.15, 0.85]) mungeCbar(cbar) # set aspect ax.azim = azim ax.elev = elev # make it purdy ax.set_xlim3d(0,self.scaleFactor) ax.set_ylim3d(0,self.scaleFactor) ax.set_zlim3d(0,self.scaleFactor) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_xticks([]) ax.set_yticks([]) ax.set_zticks([]) plt.tight_layout() if title != "": plt.title(title) if(not showAxis): ax.set_axis_off() if(fileName != ""): try: fig.set_size_inches(primaryWidth,primaryWidth) plt.savefig(fileName,dpi=dpi,format=format) except: print "Error saving image",fileName, exc_info()[0] raise else: try: plt.show() except: print "Error showing image", exc_info()[0] raise
class ProfileManager:
    """Interacts with the groopm DataManager and local data fields

    Mostly a wrapper around a group of numpy arrays and a pytables quagmire

    NOTE(review): this class largely duplicates the (GC-based) manager defined
    earlier in this file -- it appears to be an older revision (kmerVals /
    contigColours instead of kmerNormPC1 / contigGCs, misspelled 'Indicies').
    Consider consolidating.
    """
    def __init__(self, dbFileName, force=False, scaleFactor=1000):
        # data
        self.dataManager = GMDataManager()      # most data is saved to hdf
        self.dbFileName = dbFileName            # db containing all the data we'd like to use
        self.condition = ""                     # condition will be supplied at loading time
        # --> NOTE: ALL of the arrays in this section are in sync
        # --> each one holds information for an individual contig
        self.indices = np_array([])             # indices into the data structure based on condition
        self.covProfiles = np_array([])         # coverage based coordinates
        self.transformedCP = np_array([])       # the munged data points
        self.averageCoverages = np_array([])    # average coverage across all stoits
        self.kmerSigs = np_array([])            # raw kmer signatures
        self.kmerVals = np_array([])            # PCA'd kmer sigs
        self.contigNames = np_array([])
        self.contigLengths = np_array([])
        self.contigColours = np_array([])       # calculated from kmerVals
        self.binIds = np_array([])              # list of bin IDs
        # --> end section

        # meta
        self.validBinIds = {}                   # valid bin ids -> numMembers
        self.binnedRowIndicies = {}             # dictionary of those indices which belong to some bin
        self.restrictedRowIndicies = {}         # dictionary of those indices which can not be binned yet
        self.numContigs = 0                     # this depends on the condition given
        self.numStoits = 0                      # this depends on the data which was parsed

        # contig links
        self.links = {}

        # misc
        self.forceWriting = force               # overwrite existing values silently?
        self.scaleFactor = scaleFactor          # scale everything in the transformed data to this dimension

    def loadData(self,
                 condition="",                  # condition as set by another function
                 bids=[],                       # if this is set then only load those contigs with these bin ids
                 verbose=True,                  # many to some output messages
                 silent=False,                  # some to no output messages
                 loadCovProfiles=True,
                 loadKmerSigs=True,
                 makeColours=True,
                 loadContigNames=True,
                 loadContigLengths=True,
                 loadBins=False,
                 loadLinks=False):              # NOTE(review): shadows the loadLinks() method name inside this scope
        """Load pre-parsed data"""
        if(verbose):
            print "Loading data from:", self.dbFileName

        # check to see if we need to override the condition
        if(len(bids) != 0):
            condition = "((bid == "+str(bids[0])+")"
            for index in range (1,len(bids)):
                condition += " | (bid == "+str(bids[index])+")"
            condition += ")"
        if(silent):
            verbose=False
        try:
            self.numStoits = self.getNumStoits()
            self.condition = condition
            if(verbose):
                print " Loading indices (", condition,")"
            self.indices = self.dataManager.getConditionalIndicies(self.dbFileName, condition=condition)
            self.numContigs = len(self.indices)

            if(not silent):
                print " Working with: %d contigs" % self.numContigs

            if(loadCovProfiles):
                if(verbose):
                    print " Loading coverage profiles"
                self.covProfiles = self.dataManager.getCoverageProfiles(self.dbFileName, indices=self.indices)
                # work out average coverages
                self.averageCoverages = np_array([sum(i)/self.numStoits for i in self.covProfiles])

            if(loadKmerSigs):
                if(verbose):
                    print " Loading kmer sigs"
                self.kmerSigs = self.dataManager.getKmerSigs(self.dbFileName, indices=self.indices)

                if(makeColours):
                    if(verbose):
                        print " Creating colour profiles"
                    self.makeColourProfile()
                    # use HSV to RGB to generate colours
                    S = 1       # SAT and VAL remain fixed at 1. Reduce to make
                    V = 1       # Pastels if that's your preference...
                    self.contigColours = np_array([htr(val, S, V) for val in self.kmerVals])

            if(loadContigNames):
                if(verbose):
                    print " Loading contig names"
                self.contigNames = self.dataManager.getContigNames(self.dbFileName, indices=self.indices)

            if(loadContigLengths):
                if(verbose):
                    print " Loading contig lengths"
                self.contigLengths = self.dataManager.getContigLengths(self.dbFileName, indices=self.indices)
                print " Contigs contain %d BP" % ( sum(self.contigLengths) )

            if(loadBins):
                if(verbose):
                    print " Loading bins"
                self.binIds = self.dataManager.getBins(self.dbFileName, indices=self.indices)
                if len(bids) != 0: # need to make sure we're not restricted in terms of bins
                    tmp_bids = self.getBinStats()
                    for bid in bids:
                        self.validBinIds[bid] = tmp_bids[bid]
                else:
                    self.validBinIds = self.getBinStats()
                # fix the binned indices
                self.binnedRowIndicies = {}
                for i in range(len(self.indices)):
                    if(self.binIds[i] != 0):
                        self.binnedRowIndicies[i] = True
            else:
                # we need zeros as bin indicies then...
                self.binIds = np_zeros(len(self.indices))

            if(loadLinks):
                self.loadLinks()

        except:
            print "Error loading DB:", self.dbFileName, exc_info()[0]
            raise

    def reduceIndicies(self, deadRowIndicies):
        """purge indices from the data structures

        Be sure that deadRowIndicies are sorted ascending
        """
        # strip out the other values
        self.indices = np_delete(self.indices, deadRowIndicies, axis=0)
        self.covProfiles = np_delete(self.covProfiles, deadRowIndicies, axis=0)
        self.transformedCP = np_delete(self.transformedCP, deadRowIndicies, axis=0)
        self.contigNames = np_delete(self.contigNames, deadRowIndicies, axis=0)
        self.contigLengths = np_delete(self.contigLengths, deadRowIndicies, axis=0)
        self.contigColours = np_delete(self.contigColours, deadRowIndicies, axis=0)
        self.kmerSigs = np_delete(self.kmerSigs, deadRowIndicies, axis=0)
        self.kmerVals = np_delete(self.kmerVals, deadRowIndicies, axis=0)
        self.binIds = np_delete(self.binIds, deadRowIndicies, axis=0)

#------------------------------------------------------------------------------
# GET / SET

    def getNumStoits(self):
        """return the value of numStoits in the metadata tables"""
        return self.dataManager.getNumStoits(self.dbFileName)

    def getMerColNames(self):
        """return the value of merColNames in the metadata tables"""
        return self.dataManager.getMerColNames(self.dbFileName)

    def getMerSize(self):
        """return the value of merSize in the metadata tables"""
        return self.dataManager.getMerSize(self.dbFileName)

    def getNumMers(self):
        """return the value of numMers in the metadata tables"""
        return self.dataManager.getNumMers(self.dbFileName)

### USE the member vars instead!
#    def getNumCons(self):
#        """return the value of numCons in the metadata tables"""
#        return self.dataManager.getNumCons(self.dbFileName)

    def getNumBins(self):
        """return the value of numBins in the metadata tables"""
        return self.dataManager.getNumBins(self.dbFileName)

    def setNumBins(self, numBins):
        """set the number of bins"""
        self.dataManager.setNumBins(self.dbFileName, numBins)

    def getStoitColNames(self):
        """return the value of stoitColNames in the metadata tables"""
        return self.dataManager.getStoitColNames(self.dbFileName)

    def isClustered(self):
        """Has the data been clustered already"""
        return self.dataManager.isClustered(self.dbFileName)

    def setClustered(self):
        """Save that the db has been clustered"""
        self.dataManager.setClustered(self.dbFileName, True)

    def isComplete(self):
        """Has the data been *completely* clustered already"""
        return self.dataManager.isComplete(self.dbFileName)

    def setComplete(self):
        """Save that the db has been completely clustered"""
        self.dataManager.setComplete(self.dbFileName, True)

    def getBinStats(self):
        """Go through all the "bins" array and make a list of unique bin ids vs number of contigs"""
        return self.dataManager.getBinStats(self.dbFileName)

    def setBinStats(self, binStats):
        """Store the valid bin Ids and number of members

        binStats is a dictionary which looks like:
        { tableRow : [bid , numMembers] }
        """
        self.dataManager.setBinStats(self.dbFileName, binStats)
        self.setNumBins(len(binStats.keys()))

    def setBinAssignments(self, assignments):
        """Save our bins into the DB"""
        self.dataManager.setBinAssignments(self.dbFileName, assignments)

    def loadLinks(self):
        """Extra wrapper 'cause I am dumb"""
        self.links = self.getLinks()

    def getLinks(self):
        """Get contig links"""
        # first we get the absolute links
        absolute_links = self.dataManager.restoreLinks(self.dbFileName, self.indices)
        # now convert this into plain old row_indices
        reverse_index_lookup = {}
        for i in range(len(self.indices)):
            reverse_index_lookup[self.indices[i]] = i
        # now convert the absolute links to local ones
        relative_links = {}
        for cid in self.indices:
            local_cid = reverse_index_lookup[cid]
            relative_links[local_cid] = []
            try:
                for link in absolute_links[cid]:
                    relative_links[local_cid].append([reverse_index_lookup[link[0]], link[1], link[2], link[3]])
            except KeyError: # not everyone is linked
                pass

        return relative_links

#------------------------------------------------------------------------------
# DATA TRANSFORMATIONS

    def getAverageCoverage(self, rowIndex):
        """Return the average coverage for this contig across all stoits

        NOTE(review): averages self.transformedCP, not self.covProfiles --
        confirm this is intended.
        """
        return sum(self.transformedCP[rowIndex])/self.numStoits

    def transformCP(self, silent=False, nolog=False, min=None, max=None):
        """Do the main transformation on the coverage profile data

        Projects each N-dimensional coverage profile onto a 2D plane (via
        unit vectors on a circle) plus a radial (norm) axis, then rescales
        all three axes into [0, scaleFactor).

        NOTE(review): `min` and `max` shadow the builtins; they carry the
        per-axis offsets/scales so a second call can reuse a first call's
        scaling. Kept as-is for interface compatibility.
        """
        shrinkFn = np_log10
        if(nolog):
            shrinkFn = lambda x:x

        s = (self.numContigs,3)
        self.transformedCP = np_zeros(s)

        if(not silent):
            print " Dimensionality reduction"

        # get the median distance from the origin
        unit_vectors = [(np_cos(i*2*np_pi/self.numStoits),np_sin(i*2*np_pi/self.numStoits)) for i in range(self.numStoits)]
        for i in range(len(self.indices)):
            norm = np_norm(self.covProfiles[i])
            if(norm != 0):
                radial = shrinkFn(norm)
            else:
                radial = norm
            shifted_vector = np_array([0.0,0.0])
            flat_vector = (self.covProfiles[i] / sum(self.covProfiles[i]))

            for j in range(self.numStoits):
                shifted_vector[0] += unit_vectors[j][0] * flat_vector[j]
                shifted_vector[1] += unit_vectors[j][1] * flat_vector[j]

            # log scale it towards the centre
            scaling_vector = shifted_vector * self.scaleFactor
            sv_size = np_norm(scaling_vector)
            if(sv_size > 1):
                shifted_vector /= shrinkFn(sv_size)

            self.transformedCP[i,0] = shifted_vector[0]
            self.transformedCP[i,1] = shifted_vector[1]
            self.transformedCP[i,2] = radial

        if(not silent):
            print " Reticulating splines"

        # finally scale the matrix to make it equal in all dimensions
        if(min is None):
            min = np_amin(self.transformedCP, axis=0)
            max = np_amax(self.transformedCP, axis=0)
            max = max - min
            max = max / (self.scaleFactor-1)

        for i in range(0,3):
            self.transformedCP[:,i] = (self.transformedCP[:,i] - min[i])/max[i]

        return(min,max)

    def makeColourProfile(self):
        """Make a colour profile based on ksig information"""
        working_data = np_array(self.kmerSigs, copy=True)
        Center(working_data,verbose=0)
        p = PCA(working_data)
        components = p.pc()

        # now make the colour profile based on PC1
        self.kmerVals = np_array([float(i) for i in components[:,0]])

        # normalise to fit between 0 and 1
        self.kmerVals -= np_min(self.kmerVals)
        self.kmerVals /= np_max(self.kmerVals)
        # dead debug branch, kept intentionally disabled
        if(False):
            plt.figure(1)
            plt.subplot(111)
            plt.plot(components[:,0], components[:,1], 'r.')
            plt.show()

    def rotateVectorAndScale(self, point, las, centerVector, delta_max=0.25):
        """
        Move a vector closer to the center of the positive quadrant

        Find the co-ordinates of its projection
        onto the surface of a hypersphere with radius R

        What?...  ...First some definitions:

        For starters, think in 3 dimensions, then take it out to N.
        Imagine all points (x,y,z) on the surface of a sphere
        such that all of x,y,z > 0. ie trapped within the positive
        quadrant.

        Consider the line x = y = z which passes through the origin
        and the point on the surface at the "center" of this quadrant.
        Call this line the "main mapping axis". Let the unit vector
        coincident with this line be called A.

        Now think of any other vector V also located in the positive
        quadrant. The goal of this function is to move this vector
        closer to the MMA. Specifically, if we think about the plane
        which contains both V and A, we'd like to rotate V within this
        plane about the origin through phi degrees in the direction of
        A.

        Once this has been done, we'd like to project the rotated co-ords
        onto the surface of a hypersphere with radius R

        This is a simple scaling operation.

        The idea is that vectors closer to the corners should be perturbed
        more than those closer to the center.

        Set delta_max as the max percentage of the existing angle to be removed
        """
        theta = self.getAngBetween(point, centerVector)
        A = delta_max/((las)**2)
        B = delta_max/las
        delta = 2*B*theta - A *(theta**2) # the amount to shift
        V_p = point*(1-delta) + centerVector*delta
        return V_p/np_norm(V_p)

    def rad2deg(self, anglein):
        # radians -> degrees
        return 180*anglein/np_pi

    def getAngBetween(self, P1, P2):
        """Return the angle between two points (in radians)"""
        # find the existing angle between them theta
        c = np_dot(P1,P2)/np_norm(P1)/np_norm(P2)
        # rounding errors hurt everyone... clamp cosine into [-1, 1]
        if(c > 1):
            c = 1
        elif(c < -1):
            c = -1
        return np_arccos(c) # in radians

#------------------------------------------------------------------------------
# IO and IMAGE RENDERING

    def plotUnbinned(self, coreCut):
        """Plot all contigs over a certain length which are unbinned"""
        self.loadData(condition="((length >= "+str(coreCut)+") & (bid == 0))")
        self.transformCP()
        fig = plt.figure()
        ax1 = fig.add_subplot(111, projection='3d')
        ax1.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors=self.contigColours, c=self.contigColours, marker='.')
        try:
            plt.show()
            plt.close(fig)
        except:
            print "Error showing image", exc_info()[0]
            raise
        del fig

    def plotTransViews(self, tag="fordens"):
        """Plot top, side and front views of the transformed data

        NOTE(review): renderTransData is not visible on this class --
        renderTransCPData was probably intended; verify before use.
        """
        self.renderTransData(tag+"_top.png",azim = 0, elev = 90)
        self.renderTransData(tag+"_front.png",azim = 0, elev = 0)
        self.renderTransData(tag+"_side.png",azim = 90, elev = 0)

    def renderTransCPData(self, fileName="", show=True, elev=45, azim=45, all=False, showAxis=False, primaryWidth=12, primarySpace=3, dpi=300, format='png', fig=None):
        """Plot transformed data in 3D

        NOTE(review): `all` and `format` shadow builtins; kept for
        interface compatibility.
        """
        del_fig = False
        if(fig is None):
            fig = plt.figure()
            del_fig = True
        else:
            plt.clf()
        if(all):
            # three fixed orthogonal views side by side
            myAXINFO = {
                'x': {'i': 0, 'tickdir': 1, 'juggled': (1, 0, 2),
                'color': (0, 0, 0, 0, 0)},
                'y': {'i': 1, 'tickdir': 0, 'juggled': (0, 1, 2),
                'color': (0, 0, 0, 0, 0)},
                'z': {'i': 2, 'tickdir': 0, 'juggled': (0, 2, 1),
                'color': (0, 0, 0, 0, 0)},
            }

            ax = fig.add_subplot(131, projection='3d')
            ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors=self.contigColours, c=self.contigColours, marker='.')
            ax.azim = 0
            ax.elev = 0
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(132, projection='3d')
            ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors=self.contigColours, c=self.contigColours, marker='.')
            ax.azim = 90
            ax.elev = 0
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO

            ax = fig.add_subplot(133, projection='3d')
            ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors=self.contigColours, c=self.contigColours, marker='.')
            ax.azim = 0
            ax.elev = 90
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
            for axis in ax.w_xaxis, ax.w_yaxis, ax.w_zaxis:
                for elt in axis.get_ticklines() + axis.get_ticklabels():
                    elt.set_visible(False)
            ax.w_xaxis._AXINFO = myAXINFO
            ax.w_yaxis._AXINFO = myAXINFO
            ax.w_zaxis._AXINFO = myAXINFO
        else:
            ax = fig.add_subplot(111, projection='3d')
            ax.scatter(self.transformedCP[:,0], self.transformedCP[:,1], self.transformedCP[:,2], edgecolors='none', c=self.contigColours, s=2, marker='.')
            ax.azim = azim
            ax.elev = elev
            ax.set_xlim3d(0,self.scaleFactor)
            ax.set_ylim3d(0,self.scaleFactor)
            ax.set_zlim3d(0,self.scaleFactor)
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_zticklabels([])
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_zticks([])
        if(not showAxis):
            ax.set_axis_off()

        if(fileName != ""):
            try:
                if(all):
                    fig.set_size_inches(3*primaryWidth+2*primarySpace,primaryWidth)
                else:
                    fig.set_size_inches(primaryWidth,primaryWidth)
                plt.savefig(fileName,dpi=dpi,format=format)
            except:
                print "Error saving image",fileName, exc_info()[0]
                raise
        elif(show):
            try:
                plt.show()
            except:
                print "Error showing image", exc_info()[0]
                raise
        if del_fig:
            plt.close(fig)
            del fig