def checkOptions(args, options, parser, data):
    """Validate the command line and populate options.chrSet.

    args    -- positional arguments; each must be an existing path
               ending in '.pickle'.
    options -- parsed options; reads printAllowedKeys, key, printChroms
               and chr, and writes chrSet.
    parser  -- option parser; problems are reported via parser.error().
    data    -- not used by this function.

    Exits with status 0 after printing the allowed keys
    (options.printAllowedKeys) or the per-file chromosome listing
    (options.printChroms).
    """
    allowedKeys = set([
        'maf', 'maf1e2', 'maf1e3', 'maf1e4',
        'maf1e5', 'maf1e6', 'maf1e7', 'blockEdgeCount',
        'mafCpl1e2', 'mafCpl1e3', 'mafCpl1e4',
        'mafCpl1e5', 'mafCpl1e6', 'mafCpl1e7',
        'mafCtg1e2', 'mafCtg1e3', 'mafCtg1e4',
        'mafCtg1e5', 'mafCtg1e6', 'mafCtg1e7',
        'mafSpl1e2', 'mafSpl1e3', 'mafSpl1e4',
        'mafSpl1e5', 'mafSpl1e6', 'mafSpl1e7',
        'mafCpEdgeCount', 'mafCpErrorCount', 'mafCpScafGapCount',
        'CDSCount', 'UTRCount', 'NXECount', 'NGECount',
        'islandCount', 'tandemCount', 'repeatCount',
        'CDSMax', 'UTRMax', 'NXEMax', 'NGEMax',
        'islandMax', 'tandemMax', 'repeatMax',
        'xAxis', 'columnsInBlocks',
    ])

    # Informational mode: list the keys and stop.
    if options.printAllowedKeys:
        for allowedKey in allowedKeys:
            print(allowedKey)
        sys.exit(0)

    # --key must be given and must be one of the known keys.
    if options.key == '':
        parser.error('please specify --key\n')
    if options.key not in allowedKeys:
        parser.error('please specify --key from one of the %s\n' % allowedKeys)

    # Positional arguments: at least one existing *.pickle file.
    if not args:
        parser.error('please specify at least one pickle to inspect as a positional argument.\n')
    for picklePath in args:
        if not os.path.exists(picklePath):
            parser.error('file "%s" does not exist.\n' % picklePath)
        if not picklePath.endswith('.pickle'):
            parser.error('file "%s" does not end in ".pickle".\n' % picklePath)

    # Informational mode: one line per file listing what it contains, then stop.
    if options.printChroms:
        for picklePath in args:
            # presumably a dict keyed by chromosome name -- confirm in unpackData
            byChrom = unpackData(picklePath, options, {})
            sys.stdout.write('%s\t' % picklePath)
            for chrom in byChrom:
                sys.stdout.write('\t%s' % chrom)
            sys.stdout.write('\n')
        sys.exit(0)

    if options.chr is not None:
        # Explicit comma separated list: warn about names a file lacks.
        options.chrSet = set(options.chr.split(','))
        for picklePath in args:
            byChrom = unpackData(picklePath, options, {})
            for chrom in options.chrSet:
                if chrom not in byChrom:
                    sys.stderr.write('chromosome %s is not present in %s\n' % (chrom, picklePath))
    else:
        # No list given: take the union of everything found in the files.
        options.chrSet = set()
        for picklePath in args:
            byChrom = unpackData(picklePath, options, {})
            for chrom in byChrom:
                options.chrSet.add(chrom)
def checkOptions(args, options, parser, data):
    """Check the parsed command line and set options.chrSet.

    Problems are reported through parser.error(). When
    options.printAllowedKeys or options.printChroms is set, the requested
    listing is written to stdout and the process exits with status 0.
    Otherwise options.chrSet becomes either the names listed in
    options.chr (comma separated) or, when options.chr is None, the union
    of the entries found in every pickle named in args. The data argument
    is accepted but not used here.
    """
    allowedKeys = set([
        'maf',
        'maf1e2', 'maf1e3', 'maf1e4', 'maf1e5', 'maf1e6', 'maf1e7',
        'blockEdgeCount',
        'mafCpl1e2', 'mafCpl1e3', 'mafCpl1e4', 'mafCpl1e5', 'mafCpl1e6', 'mafCpl1e7',
        'mafCtg1e2', 'mafCtg1e3', 'mafCtg1e4', 'mafCtg1e5', 'mafCtg1e6', 'mafCtg1e7',
        'mafSpl1e2', 'mafSpl1e3', 'mafSpl1e4', 'mafSpl1e5', 'mafSpl1e6', 'mafSpl1e7',
        'mafCpEdgeCount', 'mafCpErrorCount', 'mafCpScafGapCount',
        'CDSCount', 'UTRCount', 'NXECount', 'NGECount',
        'islandCount', 'tandemCount', 'repeatCount',
        'CDSMax', 'UTRMax', 'NXEMax', 'NGEMax',
        'islandMax', 'tandemMax', 'repeatMax',
        'xAxis', 'columnsInBlocks',
    ])

    if options.printAllowedKeys:
        for name in allowedKeys:
            print(name)
        sys.exit(0)

    if options.key == '':
        parser.error('please specify --key\n')
    if options.key not in allowedKeys:
        parser.error('please specify --key from one of the %s\n' % allowedKeys)

    if len(args) == 0:
        parser.error(
            'please specify at least one pickle to inspect as a positional argument.\n'
        )
    for path in args:
        if not os.path.exists(path):
            parser.error('file "%s" does not exist.\n' % path)
        if not path.endswith('.pickle'):
            parser.error('file "%s" does not end in ".pickle".\n' % path)

    if options.printChroms:
        for path in args:
            contents = unpackData(path, options, {})
            # Same bytes as writing the pieces one at a time: the file name,
            # a tab, then each entry preceded by its own tab.
            pieces = ['%s\t' % path]
            pieces.extend('\t%s' % entry for entry in contents)
            pieces.append('\n')
            sys.stdout.write(''.join(pieces))
        sys.exit(0)

    requested = options.chr
    if requested is None:
        # Nothing requested explicitly: collect every entry seen in any file.
        options.chrSet = set()
        for path in args:
            options.chrSet.update(unpackData(path, options, {}))
    else:
        options.chrSet = set(requested.split(','))
        for path in args:
            contents = unpackData(path, options, {})
            # Only a warning: a requested name missing from a file is not fatal.
            for wanted in options.chrSet:
                if wanted in contents:
                    continue
                sys.stderr.write('chromosome %s is not present in %s\n' % (wanted, path))
def loadMafs(options, data):
    """Load every '<ref>*maf.pickle' file in options.mafDir into data.

    Reads from options: mafDir, ref, subsetFile, assemblySubset (only when
    subsetFile is set), sortOn, forceOrder and the stackFill* flags.
    Reads from data: chrNames and annotationOrder.

    Writes to data:
      mafWigDict             -- mafWigDict[chrom][name] = unpacked values
      mafNamesDict           -- name -> summed value used for sorting
      orderedMafs            -- names in plotting order
      numberOfMafs, numRows  -- counts used for the figure layout
      lengthThresholdPresent -- size labels ('1e2'..'1e7') with a non-zero
                                sum in at least one dataset

    Exits with status 1 if a globbed filename does not match the pattern
    used to extract the genome name.
    """
    # sort of like loadAnnots, but needs an added loop that pulls
    # from a glob of all the mafs in the maf directory.
    data.mafWigDict = {}
    data.mafNamesDict = {}
    patStr = r'\S+\.(\S+)\.maf.pickle'
    pat = re.compile(patStr)
    mafFiles = glob.glob(os.path.join(options.mafDir,
                                      '%s*maf.pickle' % options.ref))
    for f in mafFiles:
        m = pat.search(f)
        if m is None:
            # Both values must be passed to % as one tuple; previously patStr
            # was handed to write() as a second argument, raising TypeError.
            sys.stderr.write('unable to find genome name in filename %s using regex %s\n'
                             % (f, patStr))
            sys.exit(1)
        name = m.group(1)
        if options.subsetFile and name not in options.assemblySubset:
            continue
        if name not in data.mafNamesDict:
            # this serves the dual purpose of storing all seen names
            # and the count of bases aligned
            data.mafNamesDict[name] = 0
        dataByChrom = unpackData(f, options, data)
        for c in dataByChrom:
            if c not in data.mafWigDict:
                data.mafWigDict[c] = {}
            data.mafWigDict[c][name] = dataByChrom[c]

    # calculate data used for sorting
    for c in data.chrNames:
        for n in data.mafNamesDict:
            if options.sortOn == 'c':
                data.mafNamesDict[n] += data.mafWigDict[c][n]['columnsInBlocks']
            else:
                data.mafNamesDict[n] += lengthData(data.mafWigDict[c][n], options, data)

    if not options.forceOrder:
        # largest summed value first
        data.orderedMafs = sorted(data.mafNamesDict,
                                  key=lambda key: data.mafNamesDict[key],
                                  reverse=True)
    else:
        # user supplied names first, then any remaining names alphabetically
        data.orderedMafs = options.forceOrder.split(',')
        spokenFor = set(data.orderedMafs)
        data.orderedMafs.extend(n for n in sorted(data.mafNamesDict)
                                if n not in spokenFor)

    data.numberOfMafs = len(data.mafNamesDict)
    # number of total rows in the figure
    data.numRows = data.numberOfMafs + len(data.annotationOrder) + 1.0

    # discover which size categories are absent from all datasets...
    # used in legend plotting
    labs = ['1e2', '1e3', '1e4', '1e5', '1e6', '1e7']
    # The key prefix depends only on the options, so pick it once.
    if options.stackFillBlocks:
        prefix = 'maf'
    elif options.stackFillContigPaths:
        prefix = 'mafCpl'
    elif options.stackFillContigs:
        prefix = 'mafCtg'
    elif options.stackFillScaffPaths:
        prefix = 'mafSpl'
    else:
        prefix = None
    data.lengthThresholdPresent = {}
    for c in data.chrNames:
        for n in data.mafWigDict[c]:
            if prefix is None:
                continue
            for l in labs:
                if l in data.lengthThresholdPresent:
                    continue
                key = prefix + l
                if key not in data.mafWigDict[c][n]:
                    print('thats weird, this key: %s is not in file: %s chr: %s'
                          % (key, n, c))
                    continue
                if sum(data.mafWigDict[c][n][key]) > 0:
                    data.lengthThresholdPresent[l] = True
def loadAnnots(options, data):
    """Unpack '<options.ref>.annots.pickle' from options.annotDir.

    The result of unpackData() is stored on data.annotWigDict. The
    attribute is set to an empty dict first, so it already exists on
    data when unpackData() is called.
    """
    data.annotWigDict = {}
    annotFileName = '%s.annots.pickle' % (options.ref)
    annotPath = os.path.join(options.annotDir, annotFileName)
    data.annotWigDict = unpackData(annotPath, options, data)
def loadPickles(args, options, data):
    """Unpack each pickle named in args and print its contents.

    For every path, in order: unpackData() is called, the result is
    passed to verify() when options.verify is set, and then to
    printData().
    """
    for picklePath in args:
        unpacked = unpackData(picklePath, options, data)
        if options.verify:
            verify(unpacked, options, data)
        printData(unpacked, options, data)
def loadPickles(args, options, data):
    """Process every pickle path in args, one after another.

    Each file is unpacked with unpackData(), optionally checked with
    verify() (when options.verify is set), and then handed to printData().
    """
    def _processOne(path):
        # Unpack, optionally verify, then print a single pickle.
        contents = unpackData(path, options, data)
        if options.verify:
            verify(contents, options, data)
        printData(contents, options, data)

    for path in args:
        _processOne(path)