def by_genome(x, args):
    """All fragment simulation conducted for a single genome.

    Parameters
    ----------
    x : list
        [taxonName, inFile]
        taxonName -- taxon name of genome
        inFile -- genome sequence file name
    args : dict
        user-provided args. Keys read here: '--fr', '--fld', '--flr',
        '--rtl' and '--nf'. '--rtr' and '--MFE' are only consulted when
        the genome object reports a primer file.
        '--nf' is either a fragment count (e.g. '1000') or a coverage
        value suffixed with 'x'/'X' (e.g. '10X').

    Returns
    -------
    list
        [taxonName, fragList]
        fragList -- dict keyed by `scaf` (first value returned by
        simFrag); each value is a list of [fragStart, fragLen, fragGC].

    Raises
    ------
    AssertionError
        If '--fr' is missing from args, or '--rtr' is missing when the
        genome has a primer file.
    """
    taxonName, inFile = x

    # status
    sys.stderr.write('Processing: "{}"\n'.format(taxonName))

    # making genome object
    assert '--fr' in args, '"--fr" must be provided in args'
    genome = Genome(inFile, taxonName, args['--fr'])

    # sequenced read template location: amplicons
    if genome.primerFile is not None:
        # in-silico PCR; the MFEprimer.py executable is only needed here
        assert '--rtr' in args, '"--rtr" must be in args'
        genome.callMFEprimer(rtr=args['--rtr'],
                             MFEprimerExe=args['--MFE'])
        # filtering overlapping in-silico amplicons
        genome.filterOverlaps()

    # simulating fragments
    simFO = SimFrags(fld=args['--fld'],
                     flr=args['--flr'],
                     rtl=args['--rtl'])
    nFragsMade = 0
    fragList = dict()

    ## if no amplicons: nothing to simulate
    if genome.nAmplicons == 0:
        pass
    ## if using coverage
    elif args['--nf'].endswith(('x', 'X')):
        coverage = float(args['--nf'].rstrip('xX'))
        fragLenCov = genome.length * coverage
        fragLenTotal = 0
        while True:
            scaf, fragStart, fragLen, fragGC = simFO.simFrag(genome)
            # The scaf entry is created before the stop checks, so the key
            # exists even if no fragment ends up being recorded for it.
            scafFrags = fragList.setdefault(scaf, [])
            # Stop on a failed draw ("NA") or once the running total has
            # already exceeded the target length; the total is tested
            # before the current fragment is counted.
            if fragStart == "NA" or fragLenTotal > fragLenCov:
                break
            fragLenTotal += fragLen
            nFragsMade += 1
            scafFrags.append([fragStart, fragLen, fragGC])
    ## if using fixed number of fragments
    else:
        for _ in xrange(int(args['--nf'])):
            scaf, fragStart, fragLen, fragGC = simFO.simFrag(genome)
            # Same ordering as the coverage branch: create the entry first.
            scafFrags = fragList.setdefault(scaf, [])
            if fragStart == "NA":
                break
            nFragsMade += 1
            scafFrags.append([fragStart, fragLen, fragGC])

    # status
    sys.stderr.write(' Genome name: {}\n'.format(genome.taxonName))
    sys.stderr.write(' Genome length (bp): {}\n'.format(genome.length))
    # NOTE(review): the amplicon count is gated on '--nf' rather than on
    # primer usage -- confirm this condition is intended.
    if args['--nf']:
        msg = ' Number of amplicons: {}\n'
        sys.stderr.write(msg.format(genome.nAmplicons))
    msg = ' Number of fragments simulated: {}\n'
    sys.stderr.write(msg.format(nFragsMade))

    return [genome.taxonName, fragList]