def main():
    """Peek at an alignment file and list the contigs of a reference.

    Command-line options:
        -i / --input : path handed to ``AlignmentSet``.
        -r / --ref   : path handed to ``reference.loadFromFile`` and
                       ``ReferenceSet``.
        -v           : verbose flag (parsed but not consulted here).

    Prints the alignment set, its reference names and every contig id of the
    reference set to stdout. Calls ``usage()`` and exits with status 2 when no
    input path is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input')
    parser.add_argument('-r', '--ref')
    parser.add_argument('-v', dest='verbose', action='store_true')
    args = parser.parse_args()

    # The previous check evaluated ``len(args.input) > 0`` and threw the result
    # away, so only a *missing* option (None -> TypeError) was caught. Test
    # truthiness instead so that an empty string is rejected as well.
    if not args.input:
        usage()
        sys.exit(2)

    with AlignmentSet(args.input) as peekCmpH5:
        print("test")
        print(peekCmpH5)
        logging.info("Peeking at CmpH5 file %s", args.input)
        logging.info("Input CmpH5 data: numAlnHits=%d", len(peekCmpH5))

        cmpContigNames = set(peekCmpH5.refNames)
        print(cmpContigNames)

        reference.loadFromFile(args.ref, peekCmpH5)

        f = ReferenceSet(args.ref)
        f.assertIndexed()
        for fastaRecord in f.contigs:
            print(fastaRecord.id)
def _loadReference(self, alnFile):
    """Load the reference and resolve ``options.referenceWindows``.

    Loads ``options.referenceFilename`` via ``reference.loadFromFile`` and
    then fills ``options.referenceWindows`` from the comma-separated
    ``options.referenceWindowsAsString``:

    * no window string        -> empty tuple;
    * skipUnrecognizedContigs -> every window that parses; windows that fail
      to parse are skipped and their traceback is logged at debug level;
    * otherwise               -> every window, parse errors propagate.

    When ``options.referenceWindowsFromAlignment`` is set, the result is
    replaced by ``alnFile.refWindows``.

    :param alnFile: alignment file passed to ``reference.loadFromFile`` and
                    read for ``refWindows``.
    """
    logging.info("Loading reference")
    reference.loadFromFile(options.referenceFilename, alnFile)

    # Grok the referenceWindow spec, if any.
    windowSpec = options.referenceWindowsAsString
    if windowSpec is None:
        options.referenceWindows = ()
    elif options.skipUnrecognizedContigs:
        # This is a workaround for smrtpipe scatter/gather.
        options.referenceWindows = []
        for s in windowSpec.split(","):
            # Keep the try body down to the one call that is expected to fail.
            try:
                win = reference.stringToWindow(s)
            except Exception:
                logging.debug(traceback.format_exc())
            else:
                options.referenceWindows.append(win)
    else:
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which would leave referenceWindows empty after its first traversal.
        options.referenceWindows = [reference.stringToWindow(s)
                                    for s in windowSpec.split(",")]

    if options.referenceWindowsFromAlignment:
        options.referenceWindows = alnFile.refWindows
def _loadReference(self, alnFile):
    """Load the reference, then populate ``options.referenceWindows``.

    The reference named by ``options.referenceFilename`` is loaded through
    ``reference.loadFromFile``. ``options.referenceWindowsAsString`` (a
    comma-separated list, or None) is then converted with
    ``reference.stringToWindow``:

    * None                            -> ``()``;
    * ``options.skipUnrecognizedContigs`` set -> unparsable entries are
      dropped and their traceback is written to the debug log;
    * otherwise                       -> all entries are parsed and any
      parse error is raised to the caller.

    If ``options.referenceWindowsFromAlignment`` is set, the windows are
    overridden with ``alnFile.refWindows``.

    :param alnFile: alignment file given to ``reference.loadFromFile``; its
                    ``refWindows`` attribute is read for the override.
    """
    logging.info("Loading reference")
    reference.loadFromFile(options.referenceFilename, alnFile)

    # Grok the referenceWindow spec, if any.
    windowSpec = options.referenceWindowsAsString
    if windowSpec is None:
        options.referenceWindows = ()
    elif options.skipUnrecognizedContigs:
        # This is a workaround for smrtpipe scatter/gather.
        options.referenceWindows = []
        for s in windowSpec.split(","):
            # Only the parse itself is guarded; the append cannot fail.
            try:
                win = reference.stringToWindow(s)
            except Exception:
                logging.debug(traceback.format_exc())
            else:
                options.referenceWindows.append(win)
    else:
        # A list comprehension yields a reusable list on Python 2 and 3 alike,
        # whereas ``map`` is a single-use iterator on Python 3.
        options.referenceWindows = [reference.stringToWindow(s)
                                    for s in windowSpec.split(",")]

    if options.referenceWindowsFromAlignment:
        options.referenceWindows = alnFile.refWindows
def _loadReference(self, cmpH5):
    """Load the reference and resolve ``options.referenceWindows``.

    Loads ``options.referenceFilename`` via ``reference.loadFromFile`` and
    calls ``die`` if that returns a truthy error value. Then fills
    ``options.referenceWindows`` from the comma-separated
    ``options.referenceWindowsAsString``:

    * no window string        -> empty tuple;
    * skipUnrecognizedContigs -> every window that parses; windows that fail
      to parse are skipped and the failure is logged at debug level;
    * otherwise               -> every window, parse errors propagate.

    :param cmpH5: alignment file passed to ``reference.loadFromFile``.
    """
    logging.info("Loading reference")
    err = reference.loadFromFile(options.referenceFilename, cmpH5)
    if err:
        die("Error loading reference")

    # Grok the referenceWindow spec, if any.
    windowSpec = options.referenceWindowsAsString
    if windowSpec is None:
        options.referenceWindows = ()
    elif options.skipUnrecognizedContigs:
        # This is a workaround for smrtpipe scatter/gather.
        options.referenceWindows = []
        for s in windowSpec.split(","):
            try:
                win = reference.stringToWindow(s)
            except Exception:
                # Skipping is deliberate, but a bare ``except: pass`` also
                # swallowed KeyboardInterrupt/SystemExit and left no trace of
                # what was dropped. Catch Exception only and record it.
                logging.debug("Skipping unrecognized reference window %r",
                              s, exc_info=True)
            else:
                options.referenceWindows.append(win)
    else:
        # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
        # which would leave referenceWindows empty after its first traversal.
        options.referenceWindows = [reference.stringToWindow(s)
                                    for s in windowSpec.split(",")]
def _loadReference(self, cmpH5):
    """Load the reference, then populate ``options.referenceWindows``.

    The reference named by ``options.referenceFilename`` is loaded through
    ``reference.loadFromFile``; a truthy return value is treated as an error
    and reported through ``die``. ``options.referenceWindowsAsString`` (a
    comma-separated list, or None) is then converted with
    ``reference.stringToWindow``:

    * None                            -> ``()``;
    * ``options.skipUnrecognizedContigs`` set -> unparsable entries are
      dropped and the failure is written to the debug log;
    * otherwise                       -> all entries are parsed and any
      parse error is raised to the caller.

    :param cmpH5: alignment file given to ``reference.loadFromFile``.
    """
    logging.info("Loading reference")
    err = reference.loadFromFile(options.referenceFilename, cmpH5)
    if err:
        die("Error loading reference")

    # Grok the referenceWindow spec, if any.
    windowSpec = options.referenceWindowsAsString
    if windowSpec is None:
        options.referenceWindows = ()
    elif options.skipUnrecognizedContigs:
        # This is a workaround for smrtpipe scatter/gather.
        options.referenceWindows = []
        for s in windowSpec.split(","):
            try:
                win = reference.stringToWindow(s)
            except Exception:
                # Best-effort skip, but no longer via a bare ``except: pass``:
                # interpreter-exit exceptions now propagate and the dropped
                # window is visible in the debug log.
                logging.debug("Skipping unrecognized reference window %r",
                              s, exc_info=True)
            else:
                options.referenceWindows.append(win)
    else:
        # A list comprehension yields a reusable list on Python 2 and 3 alike,
        # whereas ``map`` is a single-use iterator on Python 3.
        options.referenceWindows = [reference.stringToWindow(s)
                                    for s in windowSpec.split(",")]
# Experiment script: fetch reads for a fixed reference window, replace the
# template with a synthetic all-"A" sequence, run ``run_bqcy`` on it and print
# the original template, the polished sequence, the fake template and the
# results buffer.
import numpy as np
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference
from projutils import getReads
from bqcy.bqcy import run_bqcy

# NOTE(review): machine-specific absolute paths, kept verbatim. Hoisted here so
# they are easy to find and change when running on another machine.
CMP_H5_PATH = '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
REFERENCE_FASTA_PATH = "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta"

cmpH5 = CmpH5Reader(CMP_H5_PATH)
reference.loadFromFile(REFERENCE_FASTA_PATH, cmpH5)

# NOTE(review): the meaning of the trailing 64 and 100 is defined by
# projutils.getReads; 64 presumably matches the template buffer length used
# below -- confirm against getReads.
tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(cmpH5, reference,
                                                  (146000, 146050), 64, 100)
print("POA Consensus: " + ''.join(map(chr, tmplSeq.tolist())))

# Overwrite the template with a fake one: 50 'A' characters at the start of a
# zero-filled uint8 buffer of length 64.
tmplSeq = np.zeros((64), dtype=np.uint8)
# Materialize as a list: ``len()`` of a ``map`` object raises TypeError on
# Python 3, and the slice assignment below needs a sized sequence.
tmplOrds = [ord(base) for base in "A" * 50]
tmplSeq[:len(tmplOrds)] = tmplOrds

# Float buffer with 8 entries per template position, handed to run_bqcy.
# NOTE(review): presumably filled in place by run_bqcy -- confirm.
results = np.zeros(8 * tmplSeq.shape[0], dtype=np.float64)
origTmplScore, bestMutantScore, bestMutatedSeq = run_bqcy(
    tmplSeq, readSeqs, qvInfo, results)

print("Polished: " + ''.join(map(chr, np.asarray(bestMutatedSeq).tolist())))
print("Fake Template: " + ''.join(map(chr, np.asarray(tmplSeq).tolist())))
print(results)
# Experiment script: fetch reads for a fixed reference window, replace the
# template with a synthetic all-"A" sequence, score it with
# ``getTemplateScore``, run ``run_bqfast`` on it and print the original
# template, the polished sequence, the fake template and the results.
import numpy as np
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference
from projutils import getReads
from bqcy.bqcy import getTemplateScore
from bqfast.bqfast import run_bqfast

# NOTE(review): machine-specific absolute paths, kept verbatim. Hoisted here so
# they are easy to find and change when running on another machine.
CMP_H5_PATH = '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
REFERENCE_FASTA_PATH = "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta"

cmpH5 = CmpH5Reader(CMP_H5_PATH)
reference.loadFromFile(REFERENCE_FASTA_PATH, cmpH5)

# NOTE: an earlier variant of this experiment used the window (146000, 146100)
# with 128 in place of 64.
# NOTE(review): the meaning of the trailing 64 and 100 is defined by
# projutils.getReads; 64 presumably matches the template buffer length used
# below -- confirm against getReads.
tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(cmpH5, reference,
                                                  (146000, 146050), 64, 100)
print("Real Template: " + ''.join(map(chr, tmplSeq.tolist())))

# Overwrite the template with a fake one: 50 'A' characters at the start of a
# zero-filled uint8 buffer of length 64.
tmplSeq = np.zeros((64), dtype=np.uint8)
# Materialize as a list: ``len()`` of a ``map`` object raises TypeError on
# Python 3, and the slice assignment below needs a sized sequence.
tmplOrds = [ord(base) for base in "A" * 50]
tmplSeq[:len(tmplOrds)] = tmplOrds

tmplScore = getTemplateScore(tmplSeq, readSeqs, qvInfo)
results, polishedTmplSeq = run_bqfast(tmplSeq, tmplSeq.shape[0], tmplScore,
                                      readSeqs, qvInfo)

print("Polished: " + ''.join(map(chr, polishedTmplSeq.tolist())))
print("Fake Template: " + ''.join(map(chr, tmplSeq.tolist())))
# Only the first 8 entries per template position are printed.
print(results[:tmplSeq.shape[0] * 8])