Exemple #1
0
import numpy as np
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference
from projutils import getReads
from bqcy.bqcy import run_bqcy

# Open the alignment file and load the reference FASTA together with it.
cmpH5 = CmpH5Reader('/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5')
reference.loadFromFile("/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta", cmpH5)

# Fetch the template, reads and QV data from projutils.
# NOTE(review): (146000, 146050) is presumably a reference coordinate window
# and 64 / 100 size limits -- confirm against projutils.getReads.
tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(
    cmpH5, reference, (146000, 146050), 64, 100)

# Disabled debugging aid: dump part of the reads array and stop.
#print(readSeqs[:, 65:])
#exit()

# tmplSeq stores character codes, so map each one back to its character
# before printing.
print("POA Consensus: " + ''.join(chr(code) for code in tmplSeq.tolist()))

# Replace the template with a placeholder: a 64-entry uint8 buffer whose first
# 50 entries hold the character code of "A"; the remaining entries stay 0.
tmplSeq = np.zeros(64, dtype=np.uint8)
# Materialize the codes as a list. On Python 3 map() returns an iterator,
# which has no len() and would make the slice assignment below fail.
tmplOrds = list(map(ord, "A" * 50))
tmplSeq[:len(tmplOrds)] = tmplOrds

# Preallocate a float64 buffer with 8 slots per template position and hand it
# to run_bqcy.
# NOTE(review): presumably run_bqcy fills `results` in place, since it is
# printed below without being reassigned -- confirm in bqcy.
results = np.zeros(tmplSeq.shape[0] * 8, dtype=np.float64)
origTmplScore, bestMutantScore, bestMutatedSeq = run_bqcy(tmplSeq, readSeqs,
                                                          qvInfo, results)

# Both sequences are arrays of character codes; convert them to text.
print("Polished: "
      + ''.join(chr(code) for code in np.asarray(bestMutatedSeq).tolist()))
print("Fake Template: "
      + ''.join(chr(code) for code in np.asarray(tmplSeq).tolist()))
print(results)
Exemple #2
0
import numpy as np
from projutils import getReads

# Unpack the four values returned by getReads() when called with no arguments.
(tmplSeq, realTmplLen, readSeqs, qvInfo) = getReads()
# Disabled check. NOTE(review): sanity_check is not among the imports visible
# above, so it would need importing before this line is re-enabled.
#sanity_check(tmplSeq, realTmplLen, readSeqs, qvInfo)


# Scoring parameters labelled [C2.AllQVsModel].
# A trailing "S" marks the slope term that accompanies the base parameter.
Match = 0.2627555
Mismatch, MismatchS = -1.09688872, -0.01637988
Branch, BranchS = -0.60275947, -0.02682689
DeletionN = -1.00012494
DeletionWithTag, DeletionWithTagS = 0.06000148, -0.02579358
Nce, NceS = -0.15864559, -0.04403654
Merge, MergeS = -1.02398814, -0.12135255

# Metric indices, numbered consecutively from 0.
# NOTE(review): presumably these select rows of the QV array -- confirm.
(InsertionIdx, MergeIdx, DeletionIdx, DeletionTagIdx,
 SubstitutionIdx) = range(5)

# Read and template lengths are set to the same value.
readLength = tmplLength = 256
Exemple #3
0
import numpy as np
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference
from projutils import getReads
from bqcy.bqcy import getTemplateScore
from bqfast.bqfast import run_bqfast

# Open the alignment file and load the reference FASTA together with it.
cmpH5 = CmpH5Reader(
    '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
)
reference.loadFromFile(
    "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta",
    cmpH5,
)

# Alternative call with a wider range and larger first size argument,
# kept disabled:
#tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(cmpH5, reference, (146000, 146100), 128, 100)
tmplSeq, realTmplLen, readSeqs, qvInfo = getReads(
    cmpH5, reference, (146000, 146050), 64, 100)

# tmplSeq stores character codes, so map each one back to its character
# before printing.
print("Real Template: " + ''.join(chr(code) for code in tmplSeq.tolist()))

# Replace the template with a placeholder: a 64-entry uint8 buffer whose first
# 50 entries hold the character code of "A"; the remaining entries stay 0.
tmplSeq = np.zeros(64, dtype=np.uint8)
# Materialize the codes as a list. On Python 3 map() returns an iterator,
# which has no len() and would make the slice assignment below fail.
tmplOrds = list(map(ord, "A" * 50))
tmplSeq[:len(tmplOrds)] = tmplOrds

# Score the placeholder template, then pass that score, the template and its
# length to run_bqfast.
tmplScore = getTemplateScore(tmplSeq, readSeqs, qvInfo)
results, polishedTmplSeq = run_bqfast(
    tmplSeq,
    tmplSeq.shape[0],
    tmplScore,
    readSeqs,
    qvInfo,
)

# Both sequences are arrays of character codes; convert them to text.
print("Polished: " + ''.join(chr(code) for code in polishedTmplSeq.tolist()))
print("Fake Template: " + ''.join(chr(code) for code in tmplSeq.tolist()))


# Show only the leading 8-per-template-position entries of the result buffer.
print(results[:8 * tmplSeq.shape[0]])
Exemple #4
0
import numpy as np
from projutils import getReads

# Unpack the four values returned by getReads() when called with no arguments.
(tmplSeq, realTmplLen, readSeqs, qvInfo) = getReads()
# Disabled check of the loaded data:
#sanity_check(tmplSeq, realTmplLen, readSeqs, qvInfo)

# [C2.AllQVsModel] parameter set.
# An "S" suffix marks the slope term belonging to the parameter of the same
# name.
Match            = 0.2627555
Mismatch         = -1.09688872
MismatchS        = -0.01637988
Branch           = -0.60275947
BranchS          = -0.02682689
DeletionN        = -1.00012494
DeletionWithTag  = 0.06000148
DeletionWithTagS = -0.02579358
Nce              = -0.15864559
NceS             = -0.04403654
Merge            = -1.02398814
MergeS           = -0.12135255

# Metric indices, assigned in order starting at 0.
# NOTE(review): presumably these select rows of the QV array -- confirm.
InsertionIdx, MergeIdx, DeletionIdx, DeletionTagIdx, SubstitutionIdx = range(5)

# Read and template lengths are set to the same value.
readLength = tmplLength = 256
from ConsensusCore import *
import numpy as np
from projutils import getReads, sanity_check
from pbcore.io import CmpH5Reader
from GenomicConsensus import reference

# Open the alignment file and load the reference FASTA together with it.
cmpH5 = CmpH5Reader(
    '/home/nick/workspace/btry6790_project/PXO99A_ref_wo_one_copy_212kb_repeat.cmp.h5'
)
reference.loadFromFile(
    "/home/nick/workspace/btry6790_project/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat/sequence/ref_PXO99A_genome_reference_wo_one_copy_212k_repeat.fasta",
    cmpH5,
)

# Same getReads arguments as the other scripts, plus real_quiver=True.
# The third returned value is bound to fwdSeqs here.
tmplSeq, realTmplLen, fwdSeqs, qvInfo = getReads(
    cmpH5,
    reference,
    (146000, 146050),
    64,
    100,
    real_quiver=True,
)
# Widen numpy's print line width to 200 characters.
np.set_printoptions(linewidth=200)

# Accumulator initialised ahead of the per-read loop that follows.
totalScore = 0
for read in range(len(fwdSeqs)):
    
    features = QvSequenceFeatures(fwdSeqs[read],
                                   FloatFeature(qvInfo[0, :len(fwdSeqs[read]), read].astype(np.float32)),
                                   FloatFeature(qvInfo[4, :len(fwdSeqs[read]), read].astype(np.float32)),
                                   FloatFeature(qvInfo[2, :len(fwdSeqs[read]), read].astype(np.float32)),
                                   FloatFeature(qvInfo[3, :len(fwdSeqs[read]), read].astype(np.float32)),
                                   FloatFeature(qvInfo[1, :len(fwdSeqs[read]), read].astype(np.float32)))
    
    params = QvModelParams(0.2627555,
                           -1.09688872,
                           -0.01637988,
                           -0.60275947,
                           -0.02682689,
                           -1.00012494,
                           0.06000148,
                           -0.02579358,
                           -0.15864559,