def pwmoccsmethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample W-mer occurrences using PWM importance weights.

    The sample budget is split in two: the first part is drawn with
    weights built from ``pwm``, the remainder with weights built from
    ``jem.pwmrevcomp(pwm)``. The value returned by the first
    ``jis.importancesample`` call is handed to the second call as its
    callback, so both runs feed the same accumulator.

    Args:
        seqsdata: Object providing ``index``, ``childoccfreqs`` and
            ``numoccs``.
        pwm: PWM used for the importance weights and the Zn calculator.
        lambda_: Passed together with ``pwm`` to
            ``jem.createZncalculatorFn``.
        Widx: Index selecting the entry of ``args.Ws`` and the matching
            column/entry of the ``seqsdata`` arrays.
        numsamples: Total number of samples over both orientations.
        args: Object providing ``Ws``.

    Returns:
        A tuple ``(seqsdata.numoccs[Widx], result)`` where ``result`` is
        whatever the second ``jis.importancesample`` call returns.
    """
    logger.debug('Importance sampling using PWM importance weights')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    # Sample half in each orientation. Floor division keeps the count an
    # integer whichever division semantics are in force; for an odd
    # numsamples the extra sample goes to the reverse-complement run.
    numpositive = numsamples // 2
    W = args.Ws[Widx]
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Forward orientation.
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(childoccfreqs, jis.PWMImportanceWeight(pwm)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn))
    # Reverse-complement orientation, reusing the callback from above.
    return seqsdata.numoccs[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs,
            jis.PWMImportanceWeight(jem.pwmrevcomp(pwm))),
        numsamples - numpositive,
        cb)
def uniformuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample unique W-mers using uniform importance weights.

    Args:
        seqsdata: Object providing ``index``, ``childuniquefreqs`` and
            ``numunique``.
        pwm: PWM passed to ``jem.createZncalculatorFn``.
        lambda_: Passed together with ``pwm`` to
            ``jem.createZncalculatorFn``.
        Widx: Index selecting the entry of ``args.Ws`` and the matching
            column/entry of the ``seqsdata`` arrays.
        numsamples: Number of samples to draw.
        args: Object providing ``Ws``.

    Returns:
        A tuple ``(seqsdata.numunique[Widx], result)`` where ``result``
        is whatever ``jis.importancesample`` returns.
    """
    logger.debug('Importance sampling using uniform weights')
    zn_fn = jem.createZncalculatorFn(pwm, lambda_)
    width = args.Ws[Widx]
    unique_dist = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])

    num_unique = seqsdata.numunique[Widx]
    index = seqsdata.index
    sampling_dist = jis.WeightedSamplingDist(
        unique_dist, jis.UniformImportanceWeight())
    callback = jis.ISMemoCbAdaptor(
        width, jis.ZnSumCb(width), zn_fn, unique=True)

    sampled = jis.importancesample(
        index, width, sampling_dist, numsamples, callback)
    return num_unique, sampled
def pwmuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample unique W-mers using PWM importance weights.

    The sample budget is split in two: the first part is drawn with
    weights built from ``pwm``, the remainder with weights built from
    ``jem.pwmrevcomp(pwm)``. The value returned by the first
    ``jis.importancesample`` call is handed to the second call as its
    callback, so both runs feed the same accumulator.

    Args:
        seqsdata: Object providing ``index``, ``childuniquefreqs``,
            ``childoccfreqs`` and ``numunique``.
        pwm: PWM used for the importance weights and the Zn calculator.
        lambda_: Passed together with ``pwm`` to
            ``jem.createZncalculatorFn``.
        Widx: Index selecting the entry of ``args.Ws`` and the matching
            column/entry of the ``seqsdata`` arrays.
        numsamples: Total number of samples over both orientations.
        args: Object providing ``Ws``.

    Returns:
        A tuple ``(seqsdata.numunique[Widx], result)`` where ``result``
        is whatever the second ``jis.importancesample`` call returns.
    """
    logger.debug('Importance sampling using PWM weights over unique W-mers')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    W = args.Ws[Widx]
    childuniquefreqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Sample half in each orientation. Floor division keeps the count an
    # integer whichever division semantics are in force; for an odd
    # numsamples the extra sample goes to the reverse-complement run.
    numpositive = numsamples // 2
    # Forward orientation.
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(pwm, childoccfreqs)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn, unique=True))
    # Reverse-complement orientation, reusing the callback from above.
    return seqsdata.numunique[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childuniquefreqs,
            jis.PWMImportanceWeightUnique(jem.pwmrevcomp(pwm), childoccfreqs)),
        numsamples - numpositive,
        cb)
def uniformuniquemethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Run uniform-weight importance sampling over unique W-mers.

    Builds a Zn calculator from ``pwm`` and ``lambda_``, a sampling
    distribution from column ``Widx`` of ``seqsdata.childuniquefreqs``
    with ``jis.UniformImportanceWeight``, and draws ``numsamples``
    samples at the ``args.Ws[Widx]`` setting.

    Returns:
        ``(seqsdata.numunique[Widx], result)``, where ``result`` is the
        return value of ``jis.importancesample``.
    """
    logger.debug('Importance sampling using uniform weights')
    zn_calculator = jem.createZncalculatorFn(pwm, lambda_)
    w = args.Ws[Widx]
    freqs = jis.DistForFreqs(seqsdata.childuniquefreqs[:, Widx])

    # Read the unique count before sampling, as the original tuple
    # expression did.
    unique_count = seqsdata.numunique[Widx]
    seq_index = seqsdata.index

    uniform_dist = jis.WeightedSamplingDist(
        freqs, jis.UniformImportanceWeight())
    zn_sum_cb = jis.ZnSumCb(w)
    memo_cb = jis.ISMemoCbAdaptor(w, zn_sum_cb, zn_calculator, unique=True)

    return unique_count, jis.importancesample(
        seq_index, w, uniform_dist, numsamples, memo_cb)
def pwmoccsmethod(seqsdata, pwm, lambda_, Widx, numsamples, args):
    """Importance sample W-mer occurrences using PWM importance weights.

    Draws ``numsamples // 2`` samples weighted by ``pwm`` and the
    remaining samples weighted by ``jem.pwmrevcomp(pwm)``. The value
    returned by the first ``jis.importancesample`` call is passed to the
    second call as its callback.

    Args:
        seqsdata: Object providing ``index``, ``childoccfreqs`` and
            ``numoccs``.
        pwm: PWM used for the importance weights and the Zn calculator.
        lambda_: Passed together with ``pwm`` to
            ``jem.createZncalculatorFn``.
        Widx: Index selecting the entry of ``args.Ws`` and the matching
            column/entry of the ``seqsdata`` arrays.
        numsamples: Total number of samples over both orientations.
        args: Object providing ``Ws``.

    Returns:
        A tuple ``(seqsdata.numoccs[Widx], result)`` where ``result`` is
        whatever the second ``jis.importancesample`` call returns.
    """
    logger.debug('Importance sampling using PWM importance weights')
    calculateZn = jem.createZncalculatorFn(pwm, lambda_)
    # Sample half in each orientation. ``//`` (not ``/``) so the count
    # stays an integer under true-division semantics as well.
    numpositive = numsamples // 2
    W = args.Ws[Widx]
    childoccfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    # Forward orientation.
    cb = jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs, jis.PWMImportanceWeight(pwm)),
        numpositive,
        jis.ISMemoCbAdaptor(W, jis.ZnSumCb(W), calculateZn))
    # Reverse-complement orientation; any odd sample left over lands here.
    return seqsdata.numoccs[Widx], jis.importancesample(
        seqsdata.index,
        W,
        jis.WeightedSamplingDist(
            childoccfreqs, jis.PWMImportanceWeight(jem.pwmrevcomp(pwm))),
        numsamples - numpositive,
        cb)
def generateseed(args):
    """Generate seed from possible fasta files and motif widths.

    Picks one entry of ``args.fastas`` and one index into ``args.Ws``
    using ``rdm``, loads the sequence data via ``getseqsdata``, and
    draws a single importance sample with uniform weights.

    Returns:
        A tuple ``(seqsdata, Widx, seed)`` where ``seed`` is the first
        ``W`` elements of ``memocb.its[0].representative``.
    """
    fasta = rdm.choice(args.fastas)
    Widx = rdm.randint(len(args.Ws))
    logger.info(
        'Generating seed of width %d from %s', args.Ws[Widx], fasta)

    seqsdata = getseqsdata(fasta, args.Ws)
    logger.info('Importance sampling using background model to find seed')

    # Width is looked up again after the data has been loaded, exactly as
    # the original code did.
    W = args.Ws[Widx]
    childfreqs = jis.DistForFreqs(seqsdata.childoccfreqs[:, Widx])
    samplingdist = jis.WeightedSamplingDist(
        childfreqs, jis.UniformImportanceWeight())
    memocb = jis.importancesample(
        seqsdata.index,
        W,
        samplingdist,
        numsamples=1,
        callback=jis.ISCbMemo())

    seed = memocb.its[0].representative[:W]
    return seqsdata, Widx, seed
def generateseed(args):
    """Generate seed from possible fasta files and motif widths.

    A fasta file and a width index are chosen with ``rdm``; one sample
    is then drawn by ``jis.importancesample`` using uniform importance
    weights over column ``Widx`` of ``seqsdata.childoccfreqs``.

    Returns:
        ``(seqsdata, Widx, seed)``; ``seed`` is
        ``memocb.its[0].representative`` truncated to ``W`` elements.
    """
    chosen_fasta = rdm.choice(args.fastas)
    width_idx = rdm.randint(len(args.Ws))
    width = args.Ws[width_idx]
    logger.info('Generating seed of width %d from %s', width, chosen_fasta)

    data = getseqsdata(chosen_fasta, args.Ws)
    logger.info('Importance sampling using background model to find seed')

    # Same second lookup of the width as in the original code.
    width = args.Ws[width_idx]
    freqs = jis.DistForFreqs(data.childoccfreqs[:, width_idx])
    index = data.index
    uniform_dist = jis.WeightedSamplingDist(
        freqs, jis.UniformImportanceWeight())
    memo = jis.importancesample(
        index, width, uniform_dist, numsamples=1, callback=jis.ISCbMemo())

    return data, width_idx, memo.its[0].representative[:width]
index = seqan.IndexStringDNASetESA(seqs) logging.info('Counting W-mers') Ws = [W] Wmercounts = npy.zeros((2*len(index), len(Ws)), dtype=npy.uint) numWmers = wmers.countWmersMulti(index.topdownhistory(), Ws, Wmercounts)[0] logging.info('Got %d %d-mers', numWmers, W) childWmerfreqs = npy.zeros((2*len(index), len(Ws), jem.SIGMA)) wmers.countWmerChildren(index.topdownhistory(), W, Wmercounts, childWmerfreqs) childWmerfreqs = jem.normalisearray(childWmerfreqs) sumestimator = jis.makesumestimator(numWmers) logging.info('Importance sampling using background model to find one seed') rdm.seed(2) memocb = jis.importancesample( index, W, childWmerfreqs[:, 0], jis.UniformImportanceWeight(), numsamples=1, callback=jis.ISCbMemo()) pwm = jem.pwmfromWmer(memocb.Xns[0], numseedsites, 1.) jem.logo(pwm, 'seed') numsamples = 3000 distsbs = [] distsbg = [] truesums = [] varratios = [] pwmtrue = pwm.copy() for iteration in xrange(5): logging.debug('Calculating true Zn sums') summer = jis.ZnSumCb(W) calculateZn = jem.createZncalculatorFn(pwmtrue, lambda_) sumvisitor = jis.ZnCalcVisitor(W, calculateZn, summer)