def do(options):
    """Generate background-only sequences (no embedders) and print them.

    Reads ``options.seqLength`` and ``options.numSeqs``. Both values are
    folded into the output name via ``util.addArguments`` under the
    "EmptyBackground" label, and the resulting name gets a ``.simdata``
    suffix before being handed to ``sn.printSequences``.

    Args:
        options: object exposing ``seqLength`` and ``numSeqs`` attributes
            (presumably parsed command-line arguments -- confirm at caller).
    """
    name_components = [
        util.ArgumentToAdd(options.seqLength, "seqLength"),
        util.ArgumentToAdd(options.numSeqs, "numSeqs"),
    ]
    file_stem = util.addArguments("EmptyBackground", name_components)

    # An empty embedder list means nothing is inserted into the background.
    background = sn.ZeroOrderBackgroundGenerator(seqLength=options.seqLength)
    sequence_source = sn.EmbedInABackground(
        backgroundGenerator=background,
        embedders=[],
    )

    generated = sn.GenerateSequenceNTimes(sequence_source, options.numSeqs)
    sn.printSequences(
        file_stem + ".simdata",
        generated,
        includeFasta=True,
        includeEmbeddings=True,
    )
def do(options):
    """Generate sequences with repeated motif embeddings and print them.

    When ``options.seed`` is not None, both ``numpy.random`` and the stdlib
    ``random`` module are seeded with it before anything else is built.

    For every name in ``options.motifNames`` one ``RepeatedEmbedder`` is
    assembled from:
      * a substring generator (``BestHitPwmFromLoadedMotifs`` when
        ``options.bestHit`` is truthy, otherwise
        ``PwmSamplerFromLoadedMotifs``) wrapped in a
        ``ReverseComplementWrapper`` using ``options.rc_prob``;
      * a ``UniformPositionGenerator`` for placement;
      * a quantity generator built as
        ``ZeroInflater(MinMaxWrapper(PoissonQuantityGenerator(mean)))``
        using ``options.mean_motifs``, ``options.min_motifs``,
        ``options.max_motifs`` and ``options.zero_prob``.

    The output name is produced by ``util.addArguments`` under the
    "DensityEmbedding" label, suffixed with ``.simdata`` and passed to
    ``synthetic.printSequences`` together with ``options.prefix``.

    Args:
        options: object exposing the attributes read above plus
            ``pathToMotifs``, ``seqLength`` and ``numSeqs`` (presumably
            parsed command-line arguments -- confirm at caller).
    """
    seed = options.seed
    if seed is not None:
        # Imports stay local so they are only pulled in when seeding.
        import numpy as np
        np.random.seed(seed)
        import random
        random.seed(seed)

    name_components = [
        util.ArgumentToAdd(options.prefix, "prefix"),
        util.BooleanArgument(options.bestHit, "bestHit"),
        util.ArrArgument(options.motifNames, "motifs"),
        util.ArgumentToAdd(options.min_motifs, "min"),
        util.ArgumentToAdd(options.max_motifs, "max"),
        util.ArgumentToAdd(options.mean_motifs, "mean"),
        util.FloatArgument(options.zero_prob, "zeroProb"),
        util.ArgumentToAdd(options.seqLength, "seqLength"),
        util.ArgumentToAdd(options.numSeqs, "numSeqs"),
    ]
    file_stem = util.addArguments("DensityEmbedding", name_components)

    motif_library = synthetic.LoadedEncodeMotifs(
        options.pathToMotifs, pseudocountProb=0.001)

    if options.bestHit:
        make_substring_generator = synthetic.BestHitPwmFromLoadedMotifs
    else:
        make_substring_generator = synthetic.PwmSamplerFromLoadedMotifs

    # Built before the embedders to keep the original construction order.
    background = synthetic.ZeroOrderBackgroundGenerator(
        seqLength=options.seqLength)

    motif_embedders = []
    for motif_name in options.motifNames:
        oriented_motif = synthetic.ReverseComplementWrapper(
            substringGenerator=make_substring_generator(
                loadedMotifs=motif_library, motifName=motif_name),
            reverseComplementProb=options.rc_prob,
        )
        single_embedder = synthetic.SubstringEmbedder(
            oriented_motif,
            positionGenerator=synthetic.UniformPositionGenerator(),
        )
        # A fresh quantity-generator chain is created for each motif.
        bounded_count = synthetic.MinMaxWrapper(
            synthetic.PoissonQuantityGenerator(options.mean_motifs),
            theMax=options.max_motifs,
            theMin=options.min_motifs,
        )
        count_generator = synthetic.ZeroInflater(
            bounded_count, zeroProb=options.zero_prob)
        motif_embedders.append(
            synthetic.RepeatedEmbedder(
                single_embedder, quantityGenerator=count_generator))

    sequence_source = synthetic.EmbedInABackground(
        backgroundGenerator=background,
        embedders=motif_embedders,
    )

    generated = synthetic.GenerateSequenceNTimes(
        sequence_source, options.numSeqs)
    synthetic.printSequences(
        file_stem + ".simdata",
        generated,
        includeFasta=True,
        includeEmbeddings=True,
        prefix=options.prefix,
    )