Ejemplo n.º 1
0
    def test_run(self):
        """Smoke-test building and printing a ``DnaseSimulation``.

        Writes a four-sequence, tab-separated simulation spec (columns
        ``sequenceName``, ``sequence``, ``motifs``) to
        ``temp_dnaseSimulationFile.txt``, constructs ``sn.DnaseSimulation``
        from it and hands the result to ``sn.printSequences`` with
        ``temp_dnaseSimulation.simdata`` as the output name. No assertions
        are made: the test passes as long as nothing raises. The temporary
        files are left in the working directory.
        """
        dnaseSimulationFileName = "temp_dnaseSimulationFile.txt"
        # NOTE(review): each entry in the motifs column looks like
        # "<motifName>-<position>"; confirm against the DnaseSimulation parser.
        specLines = (
            "sequenceName\tsequence\tmotifs\n",
            "seq1\tACGTgaTATGATAGCACATGTCGTCAGTACCATGGTCGCCGCTTGCATAGGCAAACATAATTGG\tGATA4_HUMAN.H10MO.B-10,TAL1_known1-30,GATA4_HUMAN.H10MO.B-60\n",
            "seq2\tACGTGAtaTGATAGCACATGTCGTCAGTACCATGGTCGCCGCTTGCATAGGCAAACATAATTGG\tGATA4_HUMAN.H10MO.B-5,TAL1_known1-35\n",
            # last TAL1 won't get embedded
            "seq3\tACGTGAtaTGATAGCACATGTCGTCAGTACCATGGTCGCCGCTTGCATAGGCAAACATAATTGG\t"
            "GATA_disc1-5,GATA_known1-5,TAL1_known1-5,"
            "GATA_disc1-55,GATA_known1-55,TAL1_known1-55\n",
            "seq4\tACGTGAtaTGATAGCACATGTCGTCAGTACCATGGTCGCCGCTTGCATAGGCAAACATAATTGG\t"
            "GATA_disc1-30,GATA_known1-30,TAL1_known1-30,TAL1_known1-30\n",
        )
        dnaseSimFh = fp.getFileHandle(dnaseSimulationFileName, 'w')
        try:
            for specLine in specLines:
                dnaseSimFh.write(specLine)
        finally:
            # Close even when a write fails so the handle is never leaked.
            dnaseSimFh.close()

        # ENCODE motifs are loaded first, then the HOCOMOCO set is merged in
        # through addMotifs; the combined collection feeds the simulation.
        loadedMotifs = sn.LoadedEncodeMotifs(
            simdna.ENCODE_MOTIFS_PATH, pseudocountProb=0.001).addMotifs(
                sn.LoadedHomerMotifs(simdna.HOCOMOCO_MOTIFS_PATH,
                                     pseudocountProb=0.000))
        dnaseSimulation = sn.DnaseSimulation(
            dnaseSimulationFile=dnaseSimulationFileName,
            loadedMotifs=loadedMotifs,
            shuffler=sn.DinucleotideShuffler())
        sn.printSequences("temp_dnaseSimulation.simdata",
                          dnaseSimulation,
                          includeFasta=False,
                          includeEmbeddings=True,
                          prefix=None)
Ejemplo n.º 2
0
def do(options):
    """Write a set of background-only sequences to a ``.simdata`` file.

    Reads ``options.seqLength`` and ``options.numSeqs``. The output file name
    is whatever ``util.addArguments`` builds from the ``"EmptyBackground"``
    stem and those two values, with ``".simdata"`` appended. The sequences
    come from a ``ZeroOrderBackgroundGenerator`` with an empty embedder list,
    so nothing is embedded in them.
    """
    nameArguments = [
        util.ArgumentToAdd(options.seqLength, "seqLength"),
        util.ArgumentToAdd(options.numSeqs, "numSeqs"),
    ]
    outputStem = util.addArguments("EmptyBackground", nameArguments)

    background = sn.ZeroOrderBackgroundGenerator(seqLength=options.seqLength)
    # No embedders: every generated sequence is pure background.
    singleSequenceGenerator = sn.EmbedInABackground(
        backgroundGenerator=background, embedders=[])
    allSequences = sn.GenerateSequenceNTimes(singleSequenceGenerator,
                                             options.numSeqs)

    sn.printSequences(outputStem + ".simdata",
                      allSequences,
                      includeFasta=True,
                      includeEmbeddings=True)
Ejemplo n.º 3
0
def do(options):
    """Simulate sequences with a variable number of motif instances each.

    When ``options.seed`` is not ``None`` both NumPy's and the stdlib's
    random generators are seeded with it. One ``RepeatedEmbedder`` is built
    per name in ``options.motifNames``; its quantity generator is a
    ``PoissonQuantityGenerator(options.mean_motifs)`` wrapped in a
    ``MinMaxWrapper`` (``options.min_motifs`` / ``options.max_motifs``) and
    then in a ``ZeroInflater`` (``options.zero_prob``). The result is written
    to a ``.simdata`` file whose name is built by ``util.addArguments`` from
    the ``"DensityEmbedding"`` stem and the option values.
    """
    seed = options.seed
    if seed is not None:
        import numpy as np
        import random
        np.random.seed(seed)
        random.seed(seed)

    nameArguments = [
        util.ArgumentToAdd(options.prefix, "prefix"),
        util.BooleanArgument(options.bestHit, "bestHit"),
        util.ArrArgument(options.motifNames, "motifs"),
        util.ArgumentToAdd(options.min_motifs, "min"),
        util.ArgumentToAdd(options.max_motifs, "max"),
        util.ArgumentToAdd(options.mean_motifs, "mean"),
        util.FloatArgument(options.zero_prob, "zeroProb"),
        util.ArgumentToAdd(options.seqLength, "seqLength"),
        util.ArgumentToAdd(options.numSeqs, "numSeqs"),
    ]
    outputStem = util.addArguments("DensityEmbedding", nameArguments)

    motifCollection = synthetic.LoadedEncodeMotifs(options.pathToMotifs,
                                                   pseudocountProb=0.001)
    # Pick the substring generator class once; it is reused for every motif.
    if options.bestHit:
        generatorClass = synthetic.BestHitPwmFromLoadedMotifs
    else:
        generatorClass = synthetic.PwmSamplerFromLoadedMotifs

    def buildEmbedder(motifName):
        """Return the repeated embedder for a single motif name."""
        motifSource = generatorClass(loadedMotifs=motifCollection,
                                     motifName=motifName)
        strandWrapped = synthetic.ReverseComplementWrapper(
            substringGenerator=motifSource,
            reverseComplementProb=options.rc_prob)
        singleEmbedder = synthetic.SubstringEmbedder(
            strandWrapped,
            positionGenerator=synthetic.UniformPositionGenerator())
        boundedCount = synthetic.MinMaxWrapper(
            synthetic.PoissonQuantityGenerator(options.mean_motifs),
            theMax=options.max_motifs,
            theMin=options.min_motifs)
        instanceCount = synthetic.ZeroInflater(boundedCount,
                                               zeroProb=options.zero_prob)
        return synthetic.RepeatedEmbedder(singleEmbedder,
                                          quantityGenerator=instanceCount)

    background = synthetic.ZeroOrderBackgroundGenerator(
        seqLength=options.seqLength)
    motifEmbedders = []
    for name in options.motifNames:
        motifEmbedders.append(buildEmbedder(name))

    singleSequenceGenerator = synthetic.EmbedInABackground(
        backgroundGenerator=background, embedders=motifEmbedders)
    allSequences = synthetic.GenerateSequenceNTimes(singleSequenceGenerator,
                                                    options.numSeqs)
    synthetic.printSequences(outputStem + ".simdata",
                             allSequences,
                             includeFasta=True,
                             includeEmbeddings=True,
                             prefix=options.prefix)
def variableSpacingGrammar(options):
    """Simulate sequences holding a motif pair with variable spacing.

    Reads ``pathToMotifs``, ``motifName1``, ``motifName2``, ``seqLength``,
    ``numSeq``, ``prefix``, ``meanSpacing``, ``minSpacing`` and ``maxSpacing``
    from ``options``. A single ``EmbeddableEmbedder`` wrapping a
    ``PairEmbeddableGenerator`` is used; the pair's separation comes from a
    ``PoissonQuantityGenerator(options.meanSpacing)`` wrapped in a
    ``MinMaxWrapper`` bounded by ``options.minSpacing`` and
    ``options.maxSpacing``.

    The output is written to
    ``variableSpacingGrammarSimulation_prefix-<prefix>_motif1-<m1>_motif2-<m2>_seqLength<L>_numSeq<N>.simdata``.
    When ``options.prefix`` is ``None`` the ``prefix-...`` segment is left
    out instead of raising ``TypeError`` on string concatenation.
    """
    pc = 0.001
    loadedMotifs = synthetic.LoadedEncodeMotifs(options.pathToMotifs,
                                                pseudocountProb=pc)
    motifName1 = options.motifName1
    motifName2 = options.motifName2
    seqLength = options.seqLength
    numSeq = options.numSeq

    nameParts = ["variableSpacingGrammarSimulation"]
    if options.prefix is not None:
        nameParts.append("prefix-" + options.prefix)
    nameParts.extend([
        "motif1-" + motifName1,
        "motif2-" + motifName2,
        "seqLength" + str(seqLength),
        "numSeq" + str(numSeq),
    ])
    outputFileName = "_".join(nameParts) + ".simdata"

    motif1Generator = synthetic.PwmSamplerFromLoadedMotifs(
        motifName=motifName1, loadedMotifs=loadedMotifs)
    motif2Generator = synthetic.PwmSamplerFromLoadedMotifs(
        motifName=motifName2, loadedMotifs=loadedMotifs)

    separationGenerator = synthetic.MinMaxWrapper(
        synthetic.PoissonQuantityGenerator(options.meanSpacing),
        theMin=options.minSpacing,
        theMax=options.maxSpacing)
    # The two motifs are embedded together as one pair, so only the pair
    # embedder is needed; no standalone per-motif embedders are created.
    embedders = [
        synthetic.EmbeddableEmbedder(
            embeddableGenerator=synthetic.PairEmbeddableGenerator(
                embeddableGenerator1=motif1Generator,
                embeddableGenerator2=motif2Generator,
                separationGenerator=separationGenerator)),
    ]

    embedInBackground = synthetic.EmbedInABackground(
        backgroundGenerator=synthetic.ZeroOrderBackgroundGenerator(seqLength),
        embedders=embedders)

    sequenceSet = synthetic.GenerateSequenceNTimes(embedInBackground, numSeq)
    synthetic.printSequences(outputFileName, sequenceSet,
                             includeFasta=True, includeEmbeddings=True,
                             prefix=options.prefix)
Ejemplo n.º 5
0
def motifGrammarSimulation(options):
    """Simulate sequences for one of the ``generationSettings`` grammars.

    Reads ``bestHit``, ``pathToMotifs``, ``motifName1``, ``motifName2``,
    ``seqLength``, ``numSeq``, ``generationSetting``,
    ``fixedSpacingOrMinSpacing`` and ``maxSpacing`` from ``options``.

    Embedders used per setting:
      * ``allBackground``: none.
      * ``singleMotif1`` / ``singleMotif2``: that motif's substring embedder.
      * ``twoMotifs``: both substring embedders, independently.
      * ``twoMotifsFixedSpacing``: one pair embedder whose separation is a
        ``FixedQuantityGenerator(options.fixedSpacingOrMinSpacing)``.
      * ``twoMotifsVariableSpacing``: one pair embedder whose separation is
        a ``UniformIntegerGenerator`` between
        ``options.fixedSpacingOrMinSpacing`` and ``options.maxSpacing``.

    Sequences are named with the ``"synthNeg"`` prefix for ``allBackground``
    and ``twoMotifs`` and ``"synthPos"`` otherwise. The output file name
    starts with ``motifGrammarSimulation_<generationSetting>``, gains
    ``_bestHit`` when ``options.bestHit`` is truthy, omits the motif1 segment
    for ``singleMotif2`` and the motif2 segment for ``singleMotif1``, and
    ends with ``_seqLength<L>_numSeq<N>.simdata``.

    Raises:
        RuntimeError: if ``options.generationSetting`` is not one of the
            settings listed above.
    """
    pc = 0.001
    bestHit = options.bestHit
    loadedMotifs = synthetic.LoadedEncodeMotifs(options.pathToMotifs,
                                                pseudocountProb=pc)
    motifName1 = options.motifName1
    motifName2 = options.motifName2
    seqLength = options.seqLength
    numSeq = options.numSeq
    generationSetting = options.generationSetting

    outputFileName = "motifGrammarSimulation_" + generationSetting
    if bestHit:
        outputFileName += "_bestHit"
    # Compare by value, not identity: the setting is a string, and two equal
    # strings are not guaranteed to be the same object.
    if generationSetting != generationSettings.singleMotif2:
        outputFileName += "_motif1-" + motifName1
    if generationSetting != generationSettings.singleMotif1:
        outputFileName += "_motif2-" + motifName2
    outputFileName += ("_seqLength" + str(seqLength)
                       + "_numSeq" + str(numSeq) + ".simdata")

    if bestHit:
        theClass = synthetic.BestHitPwmFromLoadedMotifs
    else:
        theClass = synthetic.PwmSamplerFromLoadedMotifs

    motif1Generator = theClass(motifName=motifName1, loadedMotifs=loadedMotifs)
    motif2Generator = theClass(motifName=motifName2, loadedMotifs=loadedMotifs)
    motif1Embedder = synthetic.SubstringEmbedder(
        substringGenerator=motif1Generator)
    motif2Embedder = synthetic.SubstringEmbedder(
        substringGenerator=motif2Generator)

    if generationSetting in (generationSettings.allBackground,
                             generationSettings.twoMotifs):
        namePrefix = "synthNeg"
    else:
        namePrefix = "synthPos"

    embedders = []
    if generationSetting == generationSettings.allBackground:
        pass  # background only: nothing to embed
    elif generationSetting == generationSettings.singleMotif1:
        embedders.append(motif1Embedder)
    elif generationSetting == generationSettings.singleMotif2:
        embedders.append(motif2Embedder)
    elif generationSetting == generationSettings.twoMotifs:
        embedders.append(motif1Embedder)
        embedders.append(motif2Embedder)
    elif generationSetting in (generationSettings.twoMotifsFixedSpacing,
                               generationSettings.twoMotifsVariableSpacing):
        if generationSetting == generationSettings.twoMotifsFixedSpacing:
            separationGenerator = synthetic.FixedQuantityGenerator(
                options.fixedSpacingOrMinSpacing)
        else:
            separationGenerator = synthetic.UniformIntegerGenerator(
                minVal=options.fixedSpacingOrMinSpacing,
                maxVal=options.maxSpacing)
        # Both motifs are embedded as a single pair with the chosen spacing.
        embedders.append(
            synthetic.EmbeddableEmbedder(
                embeddableGenerator=synthetic.PairEmbeddableGenerator(
                    embeddableGenerator1=motif1Generator,
                    embeddableGenerator2=motif2Generator,
                    separationGenerator=separationGenerator)))
    else:
        raise RuntimeError("unsupported generationSetting:" +
                           generationSetting)

    embedInBackground = synthetic.EmbedInABackground(
        backgroundGenerator=synthetic.ZeroOrderBackgroundGenerator(seqLength),
        embedders=embedders,
        namePrefix=namePrefix)

    sequenceSet = synthetic.GenerateSequenceNTimes(embedInBackground, numSeq)
    synthetic.printSequences(outputFileName,
                             sequenceSet,
                             includeFasta=True,
                             includeEmbeddings=True)