Beispiel #1
0
def read_simdata_file(simdata_file, one_hot_encode=False, ids_to_load=None):
    """Collect ids, sequences, embeddings and labels from a simdata file.

    Every line of the file is passed through ``util.default_tab_seppd``
    (presumably a tab split -- confirm in ``util``) and then handed to a
    row callback together with its line number. Line 1 is never recorded
    (presumably the header row). For every other line, field 0 is the id,
    field 1 the sequence, field 2 is parsed by ``getEmbeddingsFromString``
    and fields 3 onward are converted to ints to form one label row.

    Args:
        simdata_file: Forwarded to ``util.get_file_handle``.
        one_hot_encode: Accepted for interface compatibility; this
            function does not read it.
        ids_to_load: Optional iterable of ids. When given, only rows whose
            id is contained in it are kept; when ``None`` all rows are kept.

    Returns:
        The object built by ``util.enum`` with the attributes ``ids``,
        ``sequences`` and ``embeddings`` (lists) and ``labels`` (the label
        rows wrapped in ``np.array``).
    """
    # A set gives constant-time membership checks for every row.
    wanted_ids = None if ids_to_load is None else set(ids_to_load)
    ids, sequences, embeddings, labels = [], [], [], []

    def collect_row(inp, line_number):
        # Guard 1: the first line is skipped.
        if not (line_number > 1):
            return
        # Guard 2: honour the optional id filter.
        if wanted_ids is not None and inp[0] not in wanted_ids:
            return
        ids.append(inp[0])
        sequences.append(inp[1])
        embeddings.append(getEmbeddingsFromString(inp[2]))
        labels.append(list(map(int, inp[3:])))

    util.perform_action_on_each_line_of_file(
        file_handle=util.get_file_handle(simdata_file),
        action=collect_row,
        transformation=util.default_tab_seppd,
    )

    return util.enum(
        ids=ids,
        sequences=sequences,
        embeddings=embeddings,
        labels=np.array(labels),
    )
Beispiel #2
0
#!/usr/bin/env python
import os
import sys
import simdna
import simdna.util as util
import simdna.synthetic as synthetic
import simdna.pwm as pwm

# Names of the supported generation settings; each attribute maps to a
# string equal to its own name.
generationSettings = util.enum(
    allBackground="allBackground",
    # embeds first motif
    singleMotif1="singleMotif1",
    # embeds second motif
    singleMotif2="singleMotif2",
    # embeds one of both motifs
    twoMotifs="twoMotifs",
    # embeds both motifs with a fixed spacing
    twoMotifsFixedSpacing="twoMotifsFixedSpacing",
    # embeds both motifs with a variable spacing
    twoMotifsVariableSpacing="twoMotifsVariableSpacing",
)


def motifGrammarSimulation(options):
    pc = 0.001
    bestHit = options.bestHit
    pathToMotifs = options.pathToMotifs
    loadedMotifs = synthetic.LoadedEncodeMotifs(pathToMotifs,
                                                pseudocountProb=pc)
    motifName1 = options.motifName1