def translate(word):
    """Encode a nucleotide string as a list of integer codes.

    The mapping is A -> 0, T -> 1, G -> 2, C -> 3.

    :param word: iterable of single-character bases (e.g. a string).
    :return: list of ints, one per element of ``word``, in order.
    :raises KeyError: if ``word`` contains anything other than the
        upper-case letters ``A``, ``T``, ``G`` or ``C``.
    """
    # Build the lookup table once per call instead of once per character.
    codes = {"A": 0, "T": 1, "G": 2, "C": 3}
    return [codes[base] for base in word]

def translateRaw(array):
    """Decode a sequence of integer base codes into a nucleotide string.

    The mapping is 0 -> A, 1 -> T, 2 -> G, 3 -> C (the inverse of
    ``translate``).

    :param array: iterable of codes; each must compare/hash equal to
        0, 1, 2 or 3.
    :return: string with one letter per element of ``array``.
    :raises KeyError: if any element is not one of the four codes.
    """
    # Build the lookup table once per call instead of once per element.
    letters = {0: "A", 1: "T", 2: "G", 3: "C"}
    return "".join([letters[code] for code in array])


# Positional command-line arguments; all three are required (a missing one
# raises IndexError here).
scoreFolder = sys.argv[1]
upstreamList = sys.argv[2]
fullUpstreamList = sys.argv[3]


# Create the local "temp" directory (relative to the current working
# directory) if it does not exist yet, before passing it to PatternFinder.
if not os.path.exists("temp"):
    os.mkdir("temp")
# Finder that uses "temp" as its second argument and is loaded from
# upstreamList.
# NOTE(review): the meaning of the first argument (6) and of the folder
# argument is defined by PatternFinder, which is not visible here -- confirm.
current = PatternFinder(6, "temp")
current.loadSequences(upstreamList)

# Finder that uses scoreFolder and is loaded from fullUpstreamList.
full = PatternFinder(6, scoreFolder)
full.loadSequences(fullUpstreamList)

# Third finder on scoreFolder; no sequences are loaded into it at this point.
temp = PatternFinder(6, scoreFolder)



def makeTextShuffleControl(filename, minLen=10000):
    cdata = np.array([translate(i.replace("\n", "").replace("\r", "")) for i in open(filename).readlines() if len(i) > 10]).T

    controls = []
    repeat = 1 + minLen / len(cdata[0])
Example #2
0
    [latent, coeff] = scipy.sparse.linalg.eigsh(covM, numPCs)
    if verbose:
        print "Eigenvalues are:", latent
    return (np.transpose(coeff[:, ::-1]), latent[::-1])



def translate(word):
    """Encode a nucleotide string as a list of integer codes.

    The mapping is A -> 0, T -> 1, G -> 2, C -> 3.

    :param word: iterable of single-character bases (e.g. a string).
    :return: list of ints, one per element of ``word``, in order.
    :raises KeyError: if ``word`` contains anything other than the
        upper-case letters ``A``, ``T``, ``G`` or ``C``.
    """
    # Build the lookup table once per call instead of once per character.
    codes = {"A": 0, "T": 1, "G": 2, "C": 3}
    return [codes[base] for base in word]

def translateRaw(array):
    """Decode a sequence of integer base codes into a nucleotide string.

    The mapping is 0 -> A, 1 -> T, 2 -> G, 3 -> C (the inverse of
    ``translate``).

    :param array: iterable of codes; each must compare/hash equal to
        0, 1, 2 or 3.
    :return: string with one letter per element of ``array``.
    :raises KeyError: if any element is not one of the four codes.
    """
    # Build the lookup table once per call instead of once per element.
    letters = {0: "A", 1: "T", 2: "G", 3: "C"}
    return "".join([letters[code] for code in array])



# Load the sequences from sequenceFile and keep the finder's raw sequences.
# NOTE(review): the meaning of the arguments (6, scoreFolder) is defined by
# PatternFinder, which is not visible here -- confirm.
a = PatternFinder(6, scoreFolder)
a.loadSequences(sequenceFile)
allSeqs = a.rawSequences

# Table of pattern pairs read from the score folder; the columns used below
# are "Pos l", "Pos r", "Patt l" and "Patt r".
data = pd.read_csv(os.path.join(scoreFolder, "sortedBy/Best10000_sortBy_ScoreNew_8.csv"))

lef = data["Pos l"].values
rig = data["Pos r"].values
# Keep only rows whose left AND right positions are within 3 of the target
# positions (the comparison is strict: |pos - target| < 4).
mask = (abs(rig - POSITION_RIGHT) < 4) & (abs(lef - POSITION_LEFT) < 4)
data = data[mask]

# Sanity check *before* indexing row 0, so that an empty or too-small
# selection fails here with a clear message instead of an IndexError below.
# The check is strict: more than 90 rows (i.e. at least 91) must remain.
assert len(data) > 90, (
    "expected more than 90 rows near the target positions, got %d" % len(data))

# Patterns from the first remaining row.
# NOTE(review): presumably the CSV is pre-sorted so that row 0 is the best
# scoring pair -- confirm against the code that writes the file.
patLeftBest = data["Patt l"].values[0]
patRightBest = data["Patt r"].values[0]

# Work with at most the first 300 remaining rows from here on.
data = data[:300]