# Template length needed to place one read pair: both reads plus the insert.
totalReadLength = 2 * readLength + insertSize
numGenes = len(genes)

# 90% down to 10% of the gene count, each truncated to an int.
(r90, r80, r70, r60, r50, r40, r30, r20, r10) = (
    int(numGenes * fraction)
    for fraction in (.9, .8, .7, .6, .5, .4, .3, .2, .1)
)

for key in genes:
    thisGeneName = key
    if len(genes[thisGeneName]) <= totalReadLength:
        # Sequence is not longer than one full read pair; leave it out.
        continue

    complete = makeRandomTestData.outputFastaSeq(
        filename + 'E1R100G100.fa', thisGeneName, genes[thisGeneName])

    # Draw 0, 1 or 2 and translate it to the level handed to the read
    # generator; any other draw would fall back to 0.
    levelKey = random.randint(0, 2)
    level = {0: 10, 1: 100, 2: 1000}.get(levelKey, 0)

    complete = makeRandomTestData.makeRandomlyPlacedPairedEndReads(
        genes[thisGeneName], thisGeneName, readLength, insertSize, level,
        filename + 'E1R100G100RandomReadsPair1.fa',
        filename + 'E1R100G100RandomReadsPair2.fa',
        filename + 'E1R100G100RandomReads.txt',
        1, 0)

    count += 1

genes2 = makeRandomTestData.inputFastaSeq(infileName)
    x = os.remove(randomTrans + '.fa')
# Create numTranscripts test genes.  Each one is either an isoform derived
# from a gene already in `genes` or a brand-new sequence, and is passed to
# outputFastaSeq together with the <randomTrans>.fa file name.
for i in range(numTranscripts):
    isoform = random.randint(1, 100)
    if isoform > isoformProb or len(genes) <= 2:
        # New gene: makeKmerCountData gets a random first argument in
        # [101, 4999] (presumably the sequence length -- TODO confirm).
        thisGeneName = 'testgene_%d %s' % (i, randomTrans)
        genes[thisGeneName] = makeRandomTestData.makeKmerCountData(
            random.randint(101, 4999), 'default')
    else:
        # Isoform of a randomly chosen existing gene; the parent's name with
        # its spaces removed is embedded in the new identifier.
        geneSelected = random.choice(genes.keys())
        geneSelectedParts = geneSelected.split(' ')
        geneSelectedId = ''.join(geneSelectedParts)
        identifier = 'testgene_%d Isoform_%s' % (i, geneSelectedId)
        thisGeneName, seq = makeRandomTestData.generateIsoforms(
            identifier, genes[geneSelected], 'random', 0, 0, 0, min, max)
        genes[thisGeneName] = seq
    complete = makeRandomTestData.outputFastaSeq(
        randomTrans + '.fa', thisGeneName, genes[thisGeneName])

# Input FASTA name and the prefix shared by every output file of this data set.
infileName = randomTrans + '.fa'
filename = 'DataSet' + name
genes = makeRandomTestData.inputFastaSeq(infileName)
errors = 1

# Delete each of these output files when checkForFasta returns a true value
# for it (presumably meaning the file already exists -- TODO confirm).
for staleSuffix in ('E1R100G100.fa',
                    'E1R100G100RandomReads.fa',
                    'E1R100G100RandomReads.txt',
                    'E1R90G100.fa'):
    stalePath = filename + staleSuffix
    if makeRandomTestData.checkForFasta(stalePath):
        x = os.remove(stalePath)
if makeRandomTestData.checkForFasta(filename + 'E1R80G100.fa'):
# Reload the genes and set the prefix shared by this data set's output files.
filename = 'DataSet' + name
genes = makeRandomTestData.inputFastaSeq(infileName)
errors = 0

# Delete each of these output files when checkForFasta returns a true value
# for it (presumably meaning the file already exists -- TODO confirm).
for staleSuffix in ('E1R100G100.fa',
                    'E1R100G100RandomReadsPair1.fa',
                    'E1R100G100RandomReadsPair2.fa',
                    'E1R100G100RandomReads.txt'):
    stalePath = filename + staleSuffix
    if makeRandomTestData.checkForFasta(stalePath):
        x = os.remove(stalePath)

count = 0
readLength, insertSize = 100, 400
# Template length needed to place one read pair: both reads plus the insert.
totalReadLength = 2 * readLength + insertSize
numGenes = len(genes)

for key in genes:
    thisGeneName = key
    if len(genes[thisGeneName]) <= totalReadLength:
        # Sequence is not longer than one full read pair; leave it out.
        continue

    complete = makeRandomTestData.outputFastaSeq(
        filename + 'E1R100G100.fa', thisGeneName, genes[thisGeneName])
    level = 20

    complete = makeRandomTestData.makeRandomlyPlacedPairedEndReads(
        genes[thisGeneName], thisGeneName, readLength, insertSize, level,
        filename + 'E1R100G100RandomReadsPair1.fa',
        filename + 'E1R100G100RandomReadsPair2.fa',
        filename + 'E1R100G100RandomReads.txt',
        errors, 0)

    count += 1

print('Finished')
    x = os.remove(filename + 'E1R100G100.fa')
# Delete the single-end read outputs when checkForFasta returns a true value
# for them (presumably meaning the file already exists -- TODO confirm).
if makeRandomTestData.checkForFasta(filename + 'E1R100G100RandomReads.fa'):
    x = os.remove(filename + 'E1R100G100RandomReads.fa')
if makeRandomTestData.checkForFasta(filename + 'E1R100G100RandomReads.txt'):
    x = os.remove(filename + 'E1R100G100RandomReads.txt')

count = 0
# count2 tallies the genes that are longer than one read.
count2 = 0
readLength = 100
numGenes = len(genes)

# For every gene longer than readLength: pass it to outputFastaSeq, then draw
# random read positions from it.
# NOTE(review): the inner loop looks cut off -- `read` and `identifier1` are
# built but never used in the visible code.
for key in genes:
    thisGeneName = key
    if len(genes[thisGeneName]) > readLength:
        count2 += 1
        complete = makeRandomTestData.outputFastaSeq(filename + 'E1R100G100.fa', thisGeneName, genes[thisGeneName])
        # Disabled alternative: pick the level at random instead of fixing it.
#        levelKey = random.randint(0, 5)
#        if levelKey == 0:
#            level = 10
#        elif levelKey == 1:
#            level = 100
#        elif levelKey == 2:
#            level = 1000
#        else:
#            level = 0
        level = 20
        # Number of reads whose combined length is `level` times the gene
        # length: (gene length / read length) * level, truncated by int() below.
        coverageReq = float(len(genes[thisGeneName])) / float(readLength) * float(level)
        for i in range(0, int(coverageReq)):
            # randint includes both end points, so the last allowed start
            # still leaves room for a full readLength slice.
            position = random.randint(0,len(genes[thisGeneName])-readLength)
            read = genes[thisGeneName][position:position + readLength]
            # Identifier records the read index, its 0-based start and its
            # inclusive end coordinate.
            identifier1 = key + 'Count' + str(i) + 'Begin' + str(position) + 'End' + str(position + readLength - 1)
# A script that takes Ensembl gene ids and extracts the matching sequences from a FASTA file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.

# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta

import sys, makeRandomTestData

# Copy every sequence whose FASTA header contains one of the listed ids.
#   sys.argv[1]  input FASTA, loaded with makeRandomTestData.inputFastaSeq
#   sys.argv[2]  text file with one id per line
#   sys.argv[3]  output FASTA, written through makeRandomTestData.outputFastaSeq
# Prints the number of non-blank ids processed, then 'Finished!'.
if len(sys.argv) < 4:
    # Fail with a usage message rather than a bare IndexError further down.
    sys.exit('Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta')

print('Loading/counting data\n')

count = 0

genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# `with` closes the id file even if a lookup or write below raises.
with open(sys.argv[2], 'r') as idFile:
    for newString in idFile:
        name = newString.rstrip()
        if not name:
            # An empty id is a substring of every header ('' in key is always
            # True), so a blank line would copy the whole input FASTA to the
            # output.  Blank lines are skipped and not counted.
            continue
        # Substring match: an id also selects headers that merely contain it.
        for key in genes:
            if name in key:
                complete = makeRandomTestData.outputFastaSeq(
                    sys.argv[3], key, genes[key])
        count += 1

print(count)
print('Finished!')
# A script that takes Ensembl gene ids and extracts the matching sequences from a FASTA file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.

# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta

import sys, makeRandomTestData

# Copy every sequence whose FASTA header contains one of the listed ids.
#   sys.argv[1]  input FASTA, loaded with makeRandomTestData.inputFastaSeq
#   sys.argv[2]  text file with one id per line
#   sys.argv[3]  output FASTA, written through makeRandomTestData.outputFastaSeq
# Prints the number of non-blank ids processed, then 'Finished!'.
if len(sys.argv) < 4:
    # Fail with a usage message rather than a bare IndexError further down.
    sys.exit('Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta')

print('Loading/counting data\n')

count = 0

genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# `with` closes the id file even if a lookup or write below raises.
with open(sys.argv[2], 'r') as idFile:
    for newString in idFile:
        name = newString.rstrip()
        if not name:
            # An empty id is a substring of every header ('' in key is always
            # True), so a blank line would copy the whole input FASTA to the
            # output.  Blank lines are skipped and not counted.
            continue
        # Substring match: an id also selects headers that merely contain it.
        for key in genes:
            if name in key:
                complete = makeRandomTestData.outputFastaSeq(sys.argv[3], key, genes[key])
        count += 1

print(count)
print('Finished!')