import makeRandomTestData, os, random

# Settings for this simulated data set.
name = '245rep3'
infileName = 'Gallus_gallus.WASHUC2.63.cdna.all.fa'
filename = 'DataSet' + name  # common prefix of every output file name
genes = makeRandomTestData.inputFastaSeq(infileName)
# NOTE(review): `errors` is not read in the visible part of this script;
# it presumably corresponds to the "E1" in the file names below - confirm.
errors = 1

# Suffixes of the output files that may be left over from a previous run.
# Each existing file is deleted, in this order, before new data is written.
staleSuffixes = (
    'E1R100G100.fa',
    'E1R100G100RandomReadsPair1.fa',
    'E1R100G100RandomReadsPair2.fa',
    'E1R100G100RandomReads.txt',
    'E1R90G100.fa',
    'E1R80G100.fa',
    'E1R70G100.fa',
    'E1R60G100.fa',
    'E1R50G100.fa',
    'E1R40G100.fa',
    'E1R30G100.fa',
)

for suffix in staleSuffixes:
    stalePath = filename + suffix
    # checkForFasta() is used as the "should this file be removed?" test;
    # presumably it reports whether the file already exists - confirm.
    if makeRandomTestData.checkForFasta(stalePath):
        os.remove(stalePath)
# A script to take ensembl gene ids and get those seqs from a fasta file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.

# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta

import sys, makeRandomTestData

# Copy every sequence whose FASTA header contains one of the requested ids.
#   sys.argv[1]  input FASTA file
#   sys.argv[2]  text file with one id per line
#   sys.argv[3]  output FASTA file (handed to outputFastaSeq per match)
print('Loading/counting data\n')

count = 0  # number of non-blank ids read from the id file

genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# "with" guarantees the id file is closed, even if a lookup fails.
with open(sys.argv[2], 'r') as idFile:
    for newString in idFile:
        name = newString.rstrip()
        if not name:
            # An empty string is a substring of every header, so a blank
            # line would otherwise copy the whole input file to the output.
            continue
        # Substring match: an id selects every header that contains it.
        for key in genes:
            if name in key:
                makeRandomTestData.outputFastaSeq(sys.argv[3], key, genes[key])
        count += 1

print(count)
print('Finished!')
# A script to take the simulated reads for a gene and determine the actual coverage of the gene in a random + coverage simulation
# Date Created: 2011.11.15
# Author: A. Black P.

# Usage: python getSeqCoverageReal.py inputGene.fa inputReads.fa output.txt

import makeRandomTestData, os, random, sys, aabpPyLib


# Compute the per-base read coverage of a single gene.
#   sys.argv[1]  FASTA file holding the gene (the first key returned is used)
#   sys.argv[2]  FASTA file holding the simulated reads
#   sys.argv[3]  output text file: one coverage count per gene position
infileGeneName = sys.argv[1]
infileReadsName = sys.argv[2]
outfileName = sys.argv[3]

genes = makeRandomTestData.inputFastaSeq(infileGeneName)
geneId = list(genes)[0]
geneLength = len(genes[geneId])
print('%s %s' % (geneId, geneLength))

Coverage = [0] * geneLength
reads = makeRandomTestData.inputFastaSeq(infileReadsName)
for key in reads:
    # Read headers carry their position as "...Begin<start>End<stop>...".
    positionCut = key.split('Begin')
    parts = positionCut[1].split('End')
    print(parts)
    # The read covers the half-open interval [start, stop).
    for i in range(int(parts[0]), int(parts[1])):
        Coverage[i] += 1

# The output file is opened only once the coverage is complete, and the
# "with" block guarantees it is flushed and closed.
with open(outfileName, 'w') as outfile:
    for depth in Coverage:
        outfile.write(str(depth) + '\n')

median = aabpPyLib.findMedian(Coverage)
print(median)
# A script to take ensembl gene ids and get those seqs from a fasta file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.

# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta

import sys, makeRandomTestData

# Extract the sequences named in an id list from a FASTA file.
#   sys.argv[1]  input FASTA file
#   sys.argv[2]  text file with one id per line
#   sys.argv[3]  output FASTA file (handed to outputFastaSeq per match)
print('Loading/counting data\n')

count = 0  # number of non-blank ids read from the id file

genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# Opening the id list in a "with" block closes the handle when done.
with open(sys.argv[2], 'r') as idFile:
    for newString in idFile:
        name = newString.rstrip()
        if not name:
            # Skip blank lines: '' is contained in every header and would
            # therefore select every sequence in the input file.
            continue
        # An id matches any header it occurs in (substring test).
        for key in genes:
            if name in key:
                makeRandomTestData.outputFastaSeq(sys.argv[3], key, genes[key])
        count += 1

print(count)
print('Finished!')
# A script to take a set of transcripts and produce overlapping reads for all transcripts for use in finding transcript families

# Usage: python testTranscripts.py input.fa output.fa readLength

import sys, makeRandomTestData

# Command-line arguments: input FASTA path, output FASTA path, read length.
inputFileName, outputFileName = sys.argv[1], sys.argv[2]
readLength = int(sys.argv[3])

transcripts = makeRandomTestData.inputFastaSeq(inputFileName)

# Process the transcripts in sorted-id order, handing each id and its
# sequence to makeAllOverlapReads together with the output file name.
for transcriptId in sorted(transcripts):
    sequence = transcripts[transcriptId]
    # The return value is kept in `complete` but not used in this script.
    complete = makeRandomTestData.makeAllOverlapReads(
        readLength, sequence, transcriptId, outputFileName)

print('Finished!')
# A script to take a set of transcripts and produce overlapping reads for all transcripts for use in finding transcript families

# Usage: python testTranscripts.py input.fa output.fa readLength

import sys, makeRandomTestData

# Positional arguments: 1 = input FASTA, 2 = output FASTA, 3 = read length.
inputFileName = sys.argv[1]
outputFileName = sys.argv[2]
readLength = int(sys.argv[3])

transcripts = makeRandomTestData.inputFastaSeq(inputFileName)

# Sort the transcript ids once, then pass each transcript to
# makeAllOverlapReads in that order.
orderedIds = sorted(transcripts)
for transcriptId in orderedIds:
    # The result is stored in `complete`; nothing in this script reads it.
    complete = makeRandomTestData.makeAllOverlapReads(
        readLength, transcripts[transcriptId], transcriptId, outputFileName)

print('Finished!')