# Load the cDNA FASTA for data set <name> and delete that data set's
# existing output files.
import makeRandomTestData, os, random

name = '245rep3'
infileName = 'Gallus_gallus.WASHUC2.63.cdna.all.fa'
filename = 'DataSet' + name

# Sequences are loaded before any file is deleted, as in the original script.
genes = makeRandomTestData.inputFastaSeq(infileName)
errors = 1

# Suffixes appended to `filename`, listed in the order the files are removed.
outputSuffixes = (
    'E1R100G100.fa',
    'E1R100G100RandomReadsPair1.fa',
    'E1R100G100RandomReadsPair2.fa',
    'E1R100G100RandomReads.txt',
    'E1R90G100.fa',
    'E1R80G100.fa',
    'E1R70G100.fa',
    'E1R60G100.fa',
    'E1R50G100.fa',
    'E1R40G100.fa',
    'E1R30G100.fa',
)

for suffix in outputSuffixes:
    outputPath = filename + suffix
    # checkForFasta is a project helper; presumably it reports whether the
    # file exists -- TODO confirm against makeRandomTestData.
    if makeRandomTestData.checkForFasta(outputPath):
        os.remove(outputPath)
# A script to take Ensembl gene ids and get those seqs from a FASTA file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.
# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta
#
# Every sequence whose header contains one of the ids (substring match) is
# passed to makeRandomTestData.outputFastaSeq together with the output path.
import sys, makeRandomTestData

print('Loading/counting data\n')
count = 0
genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# 'with' guarantees the id list is closed, even if writing a sequence fails.
with open(sys.argv[2], 'r') as idFile:
    for newString in idFile:
        name = newString.rstrip()
        if not name:
            # An empty id is a substring of every header, so a blank line in
            # the id file would otherwise export every sequence.
            continue
        for key in genes:
            if name in key:
                makeRandomTestData.outputFastaSeq(sys.argv[3], key, genes[key])
                count += 1

# NOTE(review): the collapsed original does not show whether the count was
# printed per match or once at the end; it is printed once here -- confirm.
print(count)
print('Finished!')
# A script to take the simulated reads for a gene and determine the actual
# coverage of the gene in a random + coverage simulation.
# Date Created: 2011.11.15
# Author: A. Black P.
# Usage: python getSeqCoverageReal.py inputGene.fa inputReads.fa output.txt
#
# Writes one coverage count per gene position (one per line) to output.txt,
# then prints the value returned by aabpPyLib.findMedian for those counts.
import makeRandomTestData, os, random, sys, aabpPyLib

infileGeneName = sys.argv[1]
infileReadsName = sys.argv[2]

# The output file is opened before the inputs are read, as in the original;
# 'with' makes sure it is flushed and closed once the counts are written.
with open(sys.argv[3], 'w') as outfile:
    genes = makeRandomTestData.inputFastaSeq(infileGeneName)
    # Only one gene is used: whichever key the loaded collection yields first.
    geneId = list(genes.keys())[0]
    geneLength = len(genes[geneId])
    print('%s %s' % (geneId, geneLength))
    Coverage = [0] * geneLength

    reads = makeRandomTestData.inputFastaSeq(infileReadsName)
    for key in reads:
        # The read name carries its span as ...Begin<start>End<stop>.
        parts = key.split('Begin')[1].split('End')
        print(parts)
        # NOTE(review): spans are not checked against geneLength; a stop past
        # the end of the gene raises IndexError -- confirm inputs are in range.
        for i in range(int(parts[0]), int(parts[1])):
            Coverage[i] += 1

    outfile.writelines(str(depth) + '\n' for depth in Coverage)

median = aabpPyLib.findMedian(Coverage)
print(median)
# A script to take Ensembl gene ids and get those seqs from a FASTA file.
# Original: 2011.11.10
# Original: A. Black P.
# Last modified: 2011.11.10
# Last modified: A. Black P.
# Usage: python extractSeqsFastaById.py input.fasta input.ids output.fasta
#
# Each id is matched as a substring of the sequence headers; every matching
# sequence is handed to makeRandomTestData.outputFastaSeq with the output path.
import sys, makeRandomTestData

print('Loading/counting data\n')
count = 0
genes = makeRandomTestData.inputFastaSeq(sys.argv[1])

# The context manager closes the id file when the loop ends or raises.
with open(sys.argv[2], 'r') as idHandle:
    for newString in idHandle:
        name = newString.rstrip()
        if name == '':
            # '' is contained in every header; without this guard a blank
            # line would write out the whole input FASTA.
            continue
        for key in genes:
            if name in key:
                makeRandomTestData.outputFastaSeq(sys.argv[3], key, genes[key])
                count += 1

# NOTE(review): whether the original printed the count per match or only at
# the end cannot be told from the collapsed source; printed once -- confirm.
print(count)
print('Finished!')
# A script to take a set of transcripts and produce overlapping reads for all
# transcripts, for use in finding transcript families.
# Usage: python testTranscripts.py input.fa output.fa readLength
import sys, makeRandomTestData

inputFileName, outputFileName = sys.argv[1], sys.argv[2]
readLength = int(sys.argv[3])

transcripts = makeRandomTestData.inputFastaSeq(inputFileName)

# Transcripts are processed in sorted id order; the helper's return value is
# not used.
for transcriptId in sorted(transcripts):
    sequence = transcripts[transcriptId]
    makeRandomTestData.makeAllOverlapReads(
        readLength, sequence, transcriptId, outputFileName)

print('Finished!')