else: spliceAcceptorCase = 'SpliceAcceptorIsNotAG' agCase = None if "AG" == sequence[1:3] or "AG" == sequence[7:9]: agCase = 'FoundAGOnSides' elif sequence[1:3][0] == 'A' or sequence[1:3][1] == 'G' or sequence[ 7:9][0] == 'A' or sequence[7:9][1] == 'G': agCase = 'FoundEitherAOrGOnSides' if agCase: nagAnnotationFile.write("\t".join([ records[0]['chromosome'], 'mayur', 'NAGNAG', records[0] ['geneID'], records[0]['geneName'], records[0]['geneType'], lastTranscriptID, records[0]['transcriptType'], direction, str(startPosition), str(endPosition), sequence, lowerPosition, higherPosition, spliceAcceptorCase, agCase ]) + "\n") if __name__ == '__main__': arguments = gencode.getArguments() nagAnnotationFile = open( os.path.join(arguments[2], "nagnag_cases_test_ignore.gtf"), "w") gencode.run(arguments[0], arguments[1], None, transcriptCallback) nagAnnotationFile.close()
fields = line.split() RefSeqID = fields[0] position_to_aTIS = int(fields[4]) annotation = fields[5] RLTM_RCHX = fields[6] codon = fields[12] EnsemblID = RefSeq_Ensembl_dict.get(RefSeqID) if EnsemblID: uORF_number = Ensembl_count_dict.get(EnsemblID) lee_TIS_dict.setdefault((EnsemblID, uORF_number), (RefSeqID, position_to_aTIS, annotation, RLTM_RCHX, codon, uORF_number)) Ensembl_count_dict[EnsemblID] = uORF_number + 1 leeToFritschFile = open("lee_uORF2.bed", "w") leeToFritschFile.write('#chrom\tchromStart\tchromEnd\tname\tRLTM-RCHX\tstrand\n') # header line tooFarleeuorfsFile = open("lee_toofar_uORFs.bed", "w") tooFarleeuorfsFile.write("#These are the bad uorfs that don't match too far from corresponding codons in the GENCODE transcripts\n" + "#chrom\tchromStart\tchromEnd\tname\tRLTM-RCHX\tstrand\tdistance-from-nearest-GENCODE-codon\n") gencode.run(sys.argv[1], sys.argv[2], None, transcriptCallback) leeToFritschFile.close() tooFarleeuorfsFile.close()
#Coder: Mayur Pawashe
#Program: Writes out five primes
#Dependencies: gencode.py

import os

import gencode

# Output handles shared with the callbacks below. They are bound to real file
# objects by the __main__ section before gencode.run() starts invoking the
# callbacks; until then they are placeholders.
fivePrimesSequenceFile = None
fivePrimesAnnotationFile = None


def newChromosomeCallback(newChromosome):
    """Write a ">"-prefixed header line for newChromosome to the sequence file."""
    fivePrimesSequenceFile.write(">" + newChromosome + "\n")


def transcriptCallback(records, sequenceData, lastTranscriptID, fivePrimeUTRs, fivePrimeContent, cdss, stopCodon, direction):
    """Write one transcript's five prime sequence and its annotation lines.

    Nothing is written when fivePrimeContent is the empty string. Otherwise a
    ">geneID|transcriptID|geneName" header (taken from records[0] and
    lastTranscriptID) and the content itself are appended to the sequence
    file, and the 'line' entry of every item in fivePrimeUTRs is appended to
    the annotation file.

    sequenceData, cdss, stopCodon and direction are accepted because this
    function is passed to gencode.run() as a callback, but they are not used.
    """
    if fivePrimeContent == "":
        return

    firstRecord = records[0]
    header = "|".join([
        str(firstRecord['geneID']),
        str(lastTranscriptID),
        str(firstRecord["geneName"]),
    ])
    fivePrimesSequenceFile.write(">" + header + "\n")
    fivePrimesSequenceFile.write(fivePrimeContent + "\n")

    # NOTE(review): this file reached review with its indentation collapsed;
    # the annotation loop is assumed to belong under the non-empty check
    # above -- confirm against the original layout.
    for fivePrime in fivePrimeUTRs:
        # No newline is appended here; presumably each 'line' value already
        # ends with one -- verify against gencode.py.
        fivePrimesAnnotationFile.write(fivePrime['line'])


if __name__ == '__main__':
    arguments = gencode.getArguments()
    # arguments[2] is used as the directory that receives both output files;
    # arguments[0] and arguments[1] are forwarded to gencode.run() unchanged.
    outputDirectory = arguments[2]

    # The with-statement guarantees both files are flushed and closed even if
    # gencode.run() raises part-way through.
    with open(os.path.join(outputDirectory, "five_primes.fa"), "w") as fivePrimesSequenceFile, \
            open(os.path.join(outputDirectory, "five_primes.gtf"), "w") as fivePrimesAnnotationFile:
        gencode.run(arguments[0], arguments[1], newChromosomeCallback, transcriptCallback)