def main():
    """Build the stop-codon table from the GTF annotation and write it as CSV.

    Reads ``GTFfile``, ``inculde_noncanon_start`` and
    ``include_noncanon_stop`` from outside this function. The parsed
    annotation is passed through ``makeGFFlist`` and then
    ``build_stopcodon_table``; the two values it returns are handed to
    ``write_utr_stopcodon_csvfile``.
    """
    annotation_records = GFF.parse(GTFfile)
    chromosomes = makeGFFlist(annotation_records)
    # NOTE: "inculde_noncanon_start" is spelled exactly as the outside name
    # this function reads; do not correct it here without renaming it there.
    ucsc_ids, transcripts = build_stopcodon_table(
        chromosomes,
        inculde_noncanon_start,
        include_noncanon_stop,
    )
    write_utr_stopcodon_csvfile(ucsc_ids, transcripts)
def main(gff_file, fasta_file):
    """Write the first record of a GFF file (with FASTA sequence) as GenBank.

    The output path is ``gff_file`` with its extension replaced by ``.gbk``.
    The FASTA file is loaded into a dictionary and given to ``GFF.parse`` as
    the sequence source. The parsed records are passed through
    ``_fix_ncbi_id`` and ``_check_gff``; only the first record produced is
    written.

    Raises StopIteration if no record is produced.
    """
    stem, _extension = os.path.splitext(gff_file)
    out_file = "%s.gbk" % stem
    sequences = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta", generic_dna))
    checked_records = _check_gff(_fix_ncbi_id(GFF.parse(gff_file, sequences)))
    first_record = next(checked_records)
    # Hack to fix a bug where DNAAlphabet turns into SingleLetterAlphabet()
    # in the parser: force the alphabet back before writing.
    first_record.seq.alphabet = generic_dna
    SeqIO.write(first_record, out_file, "genbank")
def writegene_wf2(shift5, shift3, riboshift, densityfile, feature, gfffile,
                  utrgfffilename, outfile):
    """Write the per-position profile of one feature to a CSV file.

    Parameters:
        shift5: passed to ``givegene`` as the first element of ``bp``; also
            the offset subtracted from each index to give the "pos" column,
            so positions start at ``-shift5``.
        shift3: added to the UTR length (or to 0 when the feature has no
            UTR entry) to form the second element of ``bp``.
        riboshift: passed to ``givegene`` as the third element of ``bp``.
        densityfile: path prefix; ``"_plus_"`` and ``"_minus_"`` are appended
            and each result is loaded with ``readcountsf``.
        feature: key into the id table built from ``gfffile``; also used in
            the output file name.
        gfffile: annotation handed to ``GFF.parse``.
        utrgfffilename: UTR annotation handed to ``GFF.parse``, or the string
            ``"-1"`` to use an empty UTR table.
        outfile: output prefix; the file written is
            ``outfile + "_" + feature + ".csv"``.

    Returns:
        None. When ``givegene`` reports ``-1`` as its first element the
        message "Not a good gene..." is printed and no file is written.
    """
    GFFgen = GFF.parse(gfffile)
    counts = [
        readcountsf(densityfile + "_plus_"),
        readcountsf(densityfile + "_minus_"),
    ]
    idtable = makeidtable2(GFFgen)
    # The annotation is parsed a second time for makeGFFlist, as before --
    # presumably because makeidtable2 exhausts the first parser output.
    GFFlist = makeGFFlist(GFF.parse(gfffile))
    goodgenes = 2
    # Single-argument parenthesised print behaves the same with or without
    # print_function.
    print(feature)

    if utrgfffilename == "-1":
        utrtable = {}
    else:
        utrtable = genometools.makeutrtable(GFF.parse(utrgfffilename))

    chromosome = idtable[feature][2]
    featurenum = idtable[feature][1]
    longfeature = idtable[feature][0].id

    if longfeature in utrtable:
        utr_start, utr_end = utrtable[longfeature][0], utrtable[longfeature][1]
        bp2 = utr_end - utr_start + shift3
    else:
        bp2 = shift3
    bp = [shift5, bp2, riboshift]

    retval = givegene(chromosome, featurenum, GFFlist, counts, bp, goodgenes)
    if retval[0] == -1:
        print("Not a good gene...")
        # Stop here: falling through would call len() on -1 and raise
        # TypeError before anything useful is written.
        return

    rows = [["pos", "rpm"]]
    rows.extend([index - shift5, value] for index, value in enumerate(retval[0]))

    # The context manager closes the file even if writing fails part-way.
    with open(outfile + "_" + feature + ".csv", "w") as fcsv:
        csv.writer(fcsv).writerows(rows)
def main():
    """Extract protein sequences from the GTF annotation and write them as CSV.

    Reads ``GTFfile`` from outside this function, passes the parsed
    annotation through ``makeGFFlist`` and ``get_Prot_sequence``, and hands
    the two values returned to ``write_utr_stopcodon_csvfile``.
    """
    # Leftover experiment, kept disabled (its purpose was never recorded):
    #   mRNAdict = pandas.read_csv(mRNAseqsInfile, index_col=0, skiprows=1).T.to_dict()
    #   mRNAdict = pandas.read_csv(mRNAseqsInfile, index_col=0, skiprows=1)
    #   print mRNAdict.head()
    #   print mRNAdict['']

    # The actual function:
    annotation_records = GFF.parse(GTFfile)
    chromosomes = makeGFFlist(annotation_records)
    ucsc_ids, transcripts = get_Prot_sequence(chromosomes)
    write_utr_stopcodon_csvfile(ucsc_ids, transcripts)
def parse_GFF(utrGFF):
    """Return ``makeutrtable(GFF.parse(utrGFF))``, tolerating an IOError.

    On IOError a warning naming ``utrGFF`` is printed and the user is
    prompted. Answering ``c`` returns an empty dictionary; any other answer
    calls ``quit()``.
    """
    try:
        utr_table = makeutrtable(GFF.parse(utrGFF))
    except IOError:
        print("Warning! " + utrGFF + " couldn't be found.")
        answer = raw_input("'c' to continue with empty dictionary\n")
        if answer == 'c':
            return {}
        quit()
    else:
        return utr_table
def makeGFFlist(GFFname):
    """Load every record of a GFF source into memory, keyed by record id.

    ``GFFname`` is handed straight to ``GFF.parse``; each record it yields is
    stored under its ``id``. If two records share an id, the later one wins.

    Returns a dictionary ``{record.id: record}``.

    Notes carried over from the original author (from seqtools):
      * Described as a tool for loading the entire yeast genome into memory.
      * Older notes show the call as ``st.makeGFFlist(GFF.parse(codingGFF))``,
        but this version calls ``GFF.parse`` itself.
        NOTE(review): confirm callers do not pass already-parsed output.
      * Used for the main coding GFF but not utr5GFF / utr3GFF -- generalize?
      * Probably unaffected by changes to the GFF, since nothing is
        interpreted here; records are only stored.
    """
    return {record.id: record for record in GFF.parse(GFFname)}
def main():
    """Compute 3'UTR stop positions from the GTF annotation and write them as CSV.

    Reads ``GTFfile`` from outside this function, passes the parsed
    annotation through ``makeGFFlist`` and ``build_utr3_stop_positions``, and
    hands the two values returned to ``write_utr_stopcodon_csvfile``.
    """
    chromosomes = makeGFFlist(GFF.parse(GTFfile))
    ucsc_ids, transcripts = build_utr3_stop_positions(chromosomes)
    write_utr_stopcodon_csvfile(ucsc_ids, transcripts)
# Tail of the command-line setup and the run itself. `parser` and the options
# read below as args.riboshiftdict, args.GTFfile, args.bamfileinput,
# args.twobitfile and args.assignment are not defined in this chunk --
# presumably they are declared earlier in the script.
parser.add_argument('--threshold', default=1, help='thresholding read counts')
# No type= is given, so a value supplied on the command line arrives as a
# string while the default stays the int -1; it is passed on unconverted below.
parser.add_argument('--totreads', default=-1, help='reads for normalization')
parser.add_argument('--outputdata', help='output data filepath')
parser.add_argument('--bamfileoutput', help='output bam file')
# NOTE(review): bare attribute reference with no call -- it has no effect.
# Looks like a truncated add_argument(...) line; confirm nothing is missing.
parser.add_argument
args = parser.parse_args()
import ast
# The option is given as a Python literal in text form; literal_eval turns it
# into the corresponding object without executing arbitrary code.
riboshiftdict = ast.literal_eval( args.riboshiftdict) #convert string into dictionary
print riboshiftdict
print "parsing gff..."
GTFgen = GFF.parse(args.GTFfile)
print "loading bam file..."
bamfile = pysam.AlignmentFile(args.bamfileinput, "rb")
print "loading genome..."
genome = twobitreader.TwoBitFile(args.twobitfile)
print "writing bam out file..."
# The output BAM reuses the input BAM as its template.
bamfileout = pysam.AlignmentFile(args.bamfileoutput, "wb", template=bamfile)
# threshold is converted to int here; totreads is passed exactly as parsed.
rfpdense = densebuilder(bamfile, GTFgen, genome, riboshiftdict, int(args.threshold), args.totreads, args.outputdata, args.assignment, bamfileout)
rfpdense.builddense()
# Script entry point: parse the command line, open the BAM files, build a
# densebuilder and run its setdense() step.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--GFFfile', help='coding GFF or GTF file')
    parser.add_argument('--bamfile1', help='bamfile from first genome mapping')
    parser.add_argument('--bamfile2', help='bamfile from polyA removal')
    parser.add_argument('--densitypathandfilestring', help='density file output path')
    parser.add_argument('--wigpathandfile', help='wig file output path')
    # No type= is given, so a command-line value arrives as a string while the
    # default stays the int -1.
    parser.add_argument('--totreads', default=-1, help='total reads for normalization')
    parser.add_argument('--assignment', help='5 or 3 end', required=True)
    # No default: if the option is omitted, None reaches ast.literal_eval
    # below, which rejects it.
    parser.add_argument('--riboshiftdict', help='dictionary of riboshifts')
    parser.add_argument('--bamfileoutput', help='output bam file')
    parser.add_argument('--softclipped', help='number of soft clipped allowed')
    args = parser.parse_args()
    import ast
    # Turn the literal given as text into the corresponding Python object.
    riboshiftdict = ast.literal_eval(args.riboshiftdict)
    GFFgen = GFF.parse(args.GFFfile)
    # bamfile1 is opened here only to serve as the template for the output
    # BAM; densebuilder itself receives the two BAM paths, not this handle.
    bamgen0 = pysam.AlignmentFile(args.bamfile1, "rb")
    bamfileout = pysam.AlignmentFile(args.bamfileoutput, "wb", template=bamgen0)
    # totreads and softclipped are passed exactly as parsed (strings when
    # given on the command line).
    rfpdense = densebuilder(GFFgen, args.bamfile1, args.bamfile2, args.densitypathandfilestring, args.wigpathandfile, args.totreads, args.assignment, riboshiftdict, bamfileout, args.softclipped)
    rfpdense.setdense()
help = ".gff file")  # closes a call that begins above this chunk (presumably the --gff option)
parser.add_argument("--out", metavar = "STRING", type = str, help = "Output directory", default = ".")
args = parser.parse_args()
filBAM = args.bam
# Base name of the BAM file with its directory and extension removed.
name = os.path.splitext(os.path.basename(filBAM))[0]
filGFF = args.gff
# Open the gff file.
# NOTE(review): the file object passed to GFF.parse is never closed in the
# visible code.
gffHandle = GFF.parse(open(filGFF))
# Open the bam file
bamHandle = pysam.AlignmentFile(filBAM, "rb")
### Functions for read directions ###
def Forward(read):
    """Return True when ``read.is_reverse`` is falsy, otherwise False."""
    if read.is_reverse:
        return False
    else:
        return True
def Reverse(read):
    """Return True when ``read.is_reverse`` is truthy, otherwise False."""
    if read.is_reverse:
        return True
    else:
        return False
### CALCULATE COVERAGE AND ANNOTATION FOR EVERY LOCUS_TAG ###
# Starts empty; it is filled by code beyond this chunk.
coverages=list()
def main():
    """Extract mRNA sequences from the GTF annotation and write them as CSV.

    Reads ``GTFfile`` from outside this function, passes the parsed
    annotation through ``makeGFFlist`` and ``get_mRNA_sequence``, and hands
    the two values returned to ``write_utr_stopcodon_csvfile``.
    """
    parsed_annotation = GFF.parse(GTFfile)
    records_by_chromosome = makeGFFlist(parsed_annotation)
    id_list, transcript_table = get_mRNA_sequence(records_by_chromosome)
    write_utr_stopcodon_csvfile(id_list, transcript_table)
def main():
    """Run the uORF search over the GTF annotation.

    Reads ``GTFfile`` from outside this function, passes the parsed
    annotation through ``makeGFFlist``, and gives the result to
    ``find_uORFs``. The return value of ``find_uORFs`` is discarded.
    """
    find_uORFs(makeGFFlist(GFF.parse(GTFfile)))