def main():
    """Build the stop-codon table from the GTF annotation and write it out.

    Uses ``GTFfile``, ``inculde_noncanon_start`` and ``include_noncanon_stop``,
    all of which are defined outside this function.
    """
    gff_records = makeGFFlist(GFF.parse(GTFfile))
    table = build_stopcodon_table(gff_records,
                                  inculde_noncanon_start,
                                  include_noncanon_stop)
    # build_stopcodon_table hands back an (id list, transcript dict) pair.
    ucscIDlist, transcriptdict = table
    write_utr_stopcodon_csvfile(ucscIDlist, transcriptdict)
Exemple #2
0
def main(gff_file, fasta_file):
    """Write the first record of a GFF/FASTA pair as a GenBank file.

    The output path is ``gff_file`` with its extension replaced by ``.gbk``.
    Only the first record yielded by the parsing pipeline is written.
    """
    base_name, _ = os.path.splitext(gff_file)
    out_file = "%s.gbk" % base_name
    sequences = SeqIO.to_dict(SeqIO.parse(fasta_file, "fasta", generic_dna))
    records = _check_gff(_fix_ncbi_id(GFF.parse(gff_file, sequences)))
    first_record = next(records)
    # Workaround: the parser turns DNAAlphabet into SingleLetterAlphabet(),
    # so the alphabet is reset by hand before writing.
    first_record.seq.alphabet = generic_dna
    SeqIO.write(first_record, out_file, "genbank")
Exemple #3
0
def writegene_wf2(shift5, shift3, riboshift, densityfile, feature, gfffile,
                  utrgfffilename, outfile):
    """Write the per-position values of one feature to a two-column CSV.

    The output file is ``outfile + "_" + feature + ".csv"`` with a header row
    ``pos,rpm``. Positions are numbered so that the first value returned by
    ``givegene`` has position ``-shift5``.

    Args:
        shift5: extension passed to ``givegene`` as the first ``bp`` entry;
            also the offset used to number the output positions.
        shift3: added to the UTR length (or to 0 when the feature has no
            entry in the UTR table) to form the second ``bp`` entry.
        riboshift: passed to ``givegene`` as the third ``bp`` entry.
        densityfile: prefix of the count files; ``"_plus_"`` and ``"_minus_"``
            are appended and each is read with ``readcountsf``.
        feature: key into the id table built from ``gfffile``.
        gfffile: annotation file handed to ``GFF.parse``.
        utrgfffilename: UTR annotation handed to ``GFF.parse``, or the string
            ``"-1"`` to use an empty UTR table.
        outfile: prefix of the CSV file to create.

    Returns:
        None. When ``givegene`` reports ``-1`` as its first element, a message
        is printed and no CSV file is written.
    """
    GFFgen = GFF.parse(gfffile)
    counts = [readcountsf(densityfile + "_plus_"),
              readcountsf(densityfile + "_minus_")]
    idtable = makeidtable2(GFFgen)
    # The annotation is parsed a second time because the first parse result
    # was consumed while building the id table.
    GFFlist = makeGFFlist(GFF.parse(gfffile))
    goodgenes = 2
    print(feature)

    if utrgfffilename == "-1":
        utrtable = {}
    else:
        utrtable = genometools.makeutrtable(GFF.parse(utrgfffilename))

    entry = idtable[feature]
    chromosome = entry[2]
    featurenum = entry[1]
    longfeature = entry[0].id

    if longfeature in utrtable:
        utr_bounds = utrtable[longfeature]
        bp2 = utr_bounds[1] - utr_bounds[0] + shift3
    else:
        bp2 = 0 + shift3

    bp = [shift5, bp2, riboshift]
    retval = givegene(chromosome, featurenum, GFFlist, counts, bp, goodgenes)
    if retval[0] == -1:
        # -1 is a failure marker, not a sequence of values: stop here instead
        # of falling through to len() on it.
        print("Not a good gene...")
        return

    values = retval[0]
    rows = [["pos", "rpm"]]
    rows.extend([index - shift5, value] for index, value in enumerate(values))

    # The context manager closes the file even if writing fails.
    with open(outfile + "_" + feature + ".csv", "w") as fcsv:
        csv.writer(fcsv).writerows(rows)
Exemple #4
0
def main():
    """Run ``get_Prot_sequence`` on the parsed GTF and write its result.

    ``GTFfile`` is defined outside this function.
    """
    # Leftover experiment, purpose no longer known; kept for reference:
    # # mRNAdict = pandas.read_csv(mRNAseqsInfile, index_col=0, skiprows=1).T.to_dict()
    # mRNAdict = pandas.read_csv(mRNAseqsInfile, index_col=0, skiprows=1)
    # print mRNAdict.head()
    # # print mRNAdict['']

    # The actual work starts here.
    gff_records = makeGFFlist(GFF.parse(GTFfile))
    result = get_Prot_sequence(gff_records)
    ucscIDlist, transcriptdict = result
    write_utr_stopcodon_csvfile(ucscIDlist, transcriptdict)
Exemple #5
0
def parse_GFF(utrGFF):
    """Return ``makeutrtable(GFF.parse(utrGFF))``, with a prompt on IOError.

    If building the table raises IOError, a warning is printed and the user
    is prompted: answering 'c' returns an empty dictionary, any other answer
    quits the program.
    """
    try:
        utr_table = makeutrtable(GFF.parse(utrGFF))
    except IOError:
        print("Warning! " + utrGFF + " couldn't be found.")
        answer = raw_input("'c' to continue with empty dictionary\n")
        if answer == 'c':
            return {}
        quit()
    else:
        return utr_table
Exemple #6
0
def makeGFFlist(GFFname):
    """Load every record of a GFF annotation into memory, keyed by record id.

    Originally from seqtools, used to hold the entire yeast genome in memory.
    It is called for the main coding GFF; no interpretation of the records
    happens here, they are only stored.

    Args:
        GFFname: either an iterator of already-parsed records, as in
            ``makeGFFlist(GFF.parse(codingGFF))``, or anything ``GFF.parse``
            itself accepts, in which case it is parsed here.

    Returns:
        dict mapping ``record.id`` to the record. If two records share an id,
        the later one wins.
    """
    # An iterator that is not a file handle is taken to be the output of
    # GFF.parse already; feeding it to GFF.parse a second time would fail.
    # Handles are iterators too, hence the explicit "read" check. Both
    # spellings of the iterator method are tested so this works on Python 2
    # and Python 3.
    already_parsed = not hasattr(GFFname, "read") and (
        hasattr(GFFname, "__next__") or hasattr(GFFname, "next"))
    records = GFFname if already_parsed else GFF.parse(GFFname)
    return {record.id: record for record in records}
def main():
    """Run ``build_utr3_stop_positions`` on the parsed GTF and write the result.

    ``GTFfile`` is defined outside this function.
    """
    gff_records = makeGFFlist(GFF.parse(GTFfile))
    result = build_utr3_stop_positions(gff_records)
    ucscIDlist, transcriptdict = result
    write_utr_stopcodon_csvfile(ucscIDlist, transcriptdict)
Exemple #8
0
    # NOTE(review): this fragment begins partway through a larger body; the
    # creation of `parser` and the options for riboshiftdict, GTFfile,
    # bamfileinput, twobitfile and assignment are presumably registered above
    # this point -- confirm against the full file.
    parser.add_argument('--threshold',
                        default=1,
                        help='thresholding read counts')
    parser.add_argument('--totreads',
                        default=-1,
                        help='reads for normalization')
    parser.add_argument('--outputdata', help='output data filepath')
    parser.add_argument('--bamfileoutput', help='output bam file')
    # NOTE(review): bare attribute reference without a call -- this statement
    # has no effect.
    parser.add_argument
    args = parser.parse_args()

    import ast
    riboshiftdict = ast.literal_eval(
        args.riboshiftdict)  #convert string into dictionary
    print riboshiftdict
    print "parsing gff..."
    GTFgen = GFF.parse(args.GTFfile)
    print "loading bam file..."
    bamfile = pysam.AlignmentFile(args.bamfileinput, "rb")
    print "loading genome..."
    genome = twobitreader.TwoBitFile(args.twobitfile)
    print "writing bam out file..."
    # The output BAM is opened for writing using the input BAM as template.
    bamfileout = pysam.AlignmentFile(args.bamfileoutput,
                                     "wb",
                                     template=bamfile)

    # threshold is converted to int here; totreads is passed through exactly
    # as argparse produced it (the int default -1, or a string when given on
    # the command line).
    rfpdense = densebuilder(bamfile, GTFgen, genome, riboshiftdict,
                            int(args.threshold), args.totreads,
                            args.outputdata, args.assignment, bamfileout)
    rfpdense.builddense()
Exemple #9
0
if __name__ == '__main__':
    import ast

    # Command-line options as (flag, add_argument keywords), listed in the
    # order they are registered with the parser.
    cli_options = [
        ('--GFFfile', {'help': 'coding GFF or GTF file'}),
        ('--bamfile1', {'help': 'bamfile from first genome mapping'}),
        ('--bamfile2', {'help': 'bamfile from polyA removal'}),
        ('--densitypathandfilestring', {'help': 'density file output path'}),
        ('--wigpathandfile', {'help': 'wig file output path'}),
        ('--totreads', {'default': -1,
                        'help': 'total reads for normalization'}),
        ('--assignment', {'help': '5 or 3 end', 'required': True}),
        ('--riboshiftdict', {'help': 'dictionary of riboshifts'}),
        ('--bamfileoutput', {'help': 'output bam file'}),
        ('--softclipped', {'help': 'number of soft clipped allowed'}),
    ]
    parser = argparse.ArgumentParser()
    for flag, keywords in cli_options:
        parser.add_argument(flag, **keywords)
    args = parser.parse_args()

    # --riboshiftdict arrives as a string holding a Python literal.
    riboshiftdict = ast.literal_eval(args.riboshiftdict)
    GFFgen = GFF.parse(args.GFFfile)
    # In this block the first BAM is opened only to act as the template for
    # the output BAM; densebuilder receives the BAM paths themselves.
    bamgen0 = pysam.AlignmentFile(args.bamfile1, "rb")
    bamfileout = pysam.AlignmentFile(args.bamfileoutput, "wb",
                                     template=bamgen0)
    rfpdense = densebuilder(
        GFFgen,
        args.bamfile1,
        args.bamfile2,
        args.densitypathandfilestring,
        args.wigpathandfile,
        args.totreads,
        args.assignment,
        riboshiftdict,
        bamfileout,
        args.softclipped,
    )
    rfpdense.setdense()
Exemple #10
0
  help    = ".gff file")

# Output directory option; defaults to the current directory.
parser.add_argument("--out", metavar="STRING", type=str, default=".",
                    help="Output directory")

args = parser.parse_args()

# Input paths taken from the command line.
filBAM = args.bam
filGFF = args.gff
# Base name of the BAM file with its extension removed.
bam_basename = os.path.basename(filBAM)
name = os.path.splitext(bam_basename)[0]

# Parse the GFF annotation from an open file handle.
gffHandle = GFF.parse(open(filGFF))
# Open the BAM file for binary reading.
bamHandle = pysam.AlignmentFile(filBAM, "rb")

### Functions for read directions ###
def Forward(read):
    """Return True when ``read.is_reverse`` is falsy, otherwise False."""
    return not read.is_reverse

def Reverse(read):
    """Return True when ``read.is_reverse`` is truthy, otherwise False."""
    return bool(read.is_reverse)

### CALCULATE COVERAGE AND ANNOTATION FOR EVERY LOCUS_TAG ###
coverages = []
Exemple #11
0
def main():
    """Run ``get_mRNA_sequence`` on the parsed GTF and write the result.

    ``GTFfile`` is defined outside this function.
    """
    gff_records = makeGFFlist(GFF.parse(GTFfile))
    result = get_mRNA_sequence(gff_records)
    ucscIDlist, transcriptdict = result
    write_utr_stopcodon_csvfile(ucscIDlist, transcriptdict)
def main():
    """Run ``find_uORFs`` on the records loaded from the GTF annotation.

    ``GTFfile`` is defined outside this function.
    """
    gff_records = makeGFFlist(GFF.parse(GTFfile))
    find_uORFs(gff_records)