def combine_field_from_intervals( infasta, ingff, outfasta, field, filt ):
    """Write one FASTA record per distinct `field` value, joining interval sequences.

    Sequences come from the mapping returned by ``read_interval(infasta)`` and
    are looked up under ``(seqid, start, end)`` keys. Features come from
    ``read_gff(ingff, fields)`` and are grouped twice with ``by_key``: by
    ``'ID'`` (to find a name and note for a group) and by `field` (the
    features whose sequences get joined).

    For every group key, in sorted order:

    * name/note are taken from features of type 'mRNA', 'transcript' or
      'gene' found under the same key in the ID grouping; a later matching
      feature overrides an earlier one. Notes are URL-unquoted.
    * features of type `filt` that have an entry in the interval mapping are
      visited in ascending integer ``start`` order; the sequence is
      prepended for strand '-' and appended otherwise.
    * if any sequence was collected, a record is written whose header holds
      the name, the group key, the note, the ``start:end`` of every feature
      in the group (not only the `filt` ones) and a strand.

    Args:
        infasta: passed through to ``read_interval``.
        ingff: passed through to ``read_gff``.
        outfasta: object usable as a context manager whose managed value
            has ``write()``; its context is exited before returning.
        field: key handed to ``by_key`` to group the features to combine.
        filt: feature type whose sequences are concatenated.

    Note:
        No newline is written after the sequence; records are only separated
        if the stored interval sequences already end with one.
        # NOTE(review): confirm against read_interval.
    """
    intervals = read_interval( infasta )
    fields = ['seqid','source','type','start','end','score','strand','phase','attributes']
    features = read_gff( ingff, fields )
    parents = by_key( features, 'ID' )
    fff = by_key( features, field )
    with outfasta as out:
        for ff in sorted(fff.keys()):
            seqs = ''
            note = ''
            name = ''
            for parent in parents[ff]:
                if parent['type'] not in ('mRNA', 'transcript', 'gene'):
                    continue
                atts = parent['atts']
                if 'Note' in atts:
                    note = urllib.unquote(atts['Note'])
                elif 'note' in atts:
                    # The lowercase key lives in the attribute dict, like every
                    # other attribute checked here (it used to be tested on the
                    # feature itself, so it was never found).
                    note = urllib.unquote(atts['note'])
                if 'Name' in atts and atts['Name'] != ff:
                    name = atts['Name']
                elif 'name' in atts and atts['name'] != ff:
                    name = atts['name']

            group = list(fff[ff])
            # key= gives the same stable ordering as the former cmp-style
            # comparator on int(start).
            for feat in sorted(group, key=lambda x: int(x['start'])):
                if feat['type'] != filt:
                    continue
                span = (feat['seqid'], feat['start'], feat['end'])
                if span not in intervals:
                    continue
                if 'name' in feat['atts'] and feat['atts']['name'] != ff:
                    name = feat['atts']['name']
                if feat['strand'] == '-':
                    seqs = intervals[span] + seqs
                else:
                    seqs += intervals[span]

            if seqs:
                coords = ','.join(['%s:%s' % (g['start'], g['end']) for g in group])
                # Strand of the last feature in the group (grouping order).
                # This is the value the header previously picked up from a
                # leftover comprehension variable; it is now chosen explicitly.
                strand = group[-1]['strand']
                out.write( '>%s %s|%s %s(%s)\n%s' % (name, ff, note, coords, strand, seqs ) )
def get_name_from_intervals( infasta, ingff, outfasta, field, filt ):
    """Write a FASTA record, named after `field`, for every feature that has an interval.

    Sequences come from the mapping returned by ``read_interval(infasta)`` and
    are looked up under ``(seqid, start, end)`` keys. Each dict item yielded
    by ``read_gff(ingff, fields)`` whose key is present in that mapping is
    written as follows:

    * if `field` is in the feature's ``'atts'``, the header is
      ``>value start:end(strand)``;
    * otherwise, if `field` is a key of the feature itself, the header is
      just ``>value``;
    * otherwise nothing is written for that feature.

    Non-dict items yielded by ``read_gff`` are skipped.

    Args:
        infasta: passed through to ``read_interval``.
        ingff: passed through to ``read_gff``.
        outfasta: object usable as a context manager whose managed value
            has ``write()``; its context is exited before returning.
        field: attribute name (or feature column) used for the record name.
        filt: not used by this function; kept so the signature matches
            ``combine_field_from_intervals``.

    Note:
        No newline is written after the sequence; records are only separated
        if the stored interval sequences already end with one.
        # NOTE(review): confirm against read_interval.
    """
    intervals = read_interval( infasta )
    fields = ['seqid','source','type','start','end','score','strand','phase','attributes']
    with outfasta as out:
        for feature in read_gff( ingff, fields ):
            if not isinstance(feature, dict):
                continue
            span = (feature['seqid'], feature['start'], feature['end'])
            if span not in intervals:
                continue
            seq = intervals[span]
            if field in feature['atts']:
                out.write( '>%s %s:%s(%s)\n%s' % (feature['atts'][field], feature['start'], feature['end'], feature['strand'], seq))
            elif field in feature:
                out.write( '>%s\n%s' % (feature[field], seq))
# Example #3
# 0
def gff3_to_hints( ingff3, outhints ):
    """Convert the CDS features of a GFF3 input into 'CDSpart' hint records.

    Items are read with ``gff_utils.read_gff(ingff3)`` and handled as follows:

    * ``str`` items are passed through unchanged (presumably comment or
      directive lines -- verify against ``gff_utils.read_gff``);
    * items whose ``'type'`` is ``'CDS'`` are modified in place: their
      ``'attributes'`` become ``src=M;pri=100;grp=<Parent>`` (with
      ``<Parent>`` taken from ``item['atts']['Parent']``) and their
      ``'type'`` becomes ``'CDSpart'``;
    * every other item is dropped.

    The collected items are handed to ``gff_utils.write_gff(outhints, ...)``
    in their original order.

    Args:
        ingff3: passed through to ``gff_utils.read_gff``.
        outhints: passed through to ``gff_utils.write_gff``.
    """
    collected = []
    for record in gff_utils.read_gff( ingff3 ):
        if type(record) is str:
            collected.append( record )
            continue
        if record['type'] != 'CDS':
            continue
        # The group is the parent feature, so all CDS parts of one parent
        # share the same grp value.
        record['attributes'] = 'src=M;pri=100;grp=%s' % record['atts']['Parent']
        record['type'] = 'CDSpart'
        collected.append( record )
    gff_utils.write_gff( outhints, collected )