def combine_field_from_intervals( infasta, ingff, outfasta, field, filt ):
    """Write one FASTA record per distinct ``field`` value, joining its pieces.

    Features read from ``ingff`` are grouped by the value of ``field``.  For
    each group, the sequences of the members whose ``type`` equals ``filt``
    and whose ``(seqid, start, end)`` key is present in the intervals read
    from ``infasta`` are concatenated in ascending ``start`` order
    (minus-strand pieces are prepended instead of appended).  Groups that
    yield no sequence are skipped.

    The header has the form ``>NAME KEY|NOTE START:END,...(STRAND)`` where
    NAME/NOTE are taken from the mRNA/transcript/gene feature(s) whose ``ID``
    equals the group key.

    Parameters:
        infasta:  passed to ``read_interval``; the result is used as a
                  mapping keyed by ``(seqid, start, end)``.
        ingff:    passed to ``read_gff`` together with the GFF column names.
        outfasta: writable object usable as a context manager; it is closed
                  when the function returns.
        field:    key handed to ``by_key`` to group the features.
        filt:     feature type whose sequences are combined.
    """
    intervals = read_interval( infasta )
    fields = ['seqid','source','type','start','end','score','strand','phase','attributes']
    features = read_gff( ingff, fields )
    parents = by_key( features, 'ID' )
    groups = by_key( features, field )
    with outfasta as out:
        for key in sorted(groups.keys()):
            group = groups[key]
            seqs = ''
            note = ''
            name = ''
            # NOTE(review): assumes every `field` value also occurs as an ID;
            # confirm how by_key's result behaves for a missing key.
            for parent in parents[key]:
                if parent['type'] in ['mRNA', 'transcript', 'gene']:
                    atts = parent['atts']
                    # Notes are unquoted before use.  (urllib.unquote is the
                    # Python 2 spelling, as used by the rest of this file.)
                    if 'Note' in atts:
                        note = urllib.unquote(atts['Note'])
                    elif 'note' in atts:
                        # Fixed: the membership test used to look at the
                        # feature itself instead of its attribute dict.
                        note = urllib.unquote(atts['note'])
                    # A name equal to the group key adds nothing to the header.
                    if 'Name' in atts and atts['Name'] != key:
                        name = atts['Name']
                    elif 'name' in atts and atts['name'] != key:
                        name = atts['name']
            for feat in sorted(group, key=lambda x: int(x['start'])):
                ival = (feat['seqid'], feat['start'], feat['end'])
                if feat['type'] == filt and ival in intervals:
                    if 'name' in feat['atts'] and feat['atts']['name'] != key:
                        name = feat['atts']['name']
                    if feat['strand'] == '-':
                        # Minus strand: later pieces go in front.
                        seqs = intervals[ival] + seqs
                    else:
                        seqs += intervals[ival]
            if seqs != '':
                # Coordinates of every member of the group, in stored order.
                # The strand reported is that of the last member, tracked
                # explicitly rather than via a leaked loop variable.
                coords = []
                strand = ''
                for member in group:
                    coords.append( '%s:%s' % (member['start'], member['end']) )
                    strand = member['strand']
                out.write( '>%s %s|%s %s(%s)\n%s' % (name, key, note, ','.join(coords), strand, seqs ) )
def get_name_from_intervals( infasta, ingff, outfasta, field, filt ):
    """Write a FASTA record, named after ``field``, for each matching feature.

    Every dict feature yielded by ``read_gff`` whose ``(seqid, start, end)``
    key is present in the intervals read from ``infasta`` is written to
    ``outfasta``:

    * if ``field`` is one of the feature's attributes (``feature['atts']``)
      the header is ``>VALUE START:END(STRAND)``;
    * otherwise, if ``field`` is a key of the feature itself, the header is
      just ``>VALUE``;
    * otherwise nothing is written for that feature.

    Parameters:
        infasta:  passed to ``read_interval``; the result is used as a
                  mapping keyed by ``(seqid, start, end)``.
        ingff:    passed to ``read_gff`` together with the GFF column names.
        outfasta: writable object usable as a context manager; it is closed
                  when the function returns.
        field:    attribute or column name used to name each record.
        filt:     accepted but not used by this function.
                  NOTE(review): combine_field_from_intervals uses the same
                  parameter to restrict features by type -- confirm whether
                  filtering was intended here as well.
    """
    intervals = read_interval( infasta )
    fields = ['seqid','source','type','start','end','score','strand','phase','attributes']
    with outfasta as out:
        for feature in read_gff( ingff, fields ):
            # Skip anything that is not a feature record.
            if not isinstance(feature, dict):
                continue
            ival = (feature['seqid'], feature['start'], feature['end'])
            if ival not in intervals:
                continue
            seq = intervals[ival]
            if field in feature['atts']:
                out.write( '>%s %s:%s(%s)\n%s' % (feature['atts'][field], feature['start'], feature['end'], feature['strand'], seq) )
            elif field in feature:
                out.write( '>%s\n%s' % (feature[field], seq) )
def gff3_to_hints( ingff3, outhints ):
    """Convert the CDS features of a GFF3 input into ``CDSpart`` hint records.

    Items read by ``gff_utils.read_gff`` are handled as follows:

    * ``str`` items are passed through unchanged;
    * features of type ``CDS`` are retyped to ``CDSpart`` and their
      ``attributes`` replaced by ``src=M;pri=100;grp=<Parent>`` (the feature
      is modified in place);
    * every other feature is dropped.

    The collected items are handed to ``gff_utils.write_gff``.
    NOTE(review): the attribute string looks like an AUGUSTUS-style hint
    (manual source, priority 100) -- confirm against the consumer.

    Parameters:
        ingff3:   input passed to ``gff_utils.read_gff``.
        outhints: output passed to ``gff_utils.write_gff``.
    """
    records = gff_utils.read_gff( ingff3 )
    collected = []
    for record in records:
        if type(record) is str:
            # Non-feature line: keep verbatim.
            collected.append( record )
            continue
        if not record['type'] == 'CDS':
            continue
        # Group hints by the parent of the CDS.
        record['attributes'] = 'src=M;pri=100;grp=%s' % record['atts']['Parent']
        record['type'] = 'CDSpart'
        collected.append( record )
    gff_utils.write_gff( outhints, collected )