def publocus(args):
    """
    %prog publocus idsfile > idsfiles.publocus

    Given a list of model identifiers, convert each into a GenBank approved
    pub_locus.

    Example output:
    Medtr1g007020.1    MTR_1g007020
    Medtr1g007030.1    MTR_1g007030
    Medtr1g007060.1    MTR_1g007060A
    Medtr1g007060.2    MTR_1g007060B
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so developer-facing notes are kept in comments.
    #
    # args: list of command-line tokens. Exactly one positional argument (the
    #       identifiers file) is required; otherwise help is printed and the
    #       process exits via sys.exit().
    # Output: one tab-separated "model<TAB>pub_locus" line per isoform on
    #       stdout. Nothing is returned.
    from jcvi.utils.cbook import AutoVivification

    p = OptionParser(publocus.__doc__)
    p.add_option("--locus_tag", default="MTR_",
                 help="GenBank locus tag [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    locus_tag = opts.locus_tag

    # index[locus] -> {'pub_locus': str, 'isos': set of int isoform numbers}
    index = AutoVivification()
    idsfile, = args
    fp = must_open(idsfile)
    for row in fp:
        locus, chrom, sep, rank, iso = atg_name(
            row, retval="locus,chr,sep,rank,iso")
        if None in (locus, chrom, sep, rank, iso):
            logging.warning(
                "{0} is not a valid gene model identifier".format(row))
            continue
        if locus not in index:
            pub_locus = gene_name(chrom, rank, prefix=locus_tag, sep=sep)
            index[locus]['pub_locus'] = pub_locus
            index[locus]['isos'] = set()

        index[locus]['isos'].add(int(iso))

    for locus in index:
        pub_locus = index[locus]['pub_locus']
        # Store the sorted list back under `index` (the original wrote to an
        # undefined name `Index`, raising NameError on the first locus).
        isos = sorted(index[locus]['isos'])
        index[locus]['isos'] = isos
        if len(isos) > 1:
            # Isoform n maps to the n-th capital letter (1 -> A ... 26 -> Z).
            # Numbers above 26 have no letter; because `isos` is sorted they
            # sit at the tail and are dropped by zip() below.
            new = [chr(n + 64) for n in isos if n < 27]
            for i, ni in zip(isos, new):
                print("\t".join(("{0}.{1}".format(locus, i),
                                 "{0}{1}".format(pub_locus, ni))))
        else:
            # A locus with a single isoform gets no letter suffix.
            print("\t".join(("{0}.{1}".format(locus, isos[0]), pub_locus)))
def publocus(args):
    """
    %prog publocus idsfile > idsfiles.publocus

    Given a list of model identifiers, convert each into a GenBank approved
    pub_locus.

    Example output:
    Medtr1g007020.1    MTR_1g007020
    Medtr1g007030.1    MTR_1g007030
    Medtr1g007060.1    MTR_1g007060A
    Medtr1g007060.2    MTR_1g007060B
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so developer-facing notes are kept in comments.
    #
    # args: list of command-line tokens. Exactly one positional argument (the
    #       identifiers file) is required; otherwise help is printed and the
    #       process exits via sys.exit().
    # Output: one tab-separated "model<TAB>pub_locus" line per isoform on
    #       stdout. Nothing is returned.
    from jcvi.utils.cbook import AutoVivification

    p = OptionParser(publocus.__doc__)
    p.add_option("--locus_tag", default="MTR_",
                 help="GenBank locus tag [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    locus_tag = opts.locus_tag

    # index[locus] -> {'pub_locus': str, 'isos': set of int isoform numbers}
    index = AutoVivification()
    idsfile, = args
    fp = must_open(idsfile)
    for row in fp:
        locus, chrom, sep, rank, iso = atg_name(row, retval="locus,chr,sep,rank,iso")
        if None in (locus, chrom, sep, rank, iso):
            logging.warning("{0} is not a valid gene model identifier".format(row))
            continue
        if locus not in index:
            pub_locus = gene_name(chrom, rank, prefix=locus_tag, sep=sep)
            index[locus]['pub_locus'] = pub_locus
            index[locus]['isos'] = set()

        index[locus]['isos'].add(int(iso))

    for locus in index:
        pub_locus = index[locus]['pub_locus']
        # Write the sorted isoform list back to `index`; the previous target
        # `Index` was an undefined name and raised NameError.
        isos = sorted(index[locus]['isos'])
        index[locus]['isos'] = isos
        if len(isos) > 1:
            # 1 -> "A", 2 -> "B", ... 26 -> "Z". Larger isoform numbers have
            # no letter and, being last in the sorted list, are cut by zip().
            new = [chr(n + 64) for n in isos if n < 27]
            for i, ni in zip(isos, new):
                print("\t".join(("{0}.{1}".format(locus, i),
                                 "{0}{1}".format(pub_locus, ni))))
        else:
            # Single-isoform loci keep the bare pub_locus (no letter suffix).
            print("\t".join(("{0}.{1}".format(locus, isos[0]), pub_locus)))
def group(args):
    """
    %prog group tabfile > tabfile.grouped

    Given a tab-delimited file, either group all elements within the file or
    group the elements in the value column(s) based on the key (groupby) column

    For example, convert this | into this
    ---------------------------------------
    a  2  3  4  | a,2,3,4,5,6
    a  5  6     | b,7,8
    b  7  8     | c,9,10,11
    c  9        |
    c  10 11    |

    If grouping by a particular column, convert this | into this:
    ---------------------------------------------
    a  2  3  4  | a  2,5  3,6  4
    a  5  6     | b  7    8
    b  7  8     | c  9,10 11
    c  9        |
    c  10 11    |

    By default, it uniqifies all the grouped elements
    """
    # NOTE: the docstring above is passed to OptionParser as usage text, so
    # developer notes are kept in comments.
    #
    # args: list of command-line tokens; exactly one positional argument (the
    #       table file) is required, otherwise help is printed and the
    #       process exits.
    # Output: grouped rows on stdout; nothing is returned.
    # Exits with status 1 when --groupby is outside the columns of the first
    # row read.
    from jcvi.utils.cbook import AutoVivification
    from jcvi.utils.grouper import Grouper

    p = OptionParser(group.__doc__)
    p.set_sep()
    p.add_option("--groupby", default=None, type="int",
                 help="Default column to groupby")
    p.add_option("--groupsep", default=",",
                 help="Separator to join the grouped elements")
    p.add_option(
        "--nouniq",
        default=False,
        action="store_true",
        help="Do not uniqify the grouped elements",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (tabfile,) = args
    sep = opts.sep
    groupby = opts.groupby
    groupsep = opts.groupsep

    # cols grows to the widest row seen so far; it drives the output layout.
    cols = []
    # Column mode keeps a nested key -> column -> values mapping; whole-file
    # mode merges every row's atoms into connected groups.
    grouper = AutoVivification() if groupby is not None else Grouper()
    fp = must_open(tabfile)
    for row in fp:
        row = row.rstrip()
        atoms = row.split(sep)
        if groupby is not None:
            if len(cols) < len(atoms):
                cols = list(range(len(atoms)))
            if groupby not in cols:
                logging.error(
                    "groupby col index `{0}` is out of range".format(groupby))
                # This is an error path: report failure to the shell (a bare
                # sys.exit() would exit with status 0).
                sys.exit(1)

            key = atoms[groupby]
            for col in cols:
                if col == groupby:
                    continue
                # A missing entry autovivifies to an empty (falsy) mapping,
                # which is replaced by the real container here.
                if not grouper[key][col]:
                    grouper[key][col] = [] if opts.nouniq else set()
                if col < len(atoms):
                    # str.split() yields the whole cell when groupsep is
                    # absent, so one loop covers both cases.
                    for atom in atoms[col].split(groupsep):
                        if opts.nouniq:
                            grouper[key][col].append(atom)
                        else:
                            grouper[key][col].add(atom)
        else:
            grouper.join(*atoms)

    for key in grouper:
        if groupby is not None:
            line = []
            for col in cols:
                if col == groupby:
                    line.append(key)
                elif col in grouper[key].keys():
                    # NOTE: uniqified values live in a set, so their order in
                    # the joined cell is not defined.
                    line.append(groupsep.join(grouper[key][col]))
                else:
                    line.append("na")
            print(sep.join(line))
        else:
            print(groupsep.join(key))
"""
Connect to databases (Sybase, MySQL and PostgreSQL database backends)
"""

import os.path as op
import sys
import logging
import re

from jcvi.formats.base import must_open
from jcvi.apps.base import OptionParser, ActionDispatcher, sh, getusername
from jcvi.utils.cbook import AutoVivification


# set up valid database connection params
# One row per supported backend: (connector name, port, driver module name,
# default host). Besides the three backends named in the module docstring,
# an Oracle entry is registered as well.
valid_dbconn = AutoVivification()
for dbconn, port, module, host in (
    ("Sybase", 2025, "Sybase", "SYBPROD"),
    ("MySQL", 3306, "MySQLdb", "mysql-lan-dev"),
    ("PostgreSQL", 5432, "psycopg2", "pgsql-lan-dev"),
    ("Oracle", 1521, "cx_Oracle", "DBNAME.tacc.utexas.edu"),
):
    valid_dbconn[dbconn]['port'] = port
    valid_dbconn[dbconn]['module'] = module
    valid_dbconn[dbconn]['hostname'] = host


def db_defaults(connector='Sybase'):
    """
    Return the default connection triple for `connector`.

    The first element is the hostname registered in `valid_dbconn`; the other
    two are the literal string "access" (presumably user name and password --
    confirm against callers).
    """
    hostname = valid_dbconn[connector]['hostname']
    return hostname, "access", "access"
def group(args):
    """
    %prog group tabfile > tabfile.grouped

    Given a tab-delimited file, either group all elements within the file or
    group the elements in the value column(s) based on the key (groupby) column

    For example, convert this | into this
    ---------------------------------------
    a  2  3  4  | a,2,3,4,5,6
    a  5  6     | b,7,8
    b  7  8     | c,9,10,11
    c  9        |
    c  10 11    |

    If grouping by a particular column, convert this | into this:
    ---------------------------------------------
    a  2  3  4  | a  2,5  3,6  4
    a  5  6     | b  7    8
    b  7  8     | c  9,10 11
    c  9        |
    c  10 11    |

    By default, it uniqifies all the grouped elements
    """
    # NOTE: the docstring above is passed to OptionParser as usage text, so
    # developer notes are kept in comments.
    #
    # args: list of command-line tokens; exactly one positional argument (the
    #       table file) is required, otherwise help is printed and the
    #       process exits.
    # Output: grouped rows on stdout; nothing is returned.
    # Exits with status 1 when --groupby is outside the columns of the first
    # row read.
    from jcvi.utils.cbook import AutoVivification
    from jcvi.utils.grouper import Grouper

    p = OptionParser(group.__doc__)
    p.set_sep()
    p.add_option("--groupby", default=None, type='int',
                 help="Default column to groupby [default: %default]")
    p.add_option("--groupsep", default=',',
                 help="Separator to join the grouped elements [default: `%default`]")
    p.add_option("--nouniq", default=False, action="store_true",
                 help="Do not uniqify the grouped elements [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    tabfile, = args
    sep = opts.sep
    groupby = opts.groupby
    groupsep = opts.groupsep

    # cols grows to the widest row seen so far; it drives the output layout.
    cols = []
    # Column mode keeps a nested key -> column -> values mapping; whole-file
    # mode merges every row's atoms into connected groups.
    grouper = AutoVivification() if groupby is not None else Grouper()
    fp = must_open(tabfile)
    for row in fp:
        row = row.rstrip()
        atoms = row.split(sep)
        if groupby is not None:
            if len(cols) < len(atoms):
                # list(range(...)) behaves the same on Python 2 and 3.
                cols = list(range(len(atoms)))
            if groupby not in cols:
                logging.error("groupby col index `{0}` is out of range".format(groupby))
                # Error path: exit non-zero (a bare sys.exit() reports
                # success to the calling shell).
                sys.exit(1)

            key = atoms[groupby]
            for col in cols:
                if col == groupby:
                    continue
                # A missing entry autovivifies to an empty (falsy) mapping,
                # which is replaced by the real container here.
                if not grouper[key][col]:
                    grouper[key][col] = [] if opts.nouniq else set()
                if col < len(atoms):
                    # str.split() yields the whole cell when groupsep is
                    # absent, so one loop covers both cases.
                    for atom in atoms[col].split(groupsep):
                        if opts.nouniq:
                            grouper[key][col].append(atom)
                        else:
                            grouper[key][col].add(atom)
        else:
            grouper.join(*atoms)

    for key in grouper:
        if groupby is not None:
            line = []
            for col in cols:
                if col == groupby:
                    line.append(key)
                elif col in grouper[key].keys():
                    # NOTE: uniqified values live in a set, so their order in
                    # the joined cell is not defined.
                    line.append(groupsep.join(grouper[key][col]))
                else:
                    line.append("na")
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print(sep.join(line))
        else:
            print(groupsep.join(key))
def reindex(args):
    """
    %prog reindex gffile pep.fasta ref.pep.fasta

    Reindex the splice isoforms (mRNA) in input GFF file, preferably
    generated after PASA annotation update

    In the input GFF file, there can be several types of mRNA within a locus:
    * CDS matches reference, UTR extended, inherits reference mRNA ID
    * CDS (slightly) different from reference, inherits reference mRNA ID
    * Novel isoform added by PASA, have IDs like "LOCUS.1.1", "LOCUS.1.2"
    * Multiple mRNA collapsed due to shared structure, have IDs like "LOCUS.1-LOCUS.1.1"

    In the case of multiple mRNA which have inherited the same reference mRNA ID,
    break ties by comparing the new protein with the reference protein using
    EMBOSS `needle` to decide which mRNA retains ID and which is assigned a new ID.

    All mRNA identifiers should follow the AGI naming conventions.

    When reindexing the isoform identifiers, order mRNA based on:
    * decreasing transcript length
    * decreasing support from multiple input datasets used to run pasa.consolidate()
    """
    # NOTE: the docstring above is the usage text given to OptionParser.
    # args: command-line tokens; exactly three positionals are required
    #       (GFF file, new peptide FASTA, reference peptide FASTA).
    # Output: the reindexed GFF is written to opts.outfile; nothing is
    #       returned.
    from jcvi.formats.gff import make_index
    from jcvi.formats.fasta import Fasta
    from jcvi.apps.emboss import needle
    from jcvi.formats.base import FileShredder
    from tempfile import mkstemp

    p = OptionParser(reindex.__doc__)
    p.add_option("--scores", type="str", \
        help="read from existing EMBOSS `needle` scores file")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gffile, pep, refpep, = args
    gffdb = make_index(gffile)
    reffasta = Fasta(refpep)

    # Without a precomputed scores file, collect (reference id, candidate id)
    # pairs in a temporary file that is later passed to `needle`.
    # NOTE(review): `fh`, the descriptor returned by mkstemp, is not used or
    # closed anywhere in this function.
    if not opts.scores:
        fh, pairsfile = mkstemp(prefix='pairs', suffix=".txt", dir=".")
        fw = must_open(pairsfile, "w")

    # conflict[geneid][iso] -> list of candidate records competing for the
    # same isoform number; novel[geneid] -> records needing a new number.
    # Each record is (mrna id, iso or None, newiso, start, exon bp, len(pf)).
    conflict, novel = AutoVivification(), {}
    for gene in gffdb.features_of_type('gene', order_by=('seqid', 'start')):
        geneid = atg_name(gene.id, retval='locus')
        novel[geneid] = []
        updated_mrna, hybrid_mrna = [], []
        for mrna in gffdb.children(gene, featuretype='mRNA', order_by=('seqid', 'start')):
            # Only ids matching atg_name_pat and containing no "_" are
            # considered; ids containing "-" are treated as collapsed
            # ("hybrid") mRNAs, the rest as updated mRNAs.
            if re.match(atg_name_pat, mrna.id) is not None and "_" not in mrna.id:
                pf, mrnaid = parse_prefix(mrna.id)
                mlen = gffdb.children_bp(mrna, child_featuretype='exon')
                if "-" in mrna.id:
                    hybrid_mrna.append((mrna.id, mrna.start, mlen, len(pf)))
                else:
                    updated_mrna.append((mrna.id, mrna.start, mlen, len(pf)))

        # Order: start ascending, then exon length descending, then len(pf)
        # descending.
        for mrna in sorted(updated_mrna, key=lambda k:(k[1], -k[2], -k[3])):
            pf, mrnaid = parse_prefix(mrna[0])
            mstart, mlen = mrna[1], mrna[2]

            iso = atg_name(mrnaid, retval='iso')
            # newiso is iso plus whatever text of mrnaid is left after
            # removing the atg_name_pat match; it equals iso when nothing is
            # left over.
            newiso = "{0}{1}".format(iso, re.sub(atg_name_pat, "", mrnaid))
            if iso == newiso:
                if iso not in conflict[geneid]:
                    conflict[geneid][iso] = []
                conflict[geneid][iso].append((mrna[0], iso, newiso, \
                    mstart, mlen, len(pf)))
            else:
                novel[geneid].append((mrna[0], None, newiso, \
                    mstart, mlen, len(pf)))

        for mrna in sorted(hybrid_mrna, key=lambda k:(k[1], -k[2], -k[3])):
            pf, mrnaid = parse_prefix(mrna[0])
            mstart, mlen = mrna[1], mrna[2]

            # Evaluate every "-"-separated member id of the collapsed mRNA.
            _iso, _newiso = [], []
            for id in sorted(mrnaid.split("-")):
                a = atg_name(id, retval='iso')
                b = "{0}{1}".format(a, re.sub(atg_name_pat, "", id))
                _iso.append(a)
                _newiso.append(b)

            _novel = None
            newiso = "-".join(str(x) for x in set(_newiso))
            for iso, niso in zip(_iso, _newiso):
                if iso == niso:
                    # The first member whose isoform number is still
                    # unclaimed for this gene claims it and ends the search.
                    if iso not in conflict[geneid]:
                        conflict[geneid][iso] = \
                            [(mrna[0], iso, newiso, mstart, mlen, len(pf))]
                        _novel = None
                        break

                _novel = True

            # No member claimed an isoform number: record the mRNA as novel.
            if _novel is not None:
                novel[geneid].append((mrna[0], None, newiso, \
                    mstart, mlen, len(pf)))

        # Emit one "reference id <TAB> candidate id" line per candidate whose
        # reference id is present in the reference FASTA.
        if not opts.scores:
            for isoform in sorted(conflict[geneid]):
                mrnaid = "{0}.{1}".format(geneid, isoform)
                if mrnaid in reffasta.keys():
                    for mrna in conflict[geneid][isoform]:
                        print >> fw, "\t".join(str(x) for x in (mrnaid, mrna[0]))

    scoresfile = None
    if not opts.scores:
        fw.close()
        needle([pairsfile, refpep, pep])
        FileShredder([pairsfile], verbose=False)
        # Name of the scores file is derived from the pairs file name;
        # presumably this mirrors where `needle` writes its output -- confirm.
        scoresfile = "{0}.scores".format(pairsfile.rsplit(".")[0])
    else:
        scoresfile = opts.scores

    scores = read_scores(scoresfile, sort=True, trimsuffix=False)

    # For each contested isoform number, the candidate named by the first
    # score entry keeps the number (primary); the rest become novel.
    primary = {}
    for geneid in conflict:
        primary[geneid] = []
        for iso in sorted(conflict[geneid]):
            conflict[geneid][iso].sort(key=lambda k:(k[3], -k[4], -k[5]))
            _iso = "{0}.{1}".format(geneid, iso)
            # No alignment score available: every candidate becomes novel.
            if _iso not in scores:
                novel[geneid].extend(conflict[geneid][iso])
                continue
            # NOTE(review): despite its name, top_score is compared against
            # the candidate mRNA id (v[0]) below, so it presumably holds the
            # id of the best-scoring candidate -- confirm in read_scores.
            top_score = scores[_iso][0][1]
            result = next((i for i, v in enumerate(conflict[geneid][iso]) if v[0] == top_score), None)
            if result is not None:
                primary[geneid].append(conflict[geneid][iso][result])
                del conflict[geneid][iso][result]
                if geneid not in novel:
                    novel[geneid] = []
                novel[geneid].extend(conflict[geneid][iso])
        novel[geneid].sort(key=lambda k:(k[3], -k[4], -k[5]))

    fw = must_open(opts.outfile, 'w')
    for gene in gffdb.features_of_type('gene', order_by=('seqid', 'start')):
        # NOTE(review): here geneid is the raw gene.id, whereas `primary` and
        # `novel` were keyed by atg_name(gene.id, retval='locus') -- confirm
        # that the two always agree.
        geneid = gene.id
        print >> fw, gene
        seen = []
        if geneid in primary:
            # all_mrna aliases primary[geneid]; extend() therefore appends
            # the novel records to that list in place.
            all_mrna = primary[geneid]
            all_mrna.extend(novel[geneid])
            for iso, mrna in enumerate(all_mrna):
                _mrna = gffdb[mrna[0]]
                _iso = mrna[1]
                if mrna not in novel[geneid]:
                    # Retained isoform: keep its number and remember it.
                    seen.append(int(mrna[1]))
                else:
                    # Novel isoform: number it past the highest retained
                    # isoform, offset by its position in all_mrna.
                    mseen = 0 if len(seen) == 0 else max(seen)
                    _iso = (mseen + iso + 1) - len(seen)

                _mrnaid = "{0}.{1}".format(geneid, _iso)
                _mrna['ID'], _mrna['_old_ID'] = [_mrnaid], [_mrna.id]

                print >> fw, _mrna
                # Re-parent every child feature to the renamed mRNA.
                for c in gffdb.children(_mrna, order_by=('start')):
                    c['Parent'] = [_mrnaid]
                    print >> fw, c
        else:
            # Genes that were not reindexed are written out unchanged.
            for feat in gffdb.children(gene, order_by=('seqid', 'start')):
                print >> fw, feat

    fw.close()
def consolidate(args):
    """
    %prog consolidate gffile1 gffile2 ... > consolidated.out

    Given 2 or more gff files generated by pasa annotation comparison,
    iterate through each locus (shared locus name or overlapping CDS)
    and identify same/different isoforms (shared splicing structure)
    across the input datasets.

    If `slop` is enabled, consolidation will collapse any variation
    in terminal UTR lengths, keeping the longest as representative.
    """
    # NOTE: the docstring above is the usage text given to OptionParser.
    # args: command-line tokens; at least two GFF files are required,
    #       otherwise help is printed and the process exits.
    # Output: consolidated GFF3 written to opts.outfile, plus optional
    #       "<outfile prefix>.summary.txt" / ".clusters.txt" tables.
    #       Nothing is returned.
    from jcvi.formats.base import longest_unique_prefix
    from jcvi.formats.gff import make_index, match_subfeats
    from jcvi.utils.cbook import AutoVivification
    from jcvi.utils.grouper import Grouper
    from itertools import combinations, product

    supported_modes = ["name", "coords"]
    p = OptionParser(consolidate.__doc__)
    p.add_option("--slop", default=False, action="store_true",
                 help="allow minor variation in terminal 5'/3' UTR" + \
                      " start/stop position [default: %default]")
    p.add_option("--inferUTR", default=False, action="store_true",
                 help="infer presence of UTRs from exon coordinates")
    p.add_option("--mode", default="name", choices=supported_modes,
                 help="method used to determine overlapping loci")
    p.add_option("--summary", default=False, action="store_true",
                 help="Generate summary table of consolidation process")
    p.add_option("--clusters", default=False, action="store_true",
                 help="Generate table of cluster members after consolidation")
    p.set_outfile()

    opts, args = p.parse_args(args)
    slop = opts.slop
    inferUTR = opts.inferUTR
    mode = opts.mode

    if len(args) < 2:
        sys.exit(not p.print_help())

    # One feature database per input file, keyed by the file's unique prefix.
    gffdbx = {}
    for gffile in args:
        dbn = longest_unique_prefix(gffile, args)
        gffdbx[dbn] = make_index(gffile)

    # Group genes into loci: by shared gene id ("name" mode) or by CDS
    # overlap on the same strand across datasets ("coords" mode).
    loci = Grouper()
    for dbn in gffdbx:
        odbns = [odbn for odbn in gffdbx if dbn != odbn]
        for gene in gffdbx[dbn].features_of_type('gene', order_by=('seqid', 'start')):
            if mode == "name":
                loci.join(gene.id, (gene.id, dbn))
            else:
                if (gene.id, dbn) not in loci:
                    loci.join((gene.id, dbn))
                    gene_cds = list(gffdbx[dbn].children(gene, \
                        featuretype='CDS', order_by=('start')))
                    if not gene_cds:
                        # A gene without CDS children has no CDS span to
                        # intersect; leave it as a singleton locus instead
                        # of raising IndexError on gene_cds[0].
                        continue
                    gene_cds_start, gene_cds_stop = gene_cds[0].start, \
                        gene_cds[-1].stop
                    for odbn in odbns:
                        for ogene_cds in gffdbx[odbn].region(seqid=gene.seqid, \
                                start=gene_cds_start, end=gene_cds_stop, \
                                strand=gene.strand, featuretype='CDS'):
                            for ogene in gffdbx[odbn].parents(ogene_cds, featuretype='gene'):
                                loci.join((gene.id, dbn), (ogene.id, odbn))

    # gfeats[gene] -> representative gene feature spanning all members;
    # mrna[gene][dbn] -> list of mRNA features of that locus in dataset dbn.
    gfeats = {}
    mrna = AutoVivification()
    for i, locus in enumerate(loci):
        # "coords" mode assigns synthetic ids; "name" mode reuses the gene id.
        gene = "gene_{0:0{pad}}".format(i, pad=6) \
            if mode == "coords" else None

        for elem in locus:
            # In "name" mode a locus also contains the bare gene id string;
            # only the (gene id, dataset) tuples carry features.
            if isinstance(elem, tuple):
                _gene, dbn = elem
                if gene is None:
                    gene = _gene

                g = gffdbx[dbn][_gene]
                if gene not in gfeats:
                    gfeats[gene] = g
                    gfeats[gene].attributes['ID'] = [gene]
                else:
                    # Widen the representative gene to cover every member.
                    if g.start < gfeats[gene].start:
                        gfeats[gene].start = g.start
                    if g.stop > gfeats[gene].stop:
                        gfeats[gene].stop = g.stop

                c = list(gffdbx[dbn].children(_gene, featuretype='mRNA', order_by='start'))
                if len(c) > 0:
                    mrna[gene][dbn] = c

    fw = must_open(opts.outfile, "w")
    print("##gff-version 3", file=fw)
    # seen counts how often each consolidated mRNA id has been emitted, so
    # repeats can be suffixed to stay unique.
    seen = {}
    if opts.summary:
        summaryfile = "{0}.summary.txt".format(opts.outfile.rsplit(".")[0])
        sfw = must_open(summaryfile, "w")
        summary = ["id"]
        summary.extend(gffdbx.keys())
        print("\t".join(str(x) for x in summary), file=sfw)
    if opts.clusters:
        clustersfile = "{0}.clusters.txt".format(opts.outfile.rsplit(".")[0])
        cfw = must_open(clustersfile, "w")
        clusters = ["id", "dbns", "members", "trlens"]
        print("\t".join(str(x) for x in clusters), file=cfw)

    for gene in mrna:
        # g clusters (dataset, mRNA id, mRNA span length) triples that share
        # the same structure.
        g = Grouper()
        dbns = list(combinations(mrna[gene], 2))
        if len(dbns) > 0:
            for dbn1, dbn2 in dbns:
                dbx1, dbx2 = gffdbx[dbn1], gffdbx[dbn2]
                for mrna1, mrna2 in product(mrna[gene][dbn1], mrna[gene][dbn2]):
                    mrna1s, mrna2s = mrna1.stop - mrna1.start + 1, \
                        mrna2.stop - mrna2.start + 1
                    # Register both as singletons first so unmatched mRNAs
                    # are still reported.
                    g.join((dbn1, mrna1.id, mrna1s))
                    g.join((dbn2, mrna2.id, mrna2s))

                    if match_subfeats(mrna1, mrna2, dbx1, dbx2, featuretype='CDS'):
                        res = []
                        ftypes = ['exon'] if inferUTR else ['five_prime_UTR', 'three_prime_UTR']
                        for ftype in ftypes:
                            res.append(match_subfeats(mrna1, mrna2, dbx1, dbx2, featuretype=ftype, slop=slop))

                        # Merge only when the CDS and every compared
                        # feature type agree.
                        if all(r == True for r in res):
                            g.join((dbn1, mrna1.id, mrna1s), (dbn2, mrna2.id, mrna2s))
        else:
            # Locus present in a single dataset: nothing to compare.
            for dbn1 in mrna[gene]:
                for mrna1 in mrna[gene][dbn1]:
                    g.join((dbn1, mrna1.id, mrna1.stop - mrna1.start + 1))

        print(gfeats[gene], file=fw)

        for group in g:
            # Longest member first; it becomes the representative mRNA.
            group.sort(key=lambda x: x[2], reverse=True)
            dbs, mrnas = [el[0] for el in group], [el[1] for el in group]
            d, m = dbs[0], mrnas[0]

            dbid, _mrnaid = "|".join(str(x) for x in set(dbs)), []
            # Order-preserving de-duplication of member mRNA ids.
            for x in mrnas:
                if x not in _mrnaid:
                    _mrnaid.append(x)
            mrnaid = "{0}|{1}".format(dbid, "-".join(_mrnaid))
            if mrnaid not in seen:
                seen[mrnaid] = 0
            else:
                seen[mrnaid] += 1
                mrnaid = "{0}-{1}".format(mrnaid, seen[mrnaid])

            _mrna = gffdbx[d][m]
            _mrna.attributes['ID'] = [mrnaid]
            _mrna.attributes['Parent'] = [gene]
            children = gffdbx[d].children(m, order_by='start')
            print(_mrna, file=fw)
            for child in children:
                child.attributes['ID'] = ["{0}|{1}".format(dbid, child.id)]
                child.attributes['Parent'] = [mrnaid]
                print(child, file=fw)

            if opts.summary:
                # Y/N flag per dataset: does it contribute to this cluster?
                summary = [mrnaid]
                summary.extend(['Y' if db in set(dbs) else 'N' for db in gffdbx])
                print("\t".join(str(x) for x in summary), file=sfw)

            if opts.clusters:
                clusters = [mrnaid]
                clusters.append(",".join(str(el[0]) for el in group))
                clusters.append(",".join(str(el[1]) for el in group))
                clusters.append(",".join(str(el[2]) for el in group))
                print("\t".join(str(x) for x in clusters), file=cfw)

    fw.close()
    if opts.summary:
        sfw.close()
    if opts.clusters:
        cfw.close()
def consolidate(args):
    """
    %prog consolidate gffile1 gffile2 ... > consolidated.out

    Given 2 or more gff files generated by pasa annotation comparison,
    iterate through every gene locus and identify all cases of same and
    different isoforms across the different input datasets.
    """
    # NOTE: the docstring above is the usage text given to OptionParser.
    # args: command-line tokens; at least two GFF files are required,
    #       otherwise help is printed and the process exits.
    # Output: consolidated GFF3 written to opts.outfile and a per-mRNA
    #       presence/absence summary table written to stderr. Nothing is
    #       returned.
    from jcvi.formats.base import longest_unique_prefix
    from jcvi.formats.gff import make_index
    from jcvi.utils.cbook import AutoVivification
    from jcvi.utils.grouper import Grouper
    from itertools import combinations, product

    p = OptionParser(consolidate.__doc__)
    p.add_option("--slop", default=False, action="store_true",
                 help="allow minor variation in terminal 5'/3' UTR" + \
                      " start/stop position [default: %default]")
    p.set_outfile()

    opts, args = p.parse_args(args)
    slop = opts.slop

    if len(args) < 2:
        sys.exit(not p.print_help())

    # gffdbx: one feature database per input file, keyed by unique prefix.
    # gene_coords[gene id]: every start/stop seen for that gene in any input.
    # mrna[gene id][dbn]: list of mRNA features of that gene in dataset dbn.
    gffdbx = {}
    gene_coords = {}
    mrna = AutoVivification()
    for gffile in args:
        dbn = longest_unique_prefix(gffile, args)
        gffdbx[dbn] = make_index(gffile)
        for gene in gffdbx[dbn].features_of_type('gene', order_by=('seqid', 'start')):
            if gene.id not in gene_coords:
                gene_coords[gene.id] = []
            gene_coords[gene.id].extend([gene.start, gene.stop])

            c = list(gffdbx[dbn].children(gene, featuretype='mRNA', order_by='start'))
            if len(c) > 0:
                mrna[gene.id][dbn] = c

    fw = must_open(opts.outfile, "w")
    print >> fw, "##gff-version 3"
    summary = ["id"]
    summary.extend(gffdbx.keys())
    print >> sys.stderr, "\t".join(str(x) for x in summary)
    for gene in mrna:
        # g clusters (dataset, mRNA id) pairs judged to be the same isoform.
        g = Grouper()
        dbns = list(combinations(mrna[gene], 2))
        if len(dbns) > 0:
            for dbn1, dbn2 in dbns:
                for mrna1, mrna2 in product(mrna[gene][dbn1], mrna[gene][dbn2]):
                    # Register both as singletons first so unmatched mRNAs
                    # are still reported.
                    g.join((dbn1, mrna1.id))
                    g.join((dbn2, mrna2.id))

                    # The UTRs are compared only after the default
                    # match_subfeats comparison succeeds.
                    fUTR, tUTR = None, None
                    if match_subfeats(mrna1, mrna2, gffdbx[dbn1], gffdbx[dbn2]):
                        fUTR = match_subfeats(mrna1, mrna2, gffdbx[dbn1], gffdbx[dbn2], \
                            featuretype='five_prime_UTR', slop=slop)
                        tUTR = match_subfeats(mrna1, mrna2, gffdbx[dbn1], gffdbx[dbn2], \
                            featuretype='three_prime_UTR', slop=slop)

                    if fUTR and tUTR:
                        g.join((dbn1, mrna1.id), (dbn2, mrna2.id))
        else:
            # Gene present in a single dataset: nothing to compare.
            for dbn1 in mrna[gene]:
                for mrna1 in mrna[gene][dbn1]:
                    g.join((dbn1, mrna1.id))

        # Emit the gene once, widened to the extreme coordinates seen across
        # all datasets.
        dbn = mrna[gene].keys()[0]
        gene_coords[gene].sort()
        _gene = gffdbx[dbn][gene]
        _gene.start, _gene.stop = gene_coords[gene][0], gene_coords[gene][-1]
        print >> fw, _gene

        logging.debug(list(g))
        for group in g:
            dbs, mrnas = [el[0] for el in group], [el[1] for el in group]
            d, m = dbs[0], mrnas[0]
            if slop:
                # With slop enabled the longest member represents the group.
                mlen = 0
                for D, M in zip(dbs, mrnas):
                    _mrna = gffdbx[D][M]
                    _mlen = (_mrna.stop - _mrna.start) + 1
                    if _mlen > mlen:
                        d, m, mlen = D, M, _mlen

            dbid, _mrnaid = "".join(str(x) for x in set(dbs)), []
            # Order-preserving de-duplication. The former one-line
            # comprehension tested membership against the still-empty list,
            # so ids repeated across datasets were never collapsed.
            for x in mrnas:
                if x not in _mrnaid:
                    _mrnaid.append(x)
            mrnaid = "{0}:{1}".format(dbid, "-".join(_mrnaid))

            _mrna = gffdbx[d][m]
            _mrna.attributes['ID'] = [mrnaid]

            children = gffdbx[d].children(m, order_by='start')
            print >> fw, _mrna
            for child in children:
                child.attributes['ID'] = ["{0}:{1}".format(dbid, child.id)]
                child.attributes['Parent'] = [mrnaid]
                print >> fw, child

            # Y/N flag per dataset: does it contribute to this group?
            summary = [mrnaid]
            summary.extend(['Y' if db in set(dbs) else 'N' for db in gffdbx])
            print >> sys.stderr, "\t".join(str(x) for x in summary)

    fw.close()