def save_predotar():
    """Write a per-genome table of Predotar prediction fractions.

    For every genome returned by ``ppr.get_genomes()`` that has at least 50
    records, count how many records carry each known ``"predotar"``
    annotation value, convert the counts to fractions of the genome's record
    total, and collect one row per genome.  Rows are sorted by their first
    fraction column and handed to ``utils.write_data`` together with a
    header, targeting ``output/ppr_predotar.dat``.

    Raises:
        ValueError: if a record's ``"predotar"`` annotation is not one of
            the expected category strings.
        KeyError: if a record has no ``"predotar"`` annotation (assuming
            ``annotations`` is a plain mapping).
    """
    categories = [
        u"none",
        u"mitochondrial",
        u"plastid",
        u"er",
        u"elsewhere",
        u"possibly mitochondrial",
        u"possibly plastid",
        u"possibly er",
        u"possibly elsewhere",
    ]

    table = []
    for genome in ppr.get_genomes():
        records = ppr.load_records(genome)
        # Genomes with fewer than 50 records are left out of the table.
        if len(records) < 50:
            continue

        counts = [0] * len(categories)
        denominator = float(len(records))
        for record in records:
            label = record.annotations["predotar"]
            if label not in categories:
                raise ValueError("didn't expect {}".format(label))
            counts[categories.index(label)] += 1

        fractions = [float(count) / denominator for count in counts]
        table.append([short_name(genome)] + fractions)

    # Column 0 is the genome name, so index 1 is the fraction for the first
    # category (u"none"); rows are ordered by that value, ascending.
    table.sort(key=lambda entry: entry[1])
    utils.write_data(["genome"] + categories, table, "output/ppr_predotar.dat")
def annotate_all(genome_name='Arabidopsis_thaliana', plastid_name='NC_000932'):
    """Annotate a plastid record with binding domains and save it to disk.

    Loads the records for ``genome_name`` and keeps those that have at least
    23 features and whose ``'localization'`` annotation, upper-cased, equals
    ``'C'`` (a missing annotation is treated as an empty string).  The plastid
    record for ``plastid_name`` is loaded and its feature list is narrowed to
    features whose type is ``'gene'`` (case-insensitive).  Both are passed to
    ``annotate_binding_domains``, and the plastid record is then written in
    ``'gb'`` format to ``Binding/<genome_name>--<plastid_name>.gb`` under
    ``utils.OutDir``.

    Args:
        genome_name: Name passed to ``PPR.load_records``.
        plastid_name: Name passed to ``utils.load_plastid``.
    """
    selected = []
    for record in PPR.load_records(genome_name):
        if len(record.features) < 23:
            continue
        if record.annotations.get('localization', '').upper() != 'C':
            continue
        selected.append(record)

    plastid = utils.load_plastid(plastid_name)
    gene_features = []
    for feature in plastid.features:
        if feature.type.lower() == 'gene':
            gene_features.append(feature)
    plastid.features = gene_features

    # NOTE(review): presumably this mutates `plastid` in place, since its
    # return value is not used -- confirm against annotate_binding_domains.
    annotate_binding_domains(selected, plastid)

    out_dir = utils.OutDir
    file_name = "Binding/{}--{}.gb".format(genome_name, plastid_name)
    destination = os.path.join(out_dir, file_name)
    SeqIO.write(plastid, destination, 'gb')
def save_predotar():
    """Summarise Predotar annotations per genome and write them out.

    Each genome from ``ppr.get_genomes()`` with 50 or more records
    contributes one row: the genome's short name followed by the fraction of
    its records whose ``'predotar'`` annotation equals each expected label.
    The rows, sorted ascending on the first fraction, are passed with a
    header row to ``utils.write_data`` for ``output/ppr_predotar.dat``.

    Raises:
        ValueError: when a record's ``'predotar'`` annotation is not among
            the expected labels.
    """
    labels = [
        u'none',
        u'mitochondrial',
        u'plastid',
        u'er',
        u'elsewhere',
        u'possibly mitochondrial',
        u'possibly plastid',
        u'possibly er',
        u'possibly elsewhere',
    ]

    def label_fractions(records):
        # Tally records per label, then scale by the number of records.
        tally = [0 for _ in labels]
        size = float(len(records))
        for rec in records:
            call = rec.annotations['predotar']
            if call in labels:
                tally[labels.index(call)] += 1
            else:
                raise ValueError("didn't expect {}".format(call))
        return [float(hits) / size for hits in tally]

    rows = []
    for genome in ppr.get_genomes():
        records = ppr.load_records(genome)
        if len(records) >= 50:
            # Fractions are computed before the name lookup so an unexpected
            # label is reported first.
            fractions = label_fractions(records)
            rows.append([short_name(genome)] + fractions)

    # Index 1 of a row is the fraction for the first label (u'none').
    ordered = sorted(rows, key=lambda row: row[1])
    header = ["genome"] + labels
    utils.write_data(header, ordered, "output/ppr_predotar.dat")