def save_predotar():
    """Write per-genome Predotar category fractions to a data file.

    For every genome returned by ``ppr.get_genomes()`` the records are
    loaded with ``ppr.load_records``; genomes with fewer than 50 records
    are skipped.  Each record's ``annotations["predotar"]`` value is
    tallied against a fixed list of categories and the tallies are
    divided by the number of records for that genome.

    Rows are sorted ascending by the first fraction column (the
    ``none`` category) and written to ``output/ppr_predotar.dat`` with
    ``utils.write_data``.

    Raises:
        ValueError: if a record carries a predotar value that is not in
            the category list.
    """
    categories = [
        u"none",
        u"mitochondrial",
        u"plastid",
        u"er",
        u"elsewhere",
        u"possibly mitochondrial",
        u"possibly plastid",
        u"possibly er",
        u"possibly elsewhere",
    ]
    table = []

    for genome in ppr.get_genomes():
        records = ppr.load_records(genome)
        # Too few records to give a meaningful fraction; leave the genome out.
        if len(records) < 50:
            continue

        denominator = float(len(records))
        tallies = [0] * len(categories)
        for record in records:
            label = record.annotations["predotar"]
            if label not in categories:
                raise ValueError("didn't expect {}".format(label))
            tallies[categories.index(label)] += 1

        fractions = [float(count) / denominator for count in tallies]
        table.append([short_name(genome)] + fractions)

    # Column 1 is the fraction for the first category ("none").
    table.sort(key=lambda entry: entry[1])
    utils.write_data(["genome"] + categories, table, "output/ppr_predotar.dat")
def main():
    """Summarise the PPRs of every genome and write four data files.

    Each genome from ``ppr.get_genomes()`` is printed, its PPRs are
    loaded with ``ppr.load_pprs``, and the results of ``get_numbers``,
    ``length_hist``, ``get_localization`` and ``get_family`` are
    collected.  The tuple-returning helpers are prefixed with the
    genome's short name to form table rows.

    Output files (all written through ``utils.write_data``):

    * ``output/ppr_numbers.dat``      - rows sorted by their first value
    * ``output/ppr_lengths.dat``      - ``fmt_hist`` of the per-genome
      histograms, one column per genome in iteration order
    * ``output/ppr_localization.dat`` - rows in iteration order
    * ``output/ppr_families.dat``     - rows sorted by the sum of their
      values
    """
    names = []
    hist = []
    numbers = []
    locale = []
    family = []

    for g in ppr.get_genomes():
        # Parenthesised form prints the same thing on Python 2 and is
        # also valid on Python 3 (the bare ``print g`` statement is not).
        print(g)
        pprs = list(ppr.load_pprs(g))

        # Compute the short name once per genome and remember it, so the
        # lengths header below lines up with ``hist`` without iterating
        # the genome collection a second time.
        name = short_name(g)
        names.append(name)

        numbers.append((name,) + get_numbers(pprs))
        hist.append(length_hist(pprs))
        locale.append((name,) + get_localization(pprs))
        family.append((name,) + get_family(pprs))

    # Index 0 of every row is the genome name; data starts at index 1.
    numbers.sort(key=lambda n: n[1])
    family.sort(key=lambda f: sum(f[1:]))
    hist = fmt_hist(hist)

    utils.write_data(("genome",) + get_numbers_hdr(), numbers,
                     "output/ppr_numbers.dat")
    utils.write_data(["length"] + names, hist, "output/ppr_lengths.dat")
    utils.write_data(("genome",) + get_localization_hdr(), locale,
                     "output/ppr_localization.dat")
    utils.write_data(("genome",) + get_family_hdr(), family,
                     "output/ppr_families.dat")
def save_predotar():
    """Tabulate Predotar predictions per genome and save them.

    Genomes come from ``ppr.get_genomes()`` and their records from
    ``ppr.load_records``.  A genome contributes a row only when it has
    at least 50 records.  A row holds the genome's short name followed
    by, for each known prediction label, the fraction of that genome's
    records whose ``annotations['predotar']`` equals the label.

    The rows are ordered by the fraction in the first label column and
    written, with a header, to ``output/ppr_predotar.dat`` through
    ``utils.write_data``.

    Raises:
        ValueError: when a record's predotar annotation is not one of
            the known labels.
    """
    labels = [
        u'none',
        u'mitochondrial',
        u'plastid',
        u'er',
        u'elsewhere',
        u'possibly mitochondrial',
        u'possibly plastid',
        u'possibly er',
        u'possibly elsewhere',
    ]
    header = ["genome"] + labels
    rows = []

    for genome in ppr.get_genomes():
        recs = ppr.load_records(genome)
        if len(recs) >= 50:
            n_recs = float(len(recs))
            hits = [0 for _ in labels]
            for rec in recs:
                call = rec.annotations['predotar']
                if call in labels:
                    hits[labels.index(call)] += 1
                else:
                    raise ValueError("didn't expect {}".format(call))
            # Normalise raw counts to fractions of this genome's records.
            rows.append([short_name(genome)] + [float(h) / n_recs for h in hits])

    # Sort on the first numeric column, i.e. the fraction for labels[0].
    rows.sort(key=lambda r: r[1])
    utils.write_data(header, rows, "output/ppr_predotar.dat")
def main():
    """Build the per-genome PPR summary tables and write them to disk.

    Iterates once over ``ppr.get_genomes()``.  For each genome it prints
    the genome, materialises ``ppr.load_pprs(genome)`` into a list, and
    gathers:

    * ``get_numbers(pprs)``      -> a row of ``output/ppr_numbers.dat``
    * ``length_hist(pprs)``      -> input to ``fmt_hist`` for
      ``output/ppr_lengths.dat``
    * ``get_localization(pprs)`` -> a row of
      ``output/ppr_localization.dat``
    * ``get_family(pprs)``       -> a row of ``output/ppr_families.dat``

    Rows built from tuple results start with the genome's short name.
    The numbers table is sorted by its first data column and the
    families table by the sum of its data columns; the other tables keep
    iteration order.  Everything is written with ``utils.write_data``.
    """
    short_names = []
    length_hists = []
    number_rows = []
    locale_rows = []
    family_rows = []

    for genome in ppr.get_genomes():
        # ``print(x)`` with one argument behaves identically on Python 2
        # and, unlike the ``print x`` statement, also parses on Python 3.
        print(genome)
        pprs = list(ppr.load_pprs(genome))

        # Remember the label now: the lengths header is built from this
        # list rather than from a second pass over the genomes, so it
        # stays aligned with ``length_hists`` even for one-shot iterables.
        label = short_name(genome)
        short_names.append(label)

        number_rows.append((label,) + get_numbers(pprs))
        length_hists.append(length_hist(pprs))
        locale_rows.append((label,) + get_localization(pprs))
        family_rows.append((label,) + get_family(pprs))

    # Element 0 of each row is the genome label, so data begins at 1.
    number_rows.sort(key=lambda row: row[1])
    family_rows.sort(key=lambda row: sum(row[1:]))
    formatted_hist = fmt_hist(length_hists)

    utils.write_data(("genome",) + get_numbers_hdr(), number_rows,
                     "output/ppr_numbers.dat")
    utils.write_data(["length"] + short_names, formatted_hist,
                     "output/ppr_lengths.dat")
    utils.write_data(("genome",) + get_localization_hdr(), locale_rows,
                     "output/ppr_localization.dat")
    utils.write_data(("genome",) + get_family_hdr(), family_rows,
                     "output/ppr_families.dat")