Esempio n. 1
0
def save_predotar():
    """Tabulate, per genome, the fraction of records in each Predotar class.

    Every genome from ``ppr.get_genomes()`` is loaded with
    ``ppr.load_records``; genomes with fewer than 50 records are skipped.
    Each remaining genome contributes one row: its short name followed by
    the fraction of its records carrying each ``predotar`` annotation value.
    Rows are ordered by ascending fraction of the first class (``none``) and
    passed to ``utils.write_data`` together with the path
    ``output/ppr_predotar.dat``.

    Raises:
        ValueError: if a record's ``predotar`` annotation is not one of the
            expected classes.
    """
    categories = [
        u"none",
        u"mitochondrial",
        u"plastid",
        u"er",
        u"elsewhere",
        u"possibly mitochondrial",
        u"possibly plastid",
        u"possibly er",
        u"possibly elsewhere",
    ]
    table = []

    for genome in ppr.get_genomes():
        records = ppr.load_records(genome)
        if len(records) < 50:
            # Too few records for the fractions to be meaningful.
            continue

        # One counter per class, in the same order as ``categories``.
        tally = [0 for _ in categories]
        for record in records:
            label = record.annotations["predotar"]
            if label not in categories:
                raise ValueError("didn't expect {}".format(label))
            tally[categories.index(label)] += 1

        n_records = float(len(records))
        fractions = [float(count) / n_records for count in tally]
        table.append([short_name(genome)] + fractions)

    # Column 1 is the fraction for the first class; the sort is stable.
    table = sorted(table, key=lambda entry: entry[1])
    utils.write_data(["genome"] + categories, table, "output/ppr_predotar.dat")
Esempio n. 2
0
def main():
    """Gather per-genome PPR statistics and hand four tables to utils.write_data.

    For each genome from ``ppr.get_genomes()`` the genome is printed, its
    PPRs are loaded once via ``ppr.load_pprs`` and summarised by
    ``get_numbers``, ``length_hist``, ``get_localization`` and ``get_family``.

    Tables passed to ``utils.write_data``:
        * ``output/ppr_numbers.dat``      - rows sorted by their first value
          after the genome name.
        * ``output/ppr_lengths.dat``      - ``fmt_hist`` output, one column
          per genome in iteration order.
        * ``output/ppr_localization.dat`` - rows in iteration order.
        * ``output/ppr_families.dat``     - rows sorted by the sum of their
          values after the genome name.

    NOTE(review): the summary helpers are assumed to return tuples, since
    their results are concatenated onto 1-tuples - confirm against their
    definitions.
    """
    genomes = ppr.get_genomes()
    names = []
    hist = []
    numbers = []
    locale = []
    family = []
    for g in genomes:
        # Parenthesised form prints the same text under Python 2 and is also
        # valid Python 3 syntax.
        print(g)
        pprs = list(ppr.load_pprs(g))
        # Compute the short name once and reuse it for every table.
        name = short_name(g)
        names.append(name)
        numbers.append((name, ) + get_numbers(pprs))
        hist.append(length_hist(pprs))
        locale.append((name, ) + get_localization(pprs))
        family.append((name, ) + get_family(pprs))

    numbers.sort(key=lambda n: n[1])
    family.sort(key=lambda f: sum(f[1:]))
    hist = fmt_hist(hist)

    utils.write_data(('genome', ) + get_numbers_hdr(), numbers,
                     "output/ppr_numbers.dat")
    # The header reuses the names collected above instead of iterating
    # ``genomes`` a second time, so it stays correct even if ``genomes`` is
    # a one-shot iterator.
    utils.write_data(['length'] + names, hist, "output/ppr_lengths.dat")
    utils.write_data(('genome', ) + get_localization_hdr(), locale,
                     "output/ppr_localization.dat")
    utils.write_data(('genome', ) + get_family_hdr(), family,
                     "output/ppr_families.dat")
Esempio n. 3
0
def save_predotar():
    """Build the per-genome Predotar class distribution table.

    For each genome returned by ``ppr.get_genomes()`` with at least 50
    records (as loaded by ``ppr.load_records``), count how many records
    carry each ``predotar`` annotation value and convert the counts to
    fractions of that genome's record total. The resulting rows, sorted by
    ascending fraction of the ``none`` class, are passed to
    ``utils.write_data`` with the path ``output/ppr_predotar.dat``.

    Raises:
        ValueError: when a record has a ``predotar`` annotation outside the
            expected set of classes.
    """
    classes = [
        u'none',
        u'mitochondrial',
        u'plastid',
        u'er',
        u'elsewhere',
        u'possibly mitochondrial',
        u'possibly plastid',
        u'possibly er',
        u'possibly elsewhere',
    ]
    n_classes = len(classes)
    rows = []

    for genome in ppr.get_genomes():
        records = ppr.load_records(genome)
        if len(records) < 50:
            # Skip genomes with too few records.
            continue

        denominator = float(len(records))
        counts = [0] * n_classes

        for record in records:
            prediction = record.annotations['predotar']
            if prediction not in classes:
                raise ValueError("didn't expect {}".format(prediction))
            slot = classes.index(prediction)
            counts[slot] += 1

        row = [short_name(genome)]
        row = row + [float(c) / denominator for c in counts]
        rows.append(row)

    # Index 1 holds the fraction of the first class in ``classes``.
    rows = sorted(rows, key=lambda r: r[1])
    header = ["genome"] + classes
    utils.write_data(header, rows, "output/ppr_predotar.dat")
Esempio n. 4
0
def main():
    """Gather per-genome PPR statistics and hand four tables to utils.write_data.

    For each genome from ``ppr.get_genomes()`` the genome is printed, its
    PPRs are loaded once via ``ppr.load_pprs`` and summarised by
    ``get_numbers``, ``length_hist``, ``get_localization`` and ``get_family``.

    Tables passed to ``utils.write_data``:
        * ``output/ppr_numbers.dat``      - rows sorted by their first value
          after the genome name.
        * ``output/ppr_lengths.dat``      - ``fmt_hist`` output, one column
          per genome in iteration order.
        * ``output/ppr_localization.dat`` - rows in iteration order.
        * ``output/ppr_families.dat``     - rows sorted by the sum of their
          values after the genome name.

    NOTE(review): the summary helpers are assumed to return tuples, since
    their results are concatenated onto 1-tuples - confirm against their
    definitions.
    """
    genomes = ppr.get_genomes()
    names = []
    hist = []
    numbers = []
    locale = []
    family = []
    for g in genomes:
        # Parenthesised form prints the same text under Python 2 and is also
        # valid Python 3 syntax.
        print(g)
        pprs = list(ppr.load_pprs(g))
        # Compute the short name once and reuse it for every table.
        name = short_name(g)
        names.append(name)
        numbers.append((name,) + get_numbers(pprs))
        hist.append(length_hist(pprs))
        locale.append((name,) + get_localization(pprs))
        family.append((name,) + get_family(pprs))

    numbers.sort(key=lambda n: n[1])
    family.sort(key=lambda f: sum(f[1:]))
    hist = fmt_hist(hist)

    utils.write_data(("genome",) + get_numbers_hdr(), numbers, "output/ppr_numbers.dat")
    # The header reuses the names collected above instead of iterating
    # ``genomes`` a second time, so it stays correct even if ``genomes`` is
    # a one-shot iterator.
    utils.write_data(["length"] + names, hist, "output/ppr_lengths.dat")
    utils.write_data(("genome",) + get_localization_hdr(), locale, "output/ppr_localization.dat")
    utils.write_data(("genome",) + get_family_hdr(), family, "output/ppr_families.dat")