Esempio n. 1
def post_gmap_processing(db_name='cogent', gff_filename='in.trimmed.fa.gff', output_prefix='cogent2', seqrecs=None):
    """
    Pick the paths needed to account for the GMAP-aligned sequences and write
    them to <output_prefix>.fa.

    Every record read from `gff_filename` must be aligned to a reference named
    "path<N>". For each aligned sequence (r.seqid) the indices of the paths it
    hits with coverage >= 98 are collected and handed to
    make_into_lp_problem(); every path whose LP variable ends up with value 1
    is kept. The kept paths are copied from <db_name>.fa into the output.
    Records in `seqrecs` that have no qualifying alignment are appended to the
    output as themselves, under fresh "path<N>" names.

    db_name --- prefix of the fasta file (<db_name>.fa) holding the paths
    gff_filename --- GMAP GFF file to read alignments from
    output_prefix --- prefix of the fasta file (<output_prefix>.fa) to write
    seqrecs --- sequence records (need .id and .seq) that were aligned;
                defaults to no records
    """
    if seqrecs is None:
        seqrecs = []

    # seqid --> list of path indices that cover it well enough
    good_for = defaultdict(list)
    for r in GFF.gmapGFFReader(gff_filename):
        assert r.chr.startswith('path')  # chr should be path0, path1, etc
        if r.coverage >= 98.:
            good_for[r.seqid].append(int(r.chr[4:]))

    touse = set()
    if len(good_for) == 0:
        log.warning("[BUG] good_for in post_gmap_processing is empty. Probably from cycles. CHECK!")
    else:
        # only reachable with at least one hit, so max() never sees an empty sequence
        N = max(max(v) for v in good_for.values()) + 1
        prob = make_into_lp_problem(list(good_for.items()), N)
        # NOTE(review): assumes make_into_lp_problem() returns an already
        # solved problem; if it does not, prob.solve() is needed here -- confirm.
        for v in prob.variables():
            log.debug("{0} = {1}".format(v.name, v.varValue))
            # NOTE(review): presumably each LP variable is named after its
            # path index -- verify against make_into_lp_problem().
            if v.varValue == 1:
                touse.add(int(v.name))

    with open(output_prefix + '.fa', 'w') as f:
        with open(db_name + '.fa') as db_handle:
            for r in SeqIO.parse(db_handle, 'fasta'):
                # path records are named "path<N>"; keep those the LP selected
                if int(r.id[4:]) in touse:
                    f.write(">{0}\n{1}\n".format(r.id, r.seq))
        # if there are some sequences that didn't map (possibly from cycles)
        # then just use THEMSELVES
        fake_path_i = max(touse) + 1 if len(touse) >= 1 else 0
        for r in seqrecs:
            # NOTE(review): assumes seqrecs ids match the GFF seqid values -- confirm.
            if r.id not in good_for:
                log.warning("[BUG] {0} is not fully mapped to cogent in GMAP. \
                Likely cycle issues. Use itself in output.".format(r.id))
                f.write(">path{0}\n{1}\n".format(fake_path_i, r.seq))
                fake_path_i += 1
Esempio n. 2
def post_gmap_processing(db_name='cogent', gff_filename='in.trimmed.fa.gff', output_prefix='cogent2', seqrecs=None):
    """
    Pick the paths needed to account for the GMAP-aligned sequences and write
    them to <output_prefix>.fa.

    Every record read from `gff_filename` must be aligned to a reference named
    "path<N>". For each aligned sequence (r.seqid) the indices of the paths it
    hits with coverage >= 98 are collected and handed to
    make_into_lp_problem(); every path whose LP variable ends up with value 1
    is kept. The kept paths are copied from <db_name>.fa into the output.
    Records in `seqrecs` that have no qualifying alignment are appended to the
    output as themselves, under fresh "path<N>" names.

    db_name --- prefix of the fasta file (<db_name>.fa) holding the paths
    gff_filename --- GMAP GFF file to read alignments from
    output_prefix --- prefix of the fasta file (<output_prefix>.fa) to write
    seqrecs --- sequence records (need .id and .seq) that were aligned;
                defaults to no records
    """
    if seqrecs is None:
        seqrecs = []

    # seqid --> list of path indices that cover it well enough
    good_for = defaultdict(list)
    for r in GFF.gmapGFFReader(gff_filename):
        assert r.chr.startswith('path')  # chr should be path0, path1, etc
        if r.coverage >= 98.:
            good_for[r.seqid].append(int(r.chr[4:]))

    touse = set()
    if len(good_for) == 0:
        log.warning("[BUG] good_for in post_gmap_processing is empty. Probably from cycles. CHECK!")
    else:
        # only reachable with at least one hit, so max() never sees an empty sequence
        N = max(max(v) for v in good_for.values()) + 1
        prob = make_into_lp_problem(list(good_for.items()), N)
        # NOTE(review): assumes make_into_lp_problem() returns an already
        # solved problem; if it does not, prob.solve() is needed here -- confirm.
        for v in prob.variables():
            log.debug("{0} = {1}".format(v.name, v.varValue))
            # NOTE(review): presumably each LP variable is named after its
            # path index -- verify against make_into_lp_problem().
            if v.varValue == 1:
                touse.add(int(v.name))

    with open(output_prefix + '.fa', 'w') as f:
        with open(db_name + '.fa') as db_handle:
            for r in SeqIO.parse(db_handle, 'fasta'):
                # path records are named "path<N>"; keep those the LP selected
                if int(r.id[4:]) in touse:
                    f.write(">{0}\n{1}\n".format(r.id, r.seq))
        # if there are some sequences that didn't map (possibly from cycles)
        # then just use THEMSELVES
        fake_path_i = max(touse) + 1 if len(touse) >= 1 else 0
        for r in seqrecs:
            # NOTE(review): assumes seqrecs ids match the GFF seqid values -- confirm.
            if r.id not in good_for:
                log.warning("[BUG] {0} is not fully mapped to cogent in GMAP. \
                Likely cycle issues. Use itself in output.".format(r.id))
                f.write(">path{0}\n{1}\n".format(fake_path_i, r.seq))
                fake_path_i += 1
Esempio n. 3
#!/usr/bin/env python
import os, sys
from Cogent import GFF

# Read the GMAP GFF file named by the first command-line argument and pass
# every record to GFF.write_collapseGFF_format, writing to a file whose name
# is the input name with its extension replaced by '.collapsed.gff'.
if len(sys.argv) < 2:
    sys.exit("Usage: {0} <gmap_gff_filename>".format(sys.argv[0]))

input_filename = sys.argv[1]
# splitext only strips an extension from the last path component and leaves
# extension-less names whole; slicing at rfind('.') would drop the final
# character when there is no dot at all.
output_filename = os.path.splitext(input_filename)[0] + '.collapsed.gff'

# the with-block guarantees the output is flushed and closed on exit or error
with open(output_filename, 'w') as f:
    for r in GFF.gmapGFFReader(input_filename):
        GFF.write_collapseGFF_format(f, r)
Esempio n. 4
#!/usr/bin/env python
import os, sys
from Cogent import GFF

# Read the GMAP GFF file named by the first command-line argument and pass
# every record to GFF.write_collapseGFF_format, writing to a file whose name
# is the input name with its extension replaced by '.collapsed.gff'.
if len(sys.argv) < 2:
    sys.exit("Usage: {0} <gmap_gff_filename>".format(sys.argv[0]))

input_filename = sys.argv[1]
# splitext only strips an extension from the last path component and leaves
# extension-less names whole; slicing at rfind('.') would drop the final
# character when there is no dot at all.
output_filename = os.path.splitext(input_filename)[0] + '.collapsed.gff'

# the with-block guarantees the output is flushed and closed on exit or error
with open(output_filename, 'w') as f:
    for r in GFF.gmapGFFReader(input_filename):
        GFF.write_collapseGFF_format(f, r)