import itertools
import random
import sys

# Project-specific names used below (SeqIO, Contig, ContigCollection,
# ReadCollection, Aligner, DirDistributor, Scorer, basic, params) are assumed
# to come from the surrounding repository; their import paths are not shown here.


def main(reads_file, ref_file, dir, error_rate):
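    # Sample error-containing 500 bp segments from the longest reference
    # record, align the reads to them and print accurate alignment scores
    # for up to 5000 alignments.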
    sys.stderr.write("Reading reference" + "\n")
    ref = sorted(list(SeqIO.parse_fasta(open(ref_file, "r"))),
                 key=lambda rec: len(rec))[-1]
    ref = Contig(ref.seq, ref.id)
    refs = ContigCollection()
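    # Sample roughly 5% of the non-overlapping 500 bp reference windows and
    # substitute error_rate% of their bases (with the complementary base) to
    # obtain error-containing reference segments.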
    for i in range(0, len(ref) - 500, 500):
        if random.random() > 0.95:
            tmp = list(ref.segment(i, i + 500).Seq())
            for j in range(error_rate * 500 / 100):
                pos = random.randint(0, 499)
                tmp[pos] = basic.rc[tmp[pos]]
            refs.add(
                Contig("".join(tmp),
                       ref.id + "(" + str(i) + "," + str(i + 500) + ")"))
    refs.print_names(sys.stderr)
    sys.stderr.write("Reading reads" + "\n")
    reads = ReadCollection()
    reads.loadFromFasta(open(reads_file, "r"))

    sys.stderr.write("Aligning reads" + "\n")
    basic.ensure_dir_existance(dir)
    aligner = Aligner(DirDistributor(dir))
    aligner.alignReadCollection(reads, refs)
    sys.stderr.write("Analysing alignments" + "\n")
    alignments = []
    for read in reads:
        alignments.extend(read.alignments)
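    # Keep only near-full-length alignments (>450 bp of the 500 bp segments)
    # and order them by target and query contig so they can be grouped by
    # target below.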
    alignments = filter(lambda al: len(al) > 450, alignments)
    alignments = sorted(alignments,
                        key=lambda al:
                        (al.seg_to.contig.id, al.seg_from.contig.id))
    scorer = Scorer()
    scorer.scores.homo_score = 3
    scorer.scores.ins_score = 5
    scorer.scores.del_score = 5
    cnt = 0
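    # Score alignments per target segment, skipping segments with 150 or more
    # hits, and stop once 5000 alignments have been scored in total.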
    for contig, group in itertools.groupby(alignments,
                                           key=lambda al: al.seg_to.contig):
        group = list(group)
        sys.stderr.write(str(contig) + " " + str(len(group)) + "\n")
        if len(group) < 150:
            for al in group:
                print scorer.accurateScore(al.matchingSequence(),
                                           params.alignment_correction_radius)
                cnt += 1
                if cnt >= 5000:
                    break
        if cnt >= 5000:
            break
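
# A minimal invocation sketch (not part of the original example); the argument
# order and types are assumptions: reads FASTA, reference FASTA, working
# directory and an integer error rate in percent.
if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2], sys.argv[3], int(sys.argv[4]))
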
Example #2
def CreateContigCollection(graph_file, contigs_file, min_cov, aligner, polisher, reads, force_unique, all_unique):
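    # Build the initial collection of presumably unique contigs from the
    # assembly graph. Three modes: automatic selection based on coverage,
    # length and alignment to alternative edges (default), a user-supplied
    # set of unique edges (force_unique), or treating all contigs as unique
    # (all_unique).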
    sys.stdout.info("Creating contig collection")
    if force_unique is None and not all_unique:
        graph = SimpleGraph().ReadDot(graph_file)
        graph.FillSeq(contigs_file)
        covs = []
        for e in graph.e.values():
            covs.append((e.len, e.cov))
        tmp_cov = []
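        # Estimate the average coverage over the longest edges that together
        # make up half of the total edge length.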
        total = sum(l for l, c in covs) / 2
        for l, c in sorted(covs)[::-1]:
            if total < 0:
                break
            tmp_cov.append((l, c))
            total -= l
        avg_cov = float(sum([l * c for l, c in tmp_cov])) / sum(l for l, c in tmp_cov)
        sys.stdout.info("Average coverage determined:", avg_cov)
        nonunique = set()
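        # A candidate unique edge shorter than 20 kb that shares its start
        # vertex with other edges is aligned to those alternatives; a
        # near-identical hit (>98% identity, starting near both left ends)
        # marks the edge and its reverse complement as non-unique.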
        for edge in graph.e.values():
            if edge.unique and edge.len < 20000 and len(graph.v[edge.start].out) > 1:
                if edge.cov >= min_cov and (edge.cov < 0.8 * avg_cov or edge.len > 40000):
                    alter = ContigStorage()
                    for e in graph.v[edge.start].out:
                        if e != edge:
                            alter.add(Contig(e.seq, e.id))
                    for al in aligner.localAlign([Contig(edge.seq, edge.id)], alter):  # type: AlignmentPiece
                        if al.percentIdentity() > 0.98 and (al.seg_from.left < 100 and al.seg_to.left < 100 and len(al) > min(500, edge.len)):
                            nonunique.add(edge.id)
                            nonunique.add(basic.Reverse(edge.id))
        contigs = ContigCollection()
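        # Select canonical edges as initial contigs using coverage and length
        # thresholds; edges flagged as non-unique above are skipped, while long
        # edges with near-average coverage are added even if not marked unique.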
        for edge in graph.e.values():
            if basic.isCanonocal(edge.id):
                if edge.unique and (edge.len > params.min_isolated_length or len(graph.v[edge.end].out) > 0 or len(graph.v[edge.start].inc) > 0):
                    if edge.cov >= min_cov and (edge.cov < 1.5 * avg_cov or edge.len > 40000):
                        if edge.id in nonunique:
                            sys.stdout.info("Edge removed based on alignment to alternative:", edge.id, edge.cov, edge.len)
                        else:
                            contigs.add(Contig(edge.seq, edge.id))
                    else:
                        sys.stdout.info("Edge removed based on coverage:", edge.id, edge.cov, edge.len)
                elif (edge.len > 100000 and edge.cov < 1.5 * avg_cov) or (edge.len > 40000 and 1.3 * avg_cov > edge.cov > 0.7 * avg_cov):
                    contigs.add(Contig(edge.seq, edge.id))
                    sys.stdout.info("Edge added based on length and coverage:", edge.id, edge.cov, edge.len)

    elif force_unique is not None:
        sys.stdout.info("Using forced unique edge set")
        sys.stdout.trace(force_unique)
        contigs = ContigCollection().loadFromFile(contigs_file).filter(lambda contig: contig.id in force_unique)
    else:
        sys.stdout.info("Considering all contigs unique")
        contigs = ContigCollection().loadFromFile(contigs_file)
    # contigs.loadFromFasta(open(contigs_file, "r"), num_names=True)
    # contigs = contigs.filter(lambda contig: contig.id not in nonunique and len(contig) > params.k + 20)
    sys.stdout.info("Created", len(contigs), "initial contigs")
    if not all_unique or force_unique is not None:
        sys.stdout.info("Polishing contigs")
        polished_contigs = polisher.polishMany(reads, list(contigs.unique()))
        contigs = ContigCollection().addAll(polished_contigs)
    else:
        sys.stdout.info("Skipping contig polishing step since manual unique contig initialization was used")
    return contigs