def break_contigs(contigs_file, sam_file, output_file):
    """Run ``break_by_coverage.ContigBreaker`` on a contig file and write the result.

    The FASTA records of ``contigs_file`` are loaded into a list,
    ``sam_file`` is wrapped in a ``sam_parser.Samfile``, and both are handed
    to ``break_by_coverage.ContigBreaker``, whose ``OutputBroken`` method is
    then called with ``output_file``.

    Args:
        contigs_file: Path of the FASTA file to read.
        sam_file: Single value passed to ``sam_parser.Samfile``
            (presumably a path to a SAM file -- confirm against sam_parser).
        output_file: Destination passed to ``ContigBreaker.OutputBroken``.
    """
    # Materialise the records inside the ``with`` block so the handle is
    # closed deterministically instead of being leaked. Plain "r" is used
    # instead of "rU": the "U" flag is rejected with ValueError on
    # Python 3.11+, and text mode already applies universal newlines on
    # Python 3.
    with open(contigs_file, "r") as contigs_handle:
        contigs = list(SeqIO.parse(contigs_handle, "fasta"))
    # A multi-file variant would wrap several Samfile objects in
    # sam_parser.SamChain; here a single file is used.
    sam = sam_parser.Samfile(sam_file)
    # last two arguments: K, min0 stretch length to break
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    coverage_breaker.OutputBroken(output_file)
def ConstructSubreferenceFromSam(sam_files):
    """Build filtered records and subreferences from a set of SAM files.

    Every entry of every file in ``sam_files`` whose ``pos`` is not ``-1`` is
    wrapped in ``Record``; the records are sorted and passed through
    ``CollectParts`` twice: first with ``(0, 2, 500)``, then the output of
    that call is processed again with ``(20000, 0, 7000)``.

    Args:
        sam_files: Iterable of values accepted by ``sam_parser.Samfile``.

    Returns:
        A ``(filtered_recs, subreferences)`` tuple holding the results of the
        first and the second ``CollectParts`` call, respectively.
    """
    #todo: make online
    #todo: use config
    def _positioned_records():
        # Walk the files in the given order, skipping entries with pos == -1.
        for path in sam_files:
            for alignment in sam_parser.Samfile(path):
                if alignment.pos != -1:
                    yield Record(alignment)

    ordered = sorted(_positioned_records())
    filtered_recs = CollectParts(ordered, 0, 2, 500)
    return filtered_recs, CollectParts(filtered_recs, 20000, 0, 7000)
# Example #3
# 0
def moleculo_postprocessing(contigs_file, output_file, sam_files, log):
    """Break and filter scaffolds using read alignments, then write them out.

    Steps, in order:
      1. Load the FASTA records of ``contigs_file`` into a list.
      2. Chain one ``sam_parser.Samfile`` per entry of ``sam_files`` into a
         ``sam_parser.SamChain``.
      3. Call ``generate_quality.GenerateQuality(contigs, sam)``.
      4. Build the pattern/length filters and the coverage/pattern/N
         breakers and pass them to ``SplitAndFilter``.
      5. Call ``OutputResults`` twice, once for "fasta" and once for "fastq".

    Args:
        contigs_file: Path of the FASTA file with the scaffolds to process.
        output_file: Value passed to ``OutputResults``; the final log message
            reports ``output_file + ".fastq"`` as the result location.
        sam_files: Iterable of values accepted by ``sam_parser.Samfile``.
        log: Object with an ``info(message)`` method used for progress
            messages.

    Note:
        ``pattern`` and ``rc_pattern`` are not parameters; they are resolved
        from the enclosing (module) scope.
    """
    log.info("===== Starting postprocessing based on read alignment")
    log.info("Processing scaffolds from " + contigs_file)
    log.info("Using read alignments to break and filter scaffolds")
    # Materialise the records inside the ``with`` block so the handle is
    # closed deterministically instead of being leaked. Plain "r" is used
    # instead of "rU": the "U" flag is rejected with ValueError on
    # Python 3.11+, and text mode already applies universal newlines on
    # Python 3.
    with open(contigs_file, "r") as contigs_handle:
        contigs = list(SeqIO.parse(contigs_handle, "fasta"))
    sam = sam_parser.SamChain(
        [sam_parser.Samfile(sam_file) for sam_file in sam_files])
    # Runs before the filters/breakers are built, as in the original order;
    # NOTE(review): presumably annotates ``contigs`` in place -- confirm.
    generate_quality.GenerateQuality(contigs, sam)
    pattern_filter = moleculo_filter_contigs.PatternContigFilter(
        contigs, sam, pattern, rc_pattern)
    length_filter = moleculo_filter_contigs.ContigLengthFilter(1500)
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    pattern_breaker = break_by_coverage.PatternBreaker(pattern, rc_pattern,
                                                       150)
    n_breaker = break_by_coverage.NBreaker(3)
    result = SplitAndFilter(contigs, coverage_breaker, length_filter,
                            n_breaker, pattern_breaker, pattern_filter)
    OutputResults(output_file, "fasta", result)
    OutputResults(output_file, "fastq", result)
    log.info("===== Postprocessing finished. Results can be found in " +
             output_file + ".fastq")