def break_contigs(contigs_file, sam_file, output_file):
    """Break contigs by read coverage and write the result.

    Reads the FASTA records from ``contigs_file``, wraps ``sam_file`` in a
    ``sam_parser.Samfile``, builds a ``break_by_coverage.ContigBreaker`` from
    both and calls its ``OutputBroken`` method with ``output_file``.

    Args:
        contigs_file: Path of the FASTA file to read the contigs from.
        sam_file: Value passed to ``sam_parser.Samfile`` (presumably the path
            of a read-alignment file -- confirm against ``sam_parser``).
        output_file: Passed unchanged to ``ContigBreaker.OutputBroken``.
    """
    import sys

    # The "U" mode flag was removed in Python 3.11 (open() raises ValueError).
    # Python 3 text mode already translates newlines, so only Python 2 still
    # needs the flag to keep the original universal-newline behavior.
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    # Materialize the records while the file is open so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(contigs_file, mode) as handle:
        contigs = list(SeqIO.parse(handle, "fasta"))
    sam = sam_parser.Samfile(sam_file)
    # last two arguments: K, min0 stretch length to break
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    coverage_breaker.OutputBroken(output_file)
# Example #2
# 0
def moleculo_postprocessing(contigs_file, output_file, sam_files, log):
    """Break and filter scaffolds using read alignments, then write them out.

    Reads the FASTA records from ``contigs_file``, chains one
    ``sam_parser.Samfile`` per entry of ``sam_files`` into a
    ``sam_parser.SamChain``, runs ``generate_quality.GenerateQuality`` on the
    records, and passes them through ``SplitAndFilter`` together with the
    breakers and filters built below. The result is written twice via
    ``OutputResults``: once as "fasta" and once as "fastq".

    Args:
        contigs_file: Path of the FASTA file to read the scaffolds from.
        output_file: Passed to ``OutputResults``; the final log message
            reports ``output_file + ".fastq"`` as the result location.
        sam_files: Iterable of values, each passed to ``sam_parser.Samfile``.
        log: Logger-like object; only its ``info`` method is used.
    """
    import sys

    log.info("===== Starting postprocessing based on read alignment")
    log.info("Processing scaffolds from " + contigs_file)
    log.info("Using read alignments to break and filter scaffolds")

    # The "U" mode flag was removed in Python 3.11 (open() raises ValueError).
    # Python 3 text mode already translates newlines, so only Python 2 still
    # needs the flag to keep the original universal-newline behavior.
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    # Materialize the records while the file is open so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(contigs_file, mode) as handle:
        contigs = list(SeqIO.parse(handle, "fasta"))

    sam = sam_parser.SamChain([sam_parser.Samfile(sam_file) for sam_file in sam_files])
    # Called before the breakers/filters are built, as in the original order;
    # its return value is not used (presumably it annotates ``contigs`` in
    # place -- confirm against generate_quality).
    generate_quality.GenerateQuality(contigs, sam)

    # ``pattern`` and ``rc_pattern`` are not defined in this function; they
    # are expected to be provided at module level.
    pattern_filter = moleculo_filter_contigs.PatternContigFilter(contigs, sam, pattern, rc_pattern)
    # NOTE(review): 1500 looks like a minimum contig length -- confirm.
    length_filter = moleculo_filter_contigs.ContigLengthFilter(1500)
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    pattern_breaker = break_by_coverage.PatternBreaker(pattern, rc_pattern, 150)
    n_breaker = break_by_coverage.NBreaker(3)

    result = SplitAndFilter(contigs, coverage_breaker, length_filter, n_breaker, pattern_breaker, pattern_filter)
    OutputResults(output_file, "fasta", result)
    OutputResults(output_file, "fastq", result)
    log.info("===== Postprocessing finished. Results can be found in " + output_file + ".fastq")