Beispiel #1
0
def printAlignments(sam_handler, reference_handler, reads_handler):
    print "Loading reference"
    cc = ContigStorage(add_rc=False).loadFromFasta(reference_handler, False)
    print "Loading query"
    reads = ContigStorage().loadFromFasta(reads_handler, False)
    print "Loading result"
    res = []
    for rec in sam_parser.Samfile(sam_handler):
        if rec.query_name in reads.items and cc[rec.tname] is not None:
            al = AlignmentPiece.FromSamRecord(reads[rec.query_name], cc[rec.tname], rec)
            if al is None:
                print rec.query_name, rec.tname
                continue
            if al.seg_to.contig not in cc:
                al = al.rc
            res.append(al)
    print "Printing result", len(res)
    res = sorted(res, key = lambda al: al.seg_to.left)
#    res = sorted(res, key = lambda al: len(al))[::-1]
    up = 0
    down = 0
    for al in res:
        print al
        print list(al.splitRead())
        s1, s2 = al.asMatchingStrings()
        up += s1.count("-")
        down += s2.count("-")
        s = []
        if len(list(al.splitRead())) > 1:
            nums = []
            for al1 in al.splitRead():
                nums.append(al1.seg_from.left)
                nums.append(al1.seg_from.right - 1)
            cur_num = 0
            cur = al.seg_from.left

            for c in s1:
                if cur == nums[cur_num] and c != "-":
                    if cur_num % 2 == 0:
                        s.append("[")
                    else:
                        s.append("]")
                    cur_num += 1
                else:
                    if cur_num % 2 == 0:
                        s.append("-")
                    else:
                        s.append("+")
                if c != "-":
                    cur += 1
            print "".join(s)
        print s1
        print s2
    print up, down
Beispiel #2
0
def ConstructSubreferenceFromSam(sam_files):
    """Collect filtered records and subreferences from a set of SAM files.

    Every record whose ``pos`` is not -1 is wrapped in ``Record``; the wrapped
    records are sorted and passed through ``CollectParts`` twice.

    :param sam_files: iterable of values accepted by ``sam_parser.Samfile``.
    :return: tuple ``(filtered_recs, subreferences)``.
    """
    # todo: make online
    # todo: use config
    mapped = []
    for source in sam_files:
        parsed = sam_parser.Samfile(source)
        mapped.extend(Record(entry) for entry in parsed if entry.pos != -1)
    mapped.sort()
    filtered_recs = CollectParts(mapped, 0, 2, 500)
    return filtered_recs, CollectParts(filtered_recs, 20000, 0, 7000)
Beispiel #3
0
def moleculo_postprocessing(contigs_file, output_file, sam_files, log):
    """Break and filter scaffolds using read alignments and write the result.

    The contigs are read from ``contigs_file`` (FASTA), quality values are
    generated from the alignments, and the contigs are passed to
    ``SplitAndFilter`` together with the coverage, pattern and N breakers and
    the length and pattern filters.  The result is written twice through
    ``OutputResults``, once as "fasta" and once as "fastq".

    ``pattern`` and ``rc_pattern`` are not parameters; they are read from the
    enclosing scope.

    :param contigs_file: path of the FASTA file with the scaffolds.
    :param output_file: value passed to ``OutputResults`` for both formats.
    :param sam_files: iterable of values accepted by ``sam_parser.Samfile``.
    :param log: logger-like object providing ``info``.
    """
    log.info("===== Starting postprocessing based on read alignment")
    log.info("Processing scaffolds from " + contigs_file)
    log.info("Using read alignments to break and filter scaffolds")
    # list() consumes the parser completely, so the handle can be closed
    # right away instead of being left open for the rest of the run.
    with open(contigs_file, "rU") as contigs_handle:
        contigs = list(SeqIO.parse(contigs_handle, "fasta"))
    sam = sam_parser.SamChain([sam_parser.Samfile(sam_file) for sam_file in sam_files])
    generate_quality.GenerateQuality(contigs, sam)
    pattern_filter = moleculo_filter_contigs.PatternContigFilter(contigs, sam, pattern, rc_pattern)
    length_filter = moleculo_filter_contigs.ContigLengthFilter(1500)
    coverage_breaker = break_by_coverage.ContigBreaker(contigs, sam, 100, 50)
    pattern_breaker = break_by_coverage.PatternBreaker(pattern, rc_pattern, 150)
    n_breaker = break_by_coverage.NBreaker(3)
    result = SplitAndFilter(contigs, coverage_breaker, length_filter, n_breaker, pattern_breaker, pattern_filter)
    OutputResults(output_file, "fasta", result)
    OutputResults(output_file, "fastq", result)
    log.info("===== Postprocessing finished. Results can be found in " + output_file + ".fastq")
Beispiel #4
0
 def align(self, reads, reference, mode):
     # type: (Iterable[NamedSequence], Iterable[Contig], str) -> sam_parser.Samfile
     """Align reads to reference and return a parser over the SAM output.

     The inputs are handed to ``self.dir_distributor.fillNextDir``, which
     returns the working directory, the two file names and a flag stored as
     ``same``.  An existing ``alignment.sam`` in that directory is reused
     when ``same`` is set and ``params.clean`` is not; otherwise any old
     file is removed and ``self.align_files`` is called.
     """
     reference = list(reference)
     staged = [(reference, "contigs.fasta"), (list(reads), "reads.fasta")]
     work_dir, new_files, same = self.dir_distributor.fillNextDir(staged)
     contigs_file, reads_file = new_files[0], new_files[1]
     alignment_dir = os.path.join(work_dir, "alignment")
     alignment_file = os.path.join(work_dir, "alignment.sam")
     for path in (work_dir, alignment_dir):
         basic.ensure_dir_existance(path)
     reusable = same and not params.clean and os.path.exists(alignment_file)
     if reusable:
         sys.stdout.log(common.log_params.LogPriority.alignment_files, "Alignment reused:", alignment_file)
     else:
         # Drop a stale result so the aligner starts from a clean file.
         if os.path.isfile(alignment_file):
             os.remove(alignment_file)
         self.align_files(contigs_file, [reads_file], self.threads, params.technology, mode, alignment_file)
     # The open handle is passed to the returned Samfile.
     return sam_parser.Samfile(open(alignment_file, "r"))
# Step 1: copy the records listed in `names` from the FASTA file sys.argv[2]
# into <sys.argv[4]>/contigs.fasta.  Records whose entry in `names` is truthy
# are reverse-complemented and get "RC" appended to their id.
contigs_file = os.path.join(sys.argv[4], "contigs.fasta")
# The output is opened first, as before; both handles are now closed even if
# parsing or writing fails.
with open(contigs_file, "w") as contigs_handler:
    with open(sys.argv[2], "r") as source_handler:
        for rec in SeqIO.parse_fasta(source_handler):
            if rec.id in names:
                if names[rec.id]:
                    rec.seq = RC(rec.seq)
                    rec.id += "RC"
                SeqIO.write(rec, contigs_handler, "fasta")

# Step 2: align the reads in sys.argv[3] against the contigs written above.
alignment_dir = os.path.join(sys.argv[4], "alignment")
if not os.path.exists(alignment_dir):
    os.makedirs(alignment_dir)

alignment = os.path.join(sys.argv[4], "alignment.sam")
# NOTE(review): 8 is presumably the thread count -- confirm against
# make_alignment's signature.
make_alignment(contigs_file, [sys.argv[3]], 8, alignment_dir, "pacbio",
               alignment)

# Step 3: collect the names of all reads that have a mapped record.
aligned = set()
with open(alignment, "r") as alignment_handler:
    for rec in sam_parser.Samfile(alignment_handler):
        if rec.is_unmapped:
            continue
        aligned.add(rec.query_name)

# Step 4: write the reads whose id (first whitespace-separated token) was
# aligned to filtered_reads.fasta.
# NOTE(review): `dir` is not assigned in this part of the script; unless it is
# defined earlier this is the builtin dir() and os.path.join will fail.
# Confirm whether sys.argv[4] was intended.
with open(os.path.join(dir, "filtered_reads.fasta"), "w") as output:
    with open(sys.argv[3], "r") as reads_handler:
        for rec in SeqIO.parse_fasta(reads_handler):
            if rec.id.split()[0] in aligned:
                SeqIO.write(rec, output, "fasta")