Example #1
0
    def _look_for_blast_matches(self, seqrecords, blastdb):
        """Run the blast, filter the results and index the match parts.

        seqrecords are blasted against blastdb using self.program and
        self.params.  When self.filters is not None the alignments are passed
        through filter_alignments.  When self.elongate_for_global is true the
        match parts of every match are handed to
        elongate_match_parts_till_global with align_completely=SUBJECT before
        they are collected.

        It returns a dict keyed by query name; each value is a list with the
        match parts of all the matches found for that query.
        """
        blasts, blast_fhand = _do_blast_2(blastdb, seqrecords, self.program, params=self.params)
        # The handle is released in the finally clause so it does not leak
        # when filtering or elongation fails.  It is closed only after the
        # loop because the blasts are presumably still read from it while
        # iterating -- NOTE(review): confirm against _do_blast_2.
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            indexed_match_parts = {}
            for blast in blasts:
                query = blast["query"]
                for match in blast["matches"]:
                    subject = match["subject"]
                    match_parts = match["match_parts"]
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match_parts,
                            query_length=query["length"],
                            subject_length=subject["length"],
                            align_completely=SUBJECT,
                        )
                    # Accumulate into a list owned by the result, so that
                    # later matches for the same query do not grow the
                    # match-parts list that belongs to the first match.
                    indexed_match_parts.setdefault(query["name"], []).extend(match_parts)
        finally:
            blast_fhand.close()
        return indexed_match_parts
Example #2
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences in seq_fpath are rewritten as fasta into a temporary
        directory and that file is used as the blast database (with
        dbtype=seqs_type); the oligos are the blast queries.  When
        self.filters is not None the alignments are passed through
        filter_alignments.  When self.elongate_for_global is true the match
        parts of every match are handed to elongate_match_parts_till_global
        with align_completely=QUERY before they are collected.

        It returns a dict keyed by read (blast subject) name; each value is a
        list with the match parts of all the oligo matches on that read.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both files are closed explicitly, so the fasta database is
            # completely written to disk before the blast is run.
            with open(seq_fpath) as in_fhand, open(dbpath, 'w') as db_fhand:
                seqio([in_fhand],
                      db_fhand,
                      out_format='fasta',
                      copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(dbpath,
                                              oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            # we need to keep the blast_fhand until the blasts have been
            # consumed, because it is a temp file and otherwise it might be
            # removed.  The finally clauses release it, and the temporary
            # directory, even if the processing fails.
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        match_parts = match['match_parts']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match_parts,
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # Accumulate into a list owned by the result, so
                        # that later matches on the same read do not grow
                        # the match-parts list of the first match.
                        indexed_match_parts.setdefault(read['name'],
                                                       []).extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Example #3
0
    def _look_for_blast_matches(self, seq_fpath, oligos):
        """It looks for the oligos in the given sequence file.

        The sequences in seq_fpath are rewritten as fasta into a temporary
        directory and that file is used as the blast database; the oligos are
        the blast queries.  When self.filters is not None the alignments are
        passed through filter_alignments.  When self.elongate_for_global is
        true the match parts of every match are handed to
        elongate_match_parts_till_global with align_completely=QUERY before
        they are collected.

        It returns a dict keyed by read (blast subject) name whose values are
        the match parts found on that read.
        """
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both files are closed explicitly, so the fasta database is
            # completely written to disk before the blast is run.
            with open(seq_fpath) as in_fhand, open(dbpath, "w") as db_fhand:
                seqio([in_fhand], [db_fhand], out_format="fasta", copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(
                dbpath, oligos, params=self.params, program=self.program
            )
            # we need to keep the blast_fhand until the blasts have been
            # consumed, because it is a temp file and otherwise it might be
            # removed.  The finally clauses release it, and the temporary
            # directory, even if the processing fails.
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                one_oligo = len(oligos) == 1
                for blast in blasts:
                    oligo = blast["query"]
                    for match in blast["matches"]:
                        read = match["subject"]
                        match_parts = match["match_parts"]
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match_parts,
                                query_length=oligo["length"],
                                subject_length=read["length"],
                                align_completely=QUERY,
                            )

                        if one_oligo:
                            # With a single oligo the entry is assigned, not
                            # accumulated: a read that showed up in a second
                            # match would replace the earlier match parts.
                            # NOTE(review): presumably each read appears at
                            # most once in this case -- confirm.
                            indexed_match_parts[read["name"]] = match_parts
                        else:
                            # Accumulate into a list owned by the result, so
                            # that later matches on the same read do not grow
                            # the match-parts list of the first match.
                            indexed_match_parts.setdefault(read["name"], []).extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Example #4
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        '''It looks for the oligos in the given sequence file.

        The sequences in seq_fpath are rewritten as fasta into a temporary
        directory and that file is used as the blast database (with
        dbtype=seqs_type); the oligos are the blast queries.  When
        self.filters is not None the alignments are passed through
        filter_alignments.  When self.elongate_for_global is true the match
        parts of every match are handed to elongate_match_parts_till_global
        with align_completely=QUERY before they are collected.

        It returns a dict keyed by read (blast subject) name; each value is a
        list with the match parts of all the oligo matches on that read.
        '''
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both files are closed explicitly, so the fasta database is
            # completely written to disk before the blast is run.
            with open(seq_fpath) as in_fhand, open(dbpath, 'w') as db_fhand:
                seqio([in_fhand], db_fhand, out_format='fasta',
                      copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            # we need to keep the blast_fhand until the blasts have been
            # consumed, because it is a temp file and otherwise it might be
            # removed.  The finally clauses release it, and the temporary
            # directory, even if the processing fails.
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        match_parts = match['match_parts']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match_parts,
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # Accumulate into a list owned by the result, so
                        # that later matches on the same read do not grow
                        # the match-parts list of the first match.
                        indexed_match_parts.setdefault(read['name'],
                                                       []).extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Example #5
0
    def test_match_part_elongation(self):
        '''The alignments get elongated till they are like global alignments

        Every case hands a single match part to
        elongate_match_parts_till_global and then compares that same dict
        with the expected coordinates, so the function is expected to update
        the match part in place.
        '''
        # Each case: (match part, subject length, query length,
        #             sequence to align completely, expected match part)
        cases = (
            # elongate on the right
            ({'query_start': 10, 'query_end': 13,
              'subject_start': 0, 'subject_end': 3},
             5, 20, SUBJECT,
             {'query_start': 10, 'query_end': 14,
              'subject_start': 0, 'subject_end': 4,
              'elongated': 1}),
            # elongate on the left
            ({'query_start': 3, 'query_end': 5,
              'subject_start': 3, 'subject_end': 5},
             7, 20, SUBJECT,
             {'query_start': 0, 'query_end': 6,
              'subject_start': 0, 'subject_end': 6,
              'elongated': 4}),
            # reversed
            ({'query_start': 5, 'query_end': 3,
              'subject_start': 5, 'subject_end': 3},
             7, 20, SUBJECT,
             {'query_start': 6, 'query_end': 0,
              'subject_start': 6, 'subject_end': 0,
              'elongated': 4}),
            # partial elongation
            ({'query_start': 1, 'query_end': 3,
              'subject_start': 2, 'subject_end': 4},
             7, 5, SUBJECT,
             {'query_start': 0, 'query_end': 4,
              'subject_start': 1, 'subject_end': 5,
              'elongated': 2}),
            # No elongation
            ({'query_start': 1, 'query_end': 3,
              'subject_start': 0, 'subject_end': 2},
             3, 30, SUBJECT,
             {'query_start': 1, 'query_end': 3,
              'subject_start': 0, 'subject_end': 2}),
            # the query should be completely aligned
            ({'query_start': 1, 'query_end': 40,
              'subject_start': 59, 'subject_end': 98},
             200, 42, QUERY,
             {'query_start': 0, 'query_end': 41,
              'subject_start': 58, 'subject_end': 99,
              'elongated': 2}),
        )
        for part, subject_len, query_len, to_align, expected in cases:
            elongate_match_parts_till_global([part],
                                             subject_length=subject_len,
                                             query_length=query_len,
                                             align_completely=to_align)
            assert part == expected