예제 #1
0
    def get_fancy_results_dict(self, max_per_query = 10, defline_white_space_mask = None):
        """Collect search hits per query, annotated with coverage and HSP strings.

        Hits are read from ``self.output`` through ``b6lib.B6Source``. For every
        retained hit, the stretch of the query (looked up in ``self.input``) and
        the stretch of the subject (looked up in ``self.target``) that the hit
        reports are aligned to each other with ``nw_align`` so that a match line
        can be stored alongside the hit.

        Args:
            max_per_query: Number of hits to keep for each query id. Once a
                query has this many hits, its remaining hits are skipped.
            defline_white_space_mask: Optional mask string. When truthy it is
                handed to ``remove_white_space_mask_from_B6_entry`` together
                with the current entry (presumably to restore white space in
                ids that were masked before the search -- confirm against
                that helper).

        Returns:
            A dict mapping query id to a list of entry copies. Each copy
            carries four extra attributes: ``coverage`` (aligned query span
            as a percentage of ``q_len``), ``hsp_query``, ``hsp_subject``
            (upper-cased aligned sequences) and ``hsp_match`` (``'|'`` where
            the two aligned sequences agree, ``' '`` elsewhere).
        """
        b6 = b6lib.B6Source(self.output)

        # NOTE(review): these two sources are not closed in this method; if
        # SequenceSource exposes a close(), it should be called once done.
        input_fasta = u.SequenceSource(self.input)
        target_db = u.SequenceSource(self.target)

        query_counts = {}
        fancy_results_dict = {}

        try:
            while next(b6):
                raw_query_id = b6.entry.query_id

                # enforce the per-query cap before doing any expensive work.
                kept_so_far = query_counts.get(raw_query_id, 0)
                if kept_so_far == max_per_query:
                    continue
                query_counts[raw_query_id] = kept_so_far + 1

                # sequences are fetched with the ids exactly as they appear in
                # the search output, i.e. before any mask is removed from them.
                query_seq = input_fasta.get_seq_by_read_id(raw_query_id).replace('-', '')
                target_seq = target_db.get_seq_by_read_id(b6.entry.subject_id)

                if defline_white_space_mask:
                    b6.entry = remove_white_space_mask_from_B6_entry(b6.entry, defline_white_space_mask)

                # parts that were aligned during the search are being aligned to each other to generate
                # hsp_match data to include into results
                query_aligned, target_aligned = nw_align(
                    query_seq[int(b6.entry.q_start) - 1:int(b6.entry.q_end)],
                    target_seq[int(b6.entry.s_start) - 1:int(b6.entry.s_end)])

                query_aligned, target_aligned = query_aligned.upper(), target_aligned.upper()

                coverage = (b6.entry.q_end - (b6.entry.q_start - 1)) * 100.0 / b6.entry.q_len
                hsp_match = ''.join('|' if q_char == t_char else ' '
                                    for q_char, t_char in zip(query_aligned, target_aligned))

                # store a private copy so the stored hit does not share state
                # with the object held by the reader.
                entry = copy.deepcopy(b6.entry)
                entry.coverage = coverage
                entry.hsp_query = query_aligned
                entry.hsp_subject = target_aligned
                entry.hsp_match = hsp_match

                entry = remove_white_space_mask_from_B6_entry(entry)

                # key off the finished entry: the unmasking helper may have
                # rewritten query_id, in which case a list registered under the
                # raw id earlier would not be found here.
                fancy_results_dict.setdefault(entry.query_id, []).append(entry)
        finally:
            # release the reader even if a lookup or the alignment fails.
            b6.close()

        return fancy_results_dict
예제 #2
0
    def get_results_dict(self, mismatches = None, gaps = None, min_identity = None, max_identity = None, penalty_for_terminal_gaps = True):
        """Map each query id to the set of subject ids that pass the given filters.

        Hits are read from ``self.output`` through ``b6lib.B6Source``. Hits of a
        sequence against itself (query id equal to subject id) are ignored.

        Args:
            mismatches: If not None, keep only hits whose ``mismatches`` equals
                this value.
            gaps: If not None, keep only hits whose ``gaps`` equals this value
                (compared after the terminal gap correction, when enabled).
            min_identity: If not None, drop hits whose identity, rounded to one
                decimal, is below this value rounded to one decimal.
            max_identity: If not None, drop hits whose identity, rounded to one
                decimal, is greater than or equal to this value rounded to one
                decimal (i.e. the upper bound is exclusive).
            penalty_for_terminal_gaps: When true, unaligned overhangs at either
                end of the query/subject are added to the hit's ``gaps`` and
                subtracted from its ``identity`` before filtering.

        Returns:
            A dict mapping query id to a set of subject ids. Queries without
            any surviving hit are absent from the dict.
        """
        results_dict = {}

        b6 = b6lib.B6Source(self.output)

        try:
            while b6.next():
                entry = b6.entry

                if entry.query_id == entry.subject_id:
                    continue

                if penalty_for_terminal_gaps:
                    # following correction is to take secret gaps into consideration.
                    # because we are working with reads that are supposed to be almost the
                    # same length, we want query and target to be aligned 100%. sometimes it
                    # is not the case, and mismatches are being calculated by the aligned
                    # part of query or target. for instance if query is this:
                    #
                    #    ATCGATCG
                    #
                    # and target is this:
                    #
                    #   TATCGATCG
                    #
                    # the alignment discards the T at the beginning and gives 0 mismatches.
                    # here we introduce those gaps back: at each end, the longer of
                    # the two unaligned overhangs is what a full-length alignment
                    # would have had to pay for. both terms are zero when the hit
                    # already spans both sequences end to end.
                    leading_gaps = max(entry.q_start - 1, entry.s_start - 1)
                    trailing_gaps = max(entry.q_len - entry.q_end, entry.s_len - entry.s_end)
                    additional_gaps = leading_gaps + trailing_gaps

                    identity_penalty = additional_gaps * 100.0 / (entry.q_len + additional_gaps)

                    if identity_penalty:
                        entry.gaps += additional_gaps
                        entry.identity -= identity_penalty

                    # done correcting the hit. carry on.

                if max_identity is not None:
                    if round(entry.identity, 1) >= round(max_identity, 1):
                        continue

                if min_identity is not None:
                    if round(entry.identity, 1) < round(min_identity, 1):
                        continue

                if mismatches is not None:
                    if entry.mismatches != mismatches:
                        continue

                if gaps is not None:
                    if entry.gaps != gaps:
                        continue

                # if it made here, we are interested in this one, after all.
                results_dict.setdefault(entry.query_id, set()).add(entry.subject_id)
        finally:
            # release the reader even if parsing or filtering raises.
            b6.close()

        return results_dict