예제 #1
0
    def predict(self):
        """Predict the H2 antigen from BLAST hits for the FLJB_FASTA_PATH genes.

        Populates ``self.h2_prediction`` in place and returns ``None``.

        * No hit (``is_missing``) or no usable top result: ``h2`` is ``'-'``.
        * Perfect match: ``h2`` is derived from the top result's ``qseqid``.
        * Imperfect match: short top hits (<= 600 bp) with < 88 %ID, or that
          are not flagged as truncated, are treated as missing. Otherwise the
          hits are re-filtered (<= 50 mismatches, >= 700 bp) and the best
          remaining one becomes the new ``top_result``.
        """
        self.h2_prediction = self.get_antigen_gene_blast_results(
            self.h2_prediction, FLJB_FASTA_PATH)
        prediction = self.h2_prediction

        if prediction.is_missing:
            prediction.h2 = '-'
            return

        top_result = prediction.top_result
        if top_result is None:
            # A prediction can be flagged as present while carrying no top
            # result (the H1 predictor guards against the same situation).
            # Indexing None below would raise TypeError, so report H2 as
            # missing instead.
            prediction.is_missing = True
            prediction.h2 = '-'
            return

        if not prediction.is_perfect_match:
            match_len = top_result['length']
            pident = top_result['pident']

            # Short matches are treated as missing ('-') for H2 when they
            # have a low %ID or are not truncated.
            if match_len <= 600 and (pident < 88.0
                                     or not prediction.is_trunc):
                prediction.h2 = '-'
                prediction.is_missing = True
                return

            all_hits = pd.DataFrame(prediction.blast_results)
            candidates = all_hits[
                (all_hits['mismatch'] <= 50)
                & (all_hits['length'] >= 700)]

            if candidates.shape[0] == 0:
                prediction.is_missing = True
                prediction.top_result = None
                prediction.h2 = '-'
                return

            # Prefer long, near-identical hits ranked by fewest mismatches;
            # otherwise fall back to the highest bitscore.
            long_hits = candidates[
                (candidates['mismatch'] <= 5)
                & (candidates['length'] >= 1000)]
            if long_hits.shape[0] > 0:
                ranked = long_hits.sort_values(by='mismatch')
            else:
                ranked = candidates.sort_values(
                    by='bitscore', ascending=False)

            best = BlastReader.df_first_row_to_dict(ranked)
            prediction.is_trunc = BlastReader.is_blast_result_trunc(
                qstart=best['qstart'],
                qend=best['qend'],
                sstart=best['sstart'],
                send=best['send'],
                qlen=best['qlen'],
                slen=best['slen'])
            prediction.top_result = best

        prediction.h2 = get_antigen_name(prediction.top_result['qseqid'])
예제 #2
0
def test_BlastReader_is_blast_result_trunc():
    """Check BlastReader.is_blast_result_trunc on representative alignments.

    Every case uses a 100 bp query (qlen) and a 1000 bp subject (slen).
    """
    # (qstart, qend, sstart, send, expected_truncated)
    scenarios = [
        # not truncated; match found in the middle of the subject sequence
        (1, 100, 101, 200, False),
        # not truncated; shorter match (-10bp) in the middle of the subject
        (1, 90, 101, 190, False),
        # not truncated; shorter match (-20bp) in the middle of the subject
        (1, 80, 101, 180, False),
        # truncated at the start of the subject
        (51, 100, 1, 50, True),
        # truncated at the end of the subject
        (51, 100, 951, 1000, True),
    ]

    for qstart, qend, sstart, send, expected_truncated in scenarios:
        outcome = BlastReader.is_blast_result_trunc(qstart=qstart,
                                                    qend=qend,
                                                    sstart=sstart,
                                                    send=send,
                                                    qlen=100,
                                                    slen=1000)
        # Compare truthiness only, as the original bare asserts did.
        assert bool(outcome) == expected_truncated
예제 #3
0
    def predict(self, filter=None):
        """Predict the H1 antigen from BLAST hits for the FLIC_FASTA_PATH genes.

        Populates ``self.h1_prediction`` in place and returns ``None``.

        Args:
            filter: Value forwarded as the third argument of
                ``get_antigen_gene_blast_results``. Defaults to ``['N/A']``;
                a fresh list is created per call so the default is never
                shared between calls. (The name shadows the builtin but is
                kept for caller compatibility.)

        Nothing further is done when the prediction is missing or has no top
        result. For a perfect match, ``h1`` is derived from the top result's
        ``qseqid``. For an imperfect match the hits are re-filtered
        (<= 25 mismatches and >= 700 bp, falling back to 0 mismatches and
        >= 400 bp); if none remain the prediction is marked missing with
        ``h1 = None``, otherwise the best remaining hit becomes the new
        ``top_result``.
        """
        if filter is None:
            filter = ['N/A']

        self.h1_prediction = self.get_antigen_gene_blast_results(
            self.h1_prediction, FLIC_FASTA_PATH, filter)
        prediction = self.h1_prediction

        if prediction.is_missing or prediction.top_result is None:
            return

        if not prediction.is_perfect_match:
            all_hits = pd.DataFrame(prediction.blast_results)
            candidates = all_hits[
                (all_hits['mismatch'] <= 25)
                & (all_hits['length'] >= 700)]

            if candidates.shape[0] == 0:
                # Fallback: accept shorter hits, but only mismatch-free ones.
                candidates = all_hits[
                    (all_hits['mismatch'] <= 0)
                    & (all_hits['length'] >= 400)]
                if candidates.shape[0] == 0:
                    prediction.is_missing = True
                    prediction.top_result = None
                    prediction.h1 = None
                    return

            # Prefer long, near-identical hits ranked by fewest mismatches;
            # otherwise fall back to the highest bitscore.
            long_hits = candidates[
                (candidates['mismatch'] <= 5)
                & (candidates['length'] >= 1000)]
            if long_hits.shape[0] > 0:
                ranked = long_hits.sort_values(by='mismatch')
            else:
                ranked = candidates.sort_values(
                    by='bitscore', ascending=False)

            best = BlastReader.df_first_row_to_dict(ranked)
            prediction.is_trunc = BlastReader.is_blast_result_trunc(
                qstart=best['qstart'],
                qend=best['qend'],
                sstart=best['sstart'],
                send=best['send'],
                qlen=best['qlen'],
                slen=best['slen'])
            prediction.top_result = best

        prediction.h1 = get_antigen_name(prediction.top_result['qseqid'])