def test_epitope_unequal(self):
        """Epitopes built from 'aaAA' and 'aaaA' must not compare equal.

        The two sequences differ only in the letter case at index 2.
        """
        first_epitope = Epitope(SeqRecord(Seq('aaAA')))
        second_epitope = Epitope(SeqRecord(Seq('aaaA')))

        self.assertNotEqual(first_epitope, second_epitope)
    def test_get_epitopes_with_max_verified_regions(self):
        """Compare ``get_epitopes_with_max_verified_regions`` output with the expected dataset.

        The clusters are loaded from the EPITOPES_CLUSTERS1_PATH /
        EPITOPES_FASTA1_PATH fixtures.
        """
        expected_seqs = ('AaAA', 'bBBBB', 'DDdDdD')
        expected = EpitopesDataset(
            [Epitope(SeqRecord(Seq(seq))) for seq in expected_seqs])

        clusters = EpitopesClusters(EPITOPES_CLUSTERS1_PATH,
                                    EPITOPES_FASTA1_PATH)
        actual = get_epitopes_with_max_verified_regions(clusters)

        self.assertEqual(expected, actual)
    def test_add_verified_region(self):
        """Adding region (3, 3) to an epitope built from 'AAaa'.

        Expects ``verified_regions`` to become [(0, 1), (3, 3)] and the
        record sequence to read 'AAaA' afterwards.
        """
        region_to_add = (3, 3)
        epitope = Epitope(SeqRecord(Seq('AAaa')))

        epitope.add_verified_region(region_to_add)

        # Read both observed values before asserting, as a single snapshot.
        regions_after = epitope.verified_regions
        seq_after = str(epitope.record.seq)

        self.assertEqual([(0, 1), (3, 3)], regions_after)
        self.assertEqual('AAaA', seq_after)
    def test_merge_identical_seqs(self):
        """Check the epitopes left in the dataset after ``merge_identical_seqs``.

        The dataset is loaded from EPITOPES_BATCHES_PATHS; each expected
        epitope is built from a sequence plus the extra verified regions
        that are added to it.
        """
        # NOTE(review): the helper called here is ``add_verified_regions_lst``;
        # the only similarly named helper visible nearby is
        # ``_add_verified_regions_lst`` -- confirm which name is in scope.
        def make_expected(seq, extra_regions=None):
            epitope = Epitope(SeqRecord(Seq(seq)))
            if extra_regions is None:
                return epitope
            return add_verified_regions_lst(epitope, extra_regions)

        expected = [
            make_expected('aaaA', [(2, 3), (3, 3), (1, 2)]),
            make_expected('B'),
            make_expected('bBBbb', [(3, 4), (2, 3)]),
            make_expected('cccCC', [(2, 3)]),
            make_expected('Dd', [(1, 1)]),
        ]

        dataset = EpitopesDataset(EPITOPES_BATCHES_PATHS)
        dataset.merge_identical_seqs()
        actual = list(dataset)

        self.assertEqual(expected, actual)
    def __parse_clstr_file(clstr_file_path: str,
                           records_fasta_path: str) -> List[List[Epitope]]:
        """Group the records of a FASTA file into clusters listed in a cluster file.

        Blank lines in the cluster file are ignored. A line starting with
        CLUSTER_PREFIX opens a new cluster; every other line is a member
        line whose record id is the text between EPITOPE_ID_PREFIX and
        EPITOPE_ID_SUFFIX.

        :param clstr_file_path: path of the cluster file
            (presumably CD-HIT ``.clstr`` output -- TODO confirm).
        :param records_fasta_path: path of the FASTA file holding the records.
        :return: one list of Epitope objects per non-empty cluster, in file order.
        :raises KeyError: if a member id has no record in the FASTA file.
        :raises IndexError: if a member line does not contain EPITOPE_ID_PREFIX.
        """
        with open(records_fasta_path) as fasta_handle:
            records_by_id = SeqIO.to_dict(SeqIO.parse(fasta_handle, 'fasta'))

        clusters = []

        with open(clstr_file_path) as clstr_handle:
            members = []
            for raw_line in clstr_handle:
                entry = raw_line.strip()
                if not entry:
                    continue

                if entry.startswith(CLUSTER_PREFIX):
                    # A header closes the previous cluster; nothing has been
                    # collected yet when the very first header is read.
                    if members:
                        clusters.append(members)
                        members = []
                    continue

                after_prefix = entry.split(EPITOPE_ID_PREFIX)[1]
                epitope_id = after_prefix.split(EPITOPE_ID_SUFFIX)[0]
                members.append(Epitope(records_by_id[epitope_id]))

            # The last cluster has no following header to flush it.
            if members:
                clusters.append(members)

        return clusters
    def test_epitope_init_verified_region3(self):
        """An epitope built from 'AAaa' should report verified regions [(0, 1)]."""
        epitope = Epitope(SeqRecord(Seq('AAaa')))
        regions = epitope.verified_regions

        self.assertEqual([(0, 1)], regions)
    def test_init1(self):
        """An epitope built from 'aaaA' keeps its text and reports region (3, 3)."""
        raw_seq = 'aaaA'

        epitope = Epitope(SeqRecord(Seq(raw_seq)))
        regions = epitope.verified_regions

        # str() of the epitope is expected to give back the input sequence.
        self.assertEqual(raw_seq, str(epitope))
        self.assertEqual([(3, 3)], regions)
Exemple #8
0
    def __parse_records_batches_fasta_files(
            records_batches_fasta_paths: List[str]) -> List[Epitope]:
        """Read every FASTA file in the given list and wrap each record in an Epitope.

        :param records_batches_fasta_paths: paths of the FASTA files to read,
            processed in the given order.
        :return: a flat list of Epitope objects, file by file, in record order.
        """
        epitopes = []
        for fasta_path in records_batches_fasta_paths:
            with open(fasta_path) as fasta_handle:
                # Consume the parser while the file is still open.
                epitopes.extend(
                    map(Epitope, SeqIO.parse(fasta_handle, 'fasta')))

        return epitopes
    def test_unequal(self):
        """Datasets that differ in a single epitope ('A' vs 'B') must not compare equal."""
        first_seqs = ['a', 'A', 'aa', 'aa']
        second_seqs = ['a', 'B', 'aa', 'aa']

        first_dataset = EpitopesDataset(
            [Epitope(SeqRecord(Seq(seq))) for seq in first_seqs])
        second_dataset = EpitopesDataset(
            [Epitope(SeqRecord(Seq(seq))) for seq in second_seqs])

        self.assertNotEqual(first_dataset, second_dataset)
Exemple #10
0
    def test_init(self):
        """Clusters loaded from the fixture files should match the expected grouping.

        ``EpitopesClusters`` is built from EPITOPES_CLUSTERS_PATH and
        EPITOPES_FASTA_PATH and compared, as a list, with three expected
        clusters.
        """
        # NOTE(review): SeqRecord receives a plain str here, whereas other
        # tests in this file wrap the text in Seq(...) first -- confirm this
        # is intentional.
        expected_cluster_seqs = [
            ['AaAA', 'aaaa'],
            ['bBBBB'],
            ['ddDDD', 'DDdDdD', 'DDdddD'],
        ]
        expected = [
            [Epitope(SeqRecord(seq)) for seq in cluster_seqs]
            for cluster_seqs in expected_cluster_seqs
        ]

        clusters = EpitopesClusters(EPITOPES_CLUSTERS_PATH,
                                    EPITOPES_FASTA_PATH)

        self.assertEqual(expected, list(clusters))
def _add_verified_regions_lst(epitope: Epitope, verified_regions_lst: List[Tuple[int, int]]) -> Epitope:
    """Add every region in ``verified_regions_lst`` to ``epitope`` and return it.

    Each region is passed, in list order, to ``epitope.add_verified_region``.
    The same epitope object that was passed in is returned, so the call can
    be used inline when building fixtures.
    """
    for region in verified_regions_lst:
        epitope.add_verified_region(region)

    return epitope
    def test_init_epitopes_dataset(self):
        """A dataset built from EPITOPES_BATCHES_PATHS should list the expected epitopes in order."""
        expected_seqs = (
            'aaaA',
            'aaAA',
            'B',
            'aaaA',
            'aAAa',
            'bBBbb',
            'bbbBB',
            'bbBBb',
            'cccCC',
            'ccCCc',
            'Dd',
            'dD',
        )
        expected = [Epitope(SeqRecord(Seq(seq))) for seq in expected_seqs]

        dataset = EpitopesDataset(EPITOPES_BATCHES_PATHS)

        self.assertEqual(expected, list(dataset))
    def test_split_epitopes_clusters_to_cv_groups_cv10(self):
        """Split the clusters fixture with cv_fold=10 and shuffling disabled.

        The clusters come from EPITOPES_CLUSTERS2_PATH / EPITOPES_FASTA2_PATH.
        The expected result below lists eight datasets, one tuple of
        sequences per dataset.
        """
        folds = 10
        expected_seqs_per_dataset = (
            ('aaaAA', 'Aaa', 'bbbBC', 'cccaaCd', 'DcDcDc', 'AAAAA'),
            ('aaaG', 'GCAcGcGa', 'aCGPfpc', 'cccccCCCccc', 'GgG', 'DDDDD'),
            ('EEeeeGGGDDD', 'BBBbbb', 'NMnMnM', 'KPkgK', 'AAAaaA',
             'AAAaaaA'),
            ('BBBbbBbB', 'CCCccC', 'GGGggGG', 'CcCcCc', 'cCcccc', 'ccCccC'),
            ('CccCC', 'cccCCcC', 'CccCCCccC', 'CcccCCcccc', 'cccCCCccC',
             'cccCccccc'),
            ('aaAAAaaAA', 'BBBBbbBB', 'bbbBBBBB', 'GGGGgg', 'GGGG'),
            ('TTTtTTT', 'HHHHHHhhh', 'HHHhhhhKK', 'kkkkKKKk', 'UUUuuuU',
             'GFgGgF', 'CCCcCBBb', 'mmmmmmMMMmm', 'BBbb'),
            ('GGGg', 'AAaa', 'AAAa'),
        )
        expected = [
            EpitopesDataset([Epitope(SeqRecord(Seq(seq))) for seq in seqs])
            for seqs in expected_seqs_per_dataset
        ]

        clusters = EpitopesClusters(EPITOPES_CLUSTERS2_PATH,
                                    EPITOPES_FASTA2_PATH)
        actual = split_epitopes_clusters_to_cv_datasets(
            clusters, folds, shuffle_clusters=False)

        self.assertEqual(expected, actual)