Beispiel #1
0
 def test_ChainCollection_Hmatrix_calculation(self):
     """Check the hydrophobicity matrix shape of the FASTA test collection."""
     fasta_collection = ChainCollection(
         path='./tests/Data/chain_collection_fasta_test.fasta')
     fasta_collection.load(show_progressbar=False, verbose=False)

     matrix_shape = fasta_collection.hydrophobicity_matrix().shape

     # A mismatch here means that abysis has been updated.
     self.assertEqual(matrix_shape, (1, 158))
Beispiel #2
0
 def test_ChainCollection_append_2(self):
     """Append one loaded collection to another and check the matrix shape."""
     first_collection = ChainCollection(
         path='./tests/Data/chain_collection_1_heavy.json')
     first_collection.load(show_progressbar=False, verbose=False)

     second_collection = ChainCollection(
         path='./tests/Data/chain_collection_2_heavy.json')
     second_collection.load(show_progressbar=False, verbose=False)

     # ``append`` is called for its effect on the first collection.
     first_collection.append(second_collection)

     merged_shape = first_collection.hydrophobicity_matrix().shape
     self.assertEqual(merged_shape, (2, 158))
Beispiel #3
0
class Cluster:
    """Cluster the entries of a ``ChainCollection``.

    The feature matrix selected by ``metric`` is reduced with the configured
    decomposition, and the reduced data is then partitioned with the
    configured clustering method.  Only ``metric='hydrophobicity'``,
    ``decomposition_method='PCA'`` and ``clustering_method='kmeans'`` are
    implemented; any other value raises ``ValueError`` when it is used.

    Attributes:
        antibodies: The ``ChainCollection`` being clustered.
        metric: Name of the feature matrix to cluster on.
        clustering_method: Name of the clustering algorithm.
        decomposition_method: Name of the decomposition algorithm.
        cluster_assignment: Integer cluster label per antibody; all zeros
            until :meth:`cluster` has been run.
        cluster_assignment_dict: Maps ``'Cluster_<label>'`` to the list of
            antibody objects assigned to that cluster.
    """

    def __init__(self,
                 antibodies,
                 metric='hydrophobicity',
                 clustering_method='kmeans',
                 decomposition_method='PCA'):
        """Store the collection and the clustering configuration.

        Args:
            antibodies: A ``ChainCollection`` instance, or a string that is
                passed to ``ChainCollection(path=...)``.
            metric: Feature matrix to cluster on.
            clustering_method: Clustering algorithm to use.
            decomposition_method: Decomposition algorithm to use.

        Raises:
            TypeError: If ``antibodies`` is neither a ``ChainCollection`` nor
                a string.
        """
        if isinstance(antibodies, ChainCollection):
            self.antibodies = antibodies
        elif isinstance(antibodies, str):
            self.antibodies = ChainCollection(path=antibodies)
        else:
            # Fail here with a clear message instead of an AttributeError on
            # the first access to ``self.antibodies`` below.
            raise TypeError(
                'antibodies must be a ChainCollection or a path string, '
                'got {}'.format(type(antibodies).__name__))

        # A collection reporting zero antibodies has not been loaded yet.
        if self.antibodies.n_ab == 0:
            self.antibodies.load()

        self.metric = metric
        self.clustering_method = clustering_method
        self.decomposition_method = decomposition_method
        self.cluster_assignment = np.zeros(self.antibodies.n_ab, dtype=int)
        self.cluster_assignment_dict = dict()
        self._data = None

    def _collect_data(self):
        """Return the feature matrix selected by ``self.metric``.

        Raises:
            ValueError: If ``self.metric`` is not a supported metric.
        """
        if self.metric == 'hydrophobicity':
            return self.antibodies.hydrophobicity_matrix()

        raise ValueError('Unsupported metric: {!r}'.format(self.metric))

    def cluster(self, n_components=0.95, n_clusters=3):
        """Decompose the feature matrix and assign every antibody a cluster.

        The results are stored in ``cluster_assignment`` and
        ``cluster_assignment_dict``; both are rebuilt from scratch on every
        call.

        Args:
            n_components: Forwarded as the first argument to
                ``decomposition.PCA``.
            n_clusters: Forwarded to ``cluster.KMeans`` as ``n_clusters``.

        Raises:
            ValueError: If the configured decomposition method, clustering
                method or metric is not supported.
        """
        # Validate the configuration up front so that an unsupported name
        # gives a clear error rather than an UnboundLocalError further down.
        if self.decomposition_method != 'PCA':
            raise ValueError('Unsupported decomposition method: {!r}'.format(
                self.decomposition_method))
        if self.clustering_method != 'kmeans':
            raise ValueError('Unsupported clustering method: {!r}'.format(
                self.clustering_method))

        decomposition_obj = decomposition.PCA(n_components)
        self._data = decomposition_obj.fit_transform(self._collect_data())

        clustering_obj = cluster.KMeans(n_clusters=n_clusters)
        self.cluster_assignment = clustering_obj.fit_predict(self._data)

        # Start from an empty mapping: reusing the previous one would append
        # every antibody again each time ``cluster`` is re-run.
        grouped = dict()
        for i, antibody_obj in enumerate(self.antibodies.antibody_objects):
            assignment = 'Cluster_{}'.format(self.cluster_assignment[i])
            grouped.setdefault(assignment, list()).append(antibody_obj)
        self.cluster_assignment_dict = grouped

    def plot_cluster(self):
        """Scatter-plot the first two decomposed components per cluster.

        Runs :meth:`cluster` with its default arguments first if no cluster
        assignment has been computed yet.

        Raises:
            ValueError: If the decomposed data has fewer than two components.
        """
        if not self.cluster_assignment_dict:
            self.cluster()

        if self._data.shape[1] < 2:
            raise ValueError(
                'plot_cluster needs at least two decomposed components, '
                'got {}'.format(self._data.shape[1]))

        try:
            palette = plt.get_cmap('tab20').colors
        except ValueError:
            # Older Matplotlib releases expose this palette only under its
            # former name 'Vega20'.
            palette = plt.get_cmap('Vega20').colors

        plt.figure(figsize=(8, 8))

        labels = np.unique(self.cluster_assignment)
        for index, assignment in enumerate(labels):
            members = self.cluster_assignment == assignment

            # ``color=`` (not ``c=``) keeps a single RGB tuple from being
            # read as per-point values when a cluster has 3 or 4 members.
            # The modulo lets the palette wrap around when there are more
            # clusters than colours.
            plt.scatter(self._data[members, 0],
                        self._data[members, 1],
                        color=palette[index % len(palette)],
                        label='Cluster {}'.format(assignment))

        # Build the legend once, after every cluster has been drawn.
        plt.legend(loc='best', prop={"size": 14})