Esempio n. 1
0
    def clusterize_by_rt(self, error_rt):
        """
        PUBLIC function
        Cluster ``self.peakels`` on retention time, using the technique
        selected by ``self.rt_method``:

        * 1 -- basic clustering (``clusterize_basic`` driven by
          ``self.BASIC_RT_CALLABLE`` and ``error_rt``)
        * 2 -- hierarchical clustering on the pairwise rt distances
        * 3 -- DBSCAN clustering on the rts

        :param error_rt: retention-time tolerance. Must be a float for
            technique 1; for technique 3 it is only logged.
        :return: whatever the selected helper produces; for technique 2 the
            values of the mapping returned by ``clusterize_hierarchical``,
            collected in a list
        :raises TypeError: technique 1 and ``error_rt`` is not a float
        :raises ValueError: ``self.rt_method`` is not 1, 2 or 3
        """
        method = self.rt_method

        if method == 1:
            logging.info("Basic clustering with rt_error:{0}".format(error_rt))
            if isinstance(error_rt, float):
                return clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
            raise TypeError("[clusterize]: args[0] is not a float")

        if method == 2:
            # one-column table of rts; pdist defaults to the euclidean metric
            rt_column = np.array([[peakel.rt] for peakel in self.peakels])
            distances = sp.spatial.distance.pdist(rt_column)
            # NOTE(review): other call sites pass the distance threshold as the
            # third argument; here it is "" and error_rt comes fourth -- confirm
            # against the signature of clusterize_hierarchical.
            grouped = clusterize_hierarchical(self.peakels, distances, "", error_rt)
            return list(grouped.values())

        if method == 3:
            logging.info("DB SCAN clustering with error_rt:{0}".format(error_rt))
            rt_column = [[peakel.rt] for peakel in self.peakels]
            # NOTE(review): eps is hard-coded to 0.35, error_rt is not used for
            # the clustering itself (an older comment mentioned
            # eps=error_rt / 2.0, min_samples=1) -- confirm this is intended.
            return clusterize_dbscan(rt_column, self.peakels, eps=0.35)

        raise ValueError("wrong clustering technique !")
Esempio n. 2
0
    def clusterize_by_rt(self, error_rt):
        """
        PUBLIC function
        Build retention-time clusters out of ``self.peakels``.

        ``self.rt_method`` picks the technique: 1 = basic clustering,
        2 = hierarchical clustering, 3 = DBSCAN clustering.

        :param error_rt: retention-time tolerance (float required when
            ``rt_method`` is 1; only logged when ``rt_method`` is 3)
        :return: the clusters produced by the selected helper (for
            technique 2: the values of the returned mapping, as a list)
        :raises TypeError: ``rt_method`` is 1 and ``error_rt`` is not a float
        :raises ValueError: ``rt_method`` is none of 1, 2, 3
        """
        technique = self.rt_method
        if technique not in (1, 2, 3):
            raise ValueError("wrong clustering technique !")

        if technique == 1:
            logging.info("Basic clustering with rt_error:{0}".format(error_rt))
            if not isinstance(error_rt, float):
                raise TypeError("[clusterize]: args[0] is not a float")
            clusters = clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
            return clusters

        if technique == 2:
            points = [[p.rt] for p in self.peakels]
            # euclidean metric (pdist default)
            condensed = sp.spatial.distance.pdist(np.array(points))
            # NOTE(review): "" sits where other callers pass the distance
            # threshold -- verify against clusterize_hierarchical's signature.
            by_label = clusterize_hierarchical(self.peakels, condensed, "", error_rt)
            return list(by_label.values())

        # technique == 3
        logging.info('DB SCAN clustering with error_rt:{0}'.format(error_rt))
        points = [[p.rt] for p in self.peakels]
        # NOTE(review): eps is fixed at 0.35 and error_rt is only logged; an
        # older comment referred to eps=error_rt / 2.0, min_samples=1.
        clusters = clusterize_dbscan(points, self.peakels, eps=0.35)
        return clusters
Esempio n. 3
0
    def _check_update_corr_shape_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_SHAPE_CORR):
        """
        PRIVATE function
        Sub-cluster the members of one rt cluster by peak-shape correlation
        and pass the result to ``self._split_rt_cluster``.

        ``self.corr_shape_method`` selects the technique: 1 = basic
        clustering with ``self.BASIC_CORR_SHAPE_CALLABLE``, 2 = hierarchical
        clustering on the correlation distance between intensity profiles.
        For any other value ``None`` is handed to ``_split_rt_cluster``.

        :param rt_cluster: iterable of members; technique 2 reads
            ``member.peaks`` and ``peak.intensity``
        :param distance_corr: threshold forwarded to the clustering helper
        :return: the value returned by ``self._split_rt_cluster``
        """
        method = self.corr_shape_method
        sub_clusters = None

        if method == 1:
            sub_clusters = clusterize_basic(rt_cluster, self.BASIC_CORR_SHAPE_CALLABLE, distance_corr)
        elif method == 2:
            # one intensity profile per member; a member without peaks gets [0]
            profiles = []
            for member in rt_cluster:
                if len(member.peaks):
                    profiles.append([peak.intensity for peak in member.peaks])
                else:
                    profiles.append([0])
            # NOTE(review): assumes all profiles have the same length so that
            # np.array builds a 2-D table -- confirm with the data model.
            distances = sp.spatial.distance.pdist(np.array(profiles), metric="correlation")
            sub_clusters = clusterize_hierarchical(rt_cluster, distances, distance_corr, clip=True)

        return self._split_rt_cluster(sub_clusters)
Esempio n. 4
0
 def _check_update_corr_shape_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_SHAPE_CORR):
     """
     PRIVATE function
     Cluster the members of an rt cluster on the correlation of their peak
     shapes, then delegate to ``self._split_rt_cluster``.

     Technique 1 (``self.corr_shape_method == 1``) uses ``clusterize_basic``
     with ``self.BASIC_CORR_SHAPE_CALLABLE``; technique 2 uses
     ``clusterize_hierarchical`` on correlation distances. With any other
     value, ``_split_rt_cluster`` receives ``None``.

     :param rt_cluster: iterable of members (technique 2 reads their
         ``peaks`` and each peak's ``intensity``)
     :param distance_corr: threshold forwarded to the clustering helper
     :return: the value returned by ``self._split_rt_cluster``
     """

     def intensity_profile(member):
         # intensities of the member's peaks, or [0] when it has none
         peaks = member.peaks
         return [p.intensity for p in peaks] if len(peaks) else [0]

     groups = None
     if self.corr_shape_method == 1:
         groups = clusterize_basic(rt_cluster, self.BASIC_CORR_SHAPE_CALLABLE, distance_corr)
     elif self.corr_shape_method == 2:
         table = np.array([intensity_profile(member) for member in rt_cluster])
         condensed = sp.spatial.distance.pdist(table, metric='correlation')
         groups = clusterize_hierarchical(rt_cluster, condensed, distance_corr, clip=True)

     return self._split_rt_cluster(groups)
Esempio n. 5
0
    def _check_update_corr_intensity_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_INT_CORR):
        """
        Private function
        Sub-cluster the members of one rt cluster by correlation of their
        per-sample areas.

        ``self.corr_int_method`` selects the technique: 1 = basic clustering
        with ``self.BASIC_CORR_INT_CALLABLE``, 2 = hierarchical clustering on
        correlation distances.

        :param rt_cluster: sized collection of members; technique 2 reads
            each member's ``area_by_sample_name`` mapping
        :param distance_corr: threshold forwarded to the clustering helper
        :return: ``[]`` when the cluster has exactly one member, otherwise
            the result of the selected clustering helper (not passed through
            ``_split_rt_cluster``)
        :raises ValueError: ``self.corr_int_method`` is neither 1 nor 2
        """
        # a lone member has nothing to be correlated with
        if len(rt_cluster) == 1:
            return []

        method = self.corr_int_method
        if method == 1:
            return clusterize_basic(rt_cluster, self.BASIC_CORR_INT_CALLABLE, distance_corr)
        if method != 2:
            raise ValueError("dbscan not supported for intensities correlation clustering")

        # one row of area values per member
        areas = np.array([list(member.area_by_sample_name.values()) for member in rt_cluster])
        # NOTE(review): pairwise_distances is presumably scikit-learn's and
        # returns a square matrix, whereas the shape-correlation sibling
        # passes a condensed scipy pdist vector -- confirm that
        # clusterize_hierarchical accepts both. n_jobs is left at its default.
        distances = pairwise_distances(areas, metric="correlation")
        return clusterize_hierarchical(rt_cluster, distances, distance_corr, clip=True)
Esempio n. 6
0
 def _check_update_corr_intensity_in_rt_cluster(self, rt_cluster, distance_corr=DEFAULT_INT_CORR):
     """
     Private function
     Cluster the members of an rt cluster on the correlation of the areas
     found in each member's ``area_by_sample_name``.

     :param rt_cluster: sized collection of members
     :param distance_corr: threshold forwarded to the clustering helper
     :return: an empty list for a one-member cluster, otherwise the output
         of ``clusterize_basic`` (``self.corr_int_method == 1``) or of
         ``clusterize_hierarchical`` (``self.corr_int_method == 2``)
     :raises ValueError: for any other ``self.corr_int_method``
     """
     single_member = len(rt_cluster) == 1
     if single_member:
         return []

     if self.corr_int_method == 1:
         result = clusterize_basic(rt_cluster, self.BASIC_CORR_INT_CALLABLE, distance_corr)
     elif self.corr_int_method == 2:
         rows = []
         for feature in rt_cluster:
             rows.append(list(feature.area_by_sample_name.values()))
         # NOTE(review): presumably scikit-learn's pairwise_distances (square
         # matrix output); n_jobs is not set -- confirm against the imports.
         dist_matrix = pairwise_distances(np.array(rows), metric='correlation')
         result = clusterize_hierarchical(rt_cluster, dist_matrix, distance_corr, clip=True)
     else:
         raise ValueError("dbscan not supported for intensities correlation clustering")

     # returned as-is; not passed through self._split_rt_cluster
     return result
Esempio n. 7
0
 def test_clusterize_hierarchical_int(self):
     # Hierarchical clustering of self.features on the correlation distance
     # between their per-sample areas must give at most 4 clusters.
     areas = np.array([list(feature.area_by_sample_name.values()) for feature in self.features])
     distances = sp.spatial.distance.pdist(areas, metric="correlation")  # correlation distance
     clusters = clusterize_hierarchical(self.features, distances, 0.1, clip=True)
     n_clusters = len(clusters)
     print("len clusters hierarchical int: {0}".format(n_clusters))
     # passes when 4 >= number of clusters
     self.assertGreaterEqual(4, n_clusters)
Esempio n. 8
0
 def test_clusterize_hierarchical(self):
     # Hierarchical clustering of self.features on their rt must give at
     # most 4 clusters with a threshold of 3.0.
     rt_column = np.array([[feature.rt] for feature in self.features])
     distances = sp.spatial.distance.pdist(rt_column)  # euclidean by default
     clusters = clusterize_hierarchical(self.features, distances, 3.0)
     n_clusters = len(clusters)
     print("len clusters hierarchical: {0}".format(n_clusters))
     # passes when 4 >= number of clusters
     self.assertGreaterEqual(4, n_clusters)