Example #1
0
    def clusterize_by_rt(self, error_rt):
        """
        PUBLIC function
        Cluster ``self.peakels`` on their ``rt`` attribute using the strategy
        selected by ``self.rt_method``:

        * ``1`` -- ``clusterize_basic`` with ``self.BASIC_RT_CALLABLE``;
          ``error_rt`` must be a ``float`` in this mode.
        * ``2`` -- ``clusterize_hierarchical`` fed with the pairwise
          distances between the ``rt`` values.
        * ``3`` -- ``clusterize_dbscan`` with a fixed ``eps`` of 0.35;
          ``error_rt`` is only written to the log in this mode.

        :param error_rt: tolerance on ``rt`` forwarded to the helper
                         (modes 1 and 2)
        :return: the clusters produced by the selected helper; mode 2 always
                 returns a list (the values of the helper's mapping)
        :raises TypeError: in mode 1 when ``error_rt`` is not a ``float``
        :raises ValueError: when ``self.rt_method`` is not 1, 2 or 3
        """
        method = self.rt_method

        if method == 1:
            logging.info("Basic clustering with rt_error:{0}".format(error_rt))
            if isinstance(error_rt, float):
                return clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
            raise TypeError("[clusterize]: args[0] is not a float")

        if method == 2:
            # One-element rows, so pdist receives one observation per peakel;
            # its default metric is Euclidean.
            rt_column = np.array([[peakel.rt] for peakel in self.peakels])
            condensed = sp.spatial.distance.pdist(rt_column)
            grouping = clusterize_hierarchical(self.peakels, condensed, "", error_rt)
            return list(grouping.values())

        if method == 3:
            logging.info("DB SCAN clustering with error_rt:{0}".format(error_rt))
            rt_rows = [[peakel.rt] for peakel in self.peakels]
            # NOTE: eps is hard-coded here; error_rt does not affect this mode.
            return clusterize_dbscan(rt_rows, self.peakels, eps=0.35)

        raise ValueError("wrong clustering technique !")
Example #2
0
    def clusterize_by_rt(self, error_rt):
        """
        PUBLIC function
        Dispatch the clustering of ``self.peakels`` (on their ``rt`` value)
        according to ``self.rt_method``.

        ``rt_method`` 1 delegates to ``clusterize_basic`` and insists on a
        ``float`` ``error_rt``; 2 delegates to ``clusterize_hierarchical``
        using a condensed distance matrix of the ``rt`` values; 3 delegates
        to ``clusterize_dbscan`` with ``eps`` fixed at 0.35 (``error_rt`` is
        logged but otherwise unused there).

        :param error_rt: tolerance on ``rt`` passed through in modes 1 and 2
        :return: clusters as produced by the chosen helper (a list of the
                 helper's mapping values in mode 2)
        :raises TypeError: mode 1 with a non-``float`` ``error_rt``
        :raises ValueError: ``self.rt_method`` outside 1, 2, 3
        """
        selected = self.rt_method

        if selected == 1:
            logging.info("Basic clustering with rt_error:{0}".format(error_rt))
            if not isinstance(error_rt, float):
                raise TypeError("[clusterize]: args[0] is not a float")
            comparator = self.BASIC_RT_CALLABLE
            return clusterize_basic(self.peakels, comparator, error_rt)

        if selected == 2:
            # Each peakel contributes a single-value row; pdist defaults to
            # the Euclidean metric.
            observations = np.array([[item.rt] for item in self.peakels])
            distances = sp.spatial.distance.pdist(observations)
            result = clusterize_hierarchical(self.peakels, distances, "", error_rt)
            return list(result.values())

        if selected == 3:
            logging.info('DB SCAN clustering with error_rt:{0}'.format(error_rt))
            observations = [[item.rt] for item in self.peakels]
            # NOTE: the neighbourhood radius is a constant, not derived from
            # error_rt.
            found = clusterize_dbscan(observations, self.peakels, eps=0.35)
            return found

        raise ValueError("wrong clustering technique !")
Example #3
0
    def test_clusterize_dbscan_rt(self):
        """Cluster ``self.features`` on ``rt`` only and bound the cluster count.

        Each cluster is dumped to stdout (one ``rt`` per line, clusters
        separated by blank lines) before the assertion runs.
        """
        rt_rows = [[feature.rt] for feature in self.features]
        clusters = clusterize_dbscan(rt_rows, self.features, eps=3.0, min_samples=1)

        for cluster in clusters:
            for member in cluster:
                print(member.rt)
            print('\n')

        n_clusters = len(clusters)
        print("len clusters dbscan: {0}".format(n_clusters))
        # assertGreaterEqual(a, b) checks a >= b: at most 4 clusters allowed.
        self.assertGreaterEqual(4, n_clusters)
Example #4
0
    def test_dbscan_clustering_for_alignment(self):
        """Run DBSCAN over ``[moz, rt]`` pairs of seven hand-made peakels.

        The clusters are only printed (``moz``, ``rt`` and sample label per
        peakel); there is no assertion, so the test fails only if clustering
        or printing raises.
        """
        # Built in the original order: f7 is created before f5 and f6.
        f1, f2, f3, f4, f7, f5, f6 = (
            Peakel(*args)
            for args in (
                (1256.52, 0.0, 0.0, 100.0),
                (1258.52, 0.0, 0.0, 500.52),
                (1257.52, 0.0, 0.0, 101.52),
                (1600.52, 0.0, 0.0, 99.52),
                (1600.86, 0.0, 0.0, 107.12),
                (1600.52, 0.0, 0.0, 3205.52),
                (1456.52, 0.0, 0.0, 600.52),
            )
        )

        peakels = [f1, f2, f3, f4, f5, f6, f7]
        # NOTE(review): never read below, and f7 is absent from it.
        peakels_by_sample = {'a': {f1, f2, f4}, 'b': {f3, f5, f6}}

        # Reverse lookup used when printing: peakel -> sample label.
        sample_by_peakel = {}
        for label, members in (('a', (f1, f2, f4)), ('b', (f3, f5, f6, f7))):
            for peakel in members:
                sample_by_peakel[peakel] = label

        coordinates = [[peakel.moz, peakel.rt] for peakel in peakels]
        clusters = clusterize_dbscan(coordinates, peakels, eps=5, min_samples=1)

        for cluster in clusters:
            for member in cluster:
                print(member.moz, member.rt, sample_by_peakel[member])
            print('\n')