def clusterize_by_rt(self, error_rt):
    """
    PUBLIC function
    Cluster ``self.peakels`` on their ``rt`` attribute, using the strategy
    selected by ``self.rt_method``:

    * ``1`` -- basic clustering: delegates to ``clusterize_basic`` with
      ``self.BASIC_RT_CALLABLE``; ``error_rt`` must be a ``float``.
    * ``2`` -- hierarchical clustering: delegates to
      ``clusterize_hierarchical`` with the pairwise distances between the
      rt values (``pdist`` default metric, i.e. euclidean).
    * ``3`` -- DBSCAN: delegates to ``clusterize_dbscan`` with a fixed
      ``eps=0.35``; ``error_rt`` is only logged in this mode.

    :param error_rt: rt error forwarded to the helpers of methods 1 and 2
    :return: the clusters produced by the selected helper (for method 2,
        the ``.values()`` of the helper's result as a list)
    :raises TypeError: method 1 is selected and ``error_rt`` is not a float
    :raises ValueError: ``self.rt_method`` is none of 1, 2, 3
    """
    method = self.rt_method

    if method == 1:
        logging.info("Basic clustering with rt_error:{0}".format(error_rt))
        # The type check happens after logging, and only for this method.
        if isinstance(error_rt, float):
            return clusterize_basic(self.peakels, self.BASIC_RT_CALLABLE, error_rt)
        raise TypeError("[clusterize]: args[0] is not a float")

    if method == 2:
        # One single-element row per peakel: pdist expects 2-D observations.
        rt_rows = np.array([[peakel.rt] for peakel in self.peakels])
        pairwise = sp.spatial.distance.pdist(rt_rows)
        grouped = clusterize_hierarchical(self.peakels, pairwise, "", error_rt)
        return list(grouped.values())

    if method == 3:
        logging.info("DB SCAN clustering with error_rt:{0}".format(error_rt))
        rt_rows = [[peakel.rt] for peakel in self.peakels]
        # NOTE: eps is hard-coded; error_rt does not influence this branch.
        return clusterize_dbscan(rt_rows, self.peakels, eps=0.35)

    raise ValueError("wrong clustering technique !")
def clusterize_by_rt(self, error_rt):
    """
    PUBLIC function
    Group ``self.peakels`` by their ``rt`` values. ``self.rt_method``
    picks the clustering helper:

    * ``1`` -- ``clusterize_basic`` driven by ``self.BASIC_RT_CALLABLE``
      (``error_rt`` has to be a ``float``, otherwise ``TypeError``),
    * ``2`` -- ``clusterize_hierarchical`` fed with the ``pdist`` distances
      (default metric, euclidean) between the rt values,
    * ``3`` -- ``clusterize_dbscan`` with ``eps`` fixed to ``0.35``; in this
      mode ``error_rt`` appears in the log message only.

    :param error_rt: rt error handed to the helpers of methods 1 and 2
    :return: clusters as returned by the chosen helper; method 2 returns
        ``list(result.values())``
    :raises TypeError: method 1 with a non-float ``error_rt``
    :raises ValueError: unknown ``self.rt_method``
    """
    if self.rt_method == 1:
        logging.info("Basic clustering with rt_error:{0}".format(error_rt))
        error_is_float = isinstance(error_rt, float)
        if not error_is_float:
            raise TypeError("[clusterize]: args[0] is not a float")
        basic_clusters = clusterize_basic(
            self.peakels, self.BASIC_RT_CALLABLE, error_rt
        )
        return basic_clusters
    elif self.rt_method == 2:
        # Wrap each rt in its own list so pdist sees 1-D observations.
        observations = [[item.rt] for item in self.peakels]
        distances = sp.spatial.distance.pdist(np.array(observations))
        by_label = clusterize_hierarchical(self.peakels, distances, "", error_rt)
        return list(by_label.values())
    elif self.rt_method == 3:
        logging.info('DB SCAN clustering with error_rt:{0}'.format(error_rt))
        observations = [[item.rt] for item in self.peakels]
        # NOTE: eps is a fixed constant here, not derived from error_rt.
        dbscan_clusters = clusterize_dbscan(observations, self.peakels, eps=0.35)
        return dbscan_clusters
    else:
        raise ValueError("wrong clustering technique !")
def test_clusterize_dbscan_rt(self):
    """Run ``clusterize_dbscan`` on the rt values of ``self.features``.

    Prints the rt of every member of every cluster, then checks that at
    most 4 clusters were produced with ``eps=3.0`` and ``min_samples=1``.
    """
    rt_points = [[feature.rt] for feature in self.features]
    clusters = clusterize_dbscan(rt_points, self.features, eps=3.0, min_samples=1)

    for cluster in clusters:
        for member in cluster:
            print(member.rt)
        # Separator between clusters in the printed output.
        print('\n')

    cluster_count = len(clusters)
    print(("len clusters dbscan: {0}".format(cluster_count)))
    # assertGreaterEqual(a, b) checks a >= b, i.e. no more than 4 clusters.
    self.assertGreaterEqual(4, cluster_count)
def test_dbscan_clustering_for_alignment(self):
    """Smoke-run ``clusterize_dbscan`` on ``[moz, rt]`` pairs from two samples.

    Builds seven peakels tagged with a sample name ('a' or 'b'), clusters
    them with ``eps=5`` and ``min_samples=1``, and prints the moz, rt and
    sample of every member of every cluster.

    NOTE(review): this test makes no assertions; it only fails if
    construction, clustering or the lookup raises. The expected grouping
    should be pinned once the return shape of ``clusterize_dbscan`` is
    confirmed.
    """
    # presumably Peakel's first argument is moz and its last is rt -- confirm
    # against the Peakel constructor.
    f1 = Peakel(1256.52, 0.0, 0.0, 100.0)
    f2 = Peakel(1258.52, 0.0, 0.0, 500.52)
    f3 = Peakel(1257.52, 0.0, 0.0, 101.52)
    f4 = Peakel(1600.52, 0.0, 0.0, 99.52)
    f7 = Peakel(1600.86, 0.0, 0.0, 107.12)
    f5 = Peakel(1600.52, 0.0, 0.0, 3205.52)
    f6 = Peakel(1456.52, 0.0, 0.0, 600.52)

    peakels = [f1, f2, f3, f4, f5, f6, f7]
    # Sample of origin for each peakel, used only to label the printout.
    sample_by_peakel = {
        f1: 'a', f2: 'a', f4: 'a',
        f3: 'b', f5: 'b', f6: 'b', f7: 'b',
    }

    values = [[peakel.moz, peakel.rt] for peakel in peakels]
    clusters = clusterize_dbscan(values, peakels, eps=5, min_samples=1)

    for cluster in clusters:
        for member in cluster:
            print(member.moz, member.rt, sample_by_peakel[member])
        # Separator between clusters in the printed output.
        print('\n')