def NOtest_vec_distances(self):
    """Compare PairwiseDistances (cosine) with a direct pairwise_distances call.

    The method name does not start with ``test``, so the default unittest
    loader does not collect it; it is kept here as a disabled test.

    Pipeline: a VECLoader over ``get_dataset('warlogs')`` feeds both its
    ``df`` and ``array`` outputs into a PairwiseDistances module, whose
    ``dist`` output is in turn connected to an Every module running
    ``print_len``.
    """
    s = Scheduler()
    vec = VECLoader(get_dataset('warlogs'), scheduler=s)
    dis = PairwiseDistances(metric='cosine', scheduler=s)
    dis.input.df = vec.output.df
    dis.input.array = vec.output.array
    cnt = Every(proc=print_len, constant_time=True, scheduler=s)
    cnt.input.df = dis.output.dist

    # Reset the module-level counter before running the scheduler.
    # NOTE(review): presumably consumed by a tick callback elsewhere in
    # this file -- confirm.
    global times
    times = 0
    s.start()

    df = vec.df()
    computed = dis.dist()
    # assertEqual, not assertEquals: the deprecated alias was removed from
    # unittest.TestCase in Python 3.12.
    self.assertEqual(computed.shape[0], len(df))

    # Reference result computed in one shot with the same metric.
    truth = pairwise_distances(vec.toarray(), metric=dis._metric)
    self.assertTrue(np.allclose(truth, computed))
def test_csv_distances(self):
    """Check euclidean PairwiseDistances on CSV data against a direct computation.

    A CSVLoader over ``get_dataset('smallfile')`` feeds a PairwiseDistances
    module; an Every module running ``print_len`` is attached to the
    distance output. After the scheduler has run, the leading 5000x5000
    window of the computed matrix is compared with ``pairwise_distances``
    evaluated on the corresponding rows of the loaded frame.
    """
    scheduler = Scheduler()
    loader = CSVLoader(
        get_dataset('smallfile'),
        index_col=False,
        header=None,
        scheduler=scheduler,
    )
    distances = PairwiseDistances(metric='euclidean', scheduler=scheduler)
    distances.input.df = loader.output.df
    counter = Every(proc=print_len, constant_time=True, scheduler=scheduler)
    counter.input.df = distances.output.dist

    # Reset the module-level counter before the scheduler runs.
    global times
    times = 0
    scheduler.start(ten_times)

    frame = loader.df()
    computed = distances.dist()
    # The whole-matrix shape check (computed.shape[0] == len(frame)) is
    # currently disabled; only the window below is verified.

    # Drop the loader's bookkeeping column so only data columns remain.
    del frame[CSVLoader.UPDATE_COLUMN]

    start = 0
    stop = start + 5000
    window = slice(start, stop)

    expected = pairwise_distances(frame.iloc[window], metric=distances._metric)
    observed = computed[window, window]

    # atol=1e-7 is looser than numpy.allclose's default absolute tolerance.
    matches = np.allclose(expected, observed, atol=1e-7)
    self.assertTrue(matches)