Example #1
0
 def NOtest_vec_distances(self):
     """Compare PairwiseDistances (cosine) with a reference distance matrix.

     Wires a VECLoader on the 'warlogs' dataset into a PairwiseDistances
     module, runs the scheduler, then checks that the computed matrix has
     one row per loaded record and matches ``pairwise_distances`` applied
     to the loader's array.

     NOTE(review): the ``NOtest_`` prefix keeps the default unittest loader
     (which collects methods whose names start with ``test``) from running
     this method -- presumably it was disabled on purpose; confirm before
     renaming.
     """
     s = Scheduler()
     vec = VECLoader(get_dataset('warlogs'), scheduler=s)
     dis = PairwiseDistances(metric='cosine', scheduler=s)
     # The distance module consumes both outputs of the loader.
     dis.input.df = vec.output.df
     dis.input.array = vec.output.array
     # `cnt` is never read afterwards; it is created only so that the
     # module is attached to the distance output on the scheduler.
     cnt = Every(proc=print_len, constant_time=True, scheduler=s)
     cnt.input.df = dis.output.dist
     # Reset the module-level counter (presumably read by helpers defined
     # elsewhere in this file -- verify).
     global times
     times = 0
     s.start()
     df = vec.df()
     computed = dis.dist()
     # assertEqual, not assertEquals: the latter is a deprecated alias that
     # no longer exists on TestCase in Python 3.12+.
     self.assertEqual(computed.shape[0], len(df))
     truth = pairwise_distances(vec.toarray(), metric=dis._metric)
     self.assertTrue(np.allclose(truth, computed))
Example #2
0
    def test_csv_distances(self):
        """Compare PairwiseDistances (euclidean) with a reference on CSV data.

        Loads the 'smallfile' dataset through a CSVLoader, feeds it to a
        PairwiseDistances module, runs the scheduler with ``ten_times`` and
        checks the leading 5000x5000 window of the computed matrix against
        ``pairwise_distances`` evaluated on the same rows.
        """
        global times

        scheduler = Scheduler()
        loader = CSVLoader(
            get_dataset('smallfile'),
            index_col=False,
            header=None,
            scheduler=scheduler,
        )
        distances = PairwiseDistances(metric='euclidean', scheduler=scheduler)
        distances.input.df = loader.output.df

        # Not read afterwards; created only so the module is attached to
        # the distance output on the scheduler.
        counter = Every(proc=print_len, constant_time=True, scheduler=scheduler)
        counter.input.df = distances.output.dist

        times = 0
        scheduler.start(ten_times)

        frame = loader.df()
        computed = distances.dist()
        # NOTE: the check that computed.shape[0] equals len(frame) is
        # intentionally not performed here (it was disabled in this test).

        # Drop the loader's bookkeeping column in place so that only the
        # data columns take part in the reference computation.
        del frame[CSVLoader.UPDATE_COLUMN]

        start = 0
        window = slice(start, start + 5000)
        expected = pairwise_distances(frame.iloc[window], metric=distances._metric)
        actual = computed[window, window]
        # atol=1e-7 is looser than numpy's default absolute tolerance (1e-8).
        self.assertTrue(np.allclose(expected, actual, atol=1e-7))