def test_spatial_markov(self):
    """Test Spatial Markov.

    Registers one mocked 'select' row per entry of ``self.neighbors_data``
    (the yearly values y1995..y2009 exposed as attr1..attr15), runs
    ``std.spatial_markov_trend`` under a fixed random seed, and compares
    the first element of every returned row with the first element of the
    matching row in ``self.markov_data``.
    """
    years = ['y1995', 'y1996', 'y1997', 'y1998', 'y1999',
             'y2000', 'y2001', 'y2002', 'y2003', 'y2004',
             'y2005', 'y2006', 'y2007', 'y2008', 'y2009']

    data = []
    for d in self.neighbors_data:
        row = {'id': d['id']}
        # attr1 <- y1995, attr2 <- y1996, ..., attr15 <- y2009
        for position, year in enumerate(years, 1):
            row['attr%d' % position] = d[year]
        row['neighbors'] = d['neighbors']
        data.append(row)
    print(str(data[0]))

    plpy._define_result('select', data)
    random_seeds.set_random_seeds(1234)

    result = std.spatial_markov_trend('subquery', years,
                                      5, 'knn', 5, 0,
                                      'the_geom', 'cartodb_id')

    # Identity check: `!= None` can be overridden (e.g. element-wise on
    # array-like results) and then fail for the wrong reason.
    self.assertIsNotNone(result)

    result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(result[0])

    expected = self.markov_data
    # Only the first column is compared; the remaining four are unpacked
    # solely to require five elements per row.
    # NOTE(review): zip() stops at the shorter sequence, so a length
    # mismatch between result and expected goes unnoticed - confirm intended.
    for ((res_trend, _res_up, _res_down, _res_vol, _res_id),
         (exp_trend, _exp_up, _exp_down, _exp_vol, _exp_id)
         ) in zip(result, expected):
        self.assertAlmostEqual(res_trend, exp_trend)
def test_moran_local_rate(self):
    """Test Moran's I rate

    Mocks the 'select' result from ``self.neighbors_data`` (with a constant
    ``attr2`` of 1), seeds the random generators and checks the first value
    of each row returned by ``cc.moran_local_rate`` against
    ``self.moran_data``.
    """
    mock_rows = []
    for entry in self.neighbors_data:
        mock_rows.append({
            'id': entry['id'],
            'attr1': entry['value'],
            'attr2': 1,
            'neighbors': entry['neighbors'],
        })
    plpy._define_result('select', mock_rows)

    random_seeds.set_random_seeds(1234)
    raw_rows = cc.moran_local_rate(
        'table', 'numerator', 'denominator',
        0.05, 5, 99,
        'the_geom', 'cartodb_id', 'knn')
    observed = [(row[0], row[1]) for row in raw_rows]
    wanted = self.moran_data

    # The second element of each pair is unpacked but not compared.
    for observed_pair, wanted_pair in zip(observed, wanted):
        obs_val, _obs_quad = observed_pair
        want_val, _want_quad = wanted_pair
        self.assertAlmostEqual(obs_val, want_val)
def test_moran(self):
    """Test Moran's I global

    Mocks the 'select' result from ``self.neighbors_data``, runs
    ``cc.moran`` under a fixed random seed and checks that ``result[0][0]``
    is within 0.1 of the mean of the first column of ``self.moran_data``.
    """
    data = [{"id": d["id"],
             "attr1": d["value"],
             "neighbors": d["neighbors"]}
            for d in self.neighbors_data]
    plpy._define_result("select", data)
    random_seeds.set_random_seeds(1235)

    result = cc.moran("table", "value", "knn", 5, 99,
                      "the_geom", "cartodb_id")

    # Same text as the former Python 2 print statement, but valid on
    # Python 3 too, and using an identity check instead of `== None`.
    print("result == None? %s" % (result is None))
    # Fail with a clear message instead of a TypeError on the index below.
    self.assertIsNotNone(result)

    result_moran = result[0][0]
    expected_moran = np.array([row[0] for row in self.moran_data]).mean()
    # delta=10e-2 is 0.1 (not 0.01).
    self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
def test_moran(self):
    """Test Moran's I global

    Builds the mocked 'select' rows from ``self.neighbors_data``, calls
    ``cc.moran`` with a fixed random seed and compares ``result[0][0]``
    with the mean of the first column of ``self.moran_data`` using a
    tolerance of 0.1.
    """
    data = [{
        'id': d['id'],
        'attr1': d['value'],
        'neighbors': d['neighbors']
    } for d in self.neighbors_data]
    plpy._define_result('select', data)
    random_seeds.set_random_seeds(1235)

    result = cc.moran('table', 'value', 'knn', 5, 99,
                      'the_geom', 'cartodb_id')

    # Portable replacement for the Python 2 print statement; the output
    # text is unchanged and None is tested by identity rather than `==`.
    print('result == None? %s' % (result is None))
    # Report a missing result as a test failure rather than a TypeError
    # raised by the subscript below.
    self.assertIsNotNone(result)

    result_moran = result[0][0]
    expected_moran = np.array([row[0] for row in self.moran_data]).mean()
    # delta=10e-2 evaluates to 0.1.
    self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
def test_kmeans(self):
    """Cluster the mocked ``self.cluster_data`` into two groups.

    Expects exactly two distinct labels (second element of each returned
    row) and 20 rows carrying each of the labels 0 and 1.
    """
    plpy._define_result('select', self.cluster_data)
    assignments = cc.kmeans('subquery', 2)

    seen_labels = [member[1] for member in assignments]
    group_zero = [member for member in assignments if member[1] == 0]
    group_one = [member for member in assignments if member[1] == 1]

    distinct_count = len(np.unique(seen_labels))
    self.assertEqual(distinct_count, 2)
    self.assertEqual(len(group_zero), 20)
    self.assertEqual(len(group_one), 20)
def test_moran_local(self):
    """Test Moran's I local

    Mocks the 'select' result from ``self.neighbors_data``, seeds the
    random generators, and checks both elements of every row returned by
    ``cc.moran_local`` against the matching row of ``self.moran_data``
    (approximately for the first, exactly for the second).
    """
    mock_rows = []
    for entry in self.neighbors_data:
        mock_rows.append({"id": entry["id"],
                          "attr1": entry["value"],
                          "neighbors": entry["neighbors"]})
    plpy._define_result("select", mock_rows)

    random_seeds.set_random_seeds(1234)
    raw_rows = cc.moran_local("subquery", "value", "knn", 5, 99,
                              "the_geom", "cartodb_id")
    observed = [(row[0], row[1]) for row in raw_rows]
    wanted = self.moran_data

    for observed_pair, wanted_pair in zip(observed, wanted):
        obs_val, obs_quad = observed_pair
        want_val, want_quad = wanted_pair
        self.assertAlmostEqual(obs_val, want_val)
        self.assertEqual(obs_quad, want_quad)
def test_moran_local(self):
    """Test Moran's I local

    Registers mocked 'select' rows built from ``self.neighbors_data``,
    fixes the random seed, and compares each (value, quadrant) pair
    returned by ``cc.moran_local`` with ``self.moran_data``: the first
    element approximately, the second exactly.
    """
    def as_mock_row(source):
        # Same three keys, read in the same order, as the inline dict.
        return {
            'id': source['id'],
            'attr1': source['value'],
            'neighbors': source['neighbors'],
        }

    plpy._define_result('select',
                        [as_mock_row(d) for d in self.neighbors_data])
    random_seeds.set_random_seeds(1234)

    local_rows = cc.moran_local('subquery', 'value', 'knn', 5, 99,
                                'the_geom', 'cartodb_id')
    actual_pairs = [(row[0], row[1]) for row in local_rows]
    reference_pairs = self.moran_data

    for (got_val, got_quad), (ref_val, ref_quad) in zip(actual_pairs,
                                                        reference_pairs):
        self.assertAlmostEqual(got_val, ref_val)
        self.assertEqual(got_quad, ref_quad)
def test_kmeans(self):
    """Run k-means with two clusters over the mocked ``self.cluster_data``.

    Verifies that the returned rows carry exactly two distinct labels (the
    second element of each row) and that labels 0 and 1 each occur on 20
    rows.
    """
    mock_rows = self.cluster_data
    plpy._define_result('select', mock_rows)

    clustered = cc.kmeans('subquery', 2)
    label_column = [item[1] for item in clustered]

    # One pass over the result per label, as in the original filters.
    members = {}
    for wanted_label in (0, 1):
        members[wanted_label] = [item for item in clustered
                                 if item[1] == wanted_label]

    self.assertEqual(len(np.unique(label_column)), 2)
    self.assertEqual(len(members[0]), 20)
    self.assertEqual(len(members[1]), 20)
def test_spatial_markov(self):
    """Test Spatial Markov.

    Mocks the 'select' result with one row per entry of
    ``self.neighbors_data`` (y1995..y2009 mapped to attr1..attr15), calls
    ``std.spatial_markov_trend`` with a fixed random seed, and checks the
    first element of each returned row against the first element of the
    corresponding row of ``self.markov_data``.
    """
    data = [{
        'id': d['id'],
        'attr1': d['y1995'],
        'attr2': d['y1996'],
        'attr3': d['y1997'],
        'attr4': d['y1998'],
        'attr5': d['y1999'],
        'attr6': d['y2000'],
        'attr7': d['y2001'],
        'attr8': d['y2002'],
        'attr9': d['y2003'],
        'attr10': d['y2004'],
        'attr11': d['y2005'],
        'attr12': d['y2006'],
        'attr13': d['y2007'],
        'attr14': d['y2008'],
        'attr15': d['y2009'],
        'neighbors': d['neighbors']
    } for d in self.neighbors_data]
    print(str(data[0]))

    plpy._define_result('select', data)
    random_seeds.set_random_seeds(1234)

    result = std.spatial_markov_trend('subquery', [
        'y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001',
        'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008',
        'y2009'
    ], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id')

    # Compare with None by identity; `!= None` dispatches to the result's
    # own __ne__, which may not return a plain bool.
    self.assertIsNotNone(result)

    result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(result[0])

    expected = self.markov_data
    # Only the trend (first) column is asserted; the other names exist to
    # keep the five-element unpacking of each row.
    # NOTE(review): zip() truncates to the shorter input, so differing
    # lengths are not detected here - confirm that is acceptable.
    for ((res_trend, _res_up, _res_down, _res_vol, _res_id),
         (exp_trend, _exp_up, _exp_down, _exp_vol, _exp_id)
         ) in zip(result, expected):
        self.assertAlmostEqual(res_trend, exp_trend)
def test_create_and_predict_segment(self):
    """Train a segment model on generated data and sanity-check its output.

    Training and test sets come from ``self.generate_random_data`` with
    fixed ``RandomState`` seeds. The mocked ``plpy`` is primed with a result
    for each query pattern below, then
    ``segmentation.create_and_predict_segment`` is called. The test checks
    that:

    * one result row is returned per test row,
    * ``result[0][2]`` is below 0.01,
    * the root-mean-square error between the second element of each result
      row and ``test_y`` is below half the mean of ``test_y``.
    """
    n_samples = 1000

    random_state_train = np.random.RandomState(13)
    random_state_test = np.random.RandomState(134)
    training_data = self.generate_random_data(n_samples,
                                              random_state_train)
    test_data, test_y = self.generate_random_data(n_samples,
                                                  random_state_test,
                                                  row_type=True)

    # list(...) keeps this a concrete list on Python 3 as on Python 2.
    ids = [{'cartodb_ids': list(range(len(test_data)))}]
    rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

    # Raw strings: the patterns contain backslashes (\*, \(, \_) that are
    # invalid escape sequences in ordinary literals. The runtime values are
    # unchanged.
    plpy._define_result(
        r'select \* from \(select \* from training\) a limit 1', rows)
    plpy._define_result(r'.*from \(select \* from training\) as a',
                        training_data)
    plpy._define_result(
        r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
        ids)
    plpy._define_result(r'.*select \* from test.*', test_data)

    model_parameters = {
        'n_estimators': 1200,
        'max_depth': 3,
        'subsample': 0.5,
        'learning_rate': 0.01,
        'min_samples_leaf': 1
    }

    result = segmentation.create_and_predict_segment(
        'select * from training', 'target', 'select * from test',
        model_parameters)

    prediction = [r[1] for r in result]
    # Root-mean-square error of the predictions against test_y.
    rmse = np.sqrt(
        np.mean(np.square(np.array(prediction) - np.array(test_y))))

    self.assertEqual(len(result), len(test_data))
    # assertLess reports both operands on failure, unlike assertTrue(a < b).
    self.assertLess(result[0][2], 0.01)
    self.assertLess(rmse, 0.5 * np.mean(test_y))
def test_create_and_predict_segment(self):
    """Exercise ``segmentation.create_and_predict_segment`` end to end.

    Generates training and test data through ``self.generate_random_data``
    with fixed seeds, registers a mocked ``plpy`` result for each query
    pattern, runs the segmentation call and asserts that:

    * the number of result rows equals the number of test rows,
    * ``result[0][2]`` is less than 0.01,
    * the RMSE between the second element of each result row and
      ``test_y`` is less than ``0.5 * mean(test_y)``.
    """
    n_samples = 1000
    random_state_train = np.random.RandomState(13)
    random_state_test = np.random.RandomState(134)

    training_data = self.generate_random_data(n_samples,
                                              random_state_train)
    test_data, test_y = self.generate_random_data(n_samples,
                                                  random_state_test,
                                                  row_type=True)

    # Materialise the range so the mocked value is a list on Python 3 too.
    ids = [{'cartodb_ids': list(range(len(test_data)))}]
    rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

    # Patterns are raw strings so that \*, \( and \_ are not treated as
    # (invalid) string escapes. The resulting text is identical.
    mocked_queries = [
        (r'select \* from \(select \* from training\) a limit 1', rows),
        (r'.*from \(select \* from training\) as a', training_data),
        (r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
         ids),
        (r'.*select \* from test.*', test_data),
    ]
    # Registered in the same order as the original four calls.
    for pattern, mocked_result in mocked_queries:
        plpy._define_result(pattern, mocked_result)

    model_parameters = {'n_estimators': 1200,
                        'max_depth': 3,
                        'subsample': 0.5,
                        'learning_rate': 0.01,
                        'min_samples_leaf': 1}

    result = segmentation.create_and_predict_segment(
        'select * from training',
        'target',
        'select * from test',
        model_parameters)

    prediction = [r[1] for r in result]
    residuals = np.array(prediction) - np.array(test_y)
    rmse = np.sqrt(np.mean(np.square(residuals)))

    self.assertEqual(len(result), len(test_data))
    # assertLess shows the compared values when the check fails.
    self.assertLess(result[0][2], 0.01)
    self.assertLess(rmse, 0.5 * np.mean(test_y))