def test_spatial_markov(self):
        """Test Spatial Markov.

        Builds one row per entry of ``self.neighbors_data`` (id, the yearly
        values y1995..y2009 exposed as attr1..attr15, and the neighbor
        list), registers the rows with ``plpy._define_result`` under
        'select', runs ``std.spatial_markov_trend`` and compares the first
        column of every returned row with ``self.markov_data``.
        """
        # Single source of truth for the time columns: the same list feeds
        # the fixture rows and the call under test.
        time_cols = ['y%d' % year for year in range(1995, 2010)]

        data = []
        for d in self.neighbors_data:
            row = {'id': d['id'], 'neighbors': d['neighbors']}
            # attr1 -> y1995 ... attr15 -> y2009
            for position, col in enumerate(time_cols, 1):
                row['attr%d' % position] = d[col]
            data.append(row)
        print(str(data[0]))
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1234)

        result = std.spatial_markov_trend('subquery', time_cols, 5, 'knn', 5,
                                          0, 'the_geom', 'cartodb_id')

        # Identity check against None; `!= None` can be hijacked by __ne__.
        self.assertIsNotNone(result)
        result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(result[0])
        expected = self.markov_data
        # Only the trend column is compared; the remaining columns are
        # unpacked so that a row of unexpected width still fails loudly.
        for ([res_trend, res_up, res_down, res_vol, res_id],
             [exp_trend, exp_up, exp_down, exp_vol, exp_id]
             ) in zip(result, expected):
            self.assertAlmostEqual(res_trend, exp_trend)
Example #2
0
 def test_moran_local_rate(self):
     """Check the first column returned by cc.moran_local_rate against self.moran_data."""
     # One fixture row per neighbor record; the second attribute is a
     # constant 1 for every row.
     rows = []
     for d in self.neighbors_data:
         rows.append({'id': d['id'],
                      'attr1': d['value'],
                      'attr2': 1,
                      'neighbors': d['neighbors']})
     plpy._define_result('select', rows)
     random_seeds.set_random_seeds(1234)
     output = cc.moran_local_rate('table', 'numerator', 'denominator',
                                  0.05, 5, 99, 'the_geom', 'cartodb_id',
                                  'knn')
     observed = [(row[0], row[1]) for row in output]
     expected = self.moran_data
     for got, want in zip(observed, expected):
         res_val, res_quad = got
         exp_val, exp_quad = want
         # Only the value column is compared here.
         self.assertAlmostEqual(res_val, exp_val)
 def test_moran(self):
     """Test Moran's I global.

     Registers one row per entry of ``self.neighbors_data`` with
     ``plpy._define_result`` under "select", runs ``cc.moran`` and checks
     that the first value of the first returned row lies within 0.1 of
     the mean of the first column of ``self.moran_data``.
     """
     data = [{"id": d["id"], "attr1": d["value"], "neighbors": d["neighbors"]} for d in self.neighbors_data]
     plpy._define_result("select", data)
     random_seeds.set_random_seeds(1235)
     result = cc.moran("table", "value", "knn", 5, 99, "the_geom", "cartodb_id")
     # Fail with an explicit assertion rather than a TypeError from the
     # indexing below when nothing is returned.
     self.assertIsNotNone(result)
     result_moran = result[0][0]
     expected_moran = np.array([row[0] for row in self.moran_data]).mean()
     # Absolute tolerance of 0.1.
     self.assertAlmostEqual(expected_moran, result_moran, delta=0.1)
 def test_moran(self):
     """Test Moran's I global.

     Registers one row per entry of ``self.neighbors_data`` with
     ``plpy._define_result`` under 'select', runs ``cc.moran`` and checks
     that the first value of the first returned row lies within 0.1 of
     the mean of the first column of ``self.moran_data``.
     """
     data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
     plpy._define_result('select', data)
     random_seeds.set_random_seeds(1235)
     result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
     # An explicit assertion gives a clearer failure than the TypeError
     # the indexing below would raise on a None result.
     self.assertIsNotNone(result)
     result_moran = result[0][0]
     expected_moran = np.array([row[0] for row in self.moran_data]).mean()
     # Absolute tolerance of 0.1.
     self.assertAlmostEqual(expected_moran, result_moran, delta=0.1)
    def test_kmeans(self):
        """Check that cc.kmeans yields two labels with 20 rows each."""
        plpy._define_result('select', self.cluster_data)
        assignments = cc.kmeans('subquery', 2)

        # The second element of every returned row is read as its label.
        labels = [row[1] for row in assignments]
        members_of_0 = [row for row in assignments if row[1] == 0]
        members_of_1 = [row for row in assignments if row[1] == 1]

        distinct_labels = np.unique(labels)
        self.assertEqual(len(distinct_labels), 2)
        self.assertEqual(len(members_of_0), 20)
        self.assertEqual(len(members_of_1), 20)
 def test_moran_local(self):
     """Compare cc.moran_local values and quadrants with self.moran_data."""
     rows = []
     for d in self.neighbors_data:
         rows.append({"id": d["id"],
                      "attr1": d["value"],
                      "neighbors": d["neighbors"]})
     plpy._define_result("select", rows)
     random_seeds.set_random_seeds(1234)
     output = cc.moran_local("subquery", "value", "knn", 5, 99,
                             "the_geom", "cartodb_id")
     observed = [(row[0], row[1]) for row in output]
     expected = self.moran_data
     for got, want in zip(observed, expected):
         res_val, res_quad = got
         exp_val, exp_quad = want
         # Value is compared approximately, quadrant exactly.
         self.assertAlmostEqual(res_val, exp_val)
         self.assertEqual(res_quad, exp_quad)
 def test_moran_local(self):
     """Run cc.moran_local on the neighbor fixture and verify each (value, quadrant) pair."""
     def to_row(d):
         # Shape of one fixture row handed to the plpy stub.
         return {'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors']}

     fixture = [to_row(d) for d in self.neighbors_data]
     plpy._define_result('select', fixture)
     random_seeds.set_random_seeds(1234)
     raw = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id')
     pairs = [(row[0], row[1]) for row in raw]
     expected = self.moran_data
     for (res_val, res_quad), (exp_val, exp_quad) in zip(pairs, expected):
         self.assertAlmostEqual(res_val, exp_val)
         self.assertEqual(res_quad, exp_quad)
    def test_kmeans(self):
        """Expect cc.kmeans to return exactly two labels, each covering 20 rows."""
        fixture = self.cluster_data
        plpy._define_result('select', fixture)
        clusters = cc.kmeans('subquery', 2)

        def members(label):
            # Rows whose second element equals the given label.
            return [row for row in clusters if row[1] == label]

        labels = [row[1] for row in clusters]
        size_0 = len(members(0))
        size_1 = len(members(1))

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(size_0, 20)
        self.assertEqual(size_1, 20)
Example #9
0
    def test_spatial_markov(self):
        """Test Spatial Markov.

        Each entry of ``self.neighbors_data`` becomes one fixture row with
        its id, its neighbor list and the yearly values y1995..y2009
        exposed as attr1..attr15. The rows are registered with
        ``plpy._define_result`` under 'select'; the first column of every
        row returned by ``std.spatial_markov_trend`` is then compared with
        the matching row of ``self.markov_data``.
        """
        # The same column list drives both the fixture and the call, so
        # the two cannot get out of step.
        time_cols = ['y{0}'.format(year) for year in range(1995, 2010)]

        def as_row(d):
            # attr1 -> y1995 ... attr15 -> y2009
            row = dict(('attr{0}'.format(position), d[col])
                       for position, col in enumerate(time_cols, 1))
            row['id'] = d['id']
            row['neighbors'] = d['neighbors']
            return row

        data = [as_row(d) for d in self.neighbors_data]
        print(str(data[0]))
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1234)

        result = std.spatial_markov_trend('subquery', time_cols, 5, 'knn', 5,
                                          0, 'the_geom', 'cartodb_id')

        # Identity check against None instead of `!= None`.
        self.assertIsNotNone(result)
        result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
        # Parenthesised form is valid on both Python 2 and Python 3.
        print(result[0])
        expected = self.markov_data
        # Only the trend column is asserted; unpacking the rest keeps the
        # check that every row has exactly five fields.
        for ([res_trend, res_up, res_down, res_vol, res_id],
             [exp_trend, exp_up, exp_down, exp_vol, exp_id]
             ) in zip(result, expected):
            self.assertAlmostEqual(res_trend, exp_trend)
    def test_create_and_predict_segment(self):
        """Train on synthetic data and sanity-check the predictions.

        Generates 1000 training and 1000 test samples with fixed seeds via
        ``self.generate_random_data``, registers canned results with
        ``plpy._define_result`` and calls
        ``segmentation.create_and_predict_segment``. The test asserts that
        one result row comes back per test row, that the third field of
        the first row is below 0.01, and that the RMSE of the predictions
        against ``test_y`` is below half the mean of ``test_y``.
        """
        n_samples = 1000

        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples,
                                                  random_state_train)
        test_data, test_y = self.generate_random_data(n_samples,
                                                      random_state_test,
                                                      row_type=True)

        # Materialise the ids so the fixture is a list on Python 2 and 3.
        ids = [{'cartodb_ids': list(range(len(test_data)))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

        # NOTE(review): these keys look like regular expressions matched
        # against the issued queries - confirm against the plpy stub.
        # Raw strings keep the backslashes without invalid-escape warnings.
        plpy._define_result(
            r'select \* from  \(select \* from training\) a  limit 1', rows)
        plpy._define_result(r'.*from \(select \* from training\) as a',
                            training_data)
        plpy._define_result(
            r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
            ids)
        plpy._define_result(r'.*select \* from test.*', test_data)

        model_parameters = {
            'n_estimators': 1200,
            'max_depth': 3,
            'subsample': 0.5,
            'learning_rate': 0.01,
            'min_samples_leaf': 1
        }

        result = segmentation.create_and_predict_segment(
            'select * from training', 'target', 'select * from test',
            model_parameters)

        prediction = [r[1] for r in result]

        # Root-mean-square error of the predictions against the targets.
        rmse = np.sqrt(
            np.mean(np.square(np.array(prediction) - np.array(test_y))))

        self.assertEqual(len(result), len(test_data))
        # assertLess reports both operands on failure.
        self.assertLess(result[0][2], 0.01)
        self.assertLess(rmse, 0.5 * np.mean(test_y))
Example #11
0
    def test_create_and_predict_segment(self):
        """Check create_and_predict_segment on seeded synthetic data.

        Training and test sets of 1000 samples each come from
        ``self.generate_random_data`` with fixed ``RandomState`` seeds.
        Canned results are registered with ``plpy._define_result`` before
        ``segmentation.create_and_predict_segment`` is called. Assertions:
        the result has one row per test row, the third field of the first
        row is below 0.01, and the prediction RMSE against ``test_y`` is
        below half the mean of ``test_y``.
        """
        n_samples = 1000

        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples, random_state_train)
        test_data, test_y = self.generate_random_data(
            n_samples, random_state_test, row_type=True)

        # list(...) gives the same list fixture on Python 2 and Python 3.
        ids = [{'cartodb_ids': list(range(len(test_data)))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

        # NOTE(review): the keys appear to be regex patterns for the
        # queries the code under test issues - verify against the plpy
        # stub. Raw strings preserve the backslashes exactly.
        plpy._define_result(
            r'select \* from  \(select \* from training\) a  limit 1', rows)
        plpy._define_result(
            r'.*from \(select \* from training\) as a', training_data)
        plpy._define_result(
            r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
            ids)
        plpy._define_result(r'.*select \* from test.*', test_data)

        model_parameters = {
            'n_estimators': 1200,
            'max_depth': 3,
            'subsample': 0.5,
            'learning_rate': 0.01,
            'min_samples_leaf': 1,
        }

        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)

        prediction = [r[1] for r in result]

        # Root-mean-square error between predictions and true targets.
        residuals = np.array(prediction) - np.array(test_y)
        rmse = np.sqrt(np.mean(np.square(residuals)))

        self.assertEqual(len(result), len(test_data))
        # assertLess shows both values when the comparison fails.
        self.assertLess(result[0][2], 0.01)
        self.assertLess(rmse, 0.5 * np.mean(test_y))