def test_augment_with_persist_as(self):
        """DataObsClient.augment with persist_as writes the result table."""
        do = DataObsClient(self.credentials)

        # Augment using discovery metadata; the columns added should be
        # exactly the metadata's suggested names.
        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        gdf = do.augment(self.test_data_table, meta)
        anscols = set(meta['suggested_name'])
        origcols = set(
            read_carto(self.test_data_table,
                       credentials=self.credentials,
                       limit=1,
                       decode_geom=True).columns)
        self.assertSetEqual(
            anscols,
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})

        # Augment with hand-written metadata and persist the result as a
        # new table on CARTO.
        meta = [
            {
                'numer_id': 'us.census.acs.B19013001',
                'geom_id': 'us.census.tiger.block_group',
                'numer_timespan': '2011 - 2015'
            },
        ]
        gdf = do.augment(self.test_data_table,
                         meta,
                         persist_as=self.test_write_table)
        self.assertSetEqual(
            set(('median_income_2011_2015', )),
            set(gdf.columns) - origcols - {'the_geom', 'cartodb_id'})
        self.assertEqual(gdf.index.name, 'cartodb_id')
        self.assertEqual(gdf.index.dtype, 'int64')

        # Read back the persisted table; it should mirror the returned frame.
        df = read_carto(self.test_write_table,
                        credentials=self.credentials,
                        decode_geom=False)

        self.assertEqual(df.index.name, 'cartodb_id')
        self.assertEqual(df.index.dtype, 'int64')

        # same number of rows
        self.assertEqual(len(df), len(gdf), msg='Expected number of rows')

        # same type of object
        self.assertIsInstance(df, pd.DataFrame, 'Should be a pandas DataFrame')
        # same column names
        self.assertSetEqual(set(gdf.columns.values),
                            set(df.columns.values),
                            msg='Should have the columns requested')

        # should have expected schema
        self.assertEqual(sorted(tuple(str(d) for d in df.dtypes)),
                         sorted(tuple(str(d) for d in gdf.dtypes)),
                         msg='Should have same schema/types')
    def test_augment_deprecation(self):
        """DataObsClient.augment emits a DeprecationWarning when called."""
        with warnings.catch_warnings(record=True):
            # The constructor itself warns; swallow that one so it does not
            # pollute the assertion block below.
            do = DataObsClient(self.credentials)

        with warnings.catch_warnings(record=True) as w:
            # 'always' guarantees the warning is recorded even if an
            # identical one was already emitted earlier in the process
            # (the default filter shows each warning only once).
            warnings.simplefilter('always')
            try:
                do.augment()
            except Exception:
                # Only the warning matters here; argument errors from the
                # intentionally-empty call are expected and ignored.
                pass

            assert issubclass(w[-1].category, DeprecationWarning)
            assert 'deprecated' in str(w[-1].message)
    def test_augment_column_name_collision(self):
        """DataObsClient.augment column name collision"""
        dup_col = 'female_third_level_studies_2011_by_female_pop'
        # Create a table whose column name collides with the measure's
        # suggested name, then CartoDBfy it so it can be augmented.
        self.sql_client.query("""
            create table {table} as (
                select cdb_latlng(40.4165,-3.70256) the_geom,
                       1 {dup_col})
            """.format(dup_col=dup_col, table=self.test_write_table))
        self.sql_client.query(
            "select cdb_cartodbfytable('public', '{table}')".format(
                table=self.test_write_table))

        do = DataObsClient(self.credentials)
        meta = do.discovery(region=self.test_write_table, keywords='female')
        # Restrict the metadata to the colliding measure only (the original
        # code applied this identical filter twice).
        meta = meta[meta.suggested_name == dup_col]
        gdf = do.augment(self.test_write_table, meta)

        # The colliding column should be disambiguated with a leading
        # underscore rather than overwriting the existing one.
        self.assertIn('_' + dup_col, gdf.keys())
    def test_augment(self):
        """DataObsClient.augment"""
        do = DataObsClient(self.credentials)

        # Columns already on the target table, plus CARTO system columns,
        # are excluded when checking what augment added.
        original_cols = set(
            read_carto(self.test_data_table,
                       credentials=self.credentials,
                       limit=1,
                       decode_geom=True).columns)
        system_cols = {'the_geom', 'cartodb_id'}

        # Augment from discovery metadata.
        metadata = do.discovery(self.test_read_table,
                                keywords=('poverty', ),
                                time=('2010 - 2014', ))
        augmented = do.augment(self.test_data_table, metadata)
        expected_cols = set(metadata['suggested_name'])
        self.assertSetEqual(
            expected_cols,
            set(augmented.columns) - original_cols - system_cols)

        # Augment from hand-written metadata.
        metadata = [
            {
                'numer_id': 'us.census.acs.B19013001',
                'geom_id': 'us.census.tiger.block_group',
                'numer_timespan': '2011 - 2015'
            },
        ]
        augmented = do.augment(self.test_data_table, metadata)
        self.assertSetEqual(
            {'median_income_2011_2015'},
            set(augmented.columns) - original_cols - system_cols)

        with self.assertRaises(ValueError, msg='no measures'):
            metadata = do.discovery('United States', keywords='not a measure')
            do.augment(self.test_read_table, metadata)

        with self.assertRaises(ValueError, msg='too many metadata measures'):
            # returns ~180 measures
            metadata = do.discovery(region='united states',
                                    keywords='education')
            do.augment(self.test_read_table, metadata)
    def test_discovery(self):
        """DataObsClient.discovery"""
        do = DataObsClient(self.credentials)

        meta = do.discovery(self.test_read_table,
                            keywords=('poverty', ),
                            time=('2010 - 2014', ))
        meta_columns = set(
            ('denom_aggregate', 'denom_colname', 'denom_description',
             'denom_geomref_colname', 'denom_id', 'denom_name',
             'denom_reltype', 'denom_t_description', 'denom_tablename',
             'denom_type', 'geom_colname', 'geom_description',
             'geom_geomref_colname', 'geom_id', 'geom_name',
             'geom_t_description', 'geom_tablename', 'geom_timespan',
             'geom_type', 'id', 'max_score_rank', 'max_timespan_rank',
             'normalization', 'num_geoms', 'numer_aggregate', 'numer_colname',
             'numer_description', 'numer_geomref_colname', 'numer_id',
             'numer_name', 'numer_t_description', 'numer_tablename',
             'numer_timespan', 'numer_type', 'score', 'score_rank',
             'score_rownum', 'suggested_name', 'target_area', 'target_geoms',
             'timespan_rank', 'timespan_rownum'))
        self.assertSetEqual(set(meta.columns),
                            meta_columns,
                            msg='metadata columns are all there')
        self.assertTrue((meta['numer_timespan'] == '2010 - 2014').all())
        self.assertTrue(
            (meta['numer_description'].str.contains('poverty')).all())

        # test region = list of lng/lats (a bbox needs four values)
        with self.assertRaises(ValueError):
            do.discovery([1, 2, 3])

        switzerland = [
            5.9559111595, 45.8179931641, 10.4920501709, 47.808380127
        ]
        dd = do.discovery(switzerland, keywords='freight', time='2010')
        self.assertEqual(dd['numer_id'][0], 'eu.eurostat.tgs00078')

        dd = do.discovery('Australia', regex='.*Torres Strait Islander.*')
        for nid in dd['numer_id'].values:
            # assertRegexpMatches is a deprecated alias removed in
            # Python 3.12; assertRegex is the supported name.
            self.assertRegex(
                nid,
                r'^au\.data\.B01_Indig_[A-Za-z_]+Torres_St[A-Za-z_]+[FMP]$')

        with self.assertRaises(CartoException):
            do.discovery('non_existent_table_abcdefg')

        dd = do.discovery('United States',
                          boundaries='us.epa.huc.hydro_unit',
                          time=(
                              '2006',
                              '2010',
                          ))
        self.assertTrue(dd.shape[0] >= 1)

        # quantile measures excluded when include_quantiles=False
        poverty = do.discovery('United States',
                               boundaries='us.census.tiger.census_tract',
                               keywords=[
                                   'poverty status',
                               ],
                               time='2011 - 2015',
                               include_quantiles=False)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertEqual(df_quantiles.shape[0], 0)

        # quantile measures included when include_quantiles=True
        poverty = do.discovery('United States',
                               boundaries='us.census.tiger.census_tract',
                               keywords=[
                                   'poverty status',
                               ],
                               time='2011 - 2015',
                               include_quantiles=True)
        df_quantiles = poverty[poverty.numer_aggregate == 'quantile']
        self.assertTrue(df_quantiles.shape[0] > 0)
    def test_boundaries(self):
        """DataObsClient.boundaries"""
        do = DataObsClient(self.credentials)

        expected_meta_cols = {'geom_id', 'geom_tags', 'geom_type'}

        # all boundary metadata
        all_meta = do.boundaries()
        self.assertTrue(all_meta.shape[0] > 0,
                        msg='has non-zero number of boundaries')
        self.assertTrue(expected_meta_cols & set(all_meta.columns))

        # boundary metadata with correct timespan
        meta_2015 = do.boundaries(timespan='2015')
        self.assertTrue(meta_2015[meta_2015.valid_timespan].shape[0] > 0)

        # test for no data with an incorrect or invalid timespan
        meta_invalid = do.boundaries(timespan='invalid_timespan')
        self.assertTrue(
            meta_invalid[meta_invalid.valid_timespan].shape[0] == 0)

        # boundary metadata in a region (table name, bbox, or place name)
        for region in (self.test_read_table,
                       self.test_data_table,
                       [5.9559111595, 45.8179931641,
                        10.4920501709, 47.808380127],
                       'Australia'):
            region_meta = do.boundaries(region=region)
            self.assertTrue(expected_meta_cols & set(region_meta.columns))
            self.assertTrue(region_meta.shape[0] > 0,
                            msg='has non-zero number of boundaries')

        #  boundaries for world
        world_bounds = do.boundaries(boundary='us.census.tiger.state')
        self.assertTrue(world_bounds.shape[0] > 0)
        self.assertEqual(world_bounds.shape[1], 2)
        self.assertSetEqual({'the_geom', 'geom_refs'},
                            set(world_bounds.columns))

        # boundaries for region
        for boundary_id in ('us.census.tiger.state', ):
            region_bounds = do.boundaries(boundary=boundary_id,
                                          region=self.test_data_table)
            self.assertTrue(region_bounds.shape[0] > 0)
            self.assertEqual(region_bounds.shape[1], 2)
            self.assertSetEqual({'the_geom', 'geom_refs'},
                                set(region_bounds.columns))

        # presence or lack of clipped boundaries
        for include_flag in (True, False):
            clipped_meta = do.boundaries(include_nonclipped=include_flag)
            self.assertEqual(
                'us.census.tiger.state' in set(clipped_meta.geom_id),
                include_flag)

        with self.assertRaises(ValueError):
            do.boundaries(region=[1, 2, 3])

        with self.assertRaises(ValueError):
            do.boundaries(region=10)
 def test_class_deprecation(self):
     with warnings.catch_warnings(record=True) as w:
         _ = DataObsClient(self.credentials)
         assert issubclass(w[-1].category, DeprecationWarning)
         assert 'deprecated' in str(w[-1].message)