예제 #1
0
def test_write_and_read_equivalency(setup_env):
    '''CSV Test 4: Ensure data and attr consistency after write and re-read'''

    source = metacsv.read_csv(os.path.join(testdata_prefix, 'test1.csv'))
    source.attrs['other stuff'] = 'this should show up after write'
    source['new_col'] = np.random.random((len(source), 1))

    tmpfile = os.path.join(test_tmp_prefix, 'test_write_1.csv')
    source.to_csv(tmpfile)
    roundtrip = metacsv.read_csv(tmpfile)

    # Exercise the repr machinery on both objects
    repr(source)
    repr(roundtrip)

    def check_equivalent(a, b):
        # Values may differ by float round-off introduced by the CSV text
        assert (abs(a.values - b.values) < 1e-7).all().all()
        assert a.coords == b.coords
        assert a.variables == b.variables

    check_equivalent(source, roundtrip)

    # Repeat the round trip through explicit file objects
    with open(tmpfile, 'w+') as tmp:
        source.to_csv(tmp)

    with open(tmpfile, 'r') as tmp:
        roundtrip = metacsv.read_csv(tmp)

    check_equivalent(source, roundtrip)
예제 #2
0
    def test_assertions(self):
        """Exercise read_csv's assertions against attrs and variables."""
        fp = os.path.join(self.testdata_prefix, 'test7.csv')

        # Exact-match assertion on a header attribute
        df = metacsv.read_csv(
            fp, parse_vars=True,
            assertions={'attrs': {'version': 'test5.2016-05-01.01'}})

        # Callable assertion on a header attribute
        df = metacsv.read_csv(
            fp, parse_vars=True,
            assertions={'attrs': {'version': lambda x: x > 'test5.2016-05-01.00'}})

        # Assertion on a variable's metadata
        df = metacsv.read_csv(
            fp, parse_vars=True,
            assertions={'variables': {'col2': {'unit': 'digits'}}})
예제 #3
0
    def test_write_and_read_equivalency(self):
        '''CSV Test 4: Ensure data and attr consistency after write and re-read'''

        original = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test1.csv'))
        original.attrs['other stuff'] = 'this should show up after write'
        original['new_col'] = np.random.random((len(original), 1))

        tmpfile = os.path.join(self.test_tmp_prefix, 'test_write_1.csv')
        original.to_csv(tmpfile)
        reread = metacsv.read_csv(tmpfile)

        # Exercise the repr machinery on both objects
        repr(original)
        repr(reread)

        self.assertTrue((abs(original.values - reread.values) < 1e-7).all().all())
        self.assertEqual(original.coords, reread.coords)
        self.assertEqual(original.variables, reread.variables)

        # Same round trip, but through open file handles
        with open(tmpfile, 'w+') as tmp:
            original.to_csv(tmp)

        with open(tmpfile, 'r') as tmp:
            reread = metacsv.read_csv(tmp)

        self.assertTrue((abs(original.values - reread.values) < 1e-7).all().all())
        self.assertEqual(original.coords, reread.coords)
        self.assertEqual(original.variables, reread.variables)
예제 #4
0
    def __init__(self, iam, ssp):
        """iam and ssp should be as described in the files (e.g., iam = 'low', ssp = 'SSP3')"""

        # NOTE(review): baseline_year, baseline_filepath, growth_filepath and
        # nightlights_filepath are module-level globals defined elsewhere.
        super(GDPpcProvider, self).__init__(iam, ssp, baseline_year)

        # Load the baseline values, and split by priority of data:
        # exact model+scenario slice first, then the per-ISO median across
        # IAMs, then a global median as the final fallback.
        df = metacsv.read_csv(files.sharedpath(baseline_filepath))
        self.df_baseline_this = df.loc[(df.model == iam) & (df.scenario == ssp)
                                       & (df.year == baseline_year)]
        self.df_baseline_anyiam = df.loc[(df.scenario == ssp) & (
            df.year == baseline_year)].groupby('iso').median()
        self.baseline_global = df.loc[(df.scenario == ssp)
                                      & (df.year == baseline_year)].median()

        # Load the growth rates, and split by priority of data
        df_growth = metacsv.read_csv(files.sharedpath(growth_filepath))
        # presumably the growth data is on 5-year steps — yearindex maps a
        # year to its step offset from the baseline; TODO confirm against data
        df_growth['yearindex'] = np.int_((df_growth.year - baseline_year) / 5)
        self.df_growth_this = df_growth.loc[(df_growth.model == iam)
                                            & (df_growth.scenario == ssp)]
        self.df_growth_anyiam = df_growth.loc[(
            df_growth.scenario == ssp)].groupby(['iso', 'year']).median()
        self.growth_global = df_growth.loc[(df_growth.scenario == ssp)
                                           & (df_growth.model == iam)].groupby(
                                               ['year']).median()

        # Load the nightlights
        self.df_nightlights = metacsv.read_csv(
            files.sharedpath(nightlights_filepath))

        # Cache for ISO-level GDPpc series
        self.cached_iso_gdppcs = {}
예제 #5
0
def test_assertions(setup_env):
    """Check read_csv's assertions argument for attrs and variables."""
    fp = os.path.join(testdata_prefix, 'test7.csv')

    # Literal value assertion on a header attribute
    df = metacsv.read_csv(
        fp,
        parse_vars=True,
        assertions={'attrs': {'version': 'test5.2016-05-01.01'}})

    # Callable assertion on a header attribute
    df = metacsv.read_csv(
        fp,
        parse_vars=True,
        assertions={'attrs': {'version': lambda x: x > 'test5.2016-05-01.00'}})

    # Assertion on a variable's metadata
    df = metacsv.read_csv(
        fp,
        parse_vars=True,
        assertions={'variables': {'col2': {'unit': 'digits'}}})
예제 #6
0
    def retrieve(cls, archive_name, api=None):
        """Fetch an archive by name, caching the result in cls._data.

        Supports .csv archives (read via metacsv) and netCDF archives
        (extension beginning with '.nc'); raises ValueError otherwise.
        """

        # Serve from the class-level cache when possible
        if archive_name in cls._data:
            return cls._data[archive_name]

        if api is None:

            # Lazily create and memoize a datafs API instance
            if cls._api is None:
                cls._api = datafs.get_api()

            api = cls._api

        archive = api.get_archive(archive_name)

        if archive_name.endswith('.csv'):
            with archive.open() as f:
                data = metacsv.read_csv(f)

        # NOTE(review): the [:3] slice matches any extension starting with
        # '.nc' (e.g. '.nc', '.nc4') — presumably intentional
        elif os.path.splitext(archive_name)[-1][:3] == '.nc':
            with archive.get_local_path() as fp:
                data = xr.open_dataset(fp).load()

        else:
            raise ValueError(
                'file type not recognized: "{}"'.format(archive_name))

        # Cache before returning so subsequent calls hit the fast path
        cls._data[archive_name] = data

        return data
예제 #7
0
    def test_attr_updating(self):
        """coords survive slicing and can be reassigned after deletion."""

        df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))
        # Make 's2' a base coordinate and attach it to 'ind3'
        df.coords.update({'ind3': ['s2'], 's2': None})
        coords = df.coords

        # Send to xarray.Dataset
        ds = df.to_xarray()

        del df.coords

        # Create a similarly indexed series by
        # applying coords after the slice operation
        s = df['col1']
        s.coords = coords

        # Send to xarray.DataArray
        da = s.to_xarray()

        self.assertTrue((ds.col1 == da).all().all())

        # add_coords should also work on a bare DataFrame...
        df = metacsv.DataFrame(np.random.random((3,4)))
        df.add_coords()
        del df.coords

        # ...and after assigning a MultiIndex
        df.index = pd.MultiIndex.from_tuples([('a','x'),('b','y'),('c','z')])
        df.add_coords()
예제 #8
0
def get_pop():
    """Build a merged population table covering all terminal GCP regions.

    Combines the merged population baseline with rows for regions missing
    from it (filled from LandScan data via get_missing_pop), sorted by
    region and year.
    """

    # LandScan gridded population aggregated to GCP regions; the CSV has a
    # 17-row preamble before the header line.
    lspop = pd.read_csv(
        files.sharedpath('social/processed/LandScan2011/gcpregions.csv'),
        header=17)

    # Terminal (leaf) regions from the GCP region hierarchy
    dfir = metacsv.read_csv(files.sharedpath("regions/hierarchy_metacsv.csv"))
    dfir = dfir.loc[dfir.is_terminal].reset_index(drop=True)
    dfir.drop([
        'parent-key', 'name', 'alternatives', 'is_terminal', 'gadmid',
        'agglomid', 'notes'
    ],
              axis=1,
              inplace=True)
    dfir.rename(columns={'region-key': 'region'}, inplace=True)
    dfir['iso'] = dfir.region.apply(lambda x: x[:3])

    df = pd.read_csv(
        files.sharedpath(
            'social/baselines/population/merged/population-merged.all.csv'))

    df.rename(columns={'value': 'pop'}, inplace=True)
    df['iso'] = df.region.apply(lambda x: x[:3])
    df.drop(['index'], axis=1, inplace=True)
    df = df.loc[df.year > 2005]

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent.
    dff = pd.concat(
        [df, get_missing_pop(df, dfir, lspop)],
        sort=False).sort_values(['region', 'year']).reset_index(drop=True)

    return dff
예제 #9
0
def get_gdppc_all_regions(model, ssp):
    """Assemble a long-format GDP-per-capita table for every terminal region."""

    print(model + " " + ssp)
    os.chdir(os.getenv("HOME") + '/repos/impact-calculations')

    moddict = {'high': 'OECD Env-Growth', 'low': 'IIASA GDP'}
    df = metacsv.read_csv(files.sharedpath("regions/hierarchy_metacsv.csv"))

    # Column vectors shared by every per-region frame (years 2010-2100)
    yvect = np.arange(2010, 2101)
    svect = np.repeat(ssp, yvect.size)
    mvect = np.repeat(moddict[model], yvect.size)
    provider = gdppc.GDPpcProvider(model, ssp)

    tmplist = []
    for ii in np.where(df.is_terminal)[0]:
        region = df['region-key'][ii]
        tmplist.append(pd.DataFrame({
            'region': np.repeat(region, yvect.size),
            'year': yvect,
            'gdppc': provider.get_timeseries(region),
            'ssp': svect,
            'model': mvect,
        }))

    return pd.concat(tmplist, sort=False)
예제 #10
0
def test_attr_updating(setup_env):
    """coords survive slicing and can be reassigned after deletion."""

    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))
    # Make 's2' a base coordinate and attach it to 'ind3'
    df.coords.update({'ind3': ['s2'], 's2': None})
    coords = df.coords

    # Send to xarray.Dataset
    ds = df.to_xarray()

    del df.coords

    # Create a similarly indexed series by
    # applying coords after the slice operation
    s = df['col1']
    s.coords = coords

    # Send to xarray.DataArray
    da = s.to_xarray()

    assert ((ds.col1 == da).all().all())

    # add_coords should also work on a bare DataFrame...
    df = metacsv.DataFrame(np.random.random((3,4)))
    df.add_coords()
    del df.coords

    # ...and after assigning a MultiIndex
    df.index = pd.MultiIndex.from_tuples([('a','x'),('b','y'),('c','z')])
    df.add_coords()
예제 #11
0
    def test_series_conversion_to_xarray(self):
        '''CSV Test 5: Check conversion of metacsv.Series to xarray.DataArray'''

        series = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test5.csv'), squeeze=True)

        # squeeze=True should produce a one-dimensional Series
        self.assertEqual(len(series.shape), 1)

        converted = series.to_xarray()
        self.assertEqual(converted.shape, series.shape)
        self.assertTrue((converted.values == series.values).all())
예제 #12
0
def test_series_conversion_to_xarray(setup_env):
    '''CSV Test 5: Check conversion of metacsv.Series to xarray.DataArray'''

    series = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'), squeeze=True)

    # squeeze=True should produce a one-dimensional Series
    assert len(series.shape) == 1

    converted = series.to_xarray()
    assert converted.shape == series.shape
    assert (converted.values == series.values).all()
예제 #13
0
def test_parse_vars(setup_env):
    """parse_vars should attach description/unit metadata to each column."""
    df = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test8.csv'),
        parse_vars=True,
        index_col=[0, 1, 2],
        coords={'ind1': None, 'ind2': None, 'ind3': ['ind2']})

    assert (df.hasattr(df.variables['col1'], 'description'))
    # Bug fix: the two comparisons below were previously bare expressions
    # with no assert, so they never checked anything.
    assert df.variables['col1']['description'] == 'The first column'
    assert df.variables['col2']['unit'] == 'digits'
예제 #14
0
    def test_parse_vars(self):
        """Variable metadata parsed from the header should survive to_xarray."""
        fp = os.path.join(self.testdata_prefix, 'test7.csv')
        ds = metacsv.read_csv(fp, parse_vars=True, index_col=0).to_xarray()

        expected = [
            ('col1', 'description', 'The first column'),
            ('col1', 'unit', 'wigits'),
            ('col2', 'description', 'The second column'),
            ('col2', 'unit', 'digits'),
        ]
        for col, key, value in expected:
            self.assertEqual(ds[col].attrs[key], value)
예제 #15
0
def test_parse_vars(setup_env):
    """Variable metadata parsed from the header should survive to_xarray."""
    ds = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test7.csv'),
        parse_vars=True,
        index_col=0).to_xarray()

    expected = [
        ('col1', 'description', 'The first column'),
        ('col1', 'unit', 'wigits'),
        ('col2', 'description', 'The second column'),
        ('col2', 'unit', 'digits'),
    ]
    for col, key, value in expected:
        assert ds[col].attrs[key] == value
예제 #16
0
    def test_parse_vars(self):
        """parse_vars should attach descriptions/units to each column."""
        df = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test8.csv'),
            parse_vars=True,
            index_col=[0, 1, 2],
            coords={'ind1': None, 'ind2': None, 'ind3': ['ind2']})

        col1 = df.variables['col1']
        self.assertTrue(df.hasattr(col1, 'description'))
        self.assertEqual(col1['description'], 'The first column')
        self.assertEqual(df.variables['col2']['unit'], 'digits')
예제 #17
0
    def test_coordinate_conversion_to_xarray(self):
        '''CSV Test 2: Make sure only base coordinates are used in determining xarray dimensionality'''

        df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))
        repr(df)

        # No cells should be null once the non-base coords are dropped
        nulls = df.to_xarray().isnull().sum()
        self.assertEqual(nulls.col1, 0)
        self.assertEqual(nulls.col2, 0)

        # Test automatic coords assignment
        df = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test5.csv'), squeeze=True)
        del df.coords

        ds = df.to_xarray()

        self.assertNotEqual(len(df.shape), len(ds.shape))
        self.assertEqual(df.shape[0], ds.shape[0])
        self.assertTrue(ds.shape[1] > 1)
예제 #18
0
def test_coordinate_conversion_to_xarray(setup_env):
    '''CSV Test 2: Make sure only base coordinates are used in determining xarray dimensionality'''

    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))
    repr(df)

    # No cells should be null once the non-base coords are dropped
    nulls = df.to_xarray().isnull().sum()
    assert nulls.col1 == 0
    assert nulls.col2 == 0

    # Test automatic coords assignment
    df = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'), squeeze=True)

    del df.coords

    ds = df.to_xarray()

    assert len(df.shape) != len(ds.shape)
    assert df.shape[0] == ds.shape[0]
    assert ds.shape[1] > 1
예제 #19
0
    def test_read_csv(self):
        """CSV Test 1: Check DataFrame data for CSVs with and without yaml headers"""

        with_header = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test1.csv'))
        plain = pd.read_csv(os.path.join(self.testdata_prefix, 'test2.csv'))

        # Exercise the repr machinery on both objects
        repr(with_header)
        repr(plain)

        expected = plain.set_index('ind').values
        self.assertTrue((with_header.values == expected).all().all())
예제 #20
0
def test_read_csv(setup_env):
    """CSV Test 1: Check DataFrame data for CSVs with and without yaml headers"""

    with_header = metacsv.read_csv(os.path.join(testdata_prefix, 'test1.csv'))
    plain = pd.read_csv(os.path.join(testdata_prefix, 'test2.csv'))

    # Exercise the repr machinery on both objects
    repr(with_header)
    repr(plain)

    expected = plain.set_index('ind').values
    assert (with_header.values == expected).all().all()
예제 #21
0
def test_for_series_attributes(setup_env):
    '''CSV Test 3: Ensure read_csv preserves attrs with squeeze=True conversion to Series

    This test is incomplete - a complete test would check that attrs are
    preserved when index_col is not set and the index is set by coords.
    Currently, this does not work.
    '''

    series = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test5.csv'),
        squeeze=True,
        index_col=[0, 1])

    repr(series)

    assert hasattr(series, 'attrs')
    assert 'author' in series.attrs
    assert series.attrs['author'] == 'series creator'
예제 #22
0
    def test_header_writer(self):
        """Metadata written via to_header should be picked up by header_file."""
        fp = os.path.join(self.testdata_prefix, 'test9.csv')

        attrs = {'author': 'test author', 'contact': '*****@*****.**'}
        coords = {'ind1': None, 'ind2': None, 'ind3': 'ind2'}
        variables = {
            'col1': {'description': 'my first column'},
            'col2': {'description': 'my second column'},
        }

        # Write the metadata to a standalone header file...
        tmpheader = os.path.join(self.test_tmp_prefix, 'test_header.header')
        metacsv.to_header(
            tmpheader, attrs=attrs, coords=coords, variables=variables)

        # ...then read the CSV with that header attached
        df = metacsv.read_csv(fp, header_file=tmpheader)

        self.assertEqual(df.attrs, attrs)
        self.assertEqual(df.coords, coords)
        self.assertEqual(df.variables, variables)
예제 #23
0
    def test_for_series_attributes(self):
        '''CSV Test 3: Ensure read_csv preserves attrs with squeeze=True conversion to Series

        This test is incomplete - a complete test would check that attrs are
        preserved when index_col is not set and the index is set by coords.
        Currently, this does not work.
        '''

        series = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test5.csv'),
            squeeze=True,
            index_col=[0, 1])

        repr(series)

        self.assertTrue(hasattr(series, 'attrs'))
        self.assertIn('author', series.attrs)
        self.assertEqual(series.attrs['author'], 'series creator')
예제 #24
0
def test_header_writer(setup_env):
    """Metadata written via to_header should be picked up by header_file."""
    fp = os.path.join(testdata_prefix, 'test9.csv')

    attrs = {'author': 'test author', 'contact': '*****@*****.**'}
    coords = {'ind1': None, 'ind2': None, 'ind3': 'ind2'}
    variables = {
        'col1': {'description': 'my first column'},
        'col2': {'description': 'my second column'},
    }

    # Write the metadata to a standalone header file...
    tmpheader = os.path.join(test_tmp_prefix, 'test_header.header')
    metacsv.to_header(
        tmpheader, attrs=attrs, coords=coords, variables=variables)

    # ...then read the CSV with that header attached
    df = metacsv.read_csv(fp, header_file=tmpheader)

    assert df.attrs == attrs
    assert df.coords == coords
    assert df.variables == variables
예제 #25
0
    def test_change_dims(self):
        """Slicing and expanding dims should preserve the coords attribute."""
        testfile = os.path.join(self.testdata_prefix, 'test6.csv')
        df = metacsv.read_csv(testfile)
        first_col = df.columns[0]

        # Test DataFrame._constructor_sliced
        series = df[first_col]
        self.assertTrue(hasattr(series, 'coords'))

        # Test Series._constructor_expanddims
        df2 = metacsv.DataFrame({first_col: series})
        self.assertTrue(hasattr(df2, 'coords'))

        # Test DataFrame._constructor_expanddims
        panel = metacsv.Panel({'df': df})
        self.assertTrue(hasattr(panel, 'coords'))

        # Test Panel._constructor_sliced
        self.assertTrue(hasattr(panel['df'], 'coords'))
예제 #26
0
def test_change_dims(setup_env):
    """Slicing and expanding dims should preserve the coords attribute."""
    testfile = os.path.join(testdata_prefix, 'test6.csv')
    df = metacsv.read_csv(testfile)
    first_col = df.columns[0]

    # Test DataFrame._constructor_sliced
    series = df[first_col]
    assert hasattr(series, 'coords')

    # Test Series._constructor_expanddims
    df2 = metacsv.DataFrame({first_col: series})
    assert hasattr(df2, 'coords')

    # Test DataFrame._constructor_expanddims
    panel = metacsv.Panel({'df': df})
    assert hasattr(panel, 'coords')

    # Test Panel._constructor_sliced
    assert hasattr(panel['df'], 'coords')
예제 #27
0
    def test_command_line_converter(self):
        """Round-trip test6.csv through the command-line netcdf converter."""

        convert_script = 'metacsv.scripts.convert'

        testfile = os.path.join(self.testdata_prefix, 'test6.csv')
        newname = os.path.splitext(os.path.basename(testfile))[0] + '.nc'
        outfile = os.path.join(self.test_tmp_prefix, newname)

        # Run: python -m metacsv.scripts.convert netcdf <in> <out>
        p = subprocess.Popen(
            ['python', '-m', convert_script, 'netcdf', testfile, outfile],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)

        # Any stderr output counts as failure; echo it for debugging first
        out, err = p.communicate()
        if len(err.strip()) != 0:
            print(err.strip().decode(locale.getpreferredencoding()))
        self.assertEqual(len(err.strip()), 0)

        df = metacsv.read_csv(testfile)

        # Compare the netCDF contents to the source, re-indexed by the
        # non-base coordinates; tolerate float round-off
        with xr.open_dataset(outfile) as ds:
            self.assertTrue((abs(df.values - ds.to_dataframe().set_index(
                [i for i in df.coords if i not in df.base_coords]).values) < 1e-7).all().all())
예제 #28
0
def test_command_line_converter(setup_env):
    """Round-trip test6.csv through the command-line netcdf converter."""

    convert_script = 'metacsv.scripts.convert'

    testfile = os.path.join(testdata_prefix, 'test6.csv')
    newname = os.path.splitext(os.path.basename(testfile))[0] + '.nc'
    outfile = os.path.join(test_tmp_prefix, newname)

    # Run: python -m metacsv.scripts.convert netcdf <in> <out>
    p = subprocess.Popen(
        ['python', '-m', convert_script, 'netcdf', testfile, outfile],
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE)

    # Any stderr output counts as failure; echo it for debugging first
    out, err = p.communicate()
    if len(err.strip()) != 0:
        print(err.strip().decode(locale.getpreferredencoding()))
    assert len(err.strip()) == 0

    df = metacsv.read_csv(testfile)

    # Compare the netCDF contents to the source, re-indexed by the
    # non-base coordinates; tolerate float round-off
    with xr.open_dataset(outfile) as ds:
        assert ((abs(df.values - ds.to_dataframe().set_index(
            [i for i in df.coords if i not in df.base_coords]).values) < 1e-7).all().all())
예제 #29
0
    def test_command_line_version_check(self):
        """The version script should echo the header version or error out."""

        def get_version(readfile):
            # Run: python -m metacsv.scripts.version <readfile>
            version_check_script = 'metacsv.scripts.version'

            p = subprocess.Popen(
                ['python', '-m', version_check_script, readfile],
                stderr=subprocess.PIPE,
                stdout=subprocess.PIPE)

            # Any stderr output is surfaced as a VersionError
            out, err = p.communicate()
            if len(err) != 0:
                raise VersionError(err.strip())
            else:
                return out.strip().decode(locale.getpreferredencoding())

        # test6.csv has no version attribute, so the script should fail
        testfile = os.path.join(self.testdata_prefix, 'test6.csv')

        with self.assertRaises(VersionError):
            get_version(testfile)

        # test5.csv declares a version; output should match its attrs
        testfile = os.path.join(self.testdata_prefix, 'test5.csv')
        df = metacsv.read_csv(testfile)

        self.assertEqual(get_version(testfile), df.attrs['version'])
예제 #30
0
def test_command_line_version_check(setup_env):
    """The version script should echo the header version or error out."""
    def get_version(readfile):
        # Run: python -m metacsv.scripts.version <readfile>
        version_check_script = 'metacsv.scripts.version'

        p = subprocess.Popen(
            ['python', '-m', version_check_script, readfile],
            stderr=subprocess.PIPE,
            stdout=subprocess.PIPE)

        # Any stderr output is surfaced as a VersionError
        out, err = p.communicate()
        if len(err) != 0:
            raise VersionError(err.strip())
        else:
            return out.strip().decode(locale.getpreferredencoding())

    # test6.csv has no version attribute, so the script should fail
    testfile = os.path.join(testdata_prefix, 'test6.csv')

    with pytest.raises(VersionError):
        get_version(testfile)

    # test5.csv declares a version; output should match its attrs
    testfile = os.path.join(testdata_prefix, 'test5.csv')
    df = metacsv.read_csv(testfile)

    assert get_version(testfile) == df.attrs['version']
예제 #31
0
def test_xarray_variable_attribute_persistence(setup_env):
    """Variable attrs from the CSV header should persist through to_xarray."""
    testfile = os.path.join(testdata_prefix, 'test6.csv')
    ds = metacsv.read_csv(testfile).to_xarray()
    assert ds.col1.attrs['unit'] == 'wigits'
예제 #32
0
    from imperact.lib.errors import UserException
    template_root = 'imperact'
    CUSTOM_CONTENT_TYPE = 'image/png'
    import imperact as aggregator
else:
    from aggregator.lib.base import BaseController
    from aggregator.lib.errors import UserException
    template_root = 'aggregator'
    from tg.controllers import CUSTOM_CONTENT_TYPE
    import aggregator

directory_root = '/shares/gcp/outputs'
scripts_root = '/home/jrising/research/gcp/imperactive/scripts'
# NOTE(review): presumably a purge cutoff timestamp — confirm with callers
last_purge = time.mktime(datetime.datetime(2017, 7, 20, 0, 0, 0).timetuple())

# Region hierarchy loaded once at import time from a hard-coded shared path
hierarchy = metacsv.read_csv("/shares/gcp/regions/hierarchy_metacsv.csv")

# Parallel key/label arrays over terminal regions; region keys appear twice
# (once as their own label, once paired with names), plus a third slice for
# regions that have non-'nan' alternatives
irlevel_keys = np.concatenate(
    (hierarchy['region-key'][hierarchy['is_terminal']],
     hierarchy['region-key'][hierarchy['is_terminal']],
     hierarchy['region-key'][(hierarchy['alternatives'].astype('S') != 'nan')
                             & (hierarchy['is_terminal'])])).astype('S')
irlevel_labels = np.concatenate(
    (hierarchy['region-key'][hierarchy['is_terminal']],
     hierarchy['name'][hierarchy['is_terminal']],
     hierarchy['alternatives'][(hierarchy['alternatives'].astype('S') != 'nan')
                               & (hierarchy['is_terminal'])])).astype('S')
# Lower-cased labels for case-insensitive search
irlevel_search = np.core.defchararray.lower(irlevel_labels)

aggregated_keys = np.concatenate(
    (['',
예제 #33
0
def get_version(readfile):
    """Return the 'version' attribute of a metacsv file, or None if absent."""
    return metacsv.read_csv(readfile).attrs.get('version', None)
예제 #34
0
 def test_xarray_variable_attribute_persistence(self):
     """Variable attrs from the CSV header should persist through to_xarray."""
     testfile = os.path.join(self.testdata_prefix, 'test6.csv')
     # Bug fix: assertTrue(a, b) treats b as the failure message and never
     # compares the two values; assertEqual performs the intended check.
     self.assertEqual(
         metacsv.read_csv(testfile).to_xarray().col1.attrs['unit'], 'wigits')
예제 #35
0
    def test_standalone_properties(self):
        """Exercise the Variables and Attributes containers directly."""

        df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test3.csv'))

        df.columns = ['index', 'column1', 'column2']
        df.set_index('index', inplace=True)

        variables = metacsv.core.internals.Variables({
            'column1': {
                'units': 'wigits'
            },
            'column2': {
                'units': 'wigits'
            }})

        df.variables = variables

        self.assertEqual(df.variables, variables)
        self.assertEqual(df.variables.__repr__(), variables.__repr__())
        self.assertEqual(df.variables[df.columns[1]]['units'], 'wigits')

        # An empty Attributes container compares equal to None
        attrs = metacsv.core.internals.Attributes()
        self.assertEqual(attrs, None)
        self.assertFalse('author' in attrs.copy())

        with self.assertRaises(KeyError):
            del attrs['author']

        with self.assertRaises(KeyError):
            err = attrs['author']

        # NOTE(review): unlike dict.get, Attributes.get with no default
        # raises KeyError for a missing key
        with self.assertRaises(KeyError):
            err = attrs.get('author')

        self.assertEqual(attrs.get('author', None), None)

        # get/pop accept at most one default argument
        with self.assertRaises(ValueError):
            err = attrs.get('author', 1, 2)

        self.assertEqual(attrs.pop('author', None), None)

        with self.assertRaises(KeyError):
            err = attrs.pop('author')

        with self.assertRaises(ValueError):
            err = attrs.pop('author', 1, 2)

        self.assertEqual(attrs, None)
        self.assertEqual(attrs.__repr__(), '<Empty Attributes>')

        attrs['author'] = 'My Name'
        attrs['contact'] = '*****@*****.**'

        self.assertEqual(attrs.pop('author', None), 'My Name')
        self.assertEqual(attrs.pop('author', None), None)

        df.attrs.update(attrs)
        df.attrs.update({'project': 'metacsv'})

        # update requires a mapping
        with self.assertRaises(TypeError):
            df.attrs.update(1)

        self.assertNotEqual(df.attrs, attrs)
        del df.attrs['project']
        self.assertEqual(df.attrs, attrs)

        self.assertEqual(df.attrs['contact'], '*****@*****.**')
        self.assertEqual(df.attrs.get('contact'), '*****@*****.**')
        self.assertEqual(df.attrs.get('other', 'thing'), 'thing')
        self.assertEqual(df.attrs.pop('contact'), '*****@*****.**')
        self.assertEqual(df.attrs.pop('contact', 'nope'), 'nope')
        self.assertNotEqual(df.attrs, attrs)

        attrs['author'] = 'My Name'
        df.variables['column1'] = attrs
        self.assertEqual(df.variables['column1']['author'], 'My Name')

        var = df.variables.copy()
        self.assertEqual(df.variables, var)

        with self.assertRaises(TypeError):
            var.parse_string_var(['unit'])

        # 'name [unit]' strings split into description/unit parts;
        # malformed strings are returned unchanged
        self.assertTrue('description' in var.parse_string_var('variable name [unit]'))
        self.assertEqual(var.parse_string_var('variable [ name'), 'variable [ name')

        with self.assertRaises(TypeError):
            df.variables = []

        del df.variables

        # Test round trip
        df2 = metacsv.read_csv(
            os.path.join(self.testdata_prefix, 'test3.csv'),
            index_col=[0], skiprows=1,
            names=['column1', 'column2'])

        df2.index.names = ['index']

        self.assertTrue((df == df2).all().all())
예제 #36
0
def test_converters(setup_env):
    """End-to-end checks of the metacsv.to_* converter entry points."""

    tmpfile = os.path.join(test_tmp_prefix, 'test_write_1.csv')
    tmpnc = os.path.join(test_tmp_prefix, 'test_write_1.nc')

    df = pd.DataFrame(np.random.random((3,4)), columns=list('abcd'))
    df.index.names = ['ind']

    attrs = {'author': 'My Name'}

    # Convert a plain pandas DataFrame via each converter entry point
    metacsv.to_csv(df, tmpfile, attrs=attrs, coords={'ind': None})
    da = metacsv.to_dataarray(df, attrs=attrs, coords={'ind': None})
    ds1 = metacsv.to_xarray(df, attrs=attrs, coords={'ind': None})
    ds2 = metacsv.to_dataset(df, attrs=attrs, coords={'ind': None})

    df2 = metacsv.DataFrame(df, attrs=attrs)
    df2.add_coords()

    df3 = metacsv.read_csv(tmpfile)

    assert df2.coords == df3.coords

    # to_xarray and to_dataset should agree for DataFrame input
    assert ((ds1 == ds2).all().all())
    # DataArray output has the same total size even though shapes differ
    assert df.shape[0]*df.shape[1] == da.shape[0]*da.shape[1]

    attrs = metacsv.core.internals.Attributes()
    attrs.update(da.attrs)
    assert df2.attrs == attrs

    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test6.csv'))
    ds = df.to_xarray()
    da = df.to_dataarray()
    assert not ds.col2.isnull().any()

    attrs = df.attrs.copy()
    coords = df.coords.copy()
    variables = df.variables.copy()

    df.columns.names = ['cols']

    # Stacked Series round trip: re-attach metadata, then convert
    s = df.stack('cols')
    metacsv.to_csv(s, tmpfile, attrs={'author': 'my name'})
    s = metacsv.Series(s)
    coords.update({'cols': None})
    s.attrs = attrs
    s.coords = coords
    s.variables = variables

    s.to_xarray()
    s.to_dataarray()
    s.to_dataset()

    # Unsupported container types are rejected
    with pytest.raises(TypeError):
        metacsv.to_xarray(['a','b','c'])

    # to_csv can also take a file path and inject new metadata
    metacsv.to_csv(
        os.path.join(testdata_prefix, 'test6.csv'), 
        tmpfile, 
        attrs={'author': 'test author'},
        variables={'col1': {'unit': 'digits'}})


    df = metacsv.read_csv(tmpfile)
    assert (df.attrs['author']  == 'test author')

    ds = metacsv.to_xarray(tmpfile)
    assert ds.col1.attrs['unit'] == 'digits'


    metacsv.to_netcdf(tmpfile, tmpnc)
    with xr.open_dataset(tmpnc) as ds:
        assert (ds.col1.attrs['unit'] == 'digits')
예제 #37
0
    def test_converters(self):
        """End-to-end checks of the metacsv.to_* converter entry points."""

        tmpfile = os.path.join(self.test_tmp_prefix, 'test_write_1.csv')
        tmpnc = os.path.join(self.test_tmp_prefix, 'test_write_1.nc')

        df = pd.DataFrame(np.random.random((3,4)), columns=list('abcd'))
        df.index.names = ['ind']

        attrs = {'author': 'My Name'}

        # Convert a plain pandas DataFrame via each converter entry point
        metacsv.to_csv(df, tmpfile, attrs=attrs, coords={'ind': None})
        da = metacsv.to_dataarray(df, attrs=attrs, coords={'ind': None})
        ds1 = metacsv.to_xarray(df, attrs=attrs, coords={'ind': None})
        ds2 = metacsv.to_dataset(df, attrs=attrs, coords={'ind': None})

        df2 = metacsv.DataFrame(df, attrs=attrs)
        df2.add_coords()

        df3 = metacsv.read_csv(tmpfile)

        self.assertEqual(df2.coords, df3.coords)

        # to_xarray and to_dataset should agree for DataFrame input
        self.assertTrue((ds1 == ds2).all().all())
        # DataArray output has the same total size even though shapes differ
        self.assertEqual(df.shape[0]*df.shape[1], da.shape[0]*da.shape[1])

        attrs = metacsv.core.internals.Attributes()
        attrs.update(da.attrs)
        self.assertEqual(df2.attrs, attrs)

        df = metacsv.read_csv(os.path.join(self.testdata_prefix, 'test6.csv'))
        ds = df.to_xarray()
        da = df.to_dataarray()
        self.assertFalse(ds.col2.isnull().any())

        attrs = df.attrs.copy()
        coords = df.coords.copy()
        variables = df.variables.copy()

        df.columns.names = ['cols']

        # Stacked Series round trip: re-attach metadata, then convert
        s = df.stack('cols')
        metacsv.to_csv(s, tmpfile, attrs={'author': 'my name'})
        s = metacsv.Series(s)
        coords.update({'cols': None})
        s.attrs = attrs
        s.coords = coords
        s.variables = variables

        s.to_xarray()
        s.to_dataarray()
        s.to_dataset()

        # Unsupported container types are rejected
        with self.assertRaises(TypeError):
            metacsv.to_xarray(['a','b','c'])

        # to_csv can also take a file path and inject new metadata
        metacsv.to_csv(
            os.path.join(self.testdata_prefix, 'test6.csv'), 
            tmpfile, 
            attrs={'author': 'test author'},
            variables={'col1': {'unit': 'digits'}})


        df = metacsv.read_csv(tmpfile)
        self.assertEqual(df.attrs['author'], 'test author')

        ds = metacsv.to_xarray(tmpfile)
        self.assertEqual(ds.col1.attrs['unit'], 'digits')


        metacsv.to_netcdf(tmpfile, tmpnc)
        with xr.open_dataset(tmpnc) as ds:
            self.assertEqual(ds.col1.attrs['unit'], 'digits')
예제 #38
0
def test_standalone_properties(setup_env):
    """Exercise the Variables and Attributes containers directly."""

    df = metacsv.read_csv(os.path.join(testdata_prefix, 'test3.csv'))

    df.columns = ['index', 'column1', 'column2']
    df.set_index('index', inplace=True)

    variables = metacsv.core.internals.Variables({
        'column1': {
            'units': 'wigits'
        },
        'column2': {
            'units': 'wigits'
        }})

    df.variables = variables

    assert df.variables == variables
    assert df.variables.__repr__() == variables.__repr__()
    assert df.variables[df.columns[1]]['units'] == 'wigits'

    # An empty Attributes container compares equal to None
    attrs = metacsv.core.internals.Attributes()
    assert attrs == None
    assert not 'author' in attrs.copy()

    with pytest.raises(KeyError):
        del attrs['author']

    with pytest.raises(KeyError):
        err = attrs['author']

    # NOTE(review): unlike dict.get, Attributes.get with no default raises
    # KeyError for a missing key
    with pytest.raises(KeyError):
        err = attrs.get('author')

    assert attrs.get('author', None) == None

    # get/pop accept at most one default argument
    with pytest.raises(ValueError):
        err = attrs.get('author', 1, 2)

    assert attrs.pop('author', None) == None

    with pytest.raises(KeyError):
        err = attrs.pop('author')

    with pytest.raises(ValueError):
        err = attrs.pop('author', 1, 2)

    assert attrs == None
    assert attrs.__repr__() == '<Empty Attributes>'

    attrs['author'] = 'My Name'
    attrs['contact'] = '*****@*****.**'

    assert attrs.pop('author', None) == 'My Name'
    assert attrs.pop('author', None) == None

    df.attrs.update(attrs)
    df.attrs.update({'project': 'metacsv'})

    # update requires a mapping
    with pytest.raises(TypeError):
        df.attrs.update(1)

    assert not df.attrs == attrs
    del df.attrs['project']
    assert df.attrs == attrs

    assert df.attrs['contact'] == '*****@*****.**'
    assert df.attrs.get('contact') == '*****@*****.**'
    assert df.attrs.get('other', 'thing') == 'thing'
    assert df.attrs.pop('contact') == '*****@*****.**'
    assert df.attrs.pop('contact', 'nope') == 'nope'
    assert df.attrs != attrs

    attrs['author'] = 'My Name'
    df.variables['column1'] = attrs
    assert df.variables['column1']['author'] == 'My Name'

    var = df.variables.copy()
    assert df.variables == var

    with pytest.raises(TypeError):
        var.parse_string_var(['unit'])

    # 'name [unit]' strings split into description/unit parts; malformed
    # strings are returned unchanged
    assert ('description' in var.parse_string_var('variable name [unit]'))
    assert var.parse_string_var('variable [ name') == 'variable [ name'

    with pytest.raises(TypeError):
        df.variables = []

    del df.variables

    # Test round trip
    df2 = metacsv.read_csv(
        os.path.join(testdata_prefix, 'test3.csv'),
        index_col=[0], skiprows=1,
        names=['column1', 'column2'])

    df2.index.names = ['index']

    assert ((df == df2).all().all())
예제 #39
0
        if df.shape[0] > 0:
            return df

        if iso in df_anyiam.index:
            df = df_anyiam.loc[iso]
            if df.shape[0] > 0:
                return df

        return df_global


if __name__ == '__main__':
    # Test the provider: time the initial load and two timeseries passes
    # (the second pass should be faster thanks to the provider's cache).
    import time

    time0 = time.time()
    provider = GDPpcProvider('low', 'SSP3')
    df = metacsv.read_csv(files.sharedpath("regions/hierarchy_metacsv.csv"))
    time1 = time.time()
    # Bug fix: these were Python 2 print statements, which are syntax
    # errors under Python 3; use the print function as elsewhere.
    print("Load time: %s seconds" % (time1 - time0))

    for ii in np.where(df.is_terminal)[0]:
        xx = provider.get_timeseries(df.iloc[ii, 0])
    time2 = time.time()
    print("First pass: %s seconds" % (time2 - time1))

    for ii in np.where(df.is_terminal)[0]:
        xx = provider.get_timeseries(df.iloc[ii, 0])
    time3 = time.time()
    print("Second pass: %s seconds" % (time3 - time2))