예제 #1
0
    def test_001_process(self):
        """Run mixmasta.process on the test1 fixture and compare with the stored results.

        Scenario as noted by the original author: ISO2 primary_geo; a date
        built from day, month and year; no primary_date; a feature that
        qualifies another feature.
        """
        # Arguments handed to mixmasta.process.
        mapper_path = f"inputs{sep}test1_input.json"
        data_path = f"inputs{sep}test1_input.csv"
        admin_level = 'admin2'
        out_prefix = f"outputs{sep}unittests"

        # Run the call under test.
        actual_df, actual_dict = mixmasta.process(data_path, mapper_path, admin_level, out_prefix)

        # Expected frame is read from disk and passed through the same
        # dtype optimisation helper before comparison.
        expected_df = mixmasta.optimize_df_types(
            pd.read_csv(f'outputs{sep}test1_output.csv', index_col=False)
        )
        with open(f'outputs{sep}test1_dict.json') as handle:
            expected_dict = json.load(handle)

        # Put both frames in the same row order with a fresh 0..n-1 index
        # so they can be compared row for row.
        sort_keys = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
                     'lat', 'lng', 'feature', 'value']
        for frame in (actual_df, expected_df):
            frame.sort_values(by=sort_keys, inplace=True)
            frame.reset_index(drop=True, inplace=True)

        # Both the data frame and the returned dictionary must match.
        assert_frame_equal(actual_df, expected_df)
        assert_dict_equal(actual_dict, expected_dict)
예제 #2
0
    def test_006_process(self):
        """Run mixmasta.process on the test6 HoA conflict fixture and compare with the stored results.

        Scenario as noted by the original author: multi primary_geo,
        resolve_to_gadm.
        """
        # Arguments handed to mixmasta.process.
        mapper_path = f'inputs{sep}test6_hoa_conflict_input.json'
        data_path = f'inputs{sep}test6_hoa_conflict_input.csv'
        admin_level = 'admin2'
        out_prefix = f'outputs{sep}unittests'

        # Run the call under test.
        actual_df, actual_dict = mixmasta.process(data_path, mapper_path, admin_level, out_prefix)

        # Expected frame is read from disk and passed through the same
        # dtype optimisation helper before comparison.
        expected_df = mixmasta.optimize_df_types(
            pd.read_csv(f'outputs{sep}test6_hoa_conflict_output.csv', index_col=False)
        )
        with open(f'outputs{sep}test6_hoa_conflict_dict.json') as handle:
            expected_dict = json.load(handle)

        # Put both frames in the same row order with a fresh 0..n-1 index
        # so they can be compared row for row.
        sort_keys = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
                     'lat', 'lng', 'feature', 'value']
        for frame in (actual_df, expected_df):
            frame.sort_values(by=sort_keys, inplace=True)
            frame.reset_index(drop=True, inplace=True)

        # Cast the value and feature columns to str on both sides so those
        # two columns carry the same dtype in the comparison.
        for frame in (actual_df, expected_df):
            for column in ('value', 'feature'):
                frame[column] = frame[column].astype('str')

        # Both the data frame and the returned dictionary must match;
        # the frame comparison is run with check_categorical disabled.
        assert_frame_equal(actual_df, expected_df, check_categorical=False)
        assert_dict_equal(actual_dict, expected_dict)
예제 #3
0
    def test_002_process(self):
        """Run mixmasta.process on the asset-wealth GeoTIFF fixture and compare with the stored results.

        As noted by the original author, this checks that multi-band geotiff
        processing is unchanged, using the asset_wealth tif which has 4 bands
        of different years representing a measure of wealth.
        """
        # Define mixmasta inputs:
        mp = f'inputs{sep}test2_assetwealth_input.json'
        fp = f'inputs{sep}test2_assetwealth_input.tif'
        geo = 'admin2'
        outf = f'outputs{sep}unittests'

        # Process:
        df, dct = mixmasta.process(fp, mp, geo, outf)

        # Load expected output:
        output_df = pd.read_csv(f'outputs{sep}test2_assetwealth_output.csv', index_col=False)
        with open(f'outputs{sep}test2_assetwealth_dict.json') as f:
            output_dict = json.load(f)

        # Restrict both frames to the compared columns. Take explicit copies:
        # the frames are modified below (column assignment, in-place sort and
        # reindex), and doing that on a bare column selection is the pattern
        # pandas reports as SettingWithCopyWarning.
        cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3',
                'lat', 'lng', 'feature', 'value']
        df = df[cols].copy()
        output_df = output_df[cols].copy()

        # Downcast the numeric columns of the expected frame (float64 and
        # int64 columns are each reduced with pd.to_numeric).
        floats = output_df.select_dtypes(include=['float64']).columns.tolist()
        output_df[floats] = output_df[floats].apply(pd.to_numeric, downcast='float')

        ints = output_df.select_dtypes(include=['int64']).columns.tolist()
        output_df[ints] = output_df[ints].apply(pd.to_numeric, downcast='integer')

        for frame in (df, output_df):
            # Standardize value and feature columns to str for comparison.
            # Done once per frame, after the downcast above and before sorting.
            for column in ('value', 'feature'):
                frame[column] = frame[column].astype('str')

            # Sort and reindex so both frames share the same row order.
            frame.sort_values(by=cols, inplace=True)
            frame.reset_index(drop=True, inplace=True)

        # Assertions
        assert_frame_equal(df, output_df, check_categorical=False)
        assert_dict_equal(dct, output_dict)