def test_001_process(self):
    """Compare mixmasta.process output for the test1 fixture with stored results.

    Scenario exercised by the fixture: ISO2 primary_geo; a date built from
    day, month and year; no primary_date; a feature that qualifies another
    feature.

    The processed DataFrame and dictionary must equal the expected CSV and
    JSON files kept under ``outputs``.
    """
    # Arguments handed to mixmasta.
    mapper_path = f"inputs{sep}test1_input.json"
    data_path = f"inputs{sep}test1_input.csv"
    geo_level = 'admin2'
    out_prefix = f"outputs{sep}unittests"

    # Run the code under test.
    actual_df, actual_dict = mixmasta.process(
        data_path, mapper_path, geo_level, out_prefix
    )

    # Read the stored expectations; the expected frame goes through the same
    # dtype optimization helper before it is compared.
    expected_df = mixmasta.optimize_df_types(
        pd.read_csv(f'outputs{sep}test1_output.csv', index_col=False)
    )
    with open(f'outputs{sep}test1_dict.json') as handle:
        expected_dict = json.load(handle)

    # Sort and renumber both frames so the comparison does not depend on
    # the order in which rows were produced.
    sort_keys = [
        'timestamp', 'country', 'admin1', 'admin2', 'admin3',
        'lat', 'lng', 'feature', 'value',
    ]
    actual_df = actual_df.sort_values(by=sort_keys).reset_index(drop=True)
    expected_df = expected_df.sort_values(by=sort_keys).reset_index(drop=True)

    # Assertions
    assert_frame_equal(actual_df, expected_df)
    assert_dict_equal(actual_dict, expected_dict)
def test_006_process(self):
    """Compare mixmasta.process output for the test6 HOA conflict fixture.

    Scenario exercised by the fixture: multi primary_geo, resolve_to_gadm.

    The processed DataFrame and dictionary must equal the expected CSV and
    JSON files kept under ``outputs``; categorical dtype details are not
    checked.
    """
    # Arguments handed to mixmasta.
    mapper_path = f'inputs{sep}test6_hoa_conflict_input.json'
    data_path = f'inputs{sep}test6_hoa_conflict_input.csv'
    geo_level = 'admin2'
    out_prefix = f'outputs{sep}unittests'

    # Run the code under test.
    actual_df, actual_dict = mixmasta.process(
        data_path, mapper_path, geo_level, out_prefix
    )

    # Read the stored expectations; the expected frame goes through the same
    # dtype optimization helper before it is compared.
    expected_df = mixmasta.optimize_df_types(
        pd.read_csv(f'outputs{sep}test6_hoa_conflict_output.csv', index_col=False)
    )
    with open(f'outputs{sep}test6_hoa_conflict_dict.json') as handle:
        expected_dict = json.load(handle)

    sort_keys = [
        'timestamp', 'country', 'admin1', 'admin2', 'admin3',
        'lat', 'lng', 'feature', 'value',
    ]

    def _ordered(frame):
        # Sort by every compared column and renumber rows from zero so the
        # comparison does not depend on row order.
        return frame.sort_values(by=sort_keys).reset_index(drop=True)

    actual_df = _ordered(actual_df)
    expected_df = _ordered(expected_df)

    # Cast the value and feature columns of both frames to str so they are
    # compared with the same datatype.
    for frame in (actual_df, expected_df):
        for column in ('value', 'feature'):
            frame[column] = frame[column].astype('str')

    # Assertions
    assert_frame_equal(actual_df, expected_df, check_categorical=False)
    assert_dict_equal(actual_dict, expected_dict)
def test_002_process(self):
    """Test GeoTiff processing against stored expected output.

    This tests that multi-band geotiff processing is the same. Uses the
    asset_wealth tif which has 4 bands of different years representing a
    measure of wealth.

    The processed DataFrame and dictionary must equal the expected CSV and
    JSON files kept under ``outputs``; categorical dtype details are not
    checked.
    """
    # Define mixmasta inputs:
    mp = f'inputs{sep}test2_assetwealth_input.json'
    fp = f'inputs{sep}test2_assetwealth_input.tif'
    geo = 'admin2'
    outf = f'outputs{sep}unittests'

    # Process:
    df, dct = mixmasta.process(fp, mp, geo, outf)

    # Load expected output:
    output_df = pd.read_csv(f'outputs{sep}test2_assetwealth_output.csv', index_col=False)
    with open(f'outputs{sep}test2_assetwealth_dict.json') as f:
        output_dict = json.load(f)

    # Restrict both frames to the compared columns, in the same order.
    # .copy() gives independent frames, so the column assignments below do
    # not raise SettingWithCopyWarning from writing into a selection of
    # another frame.
    cols = ['timestamp', 'country', 'admin1', 'admin2', 'admin3', 'lat', 'lng', 'feature', 'value']
    df = df[cols].copy()
    output_df = output_df[cols].copy()

    # Optimize datatypes for output_df: downcast 64-bit numeric columns to
    # the smallest numeric dtype pandas will allow.
    floats = output_df.select_dtypes(include=['float64']).columns.tolist()
    if floats:
        output_df[floats] = output_df[floats].apply(pd.to_numeric, downcast='float')
    ints = output_df.select_dtypes(include=['int64']).columns.tolist()
    if ints:
        output_df[ints] = output_df[ints].apply(pd.to_numeric, downcast='integer')

    # Standardize value and feature columns to str for comparison.
    for frame in (df, output_df):
        for column in ('value', 'feature'):
            frame[column] = frame[column].astype('str')

    # Sort and reindex so the comparison does not depend on row order.
    df = df.sort_values(by=cols).reset_index(drop=True)
    output_df = output_df.sort_values(by=cols).reset_index(drop=True)

    # Assertions
    assert_frame_equal(df, output_df, check_categorical=False)
    assert_dict_equal(dct, output_dict)