def apply(self, parser):
    """
    Extract this analyzer's channel from one simulation's output and
    accumulate it in the same bins as the reference data.

    Reads ``parser.raw_data``, ``parser.sim_data`` and ``parser.sim_id``.
    Returns the re-binned result with 'Observations' and 'Trials' kept,
    tagged with ``sample`` and ``sim_id`` attributes.
    """
    # Simulation output for the first file this analyzer is registered on
    raw_output = parser.raw_data[self.filenames[0]]

    # Channel and population series, placed side by side as columns
    channel_series = summary_channel_to_pandas(raw_output, self.channel)
    population_series = summary_channel_to_pandas(raw_output, self.population_channel)
    combined = pd.concat([channel_series, population_series], axis=1)

    # Convert Average Population to Person Years (stored as 'Trials')
    combined['Trials'] = convert_annualized(
        combined[self.population_channel],
        start_day=channel_series.Start_Day,
        reporting_interval=channel_series.Reporting_Interval)

    # NOTE(review): the extent of the locked region was reconstructed from
    # source whose indentation was lost -- confirm against the original file.
    with thread_lock:  # TODO: re-code following block to ensure thread safety (Issue #758)?

        # Calculate Incidents from Annual Incidence and Person Years
        combined['Observations'] = convert_to_counts(combined[self.channel], combined['Trials'])

        # Flatten the multi-index, then derive age from time for the birth cohort
        flat = age_from_birth_cohort(combined.reset_index())

        # Re-bin according to the reference index
        sim_data = aggregate_on_index(flat, self.reference.index,
                                      keep=['Observations', 'Trials'])

    # Tag the result so it can be traced back to its sample point and simulation
    sim_data.sample = parser.sim_data.get('__sample_index__')
    sim_data.sim_id = parser.sim_id

    return sim_data
def test_parser(self):
    """
    Exercise the summary-report parsing helpers on the fixture in
    ``self.data``: channel extraction, count conversion, birth-cohort
    age derivation and month/season labelling.
    """
    # --- population channel: index layout and one spot value
    population = summary_channel_to_pandas(self.data, 'Average Population by Age Bin')
    self.assertListEqual(population.index.names, ['Time', 'Age Bin'])
    self.assertAlmostEqual(population.loc[31, 80], 16.602738, places=5)

    # --- parasitemia channel: name, one spot value, and normalization
    parasite_channel = 'PfPR by Parasitemia and Age Bin'
    parasites = summary_channel_to_pandas(self.data, parasite_channel)
    self.assertEqual(parasites.name, parasite_channel)
    self.assertAlmostEqual(parasites.loc[1095, 500, 100], 0.026666, places=5)
    # on given day + age, density-bin fractions sum to 1
    self.assertAlmostEqual(parasites.loc[31, :, 20].sum(), 1)

    # --- prevalence -> counts keeps name and index layout
    counts = convert_to_counts(parasites, population)
    self.assertEqual(counts.name, parasites.name)
    self.assertListEqual(counts.index.names, parasites.index.names)
    expected_counts = [281, 13, 19, 7, 9, 6]
    self.assertListEqual(counts.iloc[7:13].astype(int).tolist(), expected_counts)

    # --- birth cohort: 'Age Bin' must equal elapsed time in years
    df = age_from_birth_cohort(parasites.reset_index())
    self.assertListEqual((df.Time / 365.0).tolist(), df['Age Bin'].tolist())

    # --- default season mapping: all twelve months are present
    months_df = season_from_time(df)
    self.assertEqual(len(months_df.Month.unique()), 12)
    self.assertEqual(months_df.Month.iloc[0], 'February')

    # --- custom season mapping: invert {season: [months]} to {month: season}
    seasons = {'fall': ['September', 'October'], 'winter': ['January']}
    seasons_by_month = {
        month: season
        for season, month_names in seasons.items()
        for month in month_names
    }
    seasons_df = season_from_time(df, seasons=seasons_by_month)
    self.assertEqual(len(seasons_df.Month.unique()), 3)
    self.assertEqual(seasons_df.Month.iloc[0], 'September')
    self.assertEqual(seasons_df.Season.iloc[0], 'fall')
def apply(self, parser):
    """
    Extract every channel in ``self.channels`` from one simulation's
    output and accumulate each in the same bins as the reference data.

    Returns a single-column ('Counts') DataFrame whose outermost index
    level is 'Channel', tagged with ``sample`` and ``sim_id`` attributes.
    """
    # Simulation output for the first file this analyzer is registered on
    raw_output = parser.raw_data[self.filenames[0]]

    # Population series, used to convert parasite prevalence to counts
    population = summary_channel_to_pandas(raw_output, self.population_channel)

    # One re-binned 'Counts' Series per channel, in self.channels order
    counts_by_channel = {}
    for channel in self.channels:

        # Prevalence series for this channel
        prevalence = summary_channel_to_pandas(raw_output, channel)

        # NOTE(review): the extent of the locked region was reconstructed from
        # source whose indentation was lost -- confirm against the original file.
        with thread_lock:  # TODO: re-code following block to ensure thread safety (Issue #758)?

            # Calculate counts from prevalence and population
            counts = convert_to_counts(prevalence, population)

            # Flatten the multi-index and transform the index columns
            flat = counts.reset_index()
            flat = age_from_birth_cohort(flat)  # calculate age from time for birth cohort
            flat = season_from_time(flat, seasons=self.seasons)  # calculate month from time

            # Re-bin according to this channel's slice of the reference
            target_index = self.reference.loc(axis=1)[channel].index
            rebinned = aggregate_on_index(flat, target_index, keep=[channel])
            counts_by_channel[channel] = rebinned[channel].rename('Counts')

    stacked = pd.concat(counts_by_channel.values(),
                        keys=counts_by_channel.keys(),
                        names=['Channel'])

    # single-column DataFrame for standardized combine/compare pattern
    sim_data = pd.DataFrame(stacked)

    # Tag the result so it can be traced back to its sample point and simulation
    sim_data.sample = parser.sim_data.get('__sample_index__')
    sim_data.sim_id = parser.sim_id

    return sim_data
def test_site_analyzer(self):
    """
    End-to-end check of the site's single analyzer: the annualized
    conversion helpers, then apply, combine, finalize (compare) and cache.

    Side effect: writes ``cache_<AnalyzerClass>.json`` into
    ``self.input_path``.
    """
    analyzers = self.site.analyzers
    # assertEqual, not assertTrue: assertTrue treats its second argument as
    # the failure message and passes for any truthy first argument.
    self.assertEqual(len(analyzers), 1)

    analyzer = analyzers[0]
    self.assertEqual(analyzer.name, self.analyzer_name)

    reference = analyzer.reference
    self.assertIsInstance(reference, pd.DataFrame)

    #############
    # Test annualized functions
    channel_data = pd.concat(
        [summary_channel_to_pandas(self.data, c)
         for c in (analyzer.channel, analyzer.population_channel)],
        axis=1)

    person_years = convert_annualized(channel_data[analyzer.population_channel])
    channel_data['Trials'] = person_years
    channel_data['Observations'] = convert_to_counts(
        channel_data[analyzer.channel], channel_data['Trials'])

    # At Time == 31 the test expects Trials == population * 31 / 365
    for _, row in channel_data.loc[31].iterrows():
        self.assertAlmostEqual(
            row['Trials'],
            row[analyzer.population_channel] * 31 / 365.0)
        self.assertAlmostEqual(
            row['Observations'],
            row[analyzer.channel] * row['Trials'])

    #############
    # TEST APPLY
    sim_data = analyzer.apply(self.parser)
    self.assertListEqual(reference.index.names, sim_data.index.names)
    self.assertSetEqual(set(sim_data.columns.tolist()), {'Observations', 'Trials'})
    self.assertEqual(self.parser.sim_id, 'dummy_id')
    self.assertEqual(self.parser.sim_data.get('__sample_index__'), 'dummy_index')

    # Make multiple dummy copies of the same parser
    # with unique sim_id and subset of different sample points
    n_sims, n_samples = 8, 4
    parsers = self.get_dummy_parsers(sim_data, id(analyzer),
                                     n_sims=n_sims, n_samples=n_samples)

    #############
    # TEST COMBINE
    analyzer.combine(parsers)

    # Verify averaging of sim_id by sample_index is done correctly
    # e.g. sample0 = (id0, id2) = (1x, 3x) => avg0 = 2
    avg = [np.arange(s + 1, n_sims + 1, n_samples).mean()
           for s in range(n_samples)]

    for _, row in analyzer.data.iterrows():
        for isample in range(1, n_samples):
            # Index with the loop variable so that every sample is compared
            # against sample 0 (the original indexed with an unrelated `i`).
            self.assertAlmostEqual(
                row[0, 'Observations'] / avg[0],
                row[isample, 'Observations'] / avg[isample])
            self.assertAlmostEqual(
                row[0, 'Trials'] / avg[0],
                row[isample, 'Trials'] / avg[isample])

    #############
    # TEST COMPARE
    analyzer.finalize()  # applies compare_fn to each sample setting self.result

    for i in range(n_samples):
        sample_data = analyzer.data.xs(i, level='sample', axis=1)
        self.assertAlmostEqual(
            analyzer.result[i],
            self.compare_with_nested_loops(sample_data, reference))

    #############
    # TEST CACHE
    cache = analyzer.cache()  # concats reference to columns of simulation outcomes by sample-point index
    # sorted(): keys() is a view (not a list) on Python 3, and the check
    # should not depend on key order.
    # NOTE(review): assumes `cache` is a plain dict -- confirm.
    self.assertListEqual(['ref', 'samples'], sorted(cache.keys()))
    self.assertEqual(n_samples, len(cache['samples']))

    cache_path = os.path.join(self.input_path,
                              'cache_%s.json' % analyzer.__class__.__name__)
    with open(cache_path, 'w') as fp:
        json.dump(cache, fp, indent=4, cls=NumpyEncoder)
# Scratch script: load one monthly summary report and coerce each reference
# channel into per-season, per-age counts.
# NOTE(review): `reference` is not defined in the visible source -- it must
# be bound before this fragment runs.
population_channel = 'Average Population by Age Bin'

# Channel names are read from the 'Channel' level of the reference index
ref_ix = reference.index
channels_ix = ref_ix.names.index('Channel')
channels = ref_ix.levels[channels_ix].values

# Month name -> season label passed to season_from_time
seasons = {
    'May': 'DH2',
    'September': 'W2',
    'January': 'DC2'
}

# NOTE(review): hard-coded, machine-specific path; `file` also shadows the
# Python 2 builtin. The name is kept in case later code refers to it.
file = 'C://Users//pselvaraj//Desktop//MalariaSummaryReport_Monthly_Report.json'
# Context manager so the report file is closed after parsing
with open(file) as fp:
    data = json.load(fp)

# Population by age and time series (to convert parasite prevalence to counts)
population = summary_channel_to_pandas(data, population_channel)

# Coerce channel data into format for comparison with reference
channel_data_dict = {}
for channel in channels:

    # Prevalence by density, age, and time series
    channel_data = summary_channel_to_pandas(data, channel)

    # Calculate counts from prevalence and population
    channel_counts = convert_to_counts(channel_data, population)

    # Reset multi-index and perform transformations on index columns
    df = channel_counts.reset_index()
    df = age_from_birth_cohort(df)  # calculate age from time for birth cohort
    df = season_from_time(df, seasons=seasons)  # calculate month from time
    # NOTE(review): the visible source ends here; `df` is not yet stored in
    # channel_data_dict within the visible code.