Python summary_channel_to_pandasの例、dtk.utils.parsers.malaria_summary.summary_channel_to_pandas Pythonの例

コード例 #1

0

ファイルを表示

ファイル: ChannelByAgeCohortAnalyzer.py プロジェクト: bertozzivill/dtk-tools-malaria-old

    def apply(self, parser):
        """
        Turn one simulation's summary output into 'Observations' and 'Trials'
        re-binned on the index of the reference data.

        :param parser: object exposing ``raw_data`` (keyed by output filename),
                       ``sim_data`` and ``sim_id``
        :return: result of ``aggregate_on_index`` keeping the 'Observations' and
                 'Trials' columns, tagged with ``sample`` and ``sim_id`` attributes
        """

        # Raw simulation output for the first configured filename
        raw = parser.raw_data[self.filenames[0]]

        # One series per channel, placed side by side as columns
        incidence = summary_channel_to_pandas(raw, self.channel)
        avg_population = summary_channel_to_pandas(raw, self.population_channel)
        combined = pd.concat([incidence, avg_population], axis=1)

        # Average Population -> Person Years, using the reporting metadata
        # carried as attributes on the channel series
        combined['Trials'] = convert_annualized(
            combined[self.population_channel],
            start_day=incidence.Start_Day,
            reporting_interval=incidence.Reporting_Interval)

        with thread_lock:  # TODO: re-code following block to ensure thread safety (Issue #758)?

            # Annual Incidence x Person Years -> incident counts
            combined['Observations'] = convert_to_counts(combined[self.channel], combined['Trials'])

            # Flatten the multi-index, then derive age from time for the birth cohort
            flat = age_from_birth_cohort(combined.reset_index())

            # Accumulate into the same bins as the reference
            sim_data = aggregate_on_index(flat, self.reference.index, keep=['Observations', 'Trials'])

        # Tag the result so it can be traced back to its sample point and simulation
        sim_data.sample = parser.sim_data.get('__sample_index__')
        sim_data.sim_id = parser.sim_id

        return sim_data

コード例 #2

0

ファイルを表示

    def test_parser(self):
        """
        Exercise the summary-report helpers on ``self.data``: channel parsing,
        prevalence-to-count conversion, birth-cohort ages and month/season labels.
        """
        # --- population channel: index levels and a spot value
        population_channel = 'Average Population by Age Bin'
        population = summary_channel_to_pandas(self.data, population_channel)
        self.assertListEqual(population.index.names, ['Time', 'Age Bin'])
        self.assertAlmostEqual(population.loc[31, 80], 16.602738, places=5)

        # --- parasitemia channel: series name and a spot value
        parasite_channel = 'PfPR by Parasitemia and Age Bin'
        parasites = summary_channel_to_pandas(self.data, parasite_channel)
        self.assertEqual(parasites.name, parasite_channel)
        spot_value = parasites.loc[1095, 500, 100]
        self.assertAlmostEqual(spot_value, 0.026666, places=5)

        # on given day + age, density-bin fractions sum to 1
        density_total = parasites.loc[31, :, 20].sum()
        self.assertAlmostEqual(density_total, 1)

        # --- counts keep the name and index layout of the prevalence series
        counts = convert_to_counts(parasites, population)
        self.assertEqual(counts.name, parasites.name)
        self.assertListEqual(counts.index.names, parasites.index.names)
        observed = counts.iloc[7:13].astype(int).tolist()
        self.assertListEqual(observed, [281, 13, 19, 7, 9, 6])

        # --- birth cohort: the age bin is the elapsed time expressed in years
        df = age_from_birth_cohort(parasites.reset_index())
        years_elapsed = (df.Time / 365.0).tolist()
        self.assertListEqual(years_elapsed, df['Age Bin'].tolist())

        # --- without a seasons mapping all twelve months are present
        months_df = season_from_time(df)
        self.assertEqual(len(months_df.Month.unique()), 12)
        self.assertEqual(months_df.Month.iloc[0], 'February')

        # --- with a month -> season mapping only the mapped months remain
        seasons = {'fall': ['September', 'October'], 'winter': ['January']}
        seasons_by_month = {
            month: season
            for season, month_names in seasons.items()
            for month in month_names
        }
        seasons_df = season_from_time(df, seasons=seasons_by_month)
        self.assertEqual(len(seasons_df.Month.unique()), 3)
        self.assertEqual(seasons_df.Month.iloc[0], 'September')
        self.assertEqual(seasons_df.Season.iloc[0], 'fall')

コード例 #3

0

ファイルを表示

ファイル: ChannelBySeasonAgeDensityCohortAnalyzer.py プロジェクト: m-v-nikolov/dtk-tools-py3

    def apply(self, parser):
        """
        Convert each configured prevalence channel of one simulation into counts
        re-binned on the corresponding reference index, stacked under a
        'Channel' index level.

        :param parser: object exposing ``raw_data`` (keyed by output filename),
                       ``sim_data`` and ``sim_id``
        :return: single-column ('Counts') DataFrame tagged with ``sample`` and
                 ``sim_id`` attributes
        """

        # Raw simulation output for the first configured filename
        raw = parser.raw_data[self.filenames[0]]

        # Population series, used to convert parasite prevalence to counts
        population = summary_channel_to_pandas(raw, self.population_channel)

        # One re-binned 'Counts' series per channel
        counts_by_channel = {}
        for channel in self.channels:

            # Prevalence series for this channel
            prevalence = summary_channel_to_pandas(raw, channel)

            with thread_lock:  # TODO: re-code following block to ensure thread safety (Issue #758)?

                # Prevalence x population -> counts, flattened into columns
                flat = convert_to_counts(prevalence, population).reset_index()

                # Derive age (birth cohort) and month/season from the time column
                flat = age_from_birth_cohort(flat)
                flat = season_from_time(flat, seasons=self.seasons)

                # Re-bin on the reference index for this channel
                reference_index = self.reference.loc(axis=1)[channel].index
                rebinned = aggregate_on_index(flat, reference_index, keep=[channel])
                counts_by_channel[channel] = rebinned[channel].rename('Counts')

        stacked = pd.concat(counts_by_channel.values(),
                            keys=counts_by_channel.keys(),
                            names=['Channel'])

        # single-column DataFrame for standardized combine/compare pattern
        sim_data = pd.DataFrame(stacked)
        sim_data.sample = parser.sim_data.get('__sample_index__')
        sim_data.sim_id = parser.sim_id

        return sim_data

コード例 #4

0

ファイルを表示

    def test_site_analyzer(self):
        """
        End-to-end check of the site's single analyzer: the annualized
        conversions, then apply, combine, finalize (compare) and cache.

        Writes ``cache_<AnalyzerClassName>.json`` into ``self.input_path``.
        """

        analyzers = self.site.analyzers
        # assertEqual, not assertTrue: assertTrue(x, y) treats y as the failure
        # message and passes for any truthy x.
        self.assertEqual(len(analyzers), 1)

        analyzer = analyzers[0]
        self.assertEqual(analyzer.name, self.analyzer_name)

        reference = analyzer.reference
        self.assertIsInstance(reference, pd.DataFrame)

        #############
        # Test annualized functions
        channel_data = pd.concat(
            [
                summary_channel_to_pandas(self.data, c)
                for c in (analyzer.channel, analyzer.population_channel)
            ],
            axis=1)
        person_years = convert_annualized(
            channel_data[analyzer.population_channel])
        channel_data['Trials'] = person_years
        channel_data['Observations'] = convert_to_counts(
            channel_data[analyzer.channel], channel_data['Trials'])
        # Rows at time 31: trials are expected to equal population * 31 / 365
        for _, row in channel_data.loc[31].iterrows():
            self.assertAlmostEqual(
                row['Trials'], row[analyzer.population_channel] * 31 / 365.0)
            self.assertAlmostEqual(row['Observations'],
                                   row[analyzer.channel] * row['Trials'])

        #############
        # TEST APPLY
        sim_data = analyzer.apply(self.parser)
        self.assertListEqual(reference.index.names, sim_data.index.names)
        self.assertSetEqual(set(sim_data.columns.tolist()),
                            {'Observations', 'Trials'})

        self.assertEqual(self.parser.sim_id, 'dummy_id')
        self.assertEqual(self.parser.sim_data.get('__sample_index__'),
                         'dummy_index')

        # Make multiple dummy copies of the same parser
        # with unique sim_id and subset of different sample points
        n_sims, n_samples = 8, 4
        parsers = self.get_dummy_parsers(sim_data,
                                         id(analyzer),
                                         n_sims=n_sims,
                                         n_samples=n_samples)

        #############
        # TEST COMBINE
        analyzer.combine(parsers)

        # Verify averaging of sim_id by sample_index is done correctly
        # e.g. sample0 = (id0, id2) = (1x, 3x) => avg0 = 2
        avg = [
            np.arange(first + 1, n_sims + 1, n_samples).mean()
            for first in range(n_samples)
        ]
        for _, row in analyzer.data.iterrows():
            # Compare every other sample against sample 0. The loop variable
            # itself is the index, so each sample is actually checked.
            for isample in range(1, n_samples):
                self.assertAlmostEqual(
                    row[0, 'Observations'] / avg[0],
                    row[isample, 'Observations'] / avg[isample])
                self.assertAlmostEqual(
                    row[0, 'Trials'] / avg[0],
                    row[isample, 'Trials'] / avg[isample])

        #############
        # TEST COMPARE
        # applies compare_fn to each sample setting self.result
        analyzer.finalize()

        for i in range(n_samples):
            sample_data = analyzer.data.xs(i, level='sample', axis=1)
            self.assertAlmostEqual(
                analyzer.result[i],
                self.compare_with_nested_loops(sample_data, reference))

        #############
        # TEST CACHE
        # concats reference to columns of simulation outcomes by sample-point index
        cache = analyzer.cache()
        # keys() is a view (not a list) on Python 3, which assertListEqual
        # rejects; sorting also makes the check independent of key order.
        self.assertListEqual(['ref', 'samples'], sorted(cache.keys()))
        self.assertEqual(n_samples, len(cache['samples']))

        cache_path = os.path.join(
            self.input_path, 'cache_%s.json' % analyzer.__class__.__name__)
        with open(cache_path, 'w') as fp:
            json.dump(cache, fp, indent=4, cls=NumpyEncoder)

コード例 #5

0

ファイルを表示

ファイル: test_code.py プロジェクト: m-v-nikolov/dtk-tools-py3

population_channel = 'Average Population by Age Bin'

# Channel names are read from the 'Channel' level of the reference index.
ref_ix = reference.index
channels_ix = ref_ix.names.index('Channel')
channels = ref_ix.levels[channels_ix].values

# Month name -> season label; passed to season_from_time as ``seasons``.
seasons = {
    'May': 'DH2',
    'September': 'W2',
    'January': 'DC2',
}

file = 'C://Users//pselvaraj//Desktop//MalariaSummaryReport_Monthly_Report.json'
# Context manager closes the report file as soon as it has been parsed,
# instead of leaving the handle open until garbage collection.
with open(file) as fp:
    data = json.load(fp)

# Population by age and time series (to convert parasite prevalence to counts)
population = summary_channel_to_pandas(data, population_channel)

# Coerce channel data into format for comparison with reference
# NOTE(review): channel_data_dict is not written to in the lines visible here;
# presumably the loop body continues past this excerpt — confirm.
channel_data_dict = {}
for channel in channels:

    # Prevalence by density, age, and time series
    channel_data = summary_channel_to_pandas(data, channel)

    # Calculate counts from prevalence and population
    channel_counts = convert_to_counts(channel_data, population)

    # Reset multi-index and perform transformations on index columns
    df = channel_counts.reset_index()
    df = age_from_birth_cohort(df)  # calculate age from time for birth cohort
    df = season_from_time(df, seasons=seasons)  # calculate month from time