Exemple #1
0
    def test_target_diagram(self):
        """A target diagram built from three model statistics is written to disk."""
        # (model_name, unit, model values, reference values) for each sample.
        samples = [
            ('Linda', 'g',
             np.array([3, 3, 2, 3, 6, 8, 5, 3, 4, 6, 4, 1, 7, 7, 6]),
             np.array([2, 5, 1, 5, 5, 9, 4, 5, 3, 8, 3, 3, 6, 9, 5])),
            ('Kate', 'mg',
             np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8]),
             np.array([5, 11, 6, 4, 11, 8, 7, 9, 2, 5, 11, -2, 1, 3, 9])),
            ('Naomi', 'kg',
             np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8]),
             np.array([-1, -10, -5, -5, -11, -8, -7, 5, 3, 13, 10, 2, 2, -1, 7])),
        ]
        all_stats = tuple(
            processor.calculate_statistics(model_values=model,
                                           reference_values=reference,
                                           model_name=name,
                                           unit=unit)
            for name, unit, model, reference in samples)

        diagram = plotter.create_target_diagram(all_stats)
        diagram.write('resources/target_test.png')
Exemple #2
0
    def test_taylor_diagram(self):
        """A Taylor diagram is built from two statistics and a third sample is plotted onto it."""
        # All three model series are compared against the same reference series.
        reference = np.array([9, 10, 1, 2, 11, 3, 7, 5, 4, 12, 7, 8, 5, 1, 14])

        model_a = np.array([0, 15, 2, 3, 15, 8, 5, 3, 9, 11, 12, 1, 7, 7, 6])
        stats_a = processor.calculate_statistics(model_values=model_a, reference_values=reference, unit='mg')

        model_b = np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8])
        stats_b = processor.calculate_statistics(model_values=model_b, reference_values=reference, model_name='Kate', unit='mg')

        model_c = np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8])
        stats_c = processor.calculate_statistics(model_values=model_c, reference_values=reference, unit='g')

        # Create the diagram from the first two statistics, then overlay the
        # third sample on the same axes.
        diagram = plotter.create_taylor_diagrams((stats_a, stats_b))[0]
        diagram.plot_sample(stats_c['corrcoeff'], stats_c['stddev'], model_name='Linda', unit=stats_c['unit'])
        diagram.write('resources/taylor_test.png')
Exemple #3
0
    def test_taylor_diagrams(self):
        """Statistics with two distinct units are split into two Taylor diagrams."""
        # All three model series share the same reference series.
        reference = np.array([9, 10, 1, 2, 11, 3, 7, 5, 4, 12, 7, 8, 5, 1, 14])

        stats_kate = processor.calculate_statistics(
            model_values=np.array([0, 15, 2, 3, 15, 8, 5, 3, 9, 11, 12, 1, 7, 7, 6]),
            reference_values=reference,
            model_name='Kate',
            unit='megazork')

        stats_linda = processor.calculate_statistics(
            model_values=np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8]),
            reference_values=reference,
            model_name='Linda',
            unit='megazork')

        stats_linda_other_unit = processor.calculate_statistics(
            model_values=np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8]),
            reference_values=reference,
            model_name='Linda',
            unit='gimpel/m^3')

        # Two units ('megazork' and 'gimpel/m^3') are expected to yield two diagrams.
        diagrams = plotter.create_taylor_diagrams(
            (stats_kate, stats_linda, stats_linda_other_unit))
        self.assertEqual(2, len(diagrams))

        for index, diagram in enumerate(diagrams):
            diagram.write('resources/taylor_test_%s.png' % index)
Exemple #4
0
    def test_output_csv(self):
        """Statistics for four variable mappings are written to a CSV file."""
        # mapping key -> positional arguments for processor.calculate_statistics.
        cases = {
            ('chl', 'Ref_chl'): (np.array([11, 9, 11.2, 10.5]), np.array([10, 10, 10, 10]), 'chl', 'Ref_chl'),
            ('chl', 'Ref2_chl'): (np.array([12, 2, 3, 5]), np.array([2, 3, 4, 6]), 'chl', 'Ref2_chl'),
            ('sst', 'sst_reference'): (np.array([8, 9, 15, 4]), np.array([6, 8, 2, 1]), 'sst', 'Ref_sst'),
            ('sst', 'sst'): (np.array([8, 10, 2, 55]), np.array([99, 5, 5, 23]), 'sst', 'sst'),
        }
        mappings = list(cases)
        statistics = {key: processor.calculate_statistics(*args) for key, args in cases.items()}

        output = Output()
        output.csv(mappings, statistics, 10957, matchups=None, target_file='c:\\temp\\output\\benchmark\\test.csv')
Exemple #5
0
    def test_compute_statistics_with_extreme_reference_values(self):
        """A constant model series is compared against a spread-out reference series."""
        stats = calculate_statistics(model_values=np.array([1, 1, 1, 1]),
                                     reference_values=np.array([1.1, 2.2, 2.9, 3.7]),
                                     config=self.config)

        for expected, key, places in [(0.954921, 'unbiased_rmse', 5),
                                      (1.757128, 'rmse', 5),
                                      (59.595959, 'pbias', 5),
                                      (-1.475, 'bias', 5)]:
            self.assertAlmostEqual(expected, stats[key], places)

        # A zero-variance model series makes the correlation coefficient undefined.
        self.assertTrue(np.isnan(stats['corrcoeff']))

        for expected, key, places in [(1.49908579, 'reliability_index', 5),
                                      (-2.38588, 'model_efficiency', 5),
                                      (1.0, 'mean', 5),
                                      (2.475, 'ref_mean', 5),
                                      (0, 'stddev', 5),
                                      (0.954921, 'ref_stddev', 5),
                                      (0.0, 'normalised_stddev', 5),
                                      (1, 'median', 5),
                                      (2.545, 'ref_median', 2),
                                      (1, 'p90', 5),
                                      (3.46, 'ref_p90', 5),
                                      (1, 'p95', 5),
                                      (3.58, 'ref_p95', 2),
                                      (1, 'min', 5),
                                      (1.1, 'ref_min', 5),
                                      (1, 'max', 5),
                                      (3.7, 'ref_max', 5)]:
            self.assertAlmostEqual(expected, stats[key], places)

        # The decomposition rmse^2 == bias^2 + unbiased_rmse^2 must always hold.
        self.assertAlmostEqual(stats['rmse'] ** 2, stats['bias'] ** 2 + stats['unbiased_rmse'] ** 2, 5)
Exemple #6
0
    def test_compute_statistics(self):
        """Baseline statistics for two well-behaved series, without model/ref names."""
        stats = calculate_statistics(model_values=np.array(range(1, 5, 1)),  # [1, 2, 3, 4]
                                     reference_values=np.array([1.1, 2.2, 2.9, 3.7]),
                                     config=self.config)

        # No names were supplied, so none should be reported.
        self.assertIsNone(stats['model_name'])
        self.assertIsNone(stats['ref_name'])

        for expected, key in [(0.192028, 'unbiased_rmse'),
                              (0.193649, 'rmse'),
                              (0.2010936411, 'normalised_rmse'),
                              (-1.0101, 'pbias'),
                              (0.025, 'bias'),
                              (0.99519, 'corrcoeff'),
                              (1.03521, 'reliability_index'),
                              (0.9588759, 'model_efficiency'),
                              (2.5, 'mean'),
                              (2.475, 'ref_mean'),
                              (1.11803, 'stddev'),
                              (0.954921, 'ref_stddev'),
                              (1.170808, 'normalised_stddev'),
                              (2.5, 'median'),
                              (2.55, 'ref_median'),
                              (3.7, 'p90'),
                              (3.46, 'ref_p90'),
                              (3.85, 'p95'),
                              (3.58, 'ref_p95'),
                              (1, 'min'),
                              (1.1, 'ref_min'),
                              (4, 'max'),
                              (3.7, 'ref_max')]:
            self.assertAlmostEqual(expected, stats[key], 5)

        # The decomposition rmse^2 == bias^2 + unbiased_rmse^2 must always hold.
        self.assertAlmostEqual(stats['rmse'] ** 2, stats['bias'] ** 2 + stats['unbiased_rmse'] ** 2, 5)
Exemple #7
0
    def test_compute_statistics_with_masked_values(self):
        """Masked model entries are dropped from both series before statistics are computed."""
        model_values = ma.array(np.arange(1.0, 5.0, 1), mask=np.array([False, False, True, False]))  # [1, 2, --, 4]
        ref_values = ma.array([1.1, 2.2, 2.9, 3.7])
        # Harmonise aligns the masks of both arrays; compressing removes the
        # masked entries so only valid pairs remain.
        ref_values, model_values = utils.harmonise(ref_values, model_values)
        ref_values = ref_values.compressed()
        model_values = model_values.compressed()
        stats = calculate_statistics(model_values=model_values, reference_values=ref_values, config=self.config, model_name='kate', ref_name='ref')

        self.assertEqual('kate', stats['model_name'])
        self.assertEqual('ref', stats['ref_name'])

        for expected, key in [(0.216024, 'unbiased_rmse'),
                              (0.216024, 'rmse'),
                              (6.344131e-15, 'pbias'),
                              (0.0, 'bias'),
                              (0.99484975, 'corrcoeff'),
                              (1.039815, 'reliability_index'),
                              (0.9589041, 'model_efficiency'),
                              (2.33333, 'mean'),
                              (2.33333, 'ref_mean'),
                              (1.24722, 'stddev'),
                              (1.06562, 'ref_stddev'),
                              (1.17041, 'normalised_stddev'),
                              (2, 'median'),
                              (2.2, 'ref_median'),
                              (3.6, 'p90'),
                              (3.4, 'ref_p90'),
                              (3.8, 'p95'),
                              (3.55, 'ref_p95'),
                              (1, 'min'),
                              (1.1, 'ref_min'),
                              (4, 'max'),
                              (3.7, 'ref_max')]:
            self.assertAlmostEqual(expected, stats[key], 5)

        # The decomposition rmse^2 == bias^2 + unbiased_rmse^2 must always hold.
        self.assertAlmostEqual(stats['rmse'] ** 2, stats['bias'] ** 2 + stats['unbiased_rmse'] ** 2, 5)
Exemple #8
0
def calculate_statistics(model_name, ref_name, data, config=None):
    """
    Calculates the statistics for the given model and reference variables located in the data file. Calculation will be
    performed according to the provided configuration.
    @param model_name: the name of the model variable.
    @param ref_name: the name of the reference variable.
    @param data: the input data object.
    @param config: the optional configuration; the default configuration is used when omitted.
    @return: a dictionary of statistics, or None if no matchups are found.
    """

    if config is None:
        config = get_default_config()

    # A gridded reference variable (more than one dimension) can be compared
    # cell-by-cell; no matchup search is necessary.
    is_gridded = len(data.get_reference_dimensions(ref_name)) > 1
    if is_gridded:
        reference_values, model_values = data.get_values(ref_name, model_name)
        unit = data.unit(model_name)
        return processor.calculate_statistics(model_values, reference_values, model_name, ref_name, unit, config)

    me = MatchupEngine(data, config)
    matchups = me.find_all_matchups()
    if config.remove_empty_matchups:
        matchups = me.remove_empty_matchups(matchups)
    if len(matchups) == 0:
        print("No matchups found; maybe allow higher maximum time delta.")
        return
    unit = data.unit(model_name)
    # Bug fix: forward the resolved configuration. Previously config=None was
    # passed here, silently discarding the caller's (or default) configuration.
    return calculate_statistics_from_matchups(matchups, model_name, ref_name, data, unit, config=config)
Exemple #9
0
def calculate_statistics_from_matchups(matchups,
                                       model_name,
                                       ref_name,
                                       data,
                                       unit=None,
                                       config=None):
    """
    Calculates the statistics for the given matchups and model and reference variable. Calculation will be
    performed according to the provided configuration.
    @param matchups: an iterable of 'Matchup' objects.
    @param model_name: the name of the model variable.
    @param ref_name: the name of the reference variable.
    @param data: the input data object.
    @param unit: the optional unit label, forwarded unchanged to the statistics processor.
    @param config: the optional configuration.
    @return: a dictionary of statistics.
    """
    reference_values, model_values = extract_values_from_matchups(
        matchups, data, model_name, ref_name)

    return processor.calculate_statistics(model_values,
                                          reference_values,
                                          model_name,
                                          ref_name,
                                          unit=unit,
                                          config=config)
Exemple #10
0
    def test_compute_statistics_with_extreme_model_values(self):
        """A varying model series is compared against a constant reference series."""
        model_values = np.array(range(1, 5, 1)) # [1, 2, 3, 4]
        ref_values = np.array([1, 1, 1, 1])
        stats = calculate_statistics(model_values=model_values, reference_values=ref_values, config=self.config)
        self.assertAlmostEqual(1.118034, stats['unbiased_rmse'], 5)
        self.assertAlmostEqual(1.870829, stats['rmse'], 5)
        self.assertAlmostEqual(-150, stats['pbias'], 5)
        self.assertAlmostEqual(1.5, stats['bias'], 5)
        # A zero-variance reference series makes these quantities undefined.
        self.assertTrue(np.isnan(stats['corrcoeff']))
        self.assertAlmostEqual(1.5106421, stats['reliability_index'], 5)
        self.assertTrue(np.isnan(stats['model_efficiency']))
        self.assertAlmostEqual(2.5, stats['mean'], 5)
        self.assertAlmostEqual(1, stats['ref_mean'], 5)
        self.assertAlmostEqual(1.11803, stats['stddev'], 5)
        self.assertAlmostEqual(0.0, stats['ref_stddev'], 5)
        # Bug fix: np.isnan was previously passed uncalled as the (always truthy)
        # first argument of assertTrue, so this assertion could never fail.
        self.assertTrue(np.isnan(stats['normalised_stddev']))
        self.assertAlmostEqual(2.5, stats['median'], 5)
        self.assertAlmostEqual(1, stats['ref_median'], 5)
        self.assertAlmostEqual(3.7, stats['p90'], 5)
        self.assertAlmostEqual(1, stats['ref_p90'], 5)
        self.assertAlmostEqual(3.85, stats['p95'], 5)
        self.assertAlmostEqual(1, stats['ref_p95'], 5)
        self.assertAlmostEqual(1, stats['min'], 5)
        self.assertAlmostEqual(1, stats['ref_min'], 5)
        self.assertAlmostEqual(4, stats['max'], 5)
        self.assertAlmostEqual(1, stats['ref_max'], 5)

        # The decomposition rmse^2 == bias^2 + unbiased_rmse^2 must always hold.
        self.assertAlmostEqual(stats['rmse'] ** 2, stats['bias'] ** 2 + stats['unbiased_rmse'] ** 2, 5)
Exemple #11
0
    def test_taylor_diagrams(self):
        """Statistics with two distinct units produce two separate Taylor diagrams."""
        # All model series share the same reference series.
        reference = np.array([9, 10, 1, 2, 11, 3, 7, 5, 4, 12, 7, 8, 5, 1, 14])
        model_series = [
            ('Kate', 'megazork', np.array([0, 15, 2, 3, 15, 8, 5, 3, 9, 11, 12, 1, 7, 7, 6])),
            ('Linda', 'megazork', np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8])),
            ('Linda', 'gimpel/m^3', np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8])),
        ]
        all_stats = tuple(
            processor.calculate_statistics(model_values=model,
                                           reference_values=reference,
                                           model_name=name,
                                           unit=unit)
            for name, unit, model in model_series)

        # Two units ('megazork' and 'gimpel/m^3') -> two diagrams.
        diagrams = plotter.create_taylor_diagrams(all_stats)
        self.assertEqual(2, len(diagrams))

        for index, diagram in enumerate(diagrams):
            diagram.write('resources/taylor_test_%s.png' % index)
Exemple #12
0
def calculate_statistics_from_values(model_values, ref_values, model_name=None, ref_name=None, unit=None, config=None):
    """
    Calculates the statistics for two given numpy arrays; the first is considered the model data, the second is
    considered the reference data. Calculation will be performed according to the provided configuration. Note that the
    condition len(model_values) == len(ref_values) must hold.
    @param model_values: numpy array containing the model values.
    @param ref_values: numpy array containing the reference values.
    @param model_name: the optional name of the model variable.
    @param ref_name: the optional name of the reference variable.
    @param unit: the optional unit label, forwarded unchanged to the statistics processor.
    @param config: the optional configuration.
    @return: a dictionary of statistics.
    """
    return processor.calculate_statistics(model_values, ref_values, model_name, ref_name, unit, config=config)
Exemple #13
0
    def test_target_diagram(self):
        """A target diagram built from three model statistics is written to disk."""
        # One (model_name, unit, model values, reference values) tuple per sample.
        samples = (
            ('Linda', 'g',
             np.array([3, 3, 2, 3, 6, 8, 5, 3, 4, 6, 4, 1, 7, 7, 6]),
             np.array([2, 5, 1, 5, 5, 9, 4, 5, 3, 8, 3, 3, 6, 9, 5])),
            ('Kate', 'mg',
             np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8]),
             np.array([5, 11, 6, 4, 11, 8, 7, 9, 2, 5, 11, -2, 1, 3, 9])),
            ('Naomi', 'kg',
             np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8]),
             np.array([-1, -10, -5, -5, -11, -8, -7, 5, 3, 13, 10, 2, 2, -1, 7])),
        )

        collected = []
        for name, unit, model, reference in samples:
            collected.append(
                processor.calculate_statistics(model_values=model,
                                               reference_values=reference,
                                               model_name=name,
                                               unit=unit))

        diagram = plotter.create_target_diagram(tuple(collected))
        diagram.write('resources/target_test.png')
Exemple #14
0
    def test_taylor_diagram(self):
        """A Taylor diagram is built from two statistics; a third sample is plotted on top."""
        # All three model series are compared against the same reference series.
        reference = np.array([9, 10, 1, 2, 11, 3, 7, 5, 4, 12, 7, 8, 5, 1, 14])

        stats_first = processor.calculate_statistics(
            model_values=np.array([0, 15, 2, 3, 15, 8, 5, 3, 9, 11, 12, 1, 7, 7, 6]),
            reference_values=reference,
            unit='mg')

        stats_second = processor.calculate_statistics(
            model_values=np.array([2, 14, 8, 6, 10, 9, 6, 7, 2, 15, 10, 0, 2, 2, 8]),
            reference_values=reference,
            model_name='Kate',
            unit='mg')

        stats_extra = processor.calculate_statistics(
            model_values=np.array([-2, -14, -8, -6, -10, -9, -6, 7, 2, 15, 10, 0, 2, 2, 8]),
            reference_values=reference,
            unit='g')

        # Create the diagram from the first two statistics, then overlay the
        # extra sample onto the same axes.
        diagram = plotter.create_taylor_diagrams((stats_first, stats_second))[0]
        diagram.plot_sample(stats_extra['corrcoeff'],
                            stats_extra['stddev'],
                            model_name='Linda',
                            unit=stats_extra['unit'])
        diagram.write('resources/taylor_test.png')
Exemple #15
0
def calculate_statistics_from_matchups(matchups, model_name, ref_name, data, unit=None, config=None):
    """
    Calculates the statistics for the given matchups and model and reference variable. Calculation will be
    performed according to the provided configuration.
    @param matchups: an iterable of 'Matchup' objects.
    @param model_name: the name of the model variable.
    @param ref_name: the name of the reference variable.
    @param data: the input data object.
    @param unit: the optional unit label, forwarded unchanged to the statistics processor.
    @param config: the optional configuration.
    @return: a dictionary of statistics.
    """
    reference_values, model_values = extract_values_from_matchups(matchups, data, model_name, ref_name)

    return processor.calculate_statistics(model_values, reference_values, model_name, ref_name, unit=unit, config=config)
Exemple #16
0
    def test_output_csv(self):
        """Statistics for four variable mappings are written to a CSV file."""
        # mapping key -> positional arguments for processor.calculate_statistics.
        cases = {
            ('chl', 'Ref_chl'): (np.array([11, 9, 11.2, 10.5]),
                                 np.array([10, 10, 10, 10]), 'chl', 'Ref_chl'),
            ('chl', 'Ref2_chl'): (np.array([12, 2, 3, 5]),
                                  np.array([2, 3, 4, 6]), 'chl', 'Ref2_chl'),
            ('sst', 'sst_reference'): (np.array([8, 9, 15, 4]),
                                       np.array([6, 8, 2, 1]), 'sst', 'Ref_sst'),
            ('sst', 'sst'): (np.array([8, 10, 2, 55]),
                             np.array([99, 5, 5, 23]), 'sst', 'sst'),
        }
        mappings = list(cases)
        statistics = {key: processor.calculate_statistics(*args)
                      for key, args in cases.items()}

        output = Output()
        output.csv(mappings,
                   statistics,
                   10957,
                   matchups=None,
                   target_file='c:\\temp\\output\\benchmark\\test.csv')
Exemple #17
0
def calculate_statistics_from_values(model_values,
                                     ref_values,
                                     model_name=None,
                                     ref_name=None,
                                     unit=None,
                                     config=None):
    """
    Calculates the statistics for two given numpy arrays; the first is considered the model data, the second is
    considered the reference data. Calculation will be performed according to the provided configuration. Note that the
    condition len(model_values) == len(ref_values) must hold.
    @param model_values: numpy array containing the model values.
    @param ref_values: numpy array containing the reference values.
    @param model_name: the optional name of the model variable.
    @param ref_name: the optional name of the reference variable.
    @param unit: the optional unit label, forwarded unchanged to the statistics processor.
    @param config: the optional configuration.
    @return: a dictionary of statistics.
    """
    return processor.calculate_statistics(model_values,
                                          ref_values,
                                          model_name,
                                          ref_name,
                                          unit,
                                          config=config)
Exemple #18
0
def calculate_statistics(model_name, ref_name, data, config=None):
    """
    Calculates the statistics for the given model and reference variables located in the data file. Calculation will be
    performed according to the provided configuration.
    @param model_name: the name of the model variable.
    @param ref_name: the name of the reference variable.
    @param data: the input data object.
    @param config: the optional configuration; the default configuration is used when omitted.
    @return: a dictionary of statistics, or None if no matchups are found.
    """

    if config is None:
        config = get_default_config()

    # A gridded reference variable (more than one dimension) can be compared
    # cell-by-cell; no matchup search is necessary.
    is_gridded = len(data.get_reference_dimensions(ref_name)) > 1
    if is_gridded:
        reference_values, model_values = data.get_values(ref_name, model_name)
        unit = data.unit(model_name)
        return processor.calculate_statistics(model_values, reference_values,
                                              model_name, ref_name, unit,
                                              config)

    me = MatchupEngine(data, config)
    matchups = me.find_all_matchups()
    if config.remove_empty_matchups:
        matchups = me.remove_empty_matchups(matchups)
    if len(matchups) == 0:
        print("No matchups found; maybe allow higher maximum time delta.")
        return
    unit = data.unit(model_name)
    # Bug fix: forward the resolved configuration. Previously config=None was
    # passed here, silently discarding the caller's (or default) configuration.
    return calculate_statistics_from_matchups(matchups,
                                              model_name,
                                              ref_name,
                                              data,
                                              unit,
                                              config=config)
Exemple #19
0
def main():
    """Run the full benchmarking pipeline.

    Parses the command line, loads the input data, finds matchups (for
    one-dimensional reference variables), computes statistics for every
    requested variable mapping, and writes the configured outputs: CSV,
    Taylor diagrams, density plots, a target diagram, an XHTML report,
    and optionally a zip archive of all produced files.
    """
    parsed_args = parse_arguments(sys.argv[1:])
    config = Configuration(properties_file_name=parsed_args.config, target_dir=parsed_args.output_dir,
                           target_prefix=parsed_args.prefix)
    file_handler = setup_logging(config)
    # An optional separate reference file may supply the reference data.
    if parsed_args.reference_file is not None:
        data = Data(parsed_args.path, parsed_args.reference_file, config.max_cache_size)
    else:
        data = Data(parsed_args.path, max_cache_size=config.max_cache_size)

    output = Output(config=config)

    # Matchups are only needed when at least one reference variable is
    # one-dimensional; gridded variables are compared cell-by-cell below.
    matchups = None
    if data.has_one_dim_ref_var():
        me = MatchupEngine(data, config)
        matchups = me.find_all_matchups()
        if not matchups:
            logging.warning('No matchups found. System will exit.')
            exit(0)
        if config.remove_empty_matchups:
            matchups = me.remove_empty_matchups(matchups)

    # The 'resource' module is unavailable on Windows ('nt'), hence the guard.
    if not os.name == 'nt':
        logging.debug('Memory after matchups have been found: %s' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    matchup_count = 0 if matchups is None else len(matchups)
    collected_statistics = {}   # (model_name, ref_name) -> statistics dict
    density_plot_files = []
    target_files = []           # everything written, for the optional zip
    density_plots = {}

    # Compute statistics (and optionally density plots) per variable mapping.
    for (model_name, ref_name) in parsed_args.variable_mappings:
        unit = data.unit(model_name)
        is_gridded = len(data.get_reference_dimensions(ref_name)) > 1
        if is_gridded:
            reference_values, model_values = data.get_values(ref_name, model_name)
            # Gridded pairs contribute their valid cell count to the total.
            matchup_count += ma.count(reference_values)
        else:
            reference_values, model_values = utils.extract_values(matchups, data, ref_name, model_name)
            reference_values, model_values = utils.harmonise(reference_values, model_values)
            logging.debug('Compressing ref-variable %s' % ref_name)
            reference_values = reference_values.compressed()
            logging.debug('Compressing model-variable %s' % model_name)
            model_values = model_values.compressed()

        logging.info('Calculating statistics for \'%s\' with \'%s\'' % (model_name, ref_name))
        stats = processor.calculate_statistics(model_values, reference_values, model_name, ref_name, unit, config)
        collected_statistics[(model_name, ref_name)] = stats

        if config.write_density_plots:
            # Axis range spans both series; the upper bound uses the 90th
            # percentile rather than the maximum.
            axis_min = min(stats['min'], stats['ref_min'])
            axis_max = max(stats['p90'], stats['ref_p90'])
            logging.info('Creating density plot for \'%s\' and \'%s\'' % (model_name, ref_name))
            density_plots[model_name + ref_name] = output.density_plot(model_name, ref_name, model_values,
                                                                       reference_values, config.density_plot_log_scaled,
                                                                       None, axis_min, axis_max, data.unit(model_name))

    if not os.name == 'nt':
        logging.debug(
            'Memory after statistics have been computed: %s' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

    # --- CSV output ---
    if config.write_csv:
        csv_target_file = '%s/%sstatistics.csv' % (parsed_args.output_dir, config.target_prefix)
        target_files.append(csv_target_file)
        output.csv(data, parsed_args.variable_mappings, collected_statistics, matchup_count, matchups=matchups, source_file=parsed_args.path, target_file=csv_target_file)
        logging.info('CSV output written to \'%s\'' % csv_target_file)
        if matchups is not None:
            matchup_filename = '%s_matchups.csv' % os.path.splitext(csv_target_file)[0]
            logging.info('Matchups written to \'%s\'' % matchup_filename)
            target_files.append(matchup_filename)

    # --- Taylor diagrams ---
    taylor_target_files = []
    if config.write_taylor_diagrams:
        taylor_target_file = '%s/%staylor.png' % (parsed_args.output_dir, config.target_prefix)
        written_taylor_diagrams, d = output.taylor(list(collected_statistics.values()), taylor_target_file)
        # The second return value is not needed here; release it immediately.
        del d
        if written_taylor_diagrams:
            for written_taylor_diagram in written_taylor_diagrams:
                logging.info('Taylor diagram written to \'%s\'' % written_taylor_diagram)
                target_files.append(written_taylor_diagram)
                taylor_target_files.append(written_taylor_diagram)

    # --- Density plots (created above, written here) ---
    if config.write_density_plots:
        for (model_name, ref_name) in parsed_args.variable_mappings:
            density_target = '%s/density-%s-%s.png' % (parsed_args.output_dir, model_name, ref_name)
            density_plot_files.append(density_target)
            target_files.append(density_target)
            output.write_density_plot(density_plots[model_name + ref_name], density_target)
            logging.info('Density plot written to \'%s\'' % density_target)

    # --- Target diagram ---
    target_diagram_file = None
    if config.write_target_diagram:
        target_diagram_file = '%s/%starget.png' % (parsed_args.output_dir, config.target_prefix)
        output.target_diagram(list(collected_statistics.values()), target_diagram_file)
        logging.info('Target diagram written to \'%s\'' % target_diagram_file)
        target_files.append(target_diagram_file)

    # --- XHTML report (XML + XSL stylesheet + CSS, copied alongside) ---
    if config.write_xhtml:
        xml_target_file = '%s/%sreport.xml' % (parsed_args.output_dir, config.target_prefix)
        path = str(os.path.dirname(os.path.realpath(__file__))) + '/../resources/'
        xsl = path + 'analysis-summary.xsl'
        css = path + 'styleset.css'
        xsl_target = '%s/%s' % (parsed_args.output_dir, os.path.basename(xsl))
        css_target = '%s/%s' % (parsed_args.output_dir, os.path.basename(css))
        output.xhtml(list(collected_statistics.values()), matchup_count, matchups, data, xml_target_file, taylor_target_files,
                     target_diagram_file, density_plot_files)
        logging.info('XHTML report written to \'%s\'' % xml_target_file)
        shutil.copy(xsl, parsed_args.output_dir)
        logging.info('XHTML support file written to \'%s/%s\'' % (parsed_args.output_dir, 'analysis-summary.xsl'))
        shutil.copy(css, parsed_args.output_dir)
        # NOTE(review): the message names 'styleset.xsl' but the copied file is
        # styleset.css -- likely a typo in the log text; confirm before changing.
        logging.info('XHTML support file written to \'%s/%s\'' % (parsed_args.output_dir, 'styleset.xsl'))
        target_files.append(xml_target_file)
        target_files.append(xsl_target)
        target_files.append(css_target)

    # --- Optional zip archive of everything written above ---
    if config.zip:
        create_zip(target_files, config, file_handler, parsed_args)

    logging.info('End of process')