Ejemplo n.º 1
0
 def test_duplicate_classifier_value_name_value_error(self):
     """Check that duplicate value names within a classifier are rejected.

     Both value rows of classifier 1 are named "SAME_NAME", and
     ``sit_classifier_parser.parse`` is expected to raise ``ValueError``.
     """
     nan = np.nan
     rows = [
         ("1", "_CLASSIFIER", "classifier1", nan, nan),
         (1, "SAME_NAME", "a", nan, nan),
         (1, "SAME_NAME", "b", nan, nan),
         (1, "agg1", "agg2", "a", "b"),
         (1, "agg2", "agg2", "a", "b"),
         (2, "_CLASSIFIER", "classifier2", nan, nan),
         (2, "a", "a", nan, nan),
         (2, "agg1", "agg1", "a", nan),
     ]
     duplicated_values_table = pd.DataFrame(data=rows)
     with self.assertRaises(ValueError):
         sit_classifier_parser.parse(duplicated_values_table)
Ejemplo n.º 2
0
    def test_duplicate_classifier_name_value_error(self):
        """Check that two classifiers sharing one name are rejected.

        Classifier ids 1 and 2 are both named "SAME_NAME", and
        ``sit_classifier_parser.parse`` is expected to raise ``ValueError``.
        """
        nan = np.nan
        rows = [
            ("1", "_CLASSIFIER", "SAME_NAME", nan, nan),
            (1, "a", "a", nan, nan),
            (1, "b", "b", nan, nan),
            (1, "agg1", "agg2", "a", "b"),
            (1, "agg2", "agg2", "a", "b"),
            (2, "_CLASSIFIER", "SAME_NAME", nan, nan),
            (2, "a", "a", nan, nan),
            (2, "agg1", "agg1", "a", nan),
        ]
        same_name_table = pd.DataFrame(data=rows)

        with self.assertRaises(ValueError):
            sit_classifier_parser.parse(same_name_table)
Ejemplo n.º 3
0
    def test_multiple_classifier_per_id_column_error(self):
        """Check that defining several classifiers for one id is rejected.

        ``sit_classifier_parser.parse`` is expected to raise ``ValueError``
        for the table built below.
        """
        nan = np.nan
        rows = [
            # id 1 carries the "_CLASSIFIER" marker on two different rows
            ("1", "_CLASSIFIER", "classifier1", nan, nan),
            (1, "_CLASSIFIER", "a", nan, nan),
            (1, "b", "b", nan, nan),
            (1, "agg1", "agg2", "a", "b"),
            (1, "agg2", "agg2", "a", "b"),
            (2, "_CLASSIFIER", "classifier2", nan, nan),
            (2, "a", "a", nan, nan),
            (2, "agg1", "agg1", "a", nan),
        ]
        repeated_marker_table = pd.DataFrame(data=rows)

        with self.assertRaises(ValueError):
            sit_classifier_parser.parse(repeated_marker_table)
Ejemplo n.º 4
0
    def test_expected_result(self):
        """Check the parsed output for a valid two-classifier table.

        The table defines classifier 1 (values "a" and "b", aggregates
        "agg1" and "agg2") and classifier 2 (value "a", aggregate "agg1").
        The three objects returned by ``sit_classifier_parser.parse`` are
        compared against the ids, names, descriptions and aggregate
        definitions spelled out in that table.
        """
        nan = np.nan
        sit_classifiers_table = pd.DataFrame(
            data=[
                ("1", "_CLASSIFIER", "classifier1", nan, nan),
                (1, "a", "a", nan, nan),
                (1, "b", "b", nan, nan),
                (1, "agg1", "agg1", "a", "b"),
                (1, "agg2", "agg2", "a", "b"),
                (2, "_CLASSIFIER", "classifier2", nan, nan),
                (2, "a", "a", nan, nan),
                (2, "agg1", "agg1", "a", nan),
            ])

        classifiers, classifier_values, classifier_aggregates = \
            sit_classifier_parser.parse(sit_classifiers_table)

        # assertEqual is used instead of assertTrue(a == b) so that a
        # failure reports both operands rather than "False is not true".
        self.assertEqual(list(classifiers.id), [1, 2])
        self.assertEqual(
            list(classifiers.name), ["classifier1", "classifier2"])

        self.assertEqual(list(classifier_values.classifier_id), [1, 1, 2])
        self.assertEqual(list(classifier_values.name), ["a", "b", "a"])
        self.assertEqual(
            list(classifier_values.description), ["a", "b", "a"])

        expected_aggregates = [
            {
                'classifier_id': 1,
                'name': 'agg1',
                'description': 'agg1',
                'classifier_values': ['a', 'b']
            },
            {
                'classifier_id': 1,
                'name': 'agg2',
                'description': 'agg2',
                'classifier_values': ['a', 'b']
            },
            {
                'classifier_id': 2,
                'name': 'agg1',
                'description': 'agg1',
                'classifier_values': ['a']
            },
        ]
        self.assertEqual(len(classifier_aggregates), len(expected_aggregates))
        for index, expected in enumerate(expected_aggregates):
            self.assertEqual(classifier_aggregates[index], expected)
Ejemplo n.º 5
0
    def test_expected_result_with_classifier_id_out_of_order(self):
        """Check that classifiers come back in ascending id order.

        The input table lists classifier id 2 before classifier id 1 and
        mixes string and numeric cell values. Only the order of the
        resulting classifier ids is asserted; the classifier values and
        aggregates returned by the parser are not inspected.
        """
        nan = np.nan
        sit_classifiers_table = pd.DataFrame(
            data=[
                ("2", "_CLASSIFIER", "999", nan, nan),
                (2, 1.0, "a", nan, nan),
                (2, "b", "b", nan, nan),
                (2, 2.0, "agg1", 1.0, "b"),
                (2, "agg2", "agg2", 1.0, "b"),
                (1, "_CLASSIFIER", 700, nan, nan),
                (1, 5, "a", nan, nan),
                (1, 6, "agg1", "5", nan),
            ])

        classifiers, _, _ = \
            sit_classifier_parser.parse(sit_classifiers_table)

        # assertEqual reports the actual id list when the check fails.
        self.assertEqual(list(classifiers.id), [1, 2])
Ejemplo n.º 6
0
    def test_classifier_aggregate_validation_errors(self):
        """Check that invalid classifier aggregates are rejected.

        Three tables are parsed, each expected to make
        ``sit_classifier_parser.parse`` raise ``ValueError``:

        1. two aggregates of one classifier share a name,
        2. an aggregate references a value name that the classifier does
           not define,
        3. an aggregate lists the same value name twice.
        """
        nan = np.nan

        def build_table(classifier_1_rows):
            # Classifier 1's header and the whole of classifier 2 are the
            # same in every scenario; only classifier 1's value and
            # aggregate rows vary.
            rows = [("1", "_CLASSIFIER", "classifier1", nan, nan)]
            rows.extend(classifier_1_rows)
            rows.extend([
                (2, "_CLASSIFIER", "classifier2", nan, nan),
                (2, "a", "a", nan, nan),
                (2, "agg1", "agg1", "a", nan),
            ])
            return pd.DataFrame(data=rows)

        # The tables are built outside the assertRaises bodies so that only
        # a ValueError raised by the parser can satisfy the assertion.

        # no two aggregates can have the same name
        same_aggregate_name = build_table([
            (1, "a", "a", nan, nan),
            (1, "b", "b", nan, nan),
            (1, "SAME_NAME", "agg2", "a", "b"),
            (1, "SAME_NAME", "agg2", "a", "b"),
        ])
        with self.assertRaises(ValueError):
            sit_classifier_parser.parse(same_aggregate_name)

        # error when a value in the aggregate is not a defined classifier
        # value name for the classifier
        undefined_aggregate_value = build_table([
            (1, "a", "a", nan, nan),
            (1, "b", "b", nan, nan),
            (1, "agg1", "agg2", "MISSING", "b"),
            (1, "agg2", "agg2", "a", "b"),
        ])
        with self.assertRaises(ValueError):
            sit_classifier_parser.parse(undefined_aggregate_value)

        # error when a value in the aggregate is duplicated; agg2 only
        # references value names defined in this table ("DUPLICATE", "b")
        # so the repeated value in agg1 is the sole problem in the fixture
        duplicated_aggregate_value = build_table([
            (1, "DUPLICATE", "a", nan, nan),
            (1, "b", "b", nan, nan),
            (1, "agg1", "agg2", "DUPLICATE", "DUPLICATE"),
            (1, "agg2", "agg2", "DUPLICATE", "b"),
        ])
        with self.assertRaises(ValueError):
            sit_classifier_parser.parse(duplicated_aggregate_value)
Ejemplo n.º 7
0
def parse(sit_classifiers,
          sit_disturbance_types,
          sit_age_classes,
          sit_inventory,
          sit_yield,
          sit_events=None,
          sit_transitions=None,
          sit_eligibilities=None):
    """Parse and validate a set of CBM Standard Import Tool (SIT) tables.

    Each table is handed to its dedicated parser, in dependency order, and
    the results are collected as attributes of a single namespace object:

     - classifiers, classifier_values, classifier_aggregates: the three
        values returned by sit_classifier_parser.parse for sit_classifiers
     - disturbance_types: parsed from sit_disturbance_types
     - age_classes: parsed from sit_age_classes
     - inventory: parsed from sit_inventory, using the classifiers,
        classifier values, disturbance types and age classes
     - yield_table: parsed from sit_yield, using the classifiers,
        classifier values and age classes
     - disturbance_events: parsed from sit_events, or None when sit_events
        is None
     - disturbance_eligibilities: parsed from sit_eligibilities together
        with the parsed disturbance events, or None when either sit_events
        or sit_eligibilities is None
     - transition_rules: parsed from sit_transitions, or None when
        sit_transitions is None

    Args:
        sit_classifiers (pandas.DataFrame): SIT formatted classifiers
        sit_disturbance_types (pandas.DataFrame): SIT formatted disturbance
            types
        sit_age_classes (pandas.DataFrame): SIT formatted age classes
        sit_inventory (pandas.DataFrame): SIT formatted inventory
        sit_yield (pandas.DataFrame): SIT formatted yield curves
        sit_events (pandas.DataFrame, optional): SIT formatted disturbance
            events. Defaults to None.
        sit_transitions (pandas.DataFrame, optional): SIT formatted
            transition rules. Defaults to None.
        sit_eligibilities (pandas.DataFrame, optional): SIT formatted
            disturbance eligibilities. Only used when sit_events is also
            given. Defaults to None.

    Returns:
        object: a namespace holding the parsed and validated SIT dataset
    """
    result = SimpleNamespace()

    # Classifiers come first: every later parser is given them.
    (result.classifiers,
     result.classifier_values,
     result.classifier_aggregates) = sit_classifier_parser.parse(
        sit_classifiers)

    result.disturbance_types = sit_disturbance_type_parser.parse(
        sit_disturbance_types)
    result.age_classes = sit_age_class_parser.parse(sit_age_classes)
    result.inventory = sit_inventory_parser.parse(
        sit_inventory, result.classifiers, result.classifier_values,
        result.disturbance_types, result.age_classes)
    result.yield_table = sit_yield_parser.parse(
        sit_yield, result.classifiers, result.classifier_values,
        result.age_classes)

    # The event parser is told whether eligibilities arrive as their own
    # table.
    has_eligibilities = sit_eligibilities is not None
    if sit_events is None:
        result.disturbance_events = None
        result.disturbance_eligibilities = None
    else:
        result.disturbance_events = sit_disturbance_event_parser.parse(
            sit_events, result.classifiers, result.classifier_values,
            result.classifier_aggregates, result.disturbance_types,
            result.age_classes, has_eligibilities)
        result.disturbance_eligibilities = (
            sit_disturbance_event_parser.parse_eligibilities(
                result.disturbance_events, sit_eligibilities)
            if has_eligibilities else None)

    result.transition_rules = (
        None if sit_transitions is None
        else sit_transition_rule_parser.parse(
            sit_transitions, result.classifiers, result.classifier_values,
            result.classifier_aggregates, result.disturbance_types,
            result.age_classes))
    return result