Example #1
0
    def predict(self, input_data, full=False):
        """Returns the prediction and, on request, its extra attributes

        input_data: Input data to be predicted
        full: Boolean that controls whether to include the prediction's
              attributes. By default, only the prediction is produced. If set
              to True, the rest of available information is added in a
              dictionary format. The dictionary keys can be:
                  - prediction: the prediction value
                  - confidence_bounds: the value returned by
                                       self.confidence_bounds; only present
                                       when self.xtx_inverse is truthy
                  - unused_fields: list of fields in the input data that
                                   are not being used in the model

        """

        # Checks and cleans input_data leaving the fields used in the model.
        # When `full` is set, the filter result is unpacked as a
        # (cleaned data, unused fields) pair.
        unused_fields = []
        norm_input_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            norm_input_data, unused_fields = norm_input_data

        # Strips affixes for numeric values and casts to the final field type
        cast(norm_input_data, self.fields)

        # Validates the input against the missing values seen in training
        # (presumably rejecting missings when the training data had none;
        # see check_no_training_missings to confirm)
        check_no_training_missings(norm_input_data, self.model_fields,
                                   self.weight_field,
                                   self.objective_id)

        # Computes text and categorical field expansion
        unique_terms = self.get_unique_terms(norm_input_data)

        # Creates an input vector with the values for all expanded fields.
        input_array = self.expand_input(norm_input_data, unique_terms)

        prediction = dot([flatten(self.coefficients)], [input_array])[0][0]

        # Only the bare prediction is returned by default, so the compact
        # expansion and the confidence bounds (which would be discarded)
        # are not computed at all.
        # NOTE(review): assumes expand_input and confidence_bounds have no
        # side effects that callers rely on -- confirm.
        if not full:
            return prediction

        result = {"prediction": prediction}
        if self.xtx_inverse:
            # The bounds are computed on the compact version of the
            # expanded input (third positional argument set to True)
            compact_input_array = self.expand_input(norm_input_data,
                                                    unique_terms, True)
            result["confidence_bounds"] = self.confidence_bounds(
                compact_input_array)
        result["unused_fields"] = unused_fields

        return result
Example #2
0
    def predict(self, input_data, full=False):
        """Returns the prediction and, on request, its extra attributes

        input_data: Input data to be predicted
        full: Boolean that controls whether to include the prediction's
              attributes. By default, only the prediction is produced. If set
              to True, the rest of available information is added in a
              dictionary format. The dictionary keys can be:
                  - prediction: the prediction value
                  - confidence_bounds: the value returned by
                                       self.confidence_bounds; only present
                                       when self.xtx_inverse is not None
                  - unused_fields: list of fields in the input data that
                                   are not being used in the model

        """

        # Checks and cleans input_data leaving the fields used in the model.
        # When `full` is set, the filter result is unpacked as a
        # (cleaned data, unused fields) pair.
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            new_data, unused_fields = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(new_data, self.fields)

        # Validates the input against the missing values seen in training
        # (presumably rejecting missings when the training data had none;
        # see check_no_training_missings to confirm)
        check_no_training_missings(new_data, self.fields, self.weight_field,
                                   self.objective_id)

        # Computes text and categorical field expansion
        unique_terms = self.get_unique_terms(new_data)

        # Creates an input vector with the values for all expanded fields.
        input_array = self.expand_input(new_data, unique_terms)

        prediction = dot([flatten(self.coefficients)], [input_array])[0][0]

        # Only the bare prediction is returned by default, so the compact
        # expansion and the confidence bounds (which would be discarded)
        # are not computed at all.
        # NOTE(review): assumes expand_input and confidence_bounds have no
        # side effects that callers rely on -- confirm.
        if not full:
            return prediction

        result = {"prediction": prediction}
        if self.xtx_inverse is not None:
            # The bounds are computed on the compact version of the
            # expanded input (third positional argument set to True)
            compact_input_array = self.expand_input(new_data, unique_terms,
                                                    True)
            result["confidence_bounds"] = self.confidence_bounds(
                compact_input_array)
        result["unused_fields"] = unused_fields

        return result
Example #3
0
    def categorical_encoding(self, inputs, field_id, compact):
        """Builds the components contributed by one categorical field

        inputs: sequence with the encoded categories of the field. It is
                copied, so the caller's sequence is never modified
        field_id: key of the field in self.field_codings and
                  self.categories
        compact: when truthy and the field has a DUMMY coding, the
                 component placed at the dummy class position is dropped

        The projections stored under CONTRAST are applied to the inputs,
        falling back to the OTHER ones when the field has no CONTRAST
        entry. If neither is available the inputs are used as they are.
        """

        encoded = inputs[:]
        codings = self.field_codings[field_id]

        projections = codings.get(CONTRAST, codings.get(OTHER))
        if projections is not None:
            encoded = flatten(dot(projections, [encoded]))

        # Nothing to drop unless a compact output was requested and the
        # field declares a dummy class
        if not compact or codings.get(DUMMY) is None:
            return encoded

        dummy_class = codings[DUMMY]
        dummy_position = self.categories[field_id].index(dummy_class)

        # Keeps everything before the dummy position and, when there is
        # something after it, appends that tail too
        trimmed = encoded[:dummy_position]
        if len(encoded) > dummy_position + 1:
            trimmed.extend(encoded[dummy_position + 1:])

        return trimmed
Example #4
0
    def categorical_encoding(self, inputs, field_id, compact):
        """Returns the components generated by a categorical field

        inputs: sequence of encoded categories; a copy is made before any
                processing
        field_id: identifier used to look the field up in
                  self.field_codings and self.categories
        compact: if truthy, and the field codings include a DUMMY entry,
                 the component at the index of the dummy class is removed

        The copy of the inputs is combined with the CONTRAST projections
        of the field, or with the OTHER ones when there is no CONTRAST
        entry. Without projections the copy is left untouched.
        """

        components = inputs[:]
        field_coding = self.field_codings[field_id]

        fallback = field_coding.get(OTHER)
        matrix = field_coding.get(CONTRAST, fallback)
        if matrix is not None:
            product = dot(matrix, [components])
            components = flatten(product)

        drop_dummy = compact and field_coding.get(DUMMY) is not None
        if drop_dummy:
            # Position of the dummy class among the field categories
            position = self.categories[field_id].index(
                field_coding[DUMMY])
            after = position + 1
            without_dummy = components[0:position]
            if len(components) > after:
                without_dummy.extend(components[after:])
            components = without_dummy

        return components