def predict(self, input_data, full=False):
    """Returns the prediction and the confidence intervals

    input_data: Input data to be predicted
    full: Boolean that controls the shape of the result. When False
          (the default) only the prediction value is returned, even
          if confidence bounds were computed. When True, a dictionary
          is returned whose keys can be:
          - prediction: the prediction value
          - confidence_bounds: output of `self.confidence_bounds`,
                               added only when `self.xtx_inverse`
                               is truthy
          - unused_fields: list of fields in the input data that
                           are not being used in the model
    """
    # Checks and cleans input_data leaving the fields used in the model.
    # When `full` is set the filter result is unpacked as a
    # (clean data, unused fields) pair.
    filtered = self.filter_input_data(input_data, add_unused_fields=full)
    if full:
        clean_data, ignored_fields = filtered
    else:
        clean_data, ignored_fields = filtered, []

    # Strips affixes for numeric values and casts to the final field type
    cast(clean_data, self.fields)

    # NOTE(review): presumably rejects missing values in the input when
    # the training data had none -- confirm in check_no_training_missings
    check_no_training_missings(
        clean_data,
        self.model_fields,
        self.weight_field,
        self.objective_id,
    )

    # Computes text and categorical field expansion
    terms = self.get_unique_terms(clean_data)

    # Both expansions are always built: the regular one feeds the
    # prediction, the compact one (third argument True) feeds the bounds
    expanded = self.expand_input(clean_data, terms)
    compact_expanded = self.expand_input(clean_data, terms, True)

    prediction = dot([flatten(self.coefficients)], [expanded])[0][0]

    result = {"prediction": prediction}
    if self.xtx_inverse:
        result["confidence_bounds"] = self.confidence_bounds(
            compact_expanded)

    if not full:
        return prediction

    result["unused_fields"] = ignored_fields
    return result
def predict(self, input_data, full=False):
    """Returns the prediction and the confidence intervals

    input_data: Input data to be predicted
    full: Boolean that controls whether to include the prediction's
          attributes. By default, only the prediction value is
          returned. If set to True, a dictionary is returned instead.
          The dictionary keys can be:
          - prediction: the prediction value
          - confidence_bounds: output of `self.confidence_bounds`,
                               added only when `self.xtx_inverse`
                               is not None
          - unused_fields: list of fields in the input data that
                           are not being used in the model
    """
    # Checks and cleans input_data leaving the fields used in the model.
    # With `full` the filter result is a pair that also carries the
    # unused fields; otherwise it is the cleaned data alone.
    filtered = self.filter_input_data(input_data, add_unused_fields=full)
    data, unused = filtered if full else (filtered, [])

    # Strips affixes for numeric values and casts to the final field type
    cast(data, self.fields)

    # NOTE(review): presumably rejects missing values in the input when
    # the training data had none -- confirm in check_no_training_missings
    check_no_training_missings(
        data, self.fields, self.weight_field, self.objective_id)

    # Computes text and categorical field expansion
    terms = self.get_unique_terms(data)

    # Input vectors with the values for all expanded fields: the regular
    # form for the prediction and the compact form for the bounds
    vector = self.expand_input(data, terms)
    compact_vector = self.expand_input(data, terms, True)

    weights = [flatten(self.coefficients)]
    value = dot(weights, [vector])[0][0]

    output = {"prediction": value}
    if self.xtx_inverse is not None:
        output["confidence_bounds"] = self.confidence_bounds(compact_vector)

    if full:
        output["unused_fields"] = unused
        return output
    return output["prediction"]
def categorical_encoding(self, inputs, field_id, compact):
    """Returns the result of combining the encoded categories
    according to the field_codings projections

    The result is the components generated by the categorical field.

    inputs: encoded categories for the field (copied, never modified)
    field_id: key used to look up `self.field_codings` and
              `self.categories`
    compact: when truthy and the field has a DUMMY coding, the
             component at the dummy class position is left out
    """
    encoded = inputs[:]
    codings = self.field_codings[field_id]

    # CONTRAST projections take precedence; OTHER is the fallback
    projections = codings.get(CONTRAST, codings.get(OTHER))
    if projections is not None:
        encoded = flatten(dot(projections, [encoded]))

    if not compact:
        return encoded

    dummy_class = codings.get(DUMMY)
    if dummy_class is None:
        return encoded

    # Drops the component located at the dummy class position; when that
    # position is the last one (or beyond) the tail slice is just empty
    index = self.categories[field_id].index(dummy_class)
    return encoded[:index] + encoded[index + 1:]
def categorical_encoding(self, inputs, field_id, compact):
    """Returns the result of combining the encoded categories
    according to the field_codings projections

    The result is the components generated by the categorical field.
    The `inputs` sequence is copied first, so the caller's object is
    not modified. If `compact` is truthy and the field declares a DUMMY
    coding, the component sitting at the dummy class index (as found
    in `self.categories[field_id]`) is removed from the result.
    """
    components = inputs[:]
    field_coding = self.field_codings[field_id]

    # Looks for CONTRAST projections first and falls back to OTHER
    fallback = field_coding.get(OTHER)
    projections = field_coding.get(CONTRAST, fallback)
    if projections is not None:
        components = flatten(dot(projections, [components]))

    if compact and field_coding.get(DUMMY) is not None:
        dummy_index = self.categories[field_id].index(field_coding[DUMMY])
        # Keeps every component except the one at the dummy class index
        components = [
            component
            for position, component in enumerate(components)
            if position != dummy_index
        ]

    return components