def visit_FunctionCallNode(self, node):
    """Evaluate a function-call node and return the call's result.

    Calls to ``join`` are handed to ``self._eval_join``.  For every other
    function, each argument node is evaluated with ``self.visit`` first and
    the function is then applied to the evaluated arguments.

    Raises:
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``EvaluationError``) when applying the
        function fails.  Errors raised while evaluating the argument nodes
        are not wrapped here.
    """
    fn_name = node.fn_name
    # join has some tricky dynamics, so it is special-cased (possibly it
    # should be parsed into a specialized node type instead).
    if fn_name == 'join':
        return self._eval_join(node)

    arg_values = list(map(self.visit, node.args))
    try:
        if fn_name not in RANDOM_SAMPLING_FNS:
            return self._broadcasting_fn_call(node.fn, *arg_values)
        # TODO: This is sort of a hack. Should come up with a more
        # principled approach.
        # Randomness functions need their input args proactively expanded
        # to the full dataset dimensionality. Otherwise a set of values
        # would be sampled for one row and later repeated across the row
        # dimension.
        expanded_args = [
            self._add_dataset_dimension(value) for value in arg_values
        ]
        return sample(node.fn, expanded_args)
    except Exception as err:
        context = f"calling function {fn_name}"
        raise visibly_wrapped_exception(err, context, EvaluationError)
def visit_ModelMethodNode(self, node):
    """Evaluate a model-method node and return the method's result.

    Each argument node is evaluated with ``self.visit``; the evaluated
    values are then passed, together with ``node.method``, to
    ``self._broadcasting_fn_call``.

    Raises:
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message naming the model and method, and
        ``EvaluationError``) when the method call fails.  Errors raised
        while evaluating the argument nodes are not wrapped here.
    """
    arg_values = []
    for child in node.args:
        arg_values.append(self.visit(child))

    try:
        result = self._broadcasting_fn_call(node.method, *arg_values)
    except Exception as err:
        context = f"calling {node.model.name}.{node.method_name}"
        raise visibly_wrapped_exception(err, context, EvaluationError)
    return result
def predict(self, *args):
    """Run the wrapped model's ``predict`` and shape the result like ``args[0]``.

    The positional inputs are combined by ``self._reshape_inputs`` into the
    array handed to ``self._model.predict``.  If that array is not of
    string/object dtype and contains any NaN, the model is not called and an
    all-NaN array with the shape of ``args[0]`` is returned instead.

    Returns:
        An array with the same shape as ``args[0]``.

    Raises:
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``EquationError``) when the underlying
        ``predict`` call fails.
    """
    model_input = self._reshape_inputs(*args)
    output_shape = args[0].shape
    # np.isnan raises TypeError on string/object arrays, so the NaN check is
    # skipped for those dtypes, mirroring the guard used by predict_proba.
    is_textual = model_input.dtype.type in (np.object_, np.str_)
    if not is_textual and np.isnan(model_input).any():
        return np.full(output_shape, np.nan)
    try:
        predictions = self._model.predict(model_input)
    except Exception as e:
        # The context names the method actually being called (predict).
        raise visibly_wrapped_exception(
            e, f"calling {self.name}.predict", EquationError,
        )
    return predictions.reshape(output_shape)
def _load_pickled_model(raw_bytes, label):
    """Unpickle an sklearn model and wrap it in the matching model class.

    A classifier is wrapped as ``ClassifierModel(label, SklearnClassifier(model))``
    and a regressor as ``RegressionModel(label, SklearnRegressor(model))``.
    For an sklearn ``Pipeline`` the last step decides which of the two
    applies, but the whole pipeline is what gets wrapped.

    Args:
        raw_bytes: Pickled bytes of the model.
        label: Name passed to the model wrapper and used in error messages.

    Raises:
        ModelError: If the classifier is not binary, or the model (or the
            last pipeline step) is neither a classifier nor a regressor.
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``ModelError``) when unpickling fails.
    """
    global sklearnbase, sklearnpipeline
    if sklearnbase is None:
        # sklearn is imported on first use and cached in module globals.
        import sklearn.base as sklearnbase
        import sklearn.pipeline as sklearnpipeline
    try:
        # NOTE(security): pickle.loads can execute arbitrary code while
        # loading. raw_bytes must come from a trusted source.
        model = pickle.loads(raw_bytes)
    except Exception as e:
        raise visibly_wrapped_exception(
            e, f"unpickling {label} model file", ModelError,
        )

    is_pipeline = isinstance(model, sklearnpipeline.Pipeline)
    relevant_model = model
    if is_pipeline:
        # If this is a pipeline, then use the last step of the pipeline to
        # determine whether the overall model is a classifier or regressor.
        _, relevant_model = model.steps[-1]

    if isinstance(relevant_model, sklearnbase.ClassifierMixin):
        cls, inner_cls = ClassifierModel, SklearnClassifier
        if hasattr(relevant_model, 'n_classes_'):
            n_classes = relevant_model.n_classes_
        elif hasattr(relevant_model, 'classes_'):
            n_classes = len(relevant_model.classes_)
        else:
            # Uh oh, can't figure out how many classes this model has.
            # Crossing our fingers...
            n_classes = 2
        if n_classes != 2:
            # Report the count computed above: the model may expose only
            # classes_ and have no n_classes_ attribute to read.
            raise ModelError(
                "Only binary classifiers supported. Got classifier"
                f" with {n_classes} classes.")
    elif isinstance(relevant_model, sklearnbase.RegressorMixin):
        cls, inner_cls = RegressionModel, SklearnRegressor
    elif is_pipeline:
        raise ModelError(
            "Last step of pipeline must be an sklearn classifier"
            " or regressor, but was of class: "
            + str(relevant_model.__class__))
    else:
        raise ModelError(
            "Serialized model must be an sklearn classifier"
            " or regressor, but was of class: " + str(model.__class__))

    inner = inner_cls(model)
    return cls(label, inner)
def _eval_join(self, node):
    """Evaluate a call to the join function.

    The node must carry exactly two arguments: a mask expression and a
    column expression.  ``dataset_for_node`` is asked for the dataset of
    the column argument; both arguments are then evaluated with
    ``self.visit`` while ``self.in_join_mode(ds)`` is active, and the
    results are passed with ``node.fn`` to ``self._broadcasting_fn_call``.

    Raises:
        EvaluationError: If the argument count is not 2, or no dataset can
            be determined for the second argument.
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``EvaluationError``) when the function
        call itself fails.
    """
    n_args = len(node.args)
    if n_args != 2:
        raise EvaluationError(
            f"Join takes exactly 2 arguments, got {n_args}.")

    mask_node, col_node = node.args
    ds = dataset_for_node(col_node)
    if ds is None:
        raise EvaluationError(
            "Second argument to join() must be a "
            "dataset variable or column name.")

    # NOTE(review): join mode is scoped to argument evaluation only here;
    # confirm the function call below is not meant to run inside it.
    with self.in_join_mode(ds):
        mask = self.visit(mask_node)
        col = self.visit(col_node)

    try:
        # node.fn should be provided_functions.join
        result = self._broadcasting_fn_call(node.fn, mask, col)
    except Exception as err:
        context = f"calling function {node.fn_name}"
        raise visibly_wrapped_exception(err, context, EvaluationError)
    return result
def predict_proba(self, *args):
    """Return the second-class probability, shaped like ``args[0]``.

    The positional inputs are combined by ``self._reshape_inputs`` into the
    array handed to ``self._model.predict_proba``.  If that array is not of
    string/object dtype and contains any NaN, the model is not called and
    an all-NaN array of the output shape is returned instead.

    Raises:
        AssertionError: If the model's probabilities do not have exactly
            two entries along the last axis.
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``EquationError``) when the underlying
        ``predict_proba`` call fails.
    """
    model_input = self._reshape_inputs(*args)
    output_shape = args[0].shape

    # String variables are allowed in some cases, in which case model_input
    # has dtype string or object (for mixed types); np.isnan is only applied
    # to other dtypes.
    has_other_dtype = model_input.dtype.type not in (np.object_, np.str_)
    if has_other_dtype and np.isnan(model_input).any():
        nan_result = np.empty(output_shape)
        nan_result[:] = np.nan
        return nan_result

    try:
        class_probs = self._model.predict_proba(model_input)
    except Exception as err:
        context = f"calling {self.name}.predict_proba"
        raise visibly_wrapped_exception(err, context, EquationError)

    assert class_probs.shape[-1] == 2, "Expected 2 classes"
    # By convention the probability of the second class is returned.
    return class_probs[:, 1].reshape(output_shape)
def _load_tf_model(local_path, label):
    """Load a Keras model from ``local_path`` and wrap it in a model class.

    The model is loaded with ``compile=False``.  When the loaded model has
    an ``output_shape`` attribute, its last entry selects the wrapper: 2
    gives ``ClassifierModel(label, TfClassifier(model))`` and anything
    greater than 2 is rejected.  In every other case the result is
    ``RegressionModel(label, TfRegressor(model))``.

    Raises:
        ModelError: If the last output dimension is greater than 2.
        Whatever ``visibly_wrapped_exception`` builds (from the original
        error, a context message and ``ModelError``) when loading fails.
    """
    global tfkerasmodels
    if tfkerasmodels is None:
        # tensorflow is imported on first use and cached in a module global.
        import tensorflow.keras.models as tfkerasmodels

    try:
        model = tfkerasmodels.load_model(local_path, compile=False)
    except Exception as err:
        context = f"loading {label} model file"
        raise visibly_wrapped_exception(err, context, ModelError)

    # Regression is the default; only a 2-wide output is a classifier.
    wrapper_cls, backend_cls = RegressionModel, TfRegressor
    if hasattr(model, 'output_shape'):
        last_dim = model.output_shape[-1]
        if last_dim == 2:
            wrapper_cls, backend_cls = ClassifierModel, TfClassifier
        elif last_dim > 2:
            raise ModelError(
                "Only binary classifiers supported. Got classifier"
                f" with {last_dim} classes.")

    return wrapper_cls(label, backend_cls(model))