def test_sklearn_compatibility_reg(self):
    """Smoke-test that ``get_params(deep=True)`` can be called on a regressor.

    Nothing is asserted about the return value; the test only fails if
    construction or the ``get_params`` call raises.
    """
    settings = {
        "alpha": 0.0,
        "fit_intercept": True,
        "dependent_target": True,
        "hidden_layer_units": [0],
        "standardize": True,
    }
    estimator = DAGRegressor(**settings)
    estimator.get_params(deep=True)
def test_nonlinear_performance(self, standardize):
    """Check that larger hidden layers score better on RBF-generated data.

    A 10-node structure is generated and thresholded to a DAG, continuous
    data is sampled from it with an ``RBF(1)`` kernel, and node ``1`` is
    predicted from the remaining columns. Three regressors are compared by
    mean cross-validated score: ``hidden_layer_units`` of ``[0]`` (the run
    the test calls "linear"), ``[2]`` and ``[4]``.
    """
    np.random.seed(42)
    graph = dg.generate_structure(num_nodes=10, degree=3)
    graph.threshold_till_dag()
    frame = dg.generate_continuous_dataframe(
        graph,
        n_samples=1000,
        intercept=True,
        seed=42,
        noise_scale=0.1,
        kernel=RBF(1),
    )

    target_node = 1
    y = frame.iloc[:, target_node]
    X = frame.drop(target_node, axis=1)

    def mean_cv_score(**model_kwargs):
        # Build a fresh regressor and a fresh, identically seeded KFold for
        # every configuration so all three runs see the same splits.
        model = DAGRegressor(
            fit_intercept=True, standardize=standardize, **model_kwargs
        )
        folds = KFold(shuffle=True, random_state=42)
        return cross_val_score(model, X, y, cv=folds).mean()

    linear_score = mean_cv_score(
        alpha=0.0, dependent_target=True, hidden_layer_units=[0]
    )
    small_nl_score = mean_cv_score(alpha=0.1, hidden_layer_units=[2])
    medium_nl_score = mean_cv_score(alpha=0.1, hidden_layer_units=[4])

    assert small_nl_score > linear_score
    assert medium_nl_score > small_nl_score
def test_feature_importances(self, hidden_layer_units):
    """Check type and sign of ``feature_importances_`` after fitting.

    ``X`` holds one random-noise column plus ``true_feat``, which is exactly
    ``-3 * y``. The importance reported for ``true_feat`` must be positive
    even though its relationship to the target is negative.
    """
    model = DAGRegressor(hidden_layer_units=hidden_layer_units)

    # Draw X before y so the random stream is consumed in the same order.
    X = pd.DataFrame(np.random.normal(size=(100, 1)))
    y = pd.Series(np.random.normal(size=(100,)))
    X["true_feat"] = y * -3

    model.fit(X, y)

    assert isinstance(model.feature_importances_, np.ndarray)
    importances = pd.Series(model.feature_importances_, index=X.columns)
    # The importance must be positive for both the nonlinear and linear cases.
    assert importances["true_feat"] > 0
def test_wrong_target_dist_error(self, target_dist_type):
    """Constructing a regressor with an unsupported dist type must raise.

    Only the exception type is checked. A ``match=`` on the message text
    (listing ``DAGRegressor._supported_types`` and the rejected value) was
    previously drafted here but is not asserted.
    """
    expected_error = NotImplementedError
    with pytest.raises(expected_error):
        DAGRegressor(target_dist_type=target_dist_type)
def train(self, data, train_sample_fraction, target_col):
    """Fit ``self.regressor`` on a random sample of ``data``.

    Parameters
    ----------
    data
        DataFrame holding the target column and candidate feature columns.
    train_sample_fraction
        Fraction of the rows of ``data`` to sample for fitting.
    target_col
        Name of the column to predict.

    Side effects: sets ``self.target_col``, ``self.features`` and
    ``self.regressor``.
    """
    self.target_col = target_col

    # Features are every column except the target, the 'interval' column
    # and anything whose name contains 'fleet-dispatch'.
    excluded = [target_col, 'interval']
    self.features = [
        col for col in data.columns
        if col not in excluded and 'fleet-dispatch' not in col
    ]

    # Fix: this used to iterate self.generic_tabu_edges, whose entries are
    # tuples and therefore never equal a feature name, so the list was
    # always empty. The generic child-node names are the intended source.
    tabu_child_nodes = [
        col for col in self.generic_tabu_child_nodes if col in self.features
    ]

    self.regressor = DAGRegressor(
        threshold=0.0,
        alpha=0.0001,
        beta=0.5,
        fit_intercept=True,
        hidden_layer_units=[10],
        standardize=True,
        tabu_child_nodes=tabu_child_nodes,
        tabu_edges=self._expand_tabu_edges(self.features),
    )

    # Fixed random_state keeps the training sample reproducible.
    sample_size = int(len(data.index) * train_sample_fraction)
    train = data.sample(sample_size, random_state=1).reset_index(drop=True)

    X = train.loc[:, self.features]
    y = np.asarray(train[target_col])
    self.regressor.fit(X, y)
def test_X_dtype_prediction(standardize):
    """Predictions must not depend on whether X is given as int or float.

    The model is fitted on the identity relationship ``y == x`` over 500
    evenly spaced points in ``[0, 500]`` and then queried with the same three
    values expressed once as integers and once as floats.
    """
    ramp = np.linspace(0, 500, num=500)
    training_data = pd.DataFrame({"x": ramp, "y": ramp})

    model = DAGRegressor(
        threshold=0.0,
        alpha=0.0001,
        beta=0.5,
        fit_intercept=True,
        hidden_layer_units=[10],
        standardize=standardize,
    )
    model.fit(training_data.loc[:, ["x"]], training_data["y"])

    int_queries = pd.DataFrame({"x": [0, 250, 500]})
    float_queries = pd.DataFrame({"x": [0.0, 250.0, 500.0]})

    pred_int = model.predict(int_queries)
    pred_float = model.predict(float_queries)

    assert np.all(pred_float == pred_int)
def test_independent_predictions(hidden_layer_units):
    """A row's prediction must not depend on the other rows in the batch.

    The model is fitted on ``y = x**2`` and the prediction for ``x = 10.0``
    is compared when that row is predicted alone, together with one other
    row, and together with the full training grid.
    """
    grid = np.linspace(0.0, 100, 100)
    X = pd.DataFrame({"x": grid})
    Y = pd.Series(grid**2, name="y")

    model = DAGRegressor(
        threshold=0.0,
        alpha=0.0,
        beta=0.5,
        fit_intercept=True,
        hidden_layer_units=hidden_layer_units,
        standardize=False,
    )
    model.fit(X, Y)

    # The probe value always sits in row 0 of each batch.
    pred_alone = model.predict(pd.DataFrame({"x": [10.0]}))
    pred_joint0 = model.predict(pd.DataFrame({"x": [10.0, 0.0]}))
    pred_joint1 = model.predict(pd.DataFrame({"x": [10.0] + grid.tolist()}))

    alone, with_one, with_grid = pred_alone[0], pred_joint0[0], pred_joint1[0]
    assert np.isclose(alone, with_one)
    assert np.isclose(alone, with_grid)
    assert np.isclose(with_one, with_grid)
def test_glm(self, target_dist_type, y):
    """Smoke-test fit and predict for a given ``target_dist_type``.

    Nothing is asserted on the predictions; the test only fails if
    construction, ``fit`` or ``predict`` raises.
    """
    model = DAGRegressor(target_dist_type=target_dist_type)
    # 100 samples with 2 standard-normal features.
    features = np.random.normal(size=(100, 2))
    model.fit(features, y)
    model.predict(features)
class Forecaster:
    """Forecaster that wraps a ``DAGRegressor`` fitted on tabular data.

    "Generic" tabu names are expanded or filtered against the concrete
    column names of the training data before being handed to the regressor.
    """

    def __init__(self, tabu_child_nodes=None, tabu_edges=None):
        """Store the generic tabu configuration.

        Parameters
        ----------
        tabu_child_nodes
            Generic child-node names; defaults to
            ``['hour', 'dayofweak', 'dayofyear']``.
        tabu_edges
            Generic ``(first, second)`` name pairs; defaults to
            ``[('demand', 'demand')]``.
        """
        # Build the defaults per instance: list defaults in the signature
        # would be shared, so mutating one Forecaster's lists would leak
        # into every other one.
        if tabu_child_nodes is None:
            # NOTE(review): 'dayofweak' looks like a typo for 'dayofweek';
            # it is kept as-is because it must match the data's column
            # names - confirm against the dataset.
            tabu_child_nodes = ['hour', 'dayofweak', 'dayofyear']
        if tabu_edges is None:
            tabu_edges = [('demand', 'demand')]
        self.generic_tabu_child_nodes = tabu_child_nodes
        self.generic_tabu_edges = tabu_edges

    def _expand_tabu_edges(self, data_columns):
        """Prepare the tabu_edges input for the DAGRegressor.

        Each generic edge ``(a, b)`` is expanded to every pair of distinct
        columns ``(col_a, col_b)`` where ``a`` is a substring of ``col_a``
        and ``b`` is a substring of ``col_b``.

        Examples
        --------
        >>> f = Forecaster()
        >>> f._expand_tabu_edges(data_columns=['demand-1', 'demand-2', 'constraint-1'])
        [('demand-1', 'demand-2'), ('demand-2', 'demand-1')]

        Parameters
        ----------
        data_columns
            Iterable of concrete column names.

        Returns
        -------
        list of (str, str) tuples, in generic-edge order.
        """
        expanded_edges = []
        for generic_edge in self.generic_tabu_edges:
            first_generic_node, second_generic_node = generic_edge[0], generic_edge[1]
            first_matches = [col for col in data_columns if first_generic_node in col]
            second_matches = [col for col in data_columns if second_generic_node in col]
            # Self-loops are dropped; only pairs of distinct columns remain.
            expanded_edges += [
                edge for edge in product(first_matches, second_matches)
                if edge[0] != edge[1]
            ]
        return expanded_edges

    def _select_tabu_child_nodes(self, features):
        """Return the generic tabu child nodes that are present in ``features``."""
        return [col for col in self.generic_tabu_child_nodes if col in features]

    def train(self, data, train_sample_fraction, target_col):
        """Fit ``self.regressor`` on a random sample of ``data``.

        Parameters
        ----------
        data
            DataFrame holding the target column and candidate feature columns.
        train_sample_fraction
            Fraction of the rows of ``data`` to sample for fitting.
        target_col
            Name of the column to predict.

        Side effects: sets ``self.target_col``, ``self.features`` and
        ``self.regressor``.
        """
        self.target_col = target_col

        # Features are every column except the target, the 'interval'
        # column and anything whose name contains 'fleet-dispatch'.
        excluded = [target_col, 'interval']
        self.features = [
            col for col in data.columns
            if col not in excluded and 'fleet-dispatch' not in col
        ]

        # Fix: this used to filter self.generic_tabu_edges (tuples), which
        # can never equal a feature name, so the result was always empty.
        tabu_child_nodes = self._select_tabu_child_nodes(self.features)

        self.regressor = DAGRegressor(
            threshold=0.0,
            alpha=0.0001,
            beta=0.5,
            fit_intercept=True,
            hidden_layer_units=[10],
            standardize=True,
            tabu_child_nodes=tabu_child_nodes,
            tabu_edges=self._expand_tabu_edges(self.features),
        )

        # Fixed random_state keeps the training sample reproducible.
        sample_size = int(len(data.index) * train_sample_fraction)
        train = data.sample(sample_size, random_state=1).reset_index(drop=True)

        X = train.loc[:, self.features]
        y = np.asarray(train[target_col])
        self.regressor.fit(X, y)

    def price_forecast_with_generation_sensitivities(self, forward_data, region, market,
                                                     min_delta, max_delta, steps):
        """Predict over a sweep of demand reductions.

        For each ``delta`` from ``min_delta`` up to at least ``max_delta``
        the ``<region>-demand`` column is set to the baseline demand minus
        ``delta`` and the regressor is evaluated. The baseline is the
        region demand, plus ``<market>-fleet-dispatch`` when that column
        exists.

        Parameters
        ----------
        forward_data
            DataFrame with an 'interval' column and the trained feature
            columns. It is not modified.
        region, market
            Name prefixes used to locate the demand and fleet-dispatch columns.
        min_delta, max_delta
            Bounds of the sweep; truncated to int.
        steps
            Number of steps used to derive the (integer, at least 1) step size.

        Returns
        -------
        DataFrame with the 'interval' column plus one column per delta,
        keyed by the integer delta value.
        """
        demand_col = region + '-demand'
        dispatch_col = market + '-fleet-dispatch'

        # Work on a private copy: the sweep overwrites the demand column
        # and adds 'old_demand', which previously corrupted the caller's
        # frame and compounded across repeated calls.
        scenario = forward_data.copy()
        prediction = scenario.loc[:, ['interval']].copy()

        if dispatch_col in scenario.columns:
            scenario['old_demand'] = scenario[demand_col] + scenario[dispatch_col]
        else:
            scenario['old_demand'] = scenario[demand_col]

        delta_step_size = max(int((max_delta - min_delta) / steps), 1)
        # The stop is padded by one step so max_delta itself is covered.
        sweep = range(int(min_delta), int(max_delta) + delta_step_size, delta_step_size)
        for delta in sweep:
            scenario[demand_col] = scenario['old_demand'] - delta
            X = scenario.loc[:, self.features]
            prediction[delta] = self.regressor.predict(X)
        return prediction

    def single_trace_forecast(self, forward_data):
        """Predict the target for ``forward_data`` as given.

        Returns a DataFrame with the 'interval' column and one column named
        after ``self.target_col`` holding the predictions.
        """
        prediction = forward_data.loc[:, ['interval']]
        X = forward_data.loc[:, self.features]
        prediction[self.target_col] = self.regressor.predict(X)
        return prediction
# Demo: fit a DAGRegressor on the standardised Boston housing data, report the
# cross-validated score, then refit on labelled frames to print coefficients
# and plot the learned DAG.
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

from causalnex.structure.pytorch import DAGRegressor

# NOTE(review): load_boston was removed from scikit-learn in 1.2 - confirm the
# pinned version still provides it.
data = load_boston()
X, y = data.data, data.target
names = data["feature_names"]

# Standardise the features and z-score the target.
ss = StandardScaler()
X = ss.fit_transform(X)
y = (y - y.mean()) / y.std()

reg = DAGRegressor(
    alpha=0.1,
    beta=0.9,
    fit_intercept=True,
    hidden_layer_units=None,
    dependent_target=True,
    enforce_dag=True,
)

folds = KFold(shuffle=True, random_state=42)
scores = cross_val_score(reg, X, y, cv=folds)
mean_score = np.mean(scores)
print(f'MEAN R2: {mean_score:.3f}')

# Refit on labelled pandas objects so the coefficients can be reported by name.
X = pd.DataFrame(X, columns=names)
y = pd.Series(y, name="MEDV")
reg.fit(X, y)
print(pd.Series(reg.coef_, index=names))
reg.plot_dag(enforce_dag=True)