import numpy as np
import theano
import theano.tensor as T
from sklearn.preprocessing import PolynomialFeatures


def __init__(self):
    self.theta = T.matrix()

    # define output for b: degree-3 polynomial features of the 2 input params
    combinations = PolynomialFeatures._combinations(2, 3, False, False)
    n_output_features_ = sum(1 for _ in combinations) + 1
    self.A_b = theano.shared(
        value=np.ones((n_output_features_,), dtype=theano.config.floatX),
        borrow=True, name='A_b')
    self.b_b = theano.shared(value=1., borrow=True, name='b_b')
    # the generator above is exhausted by the count, so rebuild it
    combinations = PolynomialFeatures._combinations(2, 3, False, False)
    L = [(self.theta[:, 0] ** 0).reshape([-1, 1])]  # constant (bias) column
    for c in combinations:
        L.append(self.theta[:, c].prod(1).reshape([-1, 1]))
    self.XF3 = T.concatenate(L, axis=1)
    b = (T.dot(self.XF3, self.A_b) + self.b_b).reshape([-1, 1])

    # define output for k: degree-2 polynomial features of the 2 input params
    combinations = PolynomialFeatures._combinations(2, 2, False, False)
    n_output_features_ = sum(1 for _ in combinations) + 1
    self.rho_k = theano.shared(
        value=np.ones((n_output_features_,), dtype=theano.config.floatX),
        borrow=True, name='rho_k')
    combinations = PolynomialFeatures._combinations(2, 2, False, False)
    L = [(self.theta[:, 0] ** 0).reshape([-1, 1])]  # constant (bias) column
    for c in combinations:
        L.append(self.theta[:, c].prod(1).reshape([-1, 1]))
    self.XF2 = T.concatenate(L, axis=1)
    k = T.dot(self.XF2, self.rho_k).reshape([-1, 1])

    self.outputs = [T.concatenate([b, k], axis=1)]
    self.inputs = [self.theta]
    self.trainable_weights = [self.A_b, self.b_b, self.rho_k]
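# A minimal usage sketch (hedged): assuming the __init__ above belongs to a
# small layer class, here hypothetically named PolyLayer, theano.function can
# compile the symbolic graph so the layer is evaluated on concrete data.
import numpy as np
import theano

layer = PolyLayer()  # hypothetical wrapper class holding the __init__ above
f = theano.function(inputs=layer.inputs, outputs=layer.outputs)

theta = np.random.rand(5, 2).astype(theano.config.floatX)  # 5 samples, 2 params
b_k = f(theta)[0]  # shape (5, 2): column 0 is b, column 1 is k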
from scipy import sparse
from sklearn.preprocessing import PolynomialFeatures


# In the scikit-learn test suite this function is driven by a
# @pytest.mark.parametrize grid supplying the five arguments (omitted here).
def test_num_combinations(
    n_features,
    min_degree,
    max_degree,
    interaction_only,
    include_bias,
):
    """Test that n_output_features_ is calculated correctly."""
    x = sparse.csr_matrix(([1], ([0], [n_features - 1])))
    est = PolynomialFeatures(
        degree=max_degree,
        interaction_only=interaction_only,
        include_bias=include_bias,
    )
    est.fit(x)
    num_combos = est.n_output_features_

    combos = PolynomialFeatures._combinations(
        n_features=n_features,
        min_degree=0,
        max_degree=max_degree,
        interaction_only=interaction_only,
        include_bias=include_bias,
    )
    assert num_combos == sum(1 for _ in combos)
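# A hedged cross-check of the same invariant without the private helper: for
# the default interaction_only=False, include_bias=True case, the number of
# monomials of degree <= d in n variables is C(n + d, d).
from math import comb

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

n_features, degree = 3, 4
est = PolynomialFeatures(degree=degree).fit(np.zeros((1, n_features)))
assert est.n_output_features_ == comb(n_features + degree, degree)  # 35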
import numpy as np
from sklearn.preprocessing import PolynomialFeatures


def poly_transform_(arr, axis=None, deg=2):
    """
    Ad-hoc workaround for polynomial transforms of higher-dimensional
    tensors, since current scikit-learn only accepts vectors as samples.

    (a0, a1, ..., an) --> (a0, a1, ..., axis-1, axis+1, ..., axis_transform)

    The input array is assumed to have shape (sample_no, *sample.shape).
    Elementwise, poly_transform_(deg=2) is equivalent to
    PolynomialFeatures(2, interaction_only=False, include_bias=False).fit_transform.
    If each sample has shape (x_dim, y_dim), then poly_transform_(arr, axis=0)
    applies the polynomial expansion only along the x direction. The output
    of each sample is flattened for the linear-model fit.
    """
    # treat axis=0 as a valid choice; only `None` means "no axis given"
    arr = arr if axis is not None else np.expand_dims(arr, -1)
    axis = axis if axis is not None else -1
    para_no = arr.shape[axis]
    arr = np.moveaxis(arr, axis, -1)
    comb = PolynomialFeatures._combinations(para_no, deg,
                                            interaction_only=False,
                                            include_bias=False)
    # one exponent vector per monomial, e.g. (0, 1) -> x0**0 * x1**1
    poly_deg = np.vstack([np.bincount(c, minlength=para_no) for c in comb])
    poly_deg = poly_deg.reshape((poly_deg.shape[0],)
                                + (1,) * (arr.ndim - 1)
                                + (poly_deg.shape[1],))
    out_arr = np.power(arr, poly_deg).prod(axis=-1)
    return np.moveaxis(out_arr, 0, -1)
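# A minimal usage sketch of poly_transform_ (assumes a scikit-learn version
# whose PolynomialFeatures._combinations has the positional
# (n_features, degree, ...) signature used above).
import numpy as np

arr = np.random.rand(10, 4, 3)            # 10 samples of shape (4, 3)
out = poly_transform_(arr, axis=2, deg=2)
# each length-3 slice along axis 2 is expanded into its 9 monomials of
# degree 1..2 (C(3 + 2, 2) - 1 = 9), which land on the last axis
print(out.shape)                          # (10, 4, 9)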
import time
from copy import deepcopy

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sympy import symbols, sympify, simplify, re, diff

# Project-local helpers assumed importable from the surrounding package:
# MeijerG, get_theta_parameters, eval_Kolmogorov_gradient_only.


class Symbolic_Metamodel:

    def __init__(self, n_dim=10, batch_size=100, num_iter=30,
                 learning_rate=1e-3, feature_types=None, **kwargs):
        self.n_dim = n_dim
        self.batch_size = batch_size
        self.num_iter = num_iter
        self.learning_rate = learning_rate
        self.exact_grad = False
        self.epsilon = 1e-6
        self.feature_types = feature_types

    def fit(self, pred_model, x_train):
        self.pred_model = pred_model
        self.x_train = x_train
        self.n_dim = x_train.shape[1]
        self.initialize_thetas()
        thetas_sgd, Losses_ = self.SGD_optimizer()
        self.thetas_opt = thetas_sgd[-1]
        self.metamodel, dims_ = self.get_exact_Kolmogorov_expression(self.thetas_opt)
        self.exact_pred_expr = pred_model

    def get_exact_Kolmogorov_expression(self, theta):
        Thetas, Thetas_out = get_theta_parameters(
            theta.reshape((-1,)), self.Orders_in, self.Orders_out, self.n_dim)
        Thetas_in_0, Thetas_out_0 = get_theta_parameters(
            self.theta_0.reshape((-1,)), self.Orders_in, self.Orders_out,
            self.n_dim)

        # build the symbol string 'X0 X1 ... X{single_dim-1}'
        symbols_ = 'X0 '
        for m in range(self.single_dim - 1):
            if m < self.single_dim - 2:
                symbols_ += 'X' + str(m + 1) + ' '
            else:
                symbols_ += 'X' + str(m + 1)
        dims_ = symbols(symbols_)

        inner_funcs = [MeijerG(theta=Thetas[k], order=self.Orders_in[k])
                       for k in range(self.n_dim)]
        outer_funcs = MeijerG(theta=Thetas_out[0], order=self.Orders_out[0])
        inner_fun_0 = [MeijerG(theta=Thetas_in_0[k], order=self.Orders_in[k])
                       for k in range(self.n_dim)]

        out_expr_ = 0
        x = symbols('x')
        for v in range(self.n_dim):
            if v < self.single_dim:
                # first-order terms: substitute a single variable
                if v not in self.zero_locs:
                    out_expr_ += sympify(
                        str(re(inner_funcs[v].expression()))).subs(x, dims_[v])
            else:
                # interaction terms: substitute a product of two variables
                if v not in self.zero_locs:
                    dim_0 = self.dim_combins[self.single_dim + (v - self.single_dim)][0]
                    dim_1 = self.dim_combins[self.single_dim + (v - self.single_dim)][1]
                    out_expr_ += sympify(
                        str(re(inner_funcs[v].expression()))).subs(
                            x, dims_[dim_0] * dims_[dim_1])

        out_expr_ = simplify(
            sympify(str(re(outer_funcs.expression()))).subs(x, out_expr_))
        final_expr = simplify(
            sympify(str(self.init_scale / (self.init_scale + out_expr_))))
        return final_expr, dims_

    def initialize_thetas(self):
        self.poly = PolynomialFeatures(interaction_only=True, include_bias=False)
        self.poly.fit(self.x_train)
        self.origin_x_train = self.x_train
        self.x_train = self.poly.transform(self.x_train)
        self.single_dim = self.n_dim
        self.n_dim = self.x_train.shape[1]
        self.dim_combins = list(
            self.poly._combinations(self.single_dim, degree=2,
                                    interaction_only=True, include_bias=False))
        initializer_model = LogisticRegression()
        binarized_thresh = 0.5
        if hasattr(self.pred_model, "predict_proba"):
            initializer_model.fit(
                self.x_train,
                (self.pred_model.predict_proba(self.origin_x_train)[:, 1]
                 > binarized_thresh) * 1)
        else:
            initializer_model.fit(
                self.x_train,
                (self.pred_model.predict(self.origin_x_train)
                 > binarized_thresh) * 1)
        self.init_coeff = initializer_model.coef_[0] + self.epsilon
        self.init_scale = np.exp(initializer_model.intercept_)
        self.zero_locs = list(
            np.where(np.abs(self.init_coeff) <= self.epsilon)[0])
        Thetas_in_0, Thetas_out_0 = self.initialize_hyperparameters()
        self.theta_0 = np.hstack((Thetas_in_0, Thetas_out_0)).reshape((-1,))

    def initialize_hyperparameters(self):
        self.Orders_in = [[0, 1, 3, 1]] * self.n_dim
        self.Orders_out = [[1, 0, 0, 1]]
        Thetas = [np.array([2.0, 2.0, 2.0, 1.0, self.init_coeff[k]])
                  for k in range(self.n_dim)]
        Thetas_out = [np.array([0.0, 1.0])]
        Thetas_in_0 = np.array(Thetas).reshape((1, -1))
        Thetas_out_0 = np.array(Thetas_out).reshape((1, -1))
        return Thetas_in_0, Thetas_out_0

    def SGD_optimizer(self):
        theta_ = self.theta_0.reshape((-1, 1))
        losses_ = []
        thetas_opt = []
        beta_1 = 0.9
        beta_2 = 0.999
        eps_stable = 1e-8
        m = 0
        v = 0
        step_size = 0.001
        for it in range(self.num_iter):
            start_time = time.time()
            loss_, loss_grad = self.Loss_grad(theta_)
            # loss_grad[self.single_dim:loss_grad.shape[0] - 2, 0] = 0
            # if it <= self.num_iter - 1:
            theta_ = theta_ - self.learning_rate * loss_grad
            # Adam accumulators (bias correction uses t starting at 1 to
            # avoid a divide-by-zero); the Adam step itself is left disabled
            m = beta_1 * m + (1 - beta_1) * loss_grad
            v = beta_2 * v + (1 - beta_2) * np.power(loss_grad, 2)
            m_hat = m / (1 - np.power(beta_1, it + 1))
            v_hat = v / (1 - np.power(beta_2, it + 1))
            # theta_ = theta_ - (step_size * m_hat / (np.sqrt(v_hat) + eps_stable))
            print("-- Search epoch: %s --- Loss: %0.5f --- Run time: %0.2f seconds ---"
                  % (it, loss_, time.time() - start_time))
            losses_.append(loss_)
            thetas_opt.append(theta_)
        return thetas_opt, losses_

    def Loss_grad(self, theta):
        subsamples_ = np.random.choice(list(range(self.x_train.shape[0])),
                                       size=self.batch_size, replace=False)
        x_ = self.x_train[subsamples_, :]
        x_original = self.origin_x_train[subsamples_, :]
        f_est, f_grad = eval_Kolmogorov_gradient_only(
            self.init_scale, x_, theta.reshape((-1,)),
            self.Orders_in, self.Orders_out, h=0.01)
        f_est = f_est.reshape((-1, 1))
        if hasattr(self.pred_model, "predict_proba"):
            # y_true = ((self.pred_model.predict_proba(x_original)[:, 1] > 0.5) * 1).reshape((-1, 1))
            f_true = self.pred_model.predict_proba(x_original)[:, 1].reshape((-1, 1))
        else:
            # y_true = ((self.pred_model.predict(x_original) > 0.5) * 1).reshape((-1, 1))
            # predict() returns a 1-D array, so no column indexing here
            f_true = self.pred_model.predict(x_original).reshape((-1, 1))
        loss_type = 'mean_square_error'
        if loss_type == 'mean_square_error':
            loss_per_param = np.tile(-1 * 2 * (f_true - f_est),
                                     [1, f_grad.shape[1]])
            loss_grad = np.mean(loss_per_param * f_grad, axis=0).reshape((-1, 1))
            loss_ = np.mean((f_true - f_est) ** 2)
        elif loss_type == 'cross_entropy':
            loss_per_param = np.tile(-(f_true - f_est) / ((1 - f_est) * f_est),
                                     [1, f_grad.shape[1]])
            loss_grad = np.mean(loss_per_param * f_grad, axis=0).reshape((-1, 1))
            loss_ = np.mean(-1 * (np.multiply(f_true, np.log(f_est))
                                  + np.multiply(1 - f_true, np.log(1 - f_est))))
        # loss_c = roc_auc_score(y_true, f_est)
        # print("AUC", loss_c)
        return loss_, loss_grad

    def evaluate(self, x_in):
        y_est, _ = eval_Kolmogorov_gradient_only(
            self.init_scale, self.poly.transform(x_in), self.thetas_opt,
            self.Orders_in, self.Orders_out, h=0.01)
        return y_est

    def get_gradient(self, x_in):
        grad_ = []
        h = 0.01
        x_in = x_in.reshape((1, -1))
        if self.exact_grad:
            # NOTE: this branch expects `vars_`, `sym_meta_mod`, and `x_test`
            # to be defined in the enclosing scope; it is not self-contained.
            gradients_ = []
            for var in vars_:
                gradients_.append(diff(sym_meta_mod, var))
            for k in range(x_test.shape[0]):
                grad_.append([
                    gradients_[v]
                    .subs(vars_[0], x_test[k, 0]).subs(vars_[1], x_test[k, 1])
                    .subs(vars_[2], x_test[k, 2]).subs(vars_[3], x_test[k, 3])
                    .subs(vars_[4], x_test[k, 4]).subs(vars_[5], x_test[k, 5])
                    .subs(vars_[6], x_test[k, 6]).subs(vars_[7], x_test[k, 7])
                    .subs(vars_[8], x_test[k, 8]).subs(vars_[9], x_test[k, 9])
                    for v in range(len(gradients_))
                ])
        else:
            # finite-difference gradient of the black-box model itself
            for u in range(x_in.shape[1]):
                x_in_h = deepcopy(x_in)
                if self.feature_types is not None:
                    # continuous ('c') features get a small bump h;
                    # binary ('b') features are flipped
                    x_in_h[0, u] = (((self.feature_types[u] == 'c') * 1)
                                    * (x_in_h[0, u] + h)
                                    + ((self.feature_types[u] == 'b') * 1)
                                    * ((x_in_h[0, u] == 0) * 1))
                else:
                    x_in_h[0, u] = x_in_h[0, u] + h
                grad_.append(
                    np.abs(self.exact_pred_expr.predict_proba(x_in_h)[:, 1]
                           - self.exact_pred_expr.predict_proba(x_in)[:, 1]) / h)
        return np.array(grad_)

    def get_instancewise_scores(self, x_in):
        scores = []
        for v in range(x_in.shape[0]):
            scores.append(self.get_gradient(x_in[v, :]))
        return scores
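# A hedged end-to-end sketch: distilling a scikit-learn black box into a
# symbolic expression with the class above. It assumes the project-local
# helpers (MeijerG, get_theta_parameters, eval_Kolmogorov_gradient_only) are
# importable; the data and hyperparameters are illustrative only.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(500, 5)
y = (X[:, 0] + X[:, 1] * X[:, 2] > 1.0).astype(int)
black_box = RandomForestClassifier(n_estimators=50).fit(X, y)

meta = Symbolic_Metamodel(batch_size=100, num_iter=10)
meta.fit(black_box, X)       # SGD over Meijer G-function parameters
print(meta.metamodel)        # sympy expression in X0..X4
y_meta = meta.evaluate(X)    # metamodel's approximation of predict_proba[:, 1]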