def _one_step_lr(self):
    """Compute the best one-step learning rate for the G-Shapley algorithm.

    Sweeps candidate rates 10**-1, 10**-1.5, ..., 10**-4.5. For each
    candidate, trains a fresh single-epoch SGD model 10 times and scores
    it on the test set; the candidate maximizing mean(acc) - std(acc)
    (a risk-adjusted score) wins.

    Returns:
        float: the selected learning rate.
    """
    if self.directory is None:
        address = None
    else:
        address = os.path.join(self.directory, 'weights')
    best_acc = 0.0
    # Fallback to the first candidate so we never hit an
    # UnboundLocalError when no candidate's score exceeds the
    # initial best_acc of 0 (e.g. a metric that can be <= 0).
    learning_rate = 10 ** (-1.0)
    for i in np.arange(1, 5, 0.5):
        model = ShapNN(
            self.problem, batch_size=1, max_epochs=1,
            learning_rate=10 ** (-i), weight_decay=0.,
            validation_fraction=0, optimizer='sgd',
            warm_start=False, address=address,
            hidden_units=self.hidden_units)
        accs = []
        for _ in range(10):
            # Fitting on an empty dataset re-initializes the model
            # weights before each of the 10 scoring runs.
            model.fit(np.zeros((0, self.X.shape[-1])), self.y)
            model.fit(self.X, self.y)
            accs.append(model.score(self.X_test, self.y_test))
        # Hoisted: compute the risk-adjusted score once per candidate.
        score = np.mean(accs) - np.std(accs)
        if score > best_acc:
            best_acc = score
            learning_rate = 10 ** (-i)
    return learning_rate
def _g_shap(self, iterations, err=None, learning_rate=None, sources=None):
    """Method for running G-Shapley algorithm.

    Args:
        iterations: Number of iterations of the algorithm.
        err: Stopping error criteria
        learning_rate: Learning rate used for the algorithm. If None
            calculates the best learning rate.
        sources: If values are for sources of data points rather than
            individual points. In the format of an assignment array
            or dict.
    """
    # NOTE(review): `err` is accepted but never used in this method.
    # Normalize `sources` into a dict: group id -> array of member indices.
    if sources is None:
        # Each point is its own group.
        sources = {i:np.array([i]) for i in range(len(self.X))}
    elif not isinstance(sources, dict):
        # Assignment array: group id per point -> indices of each group.
        sources = {i:np.where(sources==i)[0] for i in set(sources)}
    address = None
    if self.directory is not None:
        address = os.path.join(self.directory, 'weights')
    # Lazily compute the best one-step learning rate once and cache it
    # on the instance as `g_shap_lr` for subsequent calls.
    if learning_rate is None:
        try:
            learning_rate = self.g_shap_lr
        except AttributeError:
            self.g_shap_lr = self._one_step_lr()
            learning_rate = self.g_shap_lr
    model = ShapNN(self.problem, batch_size=1, max_epochs=1,
                   learning_rate=learning_rate, weight_decay=0.,
                   validation_fraction=0, optimizer='sgd',
                   address=address, hidden_units=self.hidden_units)
    for iteration in range(iterations):
        # Fitting on an empty dataset re-initializes the model weights
        # before each Monte-Carlo pass.
        model.fit(np.zeros((0, self.X.shape[-1])), self.y)
        # Report progress at every 10% of the total iterations.
        if 10 * (iteration+1) / iterations % 1 == 0:
            print('{} out of {} G-Shapley iterations'.format(
                iteration + 1, iterations))
        marginal_contribs = np.zeros(len(sources.keys()))
        # One epoch with batch_size=1: `model.history['metrics'][0]` holds
        # the validation metric after each single-point update, and
        # `model.history['idxs'][0]` the order points were visited in.
        model.fit(self.X, self.y, self.X_test, self.y_test,
                  sources=sources, metric=self.metric,
                  max_epochs=1, batch_size=1)
        val_result = model.history['metrics']
        # Marginal contribution of position k is the change in the metric
        # caused by that point's one-step update: metrics[k] - metrics[k-1].
        # NOTE(review): position 0 is left at zero — presumably the first
        # point's contribution relative to the untrained model is dropped;
        # confirm against ShapNN.history semantics.
        marginal_contribs[1:] += val_result[0][1:]
        marginal_contribs[1:] -= val_result[0][:-1]
        individual_contribs = np.zeros(len(self.X))
        # Spread each group's marginal contribution evenly over its members.
        for i, index in enumerate(model.history['idxs'][0]):
            individual_contribs[sources[index]] += marginal_contribs[i]
            individual_contribs[sources[index]] /= len(sources[index])
        # Append this iteration's per-point values and visit order to the
        # running Monte-Carlo records.
        self.mem_g = np.concatenate(
            [self.mem_g, np.reshape(individual_contribs, (1,-1))])
        self.idxs_g = np.concatenate(
            [self.idxs_g, np.reshape(model.history['idxs'][0], (1,-1))])