import os
import random
import itertools

import numpy as np
from tqdm import tqdm

# NOTE: the import paths below are assumptions about the package layout;
# adjust them to wherever Corex, TCorex, calculate_nll_score, and
# make_buckets are defined in this repository.
import linearcorex
from tcorex import Corex, TCorex
from tcorex.experiments.data import make_buckets
from tcorex.experiments.misc import calculate_nll_score


def test_tcorex_real_data():
    r""" Test the PyTorch implementation of T-CorEx on a real-world dataset. """
    print("=" * 100)
    print("Testing PyTorch T-CorEx on a real-world dataset ...")

    # load the data shipped with the tests
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')
    data_file = os.path.join(resources, 'test_tcorex_real_data.npy')
    data = np.load(data_file)
    print("Data is loaded, shape = {}".format(data.shape))

    # split each time series into a training part (first 40 samples) and a test part
    train_data = data[:, :40, :]
    test_data = data[:, 40:, :]

    # train T-CorEx several times and average the negative log-likelihood scores
    scores = []
    for _ in tqdm(range(5)):
        tc = TCorex(n_hidden=8, nv=train_data.shape[-1], nt=train_data.shape[0],
                    max_iter=500, anneal=True, l1=0.3, gamma=0.4, reg_type='W',
                    init=True, device='cpu', verbose=1)
        tc.fit(train_data)
        covs = tc.get_covariance()
        cur_score = calculate_nll_score(data=test_data, covs=covs)
        scores.append(cur_score)

    score_mean = np.mean(scores)
    need_score = 396.1597
    print("score: {:.4f}, need score: {:.4f}".format(score_mean, need_score))
    # allow at most 1% relative degradation compared to the reference score
    assert (score_mean - need_score) / need_score < 0.01
def evaluate(self, test_data, verbose=True):
    assert self._trained
    if verbose:
        print("Evaluating {} ...".format(self.name))
    # negative log-likelihood of the test data under the selected covariances
    nll = calculate_nll_score(data=test_data, covs=self._covs)
    if verbose:
        print("\tScore: {:.4f}".format(nll))
    return nll
def test_corex():
    r""" Test the PyTorch linear CorEx implementation. Check whether the
    performance of PyTorch CorEx matches that of the standard CorEx. """
    print("=" * 100)
    print("Testing PyTorch Linear CorEx ...")

    # load data
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')
    data_file = os.path.join(resources, 'test_corex_data.npy')
    data = np.load(data_file)
    print("Data is loaded, shape = {}".format(data.shape))

    # train the reference (standard) linear CorEx on 5 disjoint batches
    lc_scores = []
    for i in tqdm(range(5)):
        X = data[32 * i:32 * (i + 1)]
        lc = linearcorex.Corex(n_hidden=8, max_iter=500, verbose=0)
        lc.fit(X)
        covs = lc.get_covariance()
        cur_score = calculate_nll_score(data=[X], covs=[covs])
        lc_scores.append(cur_score)

    # train the PyTorch linear CorEx on the same batches
    pylc_scores = []
    for i in tqdm(range(5)):
        X = data[32 * i:32 * (i + 1)]
        lc = Corex(nv=128, n_hidden=8, max_iter=1000, verbose=0)
        lc.fit(X)
        covs = lc.get_covariance()
        cur_score = calculate_nll_score(data=[X], covs=[covs])
        pylc_scores.append(cur_score)

    lc_mean = np.mean(lc_scores)
    pylc_mean = np.mean(pylc_scores)
    print("pylc score: {:.4f}, lc score: {:.4f}".format(pylc_mean, lc_mean))
    # the PyTorch implementation must be within 1% of the reference score
    assert (pylc_mean - lc_mean) / (np.abs(lc_mean) + 1e-6) < 0.01
def select(self, train_data, val_data, params, verbose=True):
    if verbose:
        print("\n{}\nSelecting the best parameter values for {} ...".format(
            '-' * 80, self.name))

    best_score = 1e18
    best_params = None
    best_covs = None
    best_method = None
    results = []

    # '_random_iters' switches from full grid search to random search
    random_iters = None
    if '_random_iters' in params:
        random_iters = params['_random_iters']
        del params['_random_iters']

    # split the parameters into constants and grid (searchable) parameters
    const_params = dict()
    search_params = []
    for k, v in params.items():
        if isinstance(v, list):
            arr = [(k, x) for x in v]
            search_params.append(arr)
        elif isinstance(v, dict):
            # a dict groups several parameters whose values are tried as alternatives
            arr = []
            for param_k, param_v in v.items():
                arr += list([(param_k, x) for x in param_v])
            search_params.append(arr)
        else:
            const_params[k] = v

    # add a dummy variable if the grid is empty
    if len(search_params) == 0:
        search_params = [[('__dummy__', None)]]

    grid = list(itertools.product(*search_params))
    if random_iters is not None:
        random.shuffle(grid)
        grid = grid[:random_iters]

    for index, cur_params in enumerate(grid):
        if verbose:
            print("done {} / {}".format(index, len(grid)), end='')
            print(" | running with ", end='')
            for k, v in cur_params:
                if k != '__dummy__':
                    print('{}: {}\t'.format(k, v), end='')
            print('')

        cur_params = dict(cur_params)
        for k, v in const_params.items():
            cur_params[k] = v

        # divide the data into buckets if needed
        try:
            if 'window' in cur_params:
                assert 'stride' in cur_params
                cur_window = cur_params.pop('window')
                cur_stride = cur_params.pop('stride')
                bucketed_train_data, index_to_bucket = make_buckets(
                    train_data, cur_window, cur_stride)
                (cur_covs, cur_method) = self._train(bucketed_train_data, cur_params, verbose)
                if cur_covs is not None:
                    # map the bucket covariances back to the original time steps
                    cur_covs = [cur_covs[index_to_bucket[i]] for i in range(len(train_data))]
                cur_params['window'] = cur_window
                cur_params['stride'] = cur_stride
            else:
                (cur_covs, cur_method) = self._train(train_data, cur_params, verbose)
            cur_score = calculate_nll_score(data=val_data, covs=cur_covs)
        except Exception as e:
            print("Failed to train and evaluate method: {}, message: {}".format(
                self.name, str(e)))
            cur_score = None
            cur_covs = None
            cur_method = None

        results.append((cur_params, cur_score))
        if verbose:
            print('\tcurrent score: {}'.format(cur_score))

        # skip failed (None) and NaN scores; keep the best valid configuration.
        # Note: the original condition called np.isnan on a possibly-None score,
        # which raises a TypeError and could also record a failed run as best.
        if cur_score is not None and not np.isnan(cur_score) and \
                (best_params is None or cur_score < best_score):
            best_score = cur_score
            best_params = cur_params
            best_covs = cur_covs
            best_method = cur_method

    if verbose:
        print('\nFinished with best validation score: {}'.format(best_score))

    self._trained = True
    self._val_score = best_score
    self._params = best_params
    self._covs = best_covs
    self._method = best_method

    return best_score, best_params, best_covs, best_method, results
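# Illustrative sketch (not part of the original file): how a `params` grid for
# `select` might be specified. Keys mapped to lists are expanded into a
# Cartesian product; scalar values are treated as constants; the special key
# '_random_iters' samples that many random configurations instead of running
# the full grid; passing 'window' and 'stride' triggers bucketing through
# make_buckets. The concrete parameter names and values below are assumptions
# for demonstration only.
#
#     params = {
#         'n_hidden': [4, 8],        # searched over
#         'l1': [0.1, 0.3, 1.0],     # searched over
#         'max_iter': 500,           # constant (not a list)
#         'window': [8, 12],         # used together with 'stride' for bucketing
#         'stride': [4],
#         '_random_iters': 5,        # try 5 random configs instead of the full grid
#     }
#     best_score, best_params, best_covs, best_method, results = \
#         method.select(train_data, val_data, params)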
def __init__(self, covs, test_data, **kwargs):
    super(GroundTruth, self).__init__(**kwargs)
    # the ground-truth covariances need no training; score them directly
    self._score = calculate_nll_score(data=test_data, covs=covs)
    self._covs = covs
    self._trained = True
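# Illustrative sketch (not part of the original file): using GroundTruth as a
# reference baseline when the true covariance matrices are known, e.g. on
# synthetic data. This assumes the base class accepts a `name` keyword, as
# suggested by the use of `self.name` in evaluate() and select().
#
#     gt = GroundTruth(covs=true_covs, test_data=test_data, name='Ground Truth')
#     gt.evaluate(test_data)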