Example 1
def test_tcorex_real_data():
    r"""Test the PyTorch implementation of T-CorEx on a real-world dataset.
    """
    print("=" * 100)
    print("Testing PyTorch T-CorEx on a real-world dataset ...")
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'resources')
    data_file = os.path.join(resources, 'test_tcorex_real_data.npy')
    data = np.load(data_file)
    print("Data is loaded, shape = {}".format(data.shape))
    # within each time period, the first 40 samples are used for training and the rest for testing
    train_data = data[:, :40, :]
    test_data = data[:, 40:, :]

    scores = []
    for _ in tqdm(range(5)):
        tc = TCorex(n_hidden=8,
                    nv=train_data.shape[-1],
                    nt=train_data.shape[0],
                    max_iter=500,
                    anneal=True,
                    l1=0.3,
                    gamma=0.4,
                    reg_type='W',
                    init=True,
                    device='cpu',
                    verbose=1)
        tc.fit(train_data)
        covs = tc.get_covariance()
        cur_score = calculate_nll_score(data=test_data, covs=covs)
        scores.append(cur_score)

    score_mean = np.mean(scores)
    need_score = 396.1597
    print("score: {:.4f}, need score: {:.4f}".format(score_mean, need_score))
    assert (score_mean - need_score) / need_score < 0.01
Example 2
    def evaluate(self, test_data, verbose=True):
        """Return the NLL score of the stored covariance estimates on test_data."""
        assert self._trained
        if verbose:
            print("Evaluating {} ...".format(self.name))
        nll = calculate_nll_score(data=test_data, covs=self._covs)
        if verbose:
            print("\tScore: {:.4f}".format(nll))
        return nll
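
Every example on this page evaluates covariance estimates with calculate_nll_score(data=..., covs=...), where data is a list of (samples x variables) arrays and covs a matching list of covariance matrices, and lower scores are better. The sketch below is a rough stand-in for what such a score plausibly computes, namely the average Gaussian negative log-likelihood of the held-out samples; it is an illustration under that assumption, not the library's actual implementation.

import numpy as np
from scipy.stats import multivariate_normal

def gaussian_nll_score(data, covs):
    """Average per-sample negative log-likelihood of zero-mean Gaussians.

    Illustrative stand-in for calculate_nll_score: the real function may differ
    in normalization, mean handling, or numerical details.
    """
    nlls = []
    for X, cov in zip(data, covs):
        X = np.asarray(X)
        mvn = multivariate_normal(mean=np.zeros(X.shape[-1]), cov=cov,
                                  allow_singular=True)
        nlls.append(-np.mean(mvn.logpdf(X)))
    return float(np.mean(nlls))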
Example 3
def test_corex():
    r"""Test the PyTorch linear CorEx implementation.
    Check that the performance of PyTorch CorEx matches that of the standard
    (linearcorex) CorEx.
    """
    print("=" * 100)
    print("Testing PyTorch Linear CorEx ...")

    # load data
    resources = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'resources')
    data_file = os.path.join(resources, 'test_corex_data.npy')
    data = np.load(data_file)
    print("Data is loaded, shape = {}".format(data.shape))

    # train linear corex
    lc_scores = []
    for i in tqdm(range(5)):
        X = data[32 * i:32 * (i + 1)]
        lc = linearcorex.Corex(n_hidden=8, max_iter=500, verbose=0)
        lc.fit(X)
        covs = lc.get_covariance()
        cur_score = calculate_nll_score(data=[X], covs=[covs])
        lc_scores.append(cur_score)

    # train pytorch corex
    pylc_scores = []
    for i in tqdm(range(5)):
        X = data[32 * i:32 * (i + 1)]
        lc = Corex(nv=128, n_hidden=8, max_iter=1000, verbose=0)
        lc.fit(X)
        covs = lc.get_covariance()
        cur_score = calculate_nll_score(data=[X], covs=[covs])
        pylc_scores.append(cur_score)

    lc_mean = np.mean(lc_scores)
    pylc_mean = np.mean(pylc_scores)
    print("pylc score: {:.4f}, lc score: {:.4f}".format(pylc_mean, lc_mean))
    assert (pylc_mean - lc_mean) / (np.abs(lc_mean) + 1e-6) < 0.01
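
Both loops above share the same pattern: fit an estimator that exposes fit() and get_covariance() on a 32-sample slice of the data, then score it on that same slice with calculate_nll_score. A small helper capturing that pattern (not part of the original test; the estimator_factory argument is introduced here for illustration):

def mean_nll_over_slices(estimator_factory, data, n_slices=5, slice_size=32):
    """Fit a fresh estimator on each consecutive slice and return the mean NLL score."""
    scores = []
    for i in range(n_slices):
        X = data[slice_size * i:slice_size * (i + 1)]
        est = estimator_factory()
        est.fit(X)
        scores.append(calculate_nll_score(data=[X], covs=[est.get_covariance()]))
    return np.mean(scores)

# e.g. mean_nll_over_slices(lambda: Corex(nv=128, n_hidden=8, max_iter=1000, verbose=0), data)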
Example 4
    def select(self, train_data, val_data, params, verbose=True):
        if verbose:
            print(
                "\n{}\nSelecting the best parameter values for {} ...".format(
                    '-' * 80, self.name))

        best_score = 1e18
        best_params = None
        best_covs = None
        best_method = None
        results = []

        random_iters = None
        if '_random_iters' in params:
            random_iters = params['_random_iters']
            del params['_random_iters']

        const_params = dict()
        search_params = []
        for k, v in params.items():
            if isinstance(v, list):
                arr = [(k, x) for x in v]
                search_params.append(arr)
            elif isinstance(v, dict):
                arr = []
                for param_k, param_v in v.items():
                    arr += [(param_k, x) for x in param_v]
                search_params.append(arr)
            else:
                const_params[k] = v

        # add a dummy variable if the grid is empty
        if len(search_params) == 0:
            search_params = [[('__dummy__', None)]]

        grid = list(itertools.product(*search_params))
        if random_iters is not None:
            random.shuffle(grid)
            grid = grid[:random_iters]

        for index, cur_params in enumerate(grid):
            if verbose:
                print("done {} / {}".format(index, len(grid)), end='')
                print(" | running with ", end='')
                for k, v in cur_params:
                    if k != '__dummy__':
                        print('{}: {}\t'.format(k, v), end='')
                print('')

            cur_params = dict(cur_params)
            for k, v in const_params.items():
                cur_params[k] = v

            # divide into buckets if needed
            try:
                if 'window' in cur_params:
                    assert 'stride' in cur_params
                    cur_window = cur_params.pop('window')
                    cur_stride = cur_params.pop('stride')
                    bucketed_train_data, index_to_bucket = make_buckets(
                        train_data, cur_window, cur_stride)
                    (cur_covs,
                     cur_method) = self._train(bucketed_train_data, cur_params,
                                               verbose)
                    if cur_covs is not None:
                        cur_covs = [
                            cur_covs[index_to_bucket[i]]
                            for i in range(len(train_data))
                        ]
                    cur_params['window'] = cur_window
                    cur_params['stride'] = cur_stride
                else:
                    (cur_covs,
                     cur_method) = self._train(train_data, cur_params, verbose)
                cur_score = calculate_nll_score(data=val_data, covs=cur_covs)
            except Exception as e:
                print("Failed to train and evaluate method: {}, message: {}".
                      format(self.name, str(e)))
                cur_score = None
                cur_covs = None
                cur_method = None
            results.append((cur_params, cur_score))

            if verbose:
                print('\tcurrent score: {}'.format(cur_score))

            if (cur_score is not None and not np.isnan(cur_score)
                    and (best_params is None or cur_score < best_score)):
                best_score = cur_score
                best_params = cur_params
                best_covs = cur_covs
                best_method = cur_method
        if verbose:
            print(
                '\nFinished with best validation score: {}'.format(best_score))

        self._trained = True
        self._val_score = best_score
        self._params = best_params
        self._covs = best_covs
        self._method = best_method

        return best_score, best_params, best_covs, best_method, results
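
In select(), the params argument mixes constants and search axes: scalar values are kept constant across runs, list values each become a grid axis, a dict value groups several alternative (parameter, value) pairs into a single axis so that exactly one of them is chosen per configuration, and the optional '_random_iters' key replaces the full grid search with that many randomly sampled configurations. Passing 'window' and 'stride' additionally buckets the training data with make_buckets before training. A hypothetical call, assuming method is an instance of a subclass implementing _train() (the concrete parameter names below are placeholders, not a documented grid):

params = {
    'n_hidden': 8,              # scalar: kept constant in every configuration
    'l1': [0.0, 0.1, 0.3],      # list: one grid axis
    'gamma': [0.4, 0.8],        # list: another grid axis
    '_random_iters': 4,         # optional: evaluate only 4 randomly chosen configurations
}
best_score, best_params, best_covs, best_method, results = method.select(
    train_data, val_data, params)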
Example 5
    def __init__(self, covs, test_data, **kwargs):
        """Score a fixed set of (ground-truth) covariance matrices on test_data."""
        super(GroundTruth, self).__init__(**kwargs)
        self._score = calculate_nll_score(data=test_data, covs=covs)
        self._covs = covs
        self._trained = True
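
A hypothetical instantiation (true_covs and the name keyword are assumptions, and the parent class is not shown on this page; evaluate() is the method from Example 2):

gt = GroundTruth(covs=true_covs, test_data=test_data, name='Ground Truth')
nll = gt.evaluate(test_data)  # scores the stored ground-truth covariances on the test data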