Exemple #1
0
def main():
    """Evaluate the predictor on the spam data set and print the results.

    Loads 'spam_train.csv' and 'spam_test.csv', then for every ``k`` in the
    module-level ``K`` prints ``1.0 - mse`` (labelled "accr") first on the
    raw features and then on z-scored features.  Finally prints a yes/no
    table with the labels predicted for the first ``NUM_SAMPLES`` test
    points, one column per ``k``.

    All output goes through single-argument ``print(...)`` calls, which
    produce the same text under the Python 2 print statement and the
    Python 3 print function.
    """
    train_data, train_labels = load_data('spam_train.csv', start=1)
    test_data, test_labels = load_data('spam_test.csv', start=1)

    def report_accuracy(title, train, test):
        # One line per k, followed by a blank separator line.
        print(title)
        for k in K:
            predicted = predict(test, train, train_labels, k)
            error = mse(test_labels, predicted)
            print("k=%d accr=%.3f" % (k, 1.0 - error))
        print("")

    # train and test without normalization
    report_accuracy("without normalization", train_data, test_data)

    # train and test with normalization
    # NOTE(review): the train and test sets are z-scored independently;
    # confirm this is intended rather than reusing the training statistics.
    train_data = zscore(train_data)
    test_data = zscore(test_data)
    report_accuracy("with normalization", train_data, test_data)

    # report labels for the first NUM_SAMPLES (normalized) test points
    print("labels for first 50 data points")
    test_data = test_data[:NUM_SAMPLES]
    predicted = np.zeros((len(K), NUM_SAMPLES))
    for i, k in enumerate(K):
        predicted[i] = predict(test_data, train_data, train_labels, k)
    print(' k= %s' % ' '.join(['%3d' % k for k in K]))
    for i in range(NUM_SAMPLES):
        # A predicted value of exactly 1 is shown as "yes", anything else "no".
        labels = ['yes' if y == 1 else 'no ' for y in predicted[:, i]]
        print('%2d: %s' % (i + 1, ' '.join(labels)))
def match(ref_block, frame, last_match, R=8, trans=None):
    """Find the block of ``frame`` near ``last_match`` that best fits ``ref_block``.

    Every BLOCK_SIZE x BLOCK_SIZE block whose top-left corner lies within
    ``R`` rows and ``R`` columns of ``last_match`` and which fits entirely
    inside ``frame`` is scored with ``mad``.  The lowest score wins; ties go
    to the candidate visited first because the comparison is strict.

    Args:
        ref_block: reference block to look for.
        frame: 2-D indexable array (``frame.shape[0]`` rows,
            ``frame.shape[1]`` columns) to search in.
        last_match: ``(top, left)`` corner the search window is centred on.
        R: search radius, in rows/columns, around ``last_match``.
        trans: optional callable applied to both the reference block and
            each candidate before they are compared with ``mad``.

    Returns:
        ``(arg_min_mad, mse_)``: the ``(top, left)`` corner of the best
        candidate and ``mse(ref_block, block)`` computed on the
        untransformed blocks.  ``(None, 0)`` if no candidate fits the frame.
    """
    # Transform the reference once; candidates are transformed per iteration.
    ref = trans(ref_block) if trans else ref_block
    last_match_top, last_match_left = last_match
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]
    min_mad = float('inf')
    arg_min_mad = None
    mse_ = 0
    for i in range(-R, R + 1):
        top = last_match_top + i
        # Slice ends are exclusive, so a block ending exactly on the frame
        # edge (top + BLOCK_SIZE == height) is still a valid candidate.
        if top < 0 or top + BLOCK_SIZE > frame_height:
            continue
        for j in range(-R, R + 1):
            left = last_match_left + j
            if left < 0 or left + BLOCK_SIZE > frame_width:
                continue
            block = frame[top:top + BLOCK_SIZE, left:left + BLOCK_SIZE]
            m = mad(ref, trans(block) if trans else block)
            if m < min_mad:
                min_mad = m
                arg_min_mad = (top, left)
                mse_ = mse(ref_block, block)
    return arg_min_mad, mse_
Exemple #3
0
def curve(train_data, train_labels, test_data, test_labels, lagrange,
          num_trials=10):
    """Compute a learning curve of test error versus training-set size.

    For each trial the training indices are shuffled; then, for every
    prefix size ``n`` from 1 to ``train_data.shape[DATA_AXIS]``, a model is
    fitted with ``regress`` on the first ``n`` shuffled samples and scored
    with ``mse`` on the full test set.

    Args:
        train_data: training samples, indexable by a list of row indices.
        train_labels: labels aligned with ``train_data``.
        test_data: samples used to evaluate every fitted model.
        test_labels: labels aligned with ``test_data``.
        lagrange: regularization value forwarded to ``regress``.
        num_trials: number of independent shuffles to average over
            (defaults to 10, the previously hard-coded value).

    Returns:
        Array whose entry ``n - 1`` is the test error averaged over all
        trials when training on ``n`` samples.

    Raises:
        ValueError: if ``num_trials`` is less than 1.
    """
    if num_trials < 1:
        raise ValueError("num_trials must be at least 1")

    total_samples = train_data.shape[DATA_AXIS]
    avg_errors = np.zeros(total_samples)
    for _ in range(num_trials):
        indices = list(range(total_samples))
        shuffle(indices)
        for num_samples in range(1, total_samples + 1):
            subset = indices[:num_samples]
            coefs = regress(train_data[subset], train_labels[subset], lagrange)
            predicted = predict(test_data, coefs)
            avg_errors[num_samples - 1] += mse(test_labels, predicted)
    return avg_errors / num_trials
Exemple #4
0
    def get_polynomial_log_likelihood(self, ys, tree):
        """Score ``tree`` against ``ys`` with a negated ABC distance.

        Used in place of log p(ys | tree, xs): the polynomial described by
        ``tree`` is evaluated on ``self.xs`` (documented by the original
        author as torch.linspace(-10, 10, 100)) and compared with ``ys``
        through ``util.mse``.  The ABC distance is log(1 + mse).

        Args:
            ys: torch.tensor of shape [100]
            tree: list of lists or string

        Returns: -log(1 + mse(ys, eval(tree))); scalar tensor
        """
        predicted = util.eval_polynomial(tree, self.xs)
        abc_distance = torch.log(1 + util.mse(ys, predicted))
        return -abc_distance
def plot_regression(name, train_file, test_file):
    """Plot training and testing error against the Lagrange multiplier.

    For every integer multiplier from 0 to 150 a model is fitted with
    ``regress`` on the training set and scored with ``mse`` on both the
    training and the test set.  The per-multiplier errors are written to
    'logs/q1/<name>.log' and the two error curves are saved to
    'plots/q1/<name>.png'.  Progress messages go to ``stdout``.

    Args:
        name: data set name, used in the plot title and output file names.
        train_file: file passed to ``load_data`` for the training set.
        test_file: file passed to ``load_data`` for the test set.
    """
    stdout.write("Drawing plot for data set '%s'... " % name)
    stdout.flush()
    train_data, train_labels = load_data(train_file, dummy=1.0)
    test_data, test_labels = load_data(test_file, dummy=1.0)
    lagranges = list(range(151))
    train_errors = []
    test_errors = []
    message = 'lagrange=%d train_error=%.3f test_error=%.3f\n'

    # for each lagrange regress and calculate error; the context manager
    # guarantees the log is flushed and closed even if a step below raises
    with open('logs/q1/%s.log' % name, 'w') as log:
        for lagrange in lagranges:
            coefs = regress(train_data, train_labels, lagrange)
            train_error = mse(train_labels, predict(train_data, coefs))
            train_errors.append(train_error)
            test_error = mse(test_labels, predict(test_data, coefs))
            test_errors.append(test_error)
            log.write(message % (lagrange, train_error, test_error))

    # plot errors as a function of the lagrange
    pyplot.figure()
    pyplot.xlim(0, 150)
    pyplot.title("Data set '%s'" % name)
    pyplot.xlabel('Lagrange multiplier')
    pyplot.ylabel('Mean squared error')
    pyplot.plot(lagranges, train_errors, label="Training")
    pyplot.plot(lagranges, test_errors, label="Testing")
    pyplot.legend(loc='lower right')
    pyplot.savefig('plots/q1/%s.png' % name)
    stdout.write("done.\n")
    stdout.write("Plot image written to 'plots/q1/%s.png'.\n" % name)
    # A closed file object still exposes the path it was opened with.
    stdout.write("Plot data written to '%s'.\n" % log.name)
    stdout.flush()
Exemple #6
0
 def test_one_epoch(self, loader, epoch):
     """Run one evaluation pass over ``loader`` and log the metrics.

     Puts ``self.G`` and ``self.D`` in eval mode, predicts every batch with
     ``self.predict``, accumulates the batch-size-weighted criterion value
     and collects images, labels and predictions as NumPy arrays.  The
     resulting metrics are written through ``self.logger`` with
     ``stage='test'``, and the model is saved as 'model.best.t7' whenever
     the accumulated loss improves on ``self.best_test_loss``.

     Args:
         loader: iterable yielding ``(img, label)`` batches.
         epoch: epoch number forwarded to the logger.

     Returns:
         dict with the average loss, the concatenated images, labels and
         predictions, and the computed metrics.
     """
     self.G.eval()
     self.D.eval()
     total_loss = 0.0
     seen = 0
     img_batches, label_batches, pred_batches = [], [], []

     # NOTE(review): the loop does not run under torch.no_grad(); confirm
     # whether self.predict needs gradients during evaluation.
     for img, label in tqdm(loader):
         img = img.to(self.device)
         label = label.to(self.device)
         pred_label = self.predict(img)
         batch_loss = self.criterion(pred_label, label)
         count = img.size(0)
         # Weight each batch by its size so the final division yields a
         # per-example average.
         total_loss += batch_loss.item() * count
         seen += count
         img_batches.append(img.cpu().numpy())
         label_batches.append(label.cpu().numpy())
         pred_batches.append(pred_label.detach().cpu().numpy())

     all_imgs = np.concatenate(img_batches, axis=0)
     all_labels = np.concatenate(label_batches, axis=0)
     all_preds = np.concatenate(pred_batches, axis=0)

     log = {
         'loss': total_loss / seen,
         'img': all_imgs,
         'label': all_labels,
         'pred_label': all_preds,
         'pp_r2': pp_r2(all_preds, all_labels),
         'mse': mse(all_preds, all_labels),
         'rmse': rmse(all_preds, all_labels),
         'mae': mae(all_preds, all_labels),
         'pp_mse': pp_mse(all_preds, all_labels).tolist(),
         'pp_rmse': pp_rmse(all_preds, all_labels).tolist(),
         'pp_mae': pp_mae(all_preds, all_labels).tolist(),
     }
     log['avg_r2'] = np.mean(log['pp_r2'])
     self.logger.write(log, epoch=epoch, stage='test')

     # The checkpoint decision compares the summed loss, not the average
     # stored in log['loss'].
     if total_loss < self.best_test_loss:
         self.best_test_loss = total_loss
         self.save(os.path.join(self.exp_path, 'models', 'model.best.t7'))
     return log
def cross_validate(name, file):
    """Pick the best Lagrange value for a data set by cross-validation.

    The samples are shuffled once and split into ``NUM_FOLDS`` folds.  For
    every integer Lagrange value from 0 to 150, a model is fitted with
    ``regress`` on all folds but one and scored with ``mse`` on the held-out
    fold; the per-fold errors are written to 'logs/q3/<name>.log' and their
    average decides the best value, which is reported on ``stdout``.

    Args:
        name: data set name, used in messages and the log file name.
        file: file passed to ``load_data``.
    """
    data, labels = load_data(file, dummy=1.0)
    message = 'lagrange=%d fold=%d error=%.3f\n'

    # float('inf') is a portable "worse than anything" sentinel; None marks
    # that no candidate has been accepted yet.
    best_error = float('inf')
    best_lagrange = None

    # The context manager closes the log even if fitting or scoring raises.
    with open('logs/q3/%s.log' % name, 'w') as log:
        stdout.write("Evaluating data set '%s'..." % name)
        stdout.flush()

        # split the data into folds
        num_samples = data.shape[DATA_AXIS]
        indices = list(range(num_samples))
        shuffle(indices)
        fold_size = ceil(float(num_samples) / NUM_FOLDS)

        # evaluate each lagrange
        for lagrange in range(0, 151):
            avg_error = 0.0

            # try each fold and average the errors
            for i in range(NUM_FOLDS):
                low = int(i * fold_size)
                high = int((i + 1) * fold_size)
                train_indices = indices[:low] + indices[high:]
                test_indices = indices[low:high]
                coefs = regress(data[train_indices], labels[train_indices],
                                lagrange)
                predicted = predict(data[test_indices], coefs)
                error = mse(labels[test_indices], predicted)
                avg_error += error / NUM_FOLDS
                log.write(message % (lagrange, i, error))

            # update best error and lagrange if result is better
            if avg_error < best_error:
                best_error = avg_error
                best_lagrange = lagrange

    # report the results
    stdout.write('done.\n')
    if best_lagrange is None:
        # Reached only when no average error compared below infinity,
        # e.g. when every candidate produced NaN or an infinite error.
        stdout.write('No Lagrange value produced a finite error.\n')
    else:
        stdout.write('Best Lagrange value is %d.\n' % best_lagrange)
        stdout.write('Best error is %.3f.\n' % best_error)
    # A closed file object still exposes the path it was opened with.
    stdout.write("Logs written to '%s'.\n" % log.name)
    stdout.flush()
Exemple #8
0
 def train_one_epoch(self, loader, epoch):
     """Train for one pass over ``loader``, log the metrics and checkpoint.

     Puts ``self.G`` in train mode, steps the scheduler once, then for each
     batch performs a forward pass with ``self.predict``, backpropagates the
     loss and steps ``self.opt``.  The loss is the criterion applied to the
     predictions and labels scaled by ``self.param_scale``, plus the sum of
     squared prediction errors weighted by ``self.psf``.  Metrics over the
     whole epoch are written through ``self.logger`` and the model is saved
     as 'model.<epoch>.t7'.

     Args:
         loader: iterable yielding ``(img, label)`` batches.
         epoch: epoch number, used for logging and the checkpoint name.

     Returns:
         dict with the average loss and the computed metrics.
     """
     self.G.train()
     # NOTE(review): the scheduler is stepped before any optimizer step in
     # this epoch; confirm this ordering matches the installed PyTorch
     # version's expectations.
     self.scheduler.step()

     running_loss = 0.0
     seen = 0
     pred_batches, label_batches = [], []
     for img, label in tqdm(loader):
         img = img.to(self.device)
         label = label.to(self.device)
         self.opt.zero_grad()
         pred_label = self.predict(img)
         pred_batches.append(pred_label.detach().cpu().numpy())
         label_batches.append(label.cpu().numpy())

         scaled_term = self.criterion(pred_label * self.param_scale,
                                      label * self.param_scale)
         weighted_term = torch.sum((self.psf * (pred_label - label)) ** 2)
         loss = scaled_term + weighted_term
         loss.backward()
         self.opt.step()

         count = img.size(0)
         # Weight each batch by its size so the final division yields a
         # per-example average.
         running_loss += loss.item() * count
         seen += count

     all_preds = np.concatenate(pred_batches, axis=0)
     all_labels = np.concatenate(label_batches, axis=0)
     log = {
         'loss': running_loss / seen,
         'pp_r2': pp_r2(all_preds, all_labels),
         'mse': mse(all_preds, all_labels),
         'rmse': rmse(all_preds, all_labels),
         'mae': mae(all_preds, all_labels),
         'pp_mse': pp_mse(all_preds, all_labels).tolist(),
         'pp_rmse': pp_rmse(all_preds, all_labels).tolist(),
         'pp_mae': pp_mae(all_preds, all_labels).tolist(),
     }
     log['avg_r2'] = np.mean(log['pp_r2'])
     self.logger.write(log, epoch=epoch)
     self.save(os.path.join(self.exp_path, 'models', 'model.%d.t7' % epoch))
     return log
Exemple #9
0
def cv(X, y, c=1, wts=None, nfolds=10):
    """
    Estimate the cross-validation error of ridge regression.

    The folds are stratified on ``sign(y)``.  For each fold a fresh
    ``Ridge(alpha=c)`` model is fitted on the training part (with the
    matching example weights when ``wts`` is given) and scored with
    ``util.mse`` on the held-out part.

    Parameters
    ----------
    X: array of shape = [n_samples, n_features]
        Input examples
    y: array of shape = [n_samples]
        labels
    c: positive real number
        regularization parameter
    wts: array of shape = [n_samples]
        example weights
    nfolds: scalar
        no. of folds in cross-validation

    Returns
    -------
    average mean squared error (cross-validation error)
    """
    # NOTE(review): this is the legacy StratifiedKFold call signature
    # (labels passed to the constructor, ``n_folds`` keyword); confirm the
    # installed scikit-learn version still provides it.
    folds = StratifiedKFold(sign(y), n_folds=nfolds)

    fold_errors = []
    for tr_ids, te_ids in folds:
        # Only pass sample_weight when weights were actually supplied.
        fit_kwargs = {}
        if wts is not None:
            fit_kwargs['sample_weight'] = wts[tr_ids]

        model = Ridge(alpha=c)
        model.fit(X[tr_ids], y[tr_ids], **fit_kwargs)

        predictions = model.predict(X[te_ids])
        fold_errors.append(util.mse(predictions, y[te_ids]))
    return mean(fold_errors)