def main():
    """Evaluate the classifier on the spam data set and print a report.

    Loads 'spam_train.csv' and 'spam_test.csv', then prints, for every k in
    the module-level ``K``:

    * the accuracy (``1 - mse``) on the raw features,
    * the accuracy on z-scored features,
    * the predicted label ('yes' / 'no ') of the first ``NUM_SAMPLES``
      z-scored test points.

    All ``print`` calls take a single pre-formatted string so the function
    produces the same output under both Python 2 and Python 3.
    """
    train_data, train_labels = load_data('spam_train.csv', start=1)
    test_data, test_labels = load_data('spam_test.csv', start=1)

    # train and test without normalization
    print("without normalization")
    _print_accuracies(test_data, test_labels, train_data, train_labels)
    print("")

    # train and test with normalization
    print("with normalization")
    train_data = zscore(train_data)
    # NOTE(review): the test set is passed to zscore on its own, so it is
    # presumably standardized with its own statistics rather than the
    # training set's -- confirm this is intended.
    test_data = zscore(test_data)
    _print_accuracies(test_data, test_labels, train_data, train_labels)
    print("")

    # report labels for the first NUM_SAMPLES data points
    print("labels for first 50 data points")
    test_data = test_data[:NUM_SAMPLES]
    predicted = np.zeros((len(K), NUM_SAMPLES))
    for i, k in enumerate(K):
        predicted[i] = predict(test_data, train_data, train_labels, k)
    print(' k= %s' % ' '.join(['%3d' % k for k in K]))
    for i in range(NUM_SAMPLES):
        # one column of `predicted` holds the labels of sample i for every k
        labels = ['yes' if y == 1 else 'no ' for y in predicted[:, i]]
        print('%2d: %s' % (i + 1, ' '.join(labels)))


def _print_accuracies(test_data, test_labels, train_data, train_labels):
    """Print one 'k=... accr=...' line for every k in the module-level K."""
    for k in K:
        predicted = predict(test_data, train_data, train_labels, k)
        error = mse(test_labels, predicted)
        print("k=%d accr=%.3f" % (k, 1.0 - error))
def match(ref_block, frame, last_match, R=8, trans=None):
    """Find the block in ``frame`` near ``last_match`` that best fits ``ref_block``.

    Every BLOCK_SIZE x BLOCK_SIZE candidate whose top-left corner lies within
    ``R`` rows and ``R`` columns of ``last_match`` and that fits entirely
    inside ``frame`` is scored with ``mad``. The lowest score wins; on ties
    the first candidate in row-major scan order is kept.

    Args:
        ref_block: reference block to search for.
        frame: 2-D indexable array (must expose ``shape``) to search in.
        last_match: ``(top, left)`` of the previous match; centre of the
            search window.
        R: search radius in rows/columns around ``last_match``.
        trans: optional callable applied to both the reference and each
            candidate before scoring. The returned ``mse`` is always computed
            on the untransformed blocks.

    Returns:
        ``(arg_min_mad, mse_)`` where ``arg_min_mad`` is the ``(top, left)``
        of the best candidate and ``mse_`` is ``mse(ref_block, best_block)``.
        If no candidate fits inside the frame (or none scores below
        infinity), ``(None, 0)`` is returned.
    """
    ref = trans(ref_block) if trans else ref_block
    last_match_top, last_match_left = last_match

    # Largest top/left at which a full block still fits. A block starting
    # exactly here ends on the last row/column and IS a valid candidate; the
    # previous ``>=`` test wrongly discarded it.
    max_top = frame.shape[0] - BLOCK_SIZE
    max_left = frame.shape[1] - BLOCK_SIZE

    min_mad = float('inf')
    arg_min_mad = None
    best_block = None
    for i in range(-R, R + 1):
        top = last_match_top + i
        if top < 0 or top > max_top:
            continue
        for j in range(-R, R + 1):
            left = last_match_left + j
            if left < 0 or left > max_left:
                continue
            block = frame[top:top + BLOCK_SIZE, left:left + BLOCK_SIZE]
            m = mad(ref, trans(block) if trans else block)
            if m < min_mad:
                min_mad = m
                arg_min_mad = (top, left)
                best_block = block

    # Only the winning candidate's error is reported, so compute it once.
    mse_ = mse(ref_block, best_block) if best_block is not None else 0
    return arg_min_mad, mse_
def curve(train_data, train_labels, test_data, test_labels, lagrange):
    """Compute a learning curve: mean test error versus training-set size.

    For each of 10 trials the training samples are put in a fresh random
    order; for every prefix length 1..N of that order a model is fitted with
    ``regress`` (using the given ``lagrange`` value) and scored on the test
    set with ``mse``.

    Returns:
        Array of length N (``train_data.shape[DATA_AXIS]``) whose entry
        ``n - 1`` is the test error averaged over the 10 trials when training
        on ``n`` samples.
    """
    trials = 10
    total = train_data.shape[DATA_AXIS]
    error_sums = np.zeros(total)

    for _ in range(trials):
        order = list(range(total))
        shuffle(order)
        for size in range(1, total + 1):
            chosen = order[:size]
            coefs = regress(train_data[chosen], train_labels[chosen], lagrange)
            error_sums[size - 1] += mse(test_labels, predict(test_data, coefs))

    return error_sums / trials
def get_polynomial_log_likelihood(self, ys, tree):
    """Return the negated ABC distance between ``ys`` and the polynomial ``tree``.

    This stands in for log p(ys | tree, xs): rather than a true likelihood it
    returns minus the ABC distance, where the distance is log(1 + mse). The
    polynomial is evaluated at ``self.xs`` (per the original documentation,
    torch.linspace(-10, 10, 100)).

    Args:
        ys: torch.tensor of shape [100]
        tree: list of lists or string

    Returns:
        -log(1 + mse(ys, eval(tree))); scalar tensor
    """
    predicted = util.eval_polynomial(tree, self.xs)
    abc_distance = torch.log(1 + util.mse(ys, predicted))
    return -abc_distance
def plot_regression(name, train_file, test_file):
    """Plot training and testing error against the Lagrange multiplier.

    For every integer multiplier 0..150 a model is fitted on the training
    file with ``regress`` and scored with ``mse`` on both the training and
    the testing file. The per-multiplier errors are written to
    'logs/q1/<name>.log' and the two error curves are saved to
    'plots/q1/<name>.png'. Progress is reported on stdout.

    Args:
        name: data set name; used in the plot title and output file names.
        train_file: path handed to ``load_data`` for the training set.
        test_file: path handed to ``load_data`` for the testing set.
    """
    stdout.write("Drawing plot for data set '%s'... " % name)
    stdout.flush()

    train_data, train_labels = load_data(train_file, dummy=1.0)
    test_data, test_labels = load_data(test_file, dummy=1.0)

    lagranges = list(range(151))
    train_errors = []
    test_errors = []
    log_path = 'logs/q1/%s.log' % name
    plot_path = 'plots/q1/%s.png' % name
    message = 'lagrange=%d train_error=%.3f test_error=%.3f\n'

    # for each lagrange regress and calculate error; the context manager
    # guarantees the log is closed even if regression or scoring raises
    with open(log_path, 'w') as log:
        for lagrange in lagranges:
            coefs = regress(train_data, train_labels, lagrange)

            predicted = predict(train_data, coefs)
            train_error = mse(train_labels, predicted)
            train_errors.append(train_error)

            predicted = predict(test_data, coefs)
            test_error = mse(test_labels, predicted)
            test_errors.append(test_error)

            log.write(message % (lagrange, train_error, test_error))

    # plot errors as a function of the lagrange
    pyplot.figure()
    pyplot.xlim(0, 150)
    pyplot.title("Data set '%s'" % name)
    pyplot.xlabel('Lagrange multiplier')
    pyplot.ylabel('Mean squared error')
    pyplot.plot(lagranges, train_errors, label="Training")
    pyplot.plot(lagranges, test_errors, label="Testing")
    pyplot.legend(loc='lower right')
    pyplot.savefig(plot_path)

    stdout.write("done.\n")
    stdout.write("Plot image written to '%s'.\n" % plot_path)
    stdout.write("Plot data written to '%s'.\n" % log_path)
    stdout.flush()
def test_one_epoch(self, loader, epoch):
    """Evaluate the model on ``loader`` and log the metrics for ``epoch``.

    Puts ``self.G`` and ``self.D`` in eval mode, runs ``self.predict`` on
    every batch, and gathers images, labels and predictions as NumPy arrays.
    The resulting log (loss, raw arrays, aggregate and ``pp_*`` metrics) is
    written through ``self.logger`` with ``stage='test'``. When the summed
    loss beats ``self.best_test_loss`` the model is saved as
    'model.best.t7' under ``<exp_path>/models``.

    Returns:
        The log dictionary that was written to the logger.
    """
    self.G.eval()
    self.D.eval()

    loss_sum = 0.0
    seen = 0
    img_batches, label_batches, pred_batches = [], [], []

    # NOTE(review): no torch.no_grad() here; unless self.predict disables
    # gradients itself, autograd graphs are built during evaluation.
    for img, label in tqdm(loader):
        img = img.to(self.device)
        label = label.to(self.device)

        pred_label = self.predict(img)
        loss = self.criterion(pred_label, label)

        # weight the batch loss by its size so the epoch loss is per-example
        count = img.size(0)
        loss_sum += loss.item() * count
        seen += count

        img_batches.append(img.cpu().numpy())
        label_batches.append(label.cpu().numpy())
        pred_batches.append(pred_label.detach().cpu().numpy())

    all_imgs = np.concatenate(img_batches, axis=0)
    all_labels = np.concatenate(label_batches, axis=0)
    all_preds = np.concatenate(pred_batches, axis=0)

    log = {
        'loss': loss_sum / seen,
        'img': all_imgs,
        'label': all_labels,
        'pred_label': all_preds,
    }
    log['pp_r2'] = pp_r2(all_preds, all_labels)
    log['mse'] = mse(all_preds, all_labels)
    log['rmse'] = rmse(all_preds, all_labels)
    log['mae'] = mae(all_preds, all_labels)
    log['pp_mse'] = pp_mse(all_preds, all_labels).tolist()
    log['pp_rmse'] = pp_rmse(all_preds, all_labels).tolist()
    log['pp_mae'] = pp_mae(all_preds, all_labels).tolist()
    log['avg_r2'] = np.mean(log['pp_r2'])

    self.logger.write(log, epoch=epoch, stage='test')

    # NOTE(review): the comparison uses the example-weighted SUM of the loss,
    # not the mean stored in log['loss'].
    if loss_sum < self.best_test_loss:
        self.best_test_loss = loss_sum
        self.save(os.path.join(self.exp_path, 'models', 'model.best.t7'))

    return log
def cross_validate(name, file):
    """Pick the best Lagrange multiplier for a data set by cross-validation.

    The samples are shuffled and split into ``NUM_FOLDS`` contiguous folds.
    For every integer multiplier 0..150 the model is fitted on all but one
    fold with ``regress`` and scored on the held-out fold with ``mse``; the
    multiplier with the lowest fold-averaged error wins. Per-fold errors are
    written to 'logs/q3/<name>.log' and the result is reported on stdout.

    Args:
        name: data set name; used in messages and the log file name.
        file: path handed to ``load_data``.

    Raises:
        ValueError: if no multiplier yields an average error below infinity
            (for example when a fold is empty and its error is NaN).
    """
    data, labels = load_data(file, dummy=1.0)
    log_path = 'logs/q3/%s.log' % name
    message = 'lagrange=%d fold=%d error=%.3f\n'

    # float('inf') is a true upper bound on any error and, unlike maxint,
    # exists on both Python 2 and Python 3.
    best_error = float('inf')
    best_lagrange = None

    # the context manager closes the log even if an evaluation step raises
    with open(log_path, 'w') as log:
        stdout.write("Evaluating data set '%s'..." % name)
        stdout.flush()

        # split the data into folds
        num_samples = data.shape[DATA_AXIS]
        indices = list(range(num_samples))
        shuffle(indices)
        fold_size = ceil(float(num_samples) / NUM_FOLDS)

        # evaluate each lagrange
        for lagrange in range(0, 151):
            avg_error = 0.0

            # try each fold and average the errors
            for i in range(NUM_FOLDS):
                low = int(i * fold_size)
                high = int((i + 1) * fold_size)
                train_indices = indices[:low] + indices[high:]
                test_indices = indices[low:high]

                coefs = regress(data[train_indices], labels[train_indices],
                                lagrange)
                predicted = predict(data[test_indices], coefs)
                error = mse(labels[test_indices], predicted)
                avg_error += error / NUM_FOLDS

                log.write(message % (lagrange, i, error))

            # update best error and lagrange if result is better
            if avg_error < best_error:
                best_error = avg_error
                best_lagrange = lagrange

    if best_lagrange is None:
        raise ValueError(
            "no Lagrange value produced a finite average error for data "
            "set '%s'" % name)

    # report the results
    stdout.write('done.\n')
    stdout.write('Best Lagrange value is %d.\n' % best_lagrange)
    stdout.write('Best error is %.3f.\n' % best_error)
    stdout.write("Logs written to '%s'.\n" % log_path)
    stdout.flush()
def train_one_epoch(self, loader, epoch):
    """Train ``self.G`` for one pass over ``loader`` and log the metrics.

    Each batch is optimized on the sum of two terms: ``self.criterion``
    applied to prediction and label after both are multiplied by
    ``self.param_scale``, and the sum of squares of ``self.psf`` times the
    prediction error. Predictions and labels are collected to compute the
    epoch metrics, which are written through ``self.logger``. A checkpoint
    'model.<epoch>.t7' is saved under ``<exp_path>/models`` every epoch.

    Returns:
        The log dictionary that was written to the logger.
    """
    self.G.train()
    # NOTE(review): the scheduler is stepped before any optimizer step of the
    # epoch; recent PyTorch expects optimizer.step() first. Left as-is
    # because moving it would shift the learning-rate schedule.
    self.scheduler.step()

    loss_sum = 0.0
    seen = 0
    pred_batches, label_batches = [], []

    for img, label in tqdm(loader):
        img = img.to(self.device)
        label = label.to(self.device)

        self.opt.zero_grad()
        pred_label = self.predict(img)

        pred_batches.append(pred_label.detach().cpu().numpy())
        label_batches.append(label.cpu().numpy())

        scaled_term = self.criterion(pred_label * self.param_scale,
                                     label * self.param_scale)
        psf_term = torch.sum((self.psf * (pred_label - label)) ** 2)
        loss = scaled_term + psf_term
        loss.backward()
        self.opt.step()

        # weight the batch loss by its size so the epoch loss is per-example
        count = img.size(0)
        loss_sum += loss.item() * count
        seen += count

    all_preds = np.concatenate(pred_batches, axis=0)
    all_labels = np.concatenate(label_batches, axis=0)

    log = {'loss': loss_sum / seen}
    log['pp_r2'] = pp_r2(all_preds, all_labels)
    log['mse'] = mse(all_preds, all_labels)
    log['rmse'] = rmse(all_preds, all_labels)
    log['mae'] = mae(all_preds, all_labels)
    log['pp_mse'] = pp_mse(all_preds, all_labels).tolist()
    log['pp_rmse'] = pp_rmse(all_preds, all_labels).tolist()
    log['pp_mae'] = pp_mae(all_preds, all_labels).tolist()
    log['avg_r2'] = np.mean(log['pp_r2'])

    self.logger.write(log, epoch=epoch)
    self.save(os.path.join(self.exp_path, 'models', 'model.%d.t7' % epoch))

    return log
def cv(X, y, c=1, wts=None, nfolds=10):
    """
    Estimate the cross-validation error of ridge regression.

    The data are split into ``nfolds`` folds stratified on ``sign(y)``. A
    ``Ridge`` model is fitted on the training part of every split and its
    mean squared error on the held-out part is recorded.

    Parameters
    ----------
    X: array of shape = [n_samples, n_features]
        Input examples
    y: array of shape = [n_samples]
        labels
    c: positive real number
        regularization parameter (passed to Ridge as ``alpha``)
    wts: array of shape = [n_samples]
        example weights; when None the model is fitted unweighted
    nfolds: scalar
        no. of folds in cross-validation

    Returns
    -------
    average mean squared error (cross-validation error)
    """
    # NOTE(review): this is the legacy StratifiedKFold signature (labels
    # passed to the constructor, iterable object) -- confirm the installed
    # scikit-learn still provides it.
    folds = StratifiedKFold(sign(y), n_folds=nfolds)

    fold_errors = []
    for train_ids, test_ids in folds:
        # only pass sample_weight when weights were actually supplied
        fit_kwargs = {}
        if wts is not None:
            fit_kwargs['sample_weight'] = wts[train_ids]

        ridge = Ridge(alpha=c)
        ridge.fit(X[train_ids], y[train_ids], **fit_kwargs)

        predictions = ridge.predict(X[test_ids])
        fold_errors.append(util.mse(predictions, y[test_ids]))

    return mean(fold_errors)