def main(train_path, test_path, num_examples, **kwargs):
    """Run the CIFAR-10 experiment over and over, printing running statistics.

    Training labels, training features (``train_path``) and test features
    (``test_path``) are loaded once. Afterwards the function loops without
    an exit condition: each pass calls ``run_experiment`` again and prints
    the mean and standard deviation of every accuracy gathered so far.
    The process has to be stopped from outside.

    ``num_examples`` and any extra keyword arguments are handed straight to
    ``run_experiment``.
    """
    train_y, unused_folds = get_labels_and_fold_indices(cifar10=True,
                                                        stl10=False)
    del unused_folds  # the fold split is not needed here
    assert train_y is not None

    train_X = get_features(train_path, split=False)
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)

    test_X = get_features(test_path, split=False)
    test_y = get_test_labels(cifar10=True, stl10=False)

    accuracies = []
    while True:
        outcome = run_experiment(train_X, train_y, test_X, test_y,
                                 num_examples, **kwargs)
        accuracies.append(outcome)
        history = np.asarray(accuracies)
        mean_acc = history.mean()
        std_acc = history.std()
        print('accuracy: %f +- %f' % (mean_acc, std_acc))
def main(train_path, out_path, split, **kwargs):
    """Train a model from omnivore/fruit classifiers plus auxiliary CIFAR-100 data.

    Parameters
    ----------
    train_path : str
        Path of the training features. The auxiliary CIFAR features are
        loaded from the same path with ``'aux'`` replaced by ``'train'``.
    out_path : str
        Prefix for the outputs: the model goes to ``out_path + '.model.pkl'``
        and the report to ``out_path + '.validation_report.txt'``.
    split
        Forwarded to ``get_features`` and ``Report``.
    **kwargs
        Forwarded to ``train``.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    assert y_fine.shape == (120,)
    assert y_coarse.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers('omnivore', train_X, y_fine,
                                           y_coarse, fold_indices)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers('fruit', train_X, y_fine,
                                        y_coarse, fold_indices)

    print('loading cifar features')
    aux_features = get_features(train_path.replace('aux', 'train'), False)
    print('loading cifar labels')
    aux_labels = CIFAR100(which_set='train').y_coarse

    print('making masks')
    # The mask has to be boolean. An integer (e.g. uint8) array used as an
    # index triggers integer fancy indexing, which would return rows 0 and 1
    # over and over instead of keeping only the selected classes.
    # NOTE(review): assumes aux_labels is 1-D with one entry per feature
    # row -- confirm against the CIFAR100 dataset class.
    mask = np.zeros(aux_labels.shape, dtype=bool)
    for label in [4, 11, 3, 12, 7, 6]:
        mask |= (aux_labels == label)

    print('restricting classes')
    aux_features = aux_features[mask, :]
    aux_labels = aux_labels[mask]

    print('downsampling data')
    # Keep only the first 300 of the retained auxiliary examples.
    aux_features = aux_features[0:300, :]
    aux_labels = aux_labels[0:300]

    print('main train loop')
    model = train(fold_indices, omnivore_classifiers, fruit_classifiers,
                  train_X, y_fine, y_coarse, aux_features, aux_labels,
                  report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
def main(model_path, test_path, output_path, dataset, split, **kwargs):
    """Evaluate a saved model on the test set of ``dataset``.

    ``dataset`` must be exactly one of ``'cifar10'``, ``'cifar100'`` or
    ``'stl10'``. The test features are read from ``test_path`` and must
    contain 8000 rows for STL-10 or 10000 rows for either CIFAR set;
    otherwise an ``AssertionError`` is raised. The model, features, labels
    and ``output_path`` are then passed to ``test``.
    """
    model = serial.load(model_path)

    cifar100, cifar10, stl10 = (
        dataset == name for name in ('cifar100', 'cifar10', 'stl10'))
    assert cifar10 + cifar100 + stl10 == 1

    y = get_test_labels(cifar10, cifar100, stl10)
    X = get_features(test_path, split, False)

    if stl10:
        num_examples = 8000
    elif cifar10 or cifar100:
        num_examples = 10000

    found = X.shape[0]
    if found != num_examples:
        raise AssertionError('Expected %d examples but got %d'
                             % (num_examples, found))
    assert y.shape[0] == num_examples

    test(model, X, y, output_path)
def main(train_path, out_path, split, **kwargs):
    """Train and save one model per training subset ('omnivore', then 'fruit').

    The features at ``train_path`` are loaded once. For each subset a fresh
    ``Report`` is created, the subset is extracted with
    ``get_training_subset``, a model is trained, and both the model and the
    report are written under ``out_path`` with the subset name in the file
    name. Extra keyword arguments are forwarded to ``train``.
    """
    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120

    for subset in ('omnivore', 'fruit'):
        report = Report(train_path, split)
        subset_X, subset_y, fold_indices = get_training_subset(train_X,
                                                               subset)
        model = train(fold_indices, subset_X, subset_y, report, **kwargs)

        # Yields '<out_path>.<subset>.model.pkl' and
        # '<out_path>.<subset>.validation_report.txt'.
        prefix = out_path + '.' + subset
        serial.save(prefix + '.model.pkl', model)
        report.write(prefix + '.validation_report.txt')
def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Train a model on the whole training set of ``dataset`` and save it.

    ``dataset`` must be exactly one of ``'stl10'``, ``'cifar10'`` or
    ``'cifar100'``. Features are loaded unsplit from ``train_path`` (with
    ``standardize`` forwarded to ``get_features``), sanity-checked for dtype
    and size, and passed to ``train`` together with the labels and ``C``.
    The resulting model is saved to ``out_path``.
    """
    stl10, cifar10, cifar100 = (
        dataset == name for name in ('stl10', 'cifar10', 'cifar100'))
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    assert str(train_X.dtype) == 'float32'

    num_rows = train_X.shape[0]
    if stl10:
        assert num_rows == 5000
    if cifar10 or cifar100:
        assert num_rows == 50000
        assert train_y.shape == (50000,)

    print('training model')
    model = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, model)
def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Fit a model on the complete training data of ``dataset`` and store it.

    Exactly one of ``'stl10'``, ``'cifar10'`` and ``'cifar100'`` is accepted
    for ``dataset``. The unsplit features from ``train_path`` are checked
    for float32 dtype and the expected number of rows before ``train`` is
    called with the features, the labels and ``C``. The trained model is
    written to ``out_path``.
    """
    is_selected = dict(
        (name, dataset == name) for name in ('stl10', 'cifar10', 'cifar100'))
    stl10 = is_selected['stl10']
    cifar10 = is_selected['cifar10']
    cifar100 = is_selected['cifar100']
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    train_y, fold_indices = get_labels_and_fold_indices(
        cifar10, cifar100, stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    assert str(train_X.dtype) == 'float32'

    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    print('training model')
    fitted = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, fitted)
def main(train_path, out_path, split, **kwargs):
    """Train the 'omnivore' model and then the 'fruit' model, saving both.

    Features are loaded once from ``train_path``. Each subset gets its own
    ``Report``; the trained model and the report are written to
    ``out_path + '.<subset>.model.pkl'`` and
    ``out_path + '.<subset>.validation_report.txt'``. Extra keyword
    arguments are forwarded to ``train``.
    """
    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120

    def train_subset(which):
        # One complete pass: report, subset extraction, training, saving.
        report = Report(train_path, split)
        features, labels, fold_indices = get_training_subset(train_X, which)
        model = train(fold_indices, features, labels, report, **kwargs)
        serial.save(out_path + '.' + which + '.model.pkl', model)
        report.write(out_path + '.' + which + '.validation_report.txt')

    train_subset('omnivore')
    train_subset('fruit')
def main(train_path, out_path, split, **kwargs):
    """Train a model from omnivore and fruit classifiers and save it.

    Loads the fine and coarse labels plus fold indices, loads the features
    at ``train_path``, and builds one classifier set per group via
    ``get_classifiers``. ``train`` receives both sets together with the
    data and a ``Report``. The model is written to
    ``out_path + '.model.pkl'`` and the report to
    ``out_path + '.validation_report.txt'``. Extra keyword arguments are
    forwarded to ``train``.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()

    expected_rows = 120
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == expected_rows
    assert y_fine.shape == (expected_rows,)
    assert y_coarse.shape == (expected_rows,)

    report = Report(train_path, split)
    gc.collect()

    classifier_args = (train_X, y_fine, y_coarse, fold_indices)
    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers('omnivore', *classifier_args)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers('fruit', *classifier_args)

    model = train(fold_indices, omnivore_classifiers, fruit_classifiers,
                  train_X, y_fine, y_coarse, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
def main(model_path, data_path, split, **kwargs):
    """Interactively step through test predictions in batches of 100.

    For every batch the raw images are shown in a viewer with the
    misclassified examples flagged, a per-example verdict is printed, and
    the batch accuracy is reported. The function then waits for a line of
    input; entering ``q`` stops the loop.

    Parameters
    ----------
    model_path : str
        Path of the serialized model to load.
    data_path : str
        Path of the test features; exactly 8000 rows are required.
    split
        Forwarded to ``get_features``.
    **kwargs
        Accepted for interface compatibility; not used.
    """
    model = serial.load(model_path)
    raw_dataset = get_test_data()
    X = get_features(data_path, split, False)
    assert X.shape[0] == 8000

    size = 100
    class_names = raw_dataset.class_names

    # Iterate up to X.shape[0]. The previous bound, X.shape[0] - size,
    # excluded the start index of the final batch, so the last `size`
    # examples were never displayed.
    for start in xrange(0, X.shape[0], size):
        stop = start + size
        y = raw_dataset.y[start:stop]
        pred_y = model.predict(X[start:stop, :])
        wrong_mask = y != pred_y

        raw_X = raw_dataset.X[start:stop, :]
        pv = make_viewer(raw_X / 127.5, rescale=False, is_color=True,
                         activation=wrong_mask)
        pv.show()

        right = 0
        for i in xrange(y.shape[0]):
            # Class names are looked up at index (label - 1).
            true_name = class_names[y[i] - 1]
            if y[i] == pred_y[i]:
                right += 1
                print(str(start + i) + ': correct (' + true_name + ')')
            else:
                print(str(start + i) + ': mistook ' + true_name + ' for '
                      + class_names[pred_y[i] - 1])

        # Divide by the real batch length so a short batch is still scored
        # correctly. The two spaces reproduce the output of the former
        # two-item print statement.
        print('accuracy this batch :  '
              + str(float(right) / float(y.shape[0])))

        x = raw_input()
        if x == 'q':
            break
def main(model_path, data_path, split, **kwargs):
    """Interactively step through test predictions in batches of 25.

    Each batch is displayed in a viewer with the misclassified examples
    flagged, followed by one printed line per example and the batch
    accuracy. After each batch the function reads a line of input and stops
    when that line is ``q``.

    Parameters
    ----------
    model_path : str
        Path of the serialized model to load.
    data_path : str
        Path of the test features; exactly 8000 rows are required.
    split
        Forwarded to ``get_features``.
    **kwargs
        Accepted for interface compatibility; not used.
    """
    model = serial.load(model_path)
    raw_dataset = get_test_data()
    X = get_features(data_path, split, False)
    assert X.shape[0] == 8000

    size = 25
    class_names = raw_dataset.class_names

    # The upper bound is X.shape[0]; using X.shape[0] - size dropped the
    # start index of the last batch, so the final `size` examples were
    # silently skipped.
    for start in xrange(0, X.shape[0], size):
        stop = start + size
        y = raw_dataset.y[start:stop]
        pred_y = model.predict(X[start:stop, :])
        wrong_mask = y != pred_y

        raw_X = raw_dataset.X[start:stop, :]
        pv = make_viewer(raw_X / 127.5, rescale=False, is_color=True,
                         activation=wrong_mask)
        pv.show()

        right = 0
        for i in xrange(y.shape[0]):
            # Class names are looked up at index (label - 1).
            true_name = class_names[y[i] - 1]
            if y[i] == pred_y[i]:
                right += 1
                print(str(start + i) + ': correct (' + true_name + ')')
            else:
                print(str(start + i) + ': mistook ' + true_name + ' for '
                      + class_names[pred_y[i] - 1])

        # Use the actual batch length as denominator. The two spaces keep
        # the output identical to the former two-item print statement.
        print('accuracy this batch :  '
              + str(float(right) / float(y.shape[0])))

        x = raw_input()
        if x == 'q':
            break
def main(train_path, out_path, split, **kwargs):
    """Train on the 50000-example set and evaluate against the 120-example set.

    ``train_path`` points at the 120-example features, which are used as
    the test data here; the training features are loaded from the same path
    with ``'aux'`` replaced by ``'train'``. Memory usage is printed at
    several stages when the module-level ``mem`` object is truthy. The
    model is saved to ``out_path + '.model.pkl'`` and the report to
    ``out_path + '.validation_report.txt'``. Extra keyword arguments are
    forwarded to ``train``.
    """
    def log_mem(prefix):
        # Memory diagnostics are optional and depend on `mem`.
        if mem:
            print(prefix + str(mem.usage()))

    log_mem('mem usage before getting labels and folds ')
    train_y, test_y = get_labels()
    log_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    log_mem('mem usage before getting features ')
    train_X = get_features(train_path.replace('aux', 'train'), split)
    test_X = get_features(train_path, split)
    gc.collect()
    log_mem('mem usage after getting features ')

    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)
    assert str(train_X.dtype) == 'float32'
    assert test_X.shape[0] == 120
    assert test_y.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    log_mem('mem usage before calling train: ')
    model = train(train_X, train_y, test_X, test_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
def main(train_path, out_path, split, dataset, standardize, **kwargs):
    """Train a model using the fold indices of ``dataset`` and save model and report.

    ``dataset`` must be exactly one of ``'stl10'``, ``'cifar10'`` or
    ``'cifar100'``. Features that are not C-contiguous are copied into a
    contiguous array; features that are not float32 only trigger a warning.
    Memory usage is printed at several stages when the module-level ``mem``
    object is truthy. Outputs go to ``out_path + '.model.pkl'`` and
    ``out_path + '.validation_report.txt'``. Extra keyword arguments are
    forwarded to ``train``.
    """
    def log_mem(prefix):
        # Memory diagnostics are optional and depend on `mem`.
        if mem:
            print(prefix + str(mem.usage()))

    stl10, cifar10, cifar100 = (
        dataset == name for name in ('stl10', 'cifar10', 'cifar100'))
    assert stl10 + cifar10 + cifar100 == 1

    log_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    log_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    log_mem('mem usage before getting features ')
    train_X = get_features(train_path, split, standardize)

    if not train_X.flags.c_contiguous:
        print('not C contiguous, reshaping')
        assert len(train_X.shape) == 2
        train_X = np.ascontiguousarray(train_X)
        assert train_X.flags.c_contiguous
        print('success, contiguous now')

    gc.collect()
    log_mem('mem usage after getting features ')

    if str(train_X.dtype) != 'float32':
        warnings.warn(
            'Your features are not float32, you may be wasting memory')

    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    report = Report(train_path, split, stl10, cifar10, cifar100)
    gc.collect()

    log_mem('mem usage before calling train: ')
    model = train(fold_indices, train_X, train_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
def main(train_path, out_path, split, **kwargs):
    """Fit on the 50000-example features and score on the 120-example features.

    The 120-example features come from ``train_path`` itself and act as the
    test data; the training features come from ``train_path`` with
    ``'aux'`` replaced by ``'train'``. When the module-level ``mem`` object
    is truthy, memory usage is printed around the expensive steps. The
    model is written to ``out_path + '.model.pkl'`` and the report to
    ``out_path + '.validation_report.txt'``. Extra keyword arguments are
    forwarded to ``train``.
    """
    def show_mem(message):
        # Skip silently when no memory tracker is available.
        if not mem:
            return
        print(message + str(mem.usage()))

    show_mem('mem usage before getting labels and folds ')
    train_y, test_y = get_labels()
    show_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    show_mem('mem usage before getting features ')
    big_set_path = train_path.replace('aux', 'train')
    train_X = get_features(big_set_path, split)
    test_X = get_features(train_path, split)
    gc.collect()
    show_mem('mem usage after getting features ')

    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)
    assert str(train_X.dtype) == 'float32'
    assert test_X.shape[0] == 120
    assert test_y.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    show_mem('mem usage before calling train: ')
    model = train(train_X, train_y, test_X, test_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')
def main(train_path, out_path, dataset, standardize, fold, C, log, **kwargs):
    """Validate one (C, fold) combination and write the accuracy to a report.

    Parameters
    ----------
    train_path : str
        Path of the training features (loaded unsplit).
    out_path : str
        File that receives a two-line, tab-separated report holding ``C``,
        ``fold`` and the validation accuracy.
    dataset : str
        Exactly one of ``'stl10'``, ``'cifar10'`` or ``'cifar100'``.
    standardize
        Forwarded to ``get_features``.
    fold : int
        Row of the fold-index array to validate on.
    C
        Passed to ``validate``; formatted with ``%f`` in the report.
    log
        Object with ``write`` and ``flush``; receives progress messages and
        is also passed to ``validate``.
    **kwargs
        Forwarded to ``validate``.
    """
    log.write('in main\n')
    log.flush()

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    if mem:
        print('mem usage before getting labels and folds ' + str(mem.usage()))
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    if mem:
        print('mem usage after getting labels and folds ' + str(mem.usage()))
    gc.collect()
    assert train_y is not None
    log.write('got labels and folds')
    log.flush()

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    log.write('got features')
    log.flush()

    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    print('running validate')
    acc = validate(train_X, train_y, fold_indices[fold, :], C, log, **kwargs)

    # The context manager closes the file even when formatting or writing
    # raises, so the handle is never leaked.
    with open(out_path, 'w') as report:
        report.write('C\tfold\tvalidation accuracy\n%f\t%d\t%f\n'
                     % (C, fold, acc))
def main(train_path, out_path, split, dataset, standardize, **kwargs):
    """Train models using the fold indices of ``dataset`` and save them.

    ``dataset`` must be exactly one of ``'stl10'``, ``'cifar10'`` or
    ``'cifar100'``. The features are checked for float32 dtype and the
    expected number of rows. Memory usage is printed at several stages when
    the module-level ``mem`` object is truthy. The value returned by
    ``train`` is saved to ``out_path + '.models.pkl'``; no report is
    produced. Extra keyword arguments are forwarded to ``train``.
    """
    def log_mem(prefix):
        # Memory diagnostics are optional and depend on `mem`.
        if mem:
            print(prefix + str(mem.usage()))

    stl10, cifar10, cifar100 = (
        dataset == name for name in ('stl10', 'cifar10', 'cifar100'))
    assert stl10 + cifar10 + cifar100 == 1

    log_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    log_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    log_mem('mem usage before getting features ')
    train_X = get_features(train_path, split, standardize)
    gc.collect()
    log_mem('mem usage after getting features ')

    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    gc.collect()
    log_mem('mem usage before calling train: ')
    models = train(fold_indices, train_X, train_y, **kwargs)

    serial.save(out_path + '.models.pkl', models)
def main(train_path, out_path, dataset, standardize, fold, C, log, **kwargs):
    """Validate one (C, fold) combination and write the accuracy to a report.

    Parameters
    ----------
    train_path : str
        Path of the training features (loaded unsplit).
    out_path : str
        File that receives a two-line, tab-separated report holding ``C``,
        ``fold`` and the validation accuracy.
    dataset : str
        Exactly one of ``'stl10'``, ``'cifar10'`` or ``'cifar100'``.
    standardize
        Forwarded to ``get_features``.
    fold : int
        Row of the fold-index array to validate on.
    C
        Passed to ``validate``; formatted with ``%f`` in the report.
    log
        Object with ``write`` and ``flush``; receives progress messages and
        is also passed to ``validate``.
    **kwargs
        Forwarded to ``validate``.
    """
    log.write('in main\n')
    log.flush()

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    if mem:
        print('mem usage before getting labels and folds ' + str(mem.usage()))
    train_y, fold_indices = get_labels_and_fold_indices(
        cifar10, cifar100, stl10)
    if mem:
        print('mem usage after getting labels and folds ' + str(mem.usage()))
    gc.collect()
    assert train_y is not None
    log.write('got labels and folds')
    log.flush()

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    log.write('got features')
    log.flush()

    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    print('running validate')
    acc = validate(train_X, train_y, fold_indices[fold, :], C, log, **kwargs)

    # Check the report values before opening the file: opening with 'w'
    # truncates it, so a failed check must not wipe an existing report.
    assert fold is not None
    assert C is not None
    assert acc is not None

    # The context manager closes the file even when writing raises.
    with open(out_path, 'w') as report:
        report.write('C\tfold\tvalidation accuracy\n%f\t%d\t%f\n'
                     % (C, fold, acc))
def main(model_path, test_path, dataset, **kwargs):
    """Load a saved model and run ``test`` on the test set of ``dataset``.

    ``dataset`` must be exactly one of ``'cifar100'``, ``'cifar10'`` or
    ``'stl10'``. The features at ``test_path`` must hold 8000 rows for
    STL-10 or 10000 rows for a CIFAR set; a mismatch raises
    ``AssertionError``.
    """
    model = serial.load(model_path)

    cifar100, cifar10, stl10 = (
        dataset == name for name in ('cifar100', 'cifar10', 'stl10'))
    assert cifar10 + cifar100 + stl10 == 1

    y = get_test_labels(cifar10, cifar100, stl10)
    X = get_features(test_path, False, False)

    if stl10:
        num_examples = 8000
    elif cifar10 or cifar100:
        num_examples = 10000

    actual = X.shape[0]
    if actual != num_examples:
        raise AssertionError('Expected %d examples but got %d'
                             % (num_examples, actual))
    assert y.shape[0] == num_examples

    test(model, X, y)
def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Train a model with ``train_model`` on the full set of ``dataset`` and save it.

    ``dataset`` must be exactly one of ``'stl10'``, ``'cifar10'`` or
    ``'cifar100'``. Features are loaded unsplit from ``train_path`` and
    checked for float32 dtype and the expected number of rows. Memory usage
    around the label loading is printed when the module-level ``mem``
    object is truthy. The model is written to ``out_path``; extra keyword
    arguments are forwarded to ``train_model``.
    """
    def log_mem(prefix):
        # Memory diagnostics are optional and depend on `mem`.
        if mem:
            print(prefix + str(mem.usage()))

    stl10, cifar10, cifar100 = (
        dataset == name for name in ('stl10', 'cifar10', 'cifar100'))
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    log_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100,
                                                        stl10)
    log_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    assert str(train_X.dtype) == 'float32'

    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    model = train_model(train_X, train_y, C, **kwargs)
    serial.save(out_path, model)
def main(train_path, out_path, split, **kwargs):
    """Build omnivore and fruit classifiers, train the model, and save the results.

    Fine labels, coarse labels and fold indices are loaded first, followed
    by the features at ``train_path``, which must be float32 with 120 rows.
    Both classifier sets come from ``get_classifiers`` and are handed to
    ``train`` along with the data and a ``Report``. Outputs are
    ``out_path + '.model.pkl'`` and ``out_path + '.validation_report.txt'``.
    Extra keyword arguments are forwarded to ``train``.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()

    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    for labels in (y_fine, y_coarse):
        assert labels.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    def build_classifiers(group):
        # Both groups use the same data, labels and folds.
        return get_classifiers(group, train_X, y_fine, y_coarse,
                               fold_indices)

    print('making omnivore classifiers')
    omnivore_classifiers = build_classifiers('omnivore')
    print('making fruit classifiers')
    fruit_classifiers = build_classifiers('fruit')

    model = train(fold_indices, omnivore_classifiers, fruit_classifiers,
                  train_X, y_fine, y_coarse, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')