def predict(num=100, each=100, prediction_file='predictions.csv', results_file='results.csv', max_length=3720, models=None):
    """Run `num` rounds of `each` predictions on the models.

    Args:
        num: Number of prediction rounds to run.
        each: Number of predictions per round.
        prediction_file: CSV path where per-prediction rows are written.
        results_file: CSV path where per-model accuracy rows are written.
        max_length: Maximum input length forwarded to `_run_predictions`.
        models: Optional pre-loaded model list; when falsy, the default
            model set is constructed and each model's `load()` is called.

    Returns:
        Tuple of (prediction_file, results_file) paths that were written.
    """
    print('Running {} prediction rounds of {} each'.format(num, each))
    print("Total predictions: {}".format(num * each))
    if not models:
        print("Loading models...")
        models = [
            BayesianRidgeRegression(),
            LogisticRegression(),
            RandomForestClassifier(),
            SupportVectorClassifier()
        ]
        # BUG FIX: the original iterated `default_models`, an undefined name
        # (NameError at runtime); the freshly built list is bound to `models`.
        for m in models:
            print("Loading {!s}...".format(m))
            m.load()
    # Accumulator frames; `_run_predictions` returns updated copies each round.
    predictions = pd.DataFrame(columns=['model', 'predicted_y', 'actual_y', 'correct'])
    results = pd.DataFrame(columns=['model', 'accuracy'])
    for i in range(num):
        print('----------------------------')
        print('Round: {} [of {}]'.format(i + 1, num))
        (predictions, results) = _run_predictions(predictions, results, models, max_length, each)
    print('----------------------------')
    print('Saving predictions...')
    predictions.to_csv(prediction_file, index=False)
    results.to_csv(results_file, index=False)
    print('Finished predicting.')
    return (prediction_file, results_file)
from utils import load_data, optimizer, Accuracy

# Fix the RNG so runs are reproducible.
np.random.seed(2020)

# --- Data -----------------------------------------------------------------
train_data, test_data = load_data('RedWine')
x_train, y_train = train_data[0], train_data[1]
x_test, y_test = test_data[0], test_data[1]

# --- Hyper-parameters -----------------------------------------------------
_epoch = 1000
_batch_size = 32
_lr = 0.001
_optim = 'SGD'

# --- Model ----------------------------------------------------------------
model = LogisticRegression(num_features=x_train.shape[1])
# NOTE: rebinds the imported `optimizer` factory name to the instance it builds.
optimizer = optimizer(_optim)

# --- Training -------------------------------------------------------------
print('Train start!')
model.fit(x=x_train, y=y_train, epochs=_epoch, batch_size=_batch_size, lr=_lr, optim=optimizer)
print('Trained done.')

# --- Inference ------------------------------------------------------------
print('Predict on test data')
inference = model.eval(x_test)

# --- Evaluation -----------------------------------------------------------
error = Accuracy(inference, y_test)
print('Accuracy on Test Data : %.4f' % error)
_optim = 'SGD'
_batch_size = 50

# ========================= EDIT HERE ========================
# Tuned hyper-parameters: number of epochs (_epoch) and the
# learning rate (_lr).
_epoch = 10000
_lr = 0.002
# ============================================================

# Build the model on the engineered feature matrix.
model = LogisticRegression(num_features=x_new_data.shape[1])
optimizer = optimizer(_optim)

# Train on the engineered features.
print('Train start.')
model.fit(x=x_new_data, y=y_train, epochs=_epoch, batch_size=_batch_size, lr=_lr, optim=optimizer)
print('Trained done.')

# Apply the same feature transform to the test set before predicting.
print('Predict on test data')
inference = model.eval(feature_func_(x_test))
# Grid-search over (tradeoff, rate) pairs, scoring each configuration with
# 5-fold stratified cross-validation and tracking the best in `bestParams`.
# NOTE: converted from Python 2 `print` statements to `print()` calls for
# consistency with the rest of the file; output text is unchanged.
numConfigs = len(tradeoff) * len(rate)
for tradeoffConstant in tradeoff:
    for r in rate:
        print("Evaluating configuration %i of %i. (%.2f%%)" % (
            configCount, numConfigs, float(configCount) * 100 / float(numConfigs)))
        print("Tradeoff: %.4f r: %.2f" % (tradeoffConstant, r))
        configCount += 1
        splitCount = 1
        accuracies = []
        # FIX: shuffle=True is required for random_state to take effect;
        # modern scikit-learn raises ValueError when random_state is set
        # while shuffle is False.
        for trainInds, evalInds in StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(x_train, y_train):
            x_train_split = x_train[trainInds]
            y_train_split = y_train[trainInds]
            x_eval_split = x_train[evalInds]
            y_eval_split = y_train[evalInds]
            model = LogisticRegression(sigma=tradeoffConstant, r=r)
            model.fit(x_train_split, y_train_split)
            predictions = model.predict(x_eval_split)
            # Fraction of correct predictions on the held-out fold
            # (same value as the original where/sum/divide chain).
            accuracy = float(np.mean(predictions == y_eval_split))
            accuracies.append(accuracy)
            print("Split %i of %i. Accuracy: %.2f" % (splitCount, 5, accuracy))
            splitCount += 1
        averageAccuracy = float(np.mean(accuracies))
        if averageAccuracy > bestParams["accuracy"]:
            bestParams["accuracy"] = averageAccuracy
            bestParams["params"]["sigma"] = tradeoffConstant
            bestParams["params"]["r"] = r
print("Best params for %s:" % type(model))
print(bestParams["params"])
print("Best Average Training Accuracy:")
def main():
    """Cross-validated training entry point.

    Parses CLI args into the global `opt`, builds 5 stratified CV splits,
    and for each split: loads/constructs the dataloaders (with optional
    PCA), builds the requested architecture, trains it, evaluates the best
    checkpoint on the validation set, and optionally on the test set.
    Accuracies are accumulated and logged at the end.
    """
    global opt
    opt = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Set up logging; default save path is timestamped under 'save/'.
    if opt.savepath is None:  # FIX: identity check, not `== None`
        path = os.path.join('save', datetime.datetime.now().strftime("%d-%H-%M-%S"))
    else:
        path = opt.savepath
    os.makedirs(path, exist_ok=True)
    logger = utils.Logger(path)

    # Keep track of accuracies across splits
    val_accuracies = []
    test_accuracies = []

    # Seed for cross-val split; negative opt.seed means "pick one at random".
    seed = random.randint(0, 10000) if opt.seed < 0 else opt.seed
    logger.log('SEED: {}'.format(seed), stdout=False)

    # Load data (or dummies when iterating over preloaded splits).
    if opt.preloaded_splits.lower() == 'none':
        start = time.time()
        data, label = get_data(opt.data, opt.label)
        logger.log('Data loaded in {:.1f}s\n'.format(time.time() - start))
    else:
        data, label = np.zeros(5), np.zeros(5)  # dummy labels for iterating over
        logger.log('Using preloaded splits\n')

    # Create cross-validation splits
    kf = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

    # Cross validate
    for i, (train_index, test_index) in enumerate(kf.split(data, label)):
        logger.log('------------- SPLIT {} --------------\n'.format(i + 1))

        # Train / test split (ignored if opt.preloaded_splits is not 'none')
        X, X_test = data[train_index], data[test_index]
        y, y_test = label[train_index], label[test_index]

        # Perform PCA and generate dataloader or load from saved file.
        # The 'exp' arch applies PCA inside the model, so skip it here.
        start = time.time()
        apply_pca_transform = (opt.arch not in ['exp'])
        train_loader, val_loader, test_loader, pca_components, input_size, num_classes, pca_matrix = \
            get_dataloader(opt.preloaded_splits, X, X_test, y, y_test,
                           batch_size=opt.b, val_fraction=opt.val_fraction,
                           pca_components=opt.pca_components,
                           apply_pca_transform=apply_pca_transform,
                           imputation_dim=opt.impute, split=i,
                           save_dataset=(not opt.no_save_dataset))
        logger.log('Dataloader loaded in {:.1f}s\n'.format(time.time() - start))

        # Model selection by architecture flag.
        arch = opt.arch.lower()
        assert arch in ['logreg', 'mlp', 'exp']
        if arch == 'logreg':
            model = LogisticRegression(input_size, opt.pca_components, num_classes)
        elif arch == 'mlp':
            model = MLP(input_size, opt.hidden_size, num_classes, opt.dp)
        elif arch == 'exp':
            model = ExperimentalModel(input_size, opt.pca_components, opt.hidden_size, num_classes, opt.dp)

        # Pretrained / Initialization
        if opt.model is not None and os.path.isfile(opt.model):
            # Pretrained model
            model.load_state_dict(torch.load(opt.model))
            logger.log('Loaded pretrained model.', stdout=(i == 0))
        else:
            # Initialize model uniformly
            for p in model.parameters():
                p.data.uniform_(-0.1, 0.1)
            logger.log('Initialized model from scratch.', stdout=(i == 0))
        model = model.cuda() if use_gpu else model
        print(model)

        # Initialize first layer with PCA and fix PCA weights if model requires
        if opt.arch in ['exp']:
            model.first_layer.weight.data.copy_(pca_matrix)
            logger.log('Initialized first layer as PCA', stdout=(i == 0))
            if not opt.finetune_pca:
                model.first_layer.weight.requires_grad = False
                logger.log('Fixed PCA weights', stdout=(i == 0))

        # Loss function and optimizer.
        # FIX: size_average=False is long deprecated; reduction='sum' is the
        # exact equivalent (sum losses over the batch instead of averaging).
        criterion = nn.CrossEntropyLoss(reduction='sum')
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                     lr=opt.lr, weight_decay=opt.wd)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'max', patience=opt.lr_decay_patience,
            factor=opt.lr_decay_factor, verbose=True,
            cooldown=opt.lr_decay_cooldown)

        # Log parameters
        logger.log('COMMAND LINE ARGS: ' + ' '.join(sys.argv), stdout=False)
        logger.log('ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'.format(
            opt, optimizer, opt.lr, vars(scheduler), model), stdout=False)

        # If specified, only evaluate model (returns after the first split).
        if opt.evaluate:
            assert opt.model is not None, 'no pretrained model to evaluate'
            total_correct, total, _ = validate(model, val_loader, criterion)
            logger.log('Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
                total_correct / total, total_correct, total))
            return

        # Train model
        start_time = time.time()
        best_acc = train(model, train_loader, val_loader, optimizer, criterion, logger,
                         num_epochs=opt.epochs, print_freq=opt.print_freq, model_id=i)
        logger.log('Best train accuracy: {:.2f}% \t Finished split {} in {:.2f}s\n'.format(
            100 * best_acc, i + 1, time.time() - start_time))
        val_accuracies.append(best_acc)

        # Best evaluation on validation set: reload best checkpoint for this split.
        best_model_path = os.path.join(path, 'model_{}.pth'.format(i))
        model.load_state_dict(torch.load(best_model_path))  # load best model
        total_correct, total, _ = validate(model, val_loader, criterion)  # check val set
        logger.log('Val Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
            total_correct / total, total_correct, total))

        # Optionally also evaluate on test set
        if opt.test:
            total_correct, total, visualize = validate(model, test_loader, criterion, visualize=True)  # run test set
            logger.log('Test Accuracy: {:.3f} \t Total correct: {} \t Total: {}\n'.format(
                total_correct / total, total_correct, total))
            logger.save_model(visualize, 'visualize_{}.pth'.format(i))
            test_accuracies.append(total_correct / total)

    # Log after training
    logger.log('Val Accuracies: {}'.format(val_accuracies))
    logger.log('Test Accuracies: {}'.format(test_accuracies))
    logger.log('Run id: {} \t Test Accuracies: {}'.format(opt.id, test_accuracies))
## ----------------------------------------------------------------------------------------------------
# Test cases for the Logistic Regression implementation.
import numpy as np
import math
from sklearn import datasets  # FIX: `datasets` was used below but never imported (NameError)
from models.LogisticRegression import LogisticRegression

# Binary subset of iris: keep only classes 0 and 1.
iris = datasets.load_iris()
X = iris['data']
y = iris['target']
X = X[y != 2]
y = y[y != 2]

# Case 1: learning rate fixed at 0.01.
Logstic = LogisticRegression(X, y, threshold=0.5)
Logstic.fit(alpha=0.01, accuracy=0.001)
print("epoch:", Logstic.epoch)
print("theta:", Logstic.thetas)
y_predict = Logstic.predict()
y_predict

# Case 2: automatically controlled (decaying) learning rate.
Logstic2 = LogisticRegression(X, y, threshold=0.5)
Logstic2.auto_fit(accuracy=0.001)
print("epoch:", Logstic2.epoch)
print("theta:", Logstic2.thetas)
y_predict = Logstic2.predict()
y_predict
## ----------------------------------------------------------------------------------------------------