Ejemplo n.º 1
0
def predict(num=100, each=100, prediction_file='predictions.csv', results_file='results.csv', max_length=3720, models=None):
    """Run `num` rounds of `each` predictions on the models.

    Parameters
    ----------
    num : int
        Number of prediction rounds to run.
    each : int
        Number of predictions per round.
    prediction_file : str
        CSV path the per-prediction rows are written to.
    results_file : str
        CSV path the per-model accuracy summary is written to.
    max_length : int
        Maximum input length, passed through to `_run_predictions`.
    models : list or None
        Pre-loaded models to evaluate; when falsy, the default model set
        is instantiated and each model's state is loaded from disk.

    Returns
    -------
    tuple of (str, str)
        The `(prediction_file, results_file)` paths that were written.
    """
    print('Running {} prediction rounds of {} each'.format(num, each))
    print("Total predictions: {}".format(num * each))

    if not models:
        print("Loading models...")
        models = [
            BayesianRidgeRegression(),
            LogisticRegression(),
            RandomForestClassifier(),
            SupportVectorClassifier()
        ]
        # BUG FIX: the original loop iterated over an undefined name
        # `default_models`; the freshly built list is bound to `models`.
        for m in models:
            print("Loading {!s}...".format(m))
            m.load()

    predictions = pd.DataFrame(columns=['model','predicted_y','actual_y','correct'])
    results = pd.DataFrame(columns=['model','accuracy'])
    for i in range(num):
        print('----------------------------')
        print('Round: {} [of {}]'.format(i + 1, num))
        # Each round appends to the running prediction/result frames.
        (predictions, results) = _run_predictions(predictions, results, models, max_length, each)

    print('----------------------------')
    print('Saving predictions...')
    predictions.to_csv(prediction_file, index=False)
    results.to_csv(results_file, index=False)

    print('Finished predicting.')
    return (prediction_file, results_file)
Ejemplo n.º 2
0
from utils import load_data, optimizer, Accuracy

# Fix the RNG seed so the run is reproducible.
np.random.seed(2020)

# Data generation: (features, labels) tuples for train and test splits.
train_data, test_data = load_data('RedWine')
x_train, y_train = train_data[0], train_data[1]
x_test, y_test = test_data[0], test_data[1]

# Hyper-parameters
_epoch = 1000
_batch_size = 32
_lr = 0.001
_optim = 'SGD'

# Build model
model = LogisticRegression(num_features=x_train.shape[1])
# FIX: do not rebind the imported `optimizer` factory with its own product
# (`optimizer = optimizer(_optim)`); that made the factory unusable for any
# later call.  A separate name matches the `_optimizer` convention used
# elsewhere in this file.
_optimizer = optimizer(_optim)

# Solve
print('Train start!')
model.fit(x=x_train, y=y_train, epochs=_epoch, batch_size=_batch_size, lr=_lr, optim=_optimizer)
print('Trained done.')

# Inference
print('Predict on test data')
inference = model.eval(x_test)

# Assess model
# NOTE(review): despite the name `error`, this is the Accuracy metric.
error = Accuracy(inference, y_test)
print('Accuracy on Test Data : %.4f' % error)
Ejemplo n.º 3
0
# Grid search over (tradeoff, rate) hyper-parameter pairs, scored by 5-fold
# stratified cross-validation accuracy on the training set (Python 2 syntax).
# NOTE(review): `tradeoff`, `rate`, `configCount`, `bestParams`, `x_train`
# and `y_train` are defined outside this excerpt -- confirm before reuse.
numConfigs = len(tradeoff) * len(rate)
for tradeoffConstant in tradeoff:
    for r in rate:
        # Progress report: which configuration we are on, as a percentage.
        print "Evaluating configuration %i of %i. (%.2f%%)" % (
        configCount, numConfigs, float(configCount) * 100 / float(numConfigs))
        print "Tradeoff: %.4f      r: %.2f" % (tradeoffConstant, r)
        configCount += 1
        splitCount = 1
        accuracies = []
        # Stratified folds preserve the class balance in every split.
        for trainInds, evalInds in StratifiedKFold(n_splits=5, random_state=0).split(x_train, y_train):
            x_train_split = x_train[trainInds]
            y_train_split = y_train[trainInds]
            x_eval_split = x_train[evalInds]
            y_eval_split = y_train[evalInds]

            # Train on four folds, evaluate on the held-out fold.
            model = LogisticRegression(sigma=tradeoffConstant, r=r)
            model.fit(x_train_split, y_train_split)
            predictions = model.predict(x_eval_split)
            # Fraction of held-out samples predicted correctly.
            accuracy = float(np.sum(np.where(predictions == y_eval_split, 1, 0))) / float(len(y_eval_split))
            accuracies.append(accuracy)
            print "Split %i of %i. Accuracy: %.2f" % (splitCount, 5, accuracy)
            splitCount += 1
        averageAccuracy = np.mean(np.array(accuracies))
        # Keep the best-scoring hyper-parameter pair seen so far.
        if averageAccuracy > bestParams["accuracy"]:
            bestParams["accuracy"] = averageAccuracy
            bestParams["params"]["sigma"] = tradeoffConstant
            bestParams["params"]["r"] = r

print "Best params for %s:" % type(model)
print bestParams["params"]
print "Best Average Training Accuracy:"
# NOTE(review): the print of the actual best-accuracy value appears to be
# truncated from this excerpt -- the label above is never followed by it.
# Fix the RNG seed so the check-data run is reproducible.
np.random.seed(10)

Dataset = np.loadtxt('data/logistic_check_data.txt')
x_data, y_data = Dataset[:, :-1], Dataset[:, -1]

# Hyper-parameters for the check run.
_epoch = 100
_batch_size = 5
_lr = 0.01
_optim = 'SGD'

#======================================================================================================
print('=' * 20, 'Sigmoid Test', '=' * 20)
# Three inputs of increasing rank: a vector, a matrix and a 3-D array,
# to exercise element-wise behaviour of the sigmoid.
test_case_1 = np.array([0.5, 0.5, 0.5])
test_case_2 = np.array([[6.23, -7.234, 8.3], [-1, -6.23, -9]])
test_case_3 = np.array([[[1.0, 1.1], [5.672, -4]], [[0.0, 9], [-9, 0.1]]])
# `_sigmoid` is called unbound with None for `self`, so it must not
# touch instance state.
test_result_1, test_result_2, test_result_3 = (
    LogisticRegression._sigmoid(None, case)
    for case in (test_case_1, test_case_2, test_case_3)
)

print('## Test case 1')
print('Input:\n', test_case_1)
print('Output:\n', test_result_1, end='\n\n')
print('## Test case 2')
print('Input:\n', test_case_2)
print('Output:\n', test_result_2, end='\n\n')
print('## Test case 3')
print('Input:\n', test_case_3)
print('Output:\n', test_result_3, end='\n\n')
# '''
# You should get results as:  (expected-output listing truncated in this excerpt)
Ejemplo n.º 5
0
# NOTE(review): the commented-out single-run flow below is superseded by the
# grid search that follows; consider deleting it.
# model.fit(x=x_new_data, y=y_train, epochs=_epoch, batch_size=_batch_size, lr=_lr, optim=optimizer)
# print('Trained done.')
#
# # Inference
# print('Predict on test data')
# inference = model.eval(feature_func_(x_test))
#
# # Assess model
# error = Accuracy(inference, y_test)
# print('Accuracy on test data : %.4f' % error)

# Grid search over learning rates and epoch counts.
# NOTE(review): `x_new_data`, `x_train`, `y_train`, `x_test`, `feature_func_`,
# `optimizer`, `_optim` and `_batch_size` come from elsewhere in the script --
# confirm they are in scope before running this excerpt.
_lr = [0.02, 0.01, 0.005, 0.002, 0.001]
_epoch = [1000, 2000, 3000, 4000, 5000, 10000, 20000, 30000, 40000]
for lr in _lr:
    for n_iter in _epoch:
        # A fresh model and optimizer for every (lr, epochs) combination.
        model = LogisticRegression(num_features=x_train.shape[1])
        _optimizer = optimizer(_optim)

        # Solve
        print('Train start.')
        model.fit(x=x_new_data,
                  y=y_train,
                  epochs=n_iter,
                  batch_size=_batch_size,
                  lr=lr,
                  optim=_optimizer)
        print('Trained done.')

        # Inference
        print('Predict on test data')
        inference = model.eval(feature_func_(x_test))
        # NOTE(review): accuracy evaluation/logging for this configuration
        # appears to be truncated from this excerpt.
Ejemplo n.º 6
0
def main():
    """Cross-validated training entry point.

    Parses command-line options into the module-global ``opt``, builds
    5-fold stratified splits, trains the selected architecture
    (``logreg``, ``mlp`` or ``exp``) on each split, and logs validation
    (and optionally test) accuracies.  When ``opt.evaluate`` is set,
    returns early after a single evaluation pass on the first split.
    """
    global opt
    opt = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Set up logging (timestamped directory unless an explicit path is given)
    if opt.savepath is None:  # FIX: identity comparison with None (was `== None`)
        path = os.path.join('save', datetime.datetime.now().strftime("%d-%H-%M-%S"))
    else:
        path = opt.savepath
    os.makedirs(path, exist_ok=True)
    logger = utils.Logger(path)

    # Keep track of accuracies 
    val_accuracies = []
    test_accuracies = []

    # Seed for cross-val split
    seed = random.randint(0,10000) if opt.seed < 0 else opt.seed
    logger.log('SEED: {}'.format(seed), stdout=False)

    # Load data
    if opt.preloaded_splits.lower() == 'none':
        start = time.time()
        data, label = get_data(opt.data, opt.label)
        logger.log('Data loaded in {:.1f}s\n'.format(time.time() - start))
    else:
        data, label = np.zeros(5), np.zeros(5) # dummy labels for iterating over
        logger.log('Using preloaded splits\n')

    # Create cross-validation splits
    kf = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

    # Cross validate 
    for i, (train_index, test_index) in enumerate(kf.split(data, label)):

        # Log split
        logger.log('------------- SPLIT {} --------------\n'.format(i+1))

        # Train / test split (ignored if opt.preloaded_splits is not 'none')
        X, X_test = data[train_index], data[test_index]
        y, y_test = label[train_index], label[test_index]

        # Perform PCA and generate dataloader or load from saved file
        start = time.time()
        apply_pca_transform = (opt.arch not in ['exp'])
        train_loader, val_loader, test_loader, pca_components, input_size, num_classes, pca_matrix = \
            get_dataloader(opt.preloaded_splits, X, X_test, y, y_test, batch_size=opt.b, val_fraction=opt.val_fraction, 
                           pca_components=opt.pca_components, apply_pca_transform=apply_pca_transform, 
                           imputation_dim=opt.impute, split=i, save_dataset=(not opt.no_save_dataset))
        logger.log('Dataloader loaded in {:.1f}s\n'.format(time.time() - start))

        # Model 
        arch = opt.arch.lower()
        assert arch in ['logreg', 'mlp', 'exp']
        if arch == 'logreg': 
            model = LogisticRegression(input_size, opt.pca_components, num_classes)
        elif arch == 'mlp':
            model = MLP(input_size, opt.hidden_size, num_classes, opt.dp) 
        elif arch == 'exp': 
            model = ExperimentalModel(input_size, opt.pca_components, opt.hidden_size, num_classes, opt.dp) 
        
        # Pretrained / Initialization
        if opt.model is not None and os.path.isfile(opt.model):
            # Pretrained model
            model.load_state_dict(torch.load(opt.model))
            logger.log('Loaded pretrained model.', stdout=(i==0))
        else:
            # Initialize model uniformly
            for p in model.parameters():
                p.data.uniform_(-0.1, 0.1)
            logger.log('Initialized model from scratch.', stdout=(i==0))
        model = model.cuda() if use_gpu else model
        print(model)

        # Initialize first layer with PCA and fix PCA weights if model requires
        if opt.arch in ['exp']:
            model.first_layer.weight.data.copy_(pca_matrix)
            logger.log('Initialized first layer as PCA', stdout=(i==0))
            if not opt.finetune_pca:
                model.first_layer.weight.requires_grad = False
                logger.log('Fixed PCA weights', stdout=(i==0))

        # Loss function and optimizer (only trainable params are optimized)
        criterion = nn.CrossEntropyLoss(size_average=False)
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opt.lr, weight_decay=opt.wd) 
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=opt.lr_decay_patience, 
                        factor=opt.lr_decay_factor, verbose=True, cooldown=opt.lr_decay_cooldown)

        # Log parameters
        logger.log('COMMAND LINE ARGS: ' + ' '.join(sys.argv), stdout=False)
        logger.log('ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'.format(
            opt, optimizer, opt.lr, vars(scheduler), model), stdout=False)

        # If specified, only evaluate model
        if opt.evaluate:
            # FIX: identity comparison with None (was `!= None`)
            assert opt.model is not None, 'no pretrained model to evaluate'
            total_correct, total, _ = validate(model, val_loader, criterion)
            logger.log('Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
                total_correct/total, total_correct, total))
            return 

        # Train model 
        start_time = time.time()
        best_acc = train(model, train_loader, val_loader, optimizer, criterion, logger, 
            num_epochs=opt.epochs, print_freq=opt.print_freq, model_id=i)
        logger.log('Best train accuracy: {:.2f}% \t Finished split {} in {:.2f}s\n'.format(
            100 * best_acc, i+1, time.time() - start_time))
        val_accuracies.append(best_acc)

        # Best evaluation on validation set
        best_model_path = os.path.join(path, 'model_{}.pth'.format(i))
        model.load_state_dict(torch.load(best_model_path)) # load best model
        total_correct, total, _ = validate(model, val_loader, criterion) # check val set
        logger.log('Val Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
            total_correct/total, total_correct, total))

        # Optionally also evaluate on test set
        if opt.test:
            total_correct, total, visualize = validate(model, test_loader, criterion, visualize=True) # run test set
            logger.log('Test Accuracy: {:.3f} \t Total correct: {} \t Total: {}\n'.format(
                total_correct/total, total_correct, total))
            logger.save_model(visualize, 'visualize_{}.pth'.format(i))
            test_accuracies.append(total_correct/total)
    
    # Log after training
    logger.log('Val Accuracies: {}'.format(val_accuracies))
    logger.log('Test Accuracies: {}'.format(test_accuracies))
    logger.log('Run id: {} \t Test Accuracies: {}'.format(opt.id, test_accuracies))
Ejemplo n.º 7
0
## ----------------------------------------------------------------------------------------------------
# Logistic regression algorithm test cases.

import numpy as np
import math
from models.LogisticRegression import LogisticRegression

# NOTE(review): `datasets` is never imported in this excerpt -- running this
# requires `from sklearn import datasets` (or equivalent) to be in scope.
iris = datasets.load_iris()
X = iris['data']
y = iris['target']
# Drop class 2 so the problem becomes binary (classes 0 and 1 only).
X = X[y!=2]
y = y[y!=2]

# Fix the learning rate at 0.01.
Logstic = LogisticRegression(X, y, threshold = 0.5)    
Logstic.fit(alpha = 0.01, accuracy = 0.001)
print("epoch:", Logstic.epoch)
print("theta:", Logstic.thetas)
y_predict = Logstic.predict()
y_predict

# Use an automatically controlled, decaying learning rate instead.
Logstic2 = LogisticRegression(X, y, threshold = 0.5)    
Logstic2.auto_fit(accuracy = 0.001)
print("epoch:",Logstic2.epoch)
print("theta:",Logstic2.thetas)
y_predict = Logstic2.predict()
y_predict

## ----------------------------------------------------------------------------------------------------