Example #1
def train(hidden_size, num_layers, lr, weight_decay):
    region = "germany"

    log_name = log_pattern.format(region=region,
                                  num_layers=num_layers,
                                  hidden_size=hidden_size,
                                  lr=lr,
                                  weight_decay=weight_decay)
    log_path = os.path.join(log_dir, log_name)

    if os.path.exists(log_path):
        print(f"{log_path} exists. skipping...")
        return

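    # the try/finally below guarantees the per-epoch stats collected so far are written to CSV even if training fails midway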
    try:
        model, dataset, validdataset, dataloader, validdataloader, optimizer = setup(
            hidden_size, num_layers, lr, weight_decay)
        stats = list()
        for epoch in range(epochs):
            trainloss = train_epoch(model, dataloader, optimizer, criterion,
                                    device)
            testmetrics, testloss = test_epoch(model,
                                               validdataloader,
                                               device,
                                               criterion,
                                               n_predictions=1)
            metric_msg = ", ".join([
                f"{name}={metric.compute():.2f}"
                for name, metric in testmetrics.items()
            ])
            msg = f"epoch {epoch}: train loss {trainloss:.2f}, test loss {testloss:.2f}, {metric_msg}"
            print(msg)

            #test_model(model, validdataset, device)

            model_name = name_pattern.format(region=region,
                                             num_layers=num_layers,
                                             hidden_size=hidden_size,
                                             lr=lr,
                                             weight_decay=weight_decay,
                                             epoch=epoch)
            pth = os.path.join(model_dir, model_name + ".pth")
            print(f"saving model snapshot to {pth}")
            snapshot(model, optimizer, pth)
            stat = dict()
            stat["epoch"] = epoch
            for name, metric in testmetrics.items():
                stat[name] = metric.compute()

            stat["trainloss"] = trainloss.cpu().detach().numpy()
            stat["testloss"] = testloss.cpu().detach().numpy()
            stats.append(stat)

    finally:
        df = pd.DataFrame(stats)
        df.to_csv(log_path)
        print(f"saving log to {log_path}")
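
Example #1, like the examples that follow, delegates the per-epoch work to project-specific train_epoch/test_epoch helpers that are not reproduced here. As a rough reference only, here is a minimal sketch of what such a pair often looks like; the names, signatures, and return values are assumptions, not the helpers actually used in these projects.

# Hypothetical sketch of a generic train/test epoch pair (assumed signatures).
import torch


def train_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)


def test_epoch(model, dataloader, device, criterion):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            total_loss += criterion(model(inputs), targets).item()
    return total_loss / len(dataloader)
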
Example #2
def main(args):
    mode = "evaluation" + str(args.fold)
    traindataloader, testdataloader, meta = get_dataloader(
        args.datapath,
        mode,
        args.batchsize,
        args.workers,
        level=args.level,
        preload_ram=args.preload_ram)

    num_classes = meta["num_classes"]
    ndims = meta["ndims"]
    sequencelength = meta["sequencelength"]

    print(f"Logging results to {args.logdir}")
    logdir = os.path.join(args.logdir, str(args.fold))
    os.makedirs(logdir, exist_ok=True)

    epochs, learning_rate, weight_decay = select_hyperparameter(args.model)

    device = torch.device(args.device)
    model = get_model(args.model, ndims, num_classes, sequencelength, device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    model.modelname += f"_learning-rate={learning_rate}_weight-decay={weight_decay}"
    print(f"Initialized {model.modelname}")
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")

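    # train for the model-specific number of epochs, then evaluate once on the held-out fold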
    for epoch in range(epochs):
        print(f"train epoch {epoch}")
        train_epoch(model, optimizer, criterion, traindataloader, device)
    losses, y_true, y_pred, y_score, field_ids = test_epoch(
        model, criterion, dataloader=testdataloader, device=device)

    logdir = os.path.join(logdir, args.model)
    os.makedirs(logdir, exist_ok=True)
    print(f"saving results to {logdir}")
    with open(os.path.join(logdir, "classification_report.txt"), "w") as f:
        print(sklearn.metrics.classification_report(y_true.cpu(), y_pred.cpu()),
              file=f)
    np.save(os.path.join(logdir, "y_pred.npy"), y_pred.cpu().numpy())
    np.save(os.path.join(logdir, "y_true.npy"), y_true.cpu().numpy())
    np.save(os.path.join(logdir, "y_score.npy"), y_score.cpu().numpy())
    np.save(os.path.join(logdir, "field_ids.npy"), field_ids.numpy())
    save(model, os.path.join(logdir, model.modelname + ".pth"))
Example #3
def test(model, data_path, label_file, save, batch_size):

    data_list = os.listdir(data_path)
    dataset = octDataset(data_path, data_list, label_file, argument=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=0,
                                             collate_fn=dataset.collate_fn)

    if torch.cuda.is_available():
        model = model.cuda()

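    # restore the snapshot saved during training before evaluating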
    model.load_state_dict(torch.load(os.path.join(save, 'v1_test_model.dat')))

    _, loss, error = test_epoch(model, dataloader, is_test=True)

    return error.avg
Example #4
# dataset
train_data, test_data = data.train_loader(args, kwargs), data.test_loader(
    args, kwargs)

# model
net = model.MnistClassifer().to(device)
print(net)

# optimizer
optimizer = optim.SGD(net.parameters(), lr=args.lr)
# optimizer = optim.Adam(net.parameters(), lr = 0.0001)

# train
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

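# training loop: record per-epoch losses and accuracies so they can be plotted afterwards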
for epoch in range(1, args.epochs + 1):
    print("Epoch: {}".format(epoch))
    train_loss, train_accuracy = train_epoch(args, net, device, train_data,
                                             optimizer, epoch)
    test_loss, test_accuracy = test_epoch(args, net, device, test_data)

    train_losses.append(train_loss)
    test_losses.append(test_loss)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

save_graph_image(epoch, train_losses, train_accuracies, test_losses,
                 test_accuracies)
torch.save(net.state_dict(), 'mnist_model_params.pth')
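
The save_graph_image helper used above is not shown in this example. One possible sketch of such a helper with matplotlib follows; its name, signature, and output file are assumptions.

# Hypothetical sketch of a curve-plotting helper (assumed name and signature).
import matplotlib.pyplot as plt


def save_graph_image(epoch, train_losses, train_accuracies, test_losses,
                     test_accuracies, path='training_curves.png'):
    epochs = range(1, epoch + 1)
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))
    # loss curves
    ax_loss.plot(epochs, train_losses, label='train')
    ax_loss.plot(epochs, test_losses, label='test')
    ax_loss.set_xlabel('epoch')
    ax_loss.set_ylabel('loss')
    ax_loss.legend()
    # accuracy curves
    ax_acc.plot(epochs, train_accuracies, label='train')
    ax_acc.plot(epochs, test_accuracies, label='test')
    ax_acc.set_xlabel('epoch')
    ax_acc.set_ylabel('accuracy')
    ax_acc.legend()
    fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)
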
Example #5
def main():
    best_prec1 = 0
    test = True
    log = True
    save_best = True
    sample_length = 0.5
    # together I want about 5000 seconds in total from each subject
    num_samples = int(np.round(5000 / sample_length))
    batch_size = 100
    num_epochs = 200
    dropout = 0.4
    task = 'subject_prediction'

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True

    root_path = pathlib.Path.cwd()
    matrix = root_path.joinpath(
        'data', f'cleaned_{sample_length}sec_{num_samples}.npy')

    training_dataset = LFPData(data_file=matrix,
                               split='train',
                               standardize=True)
    training_loader = DataLoader(training_dataset,
                                 shuffle=True,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)

    validation_set = LFPData(data_file=matrix, split='valid', standardize=True)
    validation_loader = DataLoader(validation_set,
                                   shuffle=False,
                                   batch_size=batch_size,
                                   pin_memory=True,
                                   num_workers=1)
    # input_shape = (2, np.int(422 * sample_length))  # this is a hack to figure out shape of fc layer
    # net = conv1d_nn.Net(input_shape=input_shape, dropout=dropout)
    net = conv1d_nn.FCN(in_channels=2, num_classes=9)
    net.apply(init_weights)
    net.cuda()

    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    # optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    optimizer = optim.Adam(net.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-8)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=5,
                                                     threshold=1e-2)
    stop_criterion = EarlyStopping()

    title = f'FCN2_cleaned_{sample_length}sec_{num_samples}'
    if log:
        log_dir = root_path.joinpath('logs', title)
        if not log_dir.exists():
            log_dir.mkdir()
        training_log = log_dir.joinpath('log')
        if not training_log.exists():
            open(str(training_log), 'w').close()
        result_writer = ResultsWriter(str(training_log), overwrite=True)

    mlog = MeterLogger(server='localhost',
                       port=8097,
                       nclass=9,
                       title=title,
                       env=title)

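    # main loop: train, validate, checkpoint the best top-1 accuracy, and stop early after 30 epochs without improvement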
    for epoch in range(1, num_epochs + 1):
        mlog.timer.reset()

        train_epoch(training_loader, net, criterion, optimizer, mlog)

        if log:
            result_writer.update(title, {'Train': mlog.peek_meter()})
        mlog.print_meter(mode="Train", iepoch=epoch)
        mlog.reset_meter(mode="Train", iepoch=epoch)
        validation_loss = val_epoch(validation_loader, net, criterion, mlog)

        prec1 = mlog.meter['accuracy'].value()[0]

        if save_best:
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            if is_best:
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    root_path.joinpath('checkpoints', title), {
                        'epoch': epoch + 1,
                        'state_dict': net.state_dict(),
                        'best_prec1': best_prec1,
                        'optimizer': optimizer.state_dict(),
                    }, is_best)

        if log:
            result_writer.update(title, {'Validation': mlog.peek_meter()})
        mlog.print_meter(mode="Test", iepoch=epoch)
        mlog.reset_meter(mode="Test", iepoch=epoch)

        stop_criterion.eval_loss(validation_loss)
        if stop_criterion.get_nsteps() >= 30:
            print('Early stopping')
            break
        print(optimizer.param_groups[0]['lr'])
        scheduler.step(validation_loss)

    print('Training finished', best_prec1)

    if test:
        test_set = LFPData(data_file=matrix, split='test', standardize=True)
        test_loader = DataLoader(test_set,
                                 shuffle=False,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)
        test_loss, test_acc = test_epoch(test_loader, net, criterion, mlog)

        result_writer.update(
            title, {'Test': {
                'loss': test_loss,
                'accuracy': test_acc
            }})

        print(test_loss, test_acc)

    # save pngs of visdom plot into log path
    plot_visdom(mlog, log_dir)
Example #6
def main(args):
    method = args.method
    pretrainEmbedding = args.pretrainEmbedding
    makeCSVfile = args.makeCSVfile

    if makeCSVfile:
        make_csv_file_from_rawtext()

    tokenize = lambda x: x.split()
    TEXT = Field(sequential=True,
                 use_vocab=True,
                 tokenize=tokenize,
                 lower=True,
                 pad_first=True)
    LABEL = Field(sequential=False,
                  use_vocab=False,
                  pad_token=None,
                  unk_token=None)

    tv_datafield = [("id", None), ("text", TEXT), ("label", LABEL)]
    train_data = TabularDataset(path='./data/train_log.csv',
                                format='csv',
                                fields=[('text', TEXT), ('label', LABEL)],
                                skip_header=True)
    test_data = TabularDataset(path='./data/test_log.csv',
                               format='csv',
                               fields=[('text', TEXT), ('label', LABEL)],
                               skip_header=True)

    if pretrainEmbedding:
        vectors = Vectors(name="./data/all.review.vec.txt", cache='./')
        TEXT.build_vocab(train_data, max_size=10000, vectors=vectors)
    else:
        TEXT.build_vocab(train_data, max_size=10000)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    vocab_size = len(TEXT.vocab)

    if method == "RNN":
        model = LSTMBaseline(vocab_size)
    else:
        model = CNNBaseline(vocab_size)

    model = model.to(device)
    traindl, testdl = torchtext.data.BucketIterator.splits(
        datasets=(train_data, test_data),  # the train and test TabularDatasets
        batch_sizes=(32, 32),  # batch size for each split
        sort_key=lambda x: len(x.text),  # attribute used to sort examples
        device=device,  # -1 means CPU; 0 or None means GPU (legacy torchtext)
        sort_within_batch=True,
        repeat=False)

    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.BCEWithLogitsLoss()
    epochs = 100

    trainAccuracy = []
    testAccuracy = []
    trainLoss = []
    testLoss = []
    trainTime = 0

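    # train and evaluate each epoch, keeping the per-epoch loss and accuracy curves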
    for epoch in range(1, epochs + 1):
        loss, acc = train_epoch(model, traindl, optimizer, criterion)
        trainLoss.append(loss)
        trainAccuracy.append(acc)

        loss, acc = test_epoch(model, testdl, optimizer, criterion)
        testLoss.append(loss)
        testAccuracy.append(acc)

    print("train Accuracy :", trainAccuracy[-1].item())
    print("test Accuracy :", testAccuracy[-1].item())
Example #7
    optimizer = optim.SGD(model.parameters(), lr=cfg.lr)

    # open the log file
    log = log_txt(path=cfg.work_dir, description=cfg.exp)

    # load pretrained parameters (resume from checkpoint)
    start_epoch = 0
    if cfg.resume_from is not None:
        print('loading pretrained model from %s' % cfg.resume_from)
        checkpoint = torch.load(cfg.resume_from)
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        start_epoch = checkpoint['epoch']

    # data loaders for training and validation
    dataloader = load_data(cfg)
    train_loader = dataloader['train']
    test_loader = dataloader['test']
    train4val_loader = dataloader['train4val']

    # configure the training schedule
    iter_per_epoch = len(train_loader)
    train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=cfg.milestone,
                                                     gamma=0.5)

    for i in range(0, cfg.epoch_size):
        train_epoch(i, model, train_loader, criterion, optimizer, cfg, log,
                    train_scheduler)
        test_epoch(model, train4val_loader, test_loader, L1_measure, cfg, log,
                   i)
Example #8
def main():
    best_prec1 = 0
    test = True
    transfer_learning = True
    batch_size = 50
    sample_length = 3
    num_epochs = 50
    task = 'state_prediction'

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    torch.backends.cudnn.benchmark = True

    root_path = pathlib.Path.home().joinpath('deep_LFP')
    matrix = root_path.joinpath(
        'data', f'cleaned_state_matrix_{sample_length}sec.npz')

    training_dataset = LFPDataStates(data_file=matrix,
                                     split='train',
                                     standardize=True)
    training_loader = DataLoader(training_dataset,
                                 shuffle=True,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)

    validation_set = LFPDataStates(data_file=matrix,
                                   split='valid',
                                   standardize=True)
    validation_loader = DataLoader(validation_set,
                                   shuffle=False,
                                   batch_size=batch_size,
                                   pin_memory=True,
                                   num_workers=1)
    # hack to figure out the input shape of the fully connected layer
    input_shape = (2, int(422 * sample_length))
    net = conv1d_nn.Net(input_shape=input_shape, dropout=0)
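    # optionally warm-start from the earlier subject-prediction checkpoint, freeze its weights, and train new fully connected layers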
    if transfer_learning:
        num_samples_prev_model = int(np.round(5000 / sample_length))
        previous_model = f'cleaned_{sample_length}sec_{num_samples_prev_model}_model_best.pth.tar'
        previous_model_weights = os.path.join(root_path, 'checkpoints',
                                              previous_model)
        net.load_state_dict(torch.load(previous_model_weights)['state_dict'])
        for param in net.parameters():
            param.requires_grad = False

        num_features = net.fc1.in_features
        net.fc1 = nn.Linear(num_features, 2040)
    net.fc2 = nn.Linear(2040, 4)
    net.cuda()

    criterion = nn.CrossEntropyLoss()
    criterion.cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=100,
                                                     threshold=1e-3)
    stop_criterion = EarlyStopping()

    title = f'cleaned_state_prediction_{sample_length}sec_transfer_learning'
    training_log_path = '/data/eaxfjord/deep_LFP/logs/' + title + '/log'
    log_dir = os.path.dirname(training_log_path)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    if not os.path.exists(training_log_path):
        open(training_log_path, 'w').close()

    result_writer = ResultsWriter(training_log_path, overwrite=True)

    mlog = MeterLogger(server='localhost',
                       port=8097,
                       nclass=4,
                       title=title,
                       env=f'state_prediction_{sample_length}sec')

    for epoch in range(1, num_epochs + 1):
        mlog.timer.reset()

        train_epoch(training_loader, net, criterion, optimizer, mlog)

        result_writer.update(task, {'Train': mlog.peek_meter()})
        mlog.print_meter(mode="Train", iepoch=epoch)
        mlog.reset_meter(mode="Train", iepoch=epoch)
        validation_loss = val_epoch(validation_loader, net, criterion, mlog)

        prec1 = mlog.meter['accuracy'].value()[0]

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        if is_best:
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                os.path.join(root_path, 'checkpoints', title), {
                    'epoch': epoch + 1,
                    'state_dict': net.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)

        result_writer.update(task, {'Validation': mlog.peek_meter()})

        mlog.print_meter(mode="Test", iepoch=epoch)
        mlog.reset_meter(mode="Test", iepoch=epoch)

        # stop_criterion.eval_loss(validation_loss)
        # if stop_criterion.get_nsteps() >= 30:
        #     print('Early stopping')
        #     break
        print(optimizer.param_groups[0]['lr'])
        scheduler.step(validation_loss)

    print('Training finished', best_prec1)

    if test:
        test_set = LFPDataStates(data_file=matrix,
                                 split='test',
                                 standardize=True)
        test_loader = DataLoader(test_set,
                                 shuffle=False,
                                 batch_size=batch_size,
                                 pin_memory=True,
                                 num_workers=1)
        test_loss, test_acc = test_epoch(test_loader, net, criterion, mlog)

        result_writer.update(task, {'Test': test_acc})
        print(test_loss, test_acc)

    # when finished get data from visdom plot, and save to png
    plot_visdom(mlog, log_dir)
Example #9
                    regularizer))  #
    model.eval()
    mse, preds, trues = eval_epoch(valid_loader, model, loss_fun)
    valid_mse.append(mse)
    if valid_mse[-1] < min_mse:
        min_mse = valid_mse[-1]
        best_model = model
        torch.save(best_model, "model.pth")
    end = time.time()
    if (len(train_mse) > 50
            and np.mean(valid_mse[-5:]) >= np.mean(valid_mse[-10:-5])):
        break
    print(train_mse[-1], valid_mse[-1], round((end - start) / 60, 5))
print(time_range, min_mse)

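# reload the best checkpoint and evaluate it on the held-out test set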
loss_fun = torch.nn.MSELoss()
best_model = torch.load("model.pth")
test_set = Dataset(test_indices, input_length + time_range - 1, 40, 60,
                   test_direc, True)
test_loader = data.DataLoader(test_set,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=8)
preds, trues, loss_curve = test_epoch(test_loader, best_model, loss_fun)

torch.save({
    "preds": preds,
    "trues": trues,
    "loss_curve": loss_curve
}, "results.pt")
Example #10
def main():
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s:%(process)d:%(levelname)s:%(name)s:%(message)s')
    parser = argparse.ArgumentParser(description='GMRT CNN Training')
    parser.add_argument('--batch-size',
                        type=int,
                        default=20000,
                        metavar='N',
                        help='input batch size for training (default: 20000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=5,
                        metavar='N',
                        help='number of epochs to train (default: 5)')
    parser.add_argument('--learning-rate',
                        type=float,
                        default=0.01,
                        metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--keep-probability',
                        type=float,
                        default=0.6,
                        metavar='K',
                        help='Dropout keep probability (default: 0.6)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--num-processes',
                        type=int,
                        default=4,
                        metavar='N',
                        help='how many training processes to use (default: 4)')
    parser.add_argument('--use-gpu',
                        action='store_true',
                        default=False,
                        help='use the GPU if it is available')
    parser.add_argument('--data-path',
                        default='./data',
                        help='the path to the data file')
    parser.add_argument('--data-file',
                        default='data.h5',
                        help='the name of the data file')
    parser.add_argument('--sequence-length',
                        type=int,
                        default=10,
                        help='how many elements in a sequence')
    parser.add_argument('--validation-percentage',
                        type=int,
                        default=10,
                        help='amount of data used for validation')
    parser.add_argument('--training-percentage',
                        type=int,
                        default=80,
                        help='amount of data used for training')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        metavar='S',
                        help='random seed (default: None)')
    parser.add_argument('--learning-rate-decay',
                        type=float,
                        default=0.8,
                        metavar='LRD',
                        help='the initial learning rate decay rate')
    parser.add_argument('--start-learning-rate-decay',
                        type=int,
                        default=5,
                        help='the epoch to start applying the LRD')
    parser.add_argument('--short_run',
                        type=int,
                        default=None,
                        help='use a short run of the test data')
    parser.add_argument('--save',
                        type=str,
                        default=None,
                        help='path to save the final model')

    kwargs = vars(parser.parse_args())
    LOGGER.debug(kwargs)

    # Seed NumPy with the supplied seed if one was given, otherwise seed randomly
    if kwargs['seed'] is not None:
        np.random.seed(kwargs['seed'])
    else:
        np.random.seed()

    if kwargs['use_gpu'] and torch.cuda.is_available():
        LOGGER.info('Using cuda devices: {}'.format(torch.cuda.device_count()))
        kwargs['cuda_device_count'] = torch.cuda.device_count()
        kwargs['using_gpu'] = True
    else:
        LOGGER.info('Using CPU')
        kwargs['cuda_device_count'] = 0
        kwargs['using_gpu'] = False

    # Do this first so all the data is built before we go parallel and get race conditions
    with Timer('Checking/Building data file'):
        build_data(**kwargs)

    rfi_data = RfiData(**kwargs)

    if kwargs['using_gpu']:
        # The DataParallel will distribute the model to all the available GPUs
        # model = nn.DataParallel(GmrtCNN(kwargs['keep_probability'])).cuda()
        model = nn.DataParallel(
            GmrtLinear(kwargs['keep_probability'],
                       kwargs['sequence_length'])).cuda()

        # Train
        train(model, rfi_data, **kwargs)

    else:
        # This uses the HOGWILD! approach to lock free SGD
        # model = GmrtCNN(kwargs['keep_probability'])
        model = GmrtLinear(kwargs['keep_probability'],
                           kwargs['sequence_length'])
        model.share_memory()  # gradients are allocated lazily, so they are not shared here

        processes = []
        for rank in range(kwargs['num_processes']):
            p = mp.Process(target=train,
                           args=(model, rfi_data, rank),
                           kwargs=kwargs)
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

    with Timer('Reading final test data'):
        test_loader = data.DataLoader(
            rfi_data.get_rfi_dataset('test',
                                     short_run_size=kwargs['short_run']),
            batch_size=kwargs['batch_size'],
            num_workers=1,
            pin_memory=kwargs['using_gpu'],
        )
    with Timer('Final test'):
        test_epoch(model, test_loader, kwargs['log_interval'])

    if kwargs['save'] is not None:
        with Timer('Saving model'):
            with open(kwargs['save'], 'wb') as save_file:
                torch.save(model.state_dict(), save_file)