Example #1
def main():
    train_engine = DataEngine(config, args.data_dir, args.img_dir, args.year,
                              args.test_set, 'train')
    train_dataloader = DataLoader(train_engine,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)
    val_engine = DataEngine(config, args.data_dir, args.img_dir, args.year,
                            args.test_set, 'val')
    val_dataloader = DataLoader(val_engine,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=4,
                                pin_memory=True)

    model = Net(config=config,
                no_words=train_engine.tokenizer.no_words,
                no_answers=train_engine.tokenizer.no_answers,
                resnet_model=resnet_model,
                lstm_size=lstm_size,
                emb_size=emb_size,
                use_pretrained=args.use_pretrained).cuda()

    optimizer = optim.Adam(model.parameters(), lr=lr)

    train(train_dataloader, val_dataloader, model, optimizer)
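
The `train` helper called here is not part of the snippet. A minimal sketch of what such a train/validate loop could look like, assuming each batch is an (inputs, targets) pair and a cross-entropy objective (both are assumptions, not the original code):

def train(train_dataloader, val_dataloader, model, optimizer, n_epochs=10):
    criterion = nn.CrossEntropyLoss()
    for epoch in range(n_epochs):
        model.train()
        for inputs, targets in train_dataloader:
            inputs, targets = inputs.cuda(), targets.cuda()
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)  # forward pass + loss
            loss.backward()
            optimizer.step()
        # evaluate on the validation split after each epoch
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for inputs, targets in val_dataloader:
                inputs, targets = inputs.cuda(), targets.cuda()
                correct += (model(inputs).argmax(dim=1) == targets).sum().item()
                total += targets.size(0)
        print('epoch {}: val acc {:.2%}'.format(epoch + 1, correct / total))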
Example #2
def main():

    device = (torch.device('cuda') if torch.cuda.is_available()
              else torch.device('cpu'))
     
    model = Net(1).to(device=device)

    data_path = "../Mnist/"

    mnist = instantiate_training_data(data_path)
    mnist_val = instantiate_val_data(data_path)
    
    train_loader = torch.utils.data.DataLoader(mnist, batch_size=64)
    val_loader = torch.utils.data.DataLoader(mnist_val, batch_size=64)
    
    optimizer = optim.SGD(model.parameters(), lr=1e-2)
    
    loss_fn = nn.CrossEntropyLoss()

    training_string = "Training"
    val_string = "Val"
    
    print(f"Training on device {device}.")
    
    training_loop(
        n_epochs=100,
        optimizer=optimizer,
        model=model,
        loss_fn=loss_fn,
        train_loader=train_loader,
        device=device,
    )
    
    evaluate_training(model, train_loader, training_string)
    evaluate_validation(model, val_loader, val_string)
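
`training_loop`, `evaluate_training`, and `evaluate_validation` are defined elsewhere. A minimal sketch of the loop under the same signature (the per-batch (imgs, labels) layout is an assumption):

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, device):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            loss = loss_fn(model(imgs), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('Epoch {}, training loss {:.4f}'.format(
                epoch, loss_train / len(train_loader)))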
Example #3
def train(net: Net, data_path: str, batch_size: int, num_epochs: int,
          learning_rate: float):
    trans = transforms.Compose([
        transforms.ToTensor(),
    ])

    train_dataset = torchvision.datasets.ImageFolder(root=data_path,
                                                     transform=trans)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = adam.Adam(net.parameters(), lr=learning_rate)

    if cuda.is_available():
        net = net.to('cuda')  # move the model once up front; the batches are moved per step below

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        show_loss = lambda _: '[{}, {:.3f}]'.format(epoch + 1, running_loss)

        with click.progressbar(train_loader, item_show_func=show_loss) as bar:
            for inputs, labels in bar:
                if cuda.is_available():
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
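
For the snippet above to run, imports along these lines are assumed (the lowercase `adam` and `cuda` names suggest submodule imports):

import click
import torch.nn as nn
import torchvision
from torch import cuda
from torch.optim import adam
from torch.utils.data import DataLoader
from torchvision import transforms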
Example #4
def main():
    if args.use_cuda:
        torch.cuda.set_device(args.gpu)

    dataloader = DataLoader(dict_path=args.dict_path,
                            glove_path=args.glove_path,
                            data_path=args.data_path,
                            batch_size=args.batch_size,
                            use_glove=args.use_glove)

    model = Net(no_words=dataloader.tokenizer.no_words,
                lstm_size=args.lstm_size,
                emb_size=args.emb_size,
                depth=args.depth)
    if args.use_cuda:
        model = model.cuda()

    if args.start_iter != 0:
        # load the model state from pre-specified iteration (saved model available)
        model.load_state_dict(
            torch.load(os.path.join(args.save_dir,
                                    'iter_%d.pth' % args.start_iter)),
            strict=False)

    tokenizer = Tokenizer(args.dict_path)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    train(dataloader, model, optimizer, tokenizer)
Example #5
def main():
    dataloader = DataLoader(config, args.data_dir, args.img_dir, args.year,
                            args.test_set, batch_size)

    model = Net(config=config,
                no_words=dataloader.tokenizer.no_words,
                no_answers=dataloader.tokenizer.no_answers,
                resnet_model=resnet_model,
                lstm_size=lstm_size,
                emb_size=emb_size,
                use_pretrained=False).cuda()

    optimizer = optim.Adam(model.parameters(), lr=lr)

    train(dataloader, model, optimizer)
Example #6
def main(args):
    #### basic torch setup
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()  # use cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    torch.manual_seed(args['seed'])  # seed

    #### data pipeline
    data_dir = os.path.join(args['data_dir'], nni.get_trial_id())

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir,
                       train=True,
                       download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args['batch_size'],
        shuffle=True,
        **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir,
                       train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=1000,
        shuffle=True,
        **kwargs)

    #### define model
    hidden_size = args['hidden_size']

    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args['lr'],
                          momentum=args['momentum'])

    #### train
    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        if epoch < args['epochs']:
            # report intermediate result
            nni.report_intermediate_result(test_acc)
            logger.debug('test accuracy %g', test_acc)
            logger.debug('Pipe send intermediate result done.')
        else:
            # report final result
            nni.report_final_result(test_acc)
            logger.debug('Final result is %g', test_acc)
            logger.debug('Send final result done.')
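
This trial script targets NNI (note `nni.get_trial_id()` and the report calls). A typical entry point merges the tuner's suggested hyperparameters into a default dict before calling `main`; the default values below are placeholders:

if __name__ == '__main__':
    params = {
        'no_cuda': False, 'seed': 1, 'data_dir': '/tmp/mnist',
        'batch_size': 64, 'hidden_size': 128, 'lr': 0.01,
        'momentum': 0.5, 'epochs': 10,
    }
    # nni.get_next_parameter() returns this trial's suggested hyperparameters
    params.update(nni.get_next_parameter())
    main(params)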
Example #7
def main():
    """Main function
    """
    # Load the parameters from json file
    args = args_parser()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), \
        "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Create summary writer for use with tensorboard
    writer = SummaryWriter(os.path.join(args.model_dir, 'runs', 'train'))

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)
        params.device = "cuda:0"
    else:
        params.device = "cpu"

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders
    dataloaders = d_l.get_dataloader(['train', 'val'], args.data_dir, params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']

    logging.info("- done.")

    # Define the model and optimizer
    model = Net(params)
    if params.cuda:
        model = model.to(params.device)
    writer.add_graph(model, next(iter(train_dl))[0].to(params.device))  # sample batch must be on the model's device

    optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    criterion = loss_fn
    metrics = get_metrics()

    # Train the model
    logging.info("Starting training for %d epoch(s)", params.num_epochs)
    train_and_evaluate(model, train_dl, val_dl, optimizer, criterion, metrics,
                       params, args.model_dir, writer, args.restore_file)
    writer.close()
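
`utils.Params` is not shown here. A minimal stand-in consistent with how it is used above (attribute access over a JSON config, plus assignable fields such as `cuda` and `device`):

import json

class Params:
    # hypothetical stand-in: expose the keys of a JSON file as attributes
    def __init__(self, json_path):
        with open(json_path) as f:
            self.__dict__.update(json.load(f))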
Example #8
def main():
    test_path = Path.cwd() / 'data_in' / 'test.txt'
    vocab_path = Path.cwd() / 'data_in' / 'vocab.pkl'

    with open(vocab_path, mode='rb') as io:
        vocab = pickle.load(io)

    tokenizer = MeCab()
    padder = PadSequence(length=70, pad_val=vocab.token_to_idx['<pad>'])
    test_ds = Corpus(test_path, vocab, tokenizer, padder)
    test_dl = DataLoader(test_ds, batch_size=1024)

    model = Net(vocab_len=len(vocab))

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(1):
        model.train()
        index = 0
        acc = 0
        for label, sen1, sen2 in tqdm(test_dl, disable=True):
            optimizer.zero_grad()

            pre_label = model(sen1, sen2)

            loss = loss_fn(pre_label, label)
            loss.backward()
            optimizer.step()

            pred_cls = pre_label.data.max(1)[1]
            acc += pred_cls.eq(label.data).cpu().sum()

            print("epoch: {}, index: {}, loss: {}".format((epoch + 1), index,
                                                          loss.item()))
            index += len(label)

        print('Accuracy : %d %%' % (100 * acc / index))
Example #9
def run(load_last_checkpoint=False):
    save_dir = f'{OUTPUT_PATH}/models/'
    os.makedirs(save_dir, exist_ok=True)
    neural_net = Net()
    loss_fn = Loss()
    optim = torch.optim.SGD(neural_net.parameters(), DEFAULT_LR, momentum=0.9, weight_decay=1e-4)
    starting_epoch = 0
    initial_loss = None
    if load_last_checkpoint:
        model_paths = glob(f'''{save_dir}*.ckpt''')
        model_names = [int(i.split('/')[-1][:-5]) for i in model_paths]
        latest_model_path = f'''{save_dir}{max(model_names)}.ckpt'''
        print('loading latest model from:', latest_model_path)
        checkpoint = torch.load(latest_model_path)
        neural_net.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        starting_epoch = checkpoint['epoch']
        initial_loss = checkpoint['loss']
    if torch.cuda.is_available():
        neural_net = neural_net.cuda()
        loss_fn = loss_fn.cuda()
    print(f'''Training from epoch: {starting_epoch} towards: {TOTAL_EPOCHS},
with learning rate starting from: {get_lr(starting_epoch)}, and loss: {initial_loss}''')
    meta = pd.read_csv(f'{OUTPUT_PATH}/augmented_meta.csv', index_col=0).sample(frac=1).reset_index(drop=True)
    meta_group_by_series = meta.groupby(['seriesuid']).indices
    list_of_groups = [{i: list(meta_group_by_series[i])} for i in meta_group_by_series.keys()]
    random.Random(0).shuffle(list_of_groups)
    val_split = int(VAL_PCT * len(list_of_groups))
    val_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[:val_split]]))
    train_indices = list(itertools.chain(*[list(i.values())[0] for i in list_of_groups[val_split:]]))
    ltd = LunaDataSet(train_indices, meta)
    lvd = LunaDataSet(val_indices, meta)
    train_loader = DataLoader(ltd, batch_size=1, shuffle=False)
    val_loader = DataLoader(lvd, batch_size=1, shuffle=False)

    for ep in range(starting_epoch, TOTAL_EPOCHS):
        train(train_loader, neural_net, loss_fn, ep, optim, get_lr, save_dir=save_dir)
        validate(val_loader, neural_net, loss_fn)
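
`get_lr` is referenced but not defined in this snippet. A plausible step-decay schedule consistent with the call (the milestones and factors are assumptions):

def get_lr(epoch):
    # hypothetical schedule: decay the base rate at fixed fractions of training
    if epoch <= 0.5 * TOTAL_EPOCHS:
        return DEFAULT_LR
    elif epoch <= 0.8 * TOTAL_EPOCHS:
        return 0.1 * DEFAULT_LR
    return 0.01 * DEFAULT_LR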
Example #10
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)

    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False)

    model = Net(model_name).to(device)
    # criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss(0.5)
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    optimizer = Ranger(model.parameters(), lr=1e-3, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=4)

    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    # if os.path.isfile(model_save_path):
    #     print('loading the previously trained model')
    #     model.load_state_dict(torch.load(model_save_path))

    best_val_score = 0
    best_val_score_cnt = 0
    last_improved_epoch = 0
    adjust_lr_num = 0
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch:{}, step:{}'.format(cur_epoch + 1, len(train_loader)))
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            probs = model(batch_x)

            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader), train_loss.item(), train_acc[0].item()))
        val_loss, val_score = evaluate(model, val_loader, criterion)
        if val_score >= best_val_score:
            if val_score == best_val_score:
                best_val_score_cnt += 1
            best_val_score = val_score
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_score,
                         end_time - start_time, improved_str))
        if cur_epoch - last_improved_epoch >= config.patience_epoch or best_val_score_cnt >= 3:
            if adjust_lr_num >= config.adjust_lr_num:
                print("No optimization for a long time, auto stopping...")
                break
            print("No optimization for a long time, adjust lr...")
            # scheduler.step()
            last_improved_epoch = cur_epoch  # reset this, otherwise the adjustment would fire on every subsequent epoch
            adjust_lr_num += 1
            best_val_score_cnt = 0
        scheduler.step()
    del model
    gc.collect()

    if fold_idx is not None:
        model_score[fold_idx] = best_val_score
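
`FocalLoss(0.5)` replaces plain cross-entropy here; whether 0.5 is the focusing parameter gamma or a class weight is not visible in the snippet. A common multi-class formulation, assuming the argument is gamma:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    # (1 - p_t)^gamma down-weights the loss of well-classified examples
    def __init__(self, gamma=0.5):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, target):
        ce = F.cross_entropy(logits, target, reduction='none')
        pt = torch.exp(-ce)  # probability assigned to the true class
        return ((1.0 - pt) ** self.gamma * ce).mean()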
Example #11
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)

    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size, shuffle=False)

    model = Net(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    # criterion = FocalLoss(0.5)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # config.model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))

    best_val_acc = 0
    last_improved_epoch = 0
    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch: ', cur_epoch + 1)
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            probs = model(batch_x)

            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader), train_loss.item(), train_acc[0].item()))
        val_loss, val_acc = evaluate(model, val_loader, criterion)
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        # msg = 'the current epoch: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%},  ' \
        #       'val loss: {4:>5.2}, val acc: {5:>6.2%}, {6}'
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_acc,
                         end_time - start_time, improved_str))
        scheduler.step()
        if cur_epoch - last_improved_epoch > config.patience_epoch:
            print("No optimization for a long time, auto-stopping...")
            break
    del model
    gc.collect()
Example #12
    # keep only the pretrained weights whose shapes match the current model
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('-- Loading weights finished.')
    # 2. run on multiple GPUs in parallel
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model = model.cuda()
    # 3. create the loss module
    criterion = MultiBoxLoss()
    # 4. create the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model.train()
    # 5. load the data and train for args.Epoch epochs
    for epoch in range(args.Epoch):
        # 5.1 use a different learning rate as training progresses
        if epoch % 10 == 0:
            adjust_learning_rate(optimizer, args.lr, 0.95, epoch)
        # 5.2 create the data loader
        train_data = MyDataSet(args.annotation_path,
                               Config['input_size'],
                               transform=my_transform,
                               loader=default_loader)
        # batch_size must be 1 because the number of targets per image varies;
        # DataLoader converts np.array to tensor automatically
        data_loader = torch.utils.data.DataLoader(dataset=train_data,
                                                  batch_size=1,
                                                  shuffle=False)
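
`adjust_learning_rate(optimizer, args.lr, 0.95, epoch)` is called every 10 epochs but not defined in the snippet. A helper consistent with that signature (exponential decay is an assumption):

def adjust_learning_rate(optimizer, base_lr, decay, epoch):
    # hypothetical helper: decay the base rate exponentially over epochs
    lr = base_lr * (decay ** epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr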
Example #13
    for i in torch.arange(x.shape[0]):
        include = np.delete(np.arange(x.shape[0]), i)
        # p = get_class_probs(out_z[i,:], c[include,:], l[include], out_w[include], params)
        p = get_class_probs(out_z[i, :], c[include, :], l[include], None,
                            params)
        loss += loss_fn(p, l[i], params)

    print(", loss: {}".format(loss.item()))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


if __name__ == "__main__":
    # Load the parameters from json file
    args = parser.parse_args()

    params = utils.Params("params.json")

    model = Net(params)
    optimizer = optim.Adam(model.parameters(), params.lr)

    x, t = simulate_data(params)
    data = {"x": x, "target": t}
    storage = {}

    for epoch in range(params.epochs):
        train(data, model, optimizer, storage, args, params, epoch + 1)
Example #14
    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_data_loader(['train', 'val'], args.data_dir, params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']

    logging.info("dataset loading - done.")

    # Define the model and optimizer
    model = Net().cuda() if params.cuda else Net()
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # the module-level loss_fn and metrics are passed to train_and_evaluate
    # directly (re-assigning them to themselves inside this function would
    # shadow the globals and raise UnboundLocalError)

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics, params, args.model_dir, args.restore_file)
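
`train_and_evaluate` is shared by Examples #7 and #14 but not shown. A hedged skeleton of its usual shape (the `train`/`evaluate` helpers and the 'accuracy' key are assumptions):

def train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                       params, model_dir, restore_file=None):
    best_val_acc = 0.0
    for epoch in range(params.num_epochs):
        train(model, optimizer, loss_fn, train_dl, metrics, params)
        val_metrics = evaluate(model, loss_fn, val_dl, metrics, params)
        # checkpoint whenever the validation accuracy improves
        if val_metrics['accuracy'] >= best_val_acc:
            best_val_acc = val_metrics['accuracy']
            torch.save(model.state_dict(), os.path.join(model_dir, 'best.pth'))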