Example #1
 def train_dataloader(self):
     return DataLoader(dataset=MyDataset(self.data_path,
                                         split='TRAIN',
                                         input_type=self.input_type,
                                         input_length=self.input_length,
                                         w2v_type=self.w2v_type,
                                         is_balanced=self.is_balanced,
                                         is_subset=self.is_subset),
                       batch_size=self.batch_size,
                       shuffle=True,
                       drop_last=False,
                       num_workers=self.num_workers)
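Both hooks assume a MyDataset whose constructor takes a split name plus several preprocessing flags. A hypothetical skeleton, with the constructor arguments inferred from the calls in Examples #1 and #2 and placeholder bodies:

from torch.utils.data import Dataset

class MyDataset(Dataset):
    # Hypothetical skeleton: only the argument names come from the calls
    # above; the loading logic is a placeholder.
    def __init__(self, data_path, split, input_type, input_length,
                 num_chunk=1, w2v_type=None, is_balanced=False, is_subset=False):
        self.data_path = data_path
        self.split = split  # 'TRAIN' or 'TEST'
        self.input_type = input_type
        self.input_length = input_length
        self.num_chunk = num_chunk
        self.items = []  # filled in by split-specific loading logic

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        raise NotImplementedError  # the real project returns an (input, label) pair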
Example #2
 def test_dataloader(self):
     return DataLoader(dataset=MyDataset(self.data_path,
                                         split='TEST',
                                         input_type=self.input_type,
                                         input_length=self.input_length,
                                         num_chunk=self.num_chunk,
                                         w2v_type=self.w2v_type,
                                         is_balanced=self.is_balanced,
                                         is_subset=self.is_subset),
                       batch_size=self.batch_size // self.num_chunk,
                       shuffle=False,
                       drop_last=False,
                       num_workers=self.num_workers)
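The test loader divides batch_size by num_chunk, presumably because each test item is expanded into num_chunk excerpts, keeping the number of tensors per batch constant. A standalone sketch (shapes are illustrative, not from the project) of folding chunk-level logits back into item-level predictions:

import torch

num_chunk, items, num_classes = 4, 2, 10
logits = torch.randn(items * num_chunk, num_classes)  # one batch of model outputs
item_logits = logits.view(items, num_chunk, num_classes).mean(dim=1)
print(item_logits.shape)  # torch.Size([2, 10])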
Example #3
    args = configparser.ConfigParser()
    args.read('argsConfig.ini')
    log_dir = args.get('Test', 'model_log_dir')
    writer = SummaryWriter(log_dir)
    log_file = os.path.join(log_dir, 'log.txt')

    with open(log_file, 'a') as f:
        f.write('=' * 50 + ' Testing ' + '=' * 50 + '\n')

    # load testing data
    print("\nLoading testing data...")
    texts, labels, number_of_classes, sample_weights = load_data(args, 'test')

    test_dataset = MyDataset(texts, labels, args)
    print("Transferring testing data to iterator...")
    testing_params = {"batch_size": args.getint('Train', 'batch_size'),
                      "shuffle": False,
                      "num_workers": args.getint('Train', 'workers'),
                      "drop_last": True}
    test_generator = DataLoader(test_dataset, **testing_params)

    print('\nNumber of testing samples: ' + str(len(test_dataset)))
    with open(log_file, 'a') as f:
        f.write('\nNumber of testing samples: ' + str(len(test_dataset)) + '\n')

    model = CharacterLevelCNN(number_of_classes, args)

    print("=> loading weights from '{}'".format(args.get('Test', 'model_to_test')))
    #assert os.path.isfile(args.get('Test', 'model_to_test')), "=> no checkpoint found at '{}'".format(args.get('Test', 'model_to_test'))
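The script reads all of its settings from argsConfig.ini through configparser. A hypothetical minimal config covering only the keys this excerpt reads (section and key names come from the calls above, values are placeholders):

import configparser

cfg = configparser.ConfigParser()
cfg['Train'] = {'batch_size': '64', 'workers': '4'}
cfg['Test'] = {'model_log_dir': 'logs/test/', 'model_to_test': 'checkpoints/best.pth'}
with open('argsConfig.ini', 'w') as f:
    cfg.write(f)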
Example #4
import glob
import os

import torch
import torchvision.transforms as transforms

from data_loader import MyDataset


# hyper-parameters
batch_size = 1
USE_CUDA = torch.cuda.is_available()


root_dir = './data/test/'
fn_list = glob.glob(root_dir + '*.csv')
fn_list.sort()
ids = [os.path.basename(fn).split('.')[0] for fn in fn_list]

my_dataset = MyDataset(root_dir=root_dir,
                       ids=ids)

# sample = my_dataset[0]
# print(sample['pts_xyz'], sample['pts_label'], sample['pts_bbox'])

# test loader
test_loader = torch.utils.data.DataLoader(my_dataset, batch_size=batch_size, shuffle=False)

# load trained model
model = torch.load('model/model_127.pkl', map_location='cpu').eval()

if USE_CUDA:
    model = model.cuda()

for i, data in enumerate(test_loader, 0):
    ...  # inference loop body truncated in the original listing
Example #5
def predict(dim,
            names,
            weight,
            batch_size,
            pretrain_model_path,
            model_types=None):

    print('-' * 100)
    print('multi-models begin predicting ...')
    print('-' * 100)

    # read test data
    test_file = '/kaggle/input/quora-question-pairs/test.csv.zip'

    # data
    test_df = pd.read_csv(test_file)
    test_ids = test_df['test_id'].values.tolist()

    result_prob_tmp = torch.zeros((len(test_ids), 2))
    # load model
    for i, name in enumerate(names):

        # 3.17 add
        weight_ = weight[i]

        #model_path = '../model/' + name + '.pkl'
        output_model_file = os.path.join('output', name + '.pkl')
        state = torch.load(output_model_file)

        # 3.10 add
        model_type = model_types[i]

        # dataset / dataloader construction is identical for every model type
        test_iter = MyDataset(file=test_file,
                              is_train=False,
                              pretrain_model_path=pretrain_model_path[i])
        test_iter = get_dataloader(test_iter,
                                   batch_size,
                                   shuffle=False,
                                   drop_last=False)

        if model_type == 'mlp':
            model = MyModel(dim=dim[i],
                            pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'cnn':
            model = MyTextCNNModel(dim=dim[i],
                                   pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'rcnn':
            model = MyRCNNModel(dim=dim[i],
                                pretrain_model_path=pretrain_model_path[i])

        model.to(device)
        model.load_state_dict(state['model_state'])
        model.eval()
        print('-' * 20, 'model', i, '-' * 20)
        print('load model:%s, loss:%.4f, e:%d, lr:%.7f, time:%d' %
              (name, state['loss'], state['e'], state['lr'], state['time']))
        # predict
        with torch.no_grad():
            j = 0
            for batch in tqdm(test_iter):

                batch = [b.to(device) for b in batch]  # move the batch to the model's device
                out = model(batch, task='eval')
                out = out.cpu()  # gpu -> cpu

                if j == 0:
                    tmp = out  # initialize tmp with the first batch
                else:
                    tmp = torch.cat([tmp, out], dim=0)  # append subsequent predictions to tmp
                j += 1

        # 当前 模型预测完成
        print('model', i, 'predict finished!\n')
        # 3.17 按权重融合
        result_prob_tmp += (weight_ / len(names)) * tmp

        # free the model
        del model
        gc.collect()

        time.sleep(1)

    # 3.10 current fusion strategy: a simple weighted average of the probs
    _, result = torch.max(result_prob_tmp, dim=-1)
    result = result.numpy()

    # 3.16 update: if the prob for label 0 is greater than 3, treat it as label=0
    #     with open('tmp.txt', 'w', encoding='utf-8') as f:
    #         for r in result_prob_tmp:
    #             f.write(str(r) + '\n')

    # save result
    df = pd.DataFrame()
    df['test_id'] = test_ids
    df['is_duplicate'] = result
    df.to_csv("submission.csv", encoding='utf-8', index=False)
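The fusion above is weighted soft voting: each model's class probabilities are scaled by weight_ / len(names), summed across models, and argmax picks the label. A self-contained sketch with toy numbers:

import torch

probs = [torch.tensor([[0.9, 0.1]]), torch.tensor([[0.4, 0.6]])]  # two models, one sample
weights = [1.5, 0.5]
fused = sum(w / len(probs) * p for w, p in zip(weights, probs))
print(fused.argmax(dim=-1))  # tensor([0])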
Example #6
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        out = net(images)
        _, preds = torch.max(out.data, 1)
        correct = preds.eq(labels.data).sum().item()
        acc += correct
        n += labels.size(0)
    return acc / n


# train_data = torchvision.datasets.ImageFolder(root='/data/datasets/mnist/train',
#  transform=transforms.Compose(
#  [transforms.Resize(227), transforms.ToTensor()]))
# 训练时间更短,不知道为何
train_data = MyDataset(txt='/data1/zj/data/mnist/train.txt',
                       data_shape=(227, 227),
                       channel=3,
                       transform=transforms.ToTensor())
train_loader = Data.DataLoader(dataset=train_data,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=3)

test_data = MyDataset(txt='/data1/zj/data/mnist/test.txt',
                      data_shape=(227, 227),
                      channel=3,
                      transform=transforms.ToTensor())
test_loader = Data.DataLoader(dataset=test_data,
                              batch_size=batch_size,
                              shuffle=False,  # evaluation does not need shuffling
                              num_workers=3)
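Here MyDataset is built from a txt manifest instead of ImageFolder. A hypothetical sketch of such a dataset, assuming each manifest line is "<image_path> <label>" and channel=3 means RGB:

from PIL import Image
from torch.utils.data import Dataset

class TxtListDataset(Dataset):
    # Hypothetical stand-in for the MyDataset used above.
    def __init__(self, txt, data_shape=(227, 227), transform=None):
        with open(txt) as f:
            self.items = [line.split() for line in f if line.strip()]
        self.data_shape = data_shape
        self.transform = transform

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        path, label = self.items[idx]
        img = Image.open(path).convert('RGB').resize(self.data_shape)
        if self.transform:
            img = self.transform(img)
        return img, int(label)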
Example #7
def train():
    # Model
    efficient_transformer = Linformer(
        dim=128,
        seq_len=300 + 1,  # 300 patches + 1 cls token
        depth=12,
        heads=8,
        k=64)
    my_model = ViT(
        dim=128,
        image_size=320,
        patch_size=16,
        num_classes=25,
        transformer=efficient_transformer,
        channels=3,
    ).to(device)

    if os.path.exists('transformer/my_model.pt'):
        my_model.load_state_dict(torch.load('transformer/my_model.pt'))
        print('Load my_model.pt')

    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set,
                                                    batch_size=32,
                                                    shuffle=True,
                                                    pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  factor=0.5,
                                  patience=5,
                                  threshold=2e-1,
                                  verbose=True,
                                  min_lr=1e-5)
    bestTestAccuracy = 0

    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)
    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)

            loss = loss_func(prediction, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size/batch_size)} '
                f' Loss: {loss.clone().item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        with torch.no_grad():  # validation needs no gradients
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                loss = loss_func(prediction, labels)  # validation loss for this batch

                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                print(
                    f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size/batch_size)} '
                    f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                    f' Acc: {(100 * correct / total):.3f} Best-so-far: {100*bestTestAccuracy:.5f}'
                )

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100*bestTestAccuracy:.5f}')
            torch.save(
                my_model.state_dict(),
                f"transformer/my_model_{str(round(100*bestTestAccuracy,2)).replace('.', '_')}.pt"
            )

        scheduler.step(bestTestAccuracy)
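get_lr is called in the logging lines but not defined in this excerpt; a minimal sketch that reads the current rate from the optimizer:

def get_lr(optimizer):
    # report the learning rate of the first parameter group
    for param_group in optimizer.param_groups:
        return param_group['lr']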
Example #8
File: train.py, Project: tpvt99/nus-assign3
def train():
    my_model = Resnet(kernel_size=3,
                      filters=64,
                      inChannels=3,
                      input_shape=(3, 240, 320),
                      conv_nonlinearity='relu',
                      num_class=25)
    my_model = my_model.to(device)
    if os.path.exists('my_model.pt'):
        my_model.load_state_dict(torch.load('my_model.pt'))
        print('Load my_model.pt')
    batch_size = 32
    num_epoch = 100
    num_classes = 25
    learning_rate = 8e-4

    train_set = MyDataset(is_train=True, num_cat=num_classes)
    validation_set = MyDataset(is_train=False, num_cat=num_classes)

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True)
    validation_loader = torch.utils.data.DataLoader(validation_set,
                                                    batch_size=32,
                                                    shuffle=True,
                                                    pin_memory=True)

    optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate)
    loss_func = torch.nn.NLLLoss()
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  factor=0.5,
                                  patience=10,
                                  threshold=2e-1,
                                  verbose=True,
                                  min_lr=1e-5)
    bestTestAccuracy = 0

    print('Start training')
    train_size = len(train_loader.dataset)
    test_size = len(validation_loader.dataset)
    for epoch in range(num_epoch):
        total = 0
        correct = 0
        my_model.train()
        for i, data in enumerate(train_loader, 0):
            labels = data['label'].to(device)
            img = data['img'].to(device).float()
            prediction = my_model(img)

            loss = loss_func(prediction, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            print(
                f'Train | Epoch {epoch}/{num_epoch}, Batch {i}/{int(train_size/batch_size)} '
                f' Loss: {loss.clone().item():.3f} LR: {get_lr(optimizer):.6f}'
                f' Acc: {(100 * correct / total):.3f}')

        total = 0
        correct = 0
        my_model.eval()
        with torch.no_grad():  # validation needs no gradients
            for i, data in enumerate(validation_loader, 0):
                labels = data['label'].to(device)
                img = data['img'].to(device).float()
                prediction = my_model(img)
                loss = loss_func(prediction, labels)  # validation loss for this batch

                _, predicted = torch.max(prediction, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                print(
                    f'Test | Epoch {epoch}/{num_epoch}, Batch {i}/{int(test_size/batch_size)} '
                    f' Loss: {loss.item():.3f} LR: {get_lr(optimizer):.6f}'
                    f' Acc: {(100 * correct / total):.3f} Best-so-far: {100*bestTestAccuracy:.5f}'
                )

        if (correct / total) > bestTestAccuracy:
            bestTestAccuracy = correct / total
            print(f'Update best test: {100*bestTestAccuracy:.5f}')
            torch.save(
                my_model.state_dict(),
                f"my_model_{str(round(100*bestTestAccuracy,2)).replace('.', '_')}.pt"
            )

        scheduler.step(bestTestAccuracy)
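Unlike Example #7, this trainer uses NLLLoss, which expects log-probabilities, so the Resnet presumably ends in log_softmax. Equivalently, CrossEntropyLoss can be applied to raw logits, as this small check shows:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 25)
labels = torch.randint(0, 25, (4,))
nll = F.nll_loss(F.log_softmax(logits, dim=1), labels)  # NLLLoss on log-probs
ce = F.cross_entropy(logits, labels)                    # CrossEntropyLoss on logits
assert torch.allclose(nll, ce)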
Example #9
def train(
        batch_size=16,
        pretrain_model_path='',
        name='',
        model_type='mlp',
        after_bert_choice='last_cls',
        dim=1024,
        lr=1e-5,
        epoch=12,
        smoothing=0.05,
        sample=False,
        open_ad='',
        dialog_name='xxx'):

    if not pretrain_model_path or not name:
        raise ValueError('pretrain_model_path and name must be provided')

    print('\n********** model type:', model_type, '**********')
    print('batch_size:', batch_size)

    # load dataset
    train_file = '/kaggle/input/dataset/my_train.csv'
    dev_file = '/kaggle/input/dataset/my_dev.csv'

    train_num = len(pd.read_csv(train_file).values.tolist())
    val_num = len(pd.read_csv(dev_file).values.tolist())
    print('train_num: %d, dev_num: %d' % (train_num, val_num))

    # 选择模型
    if model_type in ['siam', 'esim', 'sbert']:
        assert 1 == -1

    else:
        train_iter = MyDataset(file=train_file,
                               is_train=True,
                               sample=sample,
                               pretrain_model_path=pretrain_model_path)
        train_iter = get_dataloader(train_iter,
                                    batch_size,
                                    shuffle=True,
                                    drop_last=True)
        dev_iter = MyDataset(file=dev_file,
                             is_train=True,
                             sample=sample,
                             pretrain_model_path=pretrain_model_path)
        dev_iter = get_dataloader(dev_iter,
                                  batch_size,
                                  shuffle=False,
                                  drop_last=False)

        if model_type == 'mlp':
            model = MyModel(dim=dim,
                            pretrain_model_path=pretrain_model_path,
                            smoothing=smoothing,
                            after_bert_choice='last_cls')

        elif model_type == 'cnn':
            model = MyTextCNNModel(dim=dim,
                                   pretrain_model_path=pretrain_model_path,
                                   smoothing=smoothing)

        elif model_type == 'rcnn':
            model = MyRCNNModel(dim=dim,
                                pretrain_model_path=pretrain_model_path,
                                smoothing=smoothing)

    # move the model to the GPU
    model.to(device)
    model_param_num = 0

    ##### 3.24 multi-gpu training
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    for p in model.parameters():
        if p.requires_grad:
            model_param_num += p.nelement()
    print('param_num:%d\n' % model_param_num)

    # 加入对抗训练,提升泛化能力;但是训练速度明显变慢 (插件式调用)
    # 3.12 change to FGM 更快!
    """
    if open_ad == 'fgm':
        fgm = FGM(model)
    elif open_ad == 'pgd':
        pgd = PGD(model)
        K = 3
    """
    # model-store-path
    #model_path = '/kaggle/output/' + name + '.pkl'  # the output model is saved under the current path by default
    output_dir = 'output'
    state = {}
    time0 = time.time()
    best_loss = 999
    early_stop = 0
    for e in range(epoch):
        print("*" * 100)
        print("Epoch:", e)
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=lr,
                             warmup=0.05,
                             t_total=len(train_iter))  # set up the optimizer
        train_loss = 0
        train_c = 0
        train_right_num = 0

        model.train()  # put the module in training mode
        print('training..., %s, e:%d, lr:%7f' % (name, e, lr))
        for batch in tqdm(train_iter):  # each step yields batch_size examples

            optimizer.zero_grad()  # clear the gradients
            batch = [b.to(device) for b in batch]  # CPU -> GPU

            # regular training step
            labels = batch[-1].view(-1).cpu().numpy()
            loss, bert_enc = model(batch, task='train',
                                   epoch=epoch)  # forward pass; compute the loss
            right_num = count_right_num(bert_enc, labels)

            # multi-gpu training!
            if n_gpu > 1:
                loss = loss.mean()

            loss.backward()  # backpropagate to compute parameter gradients

            #"""
            if open_ad == 'fgm':
                # 对抗训练
                fgm.attack()  # 在embedding上添加对抗扰动

                if model_type == 'multi-task':
                    loss_adv, _, _ = model(batch, task='train')
                else:
                    loss_adv, _ = model(batch, task='train')

                if n_gpu > 1:
                    loss_adv = loss_adv.mean()

                loss_adv.backward()  # 反向传播,并在正常的grad基础上,累加对抗训练的梯度
                fgm.restore()  # 恢复embedding参数

            elif open_ad == 'pgd':
                pgd.backup_grad()
                # adversarial training
                for t in range(K):
                    pgd.attack(is_first_attack=(
                        t == 0
                    ))  # perturb the embeddings; back up param.data on the first attack
                    if t != K - 1:
                        optimizer.zero_grad()
                    else:
                        pgd.restore_grad()

                    if model_type == 'multi-task':
                        loss_adv, _, _ = model(batch, task='train')
                    else:
                        loss_adv, _ = model(batch, task='train')

                    if n_gpu > 1:
                        loss_adv = loss_adv.mean()

                    loss_adv.backward()  # accumulate the adversarial gradients on top of the normal ones
                pgd.restore()  # restore the embedding parameters
            optimizer.step()  # update the parameters

            train_loss += loss.item()  # accumulate the loss
            train_c += 1
            train_right_num += right_num

        val_loss = 0
        val_c = 0
        val_right_num = 0

        model.eval()
        print('eval...')
        with torch.no_grad():  # no gradient bookkeeping during evaluation
            for batch in tqdm(dev_iter):  # each step yields batch_size examples
                batch = [b.to(device) for b in batch]

                labels = batch[-1].view(-1).cpu().numpy()
                loss, bert_enc = model(batch, task='train',
                                       epoch=epoch)  # forward pass; compute the loss
                right_num = count_right_num(bert_enc, labels)

                if n_gpu > 1:
                    loss = loss.mean()

                val_c += 1
                val_loss += loss.item()
                val_right_num += right_num

        train_acc = train_right_num / train_num
        val_acc = val_right_num / val_num

        print('train_acc: %.4f, val_acc: %.4f' % (train_acc, val_acc))
        print('train_loss: %.4f, val_loss: %.4f, time: %d' %
              (train_loss / train_c, val_loss / val_c, time.time() - time0))

        if val_loss / val_c < best_loss:
            early_stop = 0
            best_loss = val_loss / val_c
            best_acc = val_acc

            # 3.24 update 多卡训练时模型保存避坑:
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            model_to_save = model.module if hasattr(model, 'module') else model
            state['model_state'] = model_to_save.state_dict()
            state['loss'] = val_loss / val_c
            state['acc'] = val_acc
            state['e'] = e
            state['time'] = time.time() - time0
            state['lr'] = lr

            output_model_file = os.path.join(output_dir, name + '.pkl')
            torch.save(state, output_model_file)
            #torch.save(state, model_path)

            best_epoch = e
            cost_time = time.time() - time0
            tmp_train_acc = train_acc
            best_model = model

        else:
            early_stop += 1
            if early_stop == 2:
                break

            model = best_model
            lr = lr * 0.5
        print("best_loss:", best_loss)

    # 3.12 add: print the final best results
    print('-' * 30)
    print('best_epoch:', best_epoch, 'best_loss:', best_loss, 'best_acc:',
          best_acc, 'reach time:', cost_time, '\n')

    # model-clean
    del model
    gc.collect()

    # 实验结果写入日志
    """
Example #10
def main():
    args = configparser.ConfigParser()
    args.read('argsConfig.ini')


    if args.getboolean('Log', 'flush_history') == 1:
        objects = os.listdir(args.get('Log', 'log_path'))
        for f in objects:
            if os.path.isdir(args.get('Log', 'log_path') + f):
                shutil.rmtree(args.get('Log', 'log_path') + f)

    if args.getboolean('Log', 'delete_model_name_dir'):
        objects = os.listdir(args.get('Log', 'output'))
        for f in objects:
            if f == args.get('Log', 'model_name'):
                shutil.rmtree(args.get('Log', 'output') + args.get('Log', 'model_name') + '/')

    now = datetime.now()
    logdir = args.get('Log', 'log_path') + now.strftime("%Y%m%d-%H%M%S") + "/"
    os.makedirs(logdir)
    log_file = logdir + 'log.txt'
    writer = SummaryWriter(logdir)

    texts, labels, number_of_classes, sample_weights = load_data(args, 'train')

    class_names = sorted(list(set(labels)))
    class_names = [str(class_name - 1) for class_name in class_names]

    train_texts, X_dev, train_labels, y_dev_labels, train_sample_weights, _ = train_test_split(texts,
                                                                                               labels,
                                                                                               sample_weights,
                                                                                               train_size=args.getfloat(
                                                                                                   'Train',
                                                                                                   'train_size'),
                                                                                               test_size=args.getfloat(
                                                                                                   'Train',
                                                                                                   'dev_size'),
                                                                                               random_state=42,
                                                                                               stratify=labels)

    training_set = MyDataset(train_texts, train_labels, args)
    validation_set = MyDataset(X_dev, y_dev_labels, args)

    training_params = {"batch_size": args.getint('Train', 'batch_size'),
                       "shuffle": True,
                       "num_workers": args.getint('Train', 'workers'),
                       "drop_last": True}

    validation_params = {"batch_size": args.getint('Train', 'batch_size'),
                         "shuffle": False,
                         "num_workers": args.getint('Train', 'workers'),
                         "drop_last": True}

    if args.getboolean('Train', 'use_sampler'):
        train_sample_weights = torch.from_numpy(train_sample_weights)
        sampler = WeightedRandomSampler(train_sample_weights.type(
            'torch.DoubleTensor'), len(train_sample_weights))
        training_params['sampler'] = sampler
        training_params['shuffle'] = False

    training_generator = DataLoader(training_set, **training_params)
    validation_generator = DataLoader(validation_set, **validation_params)

    model = CharacterLevelCNN(number_of_classes, args)

    if args.getboolean('Model', 'visualize_model_graph'):
        x = torch.zeros((args.getint('Train', 'batch_size'),
                       args.getint('DataSet', 'char_num'),
                       args.getint('DataSet', 'l0')))
        out = model(x)
        make_dot(out).render("CharacterLevelCNN", format="png", quiet_view=True)

    if torch.cuda.is_available():
        model.cuda()

    # todo check other loss functions for binary and multi-label problems
    if args.get('Train', 'criterion') == 'nllloss':
        criterion = nn.NLLLoss()

    # criterion = nn.BCELoss()

    # optimization scheme
    if args.get('Train', 'optimizer') == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.getfloat('Train', 'lr'))
    elif args.get('Train', 'optimizer') == 'SGD':
        if args.get('Train', 'scheduler') == 'clr':
            optimizer = torch.optim.SGD(
                model.parameters(), lr=1, momentum=0.9, weight_decay=0.00001
            )
        else:
            optimizer = optim.SGD(model.parameters(), lr=args.getfloat('Train', 'lr'), momentum=0.9)
    elif args.get('Train', 'optimizer') == 'ASGD':
        optimizer = optim.ASGD(model.parameters(), lr=args.getfloat('Train', 'lr'))

    if os.path.isfile(args.get('Log', 'continue_from_model_checkpoint')):
        print("=> loading checkpoint from '{}'".format(args.get('Log', 'continue_from_model_checkpoint')))
        checkpoint = torch.load(args.get('Log', 'continue_from_model_checkpoint'))
        start_epoch = checkpoint['epoch']
        start_iter = checkpoint.get('iter', None)
        best_f1 = checkpoint.get('best_f1', None)
        if start_iter is None:
            start_epoch += 1  # Assume that we saved a model after an epoch finished, so start at the next epoch.
            start_iter = 0
        else:
            start_iter += 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        start_iter = 0
        start_epoch = 0
        best_f1 = 0
        best_epoch = 0

    if args.get('Train', 'scheduler') == 'clr':
        stepsize = int(args.getint('Train', 'clr_step_size') * len(training_generator))
        clr = utils.cyclical_lr(stepsize, args.getfloat('Train', 'clr_min_lr'), args.getfloat('Train', 'clr_max_lr'))
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
    else:
        scheduler = None
        lr_half_cnt = 0

    utils.init_log(log_file=log_file, args=args, labels=class_names)
    try:
        for epoch in range(start_epoch, args.getint('Train', 'epochs')):

            training_loss, training_accuracy, train_f1 = train(model,
                                                               training_generator,
                                                               optimizer,
                                                               criterion,
                                                               epoch,
                                                               start_iter,
                                                               writer,
                                                               log_file,
                                                               scheduler,
                                                               class_names,
                                                               args,
                                                               args.getint('Log', 'print_out_every'))

            validation_loss, validation_accuracy, validation_f1 = evaluate(model,
                                                                           validation_generator,
                                                                           criterion,
                                                                           epoch,
                                                                           writer,
                                                                           log_file)


            print('\n[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}'.
                  format(epoch + 1, args.getint('Train', 'epochs'), training_loss, training_accuracy, validation_loss,
                         validation_accuracy))
            print("=" * 50)

            with open(log_file, 'a') as f:
                f.write('[Epoch: {} / {}]\ttrain_loss: {:.4f} \ttrain_acc: {:.4f} \tval_loss: {:.4f} \tval_acc: {:.4f}\n'.
                  format(epoch + 1, args.getint('Train', 'epochs'), training_loss, training_accuracy, validation_loss,
                         validation_accuracy))
                f.write('=' * 50)

            # learning rate scheduling
            if args.get('Train', 'scheduler') == 'step':
                if args.get('Train', 'optimizer') == 'SGD' and ((epoch + 1) % 3 == 0) and lr_half_cnt < 10:
                    current_lr = optimizer.state_dict()['param_groups'][0]['lr']
                    current_lr /= 2
                    lr_half_cnt += 1
                    print('Decreasing learning rate to {0}'.format(current_lr))
                    with open(log_file, 'a') as f:
                        f.write('Decreasing learning rate to {0}\n'.format(current_lr))
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = current_lr

            if args.getboolean('Log', 'checkpoint'):

                state = {'epoch': epoch, 'optimizer': optimizer.state_dict(), 'best_f1': best_f1}

                if args.getint('Log', 'save_interval') > 0 and epoch % args.getint('Log', 'save_interval') == 0:
                    save_checkpoint(model, state, optimizer, args, epoch, validation_loss, validation_accuracy,
                                    validation_f1)

                if validation_f1 > best_f1:
                    best_f1 = validation_f1
                    best_epoch = epoch
                    save_checkpoint(model, state, optimizer, args, epoch, validation_loss, validation_accuracy,
                                    validation_f1)

            if args.getboolean('Train', 'early_stopping'):
                if epoch - best_epoch > args.getint('Train', 'patience') > 0:
                    print("Early-stopping: Stop training at epoch {}. The lowest loss achieved is {} at epoch {}".format(
                        epoch, validation_loss, best_epoch))
                    break
    except KeyboardInterrupt:
        print('Exit Keyboard interrupt\n')
        save_checkpoint(model, state, optimizer, args, epoch, validation_loss, validation_accuracy, validation_f1)
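utils.cyclical_lr is not shown; a minimal triangular-schedule sketch consistent with how it is used above (the 'clr' branch creates SGD with lr=1, so the LambdaLR multiplier acts as the absolute learning rate):

import math

def cyclical_lr(stepsize, min_lr, max_lr):
    # Triangular cyclical learning rate; returns a multiplier for LambdaLR.
    def relative(it, stepsize):
        cycle = math.floor(1 + it / (2 * stepsize))
        x = abs(it / stepsize - 2 * cycle + 1)
        return max(0.0, 1 - x)

    return lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize)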
Example #11
File: train.py, Project: luhavefun/segment
def train(opts):
    # device = torch.device('cpu') if not torch.cuda.is_available() or opts.cpu else torch.device('cuda')
    device = torch.device("cuda")
    print(device)
    # load dataset

    dataset_train = MyDataset('train.txt')
    dataset_test = MyDataset('test.txt')
    # define training and validation data loaders
    data_loader_train = torch.utils.data.DataLoader(dataset_train,
                                                    batch_size=opts.batch_size,
                                                    shuffle=True,
                                                    num_workers=1)

    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=opts.batch_size,
                                                   shuffle=False,
                                                   num_workers=1)

    model = Net()
    # model = nn.DataParallel(model)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=opts.lr,
                                 betas=(0.9, 0.99))
    # optimizer = torch.optim.Adamax(model.parameters(),lr=opts.lr,betas=(0.9,0.999),eps=1e-8,weight_decay=0.1)
    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
    #                                                step_size=50,
    #                                                gamma=0.1)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,milestones=[50,100,150,200,250,300],gamma=0.1)
    weights = torch.FloatTensor([6, 2, 5, 1]).to(device)  # note: defined but unused below
    loss_fct = MSELoss()
    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())

    # Print optimizer's state_dict
    print("Optimizer's state_dict:")
    for var_name in optimizer.state_dict():
        print(var_name, "\t", optimizer.state_dict()[var_name])
    train_loss_list = []
    train_acc_list = []
    test_loss_list = []
    test_acc_list = []
    # writer = SummaryWriter(log_dir='')
    for epoch in range(opts.epochs):
        train_batch_num = 0
        train_loss = 0.0
        model.train()
        counts = 0
        for seq, label in data_loader_train:
            seq = seq.to(device)
            label = label.to(device)
            seq = seq.unsqueeze(1)
            optimizer.zero_grad()
            pred = model(seq)
            loss = loss_fct(pred, label.view(-1))
            loss.backward()
            optimizer.step()
            train_batch_num += 1
            train_loss += loss.item()
            predict = pred.argmax(dim=1, keepdim=True)
            counts += predict.cpu().eq(
                label.cpu().view_as(predict)).sum().item()
        avg_acc = counts * 1.0 / len(data_loader_train.dataset)
        train_loss_list.append(train_loss / len(data_loader_train.dataset))
        train_acc_list.append(avg_acc)
        # writer.add_graph(model, seq)
        # write csv file
        train_loss_dataframe = pd.DataFrame(data=train_loss_list)
        train_acc_dataframe = pd.DataFrame(data=train_acc_list)
        train_loss_dataframe.to_csv('./output_results/train_loss.csv',
                                    index=False)
        train_acc_dataframe.to_csv('./output_results/train_accuracy.csv',
                                   index=False)

        model.eval()
        # for name,layer in model._modules.items():
        #     # view feature map
        #     seq_1 = seq.transpose(0,1)
        #     seq_grid = vutils.make_grid(seq_1,normalize=True,scale_each=True)
        #     writer.add_image(f'{name}_feature_maps',seq_grid,global_step=0)

        test_y = []
        test_y_pred = []
        counts = 0
        test_loss = 0
        test_batch_num = 0
        outs = []
        labels = []
        with torch.no_grad():
            for test_seq, test_label in data_loader_test:
                test_seq = test_seq.to(device)
                test_label = test_label.to(device)
                test_seq = test_seq.unsqueeze(1)
                t_pred = model(test_seq)
                outs.append(t_pred.cpu())
                labels.append(test_label.cpu())
                # accuracy
                loss = loss_fct(t_pred, test_label.view(-1))
                test_loss += loss.item()
                test_batch_num += 1
                test_y += list(test_label.data.cpu().numpy().flatten())
                test_y_pred += list(t_pred.data.cpu().numpy().flatten())
                predict = t_pred.argmax(dim=1, keepdim=True)
                counts += predict.cpu().eq(
                    test_label.cpu().view_as(predict)).sum().item()

        outs = torch.cat(outs, dim=0)
        labels = torch.cat(labels).reshape(-1)
        avg_acc = counts * 1.0 / len(data_loader_test.dataset)
        test_acc_list.append(avg_acc)
        test_loss_list.append(test_loss / len(data_loader_test.dataset))
        print(
            'epoch: %d, train loss: %.4f, test loss: %.4f,test accuracy: %.4f'
            % (epoch, train_loss / train_batch_num, test_loss / test_batch_num,
               avg_acc))
        # writer.add_scalar('scalar/train_loss', train_loss / train_batch_num, epoch)
        # writer.add_scalar('scalar/test_loss', test_loss / test_batch_num, epoch)
        # write csv file
        test_loss_dataframe = pd.DataFrame(data=test_loss_list)
        test_acc_dataframe = pd.DataFrame(data=test_acc_list)
        test_loss_dataframe.to_csv('./output_results/test_loss.csv',
                                   index=False)
        test_acc_dataframe.to_csv('./output_results/test_accuracy.csv',
                                  index=False)
    # writer.close()

    draw_test_info(test_loss_list, test_acc_list)
    draw_train_info(train_loss_list, train_acc_list)
    draw_roc_confusion(outs, labels)
Example #12
File: train.py, Project: jehovahxu/AdaIN
decoder = net.decoder
vgg = net.vgg

vgg.load_state_dict(torch.load(args.vgg))
vgg = nn.Sequential(*list(vgg.children())[:31])
network = net.Net(vgg, decoder)
network.train()
network.to(device)

content_tf = train_transform()
style_tf = train_transform()

# content_dataset = FlatFolderDataset(args.content_dir, content_tf)
# style_dataset = FlatFolderDataset(args.style_dir, style_tf)
content_dataset = MyDataset(dataroot=args.dataroot, datalist='files/list_train.txt', is_content=True)
style_dataset = MyDataset(dataroot=args.dataroot, datalist='files/list_train.txt', is_content=False)

content_iter = iter(data.DataLoader(
    content_dataset, batch_size=args.batch_size,
    sampler=InfiniteSamplerWrapper(content_dataset),
    num_workers=args.n_threads))
style_iter = iter(data.DataLoader(
    style_dataset, batch_size=args.batch_size,
    sampler=InfiniteSamplerWrapper(style_dataset),
    num_workers=args.n_threads))

optimizer = torch.optim.Adam(network.decoder.parameters(), lr=args.lr)

for i in tqdm(range(args.max_iter)):
    adjust_learning_rate(optimizer, iteration_count=i)
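InfiniteSamplerWrapper keeps the content/style iterators from ever raising StopIteration by yielding indices forever. The sampler in the AdaIN reference implementation is roughly:

import numpy as np
from torch.utils import data

def InfiniteSampler(n):
    # yield indices forever, reshuffling after each full pass
    i = n - 1
    order = np.random.permutation(n)
    while True:
        yield order[i]
        i += 1
        if i >= n:
            np.random.seed()
            order = np.random.permutation(n)
            i = 0

class InfiniteSamplerWrapper(data.sampler.Sampler):
    def __init__(self, data_source):
        self.num_samples = len(data_source)

    def __iter__(self):
        return iter(InfiniteSampler(self.num_samples))

    def __len__(self):
        return 2 ** 31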