Example #1
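# Trains a frame-level DNN acoustic model on THCHS-30 features with Adam and cross-entropy,
# cross-validating and saving a checkpoint after every epoch.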
def train(args):
    assert args.num_classes
    common.make_dir(args.checkout_dir)
    nnet = DNN((args.left_context + args.right_context + 1) * args.feat_dim, hidden_layer,
               hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train', left_context=args.left_context,
            right_context=args.right_context, model_type='dnn')
    train_loader  = data.DataLoader(dataset=train_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    test_dataset = THCHS30(root=args.data_dir, data_type='test', left_context=args.left_context,
            right_context=args.right_context, model_type='dnn')
    test_loader  = data.DataLoader(dataset=test_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)    
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)    
        th.save(nnet, common.join_path(args.checkout_dir, 'dnn.{}.pkl'.format(epoch + 1)))
Example #2
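# 5-fold cross-validation: a fresh DNN is trained on each fold and the average validation loss is reported.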
def run():
    df = pd.read_csv(config.TRAIN_PATH)
    kfold = KFold(n_splits=5, random_state=config.SEED, shuffle=True)
    fold_losses = []

    for i, (train_idx, val_idx) in enumerate(kfold.split(df)):
        print("-------------------------------------------------------")
        print(f"Training fold {i}")
        print("-------------------------------------------------------")
        train = df.iloc[train_idx]
        validation = df.iloc[val_idx]
        train_dataset = PicDataset(train)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.BATCH_SIZE
        )

        val_dataset = PicDataset(validation)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=config.BATCH_SIZE
        )

        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        model = DNN()
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
        loss = 0

        for _ in range(config.EPOCHS):
            engine.train_fn(train_data_loader, model, optimizer, device)
            loss = engine.eval_fn(val_data_loader, model, device)
        print(f"Loss on fold {i} is {loss}")
        fold_losses.append(loss)
        torch.save(model.state_dict(), f'./models/model_{i}.bin')

    print(f"Average loss on cross validation is {sum(fold_losses) / len(fold_losses)}")
Example #3
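# Truncated excerpt: sorts corpus sequences by length and trains a DNN with class-weighted
# cross-entropy and SGD over mini-batches.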
    weight = torch.FloatTensor(list(weight.values())).to(device)

data = sorted(corpus.examples.get('seq'), key=lambda x: len(x), reverse=True)

vocab_size = len(corpus.words2id)
logging.info('vocabulary size: {}'.format(vocab_size))
model = DNN(vocab_size=vocab_size,
            embedding_size=200,
            hidden_size=512,
            embedding=embedding)

model.to(device)

loss_function = nn.CrossEntropyLoss(weight=weight)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.train()

total_data = len(data)
batch_size = args['batch_size']
total_step = math.ceil(total_data / batch_size)
last_training_loss = 1000000000000
for epoch in range(args.get('epoch')):
    start = 0
    training_loss = 0
    for _ in tqdm(range(total_step)):
        batch = data[start:start + batch_size]
        start += batch_size
Example #4
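# Supervised regression training (MSE) with an SGD/Adam switch, cosine LR annealing,
# and best-model checkpointing on validation loss.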
def train(args, config, io):
    train_loader, validation_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")
    # print(len(train_loader), len(validation_loader))

    #Try to load models
    model = DNN(args).to(device)
    """if device == torch.device("cuda"):
        model = nn.DataParallel(model)"""
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))

    # for para in list(model.parameters())[:-5]:
    #     para.requires_grad=False
    # print(model)

    if args.use_sgd:
        # print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        # print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        """opt = optim.Adam([
        {'params': list(model.parameters())[:-1], 'lr':args.lr/50, 'weight_decay': 1e-4},
        {'params': list(model.parameters())[-1], 'lr':args.lr, 'weight_decay': 1e-4}
        ])
        """

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()

    best_test_loss = 9999999.
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        train_dis = 0.0
        count = 0.0
        model.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
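            # augment the batch with jitter and drop (augmentation helpers defined elsewhere)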
            data = drop(jitter(data, device), device)
            # data = jitter(data, device, delta=0.05)
            batch_size = data.shape[0]
            logits = model(data)
            loss = criterion(logits, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            dis = distance(logits, label)
            count += batch_size
            train_loss += loss.item() * batch_size
            train_dis += dis.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f, distance: %.6f' % (
            epoch, train_loss * 1.0 / count, train_dis * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        test_dis = 0.0
        count = 0.0
        model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data)
                loss = criterion(logits, label)
                dis = distance(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
                test_dis += dis.item() * batch_size
        outstr = 'Test %d, loss: %.6f, distance: %.6f' % (
            epoch, test_loss * 1.0 / count, test_dis * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
Example #5
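# Truncated excerpt: trains a DNN classifier on 28x28 inputs with Adam and cross-entropy,
# tracking running accuracy per mini-batch.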
        pass
    try:
        os.makedirs(args.log)
    except OSError:
        pass

    # load the training and test data
    train_loader, test_loader = get_data(args)

    # use CUDA if it is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    input_size = 28 * 28
    output_size = args.num_classes
    model = DNN(input_size=input_size, output_size=output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # compute loss and accuracy after every mini-batch
    model.train()
    for epoch in range(args.epochs):
        correct = 0
        total = 0
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            _, y_pred_t = torch.max(y_pred.data, 1)
            total += y.size(0)
            # print(y_pred_t, y.data)
            # print((y_pred_t == y).sum().item())
Example #6
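# Trains a DNN on topic features with a cosine-annealed Adam optimizer, logging the running loss
# periodically and checkpointing on the best dev F1.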
def train():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data_dict, topic_dict = dh.load_data()  # data_dict, [group2topic, mem2topic]

    train_data, train_label, dev_data, dev_label, test_data, test_label = dh.data_split(
        data_dict, topic_dict)
    train_dataset = dh.Dataset(train_data, train_label)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    dev_dataset = dh.Dataset(dev_data, dev_label)
    dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=True)

    lambda1 = lambda epoch: (
        epoch / args.warm_up_step
    ) if epoch < args.warm_up_step else 0.5 * (math.cos(
        (epoch - args.warm_up_step) /
        (args.n_epoch * len(train_dataset) - args.warm_up_step) * math.pi) + 1)
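    # note: lambda1 is defined but never used; the CosineAnnealingLR scheduler below drives the learning rate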

    model = DNN(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        len(train_loader) * args.n_epoch)

    global_step = 0
    best_f1 = 0.
    loss_deq = collections.deque([], args.report_step)
    for epoch in range(args.n_epoch):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            inputs = batch['input'].to(device)
            group_topic = batch['group_topic'].to(device)
            mem_topic = batch['mem_topic'].to(device)
            labels = batch['label'].to(device)
            output = model(inputs, mem_topic, group_topic, label=labels)
            loss = output[0]
            loss.backward()
            loss_deq.append(loss.item())
            optimizer.step()
            scheduler.step()
            global_step += 1

            if global_step % args.report_step == 0:
                logger.info('loss: {}, lr: {}, epoch: {}'.format(
                    np.average(loss_deq).item(),
                    optimizer.param_groups[0]['lr'],
                    global_step / len(train_loader)))
            if global_step % args.eval_step == 0:
                model.eval()
                eval_result = evaluation(model,
                                         data_loader=dev_loader,
                                         device=device)
                logger.info(eval_result)
                if eval_result['f1'] > best_f1:
                    torch.save(model,
                               './model/{}/torch.pt'.format(args.task_name))
                    best_f1 = eval_result['f1']
                model.train()
Example #7
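# Truncated excerpt: builds bag-of-words features for a Twitter dataset, reports parameter counts,
# and trains the DNN.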
        for w in st:
            tmp.add(w)
    word_index = {w: i for i, w in enumerate(tmp)}
    #import pickle
    #with open("bow.pkl", "wb") as f:
    #    pickle.dump(word_index, f)

    x = torch.zeros(len(train_x1), len(word_index))
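    # fill x with word counts: one row per example in train_x1, one column per vocabulary word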
    for i in range(len(train_x1)):
        for w in train_x1[i]:
            x[i][word_index[w]] += 1
    print(x.size())

    print("\nConstructing model...", flush=True)
    model = DNN(x.size(1)).to(device)
    total_param = sum(p.numel() for p in model.parameters())
    trainable_param = sum(p.numel() for p in model.parameters()
                          if p.requires_grad)
    print("{} parameters with {} trainable".format(total_param,
                                                   trainable_param),
          flush=True)

    print("\nStart training...", flush=True)
    train_dataset1 = TwitterDataset(x, train_y1)
    train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset1,
                                                batch_size=BATCH,
                                                shuffle=True,
                                                num_workers=4)
    train_model(train_loader1, model, device, LR)

    print("\nStart testing...", flush=True)
Example #8
def train(args, config, io):
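    # Mean-teacher semi-supervised training: a supervised MSE loss plus a consistency loss
    # between the student and an EMA teacher on jittered unlabelled batches.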
    train_loader, validation_loader, unlabelled_loader = get_loader(
        args, config)

    device = torch.device("cuda" if args.cuda else "cpu")

    #Try to load models
    model = DNN(args).to(device)
    ema_model = DNN(args).to(device)
    for param in ema_model.parameters():
        param.detach_()
    if device == torch.device("cuda"):
        model = nn.DataParallel(model)
        ema_model = nn.DataParallel(ema_model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
        ema_model.load_state_dict(torch.load(args.model_path))

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()
    consistency_criterion = nn.MSELoss()

    best_test_loss = 9999999.
    global_step = 0
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        ema_model.train()
        i = -1
        for (data, label), (u, _) in zip(cycle(train_loader),
                                         unlabelled_loader):
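            # each unlabelled batch is paired with a (cycled) labelled batch; mismatched batch sizes are trimmed below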
            i = i + 1
            if data.shape[0] != u.shape[0]:
                bt_size = np.minimum(data.shape[0], u.shape[0])
                data = data[0:bt_size]
                label = label[0:bt_size]
                u = u[0:bt_size]
            data, label, u = data.to(device), label.to(device), u.to(device)
            batch_size = data.shape[0]
            logits = model(data)
            class_loss = criterion(logits, label)

            u_student = jitter(u, device)
            u_teacher = jitter(u, device)
            logits_unlabeled = model(u_student)
            ema_logits_unlabeled = ema_model(u_teacher)
            ema_logits_unlabeled = ema_logits_unlabeled.detach()
            consistency_loss = consistency_criterion(logits_unlabeled,
                                                     ema_logits_unlabeled)
            if epoch < args.consistency_rampup_starts:
                consistency_weight = 0.0
            else:
                consistency_weight = get_current_consistency_weight(
                    args, args.final_consistency, epoch, i,
                    len(unlabelled_loader))

            consistency_loss = consistency_weight * consistency_loss
            loss = class_loss + consistency_loss

            opt.zero_grad()
            loss.backward()
            opt.step()

            global_step += 1
            # print(global_step)
            update_ema_variables(model, ema_model, args.ema_decay, global_step)

            count += batch_size
            train_loss += loss.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f' % (epoch, train_loss * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        ema_model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = ema_model(data)
                loss = criterion(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
        outstr = 'Test %d, loss: %.6f' % (epoch, test_loss * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(ema_model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(ema_model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
Example #9
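# MNIST transfer-learning driver: `--fit` trains a model on digits 0-4, `--transfer` freezes the
# pretrained body and retrains new top layers on digits 5-9.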
def main():
    print('> Starting execution...')

    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fit',
                       action='store_true',
                       help='fit the tuned model on digits 0-4')
    group.add_argument('--transfer',
                       action='store_true',
                       help='train a pretrained model on digits 5-9')

    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        metavar='E',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        metavar='L',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--early-stopping',
                        type=int,
                        default=7,
                        metavar='E',
                        help='early stopping (default: 7 epochs)')
    parser.add_argument(
        '--size',
        type=int,
        default=100,
        metavar='S',
        help='size of the training data for transfer learning (default: 100)')

    parser.add_argument('--seed',
                        type=int,
                        default=23,
                        metavar='S',
                        help='random seed (default: 23)')

    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()  # use cuda if available
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)  # random seed

    print('> Loading MNIST data')
    train_set = datasets.MNIST(MNIST_DATA_DIR,
                               train=True,
                               download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307, ), (0.3081, ))
                               ]))

    test_set = datasets.MNIST(MNIST_DATA_DIR,
                              train=False,
                              download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307, ), (0.3081, ))
                              ]))
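    # indices of digits 0-4 and 5-9, used to split the train and test sets below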

    train_digits_04 = np.where(train_set.train_labels < 5)[0]
    train_digits_59 = np.where(train_set.train_labels > 4)[0]

    test_digits_04 = np.where(test_set.test_labels < 5)[0]
    test_digits_59 = np.where(test_set.test_labels > 4)[0]

    if args.fit:
        # Training the tuned model on digits 0-4
        print('> Training a new model on MNIST digits 0-4')

        X_train_04, y_train_04, X_valid_04, y_valid_04 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_04, test_digits_04)

        torch.manual_seed(args.seed)

        print('> Initializing the model')

        model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True)
        model.apply(init_he_normal)  # He initialization

        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_04,
                                  y_train_04,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_04,
                                  y_valid=y_valid_04,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_04_PATH}')
        torch.save(model.state_dict(), MODEL_04_PATH)
    elif args.transfer:
        # Transfer learning
        print(
            '> Training a model on MNIST digits 5-9 from a pretrained model for digits 0-4'
        )

        if os.path.isfile(MODEL_04_PATH):
            print('> Loading the pretrained model')

            model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM,
                        batch_norm=True).to(device)
            model.load_state_dict(torch.load(MODEL_04_PATH))

            for param in model.parameters():
                param.requires_grad = False

            # Parameters of newly constructed modules have requires_grad=True by default
            model.fc4 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.fc5 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.out = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

            print('> Using saved model state')
        else:
            print(
                '> Model state file is not found, fit a model before the transfer learning'
            )
            print('> Stopping execution')
            return

        X_train_59, y_train_59, X_valid_59, y_valid_59 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_59[:args.size],
            test_digits_59)

        # fixing the issues with labels
        y_train_59 = y_train_59 - 5
        y_valid_59 = y_valid_59 - 5

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_59,
                                  y_train_59,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_59,
                                  y_valid=y_valid_59,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_59_PATH}')
        torch.save(model.state_dict(), MODEL_59_PATH)
    else:
        print('> Incorrect mode, try either `--fit` or `--transfer`')
        print('> Stopping execution')
Example #10
    model = DNN(input_size, hidden_size, out_size)
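    # Truncated excerpt: wraps the DNN in DataParallel, streams serialized audio samples, and trains
    # with MSE while logging to TensorBoard, using pre-fitted input/label scalers loaded from disk.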
    model = torch.nn.DataParallel(model.to(device), device_ids=use_devices)  # use GPU
    print(model)
    # load data
    print('loading data...')
    sample_generator = AudioSampleGenerator(os.path.join(in_path, ser_data_fdr))
    random_data_loader = DataLoader(
        dataset=sample_generator,
        batch_size=batch_size,  # specified batch size here
        shuffle=True,
        num_workers=1,
        drop_last=True,  # drop the last batch that cannot be divided by batch_size
        pin_memory=True)
    print('DataLoader created')
    #optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.5, 0.999))
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    # create tensorboard writer
    # The logs will be stored NOT under the run_time, but under segan_data_out/'tblog_fdr'.
    # This way, tensorboard can show graphs for each experiment in one board
    tbwriter = SummaryWriter(log_dir=tblog_path)
    print('TensorboardX summary writer created')

    print('Starting Training...')
    total_steps = 1
    MSE = nn.MSELoss()
    
    scaler_path_input = os.path.join(scaler_dir, "scaler_input.p")
    scaler_input = pickle.load(open(scaler_path_input, 'rb'))
    scaler_path_label = os.path.join(scaler_dir, "scaler_label.p")
    scaler_label = pickle.load(open(scaler_path_label, 'rb'))
    for epoch in range(num_epochs):