x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


# Instantiate the model defined above and show its layer structure.
net = Net()
print(net)

# %%
# CrossEntropyLoss combines log-softmax and NLL loss in one criterion.
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer = optim.Adadelta(net.parameters(), lr=1.0)

# %%
# Number of training epochs
num_epochs = 30

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))
    print('-------------')

    # Run one training pass and one validation pass per epoch.
    for phase in ['train', 'valid']:

        if phase == 'train':
            # Set the model to training mode
            net.train()
        else:
            # NOTE(review): snippet is truncated here — the eval-mode branch
            # (presumably net.eval() and the batch loop) is missing.
예제 #2
0
    criterion = criterion.cuda()

# Wrap the input tensors for autograd.
# NOTE(review): torch.autograd.Variable has been a no-op wrapper since
# PyTorch 0.4 — tensors carry autograd state directly; confirm torch version.
image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

# setup optimizer
if opt.adam:
    optimizer = optim.Adam(crnn.parameters(),
                           lr=opt.lr,
                           betas=(opt.beta1, 0.999))
elif opt.adadelta:
    # Adadelta adapts per-parameter step sizes; torch's default lr=1.0 applies.
    optimizer = optim.Adadelta(crnn.parameters())
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)


def val(net, dataset, criterion, max_iter=100):
    """Validation pass over `dataset` (body truncated in this snippet)."""
    print('Start val')

    # Freeze parameters so no gradients are tracked during evaluation.
    # NOTE(review): this iterates the *global* `crnn` rather than the `net`
    # parameter — likely a bug if val() is ever called with a different model.
    for p in crnn.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(dataset,
                                              shuffle=True,
                                              batch_size=opt.batchSize,
                                              num_workers=int(opt.workers))
예제 #3
0
    def __init__(self, opt):
        """Build the agent: pick a model class from ``opt['model_name']``,
        then wire up the evaluator, loss, optimizer and LR scheduler."""
        super(AdvisorAgent, self).__init__()
        self.model_name = opt['model_name']
        self.evaluate_every = opt['evaluate_every_steps']

        # Map model-name strings to module classes; anything unknown falls
        # back to the BiLSTM baseline (same behaviour as the original chain).
        module_by_name = {
            'advisor': Advisor,
            'hred_db': HRED_DB,
            'hred_db0': HRED_DB0,
            'hred': HRED,
        }
        Module = module_by_name.get(self.model_name, BiLSTM)

        opt['cuda'] = not opt['no_cuda'] and torch.cuda.is_available()
        if opt['cuda']:
            print('[ Using CUDA ]')
            torch.cuda.device(opt['gpu'])
            # torch.cuda.device([0, 1])

            # cudnn benchmark mode is faster when input sizes do not vary.
            cudnn.benchmark = True

        self.opt = opt
        # Optional pre-trained word embeddings.
        pre_w2v = load_ndarray(opt['pre_word2vec']) if opt['pre_word2vec'] else None

        self.evaluator = Evaluator(CrossEntropyLoss(),
                                   batch_size=opt['batch_size'],
                                   use_cuda=opt['cuda'],
                                   model_name=self.model_name)
        self.score_type = 'clf'

        self.model = Module(
            opt['vocab_size'],
            opt['word_emb_size'],
            opt['hidden_size'],
            init_w2v=pre_w2v,
            enc_type=opt['enc_type'],
            rnn_type=opt['rnn_type'],
            bidirectional=not opt['no_bidirectional'],
            utter_enc_dropout=opt['utter_enc_dropout'],
            knowledge_enc_dropout=opt['knowledge_enc_dropout'],
            atten_type=opt['atten_type'],
            score_type=self.score_type,
            use_cuda=opt['cuda'],
            # phase=opt['phase']
        )

        if opt['cuda']:
            self.model.cuda()
            # self.model = torch.nn.DataParallel(self.model, device_ids=[0, 1])

        if self.score_type == 'ranking':
            # MultiLabelMarginLoss, per mini-batch sample:
            # loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x.size(0)
            self.loss_fn = MultiLabelMarginLoss()
        else:
            self.loss_fn = CrossEntropyLoss()

        # Only optimize parameters that are not frozen.
        optim_params = [p for p in self.model.parameters() if p.requires_grad]
        lr = opt['learning_rate']
        optimizer_by_name = {
            'sgd': optim.SGD,
            'adam': optim.Adam,
            'adadelta': optim.Adadelta,
            'adagrad': optim.Adagrad,
            'adamax': optim.Adamax,
            'rmsprop': optim.RMSprop,
        }
        if opt['optimizer'] not in optimizer_by_name:
            raise NotImplementedError('Optimizer not supported.')
        optimizer_cls = optimizer_by_name[opt['optimizer']]
        self.optimizers = {self.model_name: optimizer_cls(optim_params, lr=lr)}

        # Reduce the LR when the monitored metric plateaus.
        self.scheduler = ReduceLROnPlateau(self.optimizers[self.model_name], mode='min',
                                           patience=opt['valid_patience'] // 3, verbose=True)

        if opt.get('model_file') and os.path.isfile(opt['model_file']):
            print('Loading existing model parameters from ' +
                  opt['model_file'])
            self.load(opt['model_file'])
예제 #4
0
def main():
    """MNIST training entry point with two-device pipeline parallelism.

    Parses CLI settings, builds the MNIST train/test loaders, wraps the
    module-level ``net`` in a Pipe split across devices 0 and 1, then runs
    the train/test cycle with Adadelta and a per-epoch StepLR decay,
    timing each phase.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument("--batch-size",
                        type=int,
                        default=64,
                        metavar="N",
                        help="input batch size for training (default: 64)")
    parser.add_argument("--test-batch-size",
                        type=int,
                        default=1000,
                        metavar="N",
                        help="input batch size for testing (default: 1000)")
    parser.add_argument("--epochs",
                        type=int,
                        default=14,
                        metavar="N",
                        help="number of epochs to train (default: 14)")
    parser.add_argument("--lr",
                        type=float,
                        default=1.0,
                        metavar="LR",
                        help="learning rate (default: 1.0)")
    parser.add_argument("--gamma",
                        type=float,
                        default=0.7,
                        metavar="M",
                        help="Learning rate step gamma (default: 0.7)")
    parser.add_argument("--dry-run",
                        action="store_true",
                        default=False,
                        help="quickly check a single pass")
    parser.add_argument("--seed",
                        type=int,
                        default=1,
                        metavar="S",
                        help="random seed (default: 1)")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=10,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument("--save-model",
                        action="store_true",
                        default=False,
                        help="For Saving the current Model")
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    # BUG FIX: the original built one kwargs dict from args.batch_size and
    # used it for BOTH loaders, silently ignoring --test-batch-size.  The
    # test loader now honours its own flag.
    loader_opts = {"num_workers": 1, "pin_memory": True, "shuffle": True}
    train_kwargs = {"batch_size": args.batch_size, **loader_opts}
    test_kwargs = {"batch_size": args.test_batch_size, **loader_opts}

    # Standard MNIST mean/std normalisation constants.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset1 = datasets.MNIST("../data",
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    # Split the module-level `net` into two balanced halves on devices 0 and
    # 1; each mini-batch is pipelined through in 2 micro-batches.
    model = net
    model = Pipe(model, balance=[6, 6], devices=[0, 1], chunks=2)
    device = model.devices[0]

    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        tic = time.perf_counter()
        train(args, model, device, train_loader, optimizer, epoch)
        toc = time.perf_counter()
        # typo fix: "TRANING" -> "TRAINING"
        print(f">>> TRAINING Time {toc - tic:0.4f} seconds")

        tic = time.perf_counter()
        test(model, device, test_loader)
        toc = time.perf_counter()
        print(f">>> TESTING Time {toc - tic:0.4f} seconds")
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
예제 #5
0
    # decoder = torch.nn.DataParallel(decoder, device_ids=range(opt.ngpu))
    image = image.cuda()
    text = text.cuda()
    criterion = criterion.cuda()

# loss averager
loss_avg = utils.averager()

# setup optimizer
# NOTE(review): `opt . man` (with stray spaces) looks like scrape-garbled
# text — the sibling CRNN snippet above tests `opt.adam` at this point;
# confirm the intended flag before relying on this branch.
if  opt . man :
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=opt.lr,
                           betas=(opt.beta1, 0.999))
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=opt.lr,
                        betas=(opt.beta1, 0.999))
elif opt.adadelta:
    # NOTE(review): this branch creates a single `optimizer` for the encoder
    # only — the decoder gets no optimizer here, and the name differs from
    # the encoder_optimizer/decoder_optimizer pair the other branches bind.
    optimizer = optim.Adadelta(encoder.parameters(), lr=opt.lr)
else:
    encoder_optimizer = optim.RMSprop(encoder.parameters(), lr=opt.lr)
    decoder_optimizer = optim.RMSprop(decoder.parameters(), lr=opt.lr)


def val(encoder, decoder, criterion, batchsize, dataset, teach_forcing=False, max_iter=100):
    print('Start val')

    for e, d in zip(encoder.parameters(), decoder.parameters()):
        e.requires_grad = False
        d.requires_grad = False

    encoder.eval()
    decoder.eval()
    data_loader = torch.utils.data.DataLoader(
예제 #6
0
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


# Build the GAN pair: G maps a latent vector to a 784-dim (28x28) output,
# D maps a 784-dim input through a 256-unit layer to a single score.
netG = Generator(latent_size, hidden_size, 28**2)
netD = Discriminator(28**2, 256, 1)
# Apply the custom normal-distribution initialisation (weights_init above)
# to every Conv/BatchNorm submodule of both networks.
netG.apply(weights_init)
netD.apply(weights_init)
netG.to(device)
netD.to(device)
# Binary cross-entropy on the discriminator's output.
criterion = nn.BCELoss()
# Adadelta with its default lr=1.0 for both players.
optimizerD = optim.Adadelta(netD.parameters())
optimizerG = optim.Adadelta(netG.parameters())
# optimizerD = torch.optim.Adam(netD.parameters(), lr=lr)
# optimizerG = torch.optim.Adam(netG.parameters(), lr=lr)

# netG.cuda()
# Print layer-by-layer summaries (presumably torchsummary — confirm import).
summary(netG, input_size=(latent_size, 1, 1))
summary(netD, input_size=(1, 28, 28))

# In[ ]:


def train(netG, netD, num_epochs, optG, optD, data_loader, test_data_loader,
          criterion):

    for one in data_loader:
예제 #7
0
 def setup_update(self, weights):
     """Create the torch Adadelta optimizer backing this wrapper."""
     super(AdaDeltaTorch, self).setup_update(weights)
     # Deferred import so torch is only required when this backend is used.
     import torch.optim as topt
     # Positional args follow torch.optim.Adadelta(params, lr, rho, eps, weight_decay).
     self.torch_optimizer = topt.Adadelta(self.parameters, self.lr, self.rho, self.eps, self.weight_decay)
def _evaluate_test_set(model, data_loader, criterion_classifier, criterion_ae, use_gpu):
    """Run one eval-mode pass over `data_loader`.

    Returns three parallel per-batch lists: classifier losses, autoencoder
    losses and frame error rates.  (Extracted: the original duplicated this
    loop verbatim before training and after every epoch.)
    """
    model.eval()
    losses, ae_losses, fers = [], [], []
    for batch_x, batch_l, lab in data_loader:

        # Sort the batch by decreasing sequence length before packing.
        _, indices = torch.sort(batch_l, descending=True)
        if use_gpu:
            batch_x = Variable(batch_x[indices]).cuda()
            batch_l = Variable(batch_l[indices]).cuda()
            lab = Variable(lab[indices]).cuda()
        else:
            batch_x = Variable(batch_x[indices])
            batch_l = Variable(batch_l[indices])
            lab = Variable(lab[indices])

        # Main forward pass
        class_out, ae_out = model(batch_x, batch_l)

        # Convert the padded sequence tensors to frame-wise form
        class_out = pad2list(class_out, batch_l)
        batch_x = pad2list(batch_x, batch_l)
        ae_out = pad2list(ae_out, batch_l)
        lab = pad2list(lab, batch_l)

        losses.append(criterion_classifier(class_out, lab).item())
        ae_losses.append(criterion_ae(ae_out, batch_x).item())

        if use_gpu:
            fers.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
        else:
            fers.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))

    return losses, ae_losses, fers


def run(config):
    """Adapt a pre-trained AE+classifier network with anchored updates.

    Loads the checkpoint named by ``config.model``, freezes the classifier
    and autoencoder heads (only the encoder adapts), then per step combines
    an unsupervised adaptation objective (AE reconstruction minus an
    m-measure term) on the adaptation set with a supervised anchor loss on
    randomly drawn, previously seen utterances.  The model is scored on the
    test set every epoch and checkpointed every ``config.model_save_interval``
    epochs.
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load model (map_location keeps tensors on CPU regardless of origin).
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetAEClassifierMultitask(nnet['feature_dim'] * nnet['num_frames'], nnet['num_classes'],
                                      nnet['encoder_num_layers'], nnet['classifier_num_layers'], nnet['ae_num_layers'],
                                      nnet['hidden_dim'],
                                      nnet['bn_dim'])
    model.load_state_dict(nnet['model_state_dict'])

    # Only the encoder should adapt: freeze the classifier and AE heads.
    for p in model.classifier.parameters():
        p.requires_grad = False

    for p in model.ae.parameters():
        p.requires_grad = False

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (nnet['encoder_num_layers']))
    logging.info('Classifier Number of Layers: %d' % (nnet['classifier_num_layers']))
    logging.info('AE Number of Layers: %d' % (nnet['ae_num_layers']))
    # NOTE(review): the original source had a syntax error here —
    #   logging.info('AR Time shift: %d' % )
    # The AR time-shift value is not present in this checkpoint dict, so the
    # stat is omitted; restore it with the correct key if one exists.
    # BUG FIX: "Hidden Dimension" previously logged nnet['feature_dim'].
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Bottleneck dimension: %d' % (nnet['bn_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    sys.stdout.flush()

    if config.use_gpu:
        # Set environment variable for GPU ID (renamed: `id` shadowed the builtin).
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id

        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()
    criterion_ae = nn.MSELoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Checkpoint the starting point as epoch 0.
    # BUG FIX: the original passed open(model_path, 'wb') directly and never
    # closed the handle; `with` guarantees the file is flushed and closed.
    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    with open(model_path, 'wb') as f:
        torch.save({
            'epoch': 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, f)

    # Per-epoch statistics.
    ep_ae_adapt = []
    ep_mm_adapt = []
    ep_loss_anchor = []
    ep_fer_anchor = []
    ep_ae_anchor = []
    ep_loss_test = []
    ep_fer_test = []
    ep_ae_test = []

    # Load Datasets

    # Anchor set: utterances are loaded lazily by id during training.
    path = os.path.join(config.egs_dir, config.anchor_set)
    with open(os.path.join(path, 'lengths.pkl'), 'rb') as f:
        lengths_anchor = pickle.load(f)
    labels_anchor = torch.load(os.path.join(path, 'labels.pkl'))
    anchor_ids = list(labels_anchor.keys())

    # Adaptation Set (unlabelled)
    dataset_adapt = nnetDatasetSeqAE(os.path.join(config.egs_dir, config.adapt_set))
    data_loader_adapt = torch.utils.data.DataLoader(dataset_adapt, batch_size=config.batch_size, shuffle=True)

    # Test Set
    dataset_test = nnetDatasetSeq(os.path.join(config.egs_dir, config.test_set))
    data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=config.batch_size, shuffle=True)

    # Baseline performance on the test set before any adaptation.
    test_losses, test_ae_losses, test_fer = _evaluate_test_set(
        model, data_loader_test, criterion_classifier, criterion_ae, config.use_gpu)

    print_log = "Initial Testset Error : Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f} :: Adapt (Test) AE Loss: {:.3f}".format(
        np.mean(test_losses), np.mean(test_fer), np.mean(test_ae_losses))

    logging.info(print_log)

    for epoch_i in range(config.epochs):

        ######################
        ##### Adaptation #####
        ######################

        model.train()
        adapt_ae_losses = []
        adapt_mm_losses = []
        anchor_losses = []
        anchor_ae_losses = []
        anchor_fer = []

        # Main training loop

        for batch_x, batch_l in data_loader_adapt:

            # First do the adaptation

            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            # Main forward pass
            optimizer.zero_grad()
            class_out, ae_out = model(batch_x, batch_l)

            # Convert the padded sequence tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            class_out = pad2list(class_out, batch_l)

            loss_ae = criterion_ae(ae_out, batch_x)
            mm_loss = mmeasure_loss(class_out, use_gpu=config.use_gpu)
            # Unsupervised objective: reconstruct well, maximise the m-measure.
            loss = config.adapt_weight * loss_ae - config.mm_weight * mm_loss
            adapt_ae_losses.append(loss_ae.item())
            adapt_mm_losses.append(mm_loss.item())

            # The backward step is deferred until the anchor loss is added below.

            # Anchor the parameters as close as possible to behaviour on
            # previously seen data, sampled randomly each step.
            ids = [random.choice(anchor_ids) for _ in range(config.batch_size)]
            batch_x = torch.cat([torch.load(os.path.join(path, index))[None, :, :] for index in ids])
            batch_l = torch.cat([torch.IntTensor([lengths_anchor[index]]) for index in ids])
            lab = torch.cat([labels_anchor[index][None, :] for index in ids])

            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Main forward pass (anchor batch)
            optimizer.zero_grad()
            class_out, ae_out = model(batch_x, batch_l)

            # Convert the padded sequence tensors to frame-wise form
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss_classifier = criterion_classifier(class_out, lab)
            loss_ae = criterion_ae(ae_out, batch_x)
            loss += config.anchor_weight * (loss_ae + loss_classifier)  # Use all the loss for anchor set

            anchor_losses.append(loss_classifier.item())
            anchor_ae_losses.append(loss_ae.item())

            if config.use_gpu:
                anchor_fer.append(compute_fer(class_out.cpu().data.numpy(), lab.cpu().data.numpy()))
            else:
                anchor_fer.append(compute_fer(class_out.data.numpy(), lab.data.numpy()))
            loss.backward()
            optimizer.step()

        ## Test it on the WSJ test set
        test_losses, test_ae_losses, test_fer = _evaluate_test_set(
            model, data_loader_test, criterion_classifier, criterion_ae, config.use_gpu)

        ep_ae_adapt.append(np.mean(adapt_ae_losses))
        ep_mm_adapt.append(np.mean(adapt_mm_losses))

        ep_loss_anchor.append(np.mean(anchor_losses))
        ep_fer_anchor.append(np.mean(anchor_fer))
        ep_ae_anchor.append(np.mean(anchor_ae_losses))

        ep_loss_test.append(np.mean(test_losses))
        ep_fer_test.append(np.mean(test_fer))
        ep_ae_test.append(np.mean(test_ae_losses))
        print_log = "Epoch: {:d} Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f}".format(epoch_i + 1,
                                                                                               ep_loss_test[-1],
                                                                                               ep_fer_test[-1])

        print_log += " || Anchor loss : {:.3f} :: Anchor FER: {:.2f}".format(ep_loss_anchor[-1], ep_fer_anchor[-1])

        print_log += " || AE Loss (Adapt) : {:.3f} :: AE Loss (Anchor) : {:.3f} :: AE Loss (Test) : {:.3f} ".format(
            ep_ae_adapt[-1],
            ep_ae_anchor[-1], ep_ae_test[-1])

        print_log += " || Adapt mm loss : {:.3f} ".format(ep_mm_adapt[-1])

        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # BUG FIX: close the checkpoint file deterministically (see above).
            with open(model_path, 'wb') as f:
                torch.save({
                    'epoch': epoch_i + 1,
                    'feature_dim': nnet['feature_dim'],
                    'num_frames': nnet['num_frames'],
                    'num_classes': nnet['num_classes'],
                    'encoder_num_layers': nnet['encoder_num_layers'],
                    'classifier_num_layers': nnet['classifier_num_layers'],
                    'ae_num_layers': nnet['ae_num_layers'],
                    'ep_ae_adapt': ep_ae_adapt,
                    'ep_mm_adapt': ep_mm_adapt,
                    'ep_loss_anchor': ep_loss_anchor,
                    'ep_fer_anchor': ep_fer_anchor,
                    'ep_ae_anchor': ep_ae_anchor,
                    'ep_loss_test': ep_loss_test,
                    'ep_fer_test': ep_fer_test,
                    'ep_ae_test': ep_ae_test,
                    'hidden_dim': nnet['hidden_dim'],
                    'bn_dim': nnet['bn_dim'],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()}, f)
예제 #9
0
def build_torch_optimizer(model, opt):
    """Construct the optimizer described by ``opt`` for ``model``.

    Adam keeps the widely used defaults suggested by the original paper
    (https://arxiv.org/pdf/1412.6980.pdf) — in particular beta2=0.999,
    the established value also used by TensorFlow and Keras (the
    "Attention is all you need" paper used beta2=0.98 instead).
    'sparseadam' splits trainable parameters into sparse (embedding) and
    dense groups, each with its own Adam variant.  When
    ``opt.model_dtype == 'fp16'`` the result is wrapped for mixed
    precision via apex.

    Args:
      model: The model to optimize.
      opt: The dictionary of options.

    Returns:
      A ``torch.optim.Optimizer`` instance.

    Raises:
      ValueError: if ``opt.optim`` names an unknown optimizer.
    """
    params = [p for p in model.parameters() if p.requires_grad]
    betas = [opt.adam_beta1, opt.adam_beta2]

    # Factories for the single-optimizer cases; wrapped in lambdas so only
    # the selected optimizer is ever constructed.
    simple_factories = {
        'sgd': lambda: optim.SGD(params,
                                 lr=opt.learning_rate,
                                 momentum=0.9,
                                 weight_decay=5e-3),
        'adagrad': lambda: optim.Adagrad(
            params,
            lr=opt.learning_rate,
            initial_accumulator_value=opt.adagrad_accumulator_init),
        'adadelta': lambda: optim.Adadelta(params, lr=opt.learning_rate),
        'adafactor': lambda: AdaFactor(params,
                                       non_constant_decay=True,
                                       enable_factorization=True,
                                       weight_decay=0),
        'adam': lambda: optim.Adam(params,
                                   lr=opt.learning_rate,
                                   betas=betas,
                                   eps=1e-9),
        # FusedAdam() copy of an old Apex repo.
        'fusedadam': lambda: FusedAdam(params,
                                       lr=opt.learning_rate,
                                       betas=betas),
    }

    if opt.optim in simple_factories:
        optimizer = simple_factories[opt.optim]()
    elif opt.optim == 'sparseadam':
        dense, sparse = [], []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            # TODO: Find a better way to check for sparse gradients.
            (sparse if 'embed' in name else dense).append(param)
        optimizer = MultipleOptimizer([
            optim.Adam(dense, lr=opt.learning_rate, betas=betas, eps=1e-8),
            optim.SparseAdam(sparse,
                             lr=opt.learning_rate,
                             betas=betas,
                             eps=1e-8)
        ])
    else:
        raise ValueError('Invalid optimizer type: ' + opt.optim)

    if opt.model_dtype == 'fp16':
        import apex
        if opt.optim != 'fusedadam':
            # New AMP API from apex.
            loss_scale = "dynamic" if opt.loss_scale == 0 else opt.loss_scale
            model, optimizer = apex.amp.initialize(
                [model, model.generator],
                optimizer,
                opt_level=opt.apex_opt_level,
                loss_scale=loss_scale,
                keep_batchnorm_fp32=None)
        else:
            # Old FusedAdam path: wrap it in apex's FP16_Optimizer.
            optimizer = apex.optimizers.FP16_Optimizer(
                optimizer,
                static_loss_scale=opt.loss_scale,
                dynamic_loss_scale=opt.loss_scale == 0)
    return optimizer
예제 #10
0
    def optimization_algorithms(SCI_optimizer, cnn, LR, SCI_SGD_MOMENTUM,
                                REGULARIZATION):
        """Build a torch optimizer for ``cnn`` from a name or numeric code.

        Args:
            SCI_optimizer: selector — either a string ('Adam', 'AMSGrad',
                'AdamW', 'RMSprop', 'SGD', 'Adadelta', 'Rprop', 'Adamax',
                'ASGD') or an int-convertible code 1-9 in the same order.
            cnn: module whose parameters will be optimized.
            LR: learning rate.
            SCI_SGD_MOMENTUM: momentum, used only by SGD.
            REGULARIZATION: weight-decay (L2) coefficient.

        Returns:
            A configured ``torch.optim.Optimizer``.

        Raises:
            ValueError: for an unrecognized selector.  (BUG FIX: the original
            used independent ``if`` statements with no fallback, so an
            unknown selector crashed with UnboundLocalError at ``return``;
            it now fails fast with a clear message.)
        """
        # NOTE(review): betas=(0.01, 0.999) is preserved from the original,
        # but beta1=0.01 is far below the usual 0.9 default — confirm intended.
        if type(SCI_optimizer) is str:
            if SCI_optimizer == 'Adam':
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            elif SCI_optimizer == 'AMSGrad':
                # AMSGrad variant of Adam.
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            elif SCI_optimizer == 'AdamW':
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            elif SCI_optimizer == 'RMSprop':
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            elif SCI_optimizer == 'SGD':
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            elif SCI_optimizer == 'Adadelta':
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            elif SCI_optimizer == 'Rprop':
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            elif SCI_optimizer == 'Adamax':
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            elif SCI_optimizer == 'ASGD':
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       weight_decay=REGULARIZATION)
            else:
                # 'SparseAdam', 'Adagrad' and 'LBFGS' were commented out in
                # the original; anything unhandled is an explicit error.
                raise ValueError('Unsupported optimizer: %r' % (SCI_optimizer,))
        else:
            code = int(SCI_optimizer)
            if code == 1:
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION)
            elif code == 2:
                optimizer = optim.Adam(cnn.parameters(),
                                       lr=LR,
                                       betas=(0.01, 0.999),
                                       weight_decay=REGULARIZATION,
                                       amsgrad=True)
            elif code == 3:
                optimizer = AdamW(cnn.parameters(),
                                  lr=LR,
                                  betas=(0.01, 0.999),
                                  weight_decay=REGULARIZATION)
            elif code == 4:
                optimizer = optim.RMSprop(cnn.parameters(), lr=LR)
            elif code == 5:
                optimizer = optim.SGD(cnn.parameters(),
                                      lr=LR,
                                      momentum=SCI_SGD_MOMENTUM,
                                      weight_decay=REGULARIZATION)
            elif code == 6:
                optimizer = optim.Adadelta(cnn.parameters(),
                                           lr=LR,
                                           weight_decay=REGULARIZATION)
            elif code == 7:
                optimizer = optim.Rprop(cnn.parameters(), lr=LR)
            elif code == 8:
                optimizer = optim.Adamax(cnn.parameters(),
                                         lr=LR,
                                         weight_decay=REGULARIZATION)
            elif code == 9:
                optimizer = optim.ASGD(cnn.parameters(),
                                       lr=LR,
                                       lambd=0.0001,
                                       alpha=0.75,
                                       t0=1000000.0,
                                       weight_decay=REGULARIZATION)
            else:
                raise ValueError('Unsupported optimizer code: %r' % (SCI_optimizer,))

        return optimizer
예제 #11
0
def main():
    """Fine-tune an ImageNet-pretrained ResNet-50 on the iMet 20-country data.

    Parses command-line options, builds disjoint train (85%) / validation
    (15%) splits from the same CSV-backed dataset, trains for ``--epochs``
    epochs with Adadelta and a StepLR schedule, optionally checkpointing the
    model after every epoch, and finally plots loss and accuracy curves.
    """
    # Training settings -- use the command line to modify the default settings.
    # NOTE(review): the description still says "MNIST" although the script
    # trains on iMet data -- confirm intended wording before changing --help.
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for testing (default: 64)')
    # BUG FIX: the help text previously claimed "default: 14" while the
    # actual default is 10.
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument(
        '--step',
        type=int,
        default=1,
        metavar='N',
        help='number of epochs between learning rate reductions (default: 1)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=100,
        metavar='N',
        help='how many batches to wait before logging training status')

    # The next two flags are accepted for CLI compatibility but are
    # currently no-ops (the evaluation path is not wired up here).
    parser.add_argument('--evaluate',
                        action='store_true',
                        default=False,
                        help='evaluate your model on the official test set')
    parser.add_argument('--load-model', type=str, help='model file path')

    parser.add_argument('--save-model',
                        action='store_true',
                        default=True,
                        help='For Saving the current Model')

    parser.add_argument('--test-datasize',
                        action='store_true',
                        default=False,
                        help='train on different sizes of dataset')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # NOTE(review): kwargs is prepared for the DataLoaders but never passed
    # below -- confirm whether num_workers/pin_memory were meant to apply.
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    torch.manual_seed(args.seed)

    # Dataset with light augmentation: resize to 255, take a random 224x224
    # crop, normalize with ImageNet statistics (matches the pretrained
    # ResNet-50 below).
    train_dataset_no_aug = TrainDataset(
        True,
        'data/imet-2020-fgvc7/labels.csv',
        'data/imet-2020-fgvc7/train_20country.csv',
        'data/imet-2020-fgvc7/train/',
        transform=transforms.Compose([  # Data preprocessing
            transforms.ToPILImage(),  # Add data augmentation here
            transforms.Resize(255),
            transforms.RandomCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ]))
    train_dataset_with_aug = train_dataset_no_aug
    assert (len(train_dataset_no_aug) == len(train_dataset_with_aug))

    # Build DISJOINT train/validation index sets: 15% of the examples are
    # sampled without replacement for validation, the rest are for training.
    np.random.seed(args.seed)
    subset_indices_valid = np.random.choice(len(train_dataset_no_aug),
                                            int(0.15 *
                                                len(train_dataset_no_aug)),
                                            replace=False)
    # Membership tests against a set are O(1); testing `i not in <ndarray>`
    # directly made this split quadratic in the dataset size.
    valid_index_set = set(subset_indices_valid.tolist())
    subset_indices_train = [
        i for i in range(len(train_dataset_no_aug))
        if i not in valid_index_set
    ]

    # Sanity checks: the two subsets partition the dataset and are disjoint.
    assert (len(subset_indices_train) +
            len(subset_indices_valid)) == len(train_dataset_no_aug)
    assert len(np.intersect1d(subset_indices_train, subset_indices_valid)) == 0

    train_loader = torch.utils.data.DataLoader(
        train_dataset_with_aug,
        batch_size=args.batch_size,
        sampler=SubsetRandomSampler(subset_indices_train))
    val_loader = torch.utils.data.DataLoader(
        train_dataset_no_aug,
        batch_size=args.test_batch_size,
        sampler=SubsetRandomSampler(subset_indices_valid))

    # ImageNet-pretrained ResNet-50 with the final FC layer replaced for the
    # 20-country classification task.
    model = M.resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 20)
    model = model.to(device)

    # Try different optimzers here [Adam, SGD, RMSprop]
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Decay the learning rate by --gamma every --step epochs.
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gamma)

    # Training loop: record per-epoch losses/accuracy for the plots below.
    train_losses = []
    val_losses = []
    accuracies = []
    for epoch in range(1, args.epochs + 1):
        train_loss = train(args, model, device, train_loader, optimizer, epoch)
        (accuracy, val_loss) = validation(model, device, val_loader)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        accuracies.append(accuracy)
        scheduler.step()  # learning rate scheduler
        # Optionally checkpoint after every epoch (overwrites the same file).
        if args.save_model:
            torch.save(model.state_dict(), "mnist_model.pt")

    plt.plot(range(1, args.epochs + 1), train_losses)
    plt.plot(range(1, args.epochs + 1), val_losses)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend(["Training loss", "Val loss"])
    plt.title("Training loss and val loss as a function of the epoch")
    plt.show()

    plt.plot(range(1, args.epochs + 1), accuracies)
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(["Validation Accuracy"])
    plt.title("Accuracy in validation set as a function of the epoch")
    plt.show()
예제 #12
0
파일: model.py 프로젝트: MananSoni42/SAN
    def __init__(self, opt, embedding=None, state_dict=None):
        """Build the DNetwork model, its optimizer and optional LR scheduler.

        Args:
            opt: configuration dict/mapping (optimizer name, learning rate,
                weight decay, scheduler options, embedding sizes, ...).
            embedding: optional pretrained embedding tensor for DNetwork.
            state_dict: optional checkpoint dict with 'network' (and
                possibly 'optimizer', 'updates') entries to resume from.
        """
        self.opt = opt
        # Resume the global update counter when restoring from a checkpoint.
        self.updates = state_dict[
            'updates'] if state_dict and 'updates' in state_dict else 0
        self.eval_embed_transfer = True
        self.train_loss = AverageMeter()

        self.network = DNetwork(opt, embedding)
        # Reload checkpoint parameters if a state dictionary was passed:
        # drop keys the current architecture no longer has, and backfill any
        # new keys with the freshly initialised values so load succeeds.
        if state_dict:
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            for k, v in list(self.network.state_dict().items()):
                if k not in state_dict['network']:
                    state_dict['network'][k] = v
            self.network.load_state_dict(state_dict['network'])
        # Select optimizer over trainable parameters only.
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters,
                                       opt['learning_rate'],
                                       momentum=opt['momentum'],
                                       weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          opt['learning_rate'],
                                          weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adam':
            self.optimizer = optim.Adam(parameters,
                                        opt['learning_rate'],
                                        weight_decay=opt['weight_decay'])
        elif opt['optimizer'] == 'adadelta':
            self.optimizer = optim.Adadelta(parameters,
                                            opt['learning_rate'],
                                            rho=0.95)
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
        if state_dict and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

        # Size of the frozen portion of the embedding matrix; excluded from
        # the trainable-parameter count computed at the end.
        if opt['fix_embeddings']:
            wvec_size = 0
        else:
            wvec_size = (opt['vocab_size'] -
                         opt['tune_partial']) * opt['embedding_dim']
        if opt.get('have_lr_scheduler', False):
            if opt.get('scheduler_type', 'rop') == 'rop':
                self.scheduler = ReduceLROnPlateau(self.optimizer,
                                                   mode='max',
                                                   factor=opt['lr_gamma'],
                                                   patience=3)
            elif opt.get('scheduler_type', 'rop') == 'exp':
                # BUG FIX: torch's class is ExponentialLR; the previous
                # misspelling 'ExponentioalLR' raised NameError as soon as
                # scheduler_type == 'exp' was selected.
                self.scheduler = ExponentialLR(self.optimizer,
                                               gamma=opt.get('lr_gamma', 0.5))
            else:
                milestones = [
                    int(step)
                    for step in opt.get('multi_step_lr', '10,20,30').split(',')
                ]
                # Default gamma to torch's 0.1 so a missing 'lr_gamma' key
                # does not pass None into MultiStepLR.
                self.scheduler = MultiStepLR(self.optimizer,
                                             milestones=milestones,
                                             gamma=opt.get('lr_gamma', 0.1))
        else:
            self.scheduler = None
        self.total_param = sum([p.nelement() for p in parameters]) - wvec_size
예제 #13
0
def train(data):
    """Train a sequence-labelling or sentence-classification model.

    The architecture is chosen by ``data.sentence_classification``
    (SentClassifier vs. SeqLabel) and the optimizer by ``data.optimizer``
    (case-insensitive).  Each epoch shuffles the training ids, batches them
    with ``batchify_with_label``, updates the model, then evaluates on the
    dev set (checkpointing on a new best score) and decodes the test set
    for reporting.  Exits the process on an unknown optimizer name or a
    loss explosion.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Hoist the lowered optimizer name so the per-epoch SGD lr decay below
    # uses the same (case-insensitive) comparison as the selection here.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # BUG FIX: this previously compared `data.optimizer == "SGD"`
        # case-sensitively, so the learning-rate decay silently never ran
        # for the spelling "sgd" accepted by the selection above.
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True, data.sentence_classification)
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask)
            # Track running token-level accuracy for progress reporting.
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                # max(..., 1) guards against ZeroDivisionError before any
                # tokens have been counted.
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / max(whole_token, 1)))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / max(whole_token, 1)))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)
        # Evaluate on the dev split; F1 drives model selection when
        # data.seg is set, otherwise plain accuracy does.
        speed, acc, p, r, f, _, _, bal_acc, cm = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, bal_acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, bal_acc, p, r, f))
            print(cm)  #cm.tabulate()
        else:
            current_score = acc
            print(
                "Dev: time: %.2fs speed: %.2fst/s; acc: %.4f; bal_acc: %.4f" %
                (dev_cost, speed, acc, bal_acc))
            print(cm)  #cm.tabulate()

        # Checkpoint whenever the dev score improves.
        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test (reported every epoch; does not affect selection)
        speed, acc, p, r, f, _, _, bal_acc, cm = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, bal_acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, bal_acc, p, r, f))
            print(cm)  #cm.tabulate()
        else:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f; bal_acc: %.4f"
                % (test_cost, speed, acc, bal_acc))
            print(cm)  #cm.tabulate()
        gc.collect()
예제 #14
0
                     rel_feat_extractor, rel_decoder, bin_rel_decoder, gcn,
                     vocab, config.schedule_k, config.use_cuda,
                     config.max_entity_num)

util.assign_embeddings(word_encoder.word_embeddings, pretrained_embeddings)
if config.use_cuda:
    mymodel.cuda()

if os.path.exists(config.load_model_path):
    state_dict = torch.load(open(config.load_model_path, "rb"),
                            map_location=lambda storage, loc: storage)
    mymodel.load_state_dict(state_dict)
    print("Loading previous model successful [%s]" % config.load_model_path)

parameters = [p for p in mymodel.parameters() if p.requires_grad]
optimizer = optim.Adadelta(parameters)


def create_batch_list(sort_batch_tensor: Dict[str, Any],
                      outputs: Dict[str, Any]) -> List[Dict[str, Any]]:
    new_batch = []
    for k in range(len(outputs['ent_span_pred'])):
        instance = {}
        instance['tokens'] = sort_batch_tensor['tokens'][k].cpu().numpy()
        instance['ent_labels'] = sort_batch_tensor['ent_labels'][k].cpu(
        ).numpy()
        instance['ent_span_labels'] = sort_batch_tensor['ent_span_labels'][
            k].cpu().numpy()

        instance['candi_rels'] = sort_batch_tensor['candi_rels'][k]
        instance['rel_labels'] = sort_batch_tensor['rel_labels'][k]
예제 #15
0
def train(opt):
    """Train the SAModel captioner end to end.

    Resumes from ``opt.start_from`` when given (restoring infos/histories
    and the best checkpoint), trains with a language-model loss plus a
    weighted classification loss, and may switch to self-critical (reward
    based) training after ``opt.self_critical_after`` epochs.  Validation
    runs every ``opt.save_checkpoint_every`` iterations; the best model is
    checkpointed separately.  Training stops when the early-stopping
    patience is exhausted or ``opt.max_epochs`` is reached.
    """
    # load train/valid/test data
    opt.vocab_size = get_nwords(opt.data_path)
    opt.category_size = get_nclasses(opt.data_path)
    mytrain_dset, myvalid_dset, mytest_dset = loaddset(opt)

    writer = SummaryWriter(opt.checkpoint_path)
    # init or load training infos
    infos = {}
    histories = {}
    if opt.start_from is not None:
        # open old infos and check if models are compatible
        # NOTE(review): text-mode open + cPickle.load is Python-2 style; on
        # Python 3 pickles must be opened in 'rb' -- confirm target version.
        with open(os.path.join(opt.start_from,
                               'infos_' + opt.id + '-best.pkl')) as f:
            infos = cPickle.load(f)
            saved_model_opt = infos['opt']
            need_be_same = ["rnn_size", "num_layers"]  # optim needn't same
            for checkme in need_be_same:
                assert vars(saved_model_opt)[checkme] == vars(
                    opt
                )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        if os.path.isfile(
                os.path.join(opt.start_from,
                             'histories_' + opt.id + '-best.pkl')):
            with open(
                    os.path.join(opt.start_from,
                                 'histories_' + opt.id + '-best.pkl')) as f:
                histories = cPickle.load(f)
        # random seed must be inherited if didn't assign it.
        if opt.seed == 0:
            opt.seed = infos['opt'].seed

    # Resume counters/histories from the checkpoint (empty dicts otherwise).
    iteration = infos.get('iter', 0) + 1
    epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    if opt.load_best_score == 1:
        best_val_score = infos.get('best_val_score', None)
    else:
        best_val_score = None

    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    model = SAModel(opt)

    if opt.start_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            opt.start_from), " %s must be a a path" % opt.start_from
        model.load_state_dict(torch.load(
            os.path.join(opt.start_from, 'model-best.pth')),
                              strict=True)
    model.cuda()
    model.train()

    crit = LanguageModelCriterion()  # evaluates the generated captions
    classify_crit = ClassiferCriterion()  # evaluates the classification results
    rl_crit = RewardCriterion()  # RL (self-critical) training

    # select optimizer
    if opt.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=opt.learning_rate,
                               weight_decay=opt.weight_decay)
    elif opt.optim == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=1.0,
                                   weight_decay=opt.weight_decay)
        # Adadelta adapts its own effective step size, so manual lr decay
        # is disabled below by forcing the decay start to -1.
        opt.learning_rate_decay_start = -1

    # training start
    tmp_patience = 0
    # each epoch
    while True:
        # NOTE(review): the flag is re-set to True at the top of every
        # epoch, so the block below effectively runs unconditionally once
        # per epoch; the trailing `update_lr_flag = False` has no effect.
        update_lr_flag = True  # when a new epoch start, set update_lr_flag to True
        if update_lr_flag:
            # Assign the learning rate
            if epoch > opt.learning_rate_decay_start and opt.learning_rate_decay_start >= 0 and opt.optim != 'adadelta':
                frac = int((epoch - opt.learning_rate_decay_start) /
                           opt.learning_rate_decay_every)
                decay_factor = opt.learning_rate_decay_rate**frac
                opt.current_lr = opt.learning_rate * decay_factor
                myutils.set_lr(optimizer,
                               opt.current_lr)  # set the decayed rate
                #print('epoch {}, lr_decay_start {}, cur_lr {}'.format(epoch, opt.learning_rate_decay_start, opt.current_lr))
            else:
                opt.current_lr = opt.learning_rate
            # Assign the scheduled sampling prob
            if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
                frac = int((epoch - opt.scheduled_sampling_start) /
                           opt.scheduled_sampling_increase_every)
                opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                                  opt.scheduled_sampling_max_prob)
                model.ss_prob = opt.ss_prob
            # If start self critical training
            if opt.self_critical_after != -1 and epoch >= opt.self_critical_after:
                sc_flag = True
                myutils.init_cider_scorer(opt.reward_type)
            else:
                sc_flag = False
            update_lr_flag = False

        #loading train data
        myloader_train = DataLoader(mytrain_dset,
                                    batch_size=opt.batch_size,
                                    collate_fn=data_io.collate_fn,
                                    shuffle=True)
        torch.cuda.synchronize()
        for data, cap, cap_mask, cap_classes, class_mask, feat1, feat2, feat_mask, pos_feat, lens, groundtruth, image_id in myloader_train:
            start = time.time()
            # Move the batch to GPU; Variable(...) is pre-0.4 PyTorch style.
            cap = Variable(cap, requires_grad=False).cuda()
            cap_mask = Variable(cap_mask, requires_grad=False).cuda()
            cap_classes = Variable(cap_classes, requires_grad=False).cuda()
            class_mask = Variable(class_mask, requires_grad=False).cuda()
            feat1 = Variable(feat1, requires_grad=False).cuda()
            feat2 = Variable(feat2, requires_grad=False).cuda()
            feat_mask = Variable(feat_mask, requires_grad=False).cuda()
            pos_feat = Variable(pos_feat, requires_grad=False).cuda()

            optimizer.zero_grad()
            if not sc_flag:
                # Cross-entropy phase: language loss plus weighted
                # classification loss.
                out, category = model(
                    feat1, feat2, feat_mask, pos_feat, cap,
                    cap_mask)  # (B,seq_len+1,29324),(B,seq_len+1,14)
                loss_language = crit(out, cap, cap_mask)
                loss_classify = classify_crit(category, cap_classes, cap_mask,
                                              class_mask)
                # print(loss_language.data[0], loss_classify.data[0])
                # When weight_class is 0, POS-information generation is no
                # longer trained; only the caption loss remains.
                loss = loss_language + opt.weight_class * loss_classify
            else:
                # Self-critical phase: sample captions and weight their
                # log-probs by the reward from myutils.
                gen_result, sample_logprobs = model.sample(
                    feat1, feat2, feat_mask, pos_feat, {'sample_max': 0})
                reward = myutils.get_self_critical_reward(
                    model, feat1, feat2, feat_mask, pos_feat, groundtruth,
                    gen_result)  # (m,max_length)
                loss = rl_crit(
                    sample_logprobs, gen_result,
                    Variable(torch.from_numpy(reward).float().cuda(),
                             requires_grad=False))
            loss.backward()

            myutils.clip_gradient(optimizer, opt.grad_clip)
            optimizer.step()
            # NOTE(review): .data[0] is pre-0.4 PyTorch; newer versions
            # need .item() -- confirm the targeted torch version.
            train_loss = loss.data[0]
            torch.cuda.synchronize()
            end = time.time()

            if not sc_flag:
                print(
                    "iter {} (epoch {}), train_loss = {:.3f}, loss_lang = {:.3f}, loss_class = {:.3f}, time/batch = {:.3f}"
                    .format(iteration, epoch, train_loss,
                            loss_language.data[0], loss_classify.data[0],
                            end - start))
            else:
                print(
                    "iter {} (epoch {}), avg_reward = {:.3f}, time/batch = {:.3f}"
                    .format(iteration, epoch, np.mean(reward[:, 0]),
                            end - start))

            # Write the training loss summary
            if (iteration % opt.losses_log_every == 0):
                writer.add_scalar('train_loss', train_loss, iteration)
                writer.add_scalar('learning_rate', opt.current_lr, iteration)
                writer.add_scalar('scheduled_sampling_prob', model.ss_prob,
                                  iteration)
                if sc_flag:
                    writer.add_scalar('avg_reward', np.mean(reward[:, 0]),
                                      iteration)

                loss_history[
                    iteration] = train_loss if not sc_flag else np.mean(
                        reward[:, 0])
                lr_history[iteration] = opt.current_lr
                ss_prob_history[iteration] = model.ss_prob

            # make evaluation on validation set, and save model
            if (iteration % opt.save_checkpoint_every == 0):
                # eval model
                print('validation and save the model...')
                time.sleep(3)
                eval_kwargs = {}
                eval_kwargs.update(
                    vars(opt))  # attend vars(opt) into eval_kwargs
                val_loss, predictions, lang_stats = eval_utils.eval_split(
                    model, crit, classify_crit, myvalid_dset, eval_kwargs)
                print('validation is finish!')
                time.sleep(3)

                writer.add_scalar('validation loss', val_loss, iteration)
                if opt.language_eval == 1:
                    # Log language metrics plus parameter/gradient
                    # histograms for TensorBoard.
                    for tag, value in lang_stats.items():
                        if type(value) is list:
                            writer.add_scalar(tag, value[-1], iteration)
                        else:
                            writer.add_scalar(tag, value, iteration)
                    for tag, value in model.named_parameters():
                        try:
                            tag = tag.replace('.', '/')
                            writer.add_histogram(tag,
                                                 value.data.cpu().numpy(),
                                                 iteration)
                            writer.add_histogram(
                                tag + '/grad', (value.grad).data.cpu().numpy(),
                                iteration)
                        except AttributeError:
                            # Parameters without gradients are skipped.
                            continue

                val_result_history[iteration] = {
                    'loss': val_loss,
                    'lang_stats': lang_stats,
                    'predictions': predictions
                }

                # Save model if is improving on validation result
                if opt.language_eval == 1:
                    current_score = lang_stats['CIDEr']
                else:
                    current_score = -val_loss
                best_flag = False

                if best_val_score is None or current_score > best_val_score:
                    best_val_score = current_score
                    best_flag = True
                    tmp_patience = 0
                else:
                    tmp_patience += 1

                if not os.path.exists(opt.checkpoint_path):
                    os.mkdir(opt.checkpoint_path)
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model.pth')
                torch.save(model.state_dict(), checkpoint_path)
                print("model saved to {}".format(checkpoint_path))

                # Dump miscalleous informations(current information)
                infos['iter'] = iteration
                infos['epoch'] = epoch
                infos['best_val_score'] = best_val_score
                infos['opt'] = opt
                infos['val_score'] = lang_stats
                infos['val_sents'] = predictions

                histories['val_result_history'] = val_result_history
                histories['loss_history'] = loss_history
                histories['lr_history'] = lr_history
                histories['ss_prob_history'] = ss_prob_history
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'histories_' + opt.id + '.pkl'),
                        'wb') as f:
                    cPickle.dump(histories, f)

                if best_flag:
                    # Additionally snapshot the best-so-far model/infos.
                    checkpoint_path = os.path.join(opt.checkpoint_path,
                                                   'model-best.pth')
                    torch.save(model.state_dict(), checkpoint_path)
                    print("model saved to {}".format(checkpoint_path))
                    with open(
                            os.path.join(opt.checkpoint_path,
                                         'infos_' + opt.id + '-best.pkl'),
                            'wb') as f:
                        cPickle.dump(infos, f)
                    with open(
                            os.path.join(opt.checkpoint_path,
                                         'histories_' + opt.id + '-best.pkl'),
                            'wb') as f:
                        cPickle.dump(histories, f)

            # Early stopping: break out of the batch loop first, then the
            # epoch loop below.
            if tmp_patience >= opt.patience:
                break
            iteration += 1
        if tmp_patience >= opt.patience:
            print("early stop, trianing is finished!")
            break
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            print("reach max epochs, training is finished!")
            break
        epoch += 1
예제 #16
0
def main():
    """CLI entry point: parse flags, build the MNIST loaders, train and evaluate."""
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()

    cuda_enabled = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if cuda_enabled else "cpu")

    # Per-split DataLoader settings; CUDA additionally gets a worker thread,
    # pinned memory and shuffling.
    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if cuda_enabled:
        for kw in (train_kwargs, test_kwargs):
            kw.update({'num_workers': 1, 'pin_memory': True, 'shuffle': True})

    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])
    train_set = datasets.MNIST('./data', train=True, download=True,
                               transform=mnist_transform)
    test_set = datasets.MNIST('./data', train=False, transform=mnist_transform)
    train_loader = torch.utils.data.DataLoader(train_set, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(test_set, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    # One train/evaluate pass per epoch, decaying the LR after each.
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
예제 #17
0
    def __init__(self, opt, state_dict=None, num_train_step=-1):
        """Build the network, optimizer, optional fp16 wrapper, LR scheduler and EMA.

        Args:
            opt: dict-like configuration (optimizer name, learning rate,
                cuda/multi-GPU flags, scheduler options, ...).
            state_dict: optional checkpoint dict carrying 'state', 'optimizer'
                and 'updates' entries to resume from.
            num_train_step: total number of optimizer steps; forwarded to
                warmup-based optimizers (Adamax/Adam).
        """
        self.config = opt
        # Resume the global update counter from the checkpoint, if present.
        self.updates = state_dict['updates'] if state_dict and 'updates' in state_dict else 0
        self.local_updates = 0
        self.train_loss = AverageMeter()
        self.network = SANBertNetwork(opt)

        if state_dict:
            # strict=False tolerates missing/unexpected keys (e.g. new task heads).
            self.network.load_state_dict(state_dict['state'], strict=False)
        self.mnetwork = nn.DataParallel(self.network) if opt['multi_gpu_on'] else self.network
        self.total_param = sum([p.nelement() for p in self.network.parameters() if p.requires_grad])
        if opt['cuda']:
            self.network.cuda()

        # Parameters whose names contain any of these fragments get no weight
        # decay (standard BERT practice for biases and LayerNorm parameters).
        no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.bias', 'LayerNorm.weight']

        optimizer_parameters = [
            {'params': [p for n, p in self.network.named_parameters() if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in self.network.named_parameters() if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]

        # note that adamax are modified based on the BERT code
        if opt['optimizer'] == 'sgd':
            # BUG FIX: torch.optim exposes SGD, not `sgd`; the lowercase name
            # raised AttributeError whenever this branch was selected.
            self.optimizer = optim.SGD(optimizer_parameters, opt['learning_rate'],
                                       weight_decay=opt['weight_decay'])

        elif opt['optimizer'] == 'adamax':
            self.optimizer = Adamax(optimizer_parameters,
                                    opt['learning_rate'],
                                    warmup=opt['warmup'],
                                    t_total=num_train_step,
                                    max_grad_norm=opt['grad_clipping'],
                                    schedule=opt['warmup_schedule'])
            # Adamax manages its own warmup schedule; an external LR scheduler
            # would conflict with it, so force it off.
            if opt.get('have_lr_scheduler', False): opt['have_lr_scheduler'] = False
        elif opt['optimizer'] == 'adadelta':
            self.optimizer = optim.Adadelta(optimizer_parameters,
                                            opt['learning_rate'],
                                            rho=0.95)
        elif opt['optimizer'] == 'adam':
            self.optimizer = Adam(optimizer_parameters,
                                  lr=opt['learning_rate'],
                                  warmup=opt['warmup'],
                                  t_total=num_train_step,
                                  max_grad_norm=opt['grad_clipping'],
                                  schedule=opt['warmup_schedule'])
            # Same reasoning as the adamax branch: warmup replaces the scheduler.
            if opt.get('have_lr_scheduler', False): opt['have_lr_scheduler'] = False
        else:
            raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])

        if state_dict and 'optimizer' in state_dict:
            self.optimizer.load_state_dict(state_dict['optimizer'])

        if opt['fp16']:
            try:
                from apex import amp
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
            # amp.initialize patches both model and optimizer for mixed precision.
            model, optimizer = amp.initialize(self.network, self.optimizer, opt_level=opt['fp16_opt_level'])
            self.network = model
            self.optimizer = optimizer

        if opt.get('have_lr_scheduler', False):
            if opt.get('scheduler_type', 'rop') == 'rop':
                # mode='max': the monitored metric is a score to maximise.
                self.scheduler = ReduceLROnPlateau(self.optimizer, mode='max', factor=opt['lr_gamma'], patience=3)
            elif opt.get('scheduler_type', 'rop') == 'exp':
                self.scheduler = ExponentialLR(self.optimizer, gamma=opt.get('lr_gamma', 0.95))
            else:
                milestones = [int(step) for step in opt.get('multi_step_lr', '10,20,30').split(',')]
                self.scheduler = MultiStepLR(self.optimizer, milestones=milestones, gamma=opt.get('lr_gamma'))
        else:
            self.scheduler = None

        # Optional exponential moving average of the network weights.
        self.ema = None
        if opt['ema_opt'] > 0:
            self.ema = EMA(self.config['ema_gamma'], self.network)
            if opt['cuda']:
                self.ema.cuda()

        self.para_swapped = False
        # zero optimizer grad
        self.optimizer.zero_grad()
예제 #18
0
    def train(self):
        """Train the segmentation network, periodically dumping sample images
        and checkpoints.

        Uses per-pixel cross entropy over flattened predictions; relies on
        self.net, self.train_loader / self.test_loader, self.device, and the
        various self.* hyper-parameters set elsewhere in the class.
        """
        # Expects input of shape (N, C) and target of shape (N).
        CrossEntropyLoss = nn.CrossEntropyLoss().to(self.device)

        optimizer = optim.Adadelta(self.net.parameters(), lr=self.lr, rho=0.95, eps=1e-07)

        counter = self.counter
        self.checkpoints_to_keep = []

        start_time = time.time()
        for epoch in range(self.epoch):

            # NOTE(review): this sets the LR to self.lr / 10 every decay_epoch,
            # which is NOT cumulative — after the first decay the value never
            # changes again (the message still prints). Confirm intended.
            if epoch != 0 and epoch % self.decay_epoch == 0:
                optimizer.param_groups[0]['lr'] = self.lr / 10
                print('learning rate decayed')

            for step, (imgs, masks) in enumerate(self.train_loader):
                imgs, masks = imgs.to(self.device), masks.to(self.device)
                preds = self.net(imgs)
                # imgs.shape (N, 3, 224, 224)
                # masks.shape (N, 1, 224, 224)
                # preds.shape (N, 2, 224, 224)

                # Flatten to per-pixel (N*H*W, num_classes) logits vs (N*H*W,) labels.
                preds_flat = preds.permute(0, 2, 3, 1).contiguous().view(-1, self.num_classes)
                masks_flat = masks.squeeze(1).view(-1).long()
                # preds_flat.shape (N*224*224, 2)
                # masks_flat.shape (N*224*224, 1)

                self.net.zero_grad()
                loss = CrossEntropyLoss(preds_flat, masks_flat)
                loss.backward()
                optimizer.step()

                counter += 1
                step_end_time = time.time()
                print('[%d/%d][%d/%d] - time_passed: %.2f, CrossEntropyLoss: %.2f'
                      % (epoch, self.epoch, step, self.num_steps, step_end_time - start_time, loss))

                # save sample images
                if step % self.sample_step == 0:
                    # NOTE: `imgs`/`masks` are deliberately rebound here to test
                    # batches; the training pair is not needed past this point.
                    for num, (imgs, masks) in enumerate(self.test_loader):
                        imgs, masks = imgs.to(self.device), masks.to(self.device)
                        preds = self.net(imgs)

                        # Undoes Normalize(mean=0.5, std=0.5) on each channel.
                        inverse_normalize = transforms.Normalize(mean=[-0.5 / 0.5, -0.5 / 0.5, -0.5 / 0.5],
                                                             std=[1 / 0.5, 1 / 0.5, 1 / 0.5])

                        # [:,:,::-1] flips RGB->BGR for cv2.imwrite; scale to 0-255.
                        imgs = inverse_normalize(imgs[0]).permute(1, 2, 0).detach().cpu().numpy()[:,:,::-1] * 255
                        masks = masks[0].repeat(3, 1, 1).permute(1, 2, 0).detach().cpu().numpy() * 255
                        preds = torch.argmax(preds[0], 0).unsqueeze(2).repeat(1, 1, 3).detach().cpu().numpy() * 255

                        if not os.path.exists(os.path.join(self.sample_dir, self.model_dir())):
                            os.makedirs(os.path.join(self.sample_dir, self.model_dir()))

                        # Side-by-side panel: input | ground truth | prediction.
                        samples = np.hstack((imgs, masks, preds))
                        cv2.imwrite('{}/{}/sample_{}-{}.png'.format(self.sample_dir, self.model_dir(), num, counter), samples)
                    print('Saved images')

                # save checkpoints
                if step % self.checkpoint_step == 0:
                    if not os.path.exists(os.path.join(self.checkpoint_dir, self.model_dir())):
                        os.makedirs(os.path.join(self.checkpoint_dir, self.model_dir()))

                    self.save_checkpoint(counter, self.ckpt_max_to_keep)
                    print("Saved checkpoint")
예제 #19
0
def train(config):
    """Train a Maximum Classifier Discrepancy (MCD) domain-adaptation model.

    A shared feature extractor G feeds two classifiers F1/F2, trained with the
    three-step MCD procedure per batch:
      A) minimise source classification loss w.r.t. G, F1 and F2;
      B) maximise the classifiers' disagreement on target data (F1/F2 only);
      C) minimise that disagreement w.r.t. G, repeated num_k times.

    Args:
        config: dict with data paths, model hyper-parameters, optimizer choice,
            'cuda' flag, 'seed', 'num_epoch', 'log_interval', etc.
    """
    #######################################################
    # ENV
    #######################################################
    torch.manual_seed(config['seed'])

    if config['cuda']:
        torch.cuda.manual_seed(config['seed'])

    #####################################################
    # DATA
    #####################################################
    source_path = config['source_path']
    target_path = config['target_path']

    num_k = config['num_k']  # number of Step-C generator updates per batch

    batch_size = config['batch_size']

    # Same augmentation pipeline for both domains.
    data_transforms = {
        source_path:
        transforms.Compose([
            transforms.Resize(256),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        target_path:
        transforms.Compose([
            transforms.Resize(256),
            transforms.RandomHorizontalFlip(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    dsets = {
        source_path:
        datasets.ImageFolder(os.path.join(source_path),
                             data_transforms[source_path]),
        target_path:
        datasets.ImageFolder(os.path.join(target_path),
                             data_transforms[target_path])
    }

    train_loader = CVDataLoader()
    train_loader.initialize(dsets[source_path], dsets[target_path],
                            batch_size)  # CVDataLoader.initialize
    dataset = train_loader.load_data()  # CVDataLoader.load_data()

    test_loader = CVDataLoader()
    test_loader.initialize(dsets[source_path],
                           dsets[target_path],
                           batch_size,
                           shuffle=True)
    dataset_test = test_loader.load_data()

    dset_classes = dsets[source_path].classes
    print('classes' + str(dset_classes))

    #####################################################
    # MODEL
    #####################################################
    option = 'resnet' + config['resnet']
    G = ResBase(option)
    F1 = ResClassifier(num_classes=config['num_classes'],
                       num_layer=config['num_layer'],
                       num_unit=config['num_unit'],
                       prob=config['prob'],
                       middle=config['middle'])
    F2 = ResClassifier(num_classes=config['num_classes'],
                       num_layer=config['num_layer'],
                       num_unit=config['num_unit'],
                       prob=config['prob'],
                       middle=config['middle'])

    F1.apply(weights_init)
    F2.apply(weights_init)

    if config['cuda']:
        G.cuda()
        F1.cuda()
        F2.cuda()

    if config['optimizer'] == 'momentum':
        optimizer_g = optim.SGD(list(G.features.parameters()),
                                lr=config['lr'],
                                weight_decay=0.0005)

        optimizer_f = optim.SGD(list(F1.parameters()) + list(F2.parameters()),
                                momentum=0.9,
                                lr=config['lr'],
                                weight_decay=0.0005)

    elif config['optimizer'] == 'adam':
        optimizer_g = optim.Adam(G.features.parameters(),
                                 lr=config['lr'],
                                 weight_decay=0.0005)
        optimizer_f = optim.Adam(list(F1.parameters()) + list(F2.parameters()),
                                 lr=config['lr'],
                                 weight_decay=0.0005)

    else:
        # BUG FIX: this branch referenced the undefined name `args`
        # (NameError); read the learning rate from config like the others.
        optimizer_g = optim.Adadelta(G.features.parameters(),
                                     lr=config['lr'],
                                     weight_decay=0.0005)
        optimizer_f = optim.Adadelta(list(F1.parameters()) +
                                     list(F2.parameters()),
                                     lr=config['lr'],
                                     weight_decay=0.0005)

    # BUG FIX: only move the criterion to GPU when CUDA is requested; the
    # unconditional .cuda() crashed CPU-only runs.
    criterion = nn.CrossEntropyLoss()
    if config['cuda']:
        criterion = criterion.cuda()

    def _forward_losses(batch, src_target):
        """One forward pass through G -> F1/F2 on the concatenated batch.

        Returns (loss1, loss2, entropy_loss, out_t1, out_t2): the two source
        cross-entropy losses, the summed target entropy regulariser, and the
        softmaxed target outputs of each classifier.
        """
        feats = G(batch)
        out1 = F1(feats)
        out2 = F2(feats)
        # First batch_size rows are source samples, the rest are target.
        loss1 = criterion(out1[:batch_size, :], src_target)
        loss2 = criterion(out2[:batch_size, :], src_target)
        # dim=1 matches the implicit choice for 2-D inputs in older torch.
        out_t1 = F.softmax(out1[batch_size:, :], dim=1)
        out_t2 = F.softmax(out2[batch_size:, :], dim=1)
        # Entropy of the mean target prediction of each classifier
        # (class-balance term in the original code's comments).
        entropy_loss = -torch.mean(torch.log(torch.mean(out_t1, 0) + 1e-6))
        entropy_loss = entropy_loss - torch.mean(
            torch.log(torch.mean(out_t2, 0) + 1e-6))
        return loss1, loss2, entropy_loss, out_t1, out_t2

    for ep in range(config['num_epoch']):
        G.train()
        F1.train()
        F2.train()

        for batch_idx, data in enumerate(dataset):
            if batch_idx * batch_size > 30000:
                break  # caps samples seen per epoch (TODO: confirm why 30000)

            # BUG FIX: unpack the batch unconditionally; the original only did
            # this inside the cuda guard, so CPU runs hit a NameError below.
            data1 = data['S']
            target1 = data['S_label']
            data2 = data['T']
            target2 = data['T_label']
            if config['cuda']:
                data1, target1 = data1.cuda(), target1.cuda()
                data2, target2 = data2.cuda(), target2.cuda()

            eta = 1.0  # weight of the discrepancy term in Step B
            data = Variable(torch.cat((data1, data2), 0))
            target1 = Variable(target1)

            # Step A: train G, F1 and F2 to classify the source data.
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()
            loss1, loss2, entropy_loss, _, _ = _forward_losses(data, target1)
            all_loss = loss1 + loss2 + 0.01 * entropy_loss
            all_loss.backward()
            optimizer_g.step()
            optimizer_f.step()

            # Step B: train F1/F2 to maximise their disagreement on target
            # data (G is effectively frozen: only optimizer_f steps).
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()
            loss1, loss2, entropy_loss, out_t1, out_t2 = _forward_losses(
                data, target1)
            loss_dis = torch.mean(torch.abs(out_t1 - out_t2))
            F_loss = loss1 + loss2 - eta * loss_dis + 0.01 * entropy_loss
            F_loss.backward()
            optimizer_f.step()

            # Step C: train G (num_k times) so the classifiers agree on target.
            for _ in range(num_k):
                optimizer_g.zero_grad()
                loss1, loss2, entropy_loss, out_t1, out_t2 = _forward_losses(
                    data, target1)
                loss_dis = torch.mean(torch.abs(out_t1 - out_t2))
                loss_dis.backward()
                optimizer_g.step()

            if batch_idx % config['log_interval'] == 0:
                # BUG FIX: `.data[0]` fails on 0-dim tensors in torch >= 0.4;
                # .item() returns the same Python float.
                print(
                    'Train Ep: {} [{}/{} ({:.0f}%)]\tLoss1: {:.6f}\tLoss2: {:.6f}\t Dis: {:.6f} Entropy: {:.6f}'
                    .format(ep, batch_idx * len(data), 70000,
                            100. * batch_idx / 70000, loss1.item(),
                            loss2.item(), loss_dis.item(),
                            entropy_loss.item()))

            if batch_idx == 1 and ep > 1:
                test(test_loader, dataset_test, ep, config)
예제 #20
0
# Check for GPU availability:
# my_models.device_gpu_cpu() presumably returns a torch.device — verify.
device = my_models.device_gpu_cpu()
print('using device:', device)

dtype = torch.float32  # we will be using float

# Phase toggles for this script run; `test` is presumably consumed further
# down the file (outside this chunk).
train = True
test = True
model = None

if train:
    # Create models:
    model = my_models.model_2()
    my_models.test_model_size(model, dtype)  # test model size output:

    # Adadelta with its library default learning rate (no lr argument given).
    optimizer = optim.Adadelta(model.parameters())

    # Train model:
    model, loss_data = my_models.train_model(model,
                                             optimizer,
                                             train_loader,
                                             val_loader,
                                             device,
                                             dtype,
                                             epoches=2,
                                             print_every=5)

    # Save model to file:
    torch.save(model.state_dict(), MODEL_PATH + MODEL_NAME)

    # Save loss data to file:
def main():
    """MNIST training entry point that also logs a sample image grid and the
    model graph to TensorBoard (via the module-level `writer`)."""
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=14,
                        metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    # Get some random training images.
    dataiter = iter(train_loader)
    # BUG FIX: DataLoader iterators implement __next__, not a Python-2 style
    # .next() method; use the builtin next().
    images, labels = next(dataiter)

    # show batch images
    grid = torchvision.utils.make_grid(images)
    writer.add_image('images', grid, 0)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # show model graph
    writer.add_graph(model, images)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    # close writer
    writer.close()
def main(opt, case):
    """Train a CRNN handwriting recogniser with CTC loss.

    Args:
        opt: namespace of CLI options (paths, hyper-parameters, optimizer
            flags, checkpoint to resume, ...).
        case: short label appended to progress logs to identify this run.
    """
    print("Arguments are : " + str(opt))

    if opt.experiment is None:
        opt.experiment = 'expr'
    # Create the experiment directory directly instead of shelling out with
    # os.system('mkdir ...'); exist_ok makes re-runs harmless.
    os.makedirs(opt.experiment, exist_ok=True)

    # Pick a random seed, then fix it everywhere so the run is reproducible
    # given the printed value.
    opt.manualSeed = random.randint(1, 10000)  # fix seed
    print("Random Seed: ", opt.manualSeed)
    random.seed(opt.manualSeed)
    np.random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)

    cudnn.benchmark = True

    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

        opt.cuda = True
        print('Set CUDA to true.')

    train_dataset = dataset.hwrDataset(mode="train")
    assert train_dataset

    # The shuffle needs to be false when the sizing has been done.

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batchSize,
                                               shuffle=False,
                                               num_workers=int(opt.workers),
                                               collate_fn=dataset.alignCollate(
                                                   imgH=opt.imgH,
                                                   imgW=opt.imgW,
                                                   keep_ratio=True))

    test_dataset = dataset.hwrDataset(mode="test",
                                      transform=dataset.resizeNormalize(
                                          (100, 32)))

    nclass = len(opt.alphabet) + 1  # +1 for the CTC blank symbol
    nc = 1  # grayscale input

    criterion = CTCLoss()

    # custom weights initialization called on crnn
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            m.weight.data.normal_(0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            m.weight.data.normal_(1.0, 0.02)
            m.bias.data.fill_(0)

    crnn = crnn_model.CRNN(opt.imgH, nc, nclass, opt.nh)
    crnn.apply(weights_init)

    if opt.cuda and not opt.uses_old_saving:
        crnn.cuda()
        crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
        criterion = criterion.cuda()

    # BUG FIX: start_epoch was only assigned inside the checkpoint-loading
    # branch, so a fresh run (opt.crnn == '') crashed with NameError at the
    # epoch loop below. Default to 0.
    start_epoch = 0

    if opt.crnn != '':

        print('Loading pre-trained model from %s' % opt.crnn)
        loaded_model = torch.load(opt.crnn)

        if opt.uses_old_saving:
            print("Assuming model was saved in rudementary fashion")
            crnn.load_state_dict(loaded_model)
            crnn.cuda()

            crnn = torch.nn.DataParallel(crnn, device_ids=range(opt.ngpu))
            criterion = criterion.cuda()
            start_epoch = 0
        else:
            print("Loaded model accuracy: " + str(loaded_model['accuracy']))
            print("Loaded model epoch: " + str(loaded_model['epoch']))
            start_epoch = loaded_model['epoch']
            crnn.load_state_dict(loaded_model['state'])

    # Running average of the training loss between log lines.
    loss_avg = utils.averager()

    # If following the paper's recommendation, using AdaDelta
    if opt.adam:
        optimizer = optim.Adam(crnn.parameters(),
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    elif opt.adadelta:
        optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
    elif opt.adagrad:
        print("Using adagrad")
        optimizer = optim.Adagrad(crnn.parameters(), lr=opt.lr)
    else:
        optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)

    converter = utils.strLabelConverter(opt.alphabet)

    best_val_accuracy = 0

    for epoch in range(start_epoch, opt.niter):
        train_iter = iter(train_loader)
        i = 0
        while i < len(train_loader):
            # Re-enable gradients (validation may have frozen them).
            for p in crnn.parameters():
                p.requires_grad = True
            crnn.train()

            cost = train_batch(crnn, criterion, optimizer, train_iter, opt,
                               converter)
            loss_avg.add(cost)
            i += 1

            if i % opt.displayInterval == 0:
                print(
                    '[%d/%d][%d/%d] Loss: %f' %
                    (epoch, opt.niter, i, len(train_loader), loss_avg.val()) +
                    " " + case)
                loss_avg.reset()

            if i % opt.valInterval == 0:
                try:
                    val_loss_avg, accuracy = val_batch(crnn, opt, test_dataset,
                                                       converter, criterion)

                    model_state = {
                        'epoch': epoch + 1,
                        'iter': i,
                        'state': crnn.state_dict(),
                        'accuracy': accuracy,
                        'val_loss_avg': val_loss_avg,
                    }
                    # Second argument flags whether this is a new best model.
                    utils.save_checkpoint(
                        model_state, accuracy > best_val_accuracy,
                        '{0}/netCRNN_{1}_{2}_{3}.pth'.format(
                            opt.experiment, epoch, i,
                            accuracy), opt.experiment)

                    if accuracy > best_val_accuracy:
                        best_val_accuracy = accuracy

                except Exception as e:
                    # Best-effort validation: keep training even if it fails.
                    print(e)
예제 #23
0
            args.emb_size, args.theta_act, embeddings, args.train_embeddings,
            args.enc_drop).to(device)

# Report the constructed model, then build the optimizer chosen on the CLI.
print('model: {}'.format(model))

# Lazy factories so only the selected optimizer is instantiated.
_optimizer_factories = {
    'adam': lambda: optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wdecay),
    'adagrad': lambda: optim.Adagrad(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.wdecay),
    'adadelta': lambda: optim.Adadelta(model.parameters(),
                                       lr=args.lr,
                                       weight_decay=args.wdecay),
    'rmsprop': lambda: optim.RMSprop(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.wdecay),
    'asgd': lambda: optim.ASGD(model.parameters(),
                               lr=args.lr,
                               t0=0,
                               lambd=0.,
                               weight_decay=args.wdecay),
}

_factory = _optimizer_factories.get(args.optimizer)
if _factory is not None:
    optimizer = _factory()
else:
    # Any unrecognised choice falls back to plain SGD.
    print('Defaulting to vanilla SGD')
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
예제 #24
0
# Collect all trainable parameters (tagger, plus the classifier when the
# sentence-classification task is enabled), then build the chosen optimizer.
params += list(model_tag.parameters())
if opt.task_sc:
    params += list(model_class.parameters())
params = list(
    filter(lambda p: p.requires_grad,
           params))  # must be list, otherwise clip_grad_norm_ will be invalid
if opt.optim.lower() == 'sgd':
    optimizer = optim.SGD(params, lr=opt.lr)
elif opt.optim.lower() == 'adam':
    optimizer = optim.Adam(params,
                           lr=opt.lr,
                           betas=(0.9, 0.999),
                           eps=1e-8,
                           weight_decay=0)  # (beta1, beta2)
elif opt.optim.lower() == 'adadelta':
    # NOTE(review): this branch ignores opt.lr and hard-codes lr=1.0
    # (Adadelta's usual setting) — confirm intended.
    optimizer = optim.Adadelta(params, rho=0.95, lr=1.0)
elif opt.optim.lower() == 'rmsprop':
    optimizer = optim.RMSprop(params, lr=opt.lr)
# NOTE(review): no else branch — an unknown opt.optim leaves `optimizer`
# undefined and fails later with NameError.


def decode(data_feats, data_tags, data_class, output_path):
    data_index = np.arange(len(data_feats))
    losses = []
    TP, FP, FN, TN = 0.0, 0.0, 0.0, 0.0
    TP2, FP2, FN2, TN2 = 0.0, 0.0, 0.0, 0.0
    with open(output_path, 'w') as f:
        for j in range(0, len(data_index), opt.test_batchSize):
            if opt.testing:
                words, tags, raw_tags, classes, raw_classes, lens, line_nums = data_reader.get_minibatch_with_class(
                    data_feats,
                    data_tags,
예제 #25
0
def main():
    """Set up data, model, loss and optimizer, then run training.

    Builds train/validation loaders from ``args``, instantiates the network
    selected by ``args.model_name``, optionally resumes from a checkpoint,
    and runs ``train``/``validation`` for ``args.epochs`` epochs.  State is
    shared with those functions through the module-level globals declared
    below.
    """
    global net
    global trainloader
    global valloader
    global best_loss
    global log_file
    global optimizer
    global criterion
    # initialize
    start_epoch = 0
    best_loss = np.finfo(np.float32).max

    # augmentation helpers (only the rotation is wired into the train
    # transform below; the scale/blur helpers are currently unused)
    random_rotate_func = lambda x: x.rotate(random.randint(-15, 15),
                                            resample=Image.BICUBIC)
    random_scale_func = lambda x: transforms.Scale(int(random.uniform(1.0,1.4)\
                                                   * max(x.size)))(x)
    gaus_blur_func = lambda x: x.filter(PIL.ImageFilter.GaussianBlur(radius=1))
    median_blur_func = lambda x: x.filter(PIL.ImageFilter.MedianFilter(size=3))

    #train preprocessing
    transform_train = transforms.Compose([
        transforms.Lambda(lambd=random_rotate_func),
        transforms.CenterCrop(224),
        transforms.Scale((112, 112)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ])

    #validation preprocessing
    transform_val = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.Scale((112, 112)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD)
    ])

    print('==> Preparing data..')
    trainset = ImageListDataset(root=args.root,
                                list_path=args.datalist,
                                split='train',
                                transform=transform_train)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=8,
                                              pin_memory=True)

    valset = ImageListDataset(root=args.root,
                              list_path=args.datalist,
                              split='val',
                              transform=transform_val)

    valloader = torch.utils.data.DataLoader(valset,
                                            batch_size=args.batch_size,
                                            shuffle=False,
                                            num_workers=8,
                                            pin_memory=True)

    # Create model
    net = None
    if args.model_name == 'ResNet18':
        net = ResNet18()
    elif args.model_name == 'ResNet34':
        net = ResNet34()
    elif args.model_name == 'ResNet50':
        net = ResNet50()
    elif args.model_name == 'DenseNet':
        net = DenseNet121()
    elif args.model_name == 'VGG11':
        net = VGG('VGG11')
    elif args.model_name == 'ResNet152':
        net = ResNet152()
    elif args.model_name == 'ResNet101':  # fixed: was `args / model_name`
        net = ResNet101()
    if net is None:
        # Fail fast with a clear message instead of crashing later on
        # net.load_state_dict / DataParallel(None).
        raise ValueError('Unknown model name: {0}'.format(args.model_name))

    print('==> Building model..')

    if args.resume:
        # Load checkpoint
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(
            'checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('./checkpoint/{0}/best_model_chkpt.t7'.format(
            args.name))
        net.load_state_dict(checkpoint['net'])
        best_loss = checkpoint['loss']
        start_epoch = checkpoint['epoch'] + 1

    # Choosing of criterion
    if args.criterion == 'MSE':
        criterion = nn.MSELoss()
    else:
        criterion = None  # Add your criterion

    # Choosing of optimizer
    if args.optimizer == 'adam':
        optimizer = optim.Adam(net.parameters(), lr=args.lr)
    elif args.optimizer == 'adadelta':
        optimizer = optim.Adadelta(net.parameters(), lr=args.lr)
    else:
        optimizer = optim.SGD(net.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=5e-4)

    # Load on GPU
    if args.cuda:
        print('==> Using CUDA')
        print(torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            net = torch.nn.DataParallel(net).cuda()
        else:
            net = net.cuda()
        cudnn.benchmark = True
        print('==> model on GPU')
        # guard: criterion may be None when args.criterion != 'MSE'
        if criterion is not None:
            criterion = criterion.cuda()
    else:
        print('==> model on CPU')

    if not os.path.isdir(args.log_dir_path):
        os.makedirs(args.log_dir_path)
    log_file_path = os.path.join(args.log_dir_path, args.name + '.log')
    # logger file opening
    log_file = open(log_file_path, 'w')
    log_file.write('type,epoch,batch,loss,acc\n')

    print('==> Model')
    print(net)

    try:
        for epoch in range(start_epoch, args.epochs):
            trainloader = torch.utils.data.DataLoader(
                trainset,
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=8,
                pin_memory=True)
            train(epoch)
            validation(epoch)
        print('==> Best loss: {0:.5f}'.format(best_loss))
    except Exception as e:
        # fixed: Python 3 exceptions have no `.message` attribute
        print(str(e))
        log_file.write(str(e))
    finally:
        log_file.close()
예제 #26
0
    padPF1 = np.zeros((1, 5))
    PF1 = np.vstack((padPF1, PF1))
    PF2 = np.asarray(rng.uniform(low=-1, high=1, size=[101, 5]))
    padPF2 = np.zeros((1, 5))
    PF2 = np.vstack((padPF2, PF2))

    net = pcnn.textPCNN(parameterlist['max_sentence_word'],
                        parameterlist['classes'],
                        parameterlist['wordvector_dim'],
                        parameterlist['PF_dim'], parameterlist['filter_size'],
                        parameterlist['num_filter'], Wv, PF1, PF2)
    # criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.001)
    optimizer = optim.Adadelta(net.parameters(),
                               lr=1.0,
                               rho=0.95,
                               eps=1e-06,
                               weight_decay=0)

    np.random.seed(1234)
    epoch_now = 0
    batch_now = 0
    print 'a epoch = %d batch' % (
        int(len(train)) / int(parameterlist['batch_size']) + 1)
    for epoch in range(parameterlist['trainepoch']):
        print 'epoch = %d , start.. ' % epoch_now
        shuffled_data = []
        shuffle_indices = np.random.permutation(np.arange(len(train)))
        for i in range(len(train)):
            shuffled_data.append(train[shuffle_indices[i]])
        bag_now = 0
예제 #27
0
def train(data):
    """Train a SeqModel and checkpoint the best model on the dev set.

    For every epoch the training ids are shuffled and mini-batched, the
    model is updated with its own negative log-likelihood loss, and the
    model is evaluated on the dev and test splits.  Whenever the dev score
    (f-score when ``data.seg`` is set, accuracy otherwise) improves, the
    weights are saved to ``data.model_dir + '.<epoch>.model'``.

    Args:
        data: experiment container exposing the hyper-parameters (``HP_*``),
            ``optimizer``, ``train_Ids``, ``model_dir`` and ``seg``.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    model = SeqModel(data)
    # Select the optimizer named in the configuration.
    if data.optimizer.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif data.optimizer.lower() == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)  # fixed: a configuration error should exit non-zero
    best_dev = -10
    # data.HP_iteration = 1
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        instance_count = 0
        sample_id = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        ## set model in train mode
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        batch_id = 0
        train_num = len(data.train_Ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # fixed: loss.data[0] is the pre-0.4 PyTorch API and raises an
            # IndexError on 0-dim tensors in modern versions; .item() is the
            # supported way to read a scalar loss.
            sample_loss += loss.item()
            total_loss += loss.item()
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                # guard the accuracy ratio against whole_token == 0
                acc = (right_token + 0.) / whole_token if whole_token else 0.
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       acc))
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        acc = (right_token + 0.) / whole_token if whole_token else 0.
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token, acc))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        # continue
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()
예제 #28
0
def train(config_path, experiment_info, thread_queue):
    """Train a MedQA reader model end-to-end.

    Reads the YAML/JSON config at *config_path*, builds the model named in
    the config, optionally resumes from a saved weight file, and runs the
    train/eval loop, logging metrics to TensorBoard under *experiment_info*.

    Args:
        config_path: path to the experiment configuration file.
        experiment_info: run name used for saved code, logs and TensorBoard.
        thread_queue: queue handed through to train_on_model
            (presumably for async data feeding — confirm with its definition).
    """
    logger.info('------------MedQA v1.0 Train--------------')
    logger.info(
        '============================loading config file... print config file ========================='
    )
    global_config = read_config(config_path)
    logger.info(open(config_path).read())
    logger.info(
        '^^^^^^^^^^^^^^^^^^^^^^   config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^'
    )
    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)
    # State shared with the eval/test helpers defined elsewhere in this file.
    global gpu_nums, init_embedding_weight, batch_test_data, tensorboard_writer, test_epoch, embedding_layer_name
    test_epoch = 0

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not abaliable, please unable CUDA in config file")

    ############################### load the dataset ############################
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    # Instantiate the model class selected by the config.
    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    print_network(model)
    # Wrap in DataParallel when more than one GPU is visible.
    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    # weights_init(model)

    # Locate the embedding weight tensor by name (DataParallel prefixes keys
    # with 'module.'), and keep a copy of its initial value for the
    # embedding-regularization loss below.
    embedding_layer_name = 'module.embedding.embedding_layer.weight'
    for name in model.state_dict().keys():
        if 'embedding_layer.weight' in name:
            embedding_layer_name = name
            break
    init_embedding_weight = model.state_dict()[embedding_layer_name].clone()

    # Combined objective: class-weighted cross entropy + gate loss +
    # L21 embedding regularizer.
    task_criterion = CrossEntropyLoss(
        weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss(c=0.01).to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param,
                               lr=optimizer_lr,
                               eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' %
                         optimizer_choose)

    # Shrink the LR when the training loss plateaus (stepped at epoch end).
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.2,
                                  patience=5,
                                  verbose=True)

    # check if exist model weight
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(
                weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path,
                                map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        # TODO: later versions may no longer need this
        if not global_config['train']['keep_embedding']:
            del weight[
                'module.embedding.embedding_layer.weight']  # drop the embedding layer weights to avoid a size mismatch on load
        # # delete the decision (fully connected) layer weights
        # decision_layer_names=[]
        # for name,w in weight.items():
        #     if 'decision_layer' in name:
        #         decision_layer_names.append(name)
        # for name in decision_layer_names:
        #     del weight[name]
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training............................................')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    test_batch_size = global_config['train']['test_batch_size']

    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    shuffle=False)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size,
                                                shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size,
                                                  shuffle=False)

    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = False
    # tensorboardX writer

    # Snapshot the code used for this run so results stay reproducible.
    save_cur_experiment_code_path = "savedcodes/" + experiment_info
    save_current_codes(save_cur_experiment_code_path, global_config)

    tensorboard_writer = SummaryWriter(
        log_dir=os.path.join('tensorboard_logdir', experiment_info))

    best_valid_acc = None
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        train_avg_loss, train_avg_binary_acc = train_on_model(
            model=model,
            criterion=all_criterion,
            optimizer=optimizer,
            batch_data=batch_train_data,
            epoch=epoch,
            clip_grad_max=clip_grad_max,
            device=device,
            thread_queue=thread_queue)

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            val_avg_loss, val_avg_binary_acc, val_avg_problem_acc = eval_on_model(
                model=model,
                criterion=all_criterion,
                batch_data=batch_dev_data,
                epoch=epoch,
                device=device,
                init_embedding_weight=init_embedding_weight,
                eval_dataset='dev')

            # test_avg_loss, test_avg_binary_acc, test_avg_problem_acc=eval_on_model(model=model,
            #                                                                       criterion=all_criterion,
            #                                                                       batch_data=batch_test_data,
            #                                                                       epoch=epoch,
            #                                                                       device=device,
            #                                                                       enable_char=enable_char,
            #                                                                       batch_char_func=dataset.gen_batch_with_char,
            #                                                                       init_embedding_weight=init_embedding_weight)

        # save model when best f1 score
        if best_valid_acc is None or val_avg_problem_acc > best_valid_acc:
            epoch_info = 'epoch=%d, val_binary_acc=%.4f, val_problem_acc=%.4f' % (
                epoch, val_avg_binary_acc, val_avg_problem_acc)
            save_model(
                model,
                epoch_info=epoch_info,
                model_weight_path=global_config['data']['model_weight_dir'] +
                experiment_info + "_model_weight.pt",
                checkpoint_path=global_config['data']['checkpoint_path'] +
                experiment_info + "_save.log")
            logger.info("=========  saving model weight on epoch=%d  =======" %
                        epoch)
            best_valid_acc = val_avg_problem_acc

        tensorboard_writer.add_scalar("train/lr",
                                      optimizer.param_groups[0]['lr'], epoch)
        tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss, epoch)
        tensorboard_writer.add_scalar("train/binary_acc", train_avg_binary_acc,
                                      epoch)
        tensorboard_writer.add_scalar("val/avg_loss", val_avg_loss, epoch)
        tensorboard_writer.add_scalar("val/binary_acc", val_avg_binary_acc,
                                      epoch)
        tensorboard_writer.add_scalar("val/problem_acc", val_avg_problem_acc,
                                      epoch)

        #  adjust learning rate
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
    tensorboard_writer.close()
예제 #29
0
        TEXT, LABEL, filter_pred=lambda ex: ex.label != 'neutral')

    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train, val, test), batch_size=50, device=-1, repeat=False)

    # Build the vocabulary with word embeddings
    url = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki.simple.vec'
    TEXT.vocab.load_vectors(vectors=Vectors('wiki.simple.vec', url=url))

    net = CNN(model='multichannel', vocab_size=len(TEXT.vocab), class_number=2)
    criterion = nn.CrossEntropyLoss()
    parameters = filter(lambda p: p.requires_grad, net.parameters())
    optimizer = optim.Adadelta(parameters, lr=0.5)

    for epoch in range(50):
        total_loss = 0
        for batch in train_iter:
            text, label = batch.text.t_(), batch.label
            label = label - 1
            net.zero_grad()

            logit = net(text)
            loss = criterion(logit, label)
            loss.backward()
            nn.utils.clip_grad_norm(parameters, max_norm=3)
            optimizer.step()
            total_loss += loss.data
예제 #30
0
파일: run.py 프로젝트: wrccrwx/glomo
def train(config):
    """Train a reading-comprehension model (baseline or GLoMo-augmented).

    Loads word/char embeddings and eval files from *config*, optionally
    attaches a pretrained structure-prediction ("pre_att") model whose
    attention graphs feed the reader, then runs the training loop with
    periodic dev-set evaluation, LR halving on patience, and best-F1
    checkpointing into ``config.save``.

    NOTE(review): this function uses pre-0.4 PyTorch idioms
    (``Variable``, ``.volatile``, ``loss.data[0]``) and a module-level
    ``criterion`` that is not defined in this view — it will not run
    unmodified on modern PyTorch.
    """
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    # with open(config.train_eval_file, "r") as fh:
    #     train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.idx2word_file, 'r') as fh:
        idx2word_dict = json.load(fh)

    # Seed every RNG in play for reproducibility.
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    # Derive the experiment/save directory name from the configuration.
    if config.pre_att_id != '':
        config.save = 'T16-v2-{}-kp0{}-cond{}-ori{}-attcnt{}-gatefuse{}-lr{}-opt{}'.format(config.pre_att_id, config.keep_prob0, int(config.condition), int(config.original_ptr), config.att_cnt, config.gate_fuse, config.init_lr, config.optim)
        if config.use_elmo:
            config.save += "_ELMO"
        if config.train_emb:
            raise ValueError
            config.save += "_TE"  # NOTE(review): unreachable — the raise above aborts first
        if config.trnn:
            config.save += '_TRNN'
    else:
        config.save = 'baseline-{}'.format(time.strftime("%Y%m%d-%H%M%S"))
        if config.use_elmo:
            config.save += "_ELMO"
        if config.uniform_graph:
            config.save += '_UNIFORM'
    # non overwriting
    # if os.path.exists(config.save):
    #     sys.exit(1)
    create_exp_dir(config.save, scripts_to_save=['run.py', 'model.py', 'util.py', 'main.py'])
    def logging(s, print_=True, log_=True):
        # Append *s* to the run's log.txt and/or echo it to stdout.
        if print_:
            print(s)
        if log_:
            with open(os.path.join(config.save, 'log.txt'), 'a+') as f_log:
                f_log.write(s + '\n')

    # Optionally load the pretrained attention/structure model and its vocab.
    if config.pre_att_id != '':
        sys.path.insert(0, '../pretrain')
        from data import Vocab
        vocab = Vocab('../pretrain/vocabv2.pkl', 100000, '<unk>')
        
        # from model8 import StructurePredictor
        # model = StructurePredictor(512, len(vocab), 1, 1, 0.0)
        # model.load_state_dict(torch.load('../skip_thought/{}/st_predictor.pt'.format(config.pre_att_id)))
        # model.cuda()
        # model.eval()

        model = torch.load('../pretrain/{}/model.pt'.format(config.pre_att_id))
        # if 'gru' in config.pre_att_id:
        #     model.set_gru(True)
        # elif 'add' in config.pre_att_id:
        #     model.set_gru(False)
        # else:
        #     assert False
        model.cuda()
        ori_model = model
        model = nn.DataParallel(model)
        model.eval()
        import re
        # Infer the number of pretrained layers from the id (e.g. 'ly3'),
        # falling back to the model's own layer count.
        try:
            nly = int(re.search(r'ly(\d+)', config.pre_att_id).group(1))
        except:
            nly = len(ori_model.enc_net.nets)
        if config.gate_fuse < 3:
            config.num_mixt = nly * 8
        else:
            config.num_mixt = (nly + nly - 1) * 8

        # old_model = torch.load('../skip_thought/{}/model.pt'.format(config.pre_att_id))
        # from model5 import GraphModel
        # model = GraphModel(old_model).cuda()
        # model = nn.DataParallel(model)
        # model.eval()
        # del old_model
        # import gc
        # gc.collect()
        # from data import Vocab
        # vocab = Vocab('../skip_thought/vocabv2.pkl', 100000, '<unk>')
        
        del sys.path[0]
        
        pre_att_data = {'model': model, 'vocab': vocab}
    else:
        pre_att_data = None

    logging('Config')
    for k, v in config.__dict__.items():
        logging('    - {} : {}'.format(k, v))

    if config.use_elmo and config.load_elmo:
        ee = torch.load(config.elmo_ee_file)
    else:
        ee = None

    logging("Building model...")
    train_buckets = get_buckets(config.train_record_file, config, limit=True)
    dev_buckets = get_buckets(config.dev_record_file, config, limit=False)

    def build_train_iterator():
        # Fresh training iterator (shuffled: True) for each epoch.
        return DataIterator(train_buckets, config.batch_size, config.para_limit, config.ques_limit, config.char_limit, True, pre_att_data, config, ee, idx2word_dict, 'train')

    def build_dev_iterator():
        # Deterministic dev iterator (shuffled: False) for evaluation.
        return DataIterator(dev_buckets, config.batch_size, config.para_limit, config.ques_limit, config.char_limit, False, pre_att_data, config, ee, idx2word_dict, 'dev')

    model = Model(config, word_mat, char_mat) if not config.trnn else ModelTRNN(config, word_mat, char_mat)
    # logging('nparams {}'.format(sum([p.nelement() for p in model.parameters() if p.requires_grad])))
    ori_model = model.cuda()
    # ori_model.word_emb.cpu()
    # model = ori_model
    model = nn.DataParallel(ori_model)

    lr = config.init_lr
    # optimizer = optim.SGD(model.parameters(), lr=config.init_lr, momentum=config.momentum)
    if config.optim == "adadelta":  # default
        optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()), lr=config.init_lr, rho=0.95)
    elif config.optim == "sgd":
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=config.init_lr, momentum=config.momentum)
    elif config.optim == "adam":
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=config.init_lr, betas=(config.momentum, 0.999))
    cur_patience = 0
    total_loss = 0
    global_step = 0
    best_dev_F1 = None
    stop_train = False
    start_time = time.time()
    eval_start_time = time.time()
    model.train()

    for epoch in range(10000 * 32 // config.batch_size):
        for data in build_train_iterator():
            context_idxs = Variable(data['context_idxs'])
            ques_idxs = Variable(data['ques_idxs'])
            context_char_idxs = Variable(data['context_char_idxs'])
            ques_char_idxs = Variable(data['ques_char_idxs'])
            context_lens = Variable(data['context_lens'])
            y1 = Variable(data['y1'])
            y2 = Variable(data['y2'])

            # Pretrained attention graphs for context and question, if any.
            # NOTE(review): `.volatile` is the pre-0.4 Variable API.
            graph = data['graph']
            graph_q = data['graph_q']
            if graph is not None:
                graph.volatile = False
                graph.requires_grad = False
                graph_q.volatile = False
                graph_q.requires_grad = False

            elmo, elmo_q = data['elmo'], data['elmo_q']
            if elmo is not None:
                elmo.volatile = False
                elmo.requires_grad = False
                elmo_q.volatile = False
                elmo_q.requires_grad = False

            logit1, logit2 = model(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, context_lens, pre_att=graph, pre_att_q=graph_q, elmo=elmo, elmo_q=elmo_q)
            # NOTE(review): `criterion` is not defined in this function —
            # presumably a module-level global; confirm before running.
            loss = criterion(logit1, y1) + criterion(logit2, y2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            import gc; gc.collect()

            # NOTE(review): loss.data[0] is pre-0.4 PyTorch; newer versions
            # require loss.item().
            total_loss += loss.data[0]
            global_step += 1

            if global_step % config.period == 0:
                cur_loss = total_loss / config.period
                elapsed = time.time() - start_time
                logging('| epoch {:3d} | step {:6d} | lr {:05.5f} | ms/batch {:5.2f} | train loss {:8.3f}'.format(epoch, global_step, lr, elapsed*1000/config.period, cur_loss))
                total_loss = 0
                start_time = time.time()

            # Periodic dev evaluation; checkpoint on best F1 and halve the LR
            # after `config.patience` evaluations without improvement.
            if global_step % (config.checkpoint * 32 // config.batch_size)  == 0:
                model.eval()
                metrics = evaluate_batch(build_dev_iterator(), model, 0, dev_eval_file)
                model.train()

                logging('-' * 89)
                logging('| eval {:6d} in epoch {:3d} | time: {:5.2f}s | dev loss {:8.3f} | EM {:.4f} | F1 {:.4f}'.format(global_step//config.checkpoint,
                    epoch, time.time()-eval_start_time, metrics['loss'], metrics['exact_match'], metrics['f1']))
                debug_s = ''
                if hasattr(ori_model, 'scales'):
                    debug_s += '| scales {} '.format(ori_model.scales.data.cpu().numpy().tolist())
                # if hasattr(ori_model, 'mixt_logits') and (not hasattr(ori_model, 'condition') or not ori_model.condition):
                #     debug_s += '| mixt {}'.format(F.softmax(ori_model.mixt_logits, dim=-1).data.cpu().numpy().tolist())
                if debug_s != '':
                    logging(debug_s)
                logging('-' * 89)

                eval_start_time = time.time()

                dev_F1 = metrics['f1']
                if best_dev_F1 is None or dev_F1 > best_dev_F1:
                    best_dev_F1 = dev_F1
                    torch.save(ori_model.state_dict(), os.path.join(config.save, 'model.pt'))
                    cur_patience = 0
                else:
                    cur_patience += 1
                    if cur_patience >= config.patience:
                        lr /= 2.0
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                        if lr < config.init_lr * 1e-2:
                            stop_train = True
                            break
                        cur_patience = 0
        if stop_train: break
    logging('best_dev_F1 {}'.format(best_dev_F1))