import os
import random
import re
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Classifier and add_log are assumed to be defined elsewhere in the surrounding project.


def evaluate(checkpoint_path,
             num_class,
             num_words,
             datafolds,
             glove=None,
             use_gpu=False):
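    # Recover the input/hidden sizes that were encoded in the checkpoint's config string.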
    checkpoint = torch.load(checkpoint_path)
    config_string = checkpoint['config_string']
    groups = re.search(r'input(\d+)_hidden(\d+)', config_string)
    input_size, hidden_size = int(groups.group(1)), int(groups.group(2))
    classifier = Classifier(input_size, hidden_size, num_class, num_words,
                            glove, use_gpu)
    if use_gpu:
        classifier = classifier.cuda()
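    # Evaluate on the held-out last fold.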
    dataset_eval = datafolds[-1]
    classifier.load_state_dict(checkpoint['model'])
    correct, total = classifier.evalute_dataset(dataset_eval)
    return correct, total
def train(task,
          phase,
          num_class,
          num_words,
          logs_dir,
          models_dir,
          datafolds,
          seed,
          num_folds=10,
          glove=None,
          epochs=20,
          batch_size=25,
          input_size=100,
          hidden_size=50,
          lr=0.008,
          lr_milestones=None,
          weight_decay=1e-4,
          log_iteration_interval=500,
          use_gpu=False):
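    # Encode every hyperparameter in a single string used to name log files and checkpoints.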
    config_string = '{}_{}_batchsize{}_input{}_hidden{}_lr{}{}_wc{}{}_seed{}'.format(
        task, phase, batch_size, input_size, hidden_size, lr,
        '' if not lr_milestones
        else '_ms' + ','.join([str(i) for i in lr_milestones]),
        weight_decay, '_glove' if glove is not None else '', seed)
    log_train_path = os.path.join(logs_dir,
                                  'train_{}.txt'.format(config_string))
    log_eval_path = os.path.join(logs_dir, 'eval_{}.txt'.format(config_string))
    print('[INFO] {}'.format(config_string))
    classifier = Classifier(input_size, hidden_size, num_class, num_words,
                            glove, use_gpu)
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        classifier = classifier.cuda()
        criterion = criterion.cuda()
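    # Optimize only parameters with requires_grad=True (pretrained embeddings may be frozen).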
    optimizer = optim.Adam(
        [p for p in classifier.parameters() if p.requires_grad],
        lr=lr,
        weight_decay=weight_decay)
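    # The first num_folds - 1 folds form the training set; the last fold is held out for evaluation.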
    dataset_train = list()
    for i in range(num_folds - 1):
        dataset_train += datafolds[i]
    dataset_eval = datafolds[-1]
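    # Optionally halve the learning rate at each milestone epoch.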
    scheduler = None if not lr_milestones else optim.lr_scheduler.MultiStepLR(
        optimizer, lr_milestones, gamma=0.5)
    # train
    for epoch in range(epochs):
        if scheduler is not None:
            scheduler.step()
        random.shuffle(dataset_train)
        optimizer.zero_grad()
        log_loss = 0
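        # Trees are processed one at a time; gradients accumulate and are applied every batch_size iterations.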
        for iteration in range(1, len(dataset_train) + 1):
            tree_root, label = dataset_train[iteration - 1]
            output = classifier(tree_root)
            target = torch.LongTensor([label])
            if use_gpu:
                target = target.cuda()
            target = Variable(target, requires_grad=False)
            loss = criterion(output, target)
            loss.backward()
            if iteration % batch_size == 0:
                optimizer.step()
                optimizer.zero_grad()
            # Track the average loss over the current logging interval.
            log_loss += loss.data[0] / log_iteration_interval
            if iteration % log_iteration_interval == 0:
                add_log(log_train_path, '{} {} {}'.format(
                    time.ctime(), iteration, log_loss))
                log_loss = 0
        # evaluate
        correct, total = classifier.evalute_dataset(dataset_eval)
        add_log(log_eval_path, '{} / {} = {:.3f}'.format(
            correct, total,
            float(correct) / total))
        # save checkpoint
        checkpoint = {
            'model': classifier.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch,
            'config_string': config_string
        }
        checkpoint_path = os.path.join(models_dir, '{}_epoch{}.pth'.format(
            config_string, epoch))
        torch.save(checkpoint, checkpoint_path)