Example #1
def evaluate(valid_pkl_path,
             loss_criterion,
             model,
             config,
             vocab_size,
             annoyIndex,
             annoyPkl,
             use_cuda=False,
             use_kb=False,
             valid_kb_path=None,
             valid_celeb_path=None):
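    """Run the model over the validation set and report token-level
    cross-entropy loss and perplexity."""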
    model.eval()
    # with torch.no_grad():
    valid_data = pkl.load(open(valid_pkl_path, 'rb'))
    batch_size = config['data']['batch_size']
    total_samples = len(valid_data)
    num_valid_batch = int(math.ceil(float(total_samples) / float(batch_size)))
    total_loss = 0.
    n_total_words = 0.
    correct = 0
    total = 0

    kb_len = None
    celeb_len = None
    kb_vec = None
    celeb_vec = None
    if use_kb:
        celeb_data = pkl.load(open(valid_celeb_path, 'rb'))
        kb_data = pkl.load(open(valid_kb_path, 'rb'))

    valid_start = time.time()
    for batch_id in range(num_valid_batch):
        batch_data = valid_data[batch_id * batch_size:(batch_id + 1) *
                                batch_size]
        if use_kb:
            kb_len = np.array(kb_data[0][batch_id * batch_size:(batch_id + 1) *
                                         batch_size])
            kb_len = utils.convert_states_to_torch(kb_len, use_cuda=use_cuda)
            kb_vec = np.array(kb_data[1][batch_id * batch_size:(batch_id + 1) *
                                         batch_size])
            kb_vec = utils.convert_states_to_torch(kb_vec, use_cuda=use_cuda)
            # Celebs
            celeb_len = np.array(
                celeb_data[0][batch_id * batch_size:(batch_id + 1) *
                              batch_size])
            celeb_len = utils.convert_states_to_torch(celeb_len,
                                                      use_cuda=use_cuda)
            celeb_vec = np.array(
                celeb_data[1][batch_id * batch_size:(batch_id + 1) *
                              batch_size])
            celeb_vec = utils.convert_states_to_torch(celeb_vec,
                                                      use_cuda=use_cuda)

        text_enc_input, text_enc_in_len, image_enc_input, dec_text_input, \
            dec_out_seq, dec_seq_length = utils.get_batch_mmd_data(
                batch_data, config['data']['start_id'],
                config['data']['end_id'], config['data']['pad_id'],
                config['data']['image_rep_size'], annoyIndex, annoyPkl,
                use_cuda=use_cuda, volatile=True)
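        # Note: `args` below is assumed to be a module-level argparse namespace;
        # it is not a parameter of evaluate().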
        dec_output_prob = model(text_enc_input,
                                image_enc_input,
                                text_enc_in_len,
                                dec_text_input,
                                dec_out_seq,
                                context_size=args.context_size,
                                teacher_forcing_ratio=1,
                                decode=False,
                                use_cuda=use_cuda,
                                kb_vec=kb_vec,
                                celeb_vec=celeb_vec,
                                kb_len=kb_len,
                                celeb_len=celeb_len)
        loss_val = loss_criterion(
            dec_output_prob.contiguous().view(
                -1, vocab_size),  #config['model']['tgt_vocab_size']),
            dec_out_seq.view(-1))
        n_words = dec_seq_length.float().sum().data[0]
        n_total_words += n_words
        # batch_loss = loss_val.data[0]/n_words
        total_loss += loss_val.data[0]
        dec_out_model = dec_output_prob.data.cpu().numpy().argmax(
            axis=2)  # argmax for each timestep
        # print(dec_out_model)
        # print(dec_out_model.shape)

        # Multi task learning
        # _, predicted = torch.max(outputs.data, 1)
        # total += labels.size(0)
        # correct += (predicted == labels).sum().item()

    # Printing epoch loss
    # epoch_loss = total_loss / num_valid_batch
    epoch_loss = total_loss / n_total_words
    valid_elapsed = (time.time() - valid_start) / 60
    # if (epoch+1) % config['training']['log_every'] == 0:
    print('Validation: Loss: %.6f, Perplexity: %5.4f, Run Time: %5.4f' %
          (epoch_loss, np.exp(epoch_loss), valid_elapsed))
    print("")
    model.train()
Example #2
def main(args):
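    """Load a trained HRED/MultimodalHRED checkpoint, decode responses for the
    test set, and write the generated sentences to the output file."""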
    config = utils.read_json_config(args.config_file_path)
    print(config)
    torch.manual_seed(config['training']['seed'])  # Seed for reproducibility
    use_cuda = check_cuda(config['training']['seed'])
    # Load vocabulary
    with open(args.vocab_path, 'rb') as vocab_file:
        vocab = pkl.load(vocab_file)[1]  #inverted_vocab
    vocab_size = len(vocab)
    # Server
    annoyIndex = AnnoyIndex(4096, metric='euclidean')
    annoyIndex.load(args.annoy_file_path)
    annoyPkl = pkl.load(open(args.annoy_pkl_path, 'rb'))
    # # Local
    # annoyIndex = ""
    # annoyPkl = ""
    # model_type = getattr(models, args.model_type)

    kb_len = None
    celeb_len = None
    kb_vec = None
    use_kb = False
    celeb_vec = None
    kb_size = None
    celeb_vec_size = None
    if args.use_kb == 'True':
        use_kb = True
        celeb_data = pkl.load(open(args.test_celeb_path, 'rb'))
        kb_data = pkl.load(open(args.test_kb_path, 'rb'))
        kb_vocab = pkl.load(open(args.kb_vocab_path, 'rb'))
        celeb_vocab = pkl.load(open(args.celeb_vocab_path, 'rb'))
        kb_size = len(kb_vocab[0])
        celeb_vec_size = len(celeb_vocab[0])
        del kb_vocab, celeb_vocab

    if args.model_type == 'MultimodalHRED':
        model = MultimodalHRED(
            src_vocab_size=vocab_size,
            tgt_vocab_size=vocab_size,
            src_emb_dim=config['model']['src_emb_dim'],
            tgt_emb_dim=config['model']['tgt_emb_dim'],
            enc_hidden_size=config['model']['enc_hidden_size'],
            dec_hidden_size=config['model']['dec_hidden_size'],
            context_hidden_size=config['model']['context_hidden_size'],
            batch_size=config['data']['batch_size'],
            image_in_size=config['model']['image_in_size'],
            bidirectional_enc=config['model']['bidirectional_enc'],
            bidirectional_context=config['model']['bidirectional_context'],
            num_enc_layers=config['model']['num_enc_layers'],
            num_dec_layers=config['model']['num_dec_layers'],
            num_context_layers=config['model']['num_context_layers'],
            dropout_enc=config['model']['dropout_enc'],
            dropout_dec=config['model']['dropout_dec'],
            dropout_context=config['model']['dropout_context'],
            max_decode_len=config['model']['max_decode_len'],
            non_linearity=config['model']['non_linearity'],
            enc_type=config['model']['enc_type'],
            dec_type=config['model']['dec_type'],
            context_type=config['model']['context_type'],
            use_attention=config['model']['use_attention'],
            decode_function=config['model']['decode_function'],
            num_states=args.num_states,
            use_kb=use_kb,
            kb_size=kb_size,
            celeb_vec_size=celeb_vec_size)
    else:
        model = HRED(
            src_vocab_size=vocab_size,
            tgt_vocab_size=vocab_size,
            src_emb_dim=config['model']['src_emb_dim'],
            tgt_emb_dim=config['model']['tgt_emb_dim'],
            enc_hidden_size=config['model']['enc_hidden_size'],
            dec_hidden_size=config['model']['dec_hidden_size'],
            context_hidden_size=config['model']['context_hidden_size'],
            batch_size=config['data']['batch_size'],
            image_in_size=config['model']['image_in_size'],
            bidirectional_enc=config['model']['bidirectional_enc'],
            bidirectional_context=config['model']['bidirectional_context'],
            num_enc_layers=config['model']['num_enc_layers'],
            num_dec_layers=config['model']['num_dec_layers'],
            num_context_layers=config['model']['num_context_layers'],
            dropout_enc=config['model']['dropout_enc'],
            dropout_dec=config['model']['dropout_dec'],
            dropout_context=config['model']['dropout_context'],
            max_decode_len=config['model']['max_decode_len'],
            non_linearity=config['model']['non_linearity'],
            enc_type=config['model']['enc_type'],
            dec_type=config['model']['dec_type'],
            context_type=config['model']['context_type'],
            use_attention=config['model']['use_attention'],
            decode_function=config['model']['decode_function'],
            num_states=args.num_states,
            use_kb=use_kb,
            kb_size=kb_size,
            celeb_vec_size=celeb_vec_size)
    model = torch_utils.gpu_wrapper(model, use_cuda=use_cuda)
    # model = torch.load('model.pkl')
    model.load_state_dict(torch.load(args.checkpoint_path))
    model.eval()
    print_model(model)
    test_data = pkl.load(open(args.test_pkl_path, 'rb'))

    batch_size = config['data']['batch_size']
    total_samples = len(test_data)
    num_test_batch = int(math.ceil(float(total_samples) / float(batch_size)))
    sentences = []
    # loss_criterion = nn.CrossEntropyLoss(ignore_index=config['data']['pad_id']) #weight=weight_mask) nn.CrossEntropyLoss
    # loss_criterion = torch_utils.gpu_wrapper(loss_criterion, use_cuda=use_cuda)
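    # Decode each test batch and collect the generated sentences.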
    for batch_id in range(num_test_batch):
        batch_start = time.time()
        batch_data = test_data[batch_id * batch_size:(batch_id + 1) *
                               batch_size]
        if use_kb:
            kb_len = np.array(kb_data[0][batch_id * batch_size:(batch_id + 1) *
                                         batch_size])
            kb_len = utils.convert_states_to_torch(kb_len, use_cuda=use_cuda)
            kb_vec = np.array(kb_data[1][batch_id * batch_size:(batch_id + 1) *
                                         batch_size])
            kb_vec = utils.convert_states_to_torch(kb_vec, use_cuda=use_cuda)
            # Celebs
            celeb_len = np.array(
                celeb_data[0][batch_id * batch_size:(batch_id + 1) *
                              batch_size])
            celeb_len = utils.convert_states_to_torch(celeb_len,
                                                      use_cuda=use_cuda)
            celeb_vec = np.array(
                celeb_data[1][batch_id * batch_size:(batch_id + 1) *
                              batch_size])
            celeb_vec = utils.convert_states_to_torch(celeb_vec,
                                                      use_cuda=use_cuda)

        text_enc_input, text_enc_in_len, image_enc_input, dec_text_input, \
            dec_out_seq, dec_seq_length = utils.get_batch_mmd_data(
                batch_data, config['data']['start_id'],
                config['data']['end_id'], config['data']['pad_id'],
                config['data']['image_rep_size'], annoyIndex, annoyPkl,
                use_cuda=use_cuda, volatile=True)
        dec_output_prob = model(text_enc_input,
                                image_enc_input,
                                text_enc_in_len,
                                context_size=args.context_size,
                                teacher_forcing_ratio=0,
                                decode=True,
                                use_cuda=use_cuda,
                                kb_vec=kb_vec,
                                celeb_vec=celeb_vec,
                                kb_len=kb_len,
                                celeb_len=celeb_len)
        dec_output_seq = dec_output_prob[:, 0, :].data.cpu().numpy()
        # loss = loss_criterion(dec_output_prob.contiguous().view(-1, vocab_size), #config['model']['tgt_vocab_size']),
        # 	dec_out_seq.view(-1))
        # dec_output_seq = dec_output_prob.data.cpu().numpy().argmax(axis=2) # argmax for each timestep
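        # Map decoded token ids back to words, stopping at the end-of-sequence id.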
        for sequence in dec_output_seq:
            words = []
            for word_id in sequence:
                if word_id == config['data']['end_id']:
                    break
                word = vocab[word_id]
                words.append(word)
            sentence = ' '.join(words)
            sentences.append(sentence)
    with open(args.out_file_path, 'w') as out_file:
        for item in sentences:
            out_file.write("{}\n".format(item))
Example #3
def main(args):
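    """Train the selected model on the training data, log loss and perplexity,
    evaluate periodically, and save parameter checkpoints."""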
    config = utils.read_json_config(args.config_file_path)
    torch.manual_seed(config['training']['seed'])  # Seed for reproducibility
    use_cuda = check_cuda(config['training']['seed'])
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        # filename='log/%s' % (experiment_name),
        filemode='w')
    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    # optional, set the logging level
    console.setLevel(logging.INFO)
    # set a format which is the same for console use
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    # tell the handler to use this format
    console.setFormatter(formatter)
    # add the handler to the root logger
    # logging.getLogger('').addHandler(console)
    # print 'Reading data ...'
    print(config)
    vocab = pkl.load(open(args.vocab_path, 'rb'))[1]
    vocab_size = len(vocab)
    # Server
    annoyIndex = AnnoyIndex(4096, metric='euclidean')
    annoyIndex.load(args.annoy_file_path)
    annoyPkl = pkl.load(open(args.annoy_pkl_path, 'rb'))
    # # Local
    # annoyIndex = ""
    # annoyPkl = ""
    model_type = getattr(models, args.model_type)

    kb_vec = None
    use_kb = False
    celeb_vec = None
    kb_size = None
    celeb_vec_size = None
    kb_len = None
    celeb_len = None
    if args.use_kb == 'True':
        use_kb = True
        celeb_data = pkl.load(open(args.train_celeb_path, 'rb'))
        kb_data = pkl.load(open(args.train_kb_path, 'rb'))
        ## TODO - copy in all
        kb_vocab = pkl.load(open(args.kb_vocab_path, 'rb'))
        celeb_vocab = pkl.load(open(args.celeb_vocab_path, 'rb'))
        kb_size = len(kb_vocab[0])
        celeb_vec_size = len(celeb_vocab[0])
        del kb_vocab, celeb_vocab

    model = model_type(
        src_vocab_size=vocab_size,  #config['model']['src_vocab_size'],
        tgt_vocab_size=vocab_size,  #config['model']['tgt_vocab_size'],
        src_emb_dim=config['model']['src_emb_dim'],
        tgt_emb_dim=config['model']['tgt_emb_dim'],
        enc_hidden_size=config['model']['enc_hidden_size'],
        dec_hidden_size=config['model']['dec_hidden_size'],
        context_hidden_size=config['model']['context_hidden_size'],
        batch_size=config['data']['batch_size'],
        image_in_size=config['model']['image_in_size'],
        bidirectional_enc=config['model']['bidirectional_enc'],
        bidirectional_context=config['model']['bidirectional_context'],
        num_enc_layers=config['model']['num_enc_layers'],
        num_dec_layers=config['model']['num_dec_layers'],
        num_context_layers=config['model']['num_context_layers'],
        dropout_enc=config['model']['dropout_enc'],
        dropout_dec=config['model']['dropout_dec'],
        dropout_context=config['model']['dropout_context'],
        max_decode_len=config['model']['max_decode_len'],
        non_linearity=config['model']['non_linearity'],
        enc_type=config['model']['enc_type'],
        dec_type=config['model']['dec_type'],
        context_type=config['model']['context_type'],
        use_attention=config['model']['use_attention'],
        decode_function=config['model']['decode_function'],
        num_states=args.num_states,
        use_kb=use_kb,
        kb_size=kb_size,
        celeb_vec_size=celeb_vec_size)
    model = torch_utils.gpu_wrapper(model, use_cuda=use_cuda)
    print_model(model)
    optimizer = optimizer_wrapper(model, config['training']['optimizer'],
                                  config['training']['lr'],
                                  config['training']['lr_decay'])
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    losses = []
    # Weight masking for cross entropy loss; can also use ignore_index
    # weight_mask = torch.ones(config['model']['tgt_vocab_size'])
    # weight_mask = torch_utils.gpu_wrapper(weight_mask, use_cuda=False)
    # weight_mask[3] = 0 # weight_mask[tgt_dic['word2id']['<pad>']] = 0
    loss_criterion = nn.CrossEntropyLoss(ignore_index=config['data']['pad_id'])
    #weight=weight_mask) nn.CrossEntropyLoss
    loss_criterion = torch_utils.gpu_wrapper(loss_criterion, use_cuda=use_cuda)
    # Load all training data
    train_data = pkl.load(open(args.train_pkl_path, 'rb'))
    # train_state_data = pkl.load(open(args.train_state_pkl_path,'r'))

    batch_size = config['data']['batch_size']
    total_samples = len(train_data)
    num_train_batch = int(math.ceil(float(total_samples) / float(batch_size)))
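    # Training loop: teacher-forced forward pass, token-level cross-entropy,
    # gradient clipping, then an optimizer step for every batch.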
    for epoch in range(config['training']['num_epochs']):
        total_loss = 0.
        n_total_words = 0.
        epoch_start = time.time()
        for batch_id in range(num_train_batch):
            batch_start = time.time()
            batch_data = train_data[batch_id * batch_size:(batch_id + 1) *
                                    batch_size]
            if use_kb:
                kb_len = np.array(
                    kb_data[0][batch_id * batch_size:(batch_id + 1) *
                               batch_size])
                kb_len = utils.convert_states_to_torch(kb_len,
                                                       use_cuda=use_cuda)
                kb_vec = np.array(
                    kb_data[1][batch_id * batch_size:(batch_id + 1) *
                               batch_size])
                kb_vec = utils.convert_states_to_torch(kb_vec,
                                                       use_cuda=use_cuda)
                # Celebs
                celeb_len = np.array(
                    celeb_data[0][batch_id * batch_size:(batch_id + 1) *
                                  batch_size])
                celeb_len = utils.convert_states_to_torch(celeb_len,
                                                          use_cuda=use_cuda)
                celeb_vec = np.array(
                    celeb_data[1][batch_id * batch_size:(batch_id + 1) *
                                  batch_size])
                celeb_vec = utils.convert_states_to_torch(celeb_vec,
                                                          use_cuda=use_cuda)
                # print(kb_len)
                # # kb_array = np.array(kb_array)
                # # kb_len = np.array(kb_array[0])
                # kb_vec = np.array(kb_array[1])
                # kb_vec = utils.convert_states_to_torch(kb_vec, use_cuda=use_cuda)
                # celeb_vec = celeb_data[batch_id*batch_size:(batch_id+1)*batch_size]
            # batch_state = train_state_data[batch_id*batch_size:(batch_id+1)*batch_size]
            text_enc_input, text_enc_in_len, image_enc_input, dec_text_input, \
                dec_out_seq, dec_seq_length = utils.get_batch_mmd_data(
                    batch_data, config['data']['start_id'],
                    config['data']['end_id'], config['data']['pad_id'],
                    config['data']['image_rep_size'], annoyIndex, annoyPkl,
                    use_cuda=use_cuda)
            # Forward + Backward + Optimize
            # model.zero_grad() ??? zero grad model or optim?
            # https://discuss.pytorch.org/t/do-i-need-to-do-optimizer-zero-grad-when-using-adam-solver/3235
            optimizer.zero_grad()
            dec_output_prob = model(text_enc_input,
                                    image_enc_input,
                                    text_enc_in_len,
                                    dec_text_input,
                                    dec_out_seq,
                                    context_size=args.context_size,
                                    teacher_forcing_ratio=1,
                                    use_cuda=use_cuda,
                                    kb_vec=kb_vec,
                                    celeb_vec=celeb_vec,
                                    kb_len=kb_len,
                                    celeb_len=celeb_len)
            # loss = loss_criterion(dec_output_prob, dec_out_seq)
            loss = loss_criterion(
                dec_output_prob.contiguous().view(
                    -1, vocab_size),  #config['model']['tgt_vocab_size']),
                dec_out_seq.view(-1))

            # target_toks = dec_out_seq.ne(config['data']['pad_id']).long().sum().data[0]
            n_words = dec_seq_length.float().sum().data[0]
            n_total_words += n_words
            loss.backward()
            # Gradient clipping to avoid exploding gradients
            nn.utils.clip_grad_norm(model.parameters(),
                                    config['training']['clip_grad'])
            optimizer.step()
            # exp_lr_scheduler.step()
            batch_elapsed = (time.time() - batch_start) / 60
            batch_loss = loss.data[0] / n_words  # @TODO
            if (batch_id + 1) % config['training']['log_every'] == 0:
                print(
                    'Batch Loss: Epoch [%d], Batch [%d], Loss: %.6f, Perplexity: %5.5f, Batch Time: %5.4f'
                    % (epoch + 1, batch_id + 1, batch_loss, np.exp(batch_loss),
                       batch_elapsed))
            total_loss += loss.data[0]
            losses.append(batch_loss)
        epoch_loss = total_loss / n_total_words

        epoch_elapsed = time.time() - epoch_start
        # if (epoch+1) % config['training']['log_every'] == 0:
        print(
            'Epoch Loss: Epoch [%d], Loss: %.6f, Perplexity: %5.5f, Epoch Time: %5.4f'
            % (epoch + 1, epoch_loss, np.exp(epoch_loss), epoch_elapsed))
        if (epoch + 1) % config['training']['evaluate_every'] == 0:
            print("\nEvaluation:")
            evaluate(args.valid_pkl_path, loss_criterion, model, config,
                     vocab_size, annoyIndex, annoyPkl, use_cuda=use_cuda,
                     use_kb=use_kb, valid_kb_path=args.valid_kb_path,
                     valid_celeb_path=args.valid_celeb_path)
        # Save the models
        if (epoch + 1) % config['training']['save_every'] == 0:
            # Save and load only the model parameters(recommended).
            torch.save(
                model.state_dict(),
                os.path.join(args.model_path,
                             'model_params_%d.pkl' % (epoch + 1)))