def evaluate_agent(agent,
                   epoch,
                   batch_size,
                   recom_length,
                   validSample,
                   testSample,
                   device,
                   eval_type='valid',
                   final_eval=False):
    correct = 0.
    correctk = 0.

    if eval_type == 'valid':
        sample = validSample
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    else:
        sample = testSample

    for i in range(0, sample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            i, i + batch_size, sample, None, recom_length)
        embed_batch, tgt_batch, action_batch, reward_batch = Variable(
            embed_batch.to(device)), Variable(tgt_batch.to(device)), Variable(
                action_batch.to(device)), Variable(reward_batch.to(device))
        k = embed_batch.size(0)
        agent.eval()
        probs, _, _ = agent((embed_batch, length), True)
        mask = torch.zeros(k, probs.size(1)).to(device)
        mask.scatter_(1, action_batch, 1.)
        outputk = probs * mask

        output_click = outputk.data.max(1)[1]
        correct += output_click.long().eq(
            tgt_batch.data.long()).cpu().numpy().sum()
        all_prob_output = outputk.data.cpu().numpy()

        for j in range(len(all_prob_output)):
            # 1-based rank of the true next click among the masked scores
            pos = int(
                np.argwhere(
                    np.argsort(-all_prob_output[j]) ==
                    tgt_batch.data.long().cpu().numpy()[j])[0] + 1)
            # p@k
            if pos <= 10:
                correctk += 1
    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_prek = np.round(100 * correctk / sample.length(), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}, precision@10 {0} : {2}'.format(
            eval_type, eval_acc, eval_prek))
    else:
        print(
            'togrep : results : epoch {0} ; accuracy {1} : {2}, precision@10 {1} : {3}'
            .format(epoch, eval_type, eval_acc, eval_prek))
    return eval_acc, eval_prek
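
# A minimal usage sketch (not part of the original script): evaluate_agent can
# be called once per epoch on the validation split, assuming `agent`,
# `validSample`, `testSample`, and `device` are built elsewhere and that
# `batch_size`/`recom_length` match the training configuration.
#
# valid_acc, valid_p10 = evaluate_agent(agent, epoch, batch_size, recom_length,
#                                       validSample, testSample, device,
#                                       eval_type='valid')
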
def evaluate_interaction(model,
                         epoch,
                         batch_size,
                         recom_length,
                         validSample,
                         testSample,
                         loss_fn_target,
                         loss_fn_reward,
                         device,
                         eval_type='valid',
                         final_eval=False):
    correct = 0.
    correct_reward = 0.
    mapeach = 0.
    all_costs = []

    if eval_type == 'valid':
        sample = validSample
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    else:
        sample = testSample

    for i in range(0, sample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            i, i + batch_size, sample, None, recom_length)
        embed_batch, tgt_batch, reward_batch, action_batch = Variable(
            embed_batch.to(device)), Variable(tgt_batch.to(device)), Variable(
                reward_batch.to(device)), Variable(action_batch.to(device))
        k = embed_batch.size(0)
        generator, agent = model
        generator.eval()
        agent.eval()
        enc_out, h = generator((embed_batch, length))
        _, action, _ = agent((embed_batch, length), True)
        output = generator.next_click(enc_out[:, -1, :], action,
                                      len(embed_batch))
        reward, reward_logit = generator.get_reward(
            tgt_batch.view(-1, 1), enc_out[:, -1, :].unsqueeze(0))
        pred_reward = torch.round(reward.data)
        correct_reward += pred_reward.long().eq(
            reward_batch.data.long()).cpu().sum().numpy()

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().numpy().sum()
        all_prob = output.data.cpu().numpy()
        for j in range(len(output)):
            # accumulate the reciprocal rank of the true next click
            mapeach += 1 / int((np.argwhere(
                np.argsort(-all_prob[j]) ==
                tgt_batch.data.long().cpu().numpy()[j])[0] + 1))
        # loss
        with torch.no_grad():
            loss_pred = loss_fn_target(output, tgt_batch)
            loss_reward = loss_fn_reward(reward_logit, reward_batch)
            loss = loss_pred + loss_reward
            all_costs.append(loss.data.cpu().numpy())

    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_map = np.round(100 * mapeach / sample.length(), 2)
    eval_acc_reward = np.round(100 * correct_reward / sample.length(), 2)
    if final_eval:
        print(
            'finalgrep : accuracy {0} : {1}, map {0} : {2}, accuracy reward {0} : {3}'
            .format(eval_type, eval_acc, eval_map, eval_acc_reward))
    else:
        print(
            'togrep : results : epoch {0} ; mean accuracy pred {1} : {2}, map pred {1} : {3}; mean accuracy reward {1} : {4}'
            .format(epoch, eval_type, eval_acc, eval_map, eval_acc_reward))
    return eval_acc, eval_map, eval_acc_reward, np.mean(all_costs)
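
# A minimal usage sketch (assumption: a trained generator/agent pair, the
# sample sets, the loss functions, and `device` exist in the surrounding
# training script). evaluate_interaction expects the pair as a tuple:
#
# acc, map_score, reward_acc, mean_cost = evaluate_interaction(
#     (generator, agent), epoch, batch_size, recom_length, validSample,
#     testSample, loss_fn_target, loss_fn_reward, device, eval_type='valid')
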
def evaluate_user(generator,
                  epoch,
                  batch_size,
                  recom_length,
                  validSample,
                  testSample,
                  loss_fn_target,
                  loss_fn_reward,
                  device,
                  eval_type='valid',
                  model_type='recommend',
                  final_eval=False):
    correct = 0.
    correctk = 0.
    correct_reward = 0.
    all_costs = []

    if eval_type == 'valid':
        sample = validSample
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    else:
        sample = testSample

    # Replace the criteria passed in with fresh, mean-reduced ones: cross-entropy
    # for the next-click prediction and BCE-with-logits for the reward prediction.
    loss_fn_target = nn.CrossEntropyLoss().to(device)
    loss_fn_reward = nn.BCEWithLogitsLoss().to(device)

    for i in range(0, sample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            i, i + batch_size, sample, None, recom_length)
        embed_batch, tgt_batch, reward_batch, action_batch = Variable(
            embed_batch.to(device)), Variable(tgt_batch.to(device)), Variable(
                reward_batch.to(device)), Variable(action_batch.to(device))
        k = embed_batch.size(0)
        # model(agent) forward
        generator.eval()
        enc_out, h = generator((embed_batch, length))
        if model_type == 'recommend':
            output = generator.next_click(enc_out[:, -1, :], action_batch,
                                          len(embed_batch))
        else:
            output = generator.next_simple(enc_out[:, -1, :])
        output_click = output.data.max(1)[1]
        correct += output_click.data.long().eq(
            tgt_batch.data.long()).cpu().numpy().sum()
        all_prob_output = output.data.cpu().numpy()

        reward, reward_logit = generator.get_reward(
            tgt_batch.view(-1, 1), enc_out[:, -1, :].unsqueeze(0))
        pred_reward = torch.round(reward)
        correct_reward += pred_reward.long().eq(
            reward_batch.data.long()).cpu().sum().numpy()

        for j in range(len(all_prob_output)):
            pos = int(
                np.argwhere(
                    np.argsort(-all_prob_output[j]) ==
                    tgt_batch.data.long().cpu().numpy()[j])[0] + 1)
            # p@k
            if pos <= 10:
                correctk += 1
        # loss
        with torch.no_grad():
            loss_pred = loss_fn_target(output, tgt_batch)
            loss_reward = loss_fn_reward(reward_logit, reward_batch)
            loss = loss_pred + loss_reward
            all_costs.append(loss.data.cpu().numpy())

    eval_acc = np.round(100 * correct / sample.length(), 2)
    eval_prek = np.round(100 * correctk / sample.length(), 2)
    eval_acc_rewd = np.round(100 * correct_reward / sample.length(), 2)
    if final_eval:
        print(
            'finalgrep : accuracy {0} : {1}, precision@10 {0} : {2}, reward_accuracy {0} : {3}'
            .format(eval_type, eval_acc, eval_prek, eval_acc_rewd))
    else:
        print(
            'togrep : results : epoch {0} ; accuracy {1} : {2}, precision@10 {1} : {3}, reward_accuracy {1} : {4}'
            .format(epoch, eval_type, eval_acc, eval_prek, eval_acc_rewd))
    return eval_acc, eval_prek, eval_acc_rewd, np.mean(all_costs)
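
# A minimal usage sketch for evaluating the user model (generator) alone,
# under the same assumptions as above. With model_type='recommend' the
# next-click prediction is conditioned on the recorded recommendation actions
# (action_batch); any other value falls back to generator.next_simple:
#
# acc, p10, reward_acc, mean_cost = evaluate_user(
#     generator, epoch, batch_size, recom_length, validSample, testSample,
#     loss_fn_target, loss_fn_reward, device, eval_type='valid',
#     model_type='recommend')
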
def train_pred_each(generator,
                    epoch,
                    trainSample,
                    optimizer,
                    batch_size,
                    embed_dim,
                    recom_length,
                    loss_fn_target,
                    loss_fn_reward,
                    device,
                    generator_only=True,
                    action_given=True,
                    only_rewards=False,
                    agent=None):
    print('\nGENERATOR TRAINING : Epoch ' + str(epoch))
    generator.train()
    all_costs = []
    logs = []
    decay = 0.95
    max_norm = 5
    all_num = 0
    last_time = time.time()
    correct = 0.
    correct_reward = 0.
    correctk = 0.

    # Decay the learning rate after the first epoch
    if epoch > 1:
        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * decay
    print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))
     
    for stidx in range(0, trainSample.length(), batch_size):
        # prepare batch
        embed_batch, length, tgt_batch, reward_batch, action_batch = getBatch_pred(
            stidx, stidx + batch_size, trainSample, embed_dim, recom_length)
        embed_batch, tgt_batch, reward_batch, action_batch = Variable(
            embed_batch.to(device)), Variable(tgt_batch.to(device)), Variable(
                reward_batch.to(device)), Variable(action_batch.to(device))
        k = embed_batch.size(0)  # actual batch size
        # model forward
        enc_out, h = generator((embed_batch, length))
        if generator_only:
            if action_given:
                output = generator.next_click(enc_out[:, -1, :], action_batch,
                                              len(embed_batch))
            else:
                output = generator.next_simple(enc_out[:, -1, :])
        else:
            # `agent` must be supplied when the generator is trained jointly with
            # the agent, so the agent's sampled actions condition the next click.
            _, action, _ = agent((embed_batch, length))
            output = generator.next_click(enc_out[:, -1, :], action,
                                          len(embed_batch))
        # reward prediction for the ground-truth next click
        reward, reward_logit = generator.get_reward(
            tgt_batch.view(-1, 1), enc_out[:, -1, :].unsqueeze(0))
        all_prob_output = output.data.cpu().numpy()
        # reward correctness
        pred_reward = torch.round(reward.data)
        correct_reward += pred_reward.long().eq(
            reward_batch.data.long()).cpu().sum().numpy()
        for i in range(len(all_prob_output)):
            # 1-based rank of the true next click in the predicted ordering
            pos = int(
                np.argwhere(
                    np.argsort(-all_prob_output[i]) ==
                    tgt_batch.data.long().cpu().numpy()[i])[0] + 1)
            # p@1 and p@10
            if pos <= 1:
                correct += 1
            if pos <= 10:
                correctk += 1
        # loss
        loss_pred = loss_fn_target(output, tgt_batch)
        # uniform per-example weights for the reward loss
        # weight_loss = (reward_batch + 1)  # ** 5.3
        weight_loss = torch.FloatTensor(k).fill_(1).to(device)
        loss_fn_reward = nn.BCEWithLogitsLoss(weight_loss)
        loss_reward = loss_fn_reward(reward_logit, reward_batch)
        if not only_rewards:
            loss = loss_pred + loss_reward
        else:
            loss = loss_reward
            # Disable updates of the RNN model so only the reward head is trained
            for name, param in generator.named_parameters():
                if 'embedding' in name or 'encoder' in name or 'enc2out' in name:
                    param.requires_grad = False

        all_costs.append(loss.data.cpu().numpy())
        # backward
        optimizer.zero_grad()
        loss.backward()
        # gradient clipping
        clip_grad_norm_(
            filter(lambda p: p.requires_grad, generator.parameters()), max_norm)
        # clip_grad_value_(filter(lambda p: p.requires_grad, generator.parameters()), 1)
        # optimizer step
        optimizer.step()
    train_acc = np.round(100 * correct / trainSample.length(), 2)
    train_preck = np.round(100 * correctk / trainSample.length(), 2)
    train_reward_acc = np.round(100 * correct_reward / trainSample.length(), 2)
    print(
        'results : epoch {0} ; mean accuracy pred : {1}; mean P@10 pred : {2}; mean accuracy reward : {3}'
        .format(epoch, train_acc, train_preck, train_reward_acc))
    return train_acc, train_preck, np.mean(all_costs)
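
# A minimal sketch of an outer training loop (assumptions: `generator`, the
# sample sets, `optimizer`, the loss functions, and `device` are constructed
# elsewhere in this project; the epoch count below is illustrative only):
#
# for epoch in range(1, n_epochs + 1):
#     train_pred_each(generator, epoch, trainSample, optimizer, batch_size,
#                     embed_dim, recom_length, loss_fn_target, loss_fn_reward,
#                     device, generator_only=True, action_given=True)
#     evaluate_user(generator, epoch, batch_size, recom_length, validSample,
#                   testSample, loss_fn_target, loss_fn_reward, device,
#                   eval_type='valid', model_type='recommend')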