def evaluate(model, dataset, batch_size, loss_function):
    total_loss = torch.Tensor([0])
    steps = torch.Tensor([0])
    for batch_data in data_loader(dataset,
                                  batch_size=batch_size,
                                  is_shuffle=True,
                                  drop_last=True):
        target = batch_data["target"]
        del batch_data["target"]
        batch_data = {
            k: autograd.Variable(torch.LongTensor(v))
            for k, v in batch_data.items()
        }
        log_probs = model.forward(**batch_data)

        batch_log_probs = log_probs.view(-1, log_probs.size(-1))
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            batch_data['length'],
            batch_firse=True,
            GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(target,
                                                   idx_unsort,
                                                   pad_value=PAD_TOKEN,
                                                   batch_firse=True,
                                                   GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data.cpu()
        steps += torch.sum(batch_data['length'].data.cpu())
    return total_loss / steps
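The torch_util.pack_padded_sequence / pad_packed_sequence calls above (with their GPU_INDEX and batch_firse keywords) are project-local wrappers whose exact behaviour is not shown here; the sketch below only illustrates the standard torch.nn.utils.rnn round-trip they appear to wrap: pack variable-length targets, then re-pad them to the original layout. All values are made up for demonstration.

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

PAD_TOKEN = 0  # assumed padding id

# Two target sequences of lengths 3 and 2, already padded (batch_first layout).
target = torch.tensor([[4, 7, 2], [9, 5, PAD_TOKEN]])
lengths = torch.tensor([3, 2])

packed = pack_padded_sequence(target, lengths, batch_first=True,
                              enforce_sorted=False)          # sorts internally
padded, out_lengths = pad_packed_sequence(packed, batch_first=True,
                                          padding_value=PAD_TOKEN)
# padded now equals the original target tensor and out_lengths == lengths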
def evaluate(model, dataset, batch_size, loss_function):
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    for batch_data in data_loader(dataset,
                                  batch_size=batch_size,
                                  is_shuffle=True,
                                  drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, terminal_mask, \
            tokens, update_mask, has_identifier = parse_batch_data(batch_data)
        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier,
                                  update_mask, terminal_mask, lengths,
                                  has_identifier)

        batch_log_probs = log_probs.contiguous().view(-1, log_probs.size(-1))

        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths,
            batch_firse=True,
            GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(target,
                                                   idx_unsort,
                                                   pad_value=PAD_TOKEN,
                                                   batch_firse=True,
                                                   GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.float().cuda(GPU_INDEX).data)
    return total_loss / steps
Example #3
def train(model,
          dataset,
          batch_size,
          loss_function,
          optimizer):
    total_loss = torch.Tensor([0])
    steps = torch.Tensor([0])
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True,  drop_last=True, epoch_ratio=0.5):
        # print(batch_data['terminal_mask'])
        # print('batch_data size: ', len(batch_data['terminal_mask'][0]), len(batch_data['terminal_mask'][0][0]))
        # res = list(more_itertools.collapse(batch_data['terminal_mask']))
        # print('res len: ', len(res))
        # res = util.padded(batch_data['terminal_mask'], deepcopy=True, fill_value=0)
        # print('batch_data size: ', len(res[0]), len(res[0][0]))
        # res = list(more_itertools.collapse(res))
        # print('res len: ', len(res))
        target = batch_data["target"]
        del batch_data["target"]
        model.zero_grad()
        batch_data = {k: autograd.Variable(torch.LongTensor(v)) for k, v in batch_data.items()}
        log_probs = model.forward(**batch_data)
        # log_probs.register_hook(create_hook_fn("log_probs"))

        batch_log_probs = log_probs.view(-1, log_probs.size(-1))

        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            batch_data['length'], batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(target, idx_unsort,
                                                   pad_value=PAD_TOKEN,
                                                   batch_firse=True,
                                                   GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))

        # loss.register_hook(create_hook_fn("loss"))
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 10)

        # print()
        # print("The loss is nan:{}".format(is_nan(loss.detach())))
        # print("The loss grad is nan:{}".format(is_nan(loss.grad)))
        # print("The log_probs is nan:{}".format(is_nan(log_probs.detach())))
        # print("The log_probs grad is nan:{}".format(is_nan(log_probs.grad)))
        # for name, param in model.named_parameters():
        #     print("name of {}: has nan:{}".format(name, is_nan(param.detach())))
        #     print("the gradient of {}: has nan:{}".format(name, is_nan(param.grad)))
        # if HAS_NAN:
        #     for k, v in batch_data.items():
        #         print("{}:{}".format(k, show_tensor(v)))
        #     print("{}:{}".format("target", show_tensor(target)))
        # print()

        optimizer.step()

        total_loss += loss.data.cpu()
        steps += torch.sum(batch_data['length'].data.cpu())
    return total_loss/steps
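loss_function is supplied by the caller; the way log_probs is flattened to (batch*seq, vocab) and target to (batch*seq,) suggests a token-level loss over log-probabilities with the padding id ignored. A minimal sketch of that pattern, assuming nn.NLLLoss with ignore_index (the specific loss and pad id are assumptions):

import torch
import torch.nn as nn

PAD_TOKEN = 0  # assumed padding id

# log-probabilities over a 10-word vocabulary for a (batch=2, seq=4) batch
log_probs = torch.randn(2, 4, 10).log_softmax(dim=-1)
target = torch.tensor([[3, 1, PAD_TOKEN, PAD_TOKEN],
                       [2, 5, 7, PAD_TOKEN]])

loss_function = nn.NLLLoss(ignore_index=PAD_TOKEN)  # pairs with log-probs
loss = loss_function(log_probs.view(-1, log_probs.size(-1)),  # (batch*seq, vocab)
                     target.view(-1))                         # (batch*seq,)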
Example #4
def accuracy_evaluate(
    model,
    dataset,
    batch_size,
    loss_function,
):
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    accuracy_dict = None
    for batch_data in data_loader(dataset,
                                  batch_size=batch_size,
                                  is_shuffle=True,
                                  drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, tokens, \
            update_mask = parse_batch_data(batch_data)
        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier,
                                  update_mask, lengths)

        batch_log_probs = log_probs.contiguous().view(-1, log_probs.size(-1))

        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths,
            batch_firse=True,
            GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(target,
                                                   idx_unsort,
                                                   pad_value=PAD_TOKEN,
                                                   batch_firse=True,
                                                   GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.data)
        # print("target size:{}".format(target.size()))
        # print("batch log size:{}".format(log_probs.size()))
        topk_accuracy = calculate_accuracy_of_code_completion(
            log_probs, target, ignore_token=PAD_TOKEN, gpu_index=GPU_INDEX)
        if accuracy_dict is None:
            accuracy_dict = topk_accuracy
        else:
            for k, v in topk_accuracy.items():
                accuracy_dict[k] += v
    accuracy_dict = {
        k: float(v) / steps.item()
        for k, v in accuracy_dict.items()
    }
    return total_loss / steps, accuracy_dict
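calculate_accuracy_of_code_completion is project-specific; judging by how its result is summed per batch and later divided by the total token count, it likely returns per-batch hit counts for several values of k. A rough, assumed stand-in based on torch.topk, to clarify the accumulation above:

import torch

def topk_hit_counts(log_probs, target, ks=(1, 3, 5), ignore_token=0):
    """Count positions whose gold id is among the top-k predictions,
    skipping padding. Hypothetical stand-in for
    calculate_accuracy_of_code_completion (assumed behaviour)."""
    mask = target != ignore_token                            # (batch, seq)
    counts = {}
    for k in ks:
        topk = log_probs.topk(k, dim=-1).indices             # (batch, seq, k)
        hit = (topk == target.unsqueeze(-1)).any(dim=-1)     # (batch, seq)
        counts[k] = (hit & mask).sum().item()
    return counts

# Per-batch counts are then summed and divided by the number of real tokens.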
def accuracy_evaluate(model, dataset, batch_size, loss_function, vocabulary):
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    accuracy_dict = None
    sample_predict = []
    sample_target = []
    sample_prob = []
    for batch_data in data_loader(dataset,
                                  batch_size=batch_size,
                                  is_shuffle=True,
                                  drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, terminal_mask, \
            tokens, update_mask, has_identifier = parse_batch_data(batch_data)
        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier,
                                  update_mask, terminal_mask, lengths,
                                  has_identifier)

        batch_log_probs = log_probs.contiguous().view(-1, log_probs.size(-1))
        ori_target = target
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths,
            batch_firse=True,
            GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(target,
                                                   idx_unsort,
                                                   pad_value=PAD_TOKEN,
                                                   batch_firse=True,
                                                   GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.data)
        # print("target size:{}".format(target.size()))
        # print("batch log size:{}".format(log_probs.size()))
        topk_accuracy = calculate_accuracy_of_code_completion(
            log_probs, target, ignore_token=PAD_TOKEN, gpu_index=GPU_INDEX)
        if accuracy_dict is None:
            accuracy_dict = topk_accuracy
        else:
            for k, v in topk_accuracy.items():
                accuracy_dict[k] += v
        for l, p, t in zip(lengths, log_probs, ori_target):
            p = p[:l]
            p = torch.unsqueeze(p, dim=0)
            a, b, pro = get_predict_and_target_tokens(p, [t[:l]],
                                                      vocabulary.id_to_word,
                                                      k=5)
            sample_predict.append(a)
            sample_target.append(b)
            sample_prob.append(pro)
    accuracy_dict = {
        k: float(v) / steps.item()
        for k, v in accuracy_dict.items()
    }
    # materialize the flattened samples so callers get lists, not one-shot iterators
    sample_predict = list(more_itertools.flatten(sample_predict))
    sample_target = list(more_itertools.flatten(sample_target))
    sample_prob = list(more_itertools.flatten(sample_prob))
    return total_loss / steps, accuracy_dict, sample_predict, sample_target, sample_prob, steps
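get_predict_and_target_tokens and vocabulary.id_to_word also come from the project; a hypothetical sketch of what that decoding step might look like, mapping the k most probable ids at each position to words next to the gold token (function name and id_to_word mapping are assumptions):

import torch

def decode_topk(log_probs, target_ids, id_to_word, k=5):
    """Hypothetical decoder: top-k predicted words, gold words, and their
    probabilities for one sequence of log_probs with shape (1, seq, vocab)."""
    probs, ids = log_probs.exp().topk(k, dim=-1)              # (1, seq, k)
    predicted = [[[id_to_word[i.item()] for i in pos] for pos in seq]
                 for seq in ids]
    gold = [[id_to_word[int(t)] for t in seq] for seq in target_ids]
    return predicted, gold, probs.tolist()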