def evaluate(model, dataset, batch_size, loss_function):
    """Run the model over `dataset` and return the accumulated loss divided by the number of target tokens."""
    total_loss = torch.Tensor([0])
    steps = torch.Tensor([0])
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True, drop_last=True):
        target = batch_data["target"]
        del batch_data["target"]
        batch_data = {k: autograd.Variable(torch.LongTensor(v)) for k, v in batch_data.items()}

        log_probs = model.forward(**batch_data)
        batch_log_probs = log_probs.view(-1, list(log_probs.size())[-1])

        # pack and re-pad the targets (custom torch_util helpers) so they line up with the flattened output
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            batch_data['length'], batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(
            target, idx_unsort, pad_value=PAD_TOKEN, batch_firse=True, GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data.cpu()
        steps += torch.sum(batch_data['length'].data.cpu())
    return total_loss / steps
def evaluate(model, dataset, batch_size, loss_function):
    """GPU variant of `evaluate` for models whose inputs are produced by `parse_batch_data`."""
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True, drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, terminal_mask, tokens, update_mask, has_identifier \
            = parse_batch_data(batch_data)

        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier, update_mask,
                                  terminal_mask, lengths, has_identifier)
        batch_log_probs = log_probs.contiguous().view(-1, list(log_probs.size())[-1])

        # pack and re-pad the targets (custom torch_util helpers) so they line up with the flattened output
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths, batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(
            target, idx_unsort, pad_value=PAD_TOKEN, batch_firse=True, GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.float().cuda(GPU_INDEX).data)
    return total_loss / steps
def train(model, dataset, batch_size, loss_function, optimizer):
    """Run one training pass over `dataset` and return the accumulated loss divided by the number of target tokens."""
    total_loss = torch.Tensor([0])
    steps = torch.Tensor([0])
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True,
                                  drop_last=True, epoch_ratio=0.5):
        target = batch_data["target"]
        del batch_data["target"]
        model.zero_grad()

        batch_data = {k: autograd.Variable(torch.LongTensor(v)) for k, v in batch_data.items()}
        log_probs = model.forward(**batch_data)
        # log_probs.register_hook(create_hook_fn("log_probs"))
        batch_log_probs = log_probs.view(-1, list(log_probs.size())[-1])

        # pack and re-pad the targets (custom torch_util helpers) so they line up with the flattened output
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            batch_data['length'], batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(
            target, idx_unsort, pad_value=PAD_TOKEN, batch_firse=True, GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        # loss.register_hook(create_hook_fn("loss"))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
        optimizer.step()

        total_loss += loss.data.cpu()
        steps += torch.sum(batch_data['length'].data.cpu())
    return total_loss / steps
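# A minimal sketch of how `train` and `evaluate` above could be driven per epoch.
# Everything in it is an assumption for illustration: the function name, `EPOCHS`,
# `LEARNING_RATE`, the Adam optimizer and the checkpoint path are hypothetical and
# not part of the original code.
def train_and_evaluate_sketch(model, train_dataset, valid_dataset, batch_size,
                              loss_function, EPOCHS=10, LEARNING_RATE=1e-3):
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    best_valid_loss = None
    for epoch in range(EPOCHS):
        train_loss = train(model, train_dataset, batch_size, loss_function, optimizer)
        valid_loss = evaluate(model, valid_dataset, batch_size, loss_function)
        print("epoch {}: train loss {}, valid loss {}".format(
            epoch, train_loss.item(), valid_loss.item()))
        # keep the checkpoint with the lowest validation loss
        if best_valid_loss is None or valid_loss.item() < best_valid_loss:
            best_valid_loss = valid_loss.item()
            torch.save(model.state_dict(), "best_model.pkl")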
def accuracy_evaluate(model, dataset, batch_size, loss_function):
    """Evaluate `model` on `dataset`; return the accumulated loss and the accumulated top-k statistics, both normalized by the number of target tokens."""
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    accuracy_dict = None
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True, drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, tokens, update_mask \
            = parse_batch_data(batch_data)

        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier, update_mask, lengths)
        batch_log_probs = log_probs.contiguous().view(-1, list(log_probs.size())[-1])

        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths, batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(
            target, idx_unsort, pad_value=PAD_TOKEN, batch_firse=True, GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.data)

        # accumulate the per-batch top-k statistics; PAD positions are ignored
        topk_accuracy = calculate_accuracy_of_code_completion(
            log_probs, target, ignore_token=PAD_TOKEN, gpu_index=GPU_INDEX)
        if accuracy_dict is None:
            accuracy_dict = topk_accuracy
        else:
            for k, v in topk_accuracy.items():
                accuracy_dict[k] += v

    accuracy_dict = {k: float(v) / steps.item() for k, v in accuracy_dict.items()}
    return total_loss / steps, accuracy_dict
def accuracy_evaluate(model, dataset, batch_size, loss_function, vocabulary):
    """Same as `accuracy_evaluate` above, but also collects per-sequence top-5 predictions, targets and probabilities for inspection."""
    total_loss = torch.Tensor([0]).cuda(GPU_INDEX)
    steps = torch.Tensor([0]).cuda(GPU_INDEX)
    accuracy_dict = None
    sample_predict = []
    sample_target = []
    sample_prob = []
    for batch_data in data_loader(dataset, batch_size=batch_size, is_shuffle=True, drop_last=True):
        identifier_scope_mask, is_identifier, lengths, target, terminal_mask, tokens, update_mask, has_identifier \
            = parse_batch_data(batch_data)

        log_probs = model.forward(tokens, identifier_scope_mask, is_identifier, update_mask,
                                  terminal_mask, lengths, has_identifier)
        batch_log_probs = log_probs.contiguous().view(-1, list(log_probs.size())[-1])

        ori_target = target
        target, idx_unsort = torch_util.pack_padded_sequence(
            autograd.Variable(torch.LongTensor(target)).cuda(GPU_INDEX),
            lengths, batch_firse=True, GPU_INDEX=GPU_INDEX)
        target, _ = torch_util.pad_packed_sequence(
            target, idx_unsort, pad_value=PAD_TOKEN, batch_firse=True, GPU_INDEX=GPU_INDEX)

        loss = loss_function(batch_log_probs, target.view(-1))
        total_loss += loss.data
        steps += torch.sum(lengths.data)

        # accumulate the per-batch top-k statistics; PAD positions are ignored
        topk_accuracy = calculate_accuracy_of_code_completion(
            log_probs, target, ignore_token=PAD_TOKEN, gpu_index=GPU_INDEX)
        if accuracy_dict is None:
            accuracy_dict = topk_accuracy
        else:
            for k, v in topk_accuracy.items():
                accuracy_dict[k] += v

        # collect per-sequence top-5 predictions, targets and probabilities for later inspection
        for l, p, t in zip(lengths, log_probs, ori_target):
            p = p[:l]
            p = torch.unsqueeze(p, dim=0)
            a, b, pro = get_predict_and_target_tokens(p, [t[:l]], vocabulary.id_to_word, k=5)
            sample_predict.append(a)
            sample_target.append(b)
            sample_prob.append(pro)

    accuracy_dict = {k: float(v) / steps.item() for k, v in accuracy_dict.items()}
    sample_predict = more_itertools.flatten(sample_predict)
    sample_target = more_itertools.flatten(sample_target)
    sample_prob = more_itertools.flatten(sample_prob)
    return total_loss / steps, accuracy_dict, sample_predict, sample_target, sample_prob, steps
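# A minimal sketch of consuming the extra outputs of `accuracy_evaluate` above.
# The function name, the number of printed samples and the formatting are
# assumptions for illustration only.
import itertools

def show_samples_sketch(model, test_dataset, batch_size, loss_function, vocabulary, n_samples=5):
    loss, accuracy, predicts, targets, probs, steps = accuracy_evaluate(
        model, test_dataset, batch_size, loss_function, vocabulary)
    print("test loss: {}".format(loss.item()))
    for k, v in accuracy.items():
        print("accuracy[{}]: {}".format(k, v))
    # `predicts`, `targets` and `probs` are lazy iterators (more_itertools.flatten),
    # so only pull the first few entries for inspection
    for predict, target, prob in itertools.islice(zip(predicts, targets, probs), n_samples):
        print("target : {}".format(target))
        print("predict: {}".format(predict))
        print("prob   : {}".format(prob))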