def init_hidden(self, cur_batch_size):
    """Build a fresh (h0, c0) pair of randomly initialised recurrent states.

    Both tensors have shape
    (num_layers * bidirectional_num, cur_batch_size, hidden_size)
    and are moved to the configured device via to_cuda.
    """
    state_shape = (self.num_layers * self.bidirectional_num,
                   cur_batch_size,
                   self.hidden_size)
    h0 = to_cuda(torch.randn(*state_shape))
    c0 = to_cuda(torch.randn(*state_shape))
    return h0, c0
def predict_file(self, vector: Sequence[Vind]) -> Iterable[TokenResult]:
    """Run the two-direction RNN over one token-id sequence.

    :param vector: token ids for a single file (one sequence).
    :return: one TokenResult per position, holding the forward and backward
        softmax distributions as numpy arrays.
    """
    seq = to_cuda(torch.LongTensor([vector]))
    length = to_cuda(torch.LongTensor([len(vector)]))
    forward, backward = self.rnn_model(seq, length)
    forward = F.softmax(forward, dim=-1)
    backward = F.softmax(backward, dim=-1)
    # Fix: .numpy() raises a RuntimeError on CUDA tensors (and on tensors
    # that require grad), so detach and move to CPU first.  Both calls are
    # no-ops for CPU tensors, so CPU-only behavior is unchanged.
    forward = forward.detach().cpu()
    backward = backward.detach().cpu()
    return [TokenResult(forward[0, s, :].numpy(), backward[0, s, :].numpy())
            for s in range(forward.size()[1])]
def evaluate(model, valid_dataset, batch_size, evaluate_object_list: typing.List[Evaluator], train_loss_function, desc, label_preprocess_fn):
    """Run one full pass over valid_dataset, feeding every batch's logits and
    preprocessed labels to each Evaluator, and accumulate the mean loss.

    Returns (evaluate_object_list, mean_loss_tensor).
    NOTE(review): `desc` is accepted but never used in this body — confirm
    whether it was meant for the tqdm bar (compare train() which passes desc).
    NOTE(review): there is no torch.no_grad() here, so gradients are tracked
    during evaluation — presumably harmless but memory-wasteful; verify.
    """
    model.eval()
    # Reset per-epoch state in every evaluator before accumulating.
    for o in evaluate_object_list:
        o.clear_result()
    train_total_loss = to_cuda(torch.Tensor([0]))
    steps = to_cuda(torch.Tensor([0]))
    with tqdm(total=math.ceil(len(valid_dataset) / batch_size), leave=False) as pbar:
        for batch_data in data_loader(valid_dataset, batch_size=batch_size, is_shuffle=False, drop_last=False):
            # zero_grad in eval has no visible effect here; kept as-is.
            model.zero_grad()
            predict_logit = model.forward(batch_data)
            target = label_preprocess_fn(batch_data)
            train_loss = train_loss_function(predict_logit, target)
            for evaluator in evaluate_object_list:
                evaluator.add_result(predict_logit, target, batch_data=batch_data)
            train_total_loss += train_loss.data
            steps += 1
            pbar.update(1)
    return evaluate_object_list, train_total_loss / steps
def parse_target_batch_data(batch_data):
    """Build the (is_copy, target) training targets for one batch.

    is_copy: float tensor padded with ignore_token.
    target:  long tensor of all target ids flattened across the batch.
    """
    copy_flags = PaddedList(batch_data['is_copy'], fill_value=ignore_token)
    is_copy = to_cuda(torch.FloatTensor(copy_flags))
    flat_targets = list(more_itertools.flatten(batch_data['target']))
    target = to_cuda(torch.LongTensor(flat_targets))
    return is_copy, target
def _preprocess(self, batch_data):
    """Pad the two sentences and their character sequences into long tensors.

    Returns (s1_words, s1_chars, s2_words, s2_chars), each padded with the
    appropriate pad index and moved to the configured device.
    """
    from common.util import PaddedList
    s1 = batch_data["s1"]
    s2 = batch_data['s2']

    def word_tensor(seqs):
        # word-level padding uses the token pad index
        return to_cuda(torch.LongTensor(PaddedList(seqs, fill_value=self._pad_idx)))

    def char_tensor(seqs):
        # character-level padding uses the character pad index
        return to_cuda(torch.LongTensor(PaddedList(seqs, fill_value=self._character_pad_idx)))

    return (word_tensor(s1),
            char_tensor(batch_data['s1_char']),
            word_tensor(s2),
            char_tensor(batch_data['s2_char']))
def parse_target_batch_data(batch_data, ):
    """Pad the forward/backward language-model targets into long tensors,
    filling with ignore_id so the loss can skip padding positions."""
    def padded_targets(key):
        padded = PaddedList(batch_data[key], fill_value=ignore_id)
        return to_cuda(torch.LongTensor(padded))

    forward_target_seq = padded_targets('forward_target')
    backward_target_seq = padded_targets('backward_target')
    return forward_target_seq, backward_target_seq
def parse_input_tensor(batch_data, do_sample=False):
    """Assemble the model-input tuple for one batch.

    Returns (input_seq, input_seq_len, None, target_seq, target_seq_len,
    None, masked_positions); the two None slots are unused placeholders.
    `do_sample` is part of the common parse-fn interface and is not used here.
    """
    def as_long(value):
        return to_cuda(torch.LongTensor(value))

    input_seq = as_long(PaddedList(batch_data['input_seq']))
    inp_seq_len = as_long(batch_data['input_seq_len'])
    target_seq = as_long(PaddedList(batch_data['target_seq']))
    target_seq_len = as_long(batch_data['target_seq_len'])
    return (input_seq, inp_seq_len, None, target_seq, target_seq_len, None,
            batch_data['masked_positions'])
def _preprocess(self, batch_data):
    """Join each (s1, s2) pair into one padded sequence (word and char level)
    and build a per-pair distance-mask matrix.

    The two sentences are concatenated with a single pad token between them;
    with summary nodes enabled, two extra pad slots are appended (one summary
    position per sentence).  Returns (sentences, sentences_char,
    distance_matrix).
    """
    from common.util import PaddedList
    s1 = batch_data["s1"]
    s2 = batch_data['s2']
    batch_size = len(s1)
    # widest joined pair: len(s1) + separator + len(s2)
    size = max(len(t1)+len(t2)+1 for t1, t2 in zip(s1, s2))
    if self._summary_node:
        # two extra columns for the per-sentence summary nodes
        size += 2
    # print("size:{}".format(size))
    if not self._summary_node:
        sentences = to_cuda(torch.LongTensor(
            PaddedList([t1 + [self._pad_idx] + t2 for t1, t2 in zip(s1, s2)],
                       fill_value=self._pad_idx,)))
        sentences_char = to_cuda(torch.LongTensor(
            PaddedList([t1 + [[self._character_pad_idx]] + t2
                        for t1, t2 in zip(batch_data['s1_char'], batch_data['s2_char'])],
                       fill_value=self._character_pad_idx)))
    else:
        # same joining, plus two trailing pad slots for the summary nodes
        sentences = to_cuda(torch.LongTensor(
            PaddedList([t1 + [self._pad_idx] + t2 + [self._pad_idx, self._pad_idx]
                        for t1, t2 in zip(s1, s2)],
                       fill_value=self._pad_idx, )))
        sentences_char = to_cuda(torch.LongTensor(
            PaddedList(
                [t1 + [[self._character_pad_idx]] + t2 + [[self._character_pad_idx], [self._character_pad_idx]]
                 for t1, t2 in zip(batch_data['s1_char'], batch_data['s2_char'])],
                fill_value=self._character_pad_idx)))
    # -inf everywhere means "no connection"; per-sentence sub-blocks are
    # filled from util.create_distance_node_matrix below.
    distance_matrix = np.ones((batch_size, size, size)) * float('-inf')
    for i, (t1, t2) in enumerate(zip(s1, s2)):
        s1_matrix = util.create_distance_node_matrix(len(t1))
        s2_matrix = util.create_distance_node_matrix(len(t2))
        distance_matrix[i, :len(t1), :len(t1)] = s1_matrix
        distance_matrix[i, len(t1)+1:len(t1)+len(t2)+1, len(t1)+1:len(t1)+len(t2)+1] = s2_matrix
        if self._summary_node:
            # connect every s1 token to summary column -2, every s2 token to -1
            distance_matrix[i, :len(t1), -2] = 0
            distance_matrix[i, len(t1)+1:len(t1)+len(t2)+1, -1] = 0
    # np.stack over axis 0 of an already-3D array is an identity re-stack.
    distance_matrix = to_cuda(torch.FloatTensor(np.stack(distance_matrix, axis=0)))
    # sentence_same_token_link_matrix = []
    # for t1, t2 in zip(s1, s2):
    #     idx, idy, data = util.create_sentence_pair_same_node_matrix(t1, 0, t2, len(t1)+1)
    #     sentence_same_token_link_matrix.append(
    #         sparse.coo_matrix(
    #             (data, (idx, idy)),
    #             shape=(size, size), dtype=np.float
    #         ).toarray()
    #     )
    # sentence_same_token_link_matrix = to_cuda(torch.FloatTensor(np.stack(sentence_same_token_link_matrix, axis=0)))
    return sentences, sentences_char, distance_matrix,
def parse_target(batch_data):
    """Build (target_error_position, target_seq) for one batch, or None when
    no target is available (missing 'error_line' key or the enclosing
    no_target flag is set).  target_seq drops the leading start token."""
    if 'error_line' not in batch_data.keys() or no_target:
        return None
    target_error_position = to_cuda(
        torch.LongTensor(PaddedList(batch_data['error_line'])))
    padded_line_ids = PaddedList(batch_data['target_line_ids'],
                                 fill_value=ignore_id)
    # drop the first token (start symbol) from every row
    target_seq = to_cuda(torch.LongTensor(padded_line_ids))[:, 1:]
    return target_error_position, target_seq
def parse_input(batch_data, do_sample=False):
    """Pad inputs (and, unless sampling, targets) into long tensors.

    Returns (inputs, input_length, targets, targets_length); the target pair
    is (None, None) when do_sample is True.
    """
    def padded_long(key):
        return to_cuda(torch.LongTensor(PaddedList(batch_data[key])))

    inputs = padded_long('input_seq')
    input_length = padded_long('input_length')
    if do_sample:
        targets = None
        targets_length = None
    else:
        targets = padded_long('target_seq')
        targets_length = padded_long('target_length')
    return inputs, input_length, targets, targets_length
def _forward_pre_process(self, batch_data):
    """Convert one raw batch into the tensors the forward pass consumes.

    The per-sample grammar index lists are flattened across the batch;
    their lengths are captured before padding.
    """
    def as_long(value):
        return to_cuda(torch.LongTensor(value))

    input_seq = as_long(PaddedList(batch_data['input_seq']))
    input_length = as_long(batch_data['input_length'])
    decoder_input = as_long(PaddedList(batch_data['decoder_input']))
    flat_grammar = list(more_itertools.flatten(batch_data['grammar_index']))
    grammar_index_length = as_long([len(t) for t in flat_grammar])
    grammar_index = as_long(PaddedList(flat_grammar))
    target_index = batch_data['target_index']
    return (input_seq, input_length, decoder_input, grammar_index,
            grammar_index_length, target_index)
def decoder(self, encoder_output, endocer_hidden, encoder_mask, **kwargs):
    """Greedy step-by-step decoding up to self.max_length steps.

    Each batch element keeps decoding while its entry in continue_mask is 1;
    elements that emit end_label or are reported as errors by decoder_fn are
    masked out.  Returns (per-step decoder outputs, per-step sampled outputs,
    per-element error flags as a Python list).
    """
    batch_size = encoder_output.shape[0]
    # 1 = still decoding; ByteTensor so bitwise &/~ masking works below.
    continue_mask = to_cuda(
        torch.ByteTensor([1 for i in range(batch_size)]))
    # every sequence starts from the start label
    outputs = to_cuda(
        torch.LongTensor([[self.start_label] for i in range(batch_size)]))
    decoder_output_list = []
    outputs_list = []
    hidden = endocer_hidden
    error_list = [0 for i in range(batch_size)]
    for i in range(self.max_length):
        one_step_decoder_output, hidden, error_ids = self.decoder_fn(
            outputs, continue_mask, start_index=i, hidden=hidden,
            encoder_output=encoder_output, encoder_mask=encoder_mask,
            **kwargs)
        if (error_ids is not None) and len(error_ids) != 0:
            # record erroring elements and drop them from the active mask
            error_ids_list = [0 for i in range(batch_size)]
            for err in error_ids:
                # print('error index: {}'.format(err))
                error_list[err] = 1
                error_ids_list[err] = 1
            error_ids_tensor = to_cuda(torch.ByteTensor(error_ids_list))
            continue_mask = continue_mask & ~error_ids_tensor
            # continue_mask[err] = 0
        decoder_output_list += [one_step_decoder_output]
        outputs = self.create_next_output_fn(one_step_decoder_output, **kwargs)
        outputs_list += [outputs]
        # elements that just emitted end_label stop decoding
        step_continue = torch.ne(outputs, self.end_label).view(batch_size)
        continue_mask = continue_mask & step_continue
        # try:
        if torch.sum(continue_mask) == 0:
            break
        # except Exception as e:
        #     print(e)
        #     print(error_list)
        #     print(outputs)
        #     print(step_continue)
        #     print(continue_mask)
        #     raise Exception(e)
    return decoder_output_list, outputs_list, error_list
def parse_input(batch_data, do_sample=False):
    """Build the full input tuple (sequences, lengths, adjacency matrix and,
    unless sampling, targets) for one batch.

    When use_ast (enclosing-scope flag) is set, batch_data['adj'] holds
    per-sample edge tuples that are assembled into a dense 0/1 adjacency
    matrix through a sparse tensor; otherwise 'adj' is used as-is.
    """
    input_seq = to_cuda(
        torch.LongTensor(
            PaddedList(batch_data['error_token_ids'], fill_value=0)))
    input_line_length = to_cuda(
        torch.LongTensor(PaddedList(batch_data['error_line_length'])))
    input_line_token_length = to_cuda(
        torch.LongTensor(PaddedList(
            batch_data['error_line_token_length'])))
    input_length = to_cuda(
        torch.LongTensor(PaddedList(batch_data['error_token_length'])))
    if not use_ast:
        adj_matrix = to_cuda(torch.LongTensor(batch_data['adj']))
    else:
        # prepend the batch index to every edge tuple, then transpose the
        # list of (batch, row, col) triples into coordinate lists
        adjacent_tuple = [[[i] + tt for tt in t]
                          for i, t in enumerate(batch_data['adj'])]
        adjacent_tuple = [
            list(t) for t in unzip(more_itertools.flatten(adjacent_tuple))
        ]
        size = max(batch_data['error_token_length'])
        # print("max length in this batch:{}".format(size))
        adjacent_tuple = torch.LongTensor(adjacent_tuple)
        adjacent_values = torch.ones(adjacent_tuple.shape[1]).long()
        adjacent_size = torch.Size(
            [len(batch_data['error_token_length']), size, size])
        info('batch_data input_length: ' + str(batch_data['error_token_length']))
        info('size: ' + str(size))
        info('adjacent_tuple: ' + str(adjacent_tuple.shape))
        info('adjacent_size: ' + str(adjacent_size))
        # NOTE(review): torch.sparse.LongTensor is a legacy constructor;
        # newer torch uses torch.sparse_coo_tensor — confirm torch version.
        adj_matrix = to_cuda(
            torch.sparse.LongTensor(
                adjacent_tuple,
                adjacent_values,
                adjacent_size,
            ).float().to_dense())
    if not do_sample:
        target_error_position = to_cuda(
            torch.LongTensor(PaddedList(batch_data['error_line'])))
        target_seq = to_cuda(
            torch.LongTensor(
                PaddedList(batch_data['target_line_ids'],
                           fill_value=ignore_id)))
        target_length = to_cuda(
            torch.LongTensor(PaddedList(batch_data['target_line_length'])))
    else:
        target_error_position = None
        target_seq = None
        target_length = None
    return input_seq, input_line_length, input_line_token_length, input_length, adj_matrix, target_error_position, target_seq, target_length
def parse_graph_input_from_mask_lm_output(input_seq, input_length, adj, use_ast=True):
    """Turn masked-LM output pieces into graph-model inputs.

    Returns (adjacent_matrix, input_seq, input_length) as long tensors;
    with use_ast the per-sample edge tuples in `adj` are densified via a
    sparse tensor, otherwise `adj` is converted directly.
    """
    from common.problem_util import to_cuda
    from common.util import PaddedList

    def to_long(x):
        return to_cuda(torch.LongTensor(x))

    if not use_ast:
        adjacent_matrix = to_long(adj)
    else:
        # prepend batch index to each edge, then transpose into
        # (batch, row, col) coordinate lists
        adjacent_tuple = [[[i] + tt for tt in t] for i, t in enumerate(adj)]
        adjacent_tuple = [
            list(t) for t in unzip(more_itertools.flatten(adjacent_tuple))
        ]
        size = max(input_length)
        # print("max length in this batch:{}".format(size))
        adjacent_tuple = torch.LongTensor(adjacent_tuple)
        adjacent_values = torch.ones(adjacent_tuple.shape[1]).long()
        adjacent_size = torch.Size([len(input_length), size, size])
        # info('batch_data input_length: ' + str(batch_data['input_length']))
        # info('size: ' + str(size))
        # info('adjacent_tuple: ' + str(adjacent_tuple.shape))
        # info('adjacent_size: ' + str(adjacent_size))
        # NOTE(review): legacy torch.sparse.LongTensor constructor; newer
        # torch prefers torch.sparse_coo_tensor — confirm torch version.
        adjacent_matrix = to_cuda(
            torch.sparse.LongTensor(
                adjacent_tuple,
                adjacent_values,
                adjacent_size,
            ).float().to_dense())
    input_seq = to_long(PaddedList(input_seq))
    input_length = to_long(input_length)
    return adjacent_matrix, input_seq, input_length
def reverse_tensor(x, x_length):
    """Reverse only the first l valid elements of each row of x.

    x: [batch, seq, ...]; x_length: per-row valid lengths (tensor elements,
    read via .item()).  Padding past each row's length keeps its position.
    """
    reversed_rows = []
    for row, length in zip(torch.unbind(x, dim=0), x_length):
        # indices l-1, l-2, ..., 0 select the valid prefix in reverse order
        rev_index = to_cuda(torch.arange(length.item() - 1, -1, -1).long())
        flipped_prefix = row.index_select(dim=0, index=rev_index)
        reversed_rows.append(torch.cat([flipped_prefix, row[length:]], dim=0))
    return torch.stack(reversed_rows, dim=0)
def add_result(self, output, model_output, model_target, model_input, ignore_token=None, batch_data=None):
    """Feed one batch's predictions into the four sub-metrics
    (is-copy accuracy, position correctness, sampled-output accuracy and
    full-output correctness) and return a summary string.

    `output` is the pair (all_output, sample_output_ids); model_output[2] is
    the is-copy logit and model_output[0]/[1] the two position logits, with
    model_target indexed the same way.
    """
    model_output = [t.data for t in model_output]
    if ignore_token is None:
        ignore_token = self.ignore_token
    # sigmoid > 0.5 turns the copy logit into a hard 0/1 decision
    is_copy = (torch.sigmoid(model_output[2]) > 0.5).float()
    is_copy_target = model_target[2]
    is_copy_accuracy = self.is_copy_accuracy.add_result(
        is_copy, is_copy_target)
    # top-1 over the softmaxed logits gives the predicted positions
    p0 = torch.topk(F.softmax(model_output[0], dim=-1), dim=-1, k=1)[1]
    p1 = torch.topk(F.softmax(model_output[1], dim=-1), dim=-1, k=1)[1]
    position = torch.cat([p0, p1], dim=1)
    position_target = torch.stack([model_target[0], model_target[1]], dim=1)
    position_correct = self.position_correct.add_result(
        position, position_target)
    all_output, sample_output_ids = output
    target_output = to_cuda(
        torch.LongTensor(
            PaddedList(batch_data['target'], fill_value=ignore_token)))
    # drop the start token and align lengths before comparing
    sample_output_ids, target_output = expand_tensor_sequence_to_same(
        sample_output_ids, target_output[:, 1:])
    output_accuracy = self.output_accuracy.add_result(
        sample_output_ids, target_output)
    full_output_target = to_cuda(
        torch.LongTensor(
            PaddedList(batch_data['full_output_target'],
                       fill_value=ignore_token)))
    all_output, full_output_target = expand_tensor_sequence_to_same(
        all_output, full_output_target, fill_value=ignore_token)
    all_correct = self.all_correct.add_result(all_output,
                                              full_output_target)
    return "is_copy_accuracy evaluate:{}, position_correct evaluate:{}, output_accuracy evaluate:{}, " \
           "all_correct evaluate: {}".format(is_copy_accuracy, position_correct, output_accuracy, all_correct)
def expand_tensor_sequence_len(t, max_len, fill_value=0, dim=1):
    """Pad tensor `t` along dimension `dim` up to `max_len` with `fill_value`.

    Returns `t` unchanged when it is already max_len long; otherwise the
    padding keeps `t`'s integer dtype (long or byte/uint8), matching the
    original behavior of casting the float padding back.
    """
    t_len = t.shape[dim]
    if max_len == t_len:
        return t
    expand_shape = list(t.shape)
    expand_shape[dim] = 1
    # one slice of padding, broadcast (expanded) to the missing length
    one_t = to_cuda(torch.ones(*expand_shape).float()) * fill_value
    expand_t = one_t.expand(*[-1 for i in range(dim)], max_len - t_len,
                            *[-1 for i in range(len(t.shape) - 1 - dim)])
    # Fix: compare dtypes directly instead of matching device-dependent
    # type strings ('torch.cuda.LongTensor' vs 'torch.LongTensor'), which
    # silently misses any device not in the hard-coded list.
    if t.dtype == torch.long:
        expand_t = expand_t.long()
    elif t.dtype == torch.uint8:  # ByteTensor
        expand_t = expand_t.byte()
    res_t = torch.cat([t, expand_t], dim=dim)
    return res_t
def train(model, dataset, batch_size, loss_function, optimizer, clip_norm,
          epoch_ratio, parse_input_batch_data_fn, parse_target_batch_data_fn,
          create_output_ids_fn, evaluate_obj_list):
    """Train `model` for one epoch (scaled by epoch_ratio) over `dataset`.

    Returns (evaluate_obj_list, mean_loss).  Each batch is parsed by the
    supplied parse functions, scored by loss_function, and reported to every
    evaluator via create_output_ids_fn.
    """
    total_loss = to_cuda(torch.Tensor([0]))
    steps = 0
    for o in evaluate_obj_list:
        o.clear_result()
    model.train()
    with tqdm(total=(len(dataset) * epoch_ratio)) as pbar:
        for batch_data in data_loader(dataset, batch_size=batch_size,
                                      is_shuffle=True, drop_last=True,
                                      epoch_ratio=epoch_ratio):
            model.zero_grad()
            model_input = parse_input_batch_data_fn(batch_data,
                                                    do_sample=False)
            model_output = model.forward(*model_input)
            model_target = parse_target_batch_data_fn(batch_data)
            loss = loss_function(*model_output, *model_target)
            loss.backward()
            # Fix: clip_norm was accepted but never used, so gradient
            # clipping silently did not happen.  Apply it before the
            # optimizer step, mirroring the other train() in this file.
            if clip_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
            optimizer.step()
            output_ids = create_output_ids_fn(model_output, model_input,
                                              False)
            for evaluator in evaluate_obj_list:
                evaluator.add_result(output_ids, model_output, model_target,
                                     model_input, batch_data=batch_data)
            total_loss += loss.data
            step_output = 'in train step {} loss: {}'.format(
                steps, loss.data.item())
            # print(step_output)
            info(step_output)
            steps += 1
            pbar.update(batch_size)
    return evaluate_obj_list, (total_loss / steps).item()
def _validate_args(self, inputs, encoder_hidden, encoder_outputs, function, teacher_forcing_ratio):
    """Validate and normalize decoder arguments.

    Infers batch size from `inputs` or from the encoder hidden state,
    synthesizes a start-of-sequence input when `inputs` is None (only legal
    with teacher forcing disabled), and derives the max decoding length.
    Returns (inputs, batch_size, max_length).
    NOTE(review): if inputs is None, encoder_hidden is not None, and
    self.rnn_cell is neither nn.LSTM nor nn.GRU, batch_size is never
    assigned and this raises UnboundLocalError — confirm only LSTM/GRU
    cells are used.
    """
    if self.use_attention:
        if encoder_outputs is None:
            raise ValueError(
                "Argument encoder_outputs cannot be None when attention is used."
            )
    # inference batch size
    if inputs is None and encoder_hidden is None:
        batch_size = 1
    else:
        if inputs is not None:
            batch_size = inputs.size(0)
        else:
            # LSTM hidden state is a (h, c) tuple; GRU's is a single tensor
            if self.rnn_cell is nn.LSTM:
                batch_size = encoder_hidden[0].size(1)
            elif self.rnn_cell is nn.GRU:
                batch_size = encoder_hidden.size(1)
    # set default input and max decoding length
    if inputs is None:
        if teacher_forcing_ratio > 0:
            raise ValueError(
                "Teacher forcing has to be disabled (set 0) when no inputs is provided."
            )
        inputs = torch.LongTensor([self.sos_id] * batch_size).view(
            batch_size, 1)
        if torch.cuda.is_available():
            from common.problem_util import to_cuda
            inputs = to_cuda(inputs)
        max_length = self.max_length
    else:
        max_length = inputs.size(
            1) - 1  # minus the start of sequence symbol
    return inputs, batch_size, max_length
def decoder(self, encoder_output, endocer_hidden, encoder_mask, **kwargs):
    """Beam-search decoding up to self.max_length steps.

    Keeps self.beam_size hypotheses per batch element.  At each step every
    live beam is expanded by decoder_fn/create_beam_next_output_fn, the
    beam_size best total log-probabilities are kept, and all per-beam state
    (outputs, decoder outputs, hidden states, continue/error masks) is
    re-gathered through batch_index_select.  Returns
    (decoder_output_list, outputs_list, error_stack).
    """
    batch_size = encoder_output.shape[0]
    # [batch, beam] masks: 1 = beam still decoding / beam hit an error
    continue_mask_stack = to_cuda(torch.ByteTensor(
        [[1 for _ in range(self.beam_size)] for i in range(batch_size)]))
    beam_outputs = to_cuda(torch.LongTensor(
        [[[self.start_label] for _ in range(self.beam_size)] for i in range(batch_size)]))
    # outputs_stack = [to_cuda(torch.LongTensor([[self.start_label] for i in range(batch_size)])) for _ in range(self.beam_size)]
    # accumulated log-probability of each kept hypothesis
    probability_stack = to_cuda(torch.FloatTensor(
        [[0.0 for _ in range(self.beam_size)] for _ in range(batch_size)]))
    decoder_output_list = []
    outputs_list = []
    hidden_stack = [endocer_hidden for _ in range(self.beam_size)]
    error_stack = to_cuda(torch.ByteTensor(
        [[0 for _ in range(self.beam_size)] for i in range(batch_size)]))
    for i in range(self.max_length):
        # beam * (output * [batch, 1, ...])
        # beam_one_step_decoder_output = []
        beam_outputs_list = []
        beam_log_probs_list = []
        beam_decoder_output_list = []
        # beam * [batch, hidden]
        beam_hidden_list = []
        # beam * [batch, error_count]
        beam_error_ids = []
        for b in range(self.beam_size):
            outputs = beam_outputs[:, b]
            continue_mask = continue_mask_stack[:, b]
            hidden = hidden_stack[b]
            one_step_decoder_output, hidden, error_ids = self.decoder_fn(
                outputs, continue_mask, start_index=i, hidden=hidden,
                encoder_output=encoder_output, encoder_mask=encoder_mask,
                **kwargs)
            # if (error_ids is not None) and len(error_ids) != 0:
            error_ids_list = [0 for i in range(batch_size)]
            for err in error_ids:
                error_ids_list[err] = 1
            error_ids_tensor = to_cuda(torch.ByteTensor(error_ids_list))
            beam_error_ids += [error_ids_tensor]
            beam_hidden_list += [hidden]
            # one_beam_outputs: [batch, beam, seq]
            # beam_probs: [batch, beam]
            # one_beam_decoder_output: tuple of [batch, beam, seq]
            one_beam_outputs, one_beam_log_probs, one_beam_decoder_output = self.create_beam_next_output_fn(
                one_step_decoder_output, continue_mask=continue_mask,
                beam_size=self.beam_size, **kwargs)
            beam_outputs_list += [one_beam_outputs]
            beam_log_probs_list += [one_beam_log_probs]
            beam_decoder_output_list += [one_beam_decoder_output]
            if i == 0:
                # at the first step all beams are identical, so one
                # expansion is enough
                break
        # beam_step_log_probs: [batch, outer_beam, inner_beam]
        beam_step_log_probs = torch.stack(beam_log_probs_list, dim=1)
        if i != 0:
            beam_total_log_probs = torch.unsqueeze(probability_stack, dim=-1) + torch.squeeze(beam_step_log_probs, dim=-1)
        else:
            beam_total_log_probs = torch.squeeze(beam_step_log_probs, dim=-1)
        # pick the beam_size best hypotheses over outer_beam * inner_beam
        probability_stack, sort_index = torch.topk(
            beam_total_log_probs.view(batch_size, beam_total_log_probs.shape[1] * beam_total_log_probs.shape[2]),
            k=self.beam_size, dim=-1)
        # NOTE(review): integer-style division; on torch >= 1.5 `/` on long
        # tensors returns floats — confirm torch version or that
        # batch_index_select tolerates this index dtype.
        stack_sort_index = sort_index / self.beam_size
        # beam_outputs: [batch, outer_beam * inner_beam, seq]
        beam_outputs = beam_stack_and_reshape(beam_outputs_list)
        beam_outputs = batch_index_select(beam_outputs, sort_index, batch_size)
        # re-gather all previously recorded outputs to follow the surviving beams
        outputs_list = [batch_index_select(outputs, stack_sort_index, batch_size) for outputs in outputs_list]
        outputs_list += [beam_outputs]
        beam_decoder_output = [beam_stack_and_reshape(one_output_list) for one_output_list in zip(*beam_decoder_output_list)]
        beam_decoder_output = [batch_index_select(one_output, sort_index, batch_size) for one_output in beam_decoder_output]
        decoder_output_list = [[batch_index_select(one_output, stack_sort_index, batch_size) for one_output in decoder_output] for decoder_output in decoder_output_list]
        decoder_output_list += [beam_decoder_output]
        # beam_error = beam_stack_and_reshape(beam_error_ids)
        beam_error = torch.stack(beam_error_ids, dim=1)
        beam_error = batch_index_select(beam_error, stack_sort_index, batch_size=batch_size)
        beam_continue = torch.ne(beam_outputs, self.end_label).view(batch_size, self.beam_size)
        beam_continue = beam_continue & ~beam_error
        continue_mask_stack = batch_index_select(continue_mask_stack, stack_sort_index, batch_size) & beam_continue
        error_stack = batch_index_select(error_stack, stack_sort_index, batch_size) | beam_error
        if isinstance(beam_hidden_list[0], list):
            # hidden state is a tuple/list (e.g. LSTM): gather each part
            one_hidden_list = zip(*beam_hidden_list)
            hidden_stack = list(zip(*[deal_beam_hidden(one_hidden_beam_list, stack_sort_index, batch_size) for one_hidden_beam_list in one_hidden_list]))
        else:
            hidden_stack = deal_beam_hidden(beam_hidden_list, stack_sort_index, batch_size)
        # try:
        if torch.sum(continue_mask_stack) == 0:
            break
        # except Exception as e:
        #     print(e)
        #     print(error_list)
        #     print(outputs)
        #     print(step_continue)
        #     print(continue_mask)
        #     raise Exception(e)
    return decoder_output_list, outputs_list, error_stack
def parse_output(batch_data):
    """Drop each target sequence's leading start token, pad with ignore_id,
    and return the result as a single-element list of long tensors."""
    trimmed_targets = [seq[1:] for seq in batch_data['target_seq']]
    padded = PaddedList(trimmed_targets, fill_value=ignore_id)
    return [to_cuda(torch.LongTensor(padded))]
is_debug = args.debug just_evaluate = args.just_evaluate p_config = parameter_config.__dict__.get(args.config_name)(is_debug, args.output_log) epoches = p_config.get("epcohes", 20) lr = p_config.get("lr", 20) batch_size = p_config.get("batch_size", 32) train_loss_fn = p_config.get("train_loss", nn.CrossEntropyLoss)() clip_norm = p_config.get("clip_norm", 10) optimizer = p_config.get("optimizer", optim.SGD) optimizer_dict = p_config.get("optimizer_dict", dict()) epoch_ratio = p_config.get("epoch_ratio", 0.5) evaluate_object_list = p_config.get("evaluate_object_list") label_preprocess_fn = p_config.get( "label_preprocess", lambda x: to_cuda(torch.LongTensor(x['label']))) scheduler_fn = p_config.get( "scheduler_fn", lambda x: torch.optim.lr_scheduler.ReduceLROnPlateau( x, 'min', patience=3, verbose=True)) save_root_path = os.path.join(config.DATA_PATH, p_config.get("name")) util.make_dir(save_root_path) need_pad = p_config.get("need_pad", False) print("save_root_path:{}".format(save_root_path)) model_path = os.path.join(save_root_path, "model.pkl") model = get_model(p_config['model_fn'], p_config['model_dict'], p_config['pre_process_module_fn'], p_config['pre_process_module_dict'], model_path, load_previous=load_previous, parallel=problem_util.Parallel,
def sequence_transform_data_config3(is_debug, output_log=None):
    """Build the experiment-configuration dict for the SEDWithInitialState
    model on the generated random-target sequence-transform data.

    The returned dict is consumed by the generic training driver (model
    factory, preprocessing module, data splits, loss, optimizer settings,
    evaluators, etc.).
    """
    from model.encoder_decoder_graph import SEDWithInitialStatePreproceser
    import numpy as np
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    # ids 0..10 are data tokens; special symbols are allocated after them
    max_index = 10

    def new_id():
        # allocate the next unused vocabulary id
        nonlocal max_index
        max_index += 1
        return max_index

    max_length = 20
    begin_index = new_id()
    end_index = new_id()
    delimiter_index = new_id()
    pad_index = new_id()
    decoder_init_idx = new_id()
    for t in [train, valid, test]:
        t.end = [end_index]
    train_size = len(train)
    itr_num = 80
    batch_size = 14
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SEDWithInitialState
    return {
        "model_fn": SEDWithInitialState,
        "model_dict": {
            # transformer hyper-parameters (GPT-style naming)
            "cfg": dotdict({
                'n_embd': 768, 'n_head': 12, 'n_layer': 12,
                'embd_pdrop': 0.1, 'attn_pdrop': 0.1, 'resid_pdrop': 0.1,
                'afn': 'gelu', 'clf_pdrop': 0.1}),
            # vocabulary also reserves room for position embeddings
            "vocab": max_index + 1 + max_length * 2 + 4,
            "n_source_ctx": max_length + 2,
            "n_ctx": max_length * 2 + 4,
            "decoder_init_idx": decoder_init_idx,
        },
        "pre_process_module_fn": SEDWithInitialStatePreproceser,
        "pre_process_module_dict": {
            "begin_idx": begin_index,
            # key intentionally spelled 'delimeter_idx' to match the
            # preprocessor's parameter name
            "delimeter_idx": delimiter_index,
            "summary_idx": decoder_init_idx,
            "pad_idx": pad_index,
            "source_ctx": max_length+2,
            "position_embedding_base": max_index+1,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor(
            [PaddedList(t, fill_value=pad_index, shape=[max_length+1]) for t in x['y']])),
        "batch_size": batch_size,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "SEDWithInitialState",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
            "schedule": 'warmup_linear',
            "warmup": 0.002,
            "t_total": (train_size//batch_size)*itr_num,
            "b1": 0.9,
            "b2": 0.999,
            "e": 1e-8,
            "l2": 0.01,
            "vector_l2": 'store_true',
            "max_grad_norm": 1},
        # key intentionally spelled 'epcohes' — the training driver reads
        # p_config.get("epcohes", ...), so do not "fix" the spelling here.
        "epcohes": itr_num,
        "lr": 6.25e-5,
        "evaluate_object_list": [
            SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
            SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
def train(model, dataset, batch_size, loss_function, optimizer, clip_norm,
          epoch_ratio, evaluate_object_list, desc, label_preprocess_fn):
    """Train `model` for one epoch over `dataset`.

    Each batch is forwarded, its label built by label_preprocess_fn, scored
    by loss_function, backpropagated (with optional gradient clipping when
    clip_norm is not None), and reported to every evaluator.
    Returns (evaluate_object_list, mean_loss_tensor).

    Improvement: the large block of commented-out debugging code
    (shape prints, NaN probes, hook registration) was removed; the
    executable statements are unchanged.
    """
    total_loss = to_cuda(torch.Tensor([0]))
    steps = to_cuda(torch.Tensor([0]))
    for o in evaluate_object_list:
        o.clear_result()
    model.train()
    with tqdm(total=len(dataset) // batch_size, desc=desc, leave=False) as pbar:
        for batch_data in data_loader(dataset, batch_size=batch_size,
                                      is_shuffle=True, drop_last=True,
                                      epoch_ratio=epoch_ratio):
            model.zero_grad()
            log_probs = model.forward(batch_data)
            label = label_preprocess_fn(batch_data)
            loss = loss_function(log_probs, label)
            loss.backward()
            if clip_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
            optimizer.step()
            total_loss += loss.data
            steps += 1
            for evaluator in evaluate_object_list:
                evaluator.add_result(log_probs, label, batch_data=batch_data)
            pbar.update(1)
    return evaluate_object_list, total_loss / steps
def to_long(x):
    """Convert `x` to a LongTensor on the configured device."""
    tensor = torch.LongTensor(x)
    return to_cuda(tensor)
def parse_target_tensor(batch_data):
    """Pad the masked-LM target sequences with ignore_id and return them as
    a single-element list of long tensors."""
    padded_targets = PaddedList(batch_data['target_seq'],
                                fill_value=ignore_id)
    masked_target_seq = to_cuda(torch.LongTensor(padded_targets))
    return [masked_target_seq]
0 if i % 2 == 0 else np.pi / 2 for i in range(self.d) ]).unsqueeze(1), requires_grad=False) def forward(self, x): l = x.shape[-1] # computing signal pos = torch.arange(l).repeat(self.d, 1).to(x.device) tmp = pos * self.freqs + self.phases pos_enc = torch.sin(tmp) pos_enc = Variable(pos_enc) x = x + pos_enc return x if __name__ == "__main__": mdl = to_cuda(PositionEncoding()) batch_size = 8 n_channels = 128 n_items = 60 input = Variable(torch.ones(batch_size, n_channels, n_items)) input = to_cuda(input) out = mdl(input)
def evaluate(model, dataset, batch_size, loss_function, parse_input_batch_data_fn, parse_target_batch_data_fn, do_sample=False, print_output=False, create_output_ids_fn=None, evaluate_obj_list=[], expand_output_and_target_fn=None):
    """Evaluate `model` over `dataset` under torch.no_grad().

    Each batch is parsed, forwarded (optionally in sampling mode, in which
    case outputs/targets are aligned by expand_output_and_target_fn), scored
    by loss_function, and handed to every evaluator.  Returns
    (evaluate_obj_list, mean_loss).
    NOTE(review): mutable default `evaluate_obj_list=[]` is shared across
    calls — safe only because it is never mutated here (clear_result is
    called on its elements); confirm callers always pass their own list.
    """
    total_loss = to_cuda(torch.Tensor([0]))
    total_batch = to_cuda(torch.Tensor([0]))
    steps = 0
    for o in evaluate_obj_list:
        o.clear_result()
    model.eval()
    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset, batch_size=batch_size,
                                          drop_last=True):
                model.zero_grad()
                # model_input = parse_input_batch_data(batch_data)
                model_input = parse_input_batch_data_fn(batch_data,
                                                        do_sample=do_sample)
                # model_output = model.forward(*model_input, test=do_sample)
                if do_sample:
                    model_output = model.forward(*model_input,
                                                 do_sample=True)
                    model_target = parse_target_batch_data_fn(batch_data)
                    # sampled output length may differ from the target's;
                    # pad both to a common length before the loss
                    model_output, model_target = expand_output_and_target_fn(
                        model_output, model_target)
                else:
                    model_output = model.forward(*model_input)
                    model_target = parse_target_batch_data_fn(batch_data)
                loss = loss_function(*model_output, *model_target)
                output_ids = create_output_ids_fn(model_output, model_input,
                                                  do_sample)
                total_loss += loss.data
                total_batch += batch_size
                step_output = 'in evaluate step {} loss: {}, '.format(
                    steps, loss.data.item())
                for evaluator in evaluate_obj_list:
                    res = evaluator.add_result(output_ids, model_output,
                                               model_target, model_input,
                                               batch_data=batch_data)
                    step_output += res
                # print(step_output)
                info(step_output)
                if print_output and steps % 10 == 0:
                    pass
                steps += 1
                pbar.update(batch_size)
    return evaluate_obj_list, (total_loss / steps).item()
def multi_step_evaluate(model, dataset, batch_size, parse_input_batch_data_fn, parse_target_batch_data_fn,
                        do_sample=False, print_output=False, create_output_ids_fn=None, evaluate_obj_list=[],
                        expand_output_and_target_fn=None, max_step_times=0, vocabulary=None, file_path='',
                        create_multi_step_next_input_batch_fn=None, extract_includes_fn=lambda x: x['includes'],
                        print_output_fn=None, do_beam_search=False, target_file_path='main.out', log_file_path='main.log',
                        do_save_data=False, max_save_distance=None, save_records_to_database=False, db_path='',
                        table_name='', change_output_records_to_batch_fn=None, create_save_database_records_fn=None,
                        error_stop_type='normal'):
    """Iteratively repair each batch's programs for up to max_step_times
    rounds, compiling the intermediate outputs after every round.

    Per round: the model proposes a fix, the proposal is compiled
    (compile_code_ids_list), and each program either stops (compiles, or —
    under error_stop_type='oracle' — regresses to more errors and is rolled
    back to its previous output) or continues into the next round.
    Optionally saves additional training data and/or result records to an
    SQLite database.  Returns (evaluators incl. compile evaluator, mean
    loss, save_data_dict).
    NOTE(review): total_loss is initialized but never accumulated, so the
    returned t_loss is always 0 for steps > 0 — confirm intended.
    """
    total_loss = to_cuda(torch.Tensor([0]))
    total_batch = to_cuda(torch.Tensor([0]))
    steps = 0
    compile_evaluator = CompileResultEvaluate()
    compile_evaluator.clear_result()
    for o in evaluate_obj_list:
        o.clear_result()
    model.eval()
    from common.pycparser_util import tokenize_by_clex_fn
    tokenize_fn = tokenize_by_clex_fn()
    save_data_dict = {}
    save_records_list = []
    # file_path = add_pid_to_file_path(file_path)
    # target_file_path = add_pid_to_file_path(target_file_path)
    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset, batch_size=batch_size, drop_last=False):
                model.zero_grad()
                # input_data evolves round-by-round; batch_data stays pristine
                input_data = batch_data.copy()
                final_output_list = []
                output_records_list = []
                continue_list = [True for _ in range(batch_size)]
                result_list = [False for _ in range(batch_size)]
                result_records_list = []
                # -1 = not yet finished; resolved after the loop
                sample_steps = [-1 for _ in range(batch_size)]
                error_count_list = batch_data['error_count']
                for i in range(max_step_times):
                    model_input = parse_input_batch_data_fn(input_data, do_sample=True)
                    model_output = model.forward(*model_input, do_sample=True, do_beam_search=do_beam_search)
                    input_data, final_output, output_records, final_output_name_list, continue_list = create_multi_step_next_input_batch_fn(
                        input_data, model_input, model_output, continue_list, do_beam_search)
                    final_output_list += [final_output]
                    output_records_list += [output_records]
                    continue_list, result_list, cur_error_count_list = compile_code_ids_list(
                        final_output_name_list, continue_list, result_list, vocabulary=vocabulary,
                        includes_list=extract_includes_fn(input_data), file_path=file_path,
                        target_file_path=target_file_path, log_file_path=log_file_path,
                        do_compile_pool=True, need_transform=False)
                    if error_stop_type == 'oracle':
                        # reject a step that increased the compile-error count
                        reject_list = [
                            True if c and n > o else False for c, o, n in
                            zip(continue_list, error_count_list, cur_error_count_list)
                        ]
                    elif error_stop_type == 'normal':
                        reject_list = [False for _ in range(batch_size)]
                    # keep the best (lowest non-negative) error count seen so far
                    error_count_list = [
                        n if n < o and n >= 0 else o
                        for o, n in zip(error_count_list, cur_error_count_list)
                    ]
                    for i_f, rej in enumerate(reject_list):
                        if rej:
                            # use last output
                            final_output_name_list[i_f] = input_data[
                                'last_input_seq_name'][i_f]
                            continue_list[i_f] = False
                    # finished without rejection -> credit this round (i+1);
                    # finished by rejection -> credit the previous round (i)
                    sample_steps = [
                        i + 1 if s == -1 and not c and not r else s
                        for s, c, r in zip(sample_steps, continue_list, reject_list)
                    ]
                    sample_steps = [
                        i if s == -1 and not c and r else s
                        for s, c, r in zip(sample_steps, continue_list, reject_list)
                    ]
                    result_records_list += [result_list]
                    if sum(continue_list) == 0:
                        break
                # anything still unfinished used the full budget
                sample_steps = [
                    max_step_times if s == -1 else s for s in sample_steps
                ]
                if do_save_data:
                    batch_data['input_seq_name'] = batch_data[
                        'final_output_name']
                    save_res_dict = save_addition_data(
                        original_states=batch_data, states=input_data,
                        tokenize_fn=tokenize_fn, batch_size=batch_size,
                        file_path=file_path, target_file_path=target_file_path,
                        vocabulary=vocabulary, max_distande=max_save_distance,
                        only_error=True)
                    for k, v in save_res_dict.items():
                        save_data_dict[k] = save_data_dict.get(k, []) + v
                if save_records_to_database:
                    batch_output_records = change_output_records_to_batch_fn(
                        output_records_list, sample_steps)
                    records_list = create_save_database_records_fn(
                        batch_data, sample_steps, final_output_name_list,
                        result_list, batch_output_records, input_data)
                    save_records_list += records_list
                step_output = 'in evaluate step {}: '.format(steps)
                res = compile_evaluator.add_result(result_list)
                step_output += res
                for evaluator in evaluate_obj_list:
                    # customer evaluator interface
                    res = evaluator.add_result(result_list, batch_data=batch_data)
                    step_output += res
                # print(step_output)
                info(step_output)
                if print_output and steps % 1 == 0:
                    print_output_fn(output_records=output_records_list,
                                    final_output=final_output_list,
                                    batch_data=batch_data, step_i=steps,
                                    vocabulary=vocabulary,
                                    compile_result_list=result_records_list)
                steps += 1
                pbar.update(batch_size)
    evaluate_obj_list = [compile_evaluator] + evaluate_obj_list
    if save_records_to_database:
        create_table(db_path, DATA_RECORDS_DEEPFIX,
                     replace_table_name=table_name)
        run_sql_statment(db_path, DATA_RECORDS_DEEPFIX, 'insert_ignore',
                         save_records_list, replace_table_name=table_name)
    if steps == 0:
        t_loss = 0
    else:
        t_loss = (total_loss / steps).item()
    return evaluate_obj_list, t_loss, save_data_dict
def sample_and_save(model, dataset, batch_size, loss_function, parse_input_batch_data_fn,
                    parse_target_batch_data_fn, do_sample=False, print_output=False,
                    create_output_ids_fn=None, evaluate_obj_list=None,
                    expand_output_and_target_fn=None, add_data_record_fn=None,
                    db_path='', table_name=''):
    """Run the model over *dataset* in eval mode and persist generated records.

    For each batch the model's output is converted to token ids via
    ``create_output_ids_fn``, scored by every evaluator in
    ``evaluate_obj_list``, and turned into database rows by
    ``add_data_record_fn``.  Rows are flushed to the SQLite table
    ``table_name`` at ``db_path`` every 100 steps and once more at the end.

    :param model: model providing ``forward``; put into eval mode here.
    :param dataset: dataset iterated with ``data_loader`` (last partial
        batch is dropped).
    :param batch_size: number of samples per batch.
    :param loss_function: unused here (kept for interface parity with the
        sibling evaluate routines).
    :param parse_input_batch_data_fn: builds model inputs from a raw batch;
        receives ``do_sample`` as a keyword.
    :param parse_target_batch_data_fn: builds target tensors from a raw batch.
    :param do_sample: when True, forward is called with ``do_sample=True``
        and outputs/targets are aligned via ``expand_output_and_target_fn``.
    :param print_output: accepted for interface compatibility; no longer
        triggers any output (the old branch was dead code).
    :param create_output_ids_fn: maps (model_output, model_input) to ids.
    :param evaluate_obj_list: evaluators with ``clear_result``/``add_result``;
        defaults to an empty list (None sentinel avoids the shared mutable
        default pitfall).
    :param expand_output_and_target_fn: output/target aligner used only when
        ``do_sample`` is True.
    :param add_data_record_fn: maps (output_ids, model_output, batch_data)
        to a list of DB rows.
    :param db_path: path of the SQLite database to write to.
    :param table_name: table the rows are inserted into.
    :return: ``evaluate_obj_list`` with accumulated results.
    """
    if evaluate_obj_list is None:
        evaluate_obj_list = []
    total_batch = to_cuda(torch.Tensor([0]))
    saved_count = 0
    steps = 1
    for o in evaluate_obj_list:
        o.clear_result()
    model.eval()
    total_saved_list = []
    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset, batch_size=batch_size, drop_last=True):
                # No backward pass happens under no_grad; zero_grad kept for
                # parity with the other evaluate loops in this file.
                model.zero_grad()
                model_input = parse_input_batch_data_fn(batch_data, do_sample=do_sample)
                if do_sample:
                    model_output = model.forward(*model_input, do_sample=True)
                    model_target = parse_target_batch_data_fn(batch_data)
                    # Sampled outputs may differ in length from targets;
                    # align them before scoring.
                    model_output, model_target = expand_output_and_target_fn(
                        model_output, model_target)
                else:
                    model_output = model.forward(*model_input)
                    model_target = parse_target_batch_data_fn(batch_data)
                output_ids = create_output_ids_fn(model_output, model_input)
                total_batch += batch_size

                step_output = 'in evaluate step {} '.format(steps)
                for evaluator in evaluate_obj_list:
                    res = evaluator.add_result(output_ids, model_output, model_target,
                                               model_input, batch_data=batch_data)
                    step_output += res
                info(step_output)

                saved_list = add_data_record_fn(output_ids, model_output, batch_data)
                total_saved_list += saved_list
                # Periodic flush so a long run does not hold every row in memory.
                if steps % 100 == 0:
                    create_table(db_path, table_name)
                    insert_items(db_path, table_name, total_saved_list)
                    saved_count += len(total_saved_list)
                    print('saved {} record in total {}. '.format(
                        saved_count, total_batch.item()))
                    total_saved_list = []

                steps += 1
                pbar.update(batch_size)

    # Final flush of any rows accumulated since the last periodic save.
    create_table(db_path, table_name)
    insert_items(db_path, table_name, total_saved_list)
    saved_count += len(total_saved_list)
    print('saved {} record in total {}. '.format(saved_count, total_batch.item()))
    return evaluate_obj_list