def _init_sequence_ids(self):
    """Draw one random sequence id for every slot of the batch.

    Returns:
        A ``torch.LongTensor`` built from ``self.bsz`` (a 1-D tensor of that
        many entries when ``self.bsz`` is an int) whose values are sampled
        from ``[0, self.n_sequences)``.
    """
    # Allocate first (contents are uninitialised), then fill in place.
    sequence_ids = torch.LongTensor(self.bsz)
    sequence_ids.random_(0, self.n_sequences)  # upper bound is exclusive
    return sequence_ids
# Split into training and test sets at a 4:1 ratio.
split_at = int(len(input_ids) * 0.8)
train_order = random_order[:split_at]
test_order = random_order[split_at:]

input_ids_train = np.array([input_ids[i] for i in train_order])
input_types_train = np.array([input_types[i] for i in train_order])
input_masks_train = np.array([input_masks[i] for i in train_order])
y_train = np.array([label[i] for i in train_order])
print(input_ids_train.shape, input_types_train.shape, input_masks_train.shape, y_train.shape)

input_ids_test = np.array([input_ids[i] for i in test_order])
input_types_test = np.array([input_types[i] for i in test_order])
input_masks_test = np.array([input_masks[i] for i in test_order])
y_test = np.array([label[i] for i in test_order])
print(input_ids_test.shape, input_types_test.shape, input_masks_test.shape, y_test.shape)

# Wrap both splits in DataLoaders.
BATCH_SIZE = 16

train_data = TensorDataset(
    torch.LongTensor(input_ids_train),
    torch.LongTensor(input_types_train),
    torch.LongTensor(input_masks_train),
    torch.LongTensor(y_train),
)
# Training batches are drawn in random order.
train_sampler = RandomSampler(train_data)
train_loader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)

test_data = TensorDataset(
    torch.LongTensor(input_ids_test),
    torch.LongTensor(input_types_test),
    torch.LongTensor(input_masks_test),
    torch.LongTensor(y_test),
)
# Evaluation batches keep the dataset order.
test_sampler = SequentialSampler(test_data)
test_loader = DataLoader(test_data, sampler=test_sampler, batch_size=BATCH_SIZE)


# Define the BERT model.
class Model(nn.Module):
    """BERT model."""
# One classification head per entry; every head predicts args.ncl classes.
numc = [args.ncl] * args.hc
model = models.__dict__[args.arch](num_classes=numc, return_features=False)
knn_dim = 4096
N = len(trainloader.dataset)

# Schedule (in number of samples seen) at which L is re-optimised, in
# descending order, preceded by one entry that lies beyond the last epoch.
optimize_times = ((args.epochs + 1.0001) * N * (np.linspace(0, 1, args.nopts))[::-1]).tolist()
optimize_times = [(args.epochs + 10) * N] + optimize_times
logger.warning(
    'We will optimize L at epochs: {}'.format(
        [np.round(1.0 * t / N, 2) for t in optimize_times]))

# init selflabels randomly: cycle through the class ids, then shuffle
if args.hc == 1:
    selflabels = (np.arange(N) % args.ncl).astype(np.int32)
    selflabels = np.random.permutation(selflabels)
    selflabels = torch.LongTensor(selflabels).cuda()
else:
    selflabels = np.zeros((args.hc, N), dtype=np.int32)
    for nh in range(args.hc):
        head_labels = (np.arange(N) % numc[nh]).astype(np.int32)
        selflabels[nh] = np.random.permutation(head_labels)
    selflabels = torch.LongTensor(selflabels).cuda()

optimizer = optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=5e-4,
)

# Model
if args.test_only or len(args.resume) > 0:
    # Load checkpoint.
    logger.info('==> Resuming from checkpoint..')
    assert(os.path.isdir('%s/'%(args.exp)))
def encode_file(self, path, ordered=False, verbose=False, add_eos=True,
                add_double_eos=False) -> torch.LongTensor:
    """Encode the whole content of a UTF-8 text file into token ids.

    The file is read in one go and handed to ``self.sp.EncodeAsIds``
    (presumably a SentencePiece processor - confirm against the class).

    Args:
        path: Path of the text file to encode.
        ordered: Unused by this implementation.
        verbose: Unused by this implementation.
        add_eos: Unused by this implementation.
        add_double_eos: Unused by this implementation.

    Returns:
        A 1-D ``torch.LongTensor`` holding the ids of the entire file.
    """
    with open(path, encoding='utf-8') as handle:
        text = handle.read()
    piece_ids = self.sp.EncodeAsIds(text)
    return torch.LongTensor(piece_ids)
def __call__(self, examples):
    """Collate a list of examples into batched graphs and index tensors.

    Args:
        examples: Sequence of per-molecule examples from which named fields
            are pulled with ``_unpack_field``.

    Returns:
        A dict with ``mol_trees`` and ``mol_graph_batch``.  When
        ``self.training`` is true the dict additionally carries the batched
        candidate graphs, the (offset) tree-message index tensors and the
        stereoisomer candidate batch with its labels and lengths.

    Note:
        The ``tree_mess_*`` tensors are offset in place, so the tensors
        stored in ``examples`` are modified.
    """
    # get list of trees
    mol_trees = _unpack_field(examples, 'mol_tree')
    wid = _unpack_field(examples, 'wid')
    for tree_wid, tree in zip(wid, mol_trees):
        tree.ndata['wid'] = torch.LongTensor(tree_wid)

    # TODO: either support pickling or get around ctypes pointers using scipy
    # batch molecule graphs
    mol_graphs = _unpack_field(examples, 'mol_graph')
    atom_x = torch.cat(_unpack_field(examples, 'atom_x_enc'))
    bond_x = torch.cat(_unpack_field(examples, 'bond_x_enc'))
    mol_graph_batch = self._batch_and_set(mol_graphs, atom_x, bond_x, False)

    result = {
        'mol_trees': mol_trees,
        'mol_graph_batch': mol_graph_batch,
    }

    # Everything below is only produced in training mode.
    if not self.training:
        return result

    # batch candidate graphs
    cand_graphs = _unpack_field(examples, 'cand_graphs')
    cand_batch_idx = []
    atom_x = torch.cat(_unpack_field(examples, 'atom_x_dec'))
    bond_x = torch.cat(_unpack_field(examples, 'bond_x_dec'))
    tree_mess_src_e = _unpack_field(examples, 'tree_mess_src_e')
    tree_mess_tgt_e = _unpack_field(examples, 'tree_mess_tgt_e')
    tree_mess_tgt_n = _unpack_field(examples, 'tree_mess_tgt_n')

    # Shift every example's indices by the number of nodes contributed by
    # the examples that precede it in the batch.
    graph_node_offset = 0
    tree_node_offset = 0
    for i, candidates in enumerate(cand_graphs):
        tree_mess_tgt_e[i] += graph_node_offset
        tree_mess_src_e[i] += tree_node_offset
        tree_mess_tgt_n[i] += graph_node_offset
        graph_node_offset += sum(g.number_of_nodes() for g in candidates)
        tree_node_offset += mol_trees[i].number_of_nodes()
        cand_batch_idx.extend([i] * len(candidates))
    tree_mess_tgt_e = torch.cat(tree_mess_tgt_e)
    tree_mess_src_e = torch.cat(tree_mess_src_e)
    tree_mess_tgt_n = torch.cat(tree_mess_tgt_n)

    cand_graph_batch = self._batch_and_set(cand_graphs, atom_x, bond_x, True)

    # batch stereoisomers
    stereo_cand_graphs = _unpack_field(examples, 'stereo_cand_graphs')
    atom_x = torch.cat(_unpack_field(examples, 'stereo_atom_x_enc'))
    bond_x = torch.cat(_unpack_field(examples, 'stereo_bond_x_enc'))
    # One entry per stereo candidate, holding the index of its example.
    stereo_cand_batch_idx = [
        i
        for i, stereo_cands in enumerate(stereo_cand_graphs)
        for _ in range(len(stereo_cands))
    ]

    if stereo_cand_batch_idx:
        label_length_pairs = [
            (label, length)
            for ex in _unpack_field(examples, 'stereo_cand_label')
            for label, length in ex
        ]
        stereo_cand_labels, stereo_cand_lengths = zip(*label_length_pairs)
        stereo_cand_graph_batch = self._batch_and_set(
            stereo_cand_graphs, atom_x, bond_x, True)
    else:
        # No stereo candidates anywhere in the batch.
        stereo_cand_labels = []
        stereo_cand_lengths = []
        stereo_cand_graph_batch = None

    result.update({
        'cand_graph_batch': cand_graph_batch,
        'cand_batch_idx': cand_batch_idx,
        'tree_mess_tgt_e': tree_mess_tgt_e,
        'tree_mess_src_e': tree_mess_src_e,
        'tree_mess_tgt_n': tree_mess_tgt_n,
        'stereo_cand_graph_batch': stereo_cand_graph_batch,
        'stereo_cand_batch_idx': stereo_cand_batch_idx,
        'stereo_cand_labels': stereo_cand_labels,
        'stereo_cand_lengths': stereo_cand_lengths,
    })
    return result
def forward(self, real_stft, imag_stft, length):
    """Reconstruct a waveform from the real and imaginary parts of an STFT.

    Args:
        real_stft: (batch_size, 1, time_steps, n_fft // 2 + 1)
        imag_stft: imaginary part, indexed the same way as ``real_stft``.
        length: number of samples the output is trimmed or zero-padded to;
            when ``None`` only the centre padding (if ``self.center``) is
            removed.

    Returns:
        real: (batch_size, data_length)
    """
    device = next(self.parameters()).device
    batch_size = real_stft.shape[0]
    n_fft = self.n_fft
    hop = self.hop_length

    # (batch_size, n_fft // 2 + 1, time_steps)
    real_stft = real_stft[:, 0, :, :].transpose(1, 2)
    imag_stft = imag_stft[:, 0, :, :].transpose(1, 2)

    # Full stft: append the mirrored interior bins (sign-flipped for the
    # imaginary part).
    mirrored_real = torch.flip(real_stft[:, 1 : -1, :], dims=[1])
    mirrored_imag = torch.flip(imag_stft[:, 1 : -1, :], dims=[1])
    full_real_stft = torch.cat((real_stft, mirrored_real), dim=1)
    full_imag_stft = torch.cat((imag_stft, - mirrored_imag), dim=1)

    # Reserve space for reconstructed waveform
    n_frames = real_stft.shape[2]
    if length:
        padded_length = length + int(n_fft) if self.center else length
        n_frames = min(n_frames, int(np.ceil(padded_length / hop)))

    expected_signal_len = n_fft + hop * (n_frames - 1)
    y = torch.zeros(batch_size, expected_signal_len).to(device)

    # IDFT
    s_real = self.conv_real(full_real_stft) - self.conv_imag(full_imag_stft)

    # Overlap add
    for i in range(n_frames):
        frame_start = i * hop
        y[:, frame_start : frame_start + n_fft] += s_real[:, :, i]

    # Normalise by the summed squared window wherever it is not ~zero.
    ifft_window_sum = librosa.filters.window_sumsquare(
        self.window, n_frames,
        win_length=self.win_length, n_fft=self.n_fft, hop_length=self.hop_length)
    tiny = librosa.util.tiny(ifft_window_sum)
    approx_nonzero_indices = np.where(ifft_window_sum > tiny)[0]
    approx_nonzero_indices = torch.LongTensor(approx_nonzero_indices).to(device)
    ifft_window_sum = torch.Tensor(ifft_window_sum).to(device)
    y[:, approx_nonzero_indices] /= ifft_window_sum[approx_nonzero_indices][None, :]

    # Trim or pad to length
    if length is None:
        if self.center:
            y = y[:, self.n_fft // 2 : -self.n_fft // 2]
        return y

    start = self.n_fft // 2 if self.center else 0
    y = y[:, start : start + length]
    (batch_size, len_y) = y.shape
    if len_y < length:
        # Too short after trimming: right-pad with zeros.
        tail = torch.zeros(batch_size, length - len_y).to(device)
        y = torch.cat((y, tail), dim=-1)

    return y
epoch_counter = 0 time1 = time.time() I_permutation = np.random.permutation(L_Y_train) for i in range(0, L_Y_train, batch_size): x_input2 = [x_train[j] for j in I_permutation[i:i + batch_size]] sequence_length = 50 x_input = np.zeros((batch_size, sequence_length), dtype=np.int) for j in range(batch_size): x = np.asarray(x_input2[j]) sl = x.shape[0] if (sl < sequence_length): x_input[j, 0:sl] = x else: start_index = np.random.randint(sl - sequence_length + 1) x_input[j, :] = x[start_index:(start_index + sequence_length)] x_input = Variable(torch.LongTensor(x_input)).cuda() optimizer.zero_grad() loss, pred = model(x_input) loss.backward() norm = nn.utils.clip_grad_norm_(model.parameters(), 2.0) if (epoch > 6): for group in optimizer.param_groups: for p in group['params']: state = optimizer.state[p] if (state['step'] > 1024): state['step'] = 1000 optimizer.step() # update gradients values, prediction = torch.max(pred, 1) prediction = prediction.cpu().data.numpy() accuracy = float( np.sum(prediction == x_input.cpu().data.numpy()
def seq_collate_fn(batch):
    """Collate per-sample observation frames into one time-sorted batch.

    Returns several tensors. Tensor of lengths should not be sent to CUDA.

    Args:
        batch: List of dicts.  Each dict provides ``"idx"`` (sample id),
            ``"path"`` (DataFrame indexed by sample id with a ``Time``
            column plus ``Value*`` / ``Mask*`` columns), ``"cov"``, ``"y"``
            and ``"val_samples"`` (``None`` or a DataFrame with ``ID``,
            ``Time``, ``Value*`` and ``Mask*`` columns).

    Returns:
        A dict with:
            times: unique observation times (ascending).
            time_ptr: offsets into the sorted rows, one slice per time.
            Xpadded / Fpadded: observed values / their feature indices,
                one padded row per observation row.
            X, M: all values and the float mask of the sorted rows.
            lengths: number of observed features per row.
            obs_idx: position in ``batch`` of the sample owning each row.
            y, cov: stacked labels and covariates.
            X_val, M_val, times_val, index_val: validation counterparts,
                or ``None`` when ``batch[0]["val_samples"]`` is ``None``.
    """
    idx2batch = pd.Series(np.arange(len(batch)), index=[b["idx"] for b in batch])

    df = pd.concat([b["path"] for b in batch])

    value_cols = [c.startswith("Value") for c in df.columns]
    mask_cols = [c.startswith("Mask") for c in df.columns]

    # Interpret the mask columns as booleans.  The builtin ``bool`` is used
    # because the ``np.bool`` alias is missing from NumPy 1.24-1.26, and the
    # conversion is kept local so it does not rely on an ``iloc`` assignment
    # changing the column dtype.
    observed = df.iloc[:, mask_cols].astype(bool)
    # Negated count, so that within one time the rows with the most
    # observations sort first.
    df["num_obs"] = -observed.sum(axis=1).to_numpy()
    df.sort_values(by=["Time", "num_obs"], inplace=True)

    ## num_obs is not a value/mask column
    value_cols.append(False)
    mask_cols.append(False)

    cov = torch.Tensor([b["cov"] for b in batch])

    batch_ids = idx2batch[df.index.values].values

    ## calculating number of events at every time
    times, counts = np.unique(df.Time.values, return_counts=True)
    time_ptr = np.concatenate([[0], np.cumsum(counts)])

    assert df.shape[0] == time_ptr[-1]

    ## tensors for the data in the batch
    X = df.iloc[:, value_cols].values
    # Explicit boolean mask: a numeric mask would turn ``X[M]`` into integer
    # fancy indexing.
    M = df.iloc[:, mask_cols].values.astype(bool)

    ## selecting only observed X and splitting
    lengths = (-df["num_obs"].to_numpy()).astype(int).tolist()
    Xsplit = torch.split(torch.from_numpy(X[M]), lengths)
    Fsplit = torch.split(torch.from_numpy(np.where(M)[1]), lengths)

    Xpadded = torch.nn.utils.rnn.pad_sequence(Xsplit, batch_first=True)
    Fpadded = torch.nn.utils.rnn.pad_sequence(Fsplit, batch_first=True)

    if batch[0]['val_samples'] is not None:
        df_after = pd.concat([b["val_samples"] for b in batch])
        df_after.sort_values(by=["ID", "Time"], inplace=True)
        value_cols_val = [c.startswith("Value") for c in df_after.columns]
        mask_cols_val = [c.startswith("Mask") for c in df_after.columns]
        X_val = torch.tensor(df_after.iloc[:, value_cols_val].values)
        M_val = torch.tensor(df_after.iloc[:, mask_cols_val].values)
        times_val = df_after["Time"].values
        index_val = idx2batch[df_after["ID"].values].values
    else:
        X_val = None
        M_val = None
        times_val = None
        index_val = None

    res = {}
    res["times"] = times
    res["time_ptr"] = time_ptr
    res["Xpadded"] = Xpadded
    res["Fpadded"] = Fpadded
    res["X"] = torch.from_numpy(X)
    res["M"] = torch.from_numpy(M.astype(np.float32))
    res["lengths"] = torch.LongTensor(lengths)
    res["obs_idx"] = torch.tensor(batch_ids)
    res["y"] = torch.tensor([b["y"] for b in batch])
    res["cov"] = cov

    res["X_val"] = X_val
    res["M_val"] = M_val
    res["times_val"] = times_val
    res["index_val"] = index_val

    return res
def test_neural_beamformer_forward_backward(
    n_fft,
    win_length,
    hop_length,
    num_spk,
    loss_type,
    use_wpe,
    wnet_type,
    wlayers,
    wunits,
    wprojs,
    taps,
    delay,
    use_dnn_mask_for_wpe,
    multi_source_wpe,
    use_beamformer,
    bnet_type,
    blayers,
    bunits,
    bprojs,
    badim,
    ref_channel,
    use_noise_mask,
    bnonlinear,
    beamformer_type,
):
    """Run one forward and backward pass of ``NeuralBeamformer``.

    Parameter combinations that are unsupported or redundant return early
    without building a model.
    """
    # Skip some cases
    multi_spk_wpe_beamformer = num_spk > 1 and use_wpe and use_beamformer
    if multi_spk_wpe_beamformer:
        if not multi_source_wpe:
            # Single-source WPE is not supported with beamformer in
            # multi-speaker cases
            return
    elif num_spk == 1 and multi_source_wpe:
        # When num_spk == 1, `multi_source_wpe` has no effect
        return

    # only test different nonlinear layers with MVDR_Souden
    nonlinear_is_covered = beamformer_type == "mvdr_souden" and not multi_source_wpe
    if bnonlinear != "sigmoid" and not nonlinear_is_covered:
        return

    # ensures reproducibility and reversibility in the matrix inverse computation
    torch.random.manual_seed(0)
    stft = STFTEncoder(n_fft=n_fft, win_length=win_length, hop_length=hop_length)

    # NOTE(review): `multi_source_wpe` only gates the skips above and is not
    # forwarded to the model - confirm that is intended.
    model_kwargs = dict(
        num_spk=num_spk,
        loss_type=loss_type,
        use_wpe=use_wpe,
        wnet_type=wnet_type,
        wlayers=wlayers,
        wunits=wunits,
        wprojs=wprojs,
        taps=taps,
        delay=delay,
        use_dnn_mask_for_wpe=use_dnn_mask_for_wpe,
        use_beamformer=use_beamformer,
        bnet_type=bnet_type,
        blayers=blayers,
        bunits=bunits,
        bprojs=bprojs,
        badim=badim,
        ref_channel=ref_channel,
        use_noise_mask=use_noise_mask,
        beamformer_type=beamformer_type,
        rtf_iterations=2,
        shared_power=True,
    )
    model = NeuralBeamformer(stft.output_dim, **model_kwargs)
    model.train()

    inputs = random_speech[..., :2].float()
    ilens = torch.LongTensor([16, 12])
    input_spectrum, flens = stft(inputs, ilens)
    est_speech, flens, others = model(input_spectrum, flens)

    if loss_type.startswith("mask"):
        # Mask-based losses produce no enhanced speech; train on the masks.
        assert est_speech is None
        loss = sum(abs(m).mean() for m in others.values())
    else:
        loss = sum(abs(est).mean() for est in est_speech)
    loss.backward()
# `sparse_to_tuple` output is indexed below as (coords, values, shape).
features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

# Create Model
# pos_weight is the ratio of zero entries to the adjacency sum; norm rescales
# by the total number of entries over twice the number of zero entries.
num_entries = adj.shape[0] * adj.shape[0]
adj_total = adj.sum()
pos_weight = float(num_entries - adj_total) / adj_total
norm = num_entries / float((num_entries - adj_total) * 2)

# Reconstruction target: training adjacency with self-loops added.
adj_label = adj_train + sp.eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

# Build float32 sparse COO tensors.  torch.sparse_coo_tensor replaces the
# deprecated torch.sparse.FloatTensor(indices, values, size) constructor.
adj_norm = torch.sparse_coo_tensor(torch.LongTensor(adj_norm[0].T),
                                   torch.FloatTensor(adj_norm[1]),
                                   torch.Size(adj_norm[2]))
adj_label = torch.sparse_coo_tensor(torch.LongTensor(adj_label[0].T),
                                    torch.FloatTensor(adj_label[1]),
                                    torch.Size(adj_label[2]))
features = torch.sparse_coo_tensor(torch.LongTensor(features[0].T),
                                   torch.FloatTensor(features[1]),
                                   torch.Size(features[2]))

# Per-entry loss weights: entries equal to 1 in the target get pos_weight,
# all others keep weight 1.
weight_mask = adj_label.to_dense().view(-1) == 1
weight_tensor = torch.ones(weight_mask.size(0))
weight_tensor[weight_mask] = pos_weight

# init model and optimizer
model = getattr(model, args.model)(adj_norm)
def sparse2th(mat):
    """Convert a COO-style sparse matrix into a float32 torch sparse tensor.

    Args:
        mat: Object exposing ``row``, ``col`` and ``data`` NumPy arrays and a
            ``shape`` attribute (e.g. ``scipy.sparse.coo_matrix``).

    Returns:
        A sparse COO ``torch`` tensor of dtype float32 with shape ``mat.shape``.
    """
    # Convert each coordinate array directly; building a tensor from a Python
    # list of ndarrays goes through a slow element-wise path.
    indices = th.stack((
        th.as_tensor(mat.row, dtype=th.long),
        th.as_tensor(mat.col, dtype=th.long),
    ))
    values = th.from_numpy(mat.data).float()
    # th.sparse.FloatTensor(indices, values, size) is deprecated in favour of
    # th.sparse_coo_tensor.
    return th.sparse_coo_tensor(indices, values, mat.shape)
def __getitem__(self, index):
    """Return the padded session tensors of one user.

    Parameters
    ----------
    index : integer
        Position in ``self.users`` of the user whose session history is
        fetched.

    Returns
    -------
    Tuple of torch LongTensors.

    Without genre data (``self.genre_df is None``):
    ``(inputs, labels, length, user id)``.
    With genre data: ``(inputs, genres, labels, length, user id)``.

    inputs:
        All items of the session except the last one, padded with
        ``self.pad_token`` up to ``self.max_len``.
    genres:
        Stacked ``'genre'`` vectors of ``self.genre_df`` for the padded
        inputs.
    labels:
        In ``'train'`` mode the session offset by one; in ``'eval'`` mode
        only the last item, placed at the last input position.  Padded
        like the inputs.
    length:
        Number of input items before padding.
    user id:
        The user id.

    Raises
    ------
    ValueError
        If ``self.mode`` is neither ``'train'`` nor ``'eval'``.
    """
    if self.mode not in ('train', 'eval'):
        # Previously an unknown mode surfaced later as an unbound-local error.
        raise ValueError(
            "mode must be 'train' or 'eval', got {!r}".format(self.mode))

    # get the user id
    user = self.users[index]
    # get the user session history
    seq = self._getseq(user)

    # input is all items in session history except last one (both modes)
    tokens = seq[:-1]
    x_len = len(tokens)

    if self.mode == 'train':
        # label is all items except the first (offset of tokens by 1)
        labels = seq[1:]
    else:
        # only the last item is a label; pad in front so it lines up with
        # the last input position
        labels = [self.pad_token] * (x_len - 1) + seq[-1:]
    y_len = len(labels)

    # append the pad tokens needed to reach the maximum length
    tokens = tokens + [self.pad_token] * (self.max_len - x_len)
    labels = labels + [self.pad_token] * (self.max_len - y_len)

    if self.genre_df is not None:
        genres = np.vstack(self.genre_df.loc[tokens]['genre'].to_numpy())
        return (torch.LongTensor(tokens), torch.LongTensor(genres),
                torch.LongTensor(labels), torch.LongTensor([x_len]),
                torch.LongTensor([user]))

    return (torch.LongTensor(tokens), torch.LongTensor(labels),
            torch.LongTensor([x_len]), torch.LongTensor([user]))
def push_action(self, state: ParserState, target_action_idx: int) -> None:
    """Used for updating the state with a target next action

    Args:
        state (ParserState): The state of the stack, buffer and action
        target_action_idx (int): Index of the action to process

    Raises:
        ValueError: If ``target_action_idx`` is not SHIFT, REDUCE or one of
            ``self.valid_NT_idxs``.  The action has already been pushed onto
            the action stack when this is raised.
    """
    # Update action_stackrnn
    action_embedding = self.actions_lookup(
        cuda_utils.Variable(torch.LongTensor([target_action_idx])))
    state.action_stackrnn.push(action_embedding, Element(target_action_idx))

    # Update stack_stackrnn
    if target_action_idx == self.shift_idx:
        # To SHIFT,
        # 1. Pop T from buffer
        # 2. Push T into stack
        state.is_open_NT.append(False)
        token_embedding, token = state.buffer_stackrnn.pop()
        state.stack_stackrnn.push(token_embedding, Element(token))

    elif target_action_idx == self.reduce_idx:
        # To REDUCE
        # 1. Pop Ts from stack until hit NT
        # 2. Pop the open NT from stack and close it
        # 3. Compute compositionalRep and push into stack
        state.num_open_NT -= 1
        popped_rep = []
        nt_tree = []

        while not state.is_open_NT[-1]:
            assert len(
                state.stack_stackrnn) > 0, "How come stack is empty!"
            state.is_open_NT.pop()
            top_of_stack = state.stack_stackrnn.pop()
            popped_rep.append(top_of_stack[0])
            nt_tree.append(top_of_stack[1])

        # pop the open NT and close it
        top_of_stack = state.stack_stackrnn.pop()
        popped_rep.append(top_of_stack[0])
        nt_tree.append(top_of_stack[1])

        state.is_open_NT.pop()
        state.is_open_NT.append(False)

        compostional_rep = self.p_compositional(popped_rep)
        combinedElement = Element(nt_tree)

        state.stack_stackrnn.push(compostional_rep, combinedElement)

    elif target_action_idx in self.valid_NT_idxs:
        # if this is root prediction and if that root is one
        # of the unsupported intents
        if (len(state.predicted_actions_idx) == 1
                and target_action_idx in self.ignore_subNTs_roots):
            state.found_unsupported = True

        state.is_open_NT.append(True)
        state.num_open_NT += 1
        state.stack_stackrnn.push(action_embedding,
                                  Element(target_action_idx))
    else:
        # The original `assert "<message>"` asserted a non-empty string,
        # which is always true, so invalid actions were never reported.
        raise ValueError("not a valid action: {}".format(
            self.actions_vocab.itos[target_action_idx]))
def forward(
    self,
    tokens: torch.Tensor,
    seq_lens: torch.Tensor,
    dict_feat: Optional[Tuple[torch.Tensor, ...]] = None,
    actions: Optional[List[List[int]]] = None,
):
    """RNNG forward function.

    Runs a beam search over parser actions.  Outside of ``Stage.TEST`` the
    beam size and ``top_k`` are forced to 1; in training mode the oracle
    actions are pushed instead of the predicted ones.

    Args:
        tokens (torch.Tensor): list of tokens
        seq_lens (torch.Tensor): list of sequence lengths (not read by this
            method)
        dict_feat (Optional[Tuple[torch.Tensor, ...]]): dictionary or
            gazetteer features for each token
        actions (Optional[List[List[int]]]): Used only during training.
            Oracle actions for the instances; only ``actions[0]`` is read.

    Returns:
        if top_k <= 1
            tuple of list of predicted actions and list of corresponding
            scores
        else
            list of tuple of list of predicted actions and list of
            corresponding scores
    """
    beam_size = self.beam_size
    top_k = self.top_k

    # Beam search and top-k output are only used at test stage.
    if self.stage != Stage.TEST:
        beam_size = 1
        top_k = 1

    if self.training:
        assert actions is not None, "actions must be provided for training"
        # Reversed so that the next oracle action is always the last element.
        actions_idx_rev = list(reversed(actions[0]))
    else:
        torch.manual_seed(0)

    beam_size = max(beam_size, 1)

    # Reverse the order of indices along last axis before embedding lookup.
    tokens_list_rev = torch.flip(tokens, [len(tokens.size()) - 1])
    dict_feat_rev = None
    if dict_feat:
        dict_ids, dict_weights, dict_lengths = dict_feat
        dict_ids_rev = torch.flip(dict_ids, [len(dict_ids.size()) - 1])
        dict_weights_rev = torch.flip(dict_weights,
                                      [len(dict_weights.size()) - 1])
        dict_lengths_rev = torch.flip(dict_lengths,
                                      [len(dict_lengths.size()) - 1])
        dict_feat_rev = (dict_ids_rev, dict_weights_rev, dict_lengths_rev)

    embedding_input = ([tokens_list_rev, dict_feat_rev]
                       if dict_feat_rev is not None else [tokens_list_rev])
    token_embeddings = self.embedding(*embedding_input)

    # Batch size is always = 1. So we squeeze the batch_size dimension.
    token_embeddings = token_embeddings.squeeze(0)
    tokens_list_rev = tokens_list_rev.squeeze(0)

    # Fill the buffer with the (reversed) tokens.
    initial_state = ParserState(self)
    for i in range(token_embeddings.size()[0]):
        token_embedding = token_embeddings[i].unsqueeze(0)
        tok = tokens_list_rev[i]
        initial_state.buffer_stackrnn.push(token_embedding, Element(tok))

    beam = [initial_state]
    while beam and any(not state.finished() for state in beam):
        # Stores plans for expansion as (score, state, action)
        plans: List[Tuple[float, ParserState, int]] = []
        # Expand current beam states
        for state in beam:
            # Keep terminal states
            if state.finished():
                plans.append((state.neg_prob, state, -1))
                continue

            # translating Expression p_t = affine_transform({pbias, S,
            # stack_summary, B, buffer_summary, A, action_summary});
            stack = state.stack_stackrnn
            stack_summary = stack.embedding()
            action_summary = state.action_stackrnn.embedding()
            buffer_summary = state.buffer_stackrnn.embedding()
            if self.dropout_layer.p > 0:
                stack_summary = self.dropout_layer(stack_summary)
                action_summary = self.dropout_layer(action_summary)
                buffer_summary = self.dropout_layer(buffer_summary)

            # feature for index of last open non-terminal
            last_open_NT_feature = torch.zeros(len(self.actions_vocab))
            open_NT_exists = state.num_open_NT > 0

            if (len(stack) > 0 and open_NT_exists
                    and self.ablation_use_last_open_NT_feature):
                last_open_NT = None
                try:
                    # Distance from the top of the stack to the most
                    # recently opened non-terminal.
                    open_NT = state.is_open_NT[::-1].index(True)
                    last_open_NT = stack.element_from_top(open_NT)
                except ValueError:
                    # list.index found no open non-terminal.
                    pass
                if last_open_NT:
                    last_open_NT_feature[last_open_NT.node] = 1.0
            last_open_NT_feature = last_open_NT_feature.unsqueeze(0)

            # Only the summaries enabled by the ablation flags are used.
            summaries = []
            if self.ablation_use_buffer:
                summaries.append(buffer_summary)
            if self.ablation_use_stack:
                summaries.append(stack_summary)
            if self.ablation_use_action:
                summaries.append(action_summary)
            if self.ablation_use_last_open_NT_feature:
                summaries.append(last_open_NT_feature)

            action_p = self.action_linear(torch.cat(summaries, dim=1))

            log_probs = F.log_softmax(action_p, dim=1)[0]

            # Scores are accumulated negative log-probabilities (lower is
            # better).
            for action in self.valid_actions(state):
                plans.append(
                    (state.neg_prob - log_probs[action], state, action))

        beam = []
        # Take actions to regenerate the beam
        for neg_prob, state, predicted_action_idx in sorted(
                plans)[:beam_size]:
            # Skip terminal states
            if state.finished():
                beam.append(state)
                continue

            # Only branch out states when needed
            if beam_size > 1:
                state = state.copy()

            state.predicted_actions_idx.append(predicted_action_idx)

            target_action_idx = predicted_action_idx
            if self.training:
                assert (len(actions_idx_rev) >
                        0), "Actions and tokens may not be in sync."
                # Teacher forcing: apply the oracle action, not the predicted one.
                target_action_idx = actions_idx_rev[-1]
                actions_idx_rev = actions_idx_rev[:-1]

            if (self.constraints_ignore_loss_for_unsupported
                    and state.found_unsupported):
                pass
            else:
                # NOTE(review): action_p is the value left over from the
                # last state expanded in the loop above - confirm this is
                # intended when beam_size > 1.
                state.action_scores.append(action_p)

            self.push_action(state, target_action_idx)

            state.neg_prob = neg_prob
            beam.append(state)
        # End for
    # End while
    assert len(beam) > 0, "How come beam is empty?"
    # `state` is the variable left over from the loops above.
    assert len(state.stack_stackrnn) == 1, "How come stack len is " + str(
        len(state.stack_stackrnn))
    assert len(
        state.buffer_stackrnn) == 0, "How come buffer len is " + str(
            len(state.buffer_stackrnn))

    # Add batch dimension before returning.
    if top_k <= 1:
        # min()/sorted() rely on the ordering defined by the state objects.
        state = min(beam)
        return (
            torch.LongTensor(state.predicted_actions_idx).unsqueeze(0),
            torch.cat(state.action_scores).unsqueeze(0),
        )
    else:
        return [(
            torch.LongTensor(state.predicted_actions_idx).unsqueeze(0),
            torch.cat(state.action_scores).unsqueeze(0),
        ) for state in sorted(beam)[:top_k]]
def prediction(model, dataset, mode, batch_size):
    """Run the model over the dev or test split and collect predictions.

    Args:
        model: Model called as ``model(text, seq_lens, n_predicts=1)``.
        dataset: Dataset object providing batching, padding and alphabets.
        mode: Either ``"dev"`` or ``"test"``.
        batch_size: Batch size used for delivery.

    Returns:
        Tuple ``(pred_slot, real_slot, exp_pred_intent, real_intent,
        pred_intent)``.

    Raises:
        Exception: If ``mode`` is neither ``"dev"`` nor ``"test"``.
    """
    model.eval()

    if mode not in ("dev", "test"):
        raise Exception(
            "Argument error! mode belongs to {\"dev\", \"test\"}.")
    dataloader = dataset.batch_delivery(
        mode, batch_size=batch_size, shuffle=False, is_digital=False)

    # Padding sorts the batch; in test mode results are put back in order.
    restore_order = mode == 'test'

    def _unsort(items, order):
        """Place ``items[i]`` at position ``order[i]``."""
        restored = [[] for _ in range(len(order))]
        for i, original_pos in enumerate(order):
            restored[original_pos] = items[i]
        return restored

    pred_slot, real_slot = [], []
    pred_intent, real_intent = [], []

    for text_batch, slot_batch, intent_batch in tqdm(dataloader, ncols=50):
        padded = dataset.add_padding(
            text_batch, [(slot_batch, False), (intent_batch, False)],
            digital=False)
        padded_text, (sorted_slot, sorted_intent), seq_lens, sorted_index = padded

        # Because it's a visualization bug, in valid time, it doesn't matter
        # Only in test time will it need to restore
        if restore_order:
            sorted_slot = _unsort(sorted_slot, sorted_index)
            sorted_intent = _unsort(sorted_intent, sorted_index)

        real_slot.extend(sorted_slot)
        real_intent.extend(list(Evaluator.expand_list(sorted_intent)))

        digit_text = dataset.word_alphabet.get_index(padded_text)
        var_text = Variable(torch.LongTensor(digit_text))
        if torch.cuda.is_available():
            var_text = var_text.cuda()

        slot_idx, intent_idx = model(var_text, seq_lens, n_predicts=1)

        flat_slot = list(Evaluator.expand_list(slot_idx))
        nested_slot = Evaluator.nested_list([flat_slot], seq_lens)[0]
        if restore_order:
            nested_slot = _unsort(nested_slot, sorted_index)
        pred_slot.extend(dataset.slot_alphabet.get_instance(nested_slot))

        flat_intent = list(Evaluator.expand_list(intent_idx))
        nested_intent = Evaluator.nested_list([flat_intent], seq_lens)[0]
        if restore_order:
            nested_intent = _unsort(nested_intent, sorted_index)
        pred_intent.extend(
            dataset.intent_alphabet.get_instance(nested_intent))

    exp_pred_intent = Evaluator.max_freq_predict(pred_intent)
    return pred_slot, real_slot, exp_pred_intent, real_intent, pred_intent
def _demo_mm_inputs(num_kernels=0, input_shape=(1, 3, 300, 300),
                    num_items=None, num_classes=1):  # yapf: disable
    """Create a superset of inputs needed to run test or train batches.

    Args:
        num_kernels (int): Number of random kernel maps generated for each
            batch item.
        input_shape (tuple): Input batch dimensions.
        num_items (None | list[int]): Specifies the number of boxes
            for each batch item.
        num_classes (int): Number of distinct labels a box might have
            (not read by this implementation; every label is 0).

    Returns:
        dict: Images, image metas and ground-truth boxes, labels, masks
        and kernel/region maps.
    """
    from mmdet.core import BitmapMasks

    (N, C, H, W) = input_shape
    rng = np.random.RandomState(0)
    imgs = rng.rand(*input_shape)

    img_metas = []
    for _ in range(N):
        img_metas.append({
            'img_shape': (H, W, C),
            'ori_shape': (H, W, C),
            'pad_shape': (H, W, C),
            'filename': '<demo>.png',
            'scale_factor': np.array([1, 1, 1, 1]),
            'flip': False,
        })

    gt_bboxes, gt_labels, gt_masks = [], [], []
    gt_kernels, gt_effective_mask = [], []

    for batch_idx in range(N):
        num_boxes = rng.randint(1, 10) if num_items is None else num_items[batch_idx]

        # Random centres and sizes, converted to corner boxes clipped to
        # the image.
        cx, cy, bw, bh = rng.rand(num_boxes, 4).T
        half_w = W * bw / 2
        half_h = H * bh / 2
        tl_x = ((cx * W) - half_w).clip(0, W)
        tl_y = ((cy * H) - half_h).clip(0, H)
        br_x = ((cx * W) + half_w).clip(0, W)
        br_y = ((cy * H) + half_h).clip(0, H)

        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
        gt_bboxes.append(torch.FloatTensor(boxes))
        gt_labels.append(torch.LongTensor([0] * num_boxes))

        # Kernel maps come from the global NumPy RNG, not the seeded `rng`.
        kernels = [np.random.rand(H, W) for _ in range(num_kernels)]
        gt_kernels.append(BitmapMasks(kernels, H, W))
        gt_effective_mask.append(BitmapMasks([np.ones((H, W))], H, W))

    # NOTE(review): a single mask set is created after the loop, sized by
    # the boxes of the last batch item - confirm this placement.
    mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
    gt_masks.append(BitmapMasks(mask, H, W))

    mm_inputs = {
        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
        'img_metas': img_metas,
        'gt_bboxes': gt_bboxes,
        'gt_labels': gt_labels,
        'gt_bboxes_ignore': None,
        'gt_masks': gt_masks,
        'gt_kernels': gt_kernels,
        'gt_mask': gt_effective_mask,
        'gt_thr_mask': gt_effective_mask,
        'gt_text_mask': gt_effective_mask,
        'gt_center_region_mask': gt_effective_mask,
        'gt_radius_map': gt_kernels,
        'gt_sin_map': gt_kernels,
        'gt_cos_map': gt_kernels,
    }
    return mm_inputs
def train(self):
    """Train the model for ``num_epoch`` epochs with per-epoch validation.

    After every epoch the dev split is evaluated.  Whenever any of the three
    dev metrics improves on its best value so far, the test split is
    evaluated as well and both the model and the dataset object are saved
    under ``<save_dir>/model``.
    """
    best_dev_slot = 0.0
    best_dev_intent = 0.0
    best_dev_sent = 0.0

    dataloader = self.__dataset.batch_delivery('train')
    for epoch in range(0, self.__dataset.num_epoch):
        total_slot_loss, total_intent_loss = 0.0, 0.0

        time_start = time.time()
        self.__model.train()

        for text_batch, slot_batch, intent_batch in tqdm(dataloader, ncols=50):
            padded_text, (sorted_slot, sorted_intent), seq_lens, _ = \
                self.__dataset.add_padding(
                    text_batch, [(slot_batch, False), (intent_batch, False)])

            # Repeat each intent entry once per token of its utterance.
            sorted_intent = [
                item * num for item, num in zip(sorted_intent, seq_lens)
            ]
            sorted_intent = list(Evaluator.expand_list(sorted_intent))

            text_var = Variable(torch.LongTensor(padded_text))
            slot_var = Variable(
                torch.LongTensor(list(Evaluator.expand_list(sorted_slot))))
            intent_var = Variable(torch.LongTensor(sorted_intent))

            if torch.cuda.is_available():
                text_var = text_var.cuda()
                slot_var = slot_var.cuda()
                intent_var = intent_var.cuda()

            # Gold slots / intents are fed to the model with the
            # configured forcing probabilities, decided independently.
            random_slot, random_intent = random.random(), random.random()
            forcing = {}
            if random_slot < self.__dataset.slot_forcing_rate:
                forcing['forced_slot'] = slot_var
            if random_intent < self.__dataset.intent_forcing_rate:
                forcing['forced_intent'] = intent_var
            slot_out, intent_out = self.__model(text_var, seq_lens, **forcing)

            slot_loss = self.__criterion(slot_out, slot_var)
            intent_loss = self.__criterion(intent_out, intent_var)
            batch_loss = slot_loss + intent_loss

            self.__optimizer.zero_grad()
            batch_loss.backward()
            self.__optimizer.step()

            try:
                total_slot_loss += slot_loss.cpu().item()
                total_intent_loss += intent_loss.cpu().item()
            except AttributeError:
                # Fallback for loss objects without `.item()`.
                total_slot_loss += slot_loss.cpu().data.numpy()[0]
                total_intent_loss += intent_loss.cpu().data.numpy()[0]

        time_con = time.time() - time_start
        print(('[Epoch {:2d}]: The total slot loss on train data is {:2.6f}, intent data is {:2.6f}, cost '
               'about {:2.6} seconds.').format(epoch, total_slot_loss, total_intent_loss, time_con))

        time_start = time.time()
        dev_f1_score, dev_acc, dev_sent_acc = self.estimate(
            if_dev=True, test_batch=self.__batch_size)

        improved = (dev_f1_score > best_dev_slot
                    or dev_acc > best_dev_intent
                    or dev_sent_acc > best_dev_sent)
        if improved:
            test_f1, test_acc, test_sent_acc = self.estimate(
                if_dev=False, test_batch=self.__batch_size)

            if dev_f1_score > best_dev_slot:
                best_dev_slot = dev_f1_score
            if dev_acc > best_dev_intent:
                best_dev_intent = dev_acc
            if dev_sent_acc > best_dev_sent:
                best_dev_sent = dev_sent_acc

            print(
                '\nTest result: slot f1 score: {:.6f}, intent acc score: {:.6f}, semantic '
                'accuracy score: {:.6f}.'.format(test_f1, test_acc, test_sent_acc))

            model_save_dir = os.path.join(self.__dataset.save_dir, "model")
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)

            torch.save(self.__model,
                       os.path.join(model_save_dir, "model.pkl"))
            torch.save(self.__dataset,
                       os.path.join(model_save_dir, 'dataset.pkl'))

            time_con = time.time() - time_start
            print(('[Epoch {:2d}]: In validation process, the slot f1 score is {:2.6f}, '
                   'the intent acc is {:2.6f}, the semantic acc is {:.2f}, cost about '
                   '{:2.6f} seconds.\n').format(epoch, dev_f1_score, dev_acc, dev_sent_acc, time_con))
def _mask_transform(self, mask):
    """Map a mask to class indices and return it as a ``torch.LongTensor``.

    The mask is converted to an int32 array, passed through
    ``self._class_to_index`` and the result is cast to int32 again before
    the tensor is built.
    """
    raw_mask = np.array(mask).astype('int32')
    indexed = self._class_to_index(raw_mask)
    indexed = np.array(indexed).astype('int32')
    return torch.LongTensor(indexed)
def convert2cpu_long(gpu_matrix):
    """Copy a tensor into a freshly allocated ``torch.LongTensor`` of equal size.

    ``torch.LongTensor`` is the CPU int64 tensor type, so the result is an
    int64 copy of ``gpu_matrix`` held on the CPU.
    """
    cpu_copy = torch.LongTensor(gpu_matrix.size())  # uninitialised, same size
    cpu_copy.copy_(gpu_matrix)  # copy_ converts dtype (and device) as needed
    return cpu_copy
def cross_entropy_supervised_training(epochs, batches, paths, brain,
                                      lr, intermediate=''):
    """
    Train the brain with a cross-entropy loss on recorded actions.

    The qualities produced by the model are fed to ``nn.CrossEntropyLoss``
    together with the index of the action that was actually taken, so the
    exact quality values are never consulted. Weights are updated after
    every single sample; losses are reported once per batch.

    Args:
        epochs: The number of epochs to run in training.
        batches: The number of batches each epoch is split into. The last
            batch also receives the remainder of the data.
        paths: A list of paths leading to the data files. Every line of a
            file must evaluate to a ``(food, action, vision)`` tuple.
        brain: The brain to train.
        lr: The learning rate to use.
        intermediate: The file to save intermediate brain trainings to.
            Nothing is saved when this is the empty string.

    Returns:
        0: Training data in the form of list of tuples.
        First element is the global batch number
        (``epoch * batches + batch_no``), second element is the average
        loss per sample over that batch.
    """
    # Set the brain to training mode
    brain.train()

    data_set = []
    # First read all training data
    for path in paths:
        print('Reading', path)
        with open(path, 'r') as in_f:
            # SECURITY NOTE: eval() executes arbitrary code found in the
            # data files. Only feed this function files you trust.
            data_set += [eval(line.rstrip()) for line in in_f]

    # Calculate batch data
    batch_length = len(data_set) // batches

    # Report status
    print('Data loaded')

    # Now we do the actual learning!
    # Define the loss function
    criterion = nn.CrossEntropyLoss()
    # Create an optimizer
    optimizer = torch.optim.Adagrad(brain.parameters(), lr=lr)

    loss_record = []

    # Iterate through epochs
    for epoch in range(epochs):
        # Permute the data to decorrelate it.
        random.shuffle(data_set)

        # Separate into batches; the last one takes whatever is left over.
        batched_data = [
            data_set[batch_no * batch_length:(batch_no + 1) * batch_length]
            for batch_no in range(batches - 1)
        ]
        batched_data.append(data_set[(batches - 1) * batch_length:])

        # Iterate through data
        for batch_no, batch_set in enumerate(batched_data):
            total_loss = 0
            print('Epoch', epoch, 'Batch', batch_no, 'begun')
            for food, action, vision in batch_set:
                s = (food, vision)
                # Get the quality of the action the monkey did
                predicted_Qs = brain.forward(s)
                # Calculate the loss
                loss = criterion(predicted_Qs[None],
                                 torch.LongTensor([action]))
                # Zero the gradients
                optimizer.zero_grad()
                # perform a backward pass
                loss.backward()
                # Update the weights
                optimizer.step()
                # Add to total loss
                total_loss += float(loss)

            # Average over the samples actually seen in this batch: the last
            # batch is longer than batch_length, and batches can be empty
            # when there are more batches than samples.
            average_loss = total_loss / len(batch_set) if batch_set else 0.0
            # Add to loss record
            loss_record.append((epoch * batches + batch_no, average_loss))
            print('Epoch', epoch, 'batch', batch_no, 'loss', average_loss)

        # Save brain at the end of every epoch
        if intermediate != '':
            torch.save(brain.state_dict(), intermediate)

    return loss_record
def convert_to_tensor(self, symbols):
    """Return the result of ``self.get_indices(symbols)`` as a LongTensor."""
    indices = self.get_indices(symbols)
    return torch.LongTensor(indices)
def frcnn(train):
    """Run a Faster R-CNN checkpoint over a directory of images.

    The network, checkpoint, image directory and device are all taken from
    ``parse_args()``.  When ``train`` is falsy, every file in
    ``args.image_dir`` is passed through the detector and, for each class
    that has at least one detection scoring above 0.5, the top score after
    NMS is written into a vector indexed by the position of the class name
    in the labels JSON file.  The vectors are collected in a dict keyed by
    the digits of the image file name and dumped to
    ``annotation_dict.json``.

    Args:
        train: When truthy, the model is still built and loaded but no
            images are processed and nothing is written.

    Raises:
        Exception: If the checkpoint directory does not exist.
    """
    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    from model.utils.config import cfg

    cfg.USE_GPU_NMS = args.cuda

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # train set
    # -- Note: Use validation set and disable the flipped to enable faster loading.

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    pascal_classes = np.asarray([
        '___background__', u'person', u'bicycle', u'car', u'motorcycle',
        u'airplane', u'bus', u'train', u'truck', u'boat', u'traffic light',
        u'fire hydrant', u'stop sign', u'parking meter', u'bench', u'bird',
        u'cat', u'dog', u'horse', u'sheep', u'cow', u'elephant', u'bear',
        u'zebra', u'giraffe', u'backpack', u'umbrella', u'handbag', u'tie',
        u'suitcase', u'frisbee', u'skis', u'snowboard', u'sports ball',
        u'kite', u'baseball bat', u'baseball glove', u'skateboard',
        u'surfboard', u'tennis racket', u'bottle', u'wine glass', u'cup',
        u'fork', u'knife', u'spoon', u'bowl', u'banana', u'apple',
        u'sandwich', u'orange', u'broccoli', u'carrot', u'hot dog', u'pizza',
        u'donut', u'cake', u'chair', u'couch', u'potted plant', u'bed',
        u'dining table', u'toilet', u'tv', u'laptop', u'mouse', u'remote',
        u'keyboard', u'cell phone', u'microwave', u'oven', u'toaster',
        u'sink', u'refrigerator', u'book', u'clock', u'vase', u'scissors',
        u'teddy bear', u'hair drier', u'toothbrush'
    ])

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes, pretrained=True,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes, 101, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes, 50, pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes, 152, pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Remap every storage to CPU when no GPU is requested.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    print("load checkpoint %s" % (load_name))

    # initialize the tensor holders here; they are resized for every image.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True

    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    thresh = 0.5

    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)

    print('Loaded Photo: {} images.'.format(num_images))
    import json
    import re
    from tqdm import tqdm
    d = {}
    pbar = tqdm(imglist)
    if not train:
        # The label list does not depend on the image, so read it once.
        with open(
                '/home/nesa320/huangshicheng/gitforwork/gsnn/graph/labels.json',
                'r') as labels_file:
            lis = json.load(labels_file)

        for i in pbar:
            im_file = os.path.join(args.image_dir, i)
            im_name = i
            im_in = np.array(imread(im_file))
            if len(im_in.shape) == 2:
                # Grayscale image: replicate the single channel three times.
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.data.resize_(1, 1, 5).zero_()
            num_boxes.data.resize_(1).zero_()

            # Only the first three outputs are used; the rest are the loss
            # and label outputs of the network.
            rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
                    means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    if args.cuda > 0:
                        stds = stds.cuda()
                        means = means.cuda()
                    box_deltas = box_deltas.view(-1, 4) * stds + means
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(
                            1, -1, 4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                # NOTE(review): np.tile yields a numpy array while the code
                # below uses torch operations on pred_boxes -- this branch
                # looks untested; confirm before relying on it.
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()

            sm_lis = np.zeros(len(lis))
            for j in range(1, len(pascal_classes)):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat(
                        (cls_boxes, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS,
                               force_cpu=not cfg.USE_GPU_NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    # Rows are sorted by descending score, so the first row
                    # carries the best score for this class.
                    score = cls_dets[0][-1]
                    try:
                        # .item() works for CPU and CUDA tensors alike.
                        sm_lis[lis.index(pascal_classes[j])] = score.item()
                    except ValueError:
                        # The class is not part of the label list; skip it.
                        pass
            d[re.sub(r"\D", "", im_name)] = sm_lis.tolist()

        with open('annotation_dict' + '.json', 'w') as out_file:
            json.dump(d, out_file, indent=2)
    else:
        pass
def forward(self, nodes, to_neighs, feature_dim, num_sample=None):
    """
    Average the features of each node's (sampled) neighbours.

    Node id 0 is excluded from the aggregation: its column is removed from
    the neighbourhood mask and its features are never looked up.

    nodes --- list of nodes in a batch
    to_neighs --- list of sets, each set is the set of neighbors for node in batch
    feature_dim --- width of the zero tensor returned when no neighbour
                    other than node 0 is present
    num_sample --- number of neighbors to sample. No sampling if None.

    Returns a tensor with one row per entry of ``to_neighs``.
    """
    if num_sample is not None:
        # random.sample requires a sequence (sets are rejected on
        # Python 3.11+), hence the tuple() conversion.
        samp_neighs = [
            set(random.sample(tuple(to_neigh), num_sample))
            if len(to_neigh) >= num_sample else to_neigh
            for to_neigh in to_neighs
        ]
    else:
        samp_neighs = to_neighs

    if self.gcn:
        # consider node itself
        samp_neighs = [
            set(samp_neigh) | {nodes[i]}
            for i, samp_neigh in enumerate(samp_neighs)
        ]

    unique_nodes_list = list(set().union(*samp_neighs))
    unique_nodes = {n: i for i, n in enumerate(unique_nodes_list)}

    # mask[r, c] == 1 when unique node c is a neighbour of batch entry r.
    mask = np.zeros((len(samp_neighs), len(unique_nodes)))
    column_indices = [
        unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh
    ]
    row_indices = [
        row for row, samp_neigh in enumerate(samp_neighs)
        for _ in samp_neigh
    ]
    mask[row_indices, column_indices] = 1

    num_rows = mask.shape[0]
    if 0 in unique_nodes:
        # Drop the column belonging to node 0.
        mask = np.delete(mask, unique_nodes[0], 1)
        unique_nodes_list.remove(0)

    if mask.size == 0:
        # Nothing left to aggregate: one all-zero row per batch entry.
        # NOTE(review): this tensor stays on the CPU even when self.cuda is
        # set -- confirm that callers cope with that.
        return torch.zeros([num_rows, feature_dim], dtype=torch.float32)

    mask = Variable(torch.FloatTensor(mask))
    if self.cuda:
        mask = mask.cuda()

    # normalization
    num_neigh = mask.sum(1, keepdim=True)
    # avoid nan
    num_neigh[num_neigh == 0] = 1
    mask = mask.div(num_neigh)

    # select feature matrix of nodes in sample set
    node_ids = torch.LongTensor(unique_nodes_list)
    if self.cuda:
        node_ids = node_ids.cuda()
    embed_matrix = self.features(node_ids)

    # message passing
    to_feats = mask.mm(embed_matrix)
    return to_feats
def __call__(self, batch_idx): neg_num = 2 batch_size = 2 * len( batch_idx) + neg_num #two negative sample per batch batch_input_ids = np.zeros((batch_size, self.max_seq_len), dtype=np.int64) batch_token_type_ids = np.ones((batch_size, self.max_seq_len), dtype=np.int64) batch_y_start = np.zeros((batch_size, ), dtype=np.int64) batch_y_end = np.zeros((batch_size, ), dtype=np.int64) batch_y = np.zeros((batch_size, ), dtype=np.int64) for i, pos_idx in enumerate(batch_idx): doc_id = self.id_list[pos_idx] data = self.data_dict[doc_id] # get label annotations = data['annotations'][0] if annotations['yes_no_answer'] == 'YES': batch_y[i * 2] = 4 elif annotations['yes_no_answer'] == 'NO': batch_y[i * 2] = 3 elif annotations['short_answers']: batch_y[i * 2] = 2 elif annotations['long_answer']['candidate_index'] != -1: batch_y[i * 2] = 1 batch_y[i * 2 + 1] = 0 # get positive and negative samples question_tokens = self.tokenizer.tokenize( data['question_text'])[:self.max_question_len] # positive answer_tokens, start_position, end_position = self._get_positive_input_ids( data, question_tokens) input_tokens = ['[CLS]'] + question_tokens + [ '[SEP]' ] + answer_tokens + ['[SEP]'] #if annotations['short_answers']: # print(data['question_text'],"[AAA]",input_tokens[start_position:end_position],"[BBB]",data['positive_text'][data['annotations'][0]['short_answers'][0]['start_token']-data['positive_start']:data['annotations'][0]['short_answers'][0]['end_token']-data['positive_end']]) #if annotations['short_answers']: # print(answer_tokens,"[AAA]",input_tokens[start_position:end_position]) input_ids = self.tokenizer.convert_tokens_to_ids(input_tokens) batch_input_ids[i * 2, :len(input_ids)] = input_ids batch_token_type_ids[i * 2, :len(input_ids)] = [ 0 if k <= input_ids.index(102) else 1 for k in range(len(input_ids)) ] if annotations['short_answers']: if start_position < 0 or end_position < 0: batch_y_start[i * 2] = -1 batch_y_end[i * 2] = -1 batch_y[i * 2] = -1 else: batch_y_start[i * 
2] = start_position batch_y_end[i * 2] = end_position else: batch_y_start[i * 2] = start_position batch_y_end[i * 2] = end_position # negative answer_tokens, start_position, end_position = self._get_negative_input_ids( data, question_tokens) input_tokens = ['[CLS]'] + question_tokens + [ '[SEP]' ] + answer_tokens + ['[SEP]'] input_ids = self.tokenizer.convert_tokens_to_ids(input_tokens) batch_token_type_ids[i * 2 + 1, :len(input_ids)] = [ 0 if k <= input_ids.index(102) else 1 for k in range(len(input_ids)) ] batch_input_ids[i * 2 + 1, :len(input_ids)] = input_ids batch_y_start[i * 2 + 1] = start_position batch_y_end[i * 2 + 1] = end_position for i, neg_idx in enumerate(batch_idx[:neg_num]): idx = i + 2 * len(batch_idx) if idx >= batch_size: break doc_id = self.neg_id_list[neg_idx] data = self.neg_data_dict[doc_id] # get negative samples question_tokens = self.tokenizer.tokenize( data['question_text'])[:self.max_question_len] # negative answer_tokens, start_position, end_position = self._get_negative_input_ids( data, question_tokens) input_tokens = ['[CLS]'] + question_tokens + [ '[SEP]' ] + answer_tokens + ['[SEP]'] input_ids = self.tokenizer.convert_tokens_to_ids(input_tokens) batch_token_type_ids[idx, :len(input_ids)] = [ 0 if k <= input_ids.index(102) else 1 for k in range(len(input_ids)) ] batch_input_ids[idx, :len(input_ids)] = input_ids batch_y_start[idx] = start_position batch_y_end[idx] = end_position batch_attention_mask = batch_input_ids > 0 return torch.from_numpy(batch_input_ids), torch.from_numpy( batch_attention_mask), torch.from_numpy( batch_token_type_ids), torch.LongTensor( batch_y_start), torch.LongTensor( batch_y_end), torch.LongTensor(batch_y)
def get_meeting_KB(self, meetinglist):
    """Look up the KB entries and masks of the given meetings.

    Returns a tuple of: a LongTensor built from ``self.meetingdict``
    entries, a byte tensor built from ``self.meetingmask`` entries, and
    ``meetinglist`` itself, unchanged.
    """
    kb_entries = []
    for meeting in meetinglist:
        kb_entries.append(self.meetingdict[meeting])

    mask_entries = []
    for meeting in meetinglist:
        mask_entries.append(self.meetingmask[meeting])

    kb_tensor = torch.LongTensor(kb_entries)
    mask_tensor = torch.Tensor(mask_entries).byte()
    return kb_tensor, mask_tensor, meetinglist
def test_step(model, test_triples, all_true_triples, args):
    '''
    Evaluate the model on test or valid datasets.

    With ``args.countries`` set, every test triple is scored against each
    region in ``args.regions`` and the result is ``{'auc_pr': ...}``.
    Otherwise the rank of the true entity is computed for head and tail
    corruption of each triple and the averages of MRR, MR, HITS@1, HITS@3
    and HITS@10 are returned.
    '''
    model.eval()

    if args.countries:
        # Countries S* datasets are evaluated on AUC-PR
        # Process test data for AUC-PR evaluation
        candidates = []
        labels = []
        for head, relation, tail in test_triples:
            for candidate_region in args.regions:
                labels.append(1 if candidate_region == tail else 0)
                candidates.append((head, relation, candidate_region))

        sample = torch.LongTensor(candidates)
        if args.cuda:
            sample = sample.cuda()

        with torch.no_grad():
            y_score = model(sample).squeeze(1).cpu().numpy()

        y_true = np.array(labels)

        # average_precision_score is the same as auc_pr
        return {'auc_pr': average_precision_score(y_true, y_score)}

    # Otherwise use standard (filtered) MRR, MR, HITS@1, HITS@3, and HITS@10 metrics
    # Prepare one dataloader per corruption mode
    loaders = [
        DataLoader(
            TestDataset(
                test_triples,
                all_true_triples,
                args.nentity,
                args.nrelation,
                batch_mode
            ),
            batch_size=args.test_batch_size,
            num_workers=max(1, args.cpu_num//2),
            collate_fn=TestDataset.collate_fn
        )
        for batch_mode in ('head-batch', 'tail-batch')
    ]

    logs = []
    step = 0
    total_steps = sum([len(loader) for loader in loaders])

    with torch.no_grad():
        for loader in loaders:
            for positive_sample, negative_sample, filter_bias, mode in loader:
                if args.cuda:
                    positive_sample = positive_sample.cuda()
                    negative_sample = negative_sample.cuda()
                    filter_bias = filter_bias.cuda()

                batch_size = positive_sample.size(0)

                score = model((positive_sample, negative_sample), mode)
                score += filter_bias

                # Explicitly sort all the entities to ensure that there is no test exposure bias
                argsort = torch.argsort(score, dim=1, descending=True)

                # Column of the triple that holds the entity being ranked.
                if mode == 'head-batch':
                    positive_arg = positive_sample[:, 0]
                elif mode == 'tail-batch':
                    positive_arg = positive_sample[:, 2]
                else:
                    raise ValueError('mode %s not supported' % mode)

                for row in range(batch_size):
                    # Notice that argsort is not ranking
                    position = (argsort[row, :] == positive_arg[row]).nonzero()
                    assert position.size(0) == 1

                    # position + 1 is the true ranking used in evaluation metrics
                    ranking = 1 + position.item()
                    logs.append({
                        'MRR': 1.0/ranking,
                        'MR': float(ranking),
                        'HITS@1': 1.0 if ranking <= 1 else 0.0,
                        'HITS@3': 1.0 if ranking <= 3 else 0.0,
                        'HITS@10': 1.0 if ranking <= 10 else 0.0,
                    })

                if step % args.test_log_steps == 0:
                    logging.info('Evaluating the model... (%d/%d)' % (step, total_steps))

                step += 1

    # Average every metric over all logged rankings.
    return {
        metric: sum([log[metric] for log in logs])/len(logs)
        for metric in logs[0].keys()
    }
def __init__(self, X, n_toks): self.X = [torch.LongTensor(xx) for xx in X] self.n_toks = n_toks
def forward(
    self,
    input_tokens,
    encoder_out,
    incremental_state=None,
    possible_translation_tokens=None,
):
    """Run the recurrent decoder and project its outputs to logits.

    Args:
        input_tokens: Target-side token ids, unpacked as ``(bsz, seqlen)``.
            When ``incremental_state`` is given only the last column is
            used.
        encoder_out: Tuple ``(encoder_outs, final_hidden, final_cell,
            src_lengths, src_tokens)``.
        incremental_state: Optional cache; previous hidden states, cell
            states and the input-feed vector are read from and written to
            it under the key ``"cached_state"``.
        possible_translation_tokens: Optional index tensor selecting the
            rows of the output projection to use.  When it is ``None`` and
            a vocab reduction module is configured, that module computes
            it.

    Returns:
        Tuple ``(logits, attn_scores, possible_translation_tokens)``.
    """
    if incremental_state is not None:
        input_tokens = input_tokens[:, -1:]
    bsz, seqlen = input_tokens.size()

    # get outputs from encoder
    encoder_outs, _, _, src_lengths, src_tokens = encoder_out

    # embed tokens
    x = self.embed_tokens(input_tokens)
    x = F.dropout(x, p=self.dropout_in, training=self.training)

    # B x T x C -> T x B x C
    x = x.transpose(0, 1)

    # initialize previous states (or get from cache during incremental generation)
    cached_state = utils.get_incremental_state(
        self, incremental_state, "cached_state")
    if cached_state is not None:
        prev_hiddens, prev_cells, input_feed = cached_state
    else:
        # first time step, initialize previous states
        prev_hiddens, prev_cells = self._init_prev_states(encoder_out)
        input_feed = self.initial_attn_context.expand(
            bsz, self.encoder_hidden_dim)

    attn_scores_per_step = []
    outs = []
    for j in range(seqlen):
        # input feeding: concatenate context vector from previous time step
        if self.attention is not None:
            step_input = torch.cat((x[j, :, :], input_feed), dim=1)
        else:
            step_input = x[j, :, :]
        previous_layer_input = step_input
        for i, rnn in enumerate(self.layers):
            # recurrent cell
            hidden, cell = rnn(step_input, (prev_hiddens[i], prev_cells[i]))

            # hidden state becomes the input to the next layer
            layer_output = F.dropout(
                hidden, p=self.dropout_out, training=self.training)

            if self.residual_level is not None and i >= self.residual_level:
                # TODO add an assert related to sizes here
                step_input = layer_output + previous_layer_input
            else:
                step_input = layer_output
            previous_layer_input = step_input

            # save state for next time step
            prev_hiddens[i] = hidden
            prev_cells[i] = cell

        if self.attention is not None:
            out, step_attn_scores = self.attention(
                hidden,
                encoder_outs,
                src_lengths,
            )
            input_feed = out
            combined_output_and_context = torch.cat((hidden, out), dim=1)
        else:
            # Without attention there is no context vector to append.
            # NOTE(review): assumes self.combined_output_and_context_dim
            # equals the hidden size in this configuration -- confirm.
            combined_output_and_context = hidden
            step_attn_scores = Variable(
                torch.ones(
                    src_lengths.shape[0],
                    src_lengths.data.max()).type_as(encoder_outs.data),
                requires_grad=False,
            ).t()
        attn_scores_per_step.append(step_attn_scores.unsqueeze(1))

        # save final output
        outs.append(combined_output_and_context)

    # Concatenate the per-step scores once, after the loop.
    attn_scores = torch.cat(attn_scores_per_step, dim=1)
    # srclen x tgtlen x bsz -> bsz x tgtlen x srclen
    attn_scores = attn_scores.transpose(0, 2)

    # cache previous states (no-op except during incremental generation)
    utils.set_incremental_state(
        self,
        incremental_state,
        "cached_state",
        (prev_hiddens, prev_cells, input_feed),
    )

    # collect outputs across time steps
    x = torch.cat(outs, dim=0).view(
        seqlen, bsz, self.combined_output_and_context_dim,
    )

    # T x B x C -> B x T x C
    x = x.transpose(1, 0)

    # bottleneck layer
    if hasattr(self, "additional_fc"):
        x = self.additional_fc(x)
        x = F.dropout(x, p=self.dropout_out, training=self.training)

    output_projection_w = self.output_projection_w
    output_projection_b = self.output_projection_b
    decoder_input_tokens = input_tokens if self.training else None

    if self.vocab_reduction_module and possible_translation_tokens is None:
        possible_translation_tokens = self.vocab_reduction_module(
            src_tokens, decoder_input_tokens=decoder_input_tokens)
    if possible_translation_tokens is not None:
        output_projection_w = output_projection_w.index_select(
            dim=0, index=possible_translation_tokens)
        output_projection_b = output_projection_b.index_select(
            dim=0, index=possible_translation_tokens)

    # avoiding transpose of projection weights during ONNX tracing
    batch_time_hidden = torch.onnx.operators.shape_as_tensor(x)
    x_flat_shape = torch.cat(
        (torch.LongTensor([-1]), batch_time_hidden[2].view(1)))
    x_flat = torch.onnx.operators.reshape_from_tensor_shape(x, x_flat_shape)

    projection_flat = torch.matmul(output_projection_w, x_flat.t()).t()
    logits_shape = torch.cat(
        (batch_time_hidden[:2], torch.LongTensor([-1])))
    logits = (torch.onnx.operators.reshape_from_tensor_shape(
        projection_flat, logits_shape) + output_projection_b)

    return logits, attn_scores, possible_translation_tokens
def __getitem__( self, i: int ) -> Tuple[torch.LongTensor, torch.LongTensor, torch.LongTensor]: return torch.LongTensor(self.data['sents'][i]), \ torch.LongTensor([self.data['words_per_sentence'][i]]), \ torch.LongTensor([self.data['labels'][i]])
def beam_decode_batch(decoder,
                      decoder_hidden,
                      outU,
                      outB,
                      voc,
                      beam_size,
                      batch_size,
                      max_length=MAX_LENGTH,
                      top_k=3):
    """Beam-search decode a whole batch.

    Args:
        decoder: Callable invoked as ``decoder(decoder_input, hidden, outU,
            outB)``; the first two of its four return values (output scores
            and new hidden state) are used.
        decoder_hidden: Initial hidden state; sample ``i`` is taken as
            ``decoder_hidden[:, i]``.
        outU, outB: Passed through to ``decoder`` unchanged.
        voc: Vocabulary object handed to ``Sentence.addTopk`` and
            ``Sentence.toWordScore``.
        beam_size: Number of hypotheses kept per sample after every step.
        batch_size: Number of samples in the batch.
        max_length: Number of decoding steps.
        top_k: How many of the best finished sentences to keep per sample.

    Returns:
        A list with one entry per sample, each a list of at most ``top_k``
        items sorted by descending score (element 1 of each item).
    """
    # these lists include all samples in the batch
    terminal_sentences = [[] for _ in range(batch_size)]
    next_top_sentences = [[] for _ in range(batch_size)]
    prev_top_sentences = [
        [Sentence(decoder_hidden[:, i])] for i in range(batch_size)
    ]

    for _ in tqdm(range(max_length)):
        beam_sizes = [len(s) for s in prev_top_sentences]

        for j in range(max(beam_sizes)):
            # Gather the j-th hypothesis of every sample into one batch.
            decoder_input = []
            sentence_decoder_hidden = []
            for i in range(batch_size):
                sentence = prev_top_sentences[i][j]
                decoder_input.append(torch.LongTensor([sentence.last_idx]))
                sentence_decoder_hidden.append(sentence.decoder_hidden)

            decoder_input = torch.stack(decoder_input, 1)
            sentence_decoder_hidden = torch.stack(sentence_decoder_hidden, 1)
            decoder_input = decoder_input.cuda() if USE_CUDA else decoder_input

            decoder_output, decoder_hidden, _user_attn, _business_attn = \
                decoder(decoder_input, sentence_decoder_hidden, outU, outB)

            decoder_output = F.softmax(decoder_output, dim=-1)
            topv, topi = decoder_output.data.topk(beam_size)

            for i in range(batch_size):
                sentence = prev_top_sentences[i][j]
                term, top = sentence.addTopk(topi[:, i:i + 1],
                                             topv[:, i:i + 1],
                                             decoder_hidden[:, i],
                                             beam_size, voc)
                terminal_sentences[i].extend(term)
                next_top_sentences[i].extend(top)

        for i in range(batch_size):
            # after adding all beams, keep beam top
            next_top_sentences[i].sort(key=lambda s: s.avgScore(),
                                       reverse=True)
            prev_top_sentences[i] = next_top_sentences[i][:beam_size]
            next_top_sentences[i] = []

    for i in range(batch_size):
        # Hypotheses still open after the last step count as finished.
        terminal_sentences[i] += [
            sentence.toWordScore(voc) for sentence in prev_top_sentences[i]
        ]
        terminal_sentences[i].sort(key=lambda x: x[1], reverse=True)

        # Keep the best top_k per sample; slicing copes with shorter lists.
        terminal_sentences[i] = terminal_sentences[i][:top_k]

    return terminal_sentences