def load(self, fdata, use_char=False, n_context=1, max_len=10):
    sentences = self.preprocess(fdata)
    x, y, char_x, lens = [], [], [], []
    for wordseq, tagseq in sentences:
        wiseq = [self.wdict.get(w, self.unk_wi) for w in wordseq]
        tiseq = [self.tdict[t] for t in tagseq]
        # get the context window of each word
        if n_context > 1:
            x.append(self.get_context(wiseq, n_context))
        else:
            x.append(torch.tensor(wiseq, dtype=torch.long))
        y.append(torch.tensor(tiseq, dtype=torch.long))
        # pad words shorter than max_len with 0
        char_x.append(torch.tensor([
            [self.cdict.get(c, self.unk_ci) for c in w[:max_len]]
            + [0] * (max_len - len(w))
            for w in wordseq
        ]))
        lens.append(len(tiseq))
    x = pad_sequence(x, True)
    y = pad_sequence(y, True)
    char_x = pad_sequence(char_x, True)
    lens = torch.tensor(lens)
    if use_char:
        dataset = TensorDataset(x, y, char_x, lens)
    else:
        dataset = TensorDataset(x, y, lens)
    return dataset
def postprocess_sequence(self, X):
    """Embed (variable-length) sequences

    Parameters
    ----------
    X : list
        List of input sequences

    Returns
    -------
    fX : numpy array
        Batch of sequence embeddings.
    """
    lengths = torch.tensor([len(x) for x in X])
    sorted_lengths, sort = torch.sort(lengths, descending=True)
    _, unsort = torch.sort(sort)
    sequences = [torch.tensor(X[i], dtype=torch.float32, device=self.device)
                 for i in sort]
    padded = pad_sequence(sequences, batch_first=True, padding_value=0)
    packed = pack_padded_sequence(padded, sorted_lengths, batch_first=True)
    cpu = torch.device('cpu')
    fX = self.model(packed).detach().to(cpu).numpy()
    return fX[unsort]
def _process(self, index):
    if type(index) is list:
        dict_list = sorted(
            [self._transform(s, t) for s, t in self.dataset[index]],
            key=lambda x: x["num_frames"],
            reverse=True)
        spectrogram = pack_sequence([d["spectrogram"] for d in dict_list])
        target_attr = pad_sequence(
            [d["target_attr"] for d in dict_list], batch_first=True)
        silent_mask = pad_sequence(
            [d["silent_mask"] for d in dict_list], batch_first=True)
        return spectrogram, target_attr, silent_mask
    elif type(index) is int:
        s, t = self.dataset[index]
        data_dict = self._transform(s, t)
        return data_dict["spectrogram"], \
            data_dict["target_attr"], \
            data_dict["silent_mask"]
    else:
        raise ValueError("Unsupported index type({})".format(type(index)))
def test_rnn_init_predict_split(self):
    model = nn.LSTM(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, 3, bidirectional=True)
    seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=7)
    seq_lengths = list(reversed(sorted(map(int, seq_lengths))))
    input = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
    input = rnn_utils.pad_sequence(input)

    # Test that we are correctly splitting between init and
    # predict net. When we embed parameters, there should be more
    # ops in the init net.
    mp = onnx.ModelProto.FromString(
        do_export(model, input, export_params=self.embed_params)[0])
    prepared = c2.prepare(mp, device='CPU')
    if self.embed_params:
        assert len(prepared.init_net.op) == 1038
        assert len(prepared.predict_net.op) == 101
    else:
        assert len(prepared.init_net.op) == 27
        assert len(prepared.predict_net.op) == 1112
def make_input(batch_size):
    seq_lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=batch_size)
    seq_lengths = list(reversed(sorted(map(int, seq_lengths))))
    inputs = [Variable(torch.randn(l, RNN_INPUT_SIZE)) for l in seq_lengths]
    inputs = rnn_utils.pad_sequence(inputs)
    if packed_sequence == 2:
        inputs = inputs.transpose(0, 1)
    inputs = [inputs]

    directions = 2 if bidirectional else 1

    if initial_state:
        h0 = Variable(torch.randn(directions * layers, batch_size, RNN_HIDDEN_SIZE))
        inputs.append(h0)
    if packed_sequence != 0:
        inputs.append(Variable(torch.IntTensor(seq_lengths)))
    if len(inputs) == 1:
        input = inputs[0]
    else:
        input = tuple(inputs)
    return input
def forward(self, x, char_x, lens):
    B, T = x.shape
    # build the padding mask
    mask = x.gt(0)
    # word embeddings
    x = self.embed(x)
    # character-level embeddings
    char_x = self.char_lstm(char_x[mask])
    char_x = pad_sequence(torch.split(char_x, lens.tolist()), True)
    # concatenate the word and character representations
    x = torch.cat((x, char_x), dim=-1)
    x = self.drop(x)

    x = pack_padded_sequence(x, lens, True)
    x, _ = self.word_lstm(x)
    x, _ = pad_packed_sequence(x, True)
    x = self.drop(x)
    return self.out(x)
    policy.parameters()), lr=0.01)

for i in range(num_batch):
    policy_optimizer.zero_grad()
    actor_batch = []
    critic_batch = []
    reward_batch = []
    regex = []
    for j in range(batch_size):
        actions, reg = policy.sample_regex(max_len_regex)
        reward_batch.append(evaluator.evaluate(reg))
        actor_outs, critic_outs, _, _ = policy.evaluate_solution(actions)
        actor_batch.append(actor_outs)
        critic_batch.append(critic_outs)
        regex.append(reg)
    actor_batch = pad_sequence(actor_batch, True)
    critic_batch = pad_sequence(critic_batch, True)
    reward_batch = (torch.FloatTensor(reward_batch)
                    if not torch.cuda.is_available()
                    else torch.FloatTensor(reward_batch).cuda())
    reward_batch = reward_batch.unsqueeze(1)
    print("max_reward :", reward_batch.max())
    print("regex max :", regex[int(reward_batch.argmax())])
    loss = -1.0 * (actor_batch * reward_batch).sum()
    print("loss :", loss)
    loss.backward()
    policy_optimizer.step()
def batch_loss(self, batch, model, device, writer=None, **kwargs):
    lengths = torch.tensor([len(x) for x in batch['X']])
    variable_lengths = len(set(lengths)) > 1

    if variable_lengths:
        sorted_lengths, sort = torch.sort(lengths, descending=True)
        _, unsort = torch.sort(sort)
        sequences = [torch.tensor(batch['X'][i], dtype=torch.float32,
                                  device=device) for i in sort]
        padded = pad_sequence(sequences, batch_first=True, padding_value=0)
        packed = pack_padded_sequence(padded, sorted_lengths, batch_first=True)
        batch['X'] = packed
    else:
        batch['X'] = torch.tensor(np.stack(batch['X']),
                                  dtype=torch.float32, device=device)

    # forward pass
    fX = model(batch['X'])

    if variable_lengths:
        fX = fX[unsort]

    # log embedding norms
    if writer is not None:
        norm_npy = np.linalg.norm(self.to_numpy(fX), axis=1)
        self.log_norm_.append(norm_npy)

    batch['fX'] = fX
    batch = self.aggregate(batch)

    fX = batch['fX']
    y = batch['y']

    # pre-compute pairwise distances
    distances = self.pdist(fX)

    # sample triplets
    triplets = getattr(self, 'batch_{0}'.format(self.sampling))
    anchors, positives, negatives = triplets(y, distances)

    # compute loss for each triplet
    losses, deltas, _, _ = self.triplet_loss(
        distances, anchors, positives, negatives, return_delta=True)

    if writer is not None:
        pdist_npy = self.to_numpy(distances)
        delta_npy = self.to_numpy(deltas)
        same_speaker = pdist(y.reshape((-1, 1)), metric='equal')
        self.log_positive_.append(pdist_npy[np.where(same_speaker)])
        self.log_negative_.append(pdist_npy[np.where(~same_speaker)])
        self.log_delta_.append(delta_npy)

    # average over all triplets
    return torch.mean(losses)
def pad(batch, device=torch.device('cpu')):
    batch_lengths = torch.tensor(_.map_(batch, len), dtype=torch.long, device=device)
    return (pad_sequence(batch, batch_first=True, padding_value=1).to(device),
            batch_lengths)
print("weights: ", weights) print("===================") test_acc=[] train_acc=[] weighted_acc = [] test_loss=[] train_loss=[] for epoch in range(100): # again, normally you would NOT do 300 epochs, it is toy data print("===================================" + str(epoch+1) + "==============================================") losses = 0 correct=0 model.train() for j, (input_lstm,input, target,seq_length) in enumerate(train_loader): if (j+1)%20==0: print("=================================Train Batch"+ str(j+1)+ str(weight)+"===================================================") model.zero_grad() input_lstm = pad_sequence(sequences=input_lstm,batch_first=True) losses_batch,correct_batch= model(input_lstm,input, target,seq_length) loss = torch.mean(losses_batch,dim=0) correct_batch=torch.sum(correct_batch,dim=0) losses += loss.item() * batch_size loss.backward() weight=model.module.state_dict()["weight"] weight=torch.exp(10*weight)/(1+torch.exp(10*weight)).item() optimizer.step() correct += correct_batch.item() accuracy=correct*1.0/((j+1)*batch_size) losses=losses / ((j+1)*batch_size) losses_test = 0 correct_test = 0 losses_test_ce=0
def stroke_embed(batch, initials, embedder, bezier_degree, bezier_degree_low,
                 variational=False, inf_loss=False):
    h_initial, c_initial = initials

    # Redundant, but that's fine
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # accumulate all info into these empty lists
    sketches_ctrlpt, sketches_ratw, sketches_st_starts, sketches_stopbits = [], [], [], []
    deg_losses = []
    n_strokes = []
    for sk, _ in batch:
        # for each sketch in the batch
        st_starts = torch.tensor([st[0, :2] for st in sk], device=device)
        sk = [torch.tensor(st[:, :-1], device=device) - st_start
              for st, st_start in zip(sk, st_starts)]
        ls = [st.shape[0] for st in sk]
        sk = pad_sequence(sk, batch_first=True)
        sk = pack_padded_sequence(sk, ls, batch_first=True, enforce_sorted=False)

        if embedder.rational:
            emb_ctrlpt, emb_ratw = embedder(sk, h_initial, c_initial)
        else:
            if not inf_loss:
                emb_ctrlpt = embedder(sk, h_initial, c_initial, inf_loss=False)
            else:
                emb_ctrlpt, deg_loss = embedder(sk, h_initial, c_initial, inf_loss=True)
                # breakpoint()

        if not inf_loss:
            emb_ctrlpt = emb_ctrlpt[bezier_degree - bezier_degree_low]
            sketches_ctrlpt.append(emb_ctrlpt.view(len(ls), -1))
        else:
            sketches_ctrlpt.append(emb_ctrlpt)
            deg_losses.append(deg_loss)
            # breakpoint()

        if embedder.rational:
            sketches_ratw.append(emb_ratw)
        sketches_st_starts.append(st_starts)

        # create stopbits
        stopbit = torch.zeros(len(ls), 1, device=device)
        stopbit[-1, 0] = 1.
        sketches_stopbits.append(stopbit)

        n_strokes.append(len(ls))

    n_strokes = torch.tensor(n_strokes, device=device)
    if not inf_loss:
        sketches_ctrlpt = pad_sequence(sketches_ctrlpt, batch_first=True)
    if embedder.rational:
        sketches_ratw = pad_sequence(sketches_ratw, batch_first=True)
    sketches_st_starts = pad_sequence(sketches_st_starts, batch_first=True)
    sketches_stopbits = pad_sequence(sketches_stopbits, batch_first=True, padding_value=1.0)

    # For every sketch in a batch:
    #   For every stroke in the sketch:
    #     1. (Control Point, Rational Weights) pair
    #     2. Start location of the stroke with respect to a global reference (of the sketch)
    if embedder.rational:
        return sketches_ctrlpt, sketches_ratw, sketches_st_starts, sketches_stopbits, n_strokes
    else:
        if not inf_loss:
            return sketches_ctrlpt, sketches_st_starts, sketches_stopbits, n_strokes
        else:
            return (sketches_ctrlpt, deg_losses), sketches_st_starts, sketches_stopbits, n_strokes
def fit(self, X, y=None, y_for_verification=None, plot=False):
    # assert not self.semisupervised, "semisupervised not supported yet"
    self.best_delta_mi = -1
    self.best_full_net = None
    self.best_embedding_net = None
    # self.final_model = None
    # self.final_model_trained = False
    self.best_n_clusters = 1
    self.zero_cutoff = self.initial_zero_cutoff
    self.exp_dist = 0
    if self.random_seed is not None:
        np.random.seed(self.random_seed)
    use_y_to_verify_performance = y_for_verification is not None
    self.semisupervised = self.semisupervised and y is not None
    if self.semisupervised and self.semisupervised_weight is None:
        self.semisupervised_weight = np.sum(y != -1) / y.shape[0]
    if self.semisupervised:
        n_classes = np.unique(y[y != -1]).shape[0]  # because of the -1
    if use_y_to_verify_performance:
        verify_n_classes = np.unique(y_for_verification).shape[0]
        self._print_with_verbosity(
            f"number of classes in verification set: {verify_n_classes}", 3)
    if self.model == "auto":
        self.model = self._select_model(X)
    if self.is_tokens:
        X = pad_sequence(X, padding_value=0, batch_first=True)
    if type(X) is not torch.Tensor:
        X = torch.Tensor(X)
    self.device = torch.device("cuda") if (
        torch.cuda.is_available() and self.use_gpu) else torch.device("cpu")
    if self.device.type == "cpu":
        self._print_with_verbosity("WARNING: using CPU, may be very slow", 0, strict=True)
    self._print_with_verbosity(f"using torch device {self.device}", 1)
    self._print_with_verbosity("building dataset", 1)
    dataset = self._build_dataset(
        X,
        y=y if self.semisupervised else None,
    )
    data_loader = DataLoader(dataset, shuffle=True, batch_size=self.batch_size)
    self.model = self.model.to(self.device)
    if self.optimizer_override is None:
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
    else:
        self.optimizer = self.optimizer_override(self.model.parameters(),
                                                 lr=self.learning_rate)
    if self.semisupervised:
        label_subnet = ClusterNet(self.n_components, n_classes).to(self.device)
        self.semisupervised_model = FullNet(self.model, label_subnet).to(self.device)
        self.optimizer = optim.Adam(self.semisupervised_model.parameters(),
                                    lr=self.learning_rate)
    self._print_with_verbosity("training", 1)
    for i in self._progressbar_with_verbosity(range(self.epochs), 0, strict=True):
        self.model.train()
        self._print_with_verbosity(f"this is epoch {i}", 1)
        self._train_siamese_one_epoch(data_loader)
        self.model.eval()
        transformed = self.transform(X, model=self.model)
        self._get_exp_dist(data_loader)
        self._print_with_verbosity(
            f"found expected distance between related points as {self.exp_dist}", 3)
        cluster_assignments = self._cluster(transformed)
        self._print_with_verbosity(f"found {self.n_clusters} clusters", 1)
        preds = self._build_cluster_subnet(X, transformed, cluster_assignments)
        if use_y_to_verify_performance:
            nmi_score = normalized_mutual_info_score(
                cluster_assignments, y_for_verification, 'geometric')
            self._print_with_verbosity(
                f"NMI of cluster labels with y: {nmi_score}", 2)
            nmi_score = normalized_mutual_info_score(
                preds, y_for_verification, 'geometric')
            self._print_with_verbosity(
                f"NMI of network predictions with y: {nmi_score}", 1)
            if self.n_clusters == verify_n_classes:
                acc_score = get_accuracy(cluster_assignments, y_for_verification)
                self._print_with_verbosity(
                    f"accuracy of cluster labels: {acc_score}", 2)
            if np.unique(preds).shape[0] == verify_n_classes:
                acc_score = get_accuracy(preds, y_for_verification)
                self._print_with_verbosity(
                    f"accuracy of network predictions: {acc_score}", 1)
            else:
                self._print_with_verbosity(
                    f"number of predicted classes did not match number of clusters "
                    f"so not computing accuracy, correct {verify_n_classes} vs {self.n_clusters}", 2)
        if plot:
            if self.n_components == 2:
                plot_2d(transformed, cluster_assignments, show=False, no_legend=True)
                if use_y_to_verify_performance:
                    plot_2d(transformed, y_for_verification, show=False, no_legend=True)
                plt.show()
            elif self.n_components == 3:
                plot_3d(transformed, cluster_assignments, show=False)
                if use_y_to_verify_performance:
                    plot_3d(transformed, y_for_verification, show=False)
                plt.show()
sequences_1 = [sequence[0] for sequence in input_variables]
sequences_2 = [sequence[1] for sequence in input_variables]
batch_size = len(sequences_1)

# Make a tensor for the similarity scores
sim_scores_2d = torch.zeros([batch_size, 2])
for j in range(batch_size):
    if similarity_scores[j] == 0:
        sim_scores_2d[j] = fake_label
    else:
        sim_scores_2d[j] = real_label
sim_scores_2d = sim_scores_2d.cuda()

temp = rnn.pad_sequence(sequences_1 + sequences_2)
sequences_1 = temp[:, :batch_size]
sequences_2 = temp[:, batch_size:]

model_optimizer.zero_grad()
loss_s = 0.0
optimizerG.zero_grad()
loss_g = 0.0
optimizerD.zero_grad()
loss_d = 0.0
loss_f = 0.0

# Initialise the hidden state and pass through the maLSTM
def to_torch(batch, **kwargs):
    return pad_sequence(
        [torch.tensor(b, dtype=torch.long) for b in batch],
        batch_first=False)
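# A small illustrative check (hypothetical token-id lists, not from the original
# code): with batch_first=False the padded output is time-major, shape (max_len, batch).
import torch

batch = [[1, 2, 3], [4, 5]]
out = to_torch(batch)
print(out.shape)  # torch.Size([3, 2]) -> (max_len, batch)
print(out[:, 1])  # tensor([4, 5, 0]) -- shorter sequence padded with 0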
def collate_batch(batch):
    """Collate a whole batch of utterances."""
    flatten = [u for s in batch for u in s]
    return pad_sequence(flatten, batch_first=True, padding_value=0)
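# A minimal sketch of wiring a collate function like collate_batch into a
# DataLoader. The toy dataset and its sizes below are hypothetical assumptions,
# only meant to show the flatten-then-pad behavior.
import torch
from torch.utils.data import DataLoader, Dataset


class ToyUtteranceDataset(Dataset):
    """Hypothetical dataset: each item is a list of variable-length 1-D tensors."""

    def __init__(self, n_items=8):
        self.items = [
            [torch.randn(torch.randint(3, 10, (1,)).item()) for _ in range(2)]
            for _ in range(n_items)
        ]

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        return self.items[idx]


loader = DataLoader(ToyUtteranceDataset(), batch_size=4, collate_fn=collate_batch)
batch = next(iter(loader))
# every utterance from the 4 sampled items is flattened and zero-padded
print(batch.shape)  # (8, max_len): 4 items x 2 utterances each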
stocks = stocks.set_index('symbol', drop=True)
train_df = stocks.drop(test_symbols, axis=0)
test_df = stocks.drop(stocks.index.difference(test_symbols), axis=0)
train_symbols = train_df.index.unique().tolist()

train_tensors = []
train_seq_lens = []
for sym in train_symbols:
    stock_data, stock_data_len = prepare_stock_data(sym)
    stock_data = normalize_stock_data(stock_data)
    stock_tensor = torch.Tensor(stock_data)
    train_seq_lens.append(stock_data_len)
    train_tensors.append(stock_tensor)

X = pad_sequence(train_tensors).T.unsqueeze(-1)
y = torch.Tensor(train_seq_lens)
train_dataset = TensorDataset(X, y)

test_seq_lens = []
test_tensors = []
for sym in test_symbols:
    stock_data, stock_data_len = prepare_stock_data(sym)
    stock_data = normalize_stock_data(stock_data)
    test_seq_lens.append(stock_data_len)
    stock_tensor = torch.Tensor(stock_data)
    test_tensors.append(stock_tensor)

X = pad_sequence(test_tensors).T.unsqueeze(-1)
y = torch.Tensor(test_seq_lens)
test_dataset = TensorDataset(X, y)
        hidden_state = torch.cat([ht_final[i] for i in range(ht_final.size(0))], dim=1)

        # apply attention
        hidden_state = hidden_state.unsqueeze(2)  # (B, hidden * 2, 1)
        attention_scores = torch.bmm(out, hidden_state).squeeze(2)
        soft_attention_weights = F.softmax(attention_scores, 1).unsqueeze(2)  # (B, L, 1)
        attention_out = torch.bmm(out.permute(0, 2, 1), soft_attention_weights).squeeze(2)

        features = torch.cat([hidden_state.squeeze(2), attention_out], dim=1)
        features = self.dropout_1(features)
        features = self.fc1(features)
        features = self.bn1(features)
        features = F.relu(features)
        features = self.dropout_2(features)
        logits = self.fc2(features)
        return logits


if __name__ == '__main__':
    import torch
    from torch.nn.utils.rnn import pad_sequence

    sentences = [torch.LongTensor([2, 3, 4, 5]),
                 torch.LongTensor([6, 7, 8]),
                 torch.LongTensor([9, 10])]
    x = pad_sequence(sentences, batch_first=True, padding_value=0)
    masks = (x != 0).type(torch.FloatTensor)
    len_x = [4, 3, 2]
    model = TopicClassLSTM(vocab_size=11, emb_size=10, embedding_tensor=None,
                           freeze=False, dropout=0.2, lstm_hidden=200, num_classes=16)
    out = model(x, len_x)
def train_model(cls, online_net, target_net, optimizer, batch, batch_size,
                sequence_length, gamma, use_deeper_net):
    # def slice_burn_in(item):
    #     return item[:, burn_in_length:, :]

    # batch.state is a list of tensors of shape (seq_length, input_dim)
    # so seq.size()[0] = the length of the sequence
    lengths = np.array([seq.size()[0] for seq in batch.state])
    max_length = int(np.max(lengths))

    # ===== compute loss mask =====
    # for example, if sequence_length == 3, then lower_triangular_matrix =
    # 1 0 0
    # 1 1 0
    # 1 1 1
    # suppose lengths == np.array([2, 3, 1]), then lengths - 1 == np.array([1, 2, 0]) and
    # the loss_mask computed from lower_triangular_matrix[lengths-1] is
    # 1 1 0
    # 1 1 1
    # 1 0 0
    # which corresponds to lengths correctly
    lower_triangular_matrix = np.tril(np.ones((sequence_length, sequence_length)))
    loss_mask = lower_triangular_matrix[lengths - 1]  # first convert from 1-based to 0-based indexing
    loss_mask = torch.tensor(loss_mask)  # has shape (bs, seq_len)

    if use_deeper_net:
        states = pad_sequence(batch.state, batch_first=True)
        next_states = pad_sequence(batch.next_state, batch_first=True)
    else:
        states = pack_padded_sequence(pad_sequence(batch.state, batch_first=True),
                                      lengths=lengths, batch_first=True,
                                      enforce_sorted=False)
        next_states = pack_padded_sequence(pad_sequence(batch.next_state, batch_first=True),
                                           lengths=lengths, batch_first=True,
                                           enforce_sorted=False)

    # max_length == sequence_length most of the times, but not always
    actions = pad_sequence(batch.action, batch_first=True).view(
        batch_size, max_length, -1).long()  # has shape (bs, seq_len, 1)
    rewards = pad_sequence(batch.reward, batch_first=True).view(
        batch_size, max_length, -1)  # has shape (bs, seq_len, 1)
    masks = pad_sequence(batch.mask, batch_first=True).view(
        batch_size, max_length, -1)  # has shape (bs, seq_len, 1)

    h0 = torch.stack([rnn_state[0, 0, :] for rnn_state in batch.rnn_state
                      ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)
    c0 = torch.stack([rnn_state[0, 1, :] for rnn_state in batch.rnn_state
                      ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)
    h1 = torch.stack([rnn_state[1, 0, :] for rnn_state in batch.rnn_state
                      ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)
    c1 = torch.stack([rnn_state[1, 1, :] for rnn_state in batch.rnn_state
                      ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)

    # states = torch.stack(batch.state).view(batch_size, sequence_length, online_net.num_inputs)
    # next_states = torch.stack(batch.next_state).view(batch_size, sequence_length, online_net.num_inputs)
    # actions = torch.stack(batch.action).view(batch_size, sequence_length, -1).long()
    # rewards = torch.stack(batch.reward).view(batch_size, sequence_length, -1)
    # masks = torch.stack(batch.mask).view(batch_size, sequence_length, -1)
    # rnn_state = torch.stack(batch.rnn_state).view(batch_size, sequence_length, 2, -1)
    # [h0, c0] = rnn_state[:, 0, :, :].transpose(0, 1)
    # h0 = h0.unsqueeze(0).detach()
    # c0 = c0.unsqueeze(0).detach()
    # [h1, c1] = rnn_state[:, 1, :, :].transpose(0, 1)
    # h1 = h1.unsqueeze(0).detach()
    # c1 = c1.unsqueeze(0).detach()

    pred, _ = online_net(states, (h0, c0), inference=False,
                         max_length=max_length, lengths=lengths)
    next_pred, _ = target_net(next_states, (h1, c1), inference=False,
                              max_length=max_length, lengths=lengths)

    # if burn_in_length > 0:
    #     pred = slice_burn_in(pred)
    #     next_pred = slice_burn_in(next_pred)
    #     actions = slice_burn_in(actions)
    #     rewards = slice_burn_in(rewards)
    #     masks = slice_burn_in(masks)

    pred = pred.gather(2, actions).squeeze()  # has shape (bs, seq_len)
    target = rewards + masks * gamma * next_pred.max(2, keepdim=True)[0]
    target = target.squeeze()  # has shape (bs, seq_len)

    loss = torch.mean(((pred - target.detach()) ** 2) * loss_mask.float())
    # loss = F.mse_loss(pred, target.detach())

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(online_net.parameters(), 1.0)
    optimizer.step()

    return loss
def padded_collate(batch):
    # Sort batch by the longest sequence desc
    batch.sort(key=lambda sequence: len(sequence[3]), reverse=True)
    graph_ids, targetss, char_indices, *index_groups = zip(*batch)
    # print("sens", [(i.shape, j.shape) for i, j in zip(index_groups[0], index_groups[-1])])

    # The number of words in each sequence
    seq_lengths = torch.LongTensor([len(indices) for indices in index_groups[0]])
    max_word_count = seq_lengths[0]
    # print(max_word_count, targets[0].shape, len(heads[0]), len(index_groups[0][0]))

    padded_targetss = []
    # when only having a primary (no other) loss
    unpadding_mask = None
    targetss = tuple(zip(*targetss))
    # print(seq_lengths)
    # print(len(targetss[0]))
    # print([x.shape for x in targetss[0]])
    for targets in targetss:
        if targets[0] is not None:
            padded_targets = torch.zeros(
                len(seq_lengths), max_word_count, max_word_count, dtype=torch.long)
            if unpadding_mask is None:
                # unpadding_mask = torch.zeros_like(padded_targets, dtype=torch.uint8)
                unpadding_mask = torch.zeros_like(padded_targets, dtype=torch.bool)
            for i, target in enumerate(targets):
                padded_targets[i, :seq_lengths[i], :seq_lengths[i]] = target
                unpadding_mask[i, :seq_lengths[i], :seq_lengths[i]] = 1
        else:
            padded_targets = None
            unpadding_mask = None
        padded_targetss.append(padded_targets)

    # Batch specific word vocabulary where each word
    # is expressed by its character indices
    batch_voc = list({word for sentence in char_indices for word in sentence})
    batch_voc.append(PAD_WORD)
    batch_voc.sort(key=lambda word: len(word), reverse=True)
    voc_lengths = torch.LongTensor([len(word) for word in batch_voc])
    voc_lookup = {word: i for i, word in enumerate(batch_voc)}
    batch_voc = pad_sequence([torch.LongTensor(tup) for tup in batch_voc], batch_first=True)

    index_mapping = torch.full(
        size=(len(batch), max_word_count),
        fill_value=voc_lookup[PAD_WORD],
        dtype=torch.long)
    # Map each word in the batch to an index in the char word vocabulary
    for i, sentence in enumerate(char_indices):
        for j, word in enumerate(sentence):
            index_mapping[i, j] = voc_lookup[word]

    padded = PaddedBatch(
        graph_ids,
        padded_targetss,
        unpadding_mask,
        CharContainer(index_mapping, batch_voc, voc_lengths),
        seq_lengths,
        [pad_sequence(indices, batch_first=True) for indices in index_groups])
    return padded
def test_case4(self):
    device = torch.device('cuda', device_id)

    # combine case1 to case3 into a minibatch
    # the first example (a): input_length: 1, label_length: 1
    # the second example (c, c): input_length: 3, label_length: 2
    # the third example (b, c): input_length: 3, label_length: 2
    label_lengths_tensor = torch.tensor([1, 2, 2], dtype=torch.int32)
    input_lengths_tensor = torch.tensor([1, 3, 3], dtype=torch.int32)

    alphabet_size = 5
    minibatch = 3

    info = ctc.CtcOptions()
    info.loc = ctc.CtcComputeLocation.CTC_GPU
    info.blank_label = 0

    label_lengths = kaldi.IntSubVectorFromDLPack(to_dlpack(label_lengths_tensor))
    input_lengths = kaldi.IntSubVectorFromDLPack(to_dlpack(input_lengths_tensor))

    status, size_in_bytes = ctc.GetWorkspaceSize(
        label_lengths=label_lengths,
        input_lengths=input_lengths,
        alphabet_size=alphabet_size,
        minibatch=minibatch,
        info=info)
    self.assertEqual(status, ctc.CtcStatus.CTC_STATUS_SUCCESS)

    num_floats = size_in_bytes // 4 + 1
    workspace_tensor = torch.empty(num_floats, dtype=torch.float32).contiguous().to(device)

    ex1 = torch.tensor([[0.2, 0.2, 0.2, 0.2, 0.2]], dtype=torch.float32)
    ex2 = torch.tensor(
        [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
        dtype=torch.float32)
    ex3 = torch.tensor(
        [[-5, -4, -3, -2, -1], [-10, -9, -8, -7, -6], [-15, -14, -13, -12, -11]],
        dtype=torch.float32)

    activations_tensor = pad_sequence([ex1, ex2, ex3], batch_first=False)
    activations_tensor = activations_tensor.contiguous().view(-1).to(device)
    gradients_tensor = torch.empty_like(activations_tensor)

    # labels are: (a), (c, c), (b, c)
    # which are:  (1), (3, 3), (2, 3)
    flat_labels_tensor = torch.tensor([1, 3, 3, 2, 3], dtype=torch.int32)
    costs_tensor = torch.empty(minibatch, dtype=torch.float32)

    activations = kaldi.CuSubVectorFromDLPack(to_dlpack(activations_tensor))
    gradients = kaldi.CuSubVectorFromDLPack(to_dlpack(gradients_tensor))
    flat_labels = kaldi.IntSubVectorFromDLPack(to_dlpack(flat_labels_tensor))
    costs = kaldi.FloatSubVectorFromDLPack(to_dlpack(costs_tensor))
    workspace = kaldi.CuSubVectorFromDLPack(to_dlpack(workspace_tensor))

    status = ctc.ComputeCtcLossGpu(
        activations=activations,
        gradients=gradients,
        flat_labels=flat_labels,
        label_lengths=label_lengths,
        input_lengths=input_lengths,
        alphabet_size=alphabet_size,
        minibatch=minibatch,
        costs=costs,
        workspace=workspace,
        options=info)

    self.assertAlmostEqual(costs[0], 1.6094379425049)
    self.assertAlmostEqual(costs[1], 7.355742931366)
    self.assertAlmostEqual(costs[2], 4.938850402832, places=6)
def collate(batch):
    return [
        pad_sequence(x, batch_first=True).to(dtype=torch.long)
        for x in zip(*batch)
    ]
def pad_collate(batch):
    sent, label = zip(*batch)
    sent_pad = pad_sequence(sent, batch_first=True, padding_value=0)
    return sent_pad, torch.LongTensor(label)
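# For reference, a tiny hypothetical batch makes the behavior of pad_collate
# concrete: sentences are right-padded with 0 and labels are stacked into a
# LongTensor. The sample values below are made up.
import torch

batch = [(torch.tensor([1, 2, 3]), 0), (torch.tensor([4, 5]), 1)]
sent_pad, labels = pad_collate(batch)
print(sent_pad)  # tensor([[1, 2, 3], [4, 5, 0]])
print(labels)    # tensor([0, 1])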
def collate(batch):
    q_tokens, qids, qrel_sets = zip(*batch)
    padded = pad_sequence(q_tokens, batch_first=True).to(dtype=torch.long)
    return padded, qids, qrel_sets
def _process_data_batch(self, data_batch):
    # pad the sequences, each seq must be (L, *)
    seq_lens = [len(x) for x in data_batch]
    seq_batch = pad_sequence(data_batch, batch_first=True)
    return seq_batch.unsqueeze(1).cuda(), seq_lens
def collate_fn(samples):
    # samples: [(seq_len, channel), ...]
    samples = pad_sequence(samples, batch_first=True)
    # samples: (batch_size, max_len, channel)
    return samples.transpose(-1, -2).contiguous()
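# A quick shape check of what this collate_fn returns, using made-up sample
# sizes (channel == 2 here is an assumption): padding happens along the time
# axis, then the last two axes are swapped to give (batch, channel, max_len).
import torch

samples = [torch.randn(5, 2), torch.randn(3, 2)]
out = collate_fn(samples)
print(out.shape)  # torch.Size([2, 2, 5]) -> (batch_size, channel, max_len)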
def forward(self, features, labels, records, logger, prefix, global_step, **kwargs):
    """
    This function will be used in both train/dev/test; you can use
    self.training (bool) to control the different behavior for training
    or evaluation (dev/test).

    Args:
        features:
            list of unpadded features [feat1, feat2, ...]
            each feat is in torch.FloatTensor and already put in the device
            assigned by command-line args

        your_other_contents1, ... :
            in the order defined by your dataloader (dataset + collate_fn)
            these are all in cpu, and you can move them to the same device
            as features

        records:
            defaultdict(list), by dumping contents into records,
            these contents can be averaged and logged on Tensorboard
            later by self.log_records

            Note1. downstream/runner.py will call self.log_records
                1. every log_step during training
                2. once after evaluating the whole dev/test dataloader

            Note2. log_step is defined in your downstream config

        logger:
            Tensorboard SummaryWriter, given here for logging/debugging
            convenience, please use f'{prefix}your_content_name' as key
            name to log your customized contents

        prefix:
            used to indicate downstream and train/test on Tensorboard
            eg. 'phone/train-'

        global_step:
            global_step in runner, which is helpful for Tensorboard logging

    Return:
        loss:
            the loss to be optimized, should not be detached
            a single scalar in torch.FloatTensor
    """
    features_pad = pad_sequence(features, batch_first=True)

    attention_mask = [torch.ones((feature.shape[0])) for feature in features]
    attention_mask_pad = pad_sequence(attention_mask, batch_first=True)
    attention_mask_pad = (1.0 - attention_mask_pad) * -100000.0

    features_pad = self.connector(features_pad)
    predicted = self.model(features_pad, attention_mask_pad.cuda())

    labels = torch.LongTensor(labels).to(features_pad.device)
    loss = self.objective(predicted, labels)

    predicted_classid = predicted.max(dim=-1).indices
    records['acc'] += (predicted_classid == labels).view(-1).cpu().float().tolist()

    if not self.training:
        # some evaluation-only processing, eg. decoding
        pass

    return loss
def collect(sequences):
    sequences = sorted(sequences, key=lambda x: x.size(), reverse=True)
    mols = [seq for seq in sequences]
    lengths = [seq.size(0) for seq in mols]
    return pad_sequence(mols, batch_first=True), lengths
def collate_fn(batch):
    token, label = zip(*batch)
    label = torch.tensor(label)
    token = pad_sequence(token, batch_first=True)
    return token, label
def forward(self, mode, features, utter_idx, labels, records, **kwargs):
    """
    Args:
        features:
            the features extracted by upstream
            put in the device assigned by command-line args

        labels:
            the speaker labels

        records:
            defaultdict(list), by appending scalars into records,
            these scalars will be averaged and logged on Tensorboard

        logger:
            Tensorboard SummaryWriter, given here for logging/debugging
            convenience, please use "self.downstream/your_content_name"
            as key name to log your customized contents

        global_step:
            global_step in runner, which is helpful for Tensorboard logging

    Return:
        loss:
            the loss to be optimized, should not be detached
    """
    features_pad = pad_sequence(features, batch_first=True)

    if self.modelrc['module'] == "XVector":
        # TDNN layers in XVector will decrease the total sequence length by fixed 14
        attention_mask = [torch.ones((feature.shape[0] - 14)) for feature in features]
    else:
        attention_mask = [torch.ones((feature.shape[0])) for feature in features]

    attention_mask_pad = pad_sequence(attention_mask, batch_first=True)
    attention_mask_pad = (1.0 - attention_mask_pad) * -100000.0

    features_pad = self.connector(features_pad)

    if mode == 'train':
        agg_vec = self.model(features_pad, attention_mask_pad.cuda())
        labels = torch.LongTensor(labels).to(features_pad.device)
        loss = self.objective(agg_vec, labels)
        records['loss'].append(loss.item())
        return loss

    elif mode in ['dev', 'test']:
        agg_vec = self.model.inference(features_pad, attention_mask_pad.cuda())
        agg_vec = torch.nn.functional.normalize(agg_vec, dim=-1)

        # separate batched data to pair data
        vec1, vec2 = self.separate_data(agg_vec)
        names1, names2 = self.separate_data(utter_idx)

        scores = self.score_fn(vec1, vec2).cpu().detach().tolist()
        records['scores'].extend(scores)
        records['labels'].extend(labels)
        records['pair_names'].extend(
            [f"{name1}_{name2}" for name1, name2 in zip(names1, names2)])

        return torch.tensor(0)
def collect_fn_quadkey(batch, data_source, sampler, region_processer,
                       loc2quadkey=None, k=5, with_trg_quadkey=True):
    src, trg = zip(*batch)
    user, loc, time, region = [], [], [], []
    data_size = []
    trg_ = []
    trg_probs_ = []
    for e in src:
        u_, l_, t_, r_, b_ = zip(*e)
        data_size.append(len(u_))
        user.append(torch.tensor(u_))
        loc.append(torch.tensor(l_))
        time.append(torch.tensor(t_))
        r_ = region_processer.numericalize(list(r_))  # (L, LEN_QUADKEY)
        region.append(r_)
    # (N, T); same for loc_ and time_, returned as (T, N) via .t()
    user_ = pad_sequence(user, batch_first=True)
    loc_ = pad_sequence(loc, batch_first=True)
    time_ = pad_sequence(time, batch_first=True)
    # (T, N, LEN_QUADKEY)
    region_ = pad_sequence(region, batch_first=False)

    if with_trg_quadkey:
        batch_trg_regs = []
        for i, seq in enumerate(trg):
            pos = torch.tensor([[e[1]] for e in seq])
            neg, probs = sampler(seq, k, user=seq[0][0])
            # (L, k+1), where k is the number of negative samples
            trg_seq = torch.cat([pos, neg], dim=-1)
            trg_.append(trg_seq)
            trg_regs = []
            for trg_seq_idx in range(trg_seq.size(0)):
                regs = []
                for loc in trg_seq[trg_seq_idx]:
                    regs.append(loc2quadkey[loc])
                trg_regs.append(region_processer.numericalize(regs))
            batch_trg_regs.append(torch.stack(trg_regs))
            trg_probs_.append(probs)
        # (N, T, k+1, LEN_QUADKEY)
        batch_trg_regs = pad_sequence(batch_trg_regs, batch_first=True)
        # ((1+k) * T, N, LEN_QUADKEY)
        batch_trg_regs = batch_trg_regs.permute(2, 1, 0, 3).contiguous().view(
            -1, batch_trg_regs.size(0), batch_trg_regs.size(3))
        trg_ = pad_sequence(trg_, batch_first=True)
        trg_probs_ = pad_sequence(trg_probs_, batch_first=True, padding_value=1.0)
        trg_ = trg_.permute(2, 1, 0).contiguous().view(-1, trg_.size(0))
        trg_nov_ = [[not e[-1] for e in seq] for seq in trg]
        return user_.t(), loc_.t(), time_.t(), region_, trg_, batch_trg_regs, \
            trg_nov_, trg_probs_, data_size
    else:
        for i, seq in enumerate(trg):
            pos = torch.tensor([[e[1]] for e in seq])
            neg, probs = sampler(seq, k, user=seq[0][0])
            trg_.append(torch.cat([pos, neg], dim=-1))
            trg_probs_.append(probs)
        trg_ = pad_sequence(trg_, batch_first=True)
        trg_probs_ = pad_sequence(trg_probs_, batch_first=True, padding_value=1.0)
        trg_ = trg_.permute(2, 1, 0).contiguous().view(-1, trg_.size(0))
        trg_nov_ = [[not e[-1] for e in seq] for seq in trg]
        return user_.t(), loc_.t(), time_.t(), region_, trg_, trg_nov_, \
            trg_probs_, data_size
def forward(self, sentences, doc_lens=[], batch_sent_lens=[], log=False):
    word_embeddings = self.embedding(sentences)
    batch_len, word_len, embedding_dim = word_embeddings.shape

    packed_word_embeddings = pack_padded_sequence(
        word_embeddings,
        torch.LongTensor(batch_sent_lens),
        batch_first=True,
        enforce_sorted=False,
    )
    packed_word_encoder_hidden_states = self.word_encoder(packed_word_embeddings)
    word_encoder_hidden_states, _ = pad_packed_sequence(
        packed_word_encoder_hidden_states, batch_first=True
    )
    word_attention_weights = self.word_attention(word_encoder_hidden_states)
    sentence_vectors = torch.squeeze(
        torch.matmul(
            torch.unsqueeze(word_attention_weights, 1), word_encoder_hidden_states
        )
    )

    n = 0
    batch_sentence_vectors = []
    for doc_len in doc_lens:
        batch_sentence_vectors.append(sentence_vectors[n:n + doc_len])
        n += doc_len

    padded_sentence_vectors = pad_sequence(batch_sentence_vectors, batch_first=True)
    packed_sentence_vectors = pack_padded_sequence(
        padded_sentence_vectors,
        torch.LongTensor(doc_lens),
        batch_first=True,
        enforce_sorted=False,
    )
    packed_sentence_encoder_hidden_states = self.sentence_encoder(packed_sentence_vectors)
    sentence_encoder_hidden_states, _ = pad_packed_sequence(
        packed_sentence_encoder_hidden_states, batch_first=True
    )
    sentence_attention_weights = self.sentence_attention(sentence_encoder_hidden_states)
    document_vectors = torch.squeeze(
        torch.matmul(
            torch.unsqueeze(sentence_attention_weights, 1),
            sentence_encoder_hidden_states,
        )
    )

    batch_probs = []
    Cs = []
    Ms = []
    Ns = []
    Ps = []
    Pros = []
    for doc_index, doc_len in enumerate(doc_lens):
        o = torch.zeros(
            2 * self.hparams.lstm_hidden_size,
            device=self.device,
        )
        document_vector = document_vectors[doc_index]
        probs = []
        for pos in range(doc_len):
            sentence_vector = padded_sentence_vectors[doc_index, pos, :]
            C = self.content(sentence_vector)
            M = self.salience(sentence_vector, document_vector)
            N = self.novelty(sentence_vector, torch.tanh(o))
            pos_forward = self.pos_forward_embed(
                torch.tensor([pos], dtype=torch.long, device=self.device)
            ).view(-1)
            pos_backward = self.pos_backward_embed(
                torch.tensor(
                    [doc_len - pos - 1], dtype=torch.long, device=self.device,
                )
            ).view(-1)
            positional_embedding = torch.cat((pos_forward, pos_backward))
            P = self.position(positional_embedding)
            prob = torch.sigmoid(C + M - N + P + self.bias)
            if doc_index == 0:
                Cs.append(C.item())
                Ms.append(M.item())
                Ns.append(N.item())
                Ps.append(P.item())
                Pros.append(prob.item())
                # print(C, M, N, P, prob)
            o = o + (prob * sentence_vector)
            if log:
                print(
                    f"doc {doc_index + 1}, sentence {pos + 1}, C: {C.item():10.4f}, "
                    f"M: {M.item():10.4f}, N: {N.item():10.4f}, "
                    f"bias: {self.bias.item():10.4f}, prob: {prob.item():10.4f}, o: {o}"
                )
            probs.append(prob)
        batch_probs.append(torch.cat(probs))
    return pad_sequence(batch_probs, batch_first=True)
def collate(examples: List[torch.Tensor]):
    if tokenizer._pad_token is None:
        return pad_sequence(examples, batch_first=True)
    return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)
def _collate_fn(self, data):
    word = pad_sequence([x[0] for x in data], batch_first=True, padding_value=0)
    label = pad_sequence([x[-1] for x in data], batch_first=True, padding_value=0)
    return word, label
stats_loss = []
t0 = time.time()
for i in range(max_iter):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # data batch : TODO - Loader needed
    rnd_idx = np.random.randint(len(libri), size=batch_size)
    batches = [libri[r] for r in rnd_idx]

    xbs = [torch.tensor(b[0]) for b in batches]  # t, d
    xlen = torch.tensor([get_padded_len(_x, n_conv_layers) for _x in xbs]).cuda()
    # xlen = torch.tensor([len(_x) for _x in xbs]).cuda()
    x_batches = pad_sequence(xbs).cuda()

    ybs = [b[1][:-1] for b in batches]
    y_inputs = pad_sequence(ybs, padding_value=EOS_TOKEN).long().cuda()
    ybs = [b[1][1:] for b in batches]
    y_outputs = pad_sequence(ybs, padding_value=PAD_TOKEN).long().cuda()

    out_enc = encoder(x_batches)
    h_enc = torch.zeros([batch_size, h_dim]).cuda()

    # model forward path
    # out_enc, h_enc = encoder(x_batches, xlen)
    prediction, ce_loss = decoder(y_inputs, y_outputs, out_enc, h_enc, i)

    ce_loss.backward()
    _ = nn.utils.clip_grad_norm_(encoder.parameters(), grad_clip)
def test_step(self, batch, batch_idx):
    sent_order = []
    sum_ids = []
    sum_sent_lens = []
    sum_sent_toks = []
    sum_len = 0

    source_ids_flat_pad, sum_ids_flat_pad, target_dist, counts, masks, metadata = batch
    gold_sent_order = list(np.argsort(tens_to_np(-target_dist.squeeze())))
    num_sents = len(metadata['source_sents'][0])
    mrn = metadata['mrn'][0]
    rel_ranks = []
    account = metadata['account'][0]

    for _ in range(min(num_sents, MAX_GEN_SUM_SENTS)):
        i0 = source_ids_flat_pad.to(self.device_name)
        i1 = sum_ids_flat_pad.to(self.device_name)
        i2 = {}
        i3 = {}
        for k, v in counts.items():
            i2[k] = v.to(self.device_name)
        for k, v in masks.items():
            i3[k] = v.to(self.device_name)
        y_hat_scores = self(i0, i1, i2, i3)
        y_hat_scores = tens_to_np(y_hat_scores.squeeze(0))
        if len(sent_order) > 0:
            y_hat_scores[sent_order] = float('-inf')
        max_idx = np.argmax(y_hat_scores)
        rel_ranks.append(gold_sent_order.index(max_idx))
        sent_sum_len = counts['source_sent_lens_flat'][max_idx]
        sum_len += sent_sum_len
        if sum_len > MAX_GEN_SUM_TOK_CT:
            break
        sent_order.append(max_idx)
        chosen_sent_toks = metadata['source_sents'][0][max_idx]
        sum_sent_toks.append(chosen_sent_toks)
        num_sum_sents = len(sent_order)
        chosen_sent_ids = list(tens_to_np(source_ids_flat_pad[max_idx][:sent_sum_len]))
        sum_ids.append(chosen_sent_ids)
        sum_sent_lens.append(sent_sum_len)
        sum_ids_flat = list(map(torch.LongTensor, sum_ids))
        sum_ids_flat_pad = pad_sequence(sum_ids_flat, batch_first=True, padding_value=0)
        sum_att_mask = mask_2D([num_sum_sents])
        counts['sum_sent_lens_flat'] = torch.LongTensor(sum_sent_lens)
        counts['sum_lens'] = torch.LongTensor([len(sent_order)])
        masks['sum_att_mask'] = sum_att_mask

    result = pl.EvalResult()
    result.mrn = mrn
    result.account = account
    result.sent_order = ','.join([str(s) for s in sent_order])
    result.sum_sent_toks = ' <s> '.join(sum_sent_toks)
    result.reference = metadata['reference'][0]
    result.rel_r1 = rel_ranks[0]
    result.rel_r2 = rel_ranks[1]
    result.rel_r3 = rel_ranks[2]
    result.rel_r4 = rel_ranks[3]
    result.rel_r5 = rel_ranks[4]
    result.rel_r5plus = sum(rel_ranks[5:]) / float(len(rel_ranks[5:]))
    return result
def __call__(self, batch):
    dat = pd.DataFrame(batch)
    return [self.pad_collate(dat[i]) if i == 0 else
            pad_sequence(dat[i], True) if i < 7 else
            dat[i].tolist() for i in dat]
        embeddings.weight.data[idx] = torch.FloatTensor(values)


model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM, BS).to(device)
load_fastext_embeeding(model.word_embeds, word_to_ix, "wiki-news-300d-1M.vec")
model.word_embeds.requires_grad = False
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

best_f1 = -1
for epoch in range(epochs):
    for i, batch in enumerate(train_dataloader):
        model.zero_grad()
        sents, labs, lens = batch
        sents = pad_sequence(sents, batch_first=True).to(device)
        labs = pad_sequence(labs, batch_first=True).to(device)
        lens = torch.tensor(lens).to(device)
        lens, idx = torch.sort(lens, descending=True)
        sents = sents[idx]
        labs = labs[idx]
        loss = model.neg_log_likelihood(sents, labs, lens)
        loss.backward()
        optimizer.step()

        score, preds = model(sents, lens)
        true_labs = [id2lab(labs[i, :l]) for i, l in enumerate(lens)]
        pred_labs = [id2lab(preds[i, :l]) for i, l in enumerate(lens)]
        acc = accuracy_score(true_labs, pred_labs)
        f1 = f1_score(true_labs, pred_labs)
        print("Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}, train f1 {:.4f}".format(
            epoch, i, loss.item(), acc, f1))