def _filter_labels(self, start_ids: torch.LongTensor, end_ids: torch.LongTensor, predicates: torch.LongTensor, srls: List[List[SRLSpan]]) -> torch.LongTensor: batch_size, num_spans = start_ids.size() num_predicates = predicates.size(1) device = start_ids.device start_ids = start_ids.cpu().numpy() end_ids = end_ids.cpu().numpy() predicates = predicates.cpu().numpy() batch_predicates = [{pred: idx for idx, pred in enumerate(preds)} for preds in predicates] batch_spans = [{(l, r): idx for idx, (l, r) in enumerate(zip(starts, ends))} for starts, ends in zip(start_ids, end_ids)] gold_labels = torch.zeros(batch_size, num_predicates * num_spans, dtype=torch.long) for b_idx in range(batch_size): for srl in srls[b_idx]: span_idx = batch_spans[b_idx].get((srl.start, srl.end), None) predicate_idx = batch_predicates[b_idx].get( srl.predicate, None) if span_idx is not None and predicate_idx is not None: label_idx = predicate_idx * num_spans + span_idx gold_labels[b_idx, label_idx] = self.label_vocab[srl.label] gold_labels = gold_labels.to(device=device) return gold_labels
def _tensor_to_poem(self, src: torch.LongTensor, trg: torch.LongTensor) -> List[str]:
    # src: [src_seq_len, batch_size]
    # trg: [trg_seq_len, batch_size]
    func_src_itos = np.vectorize(lambda x: self.vocab_input.itos[x])
    func_trg_itos = np.vectorize(lambda x: self.vocab_output.itos[x])
    array_poem_src = func_src_itos(src.cpu().numpy().T)
    array_poem_trg = func_trg_itos(trg.cpu().numpy().T)

    batch_size = array_poem_trg.shape[0]
    array_output_init_token = func_trg_itos(
        np.ones((batch_size, 1), dtype=np.int64)
        * self.vocab_output[self.ct.output_init_token])
    array_split_poem = np.concatenate(
        [array_poem_src, array_output_init_token, array_poem_trg], axis=1)
    # array_split_poem: [batch_size, src_seq_len + 1 + trg_seq_len]

    def func_poem_str(row):
        poem = "".join(
            filter(
                lambda word: word not in (self.ct.pad_token, self.ct.input_init_token),
                row))
        # Multiple <EOP> tokens may be predicted; keep only the text before the first one.
        poem = poem.split(self.ct.output_eos_token)[0]
        return poem

    poem_list = np.apply_along_axis(func1d=func_poem_str, axis=1, arr=array_split_poem).tolist()
    return poem_list
def __call__(self, outputs: torch.Tensor, targets: torch.LongTensor, masks: torch.LongTensor = None, relevant_ignored: torch.LongTensor = None, irrelevant_ignored: torch.LongTensor = None) -> None: if not len(outputs.shape) == len(targets.shape): targets = torch.unsqueeze(targets, 1) targets = torch.zeros_like(outputs).scatter_(1, targets, 1) if relevant_ignored is None: relevant_ignored = np.zeros(outputs.shape[0]) else: relevant_ignored = relevant_ignored.cpu().numpy() if irrelevant_ignored is None: irrelevant_ignored = np.zeros(outputs.shape[0]) else: irrelevant_ignored = irrelevant_ignored.cpu().numpy() if masks is None: masks = torch.ones_like(targets).int() else: # try converting to int masks = masks.int() for output, target, mask, rel_ignored, irrel_ignored in zip( outputs, targets, masks, relevant_ignored, irrelevant_ignored): valid_docs = torch.sum(mask) output, target = output[:valid_docs], target[:valid_docs] output, target = paired_sort(output, target) output = self._cutoff(output) confusion = self.confusion_matrix(output, target) if torch.sum(target).cpu().numpy() + rel_ignored == 0: if self.version == 'tuning': # ignore both miss and false alarm when tuning pass elif self.version == 'program': # ignore miss when calculating program target false_alarm = confusion[0, 1] / float( sum(confusion[0, :]) + irrel_ignored) self.false_alarm.append(false_alarm) else: miss = (confusion[1, 0] + rel_ignored ) / float(sum(confusion[1, :]) + rel_ignored) false_alarm = confusion[0, 1] / float( sum(confusion[0, :]) + irrel_ignored) self.false_alarm.append(false_alarm) self.miss.append(miss)
def kNN(pos: torch.Tensor, edges: torch.LongTensor, neighbors_num: int = 256, cutoff: int = 3):
    device = pos.device
    if len(pos.shape) != 2 or pos.shape[1] != 3:
        raise ValueError("The vertices matrix must have shape [n,3] and type float!")
    if len(edges.shape) != 2 or edges.shape[1] != 2 or edges.dtype != torch.long:
        raise ValueError("The edge index matrix must have shape [m,2] and type long!")

    n = pos.shape[0]
    k = neighbors_num

    edge_index = edges.cpu().clone().detach().numpy()

    # Build the mesh graph; these steps are all necessary, unfortunately.
    graph = nx.Graph()
    graph.add_nodes_from(range(n))
    graph.add_edges_from(edge_index)

    N = np.zeros([n, k], dtype=np.int64)
    spiral = nx.all_pairs_shortest_path(graph, cutoff=cutoff)
    for node_idx, neighborhood in spiral:
        if len(neighborhood) < k:
            raise RuntimeError(
                "Node {} has only {} neighbours, increase cutoff value!".format(
                    node_idx, len(neighborhood)))
        for i, neighbour_idx in enumerate(neighborhood.keys()):
            if i >= k:
                break
            N[node_idx, i] = neighbour_idx

    node_neighbours_matrix = torch.tensor(N, device=device, dtype=torch.long)
    return node_neighbours_matrix
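# A minimal, hypothetical usage sketch for kNN above (toy graph and sizes, not taken
# from the original code base); it assumes networkx and numpy are importable as nx / np.
import torch

# 6 vertices connected as a simple path: 0-1-2-3-4-5.
pos = torch.rand(6, 3)
edges = torch.tensor([[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]], dtype=torch.long)

# With cutoff=3 every node reaches at least 4 nodes (itself included),
# so neighbors_num=4 does not trigger the RuntimeError.
neighbours = kNN(pos, edges, neighbors_num=4, cutoff=3)
print(neighbours.shape)  # torch.Size([6, 4])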
def marginal_score(
    entity_relation_batch: torch.LongTensor,
    per_entity: Optional[scipy.sparse.csr_matrix],
    per_relation: Optional[scipy.sparse.csr_matrix],
    num_entities: int,
) -> torch.FloatTensor:
    """Shared code for computing entity scores from marginals."""
    batch_size = entity_relation_batch.shape[0]

    # base case
    if per_entity is None and per_relation is None:
        return torch.full(size=(batch_size, num_entities), fill_value=1 / num_entities)

    e, r = entity_relation_batch.cpu().numpy().T

    if per_relation is not None and per_entity is None:
        scores = per_relation[r]
    elif per_relation is None and per_entity is not None:
        scores = per_entity[e]
    elif per_relation is not None and per_entity is not None:
        e_score = per_entity[e]
        r_score = per_relation[r]
        scores = e_score.multiply(r_score)
        scores = sklearn_normalize(scores, norm="l1", axis=1)
    else:
        raise AssertionError  # for mypy

    # note: we need to work with dense arrays only to comply with returning torch tensors.
    # Otherwise, we could stay sparse here, with a potential of a huge memory benefit on large datasets!
    return torch.from_numpy(scores.todense())
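# A hedged example of how marginal_score might be invoked (toy values only, not from the
# original code base); it assumes scipy and the sklearn-based sklearn_normalize used above
# are importable alongside the function.
import numpy as np
import scipy.sparse
import torch

num_entities, num_relations = 4, 2
# Hypothetical per-head and per-relation marginal distributions over tail entities.
per_entity = scipy.sparse.csr_matrix(np.full((num_entities, num_entities), 0.25))
per_relation = scipy.sparse.csr_matrix(np.full((num_relations, num_entities), 0.25))

# A batch of (head, relation) pairs.
batch = torch.tensor([[0, 1], [2, 0]], dtype=torch.long)
scores = marginal_score(batch, per_entity, per_relation, num_entities)
print(scores.shape)  # torch.Size([2, 4])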
def batch_translate( self, src: torch.LongTensor, src_mask: torch.LongTensor, ): """ Translates a minibatch of inputs from source language to target language. Args: src: minibatch of inputs in the src language (batch x seq_len) src_mask: mask tensor indicating elements to be ignored (batch x seq_len) Returns: translations: a list strings containing detokenized translations inputs: a list of string containing detokenized inputs """ mode = self.training try: self.eval() src_hiddens = self.encoder(input_ids=src, encoder_mask=src_mask) beam_results = self.beam_search(encoder_hidden_states=src_hiddens, encoder_input_mask=src_mask) beam_results = self.filter_predicted_ids(beam_results) translations = [self.decoder_tokenizer.ids_to_text(tr) for tr in beam_results.cpu().numpy()] inputs = [self.encoder_tokenizer.ids_to_text(inp) for inp in src.cpu().numpy()] if self.target_processor is not None: translations = [ self.target_processor.detokenize(translation.split(' ')) for translation in translations ] if self.source_processor is not None: inputs = [self.source_processor.detokenize(item.split(' ')) for item in inputs] finally: self.train(mode=mode) return inputs, translations
def remove_features(self, context: "DynamicModule", remaining_offsets: LongTensor,
                    log: GarbageCollectionLog) -> None:
    """Remove features from this FeatureBag by specifying the ones to keep

    Parameters
    ----------
    context
        The module requesting to remove the features
    remaining_offsets
        The offsets (relative to the module) to keep
    log
        The garbage collector log to register any change made to the parameters
    """
    assert context in self.module_awareness, (
        "The module requesting the feature removal is not a listener")

    # We do not care about the torch format here
    remaining_offsets = remaining_offsets.cpu().numpy()

    # Compute the features from the offsets in context
    remaining_features = self.offsets_to_features(context, remaining_offsets)

    if len(remaining_features) == len(self.latest_features):
        # We are effectively not modifying the features
        return

    # Remove the useless features
    self.latest_features = sorted_list_intersection(remaining_features, self.latest_features)
    self.propagate_changes(log)
def decode(self, char_ids_tensor: torch.LongTensor) -> str: """ The inverse of `forward`. Keeps the start, end and pad indices. """ char_ids = char_ids_tensor.cpu().detach().tolist() out = [] buf = [] for c in char_ids: if c < 256: buf.append(c) else: if buf: out.append(bytes(buf).decode()) buf = [] if c == self.start_idx: out.append(self.start_token) elif c == self.end_idx: out.append(self.end_token) elif c == self.pad_idx: out.append(self.pad_token) if buf: # in case some are left out.append(bytes(buf).decode()) return "".join(out)
def forward(self, sent_rep):
    # sent_rep: (batch_size, max_seq_len, embedding_dim)
    total_length = sent_rep.size(1)

    # len() over the batch axis yields the padded length for every sample,
    # so the sequence below is packed with max_seq_len for each element.
    vectorized_seqs = sent_rep.cpu().numpy()
    input_lengths = LongTensor(list(map(len, vectorized_seqs)))

    packed_input = pack_padded_sequence(sent_rep, input_lengths.cpu().numpy(), batch_first=True)
    packed_output, (h_n, c_n) = self.lstm(packed_input)
    output, _ = pad_packed_sequence(packed_output, batch_first=True, total_length=total_length)

    # output: (batch_size, max_seq_len, hidden_size * num_directions)
    final_feature_map = self.dropout(output)

    x = self.activation_fn(final_feature_map)
    x = self.dropout(x)
    x = self.out_proj(x)
    return x
def _get_gold_answer(self, gold_answer_representations: Dict[str, torch.LongTensor], log_probs: torch.LongTensor, mask: torch.LongTensor) -> torch.LongTensor: answer_as_text_to_disjoint_bios = gold_answer_representations[ 'answer_as_text_to_disjoint_bios'] answer_as_list_of_bios = gold_answer_representations[ 'answer_as_list_of_bios'] span_bio_labels = gold_answer_representations['span_bio_labels'] with torch.no_grad(): answer_as_list_of_bios = answer_as_list_of_bios * mask.unsqueeze(1) if answer_as_text_to_disjoint_bios.sum() > 0: # TODO: verify correctness (Elad) full_bio = span_bio_labels if self._generation_top_k > 0: most_likely_predictions, _ = viterbi_decode( log_probs.cpu(), self._bio_allowed_transitions, top_k=self._generation_top_k) most_likely_predictions = torch.FloatTensor( most_likely_predictions).to(log_probs.device) # ^ Should be converted to tensor most_likely_predictions = most_likely_predictions * mask.unsqueeze( 1) generated_list_of_bios = self._filter_correct_predictions( most_likely_predictions, answer_as_text_to_disjoint_bios, full_bio) is_pregenerated_answer_format_mask = ( answer_as_list_of_bios.sum( (1, 2)) > 0).unsqueeze(-1).unsqueeze(-1).long() bio_seqs = torch.cat( (answer_as_list_of_bios, (generated_list_of_bios * (1 - is_pregenerated_answer_format_mask))), dim=1) bio_seqs = self._add_full_bio(bio_seqs, full_bio) else: is_pregenerated_answer_format_mask = ( answer_as_list_of_bios.sum((1, 2)) > 0).long() bio_seqs = torch.cat( (answer_as_list_of_bios, (full_bio * (1 - is_pregenerated_answer_format_mask ).unsqueeze(-1)).unsqueeze(1)), dim=1) else: bio_seqs = answer_as_list_of_bios return bio_seqs
def decode(self, tokens: torch.LongTensor): assert tokens.dim() == 1 tokens = tokens.cpu().numpy() if tokens[0] == self.task.source_dictionary.bos(): tokens = tokens[1:] # remove <s> eos_mask = (tokens == self.task.source_dictionary.eos()) doc_mask = eos_mask[1:] & eos_mask[:-1] sentences = np.split(tokens, doc_mask.nonzero()[0] + 1) sentences = [self.bpe.decode(self.task.source_dictionary.string(s)) for s in sentences] if len(sentences) == 1: return sentences[0] return sentences
def tensor_to_df( self, tensor: torch.LongTensor, **kwargs: Union[torch.Tensor, np.ndarray, Sequence], ) -> pd.DataFrame: """Take a tensor of triples and make a pandas dataframe with labels. :param tensor: shape: (n, 3) The triples, ID-based and in format (head_id, relation_id, tail_id). :param kwargs: Any additional number of columns. Each column needs to be of shape (n,). Reserved column names: {"head_id", "head_label", "relation_id", "relation_label", "tail_id", "tail_label"}. :return: A dataframe with n rows, and 6 + len(kwargs) columns. """ # Input validation additional_columns = set(kwargs.keys()) forbidden = additional_columns.intersection(TRIPLES_DF_COLUMNS) if len(forbidden) > 0: raise ValueError( f'The key-words for additional arguments must not be in {TRIPLES_DF_COLUMNS}, but {forbidden} were ' f'used.', ) # convert to numpy tensor = tensor.cpu().numpy() data = dict(zip(['head_id', 'relation_id', 'tail_id'], tensor.T)) # vectorized label lookup entity_id_to_label = np.vectorize(self.entity_id_to_label.__getitem__) relation_id_to_label = np.vectorize( self.relation_id_to_label.__getitem__) for column, id_to_label in dict( head=entity_id_to_label, relation=relation_id_to_label, tail=entity_id_to_label, ).items(): data[f'{column}_label'] = id_to_label(data[f'{column}_id']) # Additional columns for key, values in kwargs.items(): # convert PyTorch tensors to numpy if torch.is_tensor(values): values = values.cpu().numpy() data[key] = values # convert to dataframe rv = pd.DataFrame(data=data) # Re-order columns columns = list(TRIPLES_DF_COLUMNS) + sorted( set(rv.columns).difference(TRIPLES_DF_COLUMNS)) return rv.loc[:, columns]
def predict(self, x: torch.FloatTensor, y: torch.LongTensor):
    x = x.to(self.device)
    y = y.to(self.device)

    self.model.eval()
    outputs = self.model(x)

    if self.multilabel:
        preds = outputs.cpu().detach() >= BINARY_CLASSIFICATION_PROBABILITY_THRESHOLD
    else:
        preds = torch.argmax(outputs, dim=1).cpu().detach()

    micro = f1_score(y_true=y.cpu().detach(), y_pred=preds, average='micro')
    macro = f1_score(y_true=y.cpu().detach(), y_pred=preds, average='macro')
    return preds, micro, macro
def repeat_tensor(input: torch.Tensor, repeats: torch.LongTensor, dim: int = 0) -> torch.Tensor: """ Repeats each entry of a tensor along a given dimension according to a tensor of repetitions, gradients can be computed w.r.t. `tensor`, but not w.r.t. `repeats` Args: input: a tensor to repeat, e.g. [x, y, z] repeats: the non-negative number of repetition of each entry of the tensor, e.g. [2, 3, 1] dim: the dimension used to repeat the tensor Returns: A tensor with repeated entries that has the same type and placement as `tensor` Examples: Each element of `x` is repeated according to the corresponding number of repetitions in `repeats` >>> x = torch.tensor([a, b, c, d]) >>> repeats = torch.tensor([2, 3, 0, 1]) >>> repeat_tensor(x, repeats, dim=0) tensor([a, a, b, b, b, d]) Gradient information can be propagated through the repetition >>> x = torch.tensor([a, b, c, d], requires_grad=True) >>> repeats = torch.tensor([2, 3, 0, 1]) >>> repeat_tensor(x, repeats, dim=0).sum().backward() >>> x.grad tensor([2., 3., 0., 1.]) """ import warnings warnings.warn( 'Use torch.repeat_interleave instead of torchgraphs.utils.repeat_tensor', DeprecationWarning) if repeats.dim() != 1: raise ValueError( f'`repeats` should have a single dimension, got shape {repeats.shape}' ) if (repeats < 0).any(): raise ValueError(f'All entries in `repeats` should be non-negative') if len(repeats) != input.shape[dim]: raise ValueError( f'`input.shape[dim]` should match `len(repeats)`, got {input.shape[dim]} and {len(repeats)}' ) index = input.new_tensor(np.arange(len(repeats)).repeat( repeats.cpu().numpy()), dtype=torch.long) return torch.index_select(input, index=index, dim=dim)
def decode(self, tokens: torch.LongTensor):
    assert tokens.dim() == 1
    tokens = tokens.cpu().numpy()
    while tokens[0] == self.task.source_dictionary.bos():
        tokens = tokens[1:]  # remove <s>
    eos_mask = tokens == self.task.source_dictionary.eos()
    doc_mask = eos_mask[1:] & eos_mask[:-1]
    sentences = np.split(tokens, doc_mask.nonzero()[0] + 1)
    # Return raw token ids per sentence instead of BPE-decoded strings.
    sentences = [s.tolist() for s in sentences]
    if len(sentences) == 1:
        return sentences[0]
    return sentences
def tensor_to_df( tensor: torch.LongTensor, **kwargs: Union[torch.Tensor, np.ndarray, Sequence], ) -> pandas.DataFrame: """Take a tensor of triples and make a pandas dataframe with labels. :param tensor: shape: (n, 3) The triples, ID-based and in format (head_id, relation_id, tail_id). :param kwargs: Any additional number of columns. Each column needs to be of shape (n,). Reserved column names: {"head_id", "head_label", "relation_id", "relation_label", "tail_id", "tail_label"}. :return: A dataframe with n rows, and 3 + len(kwargs) columns. :raises ValueError: If a reserved column name appears in kwargs. """ # Input validation additional_columns = set(kwargs.keys()) forbidden = additional_columns.intersection(TRIPLES_DF_COLUMNS) if len(forbidden) > 0: raise ValueError( f"The key-words for additional arguments must not be in {TRIPLES_DF_COLUMNS}, but {forbidden} were " f"used.", ) # convert to numpy tensor = tensor.cpu().numpy() data = dict(zip(["head_id", "relation_id", "tail_id"], tensor.T)) # Additional columns for key, values in kwargs.items(): # convert PyTorch tensors to numpy if isinstance(values, torch.Tensor): values = values.cpu().numpy() data[key] = values # convert to dataframe rv = pandas.DataFrame(data=data) # Re-order columns columns = list(TRIPLES_DF_COLUMNS[::2]) + sorted( set(rv.columns).difference(TRIPLES_DF_COLUMNS)) return rv.loc[:, columns]
def mlm_mask_tokens( inputs: torch.LongTensor, tokenizer, mlm_probability) -> Tuple[torch.LongTensor, torch.LongTensor]: """From HuggingFace""" device = inputs.device inputs = inputs.cpu().clone() labels = inputs.clone() # We sample a few tokens in each sequence for masked-LM training # (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa) probability_matrix = torch.full(labels.shape, mlm_probability) special_tokens_mask = [ tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist() ] probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0) # noinspection PyProtectedMember if tokenizer._pad_token is not None: padding_mask = labels.eq(tokenizer.pad_token_id) probability_matrix.masked_fill_(padding_mask, value=0.0) masked_indices = torch.bernoulli(probability_matrix).bool() labels[ ~masked_indices] = NON_MASKED_TOKEN_LABEL_ID # We only compute loss on masked tokens # 80% of the time, we replace masked input tokens with tokenizer.mask_token ([MASK]) indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices inputs[indices_replaced] = tokenizer.convert_tokens_to_ids( tokenizer.mask_token) # 10% of the time, we replace masked input tokens with random word indices_random = (torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced) random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long) inputs[indices_random] = random_words[indices_random] # The rest of the time (10% of the time) we keep the masked input tokens unchanged # noinspection PyTypeChecker return inputs.to(device), labels.to(device)
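# A hedged usage sketch for mlm_mask_tokens (not from the original code base); it assumes
# a HuggingFace tokenizer and that NON_MASKED_TOKEN_LABEL_ID is the ignore index defined
# alongside the function above.
from transformers import AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
batch = tokenizer(["a small example sentence", "another one"],
                  padding=True, return_tensors="pt")

inputs, labels = mlm_mask_tokens(batch["input_ids"], tokenizer, mlm_probability=0.15)
# Fraction of positions selected for MLM prediction (those not carrying the ignore label).
print((labels != NON_MASKED_TOKEN_LABEL_ID).float().mean())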
def decode(self, tokens: torch.LongTensor, dict): assert tokens.dim() == 1 tokens = tokens.cpu().numpy() if tokens[0] == self.src_dict.bos(): tokens = tokens[1:] # remove <s> eos_mask = (tokens == self.src_dict.eos()) doc_mask = eos_mask[1:] & eos_mask[:-1] sentences = np.split(tokens, doc_mask.nonzero()[0] + 1) new_sentences = [] for s in sentences: _s = dict.string(s, extra_symbols_to_ignore=[ 0, 1, 2, 3, 50262, 50263, 50264, 50265 ]) #print(s, _s) _s = self.bpe.decode(_s) new_sentences.append(_s) sentences = new_sentences #sentences = [self.bpe.decode(self.src_dict.string(s)) for s in sentences] if len(sentences) == 1: return sentences[0] return sentences
def draw_img_preds(img: Tensor, bboxes: Tensor, bbox_labels: LongTensor,
                   img_size: Tuple[int, int]) -> np.ndarray:
    """
    Args:
        img: image tensor, (C, H, W)
        bboxes: xyxy order, normalized to [0, 1]
        bbox_labels: integer class label for each box
        img_size: (h, w)
    """
    img = TF.resize(img.squeeze(), img_size).cpu()
    img = torch.clamp(img, 0, 255).round().to(dtype=torch.uint8).permute(1, 2, 0).cpu().numpy()

    bboxes = torch.clamp(bboxes, min=0, max=1)
    if bboxes.shape[0] != 0:
        bboxes[:, [0, 2]] *= img_size[1]  # x
        bboxes[:, [1, 3]] *= img_size[0]  # y
    bboxes = bboxes.round().to(dtype=torch.int, device="cpu").tolist()
    bbox_labels = bbox_labels.cpu().tolist()

    img = draw_bbox(img, bboxes, bbox_labels)
    return img
def _eval(self, x: torch.FloatTensor, y: torch.LongTensor):
    self.model.eval()
    x = x.to(self.device)
    y = y.to(self.device)

    outputs = self.model(x)
    loss = self.loss_fn(input=outputs, target=y)

    y = y.cpu().detach()
    if self.multilabel:
        pred = outputs.cpu().detach() >= BINARY_CLASSIFICATION_PROBABILITY_THRESHOLD
    else:
        pred = torch.argmax(outputs, dim=1).cpu().detach()

    micro = f1_score(y_true=y, y_pred=pred, average='micro')
    macro = f1_score(y_true=y, y_pred=pred, average='macro')
    return loss, micro, macro
def segment_lengths_to_ids(segment_lengths: torch.LongTensor) -> torch.LongTensor:
    """
    Args:
        segment_lengths: Non-negative lengths of the tensor segments

    Returns:
        A tensor containing ids for every element in the tensor to be segmented

    Examples:
        >>> segments = torch.tensor([2, 4, 3, 1])
        >>> segment_lengths_to_ids(segments)
        tensor([0, 0, 1, 1, 1, 1, 2, 2, 2, 3])
    """
    if segment_lengths.dim() != 1:
        raise ValueError(
            f'`segment_lengths` should have a single dimension, got shape {segment_lengths.shape}')
    if (segment_lengths < 0).any():
        raise ValueError('All entries in `segment_lengths` should be non-negative')
    return segment_lengths.new_tensor(
        np.arange(len(segment_lengths)).repeat(segment_lengths.cpu().numpy()))
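# A small usage sketch (not from the original source): the segment ids produced by
# segment_lengths_to_ids can drive a segment-wise sum via index_add_.
import torch

values = torch.arange(10, dtype=torch.float)  # 10 elements to be segmented
lengths = torch.tensor([2, 4, 3, 1])          # 4 segments
ids = segment_lengths_to_ids(lengths)         # tensor([0, 0, 1, 1, 1, 1, 2, 2, 2, 3])

sums = torch.zeros(len(lengths))
sums.index_add_(0, ids, values)
print(sums)  # tensor([ 1., 14., 21.,  9.])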
def forward(self, input_vec: torch.LongTensor, mask: torch.LongTensor, label: torch.LongTensor): start_logits = self.start_pos_predict(input_vec).squeeze(-1) end_logits = self.end_pos_predict(input_vec).squeeze(-1) start_log_probs = masked_log_softmax(start_logits, mask) end_log_probs = masked_log_softmax(end_logits, mask) # Info about the best span prediction start_logits = replace_masked_values(start_logits, mask, -1e7) end_logits = replace_masked_values(end_logits, mask, -1e7) best_span = None # Shape: (batch_size, 2) if input_vec.shape[0] != 0: best_span = get_best_span(start_logits, end_logits) if torch.LongTensor([-1, -1]) in label.cpu().detach(): label_fit_index = label[:, 0] != -1 label = label[label_fit_index] start_log_probs = start_log_probs[label_fit_index] end_log_probs = end_log_probs[label_fit_index] loss = self.NLL(start_log_probs, label[:, 0]) + self.NLL( end_log_probs, label[:, 1]) return loss, best_span
# embedded_seq_tensor.shape : (batch_size X max_seq_len X embedding_dim) = (3 X 8 X 4)
# (example values of embedded_seq_tensor omitted; padded positions all repeat the <pad> embedding)

## Step 7: Call pack_padded_sequence with embedded instances and sequence lengths ##
##-------------------------------------------------------------------------------##
packed_input = pack_padded_sequence(embedded_seq_tensor, seq_lengths.cpu().numpy(), batch_first=True)

# packed_input (PackedSequence) is a NamedTuple with 2 attributes: data and batch_sizes.
# packed_input.data interleaves the batch time-step-major (longest sequence first);
# example values omitted.
def forward( self, # type: ignore tokens: TextFieldTensors, spans: torch.LongTensor, metadata: List[Dict[str, Any]], pos_tags: TextFieldTensors = None, span_labels: torch.LongTensor = None, ) -> Dict[str, torch.Tensor]: """ # Parameters tokens : `TextFieldTensors`, required The output of `TextField.as_array()`, which should typically be passed directly to a `TextFieldEmbedder`. This output is a dictionary mapping keys to `TokenIndexer` tensors. At its most basic, using a `SingleIdTokenIndexer` this is : `{"tokens": Tensor(batch_size, num_tokens)}`. This dictionary will have the same keys as were used for the `TokenIndexers` when you created the `TextField` representing your sequence. The dictionary is designed to be passed directly to a `TextFieldEmbedder`, which knows how to combine different word representations into a single vector per token in your input. spans : `torch.LongTensor`, required. A tensor of shape `(batch_size, num_spans, 2)` representing the inclusive start and end indices of all possible spans in the sentence. metadata : `List[Dict[str, Any]]`, required. A dictionary of metadata for each batch element which has keys: tokens : `List[str]`, required. The original string tokens in the sentence. gold_tree : `nltk.Tree`, optional (default = `None`) Gold NLTK trees for use in evaluation. pos_tags : `List[str]`, optional. The POS tags for the sentence. These can be used in the model as embedded features, but they are passed here in addition for use in constructing the tree. pos_tags : `torch.LongTensor`, optional (default = `None`) The output of a `SequenceLabelField` containing POS tags. span_labels : `torch.LongTensor`, optional (default = `None`) A torch tensor representing the integer gold class labels for all possible spans, of shape `(batch_size, num_spans)`. # Returns An output dictionary consisting of: class_probabilities : `torch.FloatTensor` A tensor of shape `(batch_size, num_spans, span_label_vocab_size)` representing a distribution over the label classes per span. spans : `torch.LongTensor` The original spans tensor. tokens : `List[List[str]]`, required. A list of tokens in the sentence for each element in the batch. pos_tags : `List[List[str]]`, required. A list of POS tags in the sentence for each element in the batch. num_spans : `torch.LongTensor`, required. A tensor of shape (batch_size), representing the lengths of non-padded spans in `enumerated_spans`. loss : `torch.FloatTensor`, optional A scalar loss to be optimised. """ embedded_text_input = self.text_field_embedder(tokens) if pos_tags is not None and self.pos_tag_embedding is not None: embedded_pos_tags = self.pos_tag_embedding(pos_tags) embedded_text_input = torch.cat([embedded_text_input, embedded_pos_tags], -1) elif self.pos_tag_embedding is not None: raise ConfigurationError("Model uses a POS embedding, but no POS tags were passed.") mask = get_text_field_mask(tokens) # Looking at the span start index is enough to know if # this is padding or not. Shape: (batch_size, num_spans) span_mask = (spans[:, :, 0] >= 0).squeeze(-1) if span_mask.dim() == 1: # This happens if you use batch_size 1 and encounter # a length 1 sentence in PTB, which do exist. 
-.- span_mask = span_mask.unsqueeze(-1) if span_labels is not None and span_labels.dim() == 1: span_labels = span_labels.unsqueeze(-1) num_spans = get_lengths_from_binary_sequence_mask(span_mask) encoded_text = self.encoder(embedded_text_input, mask) span_representations = self.span_extractor(encoded_text, spans, mask, span_mask) if self.feedforward_layer is not None: span_representations = self.feedforward_layer(span_representations) logits = self.tag_projection_layer(span_representations) class_probabilities = masked_softmax(logits, span_mask.unsqueeze(-1)) output_dict = { "class_probabilities": class_probabilities, "spans": spans, "tokens": [meta["tokens"] for meta in metadata], "pos_tags": [meta.get("pos_tags") for meta in metadata], "num_spans": num_spans, } if span_labels is not None: loss = sequence_cross_entropy_with_logits(logits, span_labels, span_mask) self.tag_accuracy(class_probabilities, span_labels, span_mask) output_dict["loss"] = loss # The evalb score is expensive to compute, so we only compute # it for the validation and test sets. batch_gold_trees = [meta.get("gold_tree") for meta in metadata] if all(batch_gold_trees) and self._evalb_score is not None and not self.training: gold_pos_tags: List[List[str]] = [ list(zip(*tree.pos()))[1] for tree in batch_gold_trees ] predicted_trees = self.construct_trees( class_probabilities.cpu().data, spans.cpu().data, num_spans.data, output_dict["tokens"], gold_pos_tags, ) self._evalb_score(predicted_trees, batch_gold_trees) return output_dict
def forward( self, # type: ignore tokens: Dict[str, torch.LongTensor], spans: torch.LongTensor, metadata: List[Dict[str, Any]], span_labels: torch.LongTensor = None) -> Dict[str, torch.Tensor]: # pylint: disable=arguments-differ """ Parameters ---------- tokens : Dict[str, torch.LongTensor], required The output of ``TextField.as_array()``, which should typically be passed directly to a ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer`` tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens": Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used for the ``TokenIndexers`` when you created the ``TextField`` representing your sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``, which knows how to combine different word representations into a single vector per token in your input. spans : ``torch.LongTensor``, required. A tensor of shape ``(batch_size, num_spans, 2)`` representing the inclusive start and end indices of all possible spans in the sentence. span_labels : ``torch.LongTensor``, optional (default = None) A torch tensor representing the integer gold class labels for all possible spans, of shape ``(batch_size, num_spans)``. metadata : List[Dict[str, Any]], required. A dictionary of metadata for each batch element which has keys: tokens : ``List[str]``, required. The original string tokens in the sentence. gold_tree : ``nltk.Tree``, optional (default = None) Gold NLTK trees for use in evaluation. Returns ------- An output dictionary consisting of: class_probabilities : ``torch.FloatTensor`` A tensor of shape ``(batch_size, num_spans, span_label_vocab_size)`` representing a distribution over the label classes per span. spans : ``torch.LongTensor`` The original spans tensor. tokens : ``List[List[str]]``, required. A list of tokens in the sentence for each element in the batch. num_spans : ``torch.LongTensor``, required. A tensor of shape (batch_size), representing the lengths of non-padded spans in ``enumerated_spans``. loss : ``torch.FloatTensor``, optional A scalar loss to be optimised. """ embedded_text_input = self.text_field_embedder(tokens) mask = get_text_field_mask(tokens) # Looking at the span start index is enough to know if # this is padding or not. Shape: (batch_size, num_spans) span_mask = (spans[:, :, 0] >= 0).squeeze(-1).long() if span_mask.dim() == 1: # This happens if you use batch_size 1 and encounter # a length 1 sentence in PTB, which do exist. -.- span_mask = span_mask.unsqueeze(-1) num_spans = get_lengths_from_binary_sequence_mask(span_mask) encoded_text = self.encoder(embedded_text_input, mask) span_representations = self.span_extractor(encoded_text, spans, mask, span_mask) if self.feedforward_layer is not None: span_representations = self.feedforward_layer(span_representations) logits = self.tag_projection_layer(span_representations) class_probabilities = last_dim_softmax(logits, span_mask.unsqueeze(-1)) output_dict = { "class_probabilities": class_probabilities, "spans": spans, "tokens": [meta["tokens"] for meta in metadata], "num_spans": num_spans } if span_labels is not None: loss = sequence_cross_entropy_with_logits(logits, span_labels, span_mask) for metric in self.metrics.values(): metric(logits, span_labels, span_mask) output_dict["loss"] = loss # The evalb score is expensive to compute, so we only compute # it for the validation and test sets. 
batch_gold_trees = [meta.get("gold_tree") for meta in metadata] if all(batch_gold_trees ) and self._evalb_score is not None and not self.training: # TODO(Mark): Predict POS and use here instead of using the gold ones. gold_pos_tags: List[List[str]] = [ list(zip(*tree.pos()))[1] for tree in batch_gold_trees ] predicted_trees = self.construct_trees( class_probabilities.cpu().data, spans.cpu().data, num_spans.data, output_dict["tokens"], gold_pos_tags) self._evalb_score(predicted_trees, batch_gold_trees) return output_dict
def forward(self, # type: ignore tokens: Dict[str, torch.LongTensor], spans: torch.LongTensor, metadata: List[Dict[str, Any]], pos_tags: Dict[str, torch.LongTensor] = None, span_labels: torch.LongTensor = None) -> Dict[str, torch.Tensor]: # pylint: disable=arguments-differ """ Parameters ---------- tokens : Dict[str, torch.LongTensor], required The output of ``TextField.as_array()``, which should typically be passed directly to a ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer`` tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens": Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used for the ``TokenIndexers`` when you created the ``TextField`` representing your sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``, which knows how to combine different word representations into a single vector per token in your input. spans : ``torch.LongTensor``, required. A tensor of shape ``(batch_size, num_spans, 2)`` representing the inclusive start and end indices of all possible spans in the sentence. metadata : List[Dict[str, Any]], required. A dictionary of metadata for each batch element which has keys: tokens : ``List[str]``, required. The original string tokens in the sentence. gold_tree : ``nltk.Tree``, optional (default = None) Gold NLTK trees for use in evaluation. pos_tags : ``List[str]``, optional. The POS tags for the sentence. These can be used in the model as embedded features, but they are passed here in addition for use in constructing the tree. pos_tags : ``torch.LongTensor``, optional (default = None) The output of a ``SequenceLabelField`` containing POS tags. span_labels : ``torch.LongTensor``, optional (default = None) A torch tensor representing the integer gold class labels for all possible spans, of shape ``(batch_size, num_spans)``. Returns ------- An output dictionary consisting of: class_probabilities : ``torch.FloatTensor`` A tensor of shape ``(batch_size, num_spans, span_label_vocab_size)`` representing a distribution over the label classes per span. spans : ``torch.LongTensor`` The original spans tensor. tokens : ``List[List[str]]``, required. A list of tokens in the sentence for each element in the batch. pos_tags : ``List[List[str]]``, required. A list of POS tags in the sentence for each element in the batch. num_spans : ``torch.LongTensor``, required. A tensor of shape (batch_size), representing the lengths of non-padded spans in ``enumerated_spans``. loss : ``torch.FloatTensor``, optional A scalar loss to be optimised. """ embedded_text_input = self.text_field_embedder(tokens) if pos_tags is not None and self.pos_tag_embedding is not None: embedded_pos_tags = self.pos_tag_embedding(pos_tags) embedded_text_input = torch.cat([embedded_text_input, embedded_pos_tags], -1) elif self.pos_tag_embedding is not None: raise ConfigurationError("Model uses a POS embedding, but no POS tags were passed.") mask = get_text_field_mask(tokens) # Looking at the span start index is enough to know if # this is padding or not. Shape: (batch_size, num_spans) span_mask = (spans[:, :, 0] >= 0).squeeze(-1).long() if span_mask.dim() == 1: # This happens if you use batch_size 1 and encounter # a length 1 sentence in PTB, which do exist. 
-.- span_mask = span_mask.unsqueeze(-1) if span_labels is not None and span_labels.dim() == 1: span_labels = span_labels.unsqueeze(-1) num_spans = get_lengths_from_binary_sequence_mask(span_mask) encoded_text = self.encoder(embedded_text_input, mask) span_representations = self.span_extractor(encoded_text, spans, mask, span_mask) if self.feedforward_layer is not None: span_representations = self.feedforward_layer(span_representations) logits = self.tag_projection_layer(span_representations) class_probabilities = last_dim_softmax(logits, span_mask.unsqueeze(-1)) output_dict = { "class_probabilities": class_probabilities, "spans": spans, "tokens": [meta["tokens"] for meta in metadata], "pos_tags": [meta.get("pos_tags") for meta in metadata], "num_spans": num_spans } if span_labels is not None: loss = sequence_cross_entropy_with_logits(logits, span_labels, span_mask) self.tag_accuracy(class_probabilities, span_labels, span_mask) output_dict["loss"] = loss # The evalb score is expensive to compute, so we only compute # it for the validation and test sets. batch_gold_trees = [meta.get("gold_tree") for meta in metadata] if all(batch_gold_trees) and self._evalb_score is not None and not self.training: gold_pos_tags: List[List[str]] = [list(zip(*tree.pos()))[1] for tree in batch_gold_trees] predicted_trees = self.construct_trees(class_probabilities.cpu().data, spans.cpu().data, num_spans.data, output_dict["tokens"], gold_pos_tags) self._evalb_score(predicted_trees, batch_gold_trees) return output_dict
def forward(self, indices: torch.LongTensor, offsets: Optional[torch.LongTensor] = None,
            per_index_weights: Optional[torch.Tensor] = None):
    """
    Forward process to the embedding bag layer.
    :param indices: Tensor containing bags of indices into the embedding matrix.
    :param offsets: Only used when indices is 1D. offsets determines the starting index position
                    of each bag (sequence) in input.
    :param per_index_weights: a tensor of float / double weights, or None to indicate all weights
                    should be taken to be 1. If specified, per_index_weights must have exactly the
                    same shape as indices and is treated as having the same offsets, if those are
                    not None.
    :return: a #bags x embedding_dim Tensor.
    """
    # always move indices to cpu, as we need to get its corresponding minhash values from table in memory
    indices = indices.cpu()

    # Check input validation.
    if per_index_weights is not None and indices.size() != per_index_weights.size():
        raise ValueError("embedding_bag: If per_index_weights ({}) is not None, "
                         "then it must have the same shape as the indices ({})"
                         .format(per_index_weights.shape, indices.shape))
    if indices.dim() == 2:
        if offsets is not None:
            raise ValueError("if input is 2D, then offsets has to be None"
                             ", as input is treated as a mini-batch of"
                             " fixed length sequences. However, found "
                             "offsets of type {}".format(type(offsets)))
        offsets = torch.arange(0, indices.numel(), indices.size(1),
                               dtype=torch.long, device=indices.device)
        indices = indices.reshape(-1)
        if per_index_weights is not None:
            per_index_weights = per_index_weights.reshape(-1)
    elif indices.dim() == 1:
        if offsets is None:
            raise ValueError("offsets has to be a 1D Tensor but got None")
        if offsets.dim() != 1:
            raise ValueError("offsets has to be a 1D Tensor")
    else:
        raise ValueError("input has to be 1D or 2D Tensor,"
                         " but got Tensor of dimension {}".format(indices.dim()))

    num_bags = offsets.size(0)

    # get the min-hash for each category value, note that lsh_weight_index is in cpu memory
    lsh_weight_index = self._minhash_table[indices]
    # move the min-hash values to target device
    lsh_weight_index = lsh_weight_index.to(self.hashed_weight.device)
    lsh_weight_index %= self.lsh_weight_size

    # indices_embedding_vectors is a |indices| x |embedding_dim| tensor.
    indices_embedding_vectors = self.hashed_weight[lsh_weight_index]

    # multiply embedding vectors by weights
    if per_index_weights is not None:
        per_index_weights = per_index_weights.to(indices_embedding_vectors.device)
        indices_embedding_vectors *= per_index_weights[:, None]

    offsets2bag = make_offset2bag(offsets, indices)

    if self._mode == "sum" or self._mode == "mean":
        result = torch.zeros(num_bags, self.embedding_dim,
                             dtype=indices_embedding_vectors.dtype,
                             device=self.hashed_weight.device)
        result.index_add_(0, offsets2bag, indices_embedding_vectors)
        if self._mode == "sum":
            return result
        # self._mode == "mean":
        bag_size = make_bag_size(offsets, indices).to(result.device)
        result /= bag_size[:, None]
        return result
def forward( self, tokens: Dict[str, torch.LongTensor], spans: torch.LongTensor, metadata: List[Dict[str, Any]], pos_tags: Dict[str, torch.LongTensor] = None, span_labels: torch.LongTensor = None) -> Dict[str, torch.Tensor]: embedded_text_input = self.text_field_embedder(tokens) if pos_tags is not None and self.pos_tag_embedding is not None: embedded_pos_tags = self.pos_tag_embedding(pos_tags) embedded_text_input = torch.cat( [embedded_text_input, embedded_pos_tags], -1) elif self.pos_tag_embedding is not None: raise ConfigurationError( "Model uses a POS embedding, but no POS tags were passed.") mask = get_text_field_mask(tokens) # Looking at the span start index is enough to know if # this is padding or not. Shape: (batch_size, num_spans) span_mask = (spans[:, :, 0] >= 0).squeeze(-1).long() if span_mask.dim() == 1: # This happens if you use batch_size 1 and encounter # a length 1 sentence in PTB, which do exist. -.- span_mask = span_mask.unsqueeze(-1) if span_labels is not None and span_labels.dim() == 1: span_labels = span_labels.unsqueeze(-1) num_spans = get_lengths_from_binary_sequence_mask(span_mask) encoded_text = self.encoder(embedded_text_input, mask) span_representations = self.span_extractor(encoded_text, spans, mask, span_mask) if self.feedforward_layer is not None: span_representations = self.feedforward_layer(span_representations) logits = self.tag_projection_layer(span_representations) class_probabilities = masked_softmax(logits, span_mask.unsqueeze(-1)) output_dict = { "class_probabilities": class_probabilities, "spans": spans, "tokens": [meta["tokens"] for meta in metadata], "pos_tags": [meta.get("pos_tags") for meta in metadata], "num_spans": num_spans } if span_labels is not None: loss = sequence_cross_entropy_with_logits(logits.float(), span_labels, span_mask) self.tag_accuracy(class_probabilities, span_labels, span_mask) output_dict["loss"] = loss # The evalb score is expensive to compute, so we only compute # it for the validation and test sets. batch_gold_trees = [meta.get("gold_tree") for meta in metadata] if all(batch_gold_trees ) and self._evalb_score is not None and not self.training: gold_pos_tags: List[List[str]] = [ list(zip(*tree.pos()))[1] for tree in batch_gold_trees ] predicted_trees = self.construct_trees( class_probabilities.cpu().data, spans.cpu().data, num_spans.data, output_dict["tokens"], gold_pos_tags) self._evalb_score(predicted_trees, batch_gold_trees) return output_dict
# print(vectorized_seqs)
embed = nn.Embedding(len(vocab), 4)
lstm = nn.LSTM(4, 5, batch_first=True)

seq_lengths = LongTensor(list(map(len, vectorized_seqs)))
# print(seq_lengths)

seq_tensor = Variable(torch.zeros(len(vectorized_seqs), seq_lengths.max())).long()
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = LongTensor(seq)
# print(seq_tensor)

# Sort sequences by decreasing length, as required by pack_padded_sequence.
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]
# print(seq_tensor)

# (3 x 8) token indices --> (3 x 8 x 4) embeddings
embedded_seq_tensor = embed(seq_tensor)
# print(embedded_seq_tensor)

packed_input = pack_padded_sequence(embedded_seq_tensor, seq_lengths.cpu().numpy(), batch_first=True)
# print(packed_input.data.shape)

packed_output, (ht, ct) = lstm(packed_input)
# print(packed_output.data.shape)

output, input_sizes = pad_packed_sequence(packed_output, batch_first=True)
print(output)
print(ht[-1])
def forward( self, # type: ignore tokens: Dict[str, torch.LongTensor], spans: torch.LongTensor, span_labels: torch.LongTensor = None, gold_tree: List[Tree] = None) -> Dict[str, torch.Tensor]: # pylint: disable=arguments-differ """ Parameters ---------- tokens : Dict[str, torch.LongTensor], required The output of ``TextField.as_array()``, which should typically be passed directly to a ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer`` tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens": Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used for the ``TokenIndexers`` when you created the ``TextField`` representing your sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``, which knows how to combine different word representations into a single vector per token in your input. spans : ``torch.LongTensor``, required. A tensor of shape ``(batch_size, num_spans, 2)`` representing the inclusive start and end indices of all possible spans in the sentence. span_labels : ``torch.LongTensor``, optional (default = None) A torch tensor representing the integer gold class labels for all possible spans, of shape ``(batch_size, num_spans)``. gold_tree : ``List[Tree]``, optional, (default = None) Gold NLTK trees for use in evaluation. Returns ------- An output dictionary consisting of: logits : ``torch.FloatTensor`` A tensor of shape ``(batch_size, num_spans, span_label_vocab_size)`` representing unnormalised log probabilities of the label classes for each span. class_probabilities : ``torch.FloatTensor`` A tensor of shape ``(batch_size, num_spans, span_label_vocab_size)`` representing a distribution over the label classes per span. loss : ``torch.FloatTensor``, optional A scalar loss to be optimised. """ embedded_text_input = self.text_field_embedder(tokens) mask = get_text_field_mask(tokens) sentence_lengths = get_lengths_from_binary_sequence_mask(mask) # Looking at the span start index is enough to know if # this is padding or not. Shape: (batch_size, num_spans) span_mask = (spans[:, :, 0] >= 0).squeeze(-1).long() encoded_text = self.encoder(embedded_text_input, mask) span_representations = self.span_extractor(encoded_text, spans, mask, span_mask) if self.feedforward_layer is not None: span_representations = self.feedforward_layer(span_representations) logits = self.tag_projection_layer(span_representations) class_probabilities = last_dim_softmax(logits, span_mask.unsqueeze(-1)) output_dict = { "class_probabilities": class_probabilities, "spans": spans, # TODO(Mark): This relies on having tokens represented with a SingleIdTokenIndexer... "tokens": tokens["tokens"], "sentence_lengths": sentence_lengths } if span_labels is not None: loss = sequence_cross_entropy_with_logits(logits, span_labels, span_mask) for metric in self.metrics.values(): metric(logits, span_labels, span_mask) output_dict["loss"] = loss # The evalb score is expensive to compute, so we only compute # it for the validation and test sets. if gold_tree is not None and self._evalb_score is not None and not self.training: predicted_trees = self.construct_trees( class_probabilities.cpu().data, spans.cpu().data, tokens["tokens"].cpu().data, sentence_lengths.cpu().data) self._evalb_score(predicted_trees, gold_tree) return output_dict
def forward( self, normalized_system_time_input: torch. FloatTensor, # shape = [batch_size, 1, 1] visiting_node_ids_input: torch. LongTensor, # shape = [batch_size, (capacity*2+1+3)] normalized_remaining_delays_input: torch. FloatTensor, # shape = [batch_size, (capacity*2+1+3), 1] num_of_visiting_nodes_info: torch.LongTensor, # shape = [batch_size,] normalized_num_of_nearby_vehs_input: torch. FloatTensor, # shape = [batch_size, 1, 1] normalized_num_of_new_reqs_input: torch. FloatTensor, # shape = [batch_size, 1, 1] ) -> torch.FloatTensor: # shape = [batch_size, 1, 1] """ Examples of input (batch_size = 3, vehicle_capacity = 1): ------- normalized_system_time_input = torch.FloatTensor([[0.25], [0.25], [0.25]]) visiting_node_ids_input = torch.LongTensor([[1, 2, 3], [4, 5, 0], [7, 0, 0]]) # 0 is used for padding normalized_remaining_delays_input = torch.LongTensor([[[1], [0.2], [0.3]], [[1], [0.1], [-1]], [[1], [-1], [-1]]]) # -1 is used for padding num_of_visiting_nodes_info = torch.LongTensor([3, 2, 1]) normalized_num_of_nearby_vehs_input = torch.LongTensor([[0.33], [0.67], [0.67]]) normalized_num_of_new_reqs_input = torch.LongTensor([[0.67], [0.67], [0.67]]) Examples of output (batch_size = 3, vehicle_capacity = 1): ------- output = torch.FloatTensor([[3.8047], [3.7808], [3.7767]]) """ # Schedule's visiting locations embedding. visiting_node_ids_embedded = self.embedding_location( visiting_node_ids_input) # Concatenate location features and remaining delays. sche_features = torch.cat( (visiting_node_ids_embedded, normalized_remaining_delays_input), dim=2) # pack_padded_sequence packed_sche_features = rnn_utils.pack_padded_sequence( sche_features, lengths=num_of_visiting_nodes_info.cpu().numpy(), batch_first=True, enforce_sorted=False) # Lstm output, (final_hidden_state, final_cell_state) = self.lstm(packed_sche_features) # output, final_hidden_state = self.gru(packed_pos_feature_and_delay) # Time embedding. time_embedded = F.elu(self.fc1_time(normalized_system_time_input)) # Concatenate state. schedule_state = torch.cat( (final_hidden_state[1], normalized_num_of_nearby_vehs_input, normalized_num_of_new_reqs_input, time_embedded), dim=1) # Two hidden fully-connected layers. schedule_state = F.elu(self.fc2_state(schedule_state)) schedule_state = F.elu(self.fc3_state(schedule_state)) output = self.out(schedule_state) return output