def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    The index of ``START_TOKEN`` is fed into ``self.go_symbols`` once per
    dataset item. When the dataset provides the ``self.data_id`` series,
    the reference sentences are converted by the vocabulary and fed into
    ``self.train_inputs`` and ``self.train_mask``.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict`` extended with
        the decoder entries.

    Raises:
        ValueError: If ``train`` is set and the dataset does not contain
            the reference sentences.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    references = dataset.maybe_get_series(self.data_id)
    has_references = references is not None

    if train and not has_references:
        raise ValueError("When training, you must feed "
                         "reference sentences")

    start_index = self.vocabulary.get_word_index(START_TOKEN)
    batch_size = len(dataset)
    feed[self.go_symbols] = np.full(
        [batch_size], start_index, dtype=np.int32)

    if not has_references:
        return feed

    # train_mode=False, since we don't want to <unk>ize target words!
    token_ids, token_weights = self.vocabulary.sentences_to_tensor(
        list(references),
        self.max_output_len,
        train_mode=False,
        add_start_symbol=False,
        add_end_symbol=True,
        pad_to_max_len=False)

    feed[self.train_inputs] = token_ids
    feed[self.train_mask] = token_weights
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the reference labels as a flat vector plus per-item lengths.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            also forwarded to the vocabulary as ``train_mode``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present it additionally holds ``self.label_lengths`` and
        ``self.flat_labels``.

    Raises:
        ValueError: If ``train`` is set and the series is missing.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    references = cast(Iterable[List[str]],
                      dataset.maybe_get_series(self.data_id))

    if references is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return feed

    time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
        list(references), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    ids = time_major_ids.T
    is_token = time_major_pad.T > 0.5

    # Number of unmasked positions per row, then the unmasked ids
    # themselves concatenated row after row.
    feed[self.label_lengths] = is_token.sum(axis=1)
    feed[self.flat_labels] = ids[is_token]
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the reference labels as a sparse tensor value.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            also forwarded to the vocabulary as ``train_mode``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.train_targets`` maps to a
        ``tf.SparseTensorValue`` built from the unmasked positions.

    Raises:
        ValueError: If ``train`` is set and the series is missing.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    references = cast(Iterable[List[str]],
                      dataset.maybe_get_series(self.data_id))

    if references is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return feed

    time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
        list(references), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    ids = time_major_ids.T
    is_token = time_major_pad.T > 0.5

    # Need to convert the data to a sparse representation
    coordinates = np.where(is_token)
    sparse_targets = tf.SparseTensorValue(
        indices=np.stack(coordinates, axis=1),
        values=ids[is_token],
        dense_shape=ids.shape)

    feed[self.train_targets] = sparse_targets
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the training flag and, if available, sparse reference labels.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            fed into ``self.train_mode`` and forwarded to the vocabulary.

    Returns:
        A new dictionary with ``self.train_mode`` and, when the series is
        present, ``self.train_targets`` as a ``tf.SparseTensorValue``.
    """
    feed = {}  # type: FeedDict

    references = dataset.maybe_get_series(self.data_id)
    feed[self.train_mode] = train

    if references is None:
        return feed

    time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
        list(references), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor returns time-major tensors, targets need to
    # be batch-major
    ids = time_major_ids.T
    is_token = time_major_pad.T > 0.5

    # Need to convert the data to a sparse representation
    coordinates = np.where(is_token)
    sparse_targets = tf.SparseTensorValue(
        indices=np.stack(coordinates, axis=1),
        values=ids[is_token],
        dense_shape=ids.shape)

    feed[self.train_targets] = sparse_targets
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target tokens when the dataset provides them.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.target_tokens`` maps to the result of
        ``pad_batch`` applied to the list of sentences.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    token_batch = list(series)
    feed[self.target_tokens] = pad_batch(token_batch)
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the first token of every sentence as the training input.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.train_inputs`` maps to a tuple holding
        the first token of each sentence (an empty tuple for an empty
        batch).

    Raises:
        IndexError: If a sentence in the series is empty.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is not None:
        # Only the first token of each sentence is needed, so index it
        # directly instead of transposing the whole batch with zip(); this
        # also keeps an empty batch from failing on the [0] lookup.
        fd[self.train_inputs] = tuple(
            sentence[0] for sentence in sentences)

    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the first element of every padded sentence as the target.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.targets`` maps to a list with element 0
        of each item returned by ``pad_batch``.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    padded = pad_batch(list(series), self.max_output_len)
    feed[self.targets] = [tokens[0] for tokens in padded]
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the transposed target index tensor.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            also forwarded to the vocabulary as ``train_mode``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.train_targets`` maps to the transpose of
        the first tensor returned by ``sentences_to_tensor``.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    # The second returned value is not needed here.
    token_ids, _ = self.vocabulary.sentences_to_tensor(
        list(series), pad_to_max_len=False, train_mode=train)

    feed[self.train_targets] = token_ids.T
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the first row of the label tensor into the first input.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.gt_inputs[0]`` maps to element 0 of the
        first tensor returned by ``sentences_to_tensor``.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    # The second returned value is not needed here.
    label_ids, _ = self.vocabulary.sentences_to_tensor(
        list(series), self.max_output_len)

    first_input = self.gt_inputs[0]
    feed[first_input] = label_ids[0]
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target tokens processed by ``pad_batch``.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.target_tokens`` maps to the result of
        ``pad_batch`` called with ``self.max_output_len``,
        ``self.add_start_symbol`` and ``self.add_end_symbol``.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    token_batch = list(series)
    feed[self.target_tokens] = pad_batch(
        token_batch,
        self.max_output_len,
        self.add_start_symbol,
        self.add_end_symbol)
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed one label per dataset item into ``self.targets``.

    The label is element 0 of each item that ``pad_batch`` returns for the
    ``self.data_id`` series.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``, extended with
        ``self.targets`` when the series is present.
    """
    result = ModelPart.feed_dict(self, dataset, train)
    label_series = dataset.maybe_get_series(self.data_id)

    if label_series is not None:
        padded_items = pad_batch(list(label_series), self.max_output_len)
        first_elements = [item[0] for item in padded_items]
        result[self.targets] = first_elements

    return result
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed ``self.target_tokens`` with the ``pad_batch`` output.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, passed on to ``ModelPart.feed_dict``.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``, extended with
        ``self.target_tokens`` when the series is present.
    """
    result = ModelPart.feed_dict(self, dataset, train)
    token_series = dataset.maybe_get_series(self.data_id)

    if token_series is not None:
        # Positional arguments after the batch: length limit, then the
        # start-symbol and end-symbol settings stored on this object.
        padded_tokens = pad_batch(
            list(token_series),
            self.max_output_len,
            self.add_start_symbol,
            self.add_end_symbol)
        result[self.target_tokens] = padded_tokens

    return result
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the first token of every sentence and the training flag.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            fed into ``self.train_mode``.

    Returns:
        A new dictionary with ``self.train_mode`` and, when the series is
        present, ``self.train_inputs`` mapped to a tuple holding the first
        token of each sentence (an empty tuple for an empty batch).

    Raises:
        IndexError: If a sentence in the series is empty.
    """
    fd = {}  # type: FeedDict

    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is not None:
        # Only the first token of each sentence is needed, so index it
        # directly instead of transposing the whole batch with zip(); this
        # also keeps an empty batch from failing on the [0] lookup.
        fd[self.train_inputs] = tuple(
            sentence[0] for sentence in sentences)

    fd[self.train_mode] = train
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the training flag and, if available, targets with weights.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            fed into ``self.train_mode`` and forwarded to the vocabulary.

    Returns:
        A new dictionary with ``self.train_mode`` and, when the series is
        present, the transposed outputs of ``sentences_to_tensor`` under
        ``self.train_targets`` and ``self.train_weights``.
    """
    feed = {self.train_mode: train}  # type: FeedDict

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        return feed

    token_ids, token_weights = self.vocabulary.sentences_to_tensor(
        list(series), pad_to_max_len=False, train_mode=train)

    feed[self.train_targets] = token_ids.T
    feed[self.train_weights] = token_weights.T
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target tokens, requiring them during training.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        series is present, ``self.target_tokens`` maps to the result of
        ``pad_batch`` called with ``self.max_length``.

    Raises:
        ValueError: If ``train`` is set and the series is missing.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        if train:
            raise ValueError(
                "You must feed reference sentences when training")
        return feed

    token_batch = list(series)
    feed[self.target_tokens] = pad_batch(token_batch, self.max_length)
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the reference alignment, falling back to an all-zero array.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. A
            warning is issued when it is set and the series is missing.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict`` with
        ``self.ref_alignment`` set either to the series from the dataset
        or to a float32 array of zeros.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    reference = dataset.maybe_get_series(self.data_id)
    if reference is None:
        if train:
            warn("Training alignment not present!")

        # One zero matrix per dataset item, sized by the decoder output
        # length limit and the encoder input length limit.
        zeros_shape = (len(dataset),
                       self.decoder.max_output_len,
                       self.enc_input.max_length)
        reference = np.zeros(zeros_shape, np.float32)

    feed[self.ref_alignment] = reference
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the first row of the label tensor and the training flag.

    Arguments:
        dataset: The dataset providing the ``self.data_id`` series.
        train: Boolean flag, telling whether this is a training run. It is
            fed into ``self.train_mode``.

    Returns:
        A new dictionary with ``self.train_mode`` and, when the series is
        present, ``self.gt_inputs[0]`` mapped to element 0 of the first
        tensor returned by ``sentences_to_tensor``.
    """
    feed = {}  # type: FeedDict

    series = dataset.maybe_get_series(self.data_id)
    if series is not None:
        # The second returned value is not needed here.
        label_ids, _ = self.vocabulary.sentences_to_tensor(
            list(series), self.max_output_len)

        # pylint: disable=unsubscriptable-object
        first_input = self.gt_inputs[0]
        # pylint: enable=unsubscriptable-object
        feed[first_input] = label_ids[0]

    feed[self.train_mode] = train
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``; when the
        ``self.data_id`` series is present, ``self.train_tokens`` maps to
        the result of ``pad_batch`` called with ``self.max_output_len``,
        ``add_start_symbol=False`` and ``add_end_symbol=True``.

    Raises:
        ValueError: If ``train`` is set and the dataset does not contain
            the reference sentences.
    """
    feed = ModelPart.feed_dict(self, dataset, train)

    references = dataset.maybe_get_series(self.data_id)
    if references is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return feed

    token_batch = list(references)
    feed[self.train_tokens] = pad_batch(
        token_batch,
        self.max_output_len,
        add_start_symbol=False,
        add_end_symbol=True)
    return feed
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Reference sentences from the ``self.data_id`` series, when available,
    are run through ``pad_batch`` and fed into ``self.train_tokens``.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Returns:
        The dictionary obtained from ``ModelPart.feed_dict``, extended with
        ``self.train_tokens`` when the series is present.

    Raises:
        ValueError: If ``train`` is set and the dataset does not contain
            the reference sentences.
    """
    result = ModelPart.feed_dict(self, dataset, train)
    reference_series = dataset.maybe_get_series(self.data_id)
    missing = reference_series is None

    if missing and train:
        raise ValueError("When training, you must feed "
                         "reference sentences")

    if not missing:
        # No start symbol is requested, only the end symbol.
        padded_tokens = pad_batch(list(reference_series),
                                  self.max_output_len,
                                  add_start_symbol=False,
                                  add_end_symbol=True)
        result[self.train_tokens] = padded_tokens

    return result