def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with flattened reference labels.

    Arguments:
        dataset: The dataset to take the reference sentences from.
        train: Boolean flag, telling whether this is a training run.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = cast(Iterable[List[str]],
                     dataset.maybe_get_series(self.data_id))
    if sentences is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return fd

    word_ids, padding_weights = self.vocabulary.sentences_to_tensor(
        list(sentences), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor produces time-major arrays; the targets need
    # to be batch-major, hence the transpositions.
    batch_major_ids = word_ids.T
    mask = padding_weights.T > 0.5

    # Keep only the non-padded positions, concatenated over the batch,
    # together with the per-sentence label counts.
    fd[self.label_lengths] = mask.sum(axis=1)
    fd[self.flat_labels] = batch_major_ids[mask]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the encoder inputs.

    Arguments:
        dataset: The dataset to use
        train: Boolean flag telling whether it is training time
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    series = list(dataset.get_series(self.data_id))

    # Batch-wide maximum length, capped by the configured limit.
    max_len = max(item.shape[0] for item in series)
    if self.max_input_len is not None:
        max_len = min(self.max_input_len, max_len)

    lengths = [min(max_len, item.shape[0]) for item in series]

    padded_inputs = []
    for item, length in zip(series, lengths):
        buf = np.zeros((max_len,) + item.shape[1:], dtype=item.dtype)
        buf[:length] = item[:length]
        padded_inputs.append(buf)

    fd[self.inputs] = padded_inputs
    fd[self._input_lengths] = lengths
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None and train:
        raise ValueError("When training, you must feed "
                         "reference sentences")

    start_idx = self.vocabulary.get_word_index(START_TOKEN)
    fd[self.go_symbols] = np.full(
        [len(dataset)], start_idx, dtype=np.int32)

    if sentences is not None:
        # train_mode=False, since we don't want to <unk>ize target words!
        inputs, weights = self.vocabulary.sentences_to_tensor(
            list(sentences), self.max_output_len, train_mode=False,
            add_start_symbol=False, add_end_symbol=True,
            pad_to_max_len=False)
        fd[self.train_inputs] = inputs
        fd[self.train_mask] = weights
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with sparse reference targets.

    Arguments:
        dataset: The dataset to take the reference sentences from.
        train: Boolean flag, telling whether this is a training run.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = cast(Iterable[List[str]],
                     dataset.maybe_get_series(self.data_id))
    if sentences is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return fd

    word_ids, padding_weights = self.vocabulary.sentences_to_tensor(
        list(sentences), train_mode=train, max_len=self.max_length)

    # sentences_to_tensor returns time-major arrays; targets need to
    # be batch-major, hence the transpositions.
    word_ids = word_ids.T
    mask = padding_weights.T > 0.5

    # The targets placeholder takes a sparse representation: the
    # (batch, time) index pairs of non-padded positions and the word
    # ids at those positions.
    sparse_indices = np.stack(np.where(mask), axis=1)
    fd[self.train_targets] = tf.SparseTensorValue(
        indices=sparse_indices, values=word_ids[mask],
        dense_shape=word_ids.shape)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with normalized input images."""
    fd = ModelPart.feed_dict(self, dataset, train)
    # The series may come from a pickled file as a plain list rather
    # than a numpy tensor, so convert it defensively.
    images = np.array(list(dataset.get_series(self.data_id)))
    # Scale pixel values from [0, 255] down to [0, 1].
    fd[self.image_input] = images / 255.0
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with padded target tokens.

    The target series is optional; nothing extra is fed when it is
    absent from the dataset.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    target_series = dataset.maybe_get_series(self.data_id)
    if target_series is None:
        return fd
    fd[self.target_tokens] = pad_batch(list(target_series))
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the input images.

    Arguments:
        dataset: The dataset to take the images from.
        train: Boolean flag telling whether it is training time.

    Raises:
        AssertionError: When the image batch does not have the
            expected (height, width, 3) shape per example.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    # Materialize the series before converting: if get_series returns
    # a lazy iterator, np.array over it would produce a 0-d object
    # array instead of an image batch. This mirrors how the other
    # image-feeding parts in this file list()-ify the series first.
    images = np.array(list(dataset.get_series(self.data_id)))
    assert images.shape[1:] == (self.height, self.width, 3)
    fd[self.input_image] = images
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the training inputs.

    The series is optional; nothing extra is fed when it is absent.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        return fd
    # zip(*...) transposes the batch to per-position tuples; feed the
    # tuple of first tokens across the batch.
    fd[self.train_inputs] = list(zip(*list(sentences)))[0]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with batch-major target indices."""
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        return fd
    word_ids, _ = self.vocabulary.sentences_to_tensor(
        list(sentences), pad_to_max_len=False, train_mode=train)
    # sentences_to_tensor is time-major; transpose to batch-major.
    fd[self.train_targets] = word_ids.T
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with padded target tokens."""
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        return fd
    padded = pad_batch(list(sentences), self.max_output_len,
                       self.add_start_symbol, self.add_end_symbol)
    fd[self.target_tokens] = padded
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with one target label per example.

    The first token of each padded sentence is used as the label.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        return fd
    padded = pad_batch(list(sentences), self.max_output_len)
    fd[self.targets] = [sent[0] for sent in padded]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the ground-truth labels."""
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        return fd
    label_tensors, _ = self.vocabulary.sentences_to_tensor(
        list(sentences), self.max_output_len)
    # Only the first time step is fed into the single ground-truth
    # placeholder.
    fd[self.gt_inputs[0]] = label_tensors[0]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with padded target tokens.

    The series is optional; nothing extra is fed when it is absent.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is not None:
        fd[self.target_tokens] = pad_batch(
            list(sentences), self.max_output_len,
            self.add_start_symbol, self.add_end_symbol)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with one label per example."""
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is not None:
        padded_batch = pad_batch(list(sentences), self.max_output_len)
        # The first token of each padded sentence serves as the target.
        fd[self.targets] = [tokens[0] for tokens in padded_batch]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the encoder inputs.

    Arguments:
        dataset: The dataset to use
        train: Boolean flag telling whether it is training time
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    tokens = list(dataset.get_series(self.data_id))
    fd[self.input_tokens] = pad_batch(tokens, self.max_input_len)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with padded reference sentences.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        if train:
            raise ValueError(
                "You must feed reference sentences when training")
        return fd
    fd[self.target_tokens] = pad_batch(list(sentences), self.max_length)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with input images and their masks."""
    fd = ModelPart.feed_dict(self, dataset, train)
    # The series may come from a pickled file as a plain list rather
    # than a numpy tensor, so convert it defensively.
    images = np.array(list(dataset.get_series(self.data_id)))
    # Scale pixel values from [0, 255] down to [0, 1].
    fd[self.image_input] = images / 255.0
    # The image mask is one everywhere the image is non-zero, i.e.
    # zero pixels are masked out.
    fd[self.image_mask] = np.sign(np.sum(images, axis=3, keepdims=True))
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the reference alignment.

    When the alignment series is missing, an all-zero alignment of the
    expected shape is fed instead; a warning is emitted if this happens
    during training.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    alignment = dataset.maybe_get_series(self.data_id)
    if alignment is None:
        if train:
            warn("Training alignment not present!")
        alignment = np.zeros(
            (len(dataset), self.decoder.max_output_len,
             self.enc_input.max_length), np.float32)
    fd[self.ref_alignment] = alignment
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the encoder inputs.

    Arguments:
        dataset: The dataset to use
        train: Boolean flag telling whether it is training time
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.get_series(self.data_id)
    word_ids, _ = self.vocabulary.sentences_to_tensor(
        list(sentences), self.max_input_len, pad_to_max_len=False,
        train_mode=train)
    # sentences_to_tensor returns lists of shape (time, batch); the
    # zip(*...) transposes them to batch-major.
    fd[self.inputs] = list(zip(*word_ids))
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the placeholders with the data.

    Arguments:
        dataset: The dataset.
        train: A flag whether the train mode is enabled.

    Returns:
        The constructed feed dictionary that contains the factor data
        and the mask.

    Raises:
        ValueError: When the individual factor series do not have
            matching lengths.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    # Serialized padding arrays, collected for checking that the
    # lengths of the individual factors match.
    arr_strings = []
    last_paddings = None

    for factor_plc, name, vocabulary in zip(
            self.input_factors, self.data_ids, self.vocabularies):
        factors = dataset.get_series(name)
        vectors, paddings = vocabulary.sentences_to_tensor(
            list(factors), self.max_length, pad_to_max_len=False,
            train_mode=train, add_start_symbol=self.add_start_symbol,
            add_end_symbol=self.add_end_symbol)

        # sentences_to_tensor is time-major; transpose to batch-major.
        fd[factor_plc] = list(zip(*vectors))

        # tobytes() is the non-deprecated spelling of tostring(); the
        # produced bytes are identical.
        arr_strings.append(paddings.tobytes())
        last_paddings = paddings

    if len(set(arr_strings)) > 1:
        raise ValueError("The lengths of factors do not match")

    assert last_paddings is not None
    fd[self.mask] = list(zip(*last_paddings))
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    sentences = dataset.maybe_get_series(self.data_id)
    if sentences is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
        return fd
    # The references get an end symbol appended but no start symbol.
    fd[self.train_tokens] = pad_batch(
        list(sentences), self.max_output_len,
        add_start_symbol=False, add_end_symbol=True)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the placeholders with the data.

    Arguments:
        dataset: The dataset.
        train: A flag whether the train mode is enabled.

    Returns:
        The constructed feed dictionary with one padded batch per
        input factor.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    for placeholder, series_name in zip(
            self.input_factors, self.data_ids):
        factor_data = dataset.get_series(series_name)
        fd[placeholder] = pad_batch(
            list(factor_data), self.max_length,
            self.add_start_symbol, self.add_end_symbol)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the placeholders with the data.

    Arguments:
        dataset: The dataset.
        train: A flag whether the train mode is enabled.

    Returns:
        The constructed feed dictionary with a padded batch for each
        of the input factors.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    for plc, series_id in zip(self.input_factors, self.data_ids):
        batch = list(dataset.get_series(series_id))
        fd[plc] = pad_batch(batch, self.max_length,
                            self.add_start_symbol, self.add_end_symbol)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.

    Raises:
        ValueError: When training and the reference series is missing.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    references = dataset.maybe_get_series(self.data_id)
    if references is None and train:
        raise ValueError("When training, you must feed "
                         "reference sentences")
    if references is not None:
        # An end symbol is appended; no start symbol for references.
        fd[self.train_tokens] = pad_batch(
            list(references), self.max_output_len,
            add_start_symbol=False, add_end_symbol=True)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with zero-padded temporal states."""
    fd = ModelPart.feed_dict(self, dataset, train)
    series = list(dataset.get_series(self.data_id))

    # Batch-wide maximum length, capped by the configured limit.
    max_len = max(item.shape[0] for item in series)
    if self.max_input_len is not None:
        max_len = min(self.max_input_len, max_len)

    lengths = []
    padded_states = []
    for item in series:
        usable = min(max_len, item.shape[0])
        padded = np.zeros((max_len,) + item.shape[1:], dtype=item.dtype)
        padded[:usable] = item[:usable]
        lengths.append(usable)
        padded_states.append(padded)

    fd[self.temporal_states] = padded_states
    fd[self._input_lengths] = lengths
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with padded temporal states."""
    fd = ModelPart.feed_dict(self, dataset, train)
    data = list(dataset.get_series(self.data_id))

    # Longest sequence in the batch, capped by the configured limit.
    longest = max(x.shape[0] for x in data)
    if self.max_input_len is not None:
        longest = min(self.max_input_len, longest)

    lengths = [min(longest, x.shape[0]) for x in data]
    states = []
    for x, length in zip(data, lengths):
        buf = np.zeros((longest,) + x.shape[1:], dtype=x.dtype)
        buf[:length] = x[:length]
        states.append(buf)

    fd[self.temporal_states] = states
    fd[self._input_lengths] = lengths
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the spatial input series."""
    fd = ModelPart.feed_dict(self, dataset, train)
    spatial_data = dataset.get_series(self.data_id)
    fd[self.spatial_input] = list(spatial_data)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = True) -> FeedDict:
    """Populate the feed dictionary with the generic model-part feeds.

    This part declares no placeholders of its own, so it only
    delegates to ModelPart.feed_dict.

    Arguments:
        dataset: The dataset to use.
        train: Boolean flag telling whether this is a training run.
            NOTE(review): the default here is True, unlike the False
            default used by the other feed_dict methods in this file —
            confirm this asymmetry is intentional.
    """
    return ModelPart.feed_dict(self, dataset, train)
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the input vectors.

    Arguments:
        dataset: The dataset to take the vectors from.
        train: Boolean flag telling whether it is training time.
    """
    fd = ModelPart.feed_dict(self, dataset, train)
    # Materialize the series before feeding: it may be a lazy
    # iterator, which cannot be fed to a placeholder directly. This
    # matches how the other input parts in this file list()-ify
    # their series.
    fd[self.vector] = list(dataset.get_series(self.data_id))
    return fd