예제 #1
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary for this decoder.

        Start symbols are always fed, one per dataset item. When the
        ``self.data_id`` series is present, its sentences are converted by
        the vocabulary and fed as training inputs together with their mask.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict`` extended with
            the decoder entries.

        Raises:
            ValueError: If ``train`` is set and the dataset provides no
                reference sentences.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        references = dataset.maybe_get_series(self.data_id)

        if references is None and train:
            raise ValueError("When training, you must feed "
                             "reference sentences")

        start_idx = self.vocabulary.get_word_index(START_TOKEN)
        batch_size = len(dataset)
        feed[self.go_symbols] = np.full(
            [batch_size], start_idx, dtype=np.int32)

        if references is None:
            return feed

        # train_mode=False, since we don't want to <unk>ize target words!
        token_ids, mask = self.vocabulary.sentences_to_tensor(
            list(references), self.max_output_len, train_mode=False,
            add_start_symbol=False, add_end_symbol=True,
            pad_to_max_len=False)

        feed[self.train_inputs] = token_ids
        feed[self.train_mask] = mask
        return feed
예제 #2
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with flattened labels and their lengths.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run; also forwarded to the
                vocabulary as ``train_mode``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.label_lengths`` and ``self.flat_labels`` when the series
            is available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        references = cast(Iterable[List[str]],
                          dataset.maybe_get_series(self.data_id))

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        token_ids, mask = self.vocabulary.sentences_to_tensor(
            list(references), train_mode=train, max_len=self.max_length)

        # sentences_to_tensor returns time-major tensors, targets need to
        # be batch-major
        batch_ids = token_ids.T
        batch_mask = mask.T

        # Positions whose padding weight exceeds 0.5 are kept.
        is_token = batch_mask > 0.5
        flattened = batch_ids[is_token]
        lengths = is_token.sum(axis=1)

        feed[self.label_lengths] = lengths
        feed[self.flat_labels] = flattened
        return feed
예제 #3
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with sparse training targets.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run; also forwarded to the
                vocabulary as ``train_mode``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            a ``tf.SparseTensorValue`` under ``self.train_targets`` when the
            series is available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        references = cast(Iterable[List[str]],
                          dataset.maybe_get_series(self.data_id))

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        token_ids, mask = self.vocabulary.sentences_to_tensor(
            list(references), train_mode=train, max_len=self.max_length)

        # sentences_to_tensor returns time-major tensors, targets need to
        # be batch-major
        batch_ids = token_ids.T
        batch_mask = mask.T

        # Sparse representation: coordinates and values of the positions
        # whose padding weight exceeds 0.5.
        is_token = batch_mask > 0.5
        coordinates = np.stack(np.where(is_token), axis=1)
        kept_ids = batch_ids[is_token]

        sparse_targets = tf.SparseTensorValue(
            indices=coordinates, values=kept_ids,
            dense_shape=batch_ids.shape)
        feed[self.train_targets] = sparse_targets
        return feed
예제 #4
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build a fresh feed dictionary with the mode flag and sparse targets.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Value fed to ``self.train_mode``; also forwarded to the
                vocabulary as ``train_mode``.

        Returns:
            A new dictionary holding ``self.train_mode`` and, when the series
            is available, a ``tf.SparseTensorValue`` under
            ``self.train_targets``.
        """
        references = dataset.maybe_get_series(self.data_id)
        feed = {self.train_mode: train}  # type: FeedDict

        if references is None:
            return feed

        token_ids, mask = self.vocabulary.sentences_to_tensor(
            list(references), train_mode=train, max_len=self.max_length)

        # sentences_to_tensor returns time-major tensors, targets need to
        # be batch-major
        batch_ids = token_ids.T
        batch_mask = mask.T

        # Sparse representation: coordinates and values of the positions
        # whose padding weight exceeds 0.5.
        is_token = batch_mask > 0.5
        coordinates = np.stack(np.where(is_token), axis=1)
        kept_ids = batch_ids[is_token]

        feed[self.train_targets] = tf.SparseTensorValue(
            indices=coordinates, values=kept_ids,
            dense_shape=batch_ids.shape)
        return feed
예제 #5
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with padded target tokens.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            the ``pad_batch`` result under ``self.target_tokens`` when the
            series is available.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        feed[self.target_tokens] = pad_batch(list(series))
        return feed
예제 #6
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with the first element of every item.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.train_inputs`` when the series is available.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        rows = list(series)
        # zip(*rows) transposes the batch; its first tuple collects the
        # first element of each row.
        columns = list(zip(*rows))
        feed[self.train_inputs] = columns[0]
        return feed
예제 #7
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the leading element of each series item as training input.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; it gains a
            ``self.train_inputs`` entry when the series is available.
        """
        result = ModelPart.feed_dict(self, dataset, train)

        items = dataset.maybe_get_series(self.data_id)
        if items is not None:
            # Transposing with zip puts the first elements of all items
            # into the first tuple.
            transposed = list(zip(*list(items)))
            result[self.train_inputs] = transposed[0]

        return result
예제 #8
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with one label per padded item.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.targets`` when the series is available.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        padded = pad_batch(list(series), self.max_output_len)
        # Only the first element of every padded item is used as the label.
        feed[self.targets] = [item[0] for item in padded]
        return feed
예제 #9
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with transposed target indices.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run; also forwarded to the
                vocabulary as ``train_mode``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.train_targets`` when the series is available.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        # The second returned value is not needed here.
        token_ids, _ = self.vocabulary.sentences_to_tensor(
            list(series), pad_to_max_len=False, train_mode=train)
        feed[self.train_targets] = token_ids.T
        return feed
예제 #10
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with the first ground-truth input.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; when the
            series is available, ``self.gt_inputs[0]`` is mapped to the first
            entry of the tensor returned by the vocabulary.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        # The second returned value is not needed here.
        labels, _ = self.vocabulary.sentences_to_tensor(
            list(series), self.max_output_len)
        first_placeholder = self.gt_inputs[0]
        feed[first_placeholder] = labels[0]
        return feed
예제 #11
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with padded target tokens.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            the ``pad_batch`` result under ``self.target_tokens`` when the
            series is available.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        # Length limit and start/end symbol flags come from the instance.
        padded = pad_batch(list(series),
                           self.max_output_len,
                           self.add_start_symbol,
                           self.add_end_symbol)
        feed[self.target_tokens] = padded
        return feed
예제 #12
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the leading element of each padded item as the target.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; it gains a
            ``self.targets`` entry when the series is available.
        """
        result = ModelPart.feed_dict(self, dataset, train)
        items = dataset.maybe_get_series(self.data_id)

        if items is not None:
            padded_items = pad_batch(list(items), self.max_output_len)
            # Keep just the first element of every padded item.
            first_elements = [entry[0] for entry in padded_items]
            result[self.targets] = first_elements

        return result
예제 #13
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the padded ``self.data_id`` series as target tokens.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Training flag, passed on to ``ModelPart.feed_dict``.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; it gains a
            ``self.target_tokens`` entry when the series is available.
        """
        result = ModelPart.feed_dict(self, dataset, train)

        items = dataset.maybe_get_series(self.data_id)
        if items is not None:
            # Padding is configured by the instance's length limit and
            # start/end symbol flags.
            token_batch = pad_batch(
                list(items), self.max_output_len, self.add_start_symbol,
                self.add_end_symbol)
            result[self.target_tokens] = token_batch

        return result
예제 #14
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build a fresh feed dictionary with training inputs and mode flag.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Value fed to ``self.train_mode``.

        Returns:
            A new dictionary with ``self.train_inputs`` (only when the series
            is available) and ``self.train_mode``.
        """
        series = dataset.maybe_get_series(self.data_id)

        feed = {}  # type: FeedDict
        if series is not None:
            # zip(*rows) transposes the batch; its first tuple collects the
            # first element of each row.
            columns = list(zip(*list(series)))
            feed[self.train_inputs] = columns[0]

        feed[self.train_mode] = train
        return feed
예제 #15
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build a fresh feed dictionary with mode flag, targets and weights.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Value fed to ``self.train_mode``; also forwarded to the
                vocabulary as ``train_mode``.

        Returns:
            A new dictionary with ``self.train_mode`` and, when the series is
            available, the transposed vocabulary outputs under
            ``self.train_targets`` and ``self.train_weights``.
        """
        feed = {self.train_mode: train}  # type: FeedDict

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        token_ids, mask = self.vocabulary.sentences_to_tensor(
            list(series), pad_to_max_len=False, train_mode=train)

        feed[self.train_targets] = token_ids.T
        feed[self.train_weights] = mask.T
        return feed
예제 #16
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with padded target tokens.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            the ``pad_batch`` result under ``self.target_tokens`` when the
            series is available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        series = dataset.maybe_get_series(self.data_id)

        if series is None:
            if train:
                raise ValueError(
                    "You must feed reference sentences when training")
            return feed

        feed[self.target_tokens] = pad_batch(list(series), self.max_length)
        return feed
예제 #17
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the padded ``self.data_id`` series as target tokens.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; it gains a
            ``self.target_tokens`` entry when the series is available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        result = ModelPart.feed_dict(self, dataset, train)
        items = dataset.maybe_get_series(self.data_id)
        has_items = items is not None

        # Training cannot proceed without references.
        if train and not has_items:
            raise ValueError("You must feed reference sentences when training")

        if has_items:
            token_batch = pad_batch(list(items), self.max_length)
            result[self.target_tokens] = token_batch

        return result
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary with the reference alignment.

        When the ``self.data_id`` series is missing, an all-zero float32
        array is fed instead, and a warning is issued if this is a training
        run.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.ref_alignment``.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        reference = dataset.maybe_get_series(self.data_id)

        if reference is None:
            if train:
                warn("Training alignment not present!")

            # Fallback shape: dataset size x decoder max output length x
            # encoder input max length.
            zero_shape = (len(dataset),
                          self.decoder.max_output_len,
                          self.enc_input.max_length)
            reference = np.zeros(zero_shape, np.float32)

        feed[self.ref_alignment] = reference
        return feed
예제 #19
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build a fresh feed dictionary with ground truth and mode flag.

        Arguments:
            dataset: Dataset that may provide the ``self.data_id`` series.
            train: Value fed to ``self.train_mode``.

        Returns:
            A new dictionary in which ``self.gt_inputs[0]`` maps to the first
            entry of the tensor returned by the vocabulary (only when the
            series is available), plus ``self.train_mode``.
        """
        feed = {}  # type: FeedDict
        series = dataset.maybe_get_series(self.data_id)

        if series is not None:
            # The second returned value is not needed here.
            labels, _ = self.vocabulary.sentences_to_tensor(
                list(series), self.max_output_len)
            first_label = labels[0]

            # pylint: disable=unsubscriptable-object
            feed[self.gt_inputs[0]] = first_label
            # pylint: enable=unsubscriptable-object

        feed[self.train_mode] = train
        return feed
예제 #20
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary for this decoder.

        Reference sentences from the ``self.data_id`` series are padded with
        ``pad_batch`` (no start symbol, end symbol added) and fed as
        training tokens.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``, extended with
            ``self.train_tokens`` when the series is available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)
        references = dataset.maybe_get_series(self.data_id)

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        feed[self.train_tokens] = pad_batch(list(references),
                                            self.max_output_len,
                                            add_start_symbol=False,
                                            add_end_symbol=True)
        return feed
예제 #21
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Populate the decoder's feed dictionary with training tokens.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Returns:
            The dictionary produced by ``ModelPart.feed_dict``; it gains a
            ``self.train_tokens`` entry when the ``self.data_id`` series is
            available.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        result = ModelPart.feed_dict(self, dataset, train)
        items = dataset.maybe_get_series(self.data_id)
        has_items = items is not None

        # Training cannot proceed without references.
        if train and not has_items:
            raise ValueError("When training, you must feed "
                             "reference sentences")

        if has_items:
            # No start symbol is added; an end symbol is.
            token_batch = pad_batch(
                list(items), self.max_output_len, add_start_symbol=False,
                add_end_symbol=True)
            result[self.train_tokens] = token_batch

        return result