Beispiel #1
0
    def generate_data(self):
        """Build the offline-training dataset from placeholder-fed raw data.

        Raw text and labels are read with ``load_seq_label_raw_data``. They
        are not handed to the dataset builders directly: two string
        placeholders named "text" and "label" are created, the raw values are
        registered under them in ``self.init_feed_dict``, and the datasets
        are built from the placeholders.

        Side effects:
            Adds two entries to ``self.init_feed_dict`` and writes
            ``vocab_size`` and ``<mode>_data_size`` into
            ``self.config['data']``.

        Returns:
            The text dataset alone when ``self.infer_without_label`` is
            truthy; otherwise the result of ``tf.data.Dataset.zip`` over the
            text dataset and the label dataset.
        """
        # NOTE(review): the loader receives `infer_no_label` while the branch
        # below tests `infer_without_label` -- presumably two separate flags
        # set by the class initializer; confirm both exist.
        raw_text, raw_label = load_seq_label_raw_data(
            paths=self.paths,
            mode=self.mode,
            infer_no_label=self.infer_no_label)

        # Each placeholder is paired with the raw values it stands for.
        text_input = tf.placeholder(tf.string, name="text")
        self.init_feed_dict[text_input] = raw_text
        label_input = tf.placeholder(tf.string, name="label")
        self.init_feed_dict[label_input] = raw_label

        text_dataset = self.load_text_dataset(text_input)

        if not self.infer_without_label:
            label_dataset = load_multi_label_dataset(label_input, self.config)
            result = tf.data.Dataset.zip((text_dataset, label_dataset))
        else:
            # Label-free inference: only the text stream is produced.
            result = text_dataset

        # Record vocabulary size and the number of raw examples for this mode.
        vocab_size = get_vocab_size(self.text_vocab_file_path)
        self.config['data']['vocab_size'] = vocab_size
        size_key = '{}_data_size'.format(self.mode)
        self.config['data'][size_key] = len(raw_text)

        return result
Beispiel #2
0
    def generate_data(self):
        """Build the offline-training dataset directly from the raw data.

        Raw text and labels are read with ``load_seq_label_raw_data`` and
        passed straight to the dataset builders.

        Side effects:
            Writes ``vocab_size`` and ``<mode>_data_size`` into
            ``self.config['data']``.

        Returns:
            The text dataset alone when ``self.infer_without_label`` is
            truthy; otherwise the result of ``tf.data.Dataset.zip`` over the
            text dataset and the label dataset.
        """
        # NOTE(review): the loader receives `infer_no_label` while the branch
        # below tests `infer_without_label` -- presumably two separate flags
        # set by the class initializer; confirm both exist.
        raw_text, raw_label = load_seq_label_raw_data(
            paths=self.paths,
            mode=self.mode,
            infer_no_label=self.infer_no_label)
        text_dataset = self.load_text_dataset(raw_text)

        if not self.infer_without_label:
            label_dataset = load_multi_label_dataset(raw_label, self.config)
            result = tf.data.Dataset.zip((text_dataset, label_dataset))
        else:
            # Label-free inference: only the text stream is produced.
            result = text_dataset

        # Record vocabulary size and the number of raw examples for this mode.
        vocab_size = get_vocab_size(self.text_vocab_file_path)
        self.config['data']['vocab_size'] = vocab_size
        size_key = '{}_data_size'.format(self.mode)
        self.config['data'][size_key] = len(raw_text)

        return result
Beispiel #3
0
 def test_load_seq_label_raw_data(self):
   """Check the first text/label pair loaded from the configured train paths."""
   train_paths = self.config["data"]["train"]["paths"]
   loaded = load_seq_label_raw_data(train_paths, utils.TRAIN)
   texts, labels = loaded
   # Only the first example is checked.
   self.assertEqual(texts[0], "I feel good .")
   self.assertEqual(labels[0], "O O O O")