Ejemplo n.º 1
0
def main():
    """End-to-end training entry point.

    Reads the raw configuration, merges it over the model defaults,
    prepares the corpus, trains/evaluates the model, and finally exports
    the saved model as a deliverable bundle.
    """
    raw_config = read_configure()
    model = Model(raw_config)

    # Model defaults first, then user-supplied values take precedence.
    config = model.get_default_config()
    config.update(raw_config)

    corpus = get_corpus_processor(config)
    corpus.prepare()
    train_generator = corpus.get_generator_func(corpus.TRAIN)
    eval_generator = corpus.get_generator_func(corpus.EVAL)

    meta_info = corpus.get_meta_info()
    config["tags_data"] = generate_tagset(meta_info["tags"])

    # Build the TF input functions; evaluation data is optional.
    train_input_func = build_input_func(train_generator, config)
    if eval_generator:
        eval_input_func = build_input_func(eval_generator, config)
    else:
        eval_input_func = None

    evaluate_result, export_results, final_saved_model = (
        model.train_and_eval_then_save(train_input_func, eval_input_func, config)
    )

    export_as_deliverable_model(
        create_dir_if_needed(config["deliverable_model_dir"]),
        tensorflow_saved_model=final_saved_model,
        converter_for_request=converter_for_request,
        converter_for_response=converter_for_response,
        addition_model_dependency=["micro_toolkit"],
    )
Ejemplo n.º 2
0
    def _keras_train(self, training_data: TrainingData,
                     cfg: RasaNLUModelConfig, **kwargs: Any) -> None:
        """Train a Keras CRF sequence tagger on pre-computed features.

        The training input function and corpus metadata are injected by an
        upstream addon component via ``kwargs`` (``addons_tf_input_fn`` /
        ``addons_tf_input_meta``); ``training_data`` and ``cfg`` are part of
        the Rasa component interface but are not read here.

        Fixes over the previous revision:
        - ``model.compile`` was called twice; the first call (without
          metrics) was immediately overridden by the second. Compile once.
        - the ``Input`` shape hard-coded ``25`` even though ``maxlen = 25``
          is used for preprocessing; use the variable so they stay in sync.
        """
        from tensorflow.python.keras.layers import Input, Masking
        from tensorflow.python.keras.models import Sequential
        from tf_crf_layer.layer import CRF
        from tf_crf_layer.loss import crf_loss
        from tf_crf_layer.metrics import crf_accuracy
        from seq2annotation.input import generate_tagset
        from seq2annotation.input import build_input_func
        from seq2annotation.input import Lookuper

        config = self.component_config

        if 'result_dir' not in config:
            config['result_dir'] = tempfile.mkdtemp()

        # read data according configure (supplied by the addon pipeline)
        train_data_generator_func = kwargs.get('addons_tf_input_fn')
        corpus_meta_data = kwargs.get('addons_tf_input_meta')

        config['tags_data'] = generate_tagset(corpus_meta_data['tags'])

        # train and evaluate model
        train_input_func = build_input_func(train_data_generator_func, config)

        # Map each tag to its index in the generated tagset.
        tag_lookuper = Lookuper(
            {v: i
             for i, v in enumerate(config['tags_data'])})

        maxlen = 25
        # NOTE(review): 768 looks like a BERT-style embedding width supplied
        # by the preprocessing step — confirm against the feature producer.
        feature_dim = 768

        offset_data = train_input_func()
        train_x, train_y = self._keras_data_preprocss(offset_data,
                                                      tag_lookuper, maxlen)

        EPOCHS = 1

        tag_size = tag_lookuper.size()

        model = Sequential()
        # Use maxlen here (was a hard-coded 25) so input shape and the
        # preprocessing padding length cannot drift apart.
        model.add(Input(shape=(maxlen, feature_dim)))
        model.add(Masking())
        model.add(CRF(tag_size))
        # Compile exactly once, with metrics included.
        model.compile('adam', loss=crf_loss, metrics=[crf_accuracy])
        model.summary()

        model.fit(train_x, train_y, epochs=EPOCHS)
Ejemplo n.º 3
0
def test_build_input_func():
    """Smoke-test build_input_func on one annotated Chinese sentence."""

    def data_generator_func():
        sentence = Sequence("王小明在北京的清华大学读书。")
        # Entity spans: (start, end, label) over the 14-character sentence.
        for start, end, label in ((0, 3, 'PERSON'), (4, 6, 'GPE'), (7, 11, 'ORG')):
            sentence.span_set.append(Span(start, end, label))
        return [sentence]

    config = {"shuffle_pool_size": 1, "epochs": 2, "batch_size": 1}
    output_func = build_input_func(data_generator_func, config)
    output = output_func()

    with tf.Session() as sess:
        word_info, encoding = sess.run(output)
        # One batch of 14 tokens, plus its length vector and tag encoding.
        assert word_info["words"].shape == (1, 14)
        assert word_info["words_len"].shape == (1,)
        assert encoding.shape == (1, 14)
    def train(
        self, training_data: TrainingData, cfg: RasaNLUModelConfig, **kwargs: Any
    ) -> None:
        """Train the seq2annotation model and remember its export directory.

        The TF input function and corpus metadata come from an upstream
        addon component via ``kwargs``; ``training_data`` and ``cfg`` belong
        to the Rasa component interface but are not used directly here.
        """
        from seq2annotation.input import generate_tagset
        from seq2annotation.input import build_input_func
        from seq2annotation.model import Model

        raw_config = self.component_config

        print(raw_config)

        # Fall back to a throwaway directory when none is configured.
        if 'result_dir' not in raw_config:
            raw_config['result_dir'] = tempfile.mkdtemp()

        model = Model(raw_config)

        # Defaults first, then the component configuration on top.
        config = model.get_default_config()
        config.update(raw_config)

        # read data according configure
        input_fn = kwargs.get('addons_tf_input_fn')
        meta_data = kwargs.get('addons_tf_input_meta')

        config['tags_data'] = generate_tagset(meta_data['tags'])

        # train and evaluate model (no evaluation input function here)
        train_input_func = build_input_func(input_fn, config)

        evaluate_result, export_results, final_saved_model = (
            model.train_and_eval_then_save(train_input_func, None, config)
        )

        self.result_dir = final_saved_model
Ejemplo n.º 5
0
# Top-level training-script fragment. NOTE(review): `config` and the
# imported names (TaskStatus, Corpus, build_input_func) are defined
# earlier in the file, outside this chunk.
task_status = TaskStatus(config)

# read data according configure
corpus = Corpus(config)
corpus.prepare()
# Generator functions yielding annotated sequences for each data split.
train_data_generator_func = corpus.get_generator_func(corpus.TRAIN)
eval_data_generator_func = corpus.get_generator_func(corpus.EVAL)

# build model according configure


# send START status to monitor system
task_status.send_status(task_status.START)

# train and evaluate model
# Wrap the raw generators into TensorFlow-compatible input functions.
train_input_func = build_input_func(train_data_generator_func, config)
eval_input_func = build_input_func(eval_data_generator_func, config)

# ***** test ******
# (commented-out debugging scaffold left by the author)
# train_iterator = train_input_func()
# import tensorflow as tf
# import sys

# data_generator = generator_func(train_data_generator_func)
# for i, data in enumerate(data_generator):
#     print(i, data)
#
# sys.exit(0)

# with tf.Session() as sess:
#     sess.run(tf.tables_initializer())