예제 #1
0
def _tf_extractive_qa_reader(model_module_constructor,
                             resources_or_conf: Union[dict, SharedResources]):
    from jack.readers.extractive_qa.shared import XQAInputModule, XQAOutputModule
    shared_resources = create_shared_resources(resources_or_conf)
    input_module = XQAInputModule(shared_resources)
    model_module = model_module_constructor(shared_resources)
    output_module = XQAOutputModule()
    return TFReader(shared_resources, input_module, model_module,
                    output_module)
예제 #2
0
def fastqa_reader(resources_or_conf: Union[dict, SharedResources] = None):
    """Creates a FastQA reader instance (extractive qa model)."""
    from jack.readers.extractive_qa.fastqa import FastQAModule
    from jack.readers.extractive_qa.shared import XQAInputModule, XQAOutputModule
    shared_resources = create_shared_resources(resources_or_conf)

    input_module = XQAInputModule(shared_resources)
    model_module = FastQAModule(shared_resources)
    output_module = XQAOutputModule(shared_resources)
    return TFReader(shared_resources, input_module, model_module,
                    output_module)
예제 #3
0
def bidaf_reader(resources_or_conf: Union[dict, SharedResources] = None):
    """Creates a FastQA model as described in https://arxiv.org/abs/1703.04816 (extractive qa model)."""
    from jack.readers.extractive_qa.shared import XQAInputModule, XQAOutputModule
    from jack.readers.extractive_qa.bidaf import BiDAF
    shared_resources = create_shared_resources(resources_or_conf)

    input_module = XQAInputModule(shared_resources)
    model_module = BiDAF(shared_resources)
    output_module = XQAOutputModule(shared_resources)
    return TFReader(shared_resources, input_module, model_module,
                    output_module)
예제 #4
0
def test_fastqa():
    tf.reset_default_graph()

    data = load_jack('tests/test_data/squad/snippet_jtr.json')

    # fast qa must be initialized with existing embeddings, so we create some
    embeddings = load_embeddings('./tests/test_data/glove.840B.300d_top256.txt', 'glove')

    # we need a vocabulary (with embeddings for our fastqa_reader, but this is not always necessary)
    vocab = Vocab(emb=embeddings, init_from_embeddings=True)

    # ... and a config
    config = {
        "batch_size": 1,
        "repr_dim": 10,
        "repr_dim_input": embeddings.lookup.shape[1],
        "with_char_embeddings": True
    }

    # create/setup reader
    shared_resources = SharedResources(vocab, config)

    input_module = XQAInputModule(shared_resources)
    model_module = FastQAModule(shared_resources)
    output_module = XQAOutputModule()

    reader = TFReader(shared_resources, input_module, model_module, output_module)
    reader.setup_from_data(data, is_training=True)

    loss = reader.model_module.tensors[Ports.loss]
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    min_op = optimizer.minimize(loss)

    session = model_module.tf_session
    session.run(tf.global_variables_initializer())

    for epoch in range(0, 10):
        for batch in reader.input_module.batch_generator(data, 1, False):
            feed_dict = reader.model_module.convert_to_feed_dict(batch)
            loss_value, _ = session.run((loss, min_op), feed_dict=feed_dict)
            print(loss_value)