import tensorflow as tf

from syntaxnet import sentence_pb2
from syntaxnet.ops import gen_parser_ops


# Method of a wrapper class whose segmenter_model and parser_model
# attributes are loaded SyntaxNet model callables.
def annotate_text(self, text):
    # Wrap the raw text in a single-token Sentence proto; start/end of -1
    # means the token has no byte offsets yet.
    sentence = sentence_pb2.Sentence(
        text=text, token=[sentence_pb2.Token(word=text, start=-1, end=-1)])

    # Preprocess: expand the single-token sentence into character tokens
    # for the segmenter, using a temporary graph and session.
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = self.segmenter_model(preprocessed)

    # Parse the segmented sentence; the models return one result per input.
    annotations, traces = self.parser_model(segmented[0])
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0])
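
For orientation, a minimal usage sketch. The instance name annotator, the input string, and the printed fields are assumptions; the wrapper class and its model-loading code are not shown in the example above.

# Hypothetical usage; `annotator` is an instance of the wrapper class that
# defines annotate_text above, with its models already loaded.
parsed = annotator.annotate_text(u"이것은 테스트 문장입니다".encode("utf8"))
for token in parsed.token:
    # Each Token proto carries the word plus its dependency head index,
    # arc label, and POS tag.
    print("%s\t%d\t%s\t%s" % (token.word, token.head, token.label, token.tag))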
Example #2
from syntaxnet import sentence_pb2

# `parser_model` is assumed to be a loaded SyntaxNet parser callable,
# defined elsewhere in the original module.


def syntaxnet_sentence(tokens):
    pb_tokens = []
    last_start = 0
    for token in tokens:
        # start/end are inclusive byte offsets into the UTF-8 text, so a
        # token spans [last_start, last_start + len(token_bytes) - 1].
        token_bytes = token.encode("utf8")
        pb_tokens.append(sentence_pb2.Token(
            word=token_bytes, start=last_start,
            end=last_start + len(token_bytes) - 1))
        # Advance past the token and the single separating space.
        last_start = last_start + len(token_bytes) + 1

    annotations, traces = parser_model(sentence_pb2.Sentence(
        text=u" ".join(tokens).encode("utf8"),
        token=pb_tokens
    ).SerializeToString())
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0])
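
A hedged usage sketch for pre-tokenized input; the sample words are arbitrary. In the Sentence proto, head is the zero-based index of the governing token, with -1 marking the root.

# Hypothetical call with pre-tokenized input.
parsed = syntaxnet_sentence([u"I", u"saw", u"a", u"dog"])
for i, token in enumerate(parsed.token):
    head = parsed.token[token.head].word if token.head >= 0 else "ROOT"
    print("%d\t%s\t<-%s-\t%s" % (i, token.word, token.label, head))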
Example #3
import tensorflow as tf

from syntaxnet import sentence_pb2
from syntaxnet.ops import gen_parser_ops

# `segmenter_model` is assumed to be a loaded SyntaxNet segmenter callable,
# defined elsewhere in the original module.


def syntaxnet_tokenize(text):
    sentence = sentence_pb2.Sentence(
        text=text,
        token=[sentence_pb2.Token(word=text, start=-1, end=-1)]
    )

    # Preprocess: expand the single-token sentence into character tokens
    # for the segmenter, using a temporary graph and session.
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = segmenter_model(preprocessed)

    # Collect the segmenter's token words (UTF-8 byte strings).
    tokens = []
    for t in sentence_pb2.Sentence.FromString(segmented[0]).token:
        tokens.append(t.word)

    return tokens
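
A usage sketch, assuming UTF-8 encoded input; the sample sentence is arbitrary.

# Hypothetical call; returns the segmenter's token words.
words = syntaxnet_tokenize(u"이것은 테스트입니다".encode("utf8"))
for word in words:
    print(word)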
Example #4
import tensorflow as tf

from syntaxnet import sentence_pb2
from syntaxnet.ops import gen_parser_ops

# SEGMENTER_MODEL and PARSER_MODEL are assumed to be loaded SyntaxNet
# model callables, defined elsewhere in the original module.


def annotate_text(text):
    """Segment and parse input text using syntaxnet models."""
    sentence = sentence_pb2.Sentence(
        text=text, token=[sentence_pb2.Token(word=text, start=-1, end=-1)])

    # Preprocess: expand the single-token sentence into character tokens
    # for the segmenter, using a temporary graph and session.
    with tf.Session(graph=tf.Graph()) as tmp_session:
        char_input = gen_parser_ops.char_token_generator(
            [sentence.SerializeToString()])
        preprocessed = tmp_session.run(char_input)[0]
    segmented, _ = SEGMENTER_MODEL(preprocessed)

    # Parse the segmented sentence and return the annotated proto along
    # with its trace.
    annotations, traces = PARSER_MODEL(segmented[0])
    assert len(annotations) == 1
    assert len(traces) == 1
    return sentence_pb2.Sentence.FromString(annotations[0]), traces[0]
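
A hedged notebook-style sketch of consuming the (sentence, trace) pair; the trace rendering mirrors the commented-out visualization call in Example #5 below, and the HTML output only displays inside a notebook. The input string is an assumption.

# Hypothetical notebook usage: parse a sentence and render its trace.
from IPython.display import HTML
from dragnn.python import visualization

parsed, trace = annotate_text(u"이것은 테스트 문장입니다".encode("utf8"))
print([t.word for t in parsed.token])
HTML(visualization.trace_html(trace))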
Example #5
from syntaxnet import sentence_pb2


def inference(sess, graph, builder, annotator, text, enable_tracing=False):
    # Whitespace-tokenize the input; -1 offsets mean no byte positions.
    tokens = [
        sentence_pb2.Token(word=word, start=-1, end=-1)
        for word in text.split()
    ]
    sentence = sentence_pb2.Sentence()
    sentence.token.extend(tokens)

    # `annotator` is a dict of graph tensors built elsewhere; `graph` and
    # `builder` are unused here but kept for the caller's signature.
    if enable_tracing:
        annotations, traces = sess.run(
            [annotator['annotations'], annotator['traces']],
            feed_dict={
                annotator['input_batch']: [sentence.SerializeToString()]
            })
        # In a notebook, the trace can be rendered with:
        # HTML(visualization.trace_html(traces[0]))
    else:
        annotations = sess.run(
            annotator['annotations'],
            feed_dict={
                annotator['input_batch']: [sentence.SerializeToString()]
            })

    parsed_sentence = sentence_pb2.Sentence.FromString(annotations[0])
    # In a notebook, the parse tree can be rendered with:
    # HTML(render_parse_tree_graphviz.parse_tree_graph(parsed_sentence))
    return parsed_sentence
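
Finally, a hedged sketch of a driver for inference. The sess, graph, builder, and annotator values must come from the DRAGNN graph-construction code of the original program, which is not shown here; the input sentence is an assumption.

# Hypothetical driver; sess/graph/builder/annotator come from DRAGNN
# graph construction elsewhere in the original program.
parsed = inference(sess, graph, builder, annotator, u"I saw a dog")
for token in parsed.token:
    print("%s\t%s\t%s" % (token.word, token.tag, token.label))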