Beispiel #1
0
 def __init__(self, text_or_tokens, max_sentence_length=399):
     """Wrap a Sentence, a raw string, or a token sequence as a SentRep.

     Strings are surrounded with <s>...</s> markers and run through the
     parser's tokenizer; any other value is assumed to be a sequence of
     pre-tokenized words."""
     if isinstance(text_or_tokens, Sentence):
         # Share the existing Sentence's underlying representation.
         self.sentrep = text_or_tokens.sentrep
         return
     if isinstance(text_or_tokens, basestring):
         marked = '<s> ' + text_or_tokens + ' </s>'
         self.sentrep = parser.tokenize(marked, max_sentence_length)
         return
     self.sentrep = parser.SentRep(text_or_tokens)
 def load_parsing_model(self,
                        model_dir,
                        language='En',
                        case_insensitive=False,
                        nbest=50,
                        small_corpus=True,
                        overparsing=21,
                        debug=0,
                        smoothPos=0):
     """Load the parsing model from model_dir and set parser options.

     May only be called once per object."""
     # NOTE(review): the loaded flag is set before loadModel, so a failed
     # load still counts as loaded — confirm this is intended.
     assert not self._parser_model_loaded
     self._parser_model_loaded = True
     parser.loadModel(model_dir)
     parser.setOptions(language, case_insensitive, nbest, small_corpus,
                       overparsing, debug, smoothPos)
Beispiel #3
0
def test_parse():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    sr2 = test_tokenizer()

    for sr in (sr1, sr2):
        parses = parser.parse(sr, thread_slot)
        display_parses(parses)
        print '---'
Beispiel #4
0
 def load_parsing_model(self, model_dir, language='En',
                        case_insensitive=False, nbest=50, small_corpus=True,
                        overparsing=21, debug=0, smoothPos=0):
     """Load the parser model found in model_dir and apply the options.

     Must be called exactly once before parsing."""
     # NOTE(review): the flag is flipped before loadModel runs, so a
     # partial/failed load cannot be retried — verify that's deliberate.
     assert not self._parser_model_loaded
     self._parser_model_loaded = True
     parser.loadModel(model_dir)
     parser.setOptions(language, case_insensitive, nbest, small_corpus,
                       overparsing, debug, smoothPos)
Beispiel #5
0
def test_tokenizer():
    sr = parser.tokenize("junk <s> It's some text to tokenize, if you feel like it -- or not. </s>", 399)
    print 'sr %r' % str(sr)
    print 'sr length', len(sr)
    for i in range(len(sr)):
        print 'sr word', i, sr.getWord(i).lexeme()
    return sr
Beispiel #6
0
def test_extpos():
    sr1 = parser.SentRep(['record'])

    print 'Unconstrained'
    display_parses(parser.parse(sr1))

    print 'NN'
    ext_pos1 = parser.ExtPos()
    ext_pos1.addTagConstraints(parser.StringVector(['NN']))

    display_parses(parser.parse(sr1, ext_pos1, None))

    print 'VB'
    ext_pos2 = parser.ExtPos()
    ext_pos2.addTagConstraints(parser.StringVector(['VB']))
    display_parses(parser.parse(sr1, ext_pos2, None))
Beispiel #7
0
def test_extpos():
    sr1 = parser.SentRep(["record"])

    print "Unconstrained"
    display_parses(parser.parse(sr1))

    print "NN"
    ext_pos1 = parser.ExtPos()
    ext_pos1.addTagConstraints(parser.StringVector(["NN"]))

    display_parses(parser.parse(sr1, ext_pos1))

    print "VB"
    ext_pos2 = parser.ExtPos()
    ext_pos2.addTagConstraints(parser.StringVector(["VB"]))
    display_parses(parser.parse(sr1, ext_pos2))
Beispiel #8
0
def test_parse():
    sr1 = parser.SentRep(["These", "are", "tokens", "."])
    sr2 = test_tokenizer()

    for sr in (sr1, sr2):
        parses = parser.parse(sr)
        display_parses(parses)
        print "---"
 def __init__(self, text_or_tokens, max_sentence_length=399):
     """Build a SentRep from a Sentence, a raw string, or a token sequence.

     Strings are wrapped in <s>...</s> markers and tokenized; any other
     value is treated as a pre-tokenized sequence of words."""
     if isinstance(text_or_tokens, Sentence):
         # Reuse the existing Sentence's underlying representation.
         self.sentrep = text_or_tokens.sentrep
     elif isinstance(text_or_tokens, basestring):
         self.sentrep = parser.tokenize('<s> ' + text_or_tokens + ' </s>',
                                        max_sentence_length)
     else:
         self.sentrep = parser.SentRep(text_or_tokens)
Beispiel #10
0
def test_parse():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    sr2 = test_tokenizer()

    for sr in (sr1, sr2):
        parses = parser.parse(sr)
        display_parses(parses)
        print '---'
Beispiel #11
0
def test_tokenizer():
    sr = parser.tokenize(
        "junk <s> It's some text to tokenize, if you feel like it -- or not. </s>",
        399)
    print 'sr %r' % str(sr)
    print 'sr length', len(sr)
    for i in range(len(sr)):
        print 'sr word', i, sr.getWord(i).lexeme()
    return sr
Beispiel #12
0
def test_threadslot():
    print 'parser.ThreadSlot contents:', dir(parser.ThreadSlot)
    print
    z = parser.ThreadSlot()
    print z
    print z.acquiredThreadSlot()
    print z.recycle()
    print z.acquiredThreadSlot()
    print z.acquire()
    print z.acquiredThreadSlot()
Beispiel #13
0
 def __str__(self):
     """Render the n-best list in standard n-best-list text format.

     After reranking we rebuild the text ourselves (including reranker
     scores); otherwise we defer to the parser's own formatter."""
     if not self._reranked:
         return parser.asNBestList(self._parses)
     from cStringIO import StringIO
     out = StringIO()
     # "dummy" stands in for the sentence identifier in the header line.
     out.write('%d dummy\n' % len(self.parses))
     for parse in self.parses:
         out.write('%s %s\n%s\n' %
                   (parse.reranker_score, parse.parser_score, parse.ptb_parse))
     return out.getvalue()
Beispiel #14
0
 def __str__(self):
     """Format this n-best list as a string in n-best-list format."""
     if self._reranked:
         from cStringIO import StringIO
         buf = StringIO()
         header = '%d dummy\n' % len(self.parses)
         buf.write(header)
         for entry in self.parses:
             record = '%s %s\n%s\n' % (entry.reranker_score,
                                       entry.parser_score, entry.ptb_parse)
             buf.write(record)
         return buf.getvalue()
     else:
         # Not reranked: the SWIG layer knows how to format raw parses.
         return parser.asNBestList(self._parses)
Beispiel #15
0
    def parse(self, sentence, rerank=True, max_sentence_length=399):
        """Parse text or tokens and return an NBestList.

        sentence may be a string (which will be tokenized) or a sequence
        of tokens.  When rerank is true the n-best list is reranked
        before being returned."""
        assert self._parser_model_loaded

        wrapped = Sentence(sentence, max_sentence_length)
        raw_parses = parser.parse(wrapped.sentrep, self._parser_thread_slot)
        result = NBestList(wrapped, raw_parses)
        if rerank:
            self.rerank(result)
        return result
Beispiel #16
0
    def parse_tagged(self, tokens, possible_tags, rerank=True):
        """Parse pre-tokenized text with per-token POS constraints.

        tokens is a sequence of strings; possible_tags maps token index
        to a tag or list of tags.  Indices absent from possible_tags are
        left unconstrained.  When rerank is true the n-best list is
        reranked."""
        assert self._parser_model_loaded

        ext_pos = parser.ExtPos()
        for index, _ in enumerate(tokens):
            tags = possible_tags.get(index, [])
            if isinstance(tags, basestring):
                # A lone tag string means a single-element constraint.
                tags = [tags]
            ext_pos.addTagConstraints(parser.VectorString(tags))

        wrapped = Sentence(tokens)
        raw = parser.parse(wrapped.sentrep, ext_pos,
                           self._parser_thread_slot)
        result = NBestList(wrapped, raw)
        if rerank:
            self.rerank(result)
        return result
Beispiel #17
0
def test_extpos():
    sr1 = parser.SentRep(['record'])

    print 'Unconstrained'
    display_parses(parser.parse(sr1, thread_slot))

    print 'NN'
    ext_pos1 = parser.ExtPos()
    ext_pos1.addTagConstraints(parser.VectorString(['NN']))

    display_parses(parser.parse(sr1, ext_pos1, thread_slot))

    print 'VB'
    ext_pos2 = parser.ExtPos()
    ext_pos2.addTagConstraints(parser.VectorString(['VB']))
    display_parses(parser.parse(sr1, ext_pos2, thread_slot))
Beispiel #18
0
def test_multiword_extpos():
    sr1 = parser.SentRep("British left waffles on Falklands .".split())

    print "waffles = [anything]:"
    display_parses(parser.parse(sr1))

    if 1:
        print "waffles = VBZ/VBD/VB:"
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(["VBZ", "VBD", "VB"]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos))

        print "waffles = NNS:"
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(["NNS"]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos))

        print "waffles = NN/NNS:"
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(["NN", "NNS"]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos))
Beispiel #19
0
def test_multiword_extpos():
    sr1 = parser.SentRep('British left waffles on Falklands .'.split())

    print 'waffles = [anything]:'
    display_parses(parser.parse(sr1))

    if 1:
        print 'waffles = VBZ/VBD/VB:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(['VBZ', 'VBD', 'VB']))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos, None))

        print 'waffles = NNS:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(['NNS']))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos, None))

        print 'waffles = NN/NNS:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector(['NN', 'NNS']))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        ext_pos.addTagConstraints(parser.StringVector([]))
        display_parses(parser.parse(sr1, ext_pos, None))
Beispiel #20
0
    def parse(self, sentence, rerank=True, max_sentence_length=399):
        """Parse text or tokens into an NBestList.

        sentence is a string (tokenized for you) or a token sequence.
        If rerank is true, the resulting n-best list is reranked.  A
        RuntimeError from the parser yields an empty n-best list rather
        than propagating."""
        assert self._parser_model_loaded

        sent = Sentence(sentence, max_sentence_length)
        try:
            raw_parses = parser.parse(sent.sentrep, self._parser_thread_slot)
        except RuntimeError:
            # Parser failure becomes an empty list instead of an exception.
            raw_parses = []
        nbest = NBestList(sent, raw_parses)
        if rerank:
            nbest.rerank(self)
        return nbest
Beispiel #21
0
    def parse_tagged(self, tokens, possible_tags, rerank=True):
        """Parse pre-tagged, pre-tokenized text.

        tokens: sequence of token strings.  possible_tags: map from token
        index to a POS tag or list of tags; missing indices are left
        unconstrained.  Reranks the n-best list when rerank is true."""
        assert self._parser_model_loaded

        ext_pos = parser.ExtPos()
        for position in range(len(tokens)):
            constraint = possible_tags.get(position, [])
            if isinstance(constraint, basestring):
                # A bare tag string counts as a one-element constraint.
                constraint = [constraint]
            ext_pos.addTagConstraints(parser.VectorString(constraint))

        sent = Sentence(tokens)
        raw = parser.parse(sent.sentrep, ext_pos, self._parser_thread_slot)
        nbest = NBestList(sent, raw)
        if rerank:
            nbest.rerank(self)
        return nbest
Beispiel #22
0
def test_multiword_extpos():
    sr1 = parser.SentRep('British left waffles on Falklands .'.split())

    print 'waffles = [anything]:'
    display_parses(parser.parse(sr1, thread_slot))

    if 1:
        print 'waffles = VBZ/VBD/VB:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString(['VBZ', 'VBD', 'VB']))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        display_parses(parser.parse(sr1, ext_pos, thread_slot))

        print 'waffles = NNS:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString(['NNS']))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        display_parses(parser.parse(sr1, ext_pos, thread_slot))

        print 'waffles = NN/NNS:'
        ext_pos = parser.ExtPos()
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString(['NN', 'NNS']))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        display_parses(parser.parse(sr1, ext_pos, thread_slot))
Beispiel #23
0
def test_as_nbest_list():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    parses = parser.parse(sr1)
    print parser.asNBestList(parses, 'test_as_nbest_list_sentence')
Beispiel #24
0
def test_as_nbest_list():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    parses = parser.parse(sr1)
    print parser.asNBestList(parses, 'test_as_nbest_list_sentence')
Beispiel #25
0
def initialize(n=10):
    """Load the parser model and set options for an n-best parse.

    n: size of the n-best list the parser should produce."""
    # this assumes we're in PARSE/
    parser.loadModel("../DATA/EN")
    parser.setOptions('En', False, n, True, 21, 0, 0)
Beispiel #26
0
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
# License for the specific language governing permissions and limitations
# under the License.

import SWIGParser
import fileinput

if __name__ == "__main__":
    from test import initialize, display_parses
    thread_slot = SWIGParser.ThreadSlot()
    initialize(n=50)
    for line in fileinput.input():
        line = line.strip()

        print line
        tree = SWIGParser.inputTreeFromString('(S1 ' + line + ')')
        print tree
        sentence = tree.toSentRep()
        print sentence
        parses = SWIGParser.parse(sentence, thread_slot)
        print len(parses), 'parses'
        if not parses:
            raise 'failed'
        display_parses(parses)
        print 'example failure tree', sentence.makeFailureTree(
Beispiel #27
0
def test_as_nbest_list():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    parses = parser.parse(sr1, thread_slot)
    print parser.asNBestList(parses)
Beispiel #28
0
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        ext_pos.addTagConstraints(parser.VectorString([]))
        display_parses(parser.parse(sr1, ext_pos, thread_slot))


def test_threadslot():
    print 'parser.ThreadSlot contents:', dir(parser.ThreadSlot)
    print
    z = parser.ThreadSlot()
    print z
    print z.acquiredThreadSlot()
    print z.recycle()
    print z.acquiredThreadSlot()
    print z.acquire()
    print z.acquiredThreadSlot()


if __name__ == "__main__":
    # Shared ThreadSlot used by the test functions above.
    thread_slot = parser.ThreadSlot()
    dir_contents()
    if 1:
        initialize(n=5)
        test_as_nbest_list()
        # Repeat the test suite many times so memory leaks show up as
        # steady growth in the process footprint.
        for x in range(1000):  # memory leak detection
            print 'iteration', x
            test_tokenizer()
            test_parse()
            test_multiword_extpos()
            test_extpos()
Beispiel #29
0
def initialize(n=10):
    """Load the English parsing model and request n-best parses of size n."""
    # this assumes we're in PARSE/
    parser.loadModel("../DATA/EN")
    parser.setOptions('En', False, n, True, 21, 0, 0)
Beispiel #30
0
 def __init__(self):
     """Start with no models loaded and a fresh parser thread slot."""
     self._parser_model_loaded = False
     self._reranker_model = None
     self._parser_thread_slot = parser.ThreadSlot()
Beispiel #31
0
def test_as_nbest_list():
    sr1 = parser.SentRep(["These", "are", "tokens", "."])
    parses = parser.parse(sr1)
    print parser.asNBestList(parses, "test_as_nbest_list_sentence")
Beispiel #32
0
def test_as_nbest_list():
    sr1 = parser.SentRep(['These', 'are', 'tokens', '.'])
    parses = parser.parse(sr1, thread_slot)
    print parser.asNBestList(parses)
Beispiel #33
0
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
# License for the specific language governing permissions and limitations
# under the License.

import SWIGParser
import fileinput

if __name__ == "__main__":
    from test import initialize, display_parses
    initialize(n=50)
    for line in fileinput.input():
        line = line.strip()

        print line
        tree = SWIGParser.inputTreeFromString('(S1 ' + line + ')')
        print tree
        sentence = tree.toSentRep()
        print sentence
        parses = SWIGParser.parse(sentence)
        print len(parses), 'parses'
        if not parses:
            raise 'failed'
        display_parses(parses)
        print 'example failure tree', sentence.makeFailureTree('Xyz')
        print
Beispiel #34
0
 def __str__(self):
     """Return this n-best list formatted by the parser's own formatter."""
     return parser.asNBestList(self._parses)