예제 #1
0
def chartParser():
    """
    Chart-parse one example sentence with a small hand-written CFG.

    Builds the grammar, runs an NLTK ``ChartParser`` over the sentence
    'Bangalore is the capital of Karnataka', prints the number of edges in
    the resulting chart, then prints and draws every parse tree found.
    Takes no arguments and returns nothing; all output goes to stdout and
    to the tree-drawing window opened by ``Tree.draw()``.
    """
    from nltk.grammar import CFG
    from nltk.parse.chart import ChartParser, BU_LC_STRATEGY

    # Grammar in BNF-like notation: the start symbol is S and the
    # terminals are the quoted words.
    toy_grammar = CFG.fromstring("""
    S -> T1 T4
    T1 -> NNP VBZ
    T2 -> DT NN
    T3 ->IN NNP
    T4 -> T3 | T2 T3
    NNP -> 'Tajmahal' | 'Agra' | 'Bangalore' | 'Karnataka'
    VBZ -> 'is'
    IN -> 'in' | 'of'
    DT -> 'the'
    NN -> 'capital'
    """)

    # trace=True makes the parser print the analysis steps as it runs.
    # The strategy is passed explicitly; the original author notes that
    # BU_LC_STRATEGY is also the default, so omitting it should work too.
    chart_parser = ChartParser(toy_grammar, BU_LC_STRATEGY, trace=True)

    words = 'Bangalore is the capital of Karnataka'.split()

    # Parse the token list; the returned chart holds every edge produced.
    filled_chart = chart_parser.chart_parse(words)

    # Collect all complete parse trees rooted at the grammar's start symbol.
    found_trees = list(filled_chart.parses(toy_grammar.start()))

    edge_count = len(filled_chart.edges())
    print('Total Edges:', edge_count)

    for parse_tree in found_trees:
        print(parse_tree)
        parse_tree.draw()
예제 #2
0
    def __init__(self, **parser_args):
        """
        Constructor. Initializes a MbmaParser.

        Delegates to ``ChartParser.__init__``, passing an empty list in the
        first positional slot after ``self`` and ``BU_RHR_STRATEGY`` in the
        second, and forwarding every keyword argument unchanged.

        Args:
            - parser_args: needs a keyword grammar which is of
                type :class:`ContextFreeGrammar`
        """
        # NOTE(review): an empty list is already passed positionally here. If
        # the first parameter of ChartParser.__init__ is named ``grammar``,
        # also supplying ``grammar=...`` through parser_args (as the docstring
        # asks) would raise a TypeError for a duplicate argument - confirm
        # against the ChartParser in use.
        ChartParser.__init__(self, [], BU_RHR_STRATEGY, **parser_args)
예제 #3
0
 def generate_context_free_grammar_novel_text(
     self, number_of_words_in_sentence=0, number_of_sentences_per_record=0, number_of_records=0
 ):
     """
     Generate novel text from the context free grammar held in ``self._corpus``.

     ``self._corpus`` may be an NLTK grammar object (``CFG`` or one of its
     subclasses) or anything ``CFG.fromstring`` accepts. Each generated
     sentence has its punctuation stripped, gets a randomly chosen
     terminator ('. ', '! ' or '? ') appended and is capitalized; the
     sentences of a record are concatenated, and so are the records.

     Any exception raised while building the grammar or generating text is
     logged through ``self.logger`` and not re-raised.

     @param number_of_words_in_sentence: Passed to ``generate_text`` as ``n``.
     @type number_of_words_in_sentence: int
     @param number_of_sentences_per_record: An indicator as to the number of sentences per record to generate.
     @type number_of_sentences_per_record: int
     @param number_of_records: An indicator as to the total number of records to generate.
     @type number_of_records: int
     @return: All generated records concatenated into one string; whatever was
         generated before a logged error, which may be the empty string.
     @rtype: str
     """
     punct_selector = [". ", "! ", "? "]
     # Translation table that deletes every ASCII punctuation character.
     punctuation_stop_symbols = {ord(char): None for char in string.punctuation}
     records = []
     try:
         corpus = self._corpus
         # In NLTK, FeatureGrammar and PCFG both subclass CFG, so a single
         # isinstance check covers all three grammar kinds. A grammar object
         # is used directly: wrapping it in a parser only to read the same
         # grammar back added nothing (and the CFG branch used to pass the
         # bound method ``parser.grammar`` instead of calling it).
         grammar = corpus if isinstance(corpus, CFG) else CFG.fromstring(corpus)
         for _ in range(number_of_records):
             sentences = []
             for _ in range(number_of_sentences_per_record):
                 # NOTE(review): generate_text is defined outside this block.
                 # The join assumes it yields strings - if it is NLTK's
                 # nltk.parse.generate.generate, it yields token lists and
                 # ``n`` caps the number of sentences; confirm.
                 sentence = " ".join(generate_text(grammar, depth=2, n=number_of_words_in_sentence))
                 sentence = sentence.translate(punctuation_stop_symbols) + random.choice(punct_selector)
                 sentences.append(sentence.capitalize())
             records.append("".join(sentences))
     except Exception as error:
         # Best-effort generation: report the failure and fall through so the
         # caller still receives a string.
         self.logger.error(
             "TextGenerator.generate_context_free_grammar_novel_text: Error occurred - {0}".format(str(error))
         )
     # Every sentence already ends with a terminator followed by a space, so
     # records are joined without an extra separator.
     return "".join(records)
 def __init__(self, grammar,
              strategy=BU_LC_FEATURE_STRATEGY,
              trace_chart_width=20,
              chart_class=FeatureChart,
              **parser_args):
     """
     Initialize the parser by delegating to ``ChartParser.__init__``.

     This constructor only changes the defaults: ``strategy`` defaults to
     ``BU_LC_FEATURE_STRATEGY``, ``trace_chart_width`` to 20 and
     ``chart_class`` to ``FeatureChart``. ``grammar`` is passed through
     positionally and any further keyword arguments in ``parser_args`` are
     forwarded unchanged.
     """
     ChartParser.__init__(self, grammar,
                          strategy=strategy,
                          trace_chart_width=trace_chart_width,
                          chart_class=chart_class,
                          **parser_args)
예제 #5
0
 def __init__(self, grammar,
              strategy=BU_LC_FEATURE_STRATEGY,
              trace_chart_width=20,
              chart_class=FeatureChart,
              **parser_args):
     """
     Initialize the parser by delegating to ``ChartParser.__init__``.

     This constructor only changes the defaults: ``strategy`` defaults to
     ``BU_LC_FEATURE_STRATEGY``, ``trace_chart_width`` to 20 and
     ``chart_class`` to ``FeatureChart``. ``grammar`` is passed through
     positionally and any further keyword arguments in ``parser_args`` are
     forwarded unchanged.
     """
     ChartParser.__init__(self, grammar,
                          strategy=strategy,
                          trace_chart_width=trace_chart_width,
                          chart_class=chart_class,
                          **parser_args)
예제 #6
0
    def generate_context_free_grammar_novel_text(
            self, corpus, number_of_words_in_sentence,
            number_of_sentences_per_record, number_of_records):
        '''
        Generate novel text from a context free grammar.

        Each generated sentence has its punctuation stripped, gets a
        randomly chosen terminator ('. ', '! ' or '? ') appended and is
        capitalized; the sentences of a record are concatenated, and so are
        the records. Any exception raised while building the grammar or
        generating text is logged with ``logging.error`` and not re-raised.

        Params:
        -------
        - corpus: An NLTK grammar object (``CFG`` or one of its subclasses),
        or anything ``CFG.fromstring`` accepts.
        - number_of_words_in_sentence (int): Passed to ``generate_text`` as
        ``n``.
        - number_of_sentences_per_record (int): An indicator as to the number
        of sentences per record to generate.
        - number_of_records (int): An indicator as to the total number of
        records to generate.

        Returns: str - all generated records concatenated into one string;
        whatever was generated before a logged error, which may be the empty
        string.
        '''
        punct_selector = ['. ', '! ', '? ']
        # Translation table that deletes every ASCII punctuation character.
        punctuation_stop_symbols = {
            ord(char): None for char in string.punctuation}
        records = []
        try:
            # In NLTK, FeatureGrammar and PCFG both subclass CFG, so a single
            # isinstance check covers all three grammar kinds. A grammar
            # object is used directly: wrapping it in a parser only to read
            # the same grammar back added nothing (and the CFG branch used to
            # pass the bound method ``parser.grammar`` instead of calling it).
            if isinstance(corpus, CFG):
                grammar = corpus
            else:
                grammar = CFG.fromstring(corpus)
            for _ in range(number_of_records):
                sentences = []
                for _ in range(number_of_sentences_per_record):
                    # NOTE(review): generate_text is defined outside this
                    # block. The join assumes it yields strings - if it is
                    # NLTK's nltk.parse.generate.generate, it yields token
                    # lists and ``n`` caps the number of sentences; confirm.
                    sentence = ' '.join(
                        generate_text(grammar,
                                      depth=2,
                                      n=number_of_words_in_sentence))
                    sentence = sentence.translate(
                        punctuation_stop_symbols) + random.choice(
                            punct_selector)
                    sentences.append(sentence.capitalize())
                records.append(''.join(sentences))
        except Exception as error:
            # Best-effort generation: report the failure and fall through so
            # the caller still receives a string.
            logging.error('TextGenerator: Error occurred - {0}'.format(
                str(error)))
        # Every sentence already ends with a terminator followed by a space,
        # so records are joined without an extra separator.
        return ''.join(records)
예제 #7
0
from nltk.grammar import CFG
from nltk.parse.chart import ChartParser, BU_LC_STRATEGY

# Toy grammar: S is the start symbol and the quoted words are the terminals.
grammar = CFG.fromstring("""
S -> T1 T4
T1 -> NNP VBZ
T2 -> DT NN
T3 -> IN NNP
T4 -> T3 | T2 T3
NNP -> 'Tajmahal' | 'Agra' | 'Bangalore' | 'Karnataka'
VBZ -> 'is'
IN -> 'in' | 'of'
DT -> 'the'
NN -> 'capital'
""")

# trace=True makes the parser print each step of the chart construction.
cp = ChartParser(grammar, BU_LC_STRATEGY, trace=True)

sentence = "Bangalore is the capital of Karnataka"
tokens = sentence.split()
chart = cp.chart_parse(tokens)
# Materialize every complete parse rooted at the grammar's start symbol.
parses = list(chart.parses(grammar.start()))
print("Total Edges :", len(chart.edges()))
for tree in parses:
    print(tree)
    # Draw inside the loop: drawing after it raised NameError when the
    # sentence had no parse, and showed only the last tree when it had several.
    tree.draw()