Example #1
def execute(text: str):
    groucho_grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)
    parser = ChartParser(groucho_grammar)

    tokens = word_tokenize(text=SAMPLE_3)
    print(type(tokens))
    print(tokens)
    for tree in parser.parse(tokens=[
            'The',
            'little',
            'bear',
            'saw',
            'the',
            'fine',
            'fat',
            'trout',
            'in',
            'the',
            'brook',
    ]):
        print(tree)
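Note that the hard-coded token list above ("The little bear saw the fine fat trout in the brook") shares no vocabulary with groucho_grammar, so the loop prints nothing. A minimal sanity check with the one sentence the grammar actually covers (an illustrative addition, not part of the original snippet):

# Illustrative usage: the grammar yields both PP-attachment readings
# of the classic Groucho Marx sentence.
for tree in parser.parse(['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']):
    print(tree)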
Example #2

    def generate_phrase(self, pool):
        try:
            adj = choice(list(pool.adjectives))
            parser = ChartParser(self.grammar)
            gr = parser.grammar()
            phrase = self.produce(gr, gr.start())
            noun = choice(list(pool.comparisons[adj]))
            if en.noun.plural(noun.name) == noun.name:
                article = "the"
            else:
                article = en.noun.article(noun.name).split(" ")[0]
            replace_words = {'adj': adj, 'n': noun, 'det': article}
            for pos in replace_words:
                while pos in phrase:
                    try:
                        phrase = self.replace_pos(pos, replace_words[pos],
                                                  phrase)
                    except:
                        return
            for w in phrase:
                if not isinstance(w, Word):
                    phrase[phrase.index(w)] = Word(w)
            return phrase
        except:
            return
Example #3
def context_free_grammar():
    cfg = CFG.fromstring("""\
    ################# Rules #################
    S -> NP VP
    S -> PP NP VP
    S -> Wh Aux NP VP 
    NP -> ProperNoun | CC ProperNoun | N | ProperNoun NP | AP N | DET NP | N PP    
    VP -> V | V NP | Adv VP | V NP VP
    AP -> Adj | Adj AP
    PP -> P NP | P NP VP
    
    ################# Lexicons ################# 
    N -> 'milk'| 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table'
    V -> 'laughs' | 'laughed' | 'drink' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'wear'
    ProperNoun -> 'Bart' | 'Homer' | 'Lisa'
    Aux -> 'do' | 'does'
    CC -> 'and'
    Adj -> 'blue' | 'healthy' | 'green' 
    DET -> 'a' | 'the' 
    Adv -> 'always' | 'never' 
    P -> 'in' | 'before' | 'on' | 'when'
    Wh -> 'when'
    """)
    cfparser = ChartParser(cfg)
    sents = text.splitlines()
    for sent in sents:
        parses = cfparser.parse(sent.split())
        print(sent)
        for tree in parses:
            print(tree)
Example #4
def parse_original_sentences(grammar):
    '''
    Uses given grammar to parse sentences from the file corpus.txt
    Writes the parse trees of each sentence in parsed_corpus.txt
    :param grammar: A context free grammar in the form of nltk.grammar.CFG
    :return: None (Output in parsed_corpus.txt)
    '''
    parser = ChartParser(grammar)
    f = open("corpus.txt", "r")
    f_write = open("parsed_corpus.txt", "w")
    lines = f.readlines()
    count = 1
    working = []
    for line in lines:
        line = line.replace("didnt", "did not")
        s = "Tree {}:\n".format(count)
        sent = word_tokenize(line[:-2])
        for tree in parser.parse(sent):
            s += str(tree) + "\n\n"
            working.append(count)
            break
        count += 1
        f_write.write(s)

    f.close()
    f_write.close()
    print(
        "Parsed form of original corpus sentences using this CFG can be found in parsed_corpus.txt\n"
    )
Example #5

    def generate_phrase(self, pool):
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(pool.nouns))
        try:
            replace_words = {
                'n': [noun],
                'v': [Word(self.conjugate(v.name)) for v in list(pool.verbs[noun])],
                'adj': pool.epithets[noun],
                'atv': [Word(self.conjugate(v, self.person)) for v in self.atv],
                'eva': [Word(self.conjugate(v, self.person)) for v in self.eva],
                'ej': pool.emotional_adjectives,
                'en': pool.emotional_nouns,
                'erb': pool.emotional_adverbs,
                'person': [Word(self.persons[self.person][0])],
                'pron': [Word(self.persons[self.person][1])]
            }
        except:
            return
        for pos in replace_words:
            while pos in phrase:
                try:
                    word = choice(replace_words[pos])
                    phrase = self.replace_pos(pos, word, phrase)
                except:
                    return
        for w in phrase:
            if not isinstance(w, Word):
                phrase[phrase.index(w)] = Word(w)
        return phrase
Example #6
def recognizes(cfg, word):
    """
    cfg : a nltk.grammar.CFG instance
    word : a string with tokens separated with spaces.

    A parser is created at every call of this function.
    """
    return _recognizes(ChartParser(cfg), word.split())
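The private helper `_recognizes` is defined elsewhere in the original module. A minimal sketch, assuming it only needs to report whether at least one parse tree exists (ChartParser.parse raises ValueError on tokens outside the grammar, treated here as a reject):

def _recognizes(parser, tokens):
    # True iff the parser yields at least one tree for the token list.
    try:
        for _ in parser.parse(tokens):
            return True
    except ValueError:  # tokens outside the grammar's coverage
        pass
    return False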
Example #7
    def __init__(self, grammar):
        """
        Initialize from a CFG.

        :type grammar: CFG
        :param grammar: The grammar for this oracle
        """
        self._parser = ChartParser(grammar)
Example #8
def parse_sentences(grammar):
    parser = ChartParser(grammar)
    sent = input("Parse a sentence (Q to quit): ")
    while sent != "Q":
        tokens = word_tokenize(sent)
        trees = parser.parse(tokens)
        print_trees(trees)
        sent = input("Parse a sentence (Q to quit): ")
Example #9
def generate_name(G):
    grammar = CFG.fromstring(G)

    parser = ChartParser(grammar)

    gr = parser.grammar()
    tokens = produce(gr, gr.start())
    name = ''.join(tokens)
    return name.title()
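The `produce` helper is not shown in this snippet (several later examples rely on it too). A common implementation, sketched here as an assumption, expands a nonterminal by picking a random production at each step; on a recursive grammar it can recurse without bound, which is why Examples #29 and #30 pass a maxdepth:

from random import choice
from nltk.grammar import Nonterminal

def produce(grammar, symbol):
    # Randomly expand `symbol` and return a flat list of terminal tokens.
    words = []
    production = choice(grammar.productions(lhs=symbol))
    for sym in production.rhs():
        if isinstance(sym, Nonterminal):
            words.extend(produce(grammar, sym))
        else:
            words.append(sym)
    return words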
Example #10
def accepted_length(cfg, x):
    """
    Returns a list of every accepted word of a context-free grammar with a specific length
    """
    terminals = _get_terminal_symbols(cfg)
    parser = ChartParser(cfg)
    accepted = []
    for y in product(terminals, repeat=x):
        if _recognizes(parser, y):
            accepted.append(' '.join(y))
    return accepted
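Both `product` (from itertools) and the private `_get_terminal_symbols` come from the original module. A minimal sketch of the latter, assuming it collects every right-hand-side symbol that is not a Nonterminal:

from nltk.grammar import Nonterminal

def _get_terminal_symbols(cfg):
    # Any RHS symbol that is not a Nonterminal is a terminal token.
    terminals = set()
    for production in cfg.productions():
        for symbol in production.rhs():
            if not isinstance(symbol, Nonterminal):
                terminals.add(symbol)
    return terminals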
Example #11

def generate_impacts_question(attr, impacts, phase):
    impact = get_attribute_name(attr, impacts)
    parser = ChartParser(generate_impacts_grammar(impact, phase))
    gr = parser.grammar()
    question = {
        'text': ' '.join(produce(gr, gr.start())),
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 4
    }
    return question
Example #12

def generate_entities_question(attr, entities, phase):
    entity = get_attribute_name(attr, entities)
    parser = ChartParser(generate_entities_grammar(entity, phase))
    gr = parser.grammar()
    question = {
        'text': ' '.join(produce(gr, gr.start())),
        'answer': 0,
        'questionId': 0,
        'attrId': attr,
        'topicId': 3
    }
    return question
Example #13
def recognizesAll(cfg, words):
    """
    Returns a list of boolean values corresponding to [recognizes(cfg,w) for w in words].
    cfg : a nltk.grammar.CFG instance
    words must be a list of string with tokens separated with spaces.

    """
    r = []
    parser = ChartParser(cfg)
    for word in words:
        r.append(_recognizes(parser, word.split()))
    return r
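For instance, with the Groucho grammar from Example #1 (and the `_recognizes` sketch under Example #6), one would expect:

recognizesAll(groucho_grammar, ['I shot an elephant', 'shot I an elephant'])
# -> [True, False]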
Example #14

def get_productions(sentence, grammar):
    trees = []
    sent = sentence.split(' ')
    print(sent)
    cfgGrammar = CFG.fromstring(grammar)

    parser = ChartParser(cfgGrammar)
    for tree in parser.parse(sent):
        trees.append(str(tree).replace("\n", " "))

    # print trees[0]
    t = Tree.fromstring(trees[0])
    return t.productions()
Example #15
def accepted_under(cfg, length):
    """
    Returns a list of every accepted word of a context-free grammar under a given length.
    cfg : a nltk.grammar.CFG instance. 
    """
    terminals = _get_terminal_symbols(cfg)

    parser = ChartParser(cfg)
    accepted = []
    for x in range(1, length):
        for y in product(terminals, repeat=x):
            if _recognizes(parser, y):
                accepted.append(' '.join(y))
    return accepted
Example #16

def generate_sources_question(attr, parent_attr, sources, phase):
    id = attr
    attribute = get_attribute_name(attr, sources)
    attribute = analyze_numerus(attribute)
    if parent_attr is not None:
        parent_attr = get_attribute_name(parent_attr, sources)
    parser = ChartParser(
        generate_sources_grammar(attribute, parent_attr, phase))
    gr = parser.grammar()
    question = {
        'text': ' '.join(produce(gr, gr.start())),
        'answer': 0,
        'questionId': 0,
        'attrId': id,
        'topicId': 1
    }
    return question
Example #17

def main():
    cfparser = ChartParser(cfg)
    index = 0
    for sent in text:
        index += 1
        print_tree(sent, cfparser, index)
    print("Input a test sentence or the number of one above: (q to quit)")
    line = sys.stdin.readline().strip()
    while line != "q":
        try:
            index = int(line)
            print_tree(text[index], cfparser, index)
        except IndexError:
            print("Index out of range. Please check.")
        except ValueError:
            print_tree(line, cfparser, -1)
        print("Input a test sentence or the number of one above: (q to quit)")
        line = sys.stdin.readline().strip()
Example #18
def parse_blazon(blazon):
    blazon = blazon.lower()
    to_discard = set(string.punctuation)
    to_discard.remove("&")
    blazon = ''.join(c for c in blazon if c not in to_discard)
    # Convert raw data to tokens to be parsed
    tokens = word_tokenize(blazon)

    # Replace instances of '1st', '2nd', etc with their non abbreviated forms
    for (index, item) in enumerate(tokens):
        if (item in abbr_to_full):
            tokens[index] = abbr_to_full[item]
        elif (item == "&"):
            tokens[index] = "and"

    # Sanitise tokens
    tokens = disambiguate_colours(tokens)
    tokens = reorder(tokens)

    # Construct grammar and parser
    with open('app/parser_cfg.txt') as f:
        raw_cfg = f.read()

    parser_grammar = CFG.fromstring(raw_cfg)
    parser = ChartParser(parser_grammar)

    # Parse data into tree
    output_data = None
    for tree in parser.parse(tokens):
        output_data = tree

    if (output_data is None):
        print("Error: Parse failed, please check input is of correct format.")
    else:
        # Convert Tree to dict to prepare it for JSON serialisation
        output_data = tree_to_dict(output_data)
        # If a tincture is in the top level of the dictionary, change its name to "field"
        if ("tincture" in output_data.keys()):
            output_data["field"] = output_data["tincture"]
            output_data.pop("tincture")
        # Return the dict, ready for JSON serialisation
        return output_data
Example #19
    def verify(self, grammar, tags):
        """ Verify tag sequence as grammatically correct or not """
        # rd_parser = RecursiveDescentParser(grammar)
        rd_parser = ChartParser(grammar)
        valid = False

        try:
            for tree in rd_parser.parse(tags):
                valid = True
                break
        except ValueError:
            print "This is a grammatical structure I don't understand yet."
            return

        if valid:
            print "Valid"
            return True
        else:
            print "Invalid"
            return False
Example #20
    def generate_phrase(self):
        adj = choice([a for a in self.blackboard.pool.comparisons if len(self.blackboard.pool.comparisons[a]) > 0])
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        noun = choice(list(self.blackboard.pool.comparisons[adj]))
        noun.name = en.singularize(noun.name)
        article = en.referenced(noun.name).split(" ")[0]
        replace_words = {'adj': adj, 'n': noun, 'det': article}

        for pos in replace_words:
            while pos in phrase:
                try:
                    phrase = self.replace_pos(
                        pos, replace_words[pos], phrase)
                except:
                    return
        for w in phrase:
            if not isinstance(w, Word):
                phrase[phrase.index(w)] = Word(w)
        return phrase
Example #21
    def generate_phrase(self, pool):
        noun = random.choice(list(pool.nouns))
        parser = ChartParser(self.grammar)
        gr = parser.grammar()
        phrase = self.produce(gr, gr.start())
        phrase.append("?")

        try:
            adj = choice(pool.epithets[noun])
        except:
            return
        replace_words = {'adj': adj, 'n': noun, 'be': self.conjugate("be")}
        for pos in replace_words:
            while pos in phrase:
                try:
                    phrase = self.replace_pos(pos, replace_words[pos], phrase)
                except:
                    return
        for w in phrase:
            if not isinstance(w, Word):
                phrase[phrase.index(w)] = Word(w)
        return phrase
Example #22
def parse_sentences(grammar, sent):
    parser = ChartParser(grammar)
    tokens = word_tokenize(sent)
    trees = parser.parse(tokens)
    return trees
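Since ChartParser.parse returns a lazy iterator, the caller has to drain the result to see any trees, e.g. (assuming `grammar` covers the sentence):

for tree in parse_sentences(grammar, "I shot an elephant in my pajamas"):
    tree.pretty_print()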
Example #23

cfg = CFG.fromstring("""
Nominal    ->    NOUN | Nominal PP | ADJ Nominal | Nominal NOUN
PP         ->    Prep NP
AdvC       ->    CONJ S
ProperNoun ->    'Bart' | 'Homer' | 'Lisa'
CONJ       ->    'and' | 'when'
ADV        ->    'always' | 'never'
V          ->    'laughs' | 'laughed' | 'drink' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'wear'
DET        ->    'a' | 'the'
NOUN       ->    'milk' | 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table'
ADJ        ->    'blue' | 'healthy' | 'green'
Prep       ->    'in' | 'before' | 'on'
WH         ->    'when'
Aux        ->    'do' | 'does'
""")

cfparser = ChartParser(cfg)
text = """
Bart laughs
Homer laughed
Bart and Lisa drink milk
Bart wears blue shoes
Lisa serves Bart a healthy green salad
Homer serves Lisa
Bart always drinks milk
Lisa thinks Homer thinks Bart drinks milk
Homer never drinks milk in the kitchen before midnight
when Homer drinks milk Bart laughs
when does Lisa drinks the milk on the table
when do Lisa and Bart wear shoes
"""
Example #24

cf_grammar = CFG.fromstring("""
NN -> 'milk' | 'shoes' | 'salad' | 'kitchen' | 'midnight' | 'table'
Adjective -> 'blue' | 'healthy' | 'green'
Determinant -> 'a' | 'the'
Adverb -> 'always' | 'never' | 'before' | 'when'
Preposition -> 'in' | 'on'
""")

#NP -> ProperNoun
#ProperNoun -> 'Homer' | 'Bart'
#VP -> V
#V -> 'laughs' | 'laughed' |

# Produce Trees for Step 2

# Init Parser
cf_parser = ChartParser(cf_grammar)

# Init Sentences to test
correct_grammar_sents = """\
Bart laughs
Homer laughed
Bart and Lisa drink milk
Bart wears blue shoes
Lisa serves Bart a healthy green salad
Homer serves Lisa
Bart always drinks milk
Lisa thinks Homer thinks Bart drinks milk
Homer never drinks milk in the kitchen before midnight
when Homer drinks milk Bart laughs
when does Lisa drink the milk on the table
when do Lisa and Bart wear shoes
"""

Example #25
import nltk
from nltk import ChartParser

# Load grammar.
grammar = nltk.data.load('labelgrammar.cfg')
parser = ChartParser(grammar)


def analyze_label(label):
    "Analyze a label using our CFG."
    tokenized_label = label.split()
    try:
        analysis = parser.parse(tokenized_label)
        trees = list(analysis)
        for tree in trees:
            print(tree)
        if len(trees) > 0:
            return trees
        else:
            print('No analysis possible')
            return None

    except ValueError as e:
        print('No analysis possible:', e)
        return None
Example #26
from nltk import data, ChartParser
from nltk import pos_tag
from nltk.corpus import inaugural

data.clear_cache()
G = data.load("file:mygrammar.cfg")
RDP = ChartParser(G)


# extract_short_sents :: Int?, Int?, Corpus?-> [[(String, String)]]
def extract_short_sents(num=8, max_len=8, corpus=inaugural):
    li = []
    num = num if num < len(corpus.fileids()) else len(corpus.fileids())

    for i in range(num):
        for sent in corpus.sents(corpus.fileids()[i]):
            if len(sent) <= max_len:
                li.append(pos_tag(sent))
                if len(li) / 3.0 == i:
                    break

    return li


# parse :: String -> ParseTree
def parse(s):
    return RDP.parse(s.split())


if __name__ == "__main__":
    sents = [
Example #27

    def __init__(self, grammar_string):
        self.grammar = CFG.fromstring(grammar_string)
        self.parser = ChartParser(self.grammar)
        self.tokenizer = self._get_tokenizer()
Example #28
def main():
    # Check arguments
    if (len(sys.argv) == 1):
        print("Too few arguments\nUsage: $ python generate.py <INPUT_FILE> [OUTPUT_FILE]")
        sys.exit(0)
    elif (len(sys.argv) > 3):
        print("Too many arguments\nUsage: $ python generate.py <INPUT_FILE> [OUTPUT_FILE]")
        sys.exit(0)

    # Initialise paths
    WORKING_DIR = sys.path[0]
    INPUT_FILE = os.path.join(WORKING_DIR, sys.argv[1])

    if (len(sys.argv) == 3):
        OUTPUT_FILE = os.path.join(WORKING_DIR, sys.argv[2])
    else:
        # Extract base filename of input file
        OUTPUT_NAME = os.path.basename(INPUT_FILE)
        # Strip off file extension and add own (.esc for escutcheon)
        OUTPUT_NAME = "trees/" + os.path.splitext(OUTPUT_NAME)[0] + ".esc"
        OUTPUT_FILE = os.path.join(WORKING_DIR, OUTPUT_NAME)

    # Read in input data
    with open(INPUT_FILE) as f:
        raw_data = f.read().lower()

        to_discard = set(string.punctuation)
        to_discard.remove("&")

        raw_data = ''.join(c for c in raw_data if c not in to_discard)

    # Convert raw data to tokens to be parsed
    tokens = word_tokenize(raw_data)

    # Replace instances of '1st', '2nd', etc with their non abbreviated forms
    for (index, item) in enumerate(tokens):
        if (item in abbr_to_full):
            tokens[index] = abbr_to_full[item]
        elif (item == "&"):
            tokens[index] = "and"

    # Sanitise tokens
    tokens = disambiguate_colours(tokens)
    tokens = reorder(tokens)

    # Construct grammar and parser
    with open('parser_cfg.txt') as f:
        raw_cfg = f.read()

    parser_grammar = CFG.fromstring(raw_cfg)
    parser = ChartParser(parser_grammar)

    # Parse data into tree
    output_data = None
    for tree in parser.parse(tokens):
        output_data = tree

    if (output_data is None):
        print("Error: Parse failed, please check input is of correct format.")
    else:
        # Convert Tree to dict to prepare it for JSON serialisation
        output_data = tree_to_dict(output_data)

        # If a tincture is in the top level of the dictionary, change its name to "field"
        if ("tincture" in output_data.keys()):
            output_data["field"] = output_data["tincture"]
            output_data.pop("tincture")

        # Convert dict to JSON
        with open(OUTPUT_FILE, 'w+') as f:
            json.dump(output_data, f, indent=2)
Example #29
def make_sentence(corpus, term_rules, *args, **kwargs):
    '''
    
    Generate sentences with random structure and word choice
    using a context-free grammar
    
    The start point is taken from the sentence itself.
    
    Parameters
    ----------
    
    corpus : str
        a string containing the full, cleaned corpus
        
    term_rules : str
        a string containing all the terminal rules for the corpus
        
    maxdepth : int
        The maximum allowed recursion depth before throwing a
        ValueError
        
    fixed_grammar : bool
        Turn off the random sentence selection and use a fixed grammar
        instead.
    
    sample_sentence : str
        When fixed_grammar is turned on, this is the sentence that will
        be parsed. This can be finicky with grammars containing specially
        punctuated constructions like quotations or positions

    args[0] : dict()
        Optional: a dictionary of kgrams and their subsequent words. If this
        variable exists then cfgen will use it to pick the next words with
        conditional weighting. (The presence of this argument turns on Markov
        text generation features.)
        
    Notes
    -----
    
    Add the ability to turn off the kgram parsing, ideally by counting
    the number of unnamed arguments
    ----> Added this option
    
    '''

    markov_flag = (not len(args) == 0)
    if markov_flag:
        kgram_dict = args[0]

    fixed_grammar = kwargs.pop('fixed_grammar', False)
    sample_sentence = kwargs.pop('sample_sentence', '')
    maxdepth = kwargs.pop('maxdepth', 25)

    if fixed_grammar:
        if sample_sentence == '':
            warnings.warn('When using fixed_grammar, user should specify ' \
                          'the keyword argument "sample_sentence." Using a default simple sentence.')
            sample_sentence = 'The cow jumped over the moon.'
        else:
            pass

    flag = False
    attempts = 0
    while not flag and attempts < 30:
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

        if has_parser and not fixed_grammar:
            rsent = choice(tokenizer.tokenize(corpus))
        elif fixed_grammar:
            rsent = sample_sentence
        elif not has_parser and not fixed_grammar:
            # select from a parsed corpus of pre-approved grammars
            print("Usage library being built")
            rsent = "The dog walked up the stairs slowly."
        else:
            print("Usage library being built")
            rsent = "The dog walked up the stairs slowly."

        parsed_syntax = parse_sentence(rsent)
        # print(parsed_syntax)
        cfg_str = term_rules + parsed_syntax
        try:
            startpt = parsed_syntax[:parsed_syntax.find(' ->')]
            startpt = nltk.grammar.Nonterminal(startpt)
            grammar = CFG.fromstring(cfg_str)
            parser = ChartParser(grammar)
            gr = parser.grammar()
            if markov_flag:
                out_txt = (' '.join(
                    produce_kgram(gr,
                                  startpt,
                                  kgram_dict,
                                  maxdepth=maxdepth,
                                  sent=[])))
            else:
                out_txt = (' '.join(produce(gr, startpt, maxdepth=maxdepth)))
            flag = True
        except ValueError:
            warnings.warn(
                'Badly formed sentence encountered, resampling the corpus.')
            attempts = attempts + 1

    # now re-tag special characters
    swappairs = zip(replacements, to_replace)
    for member in swappairs:
        out_txt = out_txt.replace(member[0], member[1])

    return out_txt
Example #30
    def make_sentence(self, do_markov=True, **kwargs):
        '''

        Generate sentences with random structure and word choice
        using a context-free grammar

        The start point is taken from the sentence itself.

        Parameters
        ----------
    
        do_markov : bool
            Toggle the Markov word selection on or off
        
        maxdepth : int
            The maximum allowed recursion depth before throwing a
            ValueError

        fixed_grammar : bool
            Turn off the random sentence selection and use a fixed grammar
            instead.

        sample_sentence : str
            When fixed_grammar is turned on, this is the sentence that will
            be parsed. This can be finicky with grammars containing specially
            punctuated constructions like quotations or positions


        Notes
        -----

        Add the ability to turn off the kgram parsing, ideally by counting
        the number of unnamed arguments
        ----> Added this option

        '''

        corpus = self.corpus
        term_rules = self.term_rules

        if (self.order > 0) and do_markov:
            markov_flag = True
        else:
            markov_flag = False

        fixed_grammar = kwargs.pop('fixed_grammar', False)
        sample_sentence = kwargs.pop('sample_sentence', '')
        maxdepth = kwargs.pop('maxdepth', 25)

        if fixed_grammar:
            if sample_sentence == '':
                warnings.warn('When using fixed_grammar, user should specify ' \
                              'the keyword argument "sample_sentence." Using a default simple sentence.')
                sample_sentence = 'The cow jumped over the moon.'
            else:
                pass

        flag = False
        attempts = 0
        while not flag and attempts < 30:
            tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

            if has_parser and not fixed_grammar:
                rsent = choice(tokenizer.tokenize(corpus))
            elif fixed_grammar:
                rsent = sample_sentence
            elif not has_parser and not fixed_grammar:
                # select from a parsed corpus of pre-approved grammars
                warnings.warn(
                    "Usage library being built, falling back to simple sentence"
                )
                rsent = "The dog walked up the stairs slowly."
            else:
                warnings.warn(
                    "Usage library being built, falling back to simple sentence"
                )
                rsent = "The dog walked up the stairs slowly."

            parsed_syntax = self.parse_sentence(rsent)
            # print(parsed_syntax)
            cfg_str = term_rules + parsed_syntax
            try:
                startpt = parsed_syntax[:parsed_syntax.find(' ->')]
                startpt = nltk.grammar.Nonterminal(startpt)
                grammar = CFG.fromstring(cfg_str)
                parser = ChartParser(grammar)
                gr = parser.grammar()
                if markov_flag:
                    out_txt = (' '.join(
                        self.produce_kgram(gr,
                                           startpt,
                                           maxdepth=maxdepth,
                                           sent=[])))
                else:
                    out_txt = (' '.join(
                        self.produce(gr, startpt, maxdepth=maxdepth)))
                flag = True
            except ValueError:
                warnings.warn(
                    'Badly formed sentence encountered, resampling the corpus.'
                )
                attempts += 1

        # now re-tag special characters
        swappairs = zip(replacements, to_replace)
        for member in swappairs:
            out_txt = out_txt.replace(member[0], member[1])

        return out_txt