Example #1
def Tweet_content1():
    grammar = CFG.fromstring(demo_grammar)

    # n=4 bounds the number of generated sentences (not the word depth)
    for sentence in generate(grammar, n=4):
        print(' '.join(sentence))
        return sentence  # returns after printing the first sentence
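For reference, generate() accepts both n (the maximum number of sentences to yield) and depth (the maximum derivation depth); the snippet above bounds only the count. A minimal sketch of the difference, using the same demo_grammar:

from nltk import CFG
from nltk.parse.generate import generate, demo_grammar

grammar = CFG.fromstring(demo_grammar)
print(len(list(generate(grammar, n=4))))      # at most 4 sentences
print(len(list(generate(grammar, depth=4))))  # every sentence whose tree depth is <= 4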
Example #2
    def generate_sentences(self, remove_duplicates=True, is_test=False):
        """
        Generates strings from self.grammar. Duplicates may optionally be
        removed.

        :type remove_duplicates: bool
        :param remove_duplicates: If True, duplicates will be removed

        :type is_test: bool
        :param is_test: If True, use test parameters to generate strings.
            Otherwise, use train parameters.

        :rtype: list
        :return: A list of strings generated by self.grammar
        """

        params = self.params
        if is_test:
            params = params.test

        generator = generate(self.grammar,
                             depth=params.sample_depth,
                             n=params.sentence_count)

        if remove_duplicates:
            return [list(y) for y in set(tuple(x) for x in generator)]
        else:
            return list(generator)
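The set-of-tuples idiom above is needed because generate() yields lists, which are unhashable; converting each sentence to a tuple lets a set drop the repeats. A standalone sketch (the toy grammar is an assumption, not from the source):

from nltk import CFG
from nltk.parse.generate import generate

grammar = CFG.fromstring("""
S -> A | B
A -> 'x'
B -> 'x'
""")
sentences = generate(grammar)  # yields ['x'] twice
unique = [list(t) for t in set(tuple(s) for s in sentences)]
print(unique)                  # [['x']]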
Example #3
def rand_sentences(n=10, depth=6, wpt=0.25):
    #grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read())
    grammar = CFG.fromstring(rand_vocabulary(wpt))
    sentences = list(generate(grammar, n=n * 20, depth=depth))
    return [
        ' '.join(i) for i in random.sample(sentences, min(n, len(sentences)))
    ]
Example #4
def generate_positive_examples(G, N=None):
    if N is None:
        N = random.randint(3, 10)
    T = []
    nltk_grammar = convert2_nltk_CFG(G)
    # If the grammar has no terminating derivation, its language is
    # infinite. No such grammar should be generated in the first place,
    # but limiting the depth stops generation regardless (at the cost
    # that, technically, the examples are then not drawn from all of G).

    # A sentence is originally a list of terminal symbols; convert it to
    # a list of tokens. Generation proceeds in order, and depth bounds
    # the maximum sentence size.
    d = min(np.log2(len(G[0].union(G[1]))**2), 8)  # computed but unused below
    for sentence in generate(nltk_grammar, n=50 * N, depth=15):
        tokens = list(map(lambda x: (x, ), sentence))
        if check(nltk_grammar, tokens, nltk=True):
            T.append(tokens)
    # randomize the order of the strings and keep at most N of them
    random.shuffle(T)
    return T[:min(len(T), N)]
Example #5
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
	S -> W TR SUB V '?' | WA TR SUB V DO '?'
	W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
	WA -> 'when' | 'where' | 'why' | 'how'
	TR -> 'have'
	SUB -> PRO
	PRO -> 'they' | 'you'
	V -> '%s'
	DO -> 'the %s'
	""" % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for sentence in generate(grammar1, n=999):
            # generate() yields token lists; join before matching prefixes
            sentence = ' '.join(sentence)
            if sentence.startswith(('who', 'what')):
                multiplier = 1
            elif sentence.startswith(('when', 'where')):
                multiplier = 2
            elif sentence.startswith(('why', 'how')):
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
Example #6
    def generate_sentence(self, depth=9, num=30000):
        if num > 30000:
            num = 30000
        gen_num = 0
        done = False
        sentences_list = list()

        for dep in range(1, depth):
            sentences = generate(self.grammar, depth=dep)
            for s in sentences:
                sentences_list.append(' '.join(s) + '\n')
                gen_num += 1
                if gen_num >= num:  # stop once num sentences are collected
                    done = True
                    break
            if done:
                break

        # sentences = generate(self.grammar, depth=depth, n=4)
        # for s in sentences:
        #     # file.write(' '.join(s) + '\n')
        #     sentences_list.append(' '.join(s) + '\n')
        # sentences_list = sentences_list[0:num]
        random.shuffle(sentences_list)
        with open(self.origin_file, 'w') as file:
            for s in sentences_list:
                file.write(s)
Example #7
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
		S -> WA TR SUB V DO '?' | W TR SUB V '?' 
		W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
		WA -> 'when' | 'where' | 'why' | 'how'
		TR -> 'are' | 'were'
		SUB -> 'they' | 'you'
		V -> '%s'
		DO -> 'the %s'
		""" % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'a', newline='') as csvfile:
            writer = csv.writer(csvfile)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)
                if sentence.startswith(('who', 'what')):
                    multiplier = 1
                elif sentence.startswith(('when', 'where')):
                    multiplier = 2
                elif sentence.startswith(('why', 'how')):
                    multiplier = 4
                # 'sentence' is already a joined string here; joining it
                # again would space out the individual characters
                writer.writerow((sentence, multiplier * count))
Example #11
 def generate_from_grammar(self, n, depth):
     grammar = CFG.fromstring(self.gramma)
     print("Generating for n " + str(n) + " and depth " + str(depth))
     for track in generate(grammar, n=int(n), depth=int(depth)):
         self.track_array.append(' '.join(track))
         # productions
         numbers = " ".join(track)
         self.productions.append(numbers)
Example #12
def get_n_introductions(number):
    r = random.randint(0, 1)

    if r == 1:
        all_pos_sentences = list(generate(pos_grammar))
        pos_number = len(all_pos_sentences)
        print(pos_number)
        # randint is inclusive on both ends, so subtract 1 to stay in range
        pos_sentence = all_pos_sentences[random.randint(0, pos_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            pos_sentence) + ", Nathan A Clark. Hello, Nathan."
    else:
        all_noun_sentences = list(generate(noun_grammar))
        noun_number = len(all_noun_sentences)
        print(noun_number)
        noun_sentence = all_noun_sentences[random.randint(0, noun_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            noun_sentence) + ", Nathan A Clark. Hello, Nathan."
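Since generate() returns a generator, an alternative that avoids manual index arithmetic is to materialize the sentences and let random.choice pick one; a sketch assuming pos_grammar is defined as in the source:

import random
from nltk.parse.generate import generate

pos_sentence = random.choice(list(generate(pos_grammar)))
print(' '.join(pos_sentence))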
Example #13
    def __init__(self, grammar, depth=5):
        """
        Initialize from a CFG.

        :type grammar: CFG
        :param grammar: A CFG generating the text.
        """
        self._iterator = generate(grammar, depth=depth)
Example #14
def banjoify(rules, song):
    arrangement = []
    for pitch, duration in parse_abc(song):
        grammar = CFG.fromstring(rules.format(pitch=pitch))
        options = list(generate(grammar, start=Nonterminal(duration)))
        phrase = random.choice(options)
        arrangement.append(''.join(phrase))
    return ' '.join(arrangement)
Example #15
def generate_dataset(grammar, correct, incorrect):
    """
    Generate data with correct and incorrect number-verb agreement.

    Args:
        grammar (str): NLTK feature grammar
        correct (dict): for each number condition (key) a start symbol rule
                        (value) to create sentences with noun-verb agreement
        incorrect (dict): for each number condition (key) a start symbol rule
                        (value) to create sentences with incorrect verb number

    Returns:
        data_correct (list): tuples of (sentence, number_condition) for all
                            correct sentences
        data_incorrect (list): tuples of (sentence, number_condition) for all
                            sentences with number-verb disagreement
    """
    n_conditions = len(list(correct.keys())[0].split("_"))
    # Tasks that only have one noun of which we are tracking the number
    # Examples: simple, adv, qnty_simple, namepp
    if n_conditions == 1:
        grammar_correct, _ = get_grammar(grammar, correct["sg"])
    # Tasks that have two nouns of which we are tracking the number
    # Examples: nounpp
    elif n_conditions == 2:
        grammar_correct, _ = get_grammar(grammar, correct["sg_sg"])
    elif n_conditions == 3:
        grammar_correct, _ = get_grammar(grammar, correct["sg_sg_sg"])
    # Not tracking more than 3 nouns
    else:
        sys.exit(
            "Number of conditions is incorrect. Please check the template.")

    correct_parsers = defaultdict()
    incorrect_parsers = defaultdict()
    data_correct, data_incorrect = [], []

    # 'correct' and 'incorrect' are dictionaries containing the same keys
    # Get the parsers for both the correct sentences and the incorrect
    # sentences, where the verb number does not match the noun number
    for corr_key, incorr_key in zip(correct, incorrect):
        _, correct_parsers[corr_key] = get_grammar(grammar, correct[corr_key])
        _, incorrect_parsers[incorr_key] = get_grammar(grammar,
                                                       incorrect[incorr_key])

    # Generate n sentences and classify as either correct or incorrect
    for sent in tqdm(list(generate(grammar_correct, n=1000000))):
        for key in correct_parsers:
            # If a parser for correct sentence can parse the current sentence,
            # the sentence is correct
            if list(correct_parsers[key].parse(sent)):
                data_correct.append((" ".join(sent), key))
                break
            elif list(incorrect_parsers[key].parse(sent)):
                data_incorrect.append((" ".join(sent), key))
                break

    return data_correct, data_incorrect
Example #16
def generate_from_grammar(G, depth=50, n=999):
    C = ""  # corpus
    ## all possible sentences
    print("\n")
    # loop variable renamed so it does not shadow the parameter n
    for i, sent in enumerate(generate.generate(G, depth=depth, n=n), 1):
        s = ' '.join(sent)
        C += s + '. '
        print('%3d. %s.' % (i, s))
    return C
Example #17
def generate_initiative(nouns, adjs, verbs):
    grammar = init_grammar(nouns, adjs, verbs)
    # print(grammar)

    # for sentence in generate(grammar, depth=1000):
    #     print(' '.join(sentence))

    results = generate(grammar)

    return results
Example #18
def generate_text(grammar,N):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen

    print('Generating the first %d sentences for demo grammar:' % (N,))
    print(grammar)
    grammar = CFG.fromstring(grammar)

    grm_list = gen.generate(grammar, n=N)
    for n, sent in enumerate(grm_list):
        print('%3d. %s' % (n, ' '.join(sent)))
Example #19
def generate_sent(n: int = 1) -> list:
    """
    Generate Thai Sentences

    :param int n: number sentences

    :return: list sentences
    :rtype: list
    """
    global _thaigrammar
    return [' '.join(i) for i in generate(_thaigrammar, n=n)]
Example #20
 def gen_sql_stmt_from_grammar(self,
                               start_,
                               num_stmts=None,
                               table_name="table_name",
                               columns_name="columns_names"):
     grammar = CFG.fromstring(
         self.get_sql_select_stml_grammar(table_name, columns_name,
                                          COMMON_VALUES))
     sql_select_stmts = []
     for stmt in generate(grammar, start=Nonterminal(start_), n=num_stmts):
         sql_select_stmts.append(''.join(stmt))
     return sql_select_stmts
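The start keyword used above overrides the grammar's default start symbol, which is handy when one grammar defines several entry points. A minimal sketch (the grammar and symbol names are illustrative assumptions):

from nltk import CFG
from nltk.grammar import Nonterminal
from nltk.parse.generate import generate

grammar = CFG.fromstring("""
S -> SELECT | INSERT
SELECT -> 'select' '*'
INSERT -> 'insert' 'row'
""")
# generate only from the SELECT entry point
for stmt in generate(grammar, start=Nonterminal('SELECT')):
    print(' '.join(stmt))  # select *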
Example #21
    def generate_messages(self):
        """
        Generates messages for a synthetic structured language 
        according to a simple grammar, not randomly.

        Yields
        ------
        message : list
            A list with each element a word (str) in the message.
        """
        for message in generate(self.grammar):
            yield message
Example #22
def main(args):
    grammar_string = DEMO_GRAMMAR

    if args.input_file_path:
        with open(args.input_file_path, 'r') as f:
            grammar_string = f.read()

    grammar = CFG.fromstring(grammar_string)

    for sentence in generate(grammar, depth=args.depth):
        print(''.join(sentence))

    return 0
Example #23
def grammar_get(
    base_grammar_fname: util.type.TFile,
    domain_grammar_fnames: T.List[util.type.TFile],
    sents_per_domain=100) -> TNameToSents:
  with open(base_grammar_fname, 'r') as f:
    base = f.read()
  sents = {}
  for domain_fname in domain_grammar_fnames:
    with open(domain_fname, 'r') as f:
      productions = f.read()
    grammar = CFG.fromstring(base + productions)
    sents[domain_fname] = list(generate(grammar, n=sents_per_domain))
  return sents
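grammar_get stitches each domain's productions onto a shared base grammar before generating, so every domain inherits the base rules. A hypothetical call (the file names are assumptions, not from the source):

# sents = grammar_get('base.cfg', ['weather.cfg', 'music.cfg'], sents_per_domain=50)
# 'sents' maps each domain file name to a list of generated token lists.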
Example #24
def respondQuestion(sentence, keyWord, POS):
	if "Tell me" not in sentence:
		grammar = ""

		if POS == "NNPS" or POS == "NNS":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'they' | 'those'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'are' 
			""")
		elif POS == "NN" or POS == "NNP":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'it' | 'that'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'is' 
			""")

		rand_sent_list = []
		response = ""
		for sentence in generate(grammar):
			rand_sent_list.append(' '.join(sentence))
		while True:
			num = randint(0, len(rand_sent_list)-1)
			response = rand_sent_list[num]
			if "<>" in response and (POS == "NNS" or POS == "NN"):
				index = response.index("<>")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "[]" in response and (POS == "NNPS" or POS == "NNP"):
				index = response.index("[]")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "<>" not in response and "[]" not in response:
				break
		return response
	else:
		knowledgeRep(sentence)
Example #25
def generate_pairs(depth, cfg):
    '''
    depth: integer for the depth of the parse tree in the CFG
    cfg: chosen grammar, 1, 2 or 3
    Returns a list of translation pairs as strings.
    '''
    if (cfg == 1):
        grammar = CFG.fromstring("""
        S -> Y  
        Y ->   a Y b | a Y | a |
        a -> '(' ')'  
        b -> '{' '}'  
        """)
    elif cfg == 2:
        grammar = CFG.fromstring("""
        S ->  X | Y  | X Y
        X -> a
        Y ->  b
        a -> '(' a ')'  |  
        b -> '{' b '}'  | 
        """)
    elif cfg == 3:
        grammar = CFG.fromstring("""
        S ->  X 
        X -> a | b
        a -> '(' a ')'  |  
        b -> '{' b '}' | '{' a '}'
        """)
    trg = list(generate(grammar, depth=depth))
    trg_list = []
    for sentence in trg:
        k = ''.join(sentence)
        trg_list.append(k)

    src_list = trg2src(trg)

    if cfg == 1:
        A = list((s + 'A ' for s in src_list))
    elif cfg == 2:
        A = list((s + 'B ' for s in src_list))
    elif cfg == 3:
        A = list((s + 'C ' for s in src_list))

    B = list((s for s in trg_list))

    df = pd.concat([pd.Series(A), pd.Series(B)], axis=1)
    pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist()
    return pairs
Example #26
def generateRawTemplates(depth):
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []

    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates", "template" + str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
            templatefiles.append(filename)

    print(str(len(templatefiles)) + " template files generated")

    return templatefiles
Example #27
def generate_blazons(grammarfile, n, depth=None):
    bs = []
    with open(grammarfile) as g:
        raw_cfg = g.read()
    parser_grammar = CFG.fromstring(raw_cfg)
    for blazon in generate(parser_grammar, n=n, depth=depth):
        bwords = blazon
        field = bwords[0]
        z = ((isColour(field) and not any(map(isColour, bwords[1:]))) or
             (isMetal(field) and not any(map(isMetal, bwords[1:])))) and (
                 field not in bwords[1:])
        if z:
            bs.append(' '.join(blazon))
    return bs
Example #28
def generate_sentence(subject, predicate, object, useTemplate=False):
    if useTemplate==False:
        predicate = literal_tuner(predicate)
        rand = random.randint(0, 1)
        object = literal_tuner(object)
        grammar = get_grammar(subject, object, predicate)

        # very simplified random string selection, because we currently only have two valid compositions
        for sentence in generate(grammar, n=10):
            if rand < 1:
                return ' '.join(sentence)
            else:
                rand = rand-1
                continue
Example #29
    def generate(self, tree_depth, num_expressions):
        """Generates expression strings from context-free grammar.

        Args:
          tree_depth: Integer, depth of the grammar parsing tree.
          num_expressions: Integer, maximum number of expressions to generate.

        Yields:
          List of token strings for an expression string.
        """
        for token_list in generate.generate(self._cfg,
                                            depth=tree_depth,
                                            n=num_expressions):
            yield token_list
Example #31
def main():
    """Skeleton towards a data generation process.

    An actual data generation pipeline would:
      1. Generate all basic sentences and throw them in train.
      2. Generate all complex sentences, and divide between train/test.
    """

    print("Basic grammar...")
    basic_grammar = load_grammar("grammars/basic_sents.fcfg")
    sentences = tqdm(generate(basic_grammar))
    expressions = semantic_parse(sentences, basic_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        print(str(expression), ":", value)

    print("Complex grammar...")
    complex_grammar = load_grammar("grammars/complex_sents.fcfg")
    sentences = tqdm(generate(complex_grammar, n=200, depth=5))
    expressions = semantic_parse(sentences, complex_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        if value is not None:
            print(str(expression), ":", value)
Example #32
def main_phrases(name, othername=None):

    phrase_grammar = f"""
		S -> '{name}!'
		S -> 'Go, {name}!'
		S -> 'Show them, {name}!'
		S -> 'I chose you, {name}!'
		S -> 'You can do it, {name}!'
	"""

    grammar = CFG.fromstring(phrase_grammar)
    phrases = list(generate(grammar))
    shortlist = list(filter(lambda x: (syllables.estimate(x[0]) == 5),
                            phrases))
    return shortlist[0]
Example #33
def single_phrases(name, types=None, evolutions=None, attacks=None):
    phrase_grammar = ""

    if attacks is not None:
        for attack in attacks:
            phrase_grammar += f"""
				S -> 'Use {attack}!'
				S -> '{attack}, now!'
			"""

    # NB: CFG.fromstring raises ValueError if no productions were added
    grammar = CFG.fromstring(phrase_grammar)
    return generate(grammar)
Example #34
def generate_tweet(grammar):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen

    print(grammar)
    grammar = CFG.fromstring(grammar)
    grm_list = gen.generate(grammar, n=SIZE)  # TODO check the maximum size; any way to retrieve it?
    from random import randint
    rd = randint(0, SIZE - 1)  # pick one of the SIZE sentences
    cpt = 0
    for n, sent in enumerate(grm_list):
        if rd == cpt:
            print("Your tweet : ")
            print('%3d. %s' % (n, ' '.join(sent)))
        cpt += 1
Example #35
    def generate_sample_strings(self, remove_duplicates=True):
        """
        Generates all strings from self.grammar up to the depth
        specified by self.depth. Duplicates may optionally be removed.

        :type remove_duplicates: bool
        :param remove_duplicates: If True, duplicates will be removed

        :rtype: list
        :return: A list of strings generated by self.grammar
        """
        generator = generate(self.grammar, depth=self.sample_depth)
        if remove_duplicates:
            return [list(y) for y in set(tuple(x) for x in generator)]
        else:
            return list(generator)
Example #36
    def generate(self, topstart="top", n=no_sents):
        if topstart == "top":
            topstart = self.start
        else:
            topstart = self.findstart(topstart)

        if n > 0:
            max = n
        else:
            max = CorpusGenerator.no_sents

        sentences = 0
        for sentence in generate(self.grammar, start=topstart, n=max):
            if max < 1000000000:
                print(' '.join(sentence))
            sentences += 1
        print "Produced sentences: " + str(sentences)
def main():
    zen = """ Beautiful is better than ugly.
    Explicit is better than implicit.
    Simple is better than complex.
    Complex is better than complicated.
    Flat is better than nested.
    Sparse is better than dense.
    Readability counts.
    Special cases aren't special enough to break the rules.
    Although practicality beats purity.
    Errors should never pass silently.
    Unless explicitly silenced.
    In the face of ambiguity, refuse the temptation to guess.
    There should be one-- and preferably only one --obvious way to do it.
    Although that way may not be obvious at first unless you're Dutch.
    Now is better than never.
    Although never is often better than *right* now.
    If the implementation is hard to explain, it's a bad idea.
    If the implementation is easy to explain, it may be a good idea.
    Namespaces are one honking great idea -- let's do more of those!"""
        
    tagged = nltk.pos_tag(nltk.word_tokenize(zen))
    tagged = [(tag, word) for word, tag in tagged]
    #
    #tag_word_map = defaultdict(list)
    #[(tag, word) for word, tag in tagged]
    tags = set([tag for tag, _  in tagged])
    tag_word_map = {tag: {word for key, word in tagged if key == tag} for tag in tags}
                
           
    gram_head = """
      S -> NNP VBZ JJR IN RB
    """
    cats = ['NNP', 'VBZ', 'JJR', 'IN', 'RB']
    gram = [cat + ' -> ' + '|'.join([repr(x) for x in tag_word_map[cat]]) for cat in cats]
    
    grammar = gram_head + '\n'.join(gram)
    grammar = nltk.CFG.fromstring(grammar)
    
    poem = []    
    for sentence2 in generate(grammar, depth=5):
        poem.append(' '.join(sentence2))
        
    out =  "\n".join(choice(poem, size=10))
    print(out)
Example #38
    def onMessage(self, author_id, message_object, thread_id, thread_type,
                  **kwargs):
        self.markAsDelivered(thread_id, message_object.uid)
        self.markAsRead(thread_id)

        log.info("{} from {} in {}".format(message_object, thread_id,
                                           thread_type.name))
        log.info(message_object.text)

        # If you're not the author, echo
        if author_id != self.uid:
            grammar = CFG.fromstring(demo_grammar)
            # generate() yields lists of tokens, so join one sentence into a string
            sentence = ' '.join(next(generate(grammar, depth=14)))
            self.send(Message(text=sentence),
                      thread_id=thread_id,
                      thread_type=thread_type)
            self.send(Message(text='Chao'),
                      thread_id=thread_id,
                      thread_type=thread_type)
Example #39
def generate_sentences(args):

    in_file = open(args[1])
    out_file = open(args[2],'w')

    gram = in_file.read()

    grammar = CFG.fromstring(gram)
    print(grammar)
    sentences = ""

    for s in generate(grammar, depth=int(args[0])):
        sentences += ''.join(s) + '\n'

    out_file.writelines(sentences)

    in_file.close()
    out_file.close()
Example #40
g3 = """
S -> S1[G=?n] 
S1[G='money'] -> 'How many notes of each denomination person has?'
S1[G='shape'] -> 'What are its length and breadth?'
S1[G='int'] -> 'What are the two numbers?'
S1[G='age'] -> 'What are their present ages?'
S1[G='class'] -> 'What is the total strength?'

"""
first = []
sec = []
third = []

grammar1 = nltk.grammar.FeatureGrammar.fromstring("""% start S""" + "\n" + gramstring)
parser1 = nltk.FeatureChartParser(grammar1)
for sentence1 in generate(grammar1):
    if parser1.parse_one(sentence1):
        string1 = ' '.join(sentence1)
        first.append(string1)


grammar2 = nltk.grammar.FeatureGrammar.fromstring("""% start S""" + "\n" + g2)
parser2 = nltk.FeatureChartParser(grammar2)
for sentence2 in generate(grammar2):
    if parser2.parse_one(sentence2):
        string2 = ' '.join(sentence2)
        if string2 not in sec:
            sec.append(string2)
Example #41
import nltk
from nltk.parse import generate
from nltk.grammar import Nonterminal


cfg = nltk.CFG.fromstring("""
root -> who_player has the most runs
who_player -> who
who_player -> which player
who_player -> which team player
who -> 'who'
which -> 'which'
player -> 'player'
team -> 'indian' | 'australian' | 'england' | 'sri' 'lankan'
has -> 'has'
the -> 'the'
this -> 'this'
most -> 'most'
runs -> 'runs'
""")

print(list((n,sent) for n, sent in enumerate(generate.generate(cfg, n=100, start=Nonterminal('root')), 1)))

result1 = nltk.ChartParser(cfg).parse('which england player has the most runs'.split())
result2 = nltk.ChartParser(cfg).parse(['which', 'sri', 'lankan', 'player', 'has', 'the', 'most',  'runs'])
print(list(result1))
print(list(result2))

Example #42
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list) // 2):
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            # dictionary = PyDictionary()
            
            # nouns = []
            # verbs = []
            # adjectives = []
            # otherPos = []
            # for word in tokens:
            #     #definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4
            #     #assignment = definition.keys()[0] # Get the part of speech from the dictonary
            #     assignment = ""
            #     # assignment = tuple[1]
                
            #     if assignment == 'Noun':
            #         nouns.append(word)
            #     elif assignment == 'Verb':
            #         verbs.append(word)
            #     elif assignment == 'Adjective':
            #         adjectives.append(word)
            #     else:
            #         otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            DET = ["'the'","'a'","'some'"]
            # P = ["'in'","'at'","'since'","'for'","'to'","'past'","'to'""'by'","'in'","'at'","'on'","'under'","'below'","'over'","'above'","'into'","'from'","'of'","'on'","'at'"]
            VB = ["'talks'","'does'","'has'","'cries'", "'fights'", "'traps'", "'bakes'", "'fondles'", "'cooks'", "'sees'", "'calls'", "'smells'", "'tastes'", "'hears'"]
            
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            # pos_tags = []
            pos_words = {}
            pos_words['DET'] = DET
            #pos_words['P'] = P
            pos_words['VB'] = VB
            
            for tuple in assignments:
                word = tuple[0]
                pos = tuple[1]
                if pos in pos_words:
                    pos_words[pos].append("\'" + word + "\'")
                else:
                    pos_words[pos] = []
                    pos_words[pos].append("\'" + word + "\'")
                # pos_tags.append(pos)

            #grammar = """
            #S -> NP VP
            #PP -> P NP
            #NP -> Det N
            #VP -> V Det N | V Det N PP
            
            #"""
            
            grammar = """
            S -> NP VP
            NP -> Det N
            VP -> V Det N
            """
            #Det -> 'DT'
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            if 'DET' in pos_words:
                grammar += 'Det ->' + ' | '.join(pos_words['DET']) + '\n'
                
            if 'P' in pos_words:
                grammar += 'P ->' + ' | '.join(pos_words['P']) + '\n'
                
            if 'NN' in pos_words:
                grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n'
            #change to VB for nltk
            if 'VB' in pos_words:
                grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n'
            
            
            #if 'JJ' in pos_words:
            #    grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #  simple_grammar.start()
            # simple_grammar.productions()
            
            sentences = []
            sentence_validity = []
         
            for sentence in generate(simple_grammar, depth=4):
                sentences.append(' '.join(sentence))
            
            
            sentence_validity = get_validity(sentences)
            
            #get_validity(sentences)
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            
            story = ""
            for i in range(0, 10):
                tuple = sentence_validity[i]
                string = tuple[1]
                start_letter = string[0].upper()
                story += start_letter
                story += string[1:]
                story += ". "
            
            return render(request, 'makestory/output.html',
                {
                'imageURL_output': imageURL,
                'story_output': story,
                'grammar_test_output': simple_grammar,
                'sentences_test_output': sentences,
                }
            )
        else:
            return fail(request)
    return fail(request)
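A standalone sketch of the dynamic-grammar idea in this view, with a hard-coded sentence standing in for the Clarifai tags (assumes the NLTK perceptron tagger data is installed; the words and tag names are illustrative):

import nltk
from nltk import CFG
from nltk.parse.generate import generate

tagged = nltk.pos_tag(nltk.word_tokenize('the dog sees a park'))
pos_words = {}
for word, pos in tagged:
    pos_words.setdefault(pos, []).append("'%s'" % word)  # quote terminals for the CFG

grammar = "S -> NP VP\nNP -> Det N\nVP -> V Det N\nDet -> 'the' | 'a'\n"
if 'NN' in pos_words:
    grammar += 'N -> ' + ' | '.join(pos_words['NN']) + '\n'
if 'VBZ' in pos_words:
    grammar += 'V -> ' + ' | '.join(pos_words['VBZ']) + '\n'

for sentence in generate(CFG.fromstring(grammar), depth=4):
    print(' '.join(sentence))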
Example #43
 def generate_syllables(self):
     ''' every possible syllable for the given phonemes and grammar '''
     # spaces, which are only there for NLTK's sake, are removed
     return [re.sub(' ', '', '/'.join(s)) for s in \
             generate(self.grammar, depth=4)]
Example #44
# Filter each sentence and return the first filtered result.
def eliminate(sentence):
    sents = nltk.sent_tokenize(sentence)
    for sent in sents:
        # 'filter' here is a project-defined cleanup helper, not the builtin
        return filter(sent)

# Here 'input' holds the option chosen on the UI.
# Each question has an ID as per the NCERT book; 'input' is set to the chosen value.
input = 26  # NB: shadows the builtin input()
# Generate variations of a particular question based on the input and its corresponding grammar.
if input==2:
    g=CFG.fromstring(g1)
    g2=CFG.fromstring(g2)
    rd_parser=nltk.RecursiveDescentParser(g)
    for sent,sent2 in zip(generate(g2,n=100),generate(g,n=100)):
        newsent1=' '.join(sent)
        newsent2=' '.join(sent2)
        ans1=eliminate(newsent1)
        ans2=eliminate(newsent2)
        if ans1 is None or ans2 is None:
            pass
        else:
            print(ans1)
            print(ans2)
            print("Determine the length and breadth")
            print("\n")
elif input==4:
    g=CFG.fromstring(g3)
    g2=CFG.fromstring(g4)
    rd_parser=nltk.RecursiveDescentParser(g)
Example #45
from contractions import contractions


sent_tokenizer = PunktSentenceTokenizer()

with open("<source of text>", "r") as f:
    text = f.read()

for k, v in contractions.items():
    text = text.replace(k, v)

sents = []
for paragraph in text.split('\n'):
    sents += sent_tokenizer.tokenize(paragraph)

parser = Parser()

productions = []
for sent in sents[:25]:
    try:
        tree = parser.parse(sent)
        productions += tree.productions()
    except Exception:
        pass

S = Nonterminal('S')
grammar = induce_pcfg(S, productions)

for sentence in generate(grammar, depth=5):
    print(" ".join(sentence) + "\n")
def surface_realizer(grammar):
    for sentence in generate(grammar, n=10):
        return ' '.join(sentence)  # returns only the first generated sentence
Example #47
__author__ = 'Mohammed Shokr <*****@*****.**>'

# Generating sentences from context-free grammars

from nltk.parse.generate import generate, demo_grammar
from nltk import CFG

# An example grammar:
grammar = CFG.fromstring(demo_grammar)
print(grammar)

print("#---------------------------------------------------------------#")

# The first 10 generated sentences:
for sentence in generate(grammar, n=10):
    print(' '.join(sentence))

print("#---------------------------------------------------------------#")

            
Example #49
def choose_line(some_lines):#5
    return a_random.choice(#7
                    some_lines).lower() #5

############################################

############################################
choose = choose_line #5

g = G.fromstring(#7
                    this_is_the_grammar) #5
############################################

############################################
while not len(pentas):#5
    for poem in generate(g, #7
                           start=N('five')): #5
############################################

############################################
      pentas.append(#5
                    with_blank_spaces.join(poem))#7

fives = pentas #5
############################################

############################################
third = choose(fives) #5
first = choose(fives) #7

def display_the(poem):#5
############################################
Example #50
    [1. if i == b else 0. for i in xrange(len(code_for))])

# list of codes of symbols to predict
to_predict_codes = [onehot(code_for[s]) for s in to_predict]


# function to test if a symbol code is in list to predict
def in_predict_codes(code):
    for i in xrange(len(to_predict_codes)):
        if ((code == to_predict_codes[i]).all()):
            return True
    return False


# sample_strings = all strings from grammar of depth at most sample_depth
sample_strings = list(generate(grammar, depth=sample_depth))

# report #, min length and max length for strings in sample_strings
print("number of sample strings = {}".format(len(sample_strings)))
sample_lengths = [len(s) for s in sample_strings]
print("min length = {}, max length = {}".format(min(sample_lengths),
                                                max(sample_lengths)))

# sanity check: report one random string from sample_strings
print "random sample string = {}".format(random.choice(sample_strings))

#################################

model = VanillaModel(len(code_for), READ_SIZE, len(code_for))
try:
    model.cuda()
Example #51
from nltk.parse.generate import generate #, demo_grammar
from nltk import CFG


demo_grammar = """
  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'
"""
grammar = CFG.fromstring(demo_grammar)
print(grammar)


# Join words and generate based on the grammar - for n sentences
for sentence in generate(grammar, n=12):
    print(' '.join(sentence))

'''
Notes: 
Need to symbolize the grammar
Have the machine process the language
Need to integrate with Markov chain - file 'agiliq-markov.py'
'''
for sentence in generate(grammar, depth=4):
    print(' '.join(sentence))
Example #52
    
    # to save typing
    dgr = dyck_grammar
    uagr = unambig_agreement_grammar
    eegr = exp_eval_grammar

    # comparisons of table calculations and reported sample sizes

    # Note: the generate function from nltk uses a notion of
    # depth that is 1 more than that used above!

    # NB: the dyck_grammar is NOT unambiguous (S -> S S)
    dgr_table = make_table(6, dgr)
    print("dyck_grammar for 4 from count_nonterminal_depth")
    print(count_nonterminal_depth(dgr.start(), 4, dgr_table, dgr))
    print("nltk generate: number of sentences for dyck grammar at depth = 5")
    print(len(list(generate(dgr, depth=5))))
    print("The dyck_grammar is ambiguous!")

    # unambig_agreement_grammar
    # this agrees with the count for depth = 16 in generate
    uagr_table = make_table(15, uagr)
    print("unambig_agreement_grammar for 15 from count_nonterminal_depth")
    print(count_nonterminal_depth(uagr.start(), 15, uagr_table, uagr))

    # exp_eval_grammar
    # this agrees with the count for depth = 6 in generate
    eegr_table = make_table(5, eegr)
    print("exp_eval_grammar for 5 from count_nonterminal_depth")
    print(count_nonterminal_depth(eegr.start(), 5, eegr_table, eegr))

    print("number of nltk depth = 7 sentences from dyck_grammar")
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list)):
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            dictionary = PyDictionary()
            
            
            
            nouns = []
            verbs = []
            adjectives = []
            otherPos = []
            for word in tokens:
                definition = dictionary.meaning(word)  # https://pypi.python.org/pypi/PyDictionary/1.3.4
                assignment = list(definition.keys())[0]  # get the part of speech from the dictionary
                
                # assignment = tuple[1]
                
                if assignment == 'Noun':
                    nouns.append(word)
                elif assignment == 'Verb':
                    verbs.append(word)
                elif assignment == 'Adjective':
                    adjectives.append(word)
                else:
                    otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"]
            DET = ["the","a","one","some","few","a few","the few","some"]
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            pos_tags = []
            pos_words = {}
            for tuple in assignments:
                word = tuple[0]
                pos = tuple[1]
                if pos in pos_words:
                    pos_words[pos].append(word)
                else:
                    pos_words[pos] = []
                pos_tags.append(pos)
                
                
            
            
            grammar = """
            S -> NP VP
            PP -> P NP
            NP -> Det N | Det N PP
            VP -> V NP | VP PP
            Det -> 'DT'
            """
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            if 'NN' in pos_words:
                grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n'
            
            if 'VB' in pos_words:
                grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n'
                
            if 'JJ' in pos_words:
                grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
            return render(request, 'makestory/output.html',
                {
                'nouns_output': nouns,
                'verbs_output': verbs,
                'adjectives_output': adjectives,
                'otherPos_output': otherPos,
                'imageURL_output': imageURL,
                'caption_output': caption,
                'story_output': story,
                'sentences_test_output': sentences,
                }
            )
Example #54
from nltk.parse.generate import generate
from nltk import CFG
from nltk.data import load

for gg in ['grammar_2.cfg']:
    grammar = load('file:' + gg)
    for sentence in generate(grammar, depth=6, n=1000000):
        print(' '.join(sentence))