Code Example #1
File: Tweet_content1.py Project: BelhalK/twitterbot
def Tweet_content1():
    grammar = CFG.fromstring(demo_grammar)

    # n=4 caps the number of generated sentences (it is not a word count)
    for sentence in generate(grammar, n=4):
        print(' '.join(sentence))
        return sentence  # returns the first generated sentence (a token list)
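For reference in this and the examples below: nltk's generate(grammar, start=None, depth=None, n=None) (from nltk.parse.generate) yields each sentence as a list of terminal strings; n caps how many sentences are produced, and depth bounds the derivation-tree depth. A minimal sketch:

from nltk import CFG
from nltk.parse.generate import generate, demo_grammar

grammar = CFG.fromstring(demo_grammar)
for tokens in generate(grammar, n=4, depth=6):
    # each item is a list of terminals, e.g. ['the', 'man', 'slept']
    print(' '.join(tokens))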
Code Example #2
File: cfg.py Project: emrul/StackNN
    def generate_sentences(self, remove_duplicates=True, is_test=False):
        """
        Generates strings from self.grammar. Duplicates may optionally be
        removed.

        :type remove_duplicates: bool
        :param remove_duplicates: If True, duplicates will be removed

        :type is_test: bool
        :param is_test: If true, use test parameters to generate strings.
        Otherwise, use train parameters.

        :rtype: list
        :return: A list of strings generated by self.grammar
        """

        params = self.params
        if is_test:
            params = params.test

        generator = generate(self.grammar,
                             depth=params.sample_depth,
                             n=params.sentence_count)

        if remove_duplicates:
            return [list(y) for y in set(tuple(x) for x in generator)]
        else:
            return list(generator)
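One caveat about the set-of-tuples deduplication above: it discards generation order. If order matters, an order-preserving variant can be used instead (a generic helper sketch, not part of StackNN):

def unique_in_order(seqs):
    seen = set()
    out = []
    for s in seqs:
        key = tuple(s)
        if key not in seen:  # keep only the first occurrence
            seen.add(key)
            out.append(list(s))
    return out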
Code Example #3
def rand_sentences(n=10, depth=6, wpt=0.25):
    #grammar = CFG.fromstring(open('assets/text/grammar.txt', 'r').read())
    grammar = CFG.fromstring(rand_vocabulary(wpt))
    sentences = list(generate(grammar, n=n * 20, depth=depth))
    return [
        ' '.join(i) for i in random.sample(sentences, min(n, len(sentences)))
    ]
Code Example #4
File: check.py Project: jdoherty7/learn-cfg
def generate_positive_examples(G, N=None):
    if N is None:
        N = random.randint(3, 10)
    T = []
    nltk_grammar = convert2_nltk_CFG(G)
    # If the grammar has no terminating productions, generation is infinite.
    # No grammar like that should be produced in the first place, but capping
    # the depth stops generation anyway; strictly speaking, the truncated
    # examples may then fall outside G.

    # Each sentence is a list of terminal symbols; convert it to a list of
    # single-symbol tokens. Generation proceeds in order, and depth bounds
    # the maximum sentence size.
    d = min(np.log2(len(G[0].union(G[1]))**2), 8)  # computed but unused below
    for sentence in generate(nltk_grammar, n=50 * N, depth=15):
        tokens = list(map(lambda x: (x, ), sentence))
        if check(nltk_grammar, tokens, nltk=True):
            T.append(tokens)
    # randomize the order of the strings
    random.shuffle(T)
    return T[:N]  # slicing clamps to len(T) automatically
Code Example #5
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
    S -> W TR SUB V '?' | WA TR SUB V DO '?'
    W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
    WA -> 'when' | 'where' | 'why' | 'how'
    TR -> 'have'
    SUB -> PRO
    PRO -> 'they' | 'you'
    V -> '%s'
    DO -> 'the %s'
    """ % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'ab') as csvwriter:
        writer = csv.writer(csvwriter)
        for sentence in generate(grammar1, n=999):
            sentence = ' '.join(sentence)  # generate() yields token lists
            if sentence.startswith('who'):
                multiplier = 1
            if sentence.startswith('what'):
                multiplier = 1
            if sentence.startswith('when'):
                multiplier = 2
            if sentence.startswith('where'):
                multiplier = 2
            if sentence.startswith('why'):
                multiplier = 4
            if sentence.startswith('how'):
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
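A side note on open('sentences.csv', 'ab'): appending in binary mode is a Python 2 idiom for the csv module. Under Python 3 the equivalent is text mode with newline handling disabled:

with open('sentences.csv', 'a', newline='') as f:
    writer = csv.writer(f)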
Code Example #6
File: OracleCfg.py Project: IshJ/Texygen
    def generate_sentence(self, depth=9, num=30000):
        if num > 30000:
            num = 30000
        gen_num = 0
        done = False
        sentences_list = list()

        for dep in range(1, depth):
            sentences = generate(self.grammar, depth=dep)
            for s in sentences:
                sentences_list.append(' '.join(s) + '\n')
                gen_num += 1
                if gen_num >= num:  # >= so that exactly num sentences are kept
                    done = True
                    break
            if done:
                break

        # sentences = generate(self.grammar, depth=depth, n=4)
        # for s in sentences:
        #     # file.write(' '.join(s) + '\n')
        #     sentences_list.append(' '.join(s) + '\n')
        # sentences_list = sentences_list[0:num]
        random.shuffle(sentences_list)
        with open(self.origin_file, 'w') as file:
            for s in sentences_list:
                file.write(s)
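Note that generate(self.grammar, depth=dep) enumerates all sentences of depth at most dep, so each pass of the loop regenerates everything the shallower passes already produced. A sketch of deduplicating within the same depth sweep (same attributes assumed as in the method above):

seen = set()
for dep in range(1, depth):
    for s in generate(self.grammar, depth=dep):
        key = tuple(s)
        if key not in seen:
            seen.add(key)
            sentences_list.append(' '.join(s) + '\n')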
Code Example #7
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
		S -> WA TR SUB V DO '?' | W TR SUB V '?' 
		W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
		WA -> 'when' | 'where' | 'why' | 'how'
		TR -> 'are' | 'were'
		SUB -> 'they' | 'you'
		V -> '%s'
		DO -> 'the %s'
		""" % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'ab') as csvwriter:
            writer = csv.writer(csvwriter)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)  # generate() yields token lists
                if sentence.startswith('who'):
                    multiplier = 1
                if sentence.startswith('what'):
                    multiplier = 1
                if sentence.startswith('when'):
                    multiplier = 2
                if sentence.startswith('where'):
                    multiplier = 2
                if sentence.startswith('why'):
                    multiplier = 4
                if sentence.startswith('how'):
                    multiplier = 4
                # `sentence` is already a string here; joining it again would
                # space-separate its characters
                writer.writerow((sentence, multiplier * count))
Code Example #8
def gen_grammar_plural(verb, direct_object, count):
    try:
        verb = en.verb.present_participle(verb)
    except KeyError:
        return
    if verb != "":
        g1 = """
        S -> WA TR SUB V DO '?' | W TR SUB V '?'
        W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
        WA -> 'when' | 'where' | 'why' | 'how'
        TR -> 'are' | 'were'
        SUB -> 'they' | 'you'
        V -> '%s'
        DO -> 'the %s'
        """ % (verb, direct_object)
        grammar1 = CFG.fromstring(g1)
        multiplier = 1
        with open('sentences.csv', 'ab') as csvwriter:
            writer = csv.writer(csvwriter)
            for sentence in generate(grammar1, n=999):
                sentence = ' '.join(sentence)  # generate() yields token lists
                if sentence.startswith('who'):
                    multiplier = 1
                if sentence.startswith('what'):
                    multiplier = 1
                if sentence.startswith('when'):
                    multiplier = 2
                if sentence.startswith('where'):
                    multiplier = 2
                if sentence.startswith('why'):
                    multiplier = 4
                if sentence.startswith('how'):
                    multiplier = 4
                writer.writerow((sentence, multiplier * count))
Code Example #9
def gen_grammar3_past_plural(verb, direct_object, count):
    g1 = """
    S -> W TR SUB V '?' | WA TR SUB V DO '?'
    W -> 'who' | 'what' | 'when' | 'where' | 'why' | 'how'
    WA -> 'when' | 'where' | 'why' | 'how'
    TR -> 'have'
    SUB -> PRO
    PRO -> 'they' | 'you'
    V -> '%s'
    DO -> 'the %s'
    """ % (verb, direct_object)
    grammar1 = CFG.fromstring(g1)
    multiplier = 0
    with open('sentences.csv', 'ab') as csvwriter:
        writer = csv.writer(csvwriter)
        for sentence in generate(grammar1, n=999):
            sentence = ' '.join(sentence)  # generate() yields token lists
            if sentence.startswith('who'):
                multiplier = 1
            if sentence.startswith('what'):
                multiplier = 1
            if sentence.startswith('when'):
                multiplier = 2
            if sentence.startswith('where'):
                multiplier = 2
            if sentence.startswith('why'):
                multiplier = 4
            if sentence.startswith('how'):
                multiplier = 4
            writer.writerow((sentence, multiplier * count))
Code Example #10
File: OracleCfg.py Project: Leno1993/RecommendSystem
    def generate_sentence(self, depth=9, num=30000):
        if num > 30000:
            num = 30000
        gen_num = 0
        done = False
        sentences_list = list()

        for dep in range(1, depth):
            sentences = generate(self.grammar, depth=dep)
            for s in sentences:
                sentences_list.append(' '.join(s) + '\n')
                gen_num += 1
                if gen_num >= num:  # >= so that exactly num sentences are kept
                    done = True
                    break
            if done:
                break

        # sentences = generate(self.grammar, depth=depth, n=4)
        # for s in sentences:
        #     # file.write(' '.join(s) + '\n')
        #     sentences_list.append(' '.join(s) + '\n')
        # sentences_list = sentences_list[0:num]
        random.shuffle(sentences_list)
        with open(self.origin_file, 'w') as file:
            for s in sentences_list:
                file.write(s)
Code Example #11
 def generate_from_grammar(self, n, depth):
     grammar = CFG.fromstring(self.gramma)
     print("Generating for n " + n + " and depth " + depth)
     for track in generate(grammar, n=int(n), depth=int(depth)):
         self.track_array.append(' '.join(track))
         # productions
         numbers = " ".join(track)
         self.productions.append(numbers)
Code Example #12
def get_n_introductions(number):
    r = random.randint(0, 1)

    if r == 1:
        all_pos_sentences = list(generate(pos_grammar))
        pos_number = len(all_pos_sentences)
        print(pos_number)
        # randint is inclusive on both ends, so subtract 1 to stay in range
        pos_sentence = all_pos_sentences[random.randint(0, pos_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            pos_sentence) + ", Nathan A Clark. Hello, Nathan."
    else:
        all_noun_sentences = list(generate(noun_grammar))
        noun_number = len(all_noun_sentences)
        print(noun_number)
        noun_sentence = all_noun_sentences[random.randint(0, noun_number - 1)]
        return "Our tactics guy, and " + ' '.join(
            noun_sentence) + ", Nathan A Clark. Hello, Nathan."
Code Example #13
    def __init__(self, grammar, depth=5):
        """
        Initialize from a CFG.

        :type grammar: CFG
        :param grammar: A CFG generating the text.

        :type depth: int
        :param depth: The maximum depth of the generated derivations.
        """
        self._iterator = generate(grammar, depth=depth)
Code Example #14
def banjoify(rules, song):
    arrangement = []
    for pitch, duration in parse_abc(song):
        grammar = CFG.fromstring(rules.format(pitch=pitch))
        options = list(generate(grammar, start=Nonterminal(duration)))
        phrase = random.choice(options)
        arrangement.append(''.join(phrase))
    return ' '.join(arrangement)
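banjoify expects rules to be a grammar template with one start nonterminal per note duration and a {pitch} placeholder that rules.format(pitch=pitch) fills in per note. The real template comes from the caller; a purely hypothetical sketch of its shape:

# hypothetical template: 'q' and 'e' name durations returned by parse_abc
rules = """
q -> P P | P P P
e -> P
P -> '{pitch}'
"""

generate(..., start=Nonterminal(duration)) then enumerates the possible fills for that one note.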
Code Example #15
def generate_dataset(grammar, correct, incorrect):
    """
    Generate data with correct and incorrect number-verb agreement.

    Args:
        grammar (str): NLTK feature grammar
        correct (dict): for each number condition (key) a start symbol rule
                        (value) to create sentences with noun-verb agreement
        incorrect (dict): for each number condition (key) a start symbol rule
                        (value) to create sentences with incorrect verb number

    Returns:
        data_correct (list): tuples of (sentence, number_condition) for all
                            correct sentences
        data_incorrect (list): tuples of (sentence, number_condition) for all
                            sentences with number-verb disagreement
    """
    n_conditions = len(list(correct.keys())[0].split("_"))
    # Tasks that only have one noun of which we are tracking the number
    # Examples: simple, adv, qnty_simple, namepp
    if n_conditions == 1:
        grammar_correct, _ = get_grammar(grammar, correct["sg"])
    # Tasks that have two nouns of which we are tracking the number
    # Examples: nounpp
    elif n_conditions == 2:
        grammar_correct, _ = get_grammar(grammar, correct["sg_sg"])
    elif n_conditions == 3:
        grammar_correct, _ = get_grammar(grammar, correct["sg_sg_sg"])
    # Not tracking more than 3 nouns
    else:
        sys.exit(
            "Number of conditions is incorrect. Please check the template.")

    correct_parsers = defaultdict()
    incorrect_parsers = defaultdict()
    data_correct, data_incorrect = [], []

    # 'correct' and 'incorrect' are dictionaries containing the same keys
    # Get the parsers for both the correct sentences and the incorrect
    # sentences, where the verb number does not match the noun number
    for corr_key, incorr_key in zip(correct, incorrect):
        _, correct_parsers[corr_key] = get_grammar(grammar, correct[corr_key])
        _, incorrect_parsers[incorr_key] = get_grammar(grammar,
                                                       incorrect[incorr_key])

    # Generate n sentences and classify as either correct or incorrect
    for sent in tqdm(list(generate(grammar_correct, n=1000000))):
        for key in correct_parsers:
            # If a parser for correct sentence can parse the current sentence,
            # the sentence is correct
            if list(correct_parsers[key].parse(sent)):
                data_correct.append((" ".join(sent), key))
                break
            elif list(incorrect_parsers[key].parse(sent)):
                data_incorrect.append((" ".join(sent), key))
                break

    return data_correct, data_incorrect
Code Example #16
File: grammars.py Project: mech4rhork/pcfg-bcl
def generate_from_grammar(G, depth=50, n=999):
    C = ""  # corpus
    ## all possible sentences
    print("\n")
    # use a fresh loop variable so the parameter n is not shadowed
    for i, sent in enumerate(generate.generate(G, depth=depth, n=n), 1):
        s = ' '.join(sent)
        C += s + '. '
        print('%3d. %s.' % (i, s))
    return C
Code Example #17
def generate_initiative(nouns, adjs, verbs):
    grammar = init_grammar(nouns, adjs, verbs)
    # print(grammar)

    # for sentence in generate(grammar, depth=1000):
    #     print(' '.join(sentence))

    results = generate(grammar)

    return results
Code Example #18
File: util.py Project: BelkacemB/nltk
def generate_text(grammar, N):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen

    print('Generating the first %d sentences for demo grammar:' % (N,))
    print(grammar)
    grammar = CFG.fromstring(grammar)

    grm_list = gen.generate(grammar, n=N)
    for n, sent in enumerate(grm_list):
        print('%3d. %s' % (n, ' '.join(sent)))
Code Example #19
def generate_sent(n: int = 1) -> list:
    """
    Generate Thai sentences.

    :param int n: number of sentences to generate

    :return: a list of generated sentences
    :rtype: list
    """
    global _thaigrammar
    return [' '.join(i) for i in generate(_thaigrammar, n=n)]
Code Example #20
File: views.py Project: 5l1v3r1/WS-VulnS
 def gen_sql_stmt_from_grammar(self,
                               start_,
                               num_stmts=None,
                               table_name="table_name",
                               columns_name="columns_names"):
     grammar = CFG.fromstring(
         self.get_sql_select_stml_grammar(table_name, columns_name,
                                          COMMON_VALUES))
     sql_select_stmts = []
     for stmt in generate(grammar, start=Nonterminal(start_), n=num_stmts):
         sql_select_stmts.append(''.join(stmt))
     return sql_select_stmts
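A hypothetical call, assuming the grammar built by get_sql_select_stml_grammar exposes a start nonterminal named 'select_stmt' (the real symbol names are not shown in this snippet):

# hypothetical usage sketch
stmts = scanner.gen_sql_stmt_from_grammar('select_stmt', num_stmts=20)

Note that passing num_stmts=None leaves n unset, so generation is bounded only by the grammar itself.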
Code Example #21
    def generate_messages(self):
        """
        Generates messages for a synthetic structured language 
        according to a simple grammar, not randomly.

        Yields
        ------
        message : list
            A list with each element a word (str) in the message.
        """
        for message in generate(self.grammar):
            yield message
Code Example #22
File: grammar_permutator.py Project: Ker-A/c-
def main(args):
    grammar_string = DEMO_GRAMMAR

    if args.input_file_path:
        with open(args.input_file_path, 'r') as f:
            grammar_string = f.read()

    grammar = CFG.fromstring(grammar_string)

    for sentence in generate(grammar, depth=args.depth):
        print(''.join(sentence))

    return 0
Code Example #23
def grammar_get(
    base_grammar_fname: util.type.TFile,
    domain_grammar_fnames: T.List[util.type.TFile],
    sents_per_domain=100) -> TNameToSents:
  with open(base_grammar_fname, 'r') as f:
    base = f.read()
  sents = {}
  for domain_fname in domain_grammar_fnames:
    with open(domain_fname, 'r') as f:
      productions = f.read()
    grammar = CFG.fromstring(base + productions)
    sents[domain_fname] = list(generate(grammar, n=sents_per_domain))
  return sents
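Each value in the returned sents dict is a list of token lists, not of strings; joining is left to the caller, for example:

# whitespace-join the token lists for one domain
sentences = [' '.join(tokens) for tokens in sents[domain_grammar_fnames[0]]]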
Code Example #24
File: parser.py Project: Roceso1337/FriendBot
def respondQuestion(sentence, keyWord, POS):
	if "Tell me" not in sentence:
		grammar = ""

		if POS == "NNPS" or POS == "NNS":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'they' | 'those'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'are' 
			""")
		elif POS == "NN" or "NNP":
			grammar = CFG.fromstring("""
			S -> H-NP1 Adj VP'?' | Wh-NP VP'?'
			H-NP1 -> 'How'
			Wh-NP -> 'Who' | 'What' | 'Where' | 'What'
			Adj -> 'big' | 'small' | 'happy' | 'sad' | 'large' | 'difficult' | 'emotional' | 'old' | 'healthy' | 'strong' | 'cute' | 'hungry'
			NP -> Pronoun | Proper-Noun | Noun
			Pronoun -> 'it' | 'that'
			Proper-Noun -> '[]'
			Noun -> 'the <>'
			VP -> Verb NP  
			Verb -> 'is' 
			""")

		rand_sent_list = []
		response = ""
		for sentence in generate(grammar):
			rand_sent_list.append(' '.join(sentence))
		while True:
			num = randint(0, len(rand_sent_list)-1)
			response = rand_sent_list[num]
			if "<>" in response and (POS == "NNS" or POS == "NN"):
				index = response.index("<>")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "[]" in response and (POS == "NNPS" or POS == "NNP"):
				index = response.index("[]")
				response = response[:index] + keyWord + response[index+2:]
				break
			if "<>" not in response and "[]" not in response:
				break
		return response
	else:
		knowledgeRep(sentence)
Code Example #25
def generate_pairs(depth, cfg):
    '''
    depth: integer for the depth of the parse tree in the CFG
    cfg: chosen grammar, 1, 2 or 3
    '''
    if (cfg == 1):
        grammar = CFG.fromstring("""
        S -> Y  
        Y ->   a Y b | a Y | a |
        a -> '(' ')'  
        b -> '{' '}'  
        """)
    elif cfg == 2:
        grammar = CFG.fromstring("""
        S ->  X | Y  | X Y
        X -> a
        Y ->  b
        a -> '(' a ')'  |  
        b -> '{' b '}'  | 
        """)
    elif cfg == 3:
        grammar = CFG.fromstring("""
        S ->  X 
        X -> a | b
        a -> '(' a ')'  |  
        b -> '{' b '}' | '{' a '}'
        """)
    trg = list(generate(grammar, depth=depth))
    trg_list = []
    for sentence in trg:
        k = ''.join(sentence)
        trg_list.append(k)

    src_list = trg2src(trg)

    if cfg == 1:
        A = list((s + 'A ' for s in src_list))
    elif cfg == 2:
        A = list((s + 'B ' for s in src_list))
    elif cfg == 3:
        A = list((s + 'C ' for s in src_list))
    else:
        pass

    B = list((s for s in trg_list))

    df = pd.concat([pd.Series(A), pd.Series(B)], axis=1)
    pairs = (df.iloc[:, 0] + df.iloc[:, 1]).values.tolist()
    return pairs
Code Example #26
def generateRawTemplates(depth):
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []

    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates", "template" + str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
            templatefiles.append(filename)

    print(str(len(templatefiles)) + " template files generated")

    return templatefiles
Code Example #27
File: generate.py Project: cecilcox/proj-webapp
def generate_blazons(grammarfile, n, depth=None):
    bs = []
    with open(grammarfile) as g:
        raw_cfg = g.read()
    parser_grammar = CFG.fromstring(raw_cfg)
    for blazon in generate(parser_grammar, n=n, depth=depth):
        bwords = blazon
        field = bwords[0]
        z = ((isColour(field) and not any(map(isColour, bwords[1:]))) or
             (isMetal(field) and not any(map(isMetal, bwords[1:])))) and (
                 field not in bwords[1:])
        if z:
            bs.append(' '.join(blazon))
    return bs
Code Example #28
def generate_sentence(subject, predicate, object, useTemplate=False):
    if useTemplate==False:
        predicate = literal_tuner(predicate)
        rand = random.randint(0, 1)
        object = literal_tuner(object)
        grammar = get_grammar(subject, object, predicate)

        # very simplified random sentence selection, because we currently only have two valid compositions
        for sentence in generate(grammar, n=10):
            if rand < 1:
                return ' '.join(sentence)
            else:
                rand = rand-1
                continue
Code Example #29
    def generate(self, tree_depth, num_expressions):
        """Generates expression strings from context-free grammar.

        Args:
          tree_depth: Integer, depth of the grammar parsing tree.
          num_expressions: Integer, maximum number of expressions to generate.

        Yields:
          List of token strings for an expression string.
        """
        for token_list in generate.generate(self._cfg,
                                            depth=tree_depth,
                                            n=num_expressions):
            yield token_list
Code Example #30
def generateRawTemplates(depth):
    gram = CFG.fromstring(grammarstring)
    rawTemplates = generate(gram, depth=depth)
    templatefiles = []

    for index, state in enumerate(rawTemplates):
        filename = os.path.join("./templates","template"+str(index))
        with open(filename, 'w') as templatefile:
            templatefile.write(' '.join(state))
            templatefiles.append(filename)

    print(str(len(templatefiles)) + " template files generated")

    return templatefiles
Code Example #31
File: main.py Project: viking-sudo-rm/eval-probe
def main():
    """Skeleton towards a data generation process.

    An actual data generation pipeline would:
      1. Generate all basic sentences and throw them in train.
      2. Generate all complex sentences, and divide between train/test.
    """

    print("Basic grammar...")
    basic_grammar = load_grammar("grammars/basic_sents.fcfg")
    sentences = tqdm(generate(basic_grammar))
    expressions = semantic_parse(sentences, basic_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        print(str(expression), ":", value)

    print("Complex grammar...")
    complex_grammar = load_grammar("grammars/complex_sents.fcfg")
    sentences = tqdm(generate(complex_grammar, n=200, depth=5))
    expressions = semantic_parse(sentences, complex_grammar)
    for expression in expressions:
        value = evaluate(expression, model_dict)
        if value is not None:
            print(str(expression), ":", value)
Code Example #32
def main_phrases(name, othername=None):

    phrase_grammar = f"""
		S -> '{name}!'
		S -> 'Go, {name}!'
		S -> 'Show them, {name}!'
		S -> 'I chose you, {name}!'
		S -> 'You can do it, {name}!'
	"""

    grammar = CFG.fromstring(phrase_grammar)
    phrases = list(generate(grammar))
    shortlist = list(filter(lambda x: (syllables.estimate(x[0]) == 5),
                            phrases))
    return shortlist[0]
Code Example #33
def single_phrases(name, types=None, evolutions=None, attacks=None):
    phrase_grammar = f"""
	   
		
	"""

    if attacks is not None:
        for attack in attacks:
            phrase_grammar += f"""
				S -> 'Use {attack}!'
				S -> '{attack}, now!'
			"""

    grammar = CFG.fromstring(phrase_grammar)
    return generate(grammar)
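Calling generate(grammar) with neither depth nor n terminates here only because the grammar is finite. On a recursive grammar, nltk will typically hit Python's recursion limit and raise a RuntimeError, so when in doubt, bound the search:

# bound both the tree depth and the number of sentences
phrases = list(generate(grammar, depth=8, n=1000))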
Code Example #34
File: util.py Project: BelkacemB/nltk
def generate_tweet(grammar):
    from nltk.grammar import CFG
    import nltk.parse.generate as gen

    print(grammar)
    grammar = CFG.fromstring(grammar)
    grm_list = gen.generate(grammar, n=SIZE)  # TODO: check the max size; is there a way to retrieve it?
    from random import randint
    rd = randint(0, SIZE - 1)  # randint is inclusive on both ends
    cpt = 0
    for n, sent in enumerate(grm_list):
        if rd == cpt:
            print("Your tweet:")
            print('%3d. %s' % (n, ' '.join(sent)))
        cpt += 1
Code Example #35
    def generate_sample_strings(self, remove_duplicates=True):
        """
        Generates all strings from self.grammar up to the depth
        specified by self.depth. Duplicates may optionally be removed.

        :type remove_duplicates: bool
        :param remove_duplicates: If True, duplicates will be removed

        :rtype: list
        :return: A list of strings generated by self.grammar
        """
        generator = generate(self.grammar, depth=self.sample_depth)
        if remove_duplicates:
            return [list(y) for y in set(tuple(x) for x in generator)]
        else:
            return list(generator)
Code Example #36
    def generate(self, topstart="top", n=no_sents):
        if topstart == "top":
            topstart = self.start
        else:
            topstart = self.findstart(topstart)

        if n > 0:
            limit = n  # renamed from `max` to avoid shadowing the built-in
        else:
            limit = CorpusGenerator.no_sents

        sentences = 0
        for sentence in generate(self.grammar, start=topstart, n=limit):
            if limit < 1000000000:
                print(' '.join(sentence))
            sentences += 1
        print("Produced sentences: " + str(sentences))
Code Example #37
def main():
    zen = """ Beautiful is better than ugly.
    Explicit is better than implicit.
    Simple is better than complex.
    Complex is better than complicated.
    Flat is better than nested.
    Sparse is better than dense.
    Readability counts.
    Special cases aren't special enough to break the rules.
    Although practicality beats purity.
    Errors should never pass silently.
    Unless explicitly silenced.
    In the face of ambiguity, refuse the temptation to guess.
    There should be one-- and preferably only one --obvious way to do it.
    Although that way may not be obvious at first unless you're Dutch.
    Now is better than never.
    Although never is often better than *right* now.
    If the implementation is hard to explain, it's a bad idea.
    If the implementation is easy to explain, it may be a good idea.
    Namespaces are one honking great idea -- let's do more of those!"""
        
    tagged = nltk.pos_tag(nltk.word_tokenize(zen))
    tagged = [(tag, word) for word, tag in tagged]
    tags = set(tag for tag, _ in tagged)
    tag_word_map = {tag: {word for key, word in tagged if key == tag} for tag in tags}

    gram_head = """
      S -> NNP VBZ JJR IN RB
    """
    cats = ['NNP', 'VBZ', 'JJR', 'IN', 'RB']
    gram = [cat + ' -> ' + '|'.join([repr(x) for x in tag_word_map[cat]]) for cat in cats]
    
    grammar = gram_head + '\n'.join(gram)
    grammar = nltk.CFG.fromstring(grammar)
    
    poem = []    
    for sentence2 in generate(grammar, depth=5):
        poem.append(' '.join(sentence2))
        
    out = "\n".join(choice(poem, size=10))
    print(out)
Code Example #38
    def onMessage(self, author_id, message_object, thread_id, thread_type,
                  **kwargs):
        self.markAsDelivered(thread_id, message_object.uid)
        self.markAsRead(thread_id)

        log.info("{} from {} in {}".format(message_object, thread_id,
                                           thread_type.name))
        log.info(message_object.text)

        # If you're not the author, echo
        if author_id != self.uid:
            grammar = CFG.fromstring(demo_grammar)
            # generate() yields token lists, so join one into a message string
            sentence = ' '.join(next(generate(grammar, depth=14)))
            self.send(Message(text=sentence),
                      thread_id=thread_id,
                      thread_type=thread_type)
            self.send(Message(text='Chao'),
                      thread_id=thread_id,
                      thread_type=thread_type)
Code Example #39
def generate_sentences(args):

    in_file = open(args[1])
    out_file = open(args[2],'w')

    gram = in_file.read()

    grammar = CFG.fromstring(gram)
    print(grammar)
    sentences = ""

    for s in generate(grammar, depth=int(args[0])):
        sentences += ''.join(s) + '\n'

    out_file.writelines(sentences)

    in_file.close()
    out_file.close()
Code Example #40
g3 = """
S -> S1[G=?n] 
S1[G='money'] -> 'How many notes of each denomination person has?'
S1[G='shape'] -> 'What are its length and breadth?'
S1[G='int'] -> 'What are the two numbers?'
S1[G='age'] -> 'What are their present ages?'
S1[G='class'] -> 'What is the total strength?'

"""
first = []
sec = []
third = []

grammar1 = nltk.grammar.FeatureGrammar.fromstring("""% start S""" + "\n" + gramstring)
parser1 = nltk.FeatureChartParser(grammar1)
for sentence1 in generate(grammar1):
    if parser1.parse_one(sentence1):
        string1 = ' '.join(sentence1)
        first.append(string1)


grammar2 = nltk.grammar.FeatureGrammar.fromstring("""% start S""" + "\n" + g2)
parser2 = nltk.FeatureChartParser(grammar2)
for sentence2 in generate(grammar2):
    if parser2.parse_one(sentence2):
        string2 = ' '.join(sentence2)
        if string2 not in sec:
            sec.append(string2)
Code Example #41
File: learning.py Project: aadiuppal/open-cricket
import nltk
from nltk.parse import generate
from nltk.grammar import Nonterminal


cfg = nltk.CFG.fromstring("""
root -> who_player has the most runs
who_player -> who
who_player -> which player
who_player -> which team player
who -> 'who'
which -> 'which'
player -> 'player'
team -> 'indian' | 'australian' | 'england' | 'sri' 'lankan'
has -> 'has'
the -> 'the'
this -> 'this'
most -> 'most'
runs -> 'runs'
""")

print(list((n,sent) for n, sent in enumerate(generate.generate(cfg, n=100, start=Nonterminal('root')), 1)))

result1 = nltk.ChartParser(cfg).parse('which england player has the most runs'.split())
result2 = nltk.ChartParser(cfg).parse(['which', 'sri', 'lankan', 'player', 'has', 'the', 'most',  'runs'])
print(list(result1))
print(list(result2))

Code Example #42
File: views.py Project: cts5ws/hackuva2016
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list) // 2):  # integer division keeps range() valid on Python 3
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            # dictionary = PyDictionary()
            
            # nouns = []
            # verbs = []
            # adjectives = []
            # otherPos = []
            # for word in tokens:
            #     #definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4
            #     #assignment = definition.keys()[0] # Get the part of speech from the dictonary
            #     assignment = ""
            #     # assignment = tuple[1]
                
            #     if assignment == 'Noun':
            #         nouns.append(word)
            #     elif assignment == 'Verb':
            #         verbs.append(word)
            #     elif assignment == 'Adjective':
            #         adjectives.append(word)
            #     else:
            #         otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            DET = ["'the'","'a'","'some'"]
            # P = ["'in'","'at'","'since'","'for'","'to'","'past'","'to'""'by'","'in'","'at'","'on'","'under'","'below'","'over'","'above'","'into'","'from'","'of'","'on'","'at'"]
            VB = ["'talks'","'does'","'has'","'cries'", "'fights'", "'traps'", "'bakes'", "'fondles'", "'cooks'", "'sees'", "'calls'", "'smells'", "'tastes'", "'hears'"]
            
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            # pos_tags = []
            pos_words = {}
            pos_words['DET'] = DET
            #pos_words['P'] = P
            pos_words['VB'] = VB
            
            for pair in assignments:  # avoid shadowing the built-in `tuple`
                word, pos = pair
                if pos not in pos_words:
                    pos_words[pos] = []
                pos_words[pos].append("'" + word + "'")
                # pos_tags.append(pos)

            #grammar = """
            #S -> NP VP
            #PP -> P NP
            #NP -> Det N
            #VP -> V Det N | V Det N PP
            
            #"""
            
            grammar = """
            S -> NP VP
            NP -> Det N
            VP -> V Det N
            """
            #Det -> 'DT'
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            if 'DET' in pos_words:
                grammar += 'Det ->' + ' | '.join(pos_words['DET']) + '\n'
                
            if 'P' in pos_words:
                grammar += 'P ->' + ' | '.join(pos_words['P']) + '\n'
                
            if 'NN' in pos_words:
                grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n'
            #change to VB for nltk
            if 'VB' in pos_words:
                grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n'
            
            
            #if 'JJ' in pos_words:
            #    grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #  simple_grammar.start()
            # simple_grammar.productions()
            
            sentences = []
            sentence_validity = []
         
            for sentence in generate(simple_grammar, depth=4):
                sentences.append(' '.join(sentence))
            
            
            sentence_validity = get_validity(sentences)
            
            #get_validity(sentences)
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            
            story = ""
            for i in range(0, 10):
                pair = sentence_validity[i]  # avoid shadowing the built-in `tuple`
                string = pair[1]
                story += string[0].upper() + string[1:] + ". "
            
            return render(request, 'makestory/output.html',
                {
                'imageURL_output': imageURL,
                'story_output': story,
                'grammar_test_output': simple_grammar,
                'sentences_test_output': sentences,
                }
            )
        else:
            return fail(request)
    return fail(request)
Code Example #43
File: syllable.py Project: mouse-reeve/langmaker
 def generate_syllables(self):
     ''' every possible syllable for the given phonemes and grammar '''
     # spaces, which are only there for NLTK's sake, are removed
     return [re.sub(' ', '', '/'.join(s)) for s in \
             generate(self.grammar, depth=4)]
Code Example #44
# Filter each sentence and return the first filtered result.
def eliminate(sentence):
    sents = nltk.sent_tokenize(sentence)
    for sent in sents:
        filtered = filter(sent)  # project-local `filter` helper, not the builtin
        return filtered

# Here `input` is the option chosen on the UI.
# Each question has an ID as per the NCERT book; `input` is set to the chosen value.
input = 26
# Generate variations of a particular question based on the input and its corresponding grammar.
if input == 2:
    g = CFG.fromstring(g1)
    g2 = CFG.fromstring(g2)
    rd_parser = nltk.RecursiveDescentParser(g)
    for sent, sent2 in zip(generate(g2, n=100), generate(g, n=100)):
        newsent1 = ' '.join(sent)
        newsent2 = ' '.join(sent2)
        ans1 = eliminate(newsent1)
        ans2 = eliminate(newsent2)
        if ans1 is None or ans2 is None:
            pass
        else:
            print(ans1)
            print(ans2)
            print("Determine the length and breadth")
            print("\n")
elif input == 4:
    g = CFG.fromstring(g3)
    g2 = CFG.fromstring(g4)
    rd_parser = nltk.RecursiveDescentParser(g)
Code Example #45
from contractions import contractions


sent_tokenizer = PunktSentenceTokenizer()

with open("<source of text>", "r") as f:
    text = f.read()

for k, v in contractions.items():
    text = text.replace(k, v)

sents = []
for paragraph in text.split('\n'):
    sents += sent_tokenizer.tokenize(paragraph)

parser = Parser()

productions = []
for sent in sents[:25]:
    try:
        tree = parser.parse(sent)
        productions += tree.productions()
    except Exception:  # skip sentences the parser cannot handle
        pass

S = Nonterminal('S')
grammar = induce_pcfg(S, productions)

for sentence in generate(grammar, depth=5):
    print(" ".join(sentence) + "\n")
Code Example #46
def surface_realizer(grammar):
    for sentence in generate(grammar, n=10):
        return ' '.join(sentence)
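As written, surface_realizer returns only the first of up to ten generated sentences. A sketch of picking one at random instead (assuming random is imported):

import random

def surface_realizer_random(grammar, n=10):
    # collect up to n sentences, then pick one at random
    sentences = [' '.join(s) for s in generate(grammar, n=n)]
    return random.choice(sentences) if sentences else ''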
Code Example #47
File: CFG.py Project: Shokr/nltk_tutorial
__author__ = 'Mohammed Shokr <*****@*****.**>'

# Generating sentences from context-free grammars

from nltk.parse.generate import generate, demo_grammar
from nltk import CFG

# An example grammar:
grammar = CFG.fromstring(demo_grammar)
print(grammar)

print("#---------------------------------------------------------------#")

# The first 10 generated sentences:
for sentence in generate(grammar, n=10):
    print(' '.join(sentence))

print("#---------------------------------------------------------------#")

Code Example #48
            
            if 'NN' in pos_words:
                grammar += 'N ->' + ' | '.join(pos_words['NN']) + '\n'
            
            if 'VB' in pos_words:
                grammar += 'V ->' + ' | '.join(pos_words['VB']) + '\n'
                
            if 'JJ' in pos_words:
                grammar += 'A ->' + ' | '.join(pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
            return render(request, 'makestory/output.html',
                {
                'nouns_output': nouns,
                'verbs_output': verbs,
                'adjectives_output': adjectives,
Code Example #49
File: generate.py Project: NatalieBlack/haiku_haiku
def choose_line(some_lines):#5
    return a_random.choice(#7
                    some_lines).lower() #5

############################################

############################################
choose = choose_line #5

g = G.fromstring(#7
                    this_is_the_grammar) #5
############################################

############################################
while not len(pentas):#5
    for poem in generate(g, #7
                           start=N('five')): #5
############################################

############################################
      pentas.append(#5
                    with_blank_spaces.join(poem))#7

fives = pentas #5
############################################

############################################
third = choose(fives) #5
first = choose(fives) #7

def display_the(poem):#5
############################################
Code Example #50
File: cfg.py Project: simonjmendelsohn/StackNN
    [1. if i == b else 0. for i in xrange(len(code_for))])

# list of codes of symbols to predict
to_predict_codes = [onehot(code_for[s]) for s in to_predict]


# function to test if a symbol code is in list to predict
def in_predict_codes(code):
    for i in xrange(len(to_predict_codes)):
        if ((code == to_predict_codes[i]).all()):
            return True
    return False


# sample_strings = all strings from grammar of depth at most sample_depth
sample_strings = list(generate(grammar, depth=sample_depth))

# report #, min length and max length for strings in sample_strings
print("number of sample strings = {}".format(len(sample_strings)))
sample_lengths = [len(s) for s in sample_strings]
print("min length = {}, max length = {}".format(min(sample_lengths),
                                                max(sample_lengths)))

# sanity check: report one random string from sample_strings
print "random sample string = {}".format(random.choice(sample_strings))

#################################

model = VanillaModel(len(code_for), READ_SIZE, len(code_for))
try:
    model.cuda()
Code Example #51
from nltk.parse.generate import generate #, demo_grammar
from nltk import CFG


demo_grammar = """
  S -> NP VP
  NP -> Det N
  PP -> P NP
  VP -> 'slept' | 'saw' NP | 'walked' PP
  Det -> 'the' | 'a'
  N -> 'man' | 'park' | 'dog'
  P -> 'in' | 'with'
"""
grammar = CFG.fromstring(demo_grammar)
print(grammar)


# Join words and generate from the grammar, up to n sentences
for sentence in generate(grammar, n=12):
    print(' '.join(sentence))

'''
Notes: 
Need to symbolize the grammar
Have the machine process the language
Need to integrate with Markov chain - file 'agiliq-markov.py'
'''
for sentence in generate(grammar, depth=4):
    print(' '.join(sentence))
    
Code Example #52
    # to save typing
    dgr = dyck_grammar
    uagr = unambig_agreement_grammar
    eegr = exp_eval_grammar

    # comparisons of table calculations and reported sample sizes

    # Note: the generate function from nltk uses a notion of
    # depth that is 1 more than that used above!

    # NB: the dyck_grammar is NOT unambiguous (S -> S S)
    dgr_table = make_table(6, dgr)
    print("dyck_grammar for 4 from count_nonterminal_depth")
    print(count_nonterminal_depth(dgr.start(), 4, dgr_table, dgr))
    print("nltk generate: number of sentences for dyck grammar at depth = 5")
    print(len(list(generate(dgr, depth=5))))
    print("The dyck_grammar is ambiguous!")

    # unambig_agreement_grammar
    # this agrees with the count for depth = 16 in generate
    uagr_table = make_table(15, uagr)
    print("unambig_agreement_grammar for 15 from count_nonterminal_depth")
    print(count_nonterminal_depth(uagr.start(), 15, uagr_table, uagr))

    # exp_eval_grammar
    # this agrees with the count for depth = 6 in generate
    eegr_table = make_table(5, eegr)
    print("exp_eval_grammar for 5 from count_nonterminal_depth")
    print(count_nonterminal_depth(eegr.start(), 5, eegr_table, eegr))

    print("number of nltk depth = 7 sentences from dyck_grammar")
Code Example #53
def output(request):
    # Validation of form
    if request.method == "POST":
        # Validation of request
        if 'inputURL' in request.POST:
            # Validation of image url
            imageURL = request.POST.get('inputURL')
            image_output = imageURL
            indexOfDot = imageURL.rfind(".")
            if indexOfDot == -1:
                return fail(request) # not an image URL
            indexOfDot += 1
            extension = imageURL[indexOfDot:]
            if extension != 'jpg' and extension != 'jpeg' and extension != 'png':
                return fail(request) # not a valid image (jpg, jpeg, png)
                
            client_id = '8SkASX_SM8xc-fxMF4SdpzS_b9uew8yG0UrQp0y6'
            secret_id = 'EXkfCNxXeiHtnpsxn9Njui_yUpCuvcSAXzfSYjwN'
                
            clarifai_api = ClarifaiApi(client_id, secret_id) # assumes environment variables are set.
            try:
                result = clarifai_api.tag_image_urls(imageURL)
            except ApiError:
                #return fail(request)
                
                messages.add_message(request, messages.INFO, "ApiError")
                return HttpResponseRedirect('makestory/fail.html')
            
            
            class_list = result['results'][0]['result']['tag']['classes']
            prob_list = result['results'][0]['result']['tag']['probs']
            
            class_str = ""
            for i in range(0, len(class_list)):
                class_str += class_list[i] + " " 
            
            # currently just the list of matched words
            text_output = class_list.__str__()
            
            # Parts of speech recognition
            tokens = nltk.word_tokenize(class_str)
            dictionary = PyDictionary()
            
            
            
            nouns = []
            verbs = []
            adjectives = []
            otherPos = []
            for word in tokens:
                definition = dictionary.meaning(word) # https://pypi.python.org/pypi/PyDictionary/1.3.4
                assignment = list(definition.keys())[0]  # get the part of speech from the dictionary (works on Python 2 and 3)
                
                # assignment = tuple[1]
                
                if assignment == 'Noun':
                    nouns.append(word)
                elif assignment == 'Verb':
                    verbs.append(word)
                elif assignment == 'Adjective':
                    adjectives.append(word)
                else:
                    otherPos.append(word)
                    
                    
            # Create the grammar
            #P:prepositions, DET:articles, adverbs
            P = ["on","in","at","since","for","ago","before","to","past","to","until","by","in","at","on","under","below","over","above","into","from","of","on","at"]
            DET = ["the","a","one","some","few","a few","the few","some"]
            
            assignments = pos_tag(tokens) # tagset='universal' for ADJ, NOUN, etc.
            
            pos_tags = []
            pos_words = {}
            for tuple in assignments:
                word = tuple[0]
                pos = tuple[1]
                if pos in pos_words:
                    pos_words[pos].append(word)
                else:
                    pos_words[pos] = []
                pos_tags.append(pos)
                
                
            
            
            grammar = """
            S -> NP VP
            PP -> P NP
            NP -> Det N | Det N PP
            VP -> V NP | VP PP
            Det -> 'DT'
            """
            # N -> 'NN'
            # V -> 'VBZ'
            # P -> 'PP'
            
            
            # adverb is RB
            
            # terminals must be quoted, or CFG.fromstring treats them as nonterminals
            if 'NN' in pos_words:
                grammar += 'N -> ' + ' | '.join(repr(w) for w in pos_words['NN']) + '\n'

            if 'VB' in pos_words:
                grammar += 'V -> ' + ' | '.join(repr(w) for w in pos_words['VB']) + '\n'

            if 'JJ' in pos_words:
                grammar += 'A -> ' + ' | '.join(repr(w) for w in pos_words['JJ']) + '\n'
                
            simple_grammar = CFG.fromstring(grammar)
            #simple_grammar.start()
            simple_grammar.productions()
            
            sentences = []
            for sentence in generate(simple_grammar, n=10):
                sentences.append(' '.join(sentence))
            
            # parser = nltk.ChartParser(simple_grammar)
            # tree = parser.parse(pos_tags)
            


            caption = 'this is a caption'
            story = 'this is the story'
            
            return render(request, 'makestory/output.html',
                {
                'nouns_output': nouns,
                'verbs_output': verbs,
                'adjectives_output': adjectives,
                'otherPos_output': otherPos,
                'imageURL_output': imageURL,
                'caption_output': caption,
                'story_output': story,
                'sentences_test_output': sentences,
                }
            )
Code Example #54
File: generate.py Project: johnjosephmorgan/yaounde
from nltk.parse.generate import generate
from nltk import CFG
from nltk.data import load

for gg in ['grammar_2.cfg']:
    grammar = load('file:' + gg)
    for sentence in generate(grammar, depth=6, n=1000000):
        print(' '.join(sentence))