def how_many_questions(q,sentences,stemmed_sentences,sentence_vec):

    search_string = "how many {!VB*+} VB*"

    m = search(search_string, q)
    print m

    first_part =[w.string for w in m[0].words[2:-1]]
    search_string = "VB* *+"

    main_part = [lemma(first_part[0])]
    m = search(search_string, q)
    second_part = [w.string for w in m[0].words[1:]]
    
    answer = first_part + second_part
    stem_answer = " ".join([utils.stemm_term(w).lower() for w in answer])
    stem_vector = pp.text_to_vector (stem_answer)
    possible_answers,ans_idx = get_possible_answers(sentences,stem_vector, stemmed_sentences,sentence_vec,main_part)

    answered = False
    index = 0
    steps_to_rewind = 5
    while not answered and index < len(possible_answers):
        current_sentence = sentences[ans_idx[index]].split() 
        curr = possible_answers[index].split()


        main_idx = curr.index(main_part[0])
        if main_idx == -1:
            index+=1
            continue
        num_idx = -1
        for i in range(steps_to_rewind):
            if main_idx - (i+1) >= 0:
                if english_pack.is_number(curr[main_idx- (i+1)]):
                    num_idx = main_idx - (i+1)
                    break
            else:
                break
        
        if num_idx != -1:
            end_idx = num_idx+1
            while num_idx > 0 and english_pack.is_number(curr[num_idx-1]):
                num_idx-=1
                
            number = " ".join(current_sentence[num_idx:end_idx])
            if number != "," and number != ".":

                print fix_punctuation(" ".join([w.string for w in q.words]))
                print fix_punctuation(number + " " + " ".join(first_part) + ".")
                answered = True
                break
        index+=1
    return answered
Esempio n. 2
0
def checkTypeWordCount(answer,question):
    """Score a NOTICE-type answer: 0.25 for the NOTICE heading, 0.25 for
    meeting the word count the question asks for (or when the question
    specifies no count at all).

    Returns (count, status): the accumulated score and "Wrong" when the
    heading check failed (empty string otherwise).
    """
    count = 0
    status = ''
    word_total = 0  # renamed from "sum": don't shadow the builtin
    status1 = 'false'

    # Tokens treated as punctuation, not words (replaces the original
    # 30-clause "or" chain with a single membership test).
    punctuation = set('.,\'":;?/\\|][}{()*&^%$#@!`~-_=+')

    for word1 in word_tokenize(answer):
        if word1 in punctuation:
            print 'error'
        else:
            word_total = word_total + 1
    print word_total

    words_ans = word_tokenize(answer)
    words_qus = word_tokenize(question)
    # The answer must start with the NOTICE heading.
    if words_ans[0]=="NOTICE"or words_ans[0]=="Notice":
        print "Correct"
        count = count+0.25
    else:
        status = "Wrong"

    # Look for "<number> words" in the question, e.g. "in about 100 words".
    for word in words_qus:
        if en.is_number(word) and words_qus[words_qus.index(word)+1]== 'words':
            # BUG FIX: the original compared an int with a str
            # ("sum >= word"), which is always False in Python 2.
            # en.is_number also accepts spelled-out numbers ("hundred"),
            # so guard the int() conversion.
            try:
                required = int(word)
            except ValueError:
                required = None
            if required is not None and word_total >= required:
                print word
                count = count+0.25
            status1='true'

    # No word-count requirement found -> grant the point.
    if status1 == 'false':
        count = count+0.25
    return count,status
    def find_grammatical_kind(self):
        """Classify every word of the current sentence.

        Returns a list with one label per word: "emotion", "connective",
        "verb", "adjective", "noun", "persuasive", "number", or "unclear"
        when none of the checks match.
        """
        sentence = re.sub(",", "", self.get_sentence())  # drop all commas first

        # Checks are tried in this exact order; the first match wins.
        checks = [
            (en.noun.is_emotion, "emotion"),
            (en.is_connective, "connective"),
            (en.is_verb, "verb"),
            (en.is_adjective, "adjective"),
            (en.is_noun, "noun"),
            (en.is_persuasive, "persuasive"),
            (en.is_number, "number"),
        ]

        labels = []
        for token in sentence.split(" "):
            for predicate, label in checks:
                if predicate(token):
                    labels.append(label)
                    break
            else:
                labels.append("unclear")

        return labels
def when_questions(curr,curr_idx,current_sentence, full_sentence,answer):
    """Extend a partial answer to a "when ..." question with a date phrase.

    curr             -- tokens of the candidate sentence being scanned
    curr_idx         -- index in the sentence where scanning starts
    current_sentence -- tokens of the original (surface-form) sentence
    full_sentence    -- the raw sentence string (re-parsed here for POS tags)
    answer           -- the answer built so far, as a single string

    Returns (True, answer_string) when a date phrase was found and attached,
    (False, "") otherwise.
    """
    already_seen = set(answer.split())   # words already in the answer
    initial_size = len(answer.split())

    # NOTE(review): this adds the word when it *was* already seen, which is
    # the opposite of every later check -- possibly an inverted condition;
    # confirm intent against have_seen's definition.
    if have_seen(already_seen,current_sentence,curr_idx):
        answer += " " + current_sentence[curr_idx] 
    curr_idx+=1
    tagged_curr = parsetree(full_sentence).words   # POS-tagged words

    date_preposition = set(["in","at","on"])
    
    #look for the NP after the prep.
    # NOTE(review): no bounds check on curr_idx here -- if the sentence has
    # no date and no final "." tag this raises IndexError; confirm callers
    # always pass period-terminated sentences.
    while not (is_date(tagged_curr[curr_idx].string) or tagged_curr[curr_idx].type == "."):
        
        if not have_seen(already_seen,current_sentence,curr_idx):
            answer+= " " + current_sentence[curr_idx]
        curr_idx +=1

    # Consume the date phrase itself: date words, determiners, commas.
    while curr_idx < len(curr) and (is_date(tagged_curr[curr_idx].string)\
    or tagged_curr[curr_idx].type == "DT" or tagged_curr[curr_idx].type == ","):
        if not have_seen(already_seen,current_sentence,curr_idx):
            answer+= " " + current_sentence[curr_idx]
        curr_idx += 1
     
    # Trim trailing non-date words that were picked up after the date.
    answer = answer.split()
    curr_idx -= 1
    while not is_date(tagged_curr[curr_idx].string) and len(answer) > initial_size:
        del answer[-1]
        curr_idx-= 1

    # Nothing added beyond the original answer -> no date found here.
    if len(answer) == initial_size:
        return False,""
    
    answer[0] = answer[0].title()   # capitalize the sentence start
    found_date = False
    found_prep = False
    for w in answer:
        if is_date(w):
            found_date = True
        if w in date_preposition:
            found_prep = True
            
    if not found_date:
        return False, ""

    # The date phrase lacks a preposition: insert "on" before a day number
    # (< 32), "in" otherwise (months / years).
    if not found_prep:
        first_part = answer[:initial_size]
        last_part = answer[initial_size:]
        if english_pack.is_number(last_part[0]) and int(last_part[0]) < 32:
            answer = first_part + ["on"] + last_part
        else:
            answer = first_part + ["in"] + last_part

    answer = " ".join(answer)
    if not "." in answer:
        answer+= "."

    return True, answer
def isNumber(text):
	"""Return True when *text* consists entirely of number words plus the
	glue words 'a' and 'and' (e.g. "a hundred and five"); False when it is
	empty or contains any other word."""
	tokens = nltk.word_tokenize(text.lower())
	if not tokens:
		return False
	return all(word in ['a', 'and'] or en.is_number(word) for word in tokens)
def exp2words(expstr):
	"""Spell out an arithmetic expression in English words.

	Numbers are rendered with en.number.spoken and operators via a lookup
	table; two adjacent '*' tokens are merged into the '**' power operator.
	Returns the spoken form, stripped of surrounding whitespace.
	"""
	expstr = nltk.word_tokenize(expstr.lower())
	operators = {'+': 'plus', '-': 'minus', '/': 'divided by', '*': 'multiplied by', '=': 'is equal to', '**': 'to the power of', 'log':'the logarithm of'}
	# BUG FIX: the original crashed on empty input -- lw stayed None
	# (en.is_number(None)) and w was never bound (NameError).
	if not expstr:
		return ''
	text = ''
	lw = None
	# Emit each token one step late so '*' '*' can be merged into '**'.
	for w in expstr:
		if lw!=None:
			if en.is_number(lw):
				text += en.number.spoken(lw)+' '
			elif lw=='*' and w=='*':
				w = '**'
			elif lw in operators:
				text += operators[lw]+' '
		lw = w
	# Flush the final token (after the loop lw is the last token).
	if en.is_number(lw):
		text += en.number.spoken(lw)+' '
	elif lw=='*':
		# Preserved from the original: a dangling trailing '*' is dropped.
		pass
	elif lw in operators:
		text += operators[lw]+' '
	return text.strip()
Esempio n. 7
0
def word_count(answer,question):

    words_qus = word_tokenize(question)
    count = 0
    status1 = 'false'
    for word in words_qus:
        if en.is_number(word) and words_qus[words_qus.index(word)+1]== 'words':
            if sum_of_words(answer) >= word:
                print word
                count = count+0.5
            status1 = 'true'
    if status1 == 'false':
        count = count+0.5
    return count
def words2exp(text):
	"""Decide whether *text* is already a symbolic arithmetic expression.

	NOTE(review): this function looks truncated -- it computes isExp but
	never uses or returns it, so as written it implicitly returns None.
	Confirm against the original source.
	"""
	#text = nltk.word_tokenize(text.lower())
	#alreadyExp = True
	#for word in text:
	#	if not (word.isdigit() or word in ['+', '-', '*', '/', '**', '(', ')']):
	#		alreadyExp = False
	#		break
	#if alreadyExp and len(text)>0:
	#	return ' '.join(text)
	isExp = False
	try:
		# Treat the text as an expression if sympy can evaluate it to a number.
		isExp = en.is_number(str(sympy.simplify(text).evalf()))
	except Exception, e:
		isExp = False
Esempio n. 9
0
import en

# Spell-checking helpers from the Nodebox "en" package.
print en.spelling.correct('zer0')
print en.spelling.suggest('tw0')


# Recognizes spelled-out numbers.
print en.is_number('eight')

import string
# All ASCII punctuation characters as one string.
print string.punctuation

"""
/usr/lib/spark/bin/spark-submit \
--master yarn-client \
--conf "spark.yarn.executor.memoryOverhead=8192" \
--conf "spark.shuffle.memoryFraction=0.5" \
--executor-memory 10g  --executor-cores 2  --num-executors 5 \
--py-files wedc-lib.zip,scipy-0.18.0.dev0_5a779fd-py2.7-linux-x86_64.egg,scikit_learn-0.18.dev0-py2.7-linux-x86_64.egg,Cython-0.24-py2.7-linux-x86_64.egg,nltk-3.2.1-py2.7.egg,nose2-0.6.2-py2.7.egg,pyenchant-1.6.7-py2.7.egg,digSparkUtil-1.0.23-py2.7.egg,inflection-0.3.1-py2.7.egg,numpy-1.11.0-py2.7-linux-x86_64.egg \
spark_entrance.py \
$@
"""
Esempio n. 10
0
#! /usr/bin/python2
# -*- coding: utf-8 -*-

import en

# This file just runs some tests to see if en is working.
# To run it, cd to the directory just above en, then
# python2 < _en-test.py

# LEXICAL CATEGORIZATION ############################################################

# Returns True when the given value is a number.
# Accepts both numeric values and spelled-out words.
print(1, en.is_number(12))
print(2, en.is_number("twelve"))

# Returns True when the given string is a noun.
# You can also check for is_verb(), is_adjective() and is_adverb().
print(3, en.is_noun("banana"))

# Returns True when the given string is a tag,
# for example HTML or XML.
print(4, en.is_tag("</a>"))

# Return True when the string is a HTML tag,
# for example <a> or <body>.
print(5, en.is_html_tag("</person>"))

# COMMONSENSE #######################################################################

# Returns True if the given word expresses a basic emotion:
# anger, disgust, fear, joy, sadness, surprise.
                    x = int(x)
                except:
                    pass
                if lazy == 0:
                    temp += en.number.spoken(x)
                    lazy = 1
                else:
                    temp += ' ' + en.number.spoken(x)
            text = temp
            convert(text)
        else:
            print text
    else:
        print text
        if text == '***':
            started = 1
    text = raw_input()
print en.is_number("twelve")
print en.is_basic_emotion("cheerful")