Ejemplo n.º 1
0
    def processFile(self, file_path_and_name):
        try:
            f = open(file_path_and_name, 'r')
            text = f.read()

            text = re.sub('<[^<]+?>', '', text)
            sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

            lines = sent_tokenizer.tokenize(text.strip())

            text = lines

            sentences = []
            porter = nltk.PorterStemmer()

            for sent in lines:
                OG_sent = sent[:]
                sent = sent.strip().lower()
                line = nltk.word_tokenize(sent)

                stemmed_sentence = [porter.stem(word) for word in line]
                stemmed_sentence = filter(
                    lambda x: x != '.' and x != '`' and x != ',' and x != '?'
                    and x != "'" and x != '!' and x != '''"''' and x != "''"
                    and x != "'s", stemmed_sentence)
                if stemmed_sentence != []:
                    sentences.append(
                        sentence.sentence(file_path_and_name, stemmed_sentence,
                                          OG_sent))

            return sentences

        except IOError:
            print 'Oops! File not found', file_path_and_name
            return [sentence.sentence(file_path_and_name, [], [])]
Ejemplo n.º 2
0
def whilsent():
    """Parse a WHILE sentence: WHILE <condition> DO <sentence>."""
    def _expect(code_name):
        # consume the expected token or report a syntax error at its position
        if tk.token.code != CodeTable[code_name]:
            raise SyntaxError("SyntaxError at line:{},column:{}".format(
                tk.token.line, tk.token.column))
        tk.token = next(tk.tg)

    _expect('WHILE')
    conditio.conditio()
    _expect('DO')
    sentence.sentence()
Ejemplo n.º 3
0
def compsent():
    """Parse a compound statement: BEGIN <sentence> <sentsuff> END."""
    # guard-clause form: bail out on a mismatch, otherwise consume the token
    if tk.token.code != CodeTable['BEGIN']:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
    tk.token = next(tk.tg)

    sentence()
    sentsuff()

    if tk.token.code != CodeTable['END']:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
    tk.token = next(tk.tg)
Ejemplo n.º 4
0
def init_joy():
    """Load 'joyToken.txt' and append one 'joy'-labelled sentence per line
    to the module-level `sentences` list."""
    global sentences
    # context manager closes the file even if stop_words_remove raises;
    # the original leaked the handle on any error
    with open('joyToken.txt', 'r') as joy_file:
        for line in joy_file:
            line = stop_words_remove(line)
            sentences.append(sentence(line, 'joy'))
Ejemplo n.º 5
0
def init_disgust():
    """Load 'DisgustToken.txt' and append one 'disgust'-labelled sentence
    per line to the module-level `sentences` list."""
    global sentences
    # context manager closes the file even if stop_words_remove raises;
    # the original leaked the handle on any error
    with open('DisgustToken.txt', 'r') as disgust_file:
        for line in disgust_file:
            line = stop_words_remove(line)
            sentences.append(sentence(line, 'disgust'))
Ejemplo n.º 6
0
def buildQuery(sentences, TF_IDF_w, n):
    """Return a query sentence built from (up to) the n words with the
    highest TF-IDF scores.

    TF_IDF_w maps a TF-IDF score to the list of words carrying that score.
    `sentences` is unused but kept for interface compatibility.
    """
    # scores in descending order of TF-IDF value
    scores = sorted(TF_IDF_w.keys(), reverse=True)

    taken = 0
    queryWords = []

    # walk the score buckets from best to worst, collecting words until n
    # have been taken.  The original tested 'i > n' after incrementing,
    # which admitted an (n+1)-th word, and indexed scores[j] without a
    # bound, raising IndexError when fewer than n words exist.
    for score in scores:
        for word in TF_IDF_w[score]:
            queryWords.append(word)
            taken += 1
            if taken >= n:
                break
        if taken >= n:
            break

    # return the top selected words as a sentence
    return sentence.sentence(queryWords, queryWords)
def getemoj(request):
    """Django view: classify the emotion of the GET parameter 'sentence'
    and return a human-readable HttpResponse.

    Prediction index 0 = fear, 1 = disgust, anything else = joy.
    """
    global run
    run = run + 1

    text = request.GET['sentence']
    line = stop_words_remove(text)
    sent = sentence(line, 'NONE')
    sent.init_sent_vector()

    X = numpy.asarray([sent.get_sentence_vector()])

    # Load the Keras model once and cache it on the function object; the
    # original re-read 'my_model.h5' from disk on every single request.
    if not hasattr(getemoj, 'model'):
        getemoj.model = load_model('my_model.h5')

    predictions = getemoj.model.predict(X)
    feels = predictions.tolist()

    # pick the highest-scoring class of the first (only) sample
    f = feels[0]
    ind = f.index(max(f))

    if ind == 0:
        response = "you are feeling really afraid?! why so serious?"
    elif ind == 1:
        response = "you are feeling really disgusting"
    else:
        response = "you are feeling happy that's awesome"

    return HttpResponse(response)
Ejemplo n.º 8
0
def init_fear():
    """Load 'fearToken.txt' and append one 'fear'-labelled sentence per
    line to the module-level `sentences` list."""
    global sentences
    # context manager closes the file even if stop_words_remove raises;
    # the original leaked the handle on any error
    with open('fearToken.txt', 'r') as fear_file:
        for line in fear_file:
            line = stop_words_remove(line)
            sentences.append(sentence(line, 'fear'))
Ejemplo n.º 9
0
    def processFile(self, file_path_and_name):
        try:
            # open file
            f = open(file_path_and_name, 'r')
            text = f.read()

            # remove HTML tags
            text = nltk.clean_html(text.replace('\n', ''))

            # segement data into a list of sentences
            sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

            lines = sent_tokenizer.tokenize(text.strip())

            # ensure names used are in their complete form
            # text = self.use_full_names(lines)
            text = lines

            # convert sentences to list of words
            sentences = []
            porter = nltk.PorterStemmer()

            # every sentence
            for sent in lines:
                OG_sent = sent[:]
                sent = sent.strip().lower()
                line = nltk.word_tokenize(sent)

                # convert words to stemmed words before appending to list and returning
                stemmed_sentence = [porter.stem(word) for word in line]
                stemmed_sentence = filter(
                    lambda x: x != '.' and x != '`' and x != ',' and x != '?'
                    and x != "'" and x != '!' and x != '''"''' and x != "''"
                    and x != "'s", stemmed_sentence)
                # no empy sentences
                if stemmed_sentence != []:
                    sentences.append(
                        sentence.sentence(file_path_and_name, stemmed_sentence,
                                          OG_sent))

            return sentences

        # print error message if file not found
        except IOError:
            print 'Oops! File not found', file_path_and_name
            return [sentence.sentence(file_path_and_name, [], [])]
Ejemplo n.º 10
0
 def __init__(self, parent, sList):
     """Record the owning paper and wrap every raw sentence in sList in a
     sentence object."""
     self.parentPaper = parent
     self.numSents = len(sList)
     self._topic = ''
     self.index = 0
     # build the wrapped sentence objects up front
     self.sentenceList = [sentence(s) for s in sList]
def translated(list_topics, start_idx, end_idx, dir_topic, dir_out, paramter):
    """Translate each topic file in list_topics[start_idx:end_idx] to
    Vietnamese, annotate noun phrases, and write a phrase-based summary
    for each topic into dir_out.

    paramter is the summarizer parameter list; its last slot is overwritten
    with the per-topic budget read from data/budget/budget.
    """
    # The body historically referred to 'parameter' while the argument is
    # spelled 'paramter' -- that was a guaranteed NameError.  Alias it so
    # existing callers keep working.
    parameter = paramter

    # per-topic budgets, one integer per line (last split entry is empty)
    with open("data/budget/budget", "r") as fileBudget:
        budget_string = fileBudget.read().split("\n")
    budget = []
    for i in range(len(budget_string) - 1):
        budget.append(int(budget_string[i]))

    number_sentence = 0

    for i in range(start_idx, end_idx, 1):
        print("process " + list_topics[i])
        with open(dir_topic + list_topics[i], mode="rb") as file:
            sentences = file.read().decode("utf8", "replace").split('\r\n')
        parameter[-1] = budget[i]
        phrase_sentences = []
        for line in sentences:
            if len(line) == 0:
                # skip blank lines: the original fell through and re-tested
                # the previous iteration's out_sen here, appending
                # duplicates (or raising NameError on a leading blank line)
                continue
            number_sentence += 1
            phrase = []
            try:
                blob = TextBlob(line)
                translated_blob = blob.translate(to='vi')
                out_sen = " ".join(translated_blob.tokens)
                start = 0
                out_sen_tmp = out_sen.lower().split()
                end = len(out_sen_tmp)
                # record 1-based token positions of each noun phrase
                for nphrase in translated_blob.noun_phrases:
                    phrase_ele = []
                    nphrase = nphrase.split()
                    k = find_index(out_sen_tmp, nphrase, start, end)
                    start = k + len(nphrase)
                    for j in range(k, k + len(nphrase), 1):
                        phrase_ele.append(j + 1)
                    phrase.append(phrase_ele)
            except Exception:
                # translation failed: keep the untranslated line, no phrases
                # (narrowed from a bare except that also ate KeyboardInterrupt)
                out_sen = line
                phrase = []
            if out_sen != "":
                sen = sentence.sentence(out_sen, phrase)
                phrase_sentences.append(sen)

        summarizer = PhraseBaseSummarization.phrase_based_summarization(phrase_sentences)
        summary = summarizer.summarizer(parameter)

        # write the summary; 'with' closes the file even if write() fails
        with open(dir_out + list_topics[i], "w") as fileOut:
            fileOut.write(summary)

        print("finish " + list_topics[i])

    # function form so the line is valid Python 3 as well
    print("no.sentence: " + str(number_sentence))
Ejemplo n.º 12
0
def create_doc_object(list_of_sentences):
    """Build a doc.documant from raw sentence strings, scoring each with
    TF-IDF and attaching its embedding vector."""
    scored = []
    total_tfidf = 0
    for raw in list_of_sentences:
        # per-word and whole-sentence TF-IDF scores plus embedding
        tokens = tb(raw).words
        word_scores = tfidf.generate_dictionary_for_specific_words(tokens)
        sent_score = tfidf.get_sentence_value(raw)
        embedding = sentence_to_vec(raw)
        scored.append(
            sentence.sentence(raw, word_scores, embedding[0], sent_score))
        total_tfidf += sent_score
    return doc.documant(scored, total_tfidf, sens2vec(scored, total_tfidf))
Ejemplo n.º 13
0
def processFile(sample):
    """Split the raw text `sample` into sentences and return a list of
    sentence.sentence objects holding the Porter-stemmed words alongside
    the original wording of each sentence.

    (The large banks of commented-out DUC-file parsing code from the
    original have been removed; `sample` is already plain text here.)
    """
    # segment the text into sentences
    lines = split_sentences(sample + "\n")

    # setting the stemmer
    sentences = []
    porter = nltk.PorterStemmer()

    # modelling each sentence as a sentence object
    for line in lines:
        # original wording, kept before normalisation
        originalWords = line[:]
        line = line.strip().lower()

        # word tokenization and stemming
        sent = nltk.word_tokenize(line)
        stemmedSent = [porter.stem(word) for word in sent]

        # skip empty sentences
        if stemmedSent != []:
            sentences.append(sentence.sentence(stemmedSent, originalWords))

    return sentences
def processFile(file_name):
    """Read a DUC-style document, keep only the <TEXT> body, split it into
    sentences and return sentence.sentence objects carrying the stemmed
    words plus the original wording."""
    # read file; 'with' closes the handle the original leaked
    with open(file_name, 'r') as f:
        text_0 = f.read()

    # extract content in TEXT tag and remove tags
    text_1 = re.search(r"<TEXT>.*</TEXT>", text_0, re.DOTALL)
    text_1 = re.sub("<TEXT>\n", "", text_1.group(0))
    text_1 = re.sub("\n</TEXT>", "", text_1)

    # replace all types of quotations by normal quotes, squeeze whitespace
    text_1 = re.sub("\n", " ", text_1)
    text_1 = re.sub("\"", "\"", text_1)
    text_1 = re.sub("''", "\"", text_1)
    text_1 = re.sub("``", "\"", text_1)
    text_1 = re.sub(" +", " ", text_1)

    # segment data into a list of sentences
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    lines = sentence_token.tokenize(text_1.strip())

    # setting the stemmer
    sentences = []
    porter = nltk.PorterStemmer()

    # modelling each sentence in file as sentence object
    for line in lines:
        # original words of the sentence before stemming
        originalWords = line[:]
        line = line.strip().lower()

        # word tokenization and stemming
        sent = nltk.word_tokenize(line)
        stemmedSent = [porter.stem(word) for word in sent]

        # drop punctuation-only tokens; must be a real list -- on Python 3
        # the original's bare filter object was never equal to [] and was
        # passed on unevaluated
        stemmedSent = [w for w in stemmedSent
                       if w not in ('.', '`', ',', '?', "'", '!', '"', "''", "'s")]

        # list of sentence objects, empty sentences skipped
        if stemmedSent != []:
            sentences.append(
                sentence.sentence(file_name, stemmedSent, originalWords))

    return sentences
Ejemplo n.º 15
0
 def readGoldenFile(self, wordfile, tagfile, numSentences):
     """Read parallel word/tag training files.

     Builds one sentence.sentence per line pair for the first
     numSentences lines and stores them on self.allSentences /
     self.sentenceNum.
     """
     # 'with' closes both files even if a line is malformed; the original
     # left them open whenever an exception escaped
     with open(wordfile, 'rt') as wf, open(tagfile, 'rt') as tf:
         wlines = wf.readlines()
         tlines = tf.readlines()
     # build and save sentence objects
     allSentences = []
     for i in range(numSentences):
         words = wlines[i].split()
         tags = tlines[i].split()
         allSentences.append(sentence.sentence(words, tags))
     # save data
     self.allSentences = allSentences
     self.sentenceNum = len(allSentences)
def buildQuery(sentences, TF_IDF_w, n):
    """Return a query sentence built from (up to) the n words with the
    highest TF-IDF scores.

    TF_IDF_w maps a TF-IDF score to the list of words carrying that score.
    `sentences` is unused but kept for interface compatibility.
    """
    # descending scores; sorted() works on the Python 3 keys view, where
    # the original keys().sort() raised AttributeError
    scores = sorted(TF_IDF_w.keys(), reverse=True)

    taken = 0
    queryWords = []

    # collect words bucket by bucket until n have been taken; the original
    # 'i > n' post-increment test let an (n+1)-th word slip in, and
    # scores[j] could raise IndexError when fewer than n words exist
    for score in scores:
        for word in TF_IDF_w[score]:
            queryWords.append(word)
            taken += 1
            if taken >= n:
                break
        if taken >= n:
            break

    # return the top selected words as a sentence
    return sentence.sentence("query", queryWords, queryWords)
def buildQuery(sentences, TF_IDF_w, n):
    """Return a query sentence built from (up to) the n words with the
    highest TF-IDF scores.

    TF_IDF_w maps a TF-IDF score to the list of words carrying that score.
    `sentences` is unused but kept for interface compatibility.
    """
    # descending scores; sorted() also works on the Python 3 keys view,
    # where the original keys().sort() raised AttributeError
    scores = sorted(TF_IDF_w.keys(), reverse=True)

    taken = 0
    queryWords = []

    # take words bucket by bucket until exactly n are collected; the
    # original 'i > n' test admitted an (n+1)-th word and could raise
    # IndexError on scores[j] when the buckets ran out early
    for score in scores:
        for word in TF_IDF_w[score]:
            queryWords.append(word)
            taken += 1
            if taken >= n:
                break
        if taken >= n:
            break

    # return the top selected words as a sentence
    return sentence.sentence("query", queryWords, queryWords)
Ejemplo n.º 18
0
def processFile(file_name):
    """Read a Vietnamese text file, split it into sentences and return
    sentence.sentence objects holding the tokenized, filtered words plus
    the original wording."""
    # read file; 'with' closes the handle the original leaked
    with open(file_name, 'r') as f:
        text_1 = f.read()

    # sentence segmentation
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    lines = sentence_token.tokenize(text_1.strip())

    sentences = []

    # model each sentence in the file as a sentence object
    for i in range(len(lines)):
        line = lines[i]
        # keep the original sentence
        originalWords = line[:]

        # lowercase the whole sentence: "Good Mike" => "good mike"
        line = line.strip().lower()

        # Vietnamese word segmentation
        stemmedSent = ViTokenizer.tokenize(line).split()

        # drop punctuation tokens and stop words
        stemmedSent = list(
            filter(
                lambda x: x != '.' and x != '`' and x != ',' and x != '?' and x
                != "'" and x != ":" and x != '!' and x != '''"''' and x != "''"
                and x != '-' and x not in stop_word, stemmedSent))

        # ignore a trailing sentence of at most five tokens
        if ((i + 1) == len(lines)) and (len(stemmedSent) <= 5):
            break
        # list of sentence objects, empty sentences skipped
        if stemmedSent != []:
            sentences.append(
                sentence.sentence(file_name, stemmedSent, originalWords))

    return sentences
Ejemplo n.º 19
0
 def makeQuery(self, n, sentences, idfs):
     """Build a query sentence from the n best TF-IDF-scored words."""
     word_scores = self.TF_IDF(sentences, idfs)
     top_words = self.getBestWords(n, word_scores)
     # query sentences carry no original-wording payload
     return sentence.sentence("query", top_words, [])
Ejemplo n.º 20
0
# Disable web.py's debug middleware.
web.config.debug = False

# Template renderer rooted at templates/.
render = web.template.render('templates/')
# URL routing table: path pattern followed by its handler class name.
urls = ('/', 'index', '/image', 'image', '/history', 'history',
        '/imagedemo/(.*)', 'imagedemo')

app = web.application(urls, globals())
# Disk-backed session store; 'login'/'user' start at 0 (= not logged in).
session = web.session.Session(app,
                              web.session.DiskStore('sessions'),
                              initializer={
                                  'login': 0,
                                  'user': 0
                              })

# Data-access helpers, all backed by the same database file.
user_control = userControl.user(constant.DATABASE_FILE)
sen = sentence.sentence(constant.DATABASE_FILE)
image_pool = image.image(constant.DATABASE_FILE)


def check():
    """Gate for logged-in pages.

    Returns (True, rendered login page) when no user is logged in,
    otherwise (False, None) so the caller proceeds normally.
    """
    if session.login != 0:
        return False, None
    # judge == 4 signals the "please log in" state to the template
    resp = {'judge': 4}
    return True, render.index(resp)


class index():
    # Handler for GET /.
    def GET(self):
        # judge == 4: initial page state
        # NOTE(review): this method appears truncated in this excerpt --
        # the body continues beyond what is visible here; confirm against
        # the full source before relying on it.
        judge = 4
Ejemplo n.º 21
0
    else:
      location = prevlocation
      newlocation = False
  
  location = location.replace(' ','-') #Google requires a '-' in 2-word city names
  
  
  if location != '':
    if newlocation:	#If location hasn't changed, don't fetch data again. It's already available
      print 'Fetching weather information from Google...'
      # Call Google weather to get current weather conditions
      google_result = weather.get_weather(location)
      
    # Print today's weather report. Replace this with code to deduce the user request
    if time == '' or time == 'today' :
      printstring = sentence.sentence(google_result['current_conditions']['condition'], time)
      print printstring, time
    else :
      if time == 'tomorrow':
	printstring = sentence.sentence(google_result['forecasts'][1]['condition'], time)
	print printstring, time
      else:
	found = False
	for i in range(4):
	  if google_result['forecasts'][i]['day_of_week'] == newtime:
	    printstring = sentence.sentence(google_result['forecasts'][i]['condition'], time)
	    print printstring, "on", time
	    found = True
	if not found:
	  print "Forecast for " + time + " is not available currently."
    
Ejemplo n.º 22
0
# Log lines carry timestamp, source file and line number.
logging.basicConfig(
    format="[%(asctime)s - %(filename)s:line %(lineno)s] %(message)s",
    datefmt='%d %b %H:%M:%S',
    level=logging.INFO)
logger.setLevel(logging.INFO)

from setup import START
from constant import *

# Disable web.py's debug middleware.
web.config.debug = False

# Admin UI templates and URL routing table (path pattern, handler class).
render = web.template.render('adminTemplates/')
urls = ('/', 'index', '/image', 'image', '/history', 'history', '/single',
        'single')

# Data-access helpers backed by the shared database file.
sen = sentence.sentence(DATABASE_FILE)
user_control = userControl.user(DATABASE_FILE)
image_pool = image.image(DATABASE_FILE)


class index:
    # Handler for GET / on the admin UI.
    def GET(self):
        # optional time-range query parameters
        i = web.input()
        start = i.get("start")
        end = i.get("end")

        # default window: configured START .. now
        # NOTE(review): this method appears truncated in this excerpt --
        # as visible it builds the range but returns nothing; confirm
        # against the full source.
        if start is None:
            start = START
        if end is None:
            end = time.time()
Ejemplo n.º 23
0
def chat():
  """Interactive weather chatbot main loop (Python 2).

  Repeatedly reads user input, optionally spell-checks it, extracts
  location / time / weather-condition keywords, fetches Google weather
  data when a location is known, and falls back to a trainable
  predefined-response chat for anything it does not understand.  On
  exit, newly learnt responses are appended to predefined_responses.txt
  and the transcript is logged.

  NOTE(review): 'lower' is called as a free function throughout --
  presumably string.lower from Python 2; confirm the module's imports.
  """
  # keyword conditions
  condnext = False
  condweather = False
  condtime = False
  condlocation = False
  condtemp = False
  condkey = False
  condresponse = False
  foundinfo = False
  condtrain = False
  condcountry = False
  condspellcheck = True

  # global variables
  conversation = []
  location = ''
  prevlocation = location 
  time = 'today'
  key = ''
  keytemplate = []
  fulltime = ''
  numdays = ''
  logstr = ''
  printstr = ''
  responsedict = {} 	# Dictionary to hold all inputs without predefined responses. This dictionary will be written into predefined_responses.txt before exiting the program.


  # read data files
  citylist = readfile.readfile('cities.txt')
  keylist = readfile.readfile('keywords.txt')
  timelist = readfile.readfile('time.txt')
  condlist = readfile.readfile('conditions.txt')
  numlist = readfile.readfile('numbers.txt')
  countrylist = readfile.readfile('countries.txt')
  exitlist = ['exit', 'quit', 'bye', 'ok']

  # Greeting message
  printstr =  'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
  print printstr
  logstr += '\n\n' + printstr

  # Start main loop
  while True :
    foundinfo = False
    condtrain = False
    condcountry = False
    # read input from user
    input = raw_input('\nMe > ')
    logstr += '\nMe > ' + input + '\nBot > '
    if input in exitlist:
      if input == 'ok':
	exitans = raw_input("Do you want to quit? (y/n)")
	if exitans in ('y','Y','Yes','YES','yes'):
	  break
	else:
	  continue
      break
    
    if input == 'disable spellcheck':
      condspellcheck = False
      continue
    
    if input == 'enable spellcheck':
      condspellcheck = True
      continue
    
    # spell-correct each token; ask for confirmation if anything changed
    condcorrected = False
    if condspellcheck:
      corrected_input = ''
      for i in input.split():
	str = spellcheck.correct(i)
	if str != i:
	  condcorrected = True
	corrected_input += str + ' '
      if condcorrected:
	print 'did you mean: \"' + corrected_input + '\"?'
	input = corrected_input
    
    currentstring = input.split()
    conversation.append(currentstring)
    
    # Start searching input for each of the keywords
    
    if input == 'train':
      condtrain = True
      printstr =  'Entering training mode. Enter input and response seperated by a "|": input|response. Type "exit" to quit training mode'
      print printstr
      logstr += '\n' + printstr + '\n'
      
      while True:
	traininput = raw_input('>')
	if traininput == 'exit':
	  break
	if traininput.find('|') < 0:
	  printstr =  'Format error: use input|response'
	  print printstr
	  logstr += '\n' + printstr + '\n'
	  continue
	traininput = traininput.split('|')
	responsedict[traininput[0]] = traininput[1]
    
    if condtrain:
      continue
    

    # country names get a "which city?" follow-up instead of a forecast
    for i in countrylist:
      for j in currentstring:
	if lower(i[0]) == lower(j):
	  printstr = 'Which city in ' + i[0] + '?'
	  condcountry = True
	  foundinfo = True
	  break
      
    if condcountry:
      print printstr
      logstr += printstr
      continue
    

    if 'next' in input:
      foundinfo = True
      condnext = True
      condtime = False
      numdays = currentstring[currentstring.index('next') + 1]
      for i in numlist:
	if numdays == i[0]:
	  numdays = i[1]
	  break
      if re.match('[0-9]*$',numdays):
	numdays = int(numdays)
      else:
	numdays = ''
    
    if 'weather' in input:
      foundinfo = True
      condweather = True
      condkey = False
      condtemp = False
      key = ''
      keytemplate = []

    # get key from input
    for i in keylist:
      if i[0] in input:
	if 'sunday' in lower(input) and i[0] == 'sun':
	  break
	else:
	  foundinfo = True
	  condkey = True
	  condweather = False
	  condtemp = False
	  key = i[0]
	  keytemplate = i
	  break

    # get time from input
      for i in timelist:
	if lower(i[0]) in input:
	  foundinfo = True
	  condtime = True
	  numdays = ''
	  if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
	    time = i[1]
	    fulltime = i[0]
	    break
	  else:
	    time = i[0]
	    fulltime = time
	    break
    if fulltime == '':
      fulltime = time

    if numdays != '':
      condtime = True
      if numdays > 4:
	printstr =  'Forecast is available only for the next 4 days.'
	print printstr
	logstr += '\n' + printstr + '\n'
      else:
	time = ''
	fulltime = ''
	count = numdays
    
    # get location from input
    for i in citylist:
      if lower(i[0]) in input:
	foundinfo = True
	condlocation = True
	location = i[0]
	break
    
    # find if a new location has been mentioned. if not, don't fetch data again
    if location != prevlocation:
      newlocation = True
      condlocation = True
      prevlocation = location
    else:
      newlocation = False
    
    if location is '':
      if prevlocation is '':
	condlocation = False
      else:
	location = prevlocation
	newlocation = False
    
    location = location.replace(' ','-') #Google requires a '-' in 2-word city names
    result = False
    
    # get temperature from input
    if 'temperature' in input:
      foundinfo = True
      condtemp = True

    # User gave no infomation about weather. Switching to general predefined response based chat
    if not foundinfo:
      response = predefined_responses.respond(input, responsedict)
      if response == '':
	printstr =  "I don't know what that means. If I asked you the same question, what would you reply?"
	print printstr
	logstr += printstr
	responseinput = raw_input('Me > ')
	logstr += '\nMe > ' + responseinput
	if not responseinput in ('exit', 'quit'):
	  responsedict[input] = responseinput
	  print 'response learnt'
      else:
	printstr =  response
	print printstr
	logstr += printstr
      continue
    
    if condlocation:
      if newlocation:	#If location hasn't changed, don't fetch data again. It's already available
	printstr =  'Fetching weather information from Google...'
	print printstr
	logstr += printstr
	# Call Google weather to get current weather conditions
	google_result = weather.get_weather(location)
	if google_result == {}:
	  print 'Could not get data from google.'
	  continue
      
      
  # We have a valid location. Get further information

  # User has asked about temperature. Return temperature information and continue
      if condtemp:
	printstr =  temperature.temperature(google_result, time)
	print printstr
	logstr += printstr
	continue
      
  # User has asked about a specific weather condition. Print information. There are 2 possibilities:
  #    1. Find the condition in the next n days
  #    2. Find the condition in a specified day

      if condkey:

  # 1. User has asked about a specific condition in the 'next x days'. Return appropriate response
	printstr = ''
	timecounter = 0

	day_of_week = ''
	condition = ''
	if numdays != '':
	  for i in google_result['forecasts']:
	    count -= 1
	    if count < 0:
	      break
	    if key in lower(i['condition']):
	      result = True
	      day_of_week = i['day_of_week']
	      condition = i['condition']
	      break

	  for i in timelist:
	    if i[0] != 'today' and i[0] != 'tomorrow':
	      if i[1] == day_of_week:
		fulltime = i[0]
		break
	  if result:
	    printstr = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
	  else:
	    printstr = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'

	  print printstr
	  logstr += printstr
	  continue

  # 2. User has asked about a particular condition on a particular day. Return appropriate response
	if time != 'today' and time != 'tomorrow':
	  for i in google_result['forecasts']:
	    if i['day_of_week'] == time:
	      if key in lower(i['condition']):
		printstr = keytemplate[3] + keytemplate[0] + ' on'
	      else:
		printstr = keytemplate[4] + keytemplate[0] + ' on'
	elif time == 'today':
	  fulltime = time
	  if key in lower(google_result['current_conditions']['condition']):
	    printstr = keytemplate[1] + keytemplate[0]
	  else:
	    printstr = keytemplate[2] + keytemplate[0]
	elif time == 'tomorrow':
	  fulltime = time
	  if key in lower(google_result['forecasts'][1]['condition']):
	    printstr = keytemplate[3] + keytemplate[0]
	  else:
	    printstr = keytemplate[4] + keytemplate[0]

	printstr =  printstr + ' ' + fulltime
	print printstr
	logstr += printstr
	continue

  # User is asking about today's weather. Print details
      elif time == '' or time == 'today' :
	printstr = sentence.sentence(google_result['current_conditions']['condition'], time)
	printstr += ' ' + fulltime + '. ' + google_result['current_conditions']['humidity'] + ' '
	if google_result['current_conditions'].has_key('wind_condition'):
	  printstr += google_result['current_conditions']['wind_condition']
	print printstr
	logstr += printstr
	continue

  # User is asking about weather of a particular day. Print details
      elif time == 'tomorrow':
	printstr = sentence.sentence(google_result['forecasts'][1]['condition'], time)
	printstr += ' ' + fulltime
	print printstr
	logstr += printstr
      else:
	found = False
	for i in range(4):
	  if google_result['forecasts'][i]['day_of_week'] == time:
	    printstr = sentence.sentence(google_result['forecasts'][i]['condition'], time)
	    printstr +=   " on" + ' ' +  fulltime
	    print printstr
	    logstr += printstr
	    found = True
	if not found:
	  printstr =  "Forecast for " + time + " is not available currently."
	  print printstr
	  logstr += printstr
	continue
      
    else:
      printstr =  'What\'s the location?'
      print printstr
      logstr += printstr
  # End of outermost while loop.

  # Print message before exiting program
  dictcount = 0
  for i in responsedict:
    dictcount += 1
  if dictcount > 0:
    printstr =  'Writing new entries to database...'
    print printstr
    logstr += printstr
  datafile = file('predefined_responses.txt', 'a')
  for i in responsedict.keys():
    trimmedi = re.sub('[^a-zA-Z0-9 ]+','', i)
    string = trimmedi + '|' + responsedict[i] + '\n'
    datafile.write(string)
  log.log(logstr)
  print 'Ending the program...'
  print 'Bye!'
  
# End of function chat()
Ejemplo n.º 24
0
def bot():
  conversation = []
  location = ''
  time = 'today'
  key = ''
  keytemplate = []
  fulltime = ''
  numdays = ''

  citylist = readfile.readfile('cities.txt')
  keylist = readfile.readfile('keywords.txt')
  timelist = readfile.readfile('time.txt')
  condlist = readfile.readfile('conditions.txt')
  numlist = readfile.readfile('numbers.txt')
  exitlist = ['exit', 'quit', 'bye', 'ok']

  print 'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
  while True :
    input = raw_input('Me > ')
    if input in exitlist:
      break
    
    currentstring = input.split()
    conversation.append(currentstring)
    
    if 'next' in currentstring:
      numdays = currentstring[currentstring.index('next') + 1]
      for i in numlist:
	if numdays == i[0]:
	  numdays = i[1]
	  break
      if re.match('[0-9]*$',numdays):
	numdays = int(numdays)
      else:
	numdays = ''
    
    if 'weather' in currentstring:
      key = ''
      keytemplate = []
    # get key from input
    for i in keylist:
      if i[0] in input:
	key = i[0]
	keytemplate = i
	break
    
    # get time from input

    for i in timelist:
      if lower(i[0]) in input:
	numdays = ''
	if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
	  time = i[1]
	  fulltime = i[0]
	  break
	else:
	  time = i[0]
	  fulltime = time
	  break
    if fulltime == '':
      fulltime = time

    if numdays != '':
      if numdays > 4:
	print 'Forecast is available only for the next 4 days.'
      else:
	time = ''
	fulltime = ''
	count = numdays
    prevlocation = location 
    #We store previous location to avoid re-fetching data if the location hasn't been changed
    
    
    # Below, we check if any token in the input matches a city name, and if so, set location to that city
    newlocation = False
    
    # get location from input
    foundLocation = False
    for i in citylist:
      if lower(i[0]) in input:
	location = i[0]
	foundLocation = True
	break
    
    #if not foundLocation:
      #if location != '':
	#print "I didn't find any city name in your input. I'll get you information about " + location
    # find if a new location has been mentioned. if not, don't fetch data again
    if location is not prevlocation:
      newlocation = True
    
    if location is '':
      if prevlocation is '':
	print 'City not found'
      else:
	location = prevlocation
	newlocation = False
    
    location = location.replace(' ','-') #Google requires a '-' in 2-word city names
    result = False
    
    
    if location is not '':
      if newlocation:	#If location hasn't changed, don't fetch data again. It's already available
	print 'Fetching weather information from Google...'
	# Call Google weather to get current weather conditions
	google_result = weather.get_weather(location)
      
      if 'temperature' in currentstring:
	print temperature.temperature(google_result, time)
	continue
      
      printed = False
      
      
      if key is not '':
	printstring = ''
	timecounter = 0
	
	day_of_week = ''
	condition = ''
	if numdays != '':
	  for i in google_result['forecasts']:
	    count -= 1
	    if count < 0:
	      break
	    if key in lower(i['condition']):
	      result = True
	      day_of_week = i['day_of_week']
	      condition = i['condition']
	      break
	  
	  for i in timelist:
	    if i[0] != 'today' and i[0] != 'tomorrow':
	      if i[1] == day_of_week:
		fulltime = i[0]
		break
	  if result:
	    printstring = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
	  else:
	    printstring = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'
	  
	  print printstring
	  printed = True
	      
	if not printed:
	  if time != 'today' and time != 'tomorrow':
	    for i in google_result['forecasts']:
	      if i['day_of_week'] == time:
		if key in lower(i['condition']):
		  printstring = keytemplate[3] + keytemplate[0] + ' on'
		else:
		  printstring = keytemplate[4] + keytemplate[0] + ' on'
	  elif time == 'today':
	    fulltime = time
	    if key in lower(google_result['current_conditions']['condition']):
	      printstring = keytemplate[1] + keytemplate[0]
	    else:
	      printstring = keytemplate[2] + keytemplate[0]
	  elif time == 'tomorrow':
	    fulltime = time
	    if key in lower(google_result['forecasts'][1]['condition']):
	      printstring = keytemplate[3] + keytemplate[0]
	    else:
	      printstring = keytemplate[4] + keytemplate[0]
		
	  print printstring, fulltime

      elif time == '' or time == 'today' :
	  printstring = sentence.sentence(google_result['current_conditions']['condition'], time)
	  print printstring, fulltime,  google_result['current_conditions']['humidity'], google_result['current_conditions']['wind_condition']
      else :
	if time == 'tomorrow':
	  printstring = sentence.sentence(google_result['forecasts'][1]['condition'], time)
	  print printstring, fulltime
	else:
	  found = False
	  for i in range(4):
	    if google_result['forecasts'][i]['day_of_week'] == time:
	      printstring = sentence.sentence(google_result['forecasts'][i]['condition'], time)
	      print printstring, "on", fulltime
	      found = True
	  if not found:
	    print "Forecast for " + time + " is not available currently."

    
    else:
      print 'What\'s the location?'
  #end of outermost while loop
  print 'ending the program...'
  print 'bye!'
Ejemplo n.º 25
0
# Interactive dictionary lookup: prompt for a word, print its definitions
# grouped by part of speech, then an example sentence. Loops until the
# process is interrupted.
while True:

    lookup_word = input("Word: ")
    print()

    result = dictionary.meaning(lookup_word)

    # BUGFIX (PEP 8): compare against None with 'is not', not '!='.
    if result is not None:

        # definitions, one section per part of speech
        for field in result:
            cprint(field, "red", "on_white")
            print()

            for count, meaning in enumerate(result[field]):
                print("{:>4}. {}".format(count + 1, meaning))

            print("\n")

        # example sentence
        cprint("Sentence", "red", "on_white")
        print()
        sentence(lookup_word)

    else:

        print("no words match [{}]".format(lookup_word))

    print("_" * 105, "\n")
 def makeQuery(self, n, sentences, idfs):
     """Build a query sentence object from the n highest-scoring TF-IDF words.

     Scores every word in *sentences* against *idfs*, keeps the best n,
     and wraps them in a sentence object tagged "query".
     """
     tfidf_scores = self.TF_IDF(sentences, idfs)
     top_terms = self.getBestWords(n, tfidf_scores)
     return sentence.sentence("query", top_terms, [])
Ejemplo n.º 27
0
	def processFile(file_name):
		"""Clean an article's text, extract keywords and sentence objects.

		The parameter is named file_name for historical reasons but actually
		carries the article TEXT itself (the open() call was retired).

		Side effects: sets module globals 'article' (cleaned text), 'data1'
		(list of keyword tokens) and 'occ' (total keyword occurrences), and
		prints summary statistics.

		Returns a list of sentence.sentence objects, one per non-empty
		tokenized-and-stemmed sentence.
		"""
		text_0 = file_name

		# Collapse newlines, normalize the quotation styles emitted by
		# tokenizers ('' and ``) to plain double quotes, squeeze runs of
		# spaces, and strip the <TEXT> marker.
		# (The original also ran re.sub("\"", "\"", ...) - a no-op that
		# replaced a quote with itself - which has been removed.)
		text_1 = re.sub("\n", " ", text_0)
		text_1 = re.sub("''", "\"", text_1)
		text_1 = re.sub("``", "\"", text_1)
		text_1 = re.sub(" +", " ", text_1)
		text_1 = text_1.replace("<TEXT>", "")

		# Expose the cleaned article to the rest of the module.
		global article
		article = text_1

		# Extract keywords (newline-separated) and split into tokens.
		global data1
		data1 = keywords(text_1)
		data1 = data1.replace('\n', " ")
		keyword_regex = re.compile(r'[\S]+')
		data1 = keyword_regex.findall(data1)

		article_low = text_1.lower()

		# Total occurrences of all keywords in the article.
		# NOTE(review): str.count matches substrings, not whole words - this
		# mirrors the original behavior.
		art_occ = sum(article_low.count(word) for word in data1)

		print ("Total Occurences of Keywords in Article : ")
		print (art_occ)

		global occ
		occ = art_occ

		print ('No of words in articles are : ')
		print (len(text_1.split()))

		print ('No of keywords in articles are : ')
		print (len(data1))

		# Segment the cleaned text into a list of sentences.
		sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
		lines = sentence_token.tokenize(text_1.strip())

		# Model each sentence in the article as a sentence object.
		sentences = []
		porter = nltk.PorterStemmer()

		for line in lines:
			# Keep the original wording before lowercasing/stemming.
			originalWords = line[:]
			line = line.strip().lower()

			# Word tokenization, then stemming.
			sent = nltk.word_tokenize(line)
			stemmedSent = [porter.stem(word) for word in sent]

			# Drop punctuation-only tokens and the possessive clitic.
			stemmedSent = [w for w in stemmedSent
				if w not in ('.', '`', ',', '?', "'", '!', '"', "''", "'s")]

			if stemmedSent != []:
				sentences.append(sentence.sentence(file_name, stemmedSent, originalWords))

		return sentences