def processFile(self, file_path_and_name):
    try:
        f = open(file_path_and_name, 'r')
        text = f.read()
        text = re.sub('<[^<]+?>', '', text)
        sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        lines = sent_tokenizer.tokenize(text.strip())
        text = lines
        sentences = []
        porter = nltk.PorterStemmer()
        for sent in lines:
            OG_sent = sent[:]
            sent = sent.strip().lower()
            line = nltk.word_tokenize(sent)
            stemmed_sentence = [porter.stem(word) for word in line]
            stemmed_sentence = filter(
                lambda x: x != '.' and x != '`' and x != ',' and x != '?'
                and x != "'" and x != '!' and x != '"' and x != "''"
                and x != "'s", stemmed_sentence)
            if stemmed_sentence != []:
                sentences.append(
                    sentence.sentence(file_path_and_name, stemmed_sentence, OG_sent))
        return sentences
    except IOError:
        print 'Oops! File not found', file_path_and_name
        return [sentence.sentence(file_path_and_name, [], [])]
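# --- A hedged aside (not from the original sources): the punctuation filter in
# processFile() above relies on Python 2, where filter() returns a list; under
# Python 3 it returns an iterator, so the `!= []` emptiness check would always
# pass. A minimal set-based sketch that avoids both the long lambda chain and
# that pitfall:
PUNCTUATION_TOKENS = {'.', '`', ',', '?', "'", '!', '"', "''", "'s"}

def strip_punctuation_tokens(tokens):
    # keep only tokens that are not bare punctuation marks
    return [t for t in tokens if t not in PUNCTUATION_TOKENS]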
def whilsent():
    if tk.token.code == CodeTable['WHILE']:
        tk.token = next(tk.tg)
    else:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
    conditio.conditio()
    if tk.token.code == CodeTable['DO']:
        tk.token = next(tk.tg)
    else:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
    sentence.sentence()
def compsent():  # compound statement
    if tk.token.code == CodeTable['BEGIN']:
        tk.token = next(tk.tg)
    else:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
    sentence()
    sentsuff()
    if tk.token.code == CodeTable['END']:
        tk.token = next(tk.tg)
    else:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))
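# --- A hedged refactor sketch: whilsent() and compsent() repeat the same
# match-token-then-advance-or-raise step. Assuming the same `tk` module (with a
# `token` attribute and a `tg` token generator) and `CodeTable` mapping, the
# step could be factored out; the helper name `expect` is hypothetical:
def expect(keyword):
    # consume the expected keyword token, or report where parsing failed
    if tk.token.code == CodeTable[keyword]:
        tk.token = next(tk.tg)
    else:
        raise SyntaxError("SyntaxError at line:{},column:{}".format(
            tk.token.line, tk.token.column))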
def init_joy():
    global sentences
    joy_file = open('joyToken.txt', 'r')
    for line in joy_file:
        line = stop_words_remove(line)
        sentences.append(sentence(line, 'joy'))
    joy_file.close()
def init_disgust():
    global sentences
    disgust_file = open('DisgustToken.txt', 'r')
    for line in disgust_file:
        line = stop_words_remove(line)
        sentences.append(sentence(line, 'disgust'))
    disgust_file.close()
def buildQuery(sentences, TF_IDF_w, n):
    # sort in descending order of TF-IDF values
    scores = TF_IDF_w.keys()
    scores = sorted(scores, reverse=True)

    i = 0
    j = 0
    queryWords = []

    # print("n, len(scores):", n, len(scores))
    # if len(scores) == 1:
    #     print(TF_IDF_w)

    # select top n words
    while (i < n):
        words = TF_IDF_w[scores[j]]
        for word in words:
            queryWords.append(word)
            i = i + 1
            if (i > n):
                break
        j = j + 1

    # return the top selected words as a sentence
    # return sentence.sentence("query", queryWords, queryWords)
    return sentence.sentence(queryWords, queryWords)
def getemoj(request):
    global run
    run = run + 1
    text = request.GET['sentence']
    line = stop_words_remove(text)
    sent = sentence(line, 'NONE')
    sent.init_sent_vector()
    X = numpy.asarray([sent.get_sentence_vector()])
    getemoj.model = load_model('my_model.h5')
    predictions = getemoj.model.predict(X)
    feels = predictions.tolist()
    f = feels[0]
    ind = f.index(max(f))
    if (ind == 0):
        response = "you are feeling really afraid?! why so serious?"
    elif (ind == 1):
        response = "you are feeling really disgusting"
    else:
        response = "you are feeling happy that's awesome"
    return HttpResponse(response)
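# --- A hedged alternative (hypothetical, not from the original view): the
# if/elif chain in getemoj() maps prediction indices to responses, so a
# table-driven lookup would make the assumed label order (fear, disgust, joy)
# explicit:
EMOTION_RESPONSES = [
    "you are feeling really afraid?! why so serious?",  # index 0: fear
    "you are feeling really disgusting",                # index 1: disgust
    "you are feeling happy that's awesome",             # index 2: joy
]
# response = EMOTION_RESPONSES[ind]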
def init_fear():
    global sentences
    fear_file = open('fearToken.txt', 'r')
    for line in fear_file:
        line = stop_words_remove(line)
        sentences.append(sentence(line, 'fear'))
    fear_file.close()
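# --- A possible consolidation (hypothetical helper, not in the original code):
# init_joy(), init_disgust(), and init_fear() differ only in file name and
# label, so one parameterized loader could replace all three:
def init_emotion(path, label):
    global sentences
    with open(path, 'r') as emotion_file:
        for line in emotion_file:
            line = stop_words_remove(line)
            sentences.append(sentence(line, label))

# usage sketch:
# init_emotion('joyToken.txt', 'joy')
# init_emotion('DisgustToken.txt', 'disgust')
# init_emotion('fearToken.txt', 'fear')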
def processFile(self, file_path_and_name):
    try:
        # open file
        f = open(file_path_and_name, 'r')
        text = f.read()

        # remove HTML tags
        text = nltk.clean_html(text.replace('\n', ''))

        # segment data into a list of sentences
        sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        lines = sent_tokenizer.tokenize(text.strip())

        # ensure names used are in their complete form
        # text = self.use_full_names(lines)
        text = lines

        # convert sentences to list of words
        sentences = []
        porter = nltk.PorterStemmer()

        # every sentence
        for sent in lines:
            OG_sent = sent[:]
            sent = sent.strip().lower()
            line = nltk.word_tokenize(sent)

            # convert words to stemmed words before appending to list and returning
            stemmed_sentence = [porter.stem(word) for word in line]
            stemmed_sentence = filter(
                lambda x: x != '.' and x != '`' and x != ',' and x != '?'
                and x != "'" and x != '!' and x != '"' and x != "''"
                and x != "'s", stemmed_sentence)

            # no empty sentences
            if stemmed_sentence != []:
                sentences.append(
                    sentence.sentence(file_path_and_name, stemmed_sentence, OG_sent))
        return sentences

    # print error message if file not found
    except IOError:
        print 'Oops! File not found', file_path_and_name
        return [sentence.sentence(file_path_and_name, [], [])]
def __init__(self, parent, sList):
    self.numSents = len(sList)
    self.parentPaper = parent
    self._topic = ''
    self.index = 0
    self.sentenceList = []
    for s in sList:
        self.sentenceList.append(sentence(s))
def translated(list_topics, start_idx, end_idx, dir_topic, dir_out, parameter):
    # translate
    fileBudget = open("data/budget/budget", "r")
    budget_string = fileBudget.read().split("\n")
    budget = []
    for i in range(len(budget_string) - 1):
        budget.append(int(budget_string[i]))
    number_sentence = 0
    for i in range(start_idx, end_idx, 1):
        print("process " + list_topics[i])
        file = open(dir_topic + list_topics[i], mode="rb")
        parameter[-1] = budget[i]
        sentences = file.read().decode("utf8", "replace").split('\r\n')
        phrase_sentences = []
        for line in sentences:
            if len(line) != 0:
                number_sentence += 1
                phrase = []
                try:
                    blob = TextBlob(line)
                    translated_blob = blob.translate(to='vi')
                    out_sen = " ".join(translated_blob.tokens)
                    start = 0
                    out_sen_tmp = out_sen.lower().split()
                    end = len(out_sen_tmp)
                    for nphrase in translated_blob.noun_phrases:
                        phrase_ele = []
                        nphrase = nphrase.split()
                        k = find_index(out_sen_tmp, nphrase, start, end)
                        start = k + len(nphrase)
                        for j in range(k, k + len(nphrase), 1):
                            phrase_ele.append(j + 1)
                        phrase.append(phrase_ele)
                except:
                    out_sen = line
                    phrase = []
                if (out_sen != ""):
                    sen = sentence.sentence(out_sen, phrase)
                    phrase_sentences.append(sen)
        summarizer = PhraseBaseSummarization.phrase_based_summarization(phrase_sentences)
        summary = summarizer.summarizer(parameter)
        fileOut = open(dir_out + list_topics[i], "w")
        fileOut.write(summary)
        print("finish " + list_topics[i])
        fileOut.close()
    print "no.sentence: ", number_sentence
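# --- `find_index` is called above but not shown; a plausible sketch (an
# assumption, not the original implementation) that locates a token phrase
# inside the tokenized sentence between `start` and `end`:
def find_index(tokens, phrase, start, end):
    # scan for the first window in tokens[start:end] matching the phrase
    for k in range(start, end - len(phrase) + 1):
        if tokens[k:k + len(phrase)] == phrase:
            return k
    return start  # hypothetical fallback when the phrase is not found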
def create_doc_object(list_of_sentences):
    sentences = []
    total_tfidf = 0
    for sent in list_of_sentences:
        words = tb(sent).words
        tfidf_dic = tfidf.generate_dictionary_for_specific_words(words)
        tfidf_val = tfidf.get_sentence_value(sent)
        vector = sentence_to_vec(sent)
        sentences.append(
            sentence.sentence(sent, tfidf_dic, vector[0], tfidf_val))
        total_tfidf += tfidf_val
    return doc.documant(sentences, total_tfidf, sens2vec(sentences, total_tfidf))
def processFile(sample):
    # read file from provided folder path
    # f = open(file_name,'r')
    # text_0 = f.read()
    text_0 = sample

    # extract content in TEXT tag and remove tags
    # text_1 = re.search(r"<TEXT>.*</TEXT>", text_0, re.DOTALL)
    # text_1 = re.sub("<TEXT>\n", "", text_1.group(0))
    # text_1 = re.sub("\n</TEXT>", "", text_1)

    # replace all types of quotations by normal quotes
    # text_1 = re.sub("\n", " ", text_1)
    # text_1 = re.sub("\"", "\"", text_1)
    # text_1 = re.sub("''", "\"", text_1)
    # text_1 = re.sub("``", "\"", text_1)
    # text_1 = re.sub(" +", " ", text_1)

    # segment data into a list of sentences
    # sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    # lines = sentence_token.tokenize(text_1.strip())
    lines = split_sentences(text_0 + "\n")

    # setting the stemmer
    sentences = []
    porter = nltk.PorterStemmer()

    # modelling each sentence in file as sentence object
    for line in lines:
        # original words of the sentence before stemming
        originalWords = line[:]
        line = line.strip().lower()

        # word tokenization
        sent = nltk.word_tokenize(line)

        # stemming words
        stemmedSent = [porter.stem(word) for word in sent]
        # stemmedSent = filter(lambda x: x != '.' and x != '`' and x != ',' and x != '?' and x != "'"
        #                      and x != '!' and x != '"' and x != "''" and x != "'s", stemmedSent)

        # list of sentence objects
        if stemmedSent != []:
            # sentences.append(sentence.sentence(file_name, stemmedSent, originalWords))
            sentences.append(sentence.sentence(stemmedSent, originalWords))
    return sentences
def processFile(file_name):
    # read file from provided folder path
    f = open(file_name, 'r')
    text_0 = f.read()

    # extract content in TEXT tag and remove tags
    text_1 = re.search(r"<TEXT>.*</TEXT>", text_0, re.DOTALL)
    text_1 = re.sub("<TEXT>\n", "", text_1.group(0))
    text_1 = re.sub("\n</TEXT>", "", text_1)

    # replace all types of quotations by normal quotes
    text_1 = re.sub("\n", " ", text_1)
    text_1 = re.sub("\"", "\"", text_1)
    text_1 = re.sub("''", "\"", text_1)
    text_1 = re.sub("``", "\"", text_1)
    text_1 = re.sub(" +", " ", text_1)

    # segment data into a list of sentences
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    lines = sentence_token.tokenize(text_1.strip())

    # setting the stemmer
    sentences = []
    porter = nltk.PorterStemmer()

    # modelling each sentence in file as sentence object
    for line in lines:
        # original words of the sentence before stemming
        originalWords = line[:]
        line = line.strip().lower()

        # word tokenization
        sent = nltk.word_tokenize(line)

        # stemming words
        stemmedSent = [porter.stem(word) for word in sent]
        stemmedSent = filter(lambda x: x != '.' and x != '`' and x != ',' and x != '?' and x != "'"
                             and x != '!' and x != '"' and x != "''" and x != "'s", stemmedSent)

        # list of sentence objects
        if stemmedSent != []:
            sentences.append(sentence.sentence(file_name, stemmedSent, originalWords))
    return sentences
def readGoldenFile(self, wordfile, tagfile, numSentences):
    """ read training file """
    wf = open(wordfile, 'rt')
    tf = open(tagfile, 'rt')
    allSentences = []
    wlines = wf.readlines()
    tlines = tf.readlines()

    # build and save sentence objects
    for i in range(0, numSentences):
        # new sentence
        words = wlines[i].split()
        tags = tlines[i].split()
        s = sentence.sentence(words, tags)
        allSentences.append(s)
    wf.close()
    tf.close()

    # save data
    self.allSentences = allSentences
    self.sentenceNum = len(allSentences)
def buildQuery(sentences, TF_IDF_w, n):
    # sort in descending order of TF-IDF values
    scores = TF_IDF_w.keys()
    scores.sort(reverse=True)

    i = 0
    j = 0
    queryWords = []

    # select top n words
    while (i < n):
        words = TF_IDF_w[scores[j]]
        for word in words:
            queryWords.append(word)
            i = i + 1
            if (i > n):
                break
        j = j + 1

    # return the top selected words as a sentence
    return sentence.sentence("query", queryWords, queryWords)
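# --- A hedged porting note: `TF_IDF_w.keys()` returns a view object in
# Python 3, which has no .sort() method, so buildQuery() above only runs under
# Python 2. A Python 3 equivalent of the two sorting lines (mirroring the
# variant earlier in this section) would be:
#     scores = sorted(TF_IDF_w.keys(), reverse=True)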
def processFile(file_name):
    # read file from provided folder path
    f = open(file_name, 'r')
    text_1 = f.read()

    # sentence segmentation
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    lines = sentence_token.tokenize(text_1.strip())

    # setting the stemmer
    sentences = []

    # modelling each sentence in file as sentence object
    for i in range(len(lines)):
        line = lines[i]
        # keep the original sentence
        originalWords = line[:]
        # lowercase the whole string, e.g. "Good Mike" => "good mike"
        line = line.strip().lower()
        # word segmentation
        stemmedSent = ViTokenizer.tokenize(line).split()
        stemmedSent = list(
            filter(
                lambda x: x != '.' and x != '`' and x != ',' and x != '?'
                and x != "'" and x != ":" and x != '!' and x != '"'
                and x != "''" and x != '-' and x not in stop_word,
                stemmedSent))
        if ((i + 1) == len(lines)) and (len(stemmedSent) <= 5):
            break
        # list of sentence objects
        if stemmedSent != []:
            sentences.append(
                sentence.sentence(file_name, stemmedSent, originalWords))
    return sentences
def makeQuery(self, n, sentences, idfs):
    scored_words = self.TF_IDF(sentences, idfs)
    best_words = self.getBestWords(n, scored_words)
    return sentence.sentence("query", best_words, [])
web.config.debug = False
render = web.template.render('templates/')
urls = ('/', 'index', '/image', 'image', '/history', 'history',
        '/imagedemo/(.*)', 'imagedemo')
app = web.application(urls, globals())
session = web.session.Session(app, web.session.DiskStore('sessions'),
                              initializer={'login': 0, 'user': 0})
user_control = userControl.user(constant.DATABASE_FILE)
sen = sentence.sentence(constant.DATABASE_FILE)
image_pool = image.image(constant.DATABASE_FILE)


def check():
    if session.login == 0:
        judge = 4
        resp = {'judge': judge}
        return True, render.index(resp)
    else:
        return False, None


class index():
    def GET(self):
        judge = 4
        else:
            location = prevlocation
            newlocation = False
    location = location.replace(' ', '-')  # Google requires a '-' in 2-word city names
    if location != '':
        if newlocation:  # If location hasn't changed, don't fetch data again. It's already available
            print 'Fetching weather information from Google...'
            # Call Google weather to get current weather conditions
            google_result = weather.get_weather(location)
        # Print today's weather report. Replace this with code to deduce the user request
        if time == '' or time == 'today':
            printstring = sentence.sentence(google_result['current_conditions']['condition'], time)
            print printstring, time
        else:
            if time == 'tomorrow':
                printstring = sentence.sentence(google_result['forecasts'][1]['condition'], time)
                print printstring, time
            else:
                found = False
                for i in range(4):
                    if google_result['forecasts'][i]['day_of_week'] == newtime:
                        printstring = sentence.sentence(google_result['forecasts'][i]['condition'], time)
                        print printstring, "on", time
                        found = True
                if not found:
                    print "Forecast for " + time + " is not available currently."
logging.basicConfig(
    format="[%(asctime)s - %(filename)s:line %(lineno)s] %(message)s",
    datefmt='%d %b %H:%M:%S',
    level=logging.INFO)
logger.setLevel(logging.INFO)

from setup import START
from constant import *

web.config.debug = False
render = web.template.render('adminTemplates/')
urls = ('/', 'index', '/image', 'image', '/history', 'history',
        '/single', 'single')
sen = sentence.sentence(DATABASE_FILE)
user_control = userControl.user(DATABASE_FILE)
image_pool = image.image(DATABASE_FILE)


class index:
    def GET(self):
        i = web.input()
        start = i.get("start")
        end = i.get("end")
        if start is None:
            start = START
        if end is None:
            end = time.time()
def chat():
    # keyword conditions
    condnext = False
    condweather = False
    condtime = False
    condlocation = False
    condtemp = False
    condkey = False
    condresponse = False
    foundinfo = False
    condtrain = False
    condcountry = False
    condspellcheck = True

    # global variables
    conversation = []
    location = ''
    prevlocation = location
    time = 'today'
    key = ''
    keytemplate = []
    fulltime = ''
    numdays = ''
    logstr = ''
    printstr = ''
    responsedict = {}  # Dictionary to hold all inputs without predefined responses. This dictionary will be written into predefined_responses.txt before exiting the program.

    # read data files
    citylist = readfile.readfile('cities.txt')
    keylist = readfile.readfile('keywords.txt')
    timelist = readfile.readfile('time.txt')
    condlist = readfile.readfile('conditions.txt')
    numlist = readfile.readfile('numbers.txt')
    countrylist = readfile.readfile('countries.txt')
    exitlist = ['exit', 'quit', 'bye', 'ok']

    # Greeting message
    printstr = 'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
    print printstr
    logstr += '\n\n' + printstr

    # Start main loop
    while True:
        foundinfo = False
        condtrain = False
        condcountry = False

        # read input from user
        input = raw_input('\nMe > ')
        logstr += '\nMe > ' + input + '\nBot > '
        if input in exitlist:
            if input == 'ok':
                exitans = raw_input("Do you want to quit? (y/n)")
                if exitans in ('y', 'Y', 'Yes', 'YES', 'yes'):
                    break
                else:
                    continue
            break
        if input == 'disable spellcheck':
            condspellcheck = False
            continue
        if input == 'enable spellcheck':
            condspellcheck = True
            continue
        condcorrected = False
        if condspellcheck:
            corrected_input = ''
            for i in input.split():
                str = spellcheck.correct(i)
                if str != i:
                    condcorrected = True
                corrected_input += str + ' '
            if condcorrected:
                print 'did you mean: "' + corrected_input + '"?'
                input = corrected_input
        currentstring = input.split()
        conversation.append(currentstring)

        # Start searching input for each of the keywords
        if input == 'train':
            condtrain = True
            printstr = 'Entering training mode. Enter input and response separated by a "|": input|response. Type "exit" to quit training mode'
            print printstr
            logstr += '\n' + printstr + '\n'
            while True:
                traininput = raw_input('>')
                if traininput == 'exit':
                    break
                if traininput.find('|') < 0:
                    printstr = 'Format error: use input|response'
                    print printstr
                    logstr += '\n' + printstr + '\n'
                    continue
                traininput = traininput.split('|')
                responsedict[traininput[0]] = traininput[1]
        if condtrain:
            continue
        for i in countrylist:
            for j in currentstring:
                if lower(i[0]) == lower(j):
                    printstr = 'Which city in ' + i[0] + '?'
                    condcountry = True
                    foundinfo = True
                    break
        if condcountry:
            print printstr
            logstr += printstr
            continue
        if 'next' in input:
            foundinfo = True
            condnext = True
            condtime = False
            numdays = currentstring[currentstring.index('next') + 1]
            for i in numlist:
                if numdays == i[0]:
                    numdays = i[1]
                    break
            if re.match('[0-9]*$', numdays):
                numdays = int(numdays)
            else:
                numdays = ''
        if 'weather' in input:
            foundinfo = True
            condweather = True
            condkey = False
            condtemp = False
            key = ''
            keytemplate = []

        # get key from input
        for i in keylist:
            if i[0] in input:
                if 'sunday' in lower(input) and i[0] == 'sun':
                    break
                else:
                    foundinfo = True
                    condkey = True
                    condweather = False
                    condtemp = False
                    key = i[0]
                    keytemplate = i
                    break

        # get time from input
        for i in timelist:
            if lower(i[0]) in input:
                foundinfo = True
                condtime = True
                numdays = ''
                if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
                    time = i[1]
                    fulltime = i[0]
                    break
                else:
                    time = i[0]
                    fulltime = time
                    break
        if fulltime == '':
            fulltime = time
        if numdays != '':
            condtime = True
            if numdays > 4:
                printstr = 'Forecast is available only for the next 4 days.'
                print printstr
                logstr += '\n' + printstr + '\n'
            else:
                time = ''
                fulltime = ''
                count = numdays

        # get location from input
        for i in citylist:
            if lower(i[0]) in input:
                foundinfo = True
                condlocation = True
                location = i[0]
                break

        # find if a new location has been mentioned. if not, don't fetch data again
        if location != prevlocation:
            newlocation = True
            condlocation = True
            prevlocation = location
        else:
            newlocation = False
        if location is '':
            if prevlocation is '':
                condlocation = False
            else:
                location = prevlocation
                newlocation = False
        location = location.replace(' ', '-')  # Google requires a '-' in 2-word city names
        result = False

        # get temperature from input
        if 'temperature' in input:
            foundinfo = True
            condtemp = True

        # User gave no information about weather. Switching to general predefined response based chat
        if not foundinfo:
            response = predefined_responses.respond(input, responsedict)
            if response == '':
                printstr = "I don't know what that means. If I asked you the same question, what would you reply?"
                print printstr
                logstr += printstr
                responseinput = raw_input('Me > ')
                logstr += '\nMe > ' + responseinput
                if not responseinput in ('exit', 'quit'):
                    responsedict[input] = responseinput
                    print 'response learnt'
            else:
                printstr = response
                print printstr
                logstr += printstr
            continue

        if condlocation:
            if newlocation:  # If location hasn't changed, don't fetch data again. It's already available
                printstr = 'Fetching weather information from Google...'
                print printstr
                logstr += printstr
                # Call Google weather to get current weather conditions
                google_result = weather.get_weather(location)
                if google_result == {}:
                    print 'Could not get data from google.'
                    continue

            # We have a valid location. Get further information
            # User has asked about temperature. Return temperature information and continue
            if condtemp:
                printstr = temperature.temperature(google_result, time)
                print printstr
                logstr += printstr
                continue

            # User has asked about a specific weather condition. Print information. There are 2 possibilities:
            # 1. Find the condition in the next n days
            # 2. Find the condition in a specified day
            if condkey:
                # 1. User has asked about a specific condition in the 'next x days'. Return appropriate response
                printstr = ''
                timecounter = 0
                day_of_week = ''
                condition = ''
                if numdays != '':
                    for i in google_result['forecasts']:
                        count -= 1
                        if count < 0:
                            break
                        if key in lower(i['condition']):
                            result = True
                            day_of_week = i['day_of_week']
                            condition = i['condition']
                            break
                    for i in timelist:
                        if i[0] != 'today' and i[0] != 'tomorrow':
                            if i[1] == day_of_week:
                                fulltime = i[0]
                                break
                    if result:
                        printstr = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
                    else:
                        printstr = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'
                    print printstr
                    logstr += printstr
                    continue

                # 2. User has asked about a particular condition on a particular day. Return appropriate response
                if time != 'today' and time != 'tomorrow':
                    for i in google_result['forecasts']:
                        if i['day_of_week'] == time:
                            if key in lower(i['condition']):
                                printstr = keytemplate[3] + keytemplate[0] + ' on'
                            else:
                                printstr = keytemplate[4] + keytemplate[0] + ' on'
                elif time == 'today':
                    fulltime = time
                    if key in lower(google_result['current_conditions']['condition']):
                        printstr = keytemplate[1] + keytemplate[0]
                    else:
                        printstr = keytemplate[2] + keytemplate[0]
                elif time == 'tomorrow':
                    fulltime = time
                    if key in lower(google_result['forecasts'][1]['condition']):
                        printstr = keytemplate[3] + keytemplate[0]
                    else:
                        printstr = keytemplate[4] + keytemplate[0]
                printstr = printstr + ' ' + fulltime
                print printstr
                logstr += printstr
                continue

            # User is asking about today's weather. Print details
            elif time == '' or time == 'today':
                printstr = sentence.sentence(google_result['current_conditions']['condition'], time)
                printstr += ' ' + fulltime + '. ' + google_result['current_conditions']['humidity'] + ' '
                if google_result['current_conditions'].has_key('wind_condition'):
                    printstr += google_result['current_conditions']['wind_condition']
                print printstr
                logstr += printstr
                continue

            # User is asking about weather of a particular day. Print details
            elif time == 'tomorrow':
                printstr = sentence.sentence(google_result['forecasts'][1]['condition'], time)
                printstr += ' ' + fulltime
                print printstr
                logstr += printstr
            else:
                found = False
                for i in range(4):
                    if google_result['forecasts'][i]['day_of_week'] == time:
                        printstr = sentence.sentence(google_result['forecasts'][i]['condition'], time)
                        printstr += " on" + ' ' + fulltime
                        print printstr
                        logstr += printstr
                        found = True
                if not found:
                    printstr = "Forecast for " + time + " is not available currently."
                    print printstr
                    logstr += printstr
                continue
        else:
            printstr = 'What\'s the location?'
            print printstr
            logstr += printstr
    # End of outermost while loop.

    # Print message before exiting program
    dictcount = 0
    for i in responsedict:
        dictcount += 1
    if dictcount > 0:
        printstr = 'Writing new entries to database...'
        print printstr
        logstr += printstr
        datafile = file('predefined_responses.txt', 'a')
        for i in responsedict.keys():
            trimmedi = re.sub('[^a-zA-Z0-9 ]+', '', i)
            string = trimmedi + '|' + responsedict[i] + '\n'
            datafile.write(string)
    log.log(logstr)
    print 'Ending the program...'
    print 'Bye!'
    # End of function chat()
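# --- A hedged refactor sketch: chat() interleaves `print printstr` with manual
# appends to logstr throughout. A small helper (the name `say` is hypothetical)
# would keep the console output and the log in sync:
def say(message, logstr):
    # print the message and return the updated log buffer
    print(message)
    return logstr + '\n' + message

# usage sketch inside chat():
# logstr = say('Fetching weather information from Google...', logstr)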
def bot():
    conversation = []
    location = ''
    time = 'today'
    key = ''
    keytemplate = []
    fulltime = ''
    numdays = ''
    citylist = readfile.readfile('cities.txt')
    keylist = readfile.readfile('keywords.txt')
    timelist = readfile.readfile('time.txt')
    condlist = readfile.readfile('conditions.txt')
    numlist = readfile.readfile('numbers.txt')
    exitlist = ['exit', 'quit', 'bye', 'ok']
    print 'Hello! You can ask me questions about the weather in any major city in the world. What would you like to know?'
    while True:
        input = raw_input('Me > ')
        if input in exitlist:
            break
        currentstring = input.split()
        conversation.append(currentstring)
        if 'next' in currentstring:
            numdays = currentstring[currentstring.index('next') + 1]
            for i in numlist:
                if numdays == i[0]:
                    numdays = i[1]
                    break
            if re.match('[0-9]*$', numdays):
                numdays = int(numdays)
            else:
                numdays = ''
        if 'weather' in currentstring:
            key = ''
            keytemplate = []

        # get key from input
        for i in keylist:
            if i[0] in input:
                key = i[0]
                keytemplate = i
                break

        # get time from input
        for i in timelist:
            if lower(i[0]) in input:
                numdays = ''
                if lower(i[0]) != 'today' and lower(i[0]) != 'tomorrow':
                    time = i[1]
                    fulltime = i[0]
                    break
                else:
                    time = i[0]
                    fulltime = time
                    break
        if fulltime == '':
            fulltime = time
        if numdays != '':
            if numdays > 4:
                print 'Forecast is available only for the next 4 days.'
            else:
                time = ''
                fulltime = ''
                count = numdays

        prevlocation = location  # We store previous location to avoid re-fetching data if the location hasn't been changed
        # Below, we check if any token in the input matches a city name, and if so, set location to that city
        newlocation = False

        # get location from input
        foundLocation = False
        for i in citylist:
            if lower(i[0]) in input:
                location = i[0]
                foundLocation = True
                break
        # if not foundLocation:
        #     if location != '':
        #         print "I didn't find any city name in your input. I'll get you information about " + location

        # find if a new location has been mentioned. if not, don't fetch data again
        if location is not prevlocation:
            newlocation = True
        if location is '':
            if prevlocation is '':
                print 'City not found'
            else:
                location = prevlocation
                newlocation = False
        location = location.replace(' ', '-')  # Google requires a '-' in 2-word city names
        result = False

        if location is not '':
            if newlocation:  # If location hasn't changed, don't fetch data again. It's already available
                print 'Fetching weather information from Google...'
                # Call Google weather to get current weather conditions
                google_result = weather.get_weather(location)
            if 'temperature' in currentstring:
                print temperature.temperature(google_result, time)
                continue
            printed = False
            if key is not '':
                printstring = ''
                timecounter = 0
                day_of_week = ''
                condition = ''
                if numdays != '':
                    for i in google_result['forecasts']:
                        count -= 1
                        if count < 0:
                            break
                        if key in lower(i['condition']):
                            result = True
                            day_of_week = i['day_of_week']
                            condition = i['condition']
                            break
                    for i in timelist:
                        if i[0] != 'today' and i[0] != 'tomorrow':
                            if i[1] == day_of_week:
                                fulltime = i[0]
                                break
                    if result:
                        printstring = keytemplate[3] + keytemplate[0] + ' on ' + fulltime
                    else:
                        printstring = keytemplate[4] + keytemplate[0] + ' in the next ' + str(numdays) + ' days.'
                    print printstring
                    printed = True
                if not printed:
                    if time != 'today' and time != 'tomorrow':
                        for i in google_result['forecasts']:
                            if i['day_of_week'] == time:
                                if key in lower(i['condition']):
                                    printstring = keytemplate[3] + keytemplate[0] + ' on'
                                else:
                                    printstring = keytemplate[4] + keytemplate[0] + ' on'
                    elif time == 'today':
                        fulltime = time
                        if key in lower(google_result['current_conditions']['condition']):
                            printstring = keytemplate[1] + keytemplate[0]
                        else:
                            printstring = keytemplate[2] + keytemplate[0]
                    elif time == 'tomorrow':
                        fulltime = time
                        if key in lower(google_result['forecasts'][1]['condition']):
                            printstring = keytemplate[3] + keytemplate[0]
                        else:
                            printstring = keytemplate[4] + keytemplate[0]
                    print printstring, fulltime
            elif time == '' or time == 'today':
                printstring = sentence.sentence(google_result['current_conditions']['condition'], time)
                print printstring, fulltime, google_result['current_conditions']['humidity'], google_result['current_conditions']['wind_condition']
            else:
                if time == 'tomorrow':
                    printstring = sentence.sentence(google_result['forecasts'][1]['condition'], time)
                    print printstring, fulltime
                else:
                    found = False
                    for i in range(4):
                        if google_result['forecasts'][i]['day_of_week'] == time:
                            printstring = sentence.sentence(google_result['forecasts'][i]['condition'], time)
                            print printstring, "on", fulltime
                            found = True
                    if not found:
                        print "Forecast for " + time + " is not available currently."
        else:
            print 'What\'s the location?'
    # end of outermost while loop
    print 'ending the program...'
    print 'bye!'
while True:
    lookup_word = input("Word: ")
    print()
    result = dictionary.meaning(lookup_word)
    if result != None:
        # definition
        for field in result:
            cprint(field, "red", "on_white")
            print()
            for count, meaning in enumerate(result[field]):
                print("{:>4}. {}".format(count + 1, meaning))
            print("\n")
        # sentence
        cprint("Sentence", "red", "on_white")
        print()
        sentence(lookup_word)
    else:
        print("no words match [{}]".format(lookup_word))
    print("_" * 105, "\n")
def processFile(file_name):
    # read file from provided folder path
    # f = open(file_name,'r')
    text_0 = file_name
    # print 'Text is :-'

    # replace all types of quotations by normal quotes
    text_1 = re.sub("\n", " ", text_0)
    text_1 = re.sub("\"", "\"", text_1)
    text_1 = re.sub("''", "\"", text_1)
    text_1 = re.sub("``", "\"", text_1)
    text_1 = re.sub(" +", " ", text_1)
    text_1 = text_1.replace("<TEXT>", "")
    # print text_1

    global article
    article = text_1

    # print 'Keywords in article are : '
    # print keywords(text_1)
    global data1
    data1 = keywords(text_1)
    # data1 = data1.encode('ascii','ignore')
    data1 = data1.replace('\n', " ")
    # print type(data1)
    keyword_reges1 = re.compile(r'[\S]+')
    data1 = keyword_reges1.findall(data1)

    article_low = text_1.lower()
    # print article_low
    art_occ = 0
    for x in range(0, len(data1)):
        # print data1[x], '-', article_low.count(data1[x])
        art_occ = art_occ + article_low.count(data1[x])

    # Testing
    print("Total Occurrences of Keywords in Article : ")
    print(art_occ)
    global occ
    occ = art_occ
    art_occ = 0
    print('No of words in articles are : ')
    print(len(text_1.split()))
    print('No of keywords in articles are : ')
    print(len(data1))

    # segment data into a list of sentences
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    lines = sentence_token.tokenize(text_1.strip())

    # setting the stemmer
    sentences = []
    porter = nltk.PorterStemmer()

    # modelling each sentence in file as sentence object
    for line in lines:
        # original words of the sentence before stemming
        originalWords = line[:]
        line = line.strip().lower()

        # word tokenization
        sent = nltk.word_tokenize(line)

        # stemming words
        stemmedSent = [porter.stem(word) for word in sent]
        stemmedSent = filter(lambda x: x != '.' and x != '`' and x != ',' and x != '?' and x != "'"
                             and x != '!' and x != '"' and x != "''" and x != "'s", stemmedSent)

        # list of sentence objects
        stemmedSent = list(stemmedSent)
        if stemmedSent != []:
            sentences.append(sentence.sentence(file_name, stemmedSent, originalWords))
    return sentences