def do_clean(line, known_speakers):
    """Strip the speaker prefix from a transcript line.

    Args:
        line: raw transcript line (may end with a newline).
        known_speakers: iterable of speaker-name prefixes to look for.

    Returns:
        (speaker, clean_line) for the first known speaker that prefixes the
        line, or ('-', '-') when no known speaker matches.
    """
    text = TextBlob(line)
    text = text.replace('\n', '')
    for speaker in known_speakers:
        if text.startswith(speaker):
            # Known speaker found: drop the prefix and return the remainder.
            return speaker, remove_prefix(text, speaker)
    # No known speaker starts this line.
    return '-', '-'
def find_unknown_speakers(line, known_speakers):
    """Test whether a known speaker starts *line*; print the line if not.

    Used to spot new, unknown speakers in a transcript. Bracketed [...] and
    braced {...} spans are collapsed to "[]"/"{}" so that lines consisting
    only of stage directions are not printed.
    """
    import re  # kept function-local to match the original's scoping

    text = TextBlob(line)
    text = text.replace('\n', '')
    nws_text = text.replace(' ', '')  # whitespace-free copy to skip blank lines

    speaker_is_known = False
    for test_speaker in known_speakers:
        if text.startswith(test_speaker):
            speaker_is_known = True

    if speaker_is_known == False and nws_text != '':
        text = str(text)
        # Raw strings: "\[" in a non-raw literal is an invalid escape sequence.
        print_text = re.sub(r"\[.*?\]", "[]", text)
        print_text = re.sub(r"\{.*?\}", "{}", print_text)
        if print_text != '[]' and print_text != '{}':
            print(print_text)  # ,'|||',text
def selectResponse(dclass, sentence):
    """Pick a canned response for a classified user utterance.

    Args:
        dclass: dialogue class label ('greeting', 'goodbye', 'identity', ...).
        sentence: the user's sentence; for greetings it is checked for a
            leading "how are you".

    Returns:
        A response string; a fixed fallback message for unknown classes.
    """
    # Bug fix: first entry was "'Hello" with a stray leading apostrophe.
    GREETING_RESPONSES = ["Hello", "Hey", "Hi", "Good day to you", "Hi there"]
    GOODBYE_RESPONSES = [
        "See you later", "Bye", "Talk to you later", "Bye for now", "Take care"
    ]
    IDENTITY_RESPONSES = [
        "I am LifeBot. How can I help you ?",
        " I am LifeBot, I can give healthy recommendations."
    ]

    if dclass == 'greeting':
        blob = TextBlob(sentence)  # renamed from `input` — don't shadow the builtin
        if blob.startswith("how are you", start=0):
            return "I am fine Thanks."
        return rd.choice(GREETING_RESPONSES)
    elif dclass == 'goodbye':
        return rd.choice(GOODBYE_RESPONSES)
    elif dclass == 'identity':
        return rd.choice(IDENTITY_RESPONSES)
    else:
        return 'Sorry, I dont understand that'
def parseFile(filename):
    """POS-tag a text file and bucket its words into adverbs, adjectives,
    nouns and verbs.

    Args:
        filename: path relative to the current directory.

    Returns:
        ([adverbs, adjectives, nouns, verbs], wordcount, usedcount) where
        wordcount is the total number of tagged tokens and usedcount is the
        number of words kept across the four buckets.

    Relies on module-level settings (min_word_length, lemmatize_verbs,
    include_proper_nouns, second_pass) and writes the module global
    `poolstring`. NOTE(review): assumes these globals are defined elsewhere
    in the file — confirm.
    """
    with open("." + os.sep + filename, 'r') as f:
        lines = f.readlines()

    adverbs = []
    adjectives = []
    nouns = []
    verbs = []

    # Tag the whole file in one pass.
    blob = TextBlob(' '.join(lines))
    postuples = blob.tags

    for word, pos in postuples:
        if len(word) < min_word_length:
            continue
        if pos.startswith('J'):
            adjectives.append(word)
        if pos.startswith('V'):
            # Convert to the base form of the verb if requested.
            if lemmatize_verbs:
                word = Word(word).lemmatize('v')
            verbs.append(word)
        if pos == 'NN' or pos == 'NNS':
            nouns.append(word)
        if pos == 'NNP' and include_proper_nouns:
            nouns.append(word)
        if pos.startswith('RB'):
            adverbs.append(word)

    # Optional second round: re-tag each bucket in isolation and keep only
    # words that still receive the expected part of speech.
    if second_pass:
        print(f"\n[First Pass] Adv: {len(adverbs)} Adj: {len(adjectives)}"
              f" Nouns: {len(nouns)} Verbs: {len(verbs)}")

        # Keep only "-ly" adverbs.
        adverbs = [w for w in adverbs if w[-2:] == "ly"]

        nouns = [w for w, p in TextBlob(' '.join(nouns)).tags
                 if p == 'NN' or p == 'NNS' or p == 'NNP']
        verbs = [w for w, p in TextBlob(' '.join(verbs)).tags
                 if p.startswith('V')]
        adjectives = [w for w, p in TextBlob(' '.join(adjectives)).tags
                      if p.startswith('J')]

    # Always record the final pool summary so the global is defined even
    # when the second pass is disabled.
    global poolstring
    poolstring = (f"[Final Pool] Adv: {len(adverbs)} Adj: {len(adjectives)}"
                  f" Nouns: {len(nouns)} Verbs: {len(verbs)}")

    wordcount = len(postuples)
    usedcount = len(adverbs) + len(adjectives) + len(nouns) + len(verbs)
    return [adverbs, adjectives, nouns, verbs], wordcount, usedcount