def create_passive(doc, sub_idx, root_idx, obj_index, obj_start, obj_end, negation_availability):
    """Print the passive-voice rendering of the sentence in ``doc``.

    :param doc: spaCy-parsed sentence (sequence of tokens)
    :param sub_idx: subject token index (unused here; kept for interface
        compatibility with callers)
    :param root_idx: index of the root verb token
    :param obj_index: index of the object's head token (number check)
    :param obj_start: start index of the object span
    :param obj_end: end index (exclusive) of the object span
    :param negation_availability: True when the sentence is negated

    Fix: the short-sentence plural-negated branch printed ``" are not"``
    without the trailing space, gluing "not" to the participle; all four
    branches now share one correctly spaced construction.
    """
    # Past-participle (VBN) form of the root verb, e.g. "eat" -> "eaten".
    participle = str(getInflection(doc[root_idx].lemma_, tag='VBN')[0])
    obj_text = str(doc[obj_start:obj_end])
    # singular_noun() returns False when the noun is already singular.
    be = "is" if inflect.singular_noun(str(doc[obj_index])) is False else "are"
    if negation_availability:
        be += " not"
    # 'obj_end + 2' distinguishes sentences that continue after the object
    # from ones that end with it (then the full stop is appended directly).
    if len(doc) > obj_end + 2:
        print(obj_text + " " + be + " " + participle + " " + str(doc[obj_end:]))
    else:
        print(obj_text + " " + be + " " + participle + ".")
    print(" ")
def establish_headnoun(hn, loc, xpos):
    """Build an ``Entity`` for the head noun ``hn`` and infer its
    entity label, gender and grammatical number.

    :param hn: head-noun text
    :param loc: location passed through to ``Entity``
    :param xpos: POS tag passed through to ``Entity``
    :return: the populated ``Entity``

    Fix: the original compared strings with ``is`` / ``is not``
    (object identity, which only works by accident of CPython string
    interning); replaced with ``==`` / ``!=``.
    """
    head = Entity(hn, loc, xpos)
    split = head.name.split(' ')
    doc = nlp(hn)
    # keep the label of the last entity found in the span
    for ent in doc.ents:
        head.change_entity(ent.label_)
    if head.entity == 'PERSON':
        if 'Mr.' in hn:
            head.change_gender('male')
        elif 'Ms.' in hn or 'Miss' in hn or 'Mrs.' in hn:
            head.change_gender('female')
        else:
            split = head.name.split(' ')
            # NOTE(review): get_gender() returns strings such as 'unknown',
            # which are all truthy, so this any() fires for any non-empty
            # name; left as-is to preserve behaviour — confirm intent.
            if any(gender_guess.get_gender(elem) for elem in split):
                for i in range(0, len(split)):
                    if gender_guess.get_gender(split[i]) != 'unknown':
                        head.change_gender(gender_guess.get_gender(split[i]))
                        break
        # a PERSON is treated as singular — assumed to apply to all three
        # branches above; TODO confirm against original formatting
        head.change_plural(-1)
    elif head.entity == 'ORG':
        head.change_plural(-1)
    elif not any(inflect.singular_noun(elem) for elem in split):
        # no word in the name has a singular form => all singular
        head.change_plural(-1)
    else:
        for i in hn.split(' '):
            if inflect.singular_noun(i):
                head.change_plural(1)
    return head
def keywords(passage):
    """Tokenise ``passage``, singularise every noun, and return word and
    n-gram frequency statistics.

    :return: (original word count, normalised word count,
              top-20 unigrams, top-20 bigrams, top-20 trigrams)
    """
    raw_words = re.findall(r'\w+', passage)
    ini_tot_words = len(raw_words)
    normalised = []
    for token in raw_words:
        if token == '000':
            token = 'THOUSAND'  # Future work: generalize!
        if token != '' and len(token) >= 2:
            # singular_noun() returns False when the word is already singular
            singular_form = inflect.singular_noun(token)
            normalised.append(token if singular_form is False else singular_form)
    tot_words = len(normalised)
    # Count words and select the n-most repeated ones
    word_counts = Counter(normalised)
    key_word_1 = word_counts.most_common(20)    # top single key-words
    key_words_2 = Counter(ngrams(normalised, 2)).most_common(20)  # top bigrams
    key_words_3 = Counter(ngrams(normalised, 3)).most_common(20)  # top trigrams
    return (ini_tot_words, tot_words, key_word_1, key_words_2, key_words_3)
def create_passive(doc, root_idx, obj_index, obj_start, obj_end, negation_availability):
    """Return the passive-voice rendering of the sentence in ``doc``.

    :param doc: spaCy-parsed sentence (sequence of tokens)
    :param root_idx: index of the root verb token
    :param obj_index: index of the object's head token (number check)
    :param obj_start: start index of the object span
    :param obj_end: end index (exclusive) of the object span
    :param negation_availability: True when the sentence is negated
    :return: the passive sentence, or ``str(doc)`` unchanged on failure

    Fixes: the plural-negated short-sentence branch returned ``" are not"``
    without the trailing space; the bare ``except:`` is narrowed to
    ``except Exception``.
    """
    print(doc)  # debug trace, kept from the original
    try:
        # past-participle (VBN) form of the root verb
        participle = str(getInflection(doc[root_idx].lemma_, tag='VBN')[0])
        # singular_noun() returns False when the noun is already singular
        be = "is" if inflect.singular_noun(str(doc[obj_index])) is False else "are"
        if negation_availability:
            be += " not"
        # 'obj_end + 2' checks whether the sentence continues past the object
        # (then the rest is appended with a space) or ends with it (then the
        # full stop is glued directly to the last word).
        if len(doc) > obj_end + 2:
            return str(doc[obj_start:obj_end]) + " " + be + " " + participle + " " + str(doc[obj_end:])
        return str(doc[obj_start:obj_end]) + " " + be + " " + participle + "."
    except Exception:
        # best-effort fallback (original behaviour): return the sentence as-is
        return str(doc)
def dRegVase(self):
    """Render an English description of this vase node and its children."""
    if self.name == 'containing things':
        # Special container node: enumerate the children as the vase's
        # contents, comma-separated with 'and' before the final item.
        desc_subs = ''
        i = len(self.children)
        imax = i
        for child in self.children:
            # singular_noun() is falsy for singular nouns
            singular = not inflect.singular_noun(child.name)
            multi = 'a' if singular else ' {} '.format(child.quantity)
            template = ', {} {} '
            if i == imax:
                template = '{} {} '   # first item: no leading comma
            if i == 1:
                template = 'and {} {} '  # last item
            desc_subs = desc_subs + template.format(multi, child.name)
            i = i - 1
        return ' the {} contains {}. '.format('vase', desc_subs)
    if (len(self.children) == 0):
        return 'OVERLOAD: {} '.format(self.name)
    if (len(self.children) == 1):
        # single child: "made of" phrasing
        multi = (self.quantity > 1)
        if (len(self.children[0].children) == 0):
            if multi:
                return 'each {} is made of {} {}. '.format(
                    inflect.singular_noun(self.name),
                    self.children[0].quantity
                    if self.children[0].quantity > 1 else 'a',
                    self.children[0].name)
            else:
                return 'the {} is made of {} {}. '.format(
                    self.name,
                    self.children[0].quantity
                    if self.children[0].quantity > 1 else 'a',
                    self.children[0].name)
        elif (len(self.children[0].children) == 1):
            # has just 1 child
            return 'the {} which is {} '.format(
                self.name, self.children[0].dRegVase())
        else:
            # has multiple children
            # NOTE(review): identical to the branch above — possibly an
            # unfinished specialisation; left untouched.
            return 'the {} which is {} '.format(
                self.name, self.children[0].dRegVase())
    # multiple children: "has" phrasing, same list construction as above
    desc_subs = ''
    i = len(self.children)
    imax = i
    for child in self.children:
        singular = not inflect.singular_noun(child.name)
        multi = 'a' if singular else ' {} '.format(child.quantity)
        template = ', {} {} '
        if i == imax:
            template = '{} {} '
        if i == 1:
            template = 'and {} {} '
        desc_subs = desc_subs + template.format(multi, child.name)
        i = i - 1
    return 'the {} has {}. '.format(self.name, desc_subs)
def keep_plurals(noun, newnoun):
    """Give ``newnoun`` the same grammatical number as ``noun``.

    ``singular_noun()`` returns False when ``noun`` is already singular.
    """
    noun_is_singular = inflect.singular_noun(noun) is False
    return singularize(newnoun) if noun_is_singular else pluralize(newnoun)
def modifier(sentence, root_verb, subject, aux_list):
    """Convert a continuous-tense sentence using the auxiliary that lies
    between the subject and the root verb.

    :param sentence: spaCy-parsed sentence (sequence of tokens)
    :param root_verb: index of the root verb token
    :param subject: index of the subject token
    :param aux_list: auxiliary words that qualify for conversion
    :return: the converted sentence, or False when no matching aux exists
    """
    # Of the tokens tagged "aux", keep only those between the subject and
    # the root verb whose text appears in aux_list.
    aux_index = [
        idx for idx in range(len(sentence))
        if str(sentence[idx].dep_) == "aux" and subject < idx < root_verb
        and str(sentence[idx]) in aux_list
    ]
    # base (lemma) form of the verb
    base_verb = sentence[root_verb].lemma_
    if len(aux_index) != 0:
        aux_idx = aux_index[0]
        # negation: a literal "not" immediately after the auxiliary
        negation_availability = str(sentence[aux_idx + 1]) == "not"
        # BUG FIX: was `str(sentence[subject]) is "I"` — an identity
        # comparison against a freshly built str is effectively always
        # False, making the "I"-based branch unreachable.
        if str(sentence[subject]) == "I":
            return i_based_sent(negation_availability, sentence, aux_idx,
                                root_verb, base_verb)
        # singular subject (singular_noun() is False for singular nouns)
        elif inflect.singular_noun(str(sentence[subject])) is False:
            return singular_sent(negation_availability, sentence, aux_idx,
                                 root_verb, base_verb)
        else:
            return plural_sent(negation_availability, sentence, aux_idx,
                               root_verb, base_verb)
    return False
def import_data(data, session):
    """
    Import employee data into the database using the given session object.

    :param data: Employee data to import, see ``employee_data``
    :param session: SQLAlchemy session object
    """
    items = []
    for table_name, table_data in data.items():
        singular_name = inflect.singular_noun(table_name)
        # model class is the PascalCase form of the singular table name
        model_cls = globals()[stringcase.pascalcase(singular_name)]
        pk_column = singular_name + '_id'
        for row in table_data:
            # construct an instance of the Model, dropping the primary key —
            # ids are auto-incremented by the database
            kwargs = {col: val for col, val in row.items() if col != pk_column}
            items.append(model_cls(**kwargs))
    session.add_all(items)
    session.flush()
    session.commit()
def process_line(i):
    # Build header/value output rows for line ``i`` by keeping the single
    # plural-headed column (or, failing that, the single singular-headed one).
    a_h = all_head[i]
    a_h = a_h.strip().split('\t')
    ans_h = a_h[0] + '\t'
    ans = a_h[0] + '\t'
    if len(a_h)>1:
        # NOTE(review): bails out when the header row already has more than
        # one field — presumably such rows need no processing; confirm.
        return ans_h, ans
    a = all[i]
    a = a.strip().split('\t')
    c_h = cat_head[i]
    c_h = c_h.strip().split('\t')
    c = cat[i]
    c = c.strip().split('\t')
    s_head = []
    p_head = []
    s = []
    p = []
    for j in range(1, len(c)):
        # singular_noun() == False means the header word is singular
        if inflect.singular_noun(c_h[j]) == False:
            s_head.append(c_h[j])
            s.append(c[j])
        else:
            p_head.append(c_h[j])
            p.append(c[j])
    if len(p) == 1:
        # exactly one plural column: keep it
        ans_h += p_head[0]+'\t'
        ans += p[0]+'\t'
    elif len(p) == 0:
        # no plural column: fall back to a unique singular column
        if len(s) == 1:
            ans_h += s_head[0] + '\t'
            ans += s[0] + '\t'
    # NOTE(review): when len(p) > 1 nothing is appended and the bare prefix
    # is returned — assumed intentional (ambiguous rows dropped); confirm.
    return ans_h, ans
def pick_keyword(word):
    """Decide whether ``word`` can serve as a puzzle keyword, filling in
    its long definition as a side effect.  Returns True/False."""
    global wordDatabase
    info = wordDatabase[word]
    print(word + "\n" + str(info) + "\n" + str(len(wordDatabase)) + "\n")
    # plural words (singular_noun() truthy) are rejected outright
    if inflect.singular_noun(word):
        return False
    word_definition = wordDef.get_definition(word)
    info["long"] = word_definition
    if word_definition == "":
        return False
    details = wordDef.choose_questions(word, definition=word_definition)
    print(word)
    print(details)
    if details is None:
        return False
    clue_list = [(key, details[key]) for key in details]
    clue_list.append((word, word_definition))
    # reject if any clue (or the word itself) contains profanity
    if wordDef.has_swear_words(str(clue_list) + " " + word):
        return False
    return True
def dRoot(self):
    """Render the English description for the root node of this tree."""
    # Vase trees have a dedicated renderer.
    if (self.mcat == 'Vase'):
        return self.dRootVase()
    # Shallow, small trees use the compact form.
    if (self.level < 1 and self.total_children < 3):
        return ' {} '.format(self.dSmall())
    if (len(self.children) > 1 or self.level >= 2):
        desc_subs = ''
        desc_all_subs = ''
        i = len(self.children)
        imax = i
        for child in self.children:
            # singular_noun() is falsy for singular nouns
            singular = not inflect.singular_noun(child.name)
            multi = 'a' if singular else 'a set of {}'.format(
                child.quantity)
            # comma-separated list with 'and' before the final item
            template = '{}, {} {}'
            if i == imax:
                template = '{} {} {}'   # first item: no leading comma
            if i == 1:
                template = '{} and {} {}'  # last item
            desc_subs = template.format(desc_subs, multi, child.name)
            # recurse into children that themselves have children
            desc_all_subs = desc_all_subs + (child.dReg(
                1, 0) if (len(child.children) > 0) else '')
            i = i - 1
        return 'a {} that is made of {}. {}'.format(
            self.name, desc_subs, desc_all_subs)
    else:
        # single child and shallow: chain the child's own description
        return '{} that is {}'.format(self.name, self.children[0].dRoot())
def singular(word):
    """
    Returns a singularized word

    :rtype: str
    """
    # singular_noun() yields False when the word is already singular;
    # fall back to the word itself in that case.
    return inflect.singular_noun(word) or word
def replace_pronoun(sentence, subject_idx):
    """Replace the subject token with 'it' (singular) or 'they' (plural)."""
    before = str(sentence[:subject_idx]).strip()
    after = str(sentence[subject_idx + 1:]).strip()
    # singular_noun() is False when the subject is a singular term
    subject_is_singular = inflect.singular_noun(str(sentence[subject_idx])) is False
    pronoun = " it " if subject_is_singular else " they "
    return (before + pronoun + after).strip()
def analyze_numerus(attribute):
    # Prefix a noun phrase with the indefinite article 'a'/'an' when its
    # head noun is singular; return the phrase unchanged otherwise.
    vowels = ('a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U')
    attr = nltk.word_tokenize(attribute)
    pos = nltk.pos_tag(attr)
    for i in range(len(pos)):
        if i < len(pos) - 1:
            # head-noun candidate: an NN not followed by another NN
            if pos[i][1] == 'NN' and pos[i + 1][1] != 'NN':
                # singular_noun() is falsy for singular nouns
                if not inflect.singular_noun(pos[i][0]):
                    # article is chosen by the FIRST word of the phrase,
                    # since the article precedes the whole attribute
                    if pos[0][0].startswith(vowels):
                        return 'an ' + attribute
                    else:
                        return 'a ' + attribute
        else:
            # last token: NOTE(review) unlike the branch above this does
            # not check the POS tag, so any singular final word triggers
            # an article — confirm this asymmetry is intended.
            if not inflect.singular_noun(pos[i][0]):
                if pos[0][0].startswith(vowels):
                    return 'an ' + attribute
                else:
                    return 'a ' + attribute
    return attribute
def adorn_adj_with_noun(astring):
    """Build a 'behold---<this/those> <noun> <adj> as a <other>' sentence,
    dropping the article when the comparison noun is plural."""
    demonstrative, noun, adj, other_nouns = get_noun_adj_othernouns(astring)
    comparison = random.choice(other_nouns[:10])[0]  ## choose from top
    str_rep = "behold---%s %s %s as a %s" % (demonstrative, noun, adj, comparison)
    ## weirdly, singular_noun() returns False if already singular,
    ## so a truthy result means the comparison noun is plural
    if inflect.singular_noun(comparison):
        str_rep = str_rep.replace(" as a", " as")
    # str_rep = re.sub(r' a ([aeiou])',r' an \1',str_rep)
    return str_rep
def initialize_data(topic):
    """Fetch PubMed records plus chemical and disease term lists for a topic.

    :param topic: "OP" (UV exposure / osteoporosis) or
        "PD" (pesticides / Parkinson's disease)
    :return: tuple ``(records, chemicals, diseases)``
    :raises ValueError: for an unsupported topic — previously the function
        fell through and crashed with an UnboundLocalError on the return.
    """
    if topic == "OP":
        # NOTE(review): this query string has an unbalanced '(' — left
        # byte-identical since the search backend apparently accepts it;
        # confirm before "fixing".
        results = search(
            "(Ultraviolet rays[MeSH Terms] OR Sunlight[Mesh]) AND (Osteoporosis[MeSH Terms]"
        )
        records = fetch_medline(results['IdList'])
        # NOTE(review): "Ultra-Violet Rays" appears twice; kept as-is so the
        # resulting chemicals list is unchanged downstream.
        agent_mesh = [
            "Ultraviolet Rays", "Ultra-Violet Rays", "Ultra-Violet Rays",
            "Ultra Violet Rays", "Actinic Rays", "Ultraviolet Light",
            "Ultraviolet", "UV Light", "UV", "Black Lights",
            "Ultraviolet Black Lights"
        ]
        expanded_terms = [
            "Sun", "Sunshine", "Sunlight", "Ultraviolet Radiation", "UVA",
            "UVB", "Ultraviolet A", "Ultraviolet B"
        ]
        # crude de-pluralisation via rstrip("s"), plus expanded synonyms
        chemicals = agent_mesh + [mesh.rstrip("s") for mesh in agent_mesh] + expanded_terms
        diseases = ["osteoporosis", "osteoporosis, NOS", "osteoporoses"]
        return records, chemicals, diseases
    if topic == "PD":
        # NOTE(review): hard-coded user-specific path — consider making it
        # a parameter or config entry.
        data = pandas.read_csv(
            '/Users/dyin/Desktop/Semester 2/HaBIC/common_sql.csv', header=0)
        agent_mesh = [
            "Acaricides", "Chemosterilants", "Fungicides", "Herbicides",
            "Defoliants", "Insect Repellents", "Insecticides",
            "Molluscacides", "Pesticide Residues", "Pesticide Synergists",
            "Rodenticides", "Pesticides"
        ]
        # singular_noun() yields the singular form for every (plural) term
        chemicals = data["Chemical name"].tolist() + agent_mesh + [
            inflect.singular_noun(mesh) for mesh in agent_mesh
        ]
        results = search(
            "(Parkinson disease[Mesh] OR Parkinsonian Disorders[Mesh]) AND Pesticides[Mesh]"
        )
        records = fetch_medline(results['IdList'])
        diseases = [
            "Idiopathic Parkinson's disease", "Parkinson disease", "PD",
            "Parkinson's disease", "Parkinsons disease",
            "Primary Parkinsonism", "Idiopathic Parkinsonism",
            "Parkinson's disease (disorder)", "Parkinson's disease, NOS",
            "Paralysis agitans", "Idiopathic parkinsonism",
            "Primary parkinsonism", "Shaking palsy"
        ]
        return records, chemicals, diseases
    raise ValueError("unsupported topic: {!r} (expected 'OP' or 'PD')".format(topic))
def get_start_sent_noun(noun):
    """Generate a sentence opener ending in ``noun``; retry recursively
    until the result is at least five syllables long."""
    subject, verbs = random.choice(list(beginnings.items()))
    sent = subject + " " + random.choice(verbs) + " "
    # countable nouns with a truthy singular_noun() result get an article
    if countable.countable_noun(noun) and inflect.singular_noun(noun):
        sent += "an " if start_with_vowel(noun) else "a "
    sent += noun
    print(sent)
    if estimate_syllables(sent) < 5:
        return get_start_sent_noun(noun)
    return sent
def process_line(i):
    """Split row ``i`` into plural-headed and singular-headed column sets,
    each returned as tab-joined (values, headers) strings."""
    content = contents[i].strip().split('\t')
    head = heads[i].strip().split('\t')
    res_p = content[0] + '\t'
    res_p_head = head[0] + '\t'
    res_s = content[0] + '\t'
    res_s_head = head[0] + '\t'
    for col in range(1, len(head)):
        # singular_noun() is False when the header word is singular
        if inflect.singular_noun(head[col]) is False:
            res_s += content[col] + '\t'
            res_s_head += head[col] + '\t'
        else:
            res_p += content[col] + '\t'
            res_p_head += head[col] + '\t'
    return res_p, res_p_head, res_s, res_s_head
def table_write(request, session):
    """Log an item/location pair from an Alexa-style request into DynamoDB.

    :param request: intent request dict with ``item`` and ``location`` slots
    :param session: session dict carrying the user id

    Fix: the singular/plural flag was stored in a local variable named
    ``bool``, shadowing the builtin; renamed to ``is_singular``.
    """
    # itemBool is True for a singular item name, False for a plural one
    # (singular_noun() is False when the word is already singular).
    item = request['intent']['slots']['item']['value']
    is_singular = inflect.singular_noun(item) is False
    location = request['intent']['slots']['location']['value']
    # rewrite first-person possessive: "my kitchen" -> "your kitchen"
    if 'my' in location.split(' '):
        location = ' '.join(
            'your' if word == 'my' else word for word in location.split(' '))
    table.put_item(
        Item={
            'userID': session['user']['userId'].split('.')[-1],
            'itemName': item,
            'itemBool': is_singular,
            'location': location,
            'loggedTime': str(datetime.utcnow().time()),
            'loggedDate': str(datetime.utcnow().date())
        })
def check_tell(new_list,idx,element,check_dict):
    # Resolve which flag applies at position ``idx`` given (position, flag)
    # markers in ``new_list``; 1 is the default/fallback result.
    list1=sorted(new_list,key=lambda x:x[0])
    if list1==[]:
        return 1
    # idx precedes the first marker
    if idx<list1[0][0]:
        return 1
    check=-1
    echeck=0
    # ``excp`` is presumably a module-level list of exception words that
    # bypass the singular check below — TODO confirm.
    for el in excp:
        if el in check_dict[element]:
            echeck=1
            break
    # singular word (singular_noun() is False) that is not an exception
    # word => default
    if inflect.singular_noun(check_dict[element]) is False and echeck==0:
        return 1
    # find the marker interval that contains idx
    for id,ele in enumerate(list1):
        if id<len(list1)-1:
            if idx>ele[0] and idx<list1[id+1][0]:
                check=ele[1]
    # idx lies beyond the last marker
    if idx>list1[len(list1)-1][0]:
        check=list1[len(list1)-1][1]
    if check==-1:
        return 1
    else:
        return check
def main():
    """Run one read-respond cycle of the chatbot: read input, normalise it,
    update mood/opinions, pick or learn a response, and speak it.

    Relies heavily on module-level state (conv, mood, names, male, female,
    replacements, positives, negatives, talkingto, ...).  Indentation below
    is reconstructed from the original; structural ambiguities are flagged
    with NOTE(review) comments.
    """
    # Pick the current time-of-day word and remove it from the pool of
    # "other" time words used for the !currenttime! substitution.
    hour = int(datetime.datetime.now().strftime("%H"))
    time_words = ['morning', 'afternoon', 'evening', 'night']
    if hour < 12:
        current_time_word = 'morning'
    if 11 < hour < 19:
        current_time_word = 'afternoon'
    if 18 < hour < 21:
        current_time_word = 'evening'
    if hour > 20:
        current_time_word = 'night'
    time_words.remove(current_time_word)
    resp = None
    global conv, latestname, latestfemale, latestmale, latestpronoun, latestname, mood
    think('getting input...')
    # strip punctuation/quote characters from the raw input
    theysaid = str(getinput().lower().replace('/', '').replace(
        '\\', '').replace('?', '').replace('!', '').replace("'", '').replace('"', ''))
    og_theysaid = theysaid
    # nudge mood (polarity, subjectivity) towards the input's sentiment
    txtblb = textblob.TextBlob(theysaid)
    mood[0] = (mood[0] * 0.95) + (txtblb.sentiment.polarity * 0.05)
    mood[1] = (mood[1] * 0.95) + (txtblb.sentiment.subjectivity * 0.05)
    think('input before preprocessing: {}'.format(theysaid))
    theysaid = theysaid.replace(talkingto, '!speaker!')
    if theysaid == 'exit':
        think('Saving and exiting...')
        raise SystemExit('Saving and exiting')
    if theysaid == 'reset':
        # importing the reset module performs the reset as a side effect
        import reset
        raise SystemExit('Saving and exiting')
    # hard-wired commands take over the whole turn
    for command in commands.commands:
        if theysaid in command.inp:
            command.func()
            say(random.choice(command.responses))
            input('Press RETURN to exit')
            raise SystemExit('Saving and exiting')
    choices_temp = []
    if theysaid == 'what is your name' or theysaid == 'whats your name' or theysaid == 'what is ur name' or theysaid == 'whats ur name' or theysaid == 'what are you called' or theysaid == 'what are u called':
        choices_temp.append([
            'my name is !speaker!', 'i am called !speaker!',
            'call me !speaker!'
        ])
    think('theysaid is {}'.format(theysaid))
    # sentiment weights for opinion verbs, and reverse lookups used to
    # phrase the bot's own opinion
    opinionated = {
        'dont like': -0.3,
        'dislike': -0.4,
        'really dont like': -0.5,
        'really dislike': -0.5,
        'hate': -0.75,
        'really hate': -0.85,
        'despise': -0.95,
        'like': 0.3,
        'really like': 0.5,
        'dont dislike': 0,
        'dont mind': 0.1,
        'really dont mind': 0.2,
        'love': 0.8,
        'really love': 0.9,
        'adore': 0.95
    }
    opiniated_reversed_pos = {
        0.95: 'adore',
        0.9: 'really love',
        0.8: 'love',
        0.5: 'really like',
        0.3: 'like',
        0.2: 'really dont mind',
        0.1: 'dont mind'
    }
    opiniated_reversed_neg = {
        -0.95: 'despise',
        -0.85: 'really hate',
        -0.75: 'hate',
        -0.5: 'really dislike',
        -0.5: 'really dont like',
        -0.4: 'dislike',
        -0.3: 'dont like',
        -0.1: 'am not fond of'
    }
    # "i <opinion verb> <thing>": update the stored opinion of <thing>
    # in Info/Opinions/<thing>.txt and phrase a matching reply.
    for i in list(opinionated.keys()):
        if theysaid.startswith('i ' + i) and not resp:
            try:
                rd = open(
                    'Info/Opinions/' + theysaid.replace('i ' + i + ' ', '') +
                    '.txt', 'r')
                opinion = float(rd.read())
                rd.close()
                newopinion = opinion + (random.gauss(0.05, 0.01) * min(
                    max([
                        2 * mood[0] * opinionated[i] + random.gauss(0, 0.2),
                        -1
                    ]), 1))
                with open(
                        'Info/Opinions/' +
                        theysaid.replace('i ' + i + ' ', '') + '.txt',
                        'w+') as opopinion:
                    opopinion.write(str(newopinion))
                opinion = newopinion
            except BaseException:
                # no stored opinion yet: seed one from mood + verb weight
                with open(
                        'Info/Opinions/' +
                        theysaid.replace('i ' + i + ' ', '') + '.txt',
                        'w+') as opopinion:
                    opinion = min(
                        max([
                            2 * mood[0] * opinionated[i] +
                            random.gauss(0, 0.2), -1
                        ]), 1)
                    opopinion.write(str(opinion))
            if opinion > 0:
                # NOTE(review): loop keeps the LAST threshold that opinion
                # exceeds; dict order therefore matters — confirm.
                for cur in opiniated_reversed_pos:
                    if opinion > cur:
                        howmuchilikeit = opiniated_reversed_pos[cur]
                if howmuchilikeit == i and random.randint(1, 3) == 3:
                    resp = random.choice(
                        ['as do i', 'same', 'same here', 'agreed'])
                if not resp:
                    # plural things get "them", singular get "it"
                    if inflect.singular_noun(
                            theysaid.replace('i ' + i + ' ', '')):
                        resp = random.choice([
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' ' +
                            theysaid.replace('i ' + i + ' ', ''),
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' them'
                        ])
                    else:
                        resp = random.choice([
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' ' +
                            theysaid.replace('i ' + i + ' ', ''),
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' it'
                        ])
            if opinion < 0:
                # NOTE(review): iterates the POSITIVE table but indexes the
                # NEGATIVE one — works only because the keys happen to pair
                # up; looks fragile, confirm.
                for cur in opiniated_reversed_pos:
                    if opinion < cur:
                        howmuchilikeit = opiniated_reversed_neg[cur]
                if howmuchilikeit == i and random.randint(1, 3) == 3:
                    resp = random.choice(['same', 'same here', 'agreed'])
                if not resp:
                    if inflect.singular_noun(
                            theysaid.replace('i ' + i + ' ', '')):
                        resp = random.choice([
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' ' +
                            theysaid.replace('i ' + i + ' ', ''),
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' them'
                        ])
                    else:
                        resp = random.choice([
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' ' +
                            theysaid.replace('i ' + i + ' ', ''),
                            random.choice(['i ', 'well i ']) +
                            howmuchilikeit + ' it'
                        ])
            if not resp:
                # neutral opinion fallback
                if inflect.singular_noun(theysaid.replace('i ' + i + ' ', '')):
                    resp = random.choice(['i ', 'well i ']) + random.choice([
                        'do not have an opinion on ',
                        'dont have any feeling towards ',
                        'feel indifferent about '
                    ]) + random.choice(
                        [theysaid.replace('i ' + i + ' ', ''), 'them'])
                else:
                    resp = random.choice(['i ', 'well i ']) + random.choice([
                        'do not have an opinion on ',
                        'dont have any feeling towards ',
                        'feel indifferent about '
                    ]) + random.choice(
                        [theysaid.replace('i ' + i + ' ', ''), 'it'])
    # canned small talk, flavoured by current mood polarity
    if theysaid.startswith('how are you') or theysaid.startswith('how are u'):
        think('Said how are you')
        if mood[0] > 0:
            choices_temp.extend([
                'good thanks how are you', 'im good', 'good', 'pretty good',
                'im pretty good', 'good thanks'
            ])
        elif mood[0] < 0:
            choices_temp.extend([
                'alright how are you', 'im alright', 'im fine',
                'im fine how are you', 'im doing alright', 'alright'
            ])
        else:
            choices_temp.append('lit')
        think('Choices temp is now {}'.format(choices_temp))
    # "you are X" / "are you X": agree if X is a known self-attribute or
    # a positive-sentiment phrase (and learn it), otherwise deny.
    if theysaid.startswith('you are ') or theysaid.startswith('are you '):
        if theysaid.replace('you are ', '').replace('are you ', '') in things_i_am:
            resp = random.choice(['!pos!, i am', '!pos!', 'i am'])
        else:
            if textblob.TextBlob(
                    theysaid.replace('you are ', '').replace(
                        'are you ', '')).sentiment.polarity > 0:
                resp = random.choice(['!pos!, i am', '!pos!', 'i am'])
                add_to_me = open('Me.txt', 'a')
                add_to_me.write(theysaid.replace('you are ', ''))
                add_to_me.close()
            else:
                resp = random.choice(['!neg!, i am', '!neg!', 'i am not'])
    think('checking if {} starts with i'.format(theysaid))
    # "i <verb> ...": store a third-person version as a fact about the speaker
    if len(theysaid) > 1 and theysaid.startswith(
            'i ') and 'VB' in pyinflect.getAllInflections(
                theysaid.split(' ')[1]) and not theysaid.split(' ')[1] in [
                    'do', 'can', 'will', 'would', 'could', 'should'
            ]:
        think('ok this is interesting')
        add_noun(talkingto, 'PERSON', makethird(theysaid, gend == 'male'))
        think(makethird(theysaid, gend == 'male'))
    if theysaid.startswith('i can '):
        if theysaid.replace('i can ', '') in things_i_can_do:
            resp = random.choice(
                ['same', 'as can i', 'so can i', theysaid + ' too'])
        else:
            resp = random.choice([
                "yeah, i can't", "really, i can't", "can you, i can't", 'cool'
            ])
    if theysaid.startswith('i cant ') or theysaid.startswith("i can't "):
        if theysaid.replace('i cant ', '').replace("i can't ",
                                                   '') in things_i_can_do:
            resp = random.choice(
                ['i can', 'why not', 'why not, i can', 'really, i can'])
        else:
            resp = random.choice(
                ['same', 'nor can i', "i can't either", "i also can't"])
    # "tell me about X": recall two stored facts about person X
    if theysaid.startswith('tell me about '):
        try:
            if theysaid.replace('tell me about ', '') == '!speaker!':
                theysaid = theysaid.replace('!speaker!', talkingto)
            rd_person_info = open(
                'Info/PERSON/' + theysaid.replace('tell me about ', '') +
                '.txt', 'r')
            person_info = rd_person_info.read()
            rd_person_info.close()
            temp_pronoun = theysaid.replace('tell me about ', '')
            if 'male' in person_info.split(
                    '\n') or 'is male' in person_info.split('\n'):
                temp_pronoun = 'he'
            if 'female' in person_info.split(
                    '\n') or 'is female' in person_info.split('\n'):
                temp_pronoun = 'she'
            choices = person_info.split('\n')
            try:
                # NOTE(review): removing from a list while iterating it
                # skips elements; behaviour preserved as-is.
                for i in choices:
                    if len(i) < 2:
                        choices.remove(i)
                choices.remove('/male')
                choices.remove('/female')
            except BaseException:
                pass
            if len(choices) > 2:
                facts = random.sample(choices, 2)
                resp = temp_pronoun + ' ' + \
                    facts[0] + ' and ' + temp_pronoun + ' ' + facts[1]
            else:
                raise Exception('No data (choices < 1)')
        except Exception as e:
            think(str(e))
            if theysaid.replace('tell me about ', '') in male:
                resp = random.choice([
                    'i know nothing about', 'i dont know anything about ',
                    'i dont know ', 'i dont think i know ',
                    'i dont think i know anything about ',
                    'i cant tell you about '
                ]) + random.choice(
                    ['him', 'them',
                     theysaid.replace('tell me about ', '')])
            if theysaid.replace('tell me about ', '') in female:
                resp = random.choice([
                    'i know nothing about', 'i dont know anything about ',
                    'i dont know ', 'i dont think i know ',
                    'i dont think i know anything about ',
                    'i cant tell you about '
                ]) + random.choice(
                    ['her', 'them',
                     theysaid.replace('tell me about ', '')])
    # NOTE(review): str.replace returns a new string; this result is
    # discarded, so the line is a no-op as written.
    theysaid.replace(talkingto, '!speaker!')
    # remember the most recent male/female/name mentions
    for i in theysaid.split(' '):
        if i in male:
            latestmale = i
        if i in female:
            latestfemale = i
        if i in names or os.path.isfile('Info/PERSON/' + i + '.txt'):
            latestname = i
    # "<noun> is/are ...": learn the statement as a noun description
    try:
        if len(theysaid.split(' ')) > 1:
            if wn.synsets(theysaid.split(' ')[0])[0].pos() == 'n' and not theysaid.split(' ')[0] in male and not \
                    theysaid.split(' ')[0] in female and not theysaid.split(
                        ' ')[0] in names and theysaid.split(' ')[1] in ['is', 'are'] and not os.path.isfile(
                            'Info/PERSON/' + theysaid.split(' ')[0] + '.txt'):
                add_noun(
                    nltk.PorterStemmer(theysaid.split(' ')[0]).stem(), 'NOUN',
                    theysaid.replace(theysaid.split(' ')[0] + ' ', ''))
                # NOTE(review): PorterStemmer is being called with multiple
                # arguments here — this looks like misplaced parentheses and
                # will raise (swallowed by the except below); confirm.
                add_noun(
                    nltk.PorterStemmer(
                        theysaid.replace(theysaid.split(' ')[0] + ' ', ''),
                        'DESCRIPTION',
                        nltk.PorterStemmer(theysaid.split(' ')[0]).stem()))
    except BaseException:
        pass
    for i in mynames:
        theysaid = theysaid.replace(i, '!speakingto!')
    person_in_it = False  # NOTE(review): set but never used in this function
    if theysaid.startswith('!speaker!'):
        add_noun(talkingto, 'PERSON',
                 theysaid.replace('!speaker!', '').strip())
    # Saving theysaid before replacing pronouns so we can add !pronoun! later
    pretheysaid = theysaid
    for i in range(len(theysaid.split(' '))):
        # NOTE(review): ``i`` is an int index, so the two membership/equality
        # tests against word strings below can never be true — these four
        # branches look dead; probably meant theysaid.split(' ')[i].
        if i == current_time_word:
            theysaid = theysaid.replace(
                theysaid.split(' ')[i], '!currenttimeword!')
        if i in time_words:
            theysaid = theysaid.replace(theysaid.split(' ')[i], '!timeword!')
        # NOTE(review): assigning into the throwaway list returned by
        # split() has no effect on ``theysaid``.
        if theysaid.split(' ')[i] == 'he':
            theysaid.split(' ')[i] = latestmale
        if theysaid.split(' ')[i] == 'she':
            theysaid.split(' ')[i] = latestfemale
    # pad with spaces so whole-word replacements below can anchor on ' x '
    theysaid = ' ' + pretheysaid + ' '
    for n in names:
        for i in theysaid.split(' '):
            if i == n and n in male:
                latestpronoun = ' he '
            if i == n and n in female:
                latestpronoun = ' she '
    # NOTE(review): theysaid now starts with a space, so this startswith
    # can never match; likely intended before the padding above.
    if theysaid.startswith('is '):
        try:
            read_1 = open('Info/PERSON/' + theysaid.split(' ')[1], 'r')
            temp_text_1 = read_1.read()
            read_1.close()
            if theysaid.replace('is ' + theysaid.split(' ')[1] + ' ',
                                '') in temp_text_1.split('\n'):
                resp = random.choice(positives)
            elif theysaid.replace('is ' + theysaid.split(' ')[1] + ' not ',
                                  '') in temp_text_1.split('\n'):
                resp = random.choice(negatives)
            else:
                # unknown: answer randomly and remember the answer as a fact
                resp = random.choice(negatives + positives + neutral +
                                     ["i don't know"] * 5)
                if resp in positives:
                    add_noun(
                        theysaid.split(' ')[1], 'PERSON', 'is ' +
                        theysaid.replace(
                            'is ' + theysaid.split(' ')[1] + ' ', ''))
                else:
                    add_noun(
                        theysaid.split(' ')[1], 'PERSON', 'is not ' +
                        theysaid.replace(
                            'is ' + theysaid.split(' ')[1] + ' ', ''))
        except BaseException:
            pass
    # Maybe add 'is' set here
    # replace concrete tokens with placeholder tags for memory matching
    for i in [' he ', ' she ']:
        theysaid = theysaid.replace(i, ' !pronoun! ')
    for i in names:
        if i in theysaid:
            theysaid = theysaid.replace(' ' + i + ' ', ' !name! ')
            replacements.update({i: '!name!'})
    for i in all_resp:
        for j in i.responses:
            if j in theysaid and not j in positives and not j in negatives:
                theysaid = theysaid.replace(j, i.tag)
    for i in positives:
        theysaid = theysaid.replace(' ' + i + ' ', ' !pos! ')
    for i in negatives:
        theysaid = theysaid.replace(' ' + i + ' ', ' !neg! ')
    theysaid = theysaid.strip()
    theysaid_mod_is = None  # NOTE(review): never used afterwards
    # expand contractions so memory keys are uniform
    contractions = [['do not', 'dont'], ['will not', 'wont'],
                    ['can not', 'cant'], ['are not', 'arent'],
                    ['is not', 'isnt'], ['were not', 'werent'],
                    ['i am', 'im'], ["i'm", "im"]]
    for i in contractions:
        theysaid = theysaid.replace(i[1], i[0])
    # conjunctions_a=[' is ',' are ',' isnt ',' arent ',' were ',' werent ',' can ',' cant ', ' do ', ' dont ',
    # ' will ',' wont ']
    think('input after preprocessing: {}'.format(theysaid))
    og_conv = list(conv)
    og_conv.append(og_theysaid)
    conv.append(theysaid)
    think('conv:{}'.format(conv))
    # flatten the conversations into '/'-separated token streams
    ogconv2 = []
    for i in range(len(og_conv)):
        ogconv2.append(og_conv[i])
        if i != len(og_conv) - 1:
            ogconv2.append('/')
    ogconv3 = []
    for i in ogconv2:
        ogconv3.extend(i.split(' '))
    conv_f2 = []
    for i in range(len(conv)):
        conv_f2.append(og_conv[i])
        if i != len(conv) - 1:
            conv_f2.append('/')
    conv_f3 = []
    dictlist = []
    for key, value in replacements.items(
    ):  # Make sure this checks both theysaid and pretheysaid to find the
        # correct dictionary value that contains both sides of it
        for _ in range(theysaid.count(value)):
            if theysaid.count(key) == theysaid.count(value):
                dictlist.append([value, key])
    for i in conv_f2:
        conv_f3.extend(i.split(' '))
    # gather candidate responses for every placeholder expansion
    for conv_version in thisfunction(dictlist, '/'.join(conv)):
        choices_temp.append(find_response(conv))
    think('thisfunction returned {}'.format(
        str(list(thisfunction(dictlist, '/'.join(conv))))))
    idk = False
    think('choices temp is now {}'.format(choices_temp))
    # NOTE(review): only removes up to two Nones; list.count-based cleanup
    # would be exhaustive — behaviour preserved.
    for i in range(2):
        if None in choices_temp:
            choices_temp.remove(None)
    if choices_temp:
        resp = random.choice(choices_temp)
        think('selecting {} from {}'.format(resp, choices_temp))
    if resp == None:
        # question fallbacks
        if theysaid.startswith('why ') and len(theysaid.split(' ')) > 2:
            resp = random.choice([
                "im not sure", "i dont know", "just because",
                "how should i know"
            ])
        if theysaid.startswith('what ') or theysaid.startswith(
                'who ') or theysaid.startswith(
                    'where ') or theysaid.startswith('how '):
            resp = random.choice([
                "im not sure", "i dont know", "i don't know, " + theysaid,
                'how should i know'
            ])
    if resp == None:
        # nothing matched: persist the conversation into the Memory tree and
        # reuse the first empty memory leaf as a learned prompt
        idk = True
        for i in range(len(conv)):
            if not os.path.exists('Memory/' + '/'.join(conv[i:len(conv)])):
                os.makedirs('Memory/' + '/'.join(conv[i:len(conv)]))
        conv = []
        og_conv = []
        think('1: ' + str(os.listdir()))
        os.chdir(path)
        think('2: ' + str(os.listdir()))
        for i in os.listdir('Memory/'):
            if len(os.listdir('Memory/' + str(i))) == 0:
                resp = str(i)
                think('cleared and spoke')
                break
    if resp == None:
        # last resort: parrot the input
        resp = theysaid
    conv.append(resp)
    # substitute the placeholder tags back into the outgoing response
    real_resp = ' ' + resp + ' '
    if latestname != None:
        real_resp = real_resp.replace('!name!', latestname)
    else:
        for i in real_resp.split(' '):
            if i == '!name!':
                latestname = random.choice(names)
                real_resp = real_resp.replace(i, latestname)
    for i in real_resp.split(' '):
        for n in names:
            if i == n and n in male:
                latestpronoun = ' he '
            if i == n and n in female:
                latestpronoun = ' she '
        if i == '!pronoun!':
            real_resp = real_resp.replace(' !pronoun! ', latestpronoun)
    real_resp = real_resp.replace('!pos!', random.choice(positives))
    real_resp = real_resp.replace('!neg!', random.choice(negatives))
    real_resp = real_resp.replace('!speakingto!', talkingto)
    real_resp = real_resp.replace('!speaker!', random.choice(mynames))
    real_resp = real_resp.replace('!currenttimeword!', current_time_word)
    real_resp = real_resp.replace('!currenttime!', random.choice(time_words))
    for i in all_resp:
        real_resp = real_resp.replace(i.tag, random.choice(i.responses))
    real_resp = real_resp.strip()
    og_conv.append(real_resp)
    think('og_conv: {}'.format(og_conv))
    think('saying {}'.format(real_resp))
    say(real_resp)
    # flatten both conversation logs into token streams for persistence
    conv2 = list(conv)
    splitconv = [i.split(' ') for i in conv2]
    for j in splitconv:
        j.append('/')
    conv2 = [item for sublist in splitconv for item in sublist]
    og_conv2 = list(og_conv)
    og_splitconv = [i.split(' ') for i in og_conv2]
    for j in og_splitconv:
        j.append('/')
    og_conv2 = [item for sublist in og_splitconv for item in sublist]
    think('ogconv2: {}\nconv2: {}'.format(og_conv2, conv2))
    dictlist = []
    for key, value in replacements.items(
    ):  # Make sure this checks both theysaid and pretheysaid to find the correct dictionary value that contains both sides of it
        for _ in range(''.join(conv2).count(value)):
            if ''.join(conv2).count(key) == ''.join(conv2).count(value):
                dictlist.append([value, key])
    for i in dictlist:
        for j in range(len(conv)):
            if i[1] in conv[j]:
                # NOTE(review): str.replace result is discarded — no-op.
                conv[j].replace(i[1], i[0])
    think('thisfunc ' + str(list(thisfunction(dictlist, '/'.join(og_conv)))))
    think('dictlist={}'.format(dictlist))
    # persist every placeholder expansion of the conversation to Memory/
    for current_conv in list(thisfunction(dictlist, '/'.join(og_conv))):
        curr_split_conv = current_conv.split('/')
        for i in range(len(curr_split_conv)):
            if not os.path.exists(
                    'Memory/' +
                    '/'.join(curr_split_conv[i:len(curr_split_conv)])):
                os.makedirs('Memory/' +
                            '/'.join(curr_split_conv[i:len(curr_split_conv)]))
    # let the bot's own reply feed back into its mood, more weakly
    txtblb = textblob.TextBlob(resp)
    mood[0] = (mood[0] * 0.98) + (txtblb.sentiment.polarity * 0.02)
    mood[1] = (mood[1] * 0.98) + (txtblb.sentiment.subjectivity * 0.02)
for w in word_tokens: if w not in stop_words: filtered_sentence.append(w) filtered_text.append(filtered_sentence) else: filtered_text.append(jr) logging.info("Singularize plural words...") singularized_text = [] inflect = inflect.engine() for text in filtered_text: singular_sentence = [] if str(text) != 'nan': for word in text: if type(word) == str: if inflect.singular_noun(word) is False: singular_sentence.append(word) else: singular_sentence.append(singularize(word)) singular_sentence = ' '.join(singular_sentence) singularized_text.append(singular_sentence) else: singularized_text.append('-') # create new column, populate singularized_text into the column logging.info("Create new column & populate cleaned Job Responsibilities") ed['Cleaned Job Responsibilities'] = singularized_text new_dataframe = pd.DataFrame(ed) new_dataframe.to_csv('new_extracted_data.csv') logging.info("NEW COLUMN IS ADDED AND SAVED TO :'new_extracted_data.csv'") except Exception as e:
def is_plural(word):
    """Return True if *word* is a plural noun.

    inflect's ``singular_noun()`` returns the singular form (truthy) when the
    word is plural, and ``False`` when the word is already singular.

    BUG FIX: the original body ``not bool(inflect.singular_noun(word))`` was
    inverted — it returned True for *singular* words. Compare the elsewhere-in-
    file helper ``is_are``, which correctly treats a truthy ``singular_noun``
    result as "plural".
    """
    return inflect.singular_noun(word) is not False
def token2features(sent, i, add_neighs=True):
    """Compute the features of a token.

    All the features are boolean, i.e. they appear or they do not. For the
    token, you have to return a set of strings that represent the features
    that *fire* for the token. See the code below.

    The token is at position i, and the rest of the sentence is provided as
    well. Try to make this efficient, since it is called on every token.

    One thing to note is that it is only called once per token, i.e. we do
    not call this function in the inner loops of training. So if your
    training is slow, it's not because of how long it's taking to run this
    code. That said, if your number of features is quite large, that will
    cause slowdowns for sure.

    add_neighs is a parameter that allows us to use this function itself in
    order to recursively add the same features, as computed for the
    neighbors. Of course, we do not want to recurse on the neighbors again,
    and then it is set to False (see code).
    """
    ftrs = []
    # bias
    ftrs.append("BIAS")
    # position features
    if i == 0:
        ftrs.append("SENT_BEGIN")
    if i == len(sent) - 1:
        ftrs.append("SENT_END")

    # the word itself (str() replaces the Python-2-only unicode() builtin)
    word = str(sent[i])
    # word = stemmer.stem(word)
    ftrs.append("WORD=" + word)
    ftrs.append("LCASE=" + word.lower())
    # some features of the word
    if word.isalnum():
        ftrs.append("IS_ALNUM")
    if word.isnumeric():
        ftrs.append("IS_NUMERIC")
    if word.isdigit():
        ftrs.append("IS_DIGIT")
    if word.isupper():
        ftrs.append("IS_UPPER")
    if word.islower():
        ftrs.append("IS_LOWER")

    # CUSTOM FEATURES
    # 1 check singular and plural (singular_noun() is False for singular words)
    if inflect.singular_noun(word.lower()) is False:
        ftrs.append("IS_SINGULAR")
    # 2 check for punctuations
    if word.lower() in string.punctuation:
        ftrs.append("IS_PUNCTUATION")
    # 3 check for # or @ or retweet marker
    # BUG FIX: the original condition was
    #   "#" in word or "@" in word or "RT" or "rt" in word
    # which is always true — the bare literal "RT" is truthy on its own.
    if "#" in word or "@" in word or "RT" in word or "rt" in word:
        ftrs.append("IS_X")
    # 4 check adverb
    if word[-2:].lower() == "ly":
        ftrs.append("IS_ADVERB")
    # 5 first caps
    if word[0].isupper():
        ftrs.append("IS_1_UPPER")
    # 6 has hyphen
    if "-" in word:
        ftrs.append("HAS_HYPHEN")
    # 7 possible adj (endswith accepts a tuple of suffixes)
    if word.lower().endswith(("able", "ible", "ent", "er", "ous", "est")):
        ftrs.append("IS_ADJECTIVE")
    # 8 possible verbs
    if word.lower().endswith(("ing", "ed")):
        ftrs.append("IS_VERB")
    # 9 possible determiners
    if word.lower() in possible_dt:
        ftrs.append("IS_DT")

    # brown clustering, uncomment to enable
    if word.lower() in brown_cluster_dict:
        cluster_id = brown_cluster_dict[word.lower()]
        # repr() replaces the Python-2-only backtick-repr syntax
        feat = "IS_CLUSTER_" + repr(cluster_id)
        ftrs.append(feat)

    # previous/next word feats
    if add_neighs:
        if i > 0:
            for pf in token2features(sent, i - 1, add_neighs=False):
                ftrs.append("PREV_" + pf)
        if i < len(sent) - 1:
            for pf in token2features(sent, i + 1, add_neighs=False):
                ftrs.append("NEXT_" + pf)

    # return it!
    return ftrs
def return_entities(sentence):
    """Extract food entities with quantities and toppings from an order sentence.

    Pipeline: normalise contractions and punctuation, record numeric tokens,
    POS-tag, re-capitalise probable nouns so ``nltk.ne_chunk`` merges multiword
    names, harvest consecutive-noun phrases, then assign each detected food a
    quantity (preceding number, preceding "a"/"an" article, or 2 when plural).

    Depends on module-level names: ``count``, ``changew_n``, ``main_check``,
    ``inflect`` (an inflect engine), ``cuisine``, ``top``, ``att``,
    ``blacklist1``, ``also_check`` and NLTK's ``word_tokenize`` / ``pos_tag``.

    Returns:
        (top_list, new_food_dict, transform_dict, number_list)
    """
    # sentence="so I would like to travel from Gorakhpur to Mumbai on monday.show me the flights."
    sentence = sentence.lower()
    sentence = sentence.replace(".", " . ")
    sentence = sentence.replace(",", " , ")
    # BUG FIX: the original used `is not -1`, an identity comparison against an
    # int literal (SyntaxWarning; only worked via CPython's small-int cache).
    if sentence.find("'s") != -1:
        sentence = sentence.replace("'s", " ")
    if sentence.find("'ll") != -1:
        sentence = sentence.replace("'ll", " will")
    if sentence.find(" 'nd") != -1:
        sentence = sentence.replace("'nd", " ")
    if sentence.find(" 'rd") != -1:
        sentence = sentence.replace("'rd", " ")
    if sentence.find("!") != -1:
        sentence = sentence.replace("!", " ")
    if sentence.find("'t") != -1:
        sentence = sentence.replace("'t", "t")
    sentence = ' '.join(sentence.split())
    sentence_count = sentence
    number_list = count(sentence)
    check_len = sentence.split(" ")
    check_l = len(check_len)
    sentence = sentence.replace("?", "")
    sentence = changew_n(sentence)
    original_sentence = sentence
    token_dict = {}
    tok = word_tokenize(sentence)
    l_sentence = sentence
    pos = pos_tag(tok)  ## tagging initially to ignore the list of adjectives
    print(pos)
    chunking = nltk.ne_chunk(pos, binary=True)
    # chunking.draw()
    adjectives = []
    check_pos = ["NNP", "NN", "NNS", "RB", "JJ", "IN"]
    non_list = []
    sing_list = []
    check_list = ["along"]
    comma_check = []
    idx = 0
    for index, entity in enumerate(pos):
        idx += 1
        if idx <= len(pos) - 1:
            if entity[1] == "JJ" and '-' not in entity[0] and entity[0].lower() not in cuisine and pos[index + 1][1] not in check_pos:
                adjectives.append(entity[0])
            elif entity[1] == "CD":
                non_list.append(entity[0])
            elif entity[1] == "DT":
                check_list.append(entity[0])
                sing_list.append(" " + entity[0] + " ")
            # keep "," / "and" when it sits between two toppings so the phrase stays intact
            if entity[0] == ',' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                check_list.append(entity[0])
                comma_check.append(index)
            elif entity[0] == 'and' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                comma_check.append(index)
                check_list.append(entity[0])
    # tak_list is a dictionary that stores the index of "a"/"an" in sentence
    tak_list = {}
    for element in sing_list:
        if element == ' a ' or element == ' an ':
            for match in re.finditer(element, sentence):
                tak_list[(match.start(), element)] = 1
    new_sentence = ""
    for idx, wor in enumerate(sentence.split(' ')):
        # try:
        #     tmp = wn.synsets(wor)[0].pos() ## dont want to capitalise verbs
        # except Exception as ex:
        #     print(ex)
        #     tmp = "r"
        # check_list stores commas, "and", and determiners (DT)
        if wor.lower() in check_list:
            if wor.lower() == "," or wor.lower() == "and":
                if idx in comma_check:
                    continue
                else:
                    new_sentence = new_sentence + wor.lower() + " "
            else:
                continue
        elif wor in adjectives or wor in non_list:
            ## if word is verb or adjective it is not being capitalised
            wor = wor.lower()
            new_sentence = new_sentence + wor + " "
        elif wor in blacklist1:
            ## also dont want to capitalise stopwords
            if wor[:1].isupper():
                new_sentence = new_sentence + wor.lower() + " "
            else:
                new_sentence = new_sentence + wor + " "
        elif wor.lower() in blacklist1:
            new_sentence = new_sentence + wor.lower() + " "
        else:
            # capitalise the first letter of every remaining word so that
            # ne_chunk treats it as a proper-noun candidate
            new_sentence = new_sentence + wor[:1].upper() + wor[1:] + " "
    new_sentence = ' '.join(new_sentence.split())
    if new_sentence.endswith('.'):
        new_sentence = new_sentence[:-1]
    # new_sentence.replace("."," . ")
    tok = word_tokenize(new_sentence)
    tokens_pos = pos_tag(tok)
    # ne_chunk will combine consecutive proper nouns into subtrees
    chunking = nltk.ne_chunk(tokens_pos, binary=True)
    # collect the words of every subtree (multiword entity candidates)
    new_sentence_list = [
        " ".join(w for w, t in elt) for elt in chunking
        if isinstance(elt, nltk.Tree)
    ]
    print(new_sentence_list)
    # chunking.draw()
    list = []  # NOTE(review): shadows the builtin `list`; kept for compatibility
    # dict_word marks which words have already been consumed (0 = unused)
    dict_word = {}
    # print(new_sentence)
    for word in new_sentence.split(" "):
        dict_word[word] = 0
    # print(dict_word)
    # word_to_token maps each word to the noun phrase it ended up in
    word_to_token = {}
    for word in new_sentence.split(" "):
        word_to_token[word] = word
    # for member in new_sentence_list:
    #     for single_word in member.split(" "):
    #         dict_word[single_word]=1
    stop_list = ["with", "of"]
    quant_list = []
    for i, unique in enumerate(tokens_pos):
        sentence = ""
        if i < len(tokens_pos) - 1:
            # take all consecutive nouns and append them as one phrase
            if (unique[1] == "NNP" or unique[1] == "NN") and (tokens_pos[i + 1][1] == "NNP" or tokens_pos[i + 1][1] == "NNS" or tokens_pos[i + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                j = i
                loopno = 1
                enter = 0
                # print(unique[0])
                # NOTE(review): `tokens_pos[i + 1][1] == "NNPS"` in the condition
                # below looks like it should be j + 1 — confirm before changing.
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS" or tokens_pos[j + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                    # print("inside")
                    enter = enter + 1
                    # dict_word tracks which word has been taken;
                    # sentence accumulates the nouns of the phrase
                    if dict_word[tokens_pos[j][0]] == 0 and enter == 1:
                        sentence += tokens_pos[j][0] + " "
                        dict_word[tokens_pos[j][0]] = 1
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    elif enter != 1 and dict_word[tokens_pos[j + 1][0]] == 0:
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                # remove last space from sentence
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                dict_word[tokens_pos[j][0]] = 1
                list.append(sentence)
            # a noun followed by "with"/"of" and then another noun
            elif i < len(tokens_pos) - 2 and (unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS") and tokens_pos[i + 1][0] in stop_list and (tokens_pos[i + 2][1] == "NNP" or tokens_pos[i + 2][1] == "NN" or tokens_pos[i + 2][1] == "NNS"):
                j = i
                dict_word[tokens_pos[j][0]] = 1
                dict_word[tokens_pos[j + 1][0]] = 1
                dict_word[tokens_pos[j + 2][0]] = 1
                sentence += word_to_token[tokens_pos[j][0]] + " "
                if word_to_token[tokens_pos[j][0]] in list:
                    list.remove(word_to_token[tokens_pos[j][0]])
                sentence += tokens_pos[j + 1][0] + " "
                sentence += tokens_pos[j + 2][0] + " "
                j = j + 2
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS"):
                    sentence += tokens_pos[j + 1][0] + " "
                    dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                list.append(sentence)
            # standalone noun
            elif unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS":
                if (dict_word[unique[0]]) == 0:
                    list.append(unique[0])
            elif unique[1] == "JJ" and '-' in unique[0]:
                list.append(unique[0])
            elif unique[0].lower() in cuisine:
                list.append(unique[0])
    # last noun of sentence (the loop above stops one token short of the end)
    if tokens_pos[len(tokens_pos) - 1][1] == "NNP" or tokens_pos[len(tokens_pos) - 1][1] == "NN" or tokens_pos[len(tokens_pos) - 1][1] == "NNS":
        if dict_word[tokens_pos[len(tokens_pos) - 1][0]] == 0:
            list.append(tokens_pos[len(tokens_pos) - 1][0])
    # check quantity (cardinal numbers)
    for i, unique in enumerate(tokens_pos):
        if unique[1] == "CD":
            quant_list.append(unique[0])
    new1_list = []
    original_sentence1 = original_sentence
    for key in quant_list:
        idx = original_sentence1.find(key)
        new1_list.append((idx, key))
        # blank out the (first character of the) number so that a repeated
        # quantity is found at its next position on the following iteration
        original_sentence1 = original_sentence1[:idx] + "$" + original_sentence1[idx + 1:]
    # print(list)
    for element in new_sentence.split(" "):
        if element.lower() in also_check and dict_word[element] == 0:
            list.append(element)
    remove_list = []
    # new_sentence_list holds the ne_chunk subtrees; drop any subtree that is
    # a proper substring of a phrase we already collected
    for element2 in new_sentence_list:
        for unique2 in list:
            if element2 in unique2:
                if not element2 == unique2:
                    remove_list.append(element2)
    for element in remove_list:
        if element in new_sentence_list:
            new_sentence_list.remove(element)
    final_list = list + new_sentence_list
    # de-duplicate
    set1 = set(final_list)
    new_list = []
    for element in set1:
        new_list.append(element)
    if " " in new_list:
        new_list.remove(" ")
    if '' in new_list:
        new_list.remove('')
    top_list = []
    # keep only candidates that are not blacklisted
    for element in new_list:
        if element.lower() not in blacklist1 and element not in blacklist1:
            top_list.append(element.lower())
    check_dict = {}
    check_list = []
    for element in top_list:
        # "order" gets tagged as a noun, so strip it out of phrases
        if "order " in element:
            element1 = element.replace("order ", "")
            top_list.remove(element)
            top_list.append(element1)
    sep_list = []
    food_dict, transform_dict = main_check(top_list)
    print(food_dict)
    nf_list = []
    # position of each recognised food in the original sentence
    for key in food_dict:
        idx = original_sentence.find(transform_dict[key])
        nf_list.append((idx, key))
    new1_list.sort(reverse=True)
    nf_list.sort()
    check_list = []
    found_dict = {}
    pre_dict = {}
    # pre_dict: foods preceded by an "a"/"an" article
    for element in tak_list:
        for food in nf_list:
            if element[0] < food[0]:
                pre_dict[food[1]] = element
                break
    # c_dict: nearest unclaimed number appearing before each food
    c_dict = {}
    for i, element in enumerate(nf_list):
        flag = 0
        for indiv in new1_list:
            if indiv[0] < element[0] and indiv not in check_list:
                c_dict[element] = indiv
                check_list.append(indiv)
                flag = 1
                break
        if flag == 0:
            c_dict[element] = (-1, -1)
    final_dict = {}
    # element_list: foods whose words all look singular (no 's' suffix)
    element_list = []
    for element in nf_list:
        check = 0
        for word in element[1].split():
            if word.endswith('s'):
                check = 1
                break
        if check == 0:
            element_list.append(element)
    for i, element in enumerate(nf_list):
        if i == 0:
            if c_dict[element] == (-1, -1):
                # no explicit number: 1 for singular / article-preceded foods,
                # 2 for bare plurals
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                check_list.append(c_dict[element])
                final_dict[element[1]] = c_dict[element][1]
        else:
            if c_dict[element] == (-1, -1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            # elif c_dict[element] in check_list:
            #     if inflect.singular_noun(element[1]) is False or element in element_list:
            #         final_dict[element[1]]=1
            #     else:
            #         final_dict[element[1]]=2
            else:
                final_dict[element[1]] = c_dict[element][1]
    # assemble the structured result
    new_food_dict = {}
    for element in food_dict:
        new_food_dict[element] = {}
        new_food_dict[element]["attribute"] = []
        new_food_dict[element]["quantity"] = 0
        new_food_dict[element]["topping"] = []
    for element in food_dict:
        for item in food_dict[element]:
            if item in top:
                new_food_dict[element]["topping"].append(item)
            elif item in att:
                new_food_dict[element]["attribute"].append(item)
            else:
                new_food_dict[element]["attribute"].append(item)
    for element in new_food_dict:
        if element in final_dict:
            new_food_dict[element]["quantity"] = final_dict[element]
    print(new_food_dict)
    print(final_dict)
    # print(number_list)
    return top_list, new_food_dict, transform_dict, number_list
def singlar_or_plural(a, d):
    """Return True when *a* and *d* agree in grammatical number.

    BUG FIX: the original compared ``inflect.singular_noun(x) == x``, which is
    almost never true — ``singular_noun`` returns False for singular words and
    the *different* singular form for plural ones — so the function returned
    True for nearly every pair. A word is plural exactly when
    ``singular_noun()`` returns a truthy singular form.
    """
    aflag = inflect.singular_noun(a) is not False
    dflag = inflect.singular_noun(d) is not False
    return aflag == dflag
import inflect

# Replace the module name with a ready-to-use engine instance, matching the
# convention used by the rest of the file.
inflect = inflect.engine()

foods = ["strawberries", "apple", "banana pancake", "pineapple"]
for food in foods:
    # singular_noun() is False when the word is already singular:
    # pluralise those, echo the already-plural ones unchanged.
    print(inflect.plural(food) if inflect.singular_noun(food) is False else food)
def is_are(f):
    """Return the copula agreeing with *f*: 'are' for plural nouns, else 'is'."""
    # singular_noun() yields a truthy singular form only when f is plural.
    return 'are' if inflect.singular_noun(f) else 'is'
c_dict[element]=(-1,-1) final_dict={} element_list=[] for element in nf_list: check=0 for word in element[1].split(): if word.endswith('s'): check=1 break if check==0: element_list.append(element) for i,element in enumerate(nf_list): if i==0: if c_dict[element]==(-1,-1): if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict: final_dict[element[1]]=1 else: final_dict[element[1]]=2 else: check_list.append(c_dict[element]) final_dict[element[1]] = c_dict[element][1] else: if c_dict[element]==(-1,-1): if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict: final_dict[element[1]]=1 else: final_dict[element[1]]=2 # elif c_dict[element] in check_list: # if inflect.singular_noun(element[1]) is False or element in element_list:
def return_entities(sentence):
    """Extract food entities with quantities and toppings from an order sentence.

    Variant of the pipeline that does not run the number-word counter: it
    normalises contractions/punctuation, POS-tags, re-capitalises probable
    nouns so ``nltk.ne_chunk`` merges multiword names, harvests
    consecutive-noun phrases, then assigns each food a quantity (preceding
    number, preceding "a"/"an" article, or 2 when plural).

    Depends on module-level names: ``changew_n``, ``main_check``, ``inflect``
    (an inflect engine), ``cuisine``, ``top``, ``att``, ``blacklist1``,
    ``also_check``, ``wn`` and NLTK's ``word_tokenize`` / ``pos_tag``.
    NOTE(review): ``chunking.draw()`` opens a GUI window — debugging aid left
    in place to preserve behaviour.

    Returns:
        (top_list, new_food_dict, transform_dict)
    """
    # sentence="so I would like to travel from Gorakhpur to Mumbai on monday.show me the flights."
    sentence = sentence.lower()
    sentence = sentence.replace(".", " . ")
    sentence = sentence.replace(",", " , ")
    # BUG FIX: the original used `is not -1`, an identity comparison against an
    # int literal (SyntaxWarning; only worked via CPython's small-int cache).
    if sentence.find("'s") != -1:
        sentence = sentence.replace("'s", " ")
    if sentence.find("'ll") != -1:
        sentence = sentence.replace("'ll", " will")
    if sentence.find(" 'nd") != -1:
        sentence = sentence.replace("'nd", " ")
    if sentence.find(" 'rd") != -1:
        sentence = sentence.replace("'rd", " ")
    if sentence.find("!") != -1:
        sentence = sentence.replace("!", " ")
    if sentence.find("'t") != -1:
        sentence = sentence.replace("'t", "t")
    sentence = ' '.join(sentence.split())
    check_len = sentence.split(" ")
    check_l = len(check_len)
    sentence = sentence.replace("?", "")
    sentence = changew_n(sentence)
    original_sentence = sentence
    token_dict = {}
    tok = word_tokenize(sentence)
    l_sentence = sentence
    pos = pos_tag(tok)  ## tagging initially to ignore the list of adjectives
    print(pos)
    chunking = nltk.ne_chunk(pos, binary=True)
    # chunking.draw()
    adjectives = []
    check_pos = ["NNP", "NN", "NNS", "RB", "JJ", "IN"]
    non_list = []
    sing_list = []
    check_list = ["along"]
    comma_check = []
    idx = 0
    for index, entity in enumerate(pos):
        idx += 1
        if idx <= len(pos) - 1:
            if entity[1] == "JJ" and '-' not in entity[0] and entity[0].lower() not in cuisine and pos[index + 1][1] not in check_pos:
                adjectives.append(entity[0])
            elif entity[1] == "CD":
                non_list.append(entity[0])
            elif entity[1] == "DT":
                check_list.append(entity[0])
                sing_list.append(" " + entity[0] + " ")
            # keep "," / "and" when it sits between two toppings so the phrase stays intact
            if entity[0] == ',' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                check_list.append(entity[0])
                comma_check.append(index)
            elif entity[0] == 'and' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                comma_check.append(index)
                check_list.append(entity[0])
    # tak_list stores the index of each "a"/"an" article in the sentence
    tak_list = {}
    for element in sing_list:
        if element == ' a ' or element == ' an ':
            for match in re.finditer(element, sentence):
                tak_list[(match.start(), element)] = 1
    new_sentence = ""
    for idx, wor in enumerate(sentence.split(' ')):
        try:
            tmp = wn.synsets(wor)[0].pos()  ## dont want to capitalise verbs
        except Exception as ex:
            print(ex)
            tmp = "r"
        # check_list stores commas, "and", and determiners (DT)
        if wor.lower() in check_list:
            if wor.lower() == "," or wor.lower() == "and":
                if idx in comma_check:
                    continue
                else:
                    new_sentence = new_sentence + wor.lower() + " "
            else:
                continue
        elif wor in adjectives or wor in non_list:
            ## if word is verb or adjective it is not being capitalised
            wor = wor.lower()
            new_sentence = new_sentence + wor + " "
        elif wor in blacklist1:
            ## also dont want to capitalise stopwords
            if wor[:1].isupper():
                new_sentence = new_sentence + wor.lower() + " "
            else:
                new_sentence = new_sentence + wor + " "
        elif wor.lower() in blacklist1:
            new_sentence = new_sentence + wor.lower() + " "
        else:
            new_sentence = new_sentence + wor[:1].upper() + wor[1:] + " "  ## else capitalise all other words
    new_sentence = ' '.join(new_sentence.split())
    if new_sentence.endswith('.'):
        new_sentence = new_sentence[:-1]
    # new_sentence.replace("."," . ")
    check = '.' in new_sentence
    tok = word_tokenize(new_sentence)
    tokens_pos = pos_tag(tok)
    # ne_chunk combines consecutive proper nouns into subtrees
    chunking = nltk.ne_chunk(tokens_pos, binary=True)
    new_sentence_list = [" ".join(w for w, t in elt) for elt in chunking if isinstance(elt, nltk.Tree)]
    print(new_sentence_list)
    chunking.draw()
    list = []  # NOTE(review): shadows the builtin `list`; kept for compatibility
    # dict_word marks which words have already been consumed (0 = unused)
    dict_word = {}
    print(new_sentence)
    for word in new_sentence.split(" "):
        dict_word[word] = 0
    print(dict_word)
    # word_to_token maps each word to the noun phrase it ended up in
    word_to_token = {}
    for word in new_sentence.split(" "):
        word_to_token[word] = word
    # for member in new_sentence_list:
    #     for single_word in member.split(" "):
    #         dict_word[single_word]=1
    stop_list = ["with", "of"]
    quant_list = []
    for i, unique in enumerate(tokens_pos):
        sentence = ""
        if i < len(tokens_pos) - 1:
            # take all consecutive nouns and append them as one phrase
            if (unique[1] == "NNP" or unique[1] == "NN") and (tokens_pos[i + 1][1] == "NNP" or tokens_pos[i + 1][1] == "NNS" or tokens_pos[i + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                j = i
                loopno = 1
                enter = 0
                print(unique[0])
                # NOTE(review): `tokens_pos[i + 1][1] == "NNPS"` in the condition
                # below looks like it should be j + 1 — confirm before changing.
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS" or tokens_pos[j + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                    print("inside")
                    enter = enter + 1
                    if dict_word[tokens_pos[j][0]] == 0 and enter == 1:
                        sentence += tokens_pos[j][0] + " "
                        dict_word[tokens_pos[j][0]] = 1
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    elif enter != 1 and dict_word[tokens_pos[j + 1][0]] == 0:
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                # remove trailing space from the accumulated phrase
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                dict_word[tokens_pos[j][0]] = 1
                list.append(sentence)
            # a noun followed by "with"/"of" and then another noun
            elif i < len(tokens_pos) - 2 and (unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS") and tokens_pos[i + 1][0] in stop_list and (tokens_pos[i + 2][1] == "NNP" or tokens_pos[i + 2][1] == "NN" or tokens_pos[i + 2][1] == "NNS"):
                j = i
                dict_word[tokens_pos[j][0]] = 1
                dict_word[tokens_pos[j + 1][0]] = 1
                dict_word[tokens_pos[j + 2][0]] = 1
                sentence += word_to_token[tokens_pos[j][0]] + " "
                if word_to_token[tokens_pos[j][0]] in list:
                    list.remove(word_to_token[tokens_pos[j][0]])
                sentence += tokens_pos[j + 1][0] + " "
                sentence += tokens_pos[j + 2][0] + " "
                j = j + 2
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS"):
                    sentence += tokens_pos[j + 1][0] + " "
                    dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                list.append(sentence)
            # standalone noun
            elif unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS":
                if (dict_word[unique[0]]) == 0:
                    list.append(unique[0])
            elif unique[1] == "JJ" and '-' in unique[0]:
                list.append(unique[0])
            elif unique[0].lower() in cuisine:
                list.append(unique[0])
    # last noun of sentence (the loop above stops one token short of the end)
    if tokens_pos[len(tokens_pos) - 1][1] == "NNP" or tokens_pos[len(tokens_pos) - 1][1] == "NN" or tokens_pos[len(tokens_pos) - 1][1] == "NNS":
        if dict_word[tokens_pos[len(tokens_pos) - 1][0]] == 0:
            list.append(tokens_pos[len(tokens_pos) - 1][0])
    # collect explicit quantities (cardinal numbers)
    for i, unique in enumerate(tokens_pos):
        if unique[1] == "CD":
            quant_list.append(unique[0])
    new1_list = []
    for key in quant_list:
        idx = original_sentence.find(key)
        new1_list.append((idx, key))
    # print(list)
    for element in new_sentence.split(" "):
        if element.lower() in also_check and dict_word[element] == 0:
            list.append(element)
    remove_list = []
    # drop any ne_chunk subtree that is a proper substring of a phrase we
    # already collected
    for element2 in new_sentence_list:
        for unique2 in list:
            if element2 in unique2:
                if not element2 == unique2:
                    remove_list.append(element2)
    for element in remove_list:
        if element in new_sentence_list:
            new_sentence_list.remove(element)
    final_list = list + new_sentence_list
    # de-duplicate
    set1 = set(final_list)
    new_list = []
    for element in set1:
        new_list.append(element)
    if " " in new_list:
        new_list.remove(" ")
    if '' in new_list:
        new_list.remove('')
    top_list = []
    # keep only candidates that are not blacklisted
    for element in new_list:
        if element.lower() not in blacklist1 and element not in blacklist1:
            top_list.append(element.lower())
    check_dict = {}
    check_list = []
    for element in top_list:
        # "order" gets tagged as a noun, so strip it out of phrases
        if "order " in element:
            element1 = element.replace("order ", "")
            top_list.remove(element)
            top_list.append(element1)
    sep_list = []
    food_dict, transform_dict = main_check(top_list)
    print(food_dict)
    nf_list = []
    # position of each recognised food in the original sentence
    for key in food_dict:
        idx = original_sentence.find(transform_dict[key])
        nf_list.append((idx, key))
    new1_list.sort(reverse=True)
    nf_list.sort()
    check_list = []
    found_dict = {}
    pre_dict = {}
    # pre_dict: foods preceded by an "a"/"an" article
    for element in tak_list:
        for food in nf_list:
            if element[0] < food[0]:
                pre_dict[food[1]] = element
                break
    # c_dict: nearest unclaimed number appearing before each food
    c_dict = {}
    for i, element in enumerate(nf_list):
        flag = 0
        for indiv in new1_list:
            if indiv[0] < element[0] and indiv not in check_list:
                c_dict[element] = indiv
                check_list.append(indiv)
                flag = 1
                break
        if flag == 0:
            c_dict[element] = (-1, -1)
    final_dict = {}
    # element_list: foods whose words all look singular (no 's' suffix)
    element_list = []
    for element in nf_list:
        check = 0
        for word in element[1].split():
            if word.endswith('s'):
                check = 1
                break
        if check == 0:
            element_list.append(element)
    for i, element in enumerate(nf_list):
        if i == 0:
            if c_dict[element] == (-1, -1):
                # no explicit number: 1 for singular / article-preceded foods,
                # 2 for bare plurals
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                check_list.append(c_dict[element])
                final_dict[element[1]] = c_dict[element][1]
        else:
            if c_dict[element] == (-1, -1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            # elif c_dict[element] in check_list:
            #     if inflect.singular_noun(element[1]) is False or element in element_list:
            #         final_dict[element[1]]=1
            #     else:
            #         final_dict[element[1]]=2
            else:
                final_dict[element[1]] = c_dict[element][1]
    # assemble the structured result
    new_food_dict = {}
    for element in food_dict:
        new_food_dict[element] = {}
        new_food_dict[element]["attribute"] = []
        new_food_dict[element]["quantity"] = 0
        new_food_dict[element]["topping"] = []
    for element in food_dict:
        for item in food_dict[element]:
            if item in top:
                new_food_dict[element]["topping"].append(item)
            elif item in att:
                new_food_dict[element]["attribute"].append(item)
            else:
                new_food_dict[element]["attribute"].append(item)
    for element in new_food_dict:
        if element in final_dict:
            new_food_dict[element]["quantity"] = final_dict[element]
    print(new_food_dict)
    print(final_dict)
    return top_list, new_food_dict, transform_dict