Esempio n. 1
0
def create_passive(doc, sub_idx, root_idx, obj_index, obj_start, obj_end, negation_availability):
    """Print the passive-voice rendering of *doc*.

    Builds "<object> is/are [not] <past participle> [<rest>]" from the parsed
    sentence and prints it, followed by a blank spacer line.

    :param doc: parsed sentence (spaCy-style Doc; tokens expose .lemma_)
    :param sub_idx: subject index (unused, kept for interface compatibility)
    :param root_idx: index of the root verb
    :param obj_index: index of the object head (used for the number check)
    :param obj_start: start index of the object span
    :param obj_end: end index (exclusive) of the object span
    :param negation_availability: True when the sentence is negated
    """
    # Past participle (VBN) of the root verb's lemma.
    participle = str(getInflection(doc[root_idx].lemma_, tag='VBN')[0])

    # singular_noun() returns False when the object head is already singular.
    copula = "is" if inflect.singular_noun(str(doc[obj_index])) is False else "are"
    if negation_availability:
        copula += " not"

    obj = str(doc[obj_start:obj_end])

    # 'obj_end + 2' distinguishes sentences that continue after the object
    # from those that end with it (which get a full stop with no space).
    if len(doc) > obj_end + 2:
        print(obj + " " + copula + " " + participle + " " + str(doc[obj_end:]))
    else:
        # BUG FIX: the plural negated branch previously printed " are not"
        # with no trailing space before the participle.
        print(obj + " " + copula + " " + participle + ".")
    print(" ")
Esempio n. 2
0
def establish_headnoun(hn, loc, xpos):
    """Build an Entity for head noun *hn*, setting entity type, gender, plurality.

    :param hn: head-noun string
    :param loc: location, passed through to Entity
    :param xpos: POS tag, passed through to Entity
    :return: the populated Entity
    """
    head = Entity(hn, loc, xpos)
    split = head.name.split(' ')
    doc = nlp(hn)
    # Keep the last entity label spaCy finds, matching the original behavior.
    for ent in doc.ents:
        head.change_entity(ent.label_)

    # BUG FIX: string comparisons previously used 'is' / 'is not', which
    # compare identity and only appeared to work via CPython interning.
    if head.entity == 'PERSON':
        if 'Mr.' in hn:
            head.change_gender('male')
        elif 'Ms.' in hn or 'Miss' in hn or 'Mrs.' in hn:
            head.change_gender('female')
        else:
            split = head.name.split(' ')
            if any(gender_guess.get_gender(elem) for elem in split):
                # Use the first name part with a known gender guess.
                for part in split:
                    if gender_guess.get_gender(part) != 'unknown':
                        head.change_gender(gender_guess.get_gender(part))
                        break
        head.change_plural(-1)
    elif head.entity == 'ORG':
        head.change_plural(-1)
    elif not any(inflect.singular_noun(elem) for elem in split):
        # No word in the name is plural -> mark singular.
        head.change_plural(-1)
    else:
        for token in hn.split(' '):
            if inflect.singular_noun(token):
                head.change_plural(1)
    return head
Esempio n. 3
0
def keywords(passage):
    """Extract keyword statistics from *passage*.

    Words are singularized and counted; the 20 most common unigrams,
    bigrams and trigrams are returned.

    :param passage: raw text to analyze
    :return: (initial word count, filtered word count,
              top unigrams, top bigrams, top trigrams)
    """
    word = []
    words = re.findall(r'\w+', passage)
    ini_tot_words = len(words)
    for w in words:
        if w == '000':
            w = 'THOUSAND'  # Future work: generalize!
        # The previous 'w != ""' test was redundant given the length check.
        if len(w) >= 2:
            # singular_noun() returns False when w is already singular;
            # call it once instead of twice as before.
            singular = inflect.singular_noun(w)
            word.append(singular if singular else w)

    tot_words = len(word)

    # Count words and select the n-most repeated ones.
    word_counts = Counter(word)
    key_word_1 = word_counts.most_common(20)        # top single key-words

    all_2key_words = Counter(ngrams(word, 2))
    key_words_2 = all_2key_words.most_common(20)    # top bigrams

    all_3key_words = Counter(ngrams(word, 3))
    key_words_3 = all_3key_words.most_common(20)    # top trigrams

    return (ini_tot_words, tot_words, key_word_1, key_words_2, key_words_3)
                                           
Esempio n. 4
0
    def create_passive(doc, root_idx, obj_index, obj_start, obj_end,
                       negation_availability):
        """Return the passive-voice rendering of *doc* as a string.

        Builds "<object> is/are [not] <past participle> [<rest>]"; on any
        failure, falls back to the original sentence text.

        :param doc: parsed sentence (tokens expose .lemma_)
        :param root_idx: index of the root verb
        :param obj_index: index of the object head (for the number check)
        :param obj_start: start index of the object span
        :param obj_end: end index (exclusive) of the object span
        :param negation_availability: True when the sentence is negated
        :return: passive sentence string, or str(doc) on failure
        """
        print(doc)
        try:
            # Past participle (VBN) of the root verb's lemma.
            participle = str(getInflection(doc[root_idx].lemma_, tag='VBN')[0])

            # singular_noun() is False when the object head is singular.
            copula = ("is" if inflect.singular_noun(str(doc[obj_index])) is False
                      else "are")
            if negation_availability:
                copula += " not"

            obj = str(doc[obj_start:obj_end])

            # 'obj_end + 2' checks whether the sentence continues after the
            # object; if it ends there, append a full stop with no space.
            if len(doc) > obj_end + 2:
                return obj + " " + copula + " " + participle + " " + str(doc[obj_end:])
            # BUG FIX: the plural negated branch previously returned
            # " are not" with no trailing space before the participle.
            return obj + " " + copula + " " + participle + "."
        except Exception:
            # Narrowed from a bare 'except:'; fall back to the raw sentence.
            return str(doc)
Esempio n. 5
0
    def dRegVase(self):
        """Render an English description of this vase node and its children.

        Produces phrases such as ' the vase contains a rose, 2 tulips and
        a fern. ' or 'the X is made of N Y. ', recursing into dRegVase()
        for nested children.
        """
        # Special case: a pure container node lists everything it holds.
        if self.name == 'containing things':
            desc_subs = ''
            i = len(self.children)
            imax = i
            for child in self.children:
                # singular_noun() is False when the name is already singular
                singular = not inflect.singular_noun(child.name)
                multi = 'a' if singular else ' {} '.format(child.quantity)
                template = ', {} {} '
                if i == imax: template = '{} {} '   # first item: no comma
                if i == 1: template = 'and {} {} '  # last item: 'and'
                desc_subs = desc_subs + template.format(multi, child.name)
                i = i - 1
            return ' the {} contains {}. '.format('vase', desc_subs)

        if (len(self.children) == 0):
            # Leaf node: nothing left to describe.
            return 'OVERLOAD: {} '.format(self.name)

        if (len(self.children) == 1):
            # multi: True when this node itself represents several items.
            multi = (self.quantity > 1)

            if (len(self.children[0].children) == 0):
                if multi:
                    return 'each {} is made of {} {}. '.format(
                        inflect.singular_noun(self.name),
                        self.children[0].quantity
                        if self.children[0].quantity > 1 else 'a',
                        self.children[0].name)
                else:
                    return 'the {} is made of {} {}. '.format(
                        self.name, self.children[0].quantity
                        if self.children[0].quantity > 1 else 'a',
                        self.children[0].name)
            elif (len(self.children[0].children) == 1):  # has just 1 child
                return 'the {} which is {} '.format(
                    self.name, self.children[0].dRegVase())
            else:  # has multiple children
                return 'the {} which is {} '.format(
                    self.name, self.children[0].dRegVase())

        # Multiple children: enumerate them like the container case above.
        desc_subs = ''
        i = len(self.children)
        imax = i
        for child in self.children:
            singular = not inflect.singular_noun(child.name)
            multi = 'a' if singular else ' {} '.format(child.quantity)
            template = ', {} {} '
            if i == imax: template = '{} {} '
            if i == 1: template = 'and {} {} '
            desc_subs = desc_subs + template.format(multi, child.name)
            i = i - 1
        return 'the {} has {}. '.format(self.name, desc_subs)
Esempio n. 6
0
def keep_plurals(noun, newnoun):
    """Give *newnoun* the same grammatical number as *noun*."""
    # singular_noun() returns False when the word is already singular.
    noun_is_singular = inflect.singular_noun(noun) is False
    return singularize(newnoun) if noun_is_singular else pluralize(newnoun)
Esempio n. 7
0
def modifier(sentence, root_verb, subject, aux_list):
    """Convert a continuous sentence using the auxiliary between subject and verb.

    Dispatches to i_based_sent / singular_sent / plural_sent depending on
    the subject, or returns False when no matching auxiliary is found.

    :param sentence: parsed sentence (tokens expose .dep_ and .lemma_)
    :param root_verb: index of the root verb
    :param subject: index of the subject token
    :param aux_list: auxiliary strings to accept
    :return: converted sentence string, or False
    """
    # Select the aux tokens between subject and root verb that are both
    # dependency-tagged "aux" and present in aux_list.
    aux_index = [
        idx for idx in range(len(sentence)) if str(sentence[idx].dep_) == "aux"
        and subject < idx < root_verb and str(sentence[idx]) in aux_list
    ]
    # Base form of the root verb.
    base_verb = sentence[root_verb].lemma_

    if len(aux_index) != 0:
        aux_idx = aux_index[0]
        # Negation is present when 'not' immediately follows the auxiliary.
        negation_availability = str(sentence[aux_idx + 1]) == "not"

        # BUG FIX: previously compared with 'is "I"' (identity, not equality).
        if str(sentence[subject]) == "I":
            return i_based_sent(negation_availability, sentence, aux_idx,
                                root_verb, base_verb)

        # singular_noun() is False when the subject is singular.
        elif inflect.singular_noun(str(sentence[subject])) is False:
            return singular_sent(negation_availability, sentence, aux_idx,
                                 root_verb, base_verb)

        # Plural continuous sentence.
        else:
            return plural_sent(negation_availability, sentence, aux_idx,
                               root_verb, base_verb)
    return False
Esempio n. 8
0
def import_data(data, session):
    """
    Import employee data into the database using the given session object.

    :param data: Employee data to import, see ``employee_data``
    :param session: SQLAlchemy session object
    """
    items = []

    for table_name, table_data in data.items():

        # singular_noun() returns False when the name is already singular;
        # fall back to the name itself so pascalcase() always gets a string.
        table_name_singular = inflect.singular_noun(table_name) or table_name
        table_name_pascal = stringcase.pascalcase(table_name_singular)

        # Look up the model class by its PascalCase name.
        factory = globals()[table_name_pascal]
        items += [
            # construct an instance of the Model, ignoring id columns since
            # these are auto incremented in the database.
            factory(
                **{
                    name: value
                    for name, value in construct_args.items()
                    # don't add primary key
                    if name != table_name_singular + '_id'
                }) for construct_args in table_data
        ]

    session.add_all(items)
    session.flush()
    session.commit()
Esempio n. 9
0
def process_line(i):
	"""Pick one category/answer pair for input line *i*.

	Prefers a single plural-headed category; falls back to a single
	singular-headed one. Returns (answer_with_head, answer), each
	tab-terminated.
	"""
	# NOTE(review): reads module-level all_head/all/cat_head/cat lists;
	# 'all' shadows the builtin — confirm before renaming.
	a_h = all_head[i]
	a_h = a_h.strip().split('\t')
	ans_h = a_h[0] + '\t'
	ans = a_h[0] + '\t'
	if len(a_h)>1:
		# Line already has more than one field: nothing to add.
		return ans_h, ans
	a = all[i]
	a = a.strip().split('\t')
	c_h = cat_head[i]
	c_h = c_h.strip().split('\t')
	c  = cat[i]
	c = c.strip().split('\t')
	s_head = []  # head words of singular-headed categories
	p_head = []  # head words of plural-headed categories
	s = []
	p = []
	for j in range(1, len(c)):
		# singular_noun() == False means the head word is singular.
		if inflect.singular_noun(c_h[j]) == False:
			s_head.append(c_h[j])	
			s.append(c[j])
		else:
			p_head.append(c_h[j])	
			p.append(c[j])
	# Exactly one plural category wins; otherwise a lone singular one.
	if len(p) == 1:
		ans_h += p_head[0]+'\t'
		ans += p[0]+'\t'
	elif len(p) == 0:
		if len(s) == 1:
			ans_h += s_head[0] + '\t'
			ans += s[0] + '\t'
	return ans_h, ans
Esempio n. 10
0
def pick_keyword(word):
    """Decide whether *word* is usable as a puzzle keyword.

    Rejects plurals, words without a definition or question details, and
    words whose clue text contains profanity. Also stores the fetched
    definition on the word's database entry.
    """
    global wordDatabase

    entry = wordDatabase[word]
    print(word + "\n" + str(entry) + "\n" + str(len(wordDatabase)) + "\n")

    # singular_noun() is truthy for plurals; plurals are rejected.
    if inflect.singular_noun(word):
        return False

    definition = wordDef.get_definition(word)
    entry["long"] = definition
    if definition == "":
        return False

    details = wordDef.choose_questions(word, definition=definition)
    print(word)
    print(details)

    if details is None:
        return False

    clue_list = [(key, details[key]) for key in details]
    clue_list.append((word, definition))

    if wordDef.has_swear_words(str(clue_list) + " " + word):
        return False

    return True
Esempio n. 11
0
    def dRoot(self):
        """Render the root-level English description of this node.

        Vases delegate to dRootVase(); small shallow trees use dSmall();
        otherwise the children are enumerated and described recursively.
        """
        if (self.mcat == 'Vase'):
            return self.dRootVase()
        if (self.level < 1 and self.total_children < 3):
            # Shallow tree with few children: short-form description.
            return ' {} '.format(self.dSmall())

        if (len(self.children) > 1 or self.level >= 2):
            desc_subs = ''       # comma-separated child list
            desc_all_subs = ''   # recursive descriptions of non-leaf children
            i = len(self.children)
            imax = i
            for child in self.children:
                # singular_noun() is False when the name is already singular
                singular = not inflect.singular_noun(child.name)
                multi = 'a' if singular else 'a set of {}'.format(
                    child.quantity)
                template = '{}, {} {}'
                if i == imax: template = '{} {} {}'   # first item: no comma
                if i == 1: template = '{} and {} {}'  # last item: 'and'
                desc_subs = template.format(desc_subs, multi, child.name)
                desc_all_subs = desc_all_subs + (child.dReg(
                    1, 0) if (len(child.children) > 0) else '')
                i = i - 1
            return 'a {} that is made of {}. {}'.format(
                self.name, desc_subs, desc_all_subs)
        else:
            return '{} that is {}'.format(self.name, self.children[0].dRoot())
Esempio n. 12
0
 def singular(word):
     """
     Returns a singularized word
     :rtype: str
     """
     # singular_noun() yields a falsy result for already-singular words,
     # in which case the word is returned unchanged.
     result = inflect.singular_noun(word)
     return result if result else word
def replace_pronoun(sentence, subject_idx):
    """Replace the subject token with 'it' (singular) or 'they' (plural)."""
    before = str(sentence[:subject_idx]).strip()
    after = str(sentence[subject_idx + 1:]).strip()
    # singular_noun() is False when the subject is already singular.
    if inflect.singular_noun(str(sentence[subject_idx])) is False:
        pronoun = " it "
    else:
        pronoun = " they "
    return (before + pronoun + after).strip()
Esempio n. 14
0
def analyze_numerus(attribute):
    """Prefix *attribute* with 'a'/'an' when its head noun is singular.

    The article is chosen from the first word of the attribute; plural
    attributes are returned unchanged.
    """
    vowels = ('a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U')
    tokens = nltk.word_tokenize(attribute)
    tagged = nltk.pos_tag(tokens)
    last = len(tagged) - 1
    for idx, (token, tag) in enumerate(tagged):
        if idx < last:
            # Only the final noun of a noun run counts as the head noun.
            if tag != 'NN' or tagged[idx + 1][1] == 'NN':
                continue
        # singular_noun() is falsy for singular words -> add an article.
        if not inflect.singular_noun(token):
            article = 'an' if tagged[0][0].startswith(vowels) else 'a'
            return article + ' ' + attribute
    return attribute
Esempio n. 15
0
def adorn_adj_with_noun(astring):
    """Build a 'behold---... as a <noun>' phrase from *astring*.

    Drops the article 'a' when the chosen comparison noun is plural.
    """
    thisorthose, noun, adj, other_nouns = get_noun_adj_othernouns(astring)
    # Pick one of the top-ten candidate nouns.
    other_noun = random.choice(other_nouns[:10])[0]
    str_rep = "behold---%s %s %s as a %s" % (thisorthose, noun, adj,
                                             other_noun)
    # singular_noun() is truthy only for plurals (False if already singular),
    # in which case the article must be removed.
    if inflect.singular_noun(other_noun):
        str_rep = str_rep.replace(" as a", " as")
    return str_rep
Esempio n. 16
0
def initialize_data(topic):
    """Fetch PubMed records and term lists for a study topic.

    :param topic: "OP" (UV exposure / osteoporosis) or "PD"
        (pesticides / Parkinson's disease)
    :return: tuple (records, chemicals, diseases)
    """
    if topic == "OP":
        results = search(
            "(Ultraviolet rays[MeSH Terms] OR Sunlight[Mesh]) AND (Osteoporosis[MeSH Terms]"
        )
        records = fetch_medline(results['IdList'])

        # MeSH entry terms for the exposure agent.
        agent_mesh = [
            "Ultraviolet Rays", "Ultra-Violet Rays", "Ultra-Violet Rays",
            "Ultra Violet Rays", "Actinic Rays", "Ultraviolet Light",
            "Ultraviolet", "UV Light", "UV", "Black Lights",
            "Ultraviolet Black Lights"
        ]
        expanded_terms = [
            "Sun", "Sunshine", "Sunlight", "Ultraviolet Radiation", "UVA",
            "UVB", "Ultraviolet A", "Ultraviolet B"
        ]
        # Add crude singular forms (trailing 's' stripped) plus expansions.
        chemicals = agent_mesh + [mesh.rstrip("s")
                                  for mesh in agent_mesh] + expanded_terms
        diseases = ["osteoporosis", "osteoporosis, NOS", "osteoporoses"]
    if topic == "PD":
        # Local spreadsheet of chemical names to merge with the MeSH terms.
        data = pandas.read_csv(
            '/Users/dyin/Desktop/Semester 2/HaBIC/common_sql.csv', header=0)

        agent_mesh = [
            "Acaricides", "Chemosterilants", "Fungicides", "Herbicides",
            "Defoliants", "Insect Repellents", "Insecticides", "Molluscacides",
            "Pesticide Residues", "Pesticide Synergists", "Rodenticides",
            "Pesticides"
        ]
        chemicals = data["Chemical name"].tolist() + agent_mesh + [
            inflect.singular_noun(mesh) for mesh in agent_mesh
        ]

        results = search(
            "(Parkinson disease[Mesh] OR Parkinsonian Disorders[Mesh]) AND Pesticides[Mesh]"
        )
        records = fetch_medline(results['IdList'])

        diseases = [
            "Idiopathic Parkinson's disease", "Parkinson disease", "PD",
            "Parkinson's disease", "Parkinsons disease",
            "Primary Parkinsonism", "Idiopathic Parkinsonism",
            "Parkinson's disease (disorder)", "Parkinson's disease, NOS",
            "Paralysis agitans", "Idiopathic parkinsonism",
            "Primary parkinsonism", "Shaking palsy"
        ]

    # NOTE(review): topics other than "OP"/"PD" leave these names unbound
    # and raise UnboundLocalError here — confirm callers never do that.
    return records, chemicals, diseases
Esempio n. 17
0
def get_start_sent_noun(noun):
    """Build a sentence opener ending in *noun*, at least 5 syllables long.

    Retries (recursively) until the syllable estimate is reached.
    """
    subject, verbs = random.choice(list(beginnings.items()))
    sent = subject + " " + random.choice(verbs) + " "
    # Countable plural-capable nouns get an indefinite article.
    if countable.countable_noun(noun) and inflect.singular_noun(noun):
        sent += "an " if start_with_vowel(noun) else "a "
    sent += noun
    print(sent)
    if (estimate_syllables(sent) < 5):
        return get_start_sent_noun(noun)
    return sent
Esempio n. 18
0
def process_line(i):
    """Split line *i* into singular- and plural-headed columns.

    Returns (plural, plural_heads, singular, singular_heads), each a
    tab-joined string starting with the line's first field.
    """
    cols = contents[i].strip().split('\t')
    head_cols = heads[i].strip().split('\t')
    res_p = cols[0] + '\t'
    res_p_head = head_cols[0] + '\t'
    res_s = cols[0] + '\t'
    res_s_head = head_cols[0] + '\t'
    for j in range(1, len(head_cols)):
        # singular_noun() == False means the head word is singular.
        if inflect.singular_noun(head_cols[j]) == False:
            res_s += cols[j] + '\t'
            res_s_head += head_cols[j] + '\t'
        else:
            res_p += cols[j] + '\t'
            res_p_head += head_cols[j] + '\t'
    return res_p, res_p_head, res_s, res_s_head
def table_write(request, session):
    """Persist the item/location slots of an Alexa request to the table.

    :param request: Alexa intent request with 'item' and 'location' slots
    :param session: Alexa session (provides the user id)
    """
    item = request['intent']['slots']['item']['value']
    # True when the item name is singular (singular_noun() is False then).
    # Renamed from 'bool', which shadowed the builtin.
    is_singular = inflect.singular_noun(item) is False
    location = request['intent']['slots']['location']['value']
    # Rewrite first-person possessive for read-back ("my desk" -> "your desk").
    if 'my' in location.split(' '):
        words = ['your' if w == 'my' else w for w in location.split(' ')]
        location = ' '.join(words)
    table.put_item(
        Item={
            'userID': session['user']['userId'].split('.')[-1],
            'itemName': item,
            'itemBool': is_singular,
            'location': location,
            'loggedTime': str(datetime.utcnow().time()),
            'loggedDate': str(datetime.utcnow().date())
        })
Esempio n. 20
0
def check_tell(new_list, idx, element, check_dict):
    """Resolve the interval value covering *idx*, defaulting to 1.

    Returns 1 when there are no intervals, *idx* precedes them all, or the
    element's word is singular and not in the exception list; otherwise
    returns the value of the interval containing *idx* (or 1 if none).
    """
    intervals = sorted(new_list, key=lambda pair: pair[0])
    if not intervals:
        return 1
    if idx < intervals[0][0]:
        return 1
    # Exception words bypass the singular-word early exit below.
    exceptional = any(token in check_dict[element] for token in excp)
    # singular_noun() is False when the word is already singular.
    if inflect.singular_noun(check_dict[element]) is False and not exceptional:
        return 1
    result = -1
    for pos, entry in enumerate(intervals):
        if pos < len(intervals) - 1:
            # idx falls strictly between this start and the next.
            if entry[0] < idx < intervals[pos + 1][0]:
                result = entry[1]
    if idx > intervals[-1][0]:
        result = intervals[-1][1]
    return 1 if result == -1 else result
Esempio n. 21
0
def main():
    hour = int(datetime.datetime.now().strftime("%H"))
    time_words = ['morning', 'afternoon', 'evening', 'night']
    if hour < 12:
        current_time_word = 'morning'
    if 11 < hour < 19:
        current_time_word = 'afternoon'
    if 18 < hour < 21:
        current_time_word = 'evening'
    if hour > 20:
        current_time_word = 'night'
    time_words.remove(current_time_word)

    resp = None
    global conv, latestname, latestfemale, latestmale, latestpronoun, latestname, mood
    think('getting input...')
    theysaid = str(getinput().lower().replace('/', '').replace(
        '\\', '').replace('?', '').replace('!',
                                           '').replace("'",
                                                       '').replace('"', ''))
    og_theysaid = theysaid
    txtblb = textblob.TextBlob(theysaid)
    mood[0] = (mood[0] * 0.95) + (txtblb.sentiment.polarity * 0.05)
    mood[1] = (mood[1] * 0.95) + (txtblb.sentiment.subjectivity * 0.05)
    think('input before preprocessing: {}'.format(theysaid))

    theysaid = theysaid.replace(talkingto, '!speaker!')
    if theysaid == 'exit':
        think('Saving and exiting...')
        raise SystemExit('Saving and exiting')
    if theysaid == 'reset':
        import reset
        raise SystemExit('Saving and exiting')
    for command in commands.commands:
        if theysaid in command.inp:
            command.func()
            say(random.choice(command.responses))
            input('Press RETURN to exit')
            raise SystemExit('Saving and exiting')
    choices_temp = []
    if theysaid == 'what is your name' or theysaid == 'whats your name' or theysaid == 'what is ur name' or theysaid == 'whats ur name' or theysaid == 'what are you called' or theysaid == 'what are u called':
        choices_temp.append([
            'my name is !speaker!', 'i am called !speaker!',
            'call me !speaker!'
        ])
    think('theysaid is {}'.format(theysaid))
    opinionated = {
        'dont like': -0.3,
        'dislike': -0.4,
        'really dont like': -0.5,
        'really dislike': -0.5,
        'hate': -0.75,
        'really hate': -0.85,
        'despise': -0.95,
        'like': 0.3,
        'really like': 0.5,
        'dont dislike': 0,
        'dont mind': 0.1,
        'really dont mind': 0.2,
        'love': 0.8,
        'really love': 0.9,
        'adore': 0.95
    }
    opiniated_reversed_pos = {
        0.95: 'adore',
        0.9: 'really love',
        0.8: 'love',
        0.5: 'really like',
        0.3: 'like',
        0.2: 'really dont mind',
        0.1: 'dont mind'
    }
    opiniated_reversed_neg = {
        -0.95: 'despise',
        -0.85: 'really hate',
        -0.75: 'hate',
        -0.5: 'really dislike',
        -0.5: 'really dont like',
        -0.4: 'dislike',
        -0.3: 'dont like',
        -0.1: 'am not fond of'
    }
    for i in list(opinionated.keys()):
        if theysaid.startswith('i ' + i) and not resp:
            try:
                rd = open(
                    'Info/Opinions/' + theysaid.replace('i ' + i + ' ', '') +
                    '.txt', 'r')
                opinion = float(rd.read())
                rd.close()
                newopinion = opinion + (random.gauss(0.05, 0.01) * min(
                    max([
                        2 * mood[0] * opinionated[i] + random.gauss(0, 0.2), -1
                    ]), 1))
                with open(
                        'Info/Opinions/' +
                        theysaid.replace('i ' + i + ' ', '') + '.txt',
                        'w+') as opopinion:
                    opopinion.write(str(newopinion))
                opinion = newopinion
            except BaseException:
                with open(
                        'Info/Opinions/' +
                        theysaid.replace('i ' + i + ' ', '') + '.txt',
                        'w+') as opopinion:
                    opinion = min(
                        max([
                            2 * mood[0] * opinionated[i] +
                            random.gauss(0, 0.2), -1
                        ]), 1)
                    opopinion.write(str(opinion))
            if opinion > 0:
                for cur in opiniated_reversed_pos:
                    if opinion > cur:
                        howmuchilikeit = opiniated_reversed_pos[cur]
                        if howmuchilikeit == i and random.randint(1, 3) == 3:
                            resp = random.choice(
                                ['as do i', 'same', 'same here', 'agreed'])
                        if not resp:
                            if inflect.singular_noun(
                                    theysaid.replace('i ' + i + ' ', '')):
                                resp = random.choice([
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' ' +
                                    theysaid.replace('i ' + i + ' ', ''),
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' them'
                                ])
                            else:
                                resp = random.choice([
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' ' +
                                    theysaid.replace('i ' + i + ' ', ''),
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' it'
                                ])
            if opinion < 0:
                for cur in opiniated_reversed_pos:
                    if opinion < cur:
                        howmuchilikeit = opiniated_reversed_neg[cur]
                        if howmuchilikeit == i and random.randint(1, 3) == 3:
                            resp = random.choice(
                                ['same', 'same here', 'agreed'])
                        if not resp:
                            if inflect.singular_noun(
                                    theysaid.replace('i ' + i + ' ', '')):
                                resp = random.choice([
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' ' +
                                    theysaid.replace('i ' + i + ' ', ''),
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' them'
                                ])
                            else:
                                resp = random.choice([
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' ' +
                                    theysaid.replace('i ' + i + ' ', ''),
                                    random.choice(['i ', 'well i ']) +
                                    howmuchilikeit + ' it'
                                ])
            if not resp:
                if inflect.singular_noun(theysaid.replace('i ' + i + ' ', '')):
                    resp = random.choice(['i ', 'well i ']) + random.choice([
                        'do not have an opinion on ',
                        'dont have any feeling towards ',
                        'feel indifferent about '
                    ]) + random.choice(
                        [theysaid.replace('i ' + i + ' ', ''), 'them'])
                else:
                    resp = random.choice(['i ', 'well i ']) + random.choice([
                        'do not have an opinion on ',
                        'dont have any feeling towards ',
                        'feel indifferent about '
                    ]) + random.choice(
                        [theysaid.replace('i ' + i + ' ', ''), 'it'])

    if theysaid.startswith('how are you') or theysaid.startswith('how are u'):
        think('Said how are you')
        if mood[0] > 0:
            choices_temp.extend([
                'good thanks how are you', 'im good', 'good', 'pretty good',
                'im pretty good', 'good thanks'
            ])
        elif mood[0] < 0:
            choices_temp.extend([
                'alright how are you', 'im alright', 'im fine',
                'im fine how are you', 'im doing alright', 'alright'
            ])
        else:
            choices_temp.append('lit')
            think('Choices temp is now {}'.format(choices_temp))
    if theysaid.startswith('you are ') or theysaid.startswith('are you '):
        if theysaid.replace('you are ', '').replace('are you ',
                                                    '') in things_i_am:
            resp = random.choice(['!pos!, i am', '!pos!', 'i am'])
        else:

            if textblob.TextBlob(
                    theysaid.replace('you are ', '').replace(
                        'are you ', '')).sentiment.polarity > 0:
                resp = random.choice(['!pos!, i am', '!pos!', 'i am'])
                add_to_me = open('Me.txt', 'a')
                add_to_me.write(theysaid.replace('you are ', ''))
                add_to_me.close()
            else:
                resp = random.choice(['!neg!, i am', '!neg!', 'i am not'])
    think('checking if {} starts with i'.format(theysaid))
    if len(theysaid) > 1 and theysaid.startswith(
            'i ') and 'VB' in pyinflect.getAllInflections(
                theysaid.split(' ')[1]) and not theysaid.split(' ')[1] in [
                    'do', 'can', 'will', 'would', 'could', 'should'
                ]:
        think('ok this is interesting')
        add_noun(talkingto, 'PERSON', makethird(theysaid, gend == 'male'))
        think(makethird(theysaid, gend == 'male'))
        if theysaid.startswith('i can '):
            if theysaid.replace('i can ', '') in things_i_can_do:
                resp = random.choice(
                    ['same', 'as can i', 'so can i', theysaid + ' too'])
            else:
                resp = random.choice([
                    "yeah, i can't", "really, i can't", "can you, i can't",
                    'cool'
                ])
        if theysaid.startswith('i cant ') or theysaid.startswith("i can't "):
            if theysaid.replace('i cant ', '').replace("i can't ",
                                                       '') in things_i_can_do:
                resp = random.choice(
                    ['i can', 'why not', 'why not, i can', 'really, i can'])
            else:
                resp = random.choice(
                    ['same', 'nor can i', "i can't either", "i also can't"])

    if theysaid.startswith('tell me about '):
        try:
            if theysaid.replace('tell me about ', '') == '!speaker!':
                theysaid = theysaid.replace('!speaker!', talkingto)
            rd_person_info = open(
                'Info/PERSON/' + theysaid.replace('tell me about ', '') +
                '.txt', 'r')
            person_info = rd_person_info.read()
            rd_person_info.close()
            temp_pronoun = theysaid.replace('tell me about ', '')
            if 'male' in person_info.split(
                    '\n') or 'is male' in person_info.split('\n'):
                temp_pronoun = 'he'
            if 'female' in person_info.split(
                    '\n') or 'is female' in person_info.split('\n'):
                temp_pronoun = 'she'
            choices = person_info.split('\n')
            try:
                for i in choices:
                    if len(i) < 2:
                        choices.remove(i)
                choices.remove('/male')
                choices.remove('/female')
            except BaseException:
                pass

            if len(choices) > 2:
                facts = random.sample(choices, 2)
                resp = temp_pronoun + ' ' + \
                       facts[0] + ' and ' + temp_pronoun + ' ' + facts[1]
            else:
                raise Exception('No data (choices < 1)')
        except Exception as e:
            think(str(e))
            if theysaid.replace('tell me about ', '') in male:
                resp = random.choice([
                    'i know nothing about', 'i dont know anything about ',
                    'i dont know ', 'i dont think i know ',
                    'i dont think i know anything about ',
                    'i cant tell you about '
                ]) + random.choice(
                    ['him', 'them',
                     theysaid.replace('tell me about ', '')])
            if theysaid.replace('tell me about ', '') in female:
                resp = random.choice([
                    'i know nothing about', 'i dont know anything about ',
                    'i dont know ', 'i dont think i know ',
                    'i dont think i know anything about ',
                    'i cant tell you about '
                ]) + random.choice(
                    ['her', 'them',
                     theysaid.replace('tell me about ', '')])
            theysaid.replace(talkingto, '!speaker!')
    for i in theysaid.split(' '):
        if i in male:
            latestmale = i
        if i in female:
            latestfemale = i
        if i in names or os.path.isfile('Info/PERSON/' + i + '.txt'):
            latestname = i
    try:
        if len(theysaid.split(' ')) > 1:
            if wn.synsets(theysaid.split(' ')[0])[0].pos() == 'n' and not theysaid.split(' ')[0] in male and not \
            theysaid.split(' ')[0] in female and not theysaid.split(
                    ' ')[0] in names and theysaid.split(' ')[1] in ['is', 'are'] and not os.path.isfile(
                'Info/PERSON/' + theysaid.split(' ')[0] + '.txt'):
                add_noun(
                    nltk.PorterStemmer(theysaid.split(' ')[0]).stem(), 'NOUN',
                    theysaid.replace(theysaid.split(' ')[0] + ' ', ''))
                add_noun(
                    nltk.PorterStemmer(
                        theysaid.replace(theysaid.split(' ')[0] + ' ', ''),
                        'DESCRIPTION',
                        nltk.PorterStemmer(theysaid.split(' ')[0]).stem()))
    except BaseException:
        pass

    for i in mynames:
        theysaid = theysaid.replace(i, '!speakingto!')

    person_in_it = False
    if theysaid.startswith('!speaker!'):
        add_noun(talkingto, 'PERSON',
                 theysaid.replace('!speaker!', '').strip())
    # Saving theysaid before replacing pronouns so we can add !pronoun! later
    pretheysaid = theysaid
    for i in range(len(theysaid.split(' '))):
        if i == current_time_word:
            theysaid = theysaid.replace(
                theysaid.split(' ')[i], '!currenttimeword!')
        if i in time_words:
            theysaid = theysaid.replace(theysaid.split(' ')[i], '!timeword!')
        if theysaid.split(' ')[i] == 'he':
            theysaid.split(' ')[i] = latestmale
        if theysaid.split(' ')[i] == 'she':
            theysaid.split(' ')[i] = latestfemale

    theysaid = ' ' + pretheysaid + ' '
    for n in names:
        for i in theysaid.split(' '):
            if i == n and n in male:
                latestpronoun = ' he '

            if i == n and n in female:
                latestpronoun = ' she '

    if theysaid.startswith('is '):
        try:
            read_1 = open('Info/PERSON/' + theysaid.split(' ')[1], 'r')
            temp_text_1 = read_1.read()
            read_1.close()
            if theysaid.replace('is ' + theysaid.split(' ')[1] + ' ',
                                '') in temp_text_1.split('\n'):
                resp = random.choice(positives)
            elif theysaid.replace('is ' + theysaid.split(' ')[1] + ' not ',
                                  '') in temp_text_1.split('\n'):
                resp = random.choice(negatives)
            else:
                resp = random.choice(negatives + positives + neutral +
                                     ["i don't know"] * 5)
                if resp in positives:
                    add_noun(
                        theysaid.split(' ')[1], 'PERSON',
                        'is ' + theysaid.replace(
                            'is ' + theysaid.split(' ')[1] + ' ', ''))
                else:
                    add_noun(
                        theysaid.split(' ')[1], 'PERSON',
                        'is not ' + theysaid.replace(
                            'is ' + theysaid.split(' ')[1] + ' ', ''))
        except BaseException:
            pass  # Maybe add 'is' set here

    for i in [' he ', ' she ']:
        theysaid = theysaid.replace(i, ' !pronoun! ')
    for i in names:
        if i in theysaid:
            theysaid = theysaid.replace(' ' + i + ' ', ' !name! ')
            replacements.update({i: '!name!'})
    for i in all_resp:
        for j in i.responses:
            if j in theysaid and not j in positives and not j in negatives:
                theysaid = theysaid.replace(j, i.tag)
    for i in positives:
        theysaid = theysaid.replace(' ' + i + ' ', ' !pos! ')

    for i in negatives:
        theysaid = theysaid.replace(' ' + i + ' ', ' !neg! ')
    theysaid = theysaid.strip()

    theysaid_mod_is = None
    contractions = [['do not', 'dont'], ['will not', 'wont'],
                    ['can not', 'cant'], ['are not', 'arent'],
                    ['is not', 'isnt'], ['were not', 'werent'], ['i am', 'im'],
                    ["i'm", "im"]]
    for i in contractions:
        theysaid = theysaid.replace(i[1], i[0])
    # conjunctions_a=[' is ',' are ',' isnt ',' arent ',' were ',' werent ',' can ',' cant ', ' do ', ' dont ',
    # ' will ',' wont ']
    think('input after preprocessing: {}'.format(theysaid))
    og_conv = list(conv)
    og_conv.append(og_theysaid)
    conv.append(theysaid)
    think('conv:{}'.format(conv))
    ogconv2 = []
    for i in range(len(og_conv)):
        ogconv2.append(og_conv[i])
        if i != len(og_conv) - 1:
            ogconv2.append('/')
    ogconv3 = []
    for i in ogconv2:
        ogconv3.extend(i.split(' '))
    conv_f2 = []
    for i in range(len(conv)):
        conv_f2.append(og_conv[i])
        if i != len(conv) - 1:
            conv_f2.append('/')
    conv_f3 = []
    dictlist = []
    for key, value in replacements.items(
    ):  # Make sure this checks both theysaid and pretheysaid to find the
        # correct dictionary value that contains both sides of it
        for _ in range(theysaid.count(value)):
            if theysaid.count(key) == theysaid.count(value):
                dictlist.append([value, key])
    for i in conv_f2:
        conv_f3.extend(i.split(' '))
    for conv_version in thisfunction(dictlist, '/'.join(conv)):
        choices_temp.append(find_response(conv))
    think('thisfunction returned {}'.format(
        str(list(thisfunction(dictlist, '/'.join(conv))))))
    idk = False
    think('choices temp is now {}'.format(choices_temp))
    for i in range(2):
        if None in choices_temp:
            choices_temp.remove(None)
    if choices_temp:
        resp = random.choice(choices_temp)
        think('selecting {} from {}'.format(resp, choices_temp))
    if resp == None:
        if theysaid.startswith('why ') and len(theysaid.split(' ')) > 2:
            resp = random.choice([
                "im not sure", "i dont know", "just because",
                "how should i know"
            ])
        if theysaid.startswith('what ') or theysaid.startswith(
                'who ') or theysaid.startswith(
                    'where ') or theysaid.startswith('how '):
            resp = random.choice([
                "im not sure", "i dont know", "i don't know, " + theysaid,
                'how should i know'
            ])

    if resp == None:

        idk = True
        for i in range(len(conv)):
            if not os.path.exists('Memory/' + '/'.join(conv[i:len(conv)])):
                os.makedirs('Memory/' + '/'.join(conv[i:len(conv)]))

        conv = []
        og_conv = []

        think('1: ' + str(os.listdir()))
        os.chdir(path)
        think('2: ' + str(os.listdir()))
        for i in os.listdir('Memory/'):
            if len(os.listdir('Memory/' + str(i))) == 0:
                resp = str(i)
                think('cleared and spoke')
                break
    if resp == None:
        resp = theysaid
    conv.append(resp)

    real_resp = ' ' + resp + ' '
    if latestname != None:
        real_resp = real_resp.replace('!name!', latestname)
    else:
        for i in real_resp.split(' '):
            if i == '!name!':
                latestname = random.choice(names)
                real_resp = real_resp.replace(i, latestname)
    for i in real_resp.split(' '):
        for n in names:
            if i == n and n in male:
                latestpronoun = ' he '
            if i == n and n in female:
                latestpronoun = ' she '
        if i == '!pronoun!':
            real_resp = real_resp.replace(' !pronoun! ', latestpronoun)

    real_resp = real_resp.replace('!pos!', random.choice(positives))
    real_resp = real_resp.replace('!neg!', random.choice(negatives))
    real_resp = real_resp.replace('!speakingto!', talkingto)
    real_resp = real_resp.replace('!speaker!', random.choice(mynames))
    real_resp = real_resp.replace('!currenttimeword!', current_time_word)
    real_resp = real_resp.replace('!currenttime!', random.choice(time_words))
    for i in all_resp:
        real_resp = real_resp.replace(i.tag, random.choice(i.responses))
    real_resp = real_resp.strip()
    og_conv.append(real_resp)
    think('og_conv: {}'.format(og_conv))
    think('saying {}'.format(real_resp))
    say(real_resp)

    conv2 = list(conv)
    splitconv = [i.split(' ') for i in conv2]
    for j in splitconv:
        j.append('/')
    conv2 = [item for sublist in splitconv for item in sublist]

    og_conv2 = list(og_conv)

    og_splitconv = [i.split(' ') for i in og_conv2]
    for j in og_splitconv:
        j.append('/')
    og_conv2 = [item for sublist in og_splitconv for item in sublist]

    think('ogconv2: {}\nconv2: {}'.format(og_conv2, conv2))
    dictlist = []
    for key, value in replacements.items(
    ):  # Make sure this checks both theysaid and pretheysaid to find the correct dictionary value that contains both sides of it
        for _ in range(''.join(conv2).count(value)):
            if ''.join(conv2).count(key) == ''.join(conv2).count(value):
                dictlist.append([value, key])
    for i in dictlist:
        for j in range(len(conv)):
            if i[1] in conv[j]:
                conv[j].replace(i[1], i[0])
    think('thisfunc ' + str(list(thisfunction(dictlist, '/'.join(og_conv)))))
    think('dictlist={}'.format(dictlist))

    for current_conv in list(thisfunction(dictlist, '/'.join(og_conv))):
        curr_split_conv = current_conv.split('/')
        for i in range(len(curr_split_conv)):
            if not os.path.exists(
                    'Memory/' +
                    '/'.join(curr_split_conv[i:len(curr_split_conv)])):
                os.makedirs('Memory/' +
                            '/'.join(curr_split_conv[i:len(curr_split_conv)]))
    txtblb = textblob.TextBlob(resp)
    mood[0] = (mood[0] * 0.98) + (txtblb.sentiment.polarity * 0.02)
    mood[1] = (mood[1] * 0.98) + (txtblb.sentiment.subjectivity * 0.02)
            for w in word_tokens:
                if w not in stop_words:
                    filtered_sentence.append(w)
            filtered_text.append(filtered_sentence)
        else:
            filtered_text.append(jr)
    logging.info("Singularize plural words...")
    singularized_text = []
    inflect = inflect.engine()
    for text in filtered_text:
        singular_sentence = []
        if str(text) != 'nan':
            for word in text:
                if type(word) == str:
                    if inflect.singular_noun(word) is False:
                        singular_sentence.append(word)
                    else:
                        singular_sentence.append(singularize(word))
            singular_sentence = ' '.join(singular_sentence)
            singularized_text.append(singular_sentence)
        else:
            singularized_text.append('-')

    # create new column, populate singularized_text into the column
    logging.info("Create new column & populate cleaned Job Responsibilities")
    ed['Cleaned Job Responsibilities'] = singularized_text
    new_dataframe = pd.DataFrame(ed)
    new_dataframe.to_csv('new_extracted_data.csv')
    logging.info("NEW COLUMN IS ADDED AND SAVED TO :'new_extracted_data.csv'")
except Exception as e:
 def is_plural(word):
     """Return True when *word* is a plural noun.

     ``inflect.singular_noun(w)`` returns the singular form (truthy) when
     *w* is plural, and ``False`` when *w* is already singular.  The
     original ``not bool(...)`` therefore reported True for *singular*
     words — the test was inverted.
     """
     return bool(inflect.singular_noun(word))
Esempio n. 24
0
def token2features(sent, i, add_neighs=True):
    """Compute the features of a token.

    All the features are boolean, i.e. they appear or they do not. For the token,
    you have to return a set of strings that represent the features that *fire*
    for the token. See the code below.

    The token is at position i, and the rest of the sentence is provided as well.
    Try to make this efficient, since it is called on every token.

    One thing to note is that it is only called once per token, i.e. we do not call
    this function in the inner loops of training. So if your training is slow, it's
    not because of how long it's taking to run this code. That said, if your number
    of features is quite large, that will cause slowdowns for sure.

    add_neighs is a parameter that allows us to use this function itself in order to
    recursively add the same features, as computed for the neighbors. Of course, we do
    not want to recurse on the neighbors again, and then it is set to False (see code).
    """
    ftrs = []
    # bias
    ftrs.append("BIAS")
    # position features
    if i == 0:
        ftrs.append("SENT_BEGIN")
    if i == len(sent) - 1:
        ftrs.append("SENT_END")

    # the word itself
    # str() replaces the Python-2-only `unicode()` builtin.
    word = str(sent[i])
    # word = stemmer.stem(word)
    ftrs.append("WORD=" + word)
    ftrs.append("LCASE=" + word.lower())

    # some features of the word
    if word.isalnum():
        ftrs.append("IS_ALNUM")
    if word.isnumeric():
        ftrs.append("IS_NUMERIC")
    if word.isdigit():
        ftrs.append("IS_DIGIT")
    if word.isupper():
        ftrs.append("IS_UPPER")
    if word.islower():
        ftrs.append("IS_LOWER")

    # CUSTOM FEATURES

    # 1 check singular and plural
    # NOTE(review): inflect.singular_noun() is False for singular nouns.
    if inflect.singular_noun(word.lower()) is False:
        ftrs.append("IS_SINGULAR")

    # 2 check for punctuations
    if word.lower() in string.punctuation:
        ftrs.append("IS_PUNCTUATION")

    # 3 check for # or @ (Twitter-style markers)
    # Fixed: the original condition read `... or "RT" or "rt" in word`, whose
    # bare "RT" literal is always truthy, so IS_X fired for every token.
    if "#" in word or "@" in word or "RT" in word or "rt" in word:
        ftrs.append("IS_X")

    # 4 check adverb
    if word[-2:].lower() == "ly":
        ftrs.append("IS_ADVERB")

    # 5 first caps
    if word[0].isupper():
        ftrs.append("IS_1_UPPER")

    # 6 has hyphen
    if "-" in word:
        ftrs.append("HAS_HYPHEN")

    # 7 possible adj — endswith() accepts a tuple of suffixes
    if word.lower().endswith(("able", "ible", "ent", "er", "ous", "est")):
        ftrs.append("IS_ADJECTIVE")

    # 8 possible verbs
    if word.lower().endswith(("ing", "ed")):
        ftrs.append("IS_VERB")

    # 9 possible determiners
    if word.lower() in possible_dt:
        ftrs.append("IS_DT")

    # brown clustering, uncomment to enable
    if word.lower() in brown_cluster_dict:
        cluster_id = brown_cluster_dict[word.lower()]
        # repr() replaces the Python-2 backtick syntax `cluster_id`,
        # which is a SyntaxError on Python 3.
        feat = "IS_CLUSTER_" + repr(cluster_id)
        ftrs.append(feat)

    # previous/next word feats
    if add_neighs:
        if i > 0:
            for pf in token2features(sent, i - 1, add_neighs=False):
                ftrs.append("PREV_" + pf)
        if i < len(sent) - 1:
            for pf in token2features(sent, i + 1, add_neighs=False):
                ftrs.append("NEXT_" + pf)

    # return it!
    return ftrs
Esempio n. 25
0
def return_entities(sentence):
    """Parse a (food-ordering) sentence into entities, quantities and attributes.

    Returns a 4-tuple ``(top_list, new_food_dict, transform_dict, number_list)``:
    the candidate entity strings, a per-food dict with ``attribute``/``quantity``/
    ``topping`` keys, the surface-form mapping produced by ``main_check``, and the
    numbers found by ``count``.

    NOTE(review): relies on module-level globals defined outside this block
    (`cuisine`, `top`, `att`, `blacklist1`, `also_check`, `count`, `changew_n`,
    `main_check`, `inflect`, plus NLTK's `word_tokenize`/`pos_tag`) — verify
    against the enclosing module.
    """
    # sentence="so I would like to travel from Gorakhpur to Mumbai on monday.show me the flights."
    sentence = sentence.lower()
    sentence = sentence.replace(".", " . ")
    sentence = sentence.replace(",", " , ")
    # `!= -1` replaces the original `is not -1`: identity comparison with an
    # int literal is implementation-dependent (and a SyntaxWarning on 3.8+).
    if sentence.find("'s") != -1:
        sentence = sentence.replace("'s", " ")
    if sentence.find("'ll") != -1:
        sentence = sentence.replace("'ll", " will")
    if sentence.find(" 'nd") != -1:
        sentence = sentence.replace("'nd", " ")
    if sentence.find(" 'rd") != -1:
        sentence = sentence.replace("'rd", " ")
    if sentence.find("!") != -1:
        sentence = sentence.replace("!", " ")
    if sentence.find("'t") != -1:
        sentence = sentence.replace("'t", "t")
    sentence = ' '.join(sentence.split())
    sentence_count = sentence
    number_list = count(sentence)
    check_len = sentence.split(" ")
    check_l = len(check_len)
    sentence = sentence.replace("?", "")
    sentence = changew_n(sentence)
    original_sentence = sentence
    token_dict = {}
    tok = word_tokenize(sentence)
    l_sentence = sentence
    pos = pos_tag(tok)  ## tagging initially to ignore the list of adjectives
    print(pos)
    chunking = nltk.ne_chunk(pos, binary=True)
    # chunking.draw()
    adjectives = []
    check_pos = ["NNP", "NN", "NNS", "RB", "JJ", "IN"]
    non_list = []
    sing_list = []
    check_list = ["along"]
    comma_check = []
    idx = 0
    for index, entity in enumerate(pos):
        idx += 1
        if idx <= len(pos) - 1:
            if entity[1] == "JJ" and '-' not in entity[0] and entity[0].lower(
            ) not in cuisine and pos[index + 1][1] not in check_pos:
                adjectives.append(entity[0])
            elif entity[1] == "CD":
                non_list.append(entity[0])
            elif entity[1] == "DT":
                check_list.append(entity[0])
                sing_list.append(" " + entity[0] + " ")
            # drop list separators (',' / 'and') that sit between two toppings
            if entity[0] == ',' and pos[index + 1][0] in top and pos[
                    index - 1][0] in top:
                check_list.append(entity[0])
                comma_check.append(index)
            elif entity[0] == 'and' and pos[index + 1][0] in top and pos[
                    index - 1][0] in top:
                comma_check.append(index)
                check_list.append(entity[0])
    # tak_list is dictionary that will store index of a, an, the in sentence
    tak_list = {}
    for element in sing_list:
        if element == ' a ' or element == ' an ':
            for match in re.finditer(element, sentence):
                tak_list[(match.start(), element)] = 1

    new_sentence = ""
    for idx, wor in enumerate(sentence.split(' ')):
        # check_list will store comma, and, and delimiters(DT)
        if wor.lower() in check_list:
            if wor.lower() == "," or wor.lower() == "and":
                if idx in comma_check:
                    continue
                else:
                    new_sentence = new_sentence + wor.lower() + " "
            else:
                continue
        elif wor in adjectives or wor in non_list:  ## if word is verb or adjective it is not being capitalised
            wor = wor.lower()
            new_sentence = new_sentence + wor + " "
        elif wor in blacklist1:  ## also dont want to capitalise stopwords
            if wor[:1].isupper():
                new_sentence = new_sentence + wor.lower() + " "
            else:
                new_sentence = new_sentence + wor + " "
        elif wor.lower() in blacklist1:
            new_sentence = new_sentence + wor.lower() + " "
        else:
            # Capitalise the first letter of all remaining words so that
            # ne_chunk treats them as proper-noun candidates.
            new_sentence = new_sentence + wor[:1].upper() + wor[1:] + " "
    new_sentence = ' '.join(new_sentence.split())
    if new_sentence.endswith('.'):
        new_sentence = new_sentence[:-1]

    tok = word_tokenize(new_sentence)
    tokens_pos = pos_tag(tok)
    # ne_chunk will combine proper nouns
    chunking = nltk.ne_chunk(tokens_pos, binary=True)

    # collect multi-word named entities found as sub-trees
    new_sentence_list = [
        " ".join(w for w, t in elt) for elt in chunking
        if isinstance(elt, nltk.Tree)
    ]
    print(new_sentence_list)
    # chunking.draw()
    # NOTE(review): `list` shadows the builtin — kept for compatibility with
    # the rest of the function body.
    list = []

    # in dict_word the value of each word starts at 0 ("not yet consumed")
    dict_word = {}
    for word in new_sentence.split(" "):
        dict_word[word] = 0
    # maps each word to the noun phrase it was merged into
    word_to_token = {}
    for word in new_sentence.split(" "):
        word_to_token[word] = word

    stop_list = ["with", "of"]
    quant_list = []
    for i, unique in enumerate(tokens_pos):
        sentence = ""
        if i < len(tokens_pos) - 1:
            # take all consecutive nouns and append them to list
            if (unique[1] == "NNP" or unique[1]
                    == "NN") and (tokens_pos[i + 1][1] == "NNP"
                                  or tokens_pos[i + 1][1] == "NNS"
                                  or tokens_pos[i + 1][1] == "NN"
                                  or tokens_pos[i + 1][1] == "NNPS"):
                j = i
                loopno = 1
                enter = 0
                # Fixed copy-paste bug: the NNPS test below originally read
                # tokens_pos[i + 1] although the loop advances j; every
                # sibling test in this condition uses j + 1.
                while j < len(tokens_pos) - 1 and (
                        tokens_pos[j][1] == "NNP" or tokens_pos[j][1]
                        == "NN") and (tokens_pos[j + 1][1] == "NNP"
                                      or tokens_pos[j + 1][1] == "NNS"
                                      or tokens_pos[j + 1][1] == "NN"
                                      or tokens_pos[j + 1][1] == "NNPS"):
                    enter = enter + 1
                    # dict_word marks which words are already consumed;
                    # sentence accumulates the noun phrase
                    if (dict_word[tokens_pos[j][0]] == 0 and enter == 1):
                        sentence += tokens_pos[j][0] + " "
                        dict_word[tokens_pos[j][0]] = 1
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    elif enter != 1 and dict_word[tokens_pos[j + 1][0]] == 0:
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                # remove trailing space from sentence
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                dict_word[tokens_pos[j][0]] = 1
                list.append(sentence)
            # noun followed by "with"/"of" and then another noun
            elif i < len(tokens_pos) - 2 and (
                    unique[1] == "NNP" or unique[1] == "NN" or unique[1]
                    == "NNS") and tokens_pos[i + 1][0] in stop_list and (
                        tokens_pos[i + 2][1] == "NNP" or tokens_pos[i + 2][1]
                        == "NN" or tokens_pos[i + 2][1] == "NNS"):
                j = i
                dict_word[tokens_pos[j][0]] = 1
                dict_word[tokens_pos[j + 1][0]] = 1
                dict_word[tokens_pos[j + 2][0]] = 1
                sentence += word_to_token[tokens_pos[j][0]] + " "
                if word_to_token[tokens_pos[j][0]] in list:
                    list.remove(word_to_token[tokens_pos[j][0]])
                sentence += tokens_pos[j + 1][0] + " "
                sentence += tokens_pos[j + 2][0] + " "
                j = j + 2
                while j < len(tokens_pos) - 1 and (
                        tokens_pos[j][1] == "NNP" or tokens_pos[j][1]
                        == "NN") and (tokens_pos[j + 1][1] == "NNP"
                                      or tokens_pos[j + 1][1] == "NNS"):
                    sentence += tokens_pos[j + 1][0] + " "
                    dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                list.append(sentence)
            # lone noun
            elif unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS":
                if (dict_word[unique[0]]) == 0:
                    list.append(unique[0])
            elif unique[1] == "JJ" and '-' in unique[0]:
                list.append(unique[0])
            elif unique[0].lower() in cuisine:
                list.append(unique[0])
    # last noun of sentence
    if tokens_pos[len(tokens_pos) - 1][1] == "NNP" or tokens_pos[
            len(tokens_pos) - 1][1] == "NN" or tokens_pos[len(tokens_pos) -
                                                          1][1] == "NNS":
        if dict_word[tokens_pos[len(tokens_pos) - 1][0]] == 0:
            list.append(tokens_pos[len(tokens_pos) - 1][0])
    # collect cardinal numbers (quantities)
    for i, unique in enumerate(tokens_pos):
        if unique[1] == "CD":
            quant_list.append(unique[0])
    new1_list = []
    original_sentence1 = original_sentence

    for key in quant_list:
        idx = original_sentence1.find(key)
        new1_list.append((idx, key))
        # replace the number with a placeholder so repeated numbers get
        # distinct positions on the next find()
        original_sentence1 = original_sentence1[:idx] + "$" + \
            original_sentence1[idx + 1:]

    for element in new_sentence.split(" "):
        if element.lower() in also_check and dict_word[element] == 0:
            list.append(element)
    remove_list = []
    # drop ne_chunk sub-trees that are proper substrings of an entity we
    # already collected
    for element2 in new_sentence_list:
        for unique2 in list:
            if element2 in unique2:
                if not element2 == unique2:
                    remove_list.append(element2)
    for element in remove_list:
        if element in new_sentence_list:
            new_sentence_list.remove(element)

    final_list = list + new_sentence_list
    # de-duplicate via a set
    set1 = set(final_list)
    new_list = []
    for element in set1:
        new_list.append(element)
    if " " in new_list:
        new_list.remove(" ")
    if '' in new_list:
        new_list.remove('')
    top_list = []
    # keep only elements that are not blacklisted
    for element in new_list:
        if element.lower() not in blacklist1 and element not in blacklist1:
            top_list.append(element.lower())
    check_dict = {}
    check_list = []
    for element in top_list:
        # "order" is tagged as a noun; strip it from entity strings
        if "order " in element:
            element1 = element.replace("order ", "")
            top_list.remove(element)
            top_list.append(element1)
    sep_list = []
    food_dict, transform_dict = main_check(top_list)
    print(food_dict)
    nf_list = []
    for key in food_dict:
        idx = original_sentence.find(transform_dict[key])
        nf_list.append((idx, key))
    new1_list.sort(reverse=True)
    nf_list.sort()
    check_list = []
    found_dict = {}

    # pre_dict: foods preceded by an "a"/"an" determiner (quantity 1)
    pre_dict = {}
    for element in tak_list:
        for food in nf_list:
            if element[0] < food[0]:
                pre_dict[food[1]] = element
                break

    # c_dict: pair each food with the closest preceding number, (-1, -1)
    # when none is available
    c_dict = {}
    for i, element in enumerate(nf_list):
        flag = 0
        for indiv in new1_list:
            if indiv[0] < element[0] and indiv not in check_list:
                c_dict[element] = indiv
                check_list.append(indiv)
                flag = 1
                break
        if flag == 0:
            c_dict[element] = (-1, -1)
    final_dict = {}
    element_list = []
    for element in nf_list:
        check = 0
        for word in element[1].split():
            if word.endswith('s'):
                check = 1
                break
        if check == 0:
            element_list.append(element)

    for i, element in enumerate(nf_list):
        if i == 0:
            if c_dict[element] == (-1, -1):
                # no explicit number: singular noun / "a"/"an" -> 1, else 2
                if inflect.singular_noun(element[1].split(
                )[0]) is False or element in element_list or element[
                        1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                check_list.append(c_dict[element])
                final_dict[element[1]] = c_dict[element][1]

        else:
            if c_dict[element] == (-1, -1):
                if inflect.singular_noun(element[1].split(
                )[0]) is False or element in element_list or element[
                        1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                final_dict[element[1]] = c_dict[element][1]
    new_food_dict = {}
    for element in food_dict:
        new_food_dict[element] = {}
        new_food_dict[element]["attribute"] = []
        new_food_dict[element]["quantity"] = 0
        new_food_dict[element]["topping"] = []

    for element in food_dict:
        for item in food_dict[element]:
            if item in top:
                new_food_dict[element]["topping"].append(item)
            elif item in att:
                new_food_dict[element]["attribute"].append(item)
            else:
                new_food_dict[element]["attribute"].append(item)
    for element in new_food_dict:
        if element in final_dict:
            new_food_dict[element]["quantity"] = final_dict[element]

    print(new_food_dict)

    print(final_dict)
    # print(number_list)
    return top_list, new_food_dict, transform_dict, number_list
Esempio n. 26
0
def singlar_or_plural(a, d):
    """Return True when *a* and *d* respond identically to the plurality probe.

    The probe ``inflect.singular_noun(w) == w`` uses the module-level
    ``inflect`` engine: ``singular_noun`` returns False for a singular word
    and the singular form for a plural one, so the comparison is True only
    for words whose singular equals the word itself (e.g. "fish").
    NOTE(review): the intended check may have been ``bool(singular_noun(w))``
    (i.e. "is plural") -- confirm against callers before changing.
    """
    probe_a = inflect.singular_noun(a) == a
    probe_d = inflect.singular_noun(d) == d
    return probe_a == probe_d
Esempio n. 27
0
import inflect

# NOTE: rebinds the module name to an engine instance (matches file convention).
inflect = inflect.engine()

foods = ["strawberries", "apple", "banana pancake", "pineapple"]

# Print each food in plural form: words the engine reports as singular
# (singular_noun(...) is False) get pluralised first; the rest are assumed
# already plural and printed unchanged.
for food in foods:
    printable = inflect.plural(food) if inflect.singular_noun(food) is False else food
    print(printable)

Esempio n. 28
0
def is_are(f):
    """Return the copula matching noun *f*: ``'are'`` for plural, ``'is'`` otherwise.

    Uses the module-level ``inflect`` engine; ``singular_noun`` is truthy
    (the singular form) for a plural noun and False for a singular one.
    """
    return 'are' if inflect.singular_noun(f) else 'is'
Esempio n. 29
0
            c_dict[element]=(-1,-1)
    final_dict={}
    element_list=[]
    for element in nf_list:
        check=0
        for word in element[1].split():
            if word.endswith('s'):
                check=1
                break
        if check==0:
            element_list.append(element)

    for i,element in enumerate(nf_list):
        if i==0:
            if c_dict[element]==(-1,-1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]]=1
                else:
                    final_dict[element[1]]=2
            else:
                check_list.append(c_dict[element])
                final_dict[element[1]] = c_dict[element][1]

        else:
            if c_dict[element]==(-1,-1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]]=1
                else:
                    final_dict[element[1]]=2
            # elif c_dict[element] in check_list:
            #     if inflect.singular_noun(element[1]) is False or element  in element_list:
Esempio n. 30
0
def return_entities(sentence):
    """Extract food entities, their quantities and attributes from an order sentence.

    Heuristic NLTK pipeline:
      1. Normalise the text (lower-case, pad '.'/',', strip or expand
         contractions, drop '?').
      2. POS-tag once to collect adjectives, cardinal numbers and
         determiners that must stay lower-case.
      3. Rebuild the sentence with candidate entity words capitalised so
         ``nltk.ne_chunk`` groups them into multi-word named entities.
      4. Walk the tagged tokens joining adjacent-noun and "X of/with Y"
         compounds into single entity strings.
      5. Pair each detected food with the cardinal number (or a preceding
         "a"/"an") from the original sentence to derive its quantity
         (default 2 for plural-looking foods, 1 otherwise).

    Relies on module-level names defined elsewhere in this file:
    ``changew_n``, ``cuisine``, ``top``, ``blacklist1``, ``also_check``,
    ``main_check`` and the ``inflect`` engine, plus NLTK's
    ``word_tokenize``/``pos_tag``/``ne_chunk`` and WordNet (``wn``).

    Args:
        sentence: raw user utterance.

    Returns:
        tuple: ``(top_list, new_food_dict, transform_dict)`` where
        ``new_food_dict`` maps each food name to a dict with keys
        ``"attribute"``, ``"quantity"`` and ``"topping"``.
    """
    # sentence="so I would like to travel from Gorakhpur to Mumbai on monday.show me the flights."
    sentence = sentence.lower()
    sentence = sentence.replace(".", " . ")
    sentence = sentence.replace(",", " , ")
    # BUGFIX: these guards used ``is not -1`` -- an identity comparison with
    # an int literal, which is implementation-dependent and a SyntaxWarning
    # on Python >= 3.8.  Rewritten as ``!= -1`` (the guards are redundant
    # anyway: str.replace is a no-op when the substring is absent).
    if sentence.find("'s") != -1:
        sentence = sentence.replace("'s", " ")
    if sentence.find("'ll") != -1:
        sentence = sentence.replace("'ll", " will")
    if sentence.find(" 'nd") != -1:
        sentence = sentence.replace("'nd", " ")
    if sentence.find(" 'rd") != -1:
        sentence = sentence.replace("'rd", " ")
    if sentence.find("!") != -1:
        sentence = sentence.replace("!", " ")
    if sentence.find("'t") != -1:
        sentence = sentence.replace("'t", "t")
    sentence = ' '.join(sentence.split())
    sentence = sentence.replace("?", "")
    sentence = changew_n(sentence)
    original_sentence = sentence
    tok = word_tokenize(sentence)
    pos = pos_tag(tok)  ## tagging initially to ignore the list of adjectives
    print(pos)
    # NOTE(review): result unused; kept for parity (may raise LookupError early
    # when NLTK resources are missing, same as the later ne_chunk call).
    chunking = nltk.ne_chunk(pos, binary=True)
    adjectives = []
    check_pos = ["NNP", "NN", "NNS", "RB", "JJ", "IN"]
    non_list = []
    sing_list = []
    check_list = ["along"]
    comma_check = []
    # collect words that must NOT be capitalised, plus comma/"and" positions
    # that join two toppings (those separators are dropped later)
    for index, entity in enumerate(pos):
        if index < len(pos) - 1:  # look-ahead below needs index+1
            if entity[1] == "JJ" and '-' not in entity[0] and entity[0].lower() not in cuisine and pos[index + 1][1] not in check_pos:
                adjectives.append(entity[0])
            elif entity[1] == "CD":
                non_list.append(entity[0])
            elif entity[1] == "DT":
                check_list.append(entity[0])
                sing_list.append(" " + entity[0] + " ")
            if entity[0] == ',' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                check_list.append(entity[0])
                comma_check.append(index)
            elif entity[0] == 'and' and pos[index + 1][0] in top and pos[index - 1][0] in top:
                comma_check.append(index)
                check_list.append(entity[0])

    # positions of "a"/"an" in the sentence -> used later to force quantity 1
    tak_list = {}
    for element in sing_list:
        if element == ' a ' or element == ' an ':
            for match in re.finditer(element, sentence):
                tak_list[(match.start(), element)] = 1

    new_sentence = ""
    for idx, wor in enumerate(sentence.split(' ')):
        try:
            tmp = wn.synsets(wor)[0].pos()  ## dont want to capitalise verbs
        except Exception as ex:
            print(ex)
            tmp = "r"
        # NOTE(review): ``tmp`` is computed but never consulted -- the verb
        # check hinted at above was apparently never implemented.  The lookup
        # is kept because its exception path prints.
        if wor.lower() in check_list:
            if wor.lower() == "," or wor.lower() == "and":
                if idx in comma_check:
                    continue
                else:
                    new_sentence = new_sentence + wor.lower() + " "
            else:
                continue
        elif wor in adjectives or wor in non_list:  ## if word is verb or adjective it is not being capitalised
            wor = wor.lower()
            new_sentence = new_sentence + wor + " "
        elif wor in blacklist1:  ## also dont want to capitalise stopwords
            if wor[:1].isupper():
                new_sentence = new_sentence + wor.lower() + " "
            else:
                new_sentence = new_sentence + wor + " "
        elif wor.lower() in blacklist1:
            new_sentence = new_sentence + wor.lower() + " "
        else:
            new_sentence = new_sentence + wor[:1].upper() + wor[1:] + " "  ## else capitalise all other words
    new_sentence = ' '.join(new_sentence.split())
    if new_sentence.endswith('.'):
        new_sentence = new_sentence[:-1]
    tok = word_tokenize(new_sentence)
    tokens_pos = pos_tag(tok)
    chunking = nltk.ne_chunk(tokens_pos, binary=True)
    new_sentence_list = [" ".join(w for w, t in elt) for elt in chunking if isinstance(elt, nltk.Tree)]
    print(new_sentence_list)
    # NOTE(review): debug leftover -- opens a blocking GUI window; kept for
    # behavioural parity, consider removing in production.
    chunking.draw()
    entity_list = []  # renamed from ``list`` to stop shadowing the builtin
    dict_word = {}
    print(new_sentence)
    for word in new_sentence.split(" "):
        dict_word[word] = 0
    print(dict_word)
    word_to_token = {}
    for word in new_sentence.split(" "):
        word_to_token[word] = word

    stop_list = ["with", "of"]
    quant_list = []
    # join runs of adjacent nouns (and "X of/with Y" compounds) into one entity
    for i, unique in enumerate(tokens_pos):
        sentence = ""
        if i < len(tokens_pos) - 1:
            if (unique[1] == "NNP" or unique[1] == "NN") and (tokens_pos[i + 1][1] == "NNP" or tokens_pos[i + 1][1] == "NNS" or tokens_pos[i + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                j = i
                enter = 0
                print(unique[0])
                # NOTE(review): ``tokens_pos[i + 1][1] == "NNPS"`` below looks
                # like a copy/paste of the outer condition (probably meant
                # j + 1); left unchanged to preserve behaviour.
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS" or tokens_pos[j + 1][1] == "NN" or tokens_pos[i + 1][1] == "NNPS"):
                    print("inside")
                    enter = enter + 1
                    if dict_word[tokens_pos[j][0]] == 0 and enter == 1:
                        sentence += tokens_pos[j][0] + " "
                        dict_word[tokens_pos[j][0]] = 1
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    elif enter != 1 and dict_word[tokens_pos[j + 1][0]] == 0:
                        sentence += tokens_pos[j + 1][0] + " "
                        dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1

                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                dict_word[tokens_pos[j][0]] = 1
                entity_list.append(sentence)
            elif i < len(tokens_pos) - 2 and (unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS") and tokens_pos[i + 1][0] in stop_list and (tokens_pos[i + 2][1] == "NNP" or tokens_pos[i + 2][1] == "NN" or tokens_pos[i + 2][1] == "NNS"):
                j = i
                dict_word[tokens_pos[j][0]] = 1
                dict_word[tokens_pos[j + 1][0]] = 1
                dict_word[tokens_pos[j + 2][0]] = 1
                sentence += word_to_token[tokens_pos[j][0]] + " "
                if word_to_token[tokens_pos[j][0]] in entity_list:
                    entity_list.remove(word_to_token[tokens_pos[j][0]])
                sentence += tokens_pos[j + 1][0] + " "
                sentence += tokens_pos[j + 2][0] + " "
                j = j + 2
                while j < len(tokens_pos) - 1 and (tokens_pos[j][1] == "NNP" or tokens_pos[j][1] == "NN") and (tokens_pos[j + 1][1] == "NNP" or tokens_pos[j + 1][1] == "NNS"):
                    sentence += tokens_pos[j + 1][0] + " "
                    dict_word[tokens_pos[j + 1][0]] = 1
                    j = j + 1
                sentence = sentence[:-1]
                for wor in sentence.split():
                    word_to_token[wor] = sentence
                entity_list.append(sentence)

            elif unique[1] == "NNP" or unique[1] == "NN" or unique[1] == "NNS":
                if (dict_word[unique[0]]) == 0:
                    entity_list.append(unique[0])
            elif unique[1] == "JJ" and '-' in unique[0]:
                entity_list.append(unique[0])
            elif unique[0].lower() in cuisine:
                entity_list.append(unique[0])

    # the main loop above never inspects the final token -- handle it here
    if tokens_pos[len(tokens_pos) - 1][1] == "NNP" or tokens_pos[len(tokens_pos) - 1][1] == "NN" or tokens_pos[len(tokens_pos) - 1][1] == "NNS":
        if dict_word[tokens_pos[len(tokens_pos) - 1][0]] == 0:
            entity_list.append(tokens_pos[len(tokens_pos) - 1][0])
    for i, unique in enumerate(tokens_pos):
        if unique[1] == "CD":
            quant_list.append(unique[0])
    new1_list = []
    for key in quant_list:
        idx = original_sentence.find(key)
        new1_list.append((idx, key))

    for element in new_sentence.split(" "):
        if element.lower() in also_check and dict_word[element] == 0:
            entity_list.append(element)
    # drop chunker entities that are strict substrings of a longer entity
    remove_list = []
    for element2 in new_sentence_list:
        for unique2 in entity_list:
            if element2 in unique2:
                if not element2 == unique2:
                    remove_list.append(element2)
    for element in remove_list:
        if element in new_sentence_list:
            new_sentence_list.remove(element)

    final_list = entity_list + new_sentence_list
    set1 = set(final_list)
    new_list = []
    for element in set1:
        new_list.append(element)
    if " " in new_list:
        new_list.remove(" ")
    if '' in new_list:
        new_list.remove('')
    top_list = []
    for element in new_list:
        if element.lower() not in blacklist1 and element not in blacklist1:
            top_list.append(element.lower())
    # BUGFIX: the original iterated ``top_list`` while removing from it,
    # which silently skips the element after every removal; iterate a copy.
    for element in top_list[:]:
        if "order " in element:
            element1 = element.replace("order ", "")
            top_list.remove(element)
            top_list.append(element1)
    food_dict, transform_dict = main_check(top_list)
    print(food_dict)
    nf_list = []
    for key in food_dict:
        idx = original_sentence.find(transform_dict[key])
        nf_list.append((idx, key))
    new1_list.sort(reverse=True)
    nf_list.sort()
    check_list = []

    # foods preceded by "a"/"an" in the original sentence -> quantity 1
    pre_dict = {}
    for element in tak_list:
        for food in nf_list:
            if element[0] < food[0]:
                pre_dict[food[1]] = element
                break

    # pair each food with the first unused cardinal number preceding it
    c_dict = {}
    for i, element in enumerate(nf_list):
        flag = 0
        for indiv in new1_list:
            if indiv[0] < element[0] and indiv not in check_list:
                c_dict[element] = indiv
                check_list.append(indiv)
                flag = 1
                break
        if flag == 0:
            c_dict[element] = (-1, -1)  # sentinel: no explicit number found
    final_dict = {}
    element_list = []
    # foods whose every word lacks a trailing 's' are treated as singular
    for element in nf_list:
        check = 0
        for word in element[1].split():
            if word.endswith('s'):
                check = 1
                break
        if check == 0:
            element_list.append(element)

    # quantity resolution: explicit number wins; otherwise 1 for
    # singular-looking or "a/an"-preceded foods, 2 for plural-looking ones
    for i, element in enumerate(nf_list):
        if i == 0:
            if c_dict[element] == (-1, -1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                check_list.append(c_dict[element])
                final_dict[element[1]] = c_dict[element][1]

        else:
            if c_dict[element] == (-1, -1):
                if inflect.singular_noun(element[1].split()[0]) is False or element in element_list or element[1] in pre_dict:
                    final_dict[element[1]] = 1
                else:
                    final_dict[element[1]] = 2
            else:
                final_dict[element[1]] = c_dict[element][1]
    new_food_dict = {}
    for element in food_dict:
        new_food_dict[element] = {}
        new_food_dict[element]["attribute"] = []
        new_food_dict[element]["quantity"] = 0
        new_food_dict[element]["topping"] = []

    for element in food_dict:
        for item in food_dict[element]:
            if item in top:
                new_food_dict[element]["topping"].append(item)
            else:
                # original had ``elif item in att`` and ``else`` branches that
                # both appended to "attribute" -- merged into one branch.
                new_food_dict[element]["attribute"].append(item)
    for element in new_food_dict:
        if element in final_dict:
            new_food_dict[element]["quantity"] = final_dict[element]

    print(new_food_dict)

    print(final_dict)
    return top_list, new_food_dict, transform_dict