Ejemplo n.º 1
0
 def test_modality(self):
     """Assert en.modality() returns a certainty score in -1.0 => +1.0 and
     that its accuracy baseline on the CoNLL-2010 uncertainty data holds."""
     # Assert -1.0 => +1.0 representing the degree of certainty.
     v = en.modality(en.Sentence(en.parse("I wish it would stop raining.")))
     self.assertTrue(v < 0)
     v = en.modality(
         en.Sentence(en.parse("It will surely stop raining soon.")))
     self.assertTrue(v > 0)
     # Assert the accuracy of the modality algorithm.
     # Given are the scores for the CoNLL-2010 Shared Task 1 Wikipedia uncertainty data:
     # http://www.inf.u-szeged.hu/rgai/conll2010st/tasks.html#task1
     # The baseline should increase (not decrease) when the algorithm is modified.
     from pattern.db import Datasheet
     from pattern.metrics import test
     sentences = []
     for certain, sentence in Datasheet.load(
             os.path.join(PATH, "corpora", "uncertainty-conll2010.csv")):
         sentence = en.parse(sentence, chunks=False, light=True)
         sentence = en.Sentence(sentence)
         sentences.append((sentence, int(certain) > 0))
     # A/P/R/F = accuracy, precision, recall, F-score of the classifier
     # "certain iff modality > 0.5".
     A, P, R, F = test(lambda sentence: en.modality(sentence) > 0.5,
                       sentences)
     self.assertTrue(A > 0.69)
     self.assertTrue(P > 0.71)
     self.assertTrue(R > 0.64)
     self.assertTrue(F > 0.67)
     # Fixed: was a Python 2 print statement; the rest of this file uses print().
     print("pattern.en.modality()")
Ejemplo n.º 2
0
 def test_mood(self):
     """Assert en.mood() for the four grammatical moods."""
     # One (sentence, expected mood) pair per mood, asserted in order:
     # imperative, conditional, subjunctive, indicative.
     cases = (
         ("Do your homework!", en.IMPERATIVE),
         ("We ought to help him.", en.CONDITIONAL),
         ("I wouldn't do that if I were you.", en.SUBJUNCTIVE),
         ("The weather is nice today.", en.INDICATIVE),
     )
     for sentence, expected in cases:
         self.assertEqual(en.mood(en.Sentence(en.parse(sentence))), expected)
     print("pattern.en.mood()")
Ejemplo n.º 3
0
 def test_negated(self):
     """Assert en.negated() is True for sentences containing "not", "n't" or "never"."""
     cases = [
         (True, "Not true?"),
         (True, "Never true."),
         (True, "Isn't true."),
     ]
     for expected, sentence in cases:
         parsed = en.Sentence(en.parse(sentence))
         self.assertEqual(en.negated(parsed), expected)
     print("pattern.en.negated()")
Ejemplo n.º 4
0
 def test_conditional(self):
     """Assert conditional() for sentences with possible or imaginary situations."""
     from pattern.text.en.modality import conditional
     for b, s in ((True, "We ought to help him."),
                  (True, "We could help him."),
                  (True, "I will help you."),
                  (True, "I hope you will help me."),
                  (True, "I can help you if you let me."),
                  (False, "You will help me."),
                  (False, "I can help you.")):
         self.assertEqual(conditional(en.Sentence(en.parse(s))), b)
     # Assert predictive mood: with predictive=False a plain "will" no
     # longer counts as conditional.
     s = "I will help you."
     v = conditional(en.Sentence(en.parse(s)), predictive=False)
     self.assertEqual(v, False)
     # Assert speculative mood: an "if"-clause still counts.
     s = "I will help you if you pay me."
     v = conditional(en.Sentence(en.parse(s)), predictive=False)
     self.assertEqual(v, True)
     # Fixed: was a Python 2 print statement; the rest of this file uses print().
     print("pattern.en.modality.conditional()")
Ejemplo n.º 5
0
 def test_subjunctive(self):
     """Assert subjunctive() for sentences with wishes, judgments or opinions."""
     from pattern.text.en.modality import subjunctive
     for b, s in ((True, "I wouldn't do that if I were you."),
                  (True, "I wish I knew."),
                  (True, "I propose that you be on time."),
                  (True, "It is a bad idea to be late."),
                  (False, "I will be late.")):
         self.assertEqual(subjunctive(en.Sentence(en.parse(s))), b)
     # Fixed: was a Python 2 print statement; the rest of this file uses print().
     print("pattern.en.modality.subjunctive()")
Ejemplo n.º 6
0
 def test_word_custom_tags(self):
     """Assert word custom tags ("word/part-of-speech/.../some-custom-tag")."""
     sentence = en.Sentence("onion/NN/FOOD",
                            token=[en.WORD, en.POS, "semantic_type"])
     word = sentence.words[0]
     # The custom tag is exposed both as an attribute and in custom_tags,
     # and survives a copy of the word.
     self.assertEqual(word.semantic_type, "FOOD")
     self.assertEqual(word.custom_tags["semantic_type"], "FOOD")
     self.assertEqual(word.copy().custom_tags["semantic_type"], "FOOD")
     # Adding a new custom tag extends the sentence's token format.
     word.custom_tags["taste"] = "pungent"
     self.assertEqual(sentence.token,
                      [en.WORD, en.POS, "semantic_type", "taste"])
     print("pattern.en.Word.custom_tags")
Ejemplo n.º 7
0
 def test_imperative(self):
     """Assert imperative() for sentences that are orders, commands, warnings."""
     from pattern.text.en.modality import imperative
     for b, s in ((True, "Do your homework!"),
                  (True, "Do not listen to me."),
                  (True, "Turn that off, will you."),
                  (True, "Let's help him."),
                  (True, "Help me!"),
                  (True, "You will help me."),
                  (False, "Do it if you think it is necessary."),
                  (False, "I hope you will help me."),
                  (False, "I can help you."),
                  (False, "I can help you if you let me.")):
         self.assertEqual(imperative(en.Sentence(en.parse(s))), b)
     # Fixed: was a Python 2 print statement; the rest of this file uses print().
     print("pattern.en.modality.imperative()")
Ejemplo n.º 8
0
    def add_keywords(self, phrase):
        """Parse *phrase* and feed its nouns, adjectives and noun-phrase
        epithets into the blackboard pool."""
        parsed = en.Sentence(en.parse(phrase))
        pool = self.blackboard.pool

        # Singularized nouns.
        pool.nouns.update(
            Word(en.singularize(m[0].string)) for m in search('NN', parsed))
        # Lemmatized adjectives.
        pool.adjectives.update(
            Word(en.lemma(m[0].string)) for m in search('JJ', parsed))

        # For every noun phrase, map each noun to the adjectives in the
        # same phrase (its epithets).
        try:
            for np in search('NP', parsed):
                epithets = {}
                for w in np:
                    if "NN" in w.tag:
                        epithets[Word(en.singularize(w.string), "NN")] = [
                            Word(jj.string, "JJ")
                            for jj in np if "JJ" in jj.tag]
                pool.epithets.update(epithets)
        except IndexError:
            # Matching can fail on malformed phrases; skip them silently,
            # as the original did.
            pass
Ejemplo n.º 9
0
    def process(self, message):
        """Generate a chat response for *message*.

        Pipeline: handle control commands (>!train, >!forget, >!load_page);
        then, per sentence: classify the dialogue act, score sentiment /
        modality / mood, query the AIML kernel, and store or retrieve facts
        in the per-session memory; finally assemble the reply, falling back
        to a web search when nothing else produced output.
        """
        # print pattern_en.suggest(message) -- suggestions
        if message == ">!train":
            self.train()
            return "It is nice to learn new stuff."
        if message == ">!forget":
            memory.clear()
            return "I am reborn. So much free space :) maybe you will use files to store memory and not RAM..."
        if message == ">!load_page":
            if sessionId not in memory:
                response = "Hello! My name is Chad and I am passionate about music."
                response += "We can share our experiences and maybe we can get along."
                response += "Would you mind telling me your name first?"
                expect[sessionId] = "name"
                memory[sessionId] = dict()
            else:
                response = "Welcome back!"
                search.search("new songs")
                with open('results.json') as data_file:
                    data = json.load(data_file)
                    for i in range(10):
                        if 'musicrecording' in data['items'][i]['pagemap']:
                            mr = data['items'][i]['pagemap']['musicrecording']
                            which = random.randint(0, len(mr) - 1)
                            if 'name' not in mr[which]:
                                response += " Did you know that " + mr[which][
                                    'byartist'] + " has released a new song?"
                            else:
                                response += " You can check out this cool song, " + mr[which]['name'] + ", by " + \
                                            mr[which]['byartist']
            return response

        s = nlp.get_sentences(message)

        # Guard: the memory module below indexes memory[sessionId]; without
        # this a session that never sent ">!load_page" raised KeyError.
        memory.setdefault(sessionId, dict())

        doc = spacy_nlp(message)
        for w in doc:
            # Fixed: was a Python 2 print statement.
            print("(", w, w.dep_, w.pos_, w.head, ")")

        aiml_sent_type = []
        aiml_responses = []
        memory_responses = []
        sentence_types = []
        emotions = []

        for sentence in s:
            sentence_type = self.instant_classifier.classify(
                dialogue_act_features(sentence))

            sentence_types.append(sentence_type)

            polarity, subjective = pattern_en.sentiment(sentence)
            sent = pattern_en.parse(sentence, lemmata=True)
            sent = pattern_en.Sentence(sent)
            modality = pattern_en.modality(sent)
            mood = pattern_en.mood(sent)

            # Map polarity to an emotion label, checking thresholds from
            # strongest to weakest so every branch is reachable. (In the
            # original order, "COOL" (> 0.4) was shadowed by "GOOD SURPRISE"
            # (> 0.3), and "ANGER" (< -0.7) by "SAD" (< -0.1).)
            if polarity > 0.8:
                emotions.append("SUPER HAPPY")
            elif polarity > 0.4:
                emotions.append("COOL")
            elif polarity > 0.3:
                emotions.append("GOOD SURPRISE")
            elif polarity < -0.7:
                emotions.append("ANGER")
            elif polarity < -0.4:
                emotions.append("FEAR")
            elif polarity < -0.1:
                emotions.append("SAD")
            else:
                emotions.append("NEUTER")

            # Fixed: was a Python 2 print statement.
            print(sentence_type, polarity, subjective, modality, mood)

            if sentence_type not in ["whQuestion", "ynQuestion"]:
                try:
                    aiml_sent_type_res = self.kernel.respond(
                        sentence_type, sessionId)
                except Exception:
                    # Kernel errors degrade to an empty response (best effort).
                    aiml_sent_type_res = ""
                aiml_sent_type.append(aiml_sent_type_res)

            # Collect (subject, verb) pairs via the dependency parse.
            verbs_subj = set()
            sentence = sentence[0].upper() + sentence[1:]
            doc = spacy_nlp(sentence)
            for possible_subject in doc:
                if (possible_subject.dep == nsubj or possible_subject.dep
                        == nsubjpass) and possible_subject.head.pos == VERB:
                    verbs_subj.add((possible_subject, possible_subject.head))

            try:
                aiml_response = self.kernel.respond(sentence, sessionId)
            except Exception:
                aiml_response = ""
            aiml_responses.append(aiml_response)

            # MEMORY MODULE
            memory_msg = ""
            if sentence_type == "Statement":
                # Insert facts into memory: memory[session][verb][subject] = object.
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    # Conjoined subjects ("Alice and Bob ...") share the verb.
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        # Prepend possessive/adjectival modifiers ("my car").
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    vb = i[1].lower_
                    if vb not in memory[sessionId]:
                        memory[sessionId][vb] = dict()
                    for subj in subjs:
                        for c in i[1].children:
                            if c.dep in [prep]:
                                # Prepositional object: store "prep + object".
                                memory[sessionId][vb][subj] = c.lower_ + " "
                                for c_prep in c.children:
                                    if c_prep.dep in [dobj, pobj, attr]:
                                        memory[sessionId][vb][
                                            subj] += c_prep.text
                                        memory_responses.append(
                                            self.kernel.respond(
                                                "memorate", sessionId))
                            elif c.dep in [dobj, pobj, attr]:
                                memory[sessionId][vb][subj] = c.text
                                memory_responses.append(
                                    self.kernel.respond("memorate", sessionId))
            elif sentence_type == "whQuestion":
                # Retrieve facts: match the question's verb against stored
                # verbs by word-vector similarity, then look up the subjects.
                for i in verbs_subj:
                    subjs = []
                    subjects = [i[0]]
                    for tok in i[0].children:
                        if tok.dep == conj:
                            subjects.append(tok)

                    for subj in subjects:
                        predec = ""
                        for tok in subj.children:
                            if tok.dep_ == "poss" or tok.dep == amod:
                                predec += tok.lower_
                        if len(predec) > 0:
                            subjs.append(predec + " " + subj.lower_)
                        else:
                            subjs.append(subj.lower_)

                    max_similarity = 0
                    verb = i[1].lower_
                    for j in memory[sessionId]:
                        p_word = spacy_nlp(j)
                        similarity = i[1].similarity(p_word[0])
                        if similarity > max_similarity:
                            max_similarity = similarity
                            verb = j
                    if max_similarity > 0.5 and verb in memory[sessionId]:
                        num_subjs = len(subjs)
                        memory_msg = ""
                        for subj in subjs:
                            if subj in memory[sessionId][verb]:
                                toks = nlp.tokenize_text(subj)
                                memory_msg = ""
                                for t in toks:
                                    # Flip first-person pronouns ("I" -> "you").
                                    if t in first_person:
                                        memory_msg += pron_translate[t] + " "
                                    else:
                                        memory_msg += t + " "
                                num_subjs -= 1
                                if num_subjs > 2:
                                    memory_msg += ", "
                                elif num_subjs == 1:
                                    memory_msg += "and "
                        if len(memory_msg) > 0:
                            memory_msg += verb + " "
                            if num_subjs != len(subjs):
                                memory_msg += memory[sessionId][verb][
                                    subjs[-1]] + "."
            memory_responses.append(memory_msg)

        # Assemble the candidate responses, skipping empty strings.
        arr_response = []

        for i in aiml_sent_type:
            if len(i) > 0:
                arr_response.append(i)

        for i in aiml_responses:
            if len(i) > 0:
                arr_response.append(i)

        for i in memory_responses:
            if len(i) > 0:
                arr_response.append(i)

        # Fallback: web search, answer with the first sentence of the snippet.
        if len(arr_response) == 0:
            data = search.search(message)
            snip = data['items'][0]['snippet']
            sents = nlp.get_sentences(snip)
            arr_response.append(sents[0])

        response = ""

        # Optionally prepend one emotion interjection (50% chance).
        for i in emotions:
            try:
                emoi = self.kernel.respond(i, sessionId)
            except Exception:
                emoi = None
            if emoi is not None:
                if random.randint(0, 100) < 50:
                    response += " " + emoi + "."
                    break

        for res in arr_response:
            if len(res) > 1:
                response += res + " "

        # Generic response, if no response was produced at all.
        restoks = nlp.tokenize_text(response)
        if len(restoks) == 0:
            idx = random.randint(0, len(sentence_types) - 1)
            try:
                aiml_response = self.kernel.respond(sentence_types[idx],
                                                    sessionId)
            except Exception:
                aiml_response = ""
            response += aiml_response

        return response
Ejemplo n.º 10
0
 def test_chunk_modifiers(self):
     """Assert nearby adjectives/adverbs with no role attach to the VP."""
     sentence = en.Sentence(en.parse("Perhaps you should go."))
     # "perhaps" (chunk 0) modifies the VP "should go" (chunk 2).
     expected = [sentence.chunk[0]]
     self.assertEqual(sentence.chunk[2].modifiers, expected)
     print("pattern.en.Chunk.modifiers")
Ejemplo n.º 11
0
 def test_chunk_conjunctions(self):
     """Assert conjunct/disjunct chunk pairs ("black cat" AND "white cat")."""
     sentence = en.Sentence(en.parse("black cat and white cat"))
     conjunctions = sentence.chunk[0].conjunctions
     self.assertEqual(conjunctions, [(sentence.chunk[1], en.AND)])
     print("pattern.en.Chunk.conjunctions()")
Ejemplo n.º 12
0
 def __steaming(self, sentence):
     """Return *sentence* reduced to its space-joined lemmata."""
     parsed = lemEng.parse(sentence, lemmata=True)
     lemmas = lemEng.Sentence(parsed).lemmata
     return ' '.join(lemmas)