コード例 #1
0
def SVOS(corpus):
    """Collect subject-verb-object results for every sentence in ``corpus``.

    Each sentence has its last character dropped (``sent[:-1]``) before it is
    parsed with ``nlp``. Sentences for which ``findSVOs`` returns nothing are
    skipped.

    Args:
        corpus: Iterable of sentence strings.

    Returns:
        A list holding one non-empty ``findSVOs`` result per sentence that
        produced at least one triple, in corpus order.
    """
    svos = []
    for sent in corpus:
        # Parse and extract once per sentence; no up-front parse of
        # corpus[1], so short or empty corpora are handled without IndexError.
        found = findSVOs(nlp(sent[:-1]))
        if len(found) > 0:
            svos.append(found)
    return svos
コード例 #2
0
ファイル: lang_gen.py プロジェクト: UtilityHotbar/rpgtools
 def clever_translate(self, target_phrase):
     """Translate ``target_phrase`` one subject-verb-object triple at a time.

     The phrase is parsed and its SVO triples extracted. When none are found
     the whole phrase is passed to ``brute_force_translate``. Otherwise each
     triple is turned into an ``{'(S)', '(V)', '(O)'}`` tree and expanded with
     ``expand_phrase``; words of the phrase that appear in no triple are
     brute-force translated and appended after a single space.
     """
     parsed = subject_verb_object_extract.nlp(target_phrase)
     triples = subject_verb_object_extract.findSVOs(parsed)
     vbprint(triples)
     if not triples:
         # If no subject verb objects found, just brute force translate the whole thing
         return self.brute_force_translate(target_phrase)

     translated_parts = []
     consumed = []
     for triple in triples:
         for chunk in triple:
             consumed.extend(chunk.lower().split())
         tree = {'(S)': '', '(V)': '', '(O)': ''}
         try:
             # Fill the slots in S, V, O order; a short triple leaves the
             # remaining slots at their '' default.
             for slot, position in (('(S)', 0), ('(V)', 1), ('(O)', 2)):
                 tree[slot] = triple[position].split()
         except IndexError:
             pass
         translated_parts.append(self.expand_phrase(tree))

     # Words not covered by any triple become riders at the end.
     leftovers = [
         word for word in target_phrase.split()
         if word.lower() not in consumed
     ]
     body = ' '.join(translated_parts)
     return body + ' ' + self.brute_force_translate(' '.join(leftovers))
コード例 #3
0
 def test_svo_14(self):
     """Chained relative clauses each contribute their own triple."""
     tok = nlp("the boy raced the girl who had a hat that had spots.")
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {
         ('the boy', 'raced', 'the girl'),
         ('who', 'had', 'a hat'),
         ('a hat', 'had', 'spots'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #4
0
 def test_svo_11(self):
     """A multi-clause sentence yields only the 'hit' and 'kill' triples."""
     tok = nlp(
         "because he hit me and also made me so angry I wanted to kill him with a hammer."
     )
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {('he', 'hit', 'me'), ('I', 'kill', 'him')}
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #5
0
 def test_svo_1(self):
     """The noun heading a relative clause is the subject of both verbs."""
     tok = nlp("the annoying person that was my boyfriend hit me")
     svos = findSVOs(tok)
     printDeps(tok)  # just show what printDeps() does
     expected = {
         ('the annoying person', 'was', 'my boyfriend'),
         ('the annoying person', 'hit', 'me'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #6
0
def NER_all(text):
    """Return the distinct named-entity strings found in ``text``.

    Args:
        text: Raw text to run through the ``en_core_web_sm`` pipeline.

    Returns:
        A list of unique ``ent.text`` values, in the order they first occur
        in ``doc.ents``.
    """
    nlp = en_core_web_sm.load()
    doc = nlp(text)
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would hand the entities back in an arbitrary order that can change
    # between runs because of string hash randomisation.
    return list(dict.fromkeys(ent.text for ent in doc.ents))
コード例 #7
0
 def test_svo_13(self):
     """Conjoined subjects and conjoined objects expand to every pairing."""
     tok = nlp("he and his brother shot me and my sister")
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {
         ('he', 'shot', 'me'),
         ('he', 'shot', 'my sister'),
         ('his brother', 'shot', 'me'),
         ('his brother', 'shot', 'my sister'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #8
0
 def test_svo_2(self):
     """Negated clauses in a multi-sentence text get a '!' verb prefix."""
     tok = nlp(
         "making $12 an hour? where am i going to go? I have no other financial assistance available and he certainly won't provide support."
     )
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {
         ('I', '!have', 'other financial assistance available'),
         ('he', '!provide', 'support'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #9
0
 def test_svo_8(self):
     """A relative clause with conjoined objects gives one triple per object."""
     tok = nlp("he is an evil man that hurt my child and sister")
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {
         ('he', 'is', 'an evil man'),
         ('an evil man', 'hurt', 'my child'),
         ('an evil man', 'hurt', 'sister'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #10
0
 def test_svo_6(self):
     """Two comma-joined negated clauses both get a '!' verb prefix."""
     tok = nlp(
         "I have no other financial assistance available, and he certainly won't provide support."
     )
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {
         ('I', '!have', 'other financial assistance available'),
         ('he', '!provide', 'support'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #11
0
def extractSVO(dir_path):
    """Extract SVO triples from every ``.txt`` file directly inside ``dir_path``.

    Each matching file is read in full, parsed with ``nlp``, and the result of
    ``findSVOs`` is handed to ``tuplesToFile``. When ``dir_path`` is not a
    directory a message is printed and nothing else happens.

    Args:
        dir_path: Directory to scan, as a ``str`` or path-like object.

    Returns:
        None.
    """
    if os.path.isdir(dir_path):
        print("Extracting Files from " + str(dir_path))
        for file in os.listdir(dir_path):
            if file.endswith(".txt"):
                # Context manager so the handle is closed as soon as the
                # text has been read, instead of leaking one per file.
                with open(os.path.join(dir_path, file)) as handle:
                    file_string = handle.read()
                tokens = nlp(file_string)
                svos = findSVOs(tokens)
                tuplesToFile(svos)

    else:
        # str() keeps this branch working for path-like arguments, matching
        # the conversion used in the directory branch.
        print(str(dir_path) + " is not a directory")
コード例 #12
0
 def test_svo_25(self):
     """A long sentence yields the 'neared' and 'monitored' triples.

     The expected objects include their trailing "of ..." phrases.
     """
     tok = nlp(
         "Seated in Mission Control, Chris Kraft neared the end of a tedious Friday afternoon as he monitored a seemingly interminable ground test of the Apollo 1 spacecraft."
     )
     # printDeps(tok)
     svos = findSVOs(tok)
     expected = {
         ('Chris Kraft', 'neared', 'the end of a tedious Friday afternoon'),
         ('he', 'monitored',
          'a interminable ground test of the Apollo spacecraft'),
     }
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #13
0
def extractSVO(dir_path):
    """Extract SVO triples from every ``.txt`` file directly inside ``dir_path``.

    Each matching file is read in full, parsed with ``nlp``, and the result of
    ``findSVOs`` is handed to ``tuplesToFile``; other entries are logged as
    skipped. When ``dir_path`` is not a directory a critical message is
    logged and nothing else happens.

    Args:
        dir_path: Directory to scan, as a ``str`` or path-like object.

    Returns:
        None.
    """
    if os.path.isdir(dir_path):
        logging.info("Extracting Files from %s", dir_path)
        for file in os.listdir(dir_path):
            if file.endswith(".txt"):
                logging.info("Parsing file %s", file)
                # Context manager so the handle is closed as soon as the
                # text has been read, instead of leaking one per file.
                with open(os.path.join(dir_path, file)) as handle:
                    file_string = handle.read()
                tokens = nlp(file_string)
                svos = findSVOs(tokens)
                tuplesToFile(svos)
            else:
                logging.info("Skipping file %s", file)

    else:
        # %-style arguments avoid str + path concatenation, which raises
        # TypeError for path-like inputs. logging.critical is the supported
        # spelling of logging.fatal and logs at the same level.
        logging.critical("%s is not a directory", dir_path)
コード例 #14
0
ファイル: lang_gen.py プロジェクト: UtilityHotbar/rpgtools
 def process_word(self, word):
     """Return ``word`` as processed by ``subject_verb_object_extract.nlp``."""
     parsed = subject_verb_object_extract.nlp(word)
     return parsed
コード例 #15
0
 def test_svo_10(self):
     """'wanted to kill' is reported as a single triple for 'kill'."""
     tok = nlp("I wanted to kill him with a hammer.")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('I', 'kill', 'him')})
コード例 #16
0
 def test_svo_9(self):
     """Only the main clause ('he told me') produces a triple."""
     tok = nlp(
         "he told me i would die alone with nothing but my career someday")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('he', 'told', 'me')})
コード例 #17
0
 def test_svo_22(self):
     """Conjoined verbs share the same subject and object."""
     tok = nlp("he beat and hurt me")
     # printDeps(tok)
     svos = findSVOs(tok)
     expected = {('he', 'beat', 'me'), ('he', 'hurt', 'me')}
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #18
0
import sys
from subject_verb_object_extract import findSVOs, nlp

# Rebuild the command-line arguments into one string; every word, including
# the last one, is followed by a single space.
str1 = ''.join(word + ' ' for word in sys.argv[1:])

# str1 = "Then there’s a development setback on top of that that pushes you even further back."

# Parse the text, extract its subject-verb-object triples and print them.
tokens1 = nlp(str1)
svos1 = findSVOs(tokens1)
print(svos1)
コード例 #19
0
 def test_svo_4(self):
     """A trailing 'with ...' phrase is not included in the object."""
     tok = nlp("They ate the pizza with anchovies.")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('They', 'ate', 'the pizza')})
コード例 #20
0
from subject_verb_object_extract import findSVOs, printDeps, nlp

# Parse one sample sentence, extract its triples, pass the parse to
# printDeps, then print the triples.
sentence = "expert spacy users are very kind to dogs"
tok = nlp(sentence)
svos = findSVOs(tok)
printDeps(tok)
print(svos)

コード例 #21
0
 def test_svo_16(self):
     """A negated verb gets '!' and the object of 'on' becomes the object."""
     tok = nlp("he didn't spit on me")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('he', '!spit', 'me')})
コード例 #22
0
 def test_svo_24(self):
     """Passive voice is reported in active form with the verb's base form."""
     tok = nlp("lessons were taken by me")
     # printDeps(tok)
     svos = findSVOs(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('me', 'take', 'lessons')})
コード例 #23
0
 def test_svo_23(self):
     """Passive voice: the 'by' agent becomes the subject of the triple."""
     tok = nlp("I was beaten by him")
     # printDeps(tok)
     svos = findSVOs(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('him', 'beat', 'I')})
コード例 #24
0
from subject_verb_object_extract import findSVOs, nlp

str1 = "Then there’s a development setback on top of that that pushes you even further back."
str2 = "And that goes with that we’re going to do things differently, but we haven’t done that yet."
str3 = ("Seated in Mission Control, Chris Kraft neared the end of a tedious Friday afternoon as he monitored a "
        "seemingly interminable ground test of the Apollo 1 spacecraft.")

# Run the same parse / extract / report sequence on each sample, printing a
# blank line and the 1-based sample number before the sentence and its triples.
for number, sample in enumerate((str1, str2, str3), start=1):
    tokens = nlp(sample)
    svos = findSVOs(tokens)
    print("\n" + str(number))
    print(sample)
    print(svos)
コード例 #25
0
from subject_verb_object_extract import findSVOs, printDeps, nlp

# For each sample sentence: parse it, extract its triples, pass the parse to
# printDeps, then print the triples.
for sentence in ("expert spacy users are very kind to dogs",
                 "both sides should understand that"):
    tok = nlp(sentence)
    svos = findSVOs(tok)
    printDeps(tok)
    print(svos)
コード例 #26
0
 def test_svo_7(self):
     """'did not kill' is reported as the negated verb '!kill'."""
     tok = nlp("he did not kill me")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('he', '!kill', 'me')})
コード例 #27
0
ファイル: role_generator.py プロジェクト: ksatvat/EXTRACTOR
def roles(sentences):
    """Build role-labelled node lists for every sentence in ``sentences``.

    Per sentence the function:

    1. runs the SRL predictor and collects the bracketed pieces of each verb
       frame description into a node list;
    2. when no node list has at least three entries, adds nodes from
       ``findSVOs`` and from a naive split of the sentence around each verb;
    3. de-duplicates entries, keeps node lists with more than two entries,
       lemmatizes the ``V`` entry, drops selected modifier entries whose text
       is in a stop list, and maps known verbs onto a canonical label
       (``unlink``, ``write``, ``read``, ...).

    Progress is reported with ``print`` throughout.

    Args:
        sentences: Indexable sequence of sentence strings.

    Returns:
        A list with the surviving node lists of all sentences concatenated.
    """
    # NOTE(review): my_svo_triplet is never appended to in this function, so
    # the merge near the end of the loop never runs as written.
    my_svo_triplet = []
    all_nodes = []
    for i in range(len(sentences)):
        # public SRL model https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz
        # NOTE(review): the predictor is re-created from the archive on every
        # iteration; hoisting it out of the loop looks safe — confirm.
        predictor = Predictor.from_path("srl-model.tar.gz")
        predictions = predictor.predict(sentences[i])
        lst = []
        nodes = []
        # Keep only verb frames whose description has more than one '['.
        for k in predictions['verbs']:
            if k['description'].count('[') > 1:
                lst.append(k['description'])
        # One node list per kept frame, holding the text inside each [...].
        for jj in range(len(lst)):
            nodes.append([])
            for j in re.findall(r"[^[]*\[([^]]*)\]", lst[jj]):
                nodes[jj].append(j)
        print("*****sentence:", sentences[i], '*****nodes: ', nodes)

        # When the first entry is a verb listed in main_verbs and a later
        # non-verb entry contains an IOC (per iocs.list_of_iocs), prepend a
        # placeholder " ARG-NEW: *" entry.
        # NOTE(review): `indx` and `n` are unused. After the first insert
        # lis_[0] is the placeholder, so the outer condition stops matching
        # on later passes, while the inner `for j` loop keeps running over
        # the shifted list.
        for lis_ in nodes:
            for indx in range(len(lis_)):
                if lis_[0].split(
                        ":", 1)[0].lower().strip() == "v" and lis_[0].split(
                            ":", 1)[1].lower().strip() in main_verbs:
                    n = len(lis_)
                    for j in range(1, len(lis_)):
                        if lis_[j].split(":", 1)[0].lower() != "v":
                            if len(iocs.list_of_iocs(lis_[j].split(":",
                                                                   1)[1])) > 0:
                                lis_.insert(0, " ARG-NEW: *")

        # Fall back to other extractors when SRL produced no node list with
        # at least three entries.
        maxlength = 0
        if nodes:
            maxlength = max((len(i) for i in nodes))
        if nodes == [] or maxlength < 3:
            print("****DP SVO****")
            tokens = nlp(sentences[i])
            svos = findSVOs(tokens)
            if svos:
                for sv in range(len(svos)):
                    # Only complete (subject, verb, object) triples are used.
                    if len(svos[sv]) == 3:
                        print('Dependency SVO(s):', [
                            "ARG0: " + svos[sv][0], "V: " + svos[sv][1],
                            "ARG1: " + svos[sv][2]
                        ])
                        nodes.append([
                            "ARG0: " + svos[sv][0], "V: " + svos[sv][1],
                            "ARG1: " + svos[sv][2]
                        ])
            print("Dependency-SVO added nodes: ", nodes)

            print("****Naive SVO****")
            breakers = []
            subj, obj = '', ''
            doc = nlp(sentences[i])
            # Every token tagged VERB becomes a split point.
            for token in doc:
                if token.pos_ == 'VERB':
                    breakers.append(str(token))
            if len(breakers) != 0:
                for vb in breakers:
                    # Text before the first occurrence of the verb string is
                    # the subject; the segment right after it is the object.
                    subj = "subj: " + sentences[i].split(vb)[0]
                    obj = "obj: " + sentences[i].split(vb)[1]
                    vrb = "v: " + vb
                    lst = []
                    lst.append(subj)
                    lst.append(vrb)
                    lst.append(obj)
                    nodes.append(lst)
            print("Naive Nodes: ", nodes)

        if nodes != []:
            # NOTE(review): the loops below rebind `i`, the outer sentence
            # index. sentences[i] is not read again in this iteration, so
            # this is harmless as written but fragile.
            # Drop repeated entries inside each node list, keeping order.
            zero_dunplicate_removed = []
            for i in nodes:
                zero_dunplicate_removed.append(list(dict.fromkeys(i)))
            # NOTE(review): the else branch rebinds no_zero_nodes to the very
            # list being iterated, so a later append in the if branch grows
            # that list during iteration and can duplicate entries — confirm
            # the intended behaviour.
            no_zero_nodes = []
            for i in zero_dunplicate_removed:
                if '.' in i:
                    i.remove('.')
                    no_zero_nodes.append(i)
                else:
                    no_zero_nodes = zero_dunplicate_removed

            # Keep node lists with more than two entries.
            no_zero_nodes_plus_3 = []
            for i in no_zero_nodes:
                if len(i) > 2:
                    no_zero_nodes_plus_3.append(i)

            # Modifier texts that may be dropped from a node list.
            # NOTE(review): candidates are lower-cased before lookup, so the
            # capitalised 'However' entry can never match.
            removeable_items_list = [
                'both', 'also', 'that', 'would', 'could', 'immediately',
                'usually', 'for', 'when', 'then', 'will', 'which', 'first',
                'second', 'third', 'forth', 'fifth', 'internally', 'where',
                'while', 'either', 'nither', 'when', 'sever', 'successfully',
                'also', 'to', 'above', 'already', 'recently', 'may', 'however',
                'can', 'once loaded', 'in fact', 'in this way', 'all',
                'actually', 'inadvertently', 'instead',
                'when copying themselves', 'automatically', 'should', 'can',
                'could', 'necessarily', 'if found', 'randomly', 'again',
                'still', 'generally', 'slowly', 'ever', 'shall', 'newly',
                'However', 'when executed', 'subsequently'
            ]

            # Lemmatize the text of every "V: ..." entry as a verb.
            # NOTE(review): `word` is assigned but never used.
            for i in range(len(no_zero_nodes_plus_3)):
                for index, item in enumerate(no_zero_nodes_plus_3[i]):
                    if item.split(': ')[0] == 'V':
                        word = item.split(': ')[1]
                        no_zero_nodes_plus_3[i][
                            index] = "V: " + WordNetLemmatizer().lemmatize(
                                item.split(": ")[1].lower(), 'v')

            # First pruning pass: delete modifier / reference entries whose
            # text is in removeable_items_list; report the others.
            # NOTE(review): deleting from the list while enumerating it
            # shifts later items down, so the entry right after a deleted
            # one is skipped; the second pass below presumably exists to
            # catch those — confirm.
            for i in range(len(no_zero_nodes_plus_3)):

                if no_zero_nodes_plus_3[i]:
                    for index, item in enumerate(no_zero_nodes_plus_3[i]):

                        if 'ARGM-MOD:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-ADV:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-TMP:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-MNR:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'R-ARG1:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'R-ARG0:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-DIS:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-PRP:' in item:
                            if item.split(
                                    ': ',
                                    1)[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

            # Second pruning pass over six of the eight labels (ARGM-DIS and
            # ARGM-PRP are not repeated). It splits on every ': ' instead of
            # only the first, so only the text up to a second ': ' is
            # compared.
            for i in range(len(no_zero_nodes_plus_3)):
                if no_zero_nodes_plus_3[i]:
                    for index, item in enumerate(no_zero_nodes_plus_3[i]):

                        if 'ARGM-MOD:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-ADV:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-TMP:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'ARGM-MNR:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'R-ARG1:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

                        if 'R-ARG0:' in item:
                            if item.split(
                                    ': ')[1].lower() in removeable_items_list:
                                del no_zero_nodes_plus_3[i][index]
                            else:
                                print("##### NEW Exception: ", item)

            # Verb groups; each group is rewritten to one canonical label in
            # the loop below.
            v_unlink = [
                'delete', 'clear', 'remove', 'erase', 'wipe', 'purge',
                'expunge'
            ]
            v_write = [
                'entrench', 'exfiltrate', 'store', 'drop', 'drops', 'install',
                'place', 'deploy', 'implant', 'write', 'putfile', 'compose',
                'create', 'creates', 'copy', 'copies', 'save', 'saved',
                'saves', 'add', 'adds', 'modify', 'modifies', 'append',
                'appends'
            ]
            v_read = [
                'survey', 'download', 'navigate', 'locate', 'read', 'gather',
                'extract', 'extracts', 'obtain', 'acquire', 'check', 'checks',
                'detect', 'detects', 'record', 'records'
            ]
            v_exec = [
                'use', 'execute', 'executed', 'run', 'ran', 'launch', 'call',
                'perform', 'list', 'invoke', 'inject', 'open', 'opened',
                'target', 'resume', 'exec'
            ]
            v_mmap = ['allocate', 'assign']
            v_fork = [
                'clone', 'clones', 'spawned', 'spawn', 'spawns', 'issue', 'set'
            ]
            v_setuid = ['elevate', 'impersonated']
            v_send = [
                'send', 'sent', 'transfer', 'post', 'postsinformation',
                'postsinformations', 'move', 'transmit', 'deliver', 'push',
                'redirect', 'redirects'
            ]
            v_receive = ['receive', 'accept', 'take', 'get', 'gets', 'collect']
            v_connect = [
                'click', 'browse', 'browses', 'connect', 'connected',
                'portscan', 'connects', 'alerts', 'communicates', 'communicate'
            ]
            v_chmod = [
                'chmod', 'change permission', 'changes permission',
                'permision-modifies', 'modifies permission',
                'modify permission'
            ]
            v_load = ['load', 'loads']
            v_exit = [
                'terminate', 'terminates', 'stop', 'stops', 'end', 'finish',
                'break off', 'abort', 'conclude'
            ]
            # NOTE(review): v_2D is defined but not used in this function.
            v_2D = {'collect': ('read', 'receive'), 'open': ('exec', 'fork')}

            # Replace each "V: <verb>" entry with its group's canonical
            # label; the first matching group in the chain wins, and verbs in
            # no group are left unchanged.
            for i in range(len(no_zero_nodes_plus_3)):
                for index, item in enumerate(no_zero_nodes_plus_3[i]):
                    if item.split(': ')[0] == 'V':
                        if item.split(': ')[1] in v_unlink:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'unlink'
                        elif item.split(': ')[1] in v_write:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'write'
                        elif item.split(': ')[1] in v_read:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'read'
                        elif item.split(': ')[1] in v_exec:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'exec'
                        elif item.split(': ')[1] in v_mmap:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'mmap'
                        elif item.split(': ')[1] in v_fork:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'fork'
                        elif item.split(': ')[1] in v_setuid:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'setuid'
                        elif item.split(': ')[1] in v_send:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'send'
                        elif item.split(': ')[1] in v_receive:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'receive'
                        elif item.split(': ')[1] in v_connect:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'connect'
                        elif item.split(': ')[1] in v_chmod:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'chmod'
                        elif item.split(': ')[1] in v_load:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'load'
                        elif item.split(': ')[1] in v_exit:
                            no_zero_nodes_plus_3[i][index] = 'V: ' + 'exit'
        else:
            # Nothing was extracted for this sentence; move on to the next.
            continue
        all_nodes += no_zero_nodes_plus_3
        if my_svo_triplet:
            all_nodes += my_svo_triplet
    print('*****all_nodes:::', all_nodes)
    return all_nodes
コード例 #28
0
 def test_svo_21(self):
     """Objects joined by 'nor' each get a negated triple."""
     tok = nlp("he didn't spit on me nor my child")
     svos = findSVOs(tok)
     # printDeps(tok)
     expected = {('he', '!spit', 'me'), ('he', '!spit', 'my child')}
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), expected)
コード例 #29
0
        return True
    return False
 
# NOTE(review): urlopen() is called without a URL, which raises TypeError as
# written — the target address appears to be missing from this snippet.
html = urllib.request.urlopen()
soup = BeautifulSoup(html)
data = soup.find("div", {"class": className})
# Text of every <p> element inside the selected <div>.
paras = [paragraph.text for paragraph in data.findAll("p")]

nlp = spacy.load('en_core_web_lg')

# Create a NeuralCoref component and register it on the spaCy pipeline.
coref = neuralcoref.NeuralCoref(nlp.vocab)
nlp.add_pipe(coref, name='neuralcoref')

# Replace each paragraph with its coreference-resolved text.
paras = [nlp(para)._.coref_resolved for para in paras]


# For SVO extraction: less accurate
# allsvos = []
# for para in paras:
#     tokens = nlp(sent)
#     svos = findSVOs(tokens)
#     allsvos.extend(svos)


testData = []
for para in paras:
    instances = getInstances(para)
    # Score each distinct lower-cased instance against thresholdWord, with
    # spaces in the instance replaced by underscores.
    unique_words = list(set([a.lower() for a in instances]))
    ls = [(word, getSim(word.replace(" ", "_"), thresholdWord))
          for word in unique_words]
    # Distinct unordered pairs of the instances kept by filter_dissimilar.
    ls = list(set(itertools.combinations(filter_dissimilar(ls), 2)))
コード例 #30
0
 def test_svo_3(self):
     """A contracted negation ("don't have") is reported as '!have'."""
     tok = nlp("I don't have other assistance")
     svos = findSVOs(tok)
     # printDeps(tok)
     # assertEqual (not assertTrue(a == b)) so a failure shows the set diff.
     self.assertEqual(set(svos), {('I', '!have', 'other assistance')})