Example #1
def coref_the_following_colon(stri):
    final_txt = ''
    c = fl = 0
    list1 = load_lists(fpath)['TFCL']
    list1 = list1.replace("'", "").strip('][').split(', ')
    for sentence in sent_tokenize(stri):
        c += 1
        for value in list1:
            if value in sentence:
                sentence = sentence.strip()  # get rid of trailing whitespace
                if sentence[-1] == ".":
                    sentence = sentence[:-1]  # remove the trailing dot
                if ":" in sentence:
                    final_txt += " " + sentence.replace(value, " ") + ". "
                    fl += 1
                break
        if c > fl:
            final_txt += " " + sentence
            fl += 1
    return final_txt
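
The examples below all parse their pattern lists the same way; a minimal, self-contained sketch of that idiom (the sample string is hypothetical): load_lists() stores each list as a stringified Python list, so it is converted back with replace/strip/split.

raw = "['the following:', 'as follows:', 'the following list:']"
parsed = raw.replace("'", "").strip('][').split(', ')
print(parsed)  # ['the following:', 'as follows:', 'the following list:']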
Example #2
def colon_seprator_multiplication(stri):
    coref_reference_list = load_lists(fpath)['TFL']
    coref_reference_list = coref_reference_list.replace(
        "'", "").strip('][').split(', ')
    stri = stri.rstrip().rstrip('.')
    result = ""
    for item in sent_tokenize(stri):
        flag = False
        for reference in coref_reference_list:
            if reference in item and ":" in item:
                sentence_splits = item.split(":", 1)
                y = iocs.list_of_iocs(sentence_splits[1])
                if y:
                    # Replicate the head of the sentence once per IOC found
                    # after the colon.
                    head = sentence_splits[0].rstrip(":")
                    for ioc in y:
                        result += head.replace(reference, ioc) + " . "
                else:
                    result += sentence_splits[0].replace(reference,
                                                         sentence_splits[1])
                flag = True
                break
        if not flag:
            result += item + " "
    if not result.rstrip().endswith("."):
        result += "."
    return result
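
A self-contained sketch of the sentence-replication step, with a regex stub standing in for iocs.list_of_iocs() (the stub and sample sentence are assumptions, not the project's actual API):

import re

def list_of_iocs_stub(text):  # hypothetical stand-in: IPv4 addresses only
    return re.findall(r'\b(?:\d{1,3}\.){3}\d{1,3}\b', text)

sent = "It connects to the following: 1.2.3.4, 5.6.7.8"
head, tail = sent.split(":", 1)
for ioc in list_of_iocs_stub(tail):
    print(head.replace("the following", ioc) + " .")
# It connects to 1.2.3.4 .
# It connects to 5.6.7.8 .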
Example #3
def astriks(lis):
    apps_process = load_lists(fpath)['APPs-PROCESS']
    apps_process = apps_process.replace("'", "").strip('][').split(' , ')
    updated_list = [[] for _ in range(len(lis))]
    for jj, lst in enumerate(lis):
        for i in range(len(lst)):
            if ":" not in lst[i]:
                lst[i] = "TMP: " + lst[i]
            leftnode, rightnode = lst[i].split(":", 1)
            lOFioc = iocs.list_of_iocs(rightnode)
            found_app = [
                app for app in apps_process if app in rightnode.lower()
            ]
            if len(found_app) > 1:
                found_app = list(group_partials(found_app))
            found_app = process_convert(found_app)
            if not lOFioc:
                if leftnode.lower() == "v":
                    updated_list[jj].append(lst[i])
                elif found_app:
                    for process in found_app:
                        updated_list[jj].append(leftnode + ": " + process)
                else:
                    updated_list[jj].append(leftnode + ": *")
            else:
                # Keep the first IOC on the original node; spill any extras
                # into new ARG-NEW nodes.
                updated_list[jj].append(leftnode + ": " + lOFioc[0])
                for index in range(1, len(lOFioc)):
                    updated_list[jj].append("ARG-NEW: " + lOFioc[index])
    return updated_list
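
A simplified, self-contained sketch of the wildcarding rule above: a non-verb node whose right-hand side yields no IOC collapses to "*" (the node text is made up):

node = "ARG1: some unidentifiable object"
left, right = node.split(":", 1)
has_ioc = False  # pretend iocs.list_of_iocs(right) returned nothing
if left.lower() != "v" and not has_ioc:
    node = left + ": *"
print(node)  # ARG1: *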
Example #4
def on_the_windows_x_only():
    on_the_windows_x_list = load_lists(fpath)['MS_OTW']
    on_the_windows_x_list = on_the_windows_x_list.replace(
        "'", "").strip('][').split(', ')
    lst = perform_following_action()
    # Filter instead of calling remove() while iterating, which would skip
    # the element that follows every removal.
    return [i for i in lst if i not in on_the_windows_x_list]
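
Why the filtering comprehension matters: removing from a list while iterating over it silently skips the element after each removal.

items = ['a', 'b', 'b', 'c']
for i in items:
    if i == 'b':
        items.remove(i)
print(items)  # ['a', 'b', 'c'] -- one 'b' survives the loop
print([i for i in ['a', 'b', 'b', 'c'] if i != 'b'])  # ['a', 'c']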
Example #5
def perform_following_action(
):  # When Virus:Win32/Funlove.4099 runs, it performs the following actions:
    perform_following_action_list = load_lists(fpath)['MS_PFA']
    perform_following_action_list = perform_following_action_list.replace(
        "'", "").strip('][').split(', ')
    lst = remove_analysis_by()
    # Keep only sentences containing none of the listed phrases; removing
    # from lst while iterating over it would skip elements.
    return [i for i in lst
            if not any(j in i for j in perform_following_action_list)]
Example #6
def zero_word_verb(stri):
    doc = nlp(stri.strip())
    main_verbs = load_lists(fpath)['verbs']
    main_verbs = main_verbs.replace("'", "").strip('][').split(', ')
    first = doc[0]
    # The original chain of negated conditions reduces to: the sentence
    # opens with a modal (MD), a base-form verb (VB), or a listed main verb.
    return (first.tag_ == "MD" or first.tag_ == "VB"
            or str(first).lower() in main_verbs)
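
A hedged usage sketch, assuming the spaCy pipeline and verb list above are loaded; the results depend on the tagger and on what the configured 'verbs' list contains:

zero_word_verb("Creates a mutex and injects code.")
# -> True when the tagger labels "Creates" MD/VB or "creates" is a listed verb
zero_word_verb("The malware creates a mutex.")
# -> False: "The" is neither a modal, a base-form verb, nor a listed verb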
Example #7
def verb_and_verb(txt):
    verbs_list = load_lists(fpath)['verbs']
    verbs_list = verbs_list.replace("'", "").strip('][').split(', ')
    doc = nlp(txt)
    result = ""
    # Look for a "VERB and/or VERB" coordination and emit one copy of the
    # sentence per verb. Stop at len(doc) - 2 so doc[i + 2] stays in range.
    for i in range(len(doc) - 2):
        if (doc[i].pos_ == "VERB" and doc[i + 1].pos_ == "CCONJ"
                and doc[i + 2].pos_ == "VERB"):
            if doc[i].text in verbs_list and doc[i + 2].text in verbs_list:
                candidate = (doc[i].text + " " + doc[i + 1].text + " "
                             + doc[i + 2].text)
                result += txt.replace(candidate, doc[i].text) + " "
                result += txt.replace(candidate, doc[i + 2].text)
            break
    return result
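
The core string surgery, isolated: once a "VERB and VERB" span is found, the sentence is copied once per verb (the sample sentence is made up):

txt = "It downloads and executes the payload."
candidate = "downloads and executes"
print(txt.replace(candidate, "downloads"))  # It downloads the payload.
print(txt.replace(candidate, "executes"))   # It executes the payload.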
Example #8
def removable_token(
):  # When Virus:Win32/Funlove.4099 runs, it performs the following actions:
    removable_token_list = load_lists(fpath)['RTL']
    removable_token_list = removable_token_list.replace(
        "'", "").strip('][').split(', ')
    lst = on_the_windows_x_only()
    for idx, value in enumerate(lst):
        for j in removable_token_list:
            # Definitely remember: only startswith() gives proper matching
            # here, and count=1 keeps a later copy of the token intact.
            if value.strip().startswith(j):
                lst[idx] = value.replace(j, " ", 1)
    return lst
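
Why the count argument of replace() matters here: without it, a second occurrence of the token inside the sentence body would also be wiped (the token and sentence are hypothetical):

s = "the following: files include the following: a.exe"
print(s.replace("the following:", " ", 1))
#   files include the following: a.exe
print(s.replace("the following:", " "))
#   files include   a.exe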
Example #9
def following_subject(txt):
    following_subject_list = load_lists(fpath)['TFSL']
    following_subject_list = following_subject_list.replace(
        "'", "").strip('][').split(', ')
    txt = txt.rstrip().rstrip('.')
    result = ""
    for sent in sent_tokenize(txt):
        for item in following_subject_list:
            if item in sent and ":" in sent:
                # Rebuild the sentence once per IOC, promoting each IOC to
                # the subject position that the "following" phrase occupied.
                predicate = sent.split(":", 1)[0].replace(item, " ")
                for ioc in iocs.list_of_iocs(sent):
                    result += ioc + predicate + " . "
                break
    return result
Example #10
def findSVOs(tokens):
    svos = []
    is_pas = _is_passive(tokens)
    verbs = [tok for tok in tokens if _is_non_aux_verb(tok)]
    if not verbs:
        # Fallback: when spaCy tags no non-auxiliary verbs, match raw tokens
        # against the main-verbs list instead.
        main_verbs = load_lists(fpath)['verbs']
        main_verbs = main_verbs.replace("'", "").strip('][').split(', ')
        verbs = [tok for tok in tokens if str(tok) in main_verbs]
    visited = set()  # recursion detection
    for v in verbs:
        subs, verbNegated = _get_all_subs(v)
        # if there are no subjects, skip this verb
        if len(subs) > 0:
            isConjVerb, conjV = _right_of_verb_is_conj_verb(v)
            if isConjVerb:
                v2, objs = _get_all_objs(conjV, is_pas)
                for sub in subs:
                    for obj in objs:
                        objNegated = _is_negated(obj)
                        if is_pas:  # reverse object / subject for passive
                            svos.append((to_str(expand(obj, tokens, visited)),
                                         "!" + v.lemma_ if verbNegated or objNegated else v.lemma_, to_str(expand(sub, tokens, visited))))
                            svos.append((to_str(expand(obj, tokens, visited)),
                                         "!" + v2.lemma_ if verbNegated or objNegated else v2.lemma_, to_str(expand(sub, tokens, visited))))
                        else:
                            svos.append((to_str(expand(sub, tokens, visited)),
                                         "!" + v.lower_ if verbNegated or objNegated else v.lower_, to_str(expand(obj, tokens, visited))))
                            svos.append((to_str(expand(sub, tokens, visited)),
                                         "!" + v2.lower_ if verbNegated or objNegated else v2.lower_, to_str(expand(obj, tokens, visited))))
            else:
                v, objs = _get_all_objs(v, is_pas)
                for sub in subs:
                    for obj in objs:
                        objNegated = _is_negated(obj)
                        if is_pas:  # reverse object / subject for passive
                            svos.append((to_str(expand(obj, tokens, visited)),
                                         "!" + v.lemma_ if verbNegated or objNegated else v.lemma_, to_str(expand(sub, tokens, visited))))
                        else:
                            svos.append((to_str(expand(sub, tokens, visited)),
                                         "!" + v.lower_ if verbNegated or objNegated else v.lower_, to_str(expand(obj, tokens, visited))))
    return svos
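
One detail worth noting in the appends above: the negation prefix relies on conditional-expression precedence, so the concatenation binds before the if/else.

lemma, negated = "delete", True
print("!" + lemma if negated else lemma)    # !delete -- ("!" + lemma) if ...
print(("!" + lemma) if negated else lemma)  # identical, parentheses explicit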
Example #11
def modification_():
    final_txt = ''
    c = fl = 0
    pattern = load_lists(fpath)['MDF']
    pattern = pattern.replace("'", "").strip('][').split(', ')

    # `cc` is assumed to be the module-level report text; it is not defined
    # in this snippet.
    sentences = sent_tokenize(cc)
    for sentence in sentences:
        c += 1
        for value in pattern:
            if value in sentence:
                sentence1 = sentence.split(
                    value)[0] + ' modifies ' + sentence.split(value)[1]
                final_txt += " " + sentence1 + " "
                fl += 1  # one rewritten sentence was emitted
                break
        if c > fl:
            final_txt += " " + sentence
            fl += 1
    return final_txt.strip()
Example #12
def communicate_to_sr(stri):
    final_txt = ''
    c = fl = 0
    pattern = load_lists(fpath)['COMU']
    pattern = pattern.replace("'", "").strip('][').split(', ')
    sentences = sent_tokenize(stri)
    for sentence in sentences:
        c += 1
        for value in pattern:
            if value in sentence:
                sentence1 = sentence.split(
                    value)[0] + ' receives from ' + sentence.split(value)[1]
                sentence2 = sentence.split(
                    value)[0] + ' sends to ' + sentence.split(value)[1]
                final_txt += " " + sentence1 + " " + sentence2
                fl += 2
                c += 1
                break
        if c > fl:
            final_txt += " " + sentence
            fl += 1
    return final_txt
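
A minimal sketch of the rewrite, with a hypothetical COMU pattern (the project's real patterns come from load_lists(fpath)['COMU']):

sentence = "The backdoor communicates with: evil.example.com"
value = "communicates with:"
head, tail = sentence.split(value)[0], sentence.split(value)[1]
print(head + 'receives from ' + tail.strip())  # The backdoor receives from evil.example.com
print(head + 'sends to ' + tail.strip())       # The backdoor sends to evil.example.com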
Example #13
def ellipsis_subject(stri):
    ellipsis_verbs = load_lists(fpath)['verbs']
    ellipsis_verbs = ellipsis_verbs.replace("'", "").strip('][').split(', ')
    sent_text = nltk.sent_tokenize(stri)
    result = ""
    for sentence in sent_text:
        token = nltk.word_tokenize(sentence)
        doc = nlp(sentence)
        if nltk.pos_tag(token)[0][1] == "VB" or nltk.pos_tag(
                token)[0][1] == "VBZ" or doc[0].pos_ == "VERB" or doc[
                    0].text.lower() in ellipsis_verbs:
            new_sentence = " It " + nltk.pos_tag(
                token)[0][0].lower() + " " + " ".join(sentence.split(" ")[1:])
            result += " " + new_sentence
        elif doc[0].dep_ == "ROOT":
            if doc[0].text.lower() in ellipsis_verbs:
                new_sentence = " It " + doc[0].text.lower() + " " + " ".join(
                    sentence.split(" ")[1:])
                result += " " + new_sentence
        elif doc[0].text.lower() in ellipsis_verbs and doc[0].dep_ != "ROOT":
            result += " " + doc.text
        else:
            result += " " + sentence
    return result
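
The subject-insertion step in isolation, without the POS checks (the sample sentence is made up):

sentence = "Connects to the remote server."
rest = " ".join(sentence.split(" ")[1:])
print(" It " + sentence.split(" ")[0].lower() + " " + rest)
#  It connects to the remote server.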
Example #14
def coref_the_following_middle(stri):
    final_txt = ''
    list2 = load_lists(fpath)['TFL']
    list2 = list2.replace("'", "").strip('][').split(', ')
    sentences = sent_tokenize(stri)
    c = 0
    fl = len(final_txt)
    for sentence in sentences:
        c += 1
        for value in list2:
            if value in sentence:
                sentence = sentence.strip()
                if sentence[-1] == "." and ":" in sentence:
                    sentence = sentence[:-1]
                    sentence2 = sentence.split(":")[0].replace(
                        value,
                        sentence.split(":", 1)[1]) + ". "
                    final_txt += ' ' + sentence2
                    fl += 1
                    break
        if c > fl:
            final_txt += ' ' + sentence
            fl += 1
    return final_txt
Example #15
import spacy
from nltk import sent_tokenize
nlp = spacy.load("en_core_web_lg")
from lists_patterns import load_lists, fpath
import main
if not main.args.input_file:
    raise ValueError(
        "usage: main.py [-h] [--asterisk ASTERISK] [--crf CRF] [--rmdup RMDUP] [--gname GNAME] [--input_file INPUT_FILE]"
    )
else:
    # Use a context manager so the file handle is closed after reading.
    with open(main.args.input_file, encoding='iso-8859-1') as f:
        txt = " ".join(f.readlines())
    txt = txt.replace('\n', ' ')

titles_list = load_lists(fpath)['MS_TITLES']
titles_list = titles_list.replace("'", "").strip('][').split(', ')
main_verbs = load_lists(fpath)['verbs']
main_verbs = main_verbs.replace("'", "").strip('][').split(', ')


def delete_brackets(stri):
    stri = stri.replace("[", "")
    stri = stri.replace("]", "")
    stri = stri.replace("<", "")
    stri = stri.replace(">", "")
    return stri


txt = delete_brackets(txt)
txt = txt.strip(" ")
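
A quick usage check for delete_brackets(), on a made-up input:

print(delete_brackets("Drops [malware.exe] into <TEMP> folder"))
# Drops malware.exe into TEMP folder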
Example #16
import re
from list_iocs import iocs
from allennlp.predictors.predictor import Predictor
from lists_patterns import load_lists, fpath
from nltk import sent_tokenize

my_svo_triplet, all_nodes = [], []
main_verbs = load_lists(fpath)['verbs']
main_verbs = main_verbs.replace("'", "").strip('][').split(', ')
sentences = r''' '''

# Abstractive summarization / to be added
def ats():
    # Load the SRL model once rather than once per sentence.
    predictor = Predictor.from_path("srl-model.tar.gz")
    for sentence in sent_tokenize(sentences):
        predictions = predictor.predict(sentence)
        lst = []
        nodes = []
        for k in predictions['verbs']:
            if k['description'].count('[') > 1:
                lst.append(k['description'])
        for jj in range(len(lst)):
            nodes.append([])
            for j in re.findall(r"[^[]*\[([^]]*)\]", lst[jj]):
                nodes[jj].append(j)
        print("*****sentence:", sentence, '*****nodes: ', nodes)

        for lis_ in nodes:
            for indx in range(len(lis_)):
                if lis_[0].split(":", 1)[0].lower().strip() == "v" and \
                        lis_[0].split(":", 1)[1].lower().strip() in main_verbs:
                    ...  # the rest of this example is cut off in the source listing
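
The bracket-extraction regex in isolation; the description string mimics AllenNLP SRL output (the sample is made up):

import re
desc = "[ARG0: The malware] [V: creates] [ARG1: a mutex]"
print(re.findall(r"[^[]*\[([^]]*)\]", desc))
# ['ARG0: The malware', 'V: creates', 'ARG1: a mutex']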
Example #17
def CـC(txt):
    pattern = load_lists(fpath)['C_C']
    pattern = pattern.replace("'", "").strip('][').split(', ')
    big_regex = re.compile('|'.join(map(re.escape, pattern)), re.IGNORECASE)
    sentence = big_regex.sub('remote ip:*', str(txt))
    return sentence
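
A hedged usage sketch; the pattern list here is hypothetical (the real one comes from load_lists(fpath)['C_C']):

import re
pattern = ['command and control server', 'C2 server']
big_regex = re.compile('|'.join(map(re.escape, pattern)), re.IGNORECASE)
print(big_regex.sub('remote ip:*', "It contacts its Command and Control Server."))
# It contacts its remote ip:*.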