def get_hedges_per_sentence(directory):
    """"Takes all KAF-files in a directory as input and returns a dictionary with all the sentences that contain a hedge cue, with the sentence id as key and a list of the hedge cues"""
    hedge_cues = ["possible", "possibly", "probably", "probable", "might", "may", "possibility", "probability",
                  "presume", "suppose", "suggest", "believe", "think", "if", "perhaps", "maybe", "likely", "could",
                  "speculate", "speculation", "suspect", "guess", "predict", "prediction", "whether", "improbable",
                  "seem", "question", "indicate", "indication", "unsure", "allegedly", "apparently", "favor",
                  "unlikely", "doubt", "assume", "assumption", "certainty", "uncertainty", "certain", "uncertain",
                  "questionable", "ambivalent", "dubious", "erratic", "hazy", "hesitant", "insecure", "unclear",
                  "undetermined", "unpredictable", "predictable", "speculative", "indefinite", "indeterminate",
                  "doubtful", "disbelieve", "potential", "potentially", "imaginable", "fear", "hope", "thinkable",
                  "promising", "promise", "hopeful", "hopefully", "feasible", "feasibly", "reasonably", "conceivably",
                  "reasonable", "conceivable", "perchance", "imaginably", "presumably", "seemingly", "assumable",
                  "expect", "expectation", "expectedly", "consider"]
    hedge_terms = {}          # term id -> hedge cue lemma
    hedge_words = {}          # word id -> hedge cue lemma
    hedges_in_sentences = {}  # sentence id -> hedge cues (or "No hedge cues")
    # CHECK WHICH TERMS ARE HEDGE CUES
    for filename in listdir(directory):
        path = directory + "\\" + filename
        try:
            root = get_root(path)
        except:
            print "Error: could not process", filename
            continue
        terms_layer = root.find("terms")
        for term in terms_layer.findall("term"):
            term_id = term.get("tid")
            # prefix term ids with the filename so they stay unique across files
            term_id = filename + "_" + term_id
            lemma = term.get("lemma")
            if lemma in hedge_cues:
                hedge_terms[term_id] = lemma
    # CHECK WHICH WORDS ARE HEDGE CUES
    dict_terms_words = get_terms_words(directory)
    for term in dict_terms_words:
        if term in hedge_terms:
            word_id = dict_terms_words[term]
            lemma = hedge_terms[term]
            hedge_words[word_id] = lemma
    # CHECK WHICH HEDGE CUES EACH SENTENCE CONTAINS AND CREATE DICTIONARY
    dict_sentences_words = get_sentences_words(directory)
    for sent_id in dict_sentences_words:
        span_words = dict_sentences_words[sent_id]
        for word_id in span_words:
            if word_id in hedge_words:
                lemma = hedge_words[word_id]
                if sent_id not in hedges_in_sentences:
                    hedges_in_sentences[sent_id] = [lemma]
                else:
                    hedges_in_sentences[sent_id].append(lemma)
        if sent_id not in hedges_in_sentences:
            hedges_in_sentences[sent_id] = "No hedge cues"
    return hedges_in_sentences
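
# The returned mapping pairs each sentence id (in whatever format get_sentences_words
# produces) with either a list of hedge cue lemmas or the string "No hedge cues".
# Illustrative shape only; the ids below are made up:
#
#     {"doc1.kaf_s3": ["might", "suggest"],
#      "doc1.kaf_s4": "No hedge cues"}
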
def get_opinionated_sentences(directory):
    """Takes all KAF-files from a directory and returns a set of all the speculative sentences in these files (sentence ids)."""
    opinionated_terms = []
    opinionated_words = []
    opinionated_sentences = []
    # CHECK WHICH TERMS ARE SPECULATIVE IN ALL KAF-FILES FROM DIRECTORY: CREATE SPECULATIVE_TERMS LIST
    print "Collecting opinionated terms"
    for filename in listdir(directory):
        path = directory + "\\" + filename
        try:
            root = get_root(path)
        except:
            print "Error: could not process", filename
        opinions_layer = root.find("opinions")
        if opinions_layer is None:
            print "Error: no opinions layer in", filename
            continue
        for opinion in opinions_layer.findall("opinion"):
            for expression in opinion.findall("opinion_expression"):
                span = expression.find("span")
                for target in span.findall("target"):
                    term = target.get("id")
                    # prefix term ids with the filename so they stay unique across files
                    term = filename + "_" + term
                    opinionated_terms.append(term)
    # CHECK WHICH WORDS ARE SPECULATIVE USING THE TERMS-WORDS DICTIONARY: CREATE SPECULATIVE_WORDS LIST
    print "Collecting opinionated words"
    dict_terms_words = get_terms_words(directory)
    for term in dict_terms_words:
        if term in opinionated_terms:
            word_id = dict_terms_words[term]
            opinionated_words.append(word_id)
    # CHECK WHICH SENTENCES ARE SPECULATIVE USING THE SENTENCES-WORDS DICTIONARY: ADD TO SPECULATIVE_SENTENCES SET
    print "Collecting opinionated sentences"
    dict_sentences_words = get_sentences_words(directory)
    for sent_id in dict_sentences_words:
        span_words = dict_sentences_words[sent_id]
        for word_id in span_words:
            if word_id in opinionated_words:
                opinionated_sentences.append(sent_id)
    opinionated_sentences = set(opinionated_sentences)
    return opinionated_sentences
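
# Sketch of the opinions layer these functions expect, inferred from the element
# and attribute names read above (the surrounding KAF markup is simplified):
#
#     <opinions>
#       <opinion>
#         <opinion_expression polarity="speculation">
#           <span>
#             <target id="t12"/>
#           </span>
#         </opinion_expression>
#       </opinion>
#     </opinions>
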
def get_speculative_tokens(directory):
    """Takes all KAF-files from a directory and returns a list of all the speculative sentences in these files (sentence ids)."""
    speculative_terms = []
    speculative_wordids = []
    speculative_words = []
    # CHECK WHICH TERM IDS ARE SPECULATIVE IN ALL KAF-FILES FROM DIRECTORY: CREATE SPECULATIVE_TERMS LIST
    for filename in listdir(directory):
        path = directory + "\\" + filename
        try:
            root = get_root(path)
        except:
            print "Error: could not process", filename
        opinions_layer = root.find("opinions")
        if opinions_layer is None:
            print "Error: no opinions layer in", filename
            continue
        for opinion in opinions_layer.findall("opinion"):
            for expression in opinion.findall("opinion_expression"):
                polarity = expression.get("polarity")
                if polarity == "speculation":
                    span = expression.find("span")
                    for target in span.findall("target"):
                        term = target.get("id")
                        term = filename + "_" + term
                        speculative_terms.append(term)
    # CHECK WHICH WORD IDS ARE SPECULATIVE USING THE TERMS-WORDS DICTIONARY: CREATE SPECULATIVE_WORDIDS LIST
    dict_terms_words = get_terms_words(directory)
    for term in dict_terms_words:
        if term in speculative_terms:
            word_id = dict_terms_words[term]
            speculative_wordids.append(word_id)
    # CHECK WHICH WORDS ARE PART OF SPECULATION USING THE WORDIDS-WORDS DICTIONARY: CREATE SPECULATIVE WORDS LIST
    dict_wordids_words = get_wordids_words(directory)
    for wordid in dict_wordids_words:
        word = dict_wordids_words[wordid]
        if wordid in speculative_wordids:
            speculative_words.append(word)
    speculative_words = list(set(speculative_words))
    return speculative_words
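

if __name__ == "__main__":
    # Minimal usage sketch: run the three extraction steps on a directory of KAF
    # files given on the command line. Assumes the helper functions used above
    # (get_root, get_terms_words, get_sentences_words, get_wordids_words) are
    # defined elsewhere in this module.
    import sys
    kaf_dir = sys.argv[1]
    hedges = get_hedges_per_sentence(kaf_dir)
    print len(hedges), "sentences checked for hedge cues"
    opinionated = get_opinionated_sentences(kaf_dir)
    print len(opinionated), "sentences contain an opinion expression"
    speculative = get_speculative_tokens(kaf_dir)
    print len(speculative), "distinct speculative tokens:", speculative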