# Lemmas treated as hedge cues. Stored as a frozenset so the per-term
# membership test is O(1) and the collection cannot be mutated by accident.
_HEDGE_CUES = frozenset([
    "possible", "possibly", "probably", "probable", "might", "may",
    "possibility", "probability", "presume", "suppose", "suggest", "believe",
    "think", "if", "perhaps", "maybe", "likely", "could", "speculate",
    "speculation", "suspect", "guess", "predict", "prediction", "whether",
    "improbable", "seem", "question", "indicate", "indication", "unsure",
    "allegedly", "apparently", "favor", "unlikely", "doubt", "assume",
    "assumption", "certainty", "uncertainty", "certain", "uncertain",
    "questionable", "ambivalent", "dubious", "erratic", "hazy", "hesitant",
    "insecure", "unclear", "undetermined", "unpredictable", "predictable",
    "speculative", "indefinite", "indeterminate", "doubtful", "disbelieve",
    "potential", "potentially", "imaginable", "fear", "hope", "thinkable",
    "promising", "promise", "hopeful", "hopefully", "feasible", "feasibly",
    "reasonably", "conceivably", "reasonable", "conceivable", "perchance",
    "imaginably", "presumably", "seemingly", "assumable", "expect",
    "expectation", "expectedly", "consider",
])


def get_hedges_per_sentence(directory):
    """Map every sentence id to the hedge cues found in that sentence.

    Each file in ``directory`` is loaded with ``get_root``; every ``term``
    element under its ``terms`` layer whose ``lemma`` attribute is a known
    hedge cue is recorded. Terms are then resolved to words via
    ``get_terms_words(directory)`` and words to sentences via
    ``get_sentences_words(directory)``.

    Files that cannot be loaded, or that have no ``terms`` layer, are
    reported on stdout and skipped.

    Returns a dict keyed by sentence id. The value is a list of hedge-cue
    lemmas for sentences that contain at least one cue, and the string
    ``"No hedge cues"`` for all other sentences (so every sentence known to
    ``get_sentences_words`` has an entry).
    """
    # Local import keeps this block self-contained; the file is only known
    # to expose ``listdir`` at module level.
    import os

    hedge_terms = {}
    hedge_words = {}
    hedges_in_sentences = {}

    # Step 1: find the terms whose lemma is a hedge cue.
    for filename in listdir(directory):
        # os.path.join instead of a hard-coded "\\" so this also works
        # outside Windows.
        path = os.path.join(directory, filename)
        try:
            root = get_root(path)
        except Exception:
            print("Error: could not process %s" % filename)
            # Skip the file: otherwise ``root`` would be undefined or, worse,
            # still refer to the previously parsed file.
            continue
        # NOTE(review): ``root`` is presumably an ElementTree-style element
        # whose find() returns None when the child is missing -- confirm
        # against get_root.
        terms_layer = root.find("terms")
        if terms_layer is None:
            print("Error: no terms layer found in %s" % filename)
            continue
        for term in terms_layer.findall("term"):
            lemma = term.get("lemma")
            if lemma in _HEDGE_CUES:
                # Prefix with the filename so ids stay unique across files.
                hedge_terms[filename + "_" + term.get("tid")] = lemma

    # Step 2: translate hedge terms into hedge words.
    dict_terms_words = get_terms_words(directory)
    for term_id in dict_terms_words:
        if term_id in hedge_terms:
            hedge_words[dict_terms_words[term_id]] = hedge_terms[term_id]

    # Step 3: collect the cues of each sentence.
    dict_sentences_words = get_sentences_words(directory)
    for sent_id in dict_sentences_words:
        cues = [
            hedge_words[word_id]
            for word_id in dict_sentences_words[sent_id]
            if word_id in hedge_words
        ]
        # Sentences without any cue keep the original string marker, which
        # callers may rely on.
        hedges_in_sentences[sent_id] = cues if cues else "No hedge cues"
    return hedges_in_sentences
def get_opinionated_sentences(directory):
    """Return the ids of all sentences that contain an opinion expression.

    Each file in ``directory`` is loaded with ``get_root``. For every
    ``opinion`` under its ``opinions`` layer, the ``id`` of each ``target``
    inside ``opinion_expression/span`` is collected (prefixed with the
    filename). Those term ids are resolved to words via
    ``get_terms_words(directory)`` and then to sentences via
    ``get_sentences_words(directory)``.

    Files that cannot be loaded are reported on stdout and skipped; files
    whose opinion markup is missing or incomplete are reported and
    contribute only the targets collected before the problem was hit.

    Returns a set of sentence ids.
    """
    # Local import keeps this block self-contained; the file is only known
    # to expose ``listdir`` at module level.
    import os

    # Step 1: collect the term ids targeted by opinion expressions.
    # A set, because it is only ever used for membership tests.
    opinionated_terms = set()
    print("Collecting opinionated terms")
    for filename in listdir(directory):
        # os.path.join instead of a hard-coded "\\" so this also works
        # outside Windows.
        path = os.path.join(directory, filename)
        try:
            root = get_root(path)
        except Exception:
            print("Error: could not process %s" % filename)
            # Skip the file: otherwise ``root`` would be undefined or still
            # refer to the previous file, whose opinions would then be
            # recorded again under this filename.
            continue
        try:
            opinions_layer = root.find("opinions")
            for opinion in opinions_layer.findall("opinion"):
                for expression in opinion.findall("opinion_expression"):
                    span = expression.find("span")
                    for target in span.findall("target"):
                        opinionated_terms.add(
                            filename + "_" + target.get("id"))
        except (AttributeError, TypeError):
            # AttributeError: a find() returned None (missing layer/span).
            # TypeError: a target without an "id" attribute.
            # NOTE(review): assumes ElementTree-style find()/get() that
            # return None when nothing matches -- confirm against get_root.
            print("Error: no opinion found in %s" % filename)

    # Step 2: translate opinionated terms into word ids.
    # NOTE(review): assumes word ids are hashable -- confirm against
    # get_terms_words.
    print("Collecting opinionated words")
    dict_terms_words = get_terms_words(directory)
    opinionated_words = set(
        dict_terms_words[term_id]
        for term_id in dict_terms_words
        if term_id in opinionated_terms
    )

    # Step 3: keep the sentences that contain at least one such word.
    print("Collecting opinionated sentences")
    dict_sentences_words = get_sentences_words(directory)
    opinionated_sentences = set()
    for sent_id in dict_sentences_words:
        span_words = dict_sentences_words[sent_id]
        if any(word_id in opinionated_words for word_id in span_words):
            opinionated_sentences.add(sent_id)
    return opinionated_sentences