Code example #1
0
File: event_extractor.py  Project: hkayesh/causality
 def __init__(self):
     """Set up file locations and the text-processing helpers for extraction."""
     # Input comes from the app configuration; the two text files are
     # intermediate/output artifacts of the NER step.
     self.data_file = app_config['data_file']
     self.texts_in_file = 'texts_in_file.txt'
     self.ner_texts_file = 'output.txt'

     # Shared helper objects.
     self.utilities = Utilities()
     self.lemmatizer = WordNetLemmatizer()

     # Light tweet cleanup before extraction.
     cleanup_steps = ['remove_urls', 'remove_mentions', 'remove_hashtags', 'normalize']
     self.preprocessor = Preprocessor(cleanup_steps)
Code example #2
0
    def __init__(self):
        """Configure Stanford NLP tool locations and build the NLP helpers."""
        # Jar/model paths for the Stanford dependency parser and NER tagger.
        self.path_to_jar = 'lib/stanford_parser/stanford-parser.jar'
        self.path_to_models_jar = 'lib/stanford_parser/stanford-english-corenlp-2018-02-27-models.jar'
        self.path_to_ner_tagger = 'lib/stanford_ner/stanford-ner.jar'
        self.path_to_ner_model = 'lib/stanford_ner/english.all.3class.distsim.crf.ser.gz'

        # Named-entity tagger backed by the 3-class English CRF model.
        self.ner_tagger = StanfordNERTagger(
            self.path_to_ner_model, self.path_to_ner_tagger)

        # Dependency parser using the CoreNLP models jar.
        self.dependency_parser = StanfordDependencyParser(
            path_to_jar=self.path_to_jar,
            path_to_models_jar=self.path_to_models_jar)

        # Generic helpers shared with the rest of the pipeline.
        self.lemmatizer = WordNetLemmatizer()
        self.utilities = Utilities()
Code example #3
0
File: preprocesssor.py  Project: hkayesh/causality
 def __init__(self, params=None):
     """Configure which preprocessing steps are enabled.

     Parameters
     ----------
     params : sequence of str, optional
         Names of the steps to enable (e.g. 'remove_urls', 'lemmatize').
         Unrecognized names are silently ignored.  Defaults to no steps.
     """
     # NOTE: the original signature used a mutable default (params=list()),
     # which is shared across calls; None + a local fallback is the safe idiom.
     params = [] if params is None else params

     # Each flag is simply membership in the requested step list.
     self.remove_urls = 'remove_urls' in params
     self.remove_mentions = 'remove_mentions' in params
     self.remove_hashtags = 'remove_hashtags' in params
     self.normalize = 'normalize' in params
     self.remove_stopwords = 'remove_stopwords' in params
     # NOTE(review): the attribute is 'remove_punct' but the option name is
     # 'remove_punctuation' — kept as-is for compatibility with callers.
     self.remove_punct = 'remove_punctuation' in params
     self.lower = 'lower' in params
     self.lemmatize = 'lemmatize' in params
     self.stemming = 'stemming' in params
     self.remove_non_letters = 'remove_non_letters' in params

     # Helper objects used by the individual steps.
     self.lemmatizer = WordNetLemmatizer()
     self.stemmer = PorterStemmer()
     self.utilities = Utilities()
Code example #4
0
    def get_evaluation_data(self, dataset_file, n_pair):
        """Load up to ``n_pair`` labelled cause/effect candidate pairs.

        Parameters
        ----------
        dataset_file : str
            Path to a CSV whose rows hold a python pair literal in column 2
            and a 'causal'/non-causal label in column 3 (first row is a header).
        n_pair : int
            Maximum number of rows to read.

        Returns
        -------
        (X, y) : tuple of lists
            X holds the raw (cause, effect) string pairs; y holds 1 for
            'causal' rows and 0 otherwise.
        """
        # literal_eval parses the stored pair literal without executing
        # arbitrary code (the original used eval() on file contents).
        from ast import literal_eval

        utilities = Utilities()

        preprocessor = Preprocessor(
            ['remove_stopwords', 'remove_non_letters', 'lemmatize'])

        data_rows = utilities.read_from_csv(dataset_file)
        del data_rows[0]  # drop the CSV header row

        X = []
        y = []
        for data_row in data_rows[:n_pair]:
            candidate_causal_pair = literal_eval(data_row[2])
            label = 1 if data_row[3] == 'causal' else 0

            candidate_causal_phrase = preprocessor.preprocess(
                candidate_causal_pair[0])
            candidate_effect_phrase = preprocessor.preprocess(
                candidate_causal_pair[1])
            # Keep only pairs where both phrases survive preprocessing;
            # note the ORIGINAL (unpreprocessed) phrases are what is stored.
            if candidate_causal_phrase and candidate_effect_phrase:
                X.append((candidate_causal_pair[0], candidate_causal_pair[1]))
                y.append(label)
        return X, y
Code example #5
0
import timeit
import collections

from utils.utilities import Utilities
from preprocessing.preprocesssor import Preprocessor
from causality_detection.causal_stength_calculator import CausalStrengthCalculator
from causality_detection.itemsest_causality import ItemsetCausality


if __name__ == "__main__":
    start_time = timeit.default_timer()
    event_file_path = 'events.csv'
    utilities = Utilities()
    causal_strength_calculator = CausalStrengthCalculator()
    itemset_causality = ItemsetCausality()
    preprocessor = Preprocessor(params=['lower', 'lemmatize'])

    rows = utilities.read_from_csv(event_file_path)
    header = rows[0]
    del rows[0]

    events_phrases = []
    for row in rows:
        phrases = [phrase.strip() for phrase in row[header.index('event_phrases')].split(',')]
        events_phrases += phrases

    sorted_event_phrases = collections.Counter(events_phrases).most_common()
    low_freq_events = [event[0] for event in sorted_event_phrases if event[1] <= 5]

    event_rows = []
    for row in rows:
Code example #6
0
 def __init__(self):
     """Initialize with shared utilities and no data source configured yet."""
     # Shared helper object.
     self.utilities = Utilities()
     # Path is assigned later by the caller before any processing happens.
     self.data_source_file = None
Code example #7
0
 def __init__(self):
     """Point at the local Wikipedia dump and build the cleanup pipeline."""
     # Bzipped enwiki pages-articles dump on the local machine.
     self.wiki_file = '/home/humayun/enwiki-latest-pages-articles.xml.bz2'
     self.utilities = Utilities()
     cleanup_steps = ['remove_stopwords', 'remove_punctuation', 'lemmatize']
     self.preprocessor = Preprocessor(cleanup_steps)