Ejemplo n.º 1
0
    def load_resources(self):
        super().load_resources()
        logger.info('Load the corpus')
        with open(preprocess(res.train_path, force=self.repreprocess), 'rb') as p_file:
            self.train = pickle.load(p_file)
        with open(preprocess(res.test_path, force=self.repreprocess), 'rb') as p_file:
            self.test = pickle.load(p_file)
        self.train.truncate(self.train_truncate)
        self.test.truncate(self.test_truncate)
        self.train.filter_label(self.train_only_labels)
        self.test.filter_label(self.test_only_labels)
        if self.only_uid is not None:
            self.test.filter_uid(self.only_uid)

        logger.info('Load the lexicons')
        self.bing_liu_lexicon = read_bing_liu(res.bing_liu_lexicon_path)
        self.nrc_emotion_lexicon = read_nrc_emotion(res.nrc_emotion_lexicon_path)
        self.nrc_hashtag_unigram_lexicon = read_nrc_hashtag_unigram(res.nrc_hashtag_unigram_lexicon_path)
        self.nrc_hashtag_bigram_lexicon = read_nrc_hashtag_bigram(res.nrc_hashtag_bigram_lexicon_path)
        self.nrc_hashtag_pair_lexicon = read_nrc_hashtag_pair(res.nrc_hashtag_pair_lexicon_path)
        self.nrc_sentiment140_unigram_lexicon = read_nrc_hashtag_unigram(res.nrc_sentiment140_unigram_lexicon_path)
        self.nrc_sentiment140_bigram_lexicon = read_nrc_hashtag_bigram(res.nrc_sentiment140_bigram_lexicon_path)
        self.nrc_sentiment140_pair_lexicon = read_nrc_hashtag_pair(res.nrc_sentiment140_pair_lexicon_path)
        self.nrc_hashtag_sentimenthashtags_lexicon = read_nrc_hashtag_sentimenthashtags(res.nrc_hashtag_sentimenthashtags_lexicon_path)

        self.mpqa_lexicon = read_mpqa(res.mpqa_lexicon_path)

        logger.info('Load carnegie clusters')
        self.carnegie_clusters = read_carnegie_clusters(res.carnegie_clusters_path)
Ejemplo n.º 2
0
import embeddings as emb
import resources as res
import reader
import utils

models = []
# models.append(('model0', emb.get_custom0))
# models.append(('model1', emb.get_custom1))
models.append(('modelGnews', emb.get_gnews))

topn = 1000
sample_size = 5000

bing_liu_lexicon = reader.read_bing_liu(res.bing_liu_lexicon_path)
nrc_emotion_lexicon = reader.read_nrc_emotion(res.nrc_emotion_lexicon_path)
nrc_emotions_lexicon = reader.read_nrc_emotions(res.nrc_emotion_lexicon_path)
lexicons = [('bing', bing_liu_lexicon),
            ('nrc', nrc_emotion_lexicon),
            ('nrc2', nrc_emotions_lexicon)]

for m_name, m_fun in models:
    model = m_fun()
    for l_name, l_lex in lexicons:
        logger.info('Compare %s on %s', m_name, l_name)
        emb.compare_model_with_lexicon(model, l_lex)
        emb.compare_model_with_lexicon_class(model, l_lex)

        first = True
        for l_name2, l_lex2 in lexicons:
            if first: