# Example #1
    def __init__(self, io, network, settings, callback):
        """Initialize the task model.

        :param io: one of the supported task IO providers (see assert below).
        :param network: the neural network instance passed through to the base class.
        :param settings: configuration object; its `Stemmer` is used to load synonyms.
        :param callback: training/evaluation callback passed through to the base class.
        """
        # isinstance accepts a tuple of types -- clearer and cheaper than a
        # chain of 'or isinstance(...)' checks.
        assert isinstance(io, (RaNLPConfTaskDevIO,
                               RaNLPConfTaskRuSentRelIO,
                               RaNLPConfTaskRuSentRelWithDevIO))

        # Helpers are created lazily elsewhere; evaluation helper is ready now.
        self.__train_helper = None
        self.__test_helper = None
        self.__eval_helper = OpinionBasedEvaluationHelper()
        # Read-only synonyms collection shared across train/test evaluation.
        self.__synonyms = SynonymsCollection.from_file(
            filepath=io_utils.get_synonyms_filepath(),
            stemmer=settings.Stemmer,
            is_read_only=True)

        super(RaNLPConfTaskModel, self).__init__(io=io,
                                                 network=network,
                                                 settings=settings,
                                                 callback=callback)
# Example #2
    def __init__(self,
                 word_embedding,
                 synonyms_filepath=None,
                 train_indices=None,
                 test_indices=None,
                 words_per_news=25,
                 bag_size=1,
                 bags_per_minibatch=50,
                 callback=None):
        """Initialize the CNN model.

        :param word_embedding: pretrained word embedding passed to the base class.
        :param synonyms_filepath: path to the synonyms file; defaults to
            io_utils.get_synonyms_filepath() resolved at call time.
        :param train_indices: training document indices; defaults to
            io_utils.train_indices() resolved at call time.
        :param test_indices: test document indices; defaults to
            io_utils.test_indices() resolved at call time.
        :param words_per_news: context window size (words kept per news item).
        :param bag_size: number of samples per bag.
        :param bags_per_minibatch: number of bags per minibatch.
        :param callback: optional training callback.

        NOTE: the original signature evaluated io_utils.* calls directly in the
        'def' line, freezing those values once at import time; the None
        sentinels below resolve them lazily on each construction instead.
        """
        if synonyms_filepath is None:
            synonyms_filepath = io_utils.get_synonyms_filepath()
        if train_indices is None:
            train_indices = io_utils.train_indices()
        if test_indices is None:
            test_indices = io_utils.test_indices()

        super(CNNModel, self).__init__(io=io_utils.NetworkIOProvider(),
                                       word_embedding=word_embedding,
                                       synonyms_filepath=synonyms_filepath,
                                       train_indices=train_indices,
                                       test_indices=test_indices,
                                       bag_size=bag_size,
                                       words_per_news=words_per_news,
                                       bags_per_minibatch=bags_per_minibatch,
                                       callback=callback)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# NOTE(review): Python 2 script (print statements). This snippet is truncated:
# the fit_and_predict(...) call at the bottom is cut off mid-argument-list.

from core.classification import evaluate, fit_and_predict, \
    create_train_data, create_test_data

import io_utils
from classifiers import estimators

#
# MAIN
#
# Path to the synonyms collection (shared resource used during evaluation).
synonyms_filepath = io_utils.get_synonyms_filepath()

# Build train/test matrices from precomputed per-document vector files.
print "Preparing Train Collection"
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())
print "Preparing Test Collection"
X_test, test_collections = create_test_data(io_utils.get_test_vectors_list())

# Baseline estimators to evaluate; keys are method names used for reporting.
BASELINES = {
    'baseline_pos': estimators.baseline_pos,
    'baseline_neg': estimators.baseline_neg,
    'baseline_rand': estimators.baseline_rand,
    'baseline_strat': estimators.baseline_strat,
}

# baseline estimators
# Fit each baseline on the training data and predict opinions on the test set.
for method_name in BASELINES:
    print "Evaluate for baseline estimator: {}".format(method_name)
    test_opinions = fit_and_predict(method_name, BASELINES[method_name],
                                    X_train, y_train, X_test, test_collections,
# Example #4
 def get_synonyms_collection_filepath():
     """Return the filepath of the synonyms collection (delegates to io_utils)."""
     filepath = io_utils.get_synonyms_filepath()
     return filepath
# Example #5
        # NOTE(review): snippet starts mid-function -- the enclosing 'def' and
        # the outer 'for e1' loop are not visible here.
        for e2 in entities:
            # Entity int IDs appear to be 1-based; matrix E is 0-indexed.
            i = e1.get_int_ID()
            j = e2.get_int_ID()
            # E[i-1][j-1]: sentence count between entities e1 and e2 in 'news'.
            E[i - 1][j - 1] = sentences_between(e1, e2, news)

    # Convert the pairwise distance matrix into an opinion collection,
    # filtering against existing sentiment opinions.
    return opinions_between_entities(E,
                                     0,
                                     news,
                                     synonyms_collection,
                                     sentiment_opins=opinions)


#
# Main
#
# Synonyms collection shared by all opinion comparisons below.
synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath())

#
# Train
#
# For every training document, derive its per-article file paths and load
# the annotated entities and the news text.
# NOTE(review): snippet is truncated -- the loop body continues past the
# last visible line.
root = io_utils.train_root()
for n in io_utils.train_indices():
    entity_filepath = root + "art{}.ann".format(n)
    news_filepath = root + "art{}.txt".format(n)
    opin_filepath = root + "art{}.opin.txt".format(n)
    neutral_filepath = root + "art{}.neut.txt".format(n)

    print neutral_filepath

    entities = EntityCollection.from_file(entity_filepath)
    news = News.from_file(news_filepath, entities)
# Example #6
    # NOTE(review): snippet starts mid-function -- the enclosing 'def' is not
    # visible. E[i-1][j-1] holds the sentence distance between entity pairs,
    # or -1 on the diagonal (an entity compared with itself).
    E = np.zeros((entities.count(), entities.count()), dtype='int32')
    for e1 in entities:
        for e2 in entities:
            # Entity int IDs appear to be 1-based; E is 0-indexed.
            i = e1.get_int_ID()
            j = e2.get_int_ID()
            relation = Relation(e1.ID, e2.ID, news)
            E[i-1][j-1] = -1 if i == j else relation.get_distance_in_sentences()

    # Turn the distance matrix into an opinion collection, filtered against
    # the known sentiment opinions.
    return opinions_between_entities(
        E, 0, news, synonyms_collection, sentiment_opins=opinions)


#
# Main
#
# Synonyms collection shared by all opinion comparisons below.
synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath())

#
# Collection as a Train Data
#
# For each document in the collection, load its entities, news text and
# etalon opinions, and produce a neutral-opinions file for training.
# NOTE(review): snippet is truncated -- the loop body continues past the
# last visible line.
for n in io_utils.collection_indices():
    entity_filepath = io_utils.get_entity_filepath(n)
    news_filepath = io_utils.get_news_filepath(n)
    opin_filepath = io_utils.get_opin_filepath(n, is_etalon=True)
    neutral_filepath = io_utils.get_neutral_filepath(n, is_train=True)

    print "Create neutral file: '{}'".format(neutral_filepath)

    entities = EntityCollection.from_file(entity_filepath)
    news = News.from_file(news_filepath, entities)
    opinions = OpinionCollection.from_file(opin_filepath, io_utils.get_synonyms_filepath())
# Example #7
     # NOTE(review): snippet starts mid-function -- the enclosing 'def' is not
     # visible (original 5-space indent preserved).
     # Flatten all feature-function names from FEATURES and write them one
     # per line to features_filepath.
     features = [f.feature_function_names() for f in FEATURES]
     features = list(itertools.chain.from_iterable(features))
     with open(features_filepath, 'w') as out:
         for f in features:
             out.write('{}\n'.format(f))

#
# Main
#
# NOTE(review): snippet is truncated -- the FEATURES list below is not closed
# within the visible lines.

# Pretrained word2vec embeddings (binary format, legacy gensim loader).
w2v_model = Word2Vec.load_word2vec_format(io_utils.get_w2v_model_filepath(), binary=True)
# Lexical resources used by the feature extractors below.
prefix_processor = SentimentPrefixProcessor.from_file("data/prefixes.csv")
prepositions_list = io_utils.read_prepositions("data/prepositions.txt")
capitals_list = io_utils.read_lss("data/capitals.lss")
states_list = io_utils.read_lss("data/states.lss")
synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath())
lexicon = Lexicon.from_file("data/rusentilex.csv")

# Feature extractors applied to each entity-pair context.
FEATURES = [
    DistanceFeature(),
    SimilarityFeature(w2v_model),
    LexiconFeature(lexicon, prefix_processor),
    PatternFeature([',']),
    EntitiesBetweenFeature(),
    PrepositionsCountFeature(prepositions_list),
    EntitiesFrequency(synonyms),
    EntityAppearanceFeature(),
    EntityTagFeature(),
    EntitySemanticClass(capitals_list, "capitals"),
    EntitySemanticClass(states_list, "states"),
    ContextFeature(lexicon),