def __init__(self, io, network, settings, callback):
    """Model for the RaNLP-Conf task.

    Args:
        io: task I/O provider; must be one of the supported task IO classes.
        network: neural network instance, forwarded to the base model.
        settings: configuration object; ``settings.Stemmer`` is used when
            loading the synonyms collection.
        callback: training/evaluation callback, forwarded to the base model.

    Raises:
        TypeError: if ``io`` is not an instance of a supported task IO class.
    """
    # Explicit TypeError instead of `assert`: asserts are stripped under
    # `python -O`, which would silently disable this validation.
    if not isinstance(io, (RaNLPConfTaskDevIO,
                           RaNLPConfTaskRuSentRelIO,
                           RaNLPConfTaskRuSentRelWithDevIO)):
        raise TypeError("Unsupported io type: {}".format(type(io)))

    self.__train_helper = None
    self.__test_helper = None
    self.__eval_helper = OpinionBasedEvaluationHelper()

    # Synonyms collection is shared state that is never mutated here,
    # hence loaded read-only.
    self.__synonyms = SynonymsCollection.from_file(
        filepath=io_utils.get_synonyms_filepath(),
        stemmer=settings.Stemmer,
        is_read_only=True)

    super(RaNLPConfTaskModel, self).__init__(
        io=io,
        network=network,
        settings=settings,
        callback=callback)
def __init__(self,
             word_embedding,
             synonyms_filepath=None,
             train_indices=None,
             test_indices=None,
             words_per_news=25,
             bag_size=1,
             bags_per_minibatch=50,
             callback=None):
    """CNN-based model.

    Args:
        word_embedding: pre-trained word embedding used by the network.
        synonyms_filepath: path to the synonyms collection; defaults to
            ``io_utils.get_synonyms_filepath()``.
        train_indices: indices of training documents; defaults to
            ``io_utils.train_indices()``.
        test_indices: indices of test documents; defaults to
            ``io_utils.test_indices()``.
        words_per_news: context window size, in words.
        bag_size: number of samples per bag.
        bags_per_minibatch: number of bags per minibatch.
        callback: optional training callback.

    NOTE: the original signature evaluated ``io_utils`` calls as default
    argument values, i.e. once at import time, sharing the resulting index
    lists between all instances (mutable-default pitfall) and performing
    I/O at definition time.  ``None`` sentinels resolved here keep the
    call-site behavior identical while deferring that work to call time.
    """
    if synonyms_filepath is None:
        synonyms_filepath = io_utils.get_synonyms_filepath()
    if train_indices is None:
        train_indices = io_utils.train_indices()
    if test_indices is None:
        test_indices = io_utils.test_indices()

    super(CNNModel, self).__init__(
        io=io_utils.NetworkIOProvider(),
        word_embedding=word_embedding,
        synonyms_filepath=synonyms_filepath,
        train_indices=train_indices,
        test_indices=test_indices,
        bag_size=bag_size,
        words_per_news=words_per_news,
        bags_per_minibatch=bags_per_minibatch,
        callback=callback)
#!/usr/bin/python # -*- coding: utf-8 -*- from core.classification import evaluate, fit_and_predict, \ create_train_data, create_test_data import io_utils from classifiers import estimators # # MAIN # synonyms_filepath = io_utils.get_synonyms_filepath() print "Preparing Train Collection" X_train, y_train = create_train_data(io_utils.get_train_vectors_list()) print "Preparing Test Collection" X_test, test_collections = create_test_data(io_utils.get_test_vectors_list()) BASELINES = { 'baseline_pos': estimators.baseline_pos, 'baseline_neg': estimators.baseline_neg, 'baseline_rand': estimators.baseline_rand, 'baseline_strat': estimators.baseline_strat, } # baseline estimators for method_name in BASELINES: print "Evaluate for baseline estimator: {}".format(method_name) test_opinions = fit_and_predict(method_name, BASELINES[method_name], X_train, y_train, X_test, test_collections,
def get_synonyms_collection_filepath():
    """Path to the synonyms collection file.

    Thin convenience wrapper that delegates to ``io_utils``.
    """
    filepath = io_utils.get_synonyms_filepath()
    return filepath
for e2 in entities: i = e1.get_int_ID() j = e2.get_int_ID() E[i - 1][j - 1] = sentences_between(e1, e2, news) return opinions_between_entities(E, 0, news, synonyms_collection, sentiment_opins=opinions) # # Main # synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath()) # # Train # root = io_utils.train_root() for n in io_utils.train_indices(): entity_filepath = root + "art{}.ann".format(n) news_filepath = root + "art{}.txt".format(n) opin_filepath = root + "art{}.opin.txt".format(n) neutral_filepath = root + "art{}.neut.txt".format(n) print neutral_filepath entities = EntityCollection.from_file(entity_filepath) news = News.from_file(news_filepath, entities)
E = np.zeros((entities.count(), entities.count()), dtype='int32') for e1 in entities: for e2 in entities: i = e1.get_int_ID() j = e2.get_int_ID() relation = Relation(e1.ID, e2.ID, news) E[i-1][j-1] = -1 if i == j else relation.get_distance_in_sentences() return opinions_between_entities( E, 0, news, synonyms_collection, sentiment_opins=opinions) # # Main # synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath()) # # Collection as a Train Data # for n in io_utils.collection_indices(): entity_filepath = io_utils.get_entity_filepath(n) news_filepath = io_utils.get_news_filepath(n) opin_filepath = io_utils.get_opin_filepath(n, is_etalon=True) neutral_filepath = io_utils.get_neutral_filepath(n, is_train=True) print "Create neutral file: '{}'".format(neutral_filepath) entities = EntityCollection.from_file(entity_filepath) news = News.from_file(news_filepath, entities) opinions = OpinionCollection.from_file(opin_filepath, io_utils.get_synonyms_filepath())
features = [f.feature_function_names() for f in FEATURES] features = list(itertools.chain.from_iterable(features)) with open(features_filepath, 'w') as out: for f in features: out.write('{}\n'.format(f)) # # Main # w2v_model = Word2Vec.load_word2vec_format(io_utils.get_w2v_model_filepath(), binary=True) prefix_processor = SentimentPrefixProcessor.from_file("data/prefixes.csv") prepositions_list = io_utils.read_prepositions("data/prepositions.txt") capitals_list = io_utils.read_lss("data/capitals.lss") states_list = io_utils.read_lss("data/states.lss") synonyms = SynonymsCollection.from_file(io_utils.get_synonyms_filepath()) lexicon = Lexicon.from_file("data/rusentilex.csv") FEATURES = [ DistanceFeature(), SimilarityFeature(w2v_model), LexiconFeature(lexicon, prefix_processor), PatternFeature([',']), EntitiesBetweenFeature(), PrepositionsCountFeature(prepositions_list), EntitiesFrequency(synonyms), EntityAppearanceFeature(), EntityTagFeature(), EntitySemanticClass(capitals_list, "capitals"), EntitySemanticClass(states_list, "states"), ContextFeature(lexicon),