Ejemplo n.º 1
0
 def generate(self, dataset, classifier, settings):
     """Train ``classifier`` on ``dataset`` and report how it performs.

     The classifier is trained through ``SentimentAnalysisToolbox.train`` and
     then evaluated on the features loaded from
     ``local_definitions.DATASET_PUDELEK_COMMENTS``.

     Note: ``settings.trainTestSplit`` is overwritten with ``1.0`` on the
     object passed in by the caller.

     :param dataset: dataset identifier forwarded to the toolbox; also stored
         verbatim in the report.
     :param classifier: object providing ``get_name``, ``get_accuracy``,
         ``classify`` and ``most_informative_features``.
     :param settings: settings object; its ``__dict__`` is embedded in the
         report.
     :return: dict with the keys ``classifier``, ``dataset``, ``settings``,
         ``totalAccuracy``, ``accuracy`` (per-label ratio of correct
         predictions), ``mostInformativeFeatures`` and ``mostMistakes`` (the
         three most frequent ``"<predicted>_<expected>"`` confusions with
         their counts).
     """
     toolbox = SentimentAnalysisToolbox()
     settings.trainTestSplit = 1.0
     toolbox.train(classifier, dataset, settings)
     # Only the first element of the returned pair is needed here.
     test_set, _ = toolbox.load_features(local_definitions.DATASET_PUDELEK_COMMENTS, settings)

     report = {
         'classifier': classifier.get_name(),
         'dataset': dataset,
         'settings': settings.__dict__,
         'totalAccuracy': classifier.get_accuracy(test_set),
         'accuracy': {},
     }

     # Bucket the samples per label up front. Grouping only *consecutive*
     # samples (itertools.groupby on unsorted input) would yield several
     # partial groups for the same label, and the accuracy of the last partial
     # group would silently overwrite the others.
     samples_by_label = defaultdict(list)
     for sample in test_set:
         samples_by_label[sample[1]].append(sample)

     mistakes = defaultdict(int)
     for label, samples in samples_by_label.items():
         correct = 0
         for sample in samples:
             predicted = classifier.classify(sample[0])
             if predicted == label:
                 correct += 1
             else:
                 # Count each (predicted, expected) confusion separately.
                 mistakes[predicted + '_' + label] += 1
         report['accuracy'][label] = correct / len(samples)

     report['mostInformativeFeatures'] = classifier.most_informative_features(20)
     report['mostMistakes'] = sorted(mistakes.items(), key=lambda x: x[1], reverse=True)[:3]
     return report
Ejemplo n.º 2
0
 def train(self, datasetname, settings):
     """Load ``datasetname`` through the toolbox and train the classifier.

     When ``settings.LEMMATIZER_TYPE`` is not ``None`` it is instantiated and
     initialized, and the instance is handed to
     ``bagofwords.get_processed_bag_of_words`` for every item processed by
     ``load_data_set``; otherwise ``None`` is passed instead.

     :param datasetname: dataset identifier forwarded to ``load_data_set``.
     :param settings: settings object forwarded to the toolbox and to the
         bag-of-words processing.
     :return: the ``(train_set, test_set)`` pair returned by ``load_data_set``.
     """
     lemmatizer_type = settings.LEMMATIZER_TYPE
     if lemmatizer_type is None:
         lemmatizer = None
     else:
         lemmatizer = lemmatizer_type()
         lemmatizer.initialize()

     def extract_features(text):
         # Feature extractor applied by the toolbox while loading the data.
         return bagofwords.get_processed_bag_of_words(text, lemmatizer, settings)

     train_set, test_set = SentimentAnalysisToolbox().load_data_set(
         datasetname, settings, extract_features)
     self.__classifier = self.__classifierType.train(train_set)
     return train_set, test_set
Ejemplo n.º 3
0
 def train(self, datasetname, settings):
     """Train the wrapped classifier on the data set named ``datasetname``.

     The data is loaded via ``SentimentAnalysisToolbox.load_data_set``; each
     item is converted with ``bagofwords.get_processed_bag_of_words`` using an
     initialized instance of ``settings.LEMMATIZER_TYPE`` (or ``None`` when no
     lemmatizer type is configured).

     :param datasetname: dataset identifier forwarded to the toolbox.
     :param settings: settings object used for loading and feature processing.
     :return: tuple ``(train_set, test_set)`` as returned by the toolbox.
     """
     lemmatizer = None
     if settings.LEMMATIZER_TYPE is not None:
         lemmatizer = settings.LEMMATIZER_TYPE()
         lemmatizer.initialize()

     to_features = lambda x: bagofwords.get_processed_bag_of_words(x, lemmatizer, settings)
     data_sets = SentimentAnalysisToolbox().load_data_set(datasetname, settings, to_features)
     train_set, test_set = data_sets

     # Only the training part is used to fit the classifier.
     self.__classifier = self.__classifierType.train(train_set)
     return train_set, test_set
Ejemplo n.º 4
0
Archivo: api.py Proyecto: dhermyt/WONS
class SentimentAnalysis(Resource):
    """REST resource that classifies the sentiment of a posted text.

    The request parser, validation schema, settings, lemmatizer and classifier
    are created once, when the class body is executed, and are shared by all
    requests handled by this resource.
    """

    # Header attached to every response (success and validation error alike).
    _CORS_HEADERS = {'Access-Control-Allow-Origin': '*'}

    parser = reqparse.RequestParser()
    parser.add_argument('text')
    schema = Schema({Required('text'): All(str, Length(min=1, max=1000))})
    toolbox = SentimentAnalysisToolbox()
    decoder = SettingsDecoder()
    settings = toolbox.load_json('default', Settings())
    settings.LEMMATIZER_TYPE = decoder.decode_type(settings.LEMMATIZER_TYPE)
    settings.CLASSIFIER_TYPE = decoder.decode_classifier(
        settings.CLASSIFIER_TYPE)

    # Build the lemmatizer named by the stored settings, the same way the
    # classifiers' ``train`` methods do, so that request text is turned into
    # features with the lemmatizer configuration the settings describe
    # instead of always skipping lemmatization.
    # NOTE(review): the instance is shared by all requests - confirm the
    # lemmatizer implementation is safe to reuse across calls.
    lemmatizer = None
    if settings.LEMMATIZER_TYPE is not None:
        lemmatizer = settings.LEMMATIZER_TYPE()
        lemmatizer.initialize()

    classifier = settings.CLASSIFIER_TYPE()
    classifier.load('default')

    def post(self):
        """Classify the ``text`` argument of the request.

        :return: ``(body, status, headers)`` tuple. On success the body is a
            dict with the original ``text`` and the predicted ``sentiment``
            and the status is 200; when schema validation fails the body is
            the validation error message and the status is 400.
        """
        args = self.parser.parse_args()
        try:
            self.schema(args)
        except MultipleInvalid as e:
            return str(e), 400, dict(self._CORS_HEADERS)
        text = bagofwords.get_processed_bag_of_words(args['text'],
                                                     self.lemmatizer,
                                                     self.settings)
        sentiment = self.classifier.classify(text)
        body = {
            'text': args['text'],
            'sentiment': sentiment,
        }
        return body, 200, dict(self._CORS_HEADERS)
Ejemplo n.º 5
0
 def save(self, filename):
     """Store the wrapped classifier via the toolbox.

     :param filename: base name; the suffix ``.classifier`` is appended.
     """
     target = filename + ".classifier"
     SentimentAnalysisToolbox().save_binary(target, self.__classifier)
Ejemplo n.º 6
0
 def load(self, filename):
     """Restore the wrapped classifier via the toolbox.

     :param filename: base name; the suffix ``.classifier`` is appended.
     """
     source = filename + ".classifier"
     self.__classifier = SentimentAnalysisToolbox().load_binary(source)
Ejemplo n.º 7
0
import os

from configuration.Encoder import SettingsEncoder
from definitions import DATASETS_LOCAL_DIR
from tools.SentimentAnalysisToolbox import SentimentAnalysisToolbox
from tools.classifiergenerator.ToolSettings import ToolSettings
from configuration.Decoder import SettingsDecoder

if __name__ == '__main__':
    # Script entry point: train the configured classifier on the configured
    # dataset, then store the classifier and the settings under the same name.
    tool_settings = ToolSettings()
    settings_decoder = SettingsDecoder()

    # Replace the configured type values with whatever the decoder returns
    # for them; the results are called below to create instances.
    tool_settings.CLASSIFIER_TYPE = settings_decoder.decode_classifier(
        tool_settings.CLASSIFIER_TYPE)
    tool_settings.LEMMATIZER_TYPE = settings_decoder.decode_type(
        tool_settings.LEMMATIZER_TYPE)

    # NOTE(review): this lemmatizer instance is not referenced again in this
    # script - confirm whether creating/initializing it here is still needed.
    lemmatizer_type = tool_settings.LEMMATIZER_TYPE
    lemmatizer = None if lemmatizer_type is None else lemmatizer_type()
    if lemmatizer is not None:
        lemmatizer.initialize()

    dataset_path = os.path.join(DATASETS_LOCAL_DIR,
                                tool_settings.WONS_DATASET_SOURCE)
    output_name = tool_settings.WONS_CLASSIFIER_DESTINATION_NAME

    classifier = tool_settings.CLASSIFIER_TYPE()
    classifier.train(dataset_path, tool_settings)
    classifier.save(output_name)

    SentimentAnalysisToolbox().save_json(output_name, tool_settings,
                                         SettingsEncoder)

Ejemplo n.º 8
0
 def save(self):
     """Store the classifier and the vectorizer via the toolbox.

     Both file names are derived from ``self.get_name()`` with the suffixes
     ``.classifier`` and ``.vectorizer`` respectively.
     """
     toolbox = SentimentAnalysisToolbox()
     classifier_file = '{}.classifier'.format(self.get_name())
     toolbox.save_binary(classifier_file, self.__classifier)
     vectorizer_file = '{}.vectorizer'.format(self.get_name())
     toolbox.save_binary(vectorizer_file, self.__vectorizer)
Ejemplo n.º 9
0
 def load(self):
     """Restore the classifier and the vectorizer via the toolbox.

     Both file names are derived from ``self.get_name()`` with the suffixes
     ``.classifier`` and ``.vectorizer`` respectively.
     """
     toolbox = SentimentAnalysisToolbox()
     classifier_file = '{}.classifier'.format(self.get_name())
     self.__classifier = toolbox.load_binary(classifier_file)
     vectorizer_file = '{}.vectorizer'.format(self.get_name())
     self.__vectorizer = toolbox.load_binary(vectorizer_file)
Ejemplo n.º 10
0
 def save(self, filename):
     """Write the wrapped classifier to ``filename`` plus ``.classifier``.

     :param filename: base name of the target, without the suffix.
     """
     toolbox = SentimentAnalysisToolbox()
     toolbox.save_binary(filename + ".classifier", self.__classifier)
Ejemplo n.º 11
0
 def load(self, filename):
     """Read the wrapped classifier from ``filename`` plus ``.classifier``.

     :param filename: base name of the source, without the suffix.
     """
     toolbox = SentimentAnalysisToolbox()
     self.__classifier = toolbox.load_binary(filename + ".classifier")
Ejemplo n.º 12
0
 def save(self):
     """Persist the classifier, then the vectorizer, via the toolbox.

     The targets are ``<name>.classifier`` and ``<name>.vectorizer`` where
     ``<name>`` is the value of ``self.get_name()``.
     """
     toolbox = SentimentAnalysisToolbox()
     toolbox.save_binary('{}.classifier'.format(self.get_name()),
                         self.__classifier)
     toolbox.save_binary('{}.vectorizer'.format(self.get_name()),
                         self.__vectorizer)
Ejemplo n.º 13
0
 def load(self):
     """Load the classifier, then the vectorizer, via the toolbox.

     The sources are ``<name>.classifier`` and ``<name>.vectorizer`` where
     ``<name>`` is the value of ``self.get_name()``.
     """
     toolbox = SentimentAnalysisToolbox()
     self.__classifier = toolbox.load_binary(
         '{}.classifier'.format(self.get_name()))
     self.__vectorizer = toolbox.load_binary(
         '{}.vectorizer'.format(self.get_name()))
Ejemplo n.º 14
0
import os

from configuration.Encoder import SettingsEncoder
from definitions import DATASETS_LOCAL_DIR
from tools.SentimentAnalysisToolbox import SentimentAnalysisToolbox
from tools.classifiergenerator.ToolSettings import ToolSettings
from configuration.Decoder import SettingsDecoder

if __name__ == '__main__':
    # Script entry point: decode the configured types, train the classifier
    # on the configured dataset and store both classifier and settings.
    tool_settings = ToolSettings()
    type_decoder = SettingsDecoder()

    decoded_classifier = type_decoder.decode_classifier(
        tool_settings.CLASSIFIER_TYPE)
    tool_settings.CLASSIFIER_TYPE = decoded_classifier
    decoded_lemmatizer = type_decoder.decode_type(
        tool_settings.LEMMATIZER_TYPE)
    tool_settings.LEMMATIZER_TYPE = decoded_lemmatizer

    # NOTE(review): the lemmatizer built here is not used afterwards in this
    # script - confirm whether this setup step is still required.
    if tool_settings.LEMMATIZER_TYPE is None:
        lemmatizer = None
    else:
        lemmatizer = tool_settings.LEMMATIZER_TYPE()
        lemmatizer.initialize()

    dataset_location = os.path.join(DATASETS_LOCAL_DIR,
                                    tool_settings.WONS_DATASET_SOURCE)
    classifier = tool_settings.CLASSIFIER_TYPE()
    classifier.train(dataset_location, tool_settings)
    classifier.save(tool_settings.WONS_CLASSIFIER_DESTINATION_NAME)

    # The settings are stored under the same name as the classifier.
    toolbox = SentimentAnalysisToolbox()
    toolbox.save_json(tool_settings.WONS_CLASSIFIER_DESTINATION_NAME,
                      tool_settings, SettingsEncoder)