Example #1
0
 def test_negative_sentence(self):
     """Smoke test: bag-of-words extraction on a negative Polish sentence.

     Runs get_processed_bag_of_words end-to-end with a DB-backed
     lemmatizer and default settings.
     """
     text = "Nie lubię go. Jest niedojrzały."
     s = Settings()
     lemmatizer = DbLookupLemmatizer()
     lemmatizer.initialize()
     tokens = bagofwords.get_processed_bag_of_words(text, lemmatizer, s)
     # The original discarded the result and ended in a dead `pass`;
     # assert at minimum that the pipeline produced a value, so a
     # regression that breaks extraction fails the test.
     assert tokens is not None
Example #2
0
 def post(self):
     """Classify the sentiment of the text in the request payload.

     Responds 400 with the validation error text when the parsed
     arguments fail schema validation; otherwise 200 with the input
     text and its predicted sentiment. Every response carries a
     wildcard CORS header.
     """
     args = self.parser.parse_args()
     try:
         self.schema(args)
     except MultipleInvalid as validation_error:
         return str(validation_error), 400, {'Access-Control-Allow-Origin': '*'}
     features = bagofwords.get_processed_bag_of_words(
         args['text'], None, self.settings)
     sentiment = self.classifier.classify(features)
     payload = {'text': args['text'], 'sentiment': sentiment}
     return payload, 200, {'Access-Control-Allow-Origin': '*'}
 def load_features(self, datasetname, settings):
     """Load a data set, mapping each raw text to a processed bag of words.

     A lemmatizer instance is built and initialized from
     settings.lemmatizerType when one is configured; otherwise None is
     passed through (no lemmatization).
     NOTE(review): sibling code reads settings.LEMMATIZER_TYPE — confirm
     which attribute name is the current one.
     """
     if settings.lemmatizerType is None:
         lemmatizer = None
     else:
         lemmatizer = settings.lemmatizerType()
         lemmatizer.initialize()

     def preprocess(text):
         # Feature extractor handed to the data-set loader.
         return bagofwords.get_processed_bag_of_words(text, lemmatizer, settings)

     return self.load_data_set(datasetname, settings, preprocess)
Example #4
0
 def train(self, datasetname, settings):
     """Train the wrapped classifier on the named data set.

     Builds an optional lemmatizer from settings.LEMMATIZER_TYPE, loads
     the train/test split via SentimentAnalysisToolbox with a
     bag-of-words feature extractor, trains on the training portion,
     and returns the (train_set, test_set) pair.
     """
     lemmatizer = None
     if settings.LEMMATIZER_TYPE is not None:
         lemmatizer = settings.LEMMATIZER_TYPE()
         lemmatizer.initialize()

     def to_features(sample):
         # Per-sample feature extraction used by the loader.
         return bagofwords.get_processed_bag_of_words(sample, lemmatizer, settings)

     train_set, test_set = SentimentAnalysisToolbox().load_data_set(
         datasetname, settings, to_features)
     self.__classifier = self.__classifierType.train(train_set)
     return train_set, test_set
Example #5
0
 def train(self, datasetname, settings):
     """Train on `datasetname` and return the (train_set, test_set) pair.

     An optional lemmatizer is created from settings.LEMMATIZER_TYPE;
     the trained classifier is stored on the instance.
     """
     if settings.LEMMATIZER_TYPE is None:
         lemmatizer = None
     else:
         lemmatizer = settings.LEMMATIZER_TYPE()
         lemmatizer.initialize()
     extract = lambda raw: bagofwords.get_processed_bag_of_words(
         raw, lemmatizer, settings)
     toolbox = SentimentAnalysisToolbox()
     train_set, test_set = toolbox.load_data_set(datasetname, settings, extract)
     self.__classifier = self.__classifierType.train(train_set)
     return train_set, test_set
Example #6
0
 def post(self):
     """Handle a sentiment-classification POST request.

     Validates the parsed arguments against the schema; on failure the
     response is 400 with the error message, on success 200 with the
     original text and its predicted sentiment. All responses allow any
     CORS origin.
     """
     args = self.parser.parse_args()
     try:
         self.schema(args)
     except MultipleInvalid as err:
         return str(err), 400, {'Access-Control-Allow-Origin': '*'}
     bag = bagofwords.get_processed_bag_of_words(args['text'], None,
                                                 self.settings)
     verdict = self.classifier.classify(bag)
     body = {'text': args['text'], 'sentiment': verdict}
     return body, 200, {'Access-Control-Allow-Origin': '*'}
Example #7
0
 def is_uncertain_line(self, line):
     """Return True when the classifier is not confident about `line`.

     A line is "uncertain" when no class probability (rounded to two
     decimals) reaches CLASSIFIER_MATCH_UNCERTAINTY_THRESHOLD, or when
     no features could be extracted from it at all. Diagnostic details
     are printed only for uncertain/empty lines.
     """
     diagnostics = ["line: {}".format(line)]
     featureset = bagofwords.get_processed_bag_of_words(
         line, self.__lemmatizer, self.__settings)
     diagnostics.append("featureset: {}".format(featureset))
     if not featureset:
         # Nothing to classify: treat as uncertain and dump diagnostics.
         for entry in diagnostics:
             print(entry)
         return True
     # Classify once; the original classified the same featureset twice.
     dist = self.__classifier.prob_classify(featureset)
     for sample in dist.samples():
         diagnostics.append("{}: {}".format(sample, dist.prob(sample)))
     rounded_probs = [round(dist.prob(sample), 2) for sample in dist.samples()]
     is_uncertain = (max(rounded_probs)
                     < self.__settings.CLASSIFIER_MATCH_UNCERTAINTY_THRESHOLD)
     if is_uncertain:
         diagnostics.append("my type: {}".format(dist.max()))
         # Plain loop instead of a side-effect list comprehension.
         for entry in diagnostics:
             print(entry)
     return is_uncertain
Example #8
0
 def is_unknown_line(self, line):
     """Return True when `line` looks unknown to the trained classifier.

     A feature counts as "unknown" when classifying it alone yields a
     uniform distribution (every rounded class probability equal). The
     line is unknown when more than 80% of its features are unknown, or
     when no features could be extracted at all. Diagnostics are
     printed only for unknown/empty lines.
     """
     diagnostics = ["line: {}".format(line)]
     featureset = bagofwords.get_processed_bag_of_words(
         line, self.__lemmatizer, self.__settings)
     diagnostics.append("featureset: {}".format(featureset))
     if not featureset:
         # No features extracted: treat as unknown (also avoids a
         # division by zero below) and dump diagnostics.
         for entry in diagnostics:
             print(entry)
         return True
     dist = self.__classifier.prob_classify(featureset)
     for sample in dist.samples():
         diagnostics.append(sample)
         diagnostics.append(dist.prob(sample))
     unknown_features = 0
     for feature in featureset:
         # Dict literal instead of dict([(k, v)]).
         single = self.__classifier.prob_classify({feature: True})
         probs = [round(single.prob(sample), 2) for sample in single.samples()]
         # All-equal rounded probabilities => uniform distribution,
         # i.e. the classifier learned nothing about this feature.
         if probs.count(probs[0]) == len(probs):
             unknown_features += 1
     is_unknown = (unknown_features / len(featureset)) > 0.8
     if is_unknown:
         # Plain loop instead of a side-effect list comprehension.
         for entry in diagnostics:
             print(entry)
     return is_unknown