Beispiel #1
0
    def classify(self, data):
        """Pick the most probable label for ``data`` using naive Bayes.

        Every label is scored in log space as the log of its prior plus the
        sum of the logs of the per-word probabilities, for each run of word
        characters found in ``data.text``. The label with the highest score
        wins.

        Args:
            data: object exposing a ``text`` string attribute.

        Returns:
            A ``(data, label)`` tuple: the input object and the winning label.

        Raises:
            ValueError: if ``self.label_probabilities`` is empty, or if a
                prior or word probability is not positive (its logarithm is
                undefined).
        """
        # Tokenise once: the words do not depend on the label being scored.
        words = re.findall(r'\w+', data.text)

        post_probability = {}
        # ``items()`` works on both Python 2 and Python 3 dictionaries.
        for label, probability in self.label_probabilities.items():
            # The prior and the likelihoods must share one log base;
            # mixing ln(prior) with log10(likelihood) over-weights the prior
            # by a factor of ln(10) and can flip the winning label.
            score = math.log10(probability)
            for word in words:
                score += math.log10(self.parameters.probability(word, label))
            post_probability[label] = score

        return data, max(post_probability.items(),
                         key=operator.itemgetter(1))[0]


# Location of the data to classify, taken from the first command-line argument.
test_dir = sys.argv[1]

loader = ReviewLoader()
# Disabled alternative: load labelled test sets from per-class subdirectories
# (presumably kept for evaluating accuracy -- TODO confirm before removing).
# truthful = loader.load(test_dir + "/negative/truthful", 'truthful') \
#            + loader.load(test_dir + "/positive/truthful", 'truthful')
# deceptive = loader.load(test_dir + "/negative/deceptive", 'deceptive') \
#             + loader.load(test_dir + "/positive/deceptive", 'deceptive')
# positive = loader.load(test_dir + '/positive', 'positive')
# negative = loader.load(test_dir + '/negative', 'negative')
#
# test_data1 = truthful + deceptive
# test_data2 = positive + negative
# Active path: load the data under test_dir without attaching labels.
test_data = loader.load_without_label(test_dir)

# NOTE(review): read(2) presumably returns the two models stored in
# nbmodel.txt; entry 0 is treated as the deception model and entry 1 as the
# negativity model -- confirm against ParameterReader.
model_params = ParameterReader('nbmodel.txt').read(2)
deceptive_model_params = model_params[0]
negative_model_params = model_params[1]
deception_classifier = NaiveClassifier(deceptive_model_params[0],
Beispiel #2
0
    def sampled_label_probability(self, label):
        """Return the share of all labelled samples that carry ``label``.

        The result is ``len(self.labelled_data[label])`` divided by the total
        number of samples across every entry of ``self.labelled_data``, as a
        float.

        Raises:
            KeyError: if ``label`` is not a key of ``self.labelled_data``.
            ZeroDivisionError: if there are no samples at all.
        """
        total = sum(len(samples) for samples in self.labelled_data.values())
        return float(len(self.labelled_data[label])) / total

    def sampled_label_probabilities(self):
        """Map every label in ``self.labels`` to its sampled probability.

        Returns:
            dict of ``label -> self.sampled_label_probability(label)``.
        """
        return {
            label: self.sampled_label_probability(label)
            for label in self.labels
        }


# Root of the training corpus, taken from the first command-line argument.
train_dir = sys.argv[1]

loader = ReviewLoader()

# Deceptive/truthful training set: each class combines the reviews found
# under both polarity folders.
deceptive = (
    loader.load(train_dir + '/positive_polarity/deceptive_from_MTurk', 'deceptive')
    + loader.load(train_dir + '/negative_polarity/deceptive_from_MTurk', 'deceptive')
)
truthful = (
    loader.load(train_dir + '/negative_polarity/truthful_from_Web', 'truthful')
    + loader.load(train_dir + '/positive_polarity/truthful_from_TripAdvisor', 'truthful')
)
deception_learner = NaiveLearner(deceptive + truthful)

# Positive/negative training set: each polarity folder is loaded as a whole.
positive = loader.load(train_dir + '/positive_polarity', 'positive')
negative = loader.load(train_dir + '/negative_polarity', 'negative')
negativity_learner = NaiveLearner(positive + negative)

# Write both models to the same file, deception model first.
writer = ParameterWriter('nbmodel.txt')
writer.write(
    deception_learner.parameters,
    deception_learner.sampled_label_probabilities(),
)
writer.write(
    negativity_learner.parameters,
    negativity_learner.sampled_label_probabilities(),
)
Beispiel #3
0
        return map(self.classify, self.to_classify)

    def classify(self, data):
        """Pick the most probable label for ``data`` using naive Bayes.

        Every label is scored in log space as the log of its prior plus the
        sum of the logs of the per-word probabilities, for each run of word
        characters found in ``data.text``. The label with the highest score
        wins.

        Args:
            data: object exposing a ``text`` string attribute.

        Returns:
            A ``(data, label)`` tuple: the input object and the winning label.

        Raises:
            ValueError: if ``self.label_probabilities`` is empty, or if a
                prior or word probability is not positive (its logarithm is
                undefined).
        """
        # Tokenise once: the words do not depend on the label being scored.
        words = re.findall(r'\w+', data.text)

        post_probability = {}
        # ``items()`` works on both Python 2 and Python 3 dictionaries.
        for label, probability in self.label_probabilities.items():
            # The prior and the likelihoods must share one log base;
            # mixing ln(prior) with log10(likelihood) over-weights the prior
            # by a factor of ln(10) and can flip the winning label.
            score = math.log10(probability)
            for word in words:
                score += math.log10(self.parameters.probability(word, label))
            post_probability[label] = score

        return data, max(post_probability.items(), key=operator.itemgetter(1))[0]


# Location of the data to classify, taken from the first command-line argument.
test_dir = sys.argv[1]

loader = ReviewLoader()
# Disabled alternative: load labelled test sets from per-class subdirectories
# (presumably kept for evaluating accuracy -- TODO confirm before removing).
# truthful = loader.load(test_dir + "/negative/truthful", 'truthful') \
#            + loader.load(test_dir + "/positive/truthful", 'truthful')
# deceptive = loader.load(test_dir + "/negative/deceptive", 'deceptive') \
#             + loader.load(test_dir + "/positive/deceptive", 'deceptive')
# positive = loader.load(test_dir + '/positive', 'positive')
# negative = loader.load(test_dir + '/negative', 'negative')
#
# test_data1 = truthful + deceptive
# test_data2 = positive + negative
# Active path: load the data under test_dir without attaching labels.
test_data = loader.load_without_label(test_dir)

# NOTE(review): read(2) presumably returns the two models stored in
# nbmodel.txt; entry 0 is treated as the deception model and entry 1 as the
# negativity model -- confirm against ParameterReader.
model_params = ParameterReader('nbmodel.txt').read(2)
deceptive_model_params = model_params[0]
negative_model_params = model_params[1]
# The deception classifier receives both components of the first model plus
# the unlabelled data (presumably word parameters and label priors, in that
# order -- verify against NaiveClassifier's constructor).
deception_classifier = NaiveClassifier(deceptive_model_params[0], deceptive_model_params[1], test_data)
Beispiel #4
0
        count = 0
        for labelled_data in self.labelled_data.values():
            count += len(labelled_data)

        return 1.0 * len(self.labelled_data[label]) / count

    def sampled_label_probabilities(self):
        """Map every label in ``self.labels`` to its sampled probability.

        Returns:
            dict of ``label -> self.sampled_label_probability(label)``.
        """
        return {
            label: self.sampled_label_probability(label)
            for label in self.labels
        }


# Root of the training corpus, taken from the first command-line argument.
train_dir = sys.argv[1]

loader = ReviewLoader()

# Deceptive/truthful training set: each class combines the reviews found
# under both polarity folders.
deceptive = (
    loader.load(train_dir + '/positive_polarity/deceptive_from_MTurk', 'deceptive')
    + loader.load(train_dir + '/negative_polarity/deceptive_from_MTurk', 'deceptive')
)
truthful = (
    loader.load(train_dir + '/negative_polarity/truthful_from_Web', 'truthful')
    + loader.load(train_dir + '/positive_polarity/truthful_from_TripAdvisor', 'truthful')
)
deception_learner = NaiveLearner(deceptive + truthful)

# Positive/negative training set: each polarity folder is loaded as a whole.
positive = loader.load(train_dir + '/positive_polarity', 'positive')
negative = loader.load(train_dir + '/negative_polarity', 'negative')
negativity_learner = NaiveLearner(positive + negative)

# Write both models to the same file, deception model first.
writer = ParameterWriter('nbmodel.txt')
writer.write(
    deception_learner.parameters,
    deception_learner.sampled_label_probabilities(),
)
writer.write(
    negativity_learner.parameters,
    negativity_learner.sampled_label_probabilities(),
)