def classify(self, data): post_probability = {} for label, probability in self.label_probabilities.iteritems(): words = re.compile('\w+').findall(data.text) post_probability[label] = reduce( lambda acc, word: acc + math.log( self.parameters.probability(word, label), 10), words, math.log(probability)) return data, max(post_probability.iteritems(), key=operator.itemgetter(1))[0] test_dir = sys.argv[1] loader = ReviewLoader() # truthful = loader.load(test_dir + "/negative/truthful", 'truthful') \ # + loader.load(test_dir + "/positive/truthful", 'truthful') # deceptive = loader.load(test_dir + "/negative/deceptive", 'deceptive') \ # + loader.load(test_dir + "/positive/deceptive", 'deceptive') # positive = loader.load(test_dir + '/positive', 'positive') # negative = loader.load(test_dir + '/negative', 'negative') # # test_data1 = truthful + deceptive # test_data2 = positive + negative test_data = loader.load_without_label(test_dir) model_params = ParameterReader('nbmodel.txt').read(2) deceptive_model_params = model_params[0] negative_model_params = model_params[1] deception_classifier = NaiveClassifier(deceptive_model_params[0],
def sampled_label_probability(self, label):
    """Return the share of all labelled samples that carry ``label``.

    NOTE(review): takes ``self``, so this is presumably a method of the
    learner class whose header is outside this chunk.

    The denominator is the total number of samples across every entry of
    ``self.labelled_data``; the numerator is the number stored under
    ``label``.
    """
    total = sum(len(samples) for samples in self.labelled_data.values())
    # The 1.0 factor forces float division on Python 2.
    return 1.0 * len(self.labelled_data[label]) / total


def sampled_label_probabilities(self):
    """Map every label in ``self.labels`` to its sampled probability."""
    return dict(
        (label, self.sampled_label_probability(label))
        for label in self.labels)


# --- Script: train both models and store them in nbmodel.txt ---
train_dir = sys.argv[1]
loader = ReviewLoader()

# Deception model: deceptive vs. truthful reviews, both polarities pooled.
deceptive = (
    loader.load(train_dir + '/positive_polarity/deceptive_from_MTurk',
                'deceptive')
    + loader.load(train_dir + '/negative_polarity/deceptive_from_MTurk',
                  'deceptive'))
truthful = (
    loader.load(train_dir + '/negative_polarity/truthful_from_Web',
                'truthful')
    + loader.load(train_dir + '/positive_polarity/truthful_from_TripAdvisor',
                  'truthful'))
deception_learner = NaiveLearner(deceptive + truthful)

# Polarity model: positive vs. negative reviews.
positive = loader.load(train_dir + '/positive_polarity', 'positive')
negative = loader.load(train_dir + '/negative_polarity', 'negative')
negativity_learner = NaiveLearner(positive + negative)

# Deception model is written first, then the polarity model.
writer = ParameterWriter('nbmodel.txt')
writer.write(
    deception_learner.parameters,
    deception_learner.sampled_label_probabilities())
writer.write(
    negativity_learner.parameters,
    negativity_learner.sampled_label_probabilities())
return map(self.classify, self.to_classify) def classify(self, data): post_probability = {} for label, probability in self.label_probabilities.iteritems(): words = re.compile('\w+').findall(data.text) post_probability[label] = reduce( lambda acc, word: acc + math.log(self.parameters.probability(word, label), 10), words, math.log(probability)) return data, max(post_probability.iteritems(), key=operator.itemgetter(1))[0] test_dir = sys.argv[1] loader = ReviewLoader() # truthful = loader.load(test_dir + "/negative/truthful", 'truthful') \ # + loader.load(test_dir + "/positive/truthful", 'truthful') # deceptive = loader.load(test_dir + "/negative/deceptive", 'deceptive') \ # + loader.load(test_dir + "/positive/deceptive", 'deceptive') # positive = loader.load(test_dir + '/positive', 'positive') # negative = loader.load(test_dir + '/negative', 'negative') # # test_data1 = truthful + deceptive # test_data2 = positive + negative test_data = loader.load_without_label(test_dir) model_params = ParameterReader('nbmodel.txt').read(2) deceptive_model_params = model_params[0] negative_model_params = model_params[1] deception_classifier = NaiveClassifier(deceptive_model_params[0], deceptive_model_params[1], test_data)
count = 0 for labelled_data in self.labelled_data.values(): count += len(labelled_data) return 1.0 * len(self.labelled_data[label]) / count def sampled_label_probabilities(self): probabilities = {} for label in self.labels: probabilities[label] = self.sampled_label_probability(label) return probabilities train_dir = sys.argv[1] loader = ReviewLoader() deceptive = loader.load(train_dir + '/positive_polarity/deceptive_from_MTurk', 'deceptive') + \ loader.load(train_dir + '/negative_polarity/deceptive_from_MTurk', 'deceptive') truthful = loader.load(train_dir + '/negative_polarity/truthful_from_Web', 'truthful') + \ loader.load(train_dir + '/positive_polarity/truthful_from_TripAdvisor', 'truthful') deception_learner = NaiveLearner(deceptive + truthful) positive = loader.load(train_dir + '/positive_polarity', 'positive') negative = loader.load(train_dir + '/negative_polarity', 'negative') negativity_learner = NaiveLearner(positive + negative) writer = ParameterWriter('nbmodel.txt') writer.write(deception_learner.parameters, deception_learner.sampled_label_probabilities()) writer.write(negativity_learner.parameters, negativity_learner.sampled_label_probabilities())