def train_and_evaluate(self):
    """Train on a random split of the labelled emails and score the rest.

    The ids 1..2500 are shuffled; the first 2250 shuffled ids form the
    training set and the remaining 250 form the held-out set. Rows of
    ``labels.csv`` whose ``Id`` falls in neither set are ignored.

    Emails that fail to load, train or classify are logged and skipped,
    so they count as neither correct nor incorrect.

    Returns:
        The value of ``self._calculate_results(correct, incorrect)``.
    """
    all_ids = list(range(1, 2501))
    random.shuffle(all_ids)
    # Sets give O(1) membership checks for the per-row tests below.
    training_ids = set(all_ids[:2250])
    labeling_ids = set(all_ids[2250:])

    # Read the label file once; both passes below walk the same rows.
    with open("%s/labels.csv" % self.path, "r") as labels_csv:
        rows = list(csv.DictReader(labels_csv))

    # Pass 1: training must finish before any held-out email is scored.
    for row in rows:
        label = row["Prediction"]
        # Use the instance's data directory, the same one labels.csv is
        # read from, rather than a module-level name.
        filename = "%s/TR/TRAIN_%s.eml" % (self.path, row["Id"])
        if int(row["Id"]) not in training_ids:
            continue
        try:
            body = extract_body(filename)
            self.naive_bayes.train(int(label), body)
        except Exception as e:
            # Log the exception object itself: ``e.message`` is not
            # available on Python 3 exceptions and would raise here.
            logger.info("Error training email %s: %s", row["Id"], e)

    # Pass 2: classify the held-out emails and tally the outcome.
    correct, incorrect = 0, 0
    for row in rows:
        label = row["Prediction"]
        filename = "%s/TR/TRAIN_%s.eml" % (self.path, row["Id"])
        if int(row["Id"]) not in labeling_ids:
            continue
        try:
            test_body = extract_body(filename)
            result = self.naive_bayes.classify(test_body)
            if result == int(label):
                correct += 1
            else:
                incorrect += 1
        except Exception as e:
            logger.info("Error classifying email %s: %s", row["Id"], e)

    return self._calculate_results(correct, incorrect)
def train(self):
    """Train the classifier on every email listed in ``labels.csv``.

    Each CSV row supplies an ``Id`` (used to locate
    ``TR/TRAIN_<Id>.eml`` under ``self.path``) and a ``Prediction``
    (converted to ``int`` and passed to ``self.naive_bayes.train`` as
    the label). Emails that cannot be loaded or trained on are logged
    and skipped so one bad file does not abort the whole run.
    """
    with open("%s/labels.csv" % self.path, "r") as labels_csv:
        for row in csv.DictReader(labels_csv):
            label = row["Prediction"]
            # Use the instance's data directory, the same one labels.csv
            # is read from, rather than a module-level name.
            filename = "%s/TR/TRAIN_%s.eml" % (self.path, row["Id"])
            try:
                body = extract_body(filename)
                self.naive_bayes.train(int(label), body)
            except Exception as e:
                # Log the exception object itself: ``e.message`` is not
                # available on Python 3 exceptions and would raise here.
                logger.info("Error training email %s: %s", row["Id"], e)
def train(self):
    """Train the classifier on every email listed in ``labels.csv``.

    Each CSV row supplies an ``Id`` (used to locate
    ``TR/TRAIN_<Id>.eml`` under ``self.path``) and a ``Prediction``
    (converted to ``int`` and passed to ``self.naive_bayes.train`` as
    the label). Emails that cannot be loaded or trained on are logged
    and skipped so one bad file does not abort the whole run.
    """
    with open('%s/labels.csv' % self.path, 'r') as labels_csv:
        for row in csv.DictReader(labels_csv):
            label = row['Prediction']
            # Use the instance's data directory, the same one labels.csv
            # is read from, rather than a module-level name.
            filename = '%s/TR/TRAIN_%s.eml' % (self.path, row['Id'])
            try:
                body = extract_body(filename)
                self.naive_bayes.train(int(label), body)
            except Exception as e:
                # Log the exception object itself: ``e.message`` is not
                # available on Python 3 exceptions and would raise here.
                logger.info("Error training email %s: %s", row['Id'], e)
def classify(self, size):
    """Classify test emails numbered 1..size and store the results.

    For each number ``n`` the file ``TT/TEST_<n>.eml`` under
    ``self.path`` is loaded and classified; the prediction is recorded
    in ``self.classified_examples`` as ``str(n) -> str(prediction)``.
    Emails that fail to load or classify are logged and left out of the
    mapping. ``self._store_results()`` is called once at the end.

    Args:
        size: Number of test emails to process (the highest email
            number, inclusive).
    """
    test = self.path + "/TT/TEST_%s.eml"
    for counter in range(1, size + 1):
        try:
            test_body = extract_body(test % counter)
            self.classified_examples[str(counter)] = str(
                self.naive_bayes.classify(test_body))
        except Exception as e:
            # Log the exception object itself: ``e.message`` is not
            # available on Python 3 exceptions and would raise here.
            logger.info("Error classifying email %s: %s", counter, e)
    self._store_results()
def classify(self, size):
    """Classify test emails numbered 1..size and store the results.

    For each number ``n`` the file ``TT/TEST_<n>.eml`` under
    ``self.path`` is loaded and classified; the prediction is recorded
    in ``self.classified_examples`` as ``str(n) -> str(prediction)``.
    Emails that fail to load or classify are logged and left out of the
    mapping. ``self._store_results()`` is called once at the end.

    Args:
        size: Number of test emails to process (the highest email
            number, inclusive).
    """
    test = self.path + '/TT/TEST_%s.eml'
    for counter in range(1, size + 1):
        try:
            test_body = extract_body(test % counter)
            self.classified_examples[str(counter)] = str(
                self.naive_bayes.classify(test_body))
        except Exception as e:
            # Log the exception object itself: ``e.message`` is not
            # available on Python 3 exceptions and would raise here.
            logger.info("Error classifying email %s: %s", counter, e)
    self._store_results()
def train_and_evaluate(self):
    """Train on a random split of the labelled emails and score the rest.

    The ids 1..2500 are shuffled; the first 2250 shuffled ids form the
    training set and the remaining 250 form the held-out set. Rows of
    ``labels.csv`` whose ``Id`` falls in neither set are ignored.

    Emails that fail to load, train or classify are logged and skipped,
    so they count as neither correct nor incorrect.

    Returns:
        The value of ``self._calculate_results(correct, incorrect)``.
    """
    all_ids = list(range(1, 2501))
    random.shuffle(all_ids)
    # Sets give O(1) membership checks for the per-row tests below.
    training_ids = set(all_ids[:2250])
    labeling_ids = set(all_ids[2250:])

    # Read the label file once; both passes below walk the same rows.
    with open('%s/labels.csv' % self.path, 'r') as labels_csv:
        rows = list(csv.DictReader(labels_csv))

    # Pass 1: training must finish before any held-out email is scored.
    for row in rows:
        label = row['Prediction']
        # Use the instance's data directory, the same one labels.csv is
        # read from, rather than a module-level name.
        filename = '%s/TR/TRAIN_%s.eml' % (self.path, row['Id'])
        if int(row['Id']) not in training_ids:
            continue
        try:
            body = extract_body(filename)
            self.naive_bayes.train(int(label), body)
        except Exception as e:
            # Log the exception object itself: ``e.message`` is not
            # available on Python 3 exceptions and would raise here.
            logger.info("Error training email %s: %s", row['Id'], e)

    # Pass 2: classify the held-out emails and tally the outcome.
    correct, incorrect = 0, 0
    for row in rows:
        label = row['Prediction']
        filename = '%s/TR/TRAIN_%s.eml' % (self.path, row['Id'])
        if int(row['Id']) not in labeling_ids:
            continue
        try:
            test_body = extract_body(filename)
            result = self.naive_bayes.classify(test_body)
            if result == int(label):
                correct += 1
            else:
                incorrect += 1
        except Exception as e:
            logger.info("Error classifying email %s: %s", row['Id'], e)

    return self._calculate_results(correct, incorrect)