def main(train_dir, labels=('spam', 'ham')):
    """Train a NaiveBayesModel from labelled files and save it to disk.

    Collects the (label, path) pairs produced by ``read_training_data``
    into one list of paths per label, builds a ``NaiveBayesModel`` from
    that mapping and saves it to ``'nbmodel.txt'``.

    Args:
        train_dir: Location passed straight to ``read_training_data``.
        labels: Labels to collect paths for. Every label gets an entry in
            the mapping handed to the model, even when no path carries it.

    Raises:
        KeyError: If ``read_training_data`` yields a non-None label that
            is not listed in ``labels``.
    """
    print('Train dir: %s' % train_dir)
    # Pre-create every bucket so the model sees all requested labels.
    paths = {label: [] for label in labels}
    for label, path in read_training_data(train_dir):
        if label is None:
            print('Label is None for Path=%s' % path)
            # Without a label there is no bucket to file this path under;
            # falling through would raise KeyError(None) right after the
            # warning was printed, so skip the entry instead.
            continue
        paths[label].append(path)
    model = NaiveBayesModel(paths)
    model.save_to_path('nbmodel.txt')
def main(data_dir, model_path, out_path):
    """Classify every document under ``data_dir`` and record the results.

    Loads a model via ``NaiveBayesModel.load_from_path(model_path)``, then
    for each (label, path) pair produced by ``read_training_data(data_dir)``
    reads the file at ``path``, asks the model for its predictions and
    writes one ``"<best label> <path>"`` line to ``out_path``. The label
    that comes with each pair is not used.

    Args:
        data_dir: Location passed straight to ``read_training_data``.
        model_path: Where the saved model is loaded from.
        out_path: File that receives the predictions; it is opened for
            writing, so existing content is replaced.
    """
    banner = ("Reading data from %s\nReading model from %s\n"
              "Storing output to %s")
    print(banner % (data_dir, model_path, out_path))

    classifier = NaiveBayesModel.load_from_path(model_path)
    samples = read_training_data(data_dir)

    # Buffering value 1 selects line buffering for the text-mode output.
    with open(out_path, 'w', 1, encoding=ENCODING) as sink:
        for _label, doc_path in samples:
            with open(doc_path, 'r', encoding=ENCODING) as source:
                text = source.read()
                scores = classifier.predict(text)
                # Pick the key whose associated score is the largest.
                best = max(scores, key=scores.get)
                sink.write("%s %s\n" % (best, doc_path))