def main(config_file='/home/huma/Downloads/irlib-0.1.1/irlib/classify.conf'):
    """Run the configured classifier over every fold and print its accuracy.

    The configuration is loaded from ``config_file`` and printed. For each
    fold returned by ``config.get_folds()`` a fresh ``Evaluation`` and a
    classifier are created, the classifier is trained and tested, and the
    fold's result is printed. Finally the mean of the per-fold results is
    printed.

    Args:
        config_file: Path of the configuration file to load. The default is
            the path that used to be hard-coded here; it is specific to one
            machine, so callers will normally want to pass their own.

    Raises:
        Exception: Whatever ``Configuration.load_configuration()`` or
            ``Configuration.get_configuration()`` raised is re-raised after
            an error message has been printed.
    """
    # Load configuration from file.
    config = Configuration(config_file=config_file)
    try:
        config.load_configuration()
        config_data = config.get_configuration()
    except Exception:
        # Only genuine errors are reported as configuration failures;
        # KeyboardInterrupt / SystemExit propagate without this message.
        print("Error loading configuration file.")
        print("Classifier aborting.")
        raise
    print(config)

    myfolds = config.get_folds()
    num_folds = len(myfolds)
    if num_folds == 0:
        # Without this guard the final average would divide by zero.
        print("No folds configured; nothing to evaluate.")
        return

    # Preprocessor: tokenizer, stemmer, etc.
    # Raw string: '\W' is not a valid string escape, so the non-raw form
    # triggers a warning on modern Python. The runtime value is unchanged.
    prep = Preprocessor(
        pattern=r'\W+',
        lower=config_data['lower'],
        stem=config_data['stem'],
        pos=config_data['pos'],
        ngram=config_data['ngram'],
    )

    # These settings do not change between folds, so read them once. A
    # missing key now fails before any training work is done.
    classifier_name = config_data['classifier']
    k = config_data['k']

    # NOTE(review): assumes ev.calculate() returns a number, since the
    # results are summed and averaged below -- confirm against Evaluation.
    total_accuracy = 0
    for myfold in myfolds:
        ev = Evaluation(config=config, fold=myfold)

        # Any classifier name other than 'rocchio' or 'knn' falls back to
        # NaiveBayes.
        if classifier_name == 'rocchio':
            ml = Rocchio(verbose=VERBOSE, fold=myfold, config=config, ev=ev)
        elif classifier_name == 'knn':
            ml = KNN(verbose=VERBOSE, fold=myfold, config=config, ev=ev)
        else:
            ml = NaiveBayes(verbose=VERBOSE, fold=myfold, config=config, ev=ev)

        training(config, myfold, ml, prep)
        ml.do_padding()
        ml.calculate_training_data()
        ml.diagnose()
        testing(config, myfold, ml, ev, prep)

        results = ev.calculate(review_spam=True, k=k)
        print('Accuracy for fold %d: %s' % (myfold, results))
        total_accuracy += results

    print("Average accuracy for all folds:", total_accuracy / num_folds)