def main(train_generator_list, labels, elements_count, classifier_name, k, smoothing_param, distancematrix, test_generator):
    '''
    Train the ML-kNN variant selected by classifier_name and evaluate it.

    @type train_generator_list: list of records
    @param train_generator_list: training records; passed to the classifier
        constructor and used as the pool searched for nearest neighbours.
        Each record is indexed with the key 'an' when distances are looked up.
    @param labels: collection of labels; only its length is used (it is
        forwarded to multilabel_evaluate).
    @param elements_count: not used by this function; kept for interface
        compatibility.
    @type classifier_name: string
    @param classifier_name: one of 'mlknn_basic', 'mlknn_threshold',
        'mlknn_tensembled', 'mlknn-basic-tree', 'mlknn-threshold-tree',
        'mlknn-tensembled-tree'. The '-tree' variants wrap the base
        classifier in MlHierarchical.
    @param k: for the basic/threshold variants a value accepted by int();
        for the tensembled variants a string of comma separated integers.
    @param smoothing_param: forwarded to MlknnBasic / MlknnThreshold; not
        used by the tensembled variants.
    @param distancematrix: argument handed to fread_smatrix, which returns
        the (rows, cols, data) triple used for distance lookups.
    @param test_generator: test records; handed to multilabel_evaluate
        wrapped in a zero-argument callable.

    @return: tuple (accuracy, precision, recall, hammingloss, subset01loss,
        fmeasure) exactly as returned by multilabel_evaluate.
    @raise ValueError: when classifier_name is not one of the supported names.
    '''
    # classifier name -> (model family, wrap in MlHierarchical?)
    variants = {
        'mlknn_basic': ('basic', False),
        'mlknn_threshold': ('threshold', False),
        'mlknn_tensembled': ('tensembled', False),
        'mlknn-basic-tree': ('basic', True),
        'mlknn-threshold-tree': ('threshold', True),
        'mlknn-tensembled-tree': ('tensembled', True),
    }
    if classifier_name not in variants:
        # Fail early with a clear message instead of a NameError on
        # `classifier` after all the loading work has been done.
        raise ValueError("Unknown classifier name: %r (expected one of: %s)"
                         % (classifier_name, ', '.join(sorted(variants))))
    family, hierarchical = variants[classifier_name]

    PRINTER("Finding label list...")
    get_labels_of_record = mc2lmc_tomka_blad

    PRINTER("Loading distance matrix...")
    import sys
    sys.path.append(r'../')
    from data_io.matrix_io import fread_smatrix
    (rows, cols, data) = fread_smatrix(distancematrix)
    # Map record identifiers to their row / column positions in `data`.
    id2rowind = dict((rec_id, ind) for ind, rec_id in enumerate(rows))
    id2colind = dict((rec_id, ind) for ind, rec_id in enumerate(cols))

    PRINTER("Training classifier...")
    from time import time

    def printer(x):
        # `logging` is expected to be available at module level.
        logging.info('['+classifier_name+']'+x)

    def distance(a, b):
        # Look up (row of a, column of b); if that lookup fails, retry with
        # the two indices swapped.
        # NOTE(review): the layout of `data` is not visible here, so the
        # handler is kept broad; narrow it once the failure mode is confirmed.
        try:
            return data[id2rowind[a['an']]][id2colind[b['an']]]
        except Exception:
            return data[id2colind[b['an']]][id2rowind[a['an']]]

    def neighbours_in(train_gen):
        # Build the two-argument neighbour finder bound to one training set.
        def get_neighbours(sample, k):
            return find_closest_points_sorted(sample, train_gen, [sample], k, distance)
        return get_neighbours

    start = time()

    if family == 'tensembled':
        # A list comprehension (rather than map) keeps `k` a real list, so
        # the log line below shows its contents on any interpreter.
        k = [int(value) for value in k.strip().split(',')]
        PRINTER("loaded k-list: "+str(k))
        from mlknn import mlknn_tensembled

        def mlknn_callable(train_gen, get_labels_of_record_arg):
            return mlknn_tensembled.MlknnTEnsembled(
                train_gen, neighbours_in(train_gen), k,
                get_labels_of_record_arg, lambda x: 1, printer)
    else:
        k = int(k)
        if family == 'basic':
            from mlknn import mlknn_basic
            model_class = mlknn_basic.MlknnBasic
        else:
            from mlknn import mlknn_threshold
            model_class = mlknn_threshold.MlknnThreshold

        def mlknn_callable(train_gen, get_labels_of_record_arg):
            return model_class(
                train_gen, neighbours_in(train_gen), k, smoothing_param,
                get_labels_of_record_arg, lambda x: 1, printer)

    if hierarchical:
        # Label prefixes of length 2 and 3, then the full label.
        label_mappings = (lambda x: x[:2], lambda x: x[:3], lambda x: x)
        from mltools.ml_hierarchical import MlHierarchical
        classifier = MlHierarchical(train_generator_list, mlknn_callable,
                                    label_mappings, get_labels_of_record)
    else:
        classifier = mlknn_callable(train_generator_list, get_labels_of_record)

    # Elapsed time is "now minus start"; the reverse order gave negative values.
    PRINTER("Time taken for training:"+str(time()-start))
    PRINTER("------------------------")
    PRINTER("---Testing classifier---")
    PRINTER("------------------------")
    classify_oracle = mc2lmc_tomka_blad
    from mltools.multilabel_evaluate import multilabel_evaluate, multilabel_evaluate_printresults
    accuracy, precision, recall, hammingloss, subset01loss, fmeasure = multilabel_evaluate(
        lambda: test_generator,
        classify_oracle,
        classifier.classify,
        len(labels),
        [('full label', lambda x: x),
         ('half label', lambda x: x[:3]),
         ('low label', lambda x: x[:2])])
    PRINTER("-----------RESULTS-----------")
    multilabel_evaluate_printresults(accuracy, precision, recall, hammingloss, subset01loss, fmeasure, PRINTER)
    return accuracy, precision, recall, hammingloss, subset01loss, fmeasure
def jrs_evaluate(results_oracle, results_classifier):
    '''
    Evaluate a multilabel classifier with per-object precision, recall and F1,
    each averaged over all objects.

    @type results_oracle: list of lists of strings
    @param results_oracle: labels assigned to each consecutive object by
        an expert
    @type results_classifier: list of lists of strings
    @param results_classifier: labels assigned to each consecutive object by
        a classifier being evaluated

    @return: tuple (0.0, avg_precision, avg_recall, 0.0, 0.0, avg_f1); the
        three fixed 0.0 entries are placeholders and are never computed here.
        When there are no objects to compare, every entry is 0.0.
    '''
    precs = []
    recals = []
    f1s = []
    # Objects are paired positionally; pairing stops at the shorter input.
    for oracle, pred in zip(results_oracle, results_classifier):
        oracle_set = set(oracle)
        pred_set = set(pred)
        common = float(len(oracle_set.intersection(pred_set)))
        # A metric whose denominator is zero is counted as 0.0.
        try:
            prec = common / len(pred_set)
        except ZeroDivisionError:
            prec = 0.0
        try:
            recall = common / len(oracle_set)
        except ZeroDivisionError:
            recall = 0.0
        try:
            f1 = 2.0*prec*recall/(prec+recall)
        except ZeroDivisionError:
            f1 = 0.0
        precs.append(prec)
        recals.append(recall)
        f1s.append(f1)

    if not precs:
        # Nothing to average; avoid dividing by zero below.
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

    avg_prec = sum(precs) / len(precs)
    avg_recal = sum(recals) / len(recals)
    avg_f1 = sum(f1s) / len(f1s)
    return 0.0, avg_prec, avg_recal, 0.0, 0.0, avg_f1