# Load the pickled inputs, train a distance and a fractional ML-kNN classifier
# on them, then pickle the trained classifier to save_classifier_path.
from tools.pickle_tools import read_pickle
train_generator_list = read_pickle(load_train_generator_path)
PRINTER('Loading labels path and elements count...')
lenlabels = len(read_pickle(load_labels_path))
elements_count = read_pickle(load_elements_count_path)

PRINTER("training distance...")
# Zero-argument callable handing back the already loaded training records, so
# consumers can ask for the training data as often as they need.
train_generator = lambda: train_generator_list
if distancetype == 'jac':
    from mlknn.jaccard_distance import JaccardDistance
    # Second argument is the element count reduced by a tenth of itself.
    zbldistance = JaccardDistance(train_generator,
                                  elements_count - int(elements_count / 10),
                                  distancetrainingsteps)
else:
    # Any other distancetype is passed straight through to the cosine distance.
    from mlknn.txt_cosine_distance import TxtCosineDistance
    zbldistance = TxtCosineDistance(distancetype)

PRINTER("Finding label list...")
get_labels_of_record = mc2lmc_tomka_blad
# Labels of the given records, requested with a minimum occurrence of 1.
find_all_labels = lambda frecords: get_labels_min_occurence(
    lambda: gen_lmc(frecords), 1)

PRINTER("Training MLKNN...")
from time import time
start = time()
mlknn_single = MlKnnFractional(train_generator, zbldistance,
                               find_closest_points, k, get_labels_of_record)
# Elapsed time is "now minus start"; the reversed subtraction used before
# reported a negative duration.
PRINTER("Time taken for training:" + str(time() - start))

from tools.pickle_tools import save_pickle
PRINTER("MLKNN: pickling the classifier...")
save_pickle(mlknn_single, save_classifier_path)
# Train the distance and the hierarchical ML-kNN built on top of it, persist
# the results, then print and evaluate the classifier on the test samples.
print("training distance...")
zbldistance = jaccard_distance.JaccardDistance(
    train_generator,
    elements_count - int(elements_count / 10),
    distancetrainingsteps)

print("training hierarchical mlknn...")
# Factory producing one ML-kNN per training generator; every instance shares
# the distance object trained above.
mlknn_callable = lambda train_gen: mlknn.MlKnn(
    train_gen,
    zbldistance,
    find_closest_points.find_closest_points,
    k,
    smoothingparam)
# One mapping per level: first two elements, first three elements, everything.
label_mappings = (
    lambda x: x[:2],
    lambda x: x[:3],
    lambda x: x,
)
record_mappings = (
    lambda x: gen_1record_prefixed(x, 2),
    lambda x: gen_1record_prefixed(x, 3),
    lambda x: x,
)
hierarhical_mlknn = ml_hierarchical.MlHierarchical(
    train_generator, mlknn_callable, label_mappings, record_mappings)

# Persist the tree content, the whole classifier, the training records and the
# number of labels, in that order.
from tools.pickle_tools import save_pickle
save_pickle(hierarhical_mlknn.mltree.content, save_hierarchical_path + "mlknn")
save_pickle(hierarhical_mlknn, save_hierarchical_path)
save_pickle(list(train_generator()), save_train_generator_path)
save_pickle(len(labels), save_lenlabels_path)

classify_oracle = lambda x: mc2lmc_tomka_blad(x)

print("----------------------------------------------------")
print("MLKNN:")
print("PRINTING TEST SAMPLES:")
for i in test_generator():
    print(classify_oracle(i))

# Evaluate at three label granularities, keyed by their display names.
multilabel_evaluate_printresults(
    test_generator,
    classify_oracle,
    hierarhical_mlknn.classify,
    len(labels),
    {
        'full label': lambda x: x,
        'half label': lambda x: x[:3],
        'low label': lambda x: x[:2],
    })
# Train a distance and an ensembled fractional ML-kNN classifier on the loaded
# training records, then pickle the classifier to save_classifier_path.
PRINTER("training distance...")
# Zero-argument callable handing back the already loaded training records.
train_generator = lambda: train_generator_list
if distancetype == 'jac':
    from mlknn.jaccard_distance import JaccardDistance
    # Second argument is the element count reduced by a tenth of itself.
    zbldistance = JaccardDistance(
        train_generator, elements_count - int(elements_count / 10),
        distancetrainingsteps)
else:
    # Any other distancetype is passed straight through to the cosine distance.
    from mlknn.txt_cosine_distance import TxtCosineDistance
    zbldistance = TxtCosineDistance(distancetype)

PRINTER("Finding label list...")
get_labels_of_record = mc2lmc_tomka_blad
# Labels of the given records, requested with a minimum occurrence of 1.
find_all_labels = lambda frecords: get_labels_min_occurence(
    lambda: gen_lmc(frecords), 1)

PRINTER("Training MLKNN...")
from time import time
start = time()
mlknn_single = MlKnnFractionalEnsembledStrongest(train_generator,
                                                 zbldistance,
                                                 find_closest_points,
                                                 k_list,
                                                 get_labels_of_record)
# Elapsed time is "now minus start"; the reversed subtraction used before
# reported a negative duration.
PRINTER("Time taken for training:" + str(time() - start))

from tools.pickle_tools import save_pickle
PRINTER("MLKNN: pickling the classifier...")
save_pickle(mlknn_single, save_classifier_path)
# Train a Jaccard distance and a hierarchical ML-kNN classifier on the loaded
# training records, then pickle the classifier to save_classifier_path.
elements_count = read_pickle(load_elements_count_path)
# Zero-argument callable handing back the already loaded training records.
train_generator = lambda: train_generator_list

# train mlknn:
PRINTER("Training Distance...")
# Second argument is the element count reduced by a tenth of itself.
zbldistance = JaccardDistance(train_generator,
                              elements_count - int(elements_count / 10),
                              distancetrainingsteps)

get_labels_of_record = mc2lmc_tomka_blad
# Labels of the given records, requested with a minimum occurrence of 1.
find_all_labels = lambda frecords: get_labels_min_occurence(
    lambda: gen_lmc(frecords), 1)

# Factory producing one ML-kNN per training generator; every instance shares
# the distance object trained above.
mlknn_callable = lambda train_gen: MlKnn(
    train_gen, zbldistance, find_closest_points, k, smoothingparam,
    find_all_labels, get_labels_of_record)

# One mapping per level: first two elements, first three elements, everything.
label_mappings = (lambda x: x[:2], lambda x: x[:3], lambda x: x)
record_mappings = (lambda x: gen_1record_prefixed(x, 2),
                   lambda x: gen_1record_prefixed(x, 3),
                   lambda x: x)

PRINTER("Training hierarchical mlknn...")
from time import time
start = time()
hierarhical_mlknn = MlHierarchical(train_generator, mlknn_callable,
                                   label_mappings, record_mappings)
# Elapsed time is "now minus start"; the reversed subtraction used before
# reported a negative duration.
PRINTER("time taken for training:" + str(time() - start))

from tools.pickle_tools import save_pickle
save_pickle(hierarhical_mlknn, save_classifier_path)
# Build a WeightedRandomLabelClassifier from pickled training data and pickle
# it. Expects four command-line arguments: training records path, labels path,
# elements count path and the output path for the classifier.
if len(sys.argv) < 5:
    PRINTER("Not enough of argument!")
    # sys.exit is the supported way to terminate a script; the bare exit()
    # helper is injected by the site module and is not guaranteed to exist.
    sys.exit(1)

load_train_generator_path = sys.argv[1]
load_labels_path = sys.argv[2]
load_elements_count_path = sys.argv[3]
save_classifier_path = sys.argv[4]

PRINTER("Input arguments:")
PRINTER("load_train_generator_path: " + str(load_train_generator_path))
PRINTER("load_labels_path: " + str(load_labels_path))
PRINTER("load_elements_count_path: " + str(load_elements_count_path))
PRINTER("save_classifier_path: " + str(save_classifier_path))

# NOTE(review): read_pickle presumably unpickles these files; the paths come
# straight from the command line, so only trusted files should be passed.
from tools.pickle_tools import read_pickle
train_generator_list = read_pickle(load_train_generator_path)
lenlabels = len(read_pickle(load_labels_path))
elements_count = read_pickle(load_elements_count_path)

# Zero-argument callable handing back the already loaded training records.
train_generator = lambda: train_generator_list
get_labels_of_record = mc2lmc_tomka_blad
classify_oracle = lambda x: mc2lmc_tomka_blad(x)

random_classif = WeightedRandomLabelClassifier(train_generator,
                                               get_labels_of_record,
                                               classify_oracle)

from tools.pickle_tools import save_pickle
save_pickle(random_classif, save_classifier_path)
if __name__ == '__main__':
    # Build a RandomLabelClassifier from pickled training data and pickle it.
    # Expects four command-line arguments: training records path, labels
    # path, elements count path and the output path for the classifier.
    if len(sys.argv) < 5:
        PRINTER("Not enough of argument!")
        # sys.exit is the supported way to terminate a script; the bare
        # exit() helper is injected by the site module and is not guaranteed
        # to exist.
        sys.exit(1)

    load_train_generator_path = sys.argv[1]
    load_labels_path = sys.argv[2]
    load_elements_count_path = sys.argv[3]
    save_classifier_path = sys.argv[4]

    PRINTER("Input arguments:")
    PRINTER("load_train_generator_path: "+str(load_train_generator_path))
    PRINTER("load_labels_path: "+str(load_labels_path))
    PRINTER("load_elements_count_path: "+str(load_elements_count_path))
    PRINTER("save_classifier_path: "+str(save_classifier_path))

    # NOTE(review): read_pickle presumably unpickles these files; the paths
    # come straight from the command line, so only trusted files should be
    # passed.
    from tools.pickle_tools import read_pickle
    train_generator_list = read_pickle(load_train_generator_path)
    lenlabels = len(read_pickle(load_labels_path))
    elements_count = read_pickle(load_elements_count_path)

    # Zero-argument callable handing back the already loaded training records.
    train_generator = lambda: train_generator_list
    get_labels_of_record = mc2lmc_tomka_blad
    # Labels of the given records, requested with a minimum occurrence of 1.
    find_all_labels = lambda frecords: get_labels_min_occurence(
        lambda: gen_lmc(frecords), 1)
    classify_oracle = lambda x: mc2lmc_tomka_blad(x)

    random_classif = RandomLabelClassifier(train_generator,
                                           get_labels_of_record,
                                           find_all_labels,
                                           classify_oracle)

    from tools.pickle_tools import save_pickle
    save_pickle(random_classif, save_classifier_path)
# Load the pickled inputs, train a Jaccard distance and a hierarchical ML-kNN
# classifier on them, then pickle the classifier to save_classifier_path.
PRINTER("-------------------------------------------")
PRINTER("Loading the input data.")
from tools.pickle_tools import read_pickle
train_generator_list = read_pickle(load_train_generator_path)
lenlabels = len(read_pickle(load_labels_path))
elements_count = read_pickle(load_elements_count_path)
# Zero-argument callable handing back the already loaded training records.
train_generator = lambda: train_generator_list

# train mlknn:
PRINTER("Training Distance...")
# Second argument is the element count reduced by a tenth of itself.
zbldistance = JaccardDistance(train_generator,
                              elements_count-int(elements_count/10),
                              distancetrainingsteps)

get_labels_of_record = mc2lmc_tomka_blad
# Labels of the given records, requested with a minimum occurrence of 1.
find_all_labels = lambda frecords: get_labels_min_occurence(
    lambda: gen_lmc(frecords), 1)

# Factory producing one ML-kNN per training generator; every instance shares
# the distance object trained above.
mlknn_callable = lambda train_gen: MlKnn(
    train_gen, zbldistance, find_closest_points, k, smoothingparam,
    find_all_labels, get_labels_of_record)

# One mapping per level: first two elements, first three elements, everything.
label_mappings = (lambda x: x[:2], lambda x: x[:3], lambda x: x)
record_mappings = (lambda x: gen_1record_prefixed(x, 2),
                   lambda x: gen_1record_prefixed(x, 3),
                   lambda x: x)

PRINTER("Training hierarchical mlknn...")
from time import time
start = time()
hierarhical_mlknn = MlHierarchical(train_generator, mlknn_callable,
                                   label_mappings, record_mappings)
# Elapsed time is "now minus start"; the reversed subtraction used before
# reported a negative duration.
PRINTER("time taken for training:"+str(time()-start))

from tools.pickle_tools import save_pickle
save_pickle(hierarhical_mlknn, save_classifier_path)
# Train the hierarchical ML-kNN on top of an existing zbldistance, persist the
# results, then print and evaluate the classifier on the test samples.
print "training hierarchical mlknn..."
# Factory producing one ML-kNN per training generator; every instance shares
# the same zbldistance object.
mlknn_callable = lambda train_gen: mlknn.MlKnn(
    train_gen, zbldistance, find_closest_points.find_closest_points, k,
    smoothingparam)
# One mapping per level: first two elements, first three elements, everything.
label_mappings = (lambda x: x[:2], lambda x: x[:3], lambda x: x)
record_mappings = (lambda x: gen_1record_prefixed(x, 2),
                   lambda x: gen_1record_prefixed(x, 3), lambda x: x)
hierarhical_mlknn = ml_hierarchical.MlHierarchical(train_generator,
                                                   mlknn_callable,
                                                   label_mappings,
                                                   record_mappings)
# Persist the tree content, the whole classifier, the training records and the
# number of labels, in that order.
from tools.pickle_tools import save_pickle
save_pickle(hierarhical_mlknn.mltree.content,
            save_hierarchical_path + "mlknn")
save_pickle(hierarhical_mlknn, save_hierarchical_path)
save_pickle(list(train_generator()), save_train_generator_path)
save_pickle(len(labels), save_lenlabels_path)
classify_oracle = lambda x: mc2lmc_tomka_blad(x)
print "----------------------------------------------------"
print "MLKNN:"
print "PRINTING TEST SAMPLES:"
for i in test_generator():
    print classify_oracle(i)
# NOTE(review): the argument list of this call continues past the end of the
# visible fragment (the dict literal is opened but not closed here).
multilabel_evaluate_printresults(
    test_generator, classify_oracle, hierarhical_mlknn.classify, len(labels), {
# Read the remaining command-line arguments, split the data into train and
# test sets, and pickle the resulting records, labels and elements count.
try:
    save_elements_count_path = sys.argv[7]
except IndexError:
    # Only a missing argument is expected here; a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    print('7th argument: path where elements count is to be stored.')
    sys.exit(1)

# 8th argument onwards: field names which have to occur for the record to be
# considered. Slicing never raises, so no error handling is needed; with no
# such arguments this is simply an empty list.
filtered_by = sys.argv[8:]

"""
PRINTERMAIN("Input arguments:")
PRINTERMAIN("fname: "+fname)
PRINTERMAIN("codeprefixlen: "+str(codeprefixlen))
PRINTERMAIN("mincodeoccurences: "+str(mincodeoccurences))
PRINTERMAIN("save_train_generator_path: "+save_train_generator_path)
PRINTERMAIN("save_test_generator_path: "+save_test_generator_path)
PRINTERMAIN("save_labels_path: "+save_labels_path)
PRINTERMAIN("save_elements_count_path: "+save_elements_count_path)
PRINTERMAIN("filtered_by: "+str(filtered_by))
"""

# NOTE(review): elements_count appears twice among the targets, so the fifth
# returned value overwrites the third. Confirm against
# split_train_test_highest which of the two is intended.
train_generator, test_generator, elements_count, labels, elements_count = \
    split_train_test_highest(fname, codeprefixlen, mincodeoccurences,
                             filtered_by)

from tools.pickle_tools import save_pickle
# Generators are materialised into lists before being pickled.
save_pickle(list(train_generator()), save_train_generator_path)
save_pickle(list(test_generator()), save_test_generator_path)
save_pickle(labels, save_labels_path)
save_pickle(elements_count, save_elements_count_path)