# NOTE(review): fragment — this chunk starts in the middle of a
# CommandArgs({...}) option dict (the opening call is outside this view) and
# its newlines/indentation were collapsed onto one line. Left byte-identical;
# presumably part of a 5-fold training loop that binarizes X and maps Y to
# {-1, +1} — TODO confirm against the full file before reformatting.
'test' : ('', 'Path of testing data file'), 'class' : (None, 'Class index'), 'n' : (250, 'Maximum number of rules') }) if not config.load(sys.argv): print ('Argument is not correct. Please try again') sys.exit(2) class_index = int(config.get_value('class')) nrules = int(config.get_value('n')) for i in range(5): train_data = DataSet() train_data.load(config.get_value('train')+'.'+str(i), class_index) test_data = DataSet() test_data.load(config.get_value('test')+'.'+str(i), class_index) print(train_data.size()) ''' Convert data into binary ''' rel_train_X = train_data.get_X_in_binary() rel_train_Y = train_data.get_Y_in_numeric() train_X = rel_train_X.relation_matrix train_Y = rel_train_Y.values train_Y[train_Y < 1] = -1 train_Y[train_Y >= 1] = 1
if __name__ == '__main__':
    # Command-line options: train/test data paths, the mined unexpected-rule
    # file, and the index of the class column in the data.
    config = CommandArgs({
        'train': ('', 'Path of train data.'),
        'test': ('', 'Path of test data.'),  # fix: help text read 'Paht'
        'rules': ('', 'Path of unexpected rules.'),
        'class': (0, 'Index of class in data.')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    class_index = int(config.get_value('class'))

    # Load training data and convert it to a binary (one-hot style) matrix.
    train_data_set = DataSet()
    train_data_set.load(config.get_value('train'), class_index, has_header=False)
    X_train, Y_train = train_data_set.convert_2_binary_format()

    # Encode the test set with the SAME item dictionaries as the training
    # set so feature/label columns line up between the two matrices.
    test_data_set = DataSet()
    test_data_set.load(config.get_value('test'), class_index, has_header=False)
    Xtest, Ytest = test_data_set.convert_2_binary_format_with(
        X_train.item_dict, Y_train.item_dict)
    Ytest = Ytest.flatten()

    class_count = train_data_set.number_of_classes()

    # Load previously mined unexpected rules and filter them down.
    unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
    refined_unexpected_rules = filter_association_rules(unexpected_rules)
# NOTE(review): fragment — starts in the middle of a CommandArgs({...}) option
# dict and ends just after constructing a RuleMiner; the surrounding code is
# outside this view and the line's original newlines were collapsed. Left
# byte-identical rather than guessing at the missing opening/closing code.
'class': (0, 'Class index'), 'minsup': (0.1, 'Minimum support'), 'nloop': (100, 'Number of loops'), 'lambda': (0.1, 'Lambda value'), 'beta': (0.01, 'Beta value') }) if not config.load(sys.argv): print('Argument is not correct. Please try again') sys.exit(2) min_conf = 0.0 rule_format = 'spect' class_index = int(config.get_value('class')) train_data = DataSet() train_data.load(config.get_value('train'), class_index) test_data = DataSet() test_data.load(config.get_value('test'), class_index) min_sup = float(config.get_value('minsup')) nloop = int(config.get_value('nloop')) labels = sorted(train_data.count_classes().keys()) my_lambda = float(config.get_value('lambda')) my_beta = float(config.get_value('beta')) ''' Generate association rules ''' rule_miner = RuleMiner(rule_format, train_data.create_dataset_without_class())
# NOTE(review): this collapsed line is a truncated copy of the script that
# also appears (more completely) elsewhere in this chunk — it is cut off in
# the middle of the final experiment1(...) call, so it cannot be reformatted
# or fixed without guessing the missing arguments. Left byte-identical.
# NOTE(review): "print 'trans non'" is Python-2 print-statement syntax; the
# rest of the script uses print() — this line is a SyntaxError under
# Python 3 and should be changed to print('trans non') once the truncated
# tail is recovered.
__author__ = 'punki' from common.DataSet import DataSet from common.LinearRegresion import LinearRegresion import time, random, numpy as np from sklearn import svm training_data_set = DataSet('features.train.txt') test_data_set = DataSet('features.test.txt') classes = np.unique(training_data_set.get_y()) fake_transofrmation = (lambda x1, x2: (1, x1, x2)) ex8_transofrmation = (lambda x1, x2: (1, x1, x2, x1 * x2, x1**2, x2**2)) def experiment1(training, test, transformation, reg_lambda, exp_id): lr = LinearRegresion(reg_lambda, transformation) lr.fit(training) e_in = lr.error(training) e_out = lr.error(test) all_e_in.append((exp_id, e_in)) all_e_out.append((exp_id, e_out)) print('lambda={} exp_id={} e_in={} e_out={}'.format( reg_lambda, exp_id, e_in, e_out)) print 'trans non' all_e_in = [] all_e_out = [] for one in range(0, 10): experiment1(training_data_set.one_versus_all(one),
@author: danhbuithi ''' import sys from common.CommandArgs import CommandArgs from common.DataSet import DataSet if __name__ == '__main__': config = CommandArgs({ 'data': ('', 'Path of training data file'), 'n': (5, 'Number of sub-learning sets'), 'class': (-1, 'Class index') }) if not config.load(sys.argv): print('Argument is not correct. Please try again') sys.exit(2) nsubsets = int(config.get_value('n')) class_index = int(config.get_value('class')) all_data = DataSet() all_data.load(config.get_value('data'), class_index) subsets = all_data.split_random_in_k(nsubsets) for i in range(nsubsets): test_data, train_data = DataSet.create_datasets_by_crossvalidation( subsets, i) test_data.save(config.get_value('data') + '.test' + '.' + str(i)) train_data.save(config.get_value('data') + '.train' + '.' + str(i))
from rules_mining.RuleMiner import RuleMiner


if __name__ == '__main__':
    # Command-line options: input path, data format, mining thresholds,
    # item-set size cap, and the class column index.
    config = CommandArgs({
        'input': ('', 'Path of data-set file'),
        'format': ('mydefault', 'Format of input data'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.3, 'Minimum confidence'),
        'maxitems': (-1, 'Maximum number of items in the rules'),
        'class': (-1, 'Class index')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    class_index = int(config.get_value('class'))
    train_data_set = DataSet()
    train_data_set.load(config.get_value('input'), class_index)

    print('Generating rules ....')
    # Mining thresholds parsed from the command line.
    min_sup_src = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
    itemset_max_size = int(config.get_value('maxitems'))

    # Mine frequent item-sets and association rules at the requested
    # support/confidence levels, bounded by the item-set size cap.
    miner = RuleMiner(config.get_value('format'), train_data_set)
    miner.generate_itemsets_and_rules(min_sup_src, min_conf, itemset_max_size)
    print('Finished!!!')
# NOTE(review): this collapsed line is a truncated copy of the SVM experiment
# script that also appears (more completely) elsewhere in this chunk — it is
# cut off in the middle of the final compute_error(...) call, so it cannot be
# reformatted or rewritten without guessing the missing arguments. Left
# byte-identical. Iterating the set literals {1, 5} and {0.0001, ...} makes
# the experiment order arbitrary — presumably lists were intended; confirm
# when the full file is available.
__author__ = 'punki' from common.DataSet import DataSet import time, random, numpy as np from sklearn import svm def compute_error(clf, x, target): predict = clf.predict(x) return len(target[target != predict]) / float(len(target)) training_data_set = DataSet('features.train.txt') test_data_set = DataSet('features.test.txt') classes = np.unique(training_data_set.get_y()) e_in_for_n = [] e_out_for_n = [] for one in {1, 5}: e_in_all = [] e_out_all = [] for C in {0.0001, 0.001, 0.01, 0.1, 1}: q = 5 clf = svm.SVC(kernel='poly', C=C, degree=q) training_one_versus = training_data_set.one_versus_all(one) test_one_versus = test_data_set.one_versus_all(one) clf.fit(training_one_versus.get_x(), training_one_versus.get_y()) e_in = compute_error(clf, training_one_versus.get_x(), training_one_versus.get_y()) e_out = compute_error(clf, test_one_versus.get_x(),
__author__ = 'punki'
from common.DataSet import DataSet
import time, random, numpy as np
from sklearn import svm


def compute_error(clf, x, target):
    """Return the misclassification rate of *clf* on the sample (x, target).

    target is assumed to be a numpy array of labels (the original indexed it
    with a boolean mask); np.mean over the disagreement mask gives the same
    fraction in one vectorized pass.
    """
    predict = clf.predict(x)
    return float(np.mean(target != predict))


training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

e_in_for_n = []
e_out_for_n = []
# Fix: the original iterated over set literals ({1, 5}, {0.0001, ...}),
# whose iteration order is arbitrary — the recorded errors could come out
# in any order. Lists keep the experiment order deterministic, as written.
for one in [1, 5]:
    e_in_all = []
    e_out_all = []
    # The one-vs-all splits depend only on `one`, so hoist them out of the
    # C loop instead of recomputing them for every regularization value.
    training_one_versus = training_data_set.one_versus_all(one)
    test_one_versus = test_data_set.one_versus_all(one)
    for C in [0.0001, 0.001, 0.01, 0.1, 1]:
        q = 5
        # Degree-5 polynomial-kernel SVM for the "one versus all" problem.
        clf = svm.SVC(kernel='poly', C=C, degree=q)
        clf.fit(training_one_versus.get_x(), training_one_versus.get_y())
        e_in = compute_error(clf, training_one_versus.get_x(),
                             training_one_versus.get_y())
        e_out = compute_error(clf, test_one_versus.get_x(),
                              test_one_versus.get_y())
        e_in_all.append(e_in)
__author__ = 'punki'
from common.DataSet import DataSet
from common.LinearRegresion import LinearRegresion
import time, random, numpy as np
from sklearn import svm

training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

# Identity feature map with a bias term: (1, x1, x2).
fake_transofrmation = (lambda x1, x2: (1, x1, x2))
# Second-order polynomial feature map used in exercise 8.
ex8_transofrmation = (lambda x1, x2: (1, x1, x2, x1 * x2, x1 ** 2, x2 ** 2))


def experiment1(training, test, transformation, reg_lambda, exp_id):
    """Fit a regularized linear regression on *training* and record the
    in-sample/out-of-sample errors in the module-level all_e_in/all_e_out
    lists, tagged with *exp_id*.
    """
    lr = LinearRegresion(reg_lambda, transformation)
    lr.fit(training)
    e_in = lr.error(training)
    e_out = lr.error(test)
    all_e_in.append((exp_id, e_in))
    all_e_out.append((exp_id, e_out))
    print('lambda={} exp_id={} e_in={} e_out={}'.format(
        reg_lambda, exp_id, e_in, e_out))


# Fix: this was the Python-2 statement `print 'trans non'` — a SyntaxError
# under Python 3; the rest of the file already uses the print() function.
print('trans non')
all_e_in = []
all_e_out = []
for one in range(0, 10):
    # One "digit versus all" experiment per class, no feature transform.
    experiment1(training_data_set.one_versus_all(one),
                test_data_set.one_versus_all(one), fake_transofrmation, 1, one)
# NOTE(review): fragment — `config` is created before this visible span (a
# CommandArgs declaration outside this view), and the original newlines were
# collapsed onto one line. Left byte-identical; appears to loop over k
# cross-validation splits, binarizing each train/test pair with the training
# item dictionaries — TODO confirm against the full file before reformatting.
if not config.load(sys.argv): print ('Argument is not correct. Please try again') sys.exit(2) class_index = int(config.get_value('class')) min_sup = float(config.get_value('minsup')) min_conf = float(config.get_value('minconf')) rule_format = config.get_value('format') nsubsets = int(config.get_value('n')) nloop = int(config.get_value('nloop')) for i in range(nsubsets): print('Test for case ...' + str(i)) train_data = DataSet() train_data.load(config.get_value('train')+'.'+str(i), class_index) print('#transactions', train_data.size()) test_data = DataSet() test_data.load(config.get_value('test')+'.'+str(i), class_index) labels = sorted(train_data.count_classes().keys()) ''' Convert data into binary ''' rel_train_X = train_data.get_X_in_binary() rel_train_Y = train_data.get_Y_in_numeric() test_X = test_data.get_X_in_binary_with(rel_train_X.item_dict) test_Y = test_data.get_Y_in_numeric_with(rel_train_Y.item_dict)