Code example #1
File: TestRuleFit.py Project: banhdzui/MoMAC-v1
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

if __name__ == '__main__':
    config = CommandArgs({'train'  : ('', 'Path of training data file'),
                          'test'   : ('', 'Path of testing data file'),
                          'class'   : (None, 'Class index'),
                          'n'   : (250, 'Maximum number of rules')
                          })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    nrules = int(config.get_value('n'))

    for i in range(5):
        train_data = DataSet()
        train_data.load(config.get_value('train')+'.'+str(i), class_index)
        
        test_data = DataSet()
        test_data.load(config.get_value('test')+'.'+str(i), class_index)
        print(train_data.size())
        
        '''
        Convert data into binary
        '''
        rel_train_X = train_data.get_X_in_binary()
        rel_train_Y = train_data.get_Y_in_numeric()
        train_X = rel_train_X.relation_matrix
        train_Y = rel_train_Y.values
        train_Y[train_Y < 1] = -1
        train_Y[train_Y >= 1] = 1
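The two masked assignments above collapse the numeric labels into the {-1, +1} encoding the rule-fit model expects. A minimal, self-contained sketch of that step (plain numpy; the example labels are made up for illustration):

import numpy as np

train_Y = np.array([0, 1, 2, 0, 1])   # hypothetical numeric class labels
train_Y[train_Y < 1] = -1             # class 0 becomes the negative class
train_Y[train_Y >= 1] = 1             # every other class collapses to +1
print(train_Y)                        # [-1  1  1 -1  1]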
Code example #2
if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of train data.'),
        'test': ('', 'Path of test data.'),
        'rules': ('', 'Path of unexpected rules.'),
        'class': (0, 'Index of class in data.')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    class_index = int(config.get_value('class'))
    train_data_set = DataSet()
    train_data_set.load(config.get_value('train'),
                        class_index,
                        has_header=False)
    X_train, Y_train = train_data_set.convert_2_binary_format()

    test_data_set = DataSet()
    test_data_set.load(config.get_value('test'), class_index, has_header=False)
    Xtest, Ytest = test_data_set.convert_2_binary_format_with(
        X_train.item_dict, Y_train.item_dict)
    Ytest = Ytest.flatten()

    class_count = train_data_set.number_of_classes()

    unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
    refined_unexpected_rules = filter_association_rules(unexpected_rules)
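IOHelper.load_json_object is project code that is not shown here; assuming it simply deserializes a JSON file, a standard-library stand-in would be:

import json

def load_json_object(file_path):
    # Hypothetical stand-in for IOHelper.load_json_object: read one JSON
    # document from disk and return the resulting Python object.
    with open(file_path, 'r') as f:
        return json.load(f)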
Code example #3
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from rules_mining.RuleMiner import RuleMiner

if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (0, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'nloop': (100, 'Number of loops'),
        'lambda': (0.1, 'Lambda value'),
        'beta': (0.01, 'Beta value')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    min_conf = 0.0
    rule_format = 'spect'

    class_index = int(config.get_value('class'))
    train_data = DataSet()
    train_data.load(config.get_value('train'), class_index)

    test_data = DataSet()
    test_data.load(config.get_value('test'), class_index)
    min_sup = float(config.get_value('minsup'))

    nloop = int(config.get_value('nloop'))
    labels = sorted(train_data.count_classes().keys())
    my_lambda = float(config.get_value('lambda'))
    my_beta = float(config.get_value('beta'))
    '''
    Generate association rules
    '''
    rule_miner = RuleMiner(rule_format,
                           train_data.create_dataset_without_class())
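The labels line in this snippet assumes DataSet.count_classes() returns a {label: frequency} mapping; a Counter-based sketch of that contract:

from collections import Counter

def count_classes(class_column):
    # Hypothetical equivalent of DataSet.count_classes().
    return dict(Counter(class_column))

print(sorted(count_classes(['yes', 'no', 'yes']).keys()))  # ['no', 'yes']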
Code example #4
__author__ = 'punki'

from common.DataSet import DataSet
from common.LinearRegresion import LinearRegresion
import time, random, numpy as np
from sklearn import svm

training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

fake_transofrmation = (lambda x1, x2: (1, x1, x2))
ex8_transofrmation = (lambda x1, x2: (1, x1, x2, x1 * x2, x1**2, x2**2))


def experiment1(training, test, transformation, reg_lambda, exp_id):
    lr = LinearRegresion(reg_lambda, transformation)
    lr.fit(training)
    e_in = lr.error(training)
    e_out = lr.error(test)
    all_e_in.append((exp_id, e_in))
    all_e_out.append((exp_id, e_out))
    print('lambda={} exp_id={} e_in={} e_out={}'.format(
        reg_lambda, exp_id, e_in, e_out))


print('trans non')
all_e_in = []
all_e_out = []
for one in range(0, 10):
    experiment1(training_data_set.one_versus_all(one),
                test_data_set.one_versus_all(one), fake_transofrmation, 1, one)
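The loop fits one model per digit in a one-versus-all scheme. DataSet.one_versus_all is not shown; assuming it relabels the chosen class as +1 and everything else as -1, the idea reduces to:

import numpy as np

def one_versus_all(y, target_class):
    # Hypothetical relabeling: +1 for the chosen class, -1 for the rest.
    return np.where(y == target_class, 1, -1)

print(one_versus_all(np.array([0, 1, 2, 1]), 1))  # [-1  1 -1  1]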
Code example #5
'''
@author: danhbuithi
'''
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

if __name__ == '__main__':
    config = CommandArgs({
        'data': ('', 'Path of training data file'),
        'n': (5, 'Number of sub-learning sets'),
        'class': (-1, 'Class index')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    nsubsets = int(config.get_value('n'))
    class_index = int(config.get_value('class'))

    all_data = DataSet()
    all_data.load(config.get_value('data'), class_index)

    subsets = all_data.split_random_in_k(nsubsets)

    for i in range(nsubsets):
        test_data, train_data = DataSet.create_datasets_by_crossvalidation(
            subsets, i)

        test_data.save(config.get_value('data') + '.test' + '.' + str(i))
        train_data.save(config.get_value('data') + '.train' + '.' + str(i))
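split_random_in_k and create_datasets_by_crossvalidation are DataSet methods whose bodies are not shown; assuming they implement a standard k-fold split, the underlying logic is roughly:

import random

def split_random_in_k(rows, k):
    # Shuffle once, then deal the rows into k roughly equal subsets.
    shuffled = list(rows)
    random.shuffle(shuffled)
    return [shuffled[i::k] for i in range(k)]

def create_datasets_by_crossvalidation(subsets, i):
    # Fold i becomes the test set; the union of the others is the train set.
    test = subsets[i]
    train = [row for j, s in enumerate(subsets) if j != i for row in s]
    return test, train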
Code example #6
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from rules_mining.RuleMiner import RuleMiner

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of data-set file'),
        'format': ('mydefault', 'Format of input data'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.3, 'Minimum confidence'),
        'maxitems': (-1, 'Maximum number of items in the rules'),
        'class': (-1, 'Class index')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    train_data_set = DataSet()
    class_index = int(config.get_value('class'))
    train_data_set.load(config.get_value('input'), class_index)

    print('Generating rules ....')
    min_sup_src = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
    itemset_max_size = int(config.get_value('maxitems'))

    miner = RuleMiner(config.get_value('format'), train_data_set)
    miner.generate_itemsets_and_rules(min_sup_src, min_conf, itemset_max_size)

    print('Finished!!!')
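For reference, the two thresholds handed to generate_itemsets_and_rules follow the standard association-rule definitions (a general note, not the miner's actual code):

def support(count_x, n_transactions):
    # support(X) = fraction of transactions containing itemset X
    return count_x / n_transactions

def confidence(count_xy, count_x):
    # confidence(X -> Y) = count(X and Y) / count(X)
    return count_xy / count_x

print(confidence(3, 10))  # 0.3, i.e. the default 'minconf' above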
Code example #7
__author__ = 'punki'
from common.DataSet import DataSet
import time, random, numpy as np
from sklearn import svm


def compute_error(clf, x, target):
    predict = clf.predict(x)
    return len(target[target != predict]) / float(len(target))


training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

e_in_for_n = []
e_out_for_n = []

for one in {1, 5}:
    e_in_all = []
    e_out_all = []
    for C in {0.0001, 0.001, 0.01, 0.1, 1}:
        q = 5
        clf = svm.SVC(kernel='poly', C=C, degree=q)
        training_one_versus = training_data_set.one_versus_all(one)
        test_one_versus = test_data_set.one_versus_all(one)
        clf.fit(training_one_versus.get_x(), training_one_versus.get_y())

        e_in = compute_error(clf, training_one_versus.get_x(),
                             training_one_versus.get_y())
        e_out = compute_error(clf, test_one_versus.get_x(),
                              test_one_versus.get_y())
Code example #8
File: SvmSoft.py Project: punki/LinearRegresion
__author__ = 'punki'
from common.DataSet import DataSet
import time, random, numpy as np
from sklearn import svm


def compute_error(clf, x, target):
    predict = clf.predict(x)
    return len(target[target != predict]) / float(len(target))


training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

e_in_for_n = []
e_out_for_n = []

for one in {1, 5}:
    e_in_all = []
    e_out_all = []
    for C in {0.0001, 0.001, 0.01, 0.1, 1}:
        q = 5
        clf = svm.SVC(kernel='poly', C=C, degree=q)
        training_one_versus = training_data_set.one_versus_all(one)
        test_one_versus = test_data_set.one_versus_all(one)
        clf.fit(training_one_versus.get_x(), training_one_versus.get_y())

        e_in = compute_error(clf, training_one_versus.get_x(), training_one_versus.get_y())
        e_out = compute_error(clf, test_one_versus.get_x(), test_one_versus.get_y())
        e_in_all.append(e_in)
        e_out_all.append(e_out)
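One caveat that applies to both SVM snippets: {0.0001, 0.001, 0.01, 0.1, 1} is a set literal, so the C values are visited in an arbitrary order and the appended errors need not be sorted by C. A deterministic variant (a suggested change, not the original code):

for C in sorted([0.0001, 0.001, 0.01, 0.1, 1]):
    print(C)  # visits the penalty values in ascending order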
Code example #9
__author__ = 'punki'

from common.DataSet import DataSet
from common.LinearRegresion import LinearRegresion
import time, random, numpy as np
from sklearn import svm

training_data_set = DataSet('features.train.txt')
test_data_set = DataSet('features.test.txt')
classes = np.unique(training_data_set.get_y())

fake_transofrmation = (lambda x1, x2: (1, x1, x2))
ex8_transofrmation = (lambda x1, x2: (1, x1, x2, x1 * x2, x1 ** 2, x2 ** 2))


def experiment1(training, test, transformation, reg_lambda, exp_id):
    lr = LinearRegresion(reg_lambda, transformation)
    lr.fit(training)
    e_in = lr.error(training)
    e_out = lr.error(test)
    all_e_in.append((exp_id, e_in))
    all_e_out.append((exp_id, e_out))
    print('lambda={} exp_id={} e_in={} e_out={}'.format(reg_lambda, exp_id, e_in, e_out))


print('trans non')
all_e_in = []
all_e_out = []
for one in range(0, 10):
    experiment1(training_data_set.one_versus_all(one), test_data_set.one_versus_all(one), fake_transofrmation, 1, one)
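LinearRegresion's implementation is not shown; given the reg_lambda argument, it presumably solves the regularized least-squares problem w = (Z^T Z + lambda*I)^(-1) Z^T y on the transformed features Z. A numpy sketch of that fit (an assumption about the class, not its actual code):

import numpy as np

def ridge_fit(Z, y, reg_lambda):
    # Regularized normal equations: (Z^T Z + lambda*I) w = Z^T y.
    d = Z.shape[1]
    return np.linalg.solve(Z.T @ Z + reg_lambda * np.eye(d), Z.T @ y)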
Code example #10
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'format': ('mydefault', 'Format of input data'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.3, 'Minimum confidence'),
        'n': (5, 'Number of sub-learning sets'),
        'nloop': (100, 'Number of loops'),
        'class': (-1, 'Class index')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    min_sup = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
    rule_format = config.get_value('format')

    nsubsets = int(config.get_value('n'))
    nloop = int(config.get_value('nloop'))

    for i in range(nsubsets):
        print('Test for case ...' + str(i))
        train_data = DataSet()
        train_data.load(config.get_value('train') + '.' + str(i), class_index)
        print('#transactions', train_data.size())
        test_data = DataSet()
        test_data.load(config.get_value('test') + '.' + str(i), class_index)

        labels = sorted(train_data.count_classes().keys())

        '''
        Convert data into binary
        '''
        rel_train_X = train_data.get_X_in_binary()
        rel_train_Y = train_data.get_Y_in_numeric()

        test_X = test_data.get_X_in_binary_with(rel_train_X.item_dict)
        test_Y = test_data.get_Y_in_numeric_with(rel_train_Y.item_dict)
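The key detail in the test-set conversion is that it reuses rel_train_X.item_dict and rel_train_Y.item_dict, so both splits share a single item-to-column mapping. A minimal sketch of such an encoder (hypothetical; the real DataSet methods are project code):

def to_binary_row(items, item_dict):
    # One-hot encode a transaction against a fixed item -> column mapping;
    # items never seen during training are simply dropped.
    row = [0] * len(item_dict)
    for item in items:
        if item in item_dict:
            row[item_dict[item]] = 1
    return row

print(to_binary_row(['a', 'c', 'z'], {'a': 0, 'b': 1, 'c': 2}))  # [1, 0, 1]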