import sys

from common.IOHelper import IOHelper
from common.CommandArgs import CommandArgs
from common.ArgumentTuple import ContrastParams, dbscanParams

from rules_mining.RuleMiner import RuleMiner
from rules_mining.RulesClustering import UnexpectednessExtractor

if __name__ == '__main__':

    config = CommandArgs({
        'output': ('', 'Path of clusters file'),
        'minpts':
        (3, 'Minimum of neighbors making a rule to become a core rule'),
        'eps': (0.1, 'Radius of neighbors'),
        'delta1': (0.0, 'Value of delta 1'),
        'delta2': (-0.9, 'Value of delta 2'),
        'minconf': (0.8, 'Minimum confidence for unexpected rules'),
        'subthres': (0.0, 'Threshold for substantial subset'),
        'epsilon': (5e-3, 'Epsilon value')
    })
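    # minpts and eps follow the usual DBSCAN convention (minimum neighbour
    # count for a core point and the neighbourhood radius); delta1, delta2,
    # minconf and subthres are thresholds used when filtering unexpected rules.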

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    miner = RuleMiner(None, None)

    print('Loading features of association rules ....')
    X_train, lengths, lhs_feature_count, rhs_feature_count = miner.load_feature_vectors()
Example #2
        X.append(value)
    return rules, np.array(X)
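
# The top of this snippet is truncated; a minimal sketch of what
# separateRulesAndFeatures probably looks like, inferred from its visible tail
# above and from how preprocessRuleFeatureDict consumes its result
# (a hypothetical reconstruction, not the author's original code):
import numpy as np


def separateRulesAndFeatures(rule_feature_dict):
    # Split a {rule_string: feature_vector} dict into a list of rule strings
    # and a matching NumPy feature matrix.
    rules, X = [], []
    for rule, value in rule_feature_dict.items():
        rules.append(rule)
        X.append(value)
    return rules, np.array(X)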


def preprocessRuleFeatureDict(rule_feature_dict):
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]
    return rule_full_list, features, features[:, 0]


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (0, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'nloop': (100, 'Number of loops'),
        'lambda': (0.1, 'Lambda value'),
        'beta': (0.01, 'Beta value')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    min_conf = 0.0
    rule_format = 'spect'

    class_index = int(config.get_value('class'))
    train_data = DataSet()
    train_data.load(config.get_value('train'), class_index)
Example #3
import sys
import time

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from rulefit.rulefit import RuleFit
from sklearn.metrics import f1_score
from sklearn.ensemble import GradientBoostingClassifier


def evaluateByF1(y_pred, y_true):
    a = f1_score(y_true, y_pred, average='micro')
    b = f1_score(y_true, y_pred, average='macro')
    return (a, b)
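
# A quick illustration of evaluateByF1 on toy labels (micro- vs. macro-averaged
# F1, as computed by sklearn's f1_score):
#   evaluateByF1([0, 0, 1, 1], [0, 1, 1, 1])  ->  (0.75, ~0.733)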

if __name__ == '__main__':
    config = CommandArgs({
                          'train'   : ('', 'Path of training data file'),
                          'test'   : ('', 'Path of testing data file'),
                          'class'   : (None, 'Class index'),
                          'n'   : (250, 'Maximum number of rules')
                          })    
    
    if not config.load(sys.argv):
        print ('Argument is not correct. Please try again')
        sys.exit(2)
        
    
    class_index = int(config.get_value('class'))
    nrules = int(config.get_value('n'))

    
    for i in range(5):
        train_data = DataSet()
        train_data.load(config.get_value('train')+'.'+str(i), class_index)
Example #4
'''
Created on 27 Feb 2018

@author: danhbuithi
'''

import sys

from common.CommandArgs import CommandArgs
from sampling.RandomSplitter import RandomSplitter
from common.IOHelper import IOHelper

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of input data'),
        'test': ('', 'Path of test data.'),
        'train': ('', 'Path of train data'),
        'rate': ('', 'Ratio used to split the data into train and test')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    rate = float(config.get_value('rate'))
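    # RandomSplitter.split presumably shuffles the input file's records and
    # returns two lists of lines, sized according to rate, which are written
    # out below as the train and test files.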
    train, test = RandomSplitter.split(config.get_value('input'), rate)
    IOHelper.write_file_in_lines(config.get_value('train'), train)
    IOHelper.write_file_in_lines(config.get_value('test'), test)

    print('Finished!!!')
Example #5
        fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
        print(auc(fpr, tpr))


def filter_association_rules(unexpected_rules, delta_1=0):
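    # Each element of unexpected_rules is expected to be a tuple whose first
    # item is the rule string and whose [2][0][1] entry is an unexpectedness
    # score; a rule is kept only when that score exceeds delta_1.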
    rules = []
    for x in unexpected_rules:
        if x[2][0][1] > delta_1:
            rules.append(AssociationRule.string_2_rule(x[0]))
    return rules


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of train data.'),
        'test': ('', 'Path of test data.'),
        'rules': ('', 'Path of unexpected rules.'),
        'class': (0, 'Index of class in data.')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    class_index = int(config.get_value('class'))
    train_data_set = DataSet()
    train_data_set.load(config.get_value('train'),
                        class_index,
                        has_header=False)
    X_train, Y_train = train_data_set.convert_2_binary_format()
Example #6
'''
Created on 12 Nov 2019

@author: danhbuithi
'''
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

if __name__ == '__main__':
    config = CommandArgs({
        'data': ('', 'Path of training data file'),
        'n': (5, 'Number of sub-learning sets'),
        'class': (-1, 'Class index')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    nsubsets = int(config.get_value('n'))
    class_index = int(config.get_value('class'))

    all_data = DataSet()
    all_data.load(config.get_value('data'), class_index)

    subsets = all_data.split_random_in_k(nsubsets)
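    # split_random_in_k presumably partitions the data into nsubsets folds; the
    # loop below then uses fold i as the test set and the remaining folds as
    # the training set (standard k-fold cross-validation).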

    for i in range(nsubsets):
        test_data, train_data = DataSet.create_datasets_by_crossvalidation(
            subsets, i)
Example #7
import sys
import colorsys

from common.CommandArgs import CommandArgs
from sklearn.decomposition import IncrementalPCA


def get_N_HexCol(N=5):

    HSV_tuples = [(x * 1.0 / N, 1, 1) for x in range(N)]
    hex_out = []
    for rgb in HSV_tuples:
        rgb = map(lambda x: int(x * 255), colorsys.hsv_to_rgb(*rgb))
        hex_out.append('#%02x%02x%02x' % tuple(rgb))
    #print(hex_out)
    return hex_out
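
# e.g. get_N_HexCol(3) returns ['#ff0000', '#00ff00', '#0000ff']: N evenly
# spaced hues converted from HSV to RGB hex strings.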

            
if __name__ == '__main__':
    config = CommandArgs({'feature'     : ('', 'Path of features file'),
                          'cluster'      : ('', 'Path of clusters file'),
                          'output'      : ('', 'Path of output file'),
                          'title'       : ('Dataset', 'Title of charts')
                          })
    
    if not config.load(sys.argv):
        print ('Argument is not correct. Please try again')
        sys.exit(2)
        
    X, association_rules = load_feature_vectors(config.get_value('feature'))
    
    m = 2
    print('dimensionality reduction factor: ' + str(m))
    
    pca = IncrementalPCA(n_components = X.shape[1]//m)
    new_X = pca.fit_transform(X)
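    # The IncrementalPCA step above fits the projection in mini-batches and
    # reduces the rule-feature vectors to half of their original
    # dimensionality (m = 2).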
    clusters, number_of_clusters = load_clusters(config.get_value('cluster'))
Example #8
'''
Created on 22 Oct 2018

@author: danhbuithi
'''
import sys
from common.CommandArgs import CommandArgs
from preprocessing.PubMedCorpus import PubMedCorpus

if __name__ == '__main__':
    config = CommandArgs({
        'output': ('', 'Path of output file'),
        'limit': (100000, 'Maximum number of abstracts to return')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    corpus = PubMedCorpus(api_key='6a2ee604e5f4598018ce6134ab6fc8851308')
    keywords = 'mycobacterium tuberculosis gene'
    corpus.downloadAbstract(keywords,
                            file_name=config.get_value('output'),
                            max_return=int(config.get_value('limit')))
Example #9
'''
Created on Feb 6, 2017

@author: BanhDzui
'''

import sys

from common.DataSet import DataSet
from common.CommandArgs import CommandArgs
from rules_mining.RuleMiner import RuleMiner

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of data-set file'),
        'format': ('mydefault', 'Format of input data'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.3, 'Minimum confidence'),
        'maxitems': (-1, 'Maximum number of items in the rules'),
        'class': (-1, 'Class index')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    train_data_set = DataSet()
    class_index = int(config.get_value('class'))
    train_data_set.load(config.get_value('input'), class_index)

    print('Generating rules ....')
    min_sup_src = float(config.get_value('minsup'))
Example #10
'''
Created on 29 Oct 2018

@author: danhbuithi
'''

import sys
from common.CommandArgs import CommandArgs
from preprocessing.GOCollection import GOCollection
from preprocessing.PubMedCorpus import PubMedCorpus
from preprocessing.GOExtractor import GOExtractor

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of document files'),
        'go': ('', 'Path of GO terms'),
        'output': ('', 'Path of output file')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)
    '''
    collector = GOCollection()
    collector.extractFromObo(config.get_value('input'))
    collector.saveAsXML(config.get_value('output'))
    
    '''
    corpus = PubMedCorpus(None)
    documents = corpus.load(config.get_value('input'))
Example #11
def preprocessRuleFeatureDict(rule_feature_dict):
    '''
    Convert rule strings into AssociationRule objects and return them together
    with their feature matrix and its first column.
    '''
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]
    
   
    return rule_full_list, features, features[:, 0]
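# If min-max scaling of the rule features is wanted before they are returned, a
# minimal sketch (an assumption, not part of the original snippet) using
# scikit-learn would be:
#
#   from sklearn.preprocessing import MinMaxScaler
#   features = MinMaxScaler().fit_transform(features)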
    
if __name__ == '__main__':
    config = CommandArgs({
                          'train'   : ('', 'Path of training data file'),
                          'test'   : ('', 'Path of testing data file'),
                          'class'   : (None, 'Class index'),
                          'minsup'  : (0.1, 'Minimum support'),
                          'minconf' : (0.0, 'Minimum confidence'),
                          'format'  : ('spect', 'valid format of rules/item-sets'),
                          'n'       : (5, 'Number of sub-learning sets'),
                          'nloop'   : (100, 'Number of loops')
                          })    
    
    if not config.load(sys.argv):
        print ('Argument is not correct. Please try again')
        sys.exit(2)
        
    class_index = int(config.get_value('class'))
    min_sup = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
    rule_format = config.get_value('format')
    
    nsubsets = int(config.get_value('n'))
Example #12
import sys

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

from rules_mining.RuleMiner import RuleMiner
from rule_based_classifiers.NetMMAC import NetMMAC
from rule_based_classifiers.MMAC import MMAC

from TestRuleBasedMethods import preprocessRuleFeatureDict

if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (0, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.0, 'Minimum confidence'),
        'format': ('spect', 'valid format of rules/item-sets'),
        'out': ('', 'Path of output file'),
        'nloop': (10000, 'Number of loops'),
        'label': ('', 'Positive label'),
        'option': ('net', 'Name of algorithm')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    nloop = int(config.get_value('nloop'))

    min_sup = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
Example #13
def preprocessRuleFeatureDict(rule_feature_dict):
    '''
    Convert rule strings into AssociationRule objects and return them together
    with their feature matrix and its first column.
    '''
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]

    return rule_full_list, features, features[:, 0]


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (None, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.0, 'Minimum confidence'),
        'format': ('spect', 'valid format of rules/item-sets'),
        'sol': ('', 'Path of output file'),
        'option': ('net', 'Selected algorithm')
    })

    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    train_data = DataSet()
    train_data.load(config.get_value('train'), class_index)

    test_data = DataSet()
    test_data.load(config.get_value('test'), class_index)
Example #14
'''
@author: danhbuithi
'''
import sys
from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from corels.corels import CorelsClassifier
from TestRuleBasedMethods import evaluateByF1
    
             
if __name__ == '__main__':
    config = CommandArgs({
                          'train'   : ('', 'Path of training data file'),
                          'class'   : (None, 'Class index'),
                          'test'  : ('', 'Path of testing data file'),
                          'nloop'   : (10000, 'Number of loops'),
                          'c': (0.01, 'Length penalty'),
                          'n':  (5, 'Number of subsets'),
                          'minsup'  : (0.1, 'Minimum support'),
                          'card'    : (2, 'Maximum cardinality of the rules')
                          })    
    
    if not config.load(sys.argv):
        print ('Argument is not correct. Please try again')
        sys.exit(2)
    
    class_index = int(config.get_value('class')) 
    nsubsets = int(config.get_value('n'))   
    min_sup = float(config.get_value('minsup'))
    max_card = int(config.get_value('card'))
    
    C = float(config.get_value('c'))