import sys

from common.IOHelper import IOHelper
from common.CommandArgs import CommandArgs
from common.ArgumentTuple import ContrastParams, dbscanParams
from rules_mining.RuleMiner import RuleMiner
from rules_mining.RulesClustering import UnexpectednessExtractor

if __name__ == '__main__':
    config = CommandArgs({
        'output': ('', 'Path of clusters file'),
        'minpts': (3, 'Minimum number of neighbors for a rule to become a core rule'),
        'eps': (0.1, 'Radius of the neighborhood'),
        'delta1': (0.0, 'Value of delta 1'),
        'delta2': (-0.9, 'Value of delta 2'),
        'minconf': (0.8, 'Minimum confidence for unexpected rules'),
        'subthres': (0.0, 'Threshold for substantial subsets'),
        'epsilon': (5e-3, 'Epsilon value')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    miner = RuleMiner(None, None)
    print('Loading features of association rules ....')
    X_train, lengths, lhs_feature_count, rhs_feature_count = miner.load_feature_vectors()
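# A minimal sketch of what the 'minpts'/'eps' pair above controls: a
# DBSCAN-style neighborhood clustering of the rule feature vectors. This runs
# scikit-learn's DBSCAN on toy data purely as an illustration; it is not the
# repository's UnexpectednessExtractor.
import numpy as np
from sklearn.cluster import DBSCAN

toy_X = np.random.rand(100, 8)  # stand-in for the rule feature vectors
toy_labels = DBSCAN(eps=0.1, min_samples=3).fit_predict(toy_X)
# A rule with at least min_samples neighbors within radius eps becomes a core
# point of a cluster; points labelled -1 are treated as noise.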
        X.append(value)
    return rules, np.array(X)


def preprocessRuleFeatureDict(rule_feature_dict):
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]
    return rule_full_list, features, features[:, 0]


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (0, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'nloop': (100, 'Number of loops'),
        'lambda': (0.1, 'Lambda value'),
        'beta': (0.01, 'Beta value')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    min_conf = 0.0
    rule_format = 'spect'
    class_index = int(config.get_value('class'))

    train_data = DataSet()
    train_data.load(config.get_value('train'), class_index)
import sys
import time

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from rulefit.rulefit import RuleFit
from sklearn.metrics import f1_score
from sklearn.ensemble import GradientBoostingClassifier


def evaluateByF1(y_pred, y_true):
    a = f1_score(y_true, y_pred, average='micro')
    b = f1_score(y_true, y_pred, average='macro')
    return (a, b)


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (None, 'Class index'),
        'n': (250, 'Maximum number of rules')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    nrules = int(config.get_value('n'))
    for i in range(5):
        train_data = DataSet()
        train_data.load(config.get_value('train') + '.' + str(i), class_index)
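# A quick sanity check of evaluateByF1 on hand-made labels (illustration only):
# with y_true = [0, 1, 1, 0] and y_pred = [0, 1, 0, 0], micro-F1 equals the
# accuracy 3/4 = 0.75, while macro-F1 averages the per-class scores
# (0.8 for class 0, 0.6667 for class 1) to about 0.7333.
from sklearn.metrics import f1_score

assert abs(f1_score([0, 1, 1, 0], [0, 1, 0, 0], average='micro') - 0.75) < 1e-9
assert abs(f1_score([0, 1, 1, 0], [0, 1, 0, 0], average='macro') - 0.7333) < 1e-3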
'''
Created on 27 Feb 2018

@author: danhbuithi
'''
import sys

from common.CommandArgs import CommandArgs
from sampling.RandomSplitter import RandomSplitter
from common.IOHelper import IOHelper

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of input data'),
        'test': ('', 'Path of test data.'),
        'train': ('', 'Path of train data'),
        'rate': ('', 'Rate used to split data into train/test parts')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    rate = float(config.get_value('rate'))
    train, test = RandomSplitter.split(config.get_value('input'), rate)
    IOHelper.write_file_in_lines(config.get_value('train'), train)
    IOHelper.write_file_in_lines(config.get_value('test'), test)
    print('Finished!!!')
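# A hypothetical invocation of this splitter. The script name SplitData.py and
# the flag syntax are assumptions (the exact syntax depends on
# CommandArgs.load, which is not shown here):
#
#   python SplitData.py --input data/all.txt --train data/train.txt \
#                       --test data/test.txt --rate 0.8
#
# would keep 80% of the lines for training and hold the rest out for testing.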
    fpr, tpr, _ = roc_curve(Ytrue, y_pred.flatten())
    print(auc(fpr, tpr))


def filter_association_rules(unexpected_rules, delta_1=0):
    rules = []
    for x in unexpected_rules:
        if x[2][0][1] > delta_1:
            rules.append(AssociationRule.string_2_rule(x[0]))
    return rules


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of train data.'),
        'test': ('', 'Path of test data.'),
        'rules': ('', 'Path of unexpected rules.'),
        'class': (0, 'Index of class in data.')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    class_index = int(config.get_value('class'))
    train_data_set = DataSet()
    train_data_set.load(config.get_value('train'), class_index, has_header=False)
    X_train, Y_train = train_data_set.convert_2_binary_format()
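# The ROC evaluation at the top of this file uses standard scikit-learn calls;
# a self-contained toy version (illustration only):
import numpy as np
from sklearn.metrics import roc_curve, auc

toy_true = np.array([0, 0, 1, 1])
toy_score = np.array([0.1, 0.4, 0.35, 0.8])
fpr, tpr, _ = roc_curve(toy_true, toy_score)
print(auc(fpr, tpr))  # 0.75 for these toy scores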
'''
Created on 12 Nov 2019

@author: danhbuithi
'''
import sys

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet

if __name__ == '__main__':
    config = CommandArgs({
        'data': ('', 'Path of training data file'),
        'n': (5, 'Number of sub-learning sets'),
        'class': (-1, 'Class index')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    nsubsets = int(config.get_value('n'))
    class_index = int(config.get_value('class'))

    all_data = DataSet()
    all_data.load(config.get_value('data'), class_index)
    subsets = all_data.split_random_in_k(nsubsets)

    for i in range(nsubsets):
        test_data, train_data = DataSet.create_datasets_by_crossvalidation(subsets, i)
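# What the two DataSet helpers above presumably do, sketched with plain Python
# lists. Assumed semantics: split_random_in_k shuffles and partitions the
# records into n roughly equal folds, and for fold i the remaining folds are
# merged into the training set (illustration only, not the DataSet API):
import random

def split_in_k(records, k):
    shuffled = records[:]
    random.shuffle(shuffled)
    return [shuffled[i::k] for i in range(k)]

folds = split_in_k(list(range(10)), 5)
for i, test_fold in enumerate(folds):
    train_fold = [x for j, fold in enumerate(folds) if j != i for x in fold]
    print(i, len(train_fold), len(test_fold))  # 8 train / 2 test per fold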
def get_N_HexCol(N=5):
    HSV_tuples = [(x * 1.0 / N, 1, 1) for x in range(N)]
    hex_out = []
    for rgb in HSV_tuples:
        rgb = map(lambda x: int(x * 255), colorsys.hsv_to_rgb(*rgb))
        hex_out.append('#%02x%02x%02x' % tuple(rgb))
    #print(hex_out)
    return hex_out


if __name__ == '__main__':
    config = CommandArgs({
        'feature': ('', 'Path of features file'),
        'cluster': ('', 'Path of clusters file'),
        'output': ('', 'Path of output file'),
        'title': ('Dataset', 'Title of charts')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    X, association_rules = load_feature_vectors(config.get_value('feature'))

    m = 2
    print('dimensionality reduction: ' + str(m))
    pca = IncrementalPCA(n_components=X.shape[1] // m)
    new_X = pca.fit_transform(X)

    clusters, number_of_clusters = load_clusters(config.get_value('cluster'))
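# Worked example for get_N_HexCol: with N=3 the hues 0, 1/3 and 2/3 map to
# pure red, green and blue:
#
#   get_N_HexCol(3)  ->  ['#ff0000', '#00ff00', '#0000ff']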
'''
Created on 22 Oct 2018

@author: danhbuithi
'''
import sys

from common.CommandArgs import CommandArgs
from preprocessing.PubMedCorpus import PubMedCorpus

if __name__ == '__main__':
    config = CommandArgs({
        'output': ('', 'Path of output file'),
        'limit': (100000, 'Maximum number of returned abstracts')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    corpus = PubMedCorpus(api_key='6a2ee604e5f4598018ce6134ab6fc8851308')
    keywords = 'mycobacterium tuberculosis gene'
    corpus.downloadAbstract(keywords,
                            file_name=config.get_value('output'),
                            max_return=int(config.get_value('limit')))
'''
Created on Feb 6, 2017

@author: BanhDzui
'''
import sys

from common.DataSet import DataSet
from common.CommandArgs import CommandArgs
from rules_mining.RuleMiner import RuleMiner

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of data-set file'),
        'format': ('mydefault', 'Format of input data'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.3, 'Minimum confidence'),
        'maxitems': (-1, 'Maximum number of items in the rules'),
        'class': (-1, 'Class index')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    print('Loading data....')
    train_data_set = DataSet()
    class_index = int(config.get_value('class'))
    train_data_set.load(config.get_value('input'), class_index)

    print('Generating rules ....')
    min_sup_src = float(config.get_value('minsup'))
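# A hypothetical run of this miner. The script name RuleMining.py and the flag
# syntax are assumptions (the exact syntax depends on CommandArgs.load, which
# is not shown here):
#
#   python RuleMining.py --input data/spect.train --format spect \
#                        --minsup 0.1 --minconf 0.3 --class 0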
'''
Created on 29 Oct 2018

@author: danhbuithi
'''
import sys

from common.CommandArgs import CommandArgs
from preprocessing.GOCollection import GOCollection
from preprocessing.PubMedCorpus import PubMedCorpus
from preprocessing.GOExtractor import GOExtractor

if __name__ == '__main__':
    config = CommandArgs({
        'input': ('', 'Path of document files'),
        'go': ('', 'Path of GO terms'),
        'output': ('', 'Path of output file')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    '''
    collector = GOCollection()
    collector.extractFromObo(config.get_value('input'))
    collector.saveAsXML(config.get_value('output'))
    '''
    corpus = PubMedCorpus(None)
    documents = corpus.load(config.get_value('input'))
def preprocessRuleFeatureDict(rule_feature_dict):
    ''' Normalize feature using min-max scaler '''
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]
    return rule_full_list, features, features[:, 0]


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (None, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.0, 'Minimum confidence'),
        'format': ('spect', 'Valid format of rules/item-sets'),
        'n': (5, 'Number of sub-learning sets'),
        'nloop': (100, 'Number of loops')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    min_sup = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
    rule_format = config.get_value('format')
    nsubsets = int(config.get_value('n'))
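# Note: the docstring of preprocessRuleFeatureDict above promises min-max
# normalization, but the body shown does not apply it. A minimal sketch of
# that step, assuming scikit-learn is available (illustration only, not the
# repository's implementation):
from sklearn.preprocessing import MinMaxScaler

def min_max_normalize(features):
    # Rescale each feature column into [0, 1] independently.
    return MinMaxScaler().fit_transform(features)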
import sys

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from rules_mining.RuleMiner import RuleMiner
from rule_based_classifiers.NetMMAC import NetMMAC
from rule_based_classifiers.MMAC import MMAC
from TestRuleBasedMethods import preprocessRuleFeatureDict

if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (0, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.0, 'Minimum confidence'),
        'format': ('spect', 'Valid format of rules/item-sets'),
        'out': ('', 'Path of output file'),
        'nloop': (10000, 'Number of loops'),
        'label': ('', 'Positive label'),
        'option': ('net', 'Name of algorithm')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    nloop = int(config.get_value('nloop'))
    min_sup = float(config.get_value('minsup'))
    min_conf = float(config.get_value('minconf'))
def preprocessRuleFeatureDict(rule_feature_dict):
    ''' Normalize feature using min-max scaler '''
    rules, features = separateRulesAndFeatures(rule_feature_dict)
    rule_full_list = [AssociationRule.string_2_rule(x) for x in rules]
    return rule_full_list, features, features[:, 0]


if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'test': ('', 'Path of testing data file'),
        'class': (None, 'Class index'),
        'minsup': (0.1, 'Minimum support'),
        'minconf': (0.0, 'Minimum confidence'),
        'format': ('spect', 'Valid format of rules/item-sets'),
        'sol': ('', 'Path of output file'),
        'option': ('net', 'Selected algorithm')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))

    train_data = DataSet()
    train_data.load(config.get_value('train'), class_index)

    test_data = DataSet()
    test_data.load(config.get_value('test'), class_index)
'''
@author: danhbuithi
'''
import sys

from common.CommandArgs import CommandArgs
from common.DataSet import DataSet
from corels.corels import CorelsClassifier
from TestRuleBasedMethods import evaluateByF1

if __name__ == '__main__':
    config = CommandArgs({
        'train': ('', 'Path of training data file'),
        'class': (None, 'Class index'),
        'test': ('', 'Path of testing data file'),
        'nloop': (10000, 'Number of loops'),
        'c': (0.01, 'Length penalty'),
        'n': (5, 'Number of subsets'),
        'minsup': (0.1, 'Minimum support'),
        'card': (2, 'Maximum cardinality')
    })
    if not config.load(sys.argv):
        print('Argument is not correct. Please try again')
        sys.exit(2)

    class_index = int(config.get_value('class'))
    nsubsets = int(config.get_value('n'))
    min_sup = float(config.get_value('minsup'))
    max_card = int(config.get_value('card'))
    C = float(config.get_value('c'))
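# How the parameters above typically map onto a CORELS classifier, sketched in
# the style of the pycorels API. This is an assumption: the vendored
# corels.corels module imported here may expose different names, and the
# binary feature matrix X_train / labels y_train are placeholders:
#
#   clf = CorelsClassifier(c=C, n_iter=nloop, max_card=max_card,
#                          min_support=min_sup)
#   clf.fit(X_train, y_train)
#   y_pred = clf.predict(X_test)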