def save_data(data, name):
    """Save a dataset dict: one CSV per tabular field, plus a spec.json descriptor."""
    dataset_path = _datasets_path + name
    print("data saved to {}".format(dataset_path))
    for field in _csv_files:
        filename = field + '.csv'
        file_path = get_path(dataset_path, filename)
        before_save(file_path)
        save_file(data[field], file_path)
    # Collect the metadata fields that are present and store them as JSON
    descriptor = {key: data[key] for key in _json_files if key in data}
    save_file(descriptor, get_path(dataset_path, 'spec.json'))
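# Usage sketch (illustrative only; the dataset name and field values below are
# hypothetical, and `_csv_files`/`_json_files` must be configured at module
# level, e.g. _csv_files = ['data', 'target'] and _json_files = ['feature_names', ...]):
#
#     >>> save_data({'data': x, 'target': y, 'feature_names': names}, 'abalone')
#     data saved to <_datasets_path>abalone
#
# This writes data.csv, target.csv, and spec.json under the dataset directory.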
def categorical2pysbrl_data(x: np.ndarray, y: np.ndarray, data_name, supp=0.05, zmin=1, zmax=3):
    """Mine frequent itemsets per label and dump them in the pysbrl (SBRL) input format."""
    assert len(y.shape) == 1
    # np.int was removed from recent numpy; check for any integer dtype instead
    assert np.issubdtype(y.dtype, np.integer)
    labels = np.unique(y)
    labels = np.arange(np.max(labels) + 1)
    # assert max(labels) + 1 == len(labels)

    # Group samples by label, then mine frequent itemsets within each group
    x_by_labels = []
    for label in labels:
        x_by_labels.append(x[y == label])
    transactions_by_labels = [categorical2transactions(_x) for _x in x_by_labels]
    itemsets = transactions2freqitems(transactions_by_labels, supp=supp, zmin=zmin, zmax=zmax)
    rules = [itemset2feature_categories(itemset) for itemset in itemsets]
    # For each rule, compute a boolean mask over all samples that satisfy it
    data_by_rule = []
    for features, categories in rules:
        satisfied = rule_satisfied(x, features, categories)
        data_by_rule.append(satisfied)

    # Write data file: one line per rule, followed by a 0/1 bit per sample
    data_filename = get_path(_datasets_path, data_name + '.data')
    before_save(data_filename)
    with open(data_filename, 'w') as f:
        for itemset, data in zip(itemsets, data_by_rule):
            rule_str = '{' + ','.join(itemset) + '} '
            f.write(rule_str)
            bit_s = ' '.join(['1' if bit else '0' for bit in data])
            f.write(bit_s)
            f.write('\n')

    # Write label file: one line per label, followed by a 0/1 membership bit per sample
    label_filename = get_path(_datasets_path, data_name + '.label')
    before_save(label_filename)
    with open(label_filename, 'w') as f:
        for label in labels:
            f.write('{label=%d} ' % label)
            bits = y == label
            bit_s = ' '.join(['1' if bit else '0' for bit in bits])
            f.write(bit_s)
            f.write('\n')
    return data_filename, label_filename
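# A sketch of the two files written above, matching the SBRL input format that
# pysbrl expects. Each '.data' line is a mined itemset followed by one 0/1 bit
# per sample marking whether that sample satisfies the rule; each '.label' line
# is a label tag followed by one 0/1 membership bit per sample:
#
#     {x1=a,x3=c} 1 0 0 1 ...
#     {label=0} 0 1 1 0 ...
#
# (Feature/category names inside the braces depend on categorical2transactions.)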
def load_data(name):
    """Load a dataset saved by save_data: CSV fields plus the spec.json descriptor."""
    dataset_path = _datasets_path + name
    dataset = {}
    for field in _csv_files:
        filename = field + '.csv'
        file_path = get_path(dataset_path, filename)
        dataset[field] = load_file(file_path)
    # Labels are stored as a column; flatten to a 1-D array
    for field in ['target']:
        dataset[field] = dataset[field].reshape(-1)
    descriptor = load_file(get_path(dataset_path, 'spec.json'))
    for key, val in descriptor.items():
        dataset[key] = val
    return dataset
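# Round-trip sketch (hypothetical dataset name): load_data returns every CSV
# field plus the keys stored in spec.json, with 'target' flattened to 1-D:
#
#     >>> dataset = load_data('abalone')
#     >>> dataset['target'].ndim
#     1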
def load_cache(self, is_train=False):
    """Load a previously cached sample set from the sample cache directory."""
    file_name = self.name + ('-train' if is_train else '-test') + '.csv'
    file_path = get_path(sample_cache_dir, file_name)
    return load_file(file_path)
def cache_sample(self, x, is_train=False):
    """Cache a sample set as a CSV file named after this instance."""
    file_name = self.name + ('-train' if is_train else '-test') + '.csv'
    file_path = get_path(sample_cache_dir, file_name)
    save_file(x, file_path)
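# Usage sketch for the two cache helpers above (assumes `self.name` and
# `sample_cache_dir` are configured; `sampler` is a hypothetical instance):
#
#     >>> sampler.cache_sample(samples, is_train=True)   # writes <name>-train.csv
#     >>> restored = sampler.load_cache(is_train=True)   # reads the same file back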
def _format_name(name):
    return get_path(Config.model_dir(), "{}{}".format(name, FILE_EXTENSION))
def get_dataset(data_name, discrete=False, seed=None, split=False, train_size=0.75,
                shuffle=True, one_hot=True, verbose=1):
    if data_name in sklearn_datasets:
        if data_name == 'breast_cancer':
            data = load_breast_cancer()
        elif data_name == 'iris':
            data = load_iris()
        else:  # data_name == 'wine'
            data = load_wine()
        # sklearn's bundled datasets are fully numeric
        data['is_categorical'] = np.array([False] * data['data'].shape[1])
        data['categories'] = [None] * data['data'].shape[1]
        opts = sklearn_datasets[data_name]
    elif data_name in local_datasets:
        data = load_data(data_name)
        opts = local_datasets[data_name]
    else:
        raise LookupError("Unknown data_name: {}".format(data_name))
    is_categorical = data['is_categorical']
    x = data['data']
    y = data['target']
    # feature_names = data['feature_names']
    if one_hot:
        if verbose:
            print('Converting categorical features to one-hot numeric')
        one_hot_features = is_categorical
        if 'is_binary' in data:
            # We don't want to one-hot already binary data
            one_hot_features = np.logical_and(is_categorical, np.logical_not(data['is_binary']))
        # NOTE: `categorical_features` was removed in scikit-learn 0.22;
        # this call assumes an older release.
        one_hot_encoder = OneHotEncoder(categorical_features=one_hot_features).fit(data['data'])
        data['one_hot_encoder'] = one_hot_encoder
        if verbose:
            print('Total number of categorical features:', np.sum(one_hot_features))
            if hasattr(one_hot_encoder, 'n_values_'):
                print('One hot value numbers:', one_hot_encoder.n_values_)
    if discrete:
        if verbose:
            print('Discretizing all continuous features using MDLP discretizer')
        discretizer_name = data_name + '-discretizer' + ('' if seed is None else '-' + str(seed)) + '.pkl'
        discretizer_path = get_path(_cached_path, discretizer_name)
        min_depth = 0 if 'min_depth' not in opts else opts['min_depth']
        discretizer = get_discretizer(x, y, continuous_features=np.logical_not(is_categorical),
                                      filenames=discretizer_path, min_depth=min_depth)
        # data['data'] = discretizer.transform(x)
        data['discretizer'] = discretizer
    if split:
        names = [get_path(_datasets_path, data_name + suffix)
                 for suffix in ['/train_x.npy', '/test_x.npy', '/train_y.npy', '/test_y.npy']]
        train_x, test_x, train_y, test_y = get_split(x, y, train_size=train_size,
                                                     shuffle=shuffle, filenames=names)
        data.update({
            'train_x': train_x,
            'test_x': test_x,
            'train_y': train_y,
            'test_y': test_y,
        })
    # Per-feature [min, max] ranges, shape (n_features, 2)
    mins = np.min(x, axis=0)
    maxs = np.max(x, axis=0)
    ranges = np.vstack([mins, maxs]).T
    data['ranges'] = ranges
    # hack: some feature_names/target_names come back as numpy arrays; convert to lists
    for key in ['feature_names', 'target_names']:
        if isinstance(data[key], np.ndarray):
            data[key] = data[key].tolist()
    if verbose > 0:
        print("-----------------------")
        print("Data Specs: {:s}".format(data_name))
        print("#data: {:d}".format(len(data['target'])))
        _, counts = np.unique(data['target'], return_counts=True)
        counts = [str(c) for c in counts]
        print("Label distribution: [{}]".format('/'.join(counts)))
        print("#features: {:d}".format(data['data'].shape[1]))
        print("#labels: {:d}".format(len(np.unique(data['target']))))
        print("-----------------------")
    return data
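# Usage sketch ('iris' ships with scikit-learn; the assertions assume get_split
# partitions the data with the default train_size=0.75):
#
#     >>> data = get_dataset('iris', split=True, one_hot=False, verbose=0)
#     >>> data['train_x'].shape[0] + data['test_x'].shape[0] == data['data'].shape[0]
#     True
#     >>> data['ranges'].shape   # (n_features, 2): per-feature [min, max]
#     (4, 2)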
import numpy as np
from flask import Flask, json

from rule_surrogate.utils.io_utils import get_path

# path = get_path('frontend/dist/static', absolute=True)
# print("Static folder: {:s}".format(path))


class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that renders np.ndarray as a plain list."""

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)


class HashableList(list):
    def __hash__(self):
        # l = [e if hasattr(e, '__hash__') else frozenset(e) for e in self]
        # jsonify returns a Response object (and needs an app context);
        # dumps gives a stable, hashable string instead
        return hash(json.dumps(self))


app = Flask(__name__)
app.config['FRONT_END_ROOT'] = get_path('front-end/build', absolute=True)
app.config['STATIC_FOLDER'] = get_path('front-end/build/static', absolute=True)

# This makes life easier when we have np.ndarray in the object to be jsonified
app.json_encoder = NumpyEncoder
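# Sketch of what registering NumpyEncoder buys us (on Flask < 2.3, where
# `app.json_encoder` is still supported; newer Flask would subclass
# flask.json.provider.DefaultJSONProvider instead):
#
#     >>> with app.app_context():
#     ...     json.dumps({'scores': np.arange(3)})
#     '{"scores": [0, 1, 2]}'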