class MIKernelSVR(MIKernelSVM):
    """Nu-SVR that works on a Gram matrix produced by a named kernel.

    The wrapped ``NuSVR`` always uses ``kernel='precomputed'``; the kernel
    callable looked up through ``kernel.by_name`` turns the raw inputs into
    the Gram matrix that the estimator is trained and evaluated on.
    """

    def __init__(self, **parameters):
        """Build the estimator and the kernel callable.

        Args:
            **parameters: ``C`` and ``nu`` (both optional) are forwarded to
                ``NuSVR``. ``kernel`` (required) is the kernel name; every
                remaining keyword is passed to ``kernel.by_name``.

        Raises:
            KeyError: If ``kernel`` is not supplied.
        """
        svr_params = {
            'kernel': 'precomputed',
            'max_iter': MAX_ITERS,
        }
        for name in ('C', 'nu'):
            if name in parameters:
                svr_params[name] = parameters.pop(name)
        self.estimator = NuSVR(**svr_params)

        # Get kernel name and pass remaining parameters to kernel
        mi_kernel_name = parameters.pop('kernel')
        self.mi_kernel = kernel.by_name(mi_kernel_name, **parameters)

    def fit(self, X, y):
        """Fit the estimator on the Gram matrix of ``X`` against itself.

        Args:
            X: Iterable of array-likes; each element is converted with
                ``np.asarray``.
            y: Targets passed unchanged to ``NuSVR.fit``.

        Returns:
            ``self``.
        """
        # A list (not a lazy ``map`` object) is required: the data is used
        # twice here and once more in ``predict``; a Python 3 iterator would
        # already be exhausted after the first pass.
        X = [np.asarray(item) for item in X]
        self.fit_data = X
        self.gram_matrix = self.mi_kernel(X, X)
        self.estimator.fit(self.gram_matrix, y)
        return self

    def predict(self, X=None):
        """Predict for ``X``, or for the training data when ``X`` is None.

        Args:
            X: Optional iterable of array-likes. When omitted, the Gram
                matrix stored by ``fit`` is reused.

        Returns:
            Whatever ``NuSVR.predict`` returns for the Gram matrix.
        """
        if X is None:
            gram_matrix = self.gram_matrix
        else:
            X = [np.asarray(item) for item in X]
            gram_matrix = self.mi_kernel(X, self.fit_data)
        return self.estimator.predict(gram_matrix)
def nusvr_sklearn(adata, test=False):
    """Estimate label proportions per observation with linear Nu-SVR.

    The per-label mean profiles of ``adata.uns["sc_reference"]`` (computed
    by ``obs_means``) are used as regressors. One ``NuSVR(kernel="linear")``
    is fitted for every observation of ``adata``; its coefficients are
    passed through ``normalize_coefficients`` and stored as a DataFrame in
    ``adata.obsm["proportions_pred"]``.

    Args:
        adata: Object exposing ``X``, ``uns``, ``obs_names`` and ``obsm``
            (presumably an AnnData instance - confirm with callers).
        test: Unused; kept so existing callers keep working.

    Returns:
        The same ``adata`` object, modified in place.
    """
    from scipy.sparse import issparse
    from sklearn.svm import NuSVR

    def _dense(matrix):
        # Densify only when this particular matrix is sparse. The previous
        # code decided for both matrices from ``adata.X`` alone, which
        # fails when only one of the two is sparse.
        return matrix.toarray() if issparse(matrix) else matrix

    adata_sc = adata.uns["sc_reference"].copy()
    labels = adata_sc.obs["label"].cat.categories
    adata_means = obs_means(adata_sc, "label")

    X = _dense(adata_means.X.T)
    y = _dense(adata.X.T)

    res = np.zeros((y.shape[1], X.shape[1]))  # (voxels,cells)
    for i in range(y.shape[1]):
        model = NuSVR(kernel="linear")
        model.fit(X, y[:, i])
        res[i] = model.coef_

    res_prop = normalize_coefficients(res)
    adata.obsm["proportions_pred"] = pd.DataFrame(
        res_prop, columns=labels, index=adata.obs_names)
    return adata
def train_nusvr(x_train: np.ndarray, y_train: np.ndarray, **kwargs) -> (NuSVR, dict):
    """Train a NuSVR, grid-searching every hyper-parameter not supplied.

    Args:
        x_train: Training features.
        y_train: Training targets.
        **kwargs: Forwarded to ``NuSVR``. Any of ``C``, ``gamma``,
            ``kernel`` and ``nu`` that is missing is tuned with
            ``GridSearchCV`` over a fixed list of candidates.

    Returns:
        The fitted model and the result of ``get_model_params`` for it.
    """
    mdl = NuSVR(**kwargs)
    mdl.degree = 2

    # Candidate values, in the order they are added to the search grid.
    candidates = (
        ('C', [0.1, 1, 10, 100, 250]),
        ('gamma', [1e-5, 1e-4, 1e-3, 0.01, 0.1, 0.25]),
        ('kernel', ['rbf', 'linear', 'poly']),
        ('nu', [0.001, 0.1, 0.25, 0.5, 1]),
    )
    param_grid = {}
    for name, values in candidates:
        if name not in kwargs:
            param_grid[name] = values

    # A non-empty grid means at least one parameter has to be searched.
    if param_grid:
        param_grid['degree'] = [2]
        logger.info(f'Performing grid search. Using param_grid: {param_grid} '
                    f'This may take a while...')
        gs = GridSearchCV(estimator=mdl, param_grid=param_grid)
        gs.fit(x_train, y_train)
        mdl = gs.best_estimator_

    mdl.fit(x_train, y_train)
    params = get_model_params(model_name='nusvr', model=mdl)
    return mdl, params
def train(self, x, y, param_names, random_search=100, kernel_cache_size=2000, **kwargs): if self._debug: print "Before preprocessing: 1st sample:\n", x[0] start = time.time() scaled_x = self._set_and_preprocess(x=x, param_names=param_names) # Check that each input is between 0 and 1 self._check_scaling(scaled_x=scaled_x) if self._debug: print "Shape of training data: ", scaled_x.shape print "Param names: ", self._used_param_names print "First training sample\n", scaled_x[0] print "Encode: ", self._encode # Do a random search nu, c, gamma = self._random_search(random_iter=100, x=scaled_x, y=y, kernel_cache_size=kernel_cache_size) # Now train model try: nusvr = NuSVR(gamma=gamma, C=c, nu=nu, random_state=self._rng, cache_size=kernel_cache_size) nusvr.fit(scaled_x, y) self._model = nusvr except Exception, e: print "Training failed", e.message svr = None
def nuSvr(X, Y, nu, delta, verbose=False):
    """Fit a linear NuSVR and score the thresholded, normalised weights.

    Args:
        X: Feature table; must support ``apply(..., axis=1)``.
        Y: Target values.
        nu: ``nu`` parameter of ``NuSVR``.
        delta: Normalised weights below this value are set to zero.
        verbose: Forwarded to ``NuSVR``.

    Returns:
        ``[rmse, clf]``: the RMSE of the weighted-sum prediction against
        ``Y`` and the fitted estimator.
    """
    clf = NuSVR(kernel='linear', C=1.0, nu=nu, verbose=verbose)
    clf.fit(X, Y)

    # Coefficients: drop negatives, scale to sum one, then drop small ones.
    betas = clf.coef_
    betas = np.where(betas < 0, 0, betas)
    betas = betas / np.sum(betas)
    betas = np.where(betas < delta, 0, betas)

    def _weight_row(row):
        # Scale every feature of the row by its weight.
        return row * betas[0]

    weighted = X.apply(_weight_row, axis=1)
    predict = weighted.sum(axis=1)

    squared_error = pow((Y - predict), 2)
    RmsePredict = math.sqrt(squared_error.mean())
    return [RmsePredict, clf]
def NuSVRRegressor(X_train, X_test, y_train, y_test):
    """Fit one default NuSVR per target column and log the metrics.

    Args:
        X_train: Training features.
        X_test: Validation features.
        y_train: Training targets; columns 0 and 1 are modelled separately.
        y_test: Validation targets.
    """
    regs = []
    for target in (y_train[:, 0], y_train[:, 1]):
        reg = NuSVR()
        reg.fit(X_train, target)
        regs.append(reg)

    def _predict_both(features):
        # One prediction column per regressor, side by side.
        columns = [reg.predict(X=features).reshape(-1, 1) for reg in regs]
        return np.hstack(columns)

    # Validation metrics.
    y_pred = _predict_both(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    # Training metrics.
    y_pred = _predict_both(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="NuSVRRegressor", reg=regs, metrics=metrics,
            val_metrics=val_metrics)
def test_vcl_returning_for_nusvr(self):
    """Exporting a fitted NuSVR to VCL must yield a non-empty result."""
    np.random.seed(0)
    n_samples, n_features = 10, 5
    # Targets are drawn before features to keep the random stream identical.
    y = np.random.randn(n_samples)
    X = np.random.randn(n_samples, n_features)

    regr = NuSVR(C=1.0, nu=0.1)
    regr.fit(X, y)

    exported = m2vcl.export_to_vcl(regr)
    self.assertNotEqual(exported, "")
def fit(self, X, Y, W):
    """Fit a NuSVR built from this object's settings and wrap the result.

    Args:
        X: Training features.
        Y: Targets; flattened with ``reshape(-1)``.
        W: Sample weights, flattened the same way, or ``None`` to fit
            without weights.

    Returns:
        A ``NuSVRClassifier`` wrapping the fitted estimator.
    """
    clf = NuSVR(nu=self.nu,
                C=self.C,
                kernel=self.kernel,
                degree=self.degree,
                gamma=self.gamma,
                coef0=self.coef0,
                shrinking=self.shrinking,
                tol=self.tol,
                cache_size=self.cache_size,
                max_iter=self.max_iter)

    fit_args = [X, Y.reshape(-1)]
    if W is not None:
        fit_args.append(W.reshape(-1))
    return NuSVRClassifier(clf.fit(*fit_args))
def NuSVRRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search one NuSVR per target column and log metrics and params.

    Args:
        X_train: Training features.
        X_test: Validation features.
        y_train: Training targets; columns 0 and 1 are tuned separately.
        y_test: Validation targets.
    """
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]

    grid_values = {
        'nu': [value * 0.1 for value in range(1, 3)],
        'C': list(range(1, 3)),
        'kernel': ['poly', 'rbf'],
        'degree': list(range(1, 3))
    }

    def _search(target):
        # Run the shared grid search for a single target column.
        search = GridSearchCV(
            NuSVR(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100)
        search.fit(X_train, target)
        return search

    grid_reg1 = _search(y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)

    grid_reg2 = _search(y_train2)
    # The second model must come from the second search. Taking it from
    # ``grid_reg1`` made ``reg2`` the very same object as ``reg1``, so the
    # fit below overwrote the model for the first target.
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)

    def _predict_both(features):
        # One prediction column per target, side by side.
        return np.hstack((reg1.predict(X=features).reshape(-1, 1),
                          reg2.predict(X=features).reshape(-1, 1)))

    y_pred = _predict_both(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = _predict_both(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # For every tuned parameter keep the pair [target-1 value, target-2 value].
    best_params = {key: [best_params1[key], best_params2[key]]
                   for key in best_params1}

    saveBestParams(nameOfModel="NuSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="NuSVRRegressorGS", reg=[reg1, reg2],
            metrics=metrics, val_metrics=val_metrics)
def fit(self, X, y, sample_weight=None):
    """Fit with oneDAL for the listed kernels, else with stock sklearn.

    Args:
        X: Training features.
        y: Training targets.
        sample_weight: Optional per-sample weights.

    Returns:
        ``self``.
    """
    use_onedal = self.kernel in ['linear', 'rbf', 'poly', 'sigmoid']
    backend = "onedal" if use_onedal else "sklearn"
    # Report which implementation handles this call before running it.
    logging.info("sklearn.svm.NuSVR.fit: " + get_patch_message(backend))

    if use_onedal:
        self._onedal_fit(X, y, sample_weight)
    else:
        sklearn_NuSVR.fit(self, X, y, sample_weight)
    return self
def cibersort(rna_sample, sig_df, nu=0.5, C=1.0, kernel='linear', shrinking=True):
    """
    Solve for cell type frequencies of one sample with sklearn's NuSVR.

    Inputs:
        - rna_sample: pandas series. A single sample of rna expression values
        - sig_df: pandas df of Signature gene expression values for given
            cell types. Rows are genes (indexed by 'Hugo_Symbol') and columns
            are cell types
        - nu: see sklearn's NuSVR; pass the string 'best' to choose it from
            0.25, 0.5 and 0.75 by 5-fold grid search
        - C: see sklearn's NuSVR
        - kernel: see sklearn's NuSVR
        - shrinking: see sklearn's NuSVR
    Outputs:
        - weights: NuSVR solution vector for the given sample, with negative
            values set to 0 and the remainder divided by their sum so they
            can be read as cell type frequencies
    """
    import numpy as np
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import NuSVR

    if nu == 'best':
        # No numeric nu given: pick the one with the best cross-validated MSE.
        base_model = NuSVR(C=C, kernel=kernel, max_iter=-1, shrinking=shrinking)
        search = GridSearchCV(base_model,
                              cv=5,
                              param_grid={"nu": [0.25, 0.5, 0.75]},
                              scoring='neg_mean_squared_error',
                              refit=True)
        search.fit(sig_df, rna_sample)
        nu = search.best_params_['nu']

    # Final fit with the chosen (or caller-specified) nu.
    model = NuSVR(nu=nu, C=C, kernel=kernel, max_iter=-1, shrinking=shrinking,
                  tol=1e-3)
    model.fit(sig_df, rna_sample)

    # coef_ has a single row; the original author notes it equals
    # dual_coef_ @ support_vectors_.
    weights = np.array(model.coef_)[0]
    weights[weights < 0] = 0
    # Sum-to-one constraint.
    return weights / np.sum(weights)
def svm_cross_validation(x, y):
    """Grid-search ``C`` and ``gamma`` for an RBF NuSVR and refit with them.

    Args:
        x: Training features.
        y: Training targets.

    Returns:
        A new RBF ``NuSVR`` fitted on ``x``/``y`` with the best ``C`` and
        ``gamma`` found; all parameters of the best estimator are printed.
    """
    search_space = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    }
    grid_search = GridSearchCV(NuSVR(kernel='rbf'), search_space, n_jobs=8,
                               verbose=1)
    grid_search.fit(x, y)

    best_parameters = grid_search.best_estimator_.get_params()
    for para in best_parameters:
        print(para, best_parameters[para])

    model = NuSVR(kernel='rbf',
                  C=best_parameters['C'],
                  gamma=best_parameters['gamma'])
    model.fit(x, y)
    return model
def _random_search(self, random_iter, x, y, kernel_cache_size):
    """Randomly search C, gamma and nu for an RBF NuSVR.

    The default configuration plus ``random_iter - 1`` sampled ones are each
    fitted on one half of a 50/50 split of ``x``/``y`` and scored with
    ``NuSVR.score`` on the other half. A progress marker is written to
    stdout per configuration: ``<`` for a new best score, otherwise ``#``
    on every tenth index and ``.`` elsewhere.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost; the loop and the final message are assumed to
    belong to the ``random_iter > 0`` branch - confirm against the
    original file.

    :param random_iter: number of configurations to evaluate
    :param x: training inputs
    :param y: training targets
    :param kernel_cache_size: ``cache_size`` passed to every NuSVR
    :return: tuple ``(nu, c, gamma)`` of the best-scoring configuration, or
        the defaults when no configuration beats the initial score
    """
    # Default Values
    c = 1.0
    gamma = 0.0
    nu = 0.5
    # Python 2 only: sys.maxint does not exist on Python 3.
    best_score = -sys.maxint
    if random_iter > 0:
        sys.stdout.write("Do a random search %d times" % random_iter)
        # C and gamma are drawn from powers of two, nu from a uniform
        # distribution object.
        param_dist = {
            "C": numpy.power(2.0, range(-5, 16)),
            "gamma": numpy.power(2.0, range(-15, 4)),
            "nu": uniform(loc=0.0001, scale=1 - 0.0001)
        }
        # The default configuration is always evaluated first.
        param_list = [
            {
                "C": c,
                "gamma": gamma,
                "nu": nu
            },
        ]
        param_list.extend(
            list(
                ParameterSampler(param_dist,
                                 n_iter=random_iter - 1,
                                 random_state=self._rng)))
        for idx, d in enumerate(param_list):
            nusvr = NuSVR(kernel='rbf',
                          gamma=d['gamma'],
                          C=d['C'],
                          nu=d['nu'],
                          random_state=self._rng,
                          cache_size=kernel_cache_size)
            # A fresh split is drawn for every configuration.
            train_x, test_x, train_y, test_y = \
                train_test_split(x, y, test_size=0.5, random_state=self._rng)
            self._check_scaling(scaled_x=train_x)
            nusvr.fit(train_x, train_y)
            sc = nusvr.score(test_x, test_y)
            # Tiny output
            m = "."
            if idx % 10 == 0:
                m = "#"
            if sc > best_score:
                m = "<"
                best_score = sc
                c = d['C']
                gamma = d['gamma']
                nu = d['nu']
            sys.stdout.write(m)
            sys.stdout.flush()
        sys.stdout.write("Using C: %f, nu: %f and Gamma: %f\n" % (c, nu, gamma))
    return nu, c, gamma
def update_event(self, input_called=-1):
    """Fit a NuSVR when input 0 fires and publish it on output 1.

    Input 1 optionally carries a mapping of estimator parameters, inputs 2
    and 3 carry the features and the targets. After fitting, the estimator
    is written to output 1 and output 0 is executed.

    Args:
        input_called: Index of the input that triggered the update; every
            value other than ``0`` is ignored.
    """
    if input_called != 0:
        return

    regr = NuSVR()
    # Identity check instead of ``!= None``: the latter is evaluated
    # element-wise by array-like objects and would fail in the ``if``.
    params = self.input(1)
    if params is not None:
        regr.set_params(**params)

    X = self.input(2)
    y = self.input(3)
    regr.fit(X, y)

    self.set_output_val(1, regr)
    self.exec_output(0)
class _NuSVRImpl:
    """Thin wrapper that forwards ``fit`` and ``predict`` to an ``Op`` model."""

    def __init__(self, **hyperparams):
        """Store the hyper-parameters and build the wrapped model from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when given; return self."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        predictions = self._wrapped_model.predict(X)
        return predictions
def fit(self, X, Y, W):
    """Fit a NuSVR built from this object's settings and wrap the result.

    Args:
        X: Training features.
        Y: Targets; flattened with ``reshape(-1)``.
        W: Two-dimensional weight array; it is flattened and used as sample
            weights only when it has at least one column.

    Returns:
        A ``NuSVRClassifier`` wrapping the fitted estimator.
    """
    clf = NuSVR(nu=self.nu,
                C=self.C,
                kernel=self.kernel,
                degree=self.degree,
                gamma=self.gamma,
                coef0=self.coef0,
                shrinking=self.shrinking,
                tol=self.tol,
                cache_size=self.cache_size,
                max_iter=self.max_iter)

    has_weights = W.shape[1] > 0
    fit_args = [X, Y.reshape(-1)]
    if has_weights:
        fit_args.append(W.reshape(-1))
    return NuSVRClassifier(clf.fit(*fit_args))
def analyze_past_learning_data(num_of_support_vectors=5, verbose=False):
    """Fit an RBF NuSVR on the recorded signals and learned colors.

    The data is loaded with ``get_brain_signals_data`` and
    ``get_learning_raw_data``, matched, reduced by backward elimination and
    standardised before the regressor is fitted.

    Args:
        num_of_support_vectors: Target number of support vectors; used as
            ``nu = num_of_support_vectors / len(X)``.
        verbose: Forwarded to ``backward_eliminate_features``; when true the
            support vectors are also clustered and visualised.

    Returns:
        Tuple ``(regressor, sc_X, sc_y, recent_signals,
        recent_learned_colors)``: the fitted regressor, the two fitted
        scalers and the raw data that was loaded.
    """
    recent_signals = get_brain_signals_data()
    recent_learned_colors = get_learning_raw_data()
    X, y = match_signals_as_colors(recent_signals, recent_learned_colors,
                                   signals_format='CyKIT')
    y = y.reshape(-1, 1)
    X = backward_eliminate_features(X, y, 0.05, 'pValues+rSquared', verbose)

    # Feature Scaling
    from sklearn.preprocessing import StandardScaler
    sc_X = StandardScaler()
    X = sc_X.fit_transform(X)
    sc_y = StandardScaler()
    y = sc_y.fit_transform(y)

    # Fitting the regressor to recent_signals & recent_learned_colors after
    # feature scaling and backward elimination.
    # Notes:
    # - nu = 5/len(X) is supposed to give 5 support vectors. That does not
    #   happen exactly because there are some boundaries, but it still
    #   decreases the number of support vectors to a good value.
    from sklearn.svm import NuSVR
    print('--nu = num_of_support_vectors/len(X), num_of_support_vectors = ',
          num_of_support_vectors, 'and len(x) = ', len(X))
    regressor = NuSVR(kernel='rbf', nu=(num_of_support_vectors / len(X)),
                      verbose=True)
    # The scaler needs a column vector but the regressor expects 1-D
    # targets; flattening avoids scikit-learn's data-conversion warning.
    regressor.fit(X, y.ravel())
    support_vectors = regressor.support_vectors_

    if verbose:
        # TODO -- fit hierarchical clustering to the support vectors of the
        # past regressor to find the biggest clusters. For now this is only
        # used for visualisation.
        from sklearn.cluster import AgglomerativeClustering
        # Ward linkage only supports the (default) euclidean distance, so
        # the ``affinity`` keyword, which newer scikit-learn releases no
        # longer accept, is simply omitted.
        hc = AgglomerativeClustering(n_clusters=2, linkage='ward')
        y_hc = hc.fit_predict(support_vectors)
        visualizing_the_clusters(regressor, support_vectors, y_hc,
                                 len(support_vectors[-1]))

    return regressor, sc_X, sc_y, recent_signals, recent_learned_colors
def _test_diabetes_compare_with_sklearn(kernel):
    """Compare the ``NuSVR`` under test with ``SklearnNuSVR`` on diabetes.

    Both estimators are fitted on the full data set with the same
    hyper-parameters; score, intercept, support-vector shape and dual
    coefficients must agree within the stated tolerances.

    Args:
        kernel: Kernel name passed to both estimators.
    """
    diabetes = datasets.load_diabetes()

    clf_onedal = NuSVR(kernel=kernel, nu=.25, C=10.)
    clf_onedal.fit(diabetes.data, diabetes.target)
    result = clf_onedal.score(diabetes.data, diabetes.target)

    clf_sklearn = SklearnNuSVR(kernel=kernel, nu=.25, C=10.)
    clf_sklearn.fit(diabetes.data, diabetes.target)
    expected = clf_sklearn.score(diabetes.data, diabetes.target)

    assert result > expected - 1e-5
    assert_allclose(clf_sklearn.intercept_, clf_onedal.intercept_, atol=1e-4)
    # Compare the two implementations; this check used to compare the
    # sklearn estimator with itself and therefore could never fail.
    assert_allclose(clf_sklearn.support_vectors_.shape,
                    clf_onedal.support_vectors_.shape)
    assert_allclose(clf_sklearn.dual_coef_, clf_onedal.dual_coef_, atol=1e-2)
def produce_proportions(self):
    """Compute one table of normalised NuSVR coefficients per ``self.ag`` entry.

    For every entry of ``self.ag`` the matching rows of ``self.bulk`` are
    selected (rows with missing values dropped, and the entry itself reduced
    to the surviving rows). A linear NuSVR is fitted per bulk column; its
    coefficients form one row of the result. Negative values are zeroed,
    every row is divided by its sum and the table is appended to
    ``self.pro``.
    """
    for i in range(len(self.ag)):
        bulk_sub = self.bulk.loc[self.ag[i].index, :].dropna()
        # Keep the entry aligned with the rows that survived dropna().
        self.ag[i] = self.ag[i].loc[bulk_sub.index]
        features = self.ag[i]

        pro_df = pd.DataFrame(columns=self.clusters)
        for column in bulk_sub.columns:
            regr = NuSVR(nu=0.5, C=0.5, kernel='linear')
            regr.fit(features, bulk_sub[column])
            pro_df.loc[column] = regr.coef_[0]

        pro_df[pro_df < 0] = 0
        # Row-wise normalisation so each row sums to one.
        for row_label in pro_df.index:
            row_total = pro_df.loc[row_label].sum()
            pro_df.loc[row_label] = np.divide(pro_df.loc[row_label], row_total)

        self.pro.append(pro_df)
def distill(model, dataloaders, task_permutation, device):
    """Distil one class probability of ``model`` into a NuSVR and save it.

    Every training batch is flattened to ``MNIST_DIM`` features, permuted
    with ``task_permutation`` and passed through ``model``. The
    Gumbel-softmax probability of class 0 becomes the regression target.
    The fitted estimator is dumped to ``distilled_svr.m`` and the fit
    result and the elapsed fitting time are printed.

    Args:
        model: Callable returning logits for a batch of inputs.
        dataloaders: Mapping; only the ``'train'`` loader is used.
        task_permutation: Column index applied to the flattened inputs.
        device: Device the inputs are moved to before calling ``model``.
    """
    train_loader = dataloaders['train']
    X = []
    Y = []
    tar_class = 0
    for inputs, _ in train_loader:
        inputs = inputs.view(-1, MNIST_DIM)
        inputs = inputs[:, task_permutation].to(device)
        logits = model(inputs)
        probs = F.gumbel_softmax(logits, tau=1)
        # Move to host memory before converting: ``.numpy()`` only works on
        # CPU tensors, so the old per-sample conversion failed on any other
        # device.
        X.extend(inputs.detach().cpu().numpy())
        Y.extend(probs.detach().cpu().numpy()[:, tar_class])
    X = np.array(X)
    Y = np.array(Y)

    clf = NuSVR(C=1.0, nu=0.1, max_iter=10)
    t1 = time.time()
    res = clf.fit(X, Y)
    t2 = time.time()

    model_fname = 'distilled_svr.m'
    joblib.dump(clf, model_fname)
    print('saving to', model_fname)
    print(res)
    print(t2 - t1)
def applySVR(X_train, X_test, y_train, n_components, gamma):
    """Fit an RBF NuSVR and print one prediction per test file.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        n_components: Only printed; the PCA step that used it is disabled.
        gamma: ``gamma`` of the RBF kernel.
    """
    print('n_components=', n_components, 'gamma=', gamma)

    # A PCA step to reduce run time used to sit here. According to the
    # original author around 150 components worked well for this problem,
    # but the value is best re-checked by experiment:
    # pca = PCA(n_components=n_components).fit(X_train)
    # X_train = pca.transform(X_train)
    # X_test = pca.transform(X_test)
    svr_settings = dict(C=100, cache_size=200, coef0=0.0, degree=3,
                        gamma=gamma, kernel='rbf', max_iter=-1, nu=0.5,
                        shrinking=True, tol=0.001, verbose=False)
    clf = NuSVR(**svr_settings)
    clf.fit(X_train, y_train)

    # Print arrays in full from here on (process-wide numpy setting).
    np.set_printoptions(threshold=np.inf)
    print('number of test data', len(X_test))

    y_rbf = clf.predict(X_test)
    print('\n\npredictions\n\n')
    for i, prediction in enumerate(y_rbf):
        print(test_files[i] + ", " + str(prediction))
    # The original author notes that evaluation metrics belong after this
    # point and that the paper used the RMSE as one of them.
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    A NuSVR is fitted for combinations of kernel and non-kernel parameters
    on a small random linear problem, converted with ``scikit_converter``
    and, on macOS 10.13 or newer, evaluated against the predictions of the
    original model.

    Args:
        allow_slow: When false, only the first parameter combination is
            tested.
    """
    # Generate some smallish (some kernels take too long on anything else)
    # random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)

    # Parameters to test
    kernel_parameters = [{}, {'kernel': 'rbf', 'gamma': 1.2},
                         {'kernel': 'linear'},
                         {'kernel': 'poly'},
                         {'kernel': 'poly', 'degree': 2},
                         {'kernel': 'poly', 'gamma': 0.75},
                         {'kernel': 'poly', 'degree': 0, 'gamma': 0.9,
                          'coef0': 2},
                         {'kernel': 'sigmoid'},
                         {'kernel': 'sigmoid', 'gamma': 1.3},
                         {'kernel': 'sigmoid', 'coef0': 0.8},
                         {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5}]
    non_kernel_parameters = [{}, {'C': 1},
                             {'C': 1.5, 'shrinking': True},
                             {'C': 0.5, 'shrinking': False, 'nu': 0.9}]

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)

            cur_model = NuSVR(**cur_params)
            cur_model.fit(x, y)
            df['prediction'] = cur_model.predict(x)

            spec = scikit_converter.convert(cur_model, input_names, 'target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                # assertAlmostEquals was a deprecated alias that Python 3.12
                # removed; assertAlmostEqual is the supported spelling.
                self.assertAlmostEqual(metrics['max_error'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def traindt(x,y):
    """Fit the surrogate regressor and store it in the global ``clf``.

    Commented-out code in the original shows that a decision tree, a random
    forest and gradient boosting were tried here before the current
    ``NuSVR(C=1e6)``.
    """
    global clf
    surrogate = NuSVR(C=1e6)
    clf = surrogate.fit(x, y)
def traindt(x, y):
    """Fit the surrogate regressor and store it in the global ``clf``.

    Commented-out code in the original shows that a decision tree, a random
    forest and gradient boosting were tried here before the current
    ``NuSVR(C=1e6)``.
    """
    global clf
    surrogate = NuSVR(C=1e6)
    clf = surrogate.fit(x, y)
def cv_nu_SVR(X, y, K, C_test, nu_test):
    """Pick ``C`` and ``nu`` for NuSVR by K-fold cross-validation.

    Every ``(C, nu)`` pair is scored with the mean of ``NuSVR.score`` over
    the ``K`` folds produced by ``create_cv_set``/``create_train_set``.

    Args:
        X: Features.
        y: Targets.
        K: Number of folds.
        C_test: Candidate values for ``C``.
        nu_test: Candidate values for ``nu``.

    Returns:
        Tuple ``(C_opt, nu_opt)`` of the best-scoring pair (the first one in
        case of ties), or ``(0, 0)`` when a candidate list is empty.
    """
    Accuracy = np.zeros((len(C_test), len(nu_test)))
    Xcv, Ycv = create_cv_set(X, y, K)

    for k1, c in enumerate(C_test):
        for k2, nu in enumerate(nu_test):
            current_acc = 0.0
            for n in range(K):
                svc = NuSVR(C=c, nu=nu)
                X_train, y_train, X_test, y_test = create_train_set(
                    Xcv, Ycv, n)
                # Train the SVM on this fold and accumulate the mean score.
                svc.fit(X_train, y_train)
                res_tmp = svc.score(X_test, y_test)
                current_acc = current_acc + res_tmp / (1.0 * K)
            Accuracy[k1, k2] = current_acc

    # The score is R^2 and can be negative, so the search must start below
    # every possible value. Starting at 0 returned the invalid pair
    # C=0, nu=0 whenever no candidate reached a positive score.
    acc_test = float('-inf')
    C_opt = 0
    nu_opt = 0
    for k1 in range(Accuracy.shape[0]):
        for k2 in range(Accuracy.shape[1]):
            if Accuracy[k1, k2] > acc_test:
                acc_test = Accuracy[k1, k2]
                C_opt = C_test[k1]
                nu_opt = nu_test[k2]

    print("NuSVR, Parametres optimaux: C=", C_opt, " nu=", nu_opt)
    return C_opt, nu_opt
def _random_search(self, random_iter, x, y, kernel_cache_size):
    """Randomly search C, gamma and nu for an RBF NuSVR.

    The default configuration plus ``random_iter - 1`` sampled ones are each
    fitted on one half of a 50/50 split of ``x``/``y`` and scored with
    ``NuSVR.score`` on the other half. A progress marker is written to
    stdout per configuration: ``<`` for a new best score, otherwise ``#``
    on every tenth index and ``.`` elsewhere.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost; the loop and the final message are assumed to
    belong to the ``random_iter > 0`` branch - confirm against the
    original file.

    :param random_iter: number of configurations to evaluate
    :param x: training inputs
    :param y: training targets
    :param kernel_cache_size: ``cache_size`` passed to every NuSVR
    :return: tuple ``(nu, c, gamma)`` of the best-scoring configuration, or
        the defaults when no configuration beats the initial score
    """
    # Default Values
    c = 1.0
    gamma = 0.0
    nu = 0.5
    # Python 2 only: sys.maxint does not exist on Python 3.
    best_score = -sys.maxint
    if random_iter > 0:
        sys.stdout.write("Do a random search %d times" % random_iter)
        # C and gamma are drawn from powers of two, nu from a uniform
        # distribution object.
        param_dist = {"C": numpy.power(2.0, range(-5, 16)),
                      "gamma": numpy.power(2.0, range(-15, 4)),
                      "nu": uniform(loc=0.0001, scale=1-0.0001)}
        # The default configuration is always evaluated first.
        param_list = [{"C": c, "gamma": gamma, "nu": nu}, ]
        param_list.extend(list(ParameterSampler(param_dist,
                                                n_iter=random_iter-1,
                                                random_state=self._rng)))
        for idx, d in enumerate(param_list):
            nusvr = NuSVR(kernel='rbf',
                          gamma=d['gamma'],
                          C=d['C'],
                          nu=d['nu'],
                          random_state=self._rng,
                          cache_size=kernel_cache_size)
            # A fresh split is drawn for every configuration.
            train_x, test_x, train_y, test_y = \
                train_test_split(x, y, test_size=0.5,
                                 random_state=self._rng)
            self._check_scaling(scaled_x=train_x)
            nusvr.fit(train_x, train_y)
            sc = nusvr.score(test_x, test_y)
            # Tiny output
            m = "."
            if idx % 10 == 0:
                m = "#"
            if sc > best_score:
                m = "<"
                best_score = sc
                c = d['C']
                gamma = d['gamma']
                nu = d['nu']
            sys.stdout.write(m)
            sys.stdout.flush()
        sys.stdout.write("Using C: %f, nu: %f and Gamma: %f\n" %
                         (c, nu, gamma))
    return nu, c, gamma
def stacking(base_models, X, Y, T):
    """Stack base models with an RBF NuSVR meta-learner.

    Each base model produces out-of-fold predictions for ``X`` over 10
    folds and, per fold, predictions for ``T`` that are averaged. The
    meta-learner is fitted on the out-of-fold matrix and applied to the
    averaged test matrix.

    Args:
        base_models: Sequence whose items hold the estimator at index 1.
        X: Training features.
        Y: Training targets.
        T: Test features.

    Returns:
        Meta-learner predictions for ``T``.
    """
    folds = list(KFold(len(Y), n_folds=10, random_state=0))
    n_models = len(base_models)
    n_folds = len(folds)

    S_train = np.zeros((X.shape[0], n_models))
    S_test = np.zeros((T.shape[0], n_models))

    for i, entry in enumerate(base_models):
        clf = entry[1]
        fold_test_preds = np.zeros((T.shape[0], n_folds))
        for j, (train_idx, test_idx) in enumerate(folds):
            clf.fit(X[train_idx], Y[train_idx])
            # Out-of-fold predictions become the meta-features.
            holdout_pred = clf.predict(X[test_idx])[:]
            S_train[test_idx, i] = holdout_pred
            fold_test_preds[:, j] = clf.predict(T)[:]
        # Average the test predictions of all folds.
        S_test[:, i] = fold_test_preds.mean(1)

    nuss = NuSVR(kernel='rbf')
    nuss.fit(S_train, Y)
    return nuss.predict(S_test)[:]
class TestNuSVRIntegration(TestCase):
    """Integration tests for ``PMMLNuSVR`` against a reference ``NuSVR``."""

    def setUp(self):
        """Load the test data and the PMML model, and fit the reference."""
        df = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
        yte = df.iloc[:, 0]
        Xte = df.iloc[:, 1:]
        Xenc = pd.get_dummies(Xte, prefix_sep='')
        self.test = (Xte, yte)
        self.enc = (Xenc, yte)

        pmml = path.join(BASE_DIR, '../models/svr-cat-pima.pmml')
        self.clf = PMMLNuSVR(pmml)

        # Reference model trained on the one-hot encoded data.
        self.ref = NuSVR()
        self.ref.fit(Xenc, yte == 'Yes')

    def test_fit_exception(self):
        """Fitting a PMML-backed model is rejected with 'Not supported.'."""
        empty_X = np.array([[]])
        empty_y = np.array([])
        with self.assertRaises(Exception) as cm:
            self.clf.fit(empty_X, empty_y)

        assert str(cm.exception) == 'Not supported.'

    def test_more_tags(self):
        """The PMML model reports the same tags as a plain NuSVR."""
        expected_tags = NuSVR()._more_tags()
        assert self.clf._more_tags() == expected_tags

    def test_sklearn2pmml(self):
        """A model exported with sklearn2pmml predicts like the reference."""
        pmml_file = "svr-sklearn2pmml.pmml"
        Xenc, yte = self.enc

        # Export to PMML
        pipeline = PMMLPipeline([("regressor", self.ref)])
        pipeline.fit(Xenc, yte == 'Yes')
        sklearn2pmml(pipeline, pmml_file, with_repr=True)

        try:
            # Import PMML
            model = PMMLNuSVR(pmml=pmml_file)

            # Verify that both models predict the same values
            assert np.allclose(self.ref.predict(Xenc), model.predict(Xenc))
        finally:
            remove(pmml_file)
def NuSupportVector(self,Results='',TestSet=False):
    """Fit an RBF NuSVR on ``self.X``/``self.y`` and plot the outcome.

    Without a test set the model is fitted on all data and its predictions
    are plotted together with ``self.y[self.DVCols]``. With a test set the
    first half of the data is used for fitting and the predictions for the
    second half are plotted against the true values with a fitted line.

    NOTE(review): the nesting was reconstructed from a source whose line
    breaks were lost; plotting is assumed to be independent of ``Results``
    - confirm against the original file.

    Args:
        Results: When equal to ``True``, print the score on the full data
            and the estimator parameters.
        TestSet: When equal to ``False``, use all data for fitting;
            otherwise split it in halves.
    """
    NSV = NuSVR(nu=0.5, C=1.0, kernel='rbf', degree=3, gamma='auto',
                coef0=0.0, shrinking=True, tol=0.001, cache_size=200,
                verbose=False, max_iter=-1)
    # ``np.ravel(self.y, 1)`` relied on old NumPy treating a truthy
    # non-string order as Fortran order; current NumPy rejects it, so the
    # order is now spelled out.
    y_flat = np.ravel(self.y, order='F')

    # The ``==`` comparisons are kept so that truthy non-boolean arguments
    # behave exactly as before.
    if TestSet == False:  # noqa: E712
        NSVResult = NSV.fit(self.X, y_flat)
        if Results == True:  # noqa: E712
            print(str(NSVResult.score(self.X, y_flat)) + '\n'
                  + str(NSVResult.get_params()))
        # The estimator is already fitted on this data; no second fit needed.
        plt.plot(NSVResult.predict(self.X))
        y = np.array(self.y[self.DVCols])
        plt.plot(y, 'ro')
        plt.show()
    else:
        x_train = self.X[:len(self.X)//2]
        y_train = y_flat[:len(self.y)//2]
        x_test = self.X[len(self.X)//2:]
        y_test = y_flat[len(self.y)//2:]
        NSVResult = NSV.fit(x_train, y_train)
        if Results == True:  # noqa: E712
            print(str(NSVResult.score(self.X, y_flat)) + '\n'
                  + str(NSVResult.get_params()))
        NSVRPredict = NSVResult.predict(x_test)
        # Straight line fitted through (prediction, truth) pairs.
        trend = polyval(polyfit(NSVRPredict, y_test.reshape(-1), 1),
                        NSVRPredict)
        plt.plot(NSVRPredict, trend, 'r-', label='predicted')
        plt.plot(NSVRPredict, y_test.reshape(-1), 'bo')
        plt.legend()
        plt.show()
class NuSVRModel(object):
    """Holds a data set and NuSVR settings, and trains the estimator."""

    # building
    def __init__(self, dataset, response, kernel, degree, gamma, nu):
        """Store the training data and the NuSVR hyper-parameters.

        Args:
            dataset: Training features.
            response: Training targets.
            kernel: ``kernel`` of the NuSVR.
            degree: ``degree`` of the NuSVR.
            gamma: ``gamma`` of the NuSVR.
            nu: ``nu`` of the NuSVR.
        """
        # init attributes values...
        self.dataset = dataset
        self.response = response
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.nu = nu

    # instance training...
    def trainingMethod(self):
        """Fit the NuSVR and run a 10-fold cross-validation.

        The fitted estimator is stored in ``self.model`` and the
        cross-validation scores in ``self.cv_scores``.
        """
        self.model = NuSVR(kernel=self.kernel, degree=self.degree,
                           gamma=self.gamma, nu=self.nu)
        self.model = self.model.fit(self.dataset, self.response)
        # Keep the scores: they used to be computed and thrown away.
        self.cv_scores = cross_val_score(self.model, self.dataset,
                                         self.response, cv=10)
class NuSVRModel(object):
    """Holds a data set and NuSVR settings, and trains the estimator."""

    def __init__(self, dataset, response, kernel, degree, gamma, nu):
        """Store the training data and the NuSVR hyper-parameters."""
        # Training data.
        self.dataset = dataset
        self.response = response
        # Estimator settings.
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.nu = nu

    def trainingMethod(self):
        """Fit the NuSVR, then predict and score on the training data.

        Sets ``self.model``, ``self.SVRAlgorithm`` (the fitted estimator),
        ``self.predicctions`` and ``self.r_score``.
        """
        svr_settings = dict(kernel=self.kernel, degree=self.degree,
                            gamma=self.gamma, nu=self.nu)
        self.model = NuSVR(**svr_settings)

        fitted = self.model.fit(self.dataset, self.response)
        self.SVRAlgorithm = fitted
        self.predicctions = fitted.predict(self.dataset)
        self.r_score = fitted.score(self.dataset, self.response)
def runTcheby():
    """Run the optimisation loop configured by the module-level ``param``.

    The function takes no arguments.  The first 24 entries of ``param``
    are unpacked pairwise into the run settings (start functions, number of
    iterations, variation operators, output files, ...).

    For every iteration the loop

    1. rebuilds the training set and refits a ``NuSVR`` model (skipped when
       ``free_eval`` is set);
    2. visits the directions in random order and, for each of them, samples
       offspring from its neighbourhood, lets
       ``filt_to.model_based_filtring`` select one candidate, evaluates it,
       updates the z* point (refitting the model when z* moved) and
       replaces at most ``max_decisions_maj`` neighbouring best decisions
       whose ``g_tcheby`` value the candidate improves;
    3. maintains the archive and writes the objectives when enabled.

    Side effects: rebinds the global ``approx_pareto_front`` to the list of
    best decisions (which is subsequently updated in place), resets the
    state kept by ``qual_tools`` / ``eval_to`` and may write result files.

    Returns None.
    """
    global param, approx_pareto_front, archiveOK, NO_FILE_TO_WRITE

    # ------------------------------------------------------------------
    # PARAMETERS
    clf = NuSVR()
    # Second-model slot: forwarded to the filter as-is.
    clf2 = -1
    two_models_bool = False

    start_fct, nb_functions = param[0:2]
    nb_iterations, neighboring_size = param[2:4]
    init_decisions, problem_size = param[4:6]
    max_decisions_maj, delta_neighbourhood = param[6:8]
    CR, search_space = param[8:10]
    F, distrib_index_n = param[10:12]
    pm, operator_fct = param[12:14]
    nb_samples, training_neighborhood_size = param[14:16]
    strategy, file_to_write = param[16:18]
    filter_strat, free_eval = param[18:20]
    param_print_every, file_to_writeR2 = param[20:22]
    filenameDIR, filenameSCORE = param[22:24]

    nb_objectives = len(start_fct)

    # split the offspring operators
    crossover_fct, mutation_fct, repair_fct = operator_fct

    # work on a copy so the initial decisions are left untouched
    best_decisions = copy.deepcopy(init_decisions)

    sampling_param = [
        crossover_fct,
        mutation_fct,
        repair_fct,
        best_decisions,
        F,
        problem_size,
        CR,
        search_space,
        distrib_index_n,
        pm,
    ]

    # ------------------------------------------------------------------
    # INITIALISATION
    qual_tools.resetGlobalVariables(filenameDIR, filenameSCORE, nb_iterations, nb_functions)
    eval_to.resetEval()

    # direction weights for the start functions
    directions = dec.getDirections(nb_functions, nb_objectives)

    # neighbourhood table
    nt.initNeighboringTab(nb_functions, neighboring_size, directions, nb_objectives)

    # expose the best decisions globally so the result can be read afterwards
    approx_pareto_front = best_decisions

    # scores of the initial best decisions
    best_decisions_scores = [
        eval_to.free_eval(start_fct, best_decisions[i], problem_size)
        for i in range(nb_functions)
    ]

    # initial z* point: per-objective minimum of the initial scores
    z_opt_scores = gt.getMinTabOf(best_decisions_scores)
    eval_to.initZstar(z_opt_scores)

    # direction part of the training inputs
    model_directions = train_to.getDirectionsTrainingMatrix(directions)

    # output switches
    writeOK = file_to_write != NO_FILE_TO_WRITE
    writeR2OK = file_to_writeR2 != NO_FILE_TO_WRITE

    # ------------------------------------------------------------------
    # MAIN ALGORITHM
    if writeOK:
        iot.printObjectives(file_to_write, eval_to.getNbEvals(), 0, best_decisions_scores, problem_size, nb_objectives)

    # direction ids, reshuffled at every iteration
    id_directions = list(range(nb_functions))

    for itera in range(nb_iterations):
        if not free_eval:
            # refresh the model on the current population
            training_inputs, training_outputs, training_set_size, training_scores = train_to.getTrainingSet(
                model_directions,
                best_decisions,
                best_decisions_scores,
                eval_to.getZstar_with_decal(),
                strategy,
                nb_functions,
                training_neighborhood_size,
            )
            clf.fit(training_inputs, training_outputs)

        # visit the directions in random order
        random.shuffle(id_directions)

        for f in id_directions:
            # indices of the neighbours of f (f included); the neighbourhood
            # size returned alongside is not needed here
            f_neighbors, _ = nt.getNeighborsOf(f, delta_neighbourhood)

            # offspring generated from the neighbourhood
            list_offspring = samp_to.extended_sampling(f, f_neighbors, sampling_param, nb_samples)

            # filter the offspring and keep one candidate
            filter_param = [
                itera,
                f,
                clf,
                clf2,
                two_models_bool,
                f_neighbors,
                list_offspring,
                model_directions,
                start_fct,
                problem_size,
                eval_to.getZstar_with_decal(),
                best_decisions_scores,
                best_decisions,
                nb_objectives,
            ]
            best_candidate = filt_to.model_based_filtring(filter_strat, free_eval, filter_param)

            # evaluate the selected candidate
            mix_scores = eval_to.eval(start_fct, best_candidate, problem_size)

            # update z*; when it moved the training outputs are stale
            has_changed = eval_to.min_update_Z_star(mix_scores, nb_objectives)
            if has_changed and not free_eval:
                train_to.updateTrainingZstar(eval_to.getZstar_with_decal())
                training_outputs = train_to.retrainSet(
                    training_inputs, training_scores, eval_to.getZstar_with_decal(), training_set_size, nb_objectives
                )
                clf.fit(training_inputs, training_outputs)

            # True once the candidate has been put into the archive
            added_to_S = False
            # number of best decisions replaced by the candidate
            cmpt_best_maj = 0

            # visit the neighbours in random order
            random.shuffle(f_neighbors)
            for j in f_neighbors:
                # stop once the replacement budget is spent
                if cmpt_best_maj >= max_decisions_maj:
                    break

                wj = [directions[obj][j] for obj in range(0, nb_objectives)]
                g_mix = eval_to.g_tcheby(wj, mix_scores, eval_to.getZstar_with_decal())
                g_best = eval_to.g_tcheby(wj, best_decisions_scores[j], eval_to.getZstar_with_decal())

                # the candidate replaces the current best of direction j
                # when its g_tcheby value is strictly smaller
                if g_mix < g_best:
                    cmpt_best_maj += 1
                    best_decisions[j] = best_candidate
                    best_decisions_scores[j] = mix_scores

                    # archive the candidate at most once
                    if archiveOK and not added_to_S:
                        arch_to.archivePut(best_candidate, mix_scores)
                        added_to_S = True

        if archiveOK:
            arch_to.maintain_archive()

        if writeOK:
            iot.printObjectives(
                file_to_write,
                eval_to.getNbEvals(),
                itera + 1,
                best_decisions_scores,
                problem_size,
                nb_objectives,
                print_every=param_print_every,
            )

    if not free_eval and writeR2OK:
        qual_tools.computeQualityEvaluation()
        qual_tools.generateDiffPredFreeFile()
horizon = 7 # Form feature and target vectors featureVectors, targetVectors = util.formFeatureAndTargetVectorsMultiHorizon(correctedSeries, depth, horizon) outputFolderName = "Outputs/Outputs" + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") os.mkdir(outputFolderName) for i in range(horizon): # Train different models for different horizon # Train the model #model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))]) #model = NuSVR(kernel='linear', nu=1.0) model = NuSVR(kernel="rbf", nu=1.0, tol=1e-10, gamma=1.0) #model = RidgeCV() model.fit(featureVectors, targetVectors[:, i]) predictedTargetVectors = model.predict(featureVectors) # Plot the actual and predicted actual = targetVectors[:, i] predicted = predictedTargetVectors # Descale actual = util.scalingFunction.inverse_transform(actual) predicted = util.scalingFunction.inverse_transform(predicted) outplot = outputPlot.OutputPlot(outputFolderName + "/Prediction_horizon"+str(i+1)+".html", "Facebook Fans Change - Linear Regression", "Taylor Swift", "Time", "Output") outplot.setXSeries(np.arange(1, targetVectors.shape[0])) outplot.setYSeries('Actual Output', actual) outplot.setYSeries('Predicted Output', predicted)
# print "Max shuf: ", np.max(ramp) # plt.plot(train_mask) # plt.show() # print np.shape(train_mask) # print np.shape(inp) #inp2 = np.vstack([inp, t]) X = np.array(X) Y = np.array(Y) print "X: ", np.shape(X) print "Y: ", np.shape(Y) print "isnans: ", np.sum(np.isnan(Y)) print "Fitting to S..." S.fit(X[train_mask,:],Y[train_mask]) print "Saving S " with open('S.pkl','wb') as file: pickle.dump(S, file, pickle.HIGHEST_PROTOCOL) # plt.figure() plt.plot(Y[test_mask],color='red',marker='.') plt.plot(S.predict(X[test_mask,:])) plt.show()
'nu': nu, 'C': C }, np.zeros(len(X_train_scaled)), np.zeros(len(X_test_scaled))]) scores3_fold = [] print('Training model with') print(grid_search_results3[-1][0]) for fold_n, (train_index, valid_index) in enumerate(folds.split(X_train_scaled)): X_train, X_valid, X_test, Y_train, Y_valid = get_train_valid_test_samples( X_train_scaled, Y_tr, X_test_scaled, train_index, valid_index) Y_train = Y_train.squeeze() Y_valid = Y_valid.squeeze() model = NuSVR(gamma='scale', nu=nu, C=C, tol=0.01) model.fit(X_train, Y_train) Y_pred_valid = model.predict(X_valid).reshape(-1, ) scores3_fold.append(mean_absolute_error(Y_valid, Y_pred_valid)) print('Fold {0}. MAE: {1}.'.format(fold_n + 1, scores3_fold[-1])) grid_search_results3[-1][1][valid_index] = Y_pred_valid y_pred = model.predict(X_test).reshape(-1, ) grid_search_results3[-1][2] += y_pred scores3_total = np.mean(scores3_fold) grid_search_results3[-1][2] /= n_fold grid_search_results3[-1].append(scores3_total) grid_search_results3[-1].append('NuSVR') grid_search_results3[-1].append([]) if scores3_total < min_score3: min_score3 = scores3_total best_params3 = grid_search_results3[-1][0] oof[-1] = grid_search_results3[-1][1]
cwt_wdw_fetal[regr_idx,:] = cwt_trans_fetal[wdw_idx - cwt_wdw_lth_h : wdw_idx + cwt_wdw_lth_h -1, :].flatten() # Extract feature vectors for regression & knn regr_idx = regr_idx +1 init_sect_end = timer() # print(" Array collection sect elapsed time: @ " + str(svr_wdw_beg) + " " + str(init_sect_end - init_sect_beg)) if(n_svrs < n_coef_tpls): # Initialization phase (fill template library) init_sect_beg = timer() maternal_feature_vectors[n_svrs, :] = cwt_wdw.flatten() maternal_fetal_feature_vectors[n_svrs, :] = np.concatenate((cwt_wdw.flatten(), cwt_wdw_fetal.flatten()), axis = None) nusv_res = NuSVR(nu=0.95, C=10.0, kernel='linear', degree=3, gamma='scale', coef0=0.0, shrinking=True, tol=0.001, cache_size=200, verbose=False, max_iter=10000) z_rbf = nusv_res.fit(cwt_wdw, fetal_lead_wdw).predict(cwt_wdw) # z_rbf = nusv_res.fit(cwt_wdw, mat_lead_wdw).predict(cwt_wdw) nusv_lin_coef = np.float32(nusv_res.coef_) linear_regression_coefs[n_svrs, :] = np.float32(nusv_lin_coef) linear_regression_intercepts[n_svrs] = np.float32(nusv_res.intercept_) nusv_intercept = np.float32(nusv_res.intercept_) mat_lead_wdw_hist[n_svrs, :] = mat_lead_wdw # Save maternal lead for this window init_sect_end = timer() # print(" NuSVR sect elapsed time: @ " + str(svr_wdw_beg) + " " + str(init_sect_end - init_sect_beg)) else: # Estimates based on retrieved templates / update templates
print '' nusvc = NuSVC() print 'NuSVC config:' print nusvc.get_params() nusvc.fit(smr_train.feature_matrix, smr_train.labels) nusvc_score_train = nusvc.score(smr_train.feature_matrix, smr_train.labels) print 'NuSVC precision train: {}'.format(nusvc_score_train) nusvc_score_test = nusvc.score(smr_test.feature_matrix, smr_test.labels) print 'NuSVC precision test: {}'.format(nusvc_score_test) print '' nusvr = NuSVR() print 'NuSVR config:' print nusvr.get_params() nusvr.fit(smr_train.feature_matrix, smr_train.labels) nusvr_score_train = svc.score(smr_train.feature_matrix, smr_train.labels) print 'NuSVR precision train: {}'.format(nusvr_score_train) nusvr_score_test = nusvr.score(smr_test.feature_matrix, smr_test.labels) print 'NuSVR precision test: {}'.format(nusvr_score_test) print '' dtc = DecisionTreeClassifier() print 'DecisionTreeClassifier config:' print dtc.get_params() dtc.fit(smr_train.feature_matrix, smr_train.labels) dtc_score_train = dtc.score(smr_train.feature_matrix, smr_train.labels) print 'DecisionTreeClassifier precision train: {}'.format(dtc_score_train) dtc_score_test = dtc.score(smr_test.feature_matrix, smr_test.labels) print 'DecisionTreeClassifier precision test: {}'.format(dtc_score_test)