def findLassoAlpha(alpha, y, X, returnPred=False):
    """Walk-forward evaluation of a MultiTaskLasso for a given alpha.

    The model for each test month is trained on an expanding window that
    starts at 2013-10-01: the first test month (2015-05) uses the fixed
    window up to 2015-04-01, and each later month t is predicted by a model
    re-fit on all data up to month t-1.

    Parameters
    ----------
    alpha : float
        Regularization strength passed to MultiTaskLasso.
    y, X : pandas.DataFrame
        Targets and features, indexed by date.
    returnPred : bool
        If True return (y_test, prediction); otherwise return the MSE of the
        walk-forward predictions over 2015-05-01..2016-04-01.
    """
    X_train = X.loc['2013-10-01':'2015-04-01']
    y_train = y.loc['2013-10-01':'2015-04-01']
    X_test = X.loc['2015-05-01':'2016-04-01']
    y_test = y.loc['2015-05-01':'2016-04-01']

    # The original duplicated the fit/predict code for the first test date
    # and again inside the loop; a single loop over all test dates is
    # equivalent because X_train/y_train only expand *after* each prediction.
    predictions = []
    for dt in y_test.index:
        # max_iter must be an int for current scikit-learn (1e5 is a float).
        model = MultiTaskLasso(alpha=alpha, max_iter=int(1e5))
        model.fit(X_train, y_train)
        # Series.reshape was removed from pandas; .values.reshape(1, -1) is
        # the equivalent single-row feature matrix.
        row = X_test.loc[dt].values.reshape(1, -1)
        y_pred = pd.DataFrame(model.predict(row), columns=y.columns)
        predictions.append(y_pred)
        # Expand the training window to include the month just predicted.
        X_train = X.loc['2013-10-01':dt]
        y_train = y.loc['2013-10-01':dt]

    prediction = pd.concat(predictions)
    prediction.index = y_test.index
    if returnPred:
        return (y_test, prediction)
    else:
        return mean_squared_error(y_test, prediction)
def mtlasso_model(self, X_train, y_train, X_test, y_test):
    """Fit a MultiTaskLasso (alpha=0.005) on the training split and print
    its train/test scores, MSE and R^2. Nothing is returned; all results
    are written to stdout."""
    regressor = MultiTaskLasso(alpha=.005)
    regressor.fit(X_train, y_train)

    train_predictions = regressor.predict(X_train)
    test_predictions = regressor.predict(X_test)

    # Scoring the model (estimator's own R^2 on each split).
    print(regressor.score(X_train, y_train))
    print(regressor.score(X_test, y_test))

    mse_train = mean_squared_error(y_train, train_predictions)
    mse_test = mean_squared_error(y_test, test_predictions)
    print('MSE train: %.6f, MSE test: %.6f' % (mse_train, mse_test))

    r2_train = r2_score(y_train, train_predictions)
    r2_test = r2_score(y_test, test_predictions)
    print('R^2 train: %.6f, R^2 test: %.6f' % (r2_train, r2_test))
def main():
    """Load a pickled QM dataset (path from argv[1]), fit a default
    MultiTaskLasso on a 5000-sample training split, and print the
    per-target MAE on the 2211-sample test split.
    (Python 2 source: print statements and xrange.)"""
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)
    # dataset keys: 'XX' features, 'T' targets, 'names' target labels.
    X, Y, labels = dataset['XX'], dataset['T'], dataset['names']
    #5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=2211, random_state=42)
    print 'Len X train , test:', len(X_train), len(X_test)
    regressor = MultiTaskLasso().fit(X_train, Y_train)
    #r = SVR()
    #regressor = multiTargetRegressor(rObject=r).fit(X_train,Y_train)
    Y_pred = regressor.predict(X_test)
    print Y_pred
    print 'Y_pred', Y_pred.shape
    # One MAE per target column; trailing comma keeps label and value on
    # the same output line (Python 2 print semantics).
    for i in xrange(len(labels)):
        print '*** MAE ', labels[i],
        print mean_absolute_error(Y_test[:, i], Y_pred[:, i])
class MultiTaskLassoImpl:
    """Thin adapter around the operator `Op`, exposing the conventional
    estimator interface (fit/predict) and forwarding all hyperparameters."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; `y` is forwarded only when provided.
        Returns self to allow chaining."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped model."""
        return self._wrapped_model.predict(X)
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    """Train one baseline configuration and append its validation metrics
    (skill and cosine similarity) to `log_file`.

    Parameters
    ----------
    model_name : str
        One of "multi_task_lasso" or "xgboost".
    parameters : dict
        Extra keyword arguments forwarded to the underlying estimator.

    Raises
    ------
    ValueError
        If `model_name` is not a supported baseline.
    """
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10, objective="reg:squarederror", verbosity=0,
                         **parameters))
    else:
        # Previously an unknown name fell through to an UnboundLocalError on
        # `model.fit`; fail fast with a clear message instead.
        raise ValueError(f"unsupported model_name: {model_name!r}")

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    # The baseline exposes a callable so the metric code never sees the raw
    # estimator or the featurizer.
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
class LELM:
    """Lasso Extreme Learning Machine: a fixed random hidden layer with a
    sigmoid activation, followed by a MultiTaskLasso readout."""

    # Range of the uniform distribution for the random hidden weights/biases.
    upper_bound = 1.
    lower_bound = -1.

    def __init__(self, n_hidden, C=1., max_iter=10000):
        self.n_hidden = n_hidden
        self.C = C              # lasso regularization strength (alpha)
        self.max_iter = max_iter

    def fit(self, X, y):
        """Fit the lasso readout on the random hidden representation of X.

        `y` may be a 1-d vector of class labels (one-hot encoded internally)
        or an already-encoded 2-d indicator matrix. Returns self
        (sklearn convention; the original returned None).
        """
        # Defensive copies: neither caller array is modified.
        X, y = copy.deepcopy(X), copy.deepcopy(y)
        self.sample_weight = None
        if len(y.shape) != 2:
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            y = self.__one2array(y, self.n_classes_)
        else:
            self.classes_ = np.arange(y.shape[1])
            self.n_classes_ = len(self.classes_)
        # Random, untrained input->hidden projection (the "extreme" part).
        self.W = np.random.uniform(self.lower_bound, self.upper_bound,
                                   size=(X.shape[1], self.n_hidden))
        self.b = np.random.uniform(self.lower_bound, self.upper_bound,
                                   size=self.n_hidden)
        H = expit(np.dot(X, self.W) + self.b)
        self.multi_lasso = MultiTaskLasso(self.C,
                                          max_iter=self.max_iter).fit(H, y)
        return self

    def __one2array(self, y, n_dim):
        """One-hot encode integer labels `y` into an (n_samples, n_dim) array."""
        y_expected = np.zeros((y.shape[0], n_dim))
        for i in range(y.shape[0]):
            y_expected[i][y[i]] = 1
        return y_expected

    def predict(self, X):
        """Predict class indices as the argmax over the readout's outputs."""
        H = expit(np.dot(X, self.W) + self.b)
        output = self.multi_lasso.predict(H)
        return output.argmax(axis=1)
def main():
    """Load a pickled QM dataset (path from argv[1]), fit a default
    MultiTaskLasso on a 5000-sample training split, and print the
    per-target MAE on the 2211-sample test split.
    (Python 2 source: print statements and xrange.)"""
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)
    # dataset keys: "XX" features, "T" targets, "names" target labels.
    X, Y, labels = dataset["XX"], dataset["T"], dataset["names"]
    # 5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=2211, random_state=42)
    print "Len X train , test:", len(X_train), len(X_test)
    regressor = MultiTaskLasso().fit(X_train, Y_train)
    # r = SVR()
    # regressor = multiTargetRegressor(rObject=r).fit(X_train,Y_train)
    Y_pred = regressor.predict(X_test)
    print Y_pred
    print "Y_pred", Y_pred.shape
    # One MAE per target column; the trailing comma keeps the label and the
    # value on the same output line (Python 2 print semantics).
    for i in xrange(len(labels)):
        print "*** MAE ", labels[i],
        print mean_absolute_error(Y_test[:, i], Y_pred[:, i])
class classSparser(object):
    """Pushes the classes of a labelled point cloud apart: each class is
    translated onto a scaled copy of its own orthonormal basis vector, and a
    mapping from the original space to the "sparsed" space is learned so
    unseen points can be transformed later.
    (Python 2 source: print statements and xrange.)
    """

    def __init__(self, mapperType='PIMP', support=150, projectOnSubspace=False):
        #options are
        #'PIMP' for Moore Penrose Pseudo Inverse
        #'Regressor' for using a regression task on each dimension
        self.mapperType = mapperType
        self.sparsed_X = None               # class-separated copy of the training data
        self.transformation_matrix = None   # used by the 'PIMP' mapper
        self.Regressor = None               # used by the 'Regressor' mapper
        self.support = support              # minimum separation added between classes
        self.projectOnSubspace = projectOnSubspace

    def fit(self, X, Y):
        """Build the class-separated ("sparsed") training set and fit the
        chosen mapper. Returns (sparsed_X, newY): the displaced points and
        their labels, re-ordered class by class."""
        self.sparsed_X = list()
        #First, translate points to the origin
        main_centroid = [np.mean(x) for x in np.transpose(X)]
        print 'Main centroid:', main_centroid
        X = X - main_centroid
        # Group the centred samples by class label.
        byClassDict = defaultdict(list)
        for i in xrange(len(Y)):
            byClassDict[Y[i]].append(X[i])
        class_centroids = dict()
        centroids_matrix = list()
        kindexmap = dict()  # class label -> row index in centroids_matrix
        _i = 0
        for k in byClassDict:
            class_centroid = [
                np.mean(x) for x in np.transpose(byClassDict[k])
            ]  #np.mean(byClassDict[k])
            _norm = np.linalg.norm(class_centroid)
            _scaling_factor = _norm**2  #(i+1)**2 #+ (i+_norm) #Play with this using _norm, i and any other function/constant
            _centroid = np.array(class_centroid)  #*(_scaling_factor)
            print '*** Class centroid:', _centroid
            class_centroids[k] = _centroid
            centroids_matrix.append(_centroid)
            kindexmap[k] = _i
            _i += 1
        # Orthonormalise the class centroids (Gram-Schmidt, then unit norm)
        # to get one basis vector per class.
        centroids_matrix = np.array(centroids_matrix)
        ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
        ortho_centroids_matrix = normalize(ortho_centroids_matrix)
        print '*Centroids matrix', centroids_matrix
        print '*Ortho centroids matrix', ortho_centroids_matrix
        newX, newY = list(), list()
        ks = list()  # classes already placed
        for k in byClassDict:
            #byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]]) #class_centroids[k]
            #this is the basis vector corresponding to current class
            classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
            kScalingFactor = self.support
            #This section tries to get a good scaling factor for each orthonormal vector
            maxks = list()
            for _k in ks:
                projs = [scalarProjection(x, classvector) for x in byClassDict[_k]]
                maxk = max(projs)
                maxks.append(maxk)
            maxownk = max([scalarProjection(x, classvector) for x in byClassDict[k]])
            if len(ks):
                # Push this class far enough along its basis vector to clear
                # the projections of every previously placed class.
                kScalingFactor = max(maxks) + abs(maxownk) + self.support
            for v in byClassDict[k]:
                vv = np.array(v) - centroids_matrix[kindexmap[k]] + classvector * kScalingFactor
                self.sparsed_X.append(vv)
                newX.append(v)
                newY.append(k)
            ks.append(k)
        self.sparsed_X = np.array(self.sparsed_X)
        if self.projectOnSubspace:
            #Project on to new subspace spanned by class vectors
            self.sparsed_X = np.dot(self.sparsed_X, np.transpose(centroids_matrix))
        if self.mapperType == 'PIMP':
            #self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
            #self.sparsed_X = self.scaler.transform(self.sparsed_X)
            # NOTE(review): `*` is element-wise unless operands are np.matrix;
            # confirm a true matrix product was intended here.
            self.transformation_matrix = self.sparsed_X * (np.transpose(np.linalg.pinv(X)))
            #self.transformation_matrix = X*(np.transpose(np.linalg.pinv(self.sparsed_X) ) )
        if self.mapperType == 'Regressor':
            self.Regressor = MultiTaskLasso(alpha=0.00000001, max_iter=2000)
            self.Regressor.fit(newX, self.sparsed_X)
        return self.sparsed_X, newY

    def transform(self, X):
        """Map new points into the sparsed space using the fitted mapper."""
        Xs = X  #self.scaler.transform(X)
        if self.mapperType == 'PIMP':
            transformed_data = self.transformation_matrix * Xs
            #transformed_data = Xs*self.transformation_matrix
        if self.mapperType == 'Regressor':
            transformed_data = self.Regressor.predict(Xs)
        return transformed_data
import numpy as np
import pandas as pd  # bug fix: pd.read_table/pd.concat are used below but pandas was never imported
from src.common.my_data import Data
from sklearn.linear_model import LassoCV
from sklearn.linear_model import MultiTaskLasso

# Train a MultiTaskLasso mapping the "agg" features of users WITH app-log
# history to their TF-IDF event representation, then use it to predict that
# representation for test users WITHOUT log history.
data = Data()
agg_train_have_log = pd.read_table(data.output.sorted_train_agg_have_log_usr).drop('USRID', axis=1)
print('agg_train_have_log : ', agg_train_have_log.shape)
agg_test_have_log = pd.read_table(data.output.sorted_test_agg_have_log_usr).drop('USRID', axis=1)
print('agg_test_have_log : ', agg_test_have_log.shape)
# Stack train+test users that have logs: all of them have a known target.
agg_all_have_log = pd.concat([agg_train_have_log, agg_test_have_log], axis=0)
print('agg_all_have_log : ', agg_all_have_log.shape)
tf_idf_all_have_log = pd.read_table(data.feature.tf_idf_have_log_usr_evt_all)
tf_idf_all_have_log_name = tf_idf_all_have_log.head(0)  # empty frame: column names only
print(tf_idf_all_have_log_name)
print('tf_idf_all_have_log : ', tf_idf_all_have_log.shape)
# print(tf_idf_all)
agg_no_have_log = pd.read_table(data.output.sorted_test_agg_no_have_log_usr).drop('USRID', axis=1)
print('agg_no_have_log : ', agg_no_have_log.shape)
lasso = MultiTaskLasso()
lasso.fit(agg_all_have_log, tf_idf_all_have_log)
result_lasso = lasso.predict(agg_no_have_log)
print(result_lasso)
# result_csv = pd.DataFrame(result_lasso)
# data.to_csv(data.output.prediction_test_no_log_tf_idf, index=False, sep='\t')
k_fold = KFold(Y_train_raw.shape[0], n_folds=10) for train, test in k_fold: X1 = X_train_reduced[train] Y1 = Y_train_raw[train] X2 = X_train_reduced[test] Y2 = Y_train_raw[test] ## Train Classifiers on fold mcl_clf = MultiTaskLasso(alpha=.3) mcl_clf.fit(X1, Y1) ## Score Classifiers on fold mcl_clf_score = mcl_clf.score(X2, Y2) print "MultiTaskLasso: ", mcl_clf_score ## Lasso CV for parameter optimization t1 = time.time() clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw) t_lasso_cv = time.time() - t1 print 'time to train', t_lasso_cv Y_predicted = clf.predict(X_test_reduced) ## Save results to csv np.savetxt('prediction.csv', Y_predicted, fmt='%.5f',delimiter=',')
# Synthetic multi-task regression demo: only the first `rel_f` features are
# relevant (smooth sinusoidal coefficients across tasks). Compares a plain
# Lasso against a MultiTaskLasso and plots both predictions for task 1.
n_samples, n_features, n_tasks, rel_f = 100, 40, 12, 7

coef = np.zeros((n_tasks, n_features))
times = np.linspace(0, 2 * np.pi, n_tasks)
# Random frequency/phase per relevant feature (RNG call order preserved).
for k in range(rel_f):
    coef[:, k] = np.sin((1.0 + rr.randn(1)) * times + 3 * rr.randn(1))

X = rr.randn(n_samples, n_features)
y = X @ coef.T + rr.randn(n_samples, n_tasks)

# Last 20 samples held out for testing.
X_train, X_test = X[:-20], X[-20:]
y_train, y_test = y[:-20], y[-20:]

print("Fitting LASSO model...")
ll = Lasso(alpha=0.45).fit(X_train, y_train)
print("R2 score: {0}".format(r2_score(y_test, ll.predict(X_test))))

print("Fitting Multitask LASSO model...")
ml = MultiTaskLasso(alpha=0.45).fit(X_train, y_train)
print("R2 score: {0}".format(r2_score(y_test, ml.predict(X_test))))

print("Plotting predictions...")
plt.scatter(X[:, 1], y[:, 1])
plt.scatter(X[:, 1], ll.predict(X)[:, 1], color="blue")
plt.scatter(X[:, 1], ml.predict(X)[:, 1], color="red")
plt.show()
# # X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.4, random_state=0) import sys sys.path.insert(0, 'C:\\r workspace\\MultiSconES\\py') from load_data import load_dataset dataset = load_dataset() X = dataset["data"] Y = dataset["labels"] X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42) clf = MultiTaskLasso(alpha=1) print "train start" clf.fit(X_train, Y_train) print "train end" print "coef start" coef_multi_task_lasso_ = clf.coef_ print "coef end" plot_coef(coef_multi_task_lasso_) zero_coefs = get_stats(coef_multi_task_lasso_) print len(zero_coefs) Y_pred = clf.predict(X_test) clf_score = clf.score(X_test, Y_test) score = r2_score(Y_test[:, 5], Y_pred[:, 5])
precedent[4:7, :, :, :] = block[i - 337:i - 334, :, :, :] # 前一周 precedent_frames.append(precedent) #regr = (max_depth=8, random_state=0,n_estimators=1000) model = MultiTaskLasso(alpha=1) X_train, X_val, y_train, y_val = train_test_split(precedent_frames, label_frames, test_size=0.2, random_state=4) # 转化为5D的numpy数组,训练集(920,7,64,64,2), 测试集(231,1,64,64,2) X_train = np.array(X_train) y_train = np.array(y_train) X_val = np.array(X_val) y_val = np.array(y_val) print(X_train.shape) print(X_val.shape) print(y_train.shape) print(y_val.shape) # 把5D数据转化为randomForest输入的2D数据 X_train = X_train.reshape((920, 7 * 64 * 64 * 2)) X_val = X_val.reshape((231, 7 * 64 * 64 * 2)) y_train = y_train.reshape((920, 1 * 64 * 64 * 2)) y_val = y_val.reshape((231, 1 * 64 * 64 * 2)) model.fit(X_train, y_train) y_pred = model.predict(X_val) from sklearn.metrics import mean_squared_error print(mean_squared_error(y_val, y_pred))
# Goodness-of-fit decomposition for the OMP-CV model fitted earlier in the file.
tss, rss, ess, r2 = xss(Y, ompCV.predict(X))
print "TSS(Total Sum of Squares): ", tss
print "RSS(Residual Sum of Squares): ", rss
print "ESS(Explained Sum of Squares): ", ess
print "R^2: ", r2
print "\n**********测试MultiTaskLasso类**********"
# When constructing MultiTaskLasso, the alpha parameter may be specified; its default is 1.0.
multiTaskLasso = MultiTaskLasso(alpha=1.0)
# Fit on the training set.
multiTaskLasso.fit(train_X, train_Y)
# Print the model's coefficients and intercept.
print "系数:", multiTaskLasso.coef_
print "截距:", multiTaskLasso.intercept_
print '训练集R2: ', r2_score(train_Y, multiTaskLasso.predict(train_X))
# For linear regression models, performance on the test set is usually judged
# by the Mean Squared Error (MSE) or the Root Mean Squared Error (RMSE).
test_Y_pred = multiTaskLasso.predict(test_X)
print "测试集得分:", multiTaskLasso.score(test_X, test_Y)
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
print "测试集R2:", r2_score(test_Y, test_Y_pred)
# Same decomposition over the full data set for the MultiTaskLasso model.
tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X))
print "TSS(Total Sum of Squares): ", tss
print "RSS(Residual Sum of Squares): ", rss
print "ESS(Explained Sum of Squares): ", ess
print "R^2: ", r2
class classSparser(object):
    """Pushes the classes of a labelled point cloud apart: each class is
    translated onto a scaled copy of its own orthonormal basis vector, and a
    mapping from the original space to the "sparsed" space is learned so
    unseen points can be transformed later.
    (Python 2 source: print statements and xrange. Duplicate of the earlier
    classSparser in this file, reformatted.)
    """

    def __init__(self, mapperType='PIMP', support=150, projectOnSubspace=False):
        #options are
        #'PIMP' for Moore Penrose Pseudo Inverse
        #'Regressor' for using a regression task on each dimension
        self.mapperType = mapperType
        self.sparsed_X = None               # class-separated copy of the training data
        self.transformation_matrix = None   # used by the 'PIMP' mapper
        self.Regressor = None               # used by the 'Regressor' mapper
        self.support = support              # minimum separation added between classes
        self.projectOnSubspace = projectOnSubspace

    def fit(self, X, Y):
        """Build the class-separated ("sparsed") training set and fit the
        chosen mapper. Returns (sparsed_X, newY): the displaced points and
        their labels, re-ordered class by class."""
        self.sparsed_X = list()
        #First, translate points to the origin
        main_centroid = [np.mean(x) for x in np.transpose(X)]
        print 'Main centroid:', main_centroid
        X = X - main_centroid
        # Group the centred samples by class label.
        byClassDict = defaultdict(list)
        for i in xrange(len(Y)):
            byClassDict[Y[i]].append(X[i])
        class_centroids = dict()
        centroids_matrix = list()
        kindexmap = dict()  # class label -> row index in centroids_matrix
        _i = 0
        for k in byClassDict:
            class_centroid = [
                np.mean(x) for x in np.transpose(byClassDict[k])
            ]  #np.mean(byClassDict[k])
            _norm = np.linalg.norm(class_centroid)
            _scaling_factor = _norm**2  #(i+1)**2 #+ (i+_norm) #Play with this using _norm, i and any other function/constant
            _centroid = np.array(class_centroid)  #*(_scaling_factor)
            print '*** Class centroid:', _centroid
            class_centroids[k] = _centroid
            centroids_matrix.append(_centroid)
            kindexmap[k] = _i
            _i += 1
        # Orthonormalise the class centroids (Gram-Schmidt, then unit norm)
        # to get one basis vector per class.
        centroids_matrix = np.array(centroids_matrix)
        ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
        ortho_centroids_matrix = normalize(ortho_centroids_matrix)
        print '*Centroids matrix', centroids_matrix
        print '*Ortho centroids matrix', ortho_centroids_matrix
        newX, newY = list(), list()
        ks = list()  # classes already placed
        for k in byClassDict:
            #byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]]) #class_centroids[k]
            #this is the basis vector corresponding to current class
            classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
            kScalingFactor = self.support
            #This section tries to get a good scaling factor for each orthonormal vector
            maxks = list()
            for _k in ks:
                projs = [
                    scalarProjection(x, classvector) for x in byClassDict[_k]
                ]
                maxk = max(projs)
                maxks.append(maxk)
            maxownk = max(
                [scalarProjection(x, classvector) for x in byClassDict[k]])
            if len(ks):
                # Push this class far enough along its basis vector to clear
                # the projections of every previously placed class.
                kScalingFactor = max(maxks) + abs(maxownk) + self.support
            for v in byClassDict[k]:
                vv = np.array(v) - centroids_matrix[
                    kindexmap[k]] + classvector * kScalingFactor
                self.sparsed_X.append(vv)
                newX.append(v)
                newY.append(k)
            ks.append(k)
        self.sparsed_X = np.array(self.sparsed_X)
        if self.projectOnSubspace:
            #Project on to new subspace spanned by class vectors
            self.sparsed_X = np.dot(self.sparsed_X,
                                    np.transpose(centroids_matrix))
        if self.mapperType == 'PIMP':
            #self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
            #self.sparsed_X = self.scaler.transform(self.sparsed_X)
            # NOTE(review): `*` is element-wise unless operands are np.matrix;
            # confirm a true matrix product was intended here.
            self.transformation_matrix = self.sparsed_X * (np.transpose(
                np.linalg.pinv(X)))
            #self.transformation_matrix = X*(np.transpose(np.linalg.pinv(self.sparsed_X) ) )
        if self.mapperType == 'Regressor':
            self.Regressor = MultiTaskLasso(alpha=0.00000001, max_iter=2000)
            self.Regressor.fit(newX, self.sparsed_X)
        return self.sparsed_X, newY

    def transform(self, X):
        """Map new points into the sparsed space using the fitted mapper."""
        Xs = X  #self.scaler.transform(X)
        if self.mapperType == 'PIMP':
            transformed_data = self.transformation_matrix * Xs
            #transformed_data = Xs*self.transformation_matrix
        if self.mapperType == 'Regressor':
            transformed_data = self.Regressor.predict(Xs)
        return transformed_data
# Plain-Lasso predictions on the test set; only output column 1 is kept.
y_pred_lasso = lasso_model.predict(X_test)[:, 1]
# NOTE(review): the same 1-D prediction vector is scored against every class
# column i below -- confirm this is intended rather than y_pred[:, i].
for i in range(n_relevant_features):
    fpr_lasso[i], tpr_lasso[i], _ = roc_curve(y_test_classes[:, i], y_pred_lasso[:])
"""##MultiTaskLasso Model Also computes false positive rate and true positive rate for each relevant feature """
# Fit the multi-task (L1/L2) lasso on the full data and keep its coefficients.
multi_task_model = MultiTaskLasso(alpha=1.).fit(X, y)
multi_task_lasso_coefficients = multi_task_model.coef_
# Per-feature ROC points for the multi-task model (same caveat as above).
fpr_l1l2 = dict()
tpr_l1l2 = dict()
y_pred_l1l2 = multi_task_model.predict(X_test)[:, 1]
for i in range(n_relevant_features):
    fpr_l1l2[i], tpr_l1l2[i], _ = roc_curve(y_test_classes[:, i], y_pred_l1l2[:])
"""##ROC Curve ROC Curve for GFLasso, Lasso and MultiTaskLasso Models """
from matplotlib import pyplot as plt
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
# Only feature index 2 is plotted for each of the three models.
plt.plot(fpr_lasso[2], tpr_lasso[2], label='Lasso')
plt.plot(fpr_l1l2[2], tpr_l1l2[2], label='l1l2')
plt.plot(fpr_gfl[2], tpr_gfl[2], label='GFLasso')
plt.xlabel('False positive rate')
# Choose alpha by 10-fold MultiTaskLassoCV, refit a MultiTaskLasso with it,
# and report MAE, RMSLE and MAPE on a 20% hold-out split.
path_test = 'data_test.txt'
X, Y = get_data_own(path_train)
print(X.shape)
print(Y.shape)
print("Split data for CV")
X_train, X_test , y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
lasso = MultiTaskLasso(max_iter = max_iter, normalize = True)
print("Init train with multitasklassocv")
# alphas=None lets the CV estimator construct its own regularization path.
lassocv = MultiTaskLassoCV(alphas=None, cv=10, max_iter=max_iter, verbose=True, normalize=True)
lassocv.fit(X_train, y_train)
print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)
print("get mean square error")
# Predict once and reuse -- the original recomputed predict() for every metric.
y_pred = lasso.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("mae: {}".format(mae))
# Bug fix: mean_squared_log_error returns the *squared* log error; take the
# square root so the printed value really is the RMSLE.
rmsle = mean_squared_log_error(y_test, y_pred) ** 0.5
print("rmsle: {}".format(rmsle))
mape = mean_absolute_percentage_error(y_test, y_pred)
print("mape: {}".format(mape))
class SparseRegression:
    """Sparse (lasso) identification of a linear model plus a forcing term.

    The derivatives of the first ``q`` variables of ``v`` are fit as a sparse
    linear combination of features ``f(v[:, :q])``; the residual of that fit
    is then regressed on features of the remaining variables ``f(v[:, q:])``
    to model the forcing. Two disjoint time-step masks decide which steps
    train which of the two regressions.
    """

    def __init__(self, v, delta_v, f, q, lin_args=(), force_args=(),
                 split='shuffle', split_kargs=None):
        """
        v.shape = (n_steps, n_variables), delta_v.shape = (n_steps, n_variables)
        q (in [1, n_variables]) number of first variables to fit the linear
        model to, remaining n_variables-q are used as forcing
        f: (n_steps, n_variables) -> (n_steps, n_features)
        f will be called with f(..., *lin_args) when fitting the linear model
        and with f(..., *force_args) when fitting the force
        """
        # Fix: the original used the mutable default `split_kargs={}`.
        split_kargs = {} if split_kargs is None else split_kargs
        if v.shape == delta_v.shape and type(q) == int and q > 0 \
                and q <= v.shape[1]:
            self.v, self.delta_v = self._check_reduce(v, delta_v)
            self.params = [*self.v.shape, q]  # [n_steps, n_vars, q]
            # derivatives used for the model
            self.delta_v = self.delta_v[:, :q]
            # calculate features based on first q variables for linear model
            self.features_lin_model = f(self.v[:, :q], *lin_args)
            # calculate features based on remaining variables for forcing term
            self.features_forcing = f(self.v[:, q:], *force_args)
            # two different types of splitting
            split_dict = {
                'shuffle': self._shuffle_split,
                'lorenz': self._lobes_split
            }
            # split the timesteps into two parts:
            # first is used for fitting linear model, second for forcing
            self.mask_l_m, self.mask_f = split_dict[split](**split_kargs)
            # self.mask_l_m, self.mask_f = self._split_lobes(self.v[:, 0])
            self.feature_generation = f
            self.feature_generation_args = {
                'linear': lin_args,
                'forcing': force_args
            }
        else:
            raise Exception('Error: invalid init parameter')

    def _shuffle_split(self, fraction=0.5):
        """ creates two masks to split n_steps elements into two disjunct
        sets where the first has length=fraction*n """
        assert fraction > 0 and fraction < 1
        n_steps = self.params[0]
        n_1 = int(n_steps * fraction)
        shuffled_ind = np.random.permutation(n_steps)
        ind_1 = shuffled_ind[:n_1]
        # Fix: `np.bool` (deprecated alias) was removed in NumPy 1.24; the
        # builtin `bool` is the documented replacement.
        mask_1 = np.zeros(n_steps, dtype=bool)
        mask_1[ind_1] = True
        ind_2 = shuffled_ind[n_1:]
        mask_2 = np.zeros(n_steps, dtype=bool)
        mask_2[ind_2] = True
        # each element is part of either one or the other mask
        assert np.all(mask_1 ^ mask_2)
        return mask_1, mask_2

    def _lobes_split(self, window_pos=200, window_neg=400):
        """ use regions in which trajectories are on the lobes to fit the
        linear model and the remaining steps for modeling the force """
        v_1 = self.v[:, 0]
        n_steps = self.params[0]
        # find lobe switches (sign changes of the first variable)
        m_pos = v_1 > 0
        m_neg = v_1 < 0
        mask_switch = (m_pos[:-1] & m_neg[1:]) | (m_neg[:-1] & m_pos[1:])
        switch_ind = np.nonzero(mask_switch)[0]
        print('no. of lobe switches detected in v_1: {:d}'.format(
            len(switch_ind)))
        # Collect a window of steps around each switch, clipped at the ends.
        force_ind_list = []
        for switch in switch_ind:
            if switch + 1 - window_neg < 0:
                l_neg = switch
            else:
                l_neg = window_neg
            if switch + 1 + window_pos > n_steps:
                l_pos = n_steps - switch
            else:
                l_pos = window_pos
            force_ind_list.append(np.arange(switch - l_neg, switch + l_pos))
        force_ind = np.concatenate(force_ind_list)
        assert np.all(force_ind >= 0) and np.all(force_ind < n_steps)
        mask_lobes = np.ones(n_steps, dtype=bool)   # np.bool -> bool (see above)
        mask_lobes[force_ind] = False
        mask_switch = np.zeros(n_steps, dtype=bool)
        mask_switch[force_ind] = True
        assert np.all(mask_lobes ^ mask_switch)
        return mask_lobes, mask_switch

    def _check_reduce(self, v, delta_v):
        """ check both matrices for columns containg nan and excludes them """
        invalid_v = np.any(np.isnan(v), axis=1)
        if np.any(invalid_v):
            print('Warning: v matrix contains NaNs')
        invalid_delta_v = np.any(np.isnan(delta_v), axis=1)
        if np.any(invalid_delta_v):
            print('Warning: delta_v matrix contains NaNs')
        valid_steps = (~invalid_v) & (~invalid_delta_v)
        valid_fraction = np.sum(valid_steps) / len(valid_steps)
        if not np.isclose(valid_fraction, 1):
            print('Warning: only {:.1%} of time steps are valid'.format(
                valid_fraction))
            if valid_fraction < 0.95:
                raise Exception('Error: less than 95% of time steps are valid')
        return v[valid_steps], delta_v[valid_steps]

    def fit_lin_model(self, alpha=None):
        """ fit sparse linear regression on first q variables
        alpha is penalization parameter, None triggers cross validation """
        # Fix: the `normalize` keyword was removed from scikit-learn in 1.2;
        # `normalize=False` was the old default, so dropping it is identical.
        if alpha is None:
            # do cross validation
            self.lin_model = \
                MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                 fit_intercept=False, max_iter=3500)
        else:
            self.lin_model = \
                MultiTaskLasso(alpha=alpha, fit_intercept=False)
        self.lin_model.fit(self.features_lin_model[self.mask_l_m],
                           self.delta_v[self.mask_l_m])

    def pred_lin_model(self):
        """ calculate prediction of the linear model on the data set not
        used for training it """
        pred_d_v = self.lin_model.predict(self.features_lin_model[self.mask_f])
        d_v = self.delta_v[self.mask_f]
        # calculate correlation for each variable
        n_variables = d_v.shape[1]
        print('corr. of prediction and true delta_v:')
        for i in range(n_variables):
            r, p = pearsonr(pred_d_v[:, i], d_v[:, i])
            print('{:d}th variable: r={:.2f} (p={:.2f})'.format(i + 1, r, p))
        self.eps = d_v - pred_d_v  # d_v - Af(v)

    def fit_force_params(self, alpha=None):
        """ fit sparse linear regression on remaining n_variables-q variables
        alpha is penalization parameter, None triggers cross validation """
        if alpha is None:
            # do cross validation
            self.force_model = \
                MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                 fit_intercept=False)
        else:
            self.force_model = \
                MultiTaskLasso(alpha=alpha, fit_intercept=False)
        self.force_model.fit(self.features_forcing[self.mask_f], self.eps)

    def fit(self, alpha_lin=None, alpha_force=None):
        """Fit the linear model, compute its residual, then fit the forcing."""
        self.fit_lin_model(alpha=alpha_lin)
        self.pred_lin_model()
        self.fit_force_params(alpha=alpha_force)

    def plot_coefs(self, f_descr=None):
        """ plot coef matrix of linear and force model
        f_descr(n_vars, offset, *args) -> n_features """
        n_f_lin_model = self.features_lin_model.shape[1]
        n_f_forcing = self.features_forcing.shape[1]
        q = self.params[-1]
        if f_descr is not None:
            # get names of the features
            f_lin_model_str = f_descr(q, 0,
                                      *self.feature_generation_args['linear'])
            f_forcing_str = f_descr(self.v.shape[1] - q, q,
                                    *self.feature_generation_args['forcing'])
            assert len(f_lin_model_str) == n_f_lin_model
            assert len(f_forcing_str) == n_f_forcing
        else:
            f_lin_model_str = \
                [str(i) for i in range(n_f_lin_model)]
            f_forcing_str = \
                [str(i) for i in range(n_f_forcing)]
        # Subplot widths proportional to each model's feature count.
        n_f = n_f_lin_model + n_f_forcing
        fractions = (n_f_lin_model / n_f, n_f_forcing / n_f)
        fig, axes = plt.subplots(ncols=2, sharey=True,
                                 gridspec_kw={'width_ratios': fractions})
        plt.subplots_adjust(wspace=0.2)
        a = self.lin_model.coef_
        b = self.force_model.coef_
        assert a.shape[0] == b.shape[0]
        n_vars = a.shape[0]
        # Symmetric color scale around zero for both matrices.
        max_abs_coef = max(abs(a.min()), abs(b.min()), a.max(), b.max())
        titles = ['A', 'B']
        matrices = [a, b]
        ticklabels = [f_lin_model_str, f_forcing_str]
        for i, ax in enumerate(axes):
            ax.set_title(titles[i])
            im = ax.imshow(matrices[i], vmin=-max_abs_coef,
                           vmax=max_abs_coef, origin='upper', cmap='seismic')
            ax.set_xticks(np.arange(len(ticklabels[i])))
            ax.set_xticklabels(ticklabels[i], rotation=45)
            ax.set_xlabel('features')
            ax.set_yticks(np.arange(n_vars))
            ax.set_yticklabels(
                ['$v_{:d}$'.format(i + 1) for i in range(n_vars)])
        axes[0].set_ylabel('variables')
        plt.colorbar(im, ax=axes, fraction=0.05, shrink=0.75)

    def _dv(self, t, v, force):
        """ v.shape = (q,) force(t) """
        # linear part
        lin_args = self.feature_generation_args['linear']
        features_lin = \
            self.feature_generation(v.reshape(1, -1), *lin_args).squeeze()
        lin_contr = np.dot(self.lin_model.coef_, features_lin)
        # forcing part
        force_args = self.feature_generation_args['forcing']
        features_force = \
            self.feature_generation(force(t).reshape(1, -1),
                                    *force_args).squeeze()
        force_contr = np.dot(self.force_model.coef_, features_force)
        dv = lin_contr + force_contr
        return dv

    def solve_model(self, dt, ind_v_init, force=None):
        """ use time serie of the force variables and simulate the system
        from ind_v_init """
        n_steps, n_vars, q = self.params
        v_init = self.v[ind_v_init, :q]
        # resemble the timesteps at which the original data was evaluated
        n_remaining = n_steps - ind_v_init
        t_remaining = dt * (n_remaining - 1)
        t_eval = np.linspace(0, t_remaining, num=n_remaining)
        if force is None:
            # No forcing supplied: integrate with a zero force signal.
            def f_dummy(t):
                return np.zeros(n_vars - q)
            dv = partial(self._dv, force=f_dummy)
        elif force.shape == (n_remaining, n_vars - q):
            f_interp = interp1d(t_eval, force, axis=0, kind='quadratic')
            dv = partial(self._dv, force=f_interp)
        else:
            raise Exception('invalid force')
        result = solve_ivp(dv, [0, t_remaining], v_init, t_eval=t_eval,
                           method='RK45', rtol=1e-6, atol=1e-12)
        print(result.message)
        return result