def _count_selected_rows(W):
    """Return how many rows of ``W`` contain at least one non-zero entry."""
    return int((abs(W).sum(1) > 0).sum())


def get_signature_genes(X, n, lda=10):
    """Pick at most ``n`` rows of ``X`` using a self-regressing MultiTaskLasso.

    ``X.T`` is regressed onto itself and the regularisation strength is
    tuned in three passes until the number of rows of the transposed
    coefficient matrix that carry any non-zero weight is no larger than
    ``n``:

    1. coarse search from the bottom: divide ``lda`` by 10 until at least
       ``n`` rows are selected;
    2. fine search from the top: double ``lda`` until at most ``1.2 * n``
       rows are selected;
    3. finer search: grow ``lda`` by 10% until at most ``n`` rows remain.

    Because the last two passes can overshoot, the result may contain
    fewer than ``n`` indices.

    Parameters
    ----------
    X : 2-D array
        Data matrix; candidates for selection are its rows (presumably
        genes, judging by the function name -- confirm with callers).
    n : int
        Target number of rows to select.  ``0`` yields an empty result.
    lda : float, optional
        Starting regularisation strength.  It is divided by 10 before the
        first fit.

    Returns
    -------
    numpy.ndarray
        Indices of the rows of ``X`` whose coefficients are not all zero.

    Raises
    ------
    ValueError
        If ``n`` is negative or larger than ``X.shape[0]``; neither target
        can be reached by the search.
    RuntimeError
        If ``lda`` underflows to zero before ``n`` rows are selected (for
        example when fewer than ``n`` rows can ever receive weight).
    """
    n_rows = X.shape[0]
    if n < 0:
        raise ValueError("n must be non-negative, got {}".format(n))
    if n > n_rows:
        # Without this check the coarse search below would never terminate.
        raise ValueError(
            "n ({}) exceeds the number of rows in X ({})".format(n, n_rows)
        )

    W = np.zeros((n_rows, n_rows))

    # Coarse search from the bottom: weaken the penalty until enough rows
    # are selected.  For n >= 1 this loop always runs at least once, so
    # ``model`` is bound before the later passes use it.
    while _count_selected_rows(W) < n:
        lda /= 10.
        if lda == 0:
            # The penalty cannot get any weaker; bail out instead of
            # refitting the same model forever.
            raise RuntimeError(
                "regularisation strength underflowed to zero before "
                "{} rows were selected".format(n)
            )
        model = MultiTaskLasso(alpha=lda, max_iter=100, tol=.001,
                               selection='random', warm_start=True)
        model.fit(X.T, X.T)
        W = model.coef_.T

    # Fine search from the top: strengthen the penalty in big steps until
    # the selection is within 20% of the target.
    while _count_selected_rows(W) > n * 1.2:
        lda *= 2.
        model.set_params(alpha=lda)
        model.fit(X.T, X.T)
        W = model.coef_.T

    # Finer search: small steps until no more than n rows remain.
    while _count_selected_rows(W) > n:
        lda *= 1.1
        model.set_params(alpha=lda)
        model.fit(X.T, X.T)
        W = model.coef_.T

    return np.nonzero(abs(W).sum(1))[0]
# Train a MultiTaskLasso whose alpha is picked by cross-validation and report
# error metrics on a 20% hold-out split.
path_test = 'data_test.txt'

# `get_data_own`, `path_train` and `max_iter` are not defined in this section;
# they are expected to be provided earlier in the file.
X, Y = get_data_own(path_train)
print(X.shape)
print(Y.shape)

print("Split data for CV")
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1)

# NOTE(review): the `normalize` argument was deprecated and later removed from
# scikit-learn's linear models -- confirm the pinned scikit-learn version
# still accepts it.
lasso = MultiTaskLasso(max_iter=max_iter, normalize=True)

print("Init train with multitasklassocv")
lassocv = MultiTaskLassoCV(alphas=None, cv=10, max_iter=max_iter,
                           verbose=True, normalize=True)
lassocv.fit(X_train, y_train)

print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)

print("get mean square error")
# Predict once and reuse the result for every metric.
y_test_pred = lasso.predict(X_test)

mae = mean_absolute_error(y_test, y_test_pred)
print("mae: {}".format(mae))

# mean_squared_log_error yields the *mean squared* log error; take the square
# root of each output's value and average them so that the number reported as
# "rmsle" really is a root-mean-squared log error.
rmsle = (mean_squared_log_error(
    y_test, y_test_pred, multioutput='raw_values') ** 0.5).mean()
print("rmsle: {}".format(rmsle))

mape = mean_absolute_percentage_error(y_test, y_test_pred)
print("mape: {}".format(mape))