# Example 1
def findLassoAlpha(alpha, y, X, returnPred=False):
    """Walk-forward MultiTaskLasso evaluation for one regularisation alpha.

    Trains on an expanding window (initially 2013-10-01..2015-04-01) and,
    for each date in the 2015-05-01..2016-04-01 test range, predicts that
    single date and then absorbs it into the training window.

    Returns (y_test, prediction) when returnPred is true, otherwise the
    mean squared error of the stitched one-step-ahead predictions.
    """
    X_train, X_test = X.loc['2013-10-01':'2015-04-01'], X.loc[
        '2015-05-01':'2016-04-01']
    y_train, y_test = y.loc['2013-10-01':'2015-04-01'], y.loc[
        '2015-05-01':'2016-04-01']
    frames = []
    for dt in y_test.index:
        # max_iter must be an int on recent scikit-learn versions (was 1e5)
        model = MultiTaskLasso(alpha=alpha, max_iter=100000)
        model.fit(X_train, y_train)
        # .loc[[dt]] keeps a one-row DataFrame; Series.reshape was removed
        # from pandas, so the old .loc[dt].reshape(1, -1) now raises.
        y_pred = pd.DataFrame(model.predict(X_test.loc[[dt]]),
                              columns=y.columns)
        frames.append(y_pred)
        # grow the training window through the date just predicted
        X_train = X.loc['2013-10-01':dt]
        y_train = y.loc['2013-10-01':dt]
    prediction = pd.concat(frames)
    prediction.index = y_test.index
    if returnPred:
        return (y_test, prediction)
    return mean_squared_error(y_test, prediction)
# Example 2
    def mtlasso_model(self, X_train, y_train, X_test, y_test):
        """Fit a MultiTaskLasso (alpha=0.005) and print train/test metrics.

        Prints the estimator's R^2 scores followed by MSE and R^2 for both
        the training and test splits.
        """
        estimator = MultiTaskLasso(alpha=.005)
        estimator.fit(X_train, y_train)

        train_predictions = estimator.predict(X_train)
        test_predictions = estimator.predict(X_test)

        # Scoring the model
        print(estimator.score(X_train, y_train))
        print(estimator.score(X_test, y_test))
        print('MSE train: %.6f, MSE test: %.6f' % (mean_squared_error(
            y_train, train_predictions), mean_squared_error(y_test, test_predictions)))
        print('R^2 train: %.6f, R^2 test: %.6f' %
              (r2_score(y_train, train_predictions), r2_score(y_test, test_predictions)))
# Example 3
def main():
    """Load a pickled qmDL dataset, fit a MultiTaskLasso, report per-target MAE.

    Usage: script.py <pickled-dataset-name>.  Python 2 code (print
    statements, xrange).
    """
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)

    # presumably XX = features, T = targets, names = per-target labels
    # -- confirm against qmDL.load
    X, Y, labels = dataset['XX'], dataset['T'], dataset['names']

    #5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=2211,
                                                        random_state=42)
    print 'Len X train , test:', len(X_train), len(X_test)

    # default-parameter multi-task lasso; the commented lines kept an
    # alternative SVR-based multi-target regressor
    regressor = MultiTaskLasso().fit(X_train, Y_train)
    #r = SVR()
    #regressor = multiTargetRegressor(rObject=r).fit(X_train,Y_train)
    Y_pred = regressor.predict(X_test)

    print Y_pred
    print 'Y_pred', Y_pred.shape

    # per-target mean absolute error (the trailing comma keeps the label
    # and its value on one output line)
    for i in xrange(len(labels)):
        print '*** MAE ', labels[i],
        print mean_absolute_error(Y_test[:, i], Y_pred[:, i])
# Example 4
class MultiTaskLassoImpl:
    """Thin adapter that delegates fit/predict to a wrapped ``Op`` estimator."""

    def __init__(self, **hyperparams):
        # Keep the raw hyperparameters and build the underlying estimator.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped estimator; ``y`` is forwarded only when given."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped estimator."""
        return self._wrapped_model.predict(X)
# Example 5
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    """Train one baseline model configuration and log validation metrics.

    Fits either a MultiTaskLasso or an XGBoost multi-output regressor on
    the featurized training covariates, evaluates it with location-wise
    "skill" and "cosine-sim" metrics, and appends one result line to
    ``log_file``.

    Raises:
        ValueError: if ``model_name`` is not a supported model type.
    """
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10,
                         objective="reg:squarederror",
                         verbosity=0,
                         **parameters))
    else:
        # Previously an unknown name fell through and crashed later with
        # an UnboundLocalError on ``model``; fail fast instead.
        raise ValueError("unknown model_name: %r" % (model_name,))

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    # closure over the fitted model so callers evaluate on raw covariates
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
# Example 6
class LELM:
    """Extreme Learning Machine with a multi-task lasso readout.

    A fixed random sigmoid hidden layer maps inputs to ``n_hidden``
    features; a MultiTaskLasso is then fit from the hidden activations to
    one-hot class targets.  ``C`` is forwarded to MultiTaskLasso as its
    alpha (regularisation strength).
    """

    # range for the random input weights and biases
    upper_bound = 1.
    lower_bound = -1.

    def __init__(self, n_hidden, C=1., max_iter=10000):
        self.n_hidden = n_hidden  # hidden-layer width
        self.C = C  # lasso regularisation strength (alpha)
        self.max_iter = max_iter

    def fit(self, X, y):
        """Draw the random hidden layer and fit the lasso readout.

        ``y`` may be a 1-D array of integer labels (one-hot encoded
        internally) or an already-encoded 2-D indicator matrix.
        Returns self, following the scikit-learn convention.
        """
        # work on copies so the caller's arrays are never mutated
        X, y = copy.deepcopy(X), copy.deepcopy(y)
        self.sample_weight = None
        if y.ndim != 2:
            # integer labels -> one-hot targets
            self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            y = self.__one2array(y, self.n_classes_)
        else:
            self.classes_ = np.arange(y.shape[1])
            self.n_classes_ = len(self.classes_)
        # random, untrained input weights and biases (classic ELM scheme)
        self.W = np.random.uniform(self.lower_bound,
                                   self.upper_bound,
                                   size=(X.shape[1], self.n_hidden))
        self.b = np.random.uniform(self.lower_bound,
                                   self.upper_bound,
                                   size=self.n_hidden)
        H = expit(np.dot(X, self.W) + self.b)  # sigmoid hidden activations
        self.multi_lasso = MultiTaskLasso(self.C,
                                          max_iter=self.max_iter).fit(H, y)
        return self

    def __one2array(self, y, n_dim):
        """One-hot encode integer labels into an (n_samples, n_dim) matrix."""
        y_expected = np.zeros((y.shape[0], n_dim))
        y_expected[np.arange(y.shape[0]), y] = 1
        return y_expected

    def predict(self, X):
        """Predict class indices via argmax over the lasso outputs."""
        H = expit(np.dot(X, self.W) + self.b)
        output = self.multi_lasso.predict(H)
        return output.argmax(axis=1)
# Example 7
def main():
    """Load a pickled qmDL dataset, fit a MultiTaskLasso, report per-target MAE.

    Usage: script.py <pickled-dataset-name>.  Python 2 code (print
    statements, xrange).
    """
    pickledname = sys.argv[1]
    _qmDL = qmDL()
    dataset = _qmDL.load(pickledname=pickledname)

    # presumably XX = features, T = targets, names = per-target labels
    # -- confirm against qmDL.load
    X, Y, labels = dataset["XX"], dataset["T"], dataset["names"]

    # 5000 training samples, with 2211 test samples
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=2211, random_state=42)
    print "Len X train , test:", len(X_train), len(X_test)

    # default-parameter multi-task lasso; the commented lines kept an
    # alternative SVR-based multi-target regressor
    regressor = MultiTaskLasso().fit(X_train, Y_train)
    # r = SVR()
    # regressor = multiTargetRegressor(rObject=r).fit(X_train,Y_train)
    Y_pred = regressor.predict(X_test)

    print Y_pred
    print "Y_pred", Y_pred.shape

    # per-target mean absolute error (the trailing comma keeps the label
    # and its value on one output line)
    for i in xrange(len(labels)):
        print "*** MAE ", labels[i],
        print mean_absolute_error(Y_test[:, i], Y_pred[:, i])
class classSparser(object):
    """Spreads classes apart in feature space, then learns a mapping to it.

    fit() translates each class cluster onto its own (scaled) orthonormal
    centroid direction, producing a "sparsed" copy of X; transform() maps
    new points into that space either via a pseudo-inverse-based matrix
    ('PIMP') or a fitted MultiTaskLasso ('Regressor').  Python 2 code.
    """
    def __init__(self,mapperType='PIMP',support=150,projectOnSubspace=False):
        #options are
        #'PIMP' for Moore Penrose Pseudo Inverse
        #'Regressor' for using a regression task on each dimension
        self.mapperType = mapperType
        self.sparsed_X = None
        self.transformation_matrix = None
        self.Regressor = None
        self.support = support  # minimum separation added between classes
        self.projectOnSubspace = projectOnSubspace

    def fit(self,X,Y):
        """Build the class-separated copy of X; returns (sparsed_X, newY)."""
        self.sparsed_X = list()
        #First, translate points to the origin
        main_centroid = [ np.mean(x) for x in np.transpose(X) ]
        print 'Main centroid:', main_centroid
        X = X - main_centroid

        # group samples by class label
        byClassDict = defaultdict(list)
        for i in xrange(len(Y)):
            byClassDict[Y[i]].append(X[i])


        class_centroids = dict()

        centroids_matrix = list()
        kindexmap = dict()  # class label -> row index in centroids_matrix

        _i = 0
        for k in byClassDict:
            class_centroid = [ np.mean(x) for x in np.transpose(byClassDict[k]) ] #np.mean(byClassDict[k])
            _norm = np.linalg.norm(class_centroid)
            _scaling_factor = _norm**2#(i+1)**2 #+ (i+_norm)  #Play with this using _norm, i and any otrher function/constant
            _centroid = np.array(class_centroid)#*(_scaling_factor)
            print '*** Class centroid:', _centroid
            class_centroids[k] = _centroid
            centroids_matrix.append(_centroid)
            kindexmap[k] = _i
            _i+=1

        # orthonormalise the class centroid directions
        centroids_matrix = np.array(centroids_matrix)
        ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
        ortho_centroids_matrix = normalize(ortho_centroids_matrix)

        print '*Centroids matrix',centroids_matrix
        print '*Ortho centroids matrix', ortho_centroids_matrix


        newX, newY = list(), list()
        ks = list()  # class label of every sample emitted so far
        for k in byClassDict:
            #byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]]) #class_centroids[k]

            #this is the basis vector corresponding to current class
            classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
            kScalingFactor = self.support

            #This section tries to get a good scaling factor for each orthonormal vector
            maxks = list()
            for _k in ks:
                projs = [scalarProjection(x,classvector) for x in byClassDict[_k]]
                maxk = max(projs)
                maxks.append(maxk)

                maxownk = max([scalarProjection(x,classvector) for x in byClassDict[k]])

            if len(ks):
                kScalingFactor = max(maxks) + abs(maxownk) + self.support


            # recentre the class at the origin, then push it out along its
            # own orthonormal direction by kScalingFactor
            for v in byClassDict[k]:
                vv = np.array(v) - centroids_matrix[kindexmap[k]] + classvector*kScalingFactor
                self.sparsed_X.append(vv)
                newX.append(v)
                newY.append(k)
                ks.append(k)

        self.sparsed_X = np.array(self.sparsed_X)

        if self.projectOnSubspace:
            #Project on to new subspace spanned by class vectors
            self.sparsed_X = np.dot(self.sparsed_X,np.transpose(centroids_matrix) )


        if self.mapperType == 'PIMP':
            #self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
            #self.sparsed_X = self.scaler.transform(self.sparsed_X)

            # NOTE(review): '*' is elementwise (with broadcasting) unless the
            # operands are np.matrix -- looks like np.dot may have been
            # intended here; confirm with callers
            self.transformation_matrix = self.sparsed_X*(np.transpose(np.linalg.pinv(X) ) )
            #self.transformation_matrix = X*(np.transpose(np.linalg.pinv(self.sparsed_X) ) )

        if self.mapperType == 'Regressor':
            self.Regressor = MultiTaskLasso(alpha=0.00000001,max_iter=2000)
            self.Regressor.fit(newX,self.sparsed_X)

        return self.sparsed_X, newY


    def transform(self,X):
        """Map X into the sparsed space with the mapper chosen at init.

        NOTE(review): an unrecognised mapperType leaves transformed_data
        unbound and raises NameError at the return.
        """
        Xs = X#self.scaler.transform(X)
        if self.mapperType == 'PIMP':
            transformed_data = self.transformation_matrix*Xs
            #transformed_data = Xs*self.transformation_matrix
        if self.mapperType == 'Regressor':
            transformed_data = self.Regressor.predict(Xs)

        return transformed_data
# Example 9
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV
from sklearn.linear_model import MultiTaskLasso

from src.common.my_data import Data

data = Data()

# aggregate features for users that appear in the log data
# ("have_log"), train and test concatenated row-wise
agg_train_have_log = pd.read_table(data.output.sorted_train_agg_have_log_usr).drop('USRID', axis=1)
print('agg_train_have_log : ', agg_train_have_log.shape)
agg_test_have_log = pd.read_table(data.output.sorted_test_agg_have_log_usr).drop('USRID', axis=1)
print('agg_test_have_log : ', agg_test_have_log.shape)
agg_all_have_log = pd.concat([agg_train_have_log, agg_test_have_log], axis=0)
print('agg_all_have_log : ', agg_all_have_log.shape)

# tf-idf targets over user events -- presumably row-aligned with
# agg_all_have_log; confirm upstream sorting
tf_idf_all_have_log = pd.read_table(data.feature.tf_idf_have_log_usr_evt_all)
tf_idf_all_have_log_name = tf_idf_all_have_log.head(0)  # header row only (column names)
print(tf_idf_all_have_log_name)
print('tf_idf_all_have_log : ', tf_idf_all_have_log.shape)
# print(tf_idf_all)

# test users without log data: their tf-idf vectors get predicted below
agg_no_have_log = pd.read_table(data.output.sorted_test_agg_no_have_log_usr).drop('USRID', axis=1)

print('agg_no_have_log : ', agg_no_have_log.shape)

# learn aggregate-features -> tf-idf mapping, then impute tf-idf vectors
# for the no-log users
lasso = MultiTaskLasso()
lasso.fit(agg_all_have_log, tf_idf_all_have_log)
result_lasso = lasso.predict(agg_no_have_log)
print(result_lasso)
# result_csv = pd.DataFrame(result_lasso)
# data.to_csv(data.output.prediction_test_no_log_tf_idf, index=False, sep='\t')
# 10-fold cross-validation of MultiTaskLasso, then a final fit + CSV dump.
# NOTE(review): KFold(n, n_folds=...) is the pre-0.18 scikit-learn
# cross_validation API -- confirm the installed version.  Python 2 code.
k_fold = KFold(Y_train_raw.shape[0], n_folds=10)
for train, test in k_fold:
    X1 = X_train_reduced[train]
    Y1 = Y_train_raw[train]

    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train Classifiers on fold
    mcl_clf = MultiTaskLasso(alpha=.3)
    mcl_clf.fit(X1, Y1)


    ## Score Classifiers on fold

    mcl_clf_score = mcl_clf.score(X2, Y2)

    print "MultiTaskLasso:  ", mcl_clf_score



## Lasso CV for parameter optimization
t1 = time.time()
clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
print 'time to train', t_lasso_cv

Y_predicted = clf.predict(X_test_reduced)

## Save results to csv
np.savetxt('prediction.csv', Y_predicted, fmt='%.5f',delimiter=',')
# Example 11
    # Synthetic multi-task regression demo comparing Lasso vs MultiTaskLasso.
    n_samples = 100
    n_features = 40
    n_tasks = 12
    rel_f = 7  # only the first rel_f features get non-zero coefficients
    # coefficients vary smoothly (sinusoidally) across tasks for the
    # relevant features and stay zero elsewhere
    coef = np.zeros((n_tasks, n_features))
    times = np.linspace(0, 2 * np.pi, n_tasks)
    for k in range(rel_f):
        # rr: presumably a numpy RandomState defined outside this excerpt -- confirm
        coef[:, k] = np.sin((1.0 + rr.randn(1)) * times + 3 * rr.randn(1))
    X = rr.randn(n_samples, n_features)
    y = np.dot(X, coef.T) + rr.randn(n_samples, n_tasks)
    # hold out the last 20 samples for testing
    X_train = X[:-20]
    y_train = y[:-20]
    X_test = X[-20:]
    y_test = y[-20:]

    print("Fitting LASSO model...")
    ll = Lasso(alpha=0.45)
    ll.fit(X_train, y_train)
    print("R2 score: {0}".format(r2_score(y_test, ll.predict(X_test))))

    print("Fitting Multitask LASSO model...")
    ml = MultiTaskLasso(alpha=0.45)
    ml.fit(X_train, y_train)
    print("R2 score: {0}".format(r2_score(y_test, ml.predict(X_test))))

    print("Plotting predictions...")
    # scatter both models' predictions against the data for task index 1
    plt.scatter(X[:, 1], y[:, 1])
    plt.scatter(X[:, 1], ll.predict(X)[:, 1], color="blue")
    plt.scatter(X[:, 1], ml.predict(X)[:, 1], color="red")
    plt.show()
# Example 12
    #
    #    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.4, random_state=0)

    # local import of the MultiSconES data loader (hard-coded Windows path)
    import sys
    sys.path.insert(0, 'C:\\r workspace\\MultiSconES\\py')
    from load_data import load_dataset

    dataset = load_dataset()
    X = dataset["data"]
    Y = dataset["labels"]

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.33,
                                                        random_state=42)

    # Python 2 fragment below (print statements)
    clf = MultiTaskLasso(alpha=1)
    print "train start"
    clf.fit(X_train, Y_train)
    print "train end"
    print "coef start"
    coef_multi_task_lasso_ = clf.coef_
    print "coef end"
    plot_coef(coef_multi_task_lasso_)
    # zero_coefs: presumably the zeroed coefficients -- see get_stats for the
    # exact contract
    zero_coefs = get_stats(coef_multi_task_lasso_)
    print len(zero_coefs)

    Y_pred = clf.predict(X_test)
    clf_score = clf.score(X_test, Y_test)
    # R^2 on output column 5 only
    score = r2_score(Y_test[:, 5], Y_pred[:, 5])
    precedent[4:7, :, :, :] = block[i - 337:i - 334, :, :, :]  # 前一周
    precedent_frames.append(precedent)

#regr = (max_depth=8, random_state=0,n_estimators=1000)
model = MultiTaskLasso(alpha=1)

X_train, X_val, y_train, y_val = train_test_split(precedent_frames,
                                                  label_frames,
                                                  test_size=0.2,
                                                  random_state=4)
# Convert to 5D numpy arrays: train (920, 7, 64, 64, 2), validation (231, 1, 64, 64, 2)
X_train = np.array(X_train)
y_train = np.array(y_train)
X_val = np.array(X_val)
y_val = np.array(y_val)

print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)
# Flatten the 5D data into the 2D layout the regressor expects
# NOTE(review): the 920/231 sample counts are hard-coded -- this breaks if
# the dataset size changes
X_train = X_train.reshape((920, 7 * 64 * 64 * 2))
X_val = X_val.reshape((231, 7 * 64 * 64 * 2))
y_train = y_train.reshape((920, 1 * 64 * 64 * 2))
y_val = y_val.reshape((231, 1 * 64 * 64 * 2))

model.fit(X_train, y_train)
y_pred = model.predict(X_val)

from sklearn.metrics import mean_squared_error
print(mean_squared_error(y_val, y_pred))
# Example 14
    # Sum-of-squares breakdown for the previous (OMP) model -- Python 2 code
    tss, rss, ess, r2 = xss(Y, ompCV.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试MultiTaskLasso类**********"
    # When constructing MultiTaskLasso the alpha parameter can be set; the default is 1.0.
    multiTaskLasso = MultiTaskLasso(alpha=1.0)
    # Fit on the training set
    multiTaskLasso.fit(train_X, train_Y)
    # Print the fitted coefficients and intercept
    print "系数:", multiTaskLasso.coef_
    print "截距:", multiTaskLasso.intercept_
    print '训练集R2: ', r2_score(train_Y, multiTaskLasso.predict(train_X))

    # For linear regression models, goodness of fit is usually judged by the
    # mean squared error (MSE) or root mean squared error (RMSE) on the test set.
    test_Y_pred = multiTaskLasso.predict(test_X)
    print "测试集得分:", multiTaskLasso.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    # Sum-of-squares breakdown on the full dataset
    tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2
# Example 15
class classSparser(object):
    """Spreads classes apart in feature space, then learns a mapping to it.

    fit() translates each class cluster onto its own (scaled) orthonormal
    centroid direction, producing a "sparsed" copy of X; transform() maps
    new points into that space either via a pseudo-inverse-based matrix
    ('PIMP') or a fitted MultiTaskLasso ('Regressor').  Python 2 code.
    """
    def __init__(self,
                 mapperType='PIMP',
                 support=150,
                 projectOnSubspace=False):
        #options are
        #'PIMP' for Moore Penrose Pseudo Inverse
        #'Regressor' for using a regression task on each dimension
        self.mapperType = mapperType
        self.sparsed_X = None
        self.transformation_matrix = None
        self.Regressor = None
        self.support = support  # minimum separation added between classes
        self.projectOnSubspace = projectOnSubspace

    def fit(self, X, Y):
        """Build the class-separated copy of X; returns (sparsed_X, newY)."""
        self.sparsed_X = list()
        #First, translate points to the origin
        main_centroid = [np.mean(x) for x in np.transpose(X)]
        print 'Main centroid:', main_centroid
        X = X - main_centroid

        # group samples by class label
        byClassDict = defaultdict(list)
        for i in xrange(len(Y)):
            byClassDict[Y[i]].append(X[i])

        class_centroids = dict()

        centroids_matrix = list()
        kindexmap = dict()  # class label -> row index in centroids_matrix

        _i = 0
        for k in byClassDict:
            class_centroid = [
                np.mean(x) for x in np.transpose(byClassDict[k])
            ]  #np.mean(byClassDict[k])
            _norm = np.linalg.norm(class_centroid)
            _scaling_factor = _norm**2  #(i+1)**2 #+ (i+_norm)  #Play with this using _norm, i and any otrher function/constant
            _centroid = np.array(class_centroid)  #*(_scaling_factor)
            print '*** Class centroid:', _centroid
            class_centroids[k] = _centroid
            centroids_matrix.append(_centroid)
            kindexmap[k] = _i
            _i += 1

        # orthonormalise the class centroid directions
        centroids_matrix = np.array(centroids_matrix)
        ortho_centroids_matrix = np.array(gram_schmidt.gs(centroids_matrix))
        ortho_centroids_matrix = normalize(ortho_centroids_matrix)

        print '*Centroids matrix', centroids_matrix
        print '*Ortho centroids matrix', ortho_centroids_matrix

        newX, newY = list(), list()
        ks = list()  # class label of every sample emitted so far
        for k in byClassDict:
            #byClassDict[k] = np.array(byClassDict[k]) - centroids_matrix[kindexmap[k]] + np.array(ortho_centroids_matrix[kindexmap[k]]) #class_centroids[k]

            #this is the basis vector corresponding to current class
            classvector = np.array(ortho_centroids_matrix[kindexmap[k]])
            kScalingFactor = self.support

            #This section tries to get a good scaling factor for each orthonormal vector
            maxks = list()
            for _k in ks:
                projs = [
                    scalarProjection(x, classvector) for x in byClassDict[_k]
                ]
                maxk = max(projs)
                maxks.append(maxk)

                maxownk = max(
                    [scalarProjection(x, classvector) for x in byClassDict[k]])

            if len(ks):
                kScalingFactor = max(maxks) + abs(maxownk) + self.support

            # recentre the class at the origin, then push it out along its
            # own orthonormal direction by kScalingFactor
            for v in byClassDict[k]:
                vv = np.array(v) - centroids_matrix[
                    kindexmap[k]] + classvector * kScalingFactor
                self.sparsed_X.append(vv)
                newX.append(v)
                newY.append(k)
                ks.append(k)

        self.sparsed_X = np.array(self.sparsed_X)

        if self.projectOnSubspace:
            #Project on to new subspace spanned by class vectors
            self.sparsed_X = np.dot(self.sparsed_X,
                                    np.transpose(centroids_matrix))

        if self.mapperType == 'PIMP':
            #self.scaler = preprocessing.StandardScaler().fit(self.sparsed_X)
            #self.sparsed_X = self.scaler.transform(self.sparsed_X)

            # NOTE(review): '*' is elementwise (with broadcasting) unless the
            # operands are np.matrix -- looks like np.dot may have been
            # intended here; confirm with callers
            self.transformation_matrix = self.sparsed_X * (np.transpose(
                np.linalg.pinv(X)))
            #self.transformation_matrix = X*(np.transpose(np.linalg.pinv(self.sparsed_X) ) )

        if self.mapperType == 'Regressor':
            self.Regressor = MultiTaskLasso(alpha=0.00000001, max_iter=2000)
            self.Regressor.fit(newX, self.sparsed_X)

        return self.sparsed_X, newY

    def transform(self, X):
        """Map X into the sparsed space with the mapper chosen at init.

        NOTE(review): an unrecognised mapperType leaves transformed_data
        unbound and raises NameError at the return.
        """
        Xs = X  #self.scaler.transform(X)
        if self.mapperType == 'PIMP':
            transformed_data = self.transformation_matrix * Xs
            #transformed_data = Xs*self.transformation_matrix
        if self.mapperType == 'Regressor':
            transformed_data = self.Regressor.predict(Xs)

        return transformed_data
# Column index 1 of the plain-lasso predictions is scored below.
# NOTE(review): lasso_model, fpr_lasso/tpr_lasso, y_test_classes, X, y and
# n_relevant_features are defined earlier, outside this excerpt.
y_pred_lasso = lasso_model.predict(X_test)[:, 1]

# per-feature ROC points for the plain lasso
for i in range(n_relevant_features):
    fpr_lasso[i], tpr_lasso[i], _ = roc_curve(y_test_classes[:, i],
                                              y_pred_lasso[:])
"""##MultiTaskLasso Model
Also computes false positive rate and true positive rate for each relevant feature
"""

multi_task_model = MultiTaskLasso(alpha=1.).fit(X, y)
multi_task_lasso_coefficients = multi_task_model.coef_
fpr_l1l2 = dict()
tpr_l1l2 = dict()

y_pred_l1l2 = multi_task_model.predict(X_test)[:, 1]

# per-feature ROC points for the multi-task (l1/l2) lasso
for i in range(n_relevant_features):
    fpr_l1l2[i], tpr_l1l2[i], _ = roc_curve(y_test_classes[:, i],
                                            y_pred_l1l2[:])
"""##ROC Curve
ROC Curve for GFLasso, Lasso and MultiTaskLasso Models
"""

# overlay the three models' ROC curves for feature index 2
# (fpr_gfl/tpr_gfl come from earlier, outside this excerpt)
from matplotlib import pyplot as plt
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_lasso[2], tpr_lasso[2], label='Lasso')
plt.plot(fpr_l1l2[2], tpr_l1l2[2], label='l1l2')
plt.plot(fpr_gfl[2], tpr_gfl[2], label='GFLasso')

plt.xlabel('False positive rate')
# Example 17
path_test = 'data_test.txt'

X, Y = get_data_own(path_train)  # path_train defined outside this excerpt

print(X.shape)
print(Y.shape)

print("Split data for CV")
X_train, X_test , y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)

# NOTE(review): the 'normalize' constructor argument was removed from
# scikit-learn linear models in 1.2 -- this code requires an older sklearn
lasso = MultiTaskLasso(max_iter = max_iter, normalize = True)

# choose alpha by cross-validation, then refit the plain estimator with it
print("Init train with multitasklassocv")
lassocv = MultiTaskLassoCV(alphas=None, cv=10, max_iter=max_iter, verbose=True, normalize=True)
lassocv.fit(X_train, y_train)

print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)

# despite the banner string below, the metrics computed are MAE,
# mean squared log error and MAPE
print("get mean square error")
mae = mean_absolute_error(y_test, lasso.predict(X_test))
print("mae: {}".format(mae))
rmsle = mean_squared_log_error(y_test, lasso.predict(X_test))
print("rmsle: {}".format(rmsle))
mape = mean_absolute_percentage_error(y_test, lasso.predict(X_test))
print("mape: {}".format(mape))



class SparseRegression:
    """Sparse (multi-task lasso) identification of a forced linear model.

    The time steps are split into one subset used to fit a sparse linear
    model A on features of the first q variables, and a second subset used
    to fit a forcing term B on features of the remaining variables, so
    that delta_v ~= A f(v[:, :q]) + B f(v[:, q:]).
    """

    def __init__(self,
                 v,
                 delta_v,
                 f,
                 q,
                 lin_args=(),
                 force_args=(),
                 split='shuffle',
                 split_kargs=None):
        """
        v.shape = (n_steps, n_variables),
        delta_v.shape = (n_steps, n_variables)

        q (in [1, n_variables]) number of first variables
        to fit the linear model to,
        remaining n_variables-q are used as forcing

        f: (n_steps, n_variables) -> (n_steps, n_features)
        f will be called with f(..., *lin_args) when fitting the linear model
        and with f(..., *force_args) when fitting the force
        """
        # None sentinel instead of a mutable {} default argument
        split_kargs = {} if split_kargs is None else split_kargs
        if v.shape == delta_v.shape and type(q) == int and q > 0 \
                and q <= v.shape[1]:
            self.v, self.delta_v = self._check_reduce(v, delta_v)
            self.params = [*self.v.shape, q]  # [n_steps, n_vars, q]
            # derivatives used for the model
            self.delta_v = self.delta_v[:, :q]
            # calculate features based on first q variables for linear model
            self.features_lin_model = f(self.v[:, :q], *lin_args)
            # calculate features based on remaining variables for forcing term
            self.features_forcing = f(self.v[:, q:], *force_args)
            # two different types of splitting
            split_dict = {
                'shuffle': self._shuffle_split,
                'lorenz': self._lobes_split
            }
            # split the timesteps into two parts:
            # first is used for fitting linear model, second for forcing
            self.mask_l_m, self.mask_f = split_dict[split](**split_kargs)
            # self.mask_l_m, self.mask_f = self._split_lobes(self.v[:, 0])
            self.feature_generation = f
            self.feature_generation_args = {
                'linear': lin_args,
                'forcing': force_args
            }
        else:
            raise Exception('Error: invalid init parameter')

    def _shuffle_split(self, fraction=0.5):
        """
        creates two masks to split n_steps elements into two disjunct sets
        where the first has length=fraction*n
        """
        assert fraction > 0 and fraction < 1
        n_steps = self.params[0]
        n_1 = int(n_steps * fraction)
        shuffled_ind = np.random.permutation(n_steps)

        ind_1 = shuffled_ind[:n_1]
        # builtin bool, not np.bool: the alias was removed in NumPy 1.24
        mask_1 = np.zeros(n_steps, dtype=bool)
        mask_1[ind_1] = True

        ind_2 = shuffled_ind[n_1:]
        mask_2 = np.zeros(n_steps, dtype=bool)
        mask_2[ind_2] = True

        # each element is part of either one or the other mask
        assert np.all(mask_1 ^ mask_2)
        return mask_1, mask_2

    def _lobes_split(self, window_pos=200, window_neg=400):
        """
        use regions in which trajectories are on the lobes to fit the
        linear model and the remaining steps for modeling the force
        """
        v_1 = self.v[:, 0]
        n_steps = self.params[0]
        # find lobe switches
        m_pos = v_1 > 0
        m_neg = v_1 < 0
        mask_switch = (m_pos[:-1] & m_neg[1:]) | (m_neg[:-1] & m_pos[1:])
        switch_ind = np.nonzero(mask_switch)[0]
        print('no. of lobe switches detected in v_1: {:d}'.format(
            len(switch_ind)))
        force_ind_list = []
        for switch in switch_ind:
            if switch + 1 - window_neg < 0:
                l_neg = switch
            else:
                l_neg = window_neg
            if switch + 1 + window_pos > n_steps:
                l_pos = n_steps - switch
            else:
                l_pos = window_pos
            force_ind_list.append(np.arange(switch - l_neg, switch + l_pos))
        force_ind = np.concatenate(force_ind_list)
        assert np.all(force_ind >= 0) and np.all(force_ind < n_steps)

        # builtin bool, not np.bool: the alias was removed in NumPy 1.24
        mask_lobes = np.ones(n_steps, dtype=bool)
        mask_lobes[force_ind] = False
        mask_switch = np.zeros(n_steps, dtype=bool)
        mask_switch[force_ind] = True
        assert np.all(mask_lobes ^ mask_switch)
        return mask_lobes, mask_switch

    def _check_reduce(self, v, delta_v):
        """
        check both matrices for columns containg nan and excludes them
        """
        invalid_v = np.any(np.isnan(v), axis=1)
        if np.any(invalid_v):
            print('Warning: v matrix contains NaNs')
        invalid_delta_v = np.any(np.isnan(delta_v), axis=1)
        if np.any(invalid_delta_v):
            print('Warning: delta_v matrix contains NaNs')
        valid_steps = (~invalid_v) & (~invalid_delta_v)
        valid_fraction = np.sum(valid_steps) / len(valid_steps)
        if not np.isclose(valid_fraction, 1):
            print('Warning: only {:.1%} of time steps are valid'.format(
                valid_fraction))
        if valid_fraction < 0.95:
            raise Exception('Error: less than 95% of time steps are valid')
        return v[valid_steps], delta_v[valid_steps]

    def fit_lin_model(self, alpha=None):
        """
        fit sparse linear regression on first q variables
        alpha is penalization parameter, None triggers cross validation
        """
        # 'normalize' (previously passed as False, which was its default) was
        # removed from scikit-learn linear models in 1.2; dropping it keeps
        # the same behavior and stays compatible with new versions
        if alpha is None:  # do cross validation
            self.lin_model = \
                MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                 fit_intercept=False, max_iter=3500)
        else:
            self.lin_model = \
                MultiTaskLasso(alpha=alpha, fit_intercept=False)
        self.lin_model.fit(self.features_lin_model[self.mask_l_m],
                           self.delta_v[self.mask_l_m])

    def pred_lin_model(self):
        """
        calculate prediction of the linear model on the data set not used for
        training it
        """
        pred_d_v = self.lin_model.predict(self.features_lin_model[self.mask_f])
        d_v = self.delta_v[self.mask_f]
        # calculate correlation for each variable
        n_variables = d_v.shape[1]
        print('corr. of prediction and true delta_v:')
        for i in range(n_variables):
            r, p = pearsonr(pred_d_v[:, i], d_v[:, i])
            print('{:d}th variable: r={:.2f} (p={:.2f})'.format(i + 1, r, p))
        self.eps = d_v - pred_d_v  # d_v - Af(v)

    def fit_force_params(self, alpha=None):
        """
        fit sparse linear regression on remaining n_variables-q variables
        alpha is penalization parameter, None triggers cross validation
        """
        # see fit_lin_model: 'normalize' dropped for sklearn >= 1.2 compat
        if alpha is None:  # do cross validation
            self.force_model = \
                MultiTaskLassoCV(eps=1e-3, n_alphas=50, cv=10, n_jobs=-1,
                                 fit_intercept=False)
        else:
            self.force_model = \
                MultiTaskLasso(alpha=alpha, fit_intercept=False)
        self.force_model.fit(self.features_forcing[self.mask_f], self.eps)

    def fit(self, alpha_lin=None, alpha_force=None):
        """Fit the linear model, compute its residuals, then fit the force."""
        self.fit_lin_model(alpha=alpha_lin)
        self.pred_lin_model()
        self.fit_force_params(alpha=alpha_force)

    def plot_coefs(self, f_descr=None):
        """
        plot coef matrix of linear and force model
        f_descr(n_vars, offset, *args) -> n_features
        """
        n_f_lin_model = self.features_lin_model.shape[1]
        n_f_forcing = self.features_forcing.shape[1]
        q = self.params[-1]
        if f_descr is not None:
            # get names of the features
            f_lin_model_str = f_descr(q, 0,
                                      *self.feature_generation_args['linear'])
            f_forcing_str = f_descr(self.v.shape[1] - q, q,
                                    *self.feature_generation_args['forcing'])
            assert len(f_lin_model_str) == n_f_lin_model
            assert len(f_forcing_str) == n_f_forcing
        else:
            f_lin_model_str = \
                [str(i) for i in range(n_f_lin_model)]
            f_forcing_str = \
                [str(i) for i in range(n_f_forcing)]

        n_f = n_f_lin_model + n_f_forcing
        fractions = (n_f_lin_model / n_f, n_f_forcing / n_f)
        fig, axes = plt.subplots(ncols=2,
                                 sharey=True,
                                 gridspec_kw={'width_ratios': fractions})
        plt.subplots_adjust(wspace=0.2)
        a = self.lin_model.coef_
        b = self.force_model.coef_
        assert a.shape[0] == b.shape[0]
        n_vars = a.shape[0]
        max_abs_coef = max(abs(a.min()), abs(b.min()), a.max(), b.max())

        titles = ['A', 'B']
        matrices = [a, b]
        ticklabels = [f_lin_model_str, f_forcing_str]
        for i, ax in enumerate(axes):
            ax.set_title(titles[i])
            im = ax.imshow(matrices[i],
                           vmin=-max_abs_coef,
                           vmax=max_abs_coef,
                           origin='upper',
                           cmap='seismic')
            ax.set_xticks(np.arange(len(ticklabels[i])))
            ax.set_xticklabels(ticklabels[i], rotation=45)
            ax.set_xlabel('features')
            ax.set_yticks(np.arange(n_vars))
            ax.set_yticklabels(
                ['$v_{:d}$'.format(i + 1) for i in range(n_vars)])
        axes[0].set_ylabel('variables')
        plt.colorbar(im, ax=axes, fraction=0.05, shrink=0.75)

    def _dv(self, t, v, force):
        """
        v.shape = (q,)
        force(t)
        """
        # linear part
        lin_args = self.feature_generation_args['linear']
        features_lin = \
            self.feature_generation(v.reshape(1, -1), *lin_args).squeeze()
        lin_contr = np.dot(self.lin_model.coef_, features_lin)
        # forcing part
        force_args = self.feature_generation_args['forcing']
        features_force = \
            self.feature_generation(force(t).reshape(1, -1),
                                    *force_args).squeeze()
        force_contr = np.dot(self.force_model.coef_, features_force)
        dv = lin_contr + force_contr
        return dv

    def solve_model(self, dt, ind_v_init, force=None):
        """
        use time serie of the force variables and simulate the system from
        ind_v_init
        """
        n_steps, n_vars, q = self.params
        v_init = self.v[ind_v_init, :q]
        # resemble the timesteps at which the original data was evaluated
        n_remaining = n_steps - ind_v_init
        t_remaining = dt * (n_remaining - 1)
        t_eval = np.linspace(0, t_remaining, num=n_remaining)
        if force is None:

            def f_dummy(t):
                return np.zeros(n_vars - q)

            dv = partial(self._dv, force=f_dummy)
        elif force.shape == (n_remaining, n_vars - q):
            f_interp = interp1d(t_eval, force, axis=0, kind='quadratic')
            dv = partial(self._dv, force=f_interp)
        else:
            raise Exception('invalid force')

        result = solve_ivp(dv, [0, t_remaining],
                           v_init,
                           t_eval=t_eval,
                           method='RK45',
                           rtol=1e-6,
                           atol=1e-12)
        print(result.message)
        return result