Ejemplo n.º 1
0
class cca:
    """Wrapper around a ``CCA`` model with two ways of mapping new samples.

    When ``ccatype == 'new'`` an explicit projection matrix is derived from
    the fitted scores and weights and applied with a plain matrix product;
    for any other ``ccatype`` the fitted model's ``predict`` is used.
    """

    def __init__(self, n_components=1, ccatype=None):
        self.ccatype = ccatype
        self.n_components = n_components

    def derive_transform(self, A, B):
        """Fit the model on ``(A, B)`` and prepare the mapping.

        Returns the fitted model in the default mode. In ``'new'`` mode the
        projection is stored on ``self.proj_to_B`` and nothing is returned.
        """
        estimator = CCA(n_components=self.n_components, scale=False)
        self.model = estimator.fit(A, B)

        if self.ccatype != 'new':
            return self.model

        # http://onlinelibrary.wiley.com/doi/10.1002/cem.2637/abstract
        x_scores = self.model.x_scores_
        y_scores = self.model.y_scores_
        scores_x_to_y = np.linalg.pinv(x_scores).dot(y_scores)
        scores_y_to_B = np.linalg.pinv(y_scores).dot(B)
        # NOTE(review): `ct.multi_dot` presumably chains the three matrix
        # products (x_weights_ @ F1 @ F2) -- confirm against the `ct` module.
        self.proj_to_B = ct.multi_dot(
            (self.model.x_weights_, scores_x_to_y, scores_y_to_B))

    def apply_transform(self, C):
        """Map ``C`` with the projection ('new' mode) or ``model.predict``."""
        samples = np.array(C)

        if self.ccatype == 'new':
            return samples.dot(self.proj_to_B)

        # A single 1-D sample is promoted to one row before predicting.
        if samples.ndim == 1:
            samples = samples.reshape(1, -1)
        return self.model.predict(samples)
Ejemplo n.º 2
0
class _CCAImpl:
    """Adapter that forwards ``fit``/``transform``/``predict`` to an ``Op``.

    The keyword hyperparameters are stored and passed unchanged to ``Op``
    when the wrapped model is built.
    """

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when given."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's ``transform(X)``."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)

    def predict(self, X):
        """Return the wrapped model's ``predict(X)``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
Ejemplo n.º 3
0
class BiomeCCA():
    """Two-view model backed by a ``CCA`` estimator stored on ``self.CCA``.

    Several methods accept extra arguments (labels, validation data, args)
    that are not used by the CCA itself.
    """

    def __init__(self, args):
        self.CCA = CCA(n_components=args.latent_size)

    def fit(self, X1_train, X2_train, y_train, X1_val, X2_val, y_val, args,):
        """Fit the estimator on the two training views and return the result.

        If ``args.latent_size`` exceeds the smaller column count of the two
        views, the estimator is rebuilt with that smaller count first.
        """
        max_components = min(X1_train.shape[1], X2_train.shape[1])
        if args.latent_size > max_components:
            print("Warning: auto reduce latent size")
            self.CCA = CCA(n_components=max_components)
        return self.CCA.fit(X1_train, X2_train)

    def transform(self, x1_train, x2_train, y, args):
        """Return the estimator's ``transform`` of both views."""
        estimator = self.CCA
        return estimator.transform(x1_train, x2_train)

    def predict(self, X1_val, X2_val, y_val, args):
        """Return the estimator's ``predict`` for the first view only."""
        estimator = self.CCA
        return estimator.predict(X1_val)

    def get_transformation(self):
        """Return the estimator's ``coef_`` matrix."""
        return self.CCA.coef_

    def param_l0(self):
        """Return parameter counts derived from ``coef_`` and ``n_components``."""
        coef_shape = self.CCA.coef_.shape
        width = self.CCA.n_components
        encoder_size = coef_shape[0] * width
        decoder_size = width * coef_shape[1]
        return {"Encoder": encoder_size, "Decoder": decoder_size}

    def get_graph(self):
        """Return a pair of empty lists."""
        return ([], [])
Ejemplo n.º 4
0
    def cca(self, X, y):
        """Fit a CCA model on ``(X, y)`` and collect its main outputs.

        The model is built with ``n_components=self.n_comp`` and
        ``scale=False``.

        Returns a tuple, in this order:
        ``(cca_model, X_c, y_c, loading_x, loading_y, weight_x, weight_y,
        explain_x, explain_y, coef, y_predict)``, where ``X_c``/``y_c`` are
        the transformed inputs, ``explain_x``/``explain_y`` are normalised
        squared eigenvalues of the score covariances, and ``y_predict`` is
        the model's prediction for ``X``.
        """
        cca_model = CCA(n_components=self.n_comp, scale=False)
        cca_model.fit(X, y)
        X_c, y_c = cca_model.transform(X, y)
        y_predict = cca_model.predict(X, copy=True)

        # Loadings: correlation between each original variable and its
        # corresponding canonical variate.
        loading_x = cca_model.x_loadings_
        loading_y = cca_model.y_loadings_

        # Weights are the coefficients of the linear combinations; they can
        # possibly be used to project the reduced variables back to the
        # original space.
        # Note: if the `scale` parameter is True, the weights refer to the
        # standardised data, not the raw data.
        weight_x = cca_model.x_weights_
        weight_y = cca_model.y_weights_

        # coef holds the coefficients of X on y and can be used to predict y
        # (np.dot, matrix multiplication).
        coef = cca_model.coef_

        def _explained(scores):
            # Covariance explained by each canonical variate.
            # np.cov returns a 0-d array when there is a single component,
            # which np.linalg.eig rejects; promote it to a 1x1 matrix.
            covariance = np.atleast_2d(np.cov(scores.T))
            eigvals, _ = np.linalg.eig(covariance)
            squared = pow(eigvals, 2)
            return squared / np.sum(squared)

        explain_x = _explained(X_c)
        explain_y = _explained(y_c)

        return (cca_model,
                X_c, y_c,
                loading_x, loading_y,
                weight_x, weight_y,
                explain_x, explain_y,
                coef, y_predict)
Ejemplo n.º 5
0
def main(args):
    """Train an SVC on CCA scores and write predictions for the test file.

    ``args`` is unpacked into five paths:
    ``(training_file, label_file, test_file, test_label, u_file)``.
    The label files are read as one integer per line. Predictions, the true
    test labels and the accuracy are printed, and the predictions are written
    one per line to ``test_file + '.cca.2.pred'``.
    """
    (training_file, label_file, test_file, test_label, u_file) = args

    X_training = load_feat(training_file)
    n = len(X_training)
    U = np.asarray(load_feat(u_file))
    # Context managers make sure the label files are closed again.
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [int(line.strip()) for line in label_handle])
    X_training = np.asarray(X_training)

    X_test = np.asarray(load_feat(test_file))
    with open(test_label) as label_handle:
        y_test = np.asarray([int(line.strip()) for line in label_handle])

    cca = CCA(n_components=100)
    # Only the first n rows of U are paired with the training samples.
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    # NOTE(review): the classifier below is trained on the CCA scores of X
    # but evaluated on `cca.predict(X_test)`; confirm whether
    # `cca.transform(X_test)` was intended here.
    X_test_cca = cca.predict(X_test)

    svr = SVC()
    svr.fit(X_cca, y_training)
    pred = svr.predict(X_test_cca)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(pred)
    print(y_test)  # was `test_y`, an undefined name
    print(accuracy_score(y_test, pred))
    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            output.write('%s\n' % (p,))
Ejemplo n.º 6
0
def main(args):
    """Train an SVC on CCA scores and write predictions for the test file.

    ``args`` is unpacked into five paths:
    ``(training_file, label_file, test_file, test_label, u_file)``.
    The label files are read as one integer per line. Predictions, the true
    test labels and the accuracy are printed, and the predictions are written
    one per line to ``test_file + '.cca.2.pred'``.
    """
    (training_file, label_file, test_file, test_label, u_file) = args

    X_training = load_feat(training_file)
    n = len(X_training)
    U = np.asarray(load_feat(u_file))
    # Context managers make sure the label files are closed again.
    with open(label_file) as label_handle:
        y_training = np.asarray(
            [int(line.strip()) for line in label_handle])
    X_training = np.asarray(X_training)

    X_test = np.asarray(load_feat(test_file))
    with open(test_label) as label_handle:
        y_test = np.asarray([int(line.strip()) for line in label_handle])

    cca = CCA(n_components=100)
    # Only the first n rows of U are paired with the training samples.
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    # NOTE(review): the classifier below is trained on the CCA scores of X
    # but evaluated on `cca.predict(X_test)`; confirm whether
    # `cca.transform(X_test)` was intended here.
    X_test_cca = cca.predict(X_test)

    svr = SVC()
    svr.fit(X_cca, y_training)
    pred = svr.predict(X_test_cca)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(pred)
    print(y_test)  # was `test_y`, an undefined name
    print(accuracy_score(y_test, pred))
    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            output.write('%s\n' % (p,))
Ejemplo n.º 7
0
def test_cca_implementation():
    """Print predictions of the module-level ``CCA`` next to scikit-learn's.

    Both models are fitted with 6 components on the same random data
    (200 samples, 10 X features, 6 Y features) and their predictions for the
    training X are printed for visual comparison. Nothing is asserted.
    """
    X = np.random.multivariate_normal(
        np.random.randint(50, 100, (10)).astype('float'), np.identity(10), 200)
    Y = np.random.multivariate_normal(
        np.random.randint(80, 200, (6)).astype('float'), np.identity(6), 200)

    # NOTE(review): `CCA` is presumably the implementation under test,
    # imported elsewhere in the file -- confirm.
    mdl_test = CCA(n_components=6)
    mdl_test.fit(X, Y)
    Y_pred = mdl_test.predict(X)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(Y_pred)
    print('-' * 50)

    from sklearn.cross_decomposition import CCA as CCA_sklearn

    mdl_actual = CCA_sklearn(n_components=6)
    mdl_actual.fit(X, Y)

    print('-' * 50)
    Y_actual = mdl_actual.predict(X)
    print(Y_actual)
Ejemplo n.º 8
0
def cca(x,
        neural_data,
        region=None,
        brain_region=['IT', 'V4'],
        cv=5,
        n_components=5,
        variation=[0, 3, 6],
        sortby='image_id',
        train_size=0.75):
    """Cross-validated CCA between model features and neural recordings.

    For every entry of ``brain_region`` and every entry of ``region`` (except
    ``'pixel'``), a ``CCA`` with ``n_components`` components is fitted on each
    training fold and evaluated on the matching test fold.

    Args:
        x: model features; assumed to be an xarray-like object offering
            ``sortby``, ``sel(region=...)``, ``.layer`` and ``.values``
            -- TODO confirm against callers.
        neural_data: recordings with the same interface plus ``neuroid_id``.
        region: feature regions to evaluate; defaults to the unique values
            of ``x.region``.
        brain_region: values passed to ``neural_data.sel(region=...)``.
            The list default is only iterated, never mutated.
        cv: number of KFold splits (also used as the KFold random state).
        n_components: number of CCA components.
        variation: currently unused.
        sortby: coordinate used to align ``x`` and ``neural_data``.
        train_size: train fraction of the extra random splits (see the note
            in the body).

    Returns:
        ``pandas.DataFrame`` with one row per (brain region, layer, fold,
        neuroid) holding ``pearsonr``, ``cca_r``, ``fve``, ``iter``,
        ``depth`` and ``neuroid_id``.

    Raises:
        AssertionError: if the ``sortby`` values of the two inputs differ
            after sorting.
    """
    x = x.sortby(sortby)
    nd = neural_data.sortby(sortby)

    assert list(getattr(x, sortby).values) == list(getattr(nd, sortby).values)
    num_images = x.shape[0]
    out_recs = []

    cv_tr = []
    cv_te = []

    kf = KFold(n_splits=cv, shuffle=True, random_state=cv)
    for tr, te in kf.split(np.arange(num_images)):
        cv_tr.append(tr)
        cv_te.append(te)

    # NOTE(review): these random splits are appended after the KFold splits,
    # but the evaluation loop zips the split lists with np.arange(cv), so only
    # the first `cv` (KFold) entries are ever used. Kept as-is because the
    # intent is unclear -- confirm whether they should be evaluated too.
    for rand_delta in np.arange(cv):
        tr_idx, te_idx, _, _ = train_test_split(
            np.arange(num_images),
            np.arange(num_images),
            train_size=train_size,
            random_state=np.random.randint(0, 50) + rand_delta)
        cv_tr.append(tr_idx)
        cv_te.append(te_idx)

    # Resolve the default once instead of on every brain-region iteration.
    if region is None:
        region = np.unique(x.region.values)

    for br in brain_region:
        nd_reg = nd.sel(region=br)

        for reg in region:
            if reg == 'pixel':
                continue
            x_reg = x.sel(region=reg)

            depth = np.unique(x_reg.layer.values)[0]
            with tqdm(zip(np.arange(cv), cv_tr, cv_te), total=cv) as t:
                t.set_description('{}{} x {}{}'.format(reg, x_reg.shape, br,
                                                       nd_reg.shape))

                r_mean = []
                fve_mean = []
                cca_mean = []
                for n, tr, te in t:
                    # Named `model` so the enclosing function is not shadowed.
                    model = CCA(n_components=n_components)
                    model.fit(x_reg.values[tr], nd_reg.values[tr])

                    u, v = model.transform(x_reg.values[te],
                                           nd_reg.values[te])

                    y_pred = model.predict(x_reg.values[te])
                    y_true = nd_reg.values[te]

                    fve = explained_variance_score(y_true,
                                                   y_pred,
                                                   multioutput='raw_values')
                    # Each entry is the (r, p-value) pair from pearsonr.
                    r_vals = [
                        pearsonr(y_pred[:, i], y_true[:, i])
                        for i in range(y_pred.shape[-1])
                    ]

                    # Average only the correlation coefficients; the previous
                    # code averaged the (r, p-value) pairs together.
                    cca_r = np.mean([
                        pearsonr(u[:, i], v[:, i])[0]
                        for i in np.arange(n_components)
                    ])

                    r_mean.append(np.mean([r for r, _ in r_vals]))
                    cca_mean.append(cca_r)
                    fve_mean.append(np.mean(fve))

                    for rv, f, nid in zip(r_vals, fve,
                                          nd_reg[te].neuroid_id.values):
                        out_recs.append({
                            'region': br,
                            'layer': reg,
                            'pearsonr': rv[0],
                            'cca_r': cca_r,
                            'fve': f,
                            'iter': n,
                            'depth': depth,
                            'neuroid_id': nid,
                        })

                    t.set_postfix(pearson=np.mean(r_mean),
                                  cca=np.mean(cca_mean),
                                  fve=np.mean(fve_mean))

    return pd.DataFrame.from_records(out_recs)
Ejemplo n.º 9
0
def cca_ds(A, B, n_components=1):
    """Fit an unscaled CCA with ``B`` as predictors and ``A`` as targets.

    Returns a pair: the fitted model's ``coefs`` attribute and its
    prediction for ``B``.
    """
    estimator = CCA(n_components=n_components, scale=False)
    fitted = estimator.fit(B, A)
    # NOTE(review): recent scikit-learn releases expose this as `coef_`;
    # confirm which CCA implementation/version is in use.
    coefficients = fitted.coefs
    predicted = fitted.predict(B)
    return coefficients, predicted
Ejemplo n.º 10
0
class CcaClass:
    """CCA regression on a temperature time series.

    On construction the sample CSV is loaded, split by year into training
    (<= 2017) and test (>= 2018) rows, turned into sliding-window samples and
    used to fit a ``CCA`` model.

    Methods: preprocessing, predict, predict_by_cv, predict_by_gs,
    save_model, save_chart_image.
    """
    def __init__(self):
        # Algorithm name
        self._name = 'cca'

        # Base path: parent of the directory containing this file
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # Silence warning messages (process-wide filter)
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # Boolean row masks for the training and test periods
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training samples
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test samples
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = CCA()

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        """Build sliding-window samples from ``data["temperature"]``.

        Each label is one temperature value and its sample is the 7 values
        immediately before it. Returns ``(x, y)`` as lists; both are empty
        when there are 7 or fewer temperature values.
        """
        # Window length (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        # Samples
        x = []
        # Labels
        y = []
        for i in range(base_interval, len(temps)):
            y.append(temps[i])
            # The window covers indices i - 7 .. i - 1.
            x.append(temps[i - base_interval:i])
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        """Predict the test set and return ``[predictions, r2_score]``.

        Prints the score, plus ``coef_``/``intercept_`` when the model has
        both attributes. With ``save_img`` the comparison chart is saved
        (and shown when ``show_chart`` is true).
        """
        # Prediction
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report
        if hasattr(self._model, 'coef_') and hasattr(self._model,
                                                     'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predicted values & score
        return [list(y_pred.tolist()), score]

    # Cross-validation prediction
    def predict_by_cv(self):
        """Placeholder; always returns ``False``."""
        # For regression algorithms, cross validation is to be implemented
        # to fit the actual project.
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        """Placeholder; does nothing."""
        pass

    # Save or renew the model
    def save_model(self, renew=False):
        """Dump the model to ``<base>/model/<name>_rg.pkl``.

        With ``renew`` an existing file is first renamed to include the
        current timestamp, then the model is dumped under the plain name.
        """
        model_path = self._f_path + f'/model/{self._name}_rg.pkl'

        if renew and os.path.isfile(model_path):
            # Keep the previous model under a timestamped name
            os.rename(
                model_path,
                self._f_path +
                f'/model/{str(self._name) + str(time.time())}_rg.pkl')

        # Save the model
        joblib.dump(self._model, model_path)

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        """Plot test labels (red) against ``data`` (blue) and save the image."""
        # Size
        plt.figure(figsize=(15, 10), dpi=100)

        # Labels
        plt.plot(self._y_test, c='r')

        # Predicted values
        plt.plot(data, c='b')

        # Save as an image (path is relative to the working directory)
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        # __init__ can fail part-way (e.g. missing CSV), leaving some of
        # these attributes unset; drop only the ones that exist so that
        # finalisation never raises AttributeError.
        for attr in ('_x_train', '_x_test', '_y_train', '_y_test',
                     '_x', '_y', '_model'):
            self.__dict__.pop(attr, None)