Example #1
def do_cca(X, y, X_orig, n_components=10, permutations=10):
    '''
    Performs a CCA on the component scores and projects the
    resulting latent scores back to edge space.
    '''
    cca = CCA(n_components=n_components)
    cca.fit(X, y)

    # save the latent component correlation
    cca.mode_r = []
    for component in range(n_components):
        cca.mode_r.append(
            np.corrcoef(cca.x_scores_[:, component],
                        cca.y_scores_[:, component])[0, 1])

    # correlate behaviour with LC score
    cca.y_score_correlation = np.zeros((np.shape(y)[1], n_components))
    for component in range(n_components):
        for beh in range(np.shape(y)[1]):
            cca.y_score_correlation[beh, component] = np.corrcoef(
                y[:, beh].T, cca.y_scores_[:, component])[0, 1]

    # correlate edges with LC score
    cca.x_score_correlation = np.zeros((np.shape(X_orig)[1], n_components))
    for component in range(n_components):
        cca.x_score_correlation[:, component] = np.corrcoef(
            cca.x_scores_[:, component], X_orig.T)[1::, 0]

    # non parametric max T tests for component significance
    max_r = []
    for perm in tqdm(range(permutations)):
        #shuffle the behaviour for each permutation
        y_shuffle = shuffle(y)

        #perform a new CCA with shuffled data
        cca_perm = CCA(n_components=n_components)
        cca_perm.fit(X, y_shuffle)

        # save the latent component correlation
        mode_r_perm = []
        for component in range(n_components):
            mode_r_perm.append(
                np.corrcoef(cca_perm.x_scores_[:, component],
                            cca_perm.y_scores_[:, component])[0, 1])

        # take the max r value
        max_r.append(np.max(mode_r_perm))

    # Compute adjusted p-values: the proportion of permuted max-r values
    # that meet or exceed each observed mode correlation
    max_r = np.asarray(max_r)
    p_adj = []
    for component in range(n_components):
        p_adj.append(np.mean(max_r >= cca.mode_r[component]))

    return cca, p_adj
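For orientation, here is a minimal driver for do_cca with the imports the snippet relies on spelled out; the shapes and random data are illustrative assumptions, not part of the original example.

# Illustrative sketch: random stand-ins for components, behaviour and edges.
import numpy as np
from sklearn.cross_decomposition import CCA
from sklearn.utils import shuffle
from tqdm import tqdm

rng = np.random.RandomState(0)
X = rng.randn(100, 20)        # e.g. PCA-reduced connectivity components
y = rng.randn(100, 5)         # behavioural measures
X_orig = rng.randn(100, 300)  # original edge-level features

cca, p_adj = do_cca(X, y, X_orig, n_components=3, permutations=100)
print(cca.mode_r, p_adj)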
Example #2
def run(data, template, reference):
    n_components = 1
    corrs = []
    cca = CCA(n_components)
    for target in range(0, len(st.frequencies)):
        X = data
        cor, _, xweights, yweights = su.find_correlation_for_one_pair(
            cca, 1, data, template[target, :, :])
        cor_ref, _, xweights_ref, yweights_ref = su.find_correlation_for_one_pair(
            cca, 1, data, reference[target, :, :])
        cor_ref_tem, _, xweights_ref_tem, yweights_ref_tem = su.find_correlation_for_one_pair(
            cca, 1, np.squeeze(template[target, :, :]),
            reference[target, :, :])
        corr_t = get_cor_template(X, template[target, :, :], xweights_ref,
                                  xweights_ref)
        corr_ref_temp = get_cor_template(X, template[target, :, :],
                                         xweights_ref_tem, xweights_ref_tem)
        corr_temp = get_cor_template(np.squeeze(template[target, :, :]),
                                     template[target, :, :], xweights,
                                     yweights)

        results = [cor, cor_ref, corr_t, corr_ref_temp, corr_temp]
        # score for this target: sum of signed squared correlations
        corsum = sum(np.sign(r) * r**2 for r in results)
        corrs.append(corsum)
    pre = np.argmax(corrs)
    return pre
Example #3
    def fit(self, model):
        """Fits the model and creates a (random) orthogonal transformation.

        Args:
            model: string; a value in ["logistic", "svm", "linear", "svr", "cca"]
        """
        if model == 'linear':
            self.mod = LinearRegression()
        elif model == 'logistic':
            self.mod = LogisticRegression(penalty='none',
                                          class_weight='balanced',
                                          solver='saga')
            #self.mod = LogisticRegression()
        elif model == 'svr':
            self.mod = SVR(kernel='linear')
        elif model == 'svm':
            self.mod = SVC(C=1.0, kernel='linear')
        elif model == 'cca':
            self.mod = CCA(n_components=1,
                           scale=True,
                           max_iter=500,
                           tol=1e-06,
                           copy=True)
            self.mod.intercept_ = 0.0
        self.mod.fit(self.Xrel, self.Yrel)
        # now compute T with a random orthogonal basis
        # todo potential bug: what to do with the intercept_?
        w0 = self.mod.coef_  # + self.mod.intercept_
        if len(w0.shape) < 2:
            w0 = w0.reshape(1, -1)
        w0 = w0 / np.linalg.norm(w0)
        Wcompl = null_space(w0)
        self.T = np.hstack((w0.transpose(), Wcompl))
        assert np.allclose(self.T.transpose().dot(self.T),
                           np.eye(self.T.shape[0])), "self.T not orthonormal."
Example #4
    def compCorrCoefs(self, learningSet, EEGSignals):
        n_components = 1
        cca = CCA(n_components)

        cca.fit(learningSet[0:3].T, EEGSignals.T)
        U, V = cca.transform(learningSet[0:3].T, EEGSignals.T)
        correlation14 = abs(np.corrcoef(U.T, V.T)[0, 1])

        cca.fit(learningSet[3:6].T, EEGSignals.T)
        U, V = cca.transform(learningSet[3:6].T, EEGSignals.T)
        correlation28 = abs(np.corrcoef(U.T, V.T)[0, 1])

        cca.fit(learningSet[6:9].T, EEGSignals.T)
        U, V = cca.transform(learningSet[6:9].T, EEGSignals.T)
        correlation8 = abs(np.corrcoef(U.T, V.T)[0, 1])

        return correlation14, correlation28, correlation8
Example #5
    def train_eval(self, train_index, test_index, ignore_eval=False):
        normalized_train, normalized_test = normalize_by_train(self.source[train_index], self.source[test_index])

        if self.comp is not None:
            if self.use_scikit is not None:
                if self.use_scikit == 'cca':
                    dim_reduction = CCA(n_components=self.comp)
                else:
                    dim_reduction = PCA(n_components=self.comp)
                # fit cca according to train data only
                dim_reduction.fit(normalized_train, self.target[train_index])
                # convert source into lower dimensional representation
                normalized_train = dim_reduction.transform(normalized_train)
                normalized_test = dim_reduction.transform(normalized_test)
            else:
                _, wa, _ = tutorial_on_cca(normalized_train, self.target[train_index])
                normalized_train = normalized_train @ wa[:, :self.comp]
                normalized_test = normalized_test @ wa[:, :self.comp]

        model = self.build_model()
        model.fit(normalized_train, self.target[train_index])

        prediction = model.predict(normalized_test)

        # res_df.to_csv(f"{self.out_name}/res1.csv")
        if not ignore_eval:
            return self.evaluate_regression(prediction, test_index)
        else:
            return prediction
Example #6
    def __init__(self,
                 model_name,
                 model_type,
                 n_clusters=None,
                 n_components=None,
                 n_lag=None,
                 regularisation=None):
        self.n_lag = n_lag
        self.model_name = model_name
        self.model_type = model_type
        # self.n_clusters = n_clusters
        # self.clustering = NeuronClustering(self.n_clusters, signal_correlation)

        if model_name == 'cca':
            self.n_components = n_components
            self.model = CCA(n_components=self.n_components)
        elif model_name == 'linear-regression':
            if regularisation is None:
                self.model = LinearRegression()
            elif regularisation == 'l1':
                self.model = Lasso()
            elif regularisation == 'l2':
                self.model = Ridge()
            elif regularisation == 'l1l2':
                self.model = ElasticNet()
            else:
                raise NotImplementedError
Example #7
def test_cca():
    """Test CCA."""
    # Compare results with Matlab
    # x = np.random.randn(1000, 11)
    # y = np.random.randn(1000, 9)
    # x = demean(x).squeeze()
    # y = demean(y).squeeze()
    mat = loadmat('./tests/data/ccadata.mat')
    x = mat['x']
    y = mat['y']
    A2 = mat['A2']
    B2 = mat['B2']

    A1, B1, R = nt_cca(x, y)  # if mean(A1(:).*A2(:))<0; A2=-A2; end
    X1 = np.dot(x, A1)
    Y1 = np.dot(y, B1)
    C1 = tscov(np.hstack((X1, Y1)))[0]

    # Sklearn CCA
    cca = CCA(n_components=9, scale=False, max_iter=1000000)
    X2, Y2 = cca.fit_transform(x, y)
    # C2 = tscov(np.hstack((X2, Y2)).T)[0]
    # import matplotlib.pyplot as plt
    # f, (ax1, ax2) = plt.subplots(2, 1)
    # ax1.imshow(C1)
    # ax2.imshow(C2)
    # plt.show()
    # assert_almost_equal(C1, C2, decimal=4)

    # Compare with matlab
    X2 = np.dot(x, A2)
    Y2 = np.dot(y, B2)
    C2 = tscov(np.hstack((X2, Y2)))[0]

    assert_almost_equal(C1, C2)
Example #8
def compute_SVCCA(activation1, activation2):
    '''
    activation1 - Activation array 1 as a numpy array of size n x m1
    activation2 - Activation array 2 as a numpy array of size n x m2
    '''
    pca_r = 40  # value from Shi et al NeurIPS 2019
    n = activation1.shape[0]
    assert n == activation2.shape[0], \
        "Activation arrays have different numbers of samples!"
    if pca_r > activation1.shape[1]:
        print("Activation array 1 has fewer neurons; reducing the number of PCs to",
              activation1.shape[1])
        pca_r = activation1.shape[1]
    if pca_r > activation2.shape[1]:
        print("Activation array 2 has fewer neurons; reducing the number of PCs to",
              activation2.shape[1])
        pca_r = activation2.shape[1]

    pca1 = PCA(n_components=pca_r)
    red_activation1 = pca1.fit_transform(activation1)
    pca2 = PCA(n_components=pca_r)
    red_activation2 = pca2.fit_transform(activation2)
    cca = CCA(n_components=pca_r)
    red_activation1_c, red_activation2_c = cca.fit_transform(
        red_activation1, red_activation2)
    corr_values = np.zeros(pca_r)
    for idx in range(pca_r):
        corr_values[idx] = np.corrcoef(
            red_activation1_c[:, idx],
            red_activation2_c[:, idx])[0, 1]  # get the off-diagonal element

    return np.mean(corr_values)
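A small self-contained check of compute_SVCCA on random activations, assuming the numpy, PCA and CCA imports the function needs; the sizes are arbitrary.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA

rng = np.random.RandomState(0)
act1 = rng.randn(500, 60)  # n x m1 activations
act2 = rng.randn(500, 50)  # n x m2 activations
print(compute_SVCCA(act1, act2))  # mean canonical correlation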
Example #9
def perform(arrs):
    blocks_cnt = sum([arr.shape[1] * arr.shape[2] for arr in arrs])
    X = np.zeros((blocks_cnt, 16))
    Y = np.zeros((blocks_cnt, 64))
    for c in range(3):
        offset = 0
        for i, arr in enumerate(arrs):
            height = arr.shape[1]
            width = arr.shape[2]
            for y in range(height):
                for x in range(width):
                    # context features: right edge of the left-neighbour
                    # block and bottom edge of the block above
                    X[offset + y * width + x] = np.hstack(
                        [arr[c][y][x - 1][:, -1], arr[c][y - 1][x][-1, :]])
                    Y[offset + y * width + x] = arr[c][y][x].ravel()
            offset += height * width

        X_mc = (X - X.mean()) / (X.std())
        Y_mc = (Y - Y.mean()) / (Y.std())

        ca = CCA(n_components=1)
        ca.fit(X_mc, Y_mc)

        print(f'\nColor {c}:')
        weights = ca.x_weights_.ravel()
        print(weights.shape)
        print(', '.join(map(str, weights)))
        print(ca.n_iter_)
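perform appears to expect each entry of arrs to be indexable as arr[c][y][x] yielding an 8 x 8 block (64 values per block, two 8-value edges as context). Under that assumption, a hypothetical driver:

# Hypothetical input layout: (colour, height, width, 8, 8) blocks.
import numpy as np
from sklearn.cross_decomposition import CCA

arrs = [np.random.rand(3, 4, 5, 8, 8)]
perform(arrs)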
Example #10
def cca_classify(X_eeg_signals, Yi_frequency_signals):
    cca = CCA(1)
    corr_results = []
    for fr in range(0, Yi_frequency_signals.shape[0]):
        X = X_eeg_signals
        Yi = Yi_frequency_signals[fr, :, :]
        # compute the correlation between X and Yi
        cca.fit(X.T, np.squeeze(Yi).T)
        X_train_r, Yi_train_r = cca.transform(X.T, np.squeeze(Yi).T)
        corr = np.corrcoef(X_train_r[:, 0], Yi_train_r[:, 0])[0, 1]
        # record the correlation of X with each Yi
        corr_results.append(corr)
    if corr_results[np.argmax(corr_results)] > 0.50:
        # decision threshold
        global index
        global all_data
        classify_result = np.argmax(corr_results) + 1
        print(corr_results)
        index += 1
        # accumulate the data (pd.concat replaces the removed DataFrame.append)
        TT = pd.DataFrame(X_eeg_signals)
        all_data = pd.concat([all_data, TT[1:9].T])
        if index == 50:
            # persist the accumulated data
            all_data = pd.DataFrame(all_data)
            all_data.to_csv('./j_8_all_data.csv', index=False)
        return classify_result
    else:
        return -1
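cca_classify mutates the module-level globals index and all_data; a hypothetical driver that sets those up with random data (all shapes assumed):

import numpy as np
import pandas as pd
from sklearn.cross_decomposition import CCA

index = 0
all_data = pd.DataFrame()
X = np.random.randn(9, 512)     # channels x samples
Y = np.random.randn(3, 6, 512)  # targets x harmonics x samples
print(cca_classify(X, Y))       # 1-based target index, or -1 below threshold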
Example #11
    def Initialize(self):
        self.configs = self.Config[0]
        self.Channellist = self.configs['Channellist']

        self.Trigger = {'state': 0, 'code': 0}  # register the trigger
        self.eeg = np.empty(0)
        self.trigger = np.empty(0)

        # self.mudname = rootdir+'/src/mud/MUD.mat'
        # self.sigparm = sio.loadmat(self.mudname)
        # self.FilterLR = self.sigparm['FilterLR']

        n = 3
        MdB = 20
        bprange = np.array([6.0, 35.0])
        Ws = bprange / (self.configs['SamplingRate'] / 2)
        self.b, self.a = scipy.signal.iirfilter(n, Ws, rs=MdB,
                                                ftype='cheby2')  # Hd_Bandpass
        self.prodata = np.empty(0)
        self.frequency = [9, 11.7, 14.5]
        t = np.arange(0.005, 5.5, 0.005)
        self.Y = {}
        for i in range(len(self.frequency)):
            y = np.array([
                np.sin(2 * np.pi * self.frequency[i] * t),
                np.cos(2 * np.pi * self.frequency[i] * t),
                np.sin(4 * np.pi * self.frequency[i] * t),
                np.cos(4 * np.pi * self.frequency[i] * t),
                np.sin(6 * np.pi * self.frequency[i] * t),
                np.cos(6 * np.pi * self.frequency[i] * t)
            ])
            self.Y[str(i)] = y
        self.rank_min = min(len(self.Channellist), self.Y[str(0)].shape[0])
        self.cca = CCA(n_components=self.rank_min)
        self.score_threshold = 0.25  # TODO: load from a .mat file
Example #12
def doCCA(metrics, color):
    inp = np.array([metrics[m] for m in metricsInput2]).T.astype(float)
    out = np.array([metrics[m] for m in metricsOutput2]).T.astype(float)
    inp0 = np.zeros(len(metricsInput2))
    out0 = np.zeros(len(metricsOutput2))
    inp = np.vstack((inp, inp0))
    out = np.vstack((out, out0))
    cca = CCA(n_components=1, scale=False)
    cca.fit(inp, out)
    inp_cca = inp.dot(cca.x_weights_)
    out_cca = out.dot(cca.y_weights_)

    # Create linear regression object
    regr = linear_model.LinearRegression()
    # Train the model using the training sets
    regr.fit(inp_cca, out_cca)
    cca_regr = regr.predict(inp_cca)
    # The coefficients
    print('Coefficients: \n', regr.coef_)

    plt.scatter(inp_cca, out_cca, c=color)
    plt.plot(inp_cca, cca_regr, color=color, linewidth=0.5)

    logging.info('cca')
    logging.info(cca.x_rotations_)
    logging.info(cca.y_rotations_)
Example #13
def distance(s1, s2, type="dist", x=None):
    # TODO: type check

    if type == "dist":
        # projection matrices onto the column spaces of s1 and s2
        mat_1 = np.matmul(
            np.matmul(s1, np.linalg.inv(np.matmul(np.transpose(s1), s1))),
            np.transpose(s1))
        mat_2 = np.matmul(
            np.matmul(s2, np.linalg.inv(np.matmul(np.transpose(s2), s2))),
            np.transpose(s2))
        # Frobenius norm of the difference between the two projections
        return math.sqrt(np.sum(np.subtract(mat_1, mat_2)**2))

    if type == "trace":
        mat_1 = np.matmul(
            np.matmul(s1, np.linalg.inv(np.matmul(np.transpose(s1), s1))),
            np.transpose(s1))
        mat_2 = np.matmul(
            np.matmul(s2, np.linalg.inv(np.matmul(np.transpose(s2), s2))),
            np.transpose(s2))
        return np.sum(np.diag(np.matmul(mat_1, mat_2))) / helper.n_col(s1)

    if type == "canonical":
        if x is None:
            raise Exception("x must be specified if type = 'canonical'")
        if helper.n_col(x) != helper.n_row(s1):
            raise Exception("Dimension of x is not correct.")

        # first canonical correlation between the two projected data sets
        cca = CCA(n_components=1)
        u, v = cca.fit_transform(np.matmul(x, s1), np.matmul(x, s2))
        return np.corrcoef(u[:, 0], v[:, 0])[0, 1]
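A quick sanity check for the "dist" branch, which needs only numpy and math; the random orthonormal bases here are illustrative:

import math
import numpy as np

s1 = np.linalg.qr(np.random.randn(10, 3))[0]  # orthonormal basis 1
s2 = np.linalg.qr(np.random.randn(10, 3))[0]  # orthonormal basis 2
print(distance(s1, s2, type="dist"))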
Example #14
    def __init__(self):
        # algorithm name
        self._name = 'cca'

        # base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir))

        # suppress warning messages
        warnings.filterwarnings('ignore')

        # load the raw data
        data = pd.read_csv(self._f_path +
                           "/regression/resource/regression_sample.csv",
                           sep=",",
                           encoding="utf-8")

        # masks for the train and test periods
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # split off the training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # split off the test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # declare the model
        self._model = CCA()

        # fit the model
        self._model.fit(self._x_train, self._y_train)
Example #15
    def train_eval(self, train_index, test_index):
        train_source, test_source = self.source[train_index], self.source[
            test_index]
        train_target, test_target = self.target[train_index], self.target[
            test_index]

        train_source, test_source = scale_train_test(train_source, test_source)
        train_target, _ = scale_train_test(train_target, test_target)

        # rho, w_t, w_s, _ = evaluate_cca_wa_wb(train_target, train_source)
        cca = CCA(n_components=min(train_source.shape[1],
                                   train_target.shape[1]),
                  max_iter=1000)
        cca.fit(train_source, train_target)
        w_s = cca.x_rotations_
        w_t = cca.y_rotations_

        predicted_target = test_source @ w_s @ np.linalg.pinv(w_t)
        predicted_target = unscale_prediction(train_target, predicted_target)

        if self.target_encoder is not None:
            test_target = self.original_target[test_index]
            predicted_target = self.target_encoder.decode(
                torch.as_tensor(predicted_target)).detach().numpy()

        scores = np.zeros(self.original_target.shape[1])
        for i in range(self.original_target.shape[1]):
            predicted = predicted_target[:, i]
            actual = test_target[:, i]
            r, pval = pearsonr(predicted, actual)
            scores[i] = r

        return scores
Example #16
def rdc1(x, y, k=10, s=0.2):
    if len(x.shape) == 1: x = x.reshape((-1, 1))
    if len(y.shape) == 1: y = y.reshape((-1, 1))

    cx = np.column_stack([rankdata(xc, method='ordinal')
                          for xc in x.T]) / float(x.size)
    cy = np.column_stack([rankdata(yc, method='ordinal')
                          for yc in y.T]) / float(y.size)

    # Add a vector of ones so that w.x + b is just a dot product
    O = np.ones(cx.shape[0])
    X = np.column_stack([cx, O])
    Y = np.column_stack([cy, O])

    Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
    Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)
    X = np.dot(X, Rx)
    Y = np.dot(Y, Ry)
    X = np.sin(X)
    Y = np.sin(Y)

    cca = CCA(n_components=1)
    xc, yc = cca.fit_transform(X, Y)
    result = np.corrcoef(xc.T, yc.T)[0, 1]
    print(result)
    return result
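A short sanity check for rdc1 on strongly dependent data, with the imports the function needs; the random projections make the output stochastic, but it should be close to 1 here:

import numpy as np
from scipy.stats import rankdata
from sklearn.cross_decomposition import CCA

x = np.random.randn(1000)
y = np.sin(x) + 0.1 * np.random.randn(1000)
r = rdc1(x, y)  # near 1 for strongly dependent variables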
Example #17
def visualize_with_cca(X, y, title):
    cca = CCA(n_components=2)
    cca.fit(X, y)
    X_cca = cca.transform(X)
    Xax = X_cca[:, 0]
    Yax = X_cca[:, 1]
    labels = (y > 0).astype(int)
    cdict = {0: 'red', 1: 'green'}
    labl = {0: 'home_loss', 1: 'home_win'}
    marker = {0: '*', 1: 'o'}
    alpha = {0: .3, 1: .5}

    fig, ax = plt.subplots(figsize=(7, 5))
    fig.patch.set_facecolor('white')

    for l in np.unique(labels):
        ix = np.where(labels == l)
        ax.scatter(Xax[ix],
                   Yax[ix],
                   c=cdict[l],
                   s=40,
                   label=labl[l],
                   marker=marker[l],
                   alpha=alpha[l])

    plt.xlabel("First Canonical Component", fontsize=14)
    plt.ylabel("Second Canonical Component", fontsize=14)
    plt.legend()
    plt.title(title)
    plt.show()
Example #18
def find_correlation_cca_method1(signal, reference_signals, n_components=2):
    r"""
    Perform canonical correlation analysis (CCA)
    Reference: https://github.com/aaravindravi/Brain-computer-interfaces/blob/master/notebook_12_class_cca.ipynb

    Args:
        signal : ndarray, shape (channel,time)
            Input signal in time domain
        reference_signals : ndarray, shape (len(flick_freq),2*num_harmonics,time)
            Required sinusoidal reference templates corresponding to the flicker frequency for SSVEP classification
        n_components : int, default: 2
            number of components to keep (for sklearn.cross_decomposition.CCA)
    Returns:
        result : array, size: len(flick_freq)
            Maximum canonical correlation for each reference signal
    Dependencies:
        CCA : sklearn.cross_decomposition.CCA
        np : numpy package
    """

    cca = CCA(n_components)
    corr = np.zeros(n_components)
    result = np.zeros(reference_signals.shape[0])
    for freq_idx in range(0, reference_signals.shape[0]):
        cca_x = signal.T
        cca_y = np.squeeze(reference_signals[freq_idx, :, :]).T
        cca.fit(cca_x, cca_y)
        a, b = cca.transform(cca_x, cca_y)
        for ind_val in range(0, n_components):
            corr[ind_val] = np.corrcoef(a[:, ind_val], b[:, ind_val])[0, 1]
        result[freq_idx] = np.max(corr)
    return result
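An illustrative call with random data whose shapes follow the docstring (a real use would pass band-passed EEG and sinusoidal reference templates):

import numpy as np
from sklearn.cross_decomposition import CCA

sig = np.random.randn(8, 512)      # (channel, time)
refs = np.random.randn(4, 6, 512)  # (len(flick_freq), 2*num_harmonics, time)
result = find_correlation_cca_method1(sig, refs)
print(np.argmax(result))           # index of the best-matching frequency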
Example #19
def CCA_project_vectors(args,
                        src_dico,
                        tgt_dico,
                        src_full,
                        tgt_full,
                        src_train,
                        tgt_train,
                        NUM_dim=100):

    print('Exporting embeddings...')
    OutputDir = "output/{}-{}/".format(args.src_lang, args.tgt_lang)
    if not os.path.exists(OutputDir):
        os.makedirs(OutputDir)

    cca = CCA(n_components=NUM_dim)
    print("Fitting...")
    cca.fit(src_train, tgt_train)
    print(cca.get_params())
    X_c, Y_c = cca.transform(src_full, tgt_full)
    src_out, tgt_out = utils.norm_embeddings(X_c), utils.norm_embeddings(Y_c)
    print("Exporting embeddings...")
    utils.export_embeddings(src_dico[0], src_out,
                            OutputDir + 'projected.{}'.format(args.src_lang))
    utils.export_embeddings(tgt_dico[0], tgt_out,
                            OutputDir + 'projected.{}'.format(args.tgt_lang))
    print("work over!")
Example #20
    def __init__(self, controller=None, num_targets=8, num_seconds=3):

        self.controller = controller

        self.num_targets = num_targets
        self.num_seconds = num_seconds  # number of seconds for a stimulus cycle

        self.sampling_rate = 128.0
        # frequencies calculated by frames/len(array) as seen in flicker_patterns.txt
        # or frequencies are set from paper we are basing our experiment from
        # check for either 4 or 8 targets
        if self.num_targets == 8:
            self.frequencies = np.asarray(
                [43.0, 37.0, 29.0, 21.0, 17.0, 11.0, 8.0, 5.0])
        elif self.num_targets == 4:
            # up, down, right, left from paper
            # NOTE: using old csv files with these new frequencies is not valid
            self.frequencies = [15.0, 12.0, 8.57, 5.45]
        else:
            print("cca did not get a good target number.")

        # prediction should be targets 1 to num_targets, not 0 to num_targets - 1
        # based on command_to_keyboard action
        self.prediction = None

        self.ref_signals = []
        self.getAllReferenceSignals()

        self.cca = CCA(n_components=1)

        self.fig = None
        self.ax = None
        self.plotter = None

        self.filter_obj = Filter()
Example #21
def plot_subfigure(X, Y, subplot, title, transform):
    if transform == "pca":
        X = PCA(n_components=2).fit_transform(X)
    elif transform == "cca":
        X = CCA(n_components=2).fit(X[0:len(Y),:], Y).transform(X)
    else:
        raise ValueError

    min_x = np.min(X[:, 0])
    max_x = np.max(X[:, 0])

    min_y = np.min(X[:, 1])
    max_y = np.max(X[:, 1])

    plt.subplot(1, 2, subplot)
    plt.title(title)

    zero_class = np.where(Y[:, 0])
    one_class = np.where(Y[:, 1])
    two_class = np.where(Y[:, 2])
    three_class = np.where(Y[:, 3])
    plt.scatter(X[:, 0], X[:, 1], s=40, c='gray')

    plt.scatter(X[zero_class, 0], X[zero_class, 1], s=160, edgecolors='b',
                facecolors='none', linewidths=2, label='Class 1')
Example #22
    def map_spaces(self, algo, src_mapped_embed=None, trg_mapped_embed=None):

        # (There may be duplicates in self.shared_vocab_src and/or self.shared_vocab_trg,
        # swap_vocab can be used to only inspect one-to-one translations)
        src_embed = self.model_src[self.shared_vocab_src]
        trg_embed = self.model_trg[self.shared_vocab_trg]

        os.makedirs(algo, exist_ok=True)

        if algo == "procrustes":
            logging.info(
                "Calculating Rotation Matrix (Procrustes Problem) and applying it to first embedding"
            )
            #ortho, _ = orthogonal_procrustes(src_embed, trg_embed)
            # does the same as
            u, _, vt = np.linalg.svd(trg_embed.T.dot(src_embed))
            w = vt.T.dot(u.T)
            self.model_src.vectors.dot(w, out=self.model_src.vectors)

        elif algo == "noise":
            logging.info(
                "Calculating Rotation Matrix with noise aware algorithm and applying it to first embedding"
            )
            transform_matrix, alpha, clean_indices, noisy_indices = noise_aware(
                src_embed, trg_embed)
            #write cleaned vocab to file
            with open("vocab.clean.txt", 'w') as v:
                for src, trg in np.asarray(self.shared_vocab)[clean_indices]:
                    v.write("{}\t{}\n".format(src, trg))
            self.model_src.vectors.dot(transform_matrix,
                                       out=self.model_src.vectors)
            logging.info("Percentage of clean indices: {}".format(alpha))

        elif algo == "cca":
            logging.info(
                "Calculating Mapping based on CCA and applying it to both embeddings"
            )
            cca = CCA(n_components=100, max_iter=5000)
            cca.fit(src_embed, trg_embed)
            self.model_src.vectors, self.model_trg.vectors = cca.transform(
                self.model_src.vectors, self.model_trg.vectors)

        elif algo == "gcca":
            logging.info(
                "Calculating Mapping based on GCCA and applying it to both embeddings"
            )
            gcca = GCCA()
            gcca.fit([src_embed, trg_embed])
            transform_l = gcca.transform_as_list(
                (self.model_src.vectors, self.model_trg.vectors))
            # gcca computes positive and negative correlations (eigenvalues), sorted in ascending order.
            # We are only interested in the positive portion
            self.model_src.vectors = transform_l[0][:, 100:]
            self.model_trg.vectors = transform_l[1][:, 100:]

        # save transformed model(s)
        if src_mapped_embed:
            self.model_src.save(os.path.join(algo, src_mapped_embed))
        if trg_mapped_embed:
            self.model_trg.save(os.path.join(algo, trg_mapped_embed))
Example #23
def rdc_cca(indexes):
    i, j, rdc_features = indexes
    cca = CCA(n_components=1, max_iter=CCA_MAX_ITER)
    X_cca, Y_cca = cca.fit_transform(rdc_features[i], rdc_features[j])
    rdc = np.corrcoef(X_cca.T, Y_cca.T)[0, 1]
    # logger.info(i, j, rdc)
    return rdc
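rdc_cca assumes a module-level CCA_MAX_ITER constant; a minimal sketch with an arbitrarily chosen value:

import numpy as np
from sklearn.cross_decomposition import CCA

CCA_MAX_ITER = 500  # assumed value; defined elsewhere in the original module
feats = [np.random.randn(200, 3), np.random.randn(200, 4)]
print(rdc_cca((0, 1, feats)))  # RDC between feature sets 0 and 1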
Example #24
def new_cca_ds(A, B, n_components=1):
    # http://onlinelibrary.wiley.com/doi/10.1002/cem.2637/abstract
    model = CCA(n_components=n_components, scale=False).fit(B, A)
    F1 = np.linalg.pinv(model.x_scores_).dot(model.y_scores_)
    F2 = np.linalg.pinv(model.y_scores_).dot(A)
    P = ct.multi_dot((model.x_weights_, F1, F2))
    return P, B.dot(P)
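new_cca_ds references ct.multi_dot, which is taken here to behave like numpy.linalg.multi_dot; under that assumption, a sketch of transferring spectra between two instruments with random stand-in data:

# Sketch assuming `ct` is numpy.linalg (so ct.multi_dot exists).
import numpy as np
import numpy.linalg as ct
from sklearn.cross_decomposition import CCA

A = np.random.randn(50, 20)  # master-instrument spectra
B = np.random.randn(50, 20)  # slave-instrument spectra
P, B_transferred = new_cca_ds(A, B, n_components=2)
print(P.shape, B_transferred.shape)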
Example #25
    def fit_cca(self, outfile=''):

        # fits linear CCA constraint and replaces pretrained name embeddings with CCA transformed embeddings

        self.load_embeddings()
        self.extract_pretrained_prototype_embeddings()

        items, vectors = zip(
            *[(k, v) for k, v in self.pretrained_prototype_embeddings.items()
              if k in self.exemplar_to_concept])
        concept_embs = Reach(vectors, items)

        train_vectors = []
        for x in items:
            train_vectors.append(self.train_embeddings[x])
        train_vectors = Reach.normalize(train_vectors)

        cca = CCA(n_components=self.train_embeddings.size, max_iter=10000)
        cca.fit(train_vectors, concept_embs.norm_vectors)

        # transform all name embeddings using the CCA mapping
        all_name_embeddings = deepcopy(self.pretrained_name_embeddings)
        items = [x for _, x in sorted(all_name_embeddings.indices.items())]
        projected_name_embeddings = cca.transform(
            all_name_embeddings.norm_vectors)
        new_name_embeddings = Reach(projected_name_embeddings, items)

        self.pretrained_name_embeddings = new_name_embeddings
        self.load_embeddings()

        if outfile:
            with open('{}_cca.p'.format(outfile), 'wb') as f:
                pickle.dump(cca, f)
Example #26
def get_cca(X, Y, n_comp=10):
    cca = CCA(n_components=n_comp)
    print("X.shape", X.shape)
    print("Y.shape", Y.shape)
    x_scores, y_scores = cca.fit_transform(X, Y)

    # Manual Transform
    X -= cca.x_mean_
    X /= cca.x_std_
    Y -= cca.y_mean_
    Y /= cca.y_std_
    calc_scores_x = np.dot(X, cca.x_rotations_)
    calc_scores_y = np.dot(Y, cca.y_rotations_)
    # id_x = cca.x_rotations_ @ linalg.pinv2(cca.x_rotations_)
    # id_y = cca.y_rotations_ @ linalg.pinv2(cca.y_rotations_)

    print("x_scores.shape", x_scores.shape)
    print("y_scores.shape", y_scores.shape)

    correlations = np.diag(
        np.corrcoef(x_scores, y_scores, rowvar=False)[:n_comp, n_comp:])
    calc_correlations = np.diag(
        np.corrcoef(calc_scores_x, calc_scores_y, rowvar=False)[:n_comp,
                                                                n_comp:])

    print(correlations)
    print(calc_correlations)
    return x_scores, y_scores
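get_cca relies on the x_mean_/x_std_ attributes that older scikit-learn releases exposed and newer ones have removed, so this sketch targets those older versions; the data is random:

import numpy as np
from sklearn.cross_decomposition import CCA

X = np.random.randn(200, 15)
Y = np.random.randn(200, 12)
x_scores, y_scores = get_cca(X, Y, n_comp=3)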
Example #27
def main(args):
    (training_file, label_file, test_file, u_file, e, c, output_file,
     components) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [float(line.strip()) for line in open(label_file)]

    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)

    X_test = load_feat(test_file)
    # NOTE: `test_label` is assumed to be defined elsewhere in the original module
    y_test = [float(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    X_test[np.isnan(X_test)] = 0.0
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)
    s = min(len(X_training), len(U))

    cca = CCA(n_components=components, max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)

    svr = SVR(C=c, epsilon=e, kernel='rbf')
    svr.fit(X_cca, y_training[:s])
    pred = svr.predict(X_test_cca)

    with open(output_file, 'w') as output:
        for p in pred:
            print(p, file=output)
    return
Example #28
def cca(m1, m2, preprocessing=None):
    """
    Use CCA to decompose two views and plot result.

    Params:
        m1, m2: Every column is a example with every row as a feature.
        preprocessing: If None, we don't do pre-processing; if 'orth', we adjust center to 0 and perform PCA.
    """
    # Adjust means to be 0 and perform PCA.
    if preprocessing == "orth":
        # Zero means.
        m1 -= np.mean(m1, axis=1, keepdims=True)

        # print("m1=", np.sum(m1, axis=1))
        m2 -= np.mean(m2, axis=1, keepdims=True)

        # PCA.

    cca = CCA(n_components=3, max_iter=100)
    cca.fit(m1.T, m2.T)

    X_c = cca.transform(m1.T)

    fig, ax = plt.subplots()
    ax.set_title('Fig.2.(c)')
    # ax.set_color_cycle(['blue', 'green', 'red'])
    ax.set_prop_cycle('color', ['blue', 'red', 'green'])
    ax.plot(X_c)
    # ax.plot(Y_c)
    plt.show()
Example #29
def cca_coef(X, Y):
    '''
    Apply CCA method to compute inter_channel correlation
    :param X: data 1 (n_events, n_epochs, n_chans, n_times)
    :param Y: data 2 (actually equal to data 1)
    '''
    cca = CCA(n_components=1)
Example #30
def fbcca(eeg, list_freqs, fs, num_harms=3, num_fbs=5):

    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25

    num_targs, _, num_smpls = eeg.shape  # e.g. 40 targets (frequency-phase combinations to predict)
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)  # initialize CCA

    # result matrix
    r = np.zeros((num_fbs, num_targs))
    results = np.zeros(num_targs)

    for targ_i in range(num_targs):
        test_tmp = np.squeeze(eeg[targ_i, :, :])  # deal with one target at a time
        for fb_i in range(num_fbs):  # iterate over the filter banks
            testdata = filterbank(test_tmp, fs, fb_i)  # data after filtering
            for class_i in range(num_targs):
                refdata = np.squeeze(y_ref[class_i, :, :])  # reference signal of the corresponding frequency
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                # rows are observations and columns are variables, and both
                # inputs need the same number of rows, hence the transposes;
                # the outputs are the maximally correlated linear combinations
                r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))  # r and p-value; np.squeeze adapts the shapes to the API
                r[fb_i, class_i] = r_tmp

        rho = np.dot(fb_coefs, r)  # weighted sum of r across all filter banks
        tau = np.argmax(rho)  # index of the maximum, i.e. the predicted target
        results[targ_i] = tau
    return results