Example #1
def rdc1(x, y, k=10, s=0.2):
    if len(x.shape) == 1: x = x.reshape((-1, 1))
    if len(y.shape) == 1: y = y.reshape((-1, 1))

    cx = np.column_stack([rankdata(xc, method='ordinal')
                          for xc in x.T]) / float(x.size)
    cy = np.column_stack([rankdata(yc, method='ordinal')
                          for yc in y.T]) / float(y.size)

    # Add a vector of ones so that w.x + b is just a dot product
    O = np.ones(cx.shape[0])
    X = np.column_stack([cx, O])
    Y = np.column_stack([cy, O])

    Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
    Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)
    X = np.dot(X, Rx)
    Y = np.dot(Y, Ry)
    # Apply the non-linear (sine) basis to both random projections
    X = np.sin(X)
    Y = np.sin(Y)
    cca = CCA(n_components=1)
    xc, yc = cca.fit_transform(X, Y)
    result = np.corrcoef(xc.T, yc.T)[0, 1]
    print(result)
    return result
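A quick smoke test for rdc1 on synthetic data (a hypothetical sketch; it assumes the same imports the snippet relies on: numpy as np, scipy.stats.rankdata and sklearn's CCA):

rng = np.random.RandomState(0)
x = rng.uniform(-1, 1, 500)
y = np.cos(3 * x) + 0.1 * rng.randn(500)  # strong non-linear dependence
print(rdc1(x, y))  # close to 1 despite near-zero linear correlation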
def get_cca(X, Y, n_comp=10):
    cca = CCA(n_components=n_comp)
    print("X.shape", X.shape)
    print("Y.shape", Y.shape)
    x_scores, y_scores = cca.fit_transform(X, Y)

    # Manual Transform
    X -= cca.x_mean_
    X /= cca.x_std_
    Y -= cca.y_mean_
    Y /= cca.y_std_
    calc_scores_x = np.dot(X, cca.x_rotations_)
    calc_scores_y = np.dot(Y, cca.y_rotations_)
    # id_x = cca.x_rotations_ @ linalg.pinv2(cca.x_rotations_)
    # id_y = cca.y_rotations_ @ linalg.pinv2(cca.y_rotations_)

    print("x_scores.shape", x_scores.shape)
    print("y_scores.shape", y_scores.shape)

    correlations = np.diag(
        np.corrcoef(x_scores, y_scores, rowvar=False)[:n_comp, n_comp:])
    calc_correlations = np.diag(
        np.corrcoef(calc_scores_x, calc_scores_y, rowvar=False)[:n_comp,
                                                                n_comp:])

    print(correlations)
    print(calc_correlations)
    return x_scores, y_scores
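A short usage sketch for get_cca on synthetic data. Two caveats: the function standardizes X and Y in place (so pass copies), and cca.x_mean_/x_std_ were removed in scikit-learn 0.24, so this assumes an older release:

rng = np.random.RandomState(0)
X = rng.randn(200, 15)
Y = X[:, :12] + 0.5 * rng.randn(200, 12)  # Y shares structure with X
x_scores, y_scores = get_cca(X.copy(), Y.copy(), n_comp=10)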
def fbcca(eeg, list_freqs, fs, num_harms=3, num_fbs=5):
    
    fb_coefs = np.power(np.arange(1,num_fbs+1),(-1.25)) + 0.25
    
    num_targs, _, num_smpls = eeg.shape  # 40 targets (i.e., 40 frequency-phase combinations to predict)
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)  # initialize CCA
    
    # result matrix
    r = np.zeros((num_fbs,num_targs))
    results = np.zeros(num_targs)
    
    for targ_i in range(num_targs):
        test_tmp = np.squeeze(eeg[targ_i, :, :])  # deal with one target at a time
        for fb_i in range(num_fbs):  # iterate over the filter banks
            testdata = filterbank(test_tmp, fs, fb_i)  # data after filtering
            for class_i in range(num_targs):
                refdata = np.squeeze(y_ref[class_i, :, :])  # reference signal for the corresponding target frequency
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                # rows are observations and columns are variables, so both inputs
                # are transposed to give matching numbers of rows; the output is
                # the maximally correlated linear combination of the two sets
                r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))  # returns r and p-value; np.squeeze adapts the shapes to the API
                r[fb_i, class_i] = r_tmp

        rho = np.dot(fb_coefs, r)  # weighted sum of r across the filter banks
        tau = np.argmax(rho)  # index of the maximum, i.e. the most likely target
        results[targ_i] = tau
    return results
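fbcca depends on a cca_reference helper that is not shown. A minimal sketch of the usual SSVEP sine/cosine harmonic reference, assuming the (n_targets, 2*num_harms, n_samples) layout implied by the indexing y_ref[class_i, :, :] above:

def cca_reference(list_freqs, fs, num_smpls, num_harms=3):
    # hypothetical stand-in: stacked sine/cosine harmonics per target frequency
    t = np.arange(num_smpls) / fs
    y_ref = np.zeros((len(list_freqs), 2 * num_harms, num_smpls))
    for targ_i, freq in enumerate(list_freqs):
        rows = []
        for h in range(1, num_harms + 1):
            rows.append(np.sin(2 * np.pi * h * freq * t))
            rows.append(np.cos(2 * np.pi * h * freq * t))
        y_ref[targ_i] = np.array(rows)
    return y_ref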
Example #4
def test_cca():
    """Test CCA."""
    # Compare results with Matlab
    # x = np.random.randn(1000, 11)
    # y = np.random.randn(1000, 9)
    # x = demean(x).squeeze()
    # y = demean(y).squeeze()
    mat = loadmat('./tests/data/ccadata.mat')
    x = mat['x']
    y = mat['y']
    A2 = mat['A2']
    B2 = mat['B2']

    A1, B1, R = nt_cca(x, y)  # if mean(A1(:).*A2(:))<0; A2=-A2; end
    X1 = np.dot(x, A1)
    Y1 = np.dot(y, B1)
    C1 = tscov(np.hstack((X1, Y1)))[0]

    # Sklearn CCA
    cca = CCA(n_components=9, scale=False, max_iter=int(1e6))
    X2, Y2 = cca.fit_transform(x, y)
    # C2 = tscov(np.hstack((X2, Y2)).T)[0]
    # import matplotlib.pyplot as plt
    # f, (ax1, ax2) = plt.subplots(2, 1)
    # ax1.imshow(C1)
    # ax2.imshow(C2)
    # plt.show()
    # assert_almost_equal(C1, C2, decimal=4)

    # Compare with matlab
    X2 = np.dot(x, A2)
    Y2 = np.dot(y, B2)
    C2 = tscov(np.hstack((X2, Y2)))[0]

    assert_almost_equal(C1, C2)
Example #5
def compute_SVCCA(activation1, activation2):
    '''
    activation1 - activation array 1 as a numpy array of size n x m1
    activation2 - activation array 2 as a numpy array of size n x m2
    '''
    pca_r = 40  # value from Shi et al., NeurIPS 2019
    n = activation1.shape[0]
    assert n == activation2.shape[0], "Activation arrays have different numbers of rows!"
    if pca_r > activation1.shape[1]:
        print("Activation array 1 has fewer neurons; reducing number of PCs to",
              activation1.shape[1])
        pca_r = activation1.shape[1]
    if pca_r > activation2.shape[1]:
        print("Activation array 2 has fewer neurons; reducing number of PCs to",
              activation2.shape[1])
        pca_r = activation2.shape[1]

    pca1 = PCA(n_components=pca_r)
    red_activation1 = pca1.fit_transform(activation1)
    pca2 = PCA(n_components=pca_r)
    red_activation2 = pca2.fit_transform(activation2)
    cca = CCA(n_components=pca_r)
    red_activation1_c, red_activation2_c = cca.fit_transform(
        red_activation1, red_activation2)
    corr_values = np.zeros(pca_r)
    for idx in range(pca_r):
        corr_values[idx] = np.corrcoef(
            red_activation1_c[:, idx],
            red_activation2_c[:, idx])[0, 1]  # get the off-diagonal element

    return np.mean(corr_values)
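A hypothetical sanity check for compute_SVCCA: a layer compared against a noisy copy of itself should give a mean correlation near 1, while an unrelated random matrix scores clearly lower:

rng = np.random.RandomState(0)
acts = rng.randn(500, 64)
noisy = acts + 0.1 * rng.randn(500, 64)
print(compute_SVCCA(acts, noisy))               # close to 1
print(compute_SVCCA(acts, rng.randn(500, 64)))  # clearly lower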
Example #6
def main(args):
    (training_file, label_file, test_file, test_label, u_file, e, c,
     output_file, components) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [float(line.strip()) for line in open(label_file)]
   
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)
    
    X_test = load_feat(test_file)
    y_test = [float(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    X_test[np.isnan(X_test)] = 0.0
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)
    s = min(len(X_training), len(U))

    
    cca = CCA(n_components=components, max_iter=50)
    (X_cca, U_cca) = cca.fit_transform(X_training[:s], U[:s])
    X_test_cca = cca.transform(X_test)
    
    svr = SVR(C=c, epsilon=e, kernel='rbf')
    svr.fit(X_cca, y_training[:s])    
    pred = svr.predict(X_test_cca)
    
 
    with open(output_file, 'w') as output:
        for p in pred:
            print(p, file=output)
    return
Example #7
def rdc_cca(indexes):
    i, j, rdc_features = indexes
    cca = CCA(n_components=1, max_iter=CCA_MAX_ITER)
    X_cca, Y_cca = cca.fit_transform(rdc_features[i], rdc_features[j])
    rdc = np.corrcoef(X_cca.T, Y_cca.T)[0, 1]
    # logger.info(i, j, rdc)
    return rdc
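The indexes tuple suggests rdc_cca is meant to be mapped over feature pairs, e.g. with a process pool. A hypothetical driver, assuming CCA_MAX_ITER is a module-level constant and rdc_features is a list of projected feature matrices:

from itertools import combinations
from multiprocessing import Pool

CCA_MAX_ITER = 100  # assumed module-level constant

def pairwise_rdc(rdc_features):
    pairs = [(i, j, rdc_features)
             for i, j in combinations(range(len(rdc_features)), 2)]
    with Pool() as pool:
        return pool.map(rdc_cca, pairs)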
def fbcca_realtime(data, list_freqs, fs, num_harms=3, num_fbs=5):
    
    fb_coefs = np.power(np.arange(1,num_fbs+1),(-1.25)) + 0.25
    
    num_targs = len(list_freqs)
    _, num_smpls = data.shape
    
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1) #initialize CCA
    
    # result matrix
    r = np.zeros((num_fbs,num_targs))
    
    for fb_i in range(num_fbs):  #filter bank number, deal with different filter bank
        testdata = filterbank(data, fs, fb_i)  #data after filtering
        for class_i in range(num_targs):
            refdata = np.squeeze(y_ref[class_i, :, :])   #pick corresponding freq target reference signal
            test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
            r_tmp, _ = pearsonr(np.squeeze(test_C), np.squeeze(ref_C))  # returns r and p-value
            if np.isnan(r_tmp):
                r_tmp = 0
            r[fb_i, class_i] = r_tmp
    
    rho = np.dot(fb_coefs, r)  # weighted sum of r across the filter banks
    print(rho)  # print out the correlations
    result = np.argmax(rho)  # index of the maximum entry, i.e. the most likely target
    ''' Threshold '''
    THRESHOLD = 2.1
    if abs(rho[result]) < THRESHOLD:  # 2.587=np.sum(fb_coefs*0.8), 2.91=np.sum(fb_coefs*0.9), 1.941=np.sum(fb_coefs*0.6)
        return 999  # if the correlation isn't high enough, do not return any command
    else:
        return result
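fbcca_realtime also needs the filterbank helper, which is not shown. A simplified stand-in using a Butterworth band-pass per sub-band (published FBCCA uses Chebyshev sub-bands spanning roughly 8*(fb_i+1) to 90 Hz; this sketch assumes fs is comfortably above 180 Hz):

from scipy.signal import butter, filtfilt

def filterbank(eeg, fs, fb_i):
    # hypothetical stand-in: band-pass the fb_i-th sub-band along the sample axis
    low = 8 * (fb_i + 1)
    high = min(90, fs / 2 - 1)
    b, a = butter(4, [low / (fs / 2), high / (fs / 2)], btype='band')
    return filtfilt(b, a, eeg, axis=-1)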
Example #10
def rdc_cca(indexes):
    i, j, _ = indexes  # the third element is unused; a fresh CCA is built below
    cca = CCA(n_components=1)
    X_cca, Y_cca = cca.fit_transform(GLOBAL_RDC_FEATURES[i],
                                     GLOBAL_RDC_FEATURES[j])
    # rdc = 1
    rdc = numpy.corrcoef(X_cca.T, Y_cca.T)[0, 1]
    print('ij', i, j)
    return rdc
def cca(X, Y, K):
    '''
    Perform CCA on two views X, Y and reduce the dimension to K.

    Returns the projected views X_c, Y_c and the fitted CCA model.
    '''
    cca = CCA(n_components=K, scale=False, max_iter=1000)
    X_c, Y_c = cca.fit_transform(X, Y)
    return X_c, Y_c, cca
Example #12
def CCA_corrcoeff(X, Y, n_components):
    cca = CCA(n_components)
    U, V = cca.fit_transform(X, Y)

    X_mean = np.subtract(X, X.mean(axis=0))
    Y_mean = np.subtract(Y, Y.mean(axis=0))

    A = np.linalg.solve(X_mean.T.dot(X_mean), X_mean.T.dot(U))
    B = np.linalg.solve(Y_mean.T.dot(Y_mean), Y_mean.T.dot(V))
    return A, B, U, V
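A hypothetical sanity check: because the CCA scores are a linear function of the centered data, the least-squares maps A and B recovered above reproduce the scores exactly:

rng = np.random.RandomState(0)
X = rng.randn(100, 6)
Y = rng.randn(100, 5)
A, B, U, V = CCA_corrcoeff(X, Y, n_components=3)
U_hat = (X - X.mean(axis=0)).dot(A)
print(np.allclose(U, U_hat, atol=1e-6))  # True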
Example #13
    def _extract_corr(data, reference):
        """Correlation extractor. Takes as an input signal and reference,
        then calculates canonical correlation between them. After that
        it aquires cross-correlation between cca coefficients and returns
        asolute value of it."""

        data = data.reshape(data.shape[1], 1)
        reference = reference.reshape(reference.shape[0], 1)
        cancor = CCA(n_components=1)
        u, v = cancor.fit_transform(data, reference)
        coef = np.corrcoef(u.T, v.T)
        return np.abs(coef[0, 1])
Example #14
 def compute_corr(self, X_test, method="cca"):
     if self.Y is None:
         raise ValueError(
             "Reference matrix Y must be computed using `fit` before computing corr"
         )
     if method == "eig":
         rho = CCA.cca_eig(X_test.T, self.Y.T)[0]
     else:  # use sklearn implementation
         cca = CCA_sklearn(n_components=1)
         Xc, Yc = cca.fit_transform(X_test.T, self.Y.T)
         rho = pearsonr(Xc[:, 0], Yc[:, 0])[0]
     return rho
def rdc(x, y, k=20, s=1 / 6., f=np.sin):
    """
    Compute the randomized dependence coefficient

    This algorithm is able to detect linear and non-linear correlations in the
    data vectors x and y.
    This is based on the paper titled "The Randomized Dependence Coefficient"
    located here https://arxiv.org/abs/1304.7717.

    Parameters
    ----------
    x : 1D numpy array with shape (N,)
        data coordinates
    y : 1D numpy array with shape (N,)
        data coordinates
    k : int, s : float
          tuning parameters - do not alter unless you really know what you're
          doing
    f : non-linear basis function

    Returns
    -------
    randomized dependence coefficient
    """
    import scipy.stats as stat
    from sklearn.cross_decomposition import CCA

    # the original was written in R (just 5 lines!), this is my translation
    # to numpy/scipy/scikit-learn (the original code is in the comments)

    # x <- cbind(apply(as.matrix(x),2,function(u)rank(u)/length(u)),1)
    # y <- cbind(apply(as.matrix(y),2,function(u)rank(u)/length(u)),1)
    x = stat.rankdata(x) / x.size
    y = stat.rankdata(y) / y.size
    x = np.insert(x[:, np.newaxis], 1, 1, axis=1)
    y = np.insert(y[:, np.newaxis], 1, 1, axis=1)
    # x <- s/ncol(x)*x%*%matrix(rnorm(ncol(x)*k),ncol(x))
    # y <- s/ncol(y)*y%*%matrix(rnorm(ncol(y)*k),ncol(y))
    x = np.dot(s / x.shape[1] * x,
               np.random.normal(size=x.shape[1] * k).reshape((x.shape[1], -1)))
    y = np.dot(s / y.shape[1] * y,
               np.random.normal(size=y.shape[1] * k).reshape((y.shape[1], -1)))
    # cancor(cbind(f(x),1),cbind(f(y),1))$cor[1]
    x = np.insert(f(x), x.shape[1], 1, axis=1)
    y = np.insert(f(y), y.shape[1], 1, axis=1)
    # the following is taken from:
    # http://stackoverflow.com/questions/37398856/
    # how-to-get-the-first-canonical-correlation-from-sklearns-cca-module
    cca = CCA(n_components=1)
    x_c, y_c = cca.fit_transform(x, y)
    return np.corrcoef(x_c.T, y_c.T)[0, 1]
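As the docstring promises, rdc picks up non-linear dependence that the Pearson correlation misses; a quick check on a noisy parabola:

x = np.random.uniform(-1, 1, 1000)
y = x ** 2 + 0.05 * np.random.randn(1000)  # non-linear, near-zero Pearson r
print(np.corrcoef(x, y)[0, 1])  # close to 0
print(rdc(x, y))                # close to 1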
Example #16
def rdc(X, Y, k=None, s=1. / 6., f=numpy.sin, rand_gen=None, rnorm_X=None, rnorm_Y=None):

    if X.ndim == 1:
        X = X[:, numpy.newaxis]
    if Y.ndim == 1:
        Y = Y[:, numpy.newaxis]

    #
    # heuristic assumption
    if k is None:
        k = max(X.shape[1], Y.shape[1]) + 1
        # print(k)

    n_instances = X.shape[0]
    assert Y.shape[0] == n_instances, (Y.shape[0], n_instances)

    if rand_gen is None:
        rand_gen = numpy.random.RandomState(RAND_STATE)

    #
    # empirical copula transformation
    ones_column = numpy.ones((n_instances, 1))
    X_c = numpy.concatenate((numpy.apply_along_axis(ecdf, 0, X),
                             ones_column), axis=1)
    Y_c = numpy.concatenate((numpy.apply_along_axis(ecdf, 0, Y),
                             ones_column), axis=1)

    #
    # linear projection through a random gaussian
    if rnorm_X is None:
        rnorm_X = rand_gen.normal(size=(X_c.shape[1], k))
    if rnorm_Y is None:
        rnorm_Y = rand_gen.normal(size=(Y_c.shape[1], k))
    X_proj = s / X_c.shape[1] * numpy.dot(X_c, rnorm_X)
    Y_proj = s / Y_c.shape[1] * numpy.dot(Y_c, rnorm_Y)

    #
    # non-linear projection
    # print(f(X_proj), f(X_proj).shape, X_proj.shape)
    X_proj = numpy.concatenate((f(X_proj), ones_column), axis=1)
    Y_proj = numpy.concatenate((f(Y_proj), ones_column), axis=1)

    #
    # canonical correlation analysis
    cca = CCA(n_components=1)
    X_cca, Y_cca = cca.fit_transform(X_proj, Y_proj)

    rdc = numpy.corrcoef(X_cca.T, Y_cca.T)

    # print(rdc)
    return rdc[0, 1]
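This variant applies an ecdf helper column-wise for the empirical copula transform; the helper is not shown, but one plausible definition, matching the rank/length transform used by the other rdc implementations above:

import scipy.stats

def ecdf(v):
    # empirical CDF value of each entry of a 1-d array: rank / n
    return scipy.stats.rankdata(v, method='max') / len(v)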
Example #17
def cca_correlation(X, Y, n_comp=50):
    """
    :param X, Y: should be N-by-p, N-by-q matrices,
    :param n_comp: a integer, how many components we want to create and compare.
    :return: cca_corr, n_comp-by-n_comp matrix
       X_c, Y_c will be the linear mapped version of X, Y with shape  N-by-n_comp, N-by-n_comp shape
       cc_mat is the
    """
    cca = CCA(n_components=n_comp)
    X_c, Y_c = cca.fit_transform(X, Y)
    ccmat = np.corrcoef(X_c, Y_c, rowvar=False)
    cca_corr = np.diag(
        ccmat[n_comp:, :n_comp])  # slice out the cross corr part
    return cca_corr
Example #18
def cca_subspace(X, Y, n_comp=50, **kwargs):
    """
    :param X, Y: should be N-by-p, N-by-q matrices, N is the dimension for the whole space, p, q are number of basis
                 vectors (Note p, q functions as number of features to be recombined, while N functions as number of
                 sampled). CCA will maximize
    :param n_comp: a integer, how many components we want to create and compare.
    :return: cca_corr, n_comp-by-n_comp matrix
       X_c, Y_c will be the linear mapped version of X, Y with shape  N-by-n_comp, N-by-n_comp shape
       cc_mat is the
    """
    cca = CCA(n_components=n_comp, **kwargs)
    X_c, Y_c = cca.fit_transform(X, Y)
    ccmat = np.corrcoef(X_c, Y_c, rowvar=False)
    cca_corr = np.diag(ccmat[n_comp:, :n_comp])  # slice out the cross corr part
    return cca_corr, cca
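A hypothetical use of cca_subspace: comparing two bases of R^1000 that share half their directions; the leading canonical correlations come out near 1 for the shared half and fall off afterwards:

rng = np.random.RandomState(0)
basis_a = rng.randn(1000, 60)
basis_b = np.hstack([basis_a[:, :30], rng.randn(1000, 30)])
corr, cca = cca_subspace(basis_a, basis_b, n_comp=50, max_iter=1000)
print(corr[:5])  # near 1: the shared directions dominate the leading components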
Example #19
def getCoeff(id, sample, framePeriod, currentTimeMillis):
	# X, Y are module-level rolling windows keyed by id; N, pi, sin, cos come from module scope
	T = framePeriod[0]/60
	tau = framePeriod[1]/60
	t = currentTimeMillis/1000
	x = list(map(lambda x: x - tau/T,sample.copy()))
	y = []
	for n in range(1,N+1):
		y.append((2/(n*pi)) * sin(pi*n*tau/T) * cos(2*pi*n*(t - tau/2)/T))
	del X[id][0]
	del Y[id][0]
	X[id].append(x.copy())
	Y[id].append(y.copy())
	cca = CCA(n_components=1)
	X_c, Y_c = cca.fit_transform(X[id],Y[id])
	result = np.corrcoef(X_c.T, Y_c.T)[0,1]
	return result
Example #20
def canonical_correlation_analysis(list_a, list_b, list_y):
    X = []
    Y = []
    if len(list_a) != len(list_b) or len(list_b) != len(list_y):
        return None

    for i in range(len(list_a)):
        X.append([list_a[i], list_b[i]])
        Y.append(list_y[i])

    cca = CCA(n_components=1)
    X_c, Y_c = cca.fit_transform(X, Y)
    result = np.corrcoef(X_c.T, Y_c.T)[0, 1]

    print(np.corrcoef(X_c.T, Y_c.T))
    return result
Example #21
def fbcca_realtime(eeg, list_freqs, fs, num_harms=3, num_fbs=5):
    print("EEG shape: ", eeg.shape)

    fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25

    num_targs = len(list_freqs)
    events, _, num_smpls = eeg.shape  # 40 targets (i.e., 40 frequency-phase combinations to predict)
    y_ref = cca_reference(list_freqs, fs, num_smpls, num_harms)
    cca = CCA(n_components=1)  # initialize CCA

    # result matrix
    r = np.zeros((num_fbs, num_targs))
    results = np.zeros(num_targs)
    r_tmp_mode = []
    r_tmp_corr_avg = []

    for event in range(eeg.shape[0]):
        test_tmp = np.squeeze(eeg[event, :, :])  # deal with one event a time
        for fb_i in range(num_fbs):  # filter bank number, deal with different filter bank
            for class_i in range(num_targs):
                testdata = filterbank(test_tmp, fs, fb_i)  # data after filtering
                refdata = np.squeeze(y_ref[class_i, :, :])  # pick corresponding freq target reference signal
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                # len(row) = len(observation), len(column) = variables of each observation
                # number of rows should be the same, so need transpose here
                # output is the highest correlation linear combination of two sets
                r_tmp, _ = pearsonr(np.squeeze(test_C),
                                    np.squeeze(ref_C))  # return r and p_value, use np.squeeze to adapt the API
                if np.isnan(r_tmp):
                    r_tmp = 0
                r[fb_i, class_i] = r_tmp
        rho = np.dot(fb_coefs, r)  # weighted sum of r from all different filter banks' result
        print("rho: ", rho)
        result = np.argmax(rho) # get maximum from the target as the final predict (get the index), and index indicates the maximum entry(most possible target)
        print("result: ", result)
        r_tmp_mode.append(result)
        print("correlation: ", abs(rho[result]))
        r_tmp_corr_avg.append(abs(rho[result]))
    r_mode = mode(r_tmp_mode)[0][0]
    r_corr_avg = np.mean(r_tmp_corr_avg)
    print("====Most recurrent class: ====", r_mode)
    print("====Average correlation: =====", r_corr_avg)

    THRESHOLD = 0.3
    if r_corr_avg >= THRESHOLD:  # 2.749=np.sum(fb_coefs*0.85)
        return r_mode  # only return a command when the average correlation is high enough
Example #22
def load_word_bank_dataset():
    """
    This function loads the World Bank Data and return it as NxD numpy arrays
    """
    fert_dataset_path = './demo/WorldBankData/fertility_rate.csv'
    life_exp_dataset_path = './demo/WorldBankData/life_expectancy.csv'
    years_str_list = [str(year) for year in range(1960, 2017)]
    if os.path.exists(fert_dataset_path) and os.path.exists(
            life_exp_dataset_path):
        # If files exists, load from files
        # Load and drop rows with missing values
        fert_rate = pd.read_csv(fert_dataset_path).dropna()
        life_exp = pd.read_csv(life_exp_dataset_path).dropna()
        country_field_name = 'Country Code'
    else:
        # If files don't exist, download data with wbdata instead
        # Get life expectancy and fertility rate data
        life_exp = wbdata.get_dataframe(indicators={
            "SP.DYN.LE00.IN": 'value'
        }).unstack(level=0).transpose().reset_index()
        fert_rate = wbdata.get_dataframe(indicators={
            "SP.DYN.TFRT.IN": 'value'
        }).unstack(level=0).transpose().reset_index()

        # Keep only country name and years columns, filter row with N/A's
        life_exp = life_exp[['country'] + years_str_list].dropna()
        fert_rate = fert_rate[['country'] + years_str_list].dropna()
        country_field_name = 'country'

    # Keep only countries which appear on both dataframes
    valid_countries = list(
        set(life_exp[country_field_name]) & set(fert_rate[country_field_name]))
    life_exp = life_exp[life_exp[country_field_name].isin(valid_countries)]
    fert_rate = fert_rate[fert_rate[country_field_name].isin(valid_countries)]

    # Convert to numpy
    life_exp = life_exp[years_str_list].to_numpy()
    fert_rate = fert_rate[years_str_list].to_numpy()

    # Apply CCA (fit_transform returns the X scores first, i.e. fert_rate's)
    cca_transformer = CCA(n_components=2)
    fert_rate_cca, life_exp_cca = cca_transformer.fit_transform(
        fert_rate, life_exp)
    return life_exp_cca, fert_rate_cca
    def CanonCoff(self, X):

        Y = [i for i in range(len(self.cca_frequency))]
        for i in range(len(self.cca_frequency)):
            ref = 2 * np.pi * self.t * self.cca_frequency[i]
            Y[i] = [np.sin(ref), np.cos(ref), np.sin(2 * ref), np.cos(2 * ref)]

        print(len(X))
        cca = CCA(n_components=4)
        result = np.zeros((len(self.cca_frequency), 4))

        for i in range(len(self.cca_frequency)):
            Z = np.array([Y[i]])
            X_c, Y_c = cca.fit_transform(X, Z[0].T)
            cca_value = np.corrcoef(X_c.T, Y_c.T)
            for k in range(4):
                result[i][k] = cca_value[0 + k, 4 + k]
            result[i] = np.max(result[i])  # keep only the largest canonical correlation for this frequency
        return result[:, 0]
Example #24
    def cca(self, X1, X2, n_components=2):
        cca = CCA(n_components=n_components)
        X1, X2 = cca.fit_transform(X1, X2)
        '''
        from scipy.stats import pearsonr
        print("Correlation Coefficient")
        for i in range(n_components):
            print("{0}:{1:.3f}".format(i, pearsonr(cca.x_scores_[:,i], cca.y_scores_[:,i])[0]))
        print("")
        print("")
        np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
        print("X1 loadings")
        print(cca.x_loadings_.T)
        print("")
        print("X2 loadings")
        print(cca.y_loadings_.T)
        '''

        return pd.DataFrame(X1)
Example #25
class CCAAnalysis:
    """Canonical Correlation Analysis for SSVEP paradigm"""
    def __init__(self, freqs, win_len, s_rate, n_harmonics=1):
        """
        Args:
            freqs (list): List of target frequencies
            win_len (float): Window length
            s_rate (int): Sampling rate of EEG signal
            n_harmonics (int): Number of harmonics to be considered
        """
        self.freqs = freqs
        self.win_len = win_len
        self.s_rate = s_rate
        self.n_harmonics = n_harmonics
        self.train_data = self._init_train_data()
        self.cca = CCA(n_components=1)

    def _init_train_data(self):
        t_vec = np.linspace(0, self.win_len, int(self.s_rate * self.win_len))
        targets = {}
        for freq in self.freqs:
            sig_sin, sig_cos = [], []
            for harmonics in range(1, self.n_harmonics + 1):  # harmonic 0 would give constant/zero columns
                sig_sin.append(np.sin(2 * np.pi * harmonics * freq * t_vec))
                sig_cos.append(np.cos(2 * np.pi * harmonics * freq * t_vec))
            targets[freq] = np.array(sig_sin + sig_cos).T
        return targets

    def apply_cca(self, eeg):
        """Apply CCA analysis to EEG data and return scores for each target frequency

        Args:
            eeg (np.array): EEG array [n_samples, n_chan]

        Returns:
            list of scores for target frequencies
        """
        scores = []
        for key in self.train_data:
            sig_c, t_c = self.cca.fit_transform(eeg, self.train_data[key])
            scores.append(np.corrcoef(sig_c.T, t_c.T)[0, 1])
        return scores
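A hypothetical session with CCAAnalysis, detecting which of two SSVEP target frequencies a window of EEG best matches (assumes numpy as np, as in the class above):

analysis = CCAAnalysis(freqs=[12, 15], win_len=2.0, s_rate=250, n_harmonics=2)
eeg = np.random.randn(int(250 * 2.0), 8)  # [n_samples, n_chan]
scores = analysis.apply_cca(eeg)
detected = analysis.freqs[int(np.argmax(scores))]
print(scores, detected)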
Example #26
    def transform_cca(self,
                      abs_cols=None,
                      cols=None,
                      clusters=None,
                      kwargs=None):
        # process arg: cols, clusters
        df = self.select_data(cols=cols, clusters=clusters)
        # process arg: abs_cols
        if abs_cols is not None:
            df = df[abs_cols]
        # process arg: kwargs
        kwargs = self.process_kwargs('cca', kwargs)
        o_cca = CCA(**kwargs)

        # NOTE: sklearn's CCA.fit_transform requires a second view Y;
        # a target matrix must be supplied alongside df for this call to run
        arr = o_cca.fit_transform(df)
        nrows, ncols = arr.shape
        cca_cols = ["cca_{}".format(i) for i in range(ncols)]
        self.pca_names = cca_cols
        cca_df = pd.DataFrame(data=arr, columns=cca_cols)
        self.df = pd.concat([self.df, cca_df], axis=1)
Example #27
def main(args):
    (training_file, label_file, test_file, test_label, u_file) = args
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [int(line.strip()) for line in open(label_file)]
   
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)
    
    X_test = load_feat(test_file)
    y_test = [int(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)

    
    cca = CCA(n_components=100)
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    X_test_cca = cca.transform(X_test)  # transform, not predict: the SVC expects the X scores

    svr = SVC()
    svr.fit(X_cca, y_training)
    pred = svr.predict(X_test_cca)

    print(pred)
    print(y_test)
    print(accuracy_score(y_test, pred))
    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            print(p, file=output)
    #svm_model.fit(X, y)
    #pickle.dump(lr, open(model_file, "wb"))
    return
Example #28
    def fbcca_feature(self,eeg, parameter_list, num_harms=3, num_fbs=10):
        fs = parameter_list[2] / parameter_list[3]
        fb_coefs = np.power(np.arange(1, num_fbs + 1), (-1.25)) + 0.25
        num_targs = len(parameter_list[1])
        y_ref = self.cca_reference(parameter_list[1], fs, parameter_list[2], num_harms)
        cca = CCA(n_components=1)  # initialize CCA
        # result matrix
        r = np.zeros((num_fbs, num_targs))
        for fb_i in range(num_fbs):  # filter bank number, deal with different filter bank
            testdata = self.filter_bank(eeg, fs, fb_i)  # data after filtering
            for class_i in range(num_targs):
                refdata = np.squeeze(y_ref[class_i, :, :])  # pick corresponding freq target reference signal
                test_C, ref_C = cca.fit_transform(testdata.T, refdata.T)
                # len(row) = len(observation), len(column) = variables of each observation
                # number of rows should be the same, so need transpose here
                # output is the highest correlation linear combination of two sets
                r_tmp, _ = pearsonr(np.squeeze(test_C),
                                    np.squeeze(ref_C))  # return r and p_value, use np.squeeze to adapt the API
                r[fb_i, class_i] = r_tmp

        results = np.dot(fb_coefs, r)  # weighted sum of r from all different filter banks' result
        print("fb_cca:",results)
        return results
def qvec_cca(**kwargs):
    embeddings = load_embeddings(**kwargs).T
    lg = kwargs["lg"]
    features = load_features(lg).T
    common_phonemes = embeddings.columns.intersection(features.columns)
    S = features[common_phonemes]
    X = embeddings[common_phonemes]
    cca = CCA(n_components=1)
    a, b = cca.fit_transform(X.T, S.T)
    a, b = a.reshape(-1), b.reshape(-1)
    r, p = pearsonr(a, b)
    # Write results to disk
    level, lg, name = kwargs["level"], kwargs["lg"], kwargs["name"]
    if "hidden" in kwargs:
        hyperparams = f"{kwargs['size']}-{kwargs['hidden']}"
    else:
        hyperparams = f"{kwargs['size']}-{kwargs['window']}"
    path = f"results/{level}/qvec/{lg}/{name}/{hyperparams}"
    ensure_dir(path)
    epoch = kwargs["epoch"]
    filename = os.path.join(path, f"{epoch}.txt")
    with open(filename, "w") as file:
        file.write(str((r, p)))
    return r, p
def ccr_median(U, V):
    cca = CCA(n_components=5)
    U_c, V_c = cca.fit_transform(U, V)
    coef = np.abs(np.corrcoef(U_c.T, V_c.T).diagonal(offset=5))
    return np.median(coef)
def getVLADDescriptors(path, pathVD, pathCNNGT, pathColf):
    with open(pathVD, 'rb') as f:
        visualDictionary = pickle.load(f)
    # load cnn features
    with open(pathCNNGT, 'rb') as f:
        pkl = pickle.load(f)
        # fcs = pkl[2]
        scenefs = pkl[1]
        img_names = pkl[0]
    

    # update 3/22 VLAD on column feature 
    with open(pathColf, 'rb') as f:
        vd_colf = pickle.load(f)


    descriptors = list()
    idImage = list()
    for imagePath in glob.glob(path + "/*.jpg"):
        print(imagePath)
        img = cv2.imread(imagePath)
        print(img_names.index(imagePath.split("/")[-1]))
        scenef = scenefs[img_names.index(imagePath.split("/")[-1])]
        print("scenef.shape = ")
        print(scenef.shape)
        # fc = fcs[img_names.index(imagePath.split("/")[-1])][0]
        # print("fc.shape = ")
        # print(fc.shape)

        # if scenef.shape[1]>2:
        #     scenef = scenef[:,:2,:,:]
        # if scenef.shape[1]<2:
        #     npad = ((0, 0), (0, 2-scenef.shape[1]), (0, 0), (0, 0))
        #     scenef = np.pad(scenef, pad_width=npad, mode='constant', constant_values=0)
        # if scenef.shape[2]>7:
        #     scenef = scenef[:,:,:7,:]
        # if scenef.shape[2]<7:
        #     npad = ((0, 0), (0, 0), (0, 7-scenef.shape[2]), (0, 0))
        #     scenef = np.pad(scenef, pad_width=npad, mode='constant', constant_values=0)


        colf = []
        scenef = scenef[0]
        rows = scenef.shape[0]
        columns = scenef.shape[1]
        for i in range(rows):
            for j in range(columns):
                colf.append(scenef[i,j])
        colf = np.asarray(colf)
        print(colf.shape)

            

        sift = cv2.xfeatures2d.SIFT_create()
        kp, des = sift.detectAndCompute(img, None)
        if des is not None:  # and colf is not None
            v = VLAD(des, visualDictionary)
            vlad_colf = VLAD(colf, vd_colf)

            # mergedf = scenef.flatten()
            # mergedf = np.concatenate([v, scenef.flatten()])
            # mergedf = np.concatenate([v, vlad_colf])
            # mergedf = np.concatenate([v, fc])
            # print("mergedf.shape = ")
            # print(mergedf.shape)

            print("==========Performing CCA==========")
            cca = CCA(n_components=1)
            v_c, vlad_colf_c = cca.fit_transform(v, vlad_colf)
            # print(v_c)
            print(v_c.shape)
            # print(vlad_colf_c)
            print(vlad_colf_c.shape)
            mergedf = np.concatenate([v_c, vlad_colf_c])
            mergedf = mergedf.reshape(1, -1)[0]
            print("mergedf.shape = ")
            print(mergedf.shape)
            print("==================================")


            # descriptors.append(fc)
            # if '127696' in imagePath:
                # print(fc)
            descriptors.append(mergedf)
            idImage.append(imagePath)

    descriptors = np.asarray(descriptors)
    print(descriptors.shape)
    return descriptors, idImage
Example #33
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)

    OutputLog().set_path(dir_name)
    OutputLog().set_verbosity(configuration.output_parameters['verbosity'])

    data_config = ConfigParser.ConfigParser()
    data_config.read(data_set_config)
    data_parameters = ConfigSectionMap("dataset_parameters", data_config)

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)

    cca_model = CCA(n_components=top, scale=True, copy=False)

    train_transformed_x, train_transformed_y = cca_model.fit_transform(data_set.trainset[0], data_set.trainset[1])
    test_transformed_x, test_transformed_y = cca_model.transform(data_set.testset[0], data_set.testset[1])

    OutputLog().write('test results:')
    correlations, trace_correlation, var, x_test, y_test, test_best_layer = TraceCorrelationTester(
        data_set.testset[0],
        data_set.testset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

    OutputLog().write('train results:')
    correlations, train_trace_correlation, var, x_train, y_train, train_best_layer = TraceCorrelationTester(
        data_set.trainset[0],
        data_set.trainset[1], top).test(IdentityTransformer(), configuration.hyper_parameters)

    OutputLog().write('\nTest results : \n')

    configuration.hyper_parameters.print_parameters(OutputLog())
Example #34
    X, good_idx = remove_outliers(X, 6.0)
    y = y.ix[y.index[good_idx]]

    # sanity check
    # idx = np.random.permutation(len(y))[0]
    # idx = np.where(y.index == 119384)[0][0]
    # image_sanity_check(y.index[idx], X[idx])

    # only keep unique values
    unique_cols = ['Class1.1', 'Class1.2', 'Class2.1', 'Class3.1', 'Class4.1', 'Class5.1', 'Class5.2', 'Class5.3',
                   'Class6.1', 'Class7.1', 'Class7.2', 'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4', 'Class8.5',
                   'Class8.6', 'Class9.1', 'Class9.2', 'Class10.1', 'Class10.2', 'Class11.1', 'Class11.2',
                   'Class11.3', 'Class11.4', 'Class11.5']

    # do CCA
    if verbose:
        print 'Doing CCA...'
    cca = CCA(n_components=len(unique_cols), copy=False)
    X_cca, y_cca = cca.fit_transform(X, y[unique_cols].values.astype(np.float32))

    cPickle.dump(cca, open(base_dir + 'data/CCA_DCT.pickle', 'wb'))

    # make plots
    make_cca_images(cca, (100, 100), dct_idx=dct_idx)
    fig = plot_cca_projections(X_cca)
    fig.savefig(plot_dir + 'CCA_dist_no_outliers.png')
    if doshow:
        plt.show()

    print 'Saving the transformed values...'
    np.save(base_dir + 'data/CCA_training_transform', X_cca)