from sklearn.cross_decomposition import CCA
import pandas as pd


def canonical_correlation_analysis(occurences_a, occurences_b):
    # One-hot encode both categorical occurrence collections
    occurences_a = pd.Series(occurences_a, dtype="category")
    occurences_a = pd.get_dummies(occurences_a)
    # pd.DataFrame.from_items was removed in pandas 1.0; from_dict is the
    # modern equivalent for a list of (column, values) pairs
    occurences_b = pd.DataFrame.from_dict(dict(occurences_b))
    occurences_b = pd.get_dummies(occurences_b)
    cca = CCA(n_components=1)
    cca.fit(occurences_a, occurences_b)
    return cca.score(occurences_a, occurences_b)
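
A minimal, hypothetical invocation (the data below is an assumption, not from the original source): occurences_a is a flat sequence of labels, and occurences_b is a list of (column, values) pairs as from_dict expects.

labels_a = ["x", "y", "x", "z"]
labels_b = [("tag", ["u", "v", "u", "v"])]
print(canonical_correlation_analysis(labels_a, labels_b))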
Example #2
import itertools

from sklearn.cross_decomposition import CCA


def cca_fit(X, Y):
    cca = CCA(n_components=1)
    cca.fit(X, Y)

    # Score only the first 10 samples of each view
    X = list(itertools.islice(X, 10))
    Y = list(itertools.islice(Y, 10))

    return cca.score(X, Y)
import numpy as np
from sklearn.cross_decomposition import CCA


def get_cca(chip_cors, rna_vec):
    # Scale each vector by its max and shape as column vectors; with CCA's
    # default scale=True this normalization is effectively redundant
    Y_vec = np.array([[each_val / max(chip_cors) for each_val in chip_cors]])
    X_vec = np.array([[each_val / max(rna_vec) for each_val in rna_vec]])
    Y_vec = Y_vec.transpose()
    X_vec = X_vec.transpose()
    cca_obj = CCA(n_components=1)
    cca_obj.fit(X_vec, Y_vec)
    r_squared_canonical = cca_obj.score(X_vec, Y_vec)
    return r_squared_canonical
Example #4
import numpy
from sklearn.cross_decomposition import CCA


def cca_for_ssvep(input_data, sampling_rate, compared_frequencies):

    # TODO: Strict input checks and exceptions to avoid crashes and processing errors

    # Pre-allocate SSVEP signals matrix to be compared with original EEG recordings using CCA
    number_time_points = input_data.shape[1]
    number_harmonics = 2
    cca_base_signal_matrix = [[] for loop_var in compared_frequencies]

    # Pre-allocate output: a single row holding one correlation score (Rho)
    # per target SSVEP frequency
    cca_rho_values = numpy.zeros([1, len(compared_frequencies)], dtype='float')

    # For each target frequency, fill Y matrix with sine and cosine signals for every harmonic
    for loop_frequencies in range(len(compared_frequencies)):

        # For this current SSVEP frequency, pre-allocate the harmonics matrix
        cca_base_signal_matrix[loop_frequencies] = numpy.zeros(
            [number_harmonics * 2, number_time_points])
        time_points_count = numpy.arange(number_time_points, dtype='float')
        time_points_count = time_points_count / sampling_rate

        # Generate sine and cosine reference signals, for every harmonic
        for loop_harmonics in range(number_harmonics):

            # Compute the reference signals for current harmonic
            base_constant = 2 * numpy.pi * (
                loop_harmonics + 1) * compared_frequencies[loop_frequencies]
            base_sine_signal = numpy.sin((base_constant * time_points_count))
            base_cosine_signal = numpy.cos((base_constant * time_points_count))

            # Copy signals back to reference matrix
            base_position = loop_harmonics + 1
            sine_position = (2 * (base_position - 1) + 1)
            cosine_position = 2 * base_position
            cca_base_signal_matrix[loop_frequencies][sine_position -
                                                     1, :] = base_sine_signal
            cca_base_signal_matrix[loop_frequencies][cosine_position -
                                                     1, :] = base_cosine_signal

        # After the loop, extract the y_matrix from reference matrix for current SSVEP frequency
        y_matrix = cca_base_signal_matrix[loop_frequencies]

        # Create a CCA object and compute the correlation score
        # (fit, transform and score all expect samples as rows, hence the transposes)
        cca_object = CCA(n_components=number_harmonics)
        cca_object.fit(numpy.transpose(input_data), numpy.transpose(y_matrix))
        values_x, values_y = cca_object.transform(
            numpy.transpose(input_data), numpy.transpose(y_matrix))
        # Note: CCA.score returns the R^2 of predicting Y from X, not Rho itself
        cca_rho_values[0, loop_frequencies] = cca_object.score(
            numpy.transpose(input_data), numpy.transpose(y_matrix))

    # After loop return and exit
    return cca_rho_values
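
The snippet's trailing question is worth settling: sklearn's CCA.score returns the coefficient of determination for predicting Y from X, not the canonical correlation. If Rho itself is wanted, a one-line sketch that would sit inside the frequency loop (a hypothetical addition, not part of the original snippet):

        rho_first = numpy.corrcoef(values_x[:, 0], values_y[:, 0])[0, 1]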
Example #5
import logging

import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import PolynomialFeatures


def doCCA(metrics, color):

    # metricsInput2 and metricsOutput2 are module-level metric-name lists
    # defined elsewhere in the original source
    inp = np.array([metrics[m] for m in metricsInput2]).T.astype(float)
    out = np.array([metrics[m] for m in metricsOutput2]).T.astype(float)
    inp0 = np.zeros(len(metricsInput2))
    out0 = np.zeros(len(metricsOutput2))
    inp = np.vstack((inp, inp0))
    out = np.vstack((out, out0))

    all_data = np.concatenate((inp, out), axis=1)  # avoid shadowing built-in all()
    # fixed cache: rows whose first input column equals 90
    fixed = all_data[all_data[:, 0] == 90]
    inp_fixed = fixed[:, 1:2]
    out_fixed = fixed[:, 2:6]
    #singleScatter2(1, 2, fixed)
    #singleScatter2(1, 3, fixed)
    #singleScatter2(1, 4, fixed)
    #singleScatter2(1, 5, fixed)

    # alternative selections (e.g. inpnSat/outnSat) were swapped in here
    inp = inp_fixed
    out = out_fixed

    # Degree-1 polynomial features without bias: effectively an identity mapping
    poly = PolynomialFeatures(1, include_bias=False, interaction_only=False)
    inp = poly.fit_transform(inp)
    # inp = inp_poly[:, 2:]

    cca = CCA(n_components=1, scale=False)
    cca.fit(inp, out)
    print(cca.score(inp, out))
    inp_cca = inp.dot(cca.x_rotations_)
    out_cca = out.dot(cca.y_rotations_)
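    # Note: X.dot(x_rotations_) skips the centering that cca.transform applies;
    # with scale=False the two projections differ only by the column means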

    # Create linear regression object
    regr = linear_model.LinearRegression()
    # Train the model using the training sets
    regr.fit(inp_cca, out_cca)
    cca_regr = regr.predict(inp_cca)
    # The coefficients
    print('Coefficients: \n', regr.coef_)

    plt.scatter(inp_cca, out_cca, c=color)
    plt.plot(inp_cca, cca_regr, color=color, linewidth=0.5)

    logging.info('cca')
    logging.info(cca.x_loadings_)
    logging.info(cca.y_loadings_)
    logging.info(cca.coef_)
    return cca.coef_
Example #7
import numpy as np
from sklearn.cross_decomposition import CCA


def SVCCA_distance(checkpoint_1, checkpoint_2, R=32):
    """Compute the singular-value canonical correlation analysis distance
    between two different networks."""

    A_1 = checkpoint_1['test_data']
    A_2 = checkpoint_2['test_data']

    #U_1, S_1, V_1 = np.linalg.svd(A_1)
    #U_2, S_2, V_2 = np.linalg.svd(A_2)

    cca = CCA(n_components=R, max_iter=1000)
    #cca.fit(V_1, V_2)
    #cca.fit(A_1.dot(V_1), A_2.dot(V_2))
    cca.fit(A_1, A_2)

    #return 1 - cca.score(A_1.dot(V_1), A_2.dot(V_2))
    #return 1 - cca.score(V_1, V_2)
    return 1 - cca.score(A_1, A_2)
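
As written, the function skips the "SV" half of SVCCA; the commented-out lines sketch the intended SVD reduction. One way to restore it (an assumption reconstructed from those comments, not the author's final code):

def svcca_distance_with_svd(checkpoint_1, checkpoint_2, R=32):
    A_1 = checkpoint_1['test_data']
    A_2 = checkpoint_2['test_data']
    # Keep only the top-R right singular directions of each representation
    _, _, Vt_1 = np.linalg.svd(A_1, full_matrices=False)
    _, _, Vt_2 = np.linalg.svd(A_2, full_matrices=False)
    B_1 = A_1.dot(Vt_1[:R].T)
    B_2 = A_2.dot(Vt_2[:R].T)
    cca = CCA(n_components=R, max_iter=1000)
    cca.fit(B_1, B_2)
    return 1 - cca.score(B_1, B_2)
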
def grid_cca(activations1, act_labels1, activations2, act_labels2, n_clusters):

    cca_grid = np.zeros((n_clusters, n_clusters))
    for clust_i in range(n_clusters):
        for clust_j in range(n_clusters):
            i_mask = act_labels1 == clust_i
            j_mask = act_labels2 == clust_j
            if sum(i_mask) == 0 or sum(j_mask) == 0:
                cca_grid[clust_i, clust_j] = 0
                cca_grid[clust_j, clust_i] = 0
            else:
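                # cap the number of components at the smaller cluster size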
                n_comps = min(sum(i_mask), sum(j_mask))
                cca = CCA(n_components=n_comps)
                cca.fit(activations1[i_mask].T, activations2[j_mask].T)
                cca_score = cca.score(activations1[i_mask].T,
                                      activations2[j_mask].T)
                cca_grid[clust_i, clust_j] = cca_score

    return cca_grid
Example #9
import numpy as np
from sklearn.cross_decomposition import CCA


def PLS_CCA(csv_data,
            point_index,
            sub_index,
            var_name,
            train=None,
            components=None):
    X_array = []
    for j in csv_data:
        temp_array = j[point_index - 1:point_index + 8]
        X_array.append(temp_array)
    X_array = np.array(X_array)
    if components is None:
        # Note: `components` is computed here but unused; the CCA below is
        # fixed at one component
        components = np.shape(X_array)[1]
    for i in range(7):
        Y_array = np.array(csv_data[:, sub_index - 1 + i]).reshape(-1, 1)
        ccaModel = CCA(n_components=1)
        ccaModel.fit(X_array, Y_array)
        print(var_name[sub_index + i])
        print("R^2 =", np.around(ccaModel.score(X_array, Y_array), decimals=2))
Example #10
    # (fragment: begins mid-function; cca, sat_data, y_data, n_cca_comp, rsquare,
    #  cca_plc_r2, cca_pdc_r2, dataset_use, plot_list and c_vec are defined
    #  earlier in its source)

    # Get CCA transformation
    U_c, V_c = cca.x_scores_, cca.y_scores_  # = cca.transform(sat_data, y_data)

    # From: https://stackoverflow.com/questions/37398856/
    rho_cca = np.corrcoef(U_c.T, V_c.T).diagonal(offset=n_cca_comp)
    #score = np.diag(np.corrcoef(cca.x_scores_, cca.y_scores_, rowvar=False)[:n_cca_comp, n_cca_comp:])

    # Use function definition
    cod_cca2 = rsquare(U_c, y_data)
    print(cod_cca2)
    # Add to output dict
    cca_plc_r2[dataset_use] = cod_cca2[0]  # TODO: set index programmatically
    cca_pdc_r2[dataset_use] = cod_cca2[1]  # TODO: set index programmatically
    # Calculate Coefficient of Determination (COD) = R²
    cod_cca = cca.score(sat_data, y_data)
    print(cod_cca)

    # Plot CCA U and V scores per component
    if 'CCA'.lower() in plot_list:
        legend_list = []
        fig = plt.figure()
        for i_comp in range(n_cca_comp):
            plt.scatter(U_c[:, i_comp], V_c[:, i_comp], c=c_vec[i_comp])
            legend_list.append('Comp. nr. ' + str(i_comp) + r' $\rho$ = ' + '{:.3f}'.format(rho_cca[i_comp]))
        plt.title(dataset_use + ' CCA: R^2 = ' + '{:.3f}'.format(cod_cca))
        plt.legend(legend_list)
        plt.show()  # display it

    # Plot CCA U against PLC
    if 'PxCvsU'.lower() in plot_list:
Example #11
import pickle as pkl
import time

import numpy as np
from sklearn.cross_decomposition import CCA

# expdir, loadseg and get_seg_size are project-local modules/helpers from the
# original source


def compute_correlation(directory, blob, num_samples=None, num_components=1, out_file=None,
        verbose=False):
    ed = expdir.ExperimentDirectory(directory)

    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    
    L = ds.label_size()
    N = ds.size()

    blob_info = ed.load_info(blob=blob)
    shape = blob_info.shape
    K = shape[1]

    categories = np.array(ds.category_names())
    label_names = np.array([ds.label[i]['name'] for i in range(L)])

    (Hs, Ws) = get_seg_size(info.input_dim)

    if verbose:
        start = time.time()
        print('Loading data...')
    upsampled_data = ed.open_mmap(blob=blob, part='upsampled', mode='r',
            shape=(N,K,Hs,Ws))
    concept_data = ed.open_mmap(part='concept_data', mode='r',
            shape=(N,L,Hs,Ws))
    if verbose:
        print('Finished loading data in %d secs.' % (time.time() - start))

    if verbose:
        start = time.time()
        print('Selecting data...')

    # Sample the center spatial location of each map (integer division for indices)
    if num_samples is not None:
        rand_idx = np.random.choice(N, num_samples, replace=False)
        X = upsampled_data[rand_idx, :, Hs // 2, Ws // 2]
        Y = concept_data[rand_idx, :, Hs // 2, Ws // 2]
    else:
        X = upsampled_data[:, :, Hs // 2, Ws // 2]
        Y = concept_data[:, :, Hs // 2, Ws // 2]

    if verbose:
        print('Finished selecting data in %d secs.' % (time.time() - start))

    cca = CCA(n_components=num_components)

    if verbose:
        start = time.time()
        if num_samples is None:
            num_samples = N
        print('Fitting %d-component CCA with N = %d samples...' % (num_components, num_samples))
    cca.fit(X,Y)
    if verbose:
        print('Fitted %d-component CCA with N = %d samples in %d secs.' % (num_components,
                num_samples, time.time() - start))

    X_c, Y_c = cca.transform(X,Y)
    score = cca.score(X,Y)

    results = {}
    if out_file is not None:
        if verbose:
            start = time.time()
            print('Saving results...')
        results['model'] = cca
        try:
            results['idx'] = rand_idx
        except NameError:
            # rand_idx only exists when num_samples was given
            results['idx'] = None
        results['directory'] = directory
        results['blob'] = blob
        results['num_samples'] = num_samples
        results['num_components'] = num_components
        results['score'] = score

        with open(out_file, 'wb') as f:
            pkl.dump(results, f)
        if verbose:
            print('Saved results at %s in %d secs.' % (out_file, time.time() - start))

    return results
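
    # (fragment: begins mid-function; `all`, `nSat`, `metrics2`, `metricsInput2`
    #  and `metricsOutput2` are defined earlier in its source)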
    Sat = all[all[:, 2] > 40]
    inpSat = Sat[:, 0:2]
    outSat = Sat[:, 2:]
    inpnSat = nSat[:, 0:2]
    outnSat = nSat[:, 2:]

    scale = False
    ccanSat = CCA(n_components=1, scale=scale)
    ccanSat.fit(inpnSat, outnSat)
    inp_ccanSat = inpnSat.dot(ccanSat.x_weights_)
    out_ccanSat = outnSat.dot(ccanSat.y_weights_)
    plt.scatter(inp_ccanSat, out_ccanSat, c='orange', s=50)
    logging.info('ccanSat')
    logging.info(ccanSat.x_loadings_)
    logging.info(ccanSat.y_loadings_)
    print(ccanSat.score(inpnSat, outnSat))
    out_pred0 = inpnSat.dot(ccanSat.coef_[:, 0])
    plt.scatter(outnSat[:, 0], out_pred0, c='r', marker='+')

    ccaSat = CCA(n_components=1, scale=scale)
    ccaSat.fit(inpSat, outSat)
    inp_ccaSat = inpSat.dot(ccaSat.x_weights_)
    out_ccaSat = outSat.dot(ccaSat.y_weights_)
    plt.scatter(inp_ccaSat, out_ccaSat, c='purple')
    logging.info('ccaSat')
    #logging.info(ccaSat.x_rotations_)
    #logging.info(ccaSat.y_rotations_)

    # compare with second measurement
    inp2 = np.array([metrics2[m] for m in metricsInput2]).T.astype(float)
    out2 = np.array([metrics2[m] for m in metricsOutput2]).T.astype(float)
def calculate_sklearn_var(cca: CCA, X: np.ndarray, Y: np.ndarray, X_encoder):
    # CCA.score returns the R^2 of predicting Y from X; X_encoder is unused here
    shared_var = cca.score(X, Y)
    return shared_var
Example #14
import h5py
import numpy as np
from sklearn.cross_decomposition import CCA, PLSRegression
from sklearn.model_selection import KFold, cross_validate

# count_leading_trailing_true is a helper defined elsewhere in the original source


def CCA_across_patients(data_files,
                        alg='cca',
                        freq_clustering='canonical',
                        bin_size=10,
                        window_size=500,
                        post_shift=0,
                        pre_shift=0,
                        band='alpha',
                        pair=(1, 1)):

    # Assemble the set of feature vectors

    # Send the arguments in units of ms
    samp_factor = 10
    window_size = int(window_size / samp_factor)
    pre_shift = pre_shift / samp_factor
    post_shift = post_shift / samp_factor

    pre_stim_feature_vector = np.array([])
    post_stim_feature_vector = np.array([])

    for data_file in data_files:

        with h5py.File(data_file, 'r') as f:

            # ERSP time series references
            ERSP_refs = f['cfg_PAINT_cond']['ChanERSP']

            for i in range(ERSP_refs.size):
                # Read at 64-bit precision; converted to 32-bit at the end
                ERSP = np.zeros((250, 51, 95), dtype=np.float64)
                f[ERSP_refs[i][0]].read_direct(ERSP)

                # Need to exclude the maximum nan padding
                leading_nan_count = np.zeros((51, 95))
                trailing_nan_count = np.zeros((51, 95))
                for j in range(51):
                    for k in range(95):
                        x1, x2 = count_leading_trailing_true(
                            np.isnan(ERSP[:, j, k]))
                        leading_nan_count[j, k] = x1
                        trailing_nan_count[j, k] = x2

                # Select pre and post stimulation
                leading_max = int(np.amax(leading_nan_count))
                trailing_max = int(np.amax(trailing_nan_count))

                pre_window_end = int(1000 / samp_factor - pre_shift)
                post_window_start = int(1000 / samp_factor + post_shift)

                # Ensure that we don't encroach on the nan-padding
                window_size1 = min(window_size, pre_window_end - leading_max)

                window_size2 = min(
                    window_size,
                    int(2500 / samp_factor - trailing_max - post_window_start))

                window_size = int(min(window_size1, window_size2))

                pre_stim = ERSP[pre_window_end -
                                window_size:pre_window_end, :, :]
                post_stim = ERSP[post_window_start:post_window_start +
                                 window_size, :, :]

                # Re-arrange axes so that frequency bins are last
                pre_stim = np.swapaxes(pre_stim, 1, 2)
                post_stim = np.swapaxes(post_stim, 1, 2)

                if freq_clustering == 'canonical':

                    # Average across canonical frequency bands
                    pre_stim_theta = np.mean(pre_stim[:, :, 0:4], axis=-1)
                    pre_stim_alpha = np.mean(pre_stim[:, :, 4:8], axis=-1)
                    pre_stim_beta = np.mean(pre_stim[:, :, 8:26], axis=-1)
                    pre_stim_gamma = np.mean(pre_stim[:, :, 26::], axis=-1)

                    pre_stim = np.concatenate([
                        pre_stim_theta, pre_stim_alpha, pre_stim_beta,
                        pre_stim_gamma
                    ],
                                              axis=-1)

                    post_stim_theta = np.mean(post_stim[:, :, 0:4], axis=-1)
                    post_stim_alpha = np.mean(post_stim[:, :, 4:8], axis=-1)
                    post_stim_beta = np.mean(post_stim[:, :, 8:26], axis=-1)
                    post_stim_gamma = np.mean(post_stim[:, :, 26::], axis=-1)

                    post_stim = np.concatenate([
                        post_stim_theta, post_stim_alpha, post_stim_beta,
                        post_stim_gamma
                    ],
                                               axis=-1)
                elif freq_clustering == 'equal':
                    # Chop off the lowest frequency bin so we have a non-prime number of bins...
                    pre_stim = pre_stim[..., 1::]
                    post_stim = post_stim[..., 1::]

                    # Average across equal number of frequency bands
                    pre_stim = np.mean(pre_stim.reshape(
                        (pre_stim.shape[0], pre_stim.shape[1], -1, bin_size)),
                                       axis=-1)
                    post_stim = np.mean(post_stim.reshape(
                        (post_stim.shape[0], post_stim.shape[1], -1,
                         bin_size)),
                                        axis=-1)

                    # Collapse
                    pre_stim = pre_stim.reshape(
                        (pre_stim.shape[0],
                         pre_stim.shape[1] * pre_stim.shape[2]))
                    post_stim = post_stim.reshape(
                        (post_stim.shape[0],
                         post_stim.shape[1] * post_stim.shape[2]))

                elif freq_clustering == 'random':

                    # Chop off the lowest frequency bin so we have a non-prime number of bins...
                    pre_stim = pre_stim[..., 1::]
                    post_stim = post_stim[..., 1::]

                    # Average across random collection of frequency bins
                    idxs = np.arange(pre_stim.shape[-1])
                    np.random.shuffle(idxs)
                    idxs = np.split(idxs, int(pre_stim.shape[-1] / bin_size))

                    pre_stim_rand1 = np.mean(pre_stim[:, :, idxs[0]], axis=-1)
                    pre_stim_rand2 = np.mean(pre_stim[:, :, idxs[1]], axis=-1)
                    pre_stim_rand3 = np.mean(pre_stim[:, :, idxs[2]], axis=-1)
                    pre_stim_rand4 = np.mean(pre_stim[:, :, idxs[3]], axis=-1)
                    pre_stim_rand5 = np.mean(pre_stim[:, :, idxs[4]], axis=-1)

                    pre_stim = np.concatenate([
                        pre_stim_rand1, pre_stim_rand2, pre_stim_rand3,
                        pre_stim_rand4, pre_stim_rand5
                    ],
                                              axis=-1)

                    post_stim_rand1 = np.mean(post_stim[:, :, idxs[0]],
                                              axis=-1)
                    post_stim_rand2 = np.mean(post_stim[:, :, idxs[1]],
                                              axis=-1)
                    post_stim_rand3 = np.mean(post_stim[:, :, idxs[2]],
                                              axis=-1)
                    post_stim_rand4 = np.mean(post_stim[:, :, idxs[3]],
                                              axis=-1)
                    post_stim_rand5 = np.mean(post_stim[:, :, idxs[4]],
                                              axis=-1)

                    post_stim = np.concatenate([
                        post_stim_rand1, post_stim_rand2, post_stim_rand3,
                        post_stim_rand4, post_stim_rand5
                    ],
                                               axis=-1)
                elif freq_clustering == 'single_band':

                    if band == 'theta':
                        pre_stim = pre_stim[:, :, 0:4]
                        post_stim = post_stim[:, :, 0:4]
                    elif band == 'alpha':
                        pre_stim = pre_stim[:, :, 4:8]
                        post_stim = post_stim[:, :, 4:8]
                    elif band == 'beta':
                        pre_stim = pre_stim[:, :, 8:26]
                        post_stim = post_stim[:, :, 8:26]
                    elif band == 'gamma':
                        pre_stim = pre_stim[:, :, 26::]
                        post_stim = post_stim[:, :, 26::]
                    elif band == 'topgamma':
                        pre_stim = pre_stim[:, :, 41:51]
                        post_stim = post_stim[:, :, 41:51]
                    elif band == 'all':
                        pass
                elif freq_clustering == 'pairwise':

                    pre_stim = pre_stim[:, :, pair[0]]
                    post_stim = post_stim[:, :, pair[1]]

                # Collapse and append
                if pre_stim_feature_vector.size == 0:
                    pre_stim_feature_vector = np.append(
                        pre_stim_feature_vector, pre_stim.reshape((1, -1)))
                    post_stim_feature_vector = np.append(
                        post_stim_feature_vector, post_stim.reshape((1, -1)))

                    pre_stim_feature_vector = pre_stim_feature_vector.reshape(
                        (1, -1))
                    post_stim_feature_vector = post_stim_feature_vector.reshape(
                        (1, -1))
                else:
                    pre_stim_feature_vector = np.concatenate(
                        [pre_stim_feature_vector,
                         pre_stim.reshape((1, -1))])
                    post_stim_feature_vector = np.concatenate(
                        [post_stim_feature_vector,
                         post_stim.reshape((1, -1))])

    # Convert to 32 bit floating precision
    pre_stim_feature_vector = pre_stim_feature_vector.astype(np.float32)
    post_stim_feature_vector = post_stim_feature_vector.astype(np.float32)

    # Perform a cross-validated canonical correlation analysis on these features

    if alg == 'cca':
        corrmodel = CCA(n_components=1)
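        # cross_validate scores each fold with CCA.score, i.e. the prediction R^2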
        crsval = cross_validate(corrmodel,
                                pre_stim_feature_vector,
                                post_stim_feature_vector,
                                cv=5,
                                return_train_score=True)
        return np.mean(crsval['test_score']), np.mean(crsval['train_score'])

    elif alg == 'pls':
        corrmodel = PLSRegression()
        # Manually cross-validate
        folds = KFold(n_splits=5)
        test_scores = []
        train_scores = []
        for train_index, test_index in folds.split(pre_stim_feature_vector,
                                                   post_stim_feature_vector):
            corrmodel.fit(pre_stim_feature_vector[train_index],
                          post_stim_feature_vector[train_index])
            test_scores.append(
                corrmodel.score(pre_stim_feature_vector[test_index],
                                post_stim_feature_vector[test_index]))
            train_scores.append(
                corrmodel.score(pre_stim_feature_vector[train_index],
                                post_stim_feature_vector[train_index]))
        return np.mean(test_scores), np.mean(train_scores)
Example #15
import numpy as np
from sklearn.cross_decomposition import CCA
from sklearn.ensemble import RandomForestRegressor

# hour, o3, pm10, so2, no2, co, temperature, wind, weather, moisture,
# pressure, precipitation and pm25 are loaded earlier in the original script
X = []
X.append(hour)
X.append(o3)
# X.append(pm10)
X.append(so2)
X.append(no2)
X.append(co)
X.append(temperature)
X.append(wind)
# X.append(weather)
X.append(moisture)
X.append(pressure)
X.append(precipitation)
X = np.array(X)
X = np.transpose(X)
print(X.shape)

Y = np.array(pm25)
print(Y.shape)

regr = RandomForestRegressor().fit(X, Y)
print("RandomForestRegressor.feature_importances_:\n", regr.feature_importances_)

cca = CCA(n_components=1).fit(X, Y)  # Y is a single target, so at most one component
print("cca.x_weights_:\n", cca.x_weights_)
# print("cca.x_loadings_:\n", cca.x_loadings_)
# print("cca.x_scores_:\n", cca.x_scores_)
print("cca.score:\n", cca.score(X, Y))
# print("cca.predict:\n", cca.predict(X))