Exemple #1
0
def clust_forc(matfile, k, ccenter, t):
    """Cluster the table stored in a MATLAB file via ``clusmat.mat_kmeans``.

    The file is read with ``loadmat`` and its ``'clust_data'`` variable is
    treated as a labelled table: row 0 (from column 1 on) supplies the
    column labels, column 0 (from row 1 on) supplies the row labels, and
    every remaining cell supplies one numeric value taken from ``[0][0]``
    of that cell.

    Parameters
    ----------
    matfile
        Passed straight to ``loadmat``.
    k
        Forwarded to ``clusmat.mat_kmeans`` (presumably the number of
        clusters -- confirm against that helper).
    ccenter
        2-D array forwarded to ``clusmat.mat_kmeans``.  When it has more
        columns than the loaded table, it is first reduced with PCA to the
        table's column count.
    t
        Appended (via ``str``) to ``'mat_kmeans_t'`` to build the label
        handed to ``clusmat.mat_kmeans``.

    Returns
    -------
    tuple
        The four values returned by ``clusmat.mat_kmeans`` followed by a
        DataFrame built from the third of them and completed from the
        normalised table with ``combine_first``.
    """
    table = loadmat(matfile)['clust_data']

    # Column labels: header row without its leading corner cell.
    col_labels = np.array([])
    for cell in table[0][1:]:
        col_labels = np.append(col_labels, cell[0])

    # Row labels: first cell of every row below the header.
    row_labels = np.array([])
    for row in table[1:]:
        row_labels = np.append(row_labels, row[0][0])

    # Numeric payload: everything except the header row and label column.
    values = np.zeros(shape=(len(table) - 1, len(table[0]) - 1))
    for r, row in enumerate(table[1:]):
        for c, cell in enumerate(row[1:]):
            values[r, c] = cell[0][0]

    frame = pd.DataFrame(data=values, index=row_labels, columns=col_labels)

    # Bring the supplied centres down to the table's dimensionality when
    # they carry more columns than the table does.
    n_features = frame.shape[1]
    if ccenter.shape[1] > n_features:
        ccenter = PCA(n_components=n_features).fit_transform(ccenter)

    # Missing values left by the normalisation are replaced with zero.
    normalised = predim.norm_features(frame).fillna(0.00)

    km_x, km_y, km_z, ccenter = clusmat.mat_kmeans(
        normalised, k, 'mat_kmeans_t' + str(t), ccenter)
    clusters = pd.DataFrame(km_z).combine_first(normalised)

    return km_x, km_y, km_z, ccenter, clusters
Exemple #2
0
def struct_features():
    """Build the filtered, normalised feature table from ``INPUTFILE``.

    Chains the ``predim`` helpers in order: read the data, compute the
    active-storage outputs, extract features from them, normalise the
    features, compute their correlation, and filter the normalised
    features using that correlation.

    Returns
    -------
    The value returned by ``predim.filt_features``.
    """
    raw = predim.read_data(INPUTFILE)
    storage_frame, storage, tte = predim.calc_active_storage(raw)

    normalised = predim.norm_features(
        predim.extr_features(storage_frame, storage, tte))

    # The correlation is computed on the normalised features and is then
    # used, together with them, by the filter step.
    return predim.filt_features(normalised,
                                predim.calc_correlation(normalised))
Exemple #3
0
def all_features(matfile, s_feat):
    """Append the normalised table from a MATLAB file to ``s_feat``.

    The file is read with ``loadmat`` and its ``'clust_data'`` variable is
    treated as a labelled table: row 0 (from column 1 on) supplies the
    column labels, column 0 (from row 1 on) supplies the row labels, and
    every remaining cell supplies one numeric value taken from ``[0][0]``
    of that cell.

    Parameters
    ----------
    matfile
        Passed straight to ``loadmat``.
    s_feat
        Pandas object placed first in the column-wise concatenation.

    Returns
    -------
    The column-wise (``axis=1``) concatenation of ``s_feat`` and the
    normalised, NaN-filled table.
    """
    table = loadmat(matfile)['clust_data']

    # Column labels: header row without its leading corner cell.
    col_labels = np.array([])
    for cell in table[0][1:]:
        col_labels = np.append(col_labels, cell[0])

    # Row labels: first cell of every row below the header.
    row_labels = np.array([])
    for row in table[1:]:
        row_labels = np.append(row_labels, row[0][0])

    # Numeric payload: everything except the header row and label column.
    values = np.zeros(shape=(len(table) - 1, len(table[0]) - 1))
    for r, row in enumerate(table[1:]):
        for c, cell in enumerate(row[1:]):
            values[r, c] = cell[0][0]

    frame = pd.DataFrame(data=values, index=row_labels, columns=col_labels)

    # Missing values left by the normalisation are replaced with zero.
    normalised = predim.norm_features(frame).fillna(0.00)

    return pd.concat([s_feat, normalised], axis=1, sort=False)
Exemple #4
0
def struct_features():
    """Derive four PCA components from features of the ``INPUTFILE`` data.

    Every variable of the dataset returned by ``reda.read_nc`` is processed
    except the names listed in ``skipped`` below.  For each one, features
    are extracted with ``predim.extr_feat``, their columns are suffixed
    with ``'-<variable name>'``, and they are appended column-wise to a
    growing table.  The table is normalised, infinities are turned into
    NaN, all-NaN columns are dropped, the result is filtered with
    ``predim.filt_features`` and finally projected onto four principal
    components.

    Returns
    -------
    pandas.DataFrame
        Columns ``F1`` .. ``F4``, indexed like the filtered feature table.
    """
    skipped = ('clon', 'clon_bnds', 'clat', 'clat_bnds',
               'height', 'height_bnds', 'time', 'cosmu0')

    dataset = reda.read_nc(INPUTFILE)
    features = pd.DataFrame()
    for name in dataset.variables.keys():
        if name in skipped:
            continue
        extracted = predim.extr_feat(reda.read_varnc(dataset, name, 0))
        # Tag each feature column with the variable it came from.
        extracted = extracted.rename(columns=lambda col: col + '-' + name)
        features = pd.concat([features, extracted], axis=1)

    normalised = predim.norm_features(features)
    # Original author's note: zero values become NaN after normalisation
    # with (x - mean(x)) / std(x); of 92 features, 28 were NaN and 3 inf.
    # Infinities are therefore mapped to NaN and columns that are entirely
    # NaN are removed before the correlation step.
    normalised = normalised.replace([np.inf, -np.inf], np.nan)
    normalised = normalised.dropna(axis=1, how='all')

    selected = predim.filt_features(normalised,
                                    predim.calc_correlation(normalised))

    components = PCA(n_components=4).fit_transform(selected)
    return pd.DataFrame(components,
                        index=selected.index.values,
                        columns=['F1', 'F2', 'F3', 'F4'])
Exemple #5
0
 # Interactive driver fragment: runs the predim processing steps in order
 # and, before moving on from each stage, asks on stdin whether to plot the
 # intermediate result.  Only the exact answer 'y' triggers a plot; any
 # other answer just prints a progress message.
 # NOTE(review): the enclosing scope of this fragment is not visible here.

 # Stage 1: read the input data from INPUTFILE.
 data = predim.read_data(INPUTFILE)    
 vis_indata = input('Visualize input data (y/n)? ')
 if vis_indata == 'y':
     # The second argument is a label passed to the plotting helper
     # (presumably an output name/title -- confirm in visvis).
     visvis.plot_timeseries(data, 'total_storage')
 else:
     print('Processing ...')
     
 # Stage 2: compute the active-storage outputs from the input data.
 df_AS, AS, TtE = predim.calc_active_storage(data)
 vis_ac = input('Visualize active storage data (y/n)? ')
 if vis_ac == 'y':
     visvis.plot_timeseries(df_AS, 'active_storage')
 else:
     print('Processing ...')
     
 # Stage 3: extract features, normalise them and compute their correlation.
 df_Feature = predim.extr_features(df_AS, AS, TtE)
 df_Feat_Norm = predim.norm_features(df_Feature)
 df_Corr_pearson = predim.calc_correlation(df_Feat_Norm)
 vis_corr = input('Visualize correlation coefficient of the features (y/n)? ')
 if  vis_corr == 'y':
     visvis.plot_correlations(df_Corr_pearson, 'features_correlation')
 else:
     print('Processing ...')
     
 # Stage 4: filter the normalised features using the correlation result;
 # df_Feat_Norm is rebound to the filtered table from here on.
 df_Feat_Norm = predim.filt_features(df_Feat_Norm, df_Corr_pearson)
 
 # Stage 5 (optional): the elbow computation itself only runs when the
 # user answers 'y'; otherwise it is skipped entirely.
 vis_elbow = input('Process elbow method and visualize (y/n)? ')
 if vis_elbow == 'y':
     distortions = clusclus.apply_elbow(df_Feat_Norm)
     visvis.plot_elbow(distortions, 'elbow_kmeans')
 else:
     print('Processing ...')