コード例 #1
0
def run_clustering(X, k, dists_all):
    """Cluster ``X`` into ``k`` groups and score the resulting partition.

    The clustering is produced by ``ksc.inc_ksc``; its assignment vector is
    then handed to the helpers in ``metrics``.

    Parameters
    ----------
    X
        Data passed straight to ``ksc.inc_ksc`` and to every metric helper.
    k
        Number of clusters requested from ``ksc.inc_ksc``.
    dists_all
        Forwarded unchanged to ``avg_intra_dist``, ``avg_inter_dist`` and
        ``beta_cv`` (presumably precomputed pairwise distances -- TODO
        confirm against ``metrics``).

    Returns
    -------
    tuple
        ``(intra, inter, bcv, cost)``. ``intra`` and ``inter`` are the first
        element of what the respective ``metrics`` helper returns.
    """
    # Only the assignments (2nd value) and the 4th value returned by
    # inc_ksc are needed here; the other two are discarded.
    _, labels, _, centroid_dists = ksc.inc_ksc(X, k)

    return (
        metrics.avg_intra_dist(X, labels, dists_all)[0],
        metrics.avg_inter_dist(X, labels, dists_all)[0],
        metrics.beta_cv(X, labels, dists_all),
        # The third positional argument of metrics.cost is deliberately None.
        metrics.cost(X, labels, None, centroid_dists),
    )
コード例 #2
0
ファイル: plot_quality.py プロジェクト: flaviovdf/pyksc
def run_clustering(X, k, dists_all):
    """Run ``ksc.inc_ksc`` on ``X`` and return four quality measures.

    Parameters
    ----------
    X
        Input handed to ``ksc.inc_ksc`` and to each ``metrics`` helper.
    k
        Number of clusters to create.
    dists_all
        Passed through to ``metrics.avg_intra_dist``,
        ``metrics.avg_inter_dist`` and ``metrics.beta_cv`` (looks like a
        precomputed distance structure -- verify against ``metrics``).

    Returns
    -------
    tuple
        ``(intra, inter, bcv, cost)`` in that order; ``intra`` and ``inter``
        are element ``[0]`` of the corresponding helper's result.
    """
    _cent, labels, _shift, dists_to_cent = ksc.inc_ksc(X, k)

    # Each metric is evaluated against the same assignment vector.
    avg_intra = metrics.avg_intra_dist(X, labels, dists_all)[0]
    avg_inter = metrics.avg_inter_dist(X, labels, dists_all)[0]
    beta_cv = metrics.beta_cv(X, labels, dists_all)
    total_cost = metrics.cost(X, labels, None, dists_to_cent)

    return avg_intra, avg_inter, beta_cv, total_cost
コード例 #3
0
ファイル: cluster.py プロジェクト: flaviovdf/pyksc
def main(tseries_fpath, base_folder, k):
    """Cluster the series in ``tseries_fpath`` and dump the results as text.

    Parameters
    ----------
    tseries_fpath
        Path of the time series file given to ``ioutil.load_series``.
    base_folder
        Output folder. The index file ``train.dat`` is read from its parent
        directory (``base_folder/../train.dat``).
    k
        Number of clusters; converted with ``int`` so a string is accepted.

    Writes ``cents.dat``, ``assign.dat``, ``shift.dat`` and
    ``dists_cent.dat`` into ``base_folder``.
    """
    n_clusters = int(k)

    idx_fpath = os.path.join(base_folder, "..", "train.dat")
    X = ioutil.load_series(tseries_fpath, idx_fpath)

    cent, assign, shift, dists_cent = ksc.inc_ksc(X, n_clusters)

    # (file name, array, number format) for every output, in write order.
    outputs = (
        ("cents.dat", cent, "%.5f"),
        ("assign.dat", assign, "%d"),
        ("shift.dat", shift, "%d"),
        ("dists_cent.dat", dists_cent, "%.5f"),
    )
    for fname, values, fmt in outputs:
        np.savetxt(os.path.join(base_folder, fname), values, fmt=fmt)
コード例 #4
0
ファイル: cluster.py プロジェクト: regstrtn/pyksc
def main(tseries_fpath, base_folder, k):
    '''
    Loads the time series, clusters them with KSC and saves every output of
    the algorithm as a text file inside `base_folder`.

    Parameters
    ----------
    tseries_fpath
        Time series file handed to `ioutil.load_series`.
    base_folder
        Folder receiving the output files; `train.dat` is looked up one
        directory above it.
    k
        Number of clusters (anything `int` can convert).
    '''
    def dump(fname, data, fmt):
        # Every result lands next to the others in base_folder.
        np.savetxt(os.path.join(base_folder, fname), data, fmt=fmt)

    num_clusters = int(k)

    parent_folder = os.path.join(base_folder, '..')
    idx_fpath = os.path.join(parent_folder, 'train.dat')
    X = ioutil.load_series(tseries_fpath, idx_fpath)

    cent, assign, shift, dists_cent = ksc.inc_ksc(X, num_clusters)

    dump('cents.dat', cent, '%.5f')
    dump('assign.dat', assign, '%d')
    dump('shift.dat', shift, '%d')
    dump('dists_cent.dat', dists_cent, '%.5f')
コード例 #5
0
def main(tseries_fpath, k, plot_foldpath):
    """Cluster the series in ``tseries_fpath`` and plot every centroid.

    For each centroid returned by ``ksc.inc_ksc`` three PDFs are written to
    ``plot_foldpath``:

    * ``<i>.pdf`` -- the centroid as returned;
    * ``<i>-peak-center.pdf`` -- the centroid passed through ``dist.shift``
      by ``half_length - argmax`` positions (presumably moving the peak to
      the middle -- confirm against ``dist.shift``);
    * ``<i>-min-first.pdf`` -- the centroid shifted by ``-argmin`` positions
      (presumably moving the minimum to the front -- confirm likewise).

    The raw clustering outputs (``cents.dat``, ``assign.dat``,
    ``shift.dat``, ``dists_cent.dat``) are saved to the same folder.

    Parameters
    ----------
    tseries_fpath
        Text file read with ``np.genfromtxt``; its first column is dropped.
    k
        Number of clusters, forwarded as is to ``ksc.inc_ksc``.
    plot_foldpath
        Folder receiving all plots and data files.
    """
    import mkl
    mkl.set_num_threads(16)

    initialize_matplotlib()

    def save_curve(series, fname):
        # Draw one series as a black line with both axes hidden, store it
        # under plot_foldpath and close the figure.
        plt.plot(series, '-k')
        axes = plt.gca()
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
        plt.savefig(os.path.join(plot_foldpath, fname))
        plt.close()

    X = np.genfromtxt(tseries_fpath)[:, 1:]

    # Rows that sum to exactly zero get a small constant added.
    zero_rows = np.where(X.sum(axis=1) == 0)[0]
    X[zero_rows] += .001
    # The column slice above is a view; work on an independent copy.
    X = X.copy()

    cent, assign, shift, dists_cent = ksc.inc_ksc(X, k)

    for idx, centroid in enumerate(cent):
        save_curve(centroid, '%d.pdf' % idx)

        midpoint = centroid.shape[0] // 2
        peak_centered = dist.shift(centroid, midpoint - np.argmax(centroid),
                                   rolling=True)
        save_curve(peak_centered, '%d-peak-center.pdf' % idx)

        min_first = dist.shift(centroid, -np.argmin(centroid), rolling=True)
        save_curve(min_first, '%d-min-first.pdf' % idx)

    # (file name, array, number format) for every output, in write order.
    for fname, values, fmt in (('cents.dat', cent, '%.5f'),
                               ('assign.dat', assign, '%d'),
                               ('shift.dat', shift, '%d'),
                               ('dists_cent.dat', dists_cent, '%.5f')):
        np.savetxt(os.path.join(plot_foldpath, fname), values, fmt=fmt)
コード例 #6
0
def cluster(T, num_clust=5):
    '''
    Runs the KSC algorithm on the time series matrix T.

    Parameters
    ----------
    T : ndarray of shape (row, time series length)
        The time series to cluster
    num_clust : int
        Number of clusters to create

    Returns
    -------
    cents, assign
        The first two values returned by `ksc.inc_ksc`; the remaining two
        are dropped.
    '''
    # A tiny constant is added to every value before clustering
    # (presumably to keep all-zero series out of KSC -- TODO confirm).
    offset = T + 1e-20

    # Hand KSC an independent array in C order.
    series = np.asarray(offset, order='C').copy()

    centroids, labels, _, _ = ksc.inc_ksc(series, num_clust)
    return centroids, labels
コード例 #7
0
ファイル: get_clusters.py プロジェクト: anair5/zika_python
    return ksc_input


if __name__ == "__main__":
    # Load the input frequency table (an earlier run read "Frequencies.csv").
    df = pd.read_csv(
        "new_daily_frequencies/zika_jj_cols_daily_frequencies_new.csv",
        encoding="iso-8859-1",
    )

    # Build the KSC input from the table and take a C-ordered copy of it.
    # NOTE: min-max scaling of the input (sklearn's MinMaxScaler) was tried
    # here before and is currently disabled.
    ksc_input = alter_inputs(df).copy(order='C')

    # Number of clusters
    k = 3

    centers, assign, series_shifts, dists = ksc.inc_ksc(ksc_input, k)

    # Pair each entry of the 'Words' column with its cluster assignment.
    result_df = pd.DataFrame(columns=['Words', 'Cluster'])
    result_df['Words'] = df['Words']
    result_df['Cluster'] = assign

    result_df.to_csv(
        "new_daily_frequencies/Words_Daily_Frequencies_3Clusters.csv",
        index=False, encoding='utf-8')