Example no. 1
def elbowmethod(df: pd.DataFrame):
    """
    Finds the best number of clusters and plots the silhouette score
    against the number of clusters.
    Args:
        df (pd.DataFrame): Columns are (country(index), year_week, value)
    Returns:
        int: Best number of clusters
    """
    import numpy as np
    from tslearn.clustering import TimeSeriesKMeans, silhouette_score
    import matplotlib.pyplot as plt

    scores = []

    K = range(2, df.shape[0])
    for k in K:
        # Build and fit the model, then score the resulting partition
        model = TimeSeriesKMeans(n_clusters=k, metric="softdtw", max_iter=50)
        model.fit(df.values[..., np.newaxis])
        scores.append(
            silhouette_score(df, model.labels_, metric="softdtw"))

    # Despite the function's name, the curve plotted here is the silhouette
    # score, not the distortion, so the best K is the argmax of the curve.
    plt.plot(K, scores, 'bx-')
    plt.xlabel('Values of K')
    plt.ylabel('Silhouette score')
    plt.title('Silhouette score by number of clusters')
    plt.show()

    best_num_cluster = np.argmax(scores) + 2  # K starts at 2
    return best_num_cluster
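A minimal usage sketch for elbowmethod, assuming a wide DataFrame with one row per country and one column per year_week; the toy data and index names below are invented for illustration:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame(rng.standard_normal((8, 20)),
                   index=['country_{}'.format(i) for i in range(8)])
best_k = elbowmethod(toy)  # plots the score curve, returns the best K
print(best_k)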
Example no. 2
def do_kmeans(days, km_size):
    """
    From a time series (as a list of df called days), creates km_size
    clusters using the k-means algorithm.

    Parameters
    ----------
      * days: time series to cluster
      * km_size: number of clusters needed

    Returns
    ----------
      * km: k-means object generated for the clustering, it contains info about the algorithm
      * y_pred: results of the clustering, it contains the clusters themselves
    """
    from tslearn.clustering import TimeSeriesKMeans
    from tslearn.utils import to_time_series_dataset

    # Arrange data for our lib: one array of values per distinct day
    unq = days["n_day_"].unique()
    values = [days[days["n_day_"] == d]["val_"].values for d in unq]
    formatted_dataset = to_time_series_dataset(values)

    # Configure our kmeans
    km = TimeSeriesKMeans(n_clusters=km_size,
                          metric="euclidean",
                          random_state=42,
                          verbose=False)

    y_pred = km.fit_predict(formatted_dataset)

    return km, y_pred
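A hypothetical call to do_kmeans; the long-format frame with "n_day_" and "val_" columns is assumed from how the function indexes days:

import numpy as np
import pandas as pd

days = pd.DataFrame({
    'n_day_': np.repeat(np.arange(10), 24),  # 10 days, 24 samples each
    'val_': np.random.rand(240),
})
km, y_pred = do_kmeans(days, km_size=3)
print(y_pred)  # one cluster label per day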
Example no. 3
def cluster_time_series(time_series_data,
                        cluster_method='HDBSCAN',
                        metric='euclidean',
                        n_clusters=4,
                        min_cluster_size=2,
                        min_sample=1):
    import hdbscan
    from sklearn.cluster import AgglomerativeClustering
    from tslearn.clustering import TimeSeriesKMeans

    # One series per column; transpose so each row becomes one sample
    features = time_series_data.T
    if cluster_method == 'HDBSCAN':
        clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size,
                                    min_samples=min_sample)
        clusterer.fit(features)
        features['cluster'] = clusterer.labels_
    elif cluster_method == 'kmeans':
        kmeans = TimeSeriesKMeans(n_clusters=n_clusters,
                                  metric=metric,
                                  max_iter=5,
                                  max_iter_barycenter=5,
                                  random_state=0)
        features['cluster'] = kmeans.fit_predict(features)
    elif cluster_method == 'AgglomerativeClustering':
        # Use the n_clusters argument instead of a hard-coded value
        AC = AgglomerativeClustering(n_clusters=n_clusters)
        features['cluster'] = AC.fit_predict(features)
    else:
        raise ValueError('Unknown cluster_method: {}'.format(cluster_method))
    feature_dict = features.groupby('cluster').groups
    return feature_dict, features
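A short sketch of calling cluster_time_series; since the function clusters time_series_data.T, the input is assumed to hold one series per column:

import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.rand(100, 12))  # 100 time steps, 12 series
groups, labelled = cluster_time_series(data,
                                       cluster_method='kmeans',
                                       metric='dtw',
                                       n_clusters=3)
print(groups)  # mapping from cluster id to the series (rows) it contains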
Example no. 4
def get_dds_km(cl_lab, ds, z='LEV0', h0=0, h1=24, nc=4):
    # %%
    dds = get_ds_for_dtw_kmeans(cl_lab, ds, z, h0=h0, h1=h1)
    from tslearn.clustering import TimeSeriesKMeans
    km = TimeSeriesKMeans(nc,
                          metric='dtw',
                          metric_params={'sakoe_chiba_radius': 4},
                          random_state=789)
    km.fit(dds.values)
    # Plot each cluster centroid
    for center in km.cluster_centers_:
        plt.plot(center)
    plt.show()
    # %%
    labs = km.predict(dds.values)
    lb = xr.zeros_like(dds['date'], dtype=int) + labs
    # %%
    dds['labs'] = lb
    # %%
    # Bar plot of how many days fall into each cluster
    dds['labs'].reset_coords(drop=True). \
        to_dataframe()['labs'].value_counts(). \
        sort_index(). \
        plot.bar()
    plt.show()
    # %%
    # dds['hour'] = xr.zeros_like(dds['time'], dtype=float) + \
    #               np.arange(0, 24, .5)
    # dds['nday'] = xr.zeros_like(
    #     dds['date'], dtype=int) + \
    #               np.arange(len(dds['date']))
    # dds = dds.swap_dims({'date': 'nday'})
    # dds = dds.swap_dims({'time': 'hour'})
    return dds, km
Example no. 5
def visualize_n_cluster(train_ts,
                        n_lists=[3, 4, 5, 6],
                        metric='dtw',
                        seed=2021,
                        vis=True):

    # Remember the original feature columns so that cluster columns added
    # in earlier iterations do not leak into later fits.
    feature_cols = list(train_ts.columns)

    if vis:
        fig = plt.figure(figsize=(20, 5))
        plt.title('Distribution of buildings per cluster count',
                  fontsize=15, y=1.2)
        plt.axis('off')

    for idx, n in enumerate(n_lists):
        ts_kmeans = TimeSeriesKMeans(n_clusters=n,
                                     metric=metric,
                                     random_state=seed)
        train_ts['cluster(n={})'.format(n)] = ts_kmeans.fit_predict(
            train_ts[feature_cols])
        score = round(
            silhouette_score(train_ts[feature_cols],
                             train_ts['cluster(n={})'.format(n)],
                             metric='euclidean'), 3)

        vc = train_ts['cluster(n={})'.format(n)].value_counts()

        if vis:
            ax = fig.add_subplot(1, len(n_lists), idx + 1)
            sns.barplot(x=vc.index, y=vc, palette='Pastel1')
            ax.set(title='n_cluster={0}\nscore:{1}'.format(n, score))
    if vis:
        plt.tight_layout()
        plt.show()

    return train_ts
Example no. 6
def train_model(train_data, k=300, distance_matrix='eucl'):
    if distance_matrix == 'dtw':
        model = TimeSeriesKMeans(n_clusters=k, metric='dtw',
                                 n_init=10).fit(train_data)
    else:
        model = TimeSeriesKMeans(n_clusters=k, n_init=10).fit(train_data)
    return model
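A sketch of calling train_model on a 2-D array of equal-length series (tslearn reshapes it to a 3-D dataset internally), assuming TimeSeriesKMeans is imported at module level; the small k here replaces the rather large default of 300:

import numpy as np

X = np.random.rand(60, 24)  # 60 series, 24 time steps each
model = train_model(X, k=4, distance_matrix='dtw')
print(model.cluster_centers_.shape)  # (4, 24, 1)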
Example no. 7
def xgb_softdtw(train, test, return_pred, num_cluster):
    ts = TimeSeries_Clustering.make_timeseries(train)

    formatted_dataset = TimeSeries_Clustering.to_time_series_dataset(ts)
    X_train, sz = normalize_data(formatted_dataset)
    sdtw_km = TimeSeriesKMeans(n_clusters=num_cluster, metric="softdtw",
                               metric_params={"gamma_sdtw": .01},
                               verbose=True, random_state=0)
    y_pred = sdtw_km.fit_predict(X_train)
    scores = TimeSeries_Clustering.compute_scores(sdtw_km, X_train, y_pred)
    plt.boxplot(scores)
    TimeSeries_Clustering.plot_data(sdtw_km, X_train, y_pred, sz,
                                    sdtw_km.n_clusters, centroid=True)
    y_pred_df = pd.DataFrame(y_pred)
    userindex = train.user_id.unique()
    userindex = np.sort(userindex)
    y_pred_df['user_id'] = userindex
    test = test.merge(y_pred_df, on='user_id').rename(
        {0: 'cluster_softdtw'}, axis='columns')
    train = train.merge(y_pred_df, on='user_id').rename(
        {0: 'cluster_softdtw'}, axis='columns')
    user_cluster = test.drop_duplicates(subset=['user_id'],
                                        keep='first')[['user_id',
                                                       'cluster_softdtw']]
    # Use a raw string so the backslashes are not treated as escapes
    user_cluster.to_csv(r'\clusters\softdtw_' + str(test.batch.unique()) +
                        '.csv')
    sum_recall = 0
    sum_precision = 0
    sum_fscore = 0

    clusters = test.cluster_softdtw.unique()
    for cluster in clusters:
        train_i = train[train['cluster_softdtw'] == cluster]
        test_i = test[test['cluster_softdtw'] == cluster]
        recall, precision, fscore = do_xgboost(train_i, test_i, return_pred, num_cluster)
        sum_recall = sum_recall + recall
        sum_precision = sum_precision + precision
        sum_fscore = sum_fscore + fscore
    n = len(clusters)
    print('FINISH SOFTDTW')
    return sum_recall / n, sum_precision / n, sum_fscore / n
Example no. 8
    def __init__(self, k):
        self.k = k
        self.model = TimeSeriesKMeans(n_clusters=k,
                                      n_init=2,
                                      metric="dtw",
                                      verbose=False,
                                      max_iter_barycenter=10,
                                      random_state=0)
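The snippet above shows only a constructor; a minimal sketch of a surrounding wrapper class (the class name and fit_predict method are invented here, the original class is not shown) could look like:

from tslearn.clustering import TimeSeriesKMeans

class DTWKMeansWrapper:  # hypothetical name
    def __init__(self, k):
        self.k = k
        self.model = TimeSeriesKMeans(n_clusters=k,
                                      n_init=2,
                                      metric="dtw",
                                      verbose=False,
                                      max_iter_barycenter=10,
                                      random_state=0)

    def fit_predict(self, X):
        # Delegate to the underlying tslearn estimator
        return self.model.fit_predict(X)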
Example no. 9
def visualize_n_cluster(train_ts, n_lists=[3, 4, 5, 6], metric='dtw',
                        seed=2021, vis=True):

    # As in Example no. 5, fit on the original columns only so that cluster
    # columns added in earlier iterations do not leak into later fits.
    feature_cols = list(train_ts.columns)

    for idx, n in enumerate(n_lists):
        ts_kmeans = TimeSeriesKMeans(n_clusters=n, metric=metric,
                                     random_state=seed)
        train_ts['cluster(n={})'.format(n)] = ts_kmeans.fit_predict(
            train_ts[feature_cols])
        score = round(
            silhouette_score(train_ts[feature_cols],
                             train_ts['cluster(n={})'.format(n)],
                             metric='euclidean'), 3)

    return train_ts
Example no. 10
def cl_KMeansDTW(pivot, original, pixels, metrics, k, cc):
    kmeans = TimeSeriesKMeans(n_clusters=k, metric="dtw", random_state=42)
    method = 'KDTW_{0}_{1}'.format(k, cc * 10)
    newpivot = pivot.values
    assignments = kmeans.fit_predict(newpivot)
    return (add_computed_typed(method, assignments, original),
            add_computed_typed_pixels(method, assignments, pixels),
            add_metrics_typed(method, metrics, assignments, pivot))
Example no. 11
def get_cluster_labels(actions, x, n_clusters):
    km = TimeSeriesKMeans(n_clusters=n_clusters, metric='dtw').fit(x['train'])
    actions_split = {}
    for split in ['train', 'dev', 'test']:
        # Copy the slice to avoid pandas SettingWithCopy warnings; also avoid
        # shadowing the built-in `type` as the original loop variable did
        actions_split[split] = actions[actions['type'] == split].copy()
        labels = km.predict(x[split])
        actions_split[split].loc[:, 'label'] = labels
    actions = pd.concat(
        [actions_split[split] for split in ['train', 'dev', 'test']])
    return actions
Example no. 12
    def extract_clusters(self, new_features):
        print("Extracting clusters ...")

        km = TimeSeriesKMeans(n_clusters=2, random_state=42)
        km.fit(new_features)
        y_label = km.labels_
        new_features['km_clusters'] = y_label

        print("Extracting clusters ... DONE.")
        return new_features
Example no. 13
def cluster_examples(
    train: pd.DataFrame,
    test: pd.DataFrame,
    dataset_name: str,
    nclusters: int = 5,
    n_examples: int = 3,
):
    """Explore:
    -cluster series and look at examples from each cluster
    """
    clusterer = TimeSeriesKMeans(n_clusters=nclusters)

    group_cols = [train.columns[c] for c in grouping_cols[dataset_name]]
    train_groups = train.groupby(group_cols)
    test_groups = test.groupby(group_cols)
    max_l = max(
        [len(trg) + len(teg) for (_, trg), (_, teg) in zip(train_groups, test_groups)]
    )

    timeseries = []
    keys = []
    for (group_name, train_group), (_, test_group) in zip(train_groups, test_groups):
        t_values = train_group.iloc[:, target_cols[dataset_name]].astype(float)
        # Series.append was removed in pandas 2.0; use pd.concat instead
        t_values = pd.concat(
            [t_values,
             test_group.iloc[:, target_cols[dataset_name]].astype(float)]
        )
        t_padded = pd.concat(
            [t_values, pd.Series([np.nan] * (max_l - t_values.shape[0]))],
            ignore_index=True,
        )
        t_padded = t_padded.interpolate()
        assert len(t_padded) == max_l
        timeseries.append(t_padded)
        keys.append(group_name)

    timeseries_dataset = ts_utils.to_time_series_dataset(timeseries)
    clusters = clusterer.fit_predict(timeseries_dataset)

    plot_hist(clusters, "Distribution of Clusters")

    for i in range(nclusters):
        print(f"Looking at examples from cluster {i}")
        idxs = np.where(clusters == i)[0]
        examples = np.random.choice(idxs, size=n_examples, replace=False)
        for j, ex in enumerate(examples):
            query_list = [
                f'{grp_col}=="{key}"' for grp_col, key in zip(group_cols, keys[ex])
            ]
            values = train.query(" & ".join(query_list)).iloc[
                :, target_cols[dataset_name]
            ]
            # values = values.append(
            #     test.query(' & '.join(query_list)).iloc[:, target_cols[dataset_name]]
            # )
            plot_ts(values, f"Example {j} of cluster {i}")
Example no. 14
def multi_plot(row, col, fs_tuple, sy_bool, sx_bool, X, num_cluster, lineW,
               ts=False, labels=None):

    if ts:
        # Fit a Euclidean k-means model and label every series in X
        model = TimeSeriesKMeans(n_clusters=num_cluster, tol=1e-05,
                                 metric='euclidean')
        fitted_model = model.fit(X)
        labels = fitted_model.predict(X)

    f, axes = plt.subplots(row, col, figsize=fs_tuple, sharey=sy_bool,
                           sharex=sx_bool)

    labelsize = 10
    fontsize = 10

    cluster_pool = np.unique(labels)
    for index, i_cluster in enumerate(cluster_pool):
        sub_mat = X[labels == i_cluster, :]
        # Map the flat cluster index onto the subplot grid
        # (np.unravel_index dropped its `dims` keyword in favour of `shape`)
        figrow, figcol = np.unravel_index(index, (row, col))
        if row > 1 and col > 1:
            for i_curve in range(np.shape(sub_mat)[0]):
                axes[figrow, figcol].plot(sub_mat[i_curve, :], 'r',
                                          linewidth=lineW)
            # After plotting, label the outer axes of the grid
            for i_col in range(col):
                axes[-1, i_col].set_xticks([0, 16, 32, 48, 64, 80])
                axes[-1, i_col].set_xticklabels(
                    [str(300 + 80 * i) for i in np.arange(6)])
                axes[-1, i_col].set_xlabel('Wavelength [nm]',
                                           fontsize=fontsize)
                axes[-1, i_col].tick_params(axis='x', labelsize=labelsize)
            for i_row in range(row):
                axes[i_row, 0].set_yticks([-1, 0, 1])
                axes[i_row, 0].tick_params(axis='y', labelsize=labelsize)
        elif row > 1 and col == 1:
            # With a single column, plt.subplots returns a 1-D axes array,
            # so the original 2-D indexing would raise an IndexError
            for i_curve in range(np.shape(sub_mat)[0]):
                axes[figrow].plot(sub_mat[i_curve, :], 'r', linewidth=lineW)
            axes[-1].set_xticks([0, 16, 32, 48, 64, 80])
            axes[-1].set_xticklabels(
                [str(300 + 80 * i) for i in np.arange(6)])
            axes[-1].set_xlabel('Wavelength [nm]', fontsize=fontsize)
            axes[-1].tick_params(axis='x', labelsize=labelsize)
            for i_row in range(row):
                axes[i_row].set_yticks([-1, 0, 1])
                axes[i_row].tick_params(axis='y', labelsize=labelsize)
        elif row == 1 and col > 1:
            # With a single row, the axes array is also 1-D
            for i_curve in range(np.shape(sub_mat)[0]):
                axes[figcol].plot(sub_mat[i_curve, :], 'r', linewidth=lineW)
            for i_col in range(col):
                axes[i_col].set_xticks([0, 16, 32, 48, 64, 80])
                axes[i_col].set_xticklabels(
                    [str(300 + 80 * i) for i in np.arange(6)])
                axes[i_col].set_xlabel('Wavelength [nm]', fontsize=fontsize)
                axes[i_col].tick_params(axis='x', labelsize=labelsize)
            axes[0].set_yticks([-1, 0, 1])
            axes[0].tick_params(axis='y', labelsize=labelsize)
    return (f, axes)
Example no. 15
def clustering(df, n_cluster: int = 2, metric: str = 'softdtw', init='k-means++', random_state=1234, verbose=False,
               n_init=1):

    tsk = TimeSeriesKMeans(n_clusters=n_cluster, metric=metric, init=init, random_state=random_state, verbose=verbose,
                           n_init=n_init)
    df = np.roll(df, -6, axis=0)
    M = to_time_series_dataset(df.T)

    cluster_labels = tsk.fit_predict(M)

    return cluster_labels
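A hypothetical call to clustering, assuming numpy and tslearn's to_time_series_dataset are imported at module level; the function rolls the rows by -6 before transposing, so each column of the input is treated as one series:

import numpy as np

X = np.random.rand(48, 10)  # 48 time steps, 10 series (one per column)
labels = clustering(X, n_cluster=3, metric='softdtw')
print(labels)  # one label per column of X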
Example no. 16
    def k_init(self, v=True):
        """
        Initialise the algorithm instance with the current parameters.

        Parameters:
            * v: boolean
                Verbose, prints information about the clustering

        Returns:
            NA
        """
        self.km = TimeSeriesKMeans(n_clusters=self.n, metric=self.metric,
                                   metric_params={"gamma_sdtw": .01},
                                   verbose=v, random_state=self.seed)
Example no. 17
def cluster_annotation_dimension(data, n_clusters=3):
    data = np.array([list(moving_average(d, 2)) for d in data])

    clustering = TimeSeriesKMeans(n_clusters=n_clusters, metric="euclidean")
    clustering.fit(data)

    clusters = []
    for cluster_ix in range(n_clusters):
        cl = np.where(clustering.labels_ == cluster_ix)[0]
        clusters.append(data[cl])

    return clusters
Example no. 18
def plot_KMeansDTW_elbow(pivot):
    maxK = 2
    maxSil = -1
    for i in range(2, 20):
        kmeans = TimeSeriesKMeans(n_clusters=i, metric="dtw", random_state=42)
        labels = kmeans.fit_predict(pivot)
        silhouette = silhouette_score(pivot, labels, sample_size=10000)
        print("For n_clusters =", i, "The average silhouette_score is :",
              silhouette, "SSE is :", kmeans.inertia_)
        if silhouette > maxSil:
            maxSil = silhouette
            maxK = i
    return maxK
Example no. 19
def init(X, l, k):
    # Good initial start improves the convergence speed
    seed = 0
    sdtw_km = TimeSeriesKMeans(n_clusters=k,
                               metric="euclidean",
                               max_iter=10,
                               random_state=seed)
    sdtw_km.fit(X)
    G_init = np.zeros((sdtw_km.labels_.size, sdtw_km.labels_.max() + 1))
    G_init[np.arange(sdtw_km.labels_.size), sdtw_km.labels_] = 1
    G_init = G_init + np.random.rand(G_init.shape[0], G_init.shape[1])
    # NB: `psi` is read as a free variable from the enclosing scope
    F_init = sdtw_km.cluster_centers_[:, :, 0] + 2**psi * np.random.rand(k, l)
    return F_init, G_init
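A hypothetical call to init; since psi is a free variable, it must exist in the enclosing (module) scope, and the value chosen below is invented purely for illustration:

import numpy as np

psi = -3  # assumed scale exponent; not defined in the snippet above
X = np.random.rand(40, 25)  # 40 series of length 25
F_init, G_init = init(X, l=25, k=4)
print(F_init.shape, G_init.shape)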
Example no. 20
def clustering_TimeSeriesKMeans(tsdata, n_clusters, random_state, n_init,
                                metric="softdtw",
                                metric_params={"gamma_sdtw": 0.01}):
    np.random.seed(random_state)
    # Instantiate the TimeSeriesKMeans class. Note that "gamma_sdtw" is the
    # legacy name of the soft-DTW parameter; recent tslearn versions call it
    # "gamma".
    dtw_km = TimeSeriesKMeans(
        n_clusters=n_clusters,
        n_init=n_init,
        metric=metric,
        metric_params=metric_params,
        verbose=True,
        random_state=random_state
    )
    y_pred = dtw_km.fit_predict(tsdata)

    return y_pred
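A sketch of calling clustering_TimeSeriesKMeans on a random tslearn dataset, assuming numpy is imported as np at module level; DTW is used here with metric_params=None to sidestep the legacy "gamma_sdtw" default:

from tslearn.generators import random_walks

X = random_walks(n_ts=50, sz=32, d=1)
y = clustering_TimeSeriesKMeans(X, n_clusters=4, random_state=0, n_init=2,
                                metric='dtw', metric_params=None)
print(y[:10])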
Example no. 21
def tsKMeans_num_cluster(X, n_trials, max_n_cluster):
    min_n_cluster = 2
    v_clusters = np.arange(min_n_cluster, max_n_cluster)
    n_seeds = n_trials
    # Recorder: one silhouette score per (cluster count, seed) pair
    sc_recorder = np.zeros((len(v_clusters), n_seeds))
    for i_seed in range(n_seeds):
        for num_cluster in v_clusters:
            model = TimeSeriesKMeans(n_clusters=num_cluster, tol=1e-05,
                                     metric='euclidean', random_state=i_seed)
            fitted_model = model.fit(X)
            y_pred = fitted_model.predict(X)
            s_sc = sklearn.metrics.silhouette_score(X, y_pred,
                                                    metric='euclidean')
            sc_recorder[num_cluster - min_n_cluster, i_seed] = s_sc
    return sc_recorder
Example no. 22
def test_serialize_timeserieskmeans():
    n, sz, d = 15, 10, 3
    rng = numpy.random.RandomState(0)
    X = rng.randn(n, sz, d)

    dba_km = TimeSeriesKMeans(n_clusters=3,
                              n_init=2,
                              metric="dtw",
                              verbose=True,
                              max_iter_barycenter=10)

    _check_not_fitted(dba_km)

    dba_km.fit(X)

    _check_params_predict(dba_km, X, ['predict'])

    sdtw_km = TimeSeriesKMeans(n_clusters=3,
                               metric="softdtw",
                               metric_params={"gamma": .01},
                               verbose=True)

    _check_not_fitted(sdtw_km)

    sdtw_km.fit(X)

    _check_params_predict(sdtw_km, X, ['predict'])
Example no. 23
def get_fitted_model(dataset, clusters=3, timepoints=0, verbose=False,
                     return_dataset=True):
    if timepoints != 0:
        dataset = dataset[:, :timepoints]
    if verbose:
        print(f'Segmenting data into {clusters} clusters...')
    model = TimeSeriesKMeans(
        n_clusters=clusters,
        n_init=10,
        metric='dtw',           # Dynamic time warping
        verbose=verbose,
        n_jobs=-1               # Use all cores
        )
    model.fit(dataset)
    return model
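A quick sketch of fitting on random data, assuming TimeSeriesKMeans is imported at module level; a 2-D array is accepted because tslearn converts it to a 3-D dataset:

import numpy as np

X = np.random.rand(30, 48)  # 30 series of length 48
m = get_fitted_model(X, clusters=3, verbose=True)
print(m.cluster_centers_.shape)  # (3, 48, 1)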
Example no. 24
def test_variable_length_clustering():
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [2, 5, 6, 7, 8, 9],
                                [3, 5, 6, 7, 8]])
    rng = np.random.RandomState(0)

    clf = KernelKMeans(n_clusters=2, random_state=rng)
    clf.fit(X)

    clf = TimeSeriesKMeans(n_clusters=2, metric="dtw", random_state=rng)
    clf.fit(X)

    clf = TimeSeriesKMeans(n_clusters=2, metric="softdtw", random_state=rng)
    clf.fit(X)
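As the test hints, only the DTW-family metrics cope with unequal lengths; to_time_series_dataset stores ragged input as a NaN-padded 3-D array, which a quick check makes visible:

import numpy as np
from tslearn.utils import to_time_series_dataset

X = to_time_series_dataset([[1, 2, 3], [1, 2, 3, 4, 5]])
print(X.shape)               # (2, 5, 1): padded to the longest series
print(np.isnan(X[0]).sum())  # 2 NaN entries pad the shorter series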
Example no. 25
    def dmp_cluster_planning_history(self, k=2):
        self.dmp_traj_clusters = []
        from tslearn.clustering import TimeSeriesKMeans
        kmeans = TimeSeriesKMeans(n_clusters=k).fit(self.history.paths)
        # fit all of these to different DMPs then cluster
        for i in range(self.history.paths.shape[0]):
            path_data = self.history.paths[i]
            formatted_path_data = path_data.reshape(
                (1, path_data.shape[1], path_data.shape[0]))
            center = formatted_path_data[0]
            new_dmp_traj_cluster = DMPTrajCluster(
                formatted_path_data,
                center,
                execution_times=[self.history.execution_times[i]],
                rl=True)
            self.dmp_traj_clusters.append(new_dmp_traj_cluster)
        # cluster based on weights
        weight_list = [dmp.dmp.w for dmp in self.dmp_traj_clusters]
        weights_to_cluster = np.zeros(
            (len(self.dmp_traj_clusters),
             weight_list[0].shape[1] * weight_list[0].shape[0]))
        for i in range(len(weight_list)):
            weights_to_cluster[i] = weight_list[i].flatten()
        self.gmm = mixture.GaussianMixture(
            n_components=3, covariance_type='full').fit(weights_to_cluster)
Example no. 26
    def traj_cluster_planning_history(self, k=2):

        self.dmp_traj_clusters = []

        from tslearn.clustering import TimeSeriesKMeans
        kmeans = TimeSeriesKMeans(n_clusters=k).fit(self.history.paths)
        # sort into groups, make them into a dmp_traj_cluster, all into one cluster for now
        for i in range(k):
            center = kmeans.cluster_centers_[i]
            relevant_labels = np.where(kmeans.labels_ == i)[0]
            if len(relevant_labels) == 0:
                print("Empty cluster")
                continue
            execution_times = np.array(
                self.history.execution_times)[relevant_labels]
            path_data = self.history.paths[relevant_labels]
            formatted_path_data = np.zeros(
                (path_data.shape[0], path_data.shape[2],
                 path_data.shape[1]))  # gets ugly if there's only one now
            n_training_paths = path_data.shape[0]
            # Use a separate index so we do not shadow the outer loop's `i`
            for j in range(n_training_paths):
                formatted_path_data[j] = path_data[j].T

            new_dmp_traj_cluster = DMPTrajCluster(
                formatted_path_data,
                center,
                execution_times=execution_times,
                rl=True)
            self.dmp_traj_clusters.append(new_dmp_traj_cluster)
Example no. 27
    def fit_tskmeans(self):
        data_scaled = self.data_scaled
        center = self.center_num
        km = TimeSeriesKMeans(n_clusters=center, metric="softdtw", max_iter=5,
                              verbose=False, random_state=0).fit(data_scaled)
        self.fitted_cluster = km
        self.labels = km.labels_
Example no. 28
def tsclusteringN(ts_data, names):
    # Clustering

    # Normalisation
    ts_dataset = TimeSeriesScalerMinMax().fit_transform(ts_data)

    metric = 'dtw'
    n_clusters = [n for n in range(2, 6)]
    for n in n_clusters:
        print('Number of clusters =', n)

        # With "dtw" or "softdtw" as the metric, time series of different
        # lengths are also fine
        km = TimeSeriesKMeans(n_clusters=n,
                              metric=metric,
                              verbose=False,
                              random_state=1).fit(ts_dataset)

        # Clustering result
        print('Clustering result =', km.labels_)

        # Silhouette values range from -1 to 1. The optimal cluster count has
        # a value close to 1 and, in a silhouette plot, the smallest spread
        # in width between clusters. Here we only check the silhouette value.
        print('Silhouette value =',
              silhouette_score(ts_dataset, km.labels_, metric=metric))
        print()
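A hypothetical call to tsclusteringN; silhouette_score is assumed to be tslearn's (sklearn's version does not accept metric='dtw'), and names is unused inside the function:

import numpy as np

series = np.random.rand(8, 12)  # 8 series of length 12
tsclusteringN(series, names=None)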
Example no. 29
def knn(p_id):
    # get_session_data(p_id)
    (exists, test) = get_session_data(p_id)
    if not exists:
        print("Running KNN Model")
        s_id = crud.get_latest_session_table_by_id(db, p_id).split('_')[3]
        model = TimeSeriesKMeans.from_json('res/knn_model.txt')
        pred = model.predict(test)

        leng = len(test)
        more = int(0.8 * leng)
        less = int(0.2 * leng)
        if more + less < leng:
            more += 1
        elif more + less > leng:
            more -= 1
        a = np.zeros((more,), dtype=int)
        b = np.ones((less,), dtype=int)
        true = np.concatenate([a, b])

        res = 0
        if (pred == 0).sum() < (pred == 1).sum():
            res = 1
        print("-----------------------------")
        print(exists)
        print("-----------------------------")
        res = models.Result(session_id=int(s_id), patient_id=int(p_id), result=int(res), model_id=0)
        return crud.create_patient_result(db, res)
    else:
        return crud.get_last_result(db, p_id, m_id)
        # return crud.get_last_result_by_patient_id(db, p_id)
Example no. 30
def _kmeans_init_shapelets(X, n_shapelets, shp_len, n_draw=10000):
    n_ts, sz, d = X.shape
    indices_ts = numpy.random.choice(n_ts, size=n_draw, replace=True)
    indices_time = numpy.random.choice(sz - shp_len + 1, size=n_draw,
                                       replace=True)
    subseries = numpy.zeros((n_draw, shp_len, d))
    for i in range(n_draw):
        subseries[i] = X[indices_ts[i],
                         indices_time[i]:indices_time[i] + shp_len]
    return TimeSeriesKMeans(n_clusters=n_shapelets,
                            metric="euclidean",
                            verbose=False).fit(subseries).cluster_centers_
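A small sketch of drawing shapelet candidates from a random dataset: the function samples n_draw random subsequences of length shp_len and returns the centroids of a Euclidean k-means fit on them:

import numpy

X = numpy.random.rand(20, 50, 1)  # 20 univariate series of length 50
shapelets = _kmeans_init_shapelets(X, n_shapelets=5, shp_len=8, n_draw=100)
print(shapelets.shape)  # (5, 8, 1)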