Esempio n. 1
0
  def test_ncc_c_2_and_3dim_matches(self):
      """Check that the 1-D, 2-D and 3-D NCC helpers produce equal distances.

      Each distance is ``1 - max(ncc)`` between one z-normalized series and
      one centroid. The same (3, 2) distance matrix is built three ways and
      the results are required to be exactly equal.
      """
      series = zscore(np.array([
          [1, 2, 3, 4, 5],
          [0, 10, 4, 5, 7],
          [-1, 15, -12, 8, 9],
      ]), axis=1)
      centroids = np.array([[1, 1, 0, 1, 1], [10, 12, 0, 0, 1]])
      n_series, n_centroids = 3, 2
      shape = (n_series, n_centroids)

      # Reference: one scalar distance per (series, centroid) pair.
      pairwise = np.empty(shape)
      for row in range(n_series):
          for col in range(n_centroids):
              ncc = _ncc_c(series[row], centroids[col])
              pairwise[row, col] = 1 - ncc.max()

      # All series against a single centroid per call.
      per_centroid = np.empty(shape)
      for col in range(n_centroids):
          ncc = _ncc_c_2dim(series, centroids[col])
          per_centroid[:, col] = 1 - ncc.max(axis=1)

      # Every pair in one call; transposed to match the (3, 2) layout above.
      all_at_once = (1 - _ncc_c_3dim(series, centroids).max(axis=2)).T

      np.testing.assert_array_equal(pairwise, per_centroid)
      np.testing.assert_array_equal(per_centroid, all_at_once)
Esempio n. 2
0
def test_ncc_c_2_and_3dim_matches():
    """Verify `_ncc_c`, `_ncc_c_2dim` and `_ncc_c_3dim` agree element-wise.

    Builds the (3, 2) matrix of ``1 - max(ncc)`` distances between three
    z-normalized samples and two centroids with each helper, then asserts
    that the three matrices are exactly equal.
    """
    samples = zscore(
        np.array([
            [1, 2, 3, 4, 5],
            [0, 10, 4, 5, 7],
            [-1, 15, -12, 8, 9],
        ]),
        axis=1,
    )
    centroids = np.array([[1, 1, 0, 1, 1], [10, 12, 0, 0, 1]])

    # One (sample, centroid) pair per call.
    by_pair = np.empty((3, 2))
    for i, sample in enumerate(samples):
        for j, centroid in enumerate(centroids):
            by_pair[i, j] = 1 - _ncc_c(sample, centroid).max()

    # All samples against one centroid per call.
    by_centroid = np.empty((3, 2))
    for j, centroid in enumerate(centroids):
        by_centroid[:, j] = 1 - _ncc_c_2dim(samples, centroid).max(axis=1)

    # All pairs in a single call, transposed into (sample, centroid) order.
    batched = (1 - _ncc_c_3dim(samples, centroids).max(axis=2)).T

    assert (by_pair == by_centroid).all()
    assert (by_centroid == batched).all()
Esempio n. 3
0
from kshape.core import kshape, zscore

# Four short input series, one inner list per series.
time_series = [
    [1, 2, 3, 4],
    [0, 1, 2, 3],
    [0, 1, 2, 3],
    [1, 2, 2, 3],
]
cluster_num = 2
# z-normalize along axis 1 (within each inner list), then cluster with
# k-Shape into `cluster_num` groups.
clusters = kshape(
    zscore(time_series, axis=1),
    cluster_num,
)
Esempio n. 4
0
#def data_plotter(data):

if __name__ == '__main__':
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # run this against a trusted, locally produced pickle.
    with open(
            'C://Users//k_mathin//PycharmProjects//Masters//ciena_trials//Kamal//data//vodafone_data_clusters_filtered.pkl',
            'rb') as f:
        data_set = pickle.load(f)

    # Collect the entries of data_set['data'] into one array.
    data = np.asarray(list(data_set['data']))
    print(data.shape[0])

    # Encode the 'osid' labels as integer codes; one cluster is requested per
    # distinct label value.
    label_data = np.asarray(data_set['osid'])
    labels, levels = pd.factorize(label_data)
    shelves = np.asarray(data_set['shelf'])  # not used further in this block
    cluster_num = levels.shape[0]
    print(cluster_num)

    # z-normalize along axis 1 before running k-Shape.
    clusters = kshape(zscore(data, axis=1), cluster_num)

    # Turn the per-cluster member lists (clusters[j][1]) into one predicted
    # cluster index per sample, in sample order.
    y_pred = []
    for i in range(data.shape[0]):
        for j in range(cluster_num):
            if i in clusters[j][1]:
                y_pred.append(j)
                # Stop at the first match so each sample contributes exactly
                # one prediction (the original `continue` here was a no-op).
                break

    # Compute the confusion matrix once and print that result.
    conf = conf_mat(labels, y_pred)
    print(conf)
    print("done")
Esempio n. 5
0
def kshape_clusters(arr, cluster_num, ax=1):
    """Cluster ``arr`` with k-Shape and pass the result to ``apply_clusters``.

    Args:
        arr: Input data; z-normalized along ``ax`` before clustering and also
            handed unchanged to ``apply_clusters``.
        cluster_num: Number of clusters requested from ``kshape``.
        ax: Axis argument forwarded to ``zscore``.

    Returns:
        Whatever ``apply_clusters(clusters, arr)`` returns.
    """
    from kshape.core import kshape, zscore

    normalized = zscore(arr, ax)
    return apply_clusters(kshape(normalized, cluster_num), arr)
Esempio n. 6
0
from kshape.core import kshape, zscore

# Four input series of five samples each, one inner list per series.
time_series = [[1, 2, 3, 4, 5], [0, 1, 2, 3, 4], [3, 2, 1, 0, -1],
               [1, 2, 2, 3, 3]]
cluster_num = 2
# z-normalize each series along its own samples (axis=1), as in kshape's
# documented usage. Without an explicit axis the normalization would run
# down the columns, i.e. across different series, instead.
clusters = kshape(zscore(time_series, axis=1), cluster_num)
print(clusters)
Esempio n. 7
0
#%%
# Load the pickled table of causes; `cluster_show` below reads its 'id' and
# 'cause' columns by row position (presumably a DataFrame; confirm).
# NOTE(review): unpickling can execute code embedded in the file, so the
# pickle must come from a trusted source.
causes = pd.read_pickle('data/causes.pkl')


def cluster_show(cluster_rep, cluster_id):
    """Plot the ' In' signal of every event in one cluster and show the figure.

    Args:
        cluster_rep: Cluster representative; currently not plotted or used.
        cluster_id: Row positions into the module-level ``causes`` table for
            the members of the cluster.

    Relies on the module-level names ``causes``, ``Event``, ``start``,
    ``end`` and ``plt``.
    """
    for row_pos in cluster_id:
        event_id = causes.iloc[row_pos]['id']
        event = Event(event_id, start, end)
        signal = list(event.data[' In'].values)
        plt.plot(signal)

    # One legend entry per plotted member, taken from the 'cause' column.
    cause_labels = list(causes.iloc[cluster_id]['cause'])
    plt.legend(cause_labels)
    plt.show()


cluster_num = 6
# z-normalize I_ns along axis 1 and cluster it with k-Shape.
clusters = kshape(zscore(I_ns, axis=1), cluster_num)
for idx in range(cluster_num):
    # Each entry is indexed as [0] (representative) and [1] (member positions).
    representative = clusters[idx][0]
    members = clusters[idx][1]
    print(causes.iloc[members], '\n', '----------------------')
    cluster_show(representative, members)

#%%
import statsmodels.api as sm

dta = sm.datasets.co2.load_pandas().data
# Fill missing CO2 values by interpolation. The result is assigned back to
# the column rather than calling interpolate(inplace=True) on `dta.co2`:
# that chained in-place call works on an intermediate Series and does not
# update `dta` under pandas Copy-on-Write.
dta['co2'] = dta['co2'].interpolate()

# NOTE(review): `id` shadows the builtin; the name is kept because it is a
# module-level variable that other cells may read.
id = whole_events[100]
start = 0
end = -1
e = Event(id, start, end)