def prepare_fn(inst):
    """Compute the 'good' centroids via the agglomerative L-method.

    Reads 'x' and 'y' from `inst`. When the closure variable `name` is
    not None, results are cached under storage/centroids_<name>_l_method.json
    so repeated runs reuse the stored centroids.

    Returns `inst` with 'l_method_centroids' set to an array of centroids.
    Raises Exception when the pipeline produces no centroids.
    """
    x, y = requires(['x', 'y'], inst)

    # Explicit None sentinel instead of probing `'cache' in locals()`,
    # which is fragile and un-idiomatic; behavior is unchanged.
    cache = None
    if name is not None:
        full_name = 'centroids_' + name + '_l_method'
        file = 'storage/' + full_name + '.json'
        cache = StorageCache(file)

    if cache is not None and not cache.isnew():
        # cache hit: load the stored centroids and convert to an ndarray
        centroids = np.array(cache.get())
    else:
        # compute the 'good' centroids through the pipeline
        result = Pipe() \
            .x(x) \
            .y(y) \
            .pipe(agglomerative_l_method()) \
            .connect(stop())

        if 'centroids' not in result:
            raise Exception('no centroids in pipe')

        centroids = result['centroids']

        if cache is not None:
            # update the cache and save to the storage
            cache.update(array_to_list(centroids))
            cache.save()

    return inst.set('l_method_centroids', centroids)
    def prepare_fn(inst):
        """Compute the 'good' centroids with DENCLUE at the closure `bandwidth`.

        Reads 'x' from `inst`. When the closure variable `name` is not None,
        the centroids are cached per-bandwidth under storage/. Returns `inst`
        with 'denclue_centroids_<id>' and 'denclue_bandwidth_<id>' set
        (`id` is a closure variable; NOTE(review): it shadows the builtin).
        Raises Exception when no bandwidth was supplied.
        """
        if not bandwidth:
            raise Exception('no bandwidth given!')

        x = requires('x', inst)

        # Explicit None sentinel instead of probing `'cache' in locals()`,
        # which is fragile and un-idiomatic; behavior is unchanged.
        cache = None
        if name is not None:
            full_name = 'centroids_' + name + '_denclue_bandwidth_' + str(bandwidth)
            file = 'storage/' + full_name + '.json'
            cache = StorageCache(file)

        if cache is not None and not cache.isnew():
            # load good centroids from storage and convert to np array
            centroids = np.array(cache.get())
        else:
            if len(x) < 200:
                sample_size = len(x)
            else:
                # clamp the sample size: 200 <= len(x) * 0.2 <= 10000
                sample_size = max(min(10000, int(len(x) * 0.2)), 200)

            # get the 'good' centroids
            centroids = denclue(x, bandwidth, sample_size)
            if cache is not None:
                # update cache, save to the storage
                cache.update(array_to_list(centroids))
                cache.save()

        return inst\
            .set('denclue_centroids_' + id, centroids)\
            .set('denclue_bandwidth_' + id, bandwidth)
    def map_fn(inst, idx, total):
        """Attach the idx-th seeding (cached or freshly generated) to `inst`.

        The seeding is persisted under seeding/ so every run of the same
        configuration sees an identical seed assignment.
        """
        seed_name = seeding_names[idx]
        cache = StorageCache('seeding/' + name + '_' + seed_name + '.json')

        if cache.isnew():
            # no cached seeding yet: generate one and persist it
            y_seed = seeding_fns[idx](inst)
            cache.update(array_to_list(y_seed))
            cache.save()
        else:
            # cache hit: reuse the stored seeding as an ndarray
            y_seed = np.array(cache.get())

        return inst.set('y_seed', y_seed).set('name', seed_name)
def seed_cache(file):
    """Load a stored seeding from the 'seeding/' directory as an ndarray."""
    return np.array(StorageCache('seeding/' + file).get())
from cache import StorageCache

# Smoke test: round-trip a value through StorageCache persistence.
file = 'test_cache.json'
cache = StorageCache(file)

# NOTE(review): mutating the dict returned by get() and then calling save()
# assumes get() hands back a live reference to the cache's internal data
# rather than a copy — confirm against StorageCache's implementation.
data = cache.get()
data['b'] = 10
cache.save()

# Re-open the same file; the mutation above should have been persisted.
cache2 = StorageCache(file)

assert cache2.has('b')
def plot(X, **kwargs):
    """Scatter-plot an iterable of (x, y) pairs; kwargs pass to plt.scatter."""
    xs, ys = zip(*X)
    plt.scatter(xs, ys, **kwargs)

# Partition the samples X by label: y -> list of points with that label.
group = {}
for x, y in zip(X, Y):
    # setdefault replaces the explicit membership check + empty-list insert
    group.setdefault(y, []).append(x)

# Plot each cluster in its own colour; zip() truncates to at most
# dataset.cluster_cnt groups, matching the original behaviour.
for i, (name, points) in zip(range(dataset.cluster_cnt), group.items()):
    print('color:', cmap(i))
    print('count:', len(points))
    plot(points, color=cmap(i))

# Load the per-sample seed flags and keep only the samples that were
# selected as seeds, projected into the PCA plane, plotted in grey.
cache = StorageCache('seeding/' + dataset.name + '_some-1-prob-0.1.json')
seeds = cache.get()
# comprehension instead of list(map(..., filter(..., zip(...)))):
# keep the sample x wherever its flag is truthy
seeds = [x for flag, x in zip(seeds, dataset.X) if flag]
seeds = pca.transform(seeds)
plot(seeds, color='grey')

# Overlay the precomputed DENCLUE centroids (iris, fixed bandwidth),
# projected into the same PCA plane, in black.
centroid_cache = StorageCache('storage/centroids_iris_denclue_bandwidth_0.1082972972972973.json')
centroids = pca.transform(np.array(centroid_cache.get()))
plot(centroids, color='black')

plt.show()