Example #1
0
    def prepare_fn(inst):
        """Store the L-method centroids of ``inst`` under 'l_method_centroids'.

        'x' and 'y' are read from ``inst`` through ``requires``. When the
        enclosing scope's ``name`` is not None, a ``StorageCache`` backed by
        'storage/centroids_<name>_l_method.json' is used: if the cache reports
        it is not new, its content is loaded as a numpy array; otherwise the
        centroids are computed and written back to the cache.

        Returns:
            The result of ``inst.set('l_method_centroids', centroids)``.

        Raises:
            Exception: if the pipeline result contains no 'centroids' entry.
        """
        x, y = requires(['x', 'y'], inst)

        # Explicit sentinel: caching is optional, so keep ``cache`` bound to
        # None rather than probing ``locals()`` for the variable's existence.
        cache = None
        if name is not None:
            full_name = 'centroids_' + name + '_l_method'
            cache = StorageCache('storage/' + full_name + '.json')

        if cache is not None and not cache.isnew():
            # Reuse the previously stored centroids.
            return inst.set('l_method_centroids', np.array(cache.get()))

        # Compute the 'good' centroids with the agglomerative L-method pipe.
        result = (
            Pipe()
            .x(x)
            .y(y)
            .pipe(agglomerative_l_method())
            .connect(stop())
        )

        if 'centroids' not in result:
            raise Exception('no centroids in pipe')

        centroids = result['centroids']

        if cache is not None:
            # Persist the fresh centroids for subsequent runs.
            cache.update(array_to_list(centroids))
            cache.save()

        return inst.set('l_method_centroids', centroids)
Example #2
0
    def prepare_fn(inst):
        """Store DENCLUE centroids and the bandwidth used on ``inst``.

        'x' is read from ``inst`` through ``requires``. When the enclosing
        scope's ``name`` is not None, a ``StorageCache`` backed by
        'storage/centroids_<name>_denclue_bandwidth_<bandwidth>.json' is used:
        if the cache reports it is not new, its content is loaded as a numpy
        array; otherwise ``denclue`` is run and its result is written back.

        ``bandwidth``, ``name`` and ``id`` are free variables of this function.
        NOTE(review): ``id`` is presumably a string defined by the enclosing
        function; if it is not, the name resolves to the builtin and the key
        concatenation below raises TypeError -- confirm against the caller.

        Returns:
            ``inst`` after setting 'denclue_centroids_<id>' and
            'denclue_bandwidth_<id>'.

        Raises:
            Exception: if ``bandwidth`` is falsy.
        """
        if not bandwidth:
            raise Exception('no bandwidth given!')

        x = requires('x', inst)

        # Explicit sentinel: caching is optional, so keep ``cache`` bound to
        # None rather than probing ``locals()`` for the variable's existence.
        cache = None
        if name is not None:
            full_name = 'centroids_' + name + '_denclue_bandwidth_' + str(bandwidth)
            cache = StorageCache('storage/' + full_name + '.json')

        if cache is not None and not cache.isnew():
            # Load the stored centroids and convert them to a numpy array.
            centroids = np.array(cache.get())
        else:
            n_points = len(x)
            if n_points < 200:
                # Fewer than 200 points: use all of them.
                sample_size = n_points
            else:
                # 20% of the points, clamped to the range [200, 10000].
                sample_size = max(min(10000, int(n_points * 0.2)), 200)

            # Compute the 'good' centroids.
            centroids = denclue(x, bandwidth, sample_size)

            if cache is not None:
                # Persist the fresh centroids for subsequent runs.
                cache.update(array_to_list(centroids))
                cache.save()

        return inst\
            .set('denclue_centroids_' + id, centroids)\
            .set('denclue_bandwidth_' + id, bandwidth)
    def map_fn(inst, idx, total):
        """Attach the seed produced by the ``idx``-th seeding function.

        The seed is cached in 'seeding/<name>_<seeding name>.json' so repeated
        runs reuse the same values. ``total`` is accepted but not used here.

        Returns:
            ``inst`` after setting 'y_seed' and 'name'.
        """
        cache_path = 'seeding/' + name + '_' + seeding_names[idx] + '.json'
        store = StorageCache(cache_path)

        if store.isnew():
            # Nothing stored yet: generate the seed and persist it.
            seed = seeding_fns[idx](inst)
            store.update(array_to_list(seed))
            store.save()
        else:
            # Reuse the stored seed.
            seed = np.array(store.get())

        seeded = inst.set('y_seed', seed)
        return seeded.set('name', seeding_names[idx])