def prepare_fn(inst):
    """Attach 'good' centroids (found via the agglomerative L-method) to *inst*.

    Reads 'x' and 'y' from the instance, runs the L-method pipeline, and
    returns the instance with 'l_method_centroids' set.  When the closure
    variable *name* is not None, results are cached on disk under storage/
    so repeated runs reuse the same centroids; with name=None the centroids
    are recomputed every time and never persisted.

    Raises:
        Exception: if the pipeline result contains no 'centroids' key.
    """
    # get good centroids
    x, y = requires(['x', 'y'], inst)

    # Optional on-disk cache — only available when a run name was given.
    # (Using an explicit None sentinel instead of the fragile
    # `'cache' in locals()` existence check.)
    cache = None
    if name is not None:
        full_name = 'centroids_' + name + '_l_method'
        file = 'storage/' + full_name + '.json'
        cache = StorageCache(file)

    if cache is not None and not cache.isnew():
        # reuse previously computed centroids from storage
        centroids = np.array(cache.get())
    else:
        # get the 'good' centroids
        result = Pipe() \
            .x(x) \
            .y(y) \
            .pipe(agglomerative_l_method()) \
            .connect(stop())

        if 'centroids' not in result:
            raise Exception('no centroids in pipe')

        centroids = result['centroids']

        if cache is not None:
            # update the cache and save to the storage
            cache.update(array_to_list(centroids))
            cache.save()

    return inst.set('l_method_centroids', centroids)
def prepare_fn(inst):
    """Attach 'good' centroids computed by DENCLUE to *inst*.

    Reads 'x' from the instance, runs DENCLUE with the closure-provided
    *bandwidth* on a bounded sample of the data, and returns the instance
    with 'denclue_centroids_<id>' and 'denclue_bandwidth_<id>' set (where
    *id* is a string tag from the enclosing scope).  When the closure
    variable *name* is not None, centroids are cached on disk under
    storage/ so repeated runs reuse the same result.

    Raises:
        Exception: if no (truthy) bandwidth was supplied.
    """
    if not bandwidth:
        raise Exception('no bandwidth given!')

    # get good centroids
    x = requires('x', inst)

    # Optional on-disk cache — only available when a run name was given.
    # (Explicit None sentinel replaces the fragile `'cache' in locals()`
    # existence check.)
    cache = None
    if name is not None:
        full_name = 'centroids_' + name + '_denclue_bandwidth_' + str(bandwidth)
        file = 'storage/' + full_name + '.json'
        cache = StorageCache(file)

    if cache is not None and not cache.isnew():
        # load good centroids from storage and convert to np array
        centroids = np.array(cache.get())
    else:
        if len(x) < 200:
            # tiny dataset: use everything
            sample_size = len(x)
        else:
            # 200 < sample_size = len(x) * 0.2 < 10000 (20% sample, clamped)
            sample_size = max(min(10000, int(len(x) * 0.2)), 200)

        # get the 'good' centroids
        centroids = denclue(x, bandwidth, sample_size)

        if cache is not None:
            # update cache, save to the storage
            cache.update(array_to_list(centroids))
            cache.save()

    return inst \
        .set('denclue_centroids_' + id, centroids) \
        .set('denclue_bandwidth_' + id, bandwidth)
def map_fn(inst, idx, total):
    """Attach the idx-th seeding result to *inst*.

    Uses an on-disk cache (keyed by the closure variable *name* and the
    seeding's own name) so every runtime sees a consistent seeding.
    Returns the instance with 'y_seed' and 'name' set.
    """
    seed_name = seeding_names[idx]

    # now using caching technique to have the consistent result
    # for each runtime
    cache = StorageCache('seeding/' + name + '_' + seed_name + '.json')

    if cache.isnew():
        # nothing stored yet: run the seeding function and persist its output
        y_seed = seeding_fns[idx](inst)
        cache.update(array_to_list(y_seed))
        cache.save()
    else:
        # reuse the previously saved seeding
        y_seed = np.array(cache.get())

    return inst \
        .set('y_seed', y_seed) \
        .set('name', seed_name)