Example #1
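These excerpts omit their module-level imports. Judging from the names they use, every example below assumes roughly the following preamble; ut and local are project-local helper modules and CONFIG_FILE a module-level constant, all inferred from usage rather than shown here:

import os
import json
import pickle
import configparser

import numpy as np
import scipy.io as sio

import ut      # project utilities: resolve_args, resolve_input, log, ...
import local   # project clustering helpers: compute_clustering, ...

CONFIG_FILE = "config.cfg"  # default suggested by the docstring in Example #2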
def noop(args, phase_prefix="local"):
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    computation_output["output"] = {
        "computation_phase": '%s_noop' % phase_prefix
    }
    return computation_output
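Every phase function here consumes and produces the same args payload. A minimal sketch of its shape, inferred from the accesses in these examples; the field values are illustrative:

args = {
    "state": {
        "baseDirectory": "/input",     # read-only input directory
        "outputDirectory": "/output",  # writable output directory
        "clientId": "local0",
    },
    "input": {},   # values received from the previous phase
    "cache": {},   # values persisted for reuse in later iterations
    "output": {},  # values this phase transmits onward
}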
Example #2
def dkm_local_compute_clustering(args,
                                 config_file=CONFIG_FILE,
                                 **kwargs):
    """
        # Description:
            Assign data instances to clusters.

        # PREVIOUS PHASE:
            remote_init_centroids (on first run only)
            remote_check_convergence

        # INPUT:

            |   name                |   type    |   default     |
            |   ---                 |   ---     |   ---         |
            |   config_file         |   str     |   config.cfg  |
            |   remote_centroids    |   list    |   None        |
            |   computation_phase   |   str     |   None        |

        # OUTPUT:
            - computation_phase: string naming the phase just completed
            - remote_centroids: list of numpy arrays
            - cluster_labels: list of cluster assignments

        # NEXT PHASE:
            remote_init_centroids
    """
    state, inputs, cache = ut.resolve_args(args)
    config_file = ut.resolve_input('config_file', cache)
    remote_centroids = ut.resolve_input('remote_centroids', inputs)
    computation_phase = ut.resolve_input('computation_phase', inputs)
    ut.log('LOCAL: computing clustering', state)
    if remote_centroids is None:
        raise ValueError(
            "LOCAL: at local_compute_clustering - remote_centroids not passed correctly"
        )
    if computation_phase is None:
        raise ValueError(
            "LOCAL: at local_compute_clustering - computation_phase not passed correctly"
        )
    config = configparser.ConfigParser()
    config.read(config_file)
    ut.log('Config file is %s, with keys %s' % (config_file, str(dict(config))), state)

    data = np.load(config['LOCAL']['data_file'])

    cluster_labels = local.compute_clustering(data, remote_centroids)

    new_comp_phase = "dkm_local_compute_clustering"
    if computation_phase == "dkm_remote_optimization_step":
        new_comp_phase = "dkm_local_compute_clustering_2"

    computation_output = ut.default_computation_output(args)
    cache['cluster_labels'] = cluster_labels
    cache['remote_centroids'] = remote_centroids
    computation_output['output'] = dict(
        computation_phase=new_comp_phase,
        remote_centroids=remote_centroids,
        cluster_labels=cluster_labels
    )
    computation_output['cache'] = cache
    return computation_output
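The computation_phase strings returned by each function are what the surrounding pipeline routes on; the PREVIOUS PHASE and NEXT PHASE entries in the docstring describe that wiring. A hypothetical dispatcher sketching the convention (the framework's actual routing is not part of these excerpts):

def route_local(args):
    # Hypothetical helper: choose the next local phase from the phase
    # string the remote sent; names follow the docstring above
    phase = args["input"].get("computation_phase", "")
    if phase in ("remote_init_centroids", "dkm_remote_optimization_step"):
        return dkm_local_compute_clustering(args)
    return noop(args)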
Example #3
def dump_cache_to_file(args,
                       filename=None,
                       phase_prefix="local",
                       keys=None,
                       **kwargs):
    state = args['state']
    cache = args['cache']
    if not keys:
        keys = cache.keys()
    else:
        keys = [k for k in keys if k in cache.keys()]
    cache = {k: v for k, v in cache.items() if k in keys}
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    out_file = os.path.join(state["outputDirectory"], filename)
    prefix, ext = os.path.splitext(filename)
    try:
        if 'npy' in ext:
            np.save(out_file, cache)
        elif 'json' in ext:
            with open(out_file, "w") as f:
                json.dump(cache, f)
        elif 'pkl' in ext:
            # pickle writes bytes, so the file must be opened in binary mode
            with open(out_file, "wb") as f:
                pickle.dump(cache, f)
        elif 'mat' in ext:
            sio.savemat(out_file, cache)
    except Exception as e:
        raise Exception("Encountered an exception while saving data... %s" % e)
    computation_output["output"] = {
        "computation_phase": '%s_dump_cache' % phase_prefix,
    }
    return computation_output
Example #4
def load_cache_from_file(args,
                         filename=None,
                         phase_prefix="local",
                         keys=None,
                         **kwargs):
    state = args['state']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    in_file = os.path.join(state["outputDirectory"], filename)
    prefix, ext = os.path.splitext(filename)
    loaded = dict()
    try:
        if 'npy' in ext:
            # allow_pickle is required to recover a dict stored as an object array
            loaded = np.load(in_file, allow_pickle=True).item()
        elif 'json' in ext:
            with open(in_file, "r", encoding="utf8") as f:
                loaded = json.load(f)
        elif 'pkl' in ext:
            # pickle reads bytes, so the file must be opened in binary mode
            with open(in_file, "rb") as f:
                loaded = pickle.load(f)
        elif 'mat' in ext:
            loaded = sio.loadmat(in_file)
    except Exception as e:
        # A failed load falls through with an empty dict instead of aborting the phase
        ut.log('Failed to load cache from %s: %s' % (in_file, e), state)
    computation_output["output"] = {
        "computation_phase": '%s_load_cache' % phase_prefix
    }
    if not keys:
        keys = loaded.keys()
    else:
        keys = [k for k in keys if k in loaded.keys()]
    for key in keys:
        computation_output["cache"][key] = loaded[key]
    return computation_output
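The npy and pkl branches above have sharp edges: a dict saved with np.save comes back as a 0-d object array that needs allow_pickle=True and .item() to unwrap, and pickle only accepts binary file handles. A standalone illustration of both:

import os
import pickle
import tempfile

import numpy as np

cache = {"iteration": 3, "centroids": [[0.1, 0.2], [0.9, 0.8]]}
tmp = tempfile.mkdtemp()

# np.save wraps a dict in a 0-d object array; np.load needs
# allow_pickle=True, and .item() unwraps the original dict
npy_path = os.path.join(tmp, "cache.npy")
np.save(npy_path, cache)
assert np.load(npy_path, allow_pickle=True).item() == cache

# pickle reads and writes bytes, so handles must be opened "wb"/"rb";
# json, by contrast, works on text-mode handles
pkl_path = os.path.join(tmp, "cache.pkl")
with open(pkl_path, "wb") as f:
    pickle.dump(cache, f)
with open(pkl_path, "rb") as f:
    assert pickle.load(f) == cache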
Example #5
def output_to_input(args, phase_prefix="local"):
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    computation_output["output"] = {
        "computation_phase": '%s_output_to_input' % phase_prefix
    }
    for key in args["output"].keys():
        computation_output["input"][key] = args["output"][key]
    return computation_output
Example #6
def clear_cache(args, phase_prefix="local"):
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    computation_output["output"] = {
        "computation_phase": '%s_clear_cache' % phase_prefix
    }
    computation_output["cache"] = dict()
    return computation_output
Example #7
def dump_cache_to_npy(args, phase_prefix="local"):
    state = args['state']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    out_file = os.path.join(state["outputDirectory"], "cache.npy")
    np.save(out_file, args["cache"])
    computation_output["output"] = {
        "computation_phase": '%s_dump_cache_to_npy' % phase_prefix
    }
    return computation_output
Example #8
def dump_cache(args, phase_prefix="local"):
    state = args['state']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    out_file = os.path.join(state["outputDirectory"], "cache.json")
    json.dump(args["cache"], open(out_file, "w", encoding="utf8"))
    computation_output["output"] = {
        "computation_phase": '%s_dump_cache' % phase_prefix
    }
    return computation_output
Example #9
def input_to_cache(args, phase_prefix="local"):
    inputs = args['input']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    computation_output["output"] = {
        "computation_phase": '%s_input_to_cache' % phase_prefix
    }
    for key in inputs.keys():
        computation_output["cache"][key] = inputs[key]
    computation_output["input"] = dict()
    return computation_output
Example #10
def load_cache_from_npy(args, phase_prefix="local"):
    state = args['state']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    in_file = os.path.join(state["outputDirectory"], "cache.npy")
    # allow_pickle is required to recover a dict stored as an object array
    loaded = np.load(in_file, allow_pickle=True).item()
    computation_output["output"] = {
        "computation_phase": '%s_dump_cache' % phase_prefix
    }
    for key in loaded.keys():
        computation_output["cache"][key] = loaded[key]

    return computation_output
Example #11
def load_cache(args, phase_prefix="local"):
    state = args['state']
    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    in_file = os.path.join(state["outputDirectory"], "cache.json")
    loaded = json.load(open(in_file, "r", encoding="utf8"))
    computation_output["output"] = {
        "computation_phase": '%s_dump_cache' % phase_prefix
    }
    for key in loaded.keys():
        computation_output["cache"][key] = loaded[key]

    return computation_output
Example #12
def load_datasets(args, phase_prefix="local"):
    state = args['state']
    inputs = args['input']

    csv_file = os.path.join(state["baseDirectory"], inputs['datafile'][0])
    datasets = ut.read_data_csv(csv_file, state["baseDirectory"],
                                state["clientId"])

    # Compile results to be transmitted to remote and cached for reuse in next iteration
    computation_output = ut.default_computation_output(args)
    computation_output["output"] = {
        "datasets": datasets,
        "computation_phase": '%s_load_datasets' % phase_prefix
    }
    return computation_output
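A sketch of the input this phase expects, inferred from the accesses above; the file name is illustrative, and the CSV layout that ut.read_data_csv consumes is not shown in these excerpts:

args = {
    "state": {"baseDirectory": "/input", "clientId": "local0"},
    "input": {"datafile": ["subjects.csv"]},  # first entry names the CSV
}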