Exemple #1
0
@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    """Persist `results` in the ninja cache, keyed by the experiment config."""
    # Run-control keys would make otherwise-identical experiments hash
    # differently, so strip them from the cache key.
    config_key = copy.deepcopy(_config)
    for transient in ('force_reload', 'n_jobs', 'ipcluster_workers',
                      'recalculate_experiments'):
        del config_key[transient]
    ninja_set_value(value=results,
                    master_key=experiment_detailed_name,
                    **config_key)


@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    """Look up a cached result for this exact experiment configuration."""
    # Drop run-control keys so the lookup key matches what save() used.
    config_key = copy.deepcopy(_config)
    for transient in ('force_reload', 'ipcluster_workers', 'n_jobs',
                      'recalculate_experiments'):
        del config_key[transient]
    return ninja_get_value(master_key=experiment_detailed_name,
                           **config_key)


# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja

# Register this experiment as "fit_grid" in the kaggle_ninja registry.
kaggle_ninja.register("fit_grid", ex)
            logger.info("Cache miss, calculating")
            if timeout > 0:
                result = abortable_worker(run, timeout=timeout)
            else:
                result = run()
            save(result)
            return result
    except Exception, err:
        logger.error(traceback.format_exc())
        logger.error(sys.exc_info()[0])
        raise(err)

@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    """Cache `results`, keyed by the experiment name and its configuration."""
    config_key = copy.deepcopy(_config)
    # 'force_reload' only steers caching behaviour, so keep it out of the key.
    config_key.pop('force_reload')
    ninja_set_value(value=results, master_key=experiment_detailed_name, **config_key)

@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    """Fetch a previously cached result for this exact configuration."""
    config_key = copy.deepcopy(_config)
    # Must mirror save(): 'force_reload' is not part of the cache key.
    config_key.pop('force_reload')
    return ninja_get_value(master_key=experiment_detailed_name, **config_key)

# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
# Register this experiment as "fit_active_learning" in the ninja registry.
kaggle_ninja.register("fit_active_learning", ex)
Exemple #3
0
                result = run()
            save(result)
            return result
    except Exception, err:
        logger.error(traceback.format_exc())
        logger.error(sys.exc_info()[0])
        raise(err)
@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    """Persist `results` in the ninja cache, keyed by the experiment config."""
    # Strip run-control keys so equivalent experiments share a cache entry.
    config_key = copy.deepcopy(_config)
    for transient in ('force_reload', 'n_jobs', 'ipcluster_workers',
                      'recalculate_experiments'):
        del config_key[transient]
    ninja_set_value(value=results, master_key=experiment_detailed_name, **config_key)

@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    """Look up a cached result for this exact experiment configuration."""
    config_key = copy.deepcopy(_config)
    for transient in ('force_reload', 'ipcluster_workers', 'n_jobs',
                      'recalculate_experiments'):
        del config_key[transient]
    return ninja_get_value(master_key=experiment_detailed_name, **config_key)

# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
# Register this experiment as "fit_grid" in the kaggle_ninja registry.
kaggle_ninja.register("fit_grid", ex)

import os


def restart(n=2):
    """Restart `n` ipengine workers via the helper script, in the background."""
    # Trailing '&' backgrounds the script so this call returns immediately.
    command = "scripts/restart_ipengines.sh " + str(n) + " &"
    os.system(command)


def tester(sleep=1):
    """Block for `sleep` seconds, then return a success marker string.

    Used as a cheap smoke test for remote workers.
    """
    from time import sleep as pause
    pause(sleep)
    return "Test successful"


# Expose the smoke-test helper through the registry so it can be found by name.
register("tester", tester)


def abortable_worker(func, func_kwargs={}, **kwargs):
    timeout = kwargs.get('timeout', 0)
    id = kwargs.get('id', -1)

    if isinstance(func, str):
        # Remember to register it!
        func = find_obj(func)

    if timeout > 0:
        p = ThreadPool(1)
        res = p.apply_async(partial(func, **func_kwargs))
        try:
            out = res.get(
Exemple #5
0
    else:
        if timeout > 0:
            result = abortable_worker(run, timeout=timeout)
        else:
            result = run()
        save(result)
        return result


@ex.capture
def save(results, experiment_sub_name, _config, _log):
    """Store `results` in the ninja cache, keyed by the sub-experiment config."""
    config_key = copy.deepcopy(_config)
    # 'force_reload' only controls caching, so it must not affect the key.
    config_key.pop('force_reload')
    ninja_set_value(value=results,
                    master_key=experiment_sub_name,
                    **config_key)


@ex.capture
def try_load(_config, experiment_sub_name, _log):
    """Fetch a cached result for this exact sub-experiment configuration."""
    config_key = copy.deepcopy(_config)
    # Must mirror save(): 'force_reload' is not part of the cache key.
    config_key.pop('force_reload')
    return ninja_get_value(master_key=experiment_sub_name, **config_key)


# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
# Register this experiment as "random_query_exp" in the ninja registry.
kaggle_ninja.register("random_query_exp", ex)
Exemple #6
0
        self.beta = la.inv(S[0] + S[1]).dot(self.m[1]-self.m[0])

        self.proj_mean = [ float(self.beta.T.dot(self.m[k])) for k in range(2)]
        self.proj_var = [ float(self.beta.T.dot(S[k]).dot(self.beta)) for k in range(2)]

        return self

    def Nor(self, x, m, s):
        """Univariate normal pdf with mean `m` and variance `s`, evaluated
        elementwise on `x`.

        NOTE(review): `x.ravel().tolist()[0]` assumes `x` is a numpy matrix
        (tolist() yields a nested list) — confirm against callers.
        """
        values = np.array(x.ravel().tolist()[0])
        coeff = 1.0 / (np.sqrt(s) * np.sqrt(2 * np.pi))
        return coeff * np.exp(-(values - m) ** 2 / (2 * s))

    def predict(self, X):
        """Predict hard labels: neg_label where the class-0 projected density
        wins, pos_label otherwise.

        NOTE(review): ties (sign == 0) fall through to pos_label — confirm
        that is intended.
        """
        X = self.projector.project(X)
        scores = X.dot(self.beta)
        c0 = self.Nor(scores, self.proj_mean[0], self.proj_var[0])
        c1 = self.Nor(scores, self.proj_mean[1], self.proj_var[1])
        # Fix: np.array(map(...)) builds a useless 0-d object array on
        # Python 3; a list comprehension behaves identically on Python 2
        # and correctly on Python 3.
        return np.array([self.neg_label if s == -1 else self.pos_label
                         for s in np.sign(c1 - c0)])

    def predict_proba(self, X):
        """Return, per sample, the larger of the two normalized class
        densities, as an (n, 1) column vector."""
        X = self.projector.project(X)
        scores = X.dot(self.beta)
        c0 = self.Nor(scores, self.proj_mean[0], self.proj_var[0])
        c1 = self.Nor(scores, self.proj_mean[1], self.proj_var[1])
        stacked = np.vstack((c0, c1)).T
        # Normalize the two densities per row, then keep the winning one.
        normalized = stacked / stacked.sum(axis=1).reshape(-1, 1)
        return normalized.max(axis=1).reshape(-1, 1)

# Make the parent directory importable, then register the EEM model by name.
import sys
sys.path.append("..")
import kaggle_ninja
kaggle_ninja.register("EEM", EEM)
Exemple #7
0
def main(timeout, force_reload, _log):
    """Run the experiment, serving a cached result when one is available.

    NOTE(review): a falsy cached result (0, [], {}) is treated as a cache
    miss and recomputed — confirm that is intended.
    """
    cached_result = None if force_reload else try_load()
    if cached_result:
        return cached_result
    # No usable cache entry: compute, optionally under a timeout guard.
    if timeout > 0:
        result = abortable_worker(run, timeout=timeout)
    else:
        result = run()
    save(result)
    return result

@ex.capture
def save(results, experiment_sub_name, _config, _log):
    """Store `results` in the ninja cache, keyed by the sub-experiment config."""
    config_key = copy.deepcopy(_config)
    # 'force_reload' only controls caching, so it must not affect the key.
    config_key.pop('force_reload')
    ninja_set_value(value=results, master_key=experiment_sub_name, **config_key)

@ex.capture
def try_load(experiment_sub_name, _config, _log):
    """Fetch a cached result for this exact sub-experiment configuration."""
    config_key = copy.deepcopy(_config)
    # Must mirror save(): 'force_reload' is not part of the cache key.
    config_key.pop('force_reload')
    return ninja_get_value(master_key=experiment_sub_name, **config_key)

# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
# Register this experiment as "random_query_composite" in the ninja registry.
kaggle_ninja.register("random_query_composite", ex)
Exemple #8
0
        import os
        import psutil
        return psutil.Process(os.getpid()).get_memory_info().rss / 1e6

    return client[:].apply(memory_mb).get_dict()

import os
def restart(n=2):
    """Restart `n` ipengine workers via the helper script, in the background."""
    # Trailing '&' backgrounds the script so this call returns immediately.
    command = "scripts/restart_ipengines.sh " + str(n) + " &"
    os.system(command)

def tester(sleep=1):
    """Block for `sleep` seconds, then return a success marker string.

    Used as a cheap smoke test for remote workers.
    """
    from time import sleep as pause
    pause(sleep)
    return "Test successful"

# Expose the smoke-test helper through the registry so it can be found by name.
register("tester", tester)



def abortable_worker(func, func_kwargs={}, **kwargs):
    timeout = kwargs.get('timeout', 0)
    id = kwargs.get('id', -1)


    if isinstance(func, str):
        # Remember to register it!
        func = find_obj(func)

    if timeout > 0:
        p = ThreadPool(1)
        res = p.apply_async(partial(func, **func_kwargs))
Exemple #9
0
            os.system("rm " + partial_results_file)
            pool.terminate()
            pool.join()
            raise ValueError("raising value to prevent caching")

    dump_results(start_time, last_dump)
    pool.terminate()
    pool.join()
    # Cache results with timeout
    results = _merge(pull_results(tasks))
    misc = {'grid_time': time.time() - start_time}
    package = GridExperimentResult(experiments=results, misc=misc,
                                config=kwargs, grid_params=grid_params, name=kwargs.get("experiment_detailed_name"))

    return package

def run_experiment_kwargs(name, kwargs):
    """Look up a registered experiment by name, run it with the given config
    overrides (a dict), and return its result."""
    experiment = find_obj(name)
    run = experiment.run(config_updates=kwargs)
    return run.result

import json
def run_experiment(name, **kwargs):
    """Look up a registered experiment by name, run it with the given config
    overrides (as keyword arguments), and return its result."""
    experiment = find_obj(name)
    run = experiment.run(config_updates=kwargs)
    return run.result

# Publish the experiment runners in the kaggle_ninja registry so they can
# be looked up by name.
kaggle_ninja.register("run_experiment", run_experiment)
kaggle_ninja.register("run_experiment_kwargs", run_experiment_kwargs)
kaggle_ninja.register("run_experiment_grid", run_experiment_grid)


Exemple #10
0
        quasi_greedy_batch(X=X,
                           y=y,
                           current_model=current_model,
                           batch_size=batch_size,
                           rng=rng,
                           D=D,
                           c=c,
                           sample_first=False)
    ]
    for i in range(k - 1):
        results.append(
            quasi_greedy_batch(X=X,
                               y=y,
                               current_model=current_model,
                               batch_size=batch_size,
                               rng=rng,
                               D=D,
                               c=c,
                               sample_first=True))

    return results[np.argmax([r[1] for r in results])]


# Register every query/sampling strategy by name in the kaggle_ninja registry.
kaggle_ninja.register("rand_greedy", rand_greedy)
kaggle_ninja.register("CSJ_sampling", CSJ_sampling)
kaggle_ninja.register("query_by_bagging", query_by_bagging)
kaggle_ninja.register("uncertainty_sampling", uncertainty_sampling)
kaggle_ninja.register("random_query", random_query)
kaggle_ninja.register("quasi_greedy_batch", quasi_greedy_batch)
kaggle_ninja.register("chen_krause", chen_krause)
Exemple #11
0
                    frequencies[str(column)+"="+str(value)] += 1
        return dicted_rows, frequencies

    D, freqs = to_dict_values(X_train)
    fold["X_train"]["data"] = transformer.fit_transform(D)

    if X_valid.shape[0]:
        fold["X_valid"]["data"] = transformer.transform(to_dict_values(X_valid)[0])

    # Wychodzi 0 dla valid and test
    assert(len(others_to_preprocess ) <= 1)
    if len(others_to_preprocess):
        X = others_to_preprocess[0]["X"]["data"]
        Y = others_to_preprocess[0]["Y"]["data"]
        if X.shape[0]:
            D, _ = to_dict_values(X.astype("int32"))
            others_to_preprocess[0]["X"]["data"] = transformer.transform(D)
            others_to_preprocess[0]["Y"]["data"] = Y
        else:
            others_to_preprocess[0]["X"]["data"] = X.astype("int32")
            others_to_preprocess[0]["Y"]["data"] = Y
    return fold, others_to_preprocess


import kaggle_ninja
# Register the data-splitting helpers by name in the kaggle_ninja registry.
kaggle_ninja.register("get_splitted_data", get_splitted_data)
kaggle_ninja.register("get_splitted_data_clusterwise", get_splitted_data_clusterwise)
kaggle_ninja.register("get_splitted_data_checkerboard", get_splitted_data_checkerboard)
kaggle_ninja.register("get_splitted_uniform_data", get_splitted_uniform_data)
kaggle_ninja.register("to_binary", to_binary)
Exemple #12
0
        if known_labeles + len(picked) == y.shape[0]:
            break
        candidates_scores = [(score(i),i) for i in xrange(X_unknown.shape[0]) if i not in picked]
        new_index = max(candidates_scores)[1]
        picked_dissimilarity += sum(D[new_index, j] for j in picked)
        picked.add(new_index)
        picked_sequence.append(new_index)

    return [y.unknown_ids[i] for i in picked_sequence], \
           (1 - c)*base_scores[np.array(list(picked))].mean() + c*(1.0/max(1,len(picked)*(len(picked) - 1)/2.0))*picked_dissimilarity


def rand_greedy(X, y, current_model, batch_size, rng, k=20, D=None, c=1.0):
    """Run quasi_greedy_batch `k` times (first with a deterministic start,
    then with random starts) and return the candidate with the best score
    (index 1 of each candidate tuple)."""
    candidates = []
    for trial in range(k):
        candidates.append(quasi_greedy_batch(X=X, y=y,
                                             current_model=current_model,
                                             batch_size=batch_size, rng=rng,
                                             D=D, c=c,
                                             sample_first=(trial > 0)))
    scores = [cand[1] for cand in candidates]
    return candidates[np.argmax(scores)]

# Register every query/sampling strategy by name in the kaggle_ninja registry.
kaggle_ninja.register("rand_greedy", rand_greedy)
kaggle_ninja.register("CSJ_sampling", CSJ_sampling)
kaggle_ninja.register("query_by_bagging", query_by_bagging)
kaggle_ninja.register("uncertainty_sampling", uncertainty_sampling)
kaggle_ninja.register("random_query", random_query)
kaggle_ninja.register("quasi_greedy_batch", quasi_greedy_batch)
kaggle_ninja.register("chen_krause", chen_krause)

Exemple #13
0
    pool.terminate()
    pool.join()
    # Cache results with timeout
    results = _merge(pull_results(tasks))
    misc = {'grid_time': time.time() - start_time}
    package = GridExperimentResult(experiments=results,
                                   misc=misc,
                                   config=kwargs,
                                   grid_params=grid_params,
                                   name=kwargs.get("experiment_detailed_name"))

    return package


def run_experiment_kwargs(name, kwargs):
    """Look up a registered experiment by name, run it with the given config
    overrides (a dict), and return its result."""
    experiment = find_obj(name)
    run = experiment.run(config_updates=kwargs)
    return run.result


import json


def run_experiment(name, **kwargs):
    """Look up a registered experiment by name, run it with the given config
    overrides (as keyword arguments), and return its result."""
    experiment = find_obj(name)
    run = experiment.run(config_updates=kwargs)
    return run.result


# Publish the experiment runners in the kaggle_ninja registry so they can
# be looked up by name.
kaggle_ninja.register("run_experiment", run_experiment)
kaggle_ninja.register("run_experiment_kwargs", run_experiment_kwargs)
kaggle_ninja.register("run_experiment_grid", run_experiment_grid)
Exemple #14
0
    # Load cache unless forced not to
    cached_result = try_load() if not force_reload else None
    if cached_result:
        logger.info("Reading from cache "+ex.name)
        return cached_result
    else:
        if timeout > 0:
            result = abortable_worker(run, timeout=timeout)
        else:
            result = run()
        save(result)
        return result

@ex.capture
def save(results, experiment_sub_name, _config, _log):
    """Store `results` in the ninja cache, keyed by the sub-experiment config."""
    config_key = copy.deepcopy(_config)
    # 'force_reload' only controls caching, so it must not affect the key.
    config_key.pop('force_reload')
    ninja_set_value(value=results, master_key=experiment_sub_name, **config_key)

@ex.capture
def try_load(_config, experiment_sub_name, _log):
    """Fetch a cached result for this exact sub-experiment configuration."""
    config_key = copy.deepcopy(_config)
    # Must mirror save(): 'force_reload' is not part of the cache key.
    config_key.pop('force_reload')
    return ninja_get_value(master_key=experiment_sub_name, **config_key)

# Entry point: run the sacred experiment from the command line.
if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
# Register this experiment as "random_query_exp" in the ninja registry.
kaggle_ninja.register("random_query_exp", ex)
Exemple #15
0
    D, freqs = to_dict_values(X_train)
    fold["X_train"]["data"] = transformer.fit_transform(D)

    if X_valid.shape[0]:
        fold["X_valid"]["data"] = transformer.transform(
            to_dict_values(X_valid)[0])

    # Wychodzi 0 dla valid and test
    assert (len(others_to_preprocess) <= 1)
    if len(others_to_preprocess):
        X = others_to_preprocess[0]["X"]["data"]
        Y = others_to_preprocess[0]["Y"]["data"]
        if X.shape[0]:
            D, _ = to_dict_values(X.astype("int32"))
            others_to_preprocess[0]["X"]["data"] = transformer.transform(D)
            others_to_preprocess[0]["Y"]["data"] = Y
        else:
            others_to_preprocess[0]["X"]["data"] = X.astype("int32")
            others_to_preprocess[0]["Y"]["data"] = Y
    return fold, others_to_preprocess


import kaggle_ninja
# Register the data-splitting helpers by name in the kaggle_ninja registry.
kaggle_ninja.register("get_splitted_data", get_splitted_data)
kaggle_ninja.register("get_splitted_data_clusterwise",
                      get_splitted_data_clusterwise)
kaggle_ninja.register("get_splitted_data_checkerboard",
                      get_splitted_data_checkerboard)
kaggle_ninja.register("get_splitted_uniform_data", get_splitted_uniform_data)
kaggle_ninja.register("to_binary", to_binary)