# Keys injected by the grid runner that must NOT become part of the cache key.
# save() and try_load() previously kept two hand-maintained (and differently
# ordered) del-lists; one shared helper keeps them from drifting apart.
_RUNNER_ONLY_KEYS = ('force_reload', 'n_jobs', 'ipcluster_workers', 'recalculate_experiments')


def _cleaned_config(_config):
    # Deep-copy so the live sacred config is never mutated, then strip the
    # runner-only keys.  pop(key, None) instead of del: a missing key (e.g.
    # when the experiment is run outside the grid runner) must not abort
    # caching with a KeyError.
    _config_cleaned = copy.deepcopy(_config)
    for key in _RUNNER_ONLY_KEYS:
        _config_cleaned.pop(key, None)
    return _config_cleaned


@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    """Persist *results* keyed by the detailed experiment name plus the
    cleaned (runner-flag-free) config."""
    ninja_set_value(value=results, master_key=experiment_detailed_name,
                    **_cleaned_config(_config))


@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    """Return a previously cached result for the same cleaned config
    (whatever ninja_get_value reports on a miss -- presumably None)."""
    return ninja_get_value(master_key=experiment_detailed_name,
                           **_cleaned_config(_config))


if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
kaggle_ninja.register("fit_grid", ex)
        # Cache miss: compute the result (optionally under a hard timeout)
        # and persist it before returning.
        logger.info("Cache miss, calculating")
        if timeout > 0:
            result = abortable_worker(run, timeout=timeout)
        else:
            result = run()
        save(result)
        return result
    # NOTE(review): Python 2 except syntax; the matching `try:` opens before
    # this chunk.  Log the full traceback, then re-raise unchanged.
    except Exception, err:
        logger.error(traceback.format_exc())
        logger.error(sys.exc_info()[0])
        raise(err)


@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    # Cache results keyed by the detailed experiment name plus the config
    # minus the runner-only force_reload flag.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    ninja_set_value(value=results, master_key=experiment_detailed_name, **_config_cleaned)


@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    # Look up a previously cached result for the same cleaned config.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    return ninja_get_value(master_key=experiment_detailed_name, **_config_cleaned)


if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
kaggle_ninja.register("fit_active_learning", ex)
        # Tail of the cached-run wrapper (its `try:` opens before this chunk):
        # compute, persist, return; on failure log the traceback and re-raise.
        result = run()
        save(result)
        return result
    except Exception, err:
        logger.error(traceback.format_exc())
        logger.error(sys.exc_info()[0])
        raise(err)


@ex.capture
def save(results, experiment_detailed_name, _config, _log):
    # Strip the runner-only flags so only experiment-identifying config
    # items form the cache key, then persist the results.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    del _config_cleaned['n_jobs']
    del _config_cleaned['ipcluster_workers']
    del _config_cleaned['recalculate_experiments']
    ninja_set_value(value=results, master_key=experiment_detailed_name, **_config_cleaned)


@ex.capture
def try_load(experiment_detailed_name, _config, _log):
    # Same key-cleaning as save() (deletion order differs but the key set
    # is identical), then attempt a cache lookup.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    del _config_cleaned['ipcluster_workers']
    del _config_cleaned['n_jobs']
    del _config_cleaned['recalculate_experiments']
    return ninja_get_value(master_key=experiment_detailed_name, **_config_cleaned)


if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
kaggle_ninja.register("fit_grid", ex)
import os


def restart(n=2):
    # Fire-and-forget restart of n ipengine workers via a shell helper
    # (backgrounded with '&', so errors are not observed here).
    os.system("scripts/restart_ipengines.sh " + str(n) + " &")


def tester(sleep=1):
    # Trivial registered job used to smoke-test the worker machinery.
    import time
    time.sleep(sleep)
    return "Test successful"

register("tester", tester)


def abortable_worker(func, func_kwargs={}, **kwargs):
    # Run func (or a registered name resolving to one) with an optional
    # timeout enforced through a single-worker thread pool.
    # NOTE(review): mutable default func_kwargs={} is shared across calls --
    # safe only while callers never mutate it.
    timeout = kwargs.get('timeout', 0)
    id = kwargs.get('id', -1)
    if isinstance(func, str):
        # Remember to register it!
        func = find_obj(func)
    if timeout > 0:
        p = ThreadPool(1)
        res = p.apply_async(partial(func, **func_kwargs))
        try:
            out = res.get(
            # (chunk truncated here: the res.get(...) call continues beyond this view)
    # Else-branch of a cache lookup that starts before this chunk:
    # recompute (optionally with a hard timeout) and store the fresh result.
    else:
        if timeout > 0:
            result = abortable_worker(run, timeout=timeout)
        else:
            result = run()
        save(result)
        return result


@ex.capture
def save(results, experiment_sub_name, _config, _log):
    # Persist results keyed by sub-experiment name plus the config minus
    # the runner-only force_reload flag.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    ninja_set_value(value=results, master_key=experiment_sub_name, **_config_cleaned)


@ex.capture
def try_load(_config, experiment_sub_name, _log):
    # Fetch a cached result for the same cleaned config, if any.
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    return ninja_get_value(master_key=experiment_sub_name, **_config_cleaned)


if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
kaggle_ninja.register("random_query_exp", ex)
        # Fisher-style projection direction: beta = (S0 + S1)^-1 (m1 - m0),
        # then the per-class mean and variance of data projected onto beta.
        self.beta = la.inv(S[0] + S[1]).dot(self.m[1]-self.m[0])
        self.proj_mean = [ float(self.beta.T.dot(self.m[k])) for k in range(2)]
        self.proj_var = [ float(self.beta.T.dot(S[k]).dot(self.beta)) for k in range(2)]
        return self

    def Nor(self,x,m,s):
        # Univariate normal density with mean m and VARIANCE s, evaluated
        # element-wise (x is flattened from a matrix-like first row).
        x = np.array(x.ravel().tolist()[0])
        return 1.0 / (np.sqrt(s) * np.sqrt(2 * np.pi)) * np.exp( -(x-m)**2 / (2*s) )

    def predict(self, X):
        # Compare the two class-conditional densities along beta;
        # sign(c1 - c0) == -1 maps to neg_label, otherwise pos_label.
        # NOTE(review): Python 2 map() returning a list.
        X = self.projector.project(X)
        c0 = self.Nor(X.dot(self.beta),self.proj_mean[0],self.proj_var[0])
        c1 = self.Nor(X.dot(self.beta),self.proj_mean[1],self.proj_var[1])
        return np.array(map(lambda x: self.neg_label if x==-1 else self.pos_label,np.sign(c1-c0)))

    def predict_proba(self, X):
        # Row-normalise the two densities and return the winning class's
        # posterior as an (n, 1) column.
        X = self.projector.project(X)
        c0 = self.Nor(X.dot(self.beta),self.proj_mean[0],self.proj_var[0])
        c1 = self.Nor(X.dot(self.beta),self.proj_mean[1],self.proj_var[1])
        cum = np.vstack((c0, c1)).T
        return (cum /cum.sum(axis=1).reshape(-1,1)).max(axis=1).reshape(-1, 1)

import sys
sys.path.append("..")
import kaggle_ninja
kaggle_ninja.register("EEM", EEM)
def main(timeout, force_reload, _log):
    """Run the experiment, serving a cached result unless force_reload.

    Fix: the original tested `if cached_result:` -- a legitimately cached
    falsy payload (empty dict/list, 0) was silently recomputed.  Compare
    against None explicitly; try_load is presumed to yield None on a miss
    (TODO confirm against ninja_get_value).
    """
    # Load cache unless forced not to
    cached_result = try_load() if not force_reload else None
    if cached_result is not None:
        return cached_result
    # Optional hard timeout around the computation.
    if timeout > 0:
        result = abortable_worker(run, timeout=timeout)
    else:
        result = run()
    save(result)
    return result


@ex.capture
def save(results, experiment_sub_name, _config, _log):
    """Cache *results* under the sub-experiment name + cleaned config."""
    _config_cleaned = copy.deepcopy(_config)
    # force_reload is a runner flag, not part of the experiment identity.
    del _config_cleaned['force_reload']
    ninja_set_value(value=results, master_key=experiment_sub_name, **_config_cleaned)


@ex.capture
def try_load(experiment_sub_name, _config, _log):
    """Fetch a cached result for the same cleaned config, if any."""
    _config_cleaned = copy.deepcopy(_config)
    del _config_cleaned['force_reload']
    return ninja_get_value(master_key=experiment_sub_name, **_config_cleaned)


if __name__ == '__main__':
    results = ex.run_commandline().result

import kaggle_ninja
kaggle_ninja.register("random_query_composite", ex)
    # (fragment) Tail of a per-engine memory probe: resident set size in MB.
    import os
    import psutil
    return psutil.Process(os.getpid()).get_memory_info().rss / 1e6

    # (fragment) Tail of a wrapper mapping the probe over all cluster engines.
    return client[:].apply(memory_mb).get_dict()

import os


def restart(n=2):
    # Fire-and-forget restart of n ipengine workers via a shell helper.
    os.system("scripts/restart_ipengines.sh "+str(n)+" &")


def tester(sleep=1):
    # Trivial registered job used to smoke-test the worker machinery.
    import time
    time.sleep(sleep)
    return "Test successful"

register("tester", tester)


def abortable_worker(func, func_kwargs={}, **kwargs):
    # Run func (or a registered name resolving to one) with an optional
    # timeout enforced through a single-worker thread pool.
    # NOTE(review): mutable default func_kwargs={} is shared across calls.
    timeout = kwargs.get('timeout', 0)
    id = kwargs.get('id', -1)
    if isinstance(func, str):
        # Remember to register it!
        func = find_obj(func)
    if timeout > 0:
        p = ThreadPool(1)
        res = p.apply_async(partial(func, **func_kwargs))
        # (chunk truncated here: the timeout-wait on res continues beyond this view)
            # Abort path: drop the partial-results dump and shut the pool
            # down, then raise so nothing half-finished gets cached.
            os.system("rm " + partial_results_file)
            pool.terminate()
            pool.join()
            raise ValueError("raising value to prevent caching")

        dump_results(start_time, last_dump)
    pool.terminate()
    pool.join()

    # Cache results with timeout
    results = _merge(pull_results(tasks))
    misc = {'grid_time': time.time() - start_time}
    package = GridExperimentResult(experiments=results, misc=misc, config=kwargs,
                                   grid_params=grid_params,
                                   name=kwargs.get("experiment_detailed_name"))
    return package


def run_experiment_kwargs(name, kwargs):
    # Run a registered sacred experiment with a dict of config updates.
    ex = find_obj(name)
    return ex.run(config_updates=kwargs).result

import json


def run_experiment(name, **kwargs):
    # Same as run_experiment_kwargs but takes config updates as **kwargs.
    ex = find_obj(name)
    return ex.run(config_updates=kwargs).result

kaggle_ninja.register("run_experiment", run_experiment)
kaggle_ninja.register("run_experiment_kwargs", run_experiment_kwargs)
kaggle_ninja.register("run_experiment_grid", run_experiment_grid)
        # (fragment) First candidate batch: deterministic quasi-greedy
        # selection (the enclosing `results = [` opens before this chunk).
        quasi_greedy_batch(X=X, y=y, current_model=current_model, batch_size=batch_size,
                           rng=rng, D=D, c=c, sample_first=False)
    ]
    # k-1 randomized restarts: each samples its first point before going greedy.
    for i in range(k - 1):
        results.append(
            quasi_greedy_batch(X=X, y=y, current_model=current_model, batch_size=batch_size,
                               rng=rng, D=D, c=c, sample_first=True))
    # Keep the restart whose returned (ids, score) tuple scores best.
    return results[np.argmax([r[1] for r in results])]

kaggle_ninja.register("rand_greedy", rand_greedy)
kaggle_ninja.register("CSJ_sampling", CSJ_sampling)
kaggle_ninja.register("query_by_bagging", query_by_bagging)
kaggle_ninja.register("uncertainty_sampling", uncertainty_sampling)
kaggle_ninja.register("random_query", random_query)
kaggle_ninja.register("quasi_greedy_batch", quasi_greedy_batch)
kaggle_ninja.register("chen_krause", chen_krause)
            # (fragment) Tally how often each column=value pair occurs.
            frequencies[str(column)+"="+str(value)] += 1
        return dicted_rows, frequencies

    # Vectorise the training fold; the transformer is fitted on the
    # training rows only, then reused for valid/test.
    D, freqs = to_dict_values(X_train)
    fold["X_train"]["data"] = transformer.fit_transform(D)
    if X_valid.shape[0]:
        fold["X_valid"]["data"] = transformer.transform(to_dict_values(X_valid)[0])

    # Comes out as 0 for valid and test
    assert(len(others_to_preprocess ) <= 1)
    if len(others_to_preprocess):
        X = others_to_preprocess[0]["X"]["data"]
        Y = others_to_preprocess[0]["Y"]["data"]
        if X.shape[0]:
            D, _ = to_dict_values(X.astype("int32"))
            others_to_preprocess[0]["X"]["data"] = transformer.transform(D)
            others_to_preprocess[0]["Y"]["data"] = Y
        else:
            # Empty split: keep the (empty) array, only normalise its dtype.
            others_to_preprocess[0]["X"]["data"] = X.astype("int32")
            others_to_preprocess[0]["Y"]["data"] = Y
    return fold, others_to_preprocess

import kaggle_ninja
kaggle_ninja.register("get_splitted_data", get_splitted_data)
kaggle_ninja.register("get_splitted_data_clusterwise", get_splitted_data_clusterwise)
kaggle_ninja.register("get_splitted_data_checkerboard", get_splitted_data_checkerboard)
kaggle_ninja.register("get_splitted_uniform_data", get_splitted_uniform_data)
kaggle_ninja.register("to_binary", to_binary)
        # Stop once every unlabeled example has been picked.
        if known_labeles + len(picked) == y.shape[0]:
            break
        # Greedy step: score every not-yet-picked candidate and take the argmax.
        candidates_scores = [(score(i),i) for i in xrange(X_unknown.shape[0]) if i not in picked]
        new_index = max(candidates_scores)[1]
        # Accumulate pairwise dissimilarity of the growing batch.
        picked_dissimilarity += sum(D[new_index, j] for j in picked)
        picked.add(new_index)
        picked_sequence.append(new_index)
    # Return picked ids (mapped back to original index space) plus the
    # combined base-score + c-weighted average-dissimilarity objective.
    return [y.unknown_ids[i] for i in picked_sequence], \
        (1 - c)*base_scores[np.array(list(picked))].mean() + c*(1.0/max(1,len(picked)*(len(picked) - 1)/2.0))*picked_dissimilarity


def rand_greedy(X, y, current_model, batch_size, rng, k=20, D=None, c=1.0):
    # Randomized-restart wrapper around quasi_greedy_batch: one deterministic
    # start plus k-1 random-first restarts; keep the best-scoring batch.
    results = [quasi_greedy_batch(X=X, y=y, current_model=current_model, batch_size=batch_size,
                                  rng=rng, D=D, c=c, sample_first=False)]
    for i in range(k-1):
        results.append(quasi_greedy_batch(X=X, y=y, current_model=current_model,
                                          batch_size=batch_size, rng=rng, D=D, c=c,
                                          sample_first=True))
    return results[np.argmax([r[1] for r in results])]

kaggle_ninja.register("rand_greedy", rand_greedy)
kaggle_ninja.register("CSJ_sampling", CSJ_sampling)
kaggle_ninja.register("query_by_bagging", query_by_bagging)
kaggle_ninja.register("uncertainty_sampling", uncertainty_sampling)
kaggle_ninja.register("random_query", random_query)
kaggle_ninja.register("quasi_greedy_batch", quasi_greedy_batch)
kaggle_ninja.register("chen_krause", chen_krause)
    # (fragment) Tail of the grid runner: shut the worker pool down and
    # assemble the merged result package.
    pool.terminate()
    pool.join()

    # Cache results with timeout
    results = _merge(pull_results(tasks))
    misc = {'grid_time': time.time() - start_time}
    package = GridExperimentResult(experiments=results, misc=misc, config=kwargs,
                                   grid_params=grid_params,
                                   name=kwargs.get("experiment_detailed_name"))
    return package


def run_experiment_kwargs(name, kwargs):
    # Run a registered sacred experiment with a dict of config updates.
    ex = find_obj(name)
    return ex.run(config_updates=kwargs).result

import json


def run_experiment(name, **kwargs):
    # Same as run_experiment_kwargs but takes config updates as **kwargs.
    ex = find_obj(name)
    return ex.run(config_updates=kwargs).result

kaggle_ninja.register("run_experiment", run_experiment)
kaggle_ninja.register("run_experiment_kwargs", run_experiment_kwargs)
kaggle_ninja.register("run_experiment_grid", run_experiment_grid)
# Load cache unless forced not to cached_result = try_load() if not force_reload else None if cached_result: logger.info("Reading from cache "+ex.name) return cached_result else: if timeout > 0: result = abortable_worker(run, timeout=timeout) else: result = run() save(result) return result @ex.capture def save(results, experiment_sub_name, _config, _log): _config_cleaned = copy.deepcopy(_config) del _config_cleaned['force_reload'] ninja_set_value(value=results, master_key=experiment_sub_name, **_config_cleaned) @ex.capture def try_load(_config,experiment_sub_name, _log): _config_cleaned = copy.deepcopy(_config) del _config_cleaned['force_reload'] return ninja_get_value(master_key=experiment_sub_name, **_config_cleaned) if __name__ == '__main__': results = ex.run_commandline().result import kaggle_ninja kaggle_ninja.register("random_query_exp", ex)
    # (fragment) Vectorise the training fold; the transformer is fitted on
    # the training rows only, then reused for valid/test.
    D, freqs = to_dict_values(X_train)
    fold["X_train"]["data"] = transformer.fit_transform(D)
    if X_valid.shape[0]:
        fold["X_valid"]["data"] = transformer.transform( to_dict_values(X_valid)[0])

    # Comes out as 0 for valid and test
    assert (len(others_to_preprocess) <= 1)
    if len(others_to_preprocess):
        X = others_to_preprocess[0]["X"]["data"]
        Y = others_to_preprocess[0]["Y"]["data"]
        if X.shape[0]:
            D, _ = to_dict_values(X.astype("int32"))
            others_to_preprocess[0]["X"]["data"] = transformer.transform(D)
            others_to_preprocess[0]["Y"]["data"] = Y
        else:
            # Empty split: keep the (empty) array, only normalise its dtype.
            others_to_preprocess[0]["X"]["data"] = X.astype("int32")
            others_to_preprocess[0]["Y"]["data"] = Y
    return fold, others_to_preprocess

import kaggle_ninja
kaggle_ninja.register("get_splitted_data", get_splitted_data)
kaggle_ninja.register("get_splitted_data_clusterwise", get_splitted_data_clusterwise)
kaggle_ninja.register("get_splitted_data_checkerboard", get_splitted_data_checkerboard)
kaggle_ninja.register("get_splitted_uniform_data", get_splitted_uniform_data)
kaggle_ninja.register("to_binary", to_binary)