def make_experiment(exp_name, zip_project=False, track_git=True):
    # create experiment :: need to add any information here that is relevant.
    assert not exp_exists(exp_name), \
        """experiment with this name already exists, either remove the existing version or rename the new launch."""
    arg_dict = {}
    expID = make_id(32)
    arg_dict['expID'] = expID
    arg_dict['script'] = sys.argv[0]  # TODO: check that this is the correct approach.
    _logger.configure(LOG_ROOT, prefix=exp_name)
    if zip_project:
        dir_zip(PROJECT_ROOT, output_file='source.zip',
                excludes=["*.ckpt*", "*tmp_dir*", "*.mp4", "*.png", "*data*", "*.pkl", "*.git*"])
        shutil.move('source.zip', os.path.join(LOG_ROOT, exp_name + '/' + 'source.zip'))
    if track_git:
        arg_dict['gitcommit'] = get_gitcommit()
        arg_dict['gitbranch'] = get_gitbranch()
    _logger.log_params(Args={'expID': expID})
    timestamp = datetime.datetime.now().strftime("%x :: %X")
    exp_doc = {
        'expID': expID,
        'expName': exp_name,
        'logdir': os.path.join(LOG_ROOT, exp_name),
        'time': timestamp
    }
    coll = get_exp_coll()
    coll.insert(exp_doc)
    return expID
def launch(root, seed=None):
    from ml_logger import logger
    logger.configure(root_dir=root, prefix=f"geyang/jaynes-demo/seed-{seed}", register_experiment=True)
    logger.print("this has been run")
def setup(log_dir):
    logger.configure(log_dir, prefix='main_test_script')
    logger.remove('')
    logger.log_line('hey')
    logger.log_data(dict(test=True), "test-data/dict.pkl")
    print(f"logging to {pathJoin(logger.log_directory, logger.prefix)}")
def setup(log_dir):
    logger.configure(log_dir, prefix='main_test_script')
    logger.remove('')
    logger.log_line('hey')
    sleep(1.0)
    print(f"logging to {pathJoin(logger.log_directory, logger.prefix)}")
def adapt_and_test():
    import os
    import dill
    from playground.maml.maml_torch.maml_multi_step import FunctionalMLP

    logger.configure(log_directory=Args.log_dir, prefix=Args.log_prefix)
    logger.log_params(Args=vars(Args))

    # load weights
    with open(os.path.join(Args.log_dir, Args.log_prefix, Args.weight_path), 'rb') as f:
        weights = dill.load(f)

    model = FunctionalMLP(1, 1)

    losses = DefaultBear(list)
    for amp, task in amp_tasks:
        model.params.update({
            k: t.tensor(v, requires_grad=True, dtype=t.double).to(device)
            for k, v in weights[0].items()
        })
        sgd = t.optim.SGD(model.parameters(), lr=Args.learning_rate)

        proper = t.tensor(task.proper()).to(device)
        samples = t.tensor(task.samples(Args.k_shot)).to(device)

        for grad_ind in range(Args.grad_steps):
            # evaluate on the full (proper) set without gradients, for logging
            with t.no_grad():
                xs, labels = proper
                ys = model(xs.unsqueeze(-1))
                loss = model.criteria(ys, labels.unsqueeze(-1))
                logger.log(grad_ind, loss=loss.item(), silent=grad_ind != Args.grad_steps - 1)
                losses[f"amp-{amp:.2f}-loss"].append(loss.item())

            # adapt on the k-shot samples
            xs, labels = samples
            ys = model(xs.unsqueeze(-1))
            loss = model.criteria(ys, labels.unsqueeze(-1))

            sgd.zero_grad()
            loss.backward()
            sgd.step()

    # losses = np.array([v for k, v in losses.items()])
    import matplotlib.pyplot as plt

    fig = plt.figure()
    plt.title(f'Learning Curves')
    for amp, task in amp_tasks:
        plt.plot(losses[f"amp-{amp:.2f}-loss"], label=f"amp {amp:.2f}")
    plt.legend()
    logger.log_pyplot(None, key=f"losses/learning_curves_amp.png", fig=fig)
    plt.close()

    average_losses = np.array([losses[f"amp-{amp:.2f}-loss"] for amp, task in amp_tasks])
    fig = plt.figure()
    plt.title(f'Learning Curves Averaged amp ~ [5 - 10]')
    plt.plot(average_losses.mean(0))
    plt.ylim(0, 28)
    logger.log_pyplot(None, key=f"losses/learning_curves_amp_all.png", fig=fig)
    plt.close()
def __init__(self, exp_prefix, est_params, sim_params, observations, keys_of_interest,
             n_mc_samples=10**7, n_x_cond=5, n_seeds=5, use_gpu=True, tail_measures=True):

    assert est_params and exp_prefix and sim_params and keys_of_interest
    assert observations.all()

    # every simulator configuration will be run multiple times with different randomness seeds
    sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

    self.observations = observations
    self.n_mc_samples = n_mc_samples
    self.n_x_cond = n_x_cond
    self.keys_of_interest = keys_of_interest
    self.exp_prefix = exp_prefix
    self.use_gpu = use_gpu
    self.tail_measures = tail_measures

    logger.configure(log_directory=config.DATA_DIR, prefix=exp_prefix, color='green')

    ''' ---------- Either load or generate the configs ----------'''
    config_pkl_path = os.path.join(logger.log_directory, logger.prefix, EXP_CONFIG_FILE)

    if os.path.isfile(config_pkl_path):
        logger.log("{:<70s} {:<30s}".format("Loading experiment previous configs from file: ", config_pkl_path))
        self.configs = logger.load_pkl(EXP_CONFIG_FILE)
    else:
        logger.log("{:<70s} {:<30s}".format("Generating and storing experiment configs under: ", config_pkl_path))
        self.configs = self._generate_configuration_variants(est_params, sim_params)
        logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)

    ''' ---------- Either load already existing results or start a new result collection ---------- '''
    results_pkl_path = os.path.join(logger.log_directory, logger.prefix, RESULTS_FILE)

    if os.path.isfile(results_pkl_path):
        logger.log_line("{:<70s} {:<30s}".format("Continue with: ", results_pkl_path))
        self.gof_single_res_collection = dict(logger.load_pkl_log(RESULTS_FILE))
    else:  # start from scratch
        self.gof_single_res_collection = {}

    self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
def launch_maml_mlp(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-maml-mlp'
    G.update(_G)
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))
    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)
    maml(test_fn=standard_sine_test)
def run_benchmark_train_test_fit_cv_ml(dataset, model_dict, seed=27, n_train_valid_splits=1,
                                       shuffle_splits=True, n_jobs_outer=-1):
    if logger.log_directory is None:
        logger.configure(log_directory='/tmp/ml-logger')

    rds = np.random.RandomState(seed)

    logger.log("\n------------------ empirical cv_ml benchmark with %s ----------------------" % str(dataset))

    datasets = zip(*dataset.get_train_valid_splits(valid_portion=0.2, n_splits=n_train_valid_splits,
                                                   shuffle=shuffle_splits, random_state=rds))
    exps = list(zip(*itertools.product(model_dict.items(), datasets)))

    manager = Manager()
    result_dict = manager.dict()
    for estimator_key in model_dict.keys():
        result_dict[estimator_key] = manager.list()

    def _fit_by_cv_ml_eval(model_dict_tuple, train_valid_set_tuple):
        estimator_key, conf_dict = model_dict_tuple
        X_train, Y_train, X_valid, Y_valid = train_valid_set_tuple

        estimator = _initialize_model_cv_ml(conf_dict)
        estimator.fit(X_train, Y_train)
        score = estimator.score(X_valid, Y_valid)

        result_dict[estimator_key].append(score)
        logger.log('%s: score: %.4f' % (estimator_key, score))

    executor = AsyncExecutor(n_jobs=n_jobs_outer)
    executor.run(_fit_by_cv_ml_eval, *exps)

    # convert result_dict into normal python dict containing lists
    result_dict = dict([(key, list(value)) for key, value in result_dict.items()])
    pprint(result_dict)

    # rearrange results as pandas df
    final_results_dict = {'scores_mean': [], 'scores_std': [], 'dataset': []}
    for estimator_key, scores in result_dict.items():
        final_results_dict['scores_mean'].append(np.nanmean(scores))
        final_results_dict['scores_std'].append(np.nanstd(scores))
        final_results_dict['dataset'].append(str(dataset))

    df = pd.DataFrame.from_dict(data=final_results_dict, orient='columns')
    df.index = list(model_dict.keys())

    logger.log('\n' + str(df))
    return df
def launch_reptile_auto_gru(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-reptile-auto-gru'
    G.update(_G)
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))
    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)
    auto_rnn = FunctionalAutoGRU(1, 1, 10)
    reptile(model=auto_rnn, test_fn=standard_sine_test)
def launch_maml_lstm(log_prefix=None, **_G):
    G.log_prefix = log_prefix or f'{now:%Y-%m-%d}/debug-maml-baselines/sinusoid-maml-lstm'
    G.update(_G)
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))
    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)
    auto_rnn = FunctionalLSTM(1, 1, 10)
    maml(model=auto_rnn, test_fn=standard_sine_test)
def experiment():
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX, color='green')

    # 1) EUROSTOXX
    dataset = datasets.EuroStoxx50()
    result_df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5,
                                                shuffle_splits=False, n_folds=5, seed=22,
                                                n_jobs_inner=-1, n_jobc_outer=3)

    # 2) NYC Taxi
    for n_samples in [10000]:
        dataset = datasets.NCYTaxiDropoffPredict(n_samples=n_samples)
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5,
                                             shuffle_splits=True, n_folds=5, seed=22,
                                             n_jobs_inner=-1, n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    # 3) UCI
    for dataset_class in [datasets.BostonHousing, datasets.Conrete, datasets.Energy]:
        dataset = dataset_class()
        df = run_benchmark_train_test_fit_cv(dataset, model_dict, n_train_valid_splits=3, n_eval_seeds=5,
                                             shuffle_splits=True, n_folds=5, seed=22,
                                             n_jobs_inner=-1, n_jobc_outer=3)
        result_df = pd.concat([result_df, df], ignore_index=True)

    logger.log('\n', str(result_df))
    logger.log('\n', result_df.to_latex())
def run_e_maml():
    # print(config.RUN.log_directory)
    # if config.G.run_mode == "e_maml":
    #     print('{G.inner_alg} E-MAML'.format(G=config.G))
    # elif config.G.run_mode == "maml":
    #     print('{G.inner_alg} Vanilla MAML'.format(G=config.G))

    # todo: let's take the control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=config.RUN.log_directory, prefix=f"run_maml-{config.G.seed}")
    logger.log_params(RUN=vars(config.RUN), G=vars(config.G), Reporting=vars(config.Reporting),
                      DEBUG=vars(config.DEBUG))

    import sys
    print(" ".join(sys.argv))

    tasks = MetaRLTasks(env_name=config.G.env_name, batch_size=config.G.n_parallel_envs,
                        start_seed=config.G.start_seed, task_seed=config.G.task_seed,
                        log_directory=(config.RUN.log_directory + "/{seed}") if config.G.render else None,
                        max_steps=config.G.env_max_timesteps)

    test_tasks = MetaRLTasks(env_name=config.G.env_name, batch_size=config.G.n_parallel_envs,
                             start_seed=config.G.test_start_seed, task_seed=config.G.test_task_seed,
                             log_directory=(config.RUN.log_directory + "/{seed}") if config.G.render else None,
                             max_steps=config.G.env_max_timesteps) if config.G.eval_test_interval \
        else ExitStack()

    # with Dashboard(config.RUN.prefix, server=config.Reporting.plot_server,
    #                port=config.Reporting.plot_server_port) as dash, U.single_threaded_session(), tasks, test_tasks:
    with U.make_session(num_cpu=config.G.n_cpu), tasks, test_tasks:
        # logger.on_dumpkvs(make_plot_fn(dash))
        maml = E_MAML(ob_space=tasks.envs.observation_space, act_space=tasks.envs.action_space)

        summary = tf.summary.FileWriter(config.RUN.log_directory, tf.get_default_graph())
        summary.flush()

        trainer = Trainer()
        U.initialize()
        trainer.train(tasks=tasks, maml=maml, test_tasks=test_tasks)
        # logger.clear_callback()

    tf.reset_default_graph()
def torch_upload():
    from ml_logger import logger
    import numpy as np

    logger.configure(root_dir="http://54.71.92.65:9080", prefix="geyang/ml_logger-debug/test-1",
                     register_experiment=True)
    logger.log_params(args={})

    with logger.Sync():
        import os
        import torch
        from pycurl import Curl
        from tempfile import NamedTemporaryFile

        logger.remove('upload/example.pt')

        with NamedTemporaryFile(delete=True) as f:
            torch.save(np.ones([10_000_000]), f)
            # torch.save(np.ones([1000_000]), f)
            logger.print(f.name)

            c = Curl()
            c.setopt(c.URL, logger.root_dir)
            # proxy = os.environ.get('HTTP_PROXY')
            # c.setopt(c.PROXY, proxy)
            # logger.print('proxy:', proxy)
            c.setopt(c.TIMEOUT, 100000)
            c.setopt(c.HTTPPOST, [
                ('file', (
                    c.FORM_FILE, f.name,
                    c.FORM_FILENAME, logger.prefix + '/upload/example.pt',
                    c.FORM_CONTENTTYPE, 'plain/text',
                )),
            ])
            c.perform()
            c.close()
            logger.print('done')

    # logger.remove(".")
    # a = np.ones([1, 1, 100_000_000 // 4])
    # logger.print(f"the size of the tensor is {a.size}")
    # data = dict(key="ok", large=a)
    # logger.torch_save(data, f"save/data-{logger.now('%H.%M.%S')}.pkl")
    logger.print('done')
def run_e_maml(_G=None):
    import baselines.common.tf_util as U
    if _G is not None:
        config.G.update(_G)

    for k, v in [*vars(config.RUN).items(), *vars(config.G).items(),
                 *vars(config.Reporting).items(), *vars(config.DEBUG).items()]:
        comet_logger.log_parameter(k, v)

    # todo: let's take the control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=config.RUN.log_dir, prefix=config.RUN.log_prefix)
    logger.log_params(RUN=vars(config.RUN), G=vars(config.G), Reporting=vars(config.Reporting),
                      DEBUG=vars(config.DEBUG))
    logger.log_file(__file__)

    tasks = MetaRLTasks(env_name=config.G.env_name, batch_size=config.G.n_parallel_envs,
                        start_seed=config.G.start_seed,
                        log_directory=(config.RUN.log_directory + "/{seed}") if config.G.render else None,
                        max_steps=config.G.env_max_timesteps)

    # sess_config = tf.ConfigProto(log_device_placement=config.Reporting.log_device_placement)
    # with tf.Session(config=sess_config), tf.device('/gpu:0'), tasks:
    graph = tf.Graph()
    with graph.as_default(), U.make_session(num_cpu=config.G.n_cpu), tasks:
        maml = E_MAML(ob_space=tasks.envs.observation_space, act_space=tasks.envs.action_space)
        comet_logger.set_model_graph(tf.get_default_graph())

        # writer = tf.summary.FileWriter(logdir='/opt/project/debug-graph', graph=graph)
        # writer.flush()
        # exit()

        trainer = Trainer()
        U.initialize()
        trainer.train(tasks=tasks, maml=maml)
        logger.flush()

    tf.reset_default_graph()
def launch(**_G):
    import matplotlib
    matplotlib.use('Agg')

    G.update(_G)

    import numpy as np
    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))

    model = Model(**vars(G))
    from playground.maml.maml_torch.tasks import Sine
    maml_supervised(model, Sine, **vars(G))
def thunk(*args, **kwargs):
    import traceback
    from ml_logger import logger

    assert not (args and ARGS), \
        f"can not use position argument at both thunk creation as well as run.\n" \
        f"_args: {args}\n" \
        f"ARGS: {ARGS}\n"

    logger.configure(root_dir=RUN.server, prefix=PREFIX, register_experiment=False, max_workers=10)
    logger.log_params(host=dict(hostname=logger.hostname),
                      run=dict(status="running", startTime=logger.now(), job_id=logger.job_id))

    import time
    try:
        _KWARGS = {**KWARGS}
        _KWARGS.update(**kwargs)

        results = fn(*(args or ARGS), **_KWARGS)

        logger.log_line("========== execution is complete ==========")
        logger.log_params(run=dict(status="completed", completeTime=logger.now()))
        logger.flush()
        time.sleep(3)
    except Exception as e:
        tb = traceback.format_exc()
        with logger.SyncContext():  # make sure the upload finishes before termination.
            logger.print(tb, color="red")
            logger.log_text(tb, filename="traceback.err")
            logger.log_params(run=dict(status="error", exitTime=logger.now()))
            logger.flush()
        time.sleep(3)
        raise e

    return results
def run_maml(_G=None):
    if _G is not None:
        G.update(_G)

    for k, v in vars(G).items():
        comet_logger.log_parameter(k, v)

    # todo: let's take the control of the log directory away from the train script. It should all be set from outside.
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))
    logger.log_file(__file__)

    tasks = MetaRLTasks(env_name=G.env_name, batch_size=G.n_parallel_envs,
                        start_seed=G.start_seed, max_steps=G.env_max_timesteps)
    env = tasks.sample()
    print(env)
def test_store_load_configrunner_pipeline(self):
    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX)
    test_dir = os.path.join(logger.log_directory, logger.prefix)
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)

    keys_of_interest = ['task_name', 'estimator', 'simulator', 'n_observations', 'center_sampling_method',
                        'x_noise_std', 'y_noise_std', 'ndim_x', 'ndim_y', 'n_centers', "n_mc_samples",
                        "n_x_cond", 'mean_est', 'cov_est', 'mean_sim', 'cov_sim', 'kl_divergence',
                        'hellinger_distance', 'js_divergence', 'x_cond', 'random_seed', "mean_sim", "cov_sim",
                        "mean_abs_diff", "cov_abs_diff", "VaR_sim", "VaR_est", "VaR_abs_diff",
                        "CVaR_sim", "CVaR_est", "CVaR_abs_diff", "time_to_fit"]

    conf_est, conf_sim, observations = question1()
    conf_runner = ConfigRunner(EXP_PREFIX, conf_est, conf_sim, observations=observations,
                               keys_of_interest=keys_of_interest,
                               n_mc_samples=1 * 10 ** 2, n_x_cond=5, n_seeds=5)

    conf_runner.configs = random.sample(conf_runner.configs, NUM_CONFIGS_TO_TEST)
    conf_runner.run_configurations(dump_models=True, multiprocessing=False)
    results_from_pkl_file = dict(logger.load_pkl(RESULTS_FILE))

    """ check if model dumps have all been created """
    dump_dir = os.path.join(logger.log_directory, logger.prefix, 'model_dumps')
    model_dumps_list = os.listdir(dump_dir)  # get list of all model files
    model_dumps_list_no_suffix = [os.path.splitext(entry)[0] for entry in model_dumps_list]  # remove suffix

    for conf in conf_runner.configs:
        self.assertTrue(conf['task_name'] in model_dumps_list_no_suffix)

    """ check if model dumps can be used successfully """
    for model_dump_i in model_dumps_list:
        # tf.reset_default_graph()
        with tf.Session(graph=tf.Graph()):
            model = logger.load_pkl("model_dumps/" + model_dump_i)
            self.assertTrue(model)

            if model.ndim_x == 1 and model.ndim_y == 1:
                self.assertTrue(model.plot3d(show=False))
def launch_training():
    from playground.maml.maml_torch.maml_multi_step import maml, G

    np.random.seed(G.seed)
    t.manual_seed(G.seed)
    t.cuda.manual_seed(G.seed)

    from datetime import datetime
    now = datetime.now()
    G.log_prefix = f"{now:%Y-%m-%d}/new-maml-torch/out-of-distribution"
    G.n_epochs = 70000  # from cbfinn universality paper
    G.n_gradient_steps = 5
    G.test_grad_steps = [1, 5]
    G.test_interval = 5
    G.save_interval = 100  # save the weights every 100 epochs.
    logger.configure(log_directory=G.log_dir, prefix=G.log_prefix)
    logger.log_params(G=vars(G))
    maml(test_fn=all_tests)
def start_run(exp_name, params, runID=None):
    coll = get_exp_coll()
    if runID is None:
        runID = make_id(8)
    run_tstamp = datetime.datetime.now().strftime("%x :: %X")
    script_txt = ' '.join(sys.argv)
    run_doc = {
        'runID': runID,
        'params': params,
        'runpath': os.path.join(LOG_ROOT, exp_name + '/' + runID),
        'script': sys.argv[0],
        'command': script_txt,
        'timestamp': run_tstamp
    }
    _logger.configure(LOG_ROOT, prefix=exp_name + '/' + runID)
    coll.update_one({'expName': exp_name}, {'$push': {'runs': run_doc}})
    _logger.log_params(Args=params)
    return _logger
def _(*args, **kwargs):
    import traceback
    from ml_logger import logger

    assert not (args and ARGS), f"can not use position argument at both thunk creation as well as " \
                                f"run.\n_args: {args}\nARGS: {ARGS}"

    logger.configure(log_directory=RUN.server, prefix=PREFIX, register_experiment=False, max_workers=10)
    logger.log_params(host=dict(hostname=logger.hostname),
                      run=dict(status="running", startTime=logger.now()))
    try:
        _KWARGS = KWARGS.copy()
        _KWARGS.update(kwargs)

        fn(*(args or ARGS), **_KWARGS)

        logger.log_line("========= execution is complete ==========")
        logger.log_params(run=dict(status="completed", completeTime=logger.now()))
    except Exception as e:
        import time
        time.sleep(1)

        tb = traceback.format_exc()
        with logger.SyncContext():  # make sure the upload finishes before termination.
            logger.log_text(tb, filename="traceback.err")
            logger.log_params(run=dict(status="error", exitTime=logger.now()))
            logger.log_line(tb)
            logger.flush()
        time.sleep(30)
        raise e

    import time
    time.sleep(30)
from ml_logger import logger

### First configure the logger to log to a directory (or a server)
logger.configure('/tmp/ml-logger-debug')
# outputs ~>
# logging data to /tmp/ml-logger-debug

# We can log individual keys
for i in range(1):
    logger.log(metrics={'some_val/smooth': 10, 'status': f"step ({i})"}, reward=20, timestep=i)
    ### flush the data, otherwise the value would be overwritten with new values in the next iteration.
    logger.flush()
# outputs ~>
# ╒════════════════════╤════════════════════════════╕
# │ reward             │ 20                         │
# ├────────────────────┼────────────────────────────┤
# │ timestep           │ 0                          │
# ├────────────────────┼────────────────────────────┤
# │ some val/smooth    │ 10                         │
# ├────────────────────┼────────────────────────────┤
# │ status             │ step (0)                   │
# ├────────────────────┼────────────────────────────┤
# │ timestamp          │'2018-11-04T11:37:03.324824'│
# ╘════════════════════╧════════════════════════════╛

for i in range(100):
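# (The example above is cut off at this last loop. A plausible continuation -- a sketch only,
#  reusing the logger.log / logger.flush calls already shown in this snippet; the values logged
#  below are illustrative, not taken from the original source.)
    logger.log(metrics={'some_val/smooth': 10, 'status': f"step ({i})"}, reward=20, timestep=i)
    logger.flush()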
def get_density_plots(estimators_list, simulators_dict, path_to_results, exp_prefix="question1_noise_reg_x",
                      task_ids=None):
    """
    Compares plots from estimators and simulators (i.e. fitted and true densities).
    Two modes are currently available:
      1) by specifying estimators and a simulator, the function picks one matching result pair at random
         from the given simulator/estimator selection
      2) by specifying task_ids as a list, it is possible to pick specific plots to compare

    Args:
        estimators_list: a list of estimator names to be evaluated,
            e.g. ['KernelMixtureNetwork', 'MixtureDensityNetwork']
        simulators_dict: a dict specifying the simulator under which the estimators shall be compared,
            e.g. {'heteroscedastic': True, 'random_seed': 20, 'std': 1, 'simulator': 'EconDensity'}
        path_to_results: absolute path to where the dumped model files are stored
        exp_prefix: specifies the task question

    Returns:
        A list of figures for fitted and true densities.
    """
    if task_ids is not None:
        assert type(task_ids) == list
        assert len(task_ids) == len(estimators_list)

    RESULTS_FILE = 'results.pkl'
    logger.configure(path_to_results, exp_prefix)

    results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
    gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
    results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST)

    """ load the models' estimators """
    if task_ids is None:
        models_of_interest = {
            k: v for k, v in gof_result.single_results_dict.items()
            if v.probabilistic_model_params == simulators_dict and v.ndim_x + v.ndim_y == 2
        }
        models = [
            ConfigRunner.load_dumped_estimator(take_of_type(1, estimator_str, models_of_interest))
            for estimator_str in estimators_list
        ]
    else:
        models = [ConfigRunner.load_dumped_estimators(gof_result, task_id=task_ids)]

    """ load the models' simulators """
    # todo: implement when simulator dumps exist

    figs = []
    for model in models:
        graph = model.estimator.sess.graph
        sess = tf.Session(graph=graph)
        with sess:
            sess.run(tf.global_variables_initializer())
            model.estimator.sess = sess

            """ fitted density figures """
            plt.suptitle(model.estimator.name)
            fig_fitted = model.estimator.plot3d()
            figs.append(fig_fitted)

            """ true density figures """
            # todo: use newly dumped simulators

        sess.close()

    return figs
            'y_noise_std': [0.1],
        },
    }

    simulators_params = {'LinearStudentT': {'ndim_x': [10]}}

    observations = 100 * np.logspace(2, 6, num=8, base=2.0, dtype=np.int32)

    return estimator_params, simulators_params, observations


if __name__ == '__main__':
    estimator_params, simulators_params, observations = question4()
    load = base_experiment.launch_experiment(estimator_params, simulators_params, observations, EXP_PREFIX,
                                             n_mc_samples=N_MC_SAMPLES, tail_measures=False)

    if load:
        logger.configure(config.DATA_DIR, EXP_PREFIX)

        results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
        gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
        results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST)

        gof_result = ConfigRunner.load_dumped_estimators(gof_result)
def instr(fn, *ARGS, __file=False, __silent=False, **KWARGS):
    """
    thunk for configuring the logger. The reason why this is not a decorator is

    :param fn: function to be called
    :param *ARGS: position arguments for the call
    :param __file: console mode, by-pass file related logging
    :param __silent: do not print
    :param **KWARGS: keyword arguments for the call
    :return: a thunk that can be called without parameters
    """
    from ml_logger import logger

    if __file:
        caller_script = pJoin(os.getcwd(), __file)
    else:
        launch_module = inspect.getmodule(inspect.stack()[1][0])
        __file = launch_module.__file__
        caller_script = abspath(__file)

    # note: for scripts in the `plan2vec` module this also works -- b/c we truncate fixed depth.
    script_path = logger.truncate(caller_script, depth=len(__file__.split('/')) - 1)
    file_stem = logger.stem(script_path)
    file_name = basename(file_stem)
    RUN(file_name=file_name, file_stem=file_stem, now=logger.now())

    PREFIX = RUN.PREFIX

    # todo: there should be a better way to log these.
    # todo: we shouldn't need to log to the same directory, and the directory for the run shouldn't be fixed.
    logger.configure(root_dir=RUN.server,
                     prefix=PREFIX,
                     asynchronous=False,  # use sync logger
                     max_workers=4,
                     register_experiment=False)
    if RUN.restart:
        with logger.Sync():
            logger.remove(".")
    logger.upload_file(caller_script)

    # the tension is in between creation vs run. Code snapshots are shared, but runs need to be unique.
    _ = dict()
    if ARGS:
        _['args'] = ARGS
    if KWARGS:
        _['kwargs'] = KWARGS

    logger.log_params(run=logger.run_info(status="created", script_path=script_path),
                      revision=logger.rev_info(),
                      fn=logger.fn_info(fn),
                      **_,
                      silent=__silent)

    logger.print('taking diff, if this step takes too long, check if your '
                 'uncommitted changes are too large.', color="green")
    logger.diff()
    if RUN.readme:
        logger.log_text(RUN.readme, "README.md", dedent=True)

    import jaynes  # now set the job name to prefix
    if jaynes.RUN.config and jaynes.RUN.mode != "local":
        runner_class, runner_args = jaynes.RUN.config['runner']
        if 'name' in runner_args:  # ssh mode does not have 'name'.
            runner_args['name'] = pJoin(file_name, RUN.JOB_NAME)
        del logger, jaynes, runner_args, runner_class
        if not __file:
            cprint(f'Set up job name', "green")

    def thunk(*args, **kwargs):
        import traceback
        from ml_logger import logger

        assert not (args and ARGS), \
            f"can not use position argument at both thunk creation as well as run.\n" \
            f"_args: {args}\n" \
            f"ARGS: {ARGS}\n"

        logger.configure(root_dir=RUN.server, prefix=PREFIX, register_experiment=False, max_workers=10)
        logger.log_params(host=dict(hostname=logger.hostname),
                          run=dict(status="running", startTime=logger.now(), job_id=logger.job_id))

        import time
        try:
            _KWARGS = {**KWARGS}
            _KWARGS.update(**kwargs)

            results = fn(*(args or ARGS), **_KWARGS)

            logger.log_line("========== execution is complete ==========")
            logger.log_params(run=dict(status="completed", completeTime=logger.now()))
            logger.flush()
            time.sleep(3)
        except Exception as e:
            tb = traceback.format_exc()
            with logger.SyncContext():  # make sure the upload finishes before termination.
                logger.print(tb, color="red")
                logger.log_text(tb, filename="traceback.err")
                logger.log_params(run=dict(status="error", exitTime=logger.now()))
                logger.flush()
            time.sleep(3)
            raise e

        return results

    return thunk
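# For context: a minimal usage sketch for the `instr` helper above, assuming the surrounding RUN
# configuration it reads is already set up. The `train` function below is a stand-in written for
# illustration, not part of the source; per the docstring, the returned thunk can be called without
# parameters, or handed off to a launcher such as jaynes.
def train(lr=3e-4, seed=100):
    from ml_logger import logger
    logger.log_params(train=dict(lr=lr, seed=seed))

thunk = instr(train, lr=1e-3, seed=200)  # logs run metadata, uploads the caller script, takes a git diff
thunk()                                  # calls train(lr=1e-3, seed=200) under the configured logger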
from cde.model_fitting.GoodnessOfFitResults import GoodnessOfFitResults
from cde.evaluation.simulation_eval import base_experiment
import cde.model_fitting.ConfigRunner as ConfigRunner
from ml_logger import logger
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import os

EXP_PREFIX = "question5_benchmark"
RESULTS_FILE = "results.pkl"
CLUSTER_DIR = "/home/simon/Documents/KIT/Informatik/Bachelorarbeit/Conditional_Density_Estimation/data/cluster"
LOCATION = "{}/{}/{}".format(CLUSTER_DIR, EXP_PREFIX, RESULTS_FILE)

logger.configure(
    "/home/simon/Documents/KIT/Informatik/Bachelorarbeit/Conditional_Density_Estimation/data/cluster",
    EXP_PREFIX,
)

with open(LOCATION, "rb") as fh:
    results_from_pkl_file = pickle.load(fh)

gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST)
results_df.replace(to_replace=[None], value="None", inplace=True)

estimators = [
    "MixtureDensityNetwork",
    "KernelMixtureNetwork",
    "NormalizingFlowEstimator",
]
simulators = ["ArmaJump", "EconDensity", "GaussianMixture", "SkewNormal"]
def continue_run(exp_name, runID):
    _logger.configure(LOG_ROOT, prefix=exp_name + '/' + runID)
    return _logger
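# For context: the make_experiment, start_run, and continue_run helpers above compose into a simple
# experiment lifecycle -- a sketch only, assuming the same LOG_ROOT and Mongo collection they rely on;
# the experiment name, params, and runID below are illustrative.
expID = make_experiment('cartpole-ppo', zip_project=True)    # registers the experiment and snapshots the source
run_logger = start_run('cartpole-ppo', params={'lr': 3e-4})  # appends a run document and returns the logger
run_logger.log_line('training started')
# ...later, resume logging into the same run directory using its known runID:
run_logger = continue_run('cartpole-ppo', runID='a1b2c3d4')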
def test_configuration(log_dir):
    logger.configure(log_dir, prefix='main_test_script', color='green')
    logger.log("This is a unittest")
    logger.log("Some stats", reward=0.05, kl=0.001)
    logger.flush()
from ml_logger import logger
from dmc_gen_analysis import RUN

for i in range(9):
    logger.configure(root_dir=RUN.server,
                     prefix=f"/geyang/dmc_gen/2021/03-05/00_setup/train/01.17.36/{i}")
    logger.log_text("""
keys:
- Args.seed
- Args.algorithm
charts:
- yKey: episode_reward/mean
  xKey: step
- yKey: train/episode_reward/mean
  xKey: step
""", filename=".charts.yml", overwrite=True)
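# For context: the .charts.yml written above only tells the dashboard which keys to plot; the metrics
# themselves are logged separately. A minimal sketch of a matching log call, reusing the logger.log /
# logger.flush pattern from the earlier snippets -- the loop bound, reward value, and key layout here
# are illustrative assumptions, not taken from the source.
for step in range(1000):
    episode_reward = 0.0  # placeholder for the actual mean episode return at this step
    logger.log(metrics={"episode_reward/mean": episode_reward, "step": step})
    logger.flush()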
from cde.model_fitting.GoodnessOfFitResults import GoodnessOfFitResults
from cde.evaluation.simulation_eval import base_experiment
import cde.model_fitting.ConfigRunner as ConfigRunner
from ml_logger import logger
import matplotlib.pyplot as plt
import os

EXP_PREFIX = "question6_noise_schedules"
RESULTS_FILE = "results.pkl"
CLUSTER_DIR = "/local/rojonas/cde/data/local"
LOCATION = "{}/{}/{}".format(CLUSTER_DIR, EXP_PREFIX, RESULTS_FILE)
DATA_DIR_LOCAL = "/home/jonasrothfuss/Dropbox/Eigene_Dateien/ETH/02_Projects/02_Noise_Regularization/02_Code_Conditional_Density_Estimation/data/cluster"

logger.configure(
    # "/local/rojonas/cde/data/local",
    DATA_DIR_LOCAL,
    EXP_PREFIX,
)

results_from_pkl_file = dict(logger.load_pkl_log(RESULTS_FILE))
gof_result = GoodnessOfFitResults(single_results_dict=results_from_pkl_file)
results_df = gof_result.generate_results_dataframe(base_experiment.KEYS_OF_INTEREST_LOGPROB)
results_df.replace(to_replace=[None], value="None", inplace=True)

estimators = [
    "MixtureDensityNetwork",
    "KernelMixtureNetwork",
    "NormalizingFlowEstimator",
]
simulators = ["EconDensity", "GaussianMixture", "SkewNormal"]