chart = top_chart & bottom_chart

# put a timestamp
ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
chart = chart.properties(title=f'{trial_id} {ts}')
file_name = os.path.join(directory, 'vegalite', f'{trial_id}_view_compare.html')
logging.info(f'writing comparison visualisation to {file_name}')
chart.save(file_name)


if __name__ == "__main__":
    # an entry point to let us compare across two experiments, particularly two verde sets.
    # Note that the vis will still refer to baseline and verde.
    logging = vutils.configure_logger('compare.log', logging.DEBUG)
    _trial_id = 'trial_01.verde_verde_exp_01_02'
    _directory = '../laboratory/trial_01'
    with open(f'{_directory}/trial_01.exp_01_verde_results.json') as f:
        _baseline_results = json.load(f)
    with open(f'{_directory}/trial_01.exp_02_verde_results.json') as f:
        _verde_results = json.load(f)
    # but in this case the match won't work because the data url will be different,
    # so we trust you know what you are doing and we will copy across the data url from
    # one set to the other.
    data_url = _baseline_results[0]['vl']['data']['url']
    for result in _verde_results:
        result['vl']['data']['url'] = data_url
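# For context, a minimal sketch of the Altair pattern used above: the `&` operator
# vertically concatenates two charts, and .save() infers HTML output from the file
# extension. The data frame and field names here are illustrative, not from the
# trial result files.
import altair as alt
import pandas as pd

_df = pd.DataFrame({'x': [1, 2, 3], 'baseline': [0.2, 0.5, 0.4], 'verde': [0.3, 0.4, 0.6]})
_top = alt.Chart(_df).mark_line().encode(x='x', y='baseline')
_bottom = alt.Chart(_df).mark_line().encode(x='x', y='verde')
_combined = (_top & _bottom).properties(title='illustrative comparison')
_combined.save('example_compare.html')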
    SpearmanCorrelationEliminator,
)


if __name__ == "__main__":
    sys.path.append("./")

    # route all CuPy allocations through a managed-memory pool so large feature
    # sets can spill from device memory into host memory instead of failing
    pool = cp.cuda.MemoryPool(cp.cuda.malloc_managed)
    cp.cuda.set_allocator(pool.malloc)

    warnings.filterwarnings("ignore")

    parser = get_preprocess_parser()
    args = parser.parse_args()
    config = load_config(args.config)
    configure_logger(args.config, log_dir=args.log_dir, debug=args.debug)
    seed_everything(config["seed_everything"])

    logging.info(f"config: {args.config}")
    logging.info(f"debug: {args.debug}")

    config["args"] = dict()
    config["args"]["config"] = args.config

    # make output dir
    output_root_dir = Path(config["output_dir"])
    feature_dir = Path(config["dataset"]["feature_dir"])
    config_name = args.config.split("/")[-1].replace(".yml", "")
    output_dir = output_root_dir / config_name
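# load_config is repo-internal; a plausible minimal implementation, assuming the
# configs are plain YAML (the ".yml" handling above suggests so):
import yaml

def load_config(path: str) -> dict:
    """Read a YAML config file into a dict (sketch, not the repo's actual code)."""
    with open(path) as f:
        return yaml.safe_load(f)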
    # strip characters that are awkward in file names; slashes become
    # underscores so nested figure names stay unique
    figure_name = figure_name.replace("=", "").replace(":", "").replace("_", "").replace(".", "") \
        .replace("/", "_").replace("$", "").replace("\\", "")
    path = os.path.join(dir_name, figure_name + ".pdf")
    os.makedirs(os.path.dirname(path), exist_ok=True)
    logger.info('Figure saved to: ' + path)
    fig = plt.gcf()
    fig.savefig(path, bbox_inches='tight', transparent=True, pad_inches=0)
    if copy_to_dropbox:
        # leading / because it is a dropbox app with its own folder
        transferData.upload_file(path, os.path.join("/", PROJECT_NAME, path))
    if copy_to_neptune:
        neptune_exp = get_neptune_exp()
        neptune_exp.send_image(figure_name, fig)


if __name__ == "__main__":
    configure_logger('')
    configure_neptune_exp('tst')
    plt.plot([1, 2, 3], [1, 2, 4])
    save_fig("examples", "quadratic", copy_to_dropbox=True, copy_to_neptune=True)
    plt.show()
    plt.close()
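# The chained .replace() calls above could be condensed with one regex; a sketch
# of an equivalent sanitiser (same character set, same slash-to-underscore rule):
import re

def sanitise_figure_name(name: str) -> str:
    # drop '=', ':', '_', '.', '$' and '\', then map '/' to '_'
    return re.sub(r"[=:_.$\\]", "", name).replace("/", "_")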
 * Each script saves to the save_dir FINISHED when done
"""
print("Remember to multiply your number of jobs by the number of threads per CPU")

import os
import time
import numpy as np
from os import path
import pandas as pd
import argh
import subprocess
import logging

from src.utils import configure_logger

configure_logger('', log_file=None)

RESULTS_DIR = os.environ.get("RESULTS_DIR", os.path.join(os.path.dirname(__file__), "results"))

logger = logging.getLogger(__name__)


def get_next_free_gpu():
    for i in range(10):
        try:
            output = subprocess.check_output(['nvidia-smi', '-i', str(i)]).decode("utf-8")
        except Exception:
            logger.warning("Failed nvidia-smi {}".format(i))
            output = ""
        if output == "No devices were found":
            return None
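# get_next_free_gpu is truncated in this excerpt; one hedged way to finish the
# idea is to query per-GPU memory and treat a near-idle card as free. The 100 MiB
# threshold and the single-query approach are assumptions, not the author's values.
def pick_free_gpu(max_used_mib: int = 100):
    try:
        out = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,noheader,nounits']
        ).decode("utf-8")
    except Exception:
        logger.warning("nvidia-smi not available")
        return None
    for i, line in enumerate(out.strip().splitlines()):
        if int(line) <= max_used_mib:
            return i
    return None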
def train(args):
    # create necessary dirs if not exists
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # init logger
    configure_logger(args.save_dir, args.log_name)
    logging.info("Got arguments: %s" % args.__dict__)

    logging.info("Getting Dataset...")
    data = preprocess_dataset(get_datasets(args.data_dir, "formal"),
                              get_datasets(args.data_dir, "informal"),
                              args.embedding_path, args.embedding_size,
                              args.limit_evals)

    # extract train data
    X, X_len, Y = (data["train"][v] for v in ("sentences", "sentence_lengths", "labels"))
    logging.info(
        "Found train/tune/test inputs of dims %s/%s/%s, embedder of dims %s" %
        (X.shape, data["tune"]["sentences"].shape,
         data["test"]["sentences"].shape, data["embedding"].shape))
    if args.debug:
        logging.info("Entering debug mode: training for a maximum of 2 batches")

    # shuffle and batch
    trn_X, trn_X_len, trn_Y, trn_batches = shuffle_and_batch(data, "train", args.batch_size)
    tun_X, tun_X_len, tun_Y, tun_batches = shuffle_and_batch(data, "tune", args.batch_size)
    tst_X, tst_X_len, tst_Y, tst_batches = shuffle_and_batch(data, "test", args.batch_size)

    logging.info("Building Model and Saver...")
    # build model
    model = GruClassifier(deep_dict_defaults(
        {
            'vocab_size': data["embedding"].shape[0],
            'embedding_size': data["embedding"].shape[1],
            'max_time': trn_X.shape[0],
        }, args.__dict__), mode='train')
    logging.info("Created model %s" % model)

    # keep a global step counter, and build saver
    gstep = tf.Variable(-1, name="global_step", trainable=False, dtype=tf.int32)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=args.epoches)

    logging.info("Starting TensorFlow session...")
    with tf.Session() as sess:
        # restore or init variables
        if args.restore:
            try:
                ghistory = pd.read_pickle(os.path.join(args.save_dir, args.metrics_name))
                saver.restore(sess, tf.train.latest_checkpoint(args.save_dir))
            except (ValueError, FileNotFoundError):
                logging.error("Failed to restore variables or metrics.")
                raise
        else:
            ghistory = pd.DataFrame(columns=("data", "epoch", "batch", "metric", "value"))
            sess.run(tf.global_variables_initializer())

        try:
            for epoch_num in range(gstep.eval() + 1, args.epoches):
                _ = sess.run(tf.assign(gstep, epoch_num))
                # evaluate train set
                eval_step(sess, model, saver, ghistory, gstep, data["embedding"],
                          trn_X, trn_X_len, trn_Y, trn_batches, "train",
                          args.debug, update=True)
                # evaluate tune set
                if epoch_num % args.save_step == 0:
                    eval_step(sess, model, saver, ghistory, gstep, data["embedding"],
                              tun_X, tun_X_len, tun_Y, tun_batches, "tune",
                              args.debug, save=True)
        except KeyboardInterrupt:
            pass

        # evaluate test set
        eval_step(sess, model, saver, ghistory, gstep, data["embedding"],
                  tst_X, tst_X_len, tst_Y, tst_batches, "test",
                  args.debug, save=True)
        logging.info('#' * 10 + "Training is COMPLETE!" + '#' * 10)
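# deep_dict_defaults is a repo helper; judging by the call above, it overlays the
# caller's args onto a dict of defaults. A minimal sketch under that assumption:
def deep_dict_defaults(defaults: dict, overrides: dict) -> dict:
    """Return defaults updated with overrides, recursing into nested dicts (sketch)."""
    merged = dict(defaults)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_dict_defaults(merged[key], value)
        else:
            merged[key] = value
    return merged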
import logging
import multiprocessing
import os
import random

import numpy as np

from src.utils import configure_logger

DATA_DIR = os.environ.get("DATA_DIR", './data')
LOGS_DIR = os.environ.get("LOGS_DIR", './logs')
CONFIGS_DIR = os.environ.get(
    "CONFIG_DIR", os.path.join(os.environ['PROJECT_ROOT'], "configs"))

N_JOBS = int(os.environ.get("N_JOBS", -1))
if N_JOBS == -1:
    N_JOBS = multiprocessing.cpu_count()

if int(os.environ.get("DEBUG", 0)) >= 1:
    LOG_LEVEL = logging.DEBUG
else:
    LOG_LEVEL = logging.INFO

configure_logger(name='', console_logging_level=LOG_LEVEL, logs_dir=LOGS_DIR)
logger = logging.getLogger(__name__)


def set_random_seed():
    seed = int(os.environ.get('RANDOM_SEED', 0))
    logger.info(f"Setting random seed to {seed}")
    np.random.seed(seed)
    random.seed(seed)


set_random_seed()
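# Importing this module is what configures logging and seeds the RNGs; a hedged
# usage sketch (the "src.config" module path is an assumption for illustration):
import logging

from src import config  # side effects: logger configured, seeds set

logger = logging.getLogger(__name__)
logger.debug("visible only when DEBUG>=1 in the environment")
logger.info(f"running with N_JOBS={config.N_JOBS}, data in {config.DATA_DIR}")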
def run(opt):
    torch.set_printoptions(precision=8, sci_mode=False)

    opt = augment_options(opt)
    configure_logger(opt)
    check_options_are_valid(opt)
    rlog.info(f"\n{config_to_string(opt)}")

    # configure the environment
    env = wrap_env(gym.make(opt.game), opt)

    # configure estimator and policy
    if hasattr(opt.estimator, 'categorical'):
        _s = opt.estimator.categorical.support
        support = [_s.min, _s.max, _s.bin_no]
        estimator = MiniGridFF(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
            support=support,
        ).cuda()
    elif opt.estimator.ff:
        estimator = MiniGridFF(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
        ).cuda()
    else:
        estimator = MiniGridNet(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
        ).cuda()

    if hasattr(opt.estimator, "ensemble"):
        # Build Bootstrapped Ensembles objects
        estimator = BootstrappedEstimator(estimator, **opt.estimator.ensemble.__dict__)
        policy_evaluation = BootstrappedPE(
            estimator, env.action_space.n, opt.exploration.__dict__, vote=True)
        if hasattr(opt.estimator, 'categorical'):
            policy_improvement = BootstrappedPI(
                wt.CategoricalPolicyImprovement(
                    estimator,
                    optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
                    opt.gamma,
                ),
                categorical=True)
        else:
            policy_improvement = BootstrappedPI(
                wt.DQNPolicyImprovement(
                    estimator,
                    optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
                    opt.gamma,
                    is_double=opt.double,
                ))
    elif hasattr(opt.estimator, "dropout"):
        # Build Variational Dropout objects
        estimator = MiniGridDropnet(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
            p=opt.estimator.dropout,
            mc_samples=opt.estimator.mc_samples,
        ).cuda()
        policy_evaluation = DropPE(
            estimator,
            env.action_space.n,
            epsilon=opt.exploration.__dict__,
            thompson=opt.estimator.thompson,
        )
        policy_improvement = DropPI(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
            is_double=opt.double,
        )
    elif hasattr(opt.estimator, "categorical"):
        policy_evaluation = wt.EpsilonGreedyPolicy(
            estimator, env.action_space.n, epsilon=opt.exploration.__dict__)
        policy_improvement = wt.CategoricalPolicyImprovement(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
        )
    else:
        policy_evaluation = wt.EpsilonGreedyPolicy(
            estimator, env.action_space.n, epsilon=opt.exploration.__dict__)
        policy_improvement = wt.DQNPolicyImprovement(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
            is_double=opt.double,
        )

    policy = DQNPolicy(
        policy_evaluation,
        policy_improvement,
        wt.ExperienceReplay(**opt.er.__dict__)(),
        priority=opt.er.priority,
    )

    # additional info
    rlog.info(policy)
    rlog.info(estimator)

    # start training
    policy_iteration(env, policy, opt)
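# run() is driven entirely by attribute lookups on `opt`; a hedged sketch of the
# smallest namespace that would reach the plain feed-forward DQN branch above.
# All field values below are illustrative guesses, not the project's defaults.
from types import SimpleNamespace

_opt = SimpleNamespace(
    game="MiniGrid-Empty-8x8-v0",  # any gym id the wrappers accept
    lr=1e-4,
    gamma=0.99,
    double=True,
    estimator=SimpleNamespace(ff=True, lin_size=256),
    exploration=SimpleNamespace(start=1.0, end=0.1, steps=100_000),
    er=SimpleNamespace(hist_len=4, capacity=100_000, priority=False),
)
# run(_opt)  # would build MiniGridFF + EpsilonGreedyPolicy + DQNPolicyImprovement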