Example #1
    chart = top_chart & bottom_chart
    # add a timestamp to the chart title
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    chart = chart.properties(title=f'{trial_id} {ts}')

    file_name = os.path.join(directory, 'vegalite',
                             f'{trial_id}_view_compare.html')
    logging.info(f'writing comparison visualisation to {file_name}')
    chart.save(file_name)


if __name__ == "__main__":

    # an entry point for comparing results across two experiments, in particular two verde sets.
    # Note that the visualisation will still label them as baseline and verde.
    logging = vutils.configure_logger('compare.log', logging.DEBUG)
    _trial_id = 'trial_01.verde_verde_exp_01_02'
    _directory = '../laboratory/trial_01'

    with open(f'{_directory}/trial_01.exp_01_verde_results.json') as f:
        _baseline_results = json.load(f)

    with open(f'{_directory}/trial_01.exp_02_verde_results.json') as f:
        _verde_results = json.load(f)

    # In this case the match won't work because the data urls differ, so we trust that the
    # caller knows what they are doing and simply copy the data url from the baseline set
    # into the verde set.

    data_url = _baseline_results[0]['vl']['data']['url']
    for result in _verde_results:
        result['vl']['data']['url'] = data_url
Example #2
    SpearmanCorrelationEliminator,
)

if __name__ == "__main__":
    sys.path.append("./")

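    # use a CUDA managed (unified) memory pool so CuPy allocations are not hard-capped
    # by device memory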
    pool = cp.cuda.MemoryPool(cp.cuda.malloc_managed)
    cp.cuda.set_allocator(pool.malloc)

    warnings.filterwarnings("ignore")

    parser = get_preprocess_parser()
    args = parser.parse_args()

    config = load_config(args.config)
    configure_logger(args.config, log_dir=args.log_dir, debug=args.debug)

    seed_everything(config["seed_everything"])

    logging.info(f"config: {args.config}")
    logging.info(f"debug: {args.debug}")

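    # keep a record of which config file produced this run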
    config["args"] = dict()
    config["args"]["config"] = args.config

    # make output dir
    output_root_dir = Path(config["output_dir"])
    feature_dir = Path(config["dataset"]["feature_dir"])

    config_name = args.config.split("/")[-1].replace(".yml", "")
    output_dir = output_root_dir / config_name
Example #3
    # strip characters that are awkward in file names, then map path separators to underscores
    for char in "=:_.$\\":
        figure_name = figure_name.replace(char, "")
    figure_name = figure_name.replace("/", "_")
    path = os.path.join(dir_name, figure_name + ".pdf")

    os.makedirs(os.path.dirname(path), exist_ok=True)

    logger.info('Figure saved to: ' + path)

    fig = plt.gcf()
    fig.savefig(path, bbox_inches='tight', transparent=True, pad_inches=0)

    if copy_to_dropbox:
        # leading / because it is a Dropbox app with its own folder
        transferData.upload_file(path, os.path.join("/", PROJECT_NAME, path))

    if copy_to_neptune:
        neptune_exp = get_neptune_exp()
        neptune_exp.send_image(figure_name, fig)


if __name__ == "__main__":
    configure_logger('')
    configure_neptune_exp('tst')
    plt.plot([1, 2, 3], [1, 2, 4])
    save_fig("examples",
             "qudratic.pdf",
             copy_to_dropbox=True,
             copy_to_neptune=True)
    plt.show()
    plt.close()
Example #4
* Each script writes FINISHED to its save_dir when done
"""

print("Remember to multiply your number of jobs by the number of threads per CPU")

import os
import time
import numpy as np
from os import path
import pandas as pd
import argh
import subprocess
import logging

from src.utils import configure_logger
configure_logger('', log_file=None)
RESULTS_DIR = os.environ.get("RESULTS_DIR", os.path.join(os.path.dirname(__file__), "results"))

logger = logging.getLogger(__name__)


def get_next_free_gpu():
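    # probe up to 10 GPU indices via nvidia-smi; a "No devices were found" reply means
    # we have run past the last device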
    for i in range(10):
        try:
            output = subprocess.check_output(['nvidia-smi', '-i', str(i)]).decode("utf-8")
        except (subprocess.CalledProcessError, FileNotFoundError):
            logger.warning("Failed nvidia-smi {}".format(i))
            output = ""

        if output == "No devices were found":
            return None
Example #5
def train(args):

    # create necessary dirs if they don't exist
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # init logger
    configure_logger(args.save_dir, args.log_name)
    logging.info("Got arguments: %s" % args.__dict__)

    logging.info("Getting Dataset...")
    data = preprocess_dataset(get_datasets(args.data_dir, "formal"),
                              get_datasets(args.data_dir,
                                           "informal"), args.embedding_path,
                              args.embedding_size, args.limit_evals)

    # extract train data
    X, X_len, Y = (data["train"][v]
                   for v in ("sentences", "sentence_lengths", "labels"))
    logging.info(
        "Found train/tune/test inputs of dims %s/%s/%s, embedder of dims %s" %
        (X.shape, data["tune"]["sentences"].shape,
         data["test"]["sentences"].shape, data["embedding"].shape))

    if args.debug:
        logging.info(
            "Entering debug mode: training for a maximum of 2 batches")

    # shuffle and batch
    trn_X, trn_X_len, trn_Y, trn_batches = shuffle_and_batch(
        data, "train", args.batch_size)
    tun_X, tun_X_len, tun_Y, tun_batches = shuffle_and_batch(
        data, "tune", args.batch_size)
    tst_X, tst_X_len, tst_Y, tst_batches = shuffle_and_batch(
        data, "test", args.batch_size)

    logging.info("Building Model and Saver...")

    # build model
    model = GruClassifier(deep_dict_defaults(
        {
            'vocab_size': data["embedding"].shape[0],
            'embedding_size': data["embedding"].shape[1],
            'max_time': trn_X.shape[0],
        }, args.__dict__),
                          mode='train')
    logging.info("Created model %s" % model)

    # keep a global step counter, and build saver
    gstep = tf.Variable(-1,
                        name="global_step",
                        trainable=False,
                        dtype=tf.int32)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=args.epoches)

    logging.info("Starting TensorFlow session...")
    with tf.Session() as sess:

        # restore or init variables
        if args.restore:
            try:
                ghistory = pd.read_pickle(
                    os.path.join(args.save_dir, args.metrics_name))
                saver.restore(sess, tf.train.latest_checkpoint(args.save_dir))
            except (ValueError, FileNotFoundError):
                logging.error("Failed to restore variables or metrics.")
                raise
        else:
            ghistory = pd.DataFrame(columns=("data", "epoch", "batch",
                                             "metric", "value"))
            sess.run(tf.global_variables_initializer())

        try:
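            # resume from the epoch after the restored global step
            # (gstep starts at -1, so a fresh run begins at epoch 0)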
            for epoch_num in range(gstep.eval() + 1, args.epoches):
                _ = sess.run(tf.assign(gstep, epoch_num))

                # evaluate train set
                eval_step(sess,
                          model,
                          saver,
                          ghistory,
                          gstep,
                          data["embedding"],
                          trn_X,
                          trn_X_len,
                          trn_Y,
                          trn_batches,
                          "train",
                          args.debug,
                          update=True)

                # evaluate tune dataset
                if epoch_num % args.save_step == 0:
                    eval_step(sess,
                              model,
                              saver,
                              ghistory,
                              gstep,
                              data["embedding"],
                              tun_X,
                              tun_X_len,
                              tun_Y,
                              tun_batches,
                              "tune",
                              args.debug,
                              save=True)
        except KeyboardInterrupt:
            pass

        # evaluate test dataset
        eval_step(sess,
                  model,
                  saver,
                  ghistory,
                  gstep,
                  data["embedding"],
                  tst_X,
                  tst_X_len,
                  tst_Y,
                  tst_batches,
                  "test",
                  args.debug,
                  save=True)

    logging.info('#' * 10 + "Training is COMPLETE!" + '#' * 10)
Example #6
import numpy as np

from src.utils import configure_logger

DATA_DIR = os.environ.get("DATA_DIR", './data')
LOGS_DIR = os.environ.get("LOGS_DIR", './logs')
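# the default for CONFIG_DIR below is built eagerly, so PROJECT_ROOT must be set in the
# environment even when CONFIG_DIR itself is provided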
CONFIGS_DIR = os.environ.get(
    "CONFIG_DIR", os.path.join(os.environ['PROJECT_ROOT'], "configs"))
N_JOBS = int(os.environ.get("N_JOBS", -1))

if N_JOBS == -1:
    N_JOBS = multiprocessing.cpu_count()

if int(os.environ.get("DEBUG", 0)) >= 1:
    LOG_LEVEL = logging.DEBUG
else:
    LOG_LEVEL = logging.INFO

configure_logger(name='', console_logging_level=LOG_LEVEL, logs_dir=LOGS_DIR)
logger = logging.getLogger(__name__)


def set_random_seed():
    seed = int(os.environ.get('RANDOM_SEED', 0))
    logger.info(f"Setting random seed to {seed}")
    np.random.seed(seed)
    random.seed(seed)


set_random_seed()
Example #7
def run(opt):
    torch.set_printoptions(precision=8, sci_mode=False)
    opt = augment_options(opt)
    configure_logger(opt)
    check_options_are_valid(opt)

    rlog.info(f"\n{config_to_string(opt)}")

    # configure the environment
    env = wrap_env(gym.make(opt.game), opt)

    # configure estimator and policy
    if hasattr(opt.estimator, 'categorical'):
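        # categorical estimator: the value-distribution support is given as
        # [min, max, number of bins]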
        _s = opt.estimator.categorical.support
        support = [_s.min, _s.max, _s.bin_no]
        estimator = MiniGridFF(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
            support=support,
        ).cuda()
    elif opt.estimator.ff:
        estimator = MiniGridFF(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
        ).cuda()
    else:
        estimator = MiniGridNet(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
        ).cuda()

    if hasattr(opt.estimator, "ensemble"):
        # Build Bootstrapped Ensembles objects
        estimator = BootstrappedEstimator(estimator,
                                          **opt.estimator.ensemble.__dict__)
        policy_evaluation = BootstrappedPE(estimator,
                                           env.action_space.n,
                                           opt.exploration.__dict__,
                                           vote=True)
        if hasattr(opt.estimator, 'categorical'):
            policy_improvement = BootstrappedPI(
                wt.CategoricalPolicyImprovement(
                    estimator,
                    optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
                    opt.gamma,
                ),
                categorical=True)
        else:
            policy_improvement = BootstrappedPI(
                wt.DQNPolicyImprovement(
                    estimator,
                    optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
                    opt.gamma,
                    is_double=opt.double,
                ))
    elif hasattr(opt.estimator, "dropout"):
        # Build Variational Dropout objects
        estimator = MiniGridDropnet(
            opt.er.hist_len * 3,
            env.action_space.n,
            hidden_size=opt.estimator.lin_size,
            p=opt.estimator.dropout,
            mc_samples=opt.estimator.mc_samples,
        ).cuda()
        policy_evaluation = DropPE(
            estimator,
            env.action_space.n,
            epsilon=opt.exploration.__dict__,
            thompson=opt.estimator.thompson,
        )
        policy_improvement = DropPI(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
            is_double=opt.double,
        )
    elif hasattr(opt.estimator, "categorical"):
        policy_evaluation = wt.EpsilonGreedyPolicy(
            estimator, env.action_space.n, epsilon=opt.exploration.__dict__)
        policy_improvement = wt.CategoricalPolicyImprovement(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
        )
    else:
        policy_evaluation = wt.EpsilonGreedyPolicy(
            estimator, env.action_space.n, epsilon=opt.exploration.__dict__)
        policy_improvement = wt.DQNPolicyImprovement(
            estimator,
            optim.Adam(estimator.parameters(), lr=opt.lr, eps=1e-4),
            opt.gamma,
            is_double=opt.double,
        )

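    # tie policy evaluation, policy improvement and the experience replay buffer together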
    policy = DQNPolicy(
        policy_evaluation,
        policy_improvement,
        wt.ExperienceReplay(**opt.er.__dict__)(),
        priority=opt.er.priority,
    )

    # additional info
    rlog.info(policy)
    rlog.info(estimator)

    # start training
    policy_iteration(env, policy, opt)