def initial_setup(args: config.Params, cl_args: argparse.Namespace) -> (config.Params, int):
    """Perform setup steps:

    1. create project, exp, and run dirs if they don't already exist
    2. create log formatter
    3. configure GCP remote logging
    4. set up email notifier
    5. log git info
    6. write the config out to file
    7. log diff between default and experiment's configs
    8. choose torch's and random's random seed
    9. if config specifies a single GPU, then set the GPU's random seed
       (doesn't cover multi-GPU)
    10. resolve "auto" settings for tokenizer and pool_type parameters

    Parameters
    ----------
    args : config.Params
        config map
    cl_args : argparse.Namespace
        mapping named arguments to parsed values

    Returns
    -------
    args : config.Params
        config map
    seed : int
        random's and pytorch's random seed
    """
    output = io.StringIO()
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)  # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)  # e.g. <project_dir>/jiant-demo/sst
    log_fh = log.FileHandler(args.local_log_path)
    log_fmt = log.Formatter("%(asctime)s: %(message)s", datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    if cl_args.remote_log:
        from jiant.utils import gcp

        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from jiant.utils import emails

        global EMAIL_NOTIFIER
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _log_git_info()
    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)

    print_args = select_relevant_print_args(args)
    log.info("Parsed args: \n%s", print_args)
    log.info("Saved config to %s", config_file)

    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if isinstance(args.cuda, int) and args.cuda >= 0:
        # If only running on one GPU.
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU."
            )
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = tokenizers.select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    return args, seed
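# Usage sketch (illustrative, not from this file): initial_setup() is meant to
# be driven by an entry point that has already parsed command-line arguments.
# `handle_arguments` and `config.params_from_file` are the helpers used by
# main() below; the exact call sequence here is an assumption:
#
#     cl_args = handle_arguments(sys.argv[1:])
#     args = config.params_from_file(cl_args.config_file, cl_args.overrides)
#     args, seed = initial_setup(args, cl_args)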
def main(cl_arguments):
    """ Run REPL for a CoLA model """

    # Arguments handling #
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)
    assert args.target_tasks == "cola", "Currently only supporting CoLA. ({})".format(
        args.target_tasks
    )

    if args.cuda >= 0:
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU."
            )
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    # Prepare data #
    _, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = sorted(set(target_tasks), key=lambda x: x.name)

    # Build or load model #
    cuda_device = parse_cuda_list_arg(args.cuda)
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Loading existing model from %s...", cl_args.model_file_path)
    load_model_state(model, cl_args.model_file_path, args.cuda, [], strict=False)

    # Inference Setup #
    model.eval()
    vocab = Vocabulary.from_files(os.path.join(args.exp_dir, "vocab"))
    indexers = build_indexers(args)
    task = take_one(tasks)
    model_preprocessing_interface = ModelPreprocessingInterface(args)

    # Run Inference #
    if cl_args.inference_mode == "repl":
        assert cl_args.input_path is None
        assert cl_args.output_path is None
        print("Running REPL for task: {}".format(task.name))
        run_repl(model, model_preprocessing_interface, vocab, indexers, task, args)
    elif cl_args.inference_mode == "corpus":
        run_corpus_inference(
            model,
            model_preprocessing_interface,
            vocab,
            indexers,
            task,
            args,
            cl_args.input_path,
            cl_args.input_format,
            cl_args.output_path,
            cl_args.eval_output_path,
        )
    else:
        raise KeyError(cl_args.inference_mode)
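# Minimal entry-point sketch so the module is runnable as a script (a standard
# pattern; assumes `sys` is imported at module top, which this excerpt does
# not show):
if __name__ == "__main__":
    main(sys.argv[1:])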