def initial_setup(args: config.Params, cl_args: argparse.Namespace) -> Tuple[config.Params, int]:
    """Perform setup steps:

    1. create project, exp, and run dirs if they don't already exist
    2. create log formatter
    3. configure GCP remote logging
    4. set up email notifier
    5. log git info
    6. write the config out to file
    7. log diff between default and experiment's configs
    8. set the random seed for the random and torch modules
    9. if config specifies a single GPU, then set the GPU's random seed (doesn't cover multi-GPU)
    10. resolve "auto" settings for tokenizer and pool_type parameters

    Parameters
    ----------
    args : config.Params
        config map
    cl_args : argparse.Namespace
        mapping named arguments to parsed values

    Returns
    -------
    args : config.Params
        config map
    seed : int
        random seed used by the random module and PyTorch

    """
    output = io.StringIO()
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)  # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)  # e.g. <project_dir>/jiant-demo/sst
    log_fh = log.FileHandler(args.local_log_path)
    log_fmt = log.Formatter("%(asctime)s: %(message)s", datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    if cl_args.remote_log:
        from jiant.utils import gcp

        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from jiant.utils import emails

        global EMAIL_NOTIFIER
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _log_git_info()
    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)

    print_args = select_relevant_print_args(args)
    log.info("Parsed args: \n%s", print_args)

    log.info("Saved config to %s", config_file)

    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if isinstance(args.cuda, int) and args.cuda >= 0:
        # If only running on one GPU.
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected" " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU."
            )
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = tokenizers.select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    return args, seed
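
For context, a typical call site pairs this function with handle_arguments and config.params_from_file (both appear in Example 3 below). A minimal sketch, assuming the jiant.utils import path seen in the gcp/emails imports above:

# Minimal sketch of a call site; handle_arguments and initial_setup live in
# the same module as the examples on this page, and the config import path
# is an assumption based on the gcp/emails imports above.
import logging as log
import sys

from jiant.utils import config

cl_args = handle_arguments(sys.argv[1:])  # parse command-line flags
args = config.params_from_file(cl_args.config_file, cl_args.overrides)
args, seed = initial_setup(args, cl_args)  # dirs, logging, seed, CUDA
log.info("Run initialized with seed %d", seed)
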
def initial_setup(args, cl_args):
    """
    Set up directories, logging, the email notifier, the random seed, and
    CUDA settings.

    Parameters
    ----------------
    args: Params object
        config map
    cl_args: argparse.Namespace
        parsed command-line arguments

    Returns
    ----------------
    args: Params object
        config map, with "auto" tokenizer and pool_type settings resolved
    seed: int
        random seed used by the random module and PyTorch
    """
    output = io.StringIO()
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)  # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)  # e.g. <project_dir>/jiant-demo/sst
    log_fh = log.FileHandler(args.local_log_path)
    log_fmt = log.Formatter("%(asctime)s: %(message)s",
                            datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    if cl_args.remote_log:
        from jiant.utils import gcp

        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from jiant.utils import emails

        global EMAIL_NOTIFIER
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _log_git_info()
    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)

    print_args = select_relevant_print_args(args)
    log.info("Parsed args: \n%s", print_args)

    log.info("Saved config to %s", config_file)

    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if args.cuda >= 0:
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU.")
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = tokenizers.select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    return args, seed
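
Both versions of initial_setup seed random and torch the same way; a standalone sketch of the effect, showing that re-seeding makes subsequent draws repeat exactly:

# Standalone illustration of the seeding done above: fixing the seed makes
# both random and torch draws reproducible.
import random

import torch

seed = 1234
random.seed(seed)
torch.manual_seed(seed)
first = (random.random(), torch.rand(1).item())

random.seed(seed)
torch.manual_seed(seed)
second = (random.random(), torch.rand(1).item())

assert first == second  # identical draws after re-seeding
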
Example 3
def main(cl_arguments):
    """ Run REPL for a CoLA model """

    # Arguments handling #
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)

    assert args.target_tasks == "cola", "Currently only supporting CoLA. ({})".format(
        args.target_tasks)

    if args.cuda >= 0:
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
        except Exception:
            log.warning("GPU access failed. You might be using a CPU-only"
                        " installation of PyTorch. Falling back to CPU.")
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    # Prepare data #
    _, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = sorted(set(target_tasks), key=lambda x: x.name)

    # Build or load model #
    cuda_device = parse_cuda_list_arg(args.cuda)
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Loading existing model from %s...", cl_args.model_file_path)
    load_model_state(model, cl_args.model_file_path, args.cuda, [], strict=False)

    # Inference Setup #
    model.eval()
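    # Reload the vocabulary saved to disk during preprocessing; this replaces
    # the in-memory vocab returned by build_tasks above.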
    vocab = Vocabulary.from_files(os.path.join(args.exp_dir, "vocab"))
    indexers = build_indexers(args)
    task = take_one(tasks)
    model_preprocessing_interface = ModelPreprocessingInterface(args)

    # Run Inference #
    if cl_args.inference_mode == "repl":
        assert cl_args.input_path is None
        assert cl_args.output_path is None
        print("Running REPL for task: {}".format(task.name))
        run_repl(model, model_preprocessing_interface, vocab, indexers, task,
                 args)
    elif cl_args.inference_mode == "corpus":
        run_corpus_inference(
            model,
            model_preprocessing_interface,
            vocab,
            indexers,
            task,
            args,
            cl_args.input_path,
            cl_args.input_format,
            cl_args.output_path,
            cl_args.eval_output_path,
        )
    else:
        raise KeyError(cl_args.inference_mode)
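
A hypothetical way to drive this entry point; handle_arguments is not shown on this page, so the flag spellings below are assumptions inferred from the cl_args attributes read in main:

# Hypothetical invocation; the flag spellings are inferred from the
# cl_args attributes used in main() and may differ in the real
# handle_arguments, which is not shown here.
main([
    "--config_file", "config/demo.conf",        # illustrative config path
    "--overrides", "target_tasks = cola",
    "--model_file_path", "runs/cola/model.th",  # illustrative checkpoint
    "--inference_mode", "repl",
])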