Example #1
def _get_task(name, args, data_path, scratch_path):
    """ Build or load a single task. """
    assert name in TASKS_REGISTRY, f"Task '{name:s}' not found!"
    task_cls, rel_path, task_kw = TASKS_REGISTRY[name]
    pkl_path = os.path.join(scratch_path, "tasks",
                            f"{name:s}.{args.tokenizer:s}.pkl")
    # TODO: refactor to always read from disk, even if task is constructed
    # here. This should avoid subtle bugs from deserialization issues.
    if os.path.isfile(pkl_path) and not args.reload_tasks:
        task = pkl.load(open(pkl_path, "rb"))
        log.info("\tLoaded existing task %s", name)
    else:
        log.info("\tCreating task %s from scratch.", name)
        # These tasks take an additional kwarg.
        if name == "nli-prob" or name == "nli-alt":
            # TODO: remove special case, replace with something general
            # to pass custom loader args to task.
            task_kw["probe_path"] = args["nli-prob"].probe_path
        task_src_path = os.path.join(data_path, rel_path)
        task = task_cls(
            task_src_path,
            max_seq_len=args.max_seq_len,
            name=name,
            tokenizer_name=args.tokenizer,
            **task_kw,
        )
        task.load_data()
        utils.maybe_make_dir(os.path.dirname(pkl_path))
        pkl.dump(task, open(pkl_path, "wb"))

    return task
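The function above follows a simple load-or-build-and-cache pattern around pickle. Below is a minimal, self-contained sketch of that pattern; the load_or_build helper, the lambda, and the file path are invented for illustration and are not part of jiant.

import os
import pickle as pkl


def load_or_build(pkl_path, build_fn, reload=False):
    """Return the cached object at pkl_path, or build it with build_fn and cache it."""
    if os.path.isfile(pkl_path) and not reload:
        with open(pkl_path, "rb") as f:
            return pkl.load(f)
    obj = build_fn()
    os.makedirs(os.path.dirname(pkl_path) or ".", exist_ok=True)
    with open(pkl_path, "wb") as f:
        pkl.dump(obj, f)
    return obj


# Hypothetical usage: cache an expensive-to-build, task-like object.
task = load_or_build("scratch/tasks/demo.moses.pkl", build_fn=lambda: {"name": "demo"})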
Example #2
    def __init__(self, args):
        super().__init__()
        self.cache_dir = FLAGS.cache_dir
        utils.maybe_make_dir(self.cache_dir)

        self.output_mode = args.transformers_output_mode
        self.input_module = args.input_module
        self.tokenizer_required = input_module_tokenizer_name(
            args.input_module)

        # If set, treat these special tokens as part of input segments other than A/B.
        self._SEG_ID_CLS = None
        self._SEG_ID_SEP = None
        # self.model = transformers.RobertaModel.from_pretrained(
        #     args.input_module, cache_dir=self.cache_dir, output_hidden_states=True
        # )
        if FLAGS.saved_pretrained_model_path:
            self.model = load_pretrained_model_for_SG()
        else:
            self.model = MODEL_MAPPING[FLAGS.model](finetune_stage=True)
        self.max_pos = None

        self.tokenizer = get_my_tokenizer()
        self._sep_id = self.tokenizer.sep_token_id
        self._cls_id = self.tokenizer.cls_token_id
        self._pad_id = self.tokenizer.pad_token_id
        self._unk_id = self.tokenizer.unk_token_id

        self.parameter_setup(args)
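The special-token ids collected above (_cls_id, _sep_id, _pad_id) are what boundary-token functions later use to frame one or two input segments. A rough, self-contained sketch of that BERT-style framing, with made-up id values, might look like the following; the real logic lives in the embedder modules' apply_boundary_tokens.

from typing import List, Optional

# Made-up ids standing in for tokenizer.cls_token_id, sep_token_id, pad_token_id.
CLS_ID, SEP_ID, PAD_ID = 101, 102, 0


def apply_bert_style_boundaries(seg_a: List[int], seg_b: Optional[List[int]] = None,
                                max_len: int = 16) -> List[int]:
    """Frame token ids as [CLS] A [SEP] (B [SEP]) and pad to max_len."""
    ids = [CLS_ID] + seg_a + [SEP_ID]
    if seg_b is not None:
        ids = ids + seg_b + [SEP_ID]
    ids = ids[:max_len]
    return ids + [PAD_ID] * (max_len - len(ids))


print(apply_bert_style_boundaries([7, 8, 9], [21, 22]))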
Example #3
def _get_task(name: str, args: config.Params, data_path: str,
              scratch_path: str) -> Task:
    """Get task object from disk if available. Else construct, prepare and save a new task object.

    Parameters
    ----------
    name : str
        task name to load.
    args : config.Params
        param handler object.
    data_path : str
        base data directory.
    scratch_path : str
        where to save Task objects.

    Returns
    -------
    Task
        loaded task object.

    """
    assert name in TASKS_REGISTRY, f"Task '{name:s}' not found!"
    task_cls, rel_path, task_kw = TASKS_REGISTRY[name]
    pkl_path = os.path.join(scratch_path, "tasks",
                            f"{name:s}.{args.tokenizer:s}.pkl")
    # TODO: refactor to always read from disk, even if task is constructed
    # here. This should avoid subtle bugs from deserialization issues.
    if os.path.isfile(pkl_path) and not args.reload_tasks:
        task = pkl.load(open(pkl_path, "rb"))
        log.info("\tLoaded existing task %s", name)
    else:
        log.info("\tCreating task %s from scratch.", name)
        # These tasks take an additional kwarg.
        if name == "nli-prob" or name == "nli-alt":
            # TODO: remove special case, replace with something general
            # to pass custom loader args to task.
            task_kw["probe_path"] = args["nli-prob"].probe_path
        if name in ALL_SEQ2SEQ_TASKS:
            task_kw["max_targ_v_size"] = args.max_targ_word_v_size
        task_src_path = os.path.join(data_path, rel_path)
        task = task_cls(
            task_src_path,
            max_seq_len=args.max_seq_len,
            name=name,
            tokenizer_name=args.tokenizer,
            **task_kw,
        )
        log.info('testing: %s', str(args))
        # If the user requests computing the online code of an edge probing task.
        if args.get("online_code_preshuffle_seed", False) and args.get(
                "online_code_data_split", False):
            task.load_data(args)
            log.info('testing, flags detected; preprocess.py')
        else:
            task.load_data()
        utils.maybe_make_dir(os.path.dirname(pkl_path))
        pkl.dump(task, open(pkl_path, "wb"))

    return task
Example #4
    def __init__(self, args):
        super(PytorchTransformersEmbedderModule, self).__init__()

        self.cache_dir = os.getenv(
            "PYTORCH_PRETRAINED_BERT_CACHE",
            os.path.join(args.exp_dir, "pytorch_transformers_cache"),
        )
        utils.maybe_make_dir(self.cache_dir)

        self.embeddings_mode = args.pytorch_transformers_output_mode

        # Integer token indices for special symbols.
        self._sep_id = None
        self._cls_id = None
        self._pad_id = None

        # If set, treat these special tokens as part of input segments other than A/B.
        self._SEG_ID_CLS = None
        self._SEG_ID_SEP = None
Example #5
    def __init__(self, args):
        super(HuggingfaceTransformersEmbedderModule, self).__init__()

        self.cache_dir = os.getenv(
            "HUGGINGFACE_TRANSFORMERS_CACHE",
            os.path.join(args.exp_dir, "transformers_cache"))
        utils.maybe_make_dir(self.cache_dir)

        self.output_mode = args.transformers_output_mode
        self.input_module = args.input_module
        self.max_pos = None
        self.tokenizer_required = input_module_tokenizer_name(
            args.input_module)

        # Integer token indices for special symbols.
        self._cls_id = None
        self._sep_id = None
        self._pad_id = None
        self._unk_id = None

        # If set, treat these special tokens as part of input segments other than A/B.
        self._SEG_ID_CLS = None
        self._SEG_ID_SEP = None
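The _SEG_ID_CLS / _SEG_ID_SEP slots above exist for models like XLNet that give special tokens their own segment rather than placing them in segment A or B. A hedged sketch of that idea, with invented segment-id values and an XLNet-style A [SEP] B [SEP] [CLS] layout:

from typing import List

# Invented segment ids for illustration; real values come from the pretrained model.
SEG_ID_A, SEG_ID_B, SEG_ID_CLS, SEG_ID_SEP = 0, 1, 2, 3


def xlnet_style_segment_ids(len_a: int, len_b: int) -> List[int]:
    """Segment ids for an A [SEP] B [SEP] [CLS] layout where CLS/SEP get their own segments."""
    return ([SEG_ID_A] * len_a + [SEG_ID_SEP]
            + [SEG_ID_B] * len_b + [SEG_ID_SEP]
            + [SEG_ID_CLS])


print(xlnet_style_segment_ids(3, 2))  # -> [0, 0, 0, 3, 1, 1, 3, 2]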
Example #6
def build_tasks(args):
    """Main logic for preparing tasks, doing so by
    1) creating / loading the tasks
    2) building / loading the vocabulary
    3) building / loading the word vectors
    4) indexing each task's data
    5) initializing lazy loaders (streaming iterators)
    """

    # 1) create / load tasks
    tasks, pretrain_task_names, target_task_names = get_tasks(args)
    for task in tasks:
        task_classifier = config.get_task_attr(args, task.name,
                                               "use_classifier")
        setattr(task, "_classifier_name",
                task_classifier if task_classifier else task.name)

    tokenizer_names = {task.name: task.tokenizer_name for task in tasks}
    assert len(set(tokenizer_names.values())) == 1, (
        "Error: mixing tasks with different tokenizers!"
        f" Tokenizations: {tokenizer_names}")

    # 2) build / load vocab and indexers
    indexers = build_indexers(args)

    vocab_path = os.path.join(args.exp_dir, "vocab")
    if args.reload_vocab or not os.path.exists(vocab_path):
        _build_vocab(args, tasks, vocab_path)

    # Always load vocab from file.
    vocab = Vocabulary.from_files(vocab_path)
    log.info("\tLoaded vocab from %s", vocab_path)

    for namespace, mapping in vocab._index_to_token.items():
        log.info("\tVocab namespace %s: size %d", namespace, len(mapping))
    log.info("\tFinished building vocab.")
    args.max_word_v_size = vocab.get_vocab_size("tokens")
    args.max_char_v_size = vocab.get_vocab_size("chars")

    # 3) build / load word vectors
    word_embs = None
    if args.input_module in ["glove", "fastText"]:
        emb_file = os.path.join(args.exp_dir, "embs.pkl")
        if args.reload_vocab or not os.path.exists(emb_file):
            word_embs = _build_embeddings(args, vocab, emb_file)
        else:  # load from file
            word_embs = pkl.load(open(emb_file, "rb"))
        log.info("Trimmed word embeddings: %s", str(word_embs.size()))

    # 4) Index tasks using vocab (if preprocessed copy not available).
    preproc_dir = os.path.join(args.exp_dir, "preproc")
    utils.maybe_make_dir(preproc_dir)
    reindex_tasks = parse_task_list_arg(args.reindex_tasks)
    utils.assert_for_log(
        not (args.reload_indexing and not reindex_tasks),
        'Flag reload_indexing was set, but no tasks are set to reindex (use -o "args.reindex_tasks'
        ' = "task1,task2,..."")',
    )

    # Set up boundary_token_fn, which applies SOS/EOS/SEP/CLS delimiters
    if args.input_module.startswith("bert"):
        from jiant.pytorch_transformers_interface.modules import BertEmbedderModule

        boundary_token_fn = BertEmbedderModule.apply_boundary_tokens
    elif args.input_module.startswith("xlnet"):
        from jiant.pytorch_transformers_interface.modules import XLNetEmbedderModule

        boundary_token_fn = XLNetEmbedderModule.apply_boundary_tokens
    else:
        boundary_token_fn = utils.apply_standard_boundary_tokens

    for task in tasks:
        force_reindex = args.reload_indexing and task.name in reindex_tasks
        for split in ALL_SPLITS:
            log_prefix = "\tTask '%s', split '%s'" % (task.name, split)
            relative_path = _get_serialized_record_path(
                task.name, split, "preproc")
            cache_found = _find_cached_file(args.exp_dir,
                                            args.global_ro_exp_dir,
                                            relative_path,
                                            log_prefix=log_prefix)
            if force_reindex or not cache_found:
                # Re-index from scratch.
                record_file = _get_serialized_record_path(
                    task.name, split, preproc_dir)
                if os.path.exists(record_file) and os.path.islink(record_file):
                    os.remove(record_file)

                _index_split(task, split, indexers, vocab, record_file,
                             boundary_token_fn)

        # Delete in-memory data - we'll lazy-load from disk later.
        # TODO: delete task.{split}_data_text as well?
        task.train_data = None
        task.val_data = None
        task.test_data = None

    log.info("\tFinished indexing tasks")

    # 5) Initialize tasks with data iterators.
    pretrain_tasks = []
    target_tasks = []
    for task in tasks:
        # Replace lists of instances with lazy generators from disk.
        task.val_data = _get_instance_generator(task.name, "val", preproc_dir)
        task.test_data = _get_instance_generator(task.name, "test",
                                                 preproc_dir)
        # When using pretrain_data_fraction, we need modified iterators for use
        # only on training datasets at pretraining time.
        if task.name in pretrain_task_names:
            log.info("\tCreating trimmed pretraining-only version of " +
                     task.name + " train.")
            task.train_data = _get_instance_generator(
                task.name,
                "train",
                preproc_dir,
                fraction=args.pretrain_data_fraction)
            pretrain_tasks.append(task)
        # When using target_train_data_fraction, we need modified iterators
        # only for training datasets at do_target_task_training time.
        if task.name in target_task_names:
            log.info("\tCreating trimmed target-only version of " + task.name +
                     " train.")
            task.train_data = _get_instance_generator(
                task.name,
                "train",
                preproc_dir,
                fraction=args.target_train_data_fraction)
            target_tasks.append(task)

    log.info("\t  Training on %s", ", ".join(pretrain_task_names))
    log.info("\t  Evaluating on %s", ", ".join(target_task_names))
    return pretrain_tasks, target_tasks, vocab, word_embs
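Step 5 above wraps the train split in a fraction-limited iterator when pretrain_data_fraction or target_train_data_fraction is set. Below is a self-contained sketch of one plausible trimming rule (keep roughly the first fraction of records); the exact behavior of jiant's _get_instance_generator may differ.

from typing import Iterator, List


def fractional_iterator(records: List, fraction: float = 1.0) -> Iterator:
    """Yield roughly the first `fraction` of records (a sketch of *_data_fraction trimming)."""
    n_keep = max(1, int(len(records) * fraction))
    for record in records[:n_keep]:
        yield record


print(list(fractional_iterator(list(range(10)), fraction=0.3)))  # -> [0, 1, 2]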
Example #7
def initial_setup(args, cl_args):
    """
    Sets up email hook, creating seed, and cuda settings.

    Parameters
    ----------------
    args: Params object
    cl_args: list of arguments

    Returns
    ----------------
    tasks: list of Task objects
    pretrain_tasks: list of pretraining tasks
    target_tasks: list of target tasks
    vocab: list of vocab
    word_embs: loaded word embeddings, may be None if args.input_module in
    {gpt, elmo, elmo-chars-only, bert-*}
    model: a MultiTaskModel object
    """
    output = io.StringIO()
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)  # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)  # e.g. <project_dir>/jiant-demo/sst
    log_fh = log.FileHandler(args.local_log_path)
    log_fmt = log.Formatter("%(asctime)s: %(message)s",
                            datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    if cl_args.remote_log:
        from jiant.utils import gcp

        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from jiant.utils import emails

        global EMAIL_NOTIFIER
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _log_git_info()
    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)

    print_args = select_relevant_print_args(args)
    log.info("Parsed args: \n%s", print_args)

    log.info("Saved config to %s", config_file)

    seed = random.randint(1,
                          10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if args.cuda >= 0:
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU.")
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = tokenizers.select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    return args, seed
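The seed handling above boils down to: a negative random_seed means "draw one", otherwise use the configured value, then seed both random and torch. A minimal standalone version of that logic:

import random

import torch


def resolve_seed(random_seed: int) -> int:
    """Pick a seed (randomly if random_seed < 0) and seed random and torch with it."""
    seed = random.randint(1, 10000) if random_seed < 0 else random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    return seed


print(resolve_seed(-1))    # a freshly drawn seed in [1, 10000]
print(resolve_seed(1234))  # a fixed, reproducible seed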
Example #8
def build_tasks(
    args: config.Params, cuda_device: Any
) -> (List[Task], List[Task], Vocabulary, Union[np.ndarray, float]):
    """Main logic for preparing tasks:

    1. create or load the tasks
    2. configure classifiers for tasks
    3. set up indexers
    4. build and save vocab to disk
    5. load vocab from disk
    6. if specified, load word embeddings
    7. set up ModelPreprocessingInterface (MPI) to handle model-specific preprocessing
    8. index tasks using vocab and task-specific MPI, save to disk.
    9. return: task data lazy-loaders in phase-specific lists w/ vocab, and word embeddings

    Parameters
    ----------
    args : Params
        config map

    Returns
    -------
    List[Task]
        list of pretrain Tasks.
    List[Task]
        list of target Tasks.
    allennlp.data.Vocabulary
        vocabulary from task data.
    Union[np.ndarray, float]
        Word embeddings.

    """
    # 1) create / load tasks
    tasks, pretrain_task_names, target_task_names = get_tasks(
        args, cuda_device)
    for task in tasks:
        task_classifier = config.get_task_attr(args, task.name,
                                               "use_classifier")
        setattr(task, "_classifier_name",
                task_classifier if task_classifier else task.name)

    tokenizer_names = {task.name: task.tokenizer_name for task in tasks}
    assert len(set(tokenizer_names.values())) <= 1, (
        "Error: mixing tasks with different tokenizers!"
        f" Tokenizations: {tokenizer_names}")

    # 2) build / load vocab and indexers
    indexers = build_indexers(args)

    vocab_path = os.path.join(args.exp_dir, "vocab")
    if args.reload_vocab or not os.path.exists(vocab_path):
        _build_vocab(args, tasks, vocab_path)

    # Always load vocab from file.
    vocab = Vocabulary.from_files(vocab_path)
    log.info("\tLoaded vocab from %s", vocab_path)

    for namespace, mapping in vocab._index_to_token.items():
        log.info("\tVocab namespace %s: size %d", namespace, len(mapping))
    log.info("\tFinished building vocab.")
    args.max_word_v_size = vocab.get_vocab_size("tokens")
    args.max_char_v_size = vocab.get_vocab_size("chars")

    # 3) build / load word vectors
    word_embs = None
    if args.input_module in ["glove", "fastText"]:
        emb_file = os.path.join(args.exp_dir, "embs.pkl")
        if args.reload_vocab or not os.path.exists(emb_file):
            word_embs = _build_embeddings(args, vocab, emb_file)
        else:  # load from file
            word_embs = pkl.load(open(emb_file, "rb"))
        log.info("Trimmed word embeddings: %s", str(word_embs.size()))

    # 4) Set up model_preprocessing_interface
    model_preprocessing_interface = ModelPreprocessingInterface(args)

    # 5) Index tasks using vocab (if preprocessed copy not available).
    preproc_dir = os.path.join(args.exp_dir, "preproc")
    utils.maybe_make_dir(preproc_dir)
    reindex_tasks = parse_task_list_arg(args.reindex_tasks)
    utils.assert_for_log(
        not (args.reload_indexing and not reindex_tasks),
        'Flag reload_indexing was set, but no tasks are set to reindex (use -o "args.reindex_tasks'
        ' = "task1,task2,..."")',
    )

    for task in tasks:
        force_reindex = args.reload_indexing and task.name in reindex_tasks
        for split in ALL_SPLITS:
            log_prefix = "\tTask '%s', split '%s'" % (task.name, split)
            relative_path = _get_serialized_record_path(
                task.name, split, "preproc")
            cache_found = _find_cached_file(args.exp_dir,
                                            args.global_ro_exp_dir,
                                            relative_path,
                                            log_prefix=log_prefix)
            if force_reindex or not cache_found:
                # Re-index from scratch.
                record_file = _get_serialized_record_path(
                    task.name, split, preproc_dir)
                if os.path.exists(record_file) and os.path.islink(record_file):
                    os.remove(record_file)

                _index_split(task, split, indexers, vocab, record_file,
                             model_preprocessing_interface)

        # Delete in-memory data - we'll lazy-load from disk later.
        # TODO: delete task.{split}_data_text?

    log.info("\tFinished indexing tasks")

    # 6) Initialize tasks with data iterators.
    pretrain_tasks = []
    target_tasks = []
    for task in tasks:
        # Replace lists of instances with lazy generators from disk.
        task.set_instance_iterable(
            split_name="val",
            instance_iterable=_get_instance_generator(task.name, "val",
                                                      preproc_dir),
        )
        task.set_instance_iterable(
            split_name="test",
            instance_iterable=_get_instance_generator(task.name, "test",
                                                      preproc_dir),
        )
        # When using pretrain_data_fraction, we need modified iterators for use
        # only on training datasets at pretraining time.
        if task.name in pretrain_task_names:
            log.info("\tCreating trimmed pretraining-only version of " +
                     task.name + " train.")
            task.set_instance_iterable(
                split_name="train",
                instance_iterable=_get_instance_generator(
                    task.name,
                    "train",
                    preproc_dir,
                    fraction=args.pretrain_data_fraction),
                phase="pretrain",
            )
            pretrain_tasks.append(task)
        # When using target_train_data_fraction, we need modified iterators
        # only for training datasets at do_target_task_training time.
        if task.name in target_task_names:
            log.info("\tCreating trimmed target-only version of " + task.name +
                     " train.")
            task.set_instance_iterable(
                split_name="train",
                instance_iterable=_get_instance_generator(
                    task.name,
                    "train",
                    preproc_dir,
                    fraction=args.target_train_data_fraction),
                phase="target_train",
            )
            target_tasks.append(task)

    log.info("\t  Training on %s", ", ".join(pretrain_task_names))
    log.info("\t  Evaluating on %s", ", ".join(target_task_names))
    return pretrain_tasks, target_tasks, vocab, word_embs
Example #9
def initial_setup(args: config.Params, cl_args: argparse.Namespace) -> (config.Params, int):
    """Perform setup steps:

    1. create project, exp, and run dirs if they don't already exist
    2. create log formatter
    3. configure GCP remote logging
    4. set up email notifier
    5. log git info
    6. write the config out to file
    7. log diff between default and experiment's configs
    8. choose torch's and random's random seed
    9. if config specifies a single GPU, then set the GPU's random seed (doesn't cover multi-GPU)
    10. resolve "auto" settings for tokenizer and pool_type parameters

    Parameters
    ----------
    args : config.Params
        config map
    cl_args : argparse.Namespace
        mapping named arguments to parsed values

    Returns
    -------
    args : config.Params
        config map
    seed : int
        random's and pytorch's random seed

    """
    output = io.StringIO()
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)  # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)  # e.g. <project_dir>/jiant-demo/sst
    log_fh = log.FileHandler(args.local_log_path)
    log_fmt = log.Formatter("%(asctime)s: %(message)s", datefmt="%m/%d %I:%M:%S %p")
    log_fh.setFormatter(log_fmt)
    log.getLogger().addHandler(log_fh)

    if cl_args.remote_log:
        from jiant.utils import gcp

        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from jiant.utils import emails

        global EMAIL_NOTIFIER
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _log_git_info()
    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)

    print_args = select_relevant_print_args(args)
    log.info("Parsed args: \n%s", print_args)

    log.info("Saved config to %s", config_file)

    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if isinstance(args.cuda, int) and args.cuda >= 0:
        # If only running on one GPU.
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected" " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. "
                "Falling back to CPU."
            )
            args.cuda = -1

    if args.tokenizer == "auto":
        args.tokenizer = tokenizers.select_tokenizer(args)
    if args.pool_type == "auto":
        args.pool_type = select_pool_type(args)

    return args, seed
Example #10
def build_model(args, vocab, pretrained_embs, tasks):
    """
    Build the model according to args.
    Returns: a MultiTaskModel with the sentence encoder and task-specific modules attached.
    """

    # Build embeddings.
    if args.input_module == "gpt":
        # Note: incompatible with other embedders, but logic in preprocess.py
        # should prevent these from being enabled anyway.
        from .openai_transformer_lm.utils import OpenAIEmbedderModule

        log.info("Using OpenAI transformer model.")
        cove_layer = None
        # Here, this uses openAIEmbedder.
        embedder = OpenAIEmbedderModule(args)
        d_emb = embedder.get_output_dim()
    elif args.input_module.startswith("bert"):
        # Note: incompatible with other embedders, but logic in preprocess.py
        # should prevent these from being enabled anyway.
        from .bert.utils import BertEmbedderModule

        log.info(f"Using BERT model ({args.input_module}).")
        cove_layer = None
        # Set PYTORCH_PRETRAINED_BERT_CACHE environment variable to an existing
        # cache; see
        # https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/file_utils.py  # noqa
        bert_cache_dir = os.getenv(
            "PYTORCH_PRETRAINED_BERT_CACHE", os.path.join(args.exp_dir, "bert_cache")
        )
        maybe_make_dir(bert_cache_dir)
        embedder = BertEmbedderModule(args, cache_dir=bert_cache_dir)
        d_emb = embedder.get_output_dim()
    else:
        # Default case, used for ELMo, CoVe, word embeddings, etc.
        d_emb, embedder, cove_layer = build_embeddings(args, vocab, tasks, pretrained_embs)
    d_sent_input = args.d_hid

    sent_encoder, d_sent_output = build_sent_encoder(
        args, vocab, d_emb, tasks, embedder, cove_layer
    )
    # d_task_input is the input dimension of the task-specific module
    # set skip_emb = 1 if you want to concatenate the encoder input with encoder output to pass
    # into task specific module.
    d_task_input = d_sent_output + (args.skip_embs * d_emb)

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, vocab)
    build_task_modules(args, tasks, model, d_task_input, d_emb, embedder, vocab)
    model = model.cuda() if args.cuda >= 0 else model
    log.info("Model specification:")
    log.info(model)
    param_count = 0
    trainable_param_count = 0
    if args.list_params:
        log.info("Model parameters:")
    for name, param in model.named_parameters():
        param_count += np.prod(param.size())
        if param.requires_grad:
            trainable_param_count += np.prod(param.size())
            if args.list_params:
                log.info(
                    "\t%s: Trainable parameter, count %d with %s",
                    name,
                    np.prod(param.size()),
                    str(param.size()),
                )
        elif args.list_params:
            log.info(
                "\t%s: Non-trainable parameter, count %d with %s",
                name,
                np.prod(param.size()),
                str(param.size()),
            )
    log.info("Total number of parameters: {ct:d} ({ct:g})".format(ct=param_count))
    log.info("Number of trainable parameters: {ct:d} ({ct:g})".format(ct=trainable_param_count))
    return model
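The parameter-counting loop at the end of build_model generalizes to any nn.Module. A small self-contained sketch; the toy model here is invented purely for illustration.

import numpy as np
import torch.nn as nn

# Tiny stand-in model; any nn.Module can be counted the same way.
model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
model[0].weight.requires_grad = False  # freeze one weight matrix for illustration

param_count = sum(int(np.prod(p.size())) for p in model.parameters())
trainable_param_count = sum(
    int(np.prod(p.size())) for p in model.parameters() if p.requires_grad)
print("Total number of parameters: %d" % param_count)
print("Number of trainable parameters: %d" % trainable_param_count)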