Example #1
def build_sent_encoder(args, vocab, d_emb, tasks, embedder, cove_layer):
    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    # Note: sent_enc is expected to apply dropout to its input _and_ output if needed.
    tfm_params = Params({
        'input_dim': d_emb,
        'hidden_dim': args.d_hid,
        'projection_dim': args.d_tproj,
        'feedforward_hidden_dim': args.d_ff,
        'num_layers': args.n_layers_enc,
        'num_attention_heads': args.n_heads
    })
    rnn_params = Params({
        'input_size': d_emb,
        'bidirectional': True,
        'hidden_size': args.d_hid,
        'num_layers': args.n_layers_enc
    })
    # Make sentence encoder
    if any(isinstance(task, LanguageModelingTask) for task in tasks) or \
            args.sent_enc == 'bilm':
        assert_for_log(args.sent_enc in ['rnn', 'bilm'],
                       "Only RNNLM supported!")
        if args.elmo:
            assert_for_log(args.elmo_chars_only,
                           "LM with full ELMo not supported")
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            bilm,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLM architecture for shared encoder!")
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        log.info("Using BoW architecture for shared encoder!")
        assert_for_log(
            not args.skip_embs,
            "Skip connection not currently supported with `bow` encoder.")
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            sent_rnn,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLSTM architecture for shared encoder!")
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(
            copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            transformer,
            dropout=args.dropout,
            skip_embs=args.skip_embs,
            cove_layer=cove_layer,
            sep_embs_for_skip=args.sep_embs_for_skip)
        log.info("Using Transformer architecture for shared encoder!")
    elif args.sent_enc == 'null':
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(
            args.skip_embs,
            f"skip_embs must be set for '{args.sent_enc}' encoder")
        phrase_layer = NullPhraseLayer(rnn_params['input_size'])
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            phrase_layer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 0  # skip connection added below
        log.info("No shared encoder (just using word embeddings)!")
    else:
        assert_for_log(False, "No valid sentence encoder specified.")
    return sent_encoder, d_sent
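The transformer branch above builds its encoder from the `tfm_params` block via AllenNLP's `from_params` machinery. Below is a minimal standalone sketch of just that step, assuming allennlp 0.x is installed; the dimensions are illustrative stand-ins for the corresponding `args` values (`d_emb`, `d_hid`, `d_tproj`, `d_ff`, `n_layers_enc`, `n_heads`), not values taken from the examples.

# Minimal standalone sketch of the `tfm_params` -> encoder construction,
# assuming allennlp 0.x. Dimensions are illustrative stand-ins for args values.
import torch
from allennlp.common import Params
from allennlp.modules.seq2seq_encoders import StackedSelfAttentionEncoder

tfm_params = Params({
    'input_dim': 300,                 # d_emb
    'hidden_dim': 512,                # d_hid
    'projection_dim': 64,             # d_tproj (must be divisible by num_attention_heads)
    'feedforward_hidden_dim': 2048,   # d_ff
    'num_layers': 2,                  # n_layers_enc
    'num_attention_heads': 8,         # n_heads
})
encoder = StackedSelfAttentionEncoder.from_params(tfm_params)

tokens = torch.randn(4, 10, 300)   # (batch, seq_len, input_dim)
mask = torch.ones(4, 10)           # all positions valid
out = encoder(tokens, mask)
print(out.shape)                   # expected: (4, 10, 512), i.e. (batch, seq_len, hidden_dim)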
Example #2
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to args.'''

    # Build embeddings.
    if args.openai_transformer:
        # Note: incompatible with other embedders, but logic in preprocess.py
        # should prevent these from being enabled anyway.
        from .openai_transformer_lm.utils import OpenAIEmbedderModule
        log.info("Using OpenAI transformer model; skipping other embedders.")
        cove_layer = None
        embedder = OpenAIEmbedderModule(args)
        d_emb = embedder.get_output_dim()
    else:
        # Default case, used for ELMo, CoVe, word embeddings, etc.
        d_emb, embedder, cove_layer = build_embeddings(args, vocab, tasks,
                                                       pretrained_embs)
    d_sent = args.d_hid

    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling

    # Note: sent_enc is expected to apply dropout to its input _and_ output if needed.
    # So, embedding modules and classifier modules should not apply dropout there.
    tfm_params = Params({
        'input_dim': d_emb,
        'hidden_dim': args.d_hid,
        'projection_dim': args.d_tproj,
        'feedforward_hidden_dim': args.d_ff,
        'num_layers': args.n_layers_enc,
        'num_attention_heads': args.n_heads
    })
    rnn_params = Params({
        'input_size': d_emb,
        'bidirectional': True,
        'hidden_size': args.d_hid,
        'num_layers': args.n_layers_enc
    })

    if any(isinstance(task, LanguageModelingTask) for task in tasks) or \
            args.sent_enc == 'bilm':
        assert_for_log(args.sent_enc in ['rnn', 'bilm'],
                       "Only RNNLM supported!")
        if args.elmo:
            assert_for_log(args.elmo_chars_only,
                           "LM with full ELMo not supported")
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            bilm,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLM architecture for shared encoder!")
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        log.info("Using BoW architecture for shared encoder!")
        assert_for_log(
            not args.skip_embs,
            "Skip connection not currently supported with `bow` encoder.")
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            sent_rnn,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLSTM architecture for shared encoder!")
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(
            copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            transformer,
            dropout=args.dropout,
            skip_embs=args.skip_embs,
            cove_layer=cove_layer,
            sep_embs_for_skip=args.sep_embs_for_skip)
        log.info("Using Transformer architecture for shared encoder!")
    elif args.sent_enc == 'null':
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(
            args.skip_embs,
            f"skip_embs must be set for '{args.sent_enc}' encoder")
        phrase_layer = NullPhraseLayer(rnn_params['input_size'])
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            phrase_layer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer)
        d_sent = 0  # skip connection added below
        log.info("No shared encoder (just using word embeddings)!")
    else:
        assert_for_log(False, "No valid sentence encoder specified.")

    d_sent += args.skip_embs * d_emb

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, vocab)

    if args.is_probing_task:
        # TODO: move this logic to preprocess.py;
        # current implementation reloads MNLI data, which is slow.
        train_task_whitelist, eval_task_whitelist = get_task_whitelist(args)
        tasks_to_build, _, _ = get_tasks(train_task_whitelist,
                                         eval_task_whitelist,
                                         args.max_seq_len,
                                         path=args.data_dir,
                                         scratch_path=args.exp_dir)
    else:
        tasks_to_build = tasks

    # Attach task-specific params.
    for task in set(tasks + tasks_to_build):
        task_params = get_task_specific_params(args, task.name)
        log.info("\tTask '%s' params: %s", task.name,
                 json.dumps(task_params.as_dict(), indent=2))
        # Store task-specific params in case we want to access them later
        setattr(model, '%s_task_params' % task.name, task_params)

    # Actually construct modules.
    for task in tasks_to_build:
        # If the task is configured to use a classifier with a different name
        # than its own, skip the module creation.
        if task.name != model._get_task_params(task.name).get(
                'use_classifier', task.name):
            continue
        build_module(task, model, d_sent, d_emb, vocab, embedder, args)
    model = model.cuda() if args.cuda >= 0 else model
    log.info(model)
    param_count = 0
    trainable_param_count = 0
    for name, param in model.named_parameters():
        param_count += np.prod(param.size())
        if param.requires_grad:
            trainable_param_count += np.prod(param.size())
            log.info(">> Trainable param %s: %s = %d", name, str(param.size()),
                     np.prod(param.size()))
    log.info(
        "Total number of parameters: {ct:d} ({ct:g})".format(ct=param_count))
    log.info("Number of trainable parameters: {ct:d} ({ct:g})".format(
        ct=trainable_param_count))
    return model
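The final loop in build_model tallies total and trainable parameters by taking np.prod over each parameter's size. Below is a standalone sketch of that bookkeeping factored into a helper; `count_parameters` is a hypothetical name used for illustration, not part of jiant, and the toy model exists only to make the snippet runnable.

# Standalone sketch of the parameter-counting loop at the end of build_model.
# `count_parameters` is a hypothetical helper name, not part of jiant.
import numpy as np
import torch.nn as nn

def count_parameters(model: nn.Module):
    """Return (total, trainable) parameter counts, mirroring the logging loop above."""
    total, trainable = 0, 0
    for name, param in model.named_parameters():
        n = int(np.prod(param.size()))
        total += n
        if param.requires_grad:
            trainable += n
    return total, trainable

if __name__ == '__main__':
    toy = nn.Sequential(nn.Linear(300, 512), nn.ReLU(), nn.Linear(512, 2))
    total, trainable = count_parameters(toy)
    print(total, trainable)  # -> 155138 155138 (all parameters are trainable in this toy model)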
Example #3
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to args.'''

    # Build embeddings.
    d_emb, embedder, cove_emb = build_embeddings(args, vocab, pretrained_embs)
    d_sent = args.d_hid

    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    tfm_params = Params({'input_dim': d_emb, 'hidden_dim': args.d_hid,
                         'projection_dim': args.d_tproj,
                         'feedforward_hidden_dim': args.d_ff,
                         'num_layers': args.n_layers_enc,
                         'num_attention_heads': args.n_heads})
    rnn_params = Params({'input_size': d_emb, 'bidirectional': args.bidirectional,
                         'hidden_size': args.d_hid, 'num_layers': args.n_layers_enc})

    if any(isinstance(task, LanguageModelingTask) for task in tasks):
        if args.bidirectional:
            rnn_params['bidirectional'] = False
            if args.sent_enc == 'rnn':
                fwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
                bwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
            elif args.sent_enc == 'transformer':
                fwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
                bwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
            sent_encoder = BiLMEncoder(vocab, embedder, args.n_layers_highway,
                                       fwd, bwd, dropout=args.dropout,
                                       skip_embs=args.skip_embs, cove_layer=cove_emb)
        else:  # not bidirectional
            if args.sent_enc == 'rnn':
                fwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
            elif args.sent_enc == 'transformer':
                fwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
            sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                           fwd, skip_embs=args.skip_embs,
                                           dropout=args.dropout, cove_layer=cove_emb)
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       sent_rnn, skip_embs=args.skip_embs,
                                       dropout=args.dropout, cove_layer=cove_emb)
        d_sent = (1 + args.bidirectional) * args.d_hid
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       transformer, dropout=args.dropout,
                                       skip_embs=args.skip_embs, cove_layer=cove_emb)
    else:
        assert_for_log(False, "No valid sentence encoder specified.")

    d_sent += args.skip_embs * d_emb

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, vocab)

    if args.is_probing_task:
        # TODO: move this logic to preprocess.py;
        # current implementation reloads MNLI data, which is slow.
        train_task_whitelist, eval_task_whitelist = get_task_whitelist(args)
        tasks_to_build, _, _ = get_tasks(train_task_whitelist,
                                         eval_task_whitelist,
                                         args.max_seq_len,
                                         path=args.data_dir,
                                         scratch_path=args.exp_dir)
    else:
        tasks_to_build = tasks

    # Attach task-specific params.
    for task in set(tasks + tasks_to_build):
        task_params = get_task_specific_params(args, task.name)
        log.info("\tTask '%s' params: %s", task.name,
                 json.dumps(task_params.as_dict(), indent=2))
        # Store task-specific params in case we want to access them later
        setattr(model, '%s_task_params' % task.name, task_params)

    # Actually construct modules.
    for task in tasks_to_build:
        build_module(task, model, d_sent, vocab, embedder, args)
    model = model.cuda() if args.cuda >= 0 else model
    log.info(model)
    param_count = 0
    trainable_param_count = 0
    for name, param in model.named_parameters():
        param_count += np.prod(param.size())
        if param.requires_grad:
            trainable_param_count += np.prod(param.size())
    log.info("Total number of parameters: {}".format(param_count))
    log.info("Number of trainable parameters: {}".format(trainable_param_count))
    return model
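The 'rnn' branch in these examples constructs a (bi)LSTM through `s2s_e.by_name('lstm').from_params(...)`, where `s2s_e` appears to be the usual jiant alias for AllenNLP's Seq2SeqEncoder. Below is a minimal standalone sketch of that path, again assuming allennlp 0.x, with illustrative dimensions in place of `d_emb`, `d_hid`, and `n_layers_enc`; note how the bidirectional LSTM's output dimension is 2 * hidden_size, matching `d_sent = 2 * args.d_hid` (equivalently `(1 + args.bidirectional) * args.d_hid`) above.

# Standalone sketch of the `rnn_params` -> LSTM encoder path, assuming allennlp 0.x.
# Dimensions are illustrative stand-ins for d_emb / d_hid / n_layers_enc.
import copy
import torch
from allennlp.common import Params
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder

rnn_params = Params({
    'input_size': 300,       # d_emb
    'bidirectional': True,
    'hidden_size': 512,      # d_hid
    'num_layers': 2,         # n_layers_enc
})
sent_rnn = Seq2SeqEncoder.by_name('lstm').from_params(copy.deepcopy(rnn_params))

tokens = torch.randn(4, 10, 300)            # (batch, seq_len, input_size)
mask = torch.ones(4, 10, dtype=torch.long)  # all positions valid
out = sent_rnn(tokens, mask)
print(out.shape, sent_rnn.get_output_dim())  # expected: (4, 10, 1024), 1024 = 2 * hidden_size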