def build_decoder(task, d_inp, vocab, embedder, args):
    ''' Build a task specific decoder '''
    rnn = s2s_e.by_name('lstm').from_params(
        Params({'input_size': embedder.get_output_dim(),
                'hidden_size': args.d_hid_dec,
                'num_layers': args.n_layers_dec,
                'bidirectional': False}))
    decoder = SentenceEncoder(vocab, embedder, 0, rnn)
    hid2voc = nn.Linear(args.d_hid_dec, args.max_word_v_size)
    return decoder, hid2voc
def build_decoder(task, d_inp, vocab, embedder, args): """ Build a task specific decoder """ rnn = s2s_e.by_name("lstm").from_params( Params({ "input_size": embedder.get_output_dim(), "hidden_size": args.s2s["d_hid_dec"], "num_layers": args.s2s["n_layers_dec"], "bidirectional": False, })) decoder = SentenceEncoder(vocab, embedder, 0, rnn) hid2voc = nn.Linear(args.s2s["d_hid_dec"], args.max_word_v_size) return decoder, hid2voc
def build_pair_attn(d_in, use_attn, d_hid_attn):
    ''' Build the pair model '''
    if not use_attn:
        pair_attn = None
    else:
        d_inp_model = 2 * d_in
        modeling_layer = s2s_e.by_name('lstm').from_params(
            Params({'input_size': d_inp_model,
                    'hidden_size': d_hid_attn,
                    'num_layers': 1,
                    'bidirectional': True}))
        # Note: `vocab` and `params` are not arguments of this helper; they are expected to
        # be available from the enclosing scope in which it is defined.
        pair_attn = AttnPairEncoder(vocab, modeling_layer, dropout=params["dropout"])
    return pair_attn
def build_pair_attn(d_in, d_hid_attn):
    """ Build the pair model """
    d_inp_model = 2 * d_in
    modeling_layer = s2s_e.by_name("lstm").from_params(
        Params({
            "input_size": d_inp_model,
            "hidden_size": d_hid_attn,
            "num_layers": 1,
            "bidirectional": True,
        }))
    # Note: `model` and `params` are not arguments of this helper; they are expected to be
    # available from the enclosing scope in which it is defined.
    pair_attn = AttnPairEncoder(model.vocab, modeling_layer, dropout=params["dropout"])
    return pair_attn
def build_model(args, vocab, pretrained_embs, tasks):
    ''' Build model according to arguments '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        logging.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(vocab.get_vocab_size('tokens'), d_word,
                              weight=word_embs, trainable=train_embs,
                              padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0
    token_embedder = {"words": word_embedder}
    d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder)
    d_hid_phrase = args.d_hid

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(
        Params({'input_size': d_inp_phrase,
                'hidden_size': d_hid_phrase,
                'num_layers': args.n_layers_enc,
                'bidirectional': True}))
    pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder, n_layers_highway,
                                       phrase_layer, dropout=args.dropout)
    d_pair = 2 * d_hid_phrase

    if args.fds:
        _FDS = FDS(feature_dim=d_pair * 4, bucket_num=args.bucket_num,
                   bucket_start=args.bucket_start, start_update=args.start_update,
                   start_smooth=args.start_smooth, kernel=args.fds_kernel,
                   ks=args.fds_ks, sigma=args.fds_sigma, momentum=args.fds_mmt)

    # Build model and classifiers
    model = MultiTaskModel(args, pair_encoder, _FDS if args.fds else None)
    build_regressor(tasks, model, d_pair)
    if args.cuda >= 0:
        model = model.cuda()
    return model
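# Hedged sketch (assumption, not from the original source): build_regressor is referenced
# above but not shown. Since the FDS feature dimension is d_pair * 4, a plausible regression
# head maps the usual [s1; s2; |s1 - s2|; s1 * s2] pair representation to a scalar. The
# helper name, layer sizes, and attribute name below are illustrative only.
def build_regressor_sketch(tasks, model, d_pair):
    for task in tasks:
        regressor = nn.Sequential(
            nn.Linear(d_pair * 4, d_pair),
            nn.Tanh(),
            nn.Linear(d_pair, 1),
        )
        # Register one regression head per task on the shared model.
        setattr(model, '%s_mdl' % task.name, regressor)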
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to args '''
    # Build embeddings.
    if args.openai_transformer:
        # Note: incompatible with other embedders, but logic in preprocess.py
        # should prevent these from being enabled anyway.
        from .openai_transformer_lm.utils import OpenAIEmbedderModule
        log.info("Using OpenAI transformer model; skipping other embedders.")
        cove_layer = None
        embedder = OpenAIEmbedderModule(args)
        d_emb = embedder.get_output_dim()
    else:
        # Default case, used for ELMo, CoVe, word embeddings, etc.
        d_emb, embedder, cove_layer = build_embeddings(args, vocab, tasks, pretrained_embs)
    d_sent = args.d_hid

    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    # Note: sent_enc is expected to apply dropout to its input _and_ output if needed,
    # so embedding modules and classifier modules should not apply dropout themselves.
    tfm_params = Params({'input_dim': d_emb, 'hidden_dim': args.d_hid,
                         'projection_dim': args.d_tproj,
                         'feedforward_hidden_dim': args.d_ff,
                         'num_layers': args.n_layers_enc,
                         'num_attention_heads': args.n_heads})
    rnn_params = Params({'input_size': d_emb, 'bidirectional': True,
                         'hidden_size': args.d_hid, 'num_layers': args.n_layers_enc})

    if any(isinstance(task, LanguageModelingTask) for task in tasks) or \
            args.sent_enc == 'bilm':
        assert_for_log(args.sent_enc in ['rnn', 'bilm'], "Only RNNLM supported!")
        if args.elmo:
            assert_for_log(args.elmo_chars_only, "LM with full ELMo not supported")
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       bilm, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLM architecture for shared encoder!")
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        log.info("Using BoW architecture for shared encoder!")
        assert_for_log(not args.skip_embs,
                       "Skip connection not currently supported with `bow` encoder.")
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       sent_rnn, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLSTM architecture for shared encoder!")
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       transformer, dropout=args.dropout,
                                       skip_embs=args.skip_embs, cove_layer=cove_layer,
                                       sep_embs_for_skip=args.sep_embs_for_skip)
        log.info("Using Transformer architecture for shared encoder!")
    elif args.sent_enc == 'null':
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(args.skip_embs,
                       f"skip_embs must be set for '{args.sent_enc}' encoder")
        phrase_layer = NullPhraseLayer(rnn_params['input_size'])
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       phrase_layer, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 0  # skip connection added below
        log.info("No shared encoder (just using word embeddings)!")
    else:
        assert_for_log(False, "No valid sentence encoder specified.")

    d_sent += args.skip_embs * d_emb

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, vocab)

    if args.is_probing_task:
        # TODO: move this logic to preprocess.py;
        # current implementation reloads MNLI data, which is slow.
        train_task_whitelist, eval_task_whitelist = get_task_whitelist(args)
        tasks_to_build, _, _ = get_tasks(train_task_whitelist, eval_task_whitelist,
                                         args.max_seq_len, path=args.data_dir,
                                         scratch_path=args.exp_dir)
    else:
        tasks_to_build = tasks

    # Attach task-specific params.
    for task in set(tasks + tasks_to_build):
        task_params = get_task_specific_params(args, task.name)
        log.info("\tTask '%s' params: %s", task.name,
                 json.dumps(task_params.as_dict(), indent=2))
        # Store task-specific params in case we want to access later
        setattr(model, '%s_task_params' % task.name, task_params)

    # Actually construct modules.
    for task in tasks_to_build:
        # If the name of the task is different than the classifier it should use,
        # then skip the module creation.
        if task.name != model._get_task_params(task.name).get('use_classifier', task.name):
            continue
        build_module(task, model, d_sent, d_emb, vocab, embedder, args)

    model = model.cuda() if args.cuda >= 0 else model
    log.info(model)
    param_count = 0
    trainable_param_count = 0
    for name, param in model.named_parameters():
        param_count += np.prod(param.size())
        if param.requires_grad:
            trainable_param_count += np.prod(param.size())
            log.info(">> Trainable param %s: %s = %d", name, str(param.size()),
                     np.prod(param.size()))
    log.info("Total number of parameters: {ct:d} ({ct:g})".format(ct=param_count))
    log.info("Number of trainable parameters: {ct:d} ({ct:g})".format(
        ct=trainable_param_count))
    return model
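# Hedged sketch (assumption, not from the original source): assert_for_log, used throughout
# these builders, behaves like a plain assertion whose message is intended to be surfaced in
# the logs. A minimal version consistent with how it is called here:
def assert_for_log(condition, error_message):
    assert condition, error_message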
def build_sent_encoder(args, vocab, d_emb, tasks, embedder, cove_layer):
    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    # Note: sent_enc is expected to apply dropout to its input _and_ output if needed.
    rnn_params = Params({
        "input_size": d_emb,
        "bidirectional": True,
        "hidden_size": args.d_hid,
        "num_layers": args.n_layers_enc,
    })
    if args.sent_enc == "onlstm":
        onlayer = ONLSTMPhraseLayer(
            vocab,
            args.d_word,
            args.d_hid,
            args.n_layers_enc,
            args.onlstm_chunk_size,
            args.onlstm_dropconnect,
            args.onlstm_dropouti,
            args.dropout,
            args.onlstm_dropouth,
            embedder,
            args.batch_size,
        )
        # The 'onlayer' acts as a phrase layer module for the larger SentenceEncoder module.
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            onlayer.onlayer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = args.d_word
        log.info("Using ON-LSTM sentence encoder!")
    elif args.sent_enc == "prpn":
        prpnlayer = PRPNPhraseLayer(
            vocab,
            args.d_word,
            args.d_hid,
            args.n_layers_enc,
            args.n_slots,
            args.n_lookback,
            args.resolution,
            args.dropout,
            args.idropout,
            args.rdropout,
            args.res,
            embedder,
            args.batch_size,
        )
        # The 'prpn' acts as a phrase layer module for the larger SentenceEncoder module.
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            prpnlayer.prpnlayer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = args.d_word
        log.info("Using PRPN sentence encoder!")
    elif any(isinstance(task, LanguageModelingTask) for task in tasks) or args.sent_enc == "bilm":
        assert_for_log(args.sent_enc in ["rnn", "bilm"], "Only RNNLM supported!")
        assert_for_log(
            args.input_module != "elmo" and not args.input_module.startswith("bert"),
            "LM with full ELMo and BERT not supported",
        )
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            bilm,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 2 * args.d_hid
    elif args.sent_enc == "bow":
        sent_encoder = BoWSentEncoder(vocab, embedder)
        assert_for_log(
            not args.skip_embs,
            "Skip connection not currently supported with `bow` encoder.")
        d_sent = d_emb
    elif args.sent_enc == "rnn":
        sent_rnn = s2s_e.by_name("lstm").from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            sent_rnn,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 2 * args.d_hid
    elif args.sent_enc == "none":
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(
            args.skip_embs,
            f"skip_embs must be set for '{args.sent_enc}' encoder")
        phrase_layer = NullPhraseLayer(rnn_params["input_size"])
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            phrase_layer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 0  # skip connection added below
        log.info("No shared encoder (just using word embeddings)!")
    else:
        assert_for_log(False, "No valid sentence encoder specified.")
    return sent_encoder, d_sent
def build_sent_encoder(args, vocab, d_emb, tasks, embedder, cove_layer):
    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    # Note: sent_enc is expected to apply dropout to its input _and_ output if needed.
    tfm_params = Params({'input_dim': d_emb, 'hidden_dim': args.d_hid,
                         'projection_dim': args.d_tproj,
                         'feedforward_hidden_dim': args.d_ff,
                         'num_layers': args.n_layers_enc,
                         'num_attention_heads': args.n_heads})
    rnn_params = Params({'input_size': d_emb, 'bidirectional': True,
                         'hidden_size': args.d_hid, 'num_layers': args.n_layers_enc})

    # Make sentence encoder
    if any(isinstance(task, LanguageModelingTask) for task in tasks) or \
            args.sent_enc == 'bilm':
        assert_for_log(args.sent_enc in ['rnn', 'bilm'], "Only RNNLM supported!")
        if args.elmo:
            assert_for_log(args.elmo_chars_only, "LM with full ELMo not supported")
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       bilm, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLM architecture for shared encoder!")
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        log.info("Using BoW architecture for shared encoder!")
        assert_for_log(not args.skip_embs,
                       "Skip connection not currently supported with `bow` encoder.")
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       sent_rnn, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 2 * args.d_hid
        log.info("Using BiLSTM architecture for shared encoder!")
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       transformer, dropout=args.dropout,
                                       skip_embs=args.skip_embs, cove_layer=cove_layer,
                                       sep_embs_for_skip=args.sep_embs_for_skip)
        log.info("Using Transformer architecture for shared encoder!")
    elif args.sent_enc == 'null':
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(args.skip_embs,
                       f"skip_embs must be set for '{args.sent_enc}' encoder")
        phrase_layer = NullPhraseLayer(rnn_params['input_size'])
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway,
                                       phrase_layer, skip_embs=args.skip_embs,
                                       dropout=args.dropout,
                                       sep_embs_for_skip=args.sep_embs_for_skip,
                                       cove_layer=cove_layer)
        d_sent = 0  # skip connection added below
        log.info("No shared encoder (just using word embeddings)!")
    else:
        assert_for_log(False, "No valid sentence encoder specified.")
    return sent_encoder, d_sent
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to arguments

    args:
        - args (TODO): object with attributes:
        - vocab (Vocab):
        - pretrained_embs (TODO): word embeddings to use

    returns
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(
        vocab.get_vocab_size('tokens'), d_word, weight=word_embs, trainable=train_embs,
        padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle elmo and cove
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH, weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) if "words" in token_embedder \
        else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(
        Params({
            'input_size': d_inp_phrase,
            'hidden_size': d_hid_phrase,
            'num_layers': args.n_layers_enc,
            'bidirectional': True
        }))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(
            vocab, text_field_embedder)  # maybe should take in CoVe/ELMO?
        pair_encoder = None  # model will just run sent_encoder on both inputs
    else:  # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab, text_field_embedder, n_layers_highway,
                                           phrase_layer, dropout=args.dropout,
                                           cove_layer=cove_layer, elmo_layer=elmo)
        d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        if args.pair_enc == 'simple':  # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
            pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder, n_layers_highway,
                                               phrase_layer, cove_layer=cove_layer,
                                               elmo_layer=elmo, dropout=args.dropout)
            d_pair = d_single
        elif args.pair_enc == 'attn':
            log.info("\tUsing attention!")
            d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
            d_hid_model = d_hid_phrase  # make it as large as the original sentence encoding
            modeling_layer = s2s_e.by_name('lstm').from_params(
                Params({
                    'input_size': d_inp_model,
                    'hidden_size': d_hid_model,
                    'num_layers': 1,
                    'bidirectional': True
                }))
            pair_encoder = HeadlessPairAttnEncoder(vocab, text_field_embedder,
                                                   n_layers_highway, phrase_layer,
                                                   DotProductSimilarity(), modeling_layer,
                                                   cove_layer=cove_layer, elmo_layer=elmo,
                                                   deep_elmo=args.deep_elmo,
                                                   dropout=args.dropout)
            d_pair = 2 * d_hid_phrase
            # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
            #                = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to args '''
    # Build embeddings.
    d_emb, embedder, cove_emb = build_embeddings(args, vocab, pretrained_embs)
    d_sent = args.d_hid

    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    tfm_params = Params({'input_dim': d_emb, 'hidden_dim': args.d_hid,
                         'projection_dim': args.d_tproj,
                         'feedforward_hidden_dim': args.d_ff,
                         'num_layers': args.n_layers_enc,
                         'num_attention_heads': args.n_heads})
    rnn_params = Params({'input_size': d_emb, 'bidirectional': args.bidirectional,
                         'hidden_size': args.d_hid, 'num_layers': args.n_layers_enc})

    if sum([isinstance(task, LanguageModelingTask) for task in tasks]):
        if args.bidirectional:
            rnn_params['bidirectional'] = False
            if args.sent_enc == 'rnn':
                fwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
                bwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
            elif args.sent_enc == 'transformer':
                fwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
                bwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
            sent_encoder = BiLMEncoder(vocab, embedder, args.n_layers_highway, fwd, bwd,
                                       dropout=args.dropout, skip_embs=args.skip_embs,
                                       cove_layer=cove_emb)
        else:  # not bidirectional
            if args.sent_enc == 'rnn':
                fwd = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
            elif args.sent_enc == 'transformer':
                fwd = MaskedStackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
            sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway, fwd,
                                           skip_embs=args.skip_embs, dropout=args.dropout,
                                           cove_layer=cove_emb)
    elif args.sent_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, embedder)
        d_sent = d_emb
    elif args.sent_enc == 'rnn':
        sent_rnn = s2s_e.by_name('lstm').from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway, sent_rnn,
                                       skip_embs=args.skip_embs, dropout=args.dropout,
                                       cove_layer=cove_emb)
        d_sent = (1 + args.bidirectional) * args.d_hid
    elif args.sent_enc == 'transformer':
        transformer = StackedSelfAttentionEncoder.from_params(copy.deepcopy(tfm_params))
        sent_encoder = SentenceEncoder(vocab, embedder, args.n_layers_highway, transformer,
                                       dropout=args.dropout, skip_embs=args.skip_embs,
                                       cove_layer=cove_emb)
    else:
        assert_for_log(False, "No valid sentence encoder specified.")

    d_sent += args.skip_embs * d_emb

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, vocab)

    if args.is_probing_task:
        # TODO: move this logic to preprocess.py;
        # current implementation reloads MNLI data, which is slow.
        train_task_whitelist, eval_task_whitelist = get_task_whitelist(args)
        tasks_to_build, _, _ = get_tasks(train_task_whitelist, eval_task_whitelist,
                                         args.max_seq_len, path=args.data_dir,
                                         scratch_path=args.exp_dir)
    else:
        tasks_to_build = tasks

    # Attach task-specific params.
    for task in set(tasks + tasks_to_build):
        task_params = get_task_specific_params(args, task.name)
        log.info("\tTask '%s' params: %s", task.name,
                 json.dumps(task_params.as_dict(), indent=2))
        # Store task-specific params in case we want to access later
        setattr(model, '%s_task_params' % task.name, task_params)

    # Actually construct modules.
    for task in tasks_to_build:
        build_module(task, model, d_sent, vocab, embedder, args)

    model = model.cuda() if args.cuda >= 0 else model
    log.info(model)
    param_count = 0
    trainable_param_count = 0
    for name, param in model.named_parameters():
        param_count += np.prod(param.size())
        if param.requires_grad:
            trainable_param_count += np.prod(param.size())
    log.info("Total number of parameters: {}".format(param_count))
    log.info("Number of trainable parameters: {}".format(trainable_param_count))
    return model
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to arguments

    args:
        - args (TODO): object with attributes:
        - vocab (Vocab):
        - pretrained_embs (TODO): word embeddings to use

    returns
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(vocab.get_vocab_size('tokens'), d_word, weight=word_embs,
                              trainable=train_embs,
                              padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle elmo and cove
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH, weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) if "words" in token_embedder \
        else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_phrase,
                                                             'hidden_size': d_hid_phrase,
                                                             'num_layers': args.n_layers_enc,
                                                             'bidirectional': True}))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, text_field_embedder)  # maybe should take in CoVe/ELMO?
        pair_encoder = None  # model will just run sent_encoder on both inputs
    else:  # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab, text_field_embedder, n_layers_highway,
                                           phrase_layer, dropout=args.dropout,
                                           cove_layer=cove_layer, elmo_layer=elmo)
        d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        if args.pair_enc == 'simple':  # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
            pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder, n_layers_highway,
                                               phrase_layer, cove_layer=cove_layer,
                                               elmo_layer=elmo, dropout=args.dropout)
            d_pair = d_single
        elif args.pair_enc == 'attn':
            log.info("\tUsing attention!")
            d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
            d_hid_model = d_hid_phrase  # make it as large as the original sentence encoding
            modeling_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_model,
                                                                       'hidden_size': d_hid_model,
                                                                       'num_layers': 1,
                                                                       'bidirectional': True}))
            pair_encoder = HeadlessPairAttnEncoder(vocab, text_field_embedder,
                                                   n_layers_highway, phrase_layer,
                                                   DotProductSimilarity(), modeling_layer,
                                                   cove_layer=cove_layer, elmo_layer=elmo,
                                                   deep_elmo=args.deep_elmo,
                                                   dropout=args.dropout)
            d_pair = 2 * d_hid_phrase
            # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
            #                = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model
def build_sent_encoder(args, vocab, d_emb, tasks, embedder, cove_layer):
    # Build single sentence encoder: the main component of interest
    # Need special handling for language modeling
    # Note: sent_enc is expected to apply dropout to its input _and_ output if
    # needed.
    rnn_params = Params(
        {
            "input_size": d_emb,
            "bidirectional": True,
            "hidden_size": args.d_hid,
            "num_layers": args.n_layers_enc,
        }
    )
    if args.sent_enc == "onlstm":
        onlayer = ONLSTMPhraseLayer(
            vocab,
            args.d_word,
            args.d_hid,
            args.n_layers_enc,
            args.onlstm_chunk_size,
            args.onlstm_dropconnect,
            args.onlstm_dropouti,
            args.dropout,
            args.onlstm_dropouth,
            embedder,
            args.batch_size,
        )
        # The 'onlayer' acts as a phrase layer module for the larger SentenceEncoder module.
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            onlayer.onlayer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = args.d_word
        log.info("Using ON-LSTM sentence encoder!")
    elif args.sent_enc == "prpn":
        prpnlayer = PRPNPhraseLayer(
            vocab,
            args.d_word,
            args.d_hid,
            args.n_layers_enc,
            args.n_slots,
            args.n_lookback,
            args.resolution,
            args.dropout,
            args.idropout,
            args.rdropout,
            args.res,
            embedder,
            args.batch_size,
        )
        # The 'prpn' acts as a phrase layer module for the larger SentenceEncoder module.
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            prpnlayer.prpnlayer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = args.d_word
        log.info("Using PRPN sentence encoder!")
    elif any(isinstance(task, LanguageModelingTask) for task in tasks) or args.sent_enc == "bilm":
        assert_for_log(args.sent_enc in ["rnn", "bilm"], "Only RNNLM supported!")
        assert_for_log(
            not (
                args.input_module == "elmo"
                or args.input_module.startswith("bert")
                or args.input_module.startswith("xlnet")
            ),
            f"Using input_module = {args.input_module} for language modeling is probably not a "
            "good idea, since it allows the language model to use information from the right-hand "
            "context.",
        )
        bilm = BiLMEncoder(d_emb, args.d_hid, args.d_hid, args.n_layers_enc)
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            bilm,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 2 * args.d_hid
    elif args.sent_enc == "bow":
        sent_encoder = BoWSentEncoder(vocab, embedder)
        assert_for_log(
            not args.skip_embs, "Skip connection not currently supported with `bow` encoder."
        )
        d_sent = d_emb
    elif args.sent_enc == "rnn":
        sent_rnn = s2s_e.by_name("lstm").from_params(copy.deepcopy(rnn_params))
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            sent_rnn,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 2 * args.d_hid
    elif args.sent_enc == "none":
        # Expose word representation layer (GloVe, ELMo, etc.) directly.
        assert_for_log(
            args.skip_embs,
            "skip_embs is false and sent_enc is none, "
            "which means that your token representations are zero-dimensional. "
            "Consider setting skip_embs.",
        )
        phrase_layer = NullPhraseLayer(rnn_params["input_size"])
        sent_encoder = SentenceEncoder(
            vocab,
            embedder,
            args.n_layers_highway,
            phrase_layer,
            skip_embs=args.skip_embs,
            dropout=args.dropout,
            sep_embs_for_skip=args.sep_embs_for_skip,
            cove_layer=cove_layer,
        )
        d_sent = 0
    else:
        assert_for_log(
            False, f"Shared encoder layer specification `{args.sent_enc}` not recognized."
        )
    return sent_encoder, d_sent
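# Hedged usage sketch (assumption, not from the original source): how the encoder returned by
# build_sent_encoder is consumed, mirroring the build_model variants above. The helper name
# below is illustrative.
def build_model_core(args, vocab, d_emb, tasks, embedder, cove_layer):
    sent_encoder, d_sent = build_sent_encoder(args, vocab, d_emb, tasks, embedder, cove_layer)
    # The skip connection concatenates the raw embeddings onto the encoder output,
    # so the effective sentence dimension grows by d_emb when skip_embs is set.
    d_sent += args.skip_embs * d_emb
    model = MultiTaskModel(args, sent_encoder, vocab)
    return model, d_sent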