def make_base_model(model_opt, fields, checkpoint=None):
    """Build the coarse-to-fine ParserModel from options and vocab fields.

    Args:
        model_opt: the options loaded from a checkpoint (or the command line).
        fields: `Field` objects providing the vocabularies for the model.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training; its weights are loaded
            into the freshly built model when given.
    Returns:
        the ParserModel (moved to GPU).
    """
    # Word embeddings (plus optional entity-type embeddings) feeding the
    # question encoder.
    w_embeddings = make_word_embeddings(model_opt, fields["src"].vocab, fields)
    if model_opt.ent_vec_size > 0:
        ent_embedding = make_embeddings(fields["ent"].vocab,
                                        model_opt.ent_vec_size)
    else:
        ent_embedding = None

    # Make question encoder.
    q_encoder = make_encoder(model_opt, w_embeddings, ent_embedding)
    # NOTE(review): option name is misspelled ("seprate") upstream; kept as-is
    # so existing configs and checkpoints keep working.
    if model_opt.seprate_encoder:
        q_tgt_encoder = make_encoder(model_opt, w_embeddings, ent_embedding)
        q_encoder = (q_encoder, q_tgt_encoder)

    if model_opt.layout_token_prune:
        # A dedicated encoder + linear head scores which layout tokens to keep.
        w_token_embeddings = make_word_embeddings(model_opt,
                                                  fields["src"].vocab, fields)
        q_token_encoder = make_encoder(model_opt, w_token_embeddings,
                                       ent_embedding)
        token_pruner = nn.Sequential(
            nn.Dropout(model_opt.dropout),
            # skip special tokens
            nn.Linear(
                model_opt.rnn_size,
                len(fields['lay'].vocab) - len(table.IO.special_token_list)))
    else:
        q_token_encoder = None
        token_pruner = None

    # Make layout decoder models.
    lay_field = 'lay'
    lay_embeddings = make_embeddings(fields[lay_field].vocab,
                                     model_opt.decoder_input_size)
    lay_decoder, lay_classifier = make_decoder(model_opt, fields, lay_field,
                                               lay_embeddings,
                                               model_opt.decoder_input_size)

    # Make target decoder models.
    if model_opt.no_share_emb_layout_encoder:
        lay_encoder_embeddings = make_embeddings(fields[lay_field].vocab,
                                                 model_opt.decoder_input_size)
    else:
        lay_encoder_embeddings = lay_embeddings
    if model_opt.no_lay_encoder:
        lay_encoder = lay_embeddings
    else:
        lay_encoder = make_layout_encoder(model_opt, lay_encoder_embeddings)

    q_co_attention = make_q_co_attention(model_opt)
    lay_co_attention = make_lay_co_attention(model_opt)

    # Fix: the target embeddings must be built over the target vocabulary;
    # the original used fields['lay'].vocab here, which was a copy-paste slip
    # (the target decoder/classifier below are built for the 'tgt' field).
    tgt_embeddings = make_embeddings(fields['tgt'].vocab,
                                     model_opt.decoder_input_size)
    tgt_decoder, tgt_classifier = make_decoder(model_opt, fields, 'tgt', None,
                                               model_opt.decoder_input_size)

    # Make ParserModel
    model = ParserModel(q_encoder, q_token_encoder, token_pruner, lay_decoder,
                        lay_classifier, lay_encoder, q_co_attention,
                        lay_co_attention, tgt_embeddings, tgt_decoder,
                        tgt_classifier, model_opt)

    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
    model.cuda()

    return model
def make_base_model(model_opt, fields, checkpoint=None):
    """Assemble the ParserModel for aggregation/selection/condition prediction.

    Args:
        model_opt: the options loaded from a checkpoint (or the command line).
        fields: `Field` objects providing the vocabularies for the model.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training; its weights are loaded
            into the freshly built model when given.
    Returns:
        the ParserModel.
    """
    rnn_size = model_opt.rnn_size
    drop = model_opt.dropout
    score_size = model_opt.score_size

    def _log_softmax_head(num_classes):
        # Dropout -> Linear -> LogSoftmax classification head over rnn_size.
        return nn.Sequential(nn.Dropout(drop),
                             nn.Linear(rnn_size, num_classes),
                             nn.LogSoftmax())

    def _cond_scorer(width_factor):
        # Condition scorer over a `width_factor`-wide concatenation of states.
        return CondMatchScorer(
            MatchScorer(width_factor * rnn_size, score_size, drop))

    # Question-side word embeddings, plus optional entity-type embeddings.
    w_embeddings = make_word_embeddings(model_opt, fields["src"].vocab, fields)
    ent_embedding = (make_embeddings(fields["ent"].vocab,
                                     model_opt.ent_vec_size)
                     if model_opt.ent_vec_size > 0 else None)

    # Question encoder, table encoder, and question/table co-attention.
    q_encoder = make_encoder(model_opt, w_embeddings, ent_embedding)
    tbl_encoder = make_table_encoder(model_opt, w_embeddings)
    co_attention = make_co_attention(model_opt)

    # Classification heads and the column-selection scorer.
    agg_classifier = _log_softmax_head(len(agg_ops))
    sel_match = MatchScorer(2 * rnn_size, score_size, drop)
    lay_classifier = _log_softmax_head(len(fields['lay'].vocab))

    # Layout (condition-operator) encoding: either an RNN over operator
    # embeddings, or plain embeddings at rnn_size with no extra encoder.
    if model_opt.layout_encode == 'rnn':
        cond_embedding = make_embeddings(fields["cond_op"].vocab,
                                         model_opt.cond_op_vec_size)
        lay_encoder = make_encoder(model_opt, cond_embedding)
    else:
        cond_embedding = make_embeddings(fields["cond_op"].vocab, rnn_size)
        lay_encoder = None

    # Condition decoder plus column / left-span / right-span scorers.
    cond_decoder = make_cond_decoder(model_opt)
    cond_col_match = _cond_scorer(2)
    cond_span_l_match = _cond_scorer(2)
    cond_span_r_match = _cond_scorer(3)

    pad_word_index = fields["src"].vocab.stoi[table.IO.PAD_WORD]
    model = ParserModel(q_encoder, tbl_encoder, co_attention, agg_classifier,
                        sel_match, lay_classifier, cond_embedding, lay_encoder,
                        cond_decoder, cond_col_match, cond_span_l_match,
                        cond_span_r_match, model_opt, pad_word_index)

    if checkpoint is not None:
        print('Loading model')
        model.load_state_dict(checkpoint['model'])
    # model.cuda()

    return model
def make_base_model(model_args, fields, checkpoint=None):
    """Build the ParserModel (encoders, layout/target decoders) from args.

    Args:
        model_args: model hyper-parameters / options (possibly restored from
            a checkpoint).
        fields: `Field` objects providing the vocabularies for the model.
        checkpoint: optional training snapshot; when given, its weights are
            loaded into the freshly built model.
    Returns:
        the ParserModel, moved to GPU when ``model_args.cuda`` is set.
    """
    logger.info(" * make word embeddings")
    w_embeddings = make_word_embeddings(model_args, vocab=fields["src"].vocab)

    if model_args.ent_vec_size > 0:
        logger.info(" * make entity type embeddings")
        ent_embedding = make_embeddings(fields["ent"].vocab,
                                        model_args.ent_vec_size)
    else:
        ent_embedding = None

    logger.info(" * make question encoder")
    q_encoder = make_encoder(model_args, w_embeddings, ent_embedding)

    # TODO(review): clarify why a second, target-side question encoder is
    # needed when separate_encoder is set.
    if model_args.separate_encoder:
        q_tgt_encoder = make_encoder(model_args, w_embeddings, ent_embedding)
        q_encoder = (q_encoder, q_tgt_encoder)

    if model_args.layout_token_prune:
        logger.info(" * make layout token pruner")
        w_token_embeddings = make_word_embeddings(model_args,
                                                  vocab=fields["src"].vocab)
        q_token_encoder = make_encoder(model_args, w_token_embeddings,
                                       ent_embedding)
        token_pruner = nn.Sequential(
            nn.Dropout(model_args.dropout),
            nn.Linear(model_args.rnn_size,
                      len(fields['lay'].vocab) -
                      len(table.IO.SPECIAL_TOKEN_LIST))  # skip special tokens
        )
    else:
        q_token_encoder = None
        token_pruner = None

    logger.info(" * make layout embeddings")
    lay_embeddings = make_embeddings(fields['lay'].vocab,
                                     model_args.decoder_input_size)
    logger.info(" * make layout decoder + classifier")
    lay_decoder, lay_classifier = make_decoder(model_args, fields, 'lay',
                                               lay_embeddings,
                                               model_args.decoder_input_size)

    if model_args.no_share_emb_layout_encoder:
        lay_encoder_embeddings = make_embeddings(fields['lay'].vocab,
                                                 model_args.decoder_input_size)
    else:
        lay_encoder_embeddings = lay_embeddings
    if model_args.no_lay_encoder:
        lay_encoder = lay_embeddings
    else:
        lay_encoder = make_layout_encoder(model_args, lay_encoder_embeddings)

    logger.info(" * make question co-attention")
    q_co_attention = make_q_co_attention(model_args)
    logger.info(" * make layout co-attention")
    lay_co_attention = make_lay_co_attention(model_args)

    logger.info(" * make target embeddings")
    tgt_embeddings = make_embeddings(fields['tgt'].vocab,
                                     model_args.decoder_input_size)
    logger.info(" * make target decoder + classifier")
    tgt_decoder, tgt_classifier = make_decoder(model_args, fields, 'tgt',
                                               None,
                                               model_args.decoder_input_size)

    logger.info(" * make ParserModel")
    model = ParserModel(q_encoder, q_token_encoder, token_pruner, lay_decoder,
                        lay_classifier, lay_encoder, q_co_attention,
                        lay_co_attention, tgt_embeddings, tgt_decoder,
                        tgt_classifier, model_args)

    if checkpoint is not None:
        # Fix: pass the path as a lazy %-style logging argument instead of
        # formatting eagerly with `%` — the string is only built if emitted.
        logger.info(' * loading model from checkpoint [%s]',
                    model_args.model_path)
        model.load_state_dict(checkpoint['model'])

    if model_args.cuda:
        logger.info(" * use cuda")
        model.cuda()
    else:
        logger.info(" * use cpu")

    return model