def __init__(self, vision_size, hidden_size, dec_hidden_size, dropout_ratio,
             bidirectional, update, bert_n_layers, reverse_input, top_lstm,
             vl_layers, bert_type, vocab_size, action_space):
    super(BertAddActionSepPreTrain, self).__init__()
    self.bert = BertAddSepEncoder(vision_size, hidden_size, dec_hidden_size,
                                  dropout_ratio, bidirectional, update,
                                  bert_n_layers, reverse_input, top_lstm,
                                  vl_layers, bert_type)
    self.config = self.bert.config
    self.dropout = nn.Dropout(dropout_ratio)
    if not self.bert.top_lstm:
        # No top LSTM: predict the next action directly from the transformer hidden state.
        self.next_action = NextActionPrediction(
            self.bert.transformer_hidden_size, action_space)
    else:
        # With a top LSTM, the prediction head size depends on directionality.
        self.num_directions = 2 if bidirectional else 1
        self.in_size = hidden_size * self.num_directions
        self.next_action = NextActionPrediction(self.in_size, action_space)
        self.config.hidden_size = self.in_size
    self.lang_criterion = nn.CrossEntropyLoss(ignore_index=0)   # masked-LM loss; label 0 is ignored
    self.act_criterion = nn.CrossEntropyLoss(ignore_index=-1)   # action loss; label -1 is ignored
    self.mlmhead = BertOnlyMLMHead(self.config)
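# A minimal, hypothetical sketch of how the two criteria above could be combined in a
# forward pass. The real forward method is not shown here; the tensor names, shapes,
# and head call signatures below are illustrative assumptions only.
import torch
import torch.nn as nn

batch, seq_len, vocab_size, action_space = 2, 8, 30522, 6

lang_criterion = nn.CrossEntropyLoss(ignore_index=0)    # positions labelled 0 are ignored
act_criterion = nn.CrossEntropyLoss(ignore_index=-1)    # steps labelled -1 have no action target

mlm_scores = torch.randn(batch, seq_len, vocab_size)    # stand-in for self.mlmhead(hidden_states)
mlm_labels = torch.randint(1, vocab_size, (batch, seq_len))
mlm_labels[:, :4] = 0                                   # unmasked positions carry label 0
act_scores = torch.randn(batch, action_space)           # stand-in for self.next_action(pooled_state)
act_labels = torch.tensor([2, -1])                      # second sample has no action label

lang_loss = lang_criterion(mlm_scores.view(-1, vocab_size), mlm_labels.view(-1))
act_loss = act_criterion(act_scores, act_labels)
total_loss = lang_loss + act_loss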
def __init__(self, config, mask_word_id=0, search_beam_size=1, length_penalty=1.0,
             eos_id=0, sos_id=0, forbid_duplicate_ngrams=False,
             forbid_ignore_set=None, ngram_size=3, min_len=0):
    super(UnilmForSeq2SeqDecode, self).__init__(config)
    self.bert = UnilmModelIncr(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    self.mask_word_id = mask_word_id
    self.search_beam_size = search_beam_size
    self.length_penalty = length_penalty
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.forbid_duplicate_ngrams = forbid_duplicate_ngrams
    self.forbid_ignore_set = forbid_ignore_set
    self.ngram_size = ngram_size
    self.min_len = min_len
    self.init_weights()
    self.tie_weights()
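# Hypothetical sketch of how the decoding hyper-parameters stored above are typically
# used during beam search. The actual search loop lives in the (unshown) decode method;
# the helper names below are illustrative assumptions, not the repo's code.
def length_normalized_score(sum_log_prob, length, length_penalty=1.0):
    # Rescore a hypothesis so the beam does not always prefer short outputs.
    return sum_log_prob / (length ** length_penalty)

def has_duplicate_ngram(token_ids, ngram_size=3):
    # True if the sequence already repeats an n-gram of the given size
    # (the kind of hypothesis forbid_duplicate_ngrams prunes).
    seen = set()
    for i in range(len(token_ids) - ngram_size + 1):
        ngram = tuple(token_ids[i:i + ngram_size])
        if ngram in seen:
            return True
        seen.add(ngram)
    return False

print(length_normalized_score(-12.0, 10, length_penalty=1.0))   # -1.2
print(has_duplicate_ngram([5, 7, 9, 5, 7, 9], ngram_size=3))     # True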
def __init__(self, config):
    super(DicAddActionPreTrain, self).__init__(config)
    self.config = config
    self.bert = DicModel(config)
    self.next_action = NextActionPrediction(self.config.hidden_size,
                                            self.config.action_space)
    self.criterion = nn.CrossEntropyLoss(ignore_index=-1)
    self.mlmhead = BertOnlyMLMHead(self.config)
    self.init_weights()
    self.tie_weights()
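# tie_weights() above comes from the Hugging Face PreTrainedModel base class: it shares
# the MLM head's output projection with the input word embeddings. The runnable sketch
# below shows roughly what that tying amounts to, using only stock Hugging Face classes
# (DicModel and NextActionPrediction are repo-specific and not reproduced here).
import torch.nn as nn
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertOnlyMLMHead

tiny_config = BertConfig(hidden_size=64, num_hidden_layers=1, num_attention_heads=2,
                         intermediate_size=128, vocab_size=100)
word_embeddings = nn.Embedding(tiny_config.vocab_size, tiny_config.hidden_size)
mlm_head = BertOnlyMLMHead(tiny_config)
# Share one weight matrix between the embedding layer and the MLM decoder.
mlm_head.predictions.decoder.weight = word_embeddings.weight
assert mlm_head.predictions.decoder.weight is word_embeddings.weight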
def __init__(self, vision_size, hidden_size, dec_hidden_size, dropout_ratio,
             bidirectional, update, bert_n_layers, reverse_input, top_lstm,
             vl_layers, bert_type, vocab_size):
    super(BertLangPreTrain, self).__init__()
    self.bert = BertLangEncoder(vision_size, hidden_size, dec_hidden_size,
                                dropout_ratio, bidirectional, update,
                                bert_n_layers, reverse_input, top_lstm,
                                vl_layers, bert_type)
    self.config = self.bert.config
    self.criterion = nn.CrossEntropyLoss(ignore_index=0)
    self.mlmhead = BertOnlyMLMHead(self.config)
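# The constructor above only builds the language encoder and MLM head; producing the
# masked inputs and labels happens in the data pipeline, which is not shown. The
# function below is a generic BERT-style masking sketch (an illustrative assumption,
# not this repo's loader) whose labels match CrossEntropyLoss(ignore_index=0) above.
import torch

def mask_tokens(input_ids, mask_token_id, vocab_size, mlm_prob=0.15):
    labels = input_ids.clone()
    selected = torch.bernoulli(torch.full(labels.shape, mlm_prob)).bool()
    labels[~selected] = 0  # unselected positions are ignored by the loss
    # Of the selected tokens: 80% -> [MASK], 10% -> random token, 10% -> unchanged.
    to_mask = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & selected
    input_ids[to_mask] = mask_token_id
    to_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & selected & ~to_mask
    input_ids[to_random] = torch.randint(vocab_size, labels.shape)[to_random]
    return input_ids, labels

ids, labels = mask_tokens(torch.randint(5, 100, (2, 8)), mask_token_id=103, vocab_size=100)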
def __init__(self, config):
    super(UnilmForSeq2Seq, self).__init__(config)
    self.bert = UnilmModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    if hasattr(config, 'label_smoothing') and config.label_smoothing:
        self.crit_mask_lm_smoothed = LabelSmoothingLoss(
            config.label_smoothing, config.vocab_size,
            ignore_index=0, reduction='none')
    else:
        self.crit_mask_lm_smoothed = None
    self.init_weights()
    self.tie_weights()
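# LabelSmoothingLoss is defined elsewhere in the repo; the sketch below only illustrates
# the label-smoothing idea (mixing the one-hot target with a uniform prior over the
# vocabulary). It is an assumption about the behaviour, not the repo's exact class.
import torch
import torch.nn.functional as F

def label_smoothing_nll(logits, target, smoothing=0.1):
    # logits: (batch, vocab), target: (batch,)
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    uniform = -log_probs.mean(dim=-1)  # cross-entropy against a uniform distribution
    return ((1.0 - smoothing) * nll + smoothing * uniform).mean()

loss = label_smoothing_nll(torch.randn(4, 100), torch.randint(0, 100, (4,)))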
def __init__(self, config):
    super(BertForMLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
def __init__(self, config):
    '''Module for running additional pretraining.'''
    super(BertForMLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
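# BertForMLM's forward pass is not shown above; by the usual convention the encoder
# output is fed to the MLM head to score every vocabulary item at each position. A
# runnable sketch with stock Hugging Face components and a deliberately tiny config:
import torch
from transformers import BertConfig, BertModel
from transformers.models.bert.modeling_bert import BertOnlyMLMHead

config = BertConfig(hidden_size=64, num_hidden_layers=1, num_attention_heads=2,
                    intermediate_size=128, vocab_size=100)
bert = BertModel(config)
cls = BertOnlyMLMHead(config)

input_ids = torch.randint(0, config.vocab_size, (2, 8))
hidden_states = bert(input_ids).last_hidden_state    # (2, 8, 64)
prediction_scores = cls(hidden_states)                # (2, 8, 100), one score per vocab item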