def __init__(self, model_name, cache_dir, task_list):
    super(MultiTaskModel, self).__init__()
    cache = os.path.join(cache_dir, model_name)
    self.transformer = XLNetModel.from_pretrained(model_name, cache_dir=cache)
    self.transformer_config = self.transformer.config
    self.dropout = DropoutWrapper(self.transformer_config.dropout)
    self.decoderID = {}  # mapping from the model-internal task_id to its decoder_id
    self.decoder_list = nn.ModuleList()
    for innerid, task in enumerate(task_list):
        if task[1] == TaskType["classification"]:  # task[1] is the task type
            classifier = Classification(self.transformer_config)
            print("use simple classification")
            self.decoder_list.append(classifier)
        elif task[1] == TaskType["SANclassification"]:
            classifier = SANClassifier(self.transformer_config.hidden_size,
                                       self.transformer_config.hidden_size,
                                       label_size=1,
                                       dropout=self.dropout)
            print("use SANClassifier")
            self.decoder_list.append(classifier)
        self.decoderID[task[0]] = innerid

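# --- Usage sketch (illustrative, not from the source) ---
# Assumes task_list entries are (task_id, task_type) pairs and that TaskType maps
# names to ids, as the constructor's loop implies; the checkpoint name and cache
# path are placeholders.
task_list = [(0, TaskType["classification"]),       # task 0 -> plain classification head
             (7, TaskType["SANclassification"])]    # task 7 -> SAN head
model = MultiTaskModel("xlnet-base-cased", "./cache", task_list)
decoder = model.decoder_list[model.decoderID[7]]    # look up the decoder owning task_id 7
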
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.bert_config = BertConfig.from_dict(opt)
    self.bert = BertModel(self.bert_config)
    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False
    mem_size = self.bert_config.hidden_size
    self.decoder_opt = opt['answer_opt']
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']
    self.bert_pooler = None
    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if decoder_opt == 1:
            out_proj = SANClassifier(mem_size, mem_size, lab, opt,
                                     prefix='answer', dropout=dropout)
        else:
            out_proj = nn.Linear(self.bert_config.hidden_size, lab)
        self.scoring_list.append(out_proj)
    self.opt = opt
    self._my_init()
    self.set_embed(opt)

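# --- Option-shape sketch (illustrative) ---
# The constructor above reads per-task settings positionally: 'answer_opt' picks
# the head type (1 -> SANClassifier, else nn.Linear), 'label_size' is a
# comma-separated class-count string, and 'tasks_dropout_p' holds one rate per
# task. All values below are placeholders.
opt_fragment = {
    'answer_opt': [1, 0],         # task 0 gets a SAN head, task 1 a linear head
    'label_size': '3,2',          # task 0 has 3 classes, task 1 has 2
    'tasks_dropout_p': [0.1, 0.05],
    'vb_dropout': True,
    'update_bert_opt': 0,
}
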
def __init__(self, opt, bert_config=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    self.encoder_type = opt['encoder_type']
    if opt['encoder_type'] == EncoderModelType.ROBERTA:
        from fairseq.models.roberta import RobertaModel
        self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
        hidden_size = self.bert.args.encoder_embed_dim
        self.pooler = LinearPooler(hidden_size)
    else:
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        hidden_size = self.bert_config.hidden_size

    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False

    self.decoder_opt = opt['answer_opt']
    self.task_types = opt["task_types"]
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']
    for task, lab in enumerate(labels):
        decoder_opt = self.decoder_opt[task]
        task_type = self.task_types[task]
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        if task_type == TaskType.Span:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SeqenceLabeling:
            out_proj = nn.Linear(hidden_size, lab)
        elif task_type == TaskType.MaskLM:
            # TODO (xiaodl): ROBERTA currently shares the same MaskLM head as BERT.
            out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
        else:
            if decoder_opt == 1:
                out_proj = SANClassifier(hidden_size, hidden_size, lab, opt,
                                         prefix='answer', dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
        self.scoring_list.append(out_proj)
    self.opt = opt
    self._my_init()

def __init__(self, opt, bert_config=None, use_parse=False, embedding_matrix=None,
             token2idx=None, stx_parse_dim=None, unked_words=None,
             use_generic_features=False, num_generic_features=None,
             use_domain_features=False, num_domain_features=None, feature_dim=None):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = []
    self.bert_config = BertConfig.from_dict(opt)
    self.bert = BertModel(self.bert_config)
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False
    mem_size = self.bert_config.hidden_size
    self.scoring_list = nn.ModuleList()
    labels = [int(ls) for ls in opt['label_size'].split(',')]
    task_dropout_p = opt['tasks_dropout_p']
    self.bert_pooler = None
    self.use_parse = use_parse
    self.stx_parse_dim = stx_parse_dim
    self.use_generic_features = use_generic_features
    self.use_domain_features = use_domain_features
    clf_dim = self.bert_config.hidden_size
    if self.use_parse:
        self.treelstm = BinaryTreeLSTM(self.stx_parse_dim, embedding_matrix.clone(),
                                       token2idx, unked_words=unked_words)
        parse_clf_dim = self.stx_parse_dim * 2
        clf_dim += parse_clf_dim
        self.parse_clf = nn.Linear(parse_clf_dim, labels[0])
    if self.use_generic_features:
        self.generic_feature_proj = nn.Linear(num_generic_features,
                                              num_generic_features * feature_dim)
        generic_feature_clf_dim = num_generic_features * feature_dim
        clf_dim += generic_feature_clf_dim
        self.generic_feature_clf = nn.Linear(generic_feature_clf_dim, labels[0])
    if self.use_domain_features:
        self.domain_feature_proj = nn.Linear(num_domain_features,
                                             num_domain_features * feature_dim)
        domain_feature_clf_dim = num_domain_features * feature_dim
        clf_dim += domain_feature_clf_dim
        self.domain_feature_clf = nn.Linear(domain_feature_clf_dim, labels[0])
    assert len(labels) == 1  # this variant supports a single task only
    for task, lab in enumerate(labels):
        dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
        self.dropout_list.append(dropout)
        out_proj = nn.Linear(self.bert_config.hidden_size, lab)
        self.scoring_list.append(out_proj)
    self.opt = opt
    self._my_init()
    self.set_embed(opt)
    if embedding_matrix is not None and self.use_parse:
        # set again because self._my_init() overwrites it
        self.treelstm.embedding.weight = nn.Parameter(embedding_matrix)

def __init__(self, x_size, y_size, opt, prefix='decoder', dropout=None):
    super(Classifier, self).__init__()
    self.opt = opt
    if dropout is None:
        self.dropout = DropoutWrapper(opt.get('{}_dropout_p'.format(prefix), 0))
    else:
        self.dropout = dropout
    self.merge_opt = opt.get('{}_merge_opt'.format(prefix), 0)
    self.weight_norm_on = opt.get('{}_weight_norm_on'.format(prefix), False)
    if self.merge_opt == 1:
        self.proj = nn.Linear(x_size * 4, y_size)
    else:
        self.proj = nn.Linear(x_size * 2, y_size)
    if self.weight_norm_on:
        self.proj = weight_norm(self.proj)

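# --- Usage sketch (illustrative) ---
# The head expects a merged pair representation: 4 * x_size of features when
# '{prefix}_merge_opt' == 1, otherwise 2 * x_size. The concatenation below is one
# common pairwise merge, assumed for illustration rather than read from this file.
import torch
opt = {'answer_merge_opt': 1}
clf = Classifier(x_size=768, y_size=3, opt=opt, prefix='answer')
h1, h2 = torch.randn(8, 768), torch.randn(8, 768)
logits = clf.proj(clf.dropout(torch.cat([h1, h2, h1 * h2, h1 - h2], dim=-1)))  # (8, 3)
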
def __init__(self, x_size, h_size, label_size, opt={}, prefix='decoder', dropout=None):
    super(SANClassifier, self).__init__()
    self.prefix = prefix  # must be set before use; the original read self.prefix first
    if dropout is None:
        self.dropout = DropoutWrapper(opt.get('{}_dropout_p'.format(self.prefix), 0))
    else:
        self.dropout = dropout
    self.query_wsum = SelfAttnWrapper(x_size, prefix='mem_cum', opt=opt, dropout=self.dropout)
    self.attn = FlatSimilarityWrapper(x_size, h_size, prefix, opt, self.dropout)
    self.rnn_type = '{}{}'.format(opt.get('{}_rnn_type'.format(prefix), 'gru').upper(), 'Cell')
    self.rnn = getattr(nn, self.rnn_type)(x_size, h_size)
    self.num_turn = opt.get('{}_num_turn'.format(prefix), 5)
    self.opt = opt
    self.mem_random_drop = opt.get('{}_mem_drop_p'.format(prefix), 0)
    self.mem_type = opt.get('{}_mem_type'.format(prefix), 0)
    self.weight_norm_on = opt.get('{}_weight_norm_on'.format(prefix), False)
    self.label_size = label_size
    self.dump_state = opt.get('dump_state_on', False)
    self.alpha = Parameter(torch.zeros(1, 1), requires_grad=False)
    if self.weight_norm_on:
        self.rnn = WN(self.rnn)
    self.classifier = Classifier(x_size, self.label_size, opt,
                                 prefix=prefix, dropout=self.dropout)

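# --- Instantiation sketch (illustrative) ---
# All SAN hyper-parameters are looked up under '{prefix}_*' keys; the values below
# are placeholders showing the expected types.
opt = {
    'answer_rnn_type': 'gru',       # resolved to nn.GRUCell
    'answer_num_turn': 5,           # number of multi-step reasoning turns
    'answer_mem_drop_p': 0.1,
    'answer_weight_norm_on': False,
}
san_head = SANClassifier(x_size=768, h_size=768, label_size=3, opt=opt, prefix='answer')
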
def __init__(self, opt, bert_config=None, initial_from_local=False):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    if opt['encoder_type'] not in EncoderModelType._value2member_map_:
        raise ValueError("encoder_type is out of pre-defined types")
    self.encoder_type = opt['encoder_type']
    self.preloaded_config = None

    literal_encoder_type = EncoderModelType(self.encoder_type).name.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[literal_encoder_type]
    self.preloaded_config = config_class.from_dict(opt)  # load config from opt
    self.preloaded_config.output_hidden_states = True  # return all hidden states
    self.bert = model_class(self.preloaded_config)
    hidden_size = self.bert.config.hidden_size

    if opt.get('dump_feature', False):
        self.opt = opt
        return
    if opt['update_bert_opt'] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False

    task_def_list = opt['task_def_list']
    self.task_def_list = task_def_list
    self.decoder_opt = []
    self.task_types = []
    for task_id, task_def in enumerate(task_def_list):
        self.decoder_opt.append(generate_decoder_opt(task_def.enable_san, opt['answer_opt']))
        self.task_types.append(task_def.task_type)

    # create output header
    self.scoring_list = nn.ModuleList()
    self.dropout_list = nn.ModuleList()
    for task_id in range(len(task_def_list)):
        task_def: TaskDef = task_def_list[task_id]
        lab = task_def.n_class
        decoder_opt = self.decoder_opt[task_id]
        task_type = self.task_types[task_id]
        task_dropout_p = opt['dropout_p'] if task_def.dropout_p is None else task_def.dropout_p
        dropout = DropoutWrapper(task_dropout_p, opt['vb_dropout'])
        self.dropout_list.append(dropout)
        task_obj = tasks.get_task_obj(task_def)
        if task_obj is not None:
            out_proj = task_obj.train_build_task_layer(decoder_opt, hidden_size, lab, opt,
                                                       prefix='answer', dropout=dropout)
        elif task_type == TaskType.Span:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SeqenceLabeling:
            out_proj = nn.Linear(hidden_size, lab)
        elif task_type == TaskType.MaskLM:
            # TODO (xiaodl): ROBERTA currently shares the same MaskLM head as BERT.
            out_proj = MaskLmHeader(self.bert.embeddings.word_embeddings.weight)
        else:
            if decoder_opt == 1:
                out_proj = SANClassifier(hidden_size, hidden_size, lab, opt,
                                         prefix='answer', dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
        self.scoring_list.append(out_proj)

    self.opt = opt
    self._my_init()
    # if not loading from local, load pre-trained model weights after initialization
    if not initial_from_local:
        config_class, model_class, tokenizer_class = MODEL_CLASSES[literal_encoder_type]
        self.bert = model_class.from_pretrained(opt['init_checkpoint'],
                                                config=self.preloaded_config)

def __init__(self, hidden_size, dropout_p=0.1, actf='tanh'):
    super(Pooler, self).__init__()
    self.dense = nn.Linear(hidden_size, hidden_size)
    self.activation = activation(actf)
    self.dropout = DropoutWrapper(dropout_p=dropout_p)

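# --- Usage sketch (assumed) ---
# Pooler is built from dense + activation + dropout; the composition below over the
# first-token representation (shape (B, T, H)) is one plausible ordering, not
# necessarily the class's actual forward.
import torch
pooler = Pooler(hidden_size=768, dropout_p=0.1, actf='tanh')
sequence_output = torch.randn(8, 128, 768)
pooled = pooler.activation(pooler.dense(pooler.dropout(sequence_output[:, 0])))  # (8, 768)
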
def __init__(self, hidden_size, dropout_p):
    super().__init__()
    my_dropout = DropoutWrapper(dropout_p, False)
    self.self_att = SelfAttnWrapper(hidden_size, dropout=my_dropout)
    self.dense = nn.Linear(hidden_size, hidden_size)
    self.activation = nn.Tanh()

def __init__(self, opt, bert_config=None, initial_from_local=False):
    super(SANBertNetwork, self).__init__()
    self.dropout_list = nn.ModuleList()
    if opt["encoder_type"] not in EncoderModelType._value2member_map_:
        raise ValueError("encoder_type is out of pre-defined types")
    self.encoder_type = opt["encoder_type"]
    self.preloaded_config = None

    literal_encoder_type = EncoderModelType(self.encoder_type).name.lower()
    config_class, model_class, _ = MODEL_CLASSES[literal_encoder_type]
    if not initial_from_local:
        self.bert = model_class.from_pretrained(opt["init_checkpoint"],
                                                cache_dir=opt["transformer_cache"])
    else:
        self.preloaded_config = config_class.from_dict(opt)  # load config from opt
        self.preloaded_config.output_hidden_states = True  # return all hidden states
        self.bert = model_class(self.preloaded_config)
    hidden_size = self.bert.config.hidden_size

    if opt.get("dump_feature", False):
        self.config = opt
        return
    if opt["update_bert_opt"] > 0:
        for p in self.bert.parameters():
            p.requires_grad = False

    task_def_list = opt["task_def_list"]
    self.task_def_list = task_def_list
    self.decoder_opt = []
    self.task_types = []
    for task_id, task_def in enumerate(task_def_list):
        self.decoder_opt.append(generate_decoder_opt(task_def.enable_san, opt["answer_opt"]))
        self.task_types.append(task_def.task_type)

    # create output header
    self.scoring_list = nn.ModuleList()
    self.dropout_list = nn.ModuleList()
    for task_id in range(len(task_def_list)):
        task_def: TaskDef = task_def_list[task_id]
        lab = task_def.n_class
        decoder_opt = self.decoder_opt[task_id]
        task_type = self.task_types[task_id]
        task_dropout_p = opt["dropout_p"] if task_def.dropout_p is None else task_def.dropout_p
        dropout = DropoutWrapper(task_dropout_p, opt["vb_dropout"])
        self.dropout_list.append(dropout)
        task_obj = tasks.get_task_obj(task_def)
        if task_obj is not None:
            # Move this to task_obj
            self.pooler = Pooler(hidden_size, dropout_p=opt["dropout_p"],
                                 actf=opt["pooler_actf"])
            out_proj = task_obj.train_build_task_layer(decoder_opt, hidden_size, lab, opt,
                                                       prefix="answer", dropout=dropout)
        elif task_type == TaskType.Span:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SpanYN:
            assert decoder_opt != 1
            out_proj = nn.Linear(hidden_size, 2)
        elif task_type == TaskType.SeqenceLabeling:
            out_proj = nn.Linear(hidden_size, lab)
        # the MaskLM head of the earlier variants is disabled in this version
        elif task_type == TaskType.SeqenceGeneration:
            # use the encoder's original header; no extra projection
            out_proj = None
        elif task_type == TaskType.ClozeChoice:
            self.pooler = Pooler(hidden_size, dropout_p=opt["dropout_p"],
                                 actf=opt["pooler_actf"])
            out_proj = nn.Linear(hidden_size, lab)
        else:
            if decoder_opt == 1:
                out_proj = SANClassifier(hidden_size, hidden_size, lab, opt,
                                         prefix="answer", dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
        self.scoring_list.append(out_proj)

    self.config = opt

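# --- Construction sketch (illustrative) ---
# Keys mirror the opt reads in the constructor above; every value is a placeholder,
# and the TaskDef instance is hypothetical (it only needs the attributes accessed in
# the loop: n_class, dropout_p, enable_san, task_type).
opt = {
    "encoder_type": EncoderModelType.BERT.value,
    "init_checkpoint": "bert-base-uncased",
    "transformer_cache": "./cache",
    "update_bert_opt": 0,
    "answer_opt": 1,
    "dropout_p": 0.1,
    "vb_dropout": True,
    "pooler_actf": "tanh",
    "task_def_list": [my_classification_task_def],  # hypothetical TaskDef
}
model = SANBertNetwork(opt, initial_from_local=False)  # loads pre-trained weights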