def __init__(self, model_name, cache_dir, task_list):
    """Build a shared XLNet encoder and one decoder head per supported task.

    Args:
        model_name: Identifier passed to ``XLNetModel.from_pretrained``; it
            is also used as the sub-directory name inside ``cache_dir``.
        cache_dir: Root directory of the pretrained-model cache.
        task_list: Iterable of task descriptors where ``task[0]`` is the
            task id and ``task[1]`` is the task type.
    """
    super(MultiTaskModel, self).__init__()
    cache = os.path.join(cache_dir, model_name)
    self.transformer = XLNetModel.from_pretrained(model_name,
                                                  cache_dir=cache)
    self.transformer_config = self.transformer.config
    self.dropout = DropoutWrapper(self.transformer_config.dropout)
    # Maps a task id to the index of its decoder inside ``decoder_list``.
    self.decoderID = {}
    self.decoder_list = nn.ModuleList()
    for task in task_list:
        task_id, task_type = task[0], task[1]
        if task_type == TaskType["classification"]:
            classifier = Classification(self.transformer_config)
            print("use simple classification")
        elif task_type == TaskType["SANclassification"]:
            classifier = SANClassifier(self.transformer_config.hidden_size,
                                       self.transformer_config.hidden_size,
                                       label_size=1,
                                       dropout=self.dropout)
            print("use SANClassifier")
        else:
            # No decoder is built for other task types. They are left out
            # of the mapping so that the ids of the remaining tasks keep
            # pointing at the decoder that was actually created for them.
            continue
        # Record the real position in ``decoder_list`` rather than the
        # position in ``task_list``; the two differ once a task is skipped.
        self.decoderID[task_id] = len(self.decoder_list)
        self.decoder_list.append(classifier)
예제 #2
0
    def __init__(self, opt, bert_config=None):
        """Create a BERT encoder with one dropout and one scoring head per task.

        Args:
            opt: Option dict. It is passed whole to ``BertConfig.from_dict``
                and the keys ``dump_feature`` (optional), ``update_bert_opt``,
                ``answer_opt``, ``label_size`` (comma-separated ints),
                ``tasks_dropout_p`` and ``vb_dropout`` are read here.
            bert_config: Not used by this constructor.
        """
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)

        # Feature-dump mode stops here: only the encoder is constructed.
        if opt.get('dump_feature', False):
            self.opt = opt
            return

        # A positive ``update_bert_opt`` turns gradients off for the encoder.
        if opt['update_bert_opt'] > 0:
            for param in self.bert.parameters():
                param.requires_grad = False

        hidden_dim = self.bert_config.hidden_size
        self.decoder_opt = opt['answer_opt']
        self.scoring_list = nn.ModuleList()
        label_sizes = [int(size) for size in opt['label_size'].split(',')]
        dropout_rates = opt['tasks_dropout_p']
        self.bert_pooler = None

        for task_idx, n_labels in enumerate(label_sizes):
            head_kind = self.decoder_opt[task_idx]
            task_dropout = DropoutWrapper(dropout_rates[task_idx],
                                          opt['vb_dropout'])
            self.dropout_list.append(task_dropout)
            # ``answer_opt`` value 1 selects the SAN classifier; anything
            # else gets a plain linear projection.
            if head_kind == 1:
                head = SANClassifier(hidden_dim, hidden_dim, n_labels, opt,
                                     prefix='answer', dropout=task_dropout)
            else:
                head = nn.Linear(self.bert_config.hidden_size, n_labels)
            self.scoring_list.append(head)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
예제 #3
0
    def __init__(self, opt, bert_config=None):
        """Create a RoBERTa or BERT encoder and one output head per task.

        Args:
            opt: Option dict. Keys read here: ``encoder_type``,
                ``init_checkpoint`` (RoBERTa only), ``dump_feature``
                (optional), ``update_bert_opt``, ``answer_opt``,
                ``task_types``, ``label_size`` (comma-separated ints),
                ``tasks_dropout_p`` and ``vb_dropout``. For the non-RoBERTa
                path the whole dict is passed to ``BertConfig.from_dict``.
            bert_config: Not used by this constructor.
        """
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.encoder_type = opt['encoder_type']
        if opt['encoder_type'] == EncoderModelType.ROBERTA:
            # Imported lazily so fairseq is only needed on the RoBERTa path.
            from fairseq.models.roberta import RobertaModel
            self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
            hidden_size = self.bert.args.encoder_embed_dim
            self.pooler = LinearPooler(hidden_size)
        else:
            self.bert_config = BertConfig.from_dict(opt)
            self.bert = BertModel(self.bert_config)
            hidden_size = self.bert_config.hidden_size

        # Feature-dump mode stops here: only the encoder is constructed.
        if opt.get('dump_feature', False):
            self.opt = opt
            return

        # A positive ``update_bert_opt`` turns gradients off for the encoder.
        if opt['update_bert_opt'] > 0:
            for param in self.bert.parameters():
                param.requires_grad = False

        self.decoder_opt = opt['answer_opt']
        self.task_types = opt["task_types"]
        self.scoring_list = nn.ModuleList()
        label_sizes = [int(size) for size in opt['label_size'].split(',')]
        dropout_rates = opt['tasks_dropout_p']

        for task_idx, n_labels in enumerate(label_sizes):
            head_kind = self.decoder_opt[task_idx]
            task_kind = self.task_types[task_idx]
            task_dropout = DropoutWrapper(dropout_rates[task_idx],
                                          opt['vb_dropout'])
            self.dropout_list.append(task_dropout)

            if task_kind == TaskType.Span:
                assert head_kind != 1
                head = nn.Linear(hidden_size, 2)
            elif task_kind == TaskType.SeqenceLabeling:
                head = nn.Linear(hidden_size, n_labels)
            elif task_kind == TaskType.MaskLM:
                # TODO: xiaodl -- the RoBERTa and BERT encoders are handled
                # identically here.
                head = MaskLmHeader(
                    self.bert.embeddings.word_embeddings.weight)
            elif head_kind == 1:
                head = SANClassifier(hidden_size,
                                     hidden_size,
                                     n_labels,
                                     opt,
                                     prefix='answer',
                                     dropout=task_dropout)
            else:
                head = nn.Linear(hidden_size, n_labels)
            self.scoring_list.append(head)

        self.opt = opt
        self._my_init()
예제 #4
0
    def __init__(self, opt, bert_config=None,
                 use_parse=False, embedding_matrix=None, token2idx=None, stx_parse_dim=None, unked_words=None,
                 use_generic_features=False, num_generic_features=None, use_domain_features=False, num_domain_features=None, feature_dim=None):
        """Create a single-task BERT network with optional auxiliary classifiers.

        Args:
            opt: Option dict. It is passed whole to ``BertConfig.from_dict``
                and the keys ``update_bert_opt``, ``label_size``
                (comma-separated ints, exactly one entry),
                ``tasks_dropout_p`` and ``vb_dropout`` are read here.
            bert_config: Not used by this constructor.
            use_parse: When true, a ``BinaryTreeLSTM`` and a linear
                classifier over ``2 * stx_parse_dim`` features are added.
            embedding_matrix: A clone is handed to ``BinaryTreeLSTM``; after
                ``_my_init()`` the tensor itself is installed as the tree
                LSTM's embedding weight. Must not be ``None`` when
                ``use_parse`` is true.
            token2idx: Forwarded to ``BinaryTreeLSTM``.
            stx_parse_dim: First argument of ``BinaryTreeLSTM``.
            unked_words: Forwarded to ``BinaryTreeLSTM``.
            use_generic_features: When true, a projection and a classifier
                for the generic features are added.
            num_generic_features: Input width of the generic projection.
            use_domain_features: When true, a projection and a classifier
                for the domain features are added.
            num_domain_features: Input width of the domain projection.
            feature_dim: Each feature projection has
                ``num_features * feature_dim`` outputs.
        """
        super(SANBertNetwork, self).__init__()
        # A ModuleList (not a plain list) so the per-task dropout modules
        # are registered as sub-modules and follow train()/eval()/to().
        self.dropout_list = nn.ModuleList()
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        # A positive ``update_bert_opt`` turns gradients off for the encoder.
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']
        self.bert_pooler = None

        self.use_parse = use_parse
        self.stx_parse_dim = stx_parse_dim
        self.use_generic_features = use_generic_features
        self.use_domain_features = use_domain_features

        # Each optional feature source gets its own classifier that maps
        # straight to the label space of the (single) task.
        if self.use_parse:
            self.treelstm = BinaryTreeLSTM(self.stx_parse_dim, embedding_matrix.clone(), token2idx, unked_words=unked_words)
            parse_clf_dim = self.stx_parse_dim * 2
            self.parse_clf = nn.Linear(parse_clf_dim, labels[0])
        if self.use_generic_features:
            generic_feature_clf_dim = num_generic_features * feature_dim
            self.generic_feature_proj = nn.Linear(num_generic_features, generic_feature_clf_dim)
            self.generic_feature_clf = nn.Linear(generic_feature_clf_dim, labels[0])
        if self.use_domain_features:
            domain_feature_clf_dim = num_domain_features * feature_dim
            self.domain_feature_proj = nn.Linear(num_domain_features, domain_feature_clf_dim)
            self.domain_feature_clf = nn.Linear(domain_feature_clf_dim, labels[0])

        assert len(labels) == 1
        for task, lab in enumerate(labels):
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            out_proj = nn.Linear(self.bert_config.hidden_size, lab)
            self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
        if embedding_matrix is not None and self.use_parse:
            self.treelstm.embedding.weight = nn.Parameter(embedding_matrix)  # set again b/c self._my_init() overwrites it
예제 #5
0
    def __init__(self, x_size, y_size, opt, prefix='decoder', dropout=None):
        """Set up the final linear projection of the classifier.

        Args:
            x_size: Base feature width; the projection input is 4x this
                value when the merge option is 1 and 2x otherwise.
            y_size: Output width of the projection.
            opt: Option dict queried with ``prefix``-qualified keys
                (``<prefix>_dropout_p``, ``<prefix>_merge_opt``,
                ``<prefix>_weight_norm_on``).
            prefix: Key prefix used for the lookups in ``opt``.
            dropout: Dropout object to use; when ``None`` a
                ``DropoutWrapper`` is built from ``<prefix>_dropout_p``
                (default 0).
        """
        super(Classifier, self).__init__()
        self.opt = opt
        self.dropout = (
            DropoutWrapper(opt.get('{}_dropout_p'.format(prefix), 0))
            if dropout is None else dropout
        )
        self.merge_opt = opt.get('{}_merge_opt'.format(prefix), 0)
        self.weight_norm_on = opt.get('{}_weight_norm_on'.format(prefix), False)

        width_multiplier = 4 if self.merge_opt == 1 else 2
        projection = nn.Linear(x_size * width_multiplier, y_size)
        if self.weight_norm_on:
            projection = weight_norm(projection)
        self.proj = projection
예제 #6
0
    def __init__(self,
                 x_size,
                 h_size,
                 label_size,
                 opt=None,
                 prefix='decoder',
                 dropout=None):
        """Assemble the attention, recurrent cell and classifier of the SAN head.

        Args:
            x_size: Input width given to the self-attention wrapper, the
                similarity wrapper, the RNN cell and the classifier.
            h_size: Hidden width given to the similarity wrapper and the
                RNN cell.
            label_size: Output width of the final ``Classifier``.
            opt: Option dict queried with ``prefix``-qualified keys; an
                empty dict is used when omitted, so every option falls back
                to its default.
            prefix: Key prefix used for the lookups in ``opt``.
            dropout: Dropout object shared by all sub-modules; when ``None``
                a ``DropoutWrapper`` is built from ``<prefix>_dropout_p``
                (default 0).
        """
        super(SANClassifier, self).__init__()
        # Avoid a mutable default argument shared between instances.
        if opt is None:
            opt = {}
        # Must be set before the dropout lookup below, which needs the
        # prefix to build its option key.
        self.prefix = prefix
        if dropout is None:
            self.dropout = DropoutWrapper(
                opt.get('{}_dropout_p'.format(prefix), 0))
        else:
            self.dropout = dropout
        self.query_wsum = SelfAttnWrapper(x_size,
                                          prefix='mem_cum',
                                          opt=opt,
                                          dropout=self.dropout)
        self.attn = FlatSimilarityWrapper(x_size, h_size, prefix, opt,
                                          self.dropout)
        # Resolves to an ``nn`` cell class name such as ``GRUCell``.
        self.rnn_type = '{}{}'.format(
            opt.get('{}_rnn_type'.format(prefix), 'gru').upper(), 'Cell')
        self.rnn = getattr(nn, self.rnn_type)(x_size, h_size)
        self.num_turn = opt.get('{}_num_turn'.format(prefix), 5)
        self.opt = opt
        self.mem_random_drop = opt.get('{}_mem_drop_p'.format(prefix), 0)
        self.mem_type = opt.get('{}_mem_type'.format(prefix), 0)
        self.weight_norm_on = opt.get('{}_weight_norm_on'.format(prefix),
                                      False)
        self.label_size = label_size
        self.dump_state = opt.get('dump_state_on', False)
        # Registered as a parameter but excluded from gradient updates.
        self.alpha = Parameter(torch.zeros(1, 1), requires_grad=False)
        if self.weight_norm_on:
            self.rnn = WN(self.rnn)

        self.classifier = Classifier(x_size,
                                     self.label_size,
                                     opt,
                                     prefix=prefix,
                                     dropout=self.dropout)
예제 #7
0
    def __init__(self, opt, bert_config=None, initial_from_local=False):
        """Build the encoder selected by ``opt['encoder_type']`` and one head per task.

        Args:
            opt: Option dict. Keys read here: ``encoder_type``,
                ``dump_feature`` (optional), ``update_bert_opt``,
                ``task_def_list``, ``answer_opt``, ``dropout_p``,
                ``vb_dropout`` and, unless ``initial_from_local`` is set,
                ``init_checkpoint``. The whole dict is also passed to
                ``config_class.from_dict``.
            bert_config: Not used by this constructor.
            initial_from_local: When false, the encoder is replaced by one
                loaded with ``from_pretrained(opt['init_checkpoint'])``
                after ``_my_init()`` has run.

        Raises:
            ValueError: If ``opt['encoder_type']`` is not a value of
                ``EncoderModelType``.
        """
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()

        if opt['encoder_type'] not in EncoderModelType._value2member_map_:
            raise ValueError("encoder_type is out of pre-defined types")
        self.encoder_type = opt['encoder_type']

        literal_encoder_type = EncoderModelType(self.encoder_type).name.lower()
        config_class, model_class, _ = MODEL_CLASSES[literal_encoder_type]
        self.preloaded_config = config_class.from_dict(
            opt)  # load config from opt
        self.preloaded_config.output_hidden_states = True  # return all hidden states
        self.bert = model_class(self.preloaded_config)
        hidden_size = self.bert.config.hidden_size

        # Feature-dump mode stops here: only the encoder is constructed.
        if opt.get('dump_feature', False):
            self.opt = opt
            return

        # A positive ``update_bert_opt`` turns gradients off for the encoder.
        freeze_encoder = opt['update_bert_opt'] > 0
        if freeze_encoder:
            for p in self.bert.parameters():
                p.requires_grad = False

        task_def_list = opt['task_def_list']
        self.task_def_list = task_def_list
        self.decoder_opt = []
        self.task_types = []
        for task_def in task_def_list:
            self.decoder_opt.append(
                generate_decoder_opt(task_def.enable_san, opt['answer_opt']))
            self.task_types.append(task_def.task_type)

        # create output header
        self.scoring_list = nn.ModuleList()
        for task_id, task_def in enumerate(task_def_list):
            lab = task_def.n_class
            decoder_opt = self.decoder_opt[task_id]
            task_type = self.task_types[task_id]
            # A per-task dropout rate overrides the global one when given.
            task_dropout_p = opt[
                'dropout_p'] if task_def.dropout_p is None else task_def.dropout_p
            dropout = DropoutWrapper(task_dropout_p, opt['vb_dropout'])
            self.dropout_list.append(dropout)
            task_obj = tasks.get_task_obj(task_def)
            if task_obj is not None:
                # A registered task object builds its own output layer.
                out_proj = task_obj.train_build_task_layer(decoder_opt,
                                                           hidden_size,
                                                           lab,
                                                           opt,
                                                           prefix='answer',
                                                           dropout=dropout)
            elif task_type == TaskType.Span:
                assert decoder_opt != 1
                out_proj = nn.Linear(hidden_size, 2)
            elif task_type == TaskType.SeqenceLabeling:
                out_proj = nn.Linear(hidden_size, lab)
            elif task_type == TaskType.MaskLM:
                # TODO: xiaodl -- every encoder type currently takes the
                # same code path here.
                out_proj = MaskLmHeader(
                    self.bert.embeddings.word_embeddings.weight)
            elif decoder_opt == 1:
                out_proj = SANClassifier(hidden_size,
                                         hidden_size,
                                         lab,
                                         opt,
                                         prefix='answer',
                                         dropout=dropout)
            else:
                out_proj = nn.Linear(hidden_size, lab)
            self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        # if not loading from local, loading model weights from pre-trained model, after initialization
        if not initial_from_local:
            self.bert = model_class.from_pretrained(
                opt['init_checkpoint'], config=self.preloaded_config)
            # The freeze above was applied to the encoder instance that has
            # just been replaced, so it has to be applied again to the
            # freshly loaded parameters.
            if freeze_encoder:
                for p in self.bert.parameters():
                    p.requires_grad = False
            # NOTE(review): a MaskLM head built above received the embedding
            # weight of the replaced encoder instance, not of the one loaded
            # here -- confirm whether it needs to be re-tied.
예제 #8
0
 def __init__(self, hidden_size, dropout_p=0.1, actf='tanh'):
     """Create the square dense layer, activation and dropout of the pooler.

     Args:
         hidden_size: Input and output width of the dense layer.
         dropout_p: Value passed to ``DropoutWrapper`` as ``dropout_p``.
         actf: Activation name resolved through ``activation``.
     """
     super(Pooler, self).__init__()
     self.dense = nn.Linear(in_features=hidden_size,
                            out_features=hidden_size)
     self.activation = activation(actf)
     self.dropout = DropoutWrapper(dropout_p=dropout_p)
예제 #9
0
 def __init__(self, hidden_size, dropout_p):
     """Create a self-attention layer followed by a dense layer and tanh.

     Args:
         hidden_size: Width given to the self-attention wrapper and used as
             both input and output width of the dense layer.
         dropout_p: First argument of the ``DropoutWrapper`` that is handed
             to the self-attention wrapper.
     """
     super().__init__()
     # The dropout wrapper is only referenced by the attention layer; it is
     # not stored on this module directly.
     self.self_att = SelfAttnWrapper(
         hidden_size, dropout=DropoutWrapper(dropout_p, False))
     self.dense = nn.Linear(in_features=hidden_size,
                            out_features=hidden_size)
     self.activation = nn.Tanh()
예제 #10
0
    def __init__(self, opt, bert_config=None, initial_from_local=False):
        """Build the encoder selected by ``opt["encoder_type"]`` and one head per task.

        Args:
            opt: Option dict. Keys read here: ``encoder_type``,
                ``dump_feature`` (optional), ``update_bert_opt``,
                ``task_def_list``, ``answer_opt``, ``dropout_p``,
                ``vb_dropout``, ``pooler_actf`` (only for branches that
                build a pooler) and either ``init_checkpoint`` plus
                ``transformer_cache`` (pretrained path) or the config
                fields consumed by ``config_class.from_dict`` (local path).
            bert_config: Not used by this constructor.
            initial_from_local: When true the encoder is instantiated from a
                config built out of ``opt``; otherwise it is loaded with
                ``from_pretrained``.

        Raises:
            ValueError: If ``opt["encoder_type"]`` is not a value of
                ``EncoderModelType``.
        """
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()

        encoder_type = opt["encoder_type"]
        if encoder_type not in EncoderModelType._value2member_map_:
            raise ValueError("encoder_type is out of pre-defined types")
        self.encoder_type = encoder_type
        self.preloaded_config = None

        literal_encoder_type = EncoderModelType(self.encoder_type).name.lower()
        config_class, model_class, _ = MODEL_CLASSES[literal_encoder_type]
        if initial_from_local:
            # Build the encoder from the options and ask it to return all
            # hidden states.
            local_config = config_class.from_dict(opt)
            self.preloaded_config = local_config
            self.preloaded_config.output_hidden_states = True
            self.bert = model_class(self.preloaded_config)
        else:
            # ``preloaded_config`` stays ``None`` on this path.
            self.bert = model_class.from_pretrained(
                opt["init_checkpoint"], cache_dir=opt["transformer_cache"])

        hidden_size = self.bert.config.hidden_size

        # Feature-dump mode stops here: only the encoder is constructed.
        if opt.get("dump_feature", False):
            self.config = opt
            return

        # A positive ``update_bert_opt`` turns gradients off for the encoder.
        if opt["update_bert_opt"] > 0:
            for param in self.bert.parameters():
                param.requires_grad = False

        task_def_list = opt["task_def_list"]
        self.task_def_list = task_def_list
        self.decoder_opt = [
            generate_decoder_opt(task_def.enable_san, opt["answer_opt"])
            for task_def in task_def_list
        ]
        self.task_types = [task_def.task_type for task_def in task_def_list]

        # create output header
        self.scoring_list = nn.ModuleList()
        per_task = zip(task_def_list, self.decoder_opt, self.task_types)
        for task_def, decoder_opt, task_type in per_task:
            lab = task_def.n_class
            # A per-task dropout rate overrides the global one when given.
            if task_def.dropout_p is None:
                task_dropout_p = opt["dropout_p"]
            else:
                task_dropout_p = task_def.dropout_p
            dropout = DropoutWrapper(task_dropout_p, opt["vb_dropout"])
            self.dropout_list.append(dropout)

            task_obj = tasks.get_task_obj(task_def)
            if task_obj is not None:
                # Move this to task_obj
                self.pooler = Pooler(hidden_size,
                                     dropout_p=opt["dropout_p"],
                                     actf=opt["pooler_actf"])
                out_proj = task_obj.train_build_task_layer(decoder_opt,
                                                           hidden_size,
                                                           lab,
                                                           opt,
                                                           prefix="answer",
                                                           dropout=dropout)
            elif task_type == TaskType.Span or task_type == TaskType.SpanYN:
                # Both span variants use a two-output linear head and do
                # not allow the SAN decoder.
                assert decoder_opt != 1
                out_proj = nn.Linear(hidden_size, 2)
            elif task_type == TaskType.SeqenceLabeling:
                out_proj = nn.Linear(hidden_size, lab)
            elif task_type == TaskType.SeqenceGeneration:
                # use the original header; no extra projection is created
                out_proj = None
            elif task_type == TaskType.ClozeChoice:
                self.pooler = Pooler(hidden_size,
                                     dropout_p=opt["dropout_p"],
                                     actf=opt["pooler_actf"])
                out_proj = nn.Linear(hidden_size, lab)
            elif decoder_opt == 1:
                out_proj = SANClassifier(
                    hidden_size,
                    hidden_size,
                    lab,
                    opt,
                    prefix="answer",
                    dropout=dropout,
                )
            else:
                # There is no dedicated MaskLM branch, so any task type not
                # matched above ends up with a plain linear head here.
                out_proj = nn.Linear(hidden_size, lab)
            self.scoring_list.append(out_proj)
        self.config = opt