Example #1
 def __init__(self, opt, lbstokens, emb_matrix=None, cls=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     if cls is None:
         self.model = GCNClassifier(opt, lbstokens, emb_matrix=emb_matrix)
     else:
         self.model = GCNClassifier(opt,
                                    lbstokens,
                                    emb_matrix=emb_matrix,
                                    cls=cls)
     self.cls = cls
     self.rel_types = len(constant.LABEL_TO_ID)
     self.loss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                    requires_grad=False)
     self.miss_matrix = torch.zeros((self.rel_types, self.rel_types),
                                    requires_grad=False)
     # self.alpha=torch.full((1,),0.1,requires_grad=True)
     # self.beta=torch.full((1,),0.1,requires_grad=True)
     #self.model = nn.DataParallel(GCNClassifier(opt, emb_matrix=emb_matrix),device_ids=[0,1,2,3])
     #self.model.half()
     print(self.get_parameter_number(self.model))
     self.soft_criterion = SoftCrossEntropyLoss()
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
         self.loss_matrix = self.loss_matrix.cuda()
         self.miss_matrix = self.miss_matrix.cuda()
     self.optimizer = torch_utils.get_optimizer(
         opt['optim'], [{
             'params': self.parameters
         }], opt['lr'])
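Example #1 prints the result of a get_parameter_number helper that is not included in the snippet. A minimal sketch of what such a helper typically computes (total vs. trainable parameter counts) is shown below; the exact name and return format in the original repository may differ.

def get_parameter_number(self, model):
    # count every parameter and the subset that will actually be optimized
    total_num = sum(p.numel() for p in model.parameters())
    trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return {'Total': total_num, 'Trainable': trainable_num}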
Example #2
    def __init__(self, opt, emb_matrix=None):

        self.opt = opt
        self.model = Our_Model(opt, emb_matrix)

        # # pass weights per class, each class corresponds to its index
        # weights = [opt['weight_no_rel']]
        # rel_classes_weights = [opt["weight_rest"]] * 41
        # weights.extend(rel_classes_weights)
        # print("Using weights", weights)
        # assert len(weights) == 42
        # class_weights = torch.FloatTensor(weights).to("cuda")

        self.criterion = nn.CrossEntropyLoss()  # weight=class_weights
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        # print(self.parameters)
        # print(len(self.parameters))

        if opt['cuda']:
            self.model.to("cuda")
            self.criterion.to("cuda")

        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])
Example #3
    def __init__(self, opt, emb_matrix=None, data_dir='dataset/tacred/', init_temp=160, alpha=1, train_batch=None):
        self.opt = opt
        self.model = PositionAwareRNN(opt, emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        scores = []
        self.data_dir = data_dir
        self.temp = init_temp
        self.alpha = alpha
        with open(os.path.join(data_dir, 'kl_prob.json'), 'r') as f:
            kl = json.load(f)
        for i in range(len(kl)):
            scores.append(kl[str(i)][0])
        self.kl = torch.Tensor(scores)
        self.kl = torch.exp(-self.kl / init_temp) * alpha
        self.kl[[i for i in range(len(kl))], [i for i in range(len(kl))]] = 0  # zero out the diagonal

        self.true_rel = None
        if train_batch is not None:
            self.true_rel = {}
            for batch in train_batch:
                for i in range(len(batch[0])):
                    sub = batch[-1][i][0]
                    obj = batch[-1][i][1]
                    if (sub, obj) not in self.true_rel:
                        self.true_rel[(sub, obj)] = set()
                    self.true_rel[(sub, obj)].add((batch[7][i]).item())
        self.kl = F.normalize(self.kl, p=1, dim=-1)       # normalize rows??
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
            self.kl = self.kl.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'], weight_decay=opt['weight_decay'])
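Example #3 turns the pairwise KL scores into a temperature-scaled, row-normalized similarity matrix with a zeroed diagonal, but the snippet does not show how the matrix is consumed. One plausible use (an assumption, not taken from the original repository) is to mix it into one-hot labels to build soft targets:

import torch.nn.functional as F

def soften_labels(labels, kl_matrix, smoothing=0.1):
    # labels: LongTensor of gold relation ids; kl_matrix: [num_rel, num_rel], rows sum to 1
    num_rel = kl_matrix.size(0)
    one_hot = F.one_hot(labels, num_rel).float()
    # shift a small amount of probability mass onto related relations
    return (1 - smoothing) * one_hot + smoothing * kl_matrix[labels]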
Example #4
 def __init__(self, args, emb_matrix=None):
     self.args = args
     self.emb_matrix = emb_matrix
     self.model = RGATABSA(args, emb_matrix=emb_matrix)
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     self.model.cuda()
     self.optimizer = torch_utils.get_optimizer(args.optim, self.parameters, args.lr)
Example #5
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.model = PositionAwareRNN(opt, emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
Example #6
 def __init__(self, opt):
     self.opt = opt
     self.model = BasicClassifier(opt)
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #7
 def __init__(self, opt):
     self.opt = opt
     self.model = BiGI(opt)
     self.criterion = nn.BCELoss()
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.model.parameters(),
                                                opt['lr'])
     self.epoch_rec_loss = []
     self.epoch_dgi_loss = []
Example #8
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.sigmoid = nn.Sigmoid()
     self.criterion = nn.BCEWithLogitsLoss()
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     if opt['cuda']:
         self.model.cuda()
         self.sigmoid.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
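nn.BCEWithLogitsLoss already applies a sigmoid internally, so the separate nn.Sigmoid in Example #8 is presumably only needed to turn logits into probabilities at prediction time. A minimal sketch of that split (variable names are hypothetical, not from the original trainer):

outputs = self.model(inputs)              # forward pass; actual call signature may differ
logits = outputs[0] if isinstance(outputs, tuple) else outputs
loss = self.criterion(logits, labels)     # labels: float tensor shaped like logits
probs = self.sigmoid(logits)              # explicit sigmoid only for thresholding/prediction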
Example #9
 def __init__(self, opt, bert_model):
     self.opt = opt
     self.bert_model = bert_model
     self.model = BiLSTMCNN(opt, bert_model)
     self.subj_criterion = nn.BCELoss(reduction='none')
     self.obj_criterion = nn.CrossEntropyLoss(reduction='none')
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     if opt['cuda']:
         self.model.cuda()
         self.subj_criterion.cuda()
         self.obj_criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'], opt['weight_decay'])
Example #10
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.model = PositionAwareRNN(opt, emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     if opt["cuda"]:
         print("starting cuda.")
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(
         opt["optim"], self.parameters, opt["lr"]
     )
Example #11
    def __init__(self, opt, emb_matrix=None, asp_emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = AspModel(opt, emb_matrix, asp_emb_matrix)
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]

        if opt['cuda']:
            self.model.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])
Example #12
 def __init__(self, opt, model_name=None):
     self.opt = opt
     self.model = BertPositionAwareRNN(opt, model_name=model_name)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         print("starting cuda.")
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #13
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.classifier = SynGCN(opt, emb_matrix)
     self.decoder = Decoder(opt)
     self.criterion = nn.CrossEntropyLoss()
     self.criterion_d = nn.NLLLoss(ignore_index=constant.PAD_ID)
     self.parameters = [p for p in self.classifier.parameters() if p.requires_grad] + [p for p in self.decoder.parameters() if p.requires_grad]
     if opt['cuda']:
         self.classifier.cuda()
         self.decoder.cuda()
         self.criterion.cuda()
         self.criterion_d.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
Example #14
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.criterion = nn.CrossEntropyLoss(
         weight=torch.from_numpy(np.array([1.0, 5.0])).float())
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #15
 def __init__(self, opt, predictor):
     # options
     self.opt = opt
     # encoding model
     self.model = predictor
     # loss function
     self.criterion = nn.CrossEntropyLoss()
     # all parameters of the model
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     # move the model and loss to GPU if enabled
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     # initialize the optimizer
     self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])
Example #16
 def __init__(self, opt, emb_matrix=None):
     """
     GCN Trainer
     :param opt:
     :param emb_matrix: word embedding matrix, torch tensor
     """
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]  # only optimize parameters that require gradients; frozen ones are skipped
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #17
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.model = PositionAwareRNN(opt, emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     # self.attn_loss = nn.KLDivLoss(reduction='sum')
     self.attn_loss = nn.CosineSimilarity()
     self.loss_scaler = opt["loss_scaler"]
     self.parameters = [p for p in self.model.parameters() if p.requires_grad]
     if opt["cuda"]:
         print("starting cuda.")
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(
         opt["optim"], self.parameters, opt["lr"]
     )
Example #18
 def __init__(self, opt, selector):
     # options
     self.opt = opt
     # encoding model
     self.model = selector
     # all parameters of the model
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     # move the model to GPU if enabled
     if opt['cuda']:
         self.model.cuda()
     # initialize the optimizer
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #19
 def __init__(self, opt, emb_matrix=None, ucca_embedding_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.ucca_embedding_matrix = ucca_embedding_matrix
     self.model = GCNClassifier(opt,
                                emb_matrix=emb_matrix,
                                ucca_embedding_matrix=ucca_embedding_matrix)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = get_optimizer(opt['optim'], self.parameters,
                                    opt['lr'])
Example #20
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.criterion = nn.CrossEntropyLoss(reduction="none")
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     self.crf = CRF(self.opt['num_class'], batch_first=True)
     self.bc = nn.BCELoss()
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
         self.crf.cuda()
         self.bc.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
Example #21
    def __init__(self, opt, knowledge_emb=None, word_emb=None):
        self.opt = opt
        self.knowledge_emb = knowledge_emb
        self.word_emb = word_emb
        self.model = GCNClassifier(opt,
                                   knowledge_emb=knowledge_emb,
                                   word_emb=word_emb)
        #print(self.model)
        self.criterion = nn.BCEWithLogitsLoss()

        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])
Example #22
 def __init__(self, opt, emb_matrix=None, joint=False):
     self.opt = opt
     self.model = model.BLSTM_CRF(opt, emb_matrix)
     if opt['crf']:
         print("Using CRF loss...")
         self.crit = crf.CRFLoss(opt['num_class'], True)
     else:
         self.crit = loss.SequenceLoss(opt['num_class'])
     self.parameters = [
         p for m in (self.model, self.crit) for p in m.parameters()
         if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.crit.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'],
                                                opt.get('momentum', 0))
Example #23
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.model = PositionAwareRNN(opt, emb_matrix)
     self.criterion = nn.CrossEntropyLoss()
     # self.criterion2 = torch.nn.BCELoss(size_average=True)
     self.criterion2 = nn.CrossEntropyLoss(
         weight=torch.Tensor([1.0, 1.0]).cuda())
     self.criterion3 = nn.NLLLoss()
     self.mse = nn.MSELoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt['lr'])
     self.att_w = torch.eye(len(constant.LABEL_TO_ID)).cuda()
     # self.att_w[0][0] = 1
     self.epoch = 0
Example #24
    def __init__(self, opt, model, model_type='predictor'):
        self.opt = opt
        self.model_type = model_type
        self.model = model
        if model_type == 'predictor':
            self.criterion = nn.CrossEntropyLoss(reduction='none')
        elif model_type == 'pointwise':
            self.criterion = nn.BCEWithLogitsLoss()
        elif model_type == 'pairwise':
            self.criterion = nn.BCEWithLogitsLoss(
            )  # Only a placeholder, will NOT use this criterion
        self.parameters = [
            p for p in self.model.parameters() if p.requires_grad
        ]

        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()

        self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                   self.parameters, opt['lr'])
Example #25
 def __init__(self, opt=None, vocab=None, emb_matrix=None, model_file=None):
     if model_file is not None:
         # load model, config and vocab directly from file
         self.load(model_file)
     else:
         # otherwise build model from scratch
         self.opt = opt
         # use pointer-generator
         self.model = Seq2SeqWithCopyModel(opt, emb_matrix=emb_matrix)
         self.vocab = vocab
     # by default use 0 weight for coverage loss
     self.criterion = SequenceLoss(self.vocab.size,
                                   self.opt.get('cov_alpha', 0))
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if self.opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(self.parameters,
                                                self.opt['lr'])
Example #26
 def __init__(self, opt, emb_matrix=None):
     self.opt = opt
     self.emb_matrix = emb_matrix
     #self.model = GCNClassifier(opt, emb_matrix=emb_matrix)
     self.model = DGAModel(opt, emb_matrix=emb_matrix)
     # self.criterion = nn.CrossEntropyLoss()
     self.alpha = []
     with open("alpha.txt", 'r') as f:
         for line in f.readlines():
             self.alpha.append(float(line.strip().split('\t')[1]))
     assert len(self.alpha) == len(constant.LABEL_TO_ID)
     self.alpha = np.array(self.alpha)
     #self.criterion = FocalLoss(len(constant.LABEL_TO_ID), size_average=True)
     self.criterion = nn.CrossEntropyLoss()
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     if opt['cuda']:
         self.model.cuda()
         self.criterion.cuda()
     self.optimizer = torch_utils.get_optimizer(opt['optim'],
                                                self.parameters, opt["lr"],
                                                opt["conv_l2"])
Example #27
    def build_model(self, model_kwargs):
        """Build the necessary model.

        Args:
            model_kwargs (dict): model args
        """
        # model parameters
        if model_kwargs['TYPE'] == 'vs_gcnn':
            params = [
                self.num_classes, model_kwargs['IN_CHANNELS'],
                self.num_viewgroups, model_kwargs['DROPOUT'],
                model_kwargs['LAYER_CHANNELS']
            ]
        else:
            raise ValueError("Invalid Model. Model Type should be one of %s" %
                             ', '.join(MODEL_TYPE.keys()))

        # model
        self.model = MODEL_TYPE[model_kwargs['TYPE']](*params)
        self.loss = get_loss_fn(model_kwargs['LOSS'])
        self.step_epochs = np.array([
            math.ceil(float(self.args['EPOCHS'] * x))
            for x in self.args['STEP']
        ])

        # optimizer
        optimizer_args = model_kwargs['OPTIMIZER']
        self.lr = optimizer_args['LR']
        self.model.apply(weights_init)
        self.model.to(self.cuda)
        self.optimizer = get_optimizer(optimizer_args['TYPE'])(
            self.model.parameters(),
            lr=self.lr,
            weight_decay=optimizer_args['WEIGHT_DECAY'])

        if model_kwargs['PRETRAIN_NAME'] != '':
            self.load_model()
Example #28
 def __init__(self, args, emb_matrix=None):
     self.args = args
     self.emb_matrix = emb_matrix
     self.model = RGATABSA(args)
     self.parameters = [
         p for p in self.model.parameters() if p.requires_grad
     ]
     self.model.cuda()
     self.optimizer = torch_utils.get_optimizer(args.optim,
                                                self.parameters,
                                                args.lr,
                                                l2=1e-5)
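     # note: this optimizer is replaced below by the grouped-parameter Adam optimizer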
     # '''
     bert_model = self.model.enc.encoder.Sent_encoder
     bert_params_dict = list(map(id, bert_model.parameters()))
     base_params = filter(lambda p: id(p) not in bert_params_dict,
                          self.model.parameters())
     # no_decay = ["bias", "LayerNorm.weight"]
     # optimizer_grouped_parameters = [
     #    {"params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],"weight_decay": args.l2,},
     #    {"params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": args.l2},
     #    {"params": base_params},
     #    {"params": bert_model.parameters(), "lr": args.bert_lr}
     # ]
     optimizer_grouped_parameters = [
         {
             "params": base_params
         },
         {
             "params": bert_model.parameters(),
             "lr": args.bert_lr
         },
     ]
     self.optimizer = torch.optim.Adam(optimizer_grouped_parameters,
                                       lr=args.lr,
                                       weight_decay=args.l2)
Example #29
    def train(self, model_pattern, model_name):

        ori_model = model_pattern(config=self)
        if self.pretrain_model is not None:
            ori_model.load_state_dict(torch.load(self.pretrain_model))
        ori_model.cuda()

        parameters = [p for p in ori_model.parameters() if p.requires_grad]

        optimizer = torch_utils.get_optimizer(self.optim, parameters, self.lr)

        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, self.lr_decay)

        model = nn.DataParallel(ori_model)
        # parallelize the module across multiple GPUs
        BCE = nn.BCEWithLogitsLoss(reduction='none')

        if not os.path.exists(self.checkpoint_dir):
            os.mkdir(self.checkpoint_dir)

        best_auc = 0.0
        best_f1 = 0.0
        best_epoch = 0

        model.train()

        global_step = 0
        total_loss = 0
        start_time = time.time()

        def logging(s, print_=True, log_=True):
            if print_:
                print(s)
            if log_:
                with open(os.path.join("log", model_name), 'a+') as f_log:
                    f_log.write(s + '\n')

        dev_score_list = []
        f1 = 0
        dev_score_list.append(f1)

        for epoch in range(self.max_epoch):
            gc.collect()
            self.acc_NA.clear()
            self.acc_not_NA.clear()
            self.acc_total.clear()
            print("epoch:{}, Learning rate:{}".format(
                epoch, optimizer.param_groups[0]['lr']))

            epoch_start_time = time.time()

            for no, data in enumerate(self.get_train_batch()):
                context_idxs = data['context_idxs']
                context_pos = data['context_pos']
                h_mapping = data['h_mapping']
                t_mapping = data['t_mapping']
                relation_label = data['relation_label']
                input_lengths = data['input_lengths']
                relation_multi_label = data['relation_multi_label']
                relation_mask = data['relation_mask']
                context_ner = data['context_ner']
                context_char_idxs = data['context_char_idxs']
                ht_pair_pos = data['ht_pair_pos']
                context_seg = data['context_seg']

                dis_h_2_t = ht_pair_pos + 10
                dis_t_2_h = -ht_pair_pos + 10

                torch.cuda.empty_cache()

                context_idxs = context_idxs.cuda()
                context_pos = context_pos.cuda()
                context_ner = context_ner.cuda()
                #context_char_idxs = context_char_idxs.cuda()
                #input_lengths = input_lengths.cuda()
                h_mapping = h_mapping.cuda()
                t_mapping = t_mapping.cuda()
                relation_mask = relation_mask.cuda()
                dis_h_2_t = dis_h_2_t.cuda()
                dis_t_2_h = dis_t_2_h.cuda()

                node_position = data['node_position'].cuda()
                entity_position = data['entity_position'].cuda()
                node_sent_num = data['node_sent_num'].cuda()
                all_node_num = data['all_node_num'].cuda()
                entity_num = torch.Tensor(data['entity_num']).cuda()
                #sent_num = torch.Tensor(data['sent_num']).cuda()

                sdp_pos = data['sdp_position'].cuda()
                sdp_num = torch.Tensor(data['sdp_num']).cuda()

                predict_re = model(context_idxs, context_pos, context_ner,
                                   h_mapping, t_mapping, relation_mask,
                                   dis_h_2_t, dis_t_2_h, context_seg,
                                   node_position, entity_position,
                                   node_sent_num, all_node_num, entity_num,
                                   sdp_pos, sdp_num)

                relation_multi_label = relation_multi_label.cuda()

                loss = torch.sum(
                    BCE(predict_re, relation_multi_label) *
                    relation_mask.unsqueeze(2)) / torch.sum(relation_mask)

                output = torch.argmax(predict_re, dim=-1)
                output = output.data.cpu().numpy()

                optimizer.zero_grad()

                loss.backward()

                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               self.max_grad_norm)
                # gradient clipping
                optimizer.step()

                relation_label = relation_label.data.cpu().numpy()

                for i in range(output.shape[0]):
                    for j in range(output.shape[1]):
                        label = relation_label[i][j]
                        if label < 0:
                            break
                        if label == 0:
                            self.acc_NA.add(output[i][j] == label)
                        else:
                            self.acc_not_NA.add(output[i][j] == label)

                        self.acc_total.add(output[i][j] == label)

                global_step += 1
                total_loss += loss.item()

                if global_step % self.period == 0:
                    cur_loss = total_loss / self.period
                    elapsed = time.time() - start_time
                    logging(
                        '| epoch {:2d} | step {:4d} |  ms/b {:5.2f} | train loss {:5.3f} | NA acc: {:4.2f} | not NA acc: {:4.2f}  | tot acc: {:4.2f} '
                        .format(epoch, global_step,
                                elapsed * 1000 / self.period, cur_loss,
                                self.acc_NA.get(), self.acc_not_NA.get(),
                                self.acc_total.get()))
                    total_loss = 0
                    start_time = time.time()

            if epoch > self.evaluate_epoch:

                logging('-' * 89)
                eval_start_time = time.time()
                model.eval()

                f1, f1_ig, auc, pr_x, pr_y = self.test(model, model_name)

                model.train()
                logging('| epoch {:3d} | time: {:5.2f}s'.format(
                    epoch,
                    time.time() - eval_start_time))
                logging('-' * 89)

                if f1 > best_f1:
                    best_f1 = f1
                    best_auc = auc
                    best_epoch = epoch
                    path = os.path.join(self.checkpoint_dir, model_name)
                    torch.save(ori_model.state_dict(), path)
                    logging("best f1 is: {}, epoch is: {}, save path is: {}".
                            format(best_f1, best_epoch, path))

            if epoch > self.decay_epoch:  # and epoch < self.evaluate_epoch
                if self.optim == 'sgd' and f1 < dev_score_list[-1]:
                    self.lr *= self.lr_decay
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = self.lr

                if self.optim == 'adam' and optimizer.param_groups[0][
                        'lr'] > 1e-4:  #epoch < 30:# and f1 < dev_score_list[-1]:
                    scheduler.step()

            dev_score_list.append(f1)
            print("train time for epoch {}: {}".format(
                epoch,
                time.time() - epoch_start_time))

        print("Finish training")
        print("Best epoch = {} | F1 {}, auc = {}".format(
            best_epoch, best_f1, best_auc))
        print("Storing best result...")
        print("Finish storing")
Example #30
def train(task_id, data, mnet, hnet, device, config, shared, writer, logger):
    """Train the hyper network using the task-specific loss plus a regularizer
    that should overcome catastrophic forgetting.

    :code:`loss = task_loss + beta * regularizer`.

    Args:
        task_id: The index of the task on which we train.
        data: The dataset handler.
        mnet: The model of the main network.
        hnet: The model of the hyper network. May be ``None``.
        device: Torch device (cpu or gpu).
        config: The command line arguments.
        shared (argparse.Namespace): Set of variables shared between functions.
        writer: The tensorboard summary writer.
        logger: The logger that should be used rather than the print method.
    """
    start_time = time()

    print('data: ', data)
    print('data.num_classes: ', data.num_classes)
    print('data.num_train_samples: ', data.num_train_samples)

    logger.info('Training network ...')

    mnet.train()
    if hnet is not None:
        hnet.train()

    #################
    ### Optimizer ###
    #################
    # Define the optimizers used to train main network and hypernet.
    if hnet is not None:
        theta_params = list(hnet.theta)
        if config.continue_emb_training:
            for i in range(task_id):  # for all previous task embeddings
                theta_params.append(hnet.get_task_emb(i))

        # Only for the current task embedding.
        # Important that this embedding is in a different optimizer in case
        # we use the lookahead.
        emb_optimizer = get_optimizer([hnet.get_task_emb(task_id)],
                                      config.lr,
                                      momentum=config.momentum,
                                      weight_decay=config.weight_decay,
                                      use_adam=config.use_adam,
                                      adam_beta1=config.adam_beta1,
                                      use_rmsprop=config.use_rmsprop)
    else:
        theta_params = mnet.weights
        emb_optimizer = None

    theta_optimizer = get_optimizer(theta_params,
                                    config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay,
                                    use_adam=config.use_adam,
                                    adam_beta1=config.adam_beta1,
                                    use_rmsprop=config.use_rmsprop)

    ################################
    ### Learning rate schedulers ###
    ################################
    if config.plateau_lr_scheduler:
        assert (config.epochs != -1)
        # The scheduler config has been taken from here:
        # https://keras.io/examples/cifar10_resnet/
        # Note, we use 'max' instead of 'min' as we look at accuracy rather
        # than validation loss!
        plateau_scheduler_theta = optim.lr_scheduler.ReduceLROnPlateau( \
            theta_optimizer, 'max', factor=np.sqrt(0.1), patience=5,
            min_lr=0.5e-6, cooldown=0)
        plateau_scheduler_emb = None
        if emb_optimizer is not None:
            plateau_scheduler_emb = optim.lr_scheduler.ReduceLROnPlateau( \
                emb_optimizer, 'max', factor=np.sqrt(0.1), patience=5,
                min_lr=0.5e-6, cooldown=0)

    if config.lambda_lr_scheduler:
        assert (config.epochs != -1)

        def lambda_lr(epoch):
            """Multiplicative Factor for Learning Rate Schedule.

            Computes a multiplicative factor for the initial learning rate based
            on the current epoch. This method can be used as argument
            ``lr_lambda`` of class :class:`torch.optim.lr_scheduler.LambdaLR`.

            The schedule is inspired by the Resnet CIFAR-10 schedule suggested
            here https://keras.io/examples/cifar10_resnet/.

            Args:
                epoch (int): The number of epochs

            Returns:
                lr_scale (float32): learning rate scale
            """
            lr_scale = 1.
            if epoch > 180:
                lr_scale = 0.5e-3
            elif epoch > 160:
                lr_scale = 1e-3
            elif epoch > 120:
                lr_scale = 1e-2
            elif epoch > 80:
                lr_scale = 1e-1
            return lr_scale

        lambda_scheduler_theta = optim.lr_scheduler.LambdaLR(
            theta_optimizer, lambda_lr)
        lambda_scheduler_emb = None
        if emb_optimizer is not None:
            lambda_scheduler_emb = optim.lr_scheduler.LambdaLR(
                emb_optimizer, lambda_lr)

    ##############################
    ### Prepare CL Regularizer ###
    ##############################
    # Whether we will calculate the regularizer.
    calc_reg = task_id > 0 and not config.mnet_only and config.beta > 0 and \
        not config.train_from_scratch

    # Compute targets when the reg is activated and we are not training
    # the first task
    if calc_reg:
        if config.online_target_computation:
            # Compute targets for the regularizer whenever they are needed.
            # -> Computationally expensive.
            targets_hypernet = None
            prev_theta = [p.detach().clone() for p in hnet.theta]
            prev_task_embs = [p.detach().clone() for p in hnet.get_task_embs()]
        else:
            # Compute targets for the regularizer once and keep them all in
            # memory -> Memory expensive.
            targets_hypernet = hreg.get_current_targets(task_id, hnet)
            prev_theta = None
            prev_task_embs = None

        # If we do not want to regularize all outputs (in a multi-head setup).
        # Note, we don't care whether output heads other than the current one
        # change.
        regged_outputs = None
        if config.cl_scenario != 2:
            # FIXME We assume here that all tasks have the same output size.
            n_y = data.num_classes
            regged_outputs = [
                list(range(i * n_y, (i + 1) * n_y)) for i in range(task_id)
            ]

    # We need to tell the main network which batch statistics to use, in case
    # batchnorm is used and we checkpoint the batchnorm stats.
    mnet_kwargs = {}
    if mnet.batchnorm_layers is not None:
        if config.bn_distill_stats:
            raise NotImplementedError()
        elif not config.bn_no_running_stats and \
                not config.bn_no_stats_checkpointing:
            # Specify current task as condition to select correct
            # running stats.
            mnet_kwargs['condition'] = task_id

    ######################
    ### Start training ###
    ######################

    iter_per_epoch = -1
    if config.epochs == -1:
        training_iterations = config.n_iter
    else:
        assert (config.epochs > 0)
        iter_per_epoch = int(np.ceil(data.num_train_samples / \
                                     config.batch_size))
        training_iterations = config.epochs * iter_per_epoch

    summed_iter_runtime = 0

    for i in range(training_iterations):
        ### Evaluate network.
        # We test the network before we run the training iteration.
        # That way, we can see the initial performance of the untrained network.
        if i % config.val_iter == 0:
            test(task_id,
                 data,
                 mnet,
                 hnet,
                 device,
                 shared,
                 config,
                 writer,
                 logger,
                 train_iter=i)
            mnet.train()
            if hnet is not None:
                hnet.train()

        if i % 200 == 0:
            logger.info('Training step: %d ...' % i)

        iter_start_time = time()

        theta_optimizer.zero_grad()
        if emb_optimizer is not None:
            emb_optimizer.zero_grad()

        #######################################
        ### Data for current task and batch ###
        #######################################
        batch = data.next_train_batch(config.batch_size)
        X = data.input_to_torch_tensor(batch[0], device, mode='train')
        T = data.output_to_torch_tensor(batch[1], device, mode='train')

        # Get the output neurons depending on the continual learning scenario.
        n_y = data.num_classes
        if config.cl_scenario == 1:
            # Choose current head.
            task_out = [task_id * n_y, (task_id + 1) * n_y]
        elif config.cl_scenario == 2:
            # Always all output neurons, only one head is used.
            task_out = [0, n_y]
        else:
            # Choose current head, which will be inferred during inference.
            task_out = [task_id * n_y, (task_id + 1) * n_y]

        ########################
        ### Loss computation ###
        ########################
        if config.mnet_only:
            weights = None
        else:
            weights = hnet.forward(task_id=task_id)
        Y_hat_logits = mnet.forward(X, weights, **mnet_kwargs)

        # Restrict output neurons
        Y_hat_logits = Y_hat_logits[:, task_out[0]:task_out[1]]
        assert (T.shape[1] == Y_hat_logits.shape[1])
        # compute loss on task and compute gradients
        if config.soft_targets:
            soft_label = 0.95
            num_classes = data.num_classes
            soft_targets = torch.where(
                T == 1, torch.Tensor([soft_label]),
                torch.Tensor([(1 - soft_label) / (num_classes - 1)]))
            soft_targets = soft_targets.to(device)
            loss_task = Classifier.softmax_and_cross_entropy(
                Y_hat_logits, soft_targets)
        else:
            loss_task = Classifier.logit_cross_entropy_loss(Y_hat_logits, T)

        # Compute gradients based on task loss (those might be used in the CL
        # regularizer).
        loss_task.backward(retain_graph=calc_reg, create_graph=calc_reg and \
                           config.backprop_dt)

        # The current task embedding only depends on the task loss, so we can
        # update it already.
        if emb_optimizer is not None:
            emb_optimizer.step()

        #############################
        ### CL (HNET) Regularizer ###
        #############################
        loss_reg = 0
        dTheta = None

        if calc_reg:
            if config.no_lookahead:
                dTembs = None
                dTheta = None
            else:
                dTheta = opstep.calc_delta_theta(
                    theta_optimizer,
                    False,
                    lr=config.lr,
                    detach_dt=not config.backprop_dt)

                if config.continue_emb_training:
                    dTembs = dTheta[-task_id:]
                    dTheta = dTheta[:-task_id]
                else:
                    dTembs = None

            loss_reg = hreg.calc_fix_target_reg(
                hnet,
                task_id,
                targets=targets_hypernet,
                dTheta=dTheta,
                dTembs=dTembs,
                mnet=mnet,
                inds_of_out_heads=regged_outputs,
                prev_theta=prev_theta,
                prev_task_embs=prev_task_embs,
                batch_size=config.cl_reg_batch_size)

            loss_reg *= config.beta

            loss_reg.backward()

        # Now that we have computed the regularizer, we can use the accumulated
        # gradients and update the hnet (or mnet) parameters.
        theta_optimizer.step()

        Y_hat = F.softmax(Y_hat_logits, dim=1)
        classifier_accuracy = Classifier.accuracy(Y_hat, T) * 100.0

        # print('train T: ',Y_hat.argmax(dim=1, keepdim=False))
        # print('train T: ',T.argmax(dim=1, keepdim=False))
        # print('train Y_hat: ',Y_hat.size())
        # print('train T: ',T.size())

        #########################
        # Learning rate scheduler
        #########################
        if config.plateau_lr_scheduler:
            assert (iter_per_epoch != -1)
            if i % iter_per_epoch == 0 and i > 0:
                curr_epoch = i // iter_per_epoch
                logger.info('Computing test accuracy for plateau LR ' +
                            'scheduler (epoch %d).' % curr_epoch)
                # We need a validation quantity for the plateau LR scheduler.
                # FIXME we should use an actual validation set rather than the
                # test set.
                # Note, https://keras.io/examples/cifar10_resnet/ uses the test
                # set to compute the validation loss. We use the "validation"
                # accuracy instead.
                # FIXME We increase `train_iter` as the print messages in the
                # test method suggest that the testing has been executed before
                test_acc, _ = test(task_id,
                                   data,
                                   mnet,
                                   hnet,
                                   device,
                                   shared,
                                   config,
                                   writer,
                                   logger,
                                   train_iter=i + 1)
                mnet.train()
                if hnet is not None:
                    hnet.train()

                plateau_scheduler_theta.step(test_acc)
                if plateau_scheduler_emb is not None:
                    plateau_scheduler_emb.step(test_acc)

        if config.lambda_lr_scheduler:
            assert (iter_per_epoch != -1)
            if i % iter_per_epoch == 0 and i > 0:
                curr_epoch = i // iter_per_epoch
                logger.info('Applying Lambda LR scheduler (epoch %d).' %
                            curr_epoch)

                lambda_scheduler_theta.step()
                if lambda_scheduler_emb is not None:
                    lambda_scheduler_emb.step()

        ###########################
        ### Tensorboard summary ###
        ###########################
        # We don't wanna slow down training by having too much output.
        if i % 50 == 0:
            writer.add_scalar('train/task_%d/class_accuracy' % task_id,
                              classifier_accuracy, i)
            writer.add_scalar('train/task_%d/loss_task' % task_id, loss_task,
                              i)
            writer.add_scalar('train/task_%d/loss_reg' % task_id, loss_reg, i)

        ### Show the current training progress to the user.
        if i % config.val_iter == 0:
            msg = 'Training step {}: Classifier Accuracy: {:.3f} ' + \
                  '(on current training batch).'
            logger.debug(msg.format(i, classifier_accuracy))

        iter_end_time = time()
        summed_iter_runtime += (iter_end_time - iter_start_time)

        if i % 200 == 0:
            logger.info('Training step: %d ... Done -- (runtime: %f sec)' % \
                        (i, iter_end_time - iter_start_time))

    if mnet.batchnorm_layers is not None:
        if not config.bn_distill_stats and \
                not config.bn_no_running_stats and \
                not config.bn_no_stats_checkpointing:
            # Checkpoint the current running statistics (that have been
            # estimated while training the current task).
            for bn_layer in mnet.batchnorm_layers:
                assert (bn_layer.num_stats == task_id + 1)
                bn_layer.checkpoint_stats()

    avg_iter_time = summed_iter_runtime / config.n_iter
    logger.info('Average runtime per training iteration: %f sec.' % \
                avg_iter_time)

    logger.info('Elapsed time for training task %d: %f sec.' % \
                (task_id+1, time()-start_time))