Esempio n. 1
0
    def iterate_data_bert(self):
        """Run the epoch loop for the ``B1`` model with early-stop checks.

        In every epoch the training qids are shuffled and passed one at a
        time through ``self.data.get_auv_bert`` into ``self.model.train_step``.
        Whenever ``reach_partition(step, total, 3)`` is truthy, or the last
        qid of the epoch has just been processed, ``self.should_early_stop``
        is called with lazy evaluators for the 'valid' and 'test' splits; a
        truthy result prints 'early stop' and returns from the method.

        Raises:
            AssertionError: if ``self.model`` is not an instance of ``B1``.
        """

        def make_evaluator(split):
            """Return a zero-argument callable that evaluates ``split``."""

            def evaluate():
                # The getters are resolved when the evaluator is invoked,
                # not when it is created.
                qid_getters = {
                    'valid': self.data.get_valid_qids,
                    'test': self.data.get_test_qids,
                }
                qids = qid_getters[split]()
                return self.eval_bert(qids=qids, desc=split)

            return evaluate

        assert isinstance(self.model, B1)
        self.get_writer()
        for epoch in range(self.epoch_num):
            self.ppp('\nepoch:{}'.format(epoch))
            shuffled_qids = au.shuffle(self.data.get_train_qids())
            total = len(shuffled_qids)
            with my_pbar(
                    desc='train', total=total, leave=True, ncols=50) as bar:
                for step, qid in enumerate(shuffled_qids):
                    al, ul, vl = self.data.get_auv_bert(qid)
                    self.model.train_step(al, ul, vl)
                    bar.update()

                    # Only consult the early-stop logic at partition points
                    # and at the very end of the epoch.
                    at_checkpoint = (reach_partition(step, total, 3)
                                     or step == total - 1)
                    if not at_checkpoint:
                        continue

                    stop_now = self.should_early_stop(
                        eval_valid=make_evaluator('valid'),
                        eval_test=make_evaluator('test'),
                    )
                    if stop_now:
                        self.ppp('early stop')
                        return
Esempio n. 2
0
def update_od_list(od_list, log_path, shuffle):
    """Tag every entry with its index and the log path, then preview the list.

    Each ``od`` in ``od_list`` is mutated in place: ``od[gid_]`` receives the
    entry's position in the incoming order and ``od[lg_]`` receives
    ``log_path``. The ids are assigned before any shuffling, so they record
    the original order.

    Args:
        od_list: Iterable of objects supporting item assignment
            (presumably dict-like; confirm against callers).
        log_path: Value stored under ``lg_`` in every entry.
        shuffle: When truthy, the list is replaced by ``au.shuffle(od_list)``.

    Returns:
        The tagged list, shuffled if requested.
    """
    preview_limit = 11  # entries with index 0..10 are echoed to stdout

    for i, od in enumerate(od_list):
        od[gid_] = i
        od[lg_] = log_path
    if shuffle:
        od_list = au.shuffle(od_list)

    # Print only the leading entries and stop as soon as the preview is
    # complete instead of walking the rest of the list for nothing.
    for i, od in enumerate(od_list):
        if i >= preview_limit:
            break
        print(au.entries2name(od, inner='=', inter=' '))
    return od_list
Esempio n. 3
0
 def shuffle_generate(self, batch_size, neg_batch_num):
     """Yield every batch of the shuffled documents with sampled negatives.

     ``self.docarr`` is shuffled with ``au.shuffle`` and split into batches
     by ``self.split_length``. For each batch, ``neg_batch_num`` other
     batches are drawn uniformly at random (with replacement, NumPy's
     default for ``np.random.choice``) to serve as negatives.

     Args:
         batch_size: Forwarded to ``self.split_length``.
         neg_batch_num: Number of negative batches sampled per positive one.

     Yields:
         ``(i, p_batch, n_batches)`` where ``i`` is the batch index,
         ``p_batch`` is ``batches[i]`` and ``n_batches`` is a list of
         ``neg_batch_num`` batches whose index differs from ``i``.

     Raises:
         ValueError: raised by NumPy when there is exactly one batch and
             ``neg_batch_num`` is positive, since no other batch exists.
     """
     docarr = au.shuffle(self.docarr)
     batches = self.split_length(docarr, batch_size)
     batch_num = len(batches)
     print('shuffle_generate - batch num:', batch_num)
     for i, p_batch in enumerate(batches):
         # Sample among the batch_num - 1 *other* batches without building
         # the candidate index list on every iteration: draw a slot in
         # [0, batch_num - 1) and shift slots at or above i up by one so
         # that index i itself can never be selected.
         drawn = np.random.choice(batch_num - 1, neg_batch_num)
         n_idxes = np.where(drawn >= i, drawn + 1, drawn)
         n_batches = [batches[j] for j in n_idxes]
         yield i, p_batch, n_batches
Esempio n. 4
0
 def generate(self, batch_size: int, neg_batch_num: int, shuffle: bool):
     """Yield ``(batch, None)`` for each slice of the document array.

     The documents come from ``self.docarr``, passed through ``au.shuffle``
     first when ``shuffle`` is truthy, and are cut into slices by
     ``au.split_slices(documents, batch_size)``. ``neg_batch_num`` is
     accepted but not used here; the second element of every yielded pair
     is always ``None``.
     """
     documents = self.docarr
     if shuffle:
         documents = au.shuffle(documents)
     for batch in au.split_slices(documents, batch_size):
         yield batch, None
Esempio n. 5
0
 def gen(self, rvs, source: dict, shuffle: bool):
     """Yield ``source[qid]`` for every qid stored under ``rvs``.

     The qids are read from ``self.rvs2qids[rvs]`` and, when ``shuffle`` is
     truthy, passed through ``au.shuffle`` before iteration. Lookups into
     ``source`` happen lazily, one per yielded item.
     """
     registered = self.rvs2qids[rvs]
     ordering = au.shuffle(registered) if shuffle else registered
     yield from (source[qid] for qid in ordering)