def iterate_data_bert(self):
    """Run the BERT training loop over epochs with periodic early-stop checks.

    For each epoch, shuffles the training qids, feeds (a, u, v) triples to the
    model one qid at a time, and at a few points per epoch (plus the final
    batch) asks ``should_early_stop`` whether to abort training.  Evaluation
    is passed in lazily via the ``eee`` closures so valid/test evaluation only
    runs if ``should_early_stop`` actually invokes them.
    """
    def eee(desc):
        # Build a zero-arg thunk that evaluates on the split named by `desc`
        # ('valid' or 'test').  Returning a callable defers the (expensive)
        # evaluation until should_early_stop decides it is needed.
        def f():
            lut = {
                'valid': self.data.get_valid_qids,
                'test': self.data.get_test_qids
            }
            return self.eval_bert(qids=lut[desc](), desc=desc)
        return f
    # This iteration strategy only applies to B1-type models.
    assert isinstance(self.model, B1)
    self.get_writer()
    for e in range(self.epoch_num):
        self.ppp('\nepoch:{}'.format(e))
        # Fresh shuffle of the training qids each epoch.
        train_qids = au.shuffle(self.data.get_train_qids())
        train_size = len(train_qids)
        with my_pbar(desc='train', total=train_size, leave=True, ncols=50) as pbar:
            for bid, qid in enumerate(train_qids):
                al, ul, vl = self.data.get_auv_bert(qid)
                self.model.train_step(al, ul, vl)
                pbar.update()
                # Check for early stopping at epoch partitions (presumably
                # ~1/3 intervals via reach_partition — confirm against its
                # definition) and always on the last batch of the epoch.
                if reach_partition(bid, train_size, 3) or bid == train_size - 1:
                    # self.ppp(self.model.get_loss(al, ul, vl))
                    if self.should_early_stop(eval_valid=eee('valid'), eval_test=eee('test')):
                        self.ppp('early stop')
                        return
def update_od_list(od_list, log_path, shuffle):
    """Stamp each option dict with its group id and log path, optionally shuffle.

    Each dict in ``od_list`` gets ``gid_`` set to its pre-shuffle position and
    ``lg_`` set to ``log_path``.  When ``shuffle`` is truthy the list order is
    randomized afterwards (so ``gid_`` keeps the original index).  The first
    11 entries of the (possibly shuffled) list are printed as a preview.

    Returns the (possibly re-ordered) list.
    """
    for i, od in enumerate(od_list):
        od[gid_] = i
        od[lg_] = log_path
    if shuffle:
        od_list = au.shuffle(od_list)
    # Preview the first 11 entries.  The original used
    # `print(...) if i <= 10 else None` inside a loop over the whole list —
    # a conditional expression abused for side effects that also kept
    # iterating after the preview was done; slice + plain loop fixes both.
    for od in od_list[:11]:
        print(au.entries2name(od, inner='=', inter=' '))
    return od_list
def shuffle_generate(self, batch_size, neg_batch_num):
    """Yield (index, positive batch, negative batches) over shuffled docs.

    The doc array is shuffled and split into batches of ``batch_size``.
    For each batch, ``neg_batch_num`` other batches are sampled at random
    (with numpy) to serve as negatives.
    """
    shuffled = au.shuffle(self.docarr)
    batch_list = self.split_length(shuffled, batch_size)
    total = len(batch_list)
    print('shuffle_generate - batch num:', total)
    for idx, pos_batch in enumerate(batch_list):
        # Sample negative batch indices from every batch except the current one.
        candidates = [k for k in range(total) if k != idx]
        neg_idxes = np.random.choice(candidates, neg_batch_num)
        neg_batches = [batch_list[k] for k in neg_idxes]
        yield idx, pos_batch, neg_batches
def generate(self, batch_size: int, neg_batch_num: int, shuffle: bool):
    """Yield (doc slice, None) pairs of size ``batch_size``.

    When ``shuffle`` is true the doc array is shuffled first; otherwise it is
    consumed in its stored order.  ``neg_batch_num`` is accepted for interface
    parity but unused here (the negative slot is always None).
    """
    if shuffle:
        source = au.shuffle(self.docarr)
    else:
        source = self.docarr
    for chunk in au.split_slices(source, batch_size):
        yield chunk, None
def gen(self, rvs, source: dict, shuffle: bool):
    """Yield values from ``source`` for every qid registered under ``rvs``.

    Qids are taken from ``self.rvs2qids[rvs]``; when ``shuffle`` is true
    their order is randomized before lookup.
    """
    ordered = au.shuffle(self.rvs2qids[rvs]) if shuffle else self.rvs2qids[rvs]
    for qid in ordered:
        yield source[qid]