def dry_run(self, mass):
    """Evaluate one batch without any weight update.

    Pools BERT embeddings from both the adjacent context window and the
    distant=2 window (four views per sample), classifies the pooled vector,
    and returns hard predictions together with the gold labels.

    Args:
        mass: raw batch; decomposed by handle_mass into (sss, labels, poss).

    Returns:
        (predicted label indices, label tensor) — both of length batch.
    """
    sss, labels, poss = handle_mass(mass)
    per_sample = []
    for ss, pos in zip(sss, poss):
        near_left, near_right = get_left_right_by_ss_pos(ss, pos)
        far_left, far_right = get_left_right_by_ss_pos(ss, pos, distant=2)
        views = [
            B.compress_left_get_embs(self.bert, self.toker, near_left),   # (seq_len, 784)
            B.compress_right_get_embs(self.bert, self.toker, near_right), # (seq_len, 784)
            B.compress_left_get_embs(self.bert, self.toker, far_left),    # (seq_len, 784)
            B.compress_right_get_embs(self.bert, self.toker, far_right),  # (seq_len, 784)
        ]
        # All four views must cover the same number of token positions.
        assert len({v.shape[0] for v in views}) == 1
        averaged = (views[0] + views[1] + views[2] + views[3]) / 4  # (seq_len, 784)
        per_sample.append(averaged.mean(0))  # (784,)
    pooled_embs = t.stack(per_sample)  # (batch, 784)
    labels = t.LongTensor(labels)      # (batch,), values 0 or 1
    if GPU_OK:
        labels = labels.cuda()
    o = self.classifier(pooled_embs)   # (batch, 2)
    self.print_train_info(o, labels, -1)
    return o.argmax(1), labels
def train(self, mass):
    """Run one optimization step on a batch.

    For each sample, averages the left- and right-context BERT embeddings,
    mean-pools over the sequence, classifies, and backpropagates the loss.

    Args:
        mass: raw batch; decomposed by handle_mass into (sss, labels, poss).

    Returns:
        The detached scalar loss value for this step.
    """
    sss, labels, poss = handle_mass(mass)
    per_sample = []
    for ss, pos in zip(sss, poss):
        left, right = get_left_right_by_ss_pos(ss, pos)
        left_embs = B.compress_left_get_embs(self.bert, self.toker, left)    # (seq_len, 784)
        right_embs = B.compress_right_get_embs(self.bert, self.toker, right) # (seq_len, 784)
        # Both sides must align token-for-token before averaging.
        assert left_embs.shape[0] == right_embs.shape[0]
        averaged = (left_embs + right_embs) / 2  # (seq_len, 784)
        per_sample.append(averaged.mean(0))      # (784,)
    pooled_embs = t.stack(per_sample)  # (batch, 784)
    labels = t.LongTensor(labels)      # (batch,), values 0 or 1
    if GPU_OK:
        labels = labels.cuda()
    o = self.classifier(pooled_embs)   # (batch, 1)
    loss = self.cal_loss(o, labels)
    self.zero_grad()
    loss.backward()
    self.optim.step()
    self.print_train_info(o, labels, loss.detach().item())
    return loss.detach().item()
def train(self, mass):
    """Run one optimization step with a sector loss plus an auxiliary
    sentence-ordering loss.

    For each sample, the sector branch averages left/right BERT context
    embeddings and classifies the mean-pooled vector. The order-detector
    branch takes the 2-sentence left context, reverses it with probability
    1/2, and trains a second classifier to detect the reversal.

    Args:
        mass: raw batch; decomposed by handle_mass into (sss, labels, poss).

    Returns:
        The detached scalar total loss (sector + ordering) for this step.
    """
    sss, labels, poss = handle_mass(mass)
    pooled_embs = []
    clss_for_order_checking = []  # CLS-like vectors fed to the order detector
    order_labels = []             # 1 = left pair was reversed, 0 = kept
    for ss, pos in zip(sss, poss):
        left, right = get_left_right_by_ss_pos(ss, pos)
        emb1 = B.compress_left_get_embs(self.bert, self.toker, left)    # (seq_len, 784)
        emb2 = B.compress_right_get_embs(self.bert, self.toker, right)  # (seq_len, 784)
        assert emb1.shape[0] == emb2.shape[0]
        mean = (emb1 + emb2) / 2   # (seq_len, 784)
        pooled = mean.mean(0)      # (784,)
        pooled_embs.append(pooled)
        # Order detector: only defined for a 2-sentence left context.
        if len(left) == 2:
            # BUGFIX: `randrange(100) > 50` fired with probability 49/100,
            # not the intended 1/2; `< 50` gives an exact 50% reversal rate.
            if random.randrange(100) < 50:  # reverse with probability 1/2
                left_disturbed = list(reversed(left))
                order_labels.append(1)
            else:
                left_disturbed = left.copy()
                order_labels.append(0)
            cls = B.compress_by_ss_pair_get_mean(self.bert, self.toker,
                                                 left_disturbed)  # (784,)
            clss_for_order_checking.append(cls)
        else:
            print(f'Warning, left length = {len(left)}')
    pooled_embs = t.stack(pooled_embs)  # (batch, 784)
    labels = t.LongTensor(labels)       # (batch,), values 0 or 1
    order_labels = t.LongTensor(order_labels)  # (x <= batch,)
    if GPU_OK:
        labels = labels.cuda()
        order_labels = order_labels.cuda()
    o = self.classifier(pooled_embs)  # (batch, 1)
    sector_loss = self.cal_loss(o, labels)
    # BUGFIX: guard against an empty auxiliary batch — t.stack([]) raises a
    # RuntimeError when no sample had a 2-sentence left context.
    if clss_for_order_checking:
        stacked_clss = t.stack(clss_for_order_checking)  # (x <= batch, 784)
        output_ordering = self.classifier2(stacked_clss)  # (x, 1)
        # rate=0: reversal labels are balanced by construction, so no
        # class-imbalance reweighting is needed.
        ordering_loss = self.cal_loss(output_ordering, order_labels, rate=0)
        loss = sector_loss + ordering_loss
    else:
        loss = sector_loss
    self.zero_grad()
    loss.backward()
    self.optim.step()
    self.print_train_info(o, labels, loss.detach().item())
    return loss.detach().item()
def dry_run(self, mass):
    """Evaluate one batch without any weight update.

    Averages the left- and right-context BERT embeddings per sample,
    mean-pools over the sequence, and classifies. The sigmoided output is
    thresholded into hard labels via fit_sigmoided_to_label.

    Args:
        mass: raw batch; decomposed by handle_mass into (sss, labels, poss).

    Returns:
        (predicted labels, gold label tensor) — both of length batch.
    """
    sss, labels, poss = handle_mass(mass)
    per_sample = []
    for ss, pos in zip(sss, poss):
        left, right = get_left_right_by_ss_pos(ss, pos)
        left_embs = B.compress_left_get_embs(self.bert, self.toker, left)    # (seq_len, 784)
        right_embs = B.compress_right_get_embs(self.bert, self.toker, right) # (seq_len, 784)
        # Both sides must align token-for-token before averaging.
        assert left_embs.shape[0] == right_embs.shape[0]
        averaged = (left_embs + right_embs) / 2  # (seq_len, 784)
        per_sample.append(averaged.mean(0))      # (784,)
    pooled_embs = t.stack(per_sample)  # (batch, 784)
    labels = t.LongTensor(labels)      # (batch,), values 0 or 1
    if GPU_OK:
        labels = labels.cuda()
    o = self.classifier(pooled_embs)   # (batch, 1), sigmoided
    self.print_train_info(o, labels, -1)
    return fit_sigmoided_to_label(o), labels