def train(model, args, epoch, dataset, logger, optimizer):
    model.train()
    total_loss = 0.0
    with tqdm(desc='Training', total=len(dataset)) as pbar:
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            model.zero_grad()
            output = model(data)
            # Convert the concatenated targets into one-hot vectors so that a
            # class-weighted BCE-with-logits loss can be used instead of the
            # model's default criterion.
            concat_target = torch.cat(target, 0)
            new_target = torch.zeros(concat_target.size()[0], 2)
            new_target[range(new_target.shape[0]), concat_target] = 1
            target_var = Variable(maybe_cuda(new_target, args.cuda), requires_grad=False)
            target_var = target_var.type_as(output)
            pos_weight = maybe_cuda(torch.FloatTensor([0.1, 1.0]))
            loss = F.binary_cross_entropy_with_logits(output, target_var,
                                                      reduction='sum',
                                                      pos_weight=pos_weight)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pbar.set_description('Training, loss={:.4}'.format(loss.item()))
    total_loss = total_loss / len(dataset)
    logger.debug('Training Epoch: {}, Loss: {:.4}.'.format(epoch + 1, total_loss))
    log_value('Training Loss', total_loss, epoch + 1)

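# Note: `maybe_cuda` is used throughout this file but is not defined in this section;
# it lives in the project's utils module. The sketch below shows its assumed behaviour
# (move a tensor or module to the GPU only when CUDA is enabled); the name and default
# are illustrative, not taken from the original code.
def maybe_cuda_sketch(x, is_cuda=None):
    """Hypothetical helper: return `x` on the GPU if `is_cuda` is truthy, else unchanged."""
    if is_cuda is None:
        is_cuda = torch.cuda.is_available()
    return x.cuda() if is_cuda else x
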
def run_model_on_batch(encoder, decoder, config, use_teacher_forcing, batch_data):
    (x, x_len), (y, y_len) = batch_data
    encoder_output, encoder_hidden = encoder(
        x=maybe_cuda(Variable(x, requires_grad=False), cuda=config.cuda),
        x_len=x_len,
        hidden=None,
    )
    y_var = maybe_cuda(Variable(y, requires_grad=False), cuda=config.cuda)
    target_length = y_var.size()[1]
    decoder_input = maybe_cuda(Variable(torch.LongTensor([[0]] * x.size()[0])), cuda=config.cuda)
    decoder_hidden = encoder_hidden
    decoder_output_ls = []
    attn_ls = []
    for di in range(target_length):
        decoder_output, decoder_hidden, other_dict = decoder(
            decoder_input, decoder_hidden, encoder_output)
        decoder_output_ls.append(decoder_output)
        if "attn" in other_dict:
            attn_ls.append(other_dict["attn"])
        if use_teacher_forcing:
            decoder_input = y_var[:, di].unsqueeze(1)
        else:
            topv, topi = decoder_output.data.topk(1)
            decoder_input = maybe_cuda(Variable(topi.squeeze(1)), cuda=config.cuda)
    full_decoder_output = torch.cat(decoder_output_ls, dim=1)
    return full_decoder_output, attn_ls

def max_pooling_similarity_computing(self, doc_output, segment_idx):
    similarities = Variable(maybe_cuda(torch.Tensor([])))
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    seg_outputs = []
    doc_output = F.softmax(doc_output, dim=1)
    # Split the document encoding into per-segment slices.
    for i, idx in enumerate(segment_idx):
        if i == 0:
            seg_output = doc_output[0:segment_idx[i] + 1, :]
        elif i == len(segment_idx) - 1:
            seg_output = doc_output[segment_idx[i - 1] + 1:, :]
        else:
            seg_output = doc_output[segment_idx[i - 1] + 1:segment_idx[i] + 1, :]
        seg_outputs.append(seg_output)
    # Max-pool each segment into a single vector.
    maxes = Variable(maybe_cuda(torch.zeros(len(seg_outputs), self.hidden * 2)))
    for i in range(len(seg_outputs)):
        maxes[i, :] = torch.max(seg_outputs[i], 0)[0]
    # Cosine similarity between each pair of adjacent segment vectors.
    if len(seg_outputs) > 1:
        tensor_1 = maxes[:-1, :]
        tensor_2 = maxes[1:, :]
        similarities = cos(tensor_1, tensor_2)
    return similarities

def get_model(config):
    encoder = maybe_cuda(Encoder(config), cuda=config.cuda)
    if config.attn_method != "disabled":
        decoder = maybe_cuda(AttnDecoder(config), cuda=config.cuda)
    else:
        decoder = maybe_cuda(Decoder(config), cuda=config.cuda)
    return encoder, decoder

def similarity_computing_inner(self, doc_output, segment_idx):
    similarities = []
    cos = nn.CosineSimilarity(dim=1, eps=1e-6)
    seg_outputs = []
    doc_output = F.softmax(doc_output, dim=1)
    # Split the document encoding into per-segment slices.
    for i, idx in enumerate(segment_idx):
        if i == 0:
            seg_output = doc_output[0:segment_idx[i] + 1, :]
        elif i == len(segment_idx) - 1:
            seg_output = doc_output[segment_idx[i - 1] + 1:, :]
        else:
            seg_output = doc_output[segment_idx[i - 1] + 1:segment_idx[i] + 1, :]
        seg_outputs.append(seg_output)
    # Average pairwise cosine similarity between all sentence pairs inside each segment.
    for i in range(len(seg_outputs)):
        sent_idx = maybe_cuda(torch.LongTensor([k for k in range(seg_outputs[i].size()[0])]))
        if seg_outputs[i].size()[0] > 1:
            pairs = torch.combinations(sent_idx)
            pair_sims = []
            for p in pairs:
                pair_sims.append(cos(seg_outputs[i][p[0], :].unsqueeze(0),
                                     seg_outputs[i][p[1], :].unsqueeze(0)))
            similarities.append(sum(pair_sims) / len(pair_sims))
    return Variable(maybe_cuda(torch.Tensor(similarities)))

def train(model, args, epoch, dataset, logger, optimizer):
    model.train()
    total_loss = 0.0
    with tqdm(desc='Training', total=len(dataset)) as pbar:
        for i, (data, target, paths, sent_bert_vec, target_idx) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            model.zero_grad()
            output, sims = model(data, sent_bert_vec, target_idx)
            target_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            # Generate gold labels for the coherence scores: a coherence target of 1
            # for non-boundary sentences (segmentation label 0) and 0 for boundaries.
            target_list = target_var.data.cpu().numpy()
            target_coh_list = []
            for t in target_list:
                if t == 0:
                    target_coh_list.append(torch.LongTensor([1]))
                else:
                    target_coh_list.append(torch.LongTensor([0]))
            target_coh_var = Variable(maybe_cuda(torch.cat(target_coh_list, 0), args.cuda),
                                      requires_grad=False)
            # Joint loss over segmentation logits and coherence scores.
            loss = supervised_cross_entropy(output, sims, target_var, target_coh_var)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pbar.set_description('Training, loss={:.4}'.format(loss.item()))
    total_loss = total_loss / len(dataset)
    logger.debug('Training Epoch: {}, Loss: {:.4}.'.format(epoch + 1, total_loss))
    log_value('Training Loss', total_loss, epoch + 1)

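# `supervised_cross_entropy` is not defined in this section. The sketch below is one
# plausible reading, assuming it sums the usual segmentation cross-entropy with a
# cross-entropy over two-way coherence logits (`sims` of shape (N, 2)); the weighting
# factor `coh_weight` is an assumption, not a value from the original code.
def supervised_cross_entropy_sketch(output, sims, target_var, target_coh_var, coh_weight=1.0):
    seg_loss = F.cross_entropy(output, target_var)    # boundary / non-boundary loss
    coh_loss = F.cross_entropy(sims, target_coh_var)  # coherence-score loss
    return seg_loss + coh_weight * coh_loss
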
def validate(model, args, epoch, dataset, logger):
    model.eval()
    with tqdm(desc='Validating', total=len(dataset)) as pbar:
        acc = Accuracies()
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)
            acc.update(output_softmax.data.cpu().numpy(), target)
        epoch_pk, epoch_windiff, threshold = acc.calc_accuracy()
        logger.info(
            'Validating Epoch: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, F1: {:.4} .'
            .format(epoch + 1, preds_stats.get_accuracy(), epoch_pk, epoch_windiff,
                    preds_stats.get_f1()))
        preds_stats.reset()
        return epoch_pk, threshold

def forward(self, batch, return_attns=False):
    batch_size = len(batch)
    sentences_per_doc = []
    all_batch_sentences = []
    for document in batch:
        all_batch_sentences.extend(document)
        sentences_per_doc.append(len(document))

    lengths = [s.size()[0] for s in all_batch_sentences]
    sort_order = np.argsort(lengths)[::-1]
    sorted_sentences = [all_batch_sentences[i] for i in sort_order]
    sorted_lengths = [s.size()[0] for s in sorted_sentences]

    max_length = max(lengths)
    logger.debug('Num sentences: %s, max sentence length: %s',
                 sum(sentences_per_doc), max_length)

    padded_sentences = [self.pad(s, max_length) for s in sorted_sentences]
    big_tensor = torch.cat(padded_sentences, 1)  # (max_length, batch size, 300)
    processed_tensor = pack_padded_sequence(big_tensor, sorted_lengths)
    encoded_sentences = self.sentence_encoder(processed_tensor)
    unsort_order = Variable(maybe_cuda(torch.LongTensor(unsort(sort_order))))
    unsorted_encodings = encoded_sentences.index_select(0, unsort_order)

    index = 0
    encoded_documents = []
    for sentences_count in sentences_per_doc:
        end_index = index + sentences_count
        encoded_documents.append(unsorted_encodings[index:end_index, :])
        index = end_index

    doc_sizes = [doc.size()[0] for doc in encoded_documents]
    max_doc_size = np.max(doc_sizes)
    ordered_document_idx = np.argsort(doc_sizes)[::-1]
    ordered_doc_sizes = sorted(doc_sizes)[::-1]
    ordered_documents = [encoded_documents[idx] for idx in ordered_document_idx]
    padded_docs = [self.pad_document(d, max_doc_size) for d in ordered_documents]
    padded_docs = torch.stack(padded_docs).squeeze(2)  # turn the tensor list into a tensor

    ##############################################################
    pos_emb = self.PositionEncoder.pe[:, :padded_docs.size()[1]].expand(padded_docs.size())
    inputs = padded_docs + pos_emb
    sent_mask = generate_mask(ordered_doc_sizes, max_doc_size)
    non_pad_mask = sent_mask.unsqueeze(-1)
    slf_attn_mask = (1 - sent_mask).unsqueeze(1).expand(-1, sent_mask.size()[1], -1).type(torch.bool)
    outputs = self.Transformer(inputs, non_pad_mask, slf_attn_mask)
    outputs = self.Dropoutlayer(outputs)
    outputs = self.Decoderlayer(outputs)  # batch * length * 1

    doc_outputs = []
    for i, doc_len in enumerate(ordered_doc_sizes):
        doc_outputs.append(outputs[i, 0:doc_len - 1, :])  # -1 to remove last prediction
    unsorted_doc_outputs = [doc_outputs[i] for i in unsort(ordered_document_idx)]
    x = torch.cat(unsorted_doc_outputs, 0)
    return x

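# `unsort` and `generate_mask` are project helpers that are not shown in this section.
# Minimal sketches of their assumed behaviour, under the usage above (undoing a length
# sort, and masking padded sentence positions); names are illustrative.
def unsort_sketch(sort_order):
    """Return the inverse permutation of `sort_order` (assumed behaviour of utils.unsort)."""
    result = [-1] * len(sort_order)
    for new_pos, old_pos in enumerate(sort_order):
        result[old_pos] = new_pos
    return result

def generate_mask_sketch(doc_sizes, max_doc_size):
    """Assumed behaviour: 1.0 for real sentence positions, 0.0 for padding."""
    mask = torch.zeros(len(doc_sizes), max_doc_size)
    for i, size in enumerate(doc_sizes):
        mask[i, :size] = 1.0
    return maybe_cuda(mask)
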
def train(model, args, epoch, dataset, logger, optimizer):
    model.train()
    total_loss = 0.0
    with tqdm(desc='Training', total=len(dataset)) as pbar:
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            model.zero_grad()
            output = model(data)
            target_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            loss = model.criterion(output, target_var)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pbar.set_description('Training, loss={:.4}'.format(loss.item()))
    total_loss = total_loss / len(dataset)
    logger.debug('Training Epoch: {}, Loss: {:.4}.'.format(epoch + 1, total_loss))
    log_value('Training Loss', total_loss, epoch + 1)

def validate(model, args, epoch, dataset, logger):
    model.eval()
    with tqdm(desc='Validating', total=len(dataset)) as pbar:
        acc = Accuracies()
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)
            acc.update(output_softmax.data.cpu().numpy(), target)
        epoch_pk, epoch_windiff, threshold = acc.calc_accuracy()
        logger.info('Validating Epoch: {}, Pk: {:.4} .'.format(epoch + 1, epoch_pk))
        preds_stats.reset()
        return epoch_pk, threshold

def prepare_tensor(sentences):
    tensored_data = []
    for sentence in sentences:
        if len(sentence) > 0:
            tensored_data.append(utils.maybe_cuda(torch.FloatTensor(np.concatenate(sentence))))
    return tensored_data

def pad(self, s, max_length):
    s_length = s.size()[0]
    v = Variable(maybe_cuda(s.unsqueeze(0).unsqueeze(0)))
    padded = F.pad(v, (0, 0, 0, max_length - s_length))  # (1, 1, max_length, 300)
    shape = padded.size()
    return padded.view(shape[2], 1, shape[3])  # (max_length, 1, 300)

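# `pad_document`, used when batching document encodings above, is not shown in this
# section. A minimal sketch, assuming it zero-pads a (doc_len, hidden) encoding along
# the sentence axis in the same style as `pad`; the name and shapes are assumptions.
def pad_document_sketch(self, d, max_document_length):
    d_length = d.size()[0]
    v = d.unsqueeze(0).unsqueeze(0)
    padded = F.pad(v, (0, 0, 0, max_document_length - d_length))  # (1, 1, max_len, hidden)
    shape = padded.size()
    return padded.view(shape[2], 1, shape[3])  # (max_len, 1, hidden)
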
def main(args):
    sys.path.append(str(Path(__file__).parent))

    checkpoint_path = Path(args.checkpoint_dir)
    checkpoint_path.mkdir(exist_ok=True)

    logger = utils.setup_logger(__name__, os.path.join(args.checkpoint_dir, 'train.log'))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    word2vec = None

    if not args.infer:
        dataset_path = utils.config['choidataset']
        train_dataset = ChoiDataset(dataset_path, word2vec)
        dev_dataset = ChoiDataset(dataset_path, word2vec)
        test_dataset = ChoiDataset(dataset_path, word2vec)

        train_dl = DataLoader(train_dataset, batch_size=args.bs, collate_fn=collate_fn,
                              shuffle=True, num_workers=args.num_workers)
        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn,
                            shuffle=False, num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn,
                             shuffle=False, num_workers=args.num_workers)

    # Exactly one of --model and --load_from must be set.
    assert bool(args.model) ^ bool(args.load_from)
    if args.model:
        model = import_model(args.model)
    elif args.load_from:
        with open(args.load_from, 'rb') as f:
            model = torch.load(f)

    model.train()
    model = maybe_cuda(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    if not args.infer:
        best_val_pk = 1.0
        for j in range(args.epochs):
            train(model, args, j, train_dl, logger, optimizer)
            with (checkpoint_path / 'model{:03d}.t7'.format(j)).open('wb') as f:
                torch.save(model, f)
            val_pk, threshold = validate(model, args, j, dev_dl, logger)
            if val_pk < best_val_pk:
                test_pk = test(model, args, j, test_dl, logger, threshold)
                logger.debug(colored(
                    'Current best model from epoch {} with p_k {} and threshold {}'.format(
                        j, test_pk, threshold), 'green'))
                best_val_pk = val_pk
                with (checkpoint_path / 'Meilleur_model.t7').open('wb') as f:
                    torch.save(model, f)

def resnet_init(self):
    start_time = time.time()
    from utils import maybe_cuda
    from glico_model.img_to_vec import Img2Vec
    img2vec = Img2Vec()
    # Matrix to hold the image vectors
    for i_loader, dloader in enumerate(self.data_loader):
        for i, batch in enumerate(dloader):  # batch_size=1, no shuffle
            idx = maybe_cuda(batch[0])
            imgs = maybe_cuda(batch[1])
            labels = batch[2]
            if i_loader < 1:
                vec = img2vec.get_vec(imgs.squeeze().detach().cpu().numpy())
                self.emb.weight.data[idx] = torch.tensor(vec)
                self.label2idx[labels.item()].add(idx.item())
                self.idx2label[idx.item()] = labels.item()
    self.get_norm()
    print(f"init time: {(time.time() - start_time) / 60:8.2f}m")

def forward(self, batch):
    batch_size = len(batch)
    sentences_per_doc = []
    all_batch_sentences = []
    for document in batch:
        all_batch_sentences.extend(document)
        sentences_per_doc.append(len(document))

    lengths = [s.size()[0] for s in all_batch_sentences]
    sort_order = np.argsort(lengths)[::-1]
    sorted_sentences = [all_batch_sentences[i] for i in sort_order]
    sorted_lengths = [s.size()[0] for s in sorted_sentences]

    max_length = max(lengths)
    logger.debug('Num sentences: %s, max sentence length: %s',
                 sum(sentences_per_doc), max_length)

    padded_sentences = [self.pad(s, max_length) for s in sorted_sentences]
    big_tensor = torch.cat(padded_sentences, 1)  # (max_length, batch size, 300)
    packed_tensor = pack_padded_sequence(big_tensor, sorted_lengths)
    encoded_sentences = self.sentence_encoder(packed_tensor)
    unsort_order = maybe_cuda(torch.LongTensor(unsort(sort_order)))
    unsorted_encodings = encoded_sentences.index_select(0, unsort_order)

    index = 0
    encoded_documents = []
    for sentences_count in sentences_per_doc:
        end_index = index + sentences_count
        encoded_documents.append(unsorted_encodings[index:end_index, :])
        index = end_index

    doc_sizes = [doc.size()[0] for doc in encoded_documents]
    max_doc_size = np.max(doc_sizes)
    ordered_document_idx = np.argsort(doc_sizes)[::-1]
    ordered_doc_sizes = sorted(doc_sizes)[::-1]
    ordered_documents = [encoded_documents[idx] for idx in ordered_document_idx]
    padded_docs = [self.pad_document(d, max_doc_size) for d in ordered_documents]
    docs_tensor = torch.cat(padded_docs, 1)
    packed_docs = pack_padded_sequence(docs_tensor, ordered_doc_sizes)
    sentence_lstm_output, _ = self.sentence_lstm(packed_docs,
                                                 zero_state(self, batch_size=batch_size))
    padded_x, _ = pad_packed_sequence(sentence_lstm_output)  # (max sentence len, batch, 256)

    doc_outputs = []
    for i, doc_len in enumerate(ordered_doc_sizes):
        doc_outputs.append(padded_x[0:doc_len - 1, i, :])  # -1 to remove last prediction
    unsorted_doc_outputs = [doc_outputs[i] for i in unsort(ordered_document_idx)]
    sentence_outputs = torch.cat(unsorted_doc_outputs, 0)

    x = self.h2s(sentence_outputs)
    return x

def forward(self, x):
    batch_size = x.batch_sizes[0]
    s = zero_state(self, batch_size)
    packed_output, _ = self.lstm(x, s)
    padded_output, lengths = pad_packed_sequence(packed_output)  # (max sentence len, batch, 256)
    # Max-pool each sentence's hidden states over its true (unpadded) length.
    maxes = maybe_cuda(torch.zeros(batch_size, padded_output.size(2)))
    for i in range(batch_size):
        maxes[i, :] = torch.max(padded_output[:lengths[i], i, :], 0)[0]
    return maxes

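# `zero_state` builds the initial hidden state for the LSTMs above but is not defined in
# this section. A minimal sketch, assuming a bidirectional LSTM whose layer count and
# hidden size are stored on the module; the attribute names are assumptions.
def zero_state_sketch(module, batch_size):
    # * 2 accounts for the two directions of a bidirectional LSTM.
    h0 = maybe_cuda(torch.zeros(module.num_layers * 2, batch_size, module.hidden))
    c0 = maybe_cuda(torch.zeros(module.num_layers * 2, batch_size, module.hidden))
    return h0, c0
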
def cube_init(self):
    import itertools
    from utils import maybe_cuda
    start_time = time.time()
    # Place each class at a distinct vertex of the {0,1}^nz hypercube.
    vertices = itertools.product('01', repeat=self.nz)  # cartesian product
    # self.vertices = [tuple(int(s) for s in v) for v in vertices]
    self.vertices = sample_from_iter(vertices, self.num_classes)
    print(f"Init Cube: num classes= {self.num_classes}")
    for i_loader, dloader in enumerate(self.data_loader):
        for i, batch in enumerate(dloader):  # batch_size=1, no shuffle
            idx = maybe_cuda(batch[0])
            _ = maybe_cuda(batch[1])
            labels = batch[2]
            if i_loader < 1:
                vertex = self.vertices[labels]
                self.emb.weight.data[idx] = self.emb.weight.data[idx] + torch.tensor(vertex).float()
                self.label2idx[labels.item()].add(idx.item())
                self.idx2label[idx.item()] = labels.item()
    self.get_norm()
    print(f"init time: {(time.time() - start_time) / 60:8.2f}m")

def get_attn_key_pad_mask(seq_k, seq_q, lengths):
    ''' For masking out the padding part of the key sequence. '''
    # Expand to fit the shape of the key-query attention matrix.
    batch, len_q, len_k = seq_k.size(0), seq_q.size(1), seq_k.size(1)
    padding_mask = maybe_cuda(torch.ByteTensor(
        [[0 if i < clen else 1 for i in range(len_k)] for clen in lengths]))
    padding_mask = padding_mask.unsqueeze(1).expand(-1, len_q, -1)  # b x lq x lk
    return padding_mask

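# The Transformer encoder further down also calls `get_non_pad_mask`, which is not defined
# in this section. A minimal sketch, assuming it returns a (batch, len, 1) float mask that
# is 1 for real positions and 0 for padding, mirroring the key-pad mask above.
def get_non_pad_mask_sketch(seq, lengths):
    batch, seq_len = seq.size(0), seq.size(1)
    mask = maybe_cuda(torch.FloatTensor(
        [[1.0 if i < clen else 0.0 for i in range(seq_len)] for clen in lengths]))
    return mask.unsqueeze(-1)  # b x l x 1
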
def load_model(model_path=None, is_cuda=None):
    if model_path is None:
        model_path = utils.config['model']

    # Checkpoints are binary, so the file must be opened in 'rb' mode.
    with open(model_path, 'rb') as f:
        model = torch.load(f)

    model.eval()

    if is_cuda is None:
        is_cuda = utils.config['cuda']

    return utils.maybe_cuda(model, is_cuda)

def test(model, args, epoch, dataset, logger, threshold):
    model.eval()
    with tqdm(desc='Testing', total=len(dataset)) as pbar:
        acc = accuracy.Accuracy()
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)

            # Threshold the boundary probability of each document separately.
            current_idx = 0
            for k, t in enumerate(target):
                document_sentence_count = len(t)
                to_idx = int(current_idx + document_sentence_count)
                output_binary = (output_softmax.data.cpu().numpy()[current_idx:to_idx, :][:, 1]
                                 > threshold)
                h = np.append(output_binary, [1])
                tt = np.append(t, [1])
                acc.update(h, tt)
                current_idx = to_idx

        epoch_pk, epoch_windiff = acc.calc_accuracy()
        logger.debug(
            'Testing Epoch: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, F1: {:.4} .'
            .format(epoch + 1, preds_stats.get_accuracy(), epoch_pk, epoch_windiff,
                    preds_stats.get_f1()))
        preds_stats.reset()
        return epoch_pk

def accuracy(model, test_data, batch_size, topk=(1, 5), aug_param=None):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    model.eval()
    if aug_param is None:
        aug_param = {'std': None, 'mean': None, 'rand_crop': 32, 'image_size': 32}
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    with torch.no_grad():
        test_loader = get_loader_with_idx(test_data, batch_size=batch_size, **aug_param,
                                          num_workers=8, shuffle=False, eval=True)
        for i, batch in enumerate(test_loader):
            imgs = maybe_cuda(batch[1])
            targets = maybe_cuda(batch[2])
            output = maybe_cuda(model(imgs))
            maxk = max(topk)
            batch_size = targets.size(0)
            _, pred = output.topk(maxk, 1, True, True)
            pred = pred.t()
            correct = pred.eq(targets.view(1, -1).expand_as(pred))
            res = []
            for k in topk:
                # contiguous() is needed because the transpose above makes `correct` non-contiguous.
                correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
                res.append(correct_k.mul_(100.0 / batch_size))
            top1.update(res[0].item())
            top5.update(res[1].item())
    return top1.avg, top5.avg

def main(args):
    sys.path.append(str(Path(__file__).parent))

    logger = utils.setup_logger(__name__, 'cross_validate_choi.log')

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    logger.debug('Running with config %s', utils.config)
    configure(os.path.join('runs', args.expname))

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(utils.config['word2vecfile'],
                                                                   binary=True)
    else:
        word2vec = None

    dataset_path = Path(args.flat_choi)

    with open(args.load_from, 'rb') as f:
        model = torch.load(f)
    model.eval()
    model = maybe_cuda(model)

    test_accuracy = accuracy.Accuracy()

    # 5-fold cross-validation: fold j is the test set, the remaining folds are used
    # to pick the segmentation threshold.
    for j in range(5):
        validate_folder_numbers = list(range(5))
        validate_folder_numbers.remove(j)
        validate_folder_names = [dataset_path.joinpath(str(num)) for num in validate_folder_numbers]
        dev_dataset = ChoiDataset(dataset_path, word2vec, folder=True,
                                  folders_paths=validate_folder_names)
        test_dataset = ChoiDataset(dataset_path, word2vec, folder=True,
                                   folders_paths=[dataset_path.joinpath(str(j))])
        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn,
                            shuffle=False, num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn,
                             shuffle=False, num_workers=args.num_workers)
        _, threshold = validate(model, args, j, dev_dl, logger)
        test_pk = test(model, args, j, test_dl, logger, threshold, test_accuracy)
        logger.debug(colored(
            'Cross validation section {} with p_k {} and threshold {}'.format(j, test_pk, threshold),
            'green'))

    cross_validation_pk, _ = test_accuracy.calc_accuracy()
    print('Final cross validation Pk is: ' + str(cross_validation_pk))
    logger.debug(colored('Final cross validation Pk is: {}'.format(cross_validation_pk), 'green'))

def validate_step(engine, batch):
    encoder.eval()
    decoder.eval()
    (x, x_len), (y, y_len) = batch
    full_decoder_output, attn_ls = run_model_on_batch(
        encoder=encoder,
        decoder=decoder,
        config=config,
        use_teacher_forcing=False,
        batch_data=batch,
    )
    y_var = maybe_cuda(Variable(y, requires_grad=False), cuda=config.cuda)
    batch_loss = criterion(
        full_decoder_output.view(-1, full_decoder_output.size()[2]),
        y_var.view(-1))
    return batch_loss

def __init__(self, len_max_seq, d_word_vec, n_layers, n_head, d_k, d_v,
             d_model, d_inner, dropout=0.1):
    super(SentenceEncoding, self).__init__()
    self.CLS = maybe_cuda(torch.randn(1, 300))
    n_position = len_max_seq + 1
    self.d_model = d_model
    self.src_emb = nn.Linear(300, d_model)
    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(n_position, d_word_vec),
        freeze=True)
    self.layers = nn.ModuleList([
        EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
    self.dropout_func = nn.Dropout(dropout)

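# `get_sinusoid_encoding_table` is not shown in this section. The sketch below follows the
# standard sinusoidal position-encoding table used in common Transformer implementations;
# the exact variant used by the original code may differ slightly.
def get_sinusoid_encoding_table_sketch(n_position, d_hid):
    def angle(pos, i):
        return pos / np.power(10000, 2 * (i // 2) / d_hid)
    table = np.array([[angle(pos, i) for i in range(d_hid)] for pos in range(n_position)])
    table[:, 0::2] = np.sin(table[:, 0::2])  # even dimensions: sine
    table[:, 1::2] = np.cos(table[:, 1::2])  # odd dimensions: cosine
    return torch.FloatTensor(table)
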
def test(model, args, epoch, dataset, logger, test_threshold, test_acc):
    model.eval()
    with tqdm(desc='Testing', total=len(dataset)) as pbar:
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)

            current_idx = 0
            for k, t in enumerate(target):
                document_sentence_count = len(t)
                to_idx = int(current_idx + document_sentence_count)
                output_binary = (output_softmax.data.cpu().numpy()[current_idx:to_idx, :][:, 1]
                                 > test_threshold)
                h = np.append(output_binary, [1])
                tt = np.append(t, [1])
                test_acc.update(h, tt)
                current_idx = to_idx

        test_pk, epoch_windiff = test_acc.calc_accuracy()
        logger.debug(
            'Testing validation section: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, F1: {:.4} .'
            .format(epoch + 1, preds_stats.get_accuracy(), test_pk, epoch_windiff,
                    preds_stats.get_f1()))
        preds_stats.reset()
        return test_pk

def forward(self, src_seq, max_length, lengths, return_attns=False):
    enc_slf_attn_list = []

    # -- Prepare masks
    slf_attn_mask = get_attn_key_pad_mask(seq_k=src_seq, seq_q=src_seq, lengths=lengths)
    non_pad_mask = get_non_pad_mask(src_seq, lengths)
    position = maybe_cuda(torch.tensor(
        [[i if i < clen else 0 for i in range(max_length)] for clen in lengths]))

    # -- Forward
    enc_output = src_seq + self.position_enc(position)

    for enc_layer in self.layer_stack:
        enc_output, enc_slf_attn = enc_layer(
            enc_output,
            non_pad_mask=non_pad_mask,
            slf_attn_mask=slf_attn_mask)
        if return_attns:
            enc_slf_attn_list += [enc_slf_attn]

    if return_attns:
        return enc_output, enc_slf_attn_list
    return enc_output

def train(model, args, epoch, dataset, logger, optimizer):
    model.train()
    total_loss = 0.0
    with tqdm(desc='Training', total=len(dataset)) as pbar:
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            model.zero_grad()
            output = model(data)
            target_var = Variable(maybe_cuda(torch.cat(target, 0), args.cuda), requires_grad=False)
            loss = model.criterion(output, target_var)
            loss.backward()
            optimizer.step()
            total_loss += float(loss.item())
            # pbar.set_description('Training : LOSS = {:.4}'.format(float(loss.item())))

            # Free GPU memory between batches; documents can be long.
            torch.cuda.empty_cache()
            del data, target, output, target_var, loss
            gc.collect()

def test(model, args, epoch, dataset, logger, threshold):
    model.eval()
    with tqdm(desc='Testing', total=len(dataset)) as pbar:
        acc = Accuracy()  # the Accuracies class is not needed here
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            output = model(data)
            output_softmax = F.softmax(output, 1)
            targets_var = maybe_cuda(torch.cat(target, 0), args.cuda)
            output_seg = output.data.cpu().numpy().argmax(axis=1)
            target_seg = targets_var.data.cpu().numpy()
            preds_stats.add(output_seg, target_seg)

            current_idx = 0
            for k, t in enumerate(target):
                path = paths[k]
                document_sentence_count = len(t)
                to_idx = int(current_idx + document_sentence_count)
                output_binary = (output_softmax.data.cpu().numpy()[current_idx:to_idx, :][:, 1]
                                 > threshold)
                h = np.append(output_binary, [1])
                tt = np.append(t, [1])
                acc.update(h, tt, path)
                current_idx = to_idx

        epoch_pk, epoch_windiff, epoch_b, epoch_s = acc.calc_accuracy()
        logger.info(
            'Testing Epoch: {}, accuracy: {:.4}, Pk: {:.4}, Windiff: {:.4}, B: {:.4}, S: {:.4}, F1: {:.4} .'
            .format(epoch + 1, preds_stats.get_accuracy(), epoch_pk, epoch_windiff,
                    epoch_b, epoch_s, preds_stats.get_f1()))
        preds_stats.reset()
        epoch_result = acc.all_test_result
        return epoch_pk, epoch_result

def train_step(engine, batch):
    encoder.train()
    decoder.train()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    (x, x_len), (y, y_len) = batch
    full_decoder_output, attn_ls = run_model_on_batch(
        encoder=encoder,
        decoder=decoder,
        config=config,
        use_teacher_forcing=use_teacher_forcing,
        batch_data=batch,
    )
    y_var = maybe_cuda(Variable(y, requires_grad=False), cuda=config.cuda)
    batch_loss = criterion(
        full_decoder_output.view(-1, full_decoder_output.size()[2]),
        y_var.view(-1))
    batch_loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return batch_loss

def train(model, args, epoch, dataset, logger, optimizer):
    model.train()
    total_loss = 0.0
    with tqdm(desc='Training', total=len(dataset)) as pbar:
        for i, (data, target, paths) in enumerate(dataset):
            if i == args.stop_after:
                break
            pbar.update()
            model.zero_grad()
            output = model(data)
            target_var = maybe_cuda(torch.cat(target, 0), args.cuda)
            loss = model.criterion(output, target_var)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            pbar.set_description('Training, loss={:.4}'.format(loss.item()))
    total_loss = total_loss / len(dataset)
    logger.debug('Training Epoch: {}, Loss: {:.4}.'.format(epoch + 1, total_loss))
    log_value('Training Loss', total_loss, epoch + 1)