def train_seg_coarse(mutual_helper, model_seg_coarse, train_loader_seg, optimizer_seg, epoch):
    """Train the coarse segmentation model for one epoch.

    Draws ``len(train_loader_seg)`` batches from ``mutual_helper.read_data``,
    feeds the ``'image_patch'`` entry through the model, applies a sigmoid to
    the first of its four outputs and optimises
    ``mutual_helper.criterions['seg_loss']`` against the ``'image_segment'``
    entry.

    Returns:
        dict holding the running averages under ``'train/seg_loss'`` and
        ``'train/seg_acc'``.
    """
    model_seg_coarse.train()
    loss_meter = Averagvalue()
    acc_meter = Averagvalue()

    batch_stream = mutual_helper.read_data(train_loader_seg)
    n_batches = len(train_loader_seg)
    for _ in range(n_batches):
        sample = next(batch_stream)
        targets = sample['image_segment']
        inputs = sample['image_patch']

        # Only the first of the four model outputs is used here.
        seg_logits, _, _, _ = model_seg_coarse(inputs)
        seg_probs = torch.sigmoid(seg_logits)
        batch_loss = mutual_helper.criterions['seg_loss'](seg_probs, targets)
        batch_acc = accuracy_check(seg_probs, targets)

        optimizer_seg.zero_grad()
        batch_loss.backward()
        optimizer_seg.step()

        loss_meter.update(batch_loss.item())
        acc_meter.update(batch_acc)

    empty_cache()
    print("[Epoch %d] [%s loss: %f] [%s acc: %f]" %
          (epoch, 'seg', loss_meter.avg, 'seg', acc_meter.avg))
    return {
        'train/seg_loss': loss_meter.avg,
        'train/seg_acc': acc_meter.avg,
    }
def photo(bot, update):
    """Handle an incoming photo: run ``transfer`` on it and reply with the result.

    The last entry of ``update.message.photo`` is downloaded into an in-memory
    stream, stored in ``info[chat_id][1]`` and passed to ``transfer`` together
    with ``info[chat_id][0]``. The returned image is sent back as a PNG.

    Returns:
        ``ConversationHandler.END``.
    """
    message = update.message
    chat_id = message.chat_id
    message.reply_text('Принял! Скоро пришлю результат - \nэто займет несколько секунд')
    print("Got image from {}".format(chat_id))

    # Get information about the picture and download it into memory.
    image_info = message.photo[-1]
    first_image_file[chat_id] = bot.get_file(image_info)
    downloaded = BytesIO()
    first_image_file[chat_id].download(out=downloaded)
    info[chat_id][1] = downloaded
    del first_image_file[chat_id]

    output = transfer(info[chat_id][1], info[chat_id][0])
    empty_cache()  # torch.cuda.empty_cache()
    gc.collect()

    # Now send the photo back.
    result_stream = BytesIO()
    output.save(result_stream, format='PNG')
    result_stream.seek(0)
    message.reply_text('Держите')
    bot.send_photo(chat_id, photo=result_stream)
    message.reply_text('Ниче так вышло. Для повтора тыкните -> /start')
    return ConversationHandler.END
def train(epoch):
    """Run one training epoch over the module-level ``train_loader``.

    Uses the module-level ``model``, ``optimizer`` and ``args``. After the
    epoch the mean batch loss is printed, ``test()`` is run unless
    ``args.train_only`` is set, and when the mean loss drops below
    ``args.cutoff`` the model is saved to ``args.model_prefix + '.model'`` and
    the process exits with status 0.
    """
    model.train()
    train_loss = 0.
    for data, label, _ in train_loader:
        bag_label = label[0]
        if args.cuda:
            data, bag_label = data.cuda(), bag_label.cuda()
        data, bag_label = Variable(data), Variable(bag_label)

        # Reset gradients, compute the objective, back-propagate and step.
        optimizer.zero_grad()
        loss, _ = model.calculate_objective(data, bag_label)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
        empty_cache()

    # Mean loss over the batches of this epoch.
    train_loss /= len(train_loader)
    print('Epoch: {}, Loss: {:.4f}'.format(epoch, train_loss))

    if not args.train_only:
        test()
    if train_loss < args.cutoff:
        print([train_loss, args.cutoff])
        torch.save(model, args.model_prefix + '.model')
        exit(0)
def test_cls(mutual_helper, model_seg_coarse, vali_loader_cls, epoch):
    """Validate the classification model for one pass over ``vali_loader_cls``.

    For every batch the coarse segmentation model produces logits and a
    backbone output; the sigmoid of the logits and the backbone output are fed
    to ``mutual_helper.model`` together with the images. Loss comes from
    ``mutual_helper.criterions['cls_loss']``.

    Returns:
        dict holding the running averages under ``'vali/cls_loss'`` and
        ``'vali/cls_acc'``.
    """
    mutual_helper.model.eval()
    loss_meter = Averagvalue()
    acc_meter = Averagvalue()

    with torch.no_grad():
        for batch in vali_loader_cls:
            images_cls, _, labels_cls = mutual_helper.generate_batch(batch)
            seg_logits, backbone_out, _, _ = model_seg_coarse(images_cls)
            seg_probs = torch.sigmoid(seg_logits)

            predictions_cls = mutual_helper.model(images_cls, seg_probs, backbone_out)
            batch_loss = mutual_helper.criterions['cls_loss'](predictions_cls, labels_cls)
            class_probs = F.softmax(predictions_cls, dim=1)
            _, n_correct = correct_predictions(class_probs, labels_cls)

            loss_meter.update(batch_loss.item())
            acc_meter.update(n_correct / images_cls.size(0))

    empty_cache()
    info = ''.join([
        'Vali Epoch: [{0}/{1}]'.format(epoch, mutual_helper.config.epochs),
        ' Loss {loss:f} '.format(loss=loss_meter.avg),
        ' Acc {acc:f} '.format(acc=acc_meter.avg),
    ])
    print(info)
    return {
        'vali/cls_loss': loss_meter.avg,
        'vali/cls_acc': acc_meter.avg,
    }
def _measure_performance(g, mem):
    """Time double-precision ``addmm_`` calls of growing size on one device.

    Args:
        g: device selector; ``-1`` picks the CPU, any other value is formatted
            into ``'cuda:<g>'``.
        mem: budget compared against ``8 * n**2`` (the byte size of one
            ``n x n`` double matrix) to decide whether the next size is tried.

    Returns:
        ``float(f) ** 3 / tt`` where ``f`` is the size factor after its last
        doubling and ``tt`` the last measured time.

    Raises:
        ZeroDivisionError: when the timing loop never runs (``tt`` stays 0).
    """
    timer = TicToc()
    elapsed = 0
    factor = 1
    on_gpu = g >= 0
    dev = torch.device('cpu') if g == -1 else torch.device('cuda:%s' % g)
    dtype = torch.double

    # Untimed warm-up product.
    a = torch.eye(1024, 1024, dtype=dtype, device=dev)
    a.addmm_(a, a)
    if on_gpu:
        tcd.synchronize(device=dev)

    # Double the matrix size until one product takes at least a second or the
    # next matrix would not fit into ``mem``.
    while elapsed < 1.0 and mem > 8.0 * (factor * 2048.0) ** 2:
        timer.tic()
        side = factor * 2048
        a = torch.eye(side, side, dtype=dtype, device=dev)
        a.addmm_(a, a)
        if on_gpu:
            tcd.synchronize(device=dev)
        elapsed = timer.toc_val()
        factor *= 2

    speed = (float(factor) ** 3) / elapsed
    print('%s:%s - speed: %s' % (dev.type, dev.index, speed))
    del a
    if on_gpu:
        tcd.synchronize(device=dev)
        tcd.empty_cache()
    return speed
def handle():
    """Main method of module.

    Reads the cohort name from the last command-line argument, encodes the
    images of every MRI slice index with that index's autoencoder, groups the
    encodings per patient id and hands the result to ``save_data_set``.
    """
    cohort: str = argv[-1]
    img_paths_by_idx: dict = get_img_paths_by_idx(mri_dir=cohort)
    ptid_to_slice_sequence: dict = {}

    for mri_idx, img_paths in tqdm(img_paths_by_idx.items()):
        ptid_to_path: dict = get_ptid_to_path(img_paths=img_paths)

        # Load the trained model for the given MRI slice index
        model: Module = get_autoencoder(mri_idx=mri_idx, mri_dir=cohort)
        empty_cache()
        with no_grad():
            ptid_to_encoded_img: dict = get_ptid_to_encoded_img(
                ptid_to_path=ptid_to_path, encoder=model.encoder)

        # Attach this slice's encoding to each patient's slice sequence.
        for ptid, encoded_img in ptid_to_encoded_img.items():
            ptid_to_slice_sequence.setdefault(ptid, {})[mri_idx] = encoded_img

    save_data_set(ptid_to_slice_sequence=ptid_to_slice_sequence, cohort=cohort)
def evaluate_projections(self):
    """Link prediction evaluation helper function.

    Project all entities according to each relation and store the result in
    ``self.projected_entities``. Calling this method at the beginning of link
    prediction makes the process faster by computing projections only once;
    later calls return immediately.
    """
    if self.evaluated_projections:
        return

    entity_ids = tqdm(range(self.n_ent), unit='entities', desc='Projecting entities')
    for i in entity_ids:
        normals = self.norm_vect.weight.data.view(self.n_rel, self.emb_dim)
        index = tensor([i], device=normals.device).long()
        if normals.is_cuda:
            empty_cache()

        ent_row = self.ent_emb(index).view(1, -1)
        # Component of the entity along each relation's normal vector.
        norm_components = (ent_row * normals).sum(dim=1)
        self.projected_entities[:, i, :] = (
            ent_row - norm_components.view(-1, 1) * normals)
        del norm_components

    self.evaluated_projections = True
def eval_on_intern_metrs(self, data_source, **kwargs):
    """Computes the average over data points metrics.

    Every batch from ``self.val_data_pipeline.iter(**data_source)`` is passed
    to ``self.imodel.eval``; each returned metric is weighted by
    ``len(batch)`` and finally divided by the summed ``len(batch)``.

    Returns:
        OrderedDict mapping metric name to its weighted average.
    """
    empty_cache()
    logger.info("Evaluation data source: %s" % data_source)

    metric_sums = OrderedDict()
    total_dp = 0
    total_batches = 0
    total_revs = 0
    started_at = time()

    for batch in self.val_data_pipeline.iter(**data_source):
        batch_metrs = self.imodel.eval(batch=batch, **kwargs)
        total_revs += len(batch[ModelF.REV])
        for name, value in batch_metrs.items():
            if name not in metric_sums:
                metric_sums[name] = 0.
            metric_sums[name] += value * len(batch)  # rescaling back
        total_dp += len(batch)
        total_batches += 1

    logger.info("Evaluation time elapsed: %.2f (s)." % (time() - started_at))
    logger.info("Total reviews: %d." % total_revs)

    # compute the actual average over data-points
    return OrderedDict(
        (name, summed / float(total_dp)) for name, summed in metric_sums.items())
def get_scores(model_names: List[str], datasets: List[List[Tweet]],
               batch_size: int, device: str, model_dir: str,
               data_tag: str) -> List[Tensor]:
    """Score every dataset with every named model.

    Each model is built with ``make_model(name, True)``, loaded from
    ``<model_dir>/<name>-<data_tag>/model.p`` and applied to the datasets in
    slices of ``batch_size``. Per-model results are stacked along dim 1.

    :returns num_dataset tensors of shape B x M x Q
    """
    per_model = []
    for name in model_names:
        model = make_model(name, True).to(device)
        checkpoint_path = '/'.join([model_dir, f'{name}-{data_tag}', 'model.p'])
        model.load_state_dict(load(checkpoint_path)['model_state_dict'])

        per_dataset = []
        for dataset in datasets:
            n_items = len(dataset)
            chunks = []
            for batch_idx in range(ceil(n_items / batch_size)):
                lo = batch_idx * batch_size
                hi = min(n_items, lo + batch_size)
                chunks.append(model.predict_scores(dataset[lo:hi]).cpu())
            per_dataset.append(cat(chunks, dim=0))

        per_model.append(per_dataset)
        empty_cache()

    # Regroup from per-model lists to per-dataset tuples before stacking.
    return [stack(scores, dim=1) for scores in zip(*per_model)]
def train(self):
    """Train for the epochs remaining up to ``self.configer.n_epoch``.

    Each epoch steps the LR scheduler, logs the learning rate and the
    train/validation losses to ``self.writer``, and saves a checkpoint when
    the validation loss improves on ``self.valid_loss``.
    """
    n_epoch = self.configer.n_epoch - self.cur_epoch
    print("Start training! current epoch: {}, remain epoch: {}".format(self.cur_epoch, n_epoch))
    bar = ProcessBar(n_epoch)

    for _ in range(n_epoch):
        if self.configer.cuda and cuda.is_available():
            cuda.empty_cache()

        self.cur_epoch += 1
        bar.step(self.cur_epoch)

        self.lr_scheduler.step(self.cur_epoch)
        cur_lr = self.lr_scheduler.get_lr()[-1]
        self.writer.add_scalar('{}/lr'.format(self.net._get_name()), cur_lr, self.cur_epoch)

        loss_train = self.train_epoch()
        loss_valid = self.valid_epoch()
        losses = {'train': loss_train, 'valid': loss_valid}
        self.writer.add_scalars('{}/loss'.format(self.net._get_name()), losses, self.cur_epoch)

        # Keep the checkpoint with the lowest validation loss so far.
        if loss_valid < self.valid_loss:
            self.valid_loss = loss_valid
            self.save_checkpoint()
def eval(model, criterion, loader):
    """Evaluate ``model`` on ``loader`` and return averaged loss and accuracy.

    Args:
        model: callable module; switched to eval mode before the loop.
        criterion: loss callable taking ``(outputs, labels)``.
        loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        ``(avg_loss, avg_acc)`` as Python floats: the sum of the per-batch
        criterion values and the number of correct arg-max predictions, each
        divided by the total number of labels seen.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    global force_cuda
    model.eval()
    use_cuda = force_cuda and cuda.is_available()

    running_loss = 0.0
    correct = 0.0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            _, prediction = torch.max(outputs.data, 1)

            # Accumulate into a separate name: rebinding the accumulator to
            # the batch loss tensor would discard every earlier batch.
            batch_loss = criterion(outputs, labels)
            running_loss += batch_loss.item()
            correct += torch.sum(prediction == labels.data).item()
            total += labels.size(0)

            # Drop batch references before releasing cached memory.
            del inputs, labels, outputs, prediction, batch_loss
            cuda.empty_cache()

    avg_loss = running_loss / total
    avg_acc = correct / total
    return avg_loss, avg_acc
def eval(self, cur_epoch: int, max_epoch: int) -> [float, float]:
    """
    Cloud-side test.

    Runs ``self.model`` over ``self.test_loader`` without building autograd
    graphs and counts the samples actually seen, so a partial last batch or an
    empty loader does not distort the accuracy.

    :param cur_epoch: current epoch
    :param max_epoch: maximum epoch
    :return: loss (sum of the per-batch criterion values), accuracy
    """
    self.model.eval()
    criterion = CrossEntropyLoss()
    loss = 0
    correct = 0
    seen = 0
    pbar = tqdm(range(len(self.test_loader)))
    pbar.set_description('Eval {}/{}'.format(cur_epoch, max_epoch))

    if self.cfg.SOLVER.CUDA:
        self.model.cuda()

    # Evaluation never back-propagates, so skip autograd bookkeeping.
    with torch.no_grad():
        for _, (inputs, targets) in zip(pbar, self.test_loader):
            if self.cfg.SOLVER.CUDA:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = self.model(inputs)
            loss += float(criterion(outputs, targets))
            _, predicted = torch.max(outputs.data, 1)
            correct += int(predicted.eq(targets).sum())
            seen += targets.size(0)

            denom = max(seen, 1)
            pbar.set_postfix({
                'loss': '{:.8f}'.format(loss / denom),
                'acc': '{:.8f}'.format(correct / denom)
            })

    self.model.cpu()
    acc = correct / seen if seen else 0.0
    self.logger.logger.info('{} Eval {}/{}\tloss: {}\tacc: {}'.format(
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'), cur_epoch, max_epoch,
        loss, acc))
    cuda.empty_cache()
    return loss, acc
def _filter_gsw_sentences(self, sentences):
    """Filter out all sentences that are not detected as Swiss-German.

    ``sentences`` is indexed as ``sentence[0]`` and ``sentence[1]``; element 1
    is the text handed to ``self.lid.predict_label``. Returns a list of
    ``(sentence[0], sentence[1], prediction)`` tuples whose prediction is at
    least ``self.config["lid_threshold"]``.
    """
    # Predict Swiss-German
    texts = [sentence[1] for sentence in sentences]
    predictions = []

    # separate in batches to avoid cuda out of memory error
    n_batches = math.ceil(len(texts) / 100)
    for batch_no in tqdm(range(n_batches)):
        chunk = texts[batch_no * 100:(batch_no + 1) * 100]
        if len(chunk) > 0:
            predictions.extend(self.lid.predict_label(chunk))

    if len(texts) != len(predictions):
        raise Exception("predictions and sentences_list must have the " +
                        "same length")

    # Keep each sentence whose prediction reaches the threshold.
    filtered = []
    for sentence, raw_score in zip(sentences, predictions):
        score = float(raw_score)
        if score >= self.config["lid_threshold"]:
            filtered.append((sentence[0], sentence[1], score))

    del predictions
    gc.collect()
    cuda.empty_cache()
    return filtered
def batch_and_write():
    """Translate the next ``batch_size`` lines of ``data_file`` and write both sides.

    Reads from the module-level ``data_file``, generates with
    ``tr_az_model`` / ``tr_az_tokenizer`` on CUDA, then writes each source
    line to ``tr_file_out`` and each decoded line plus a newline to
    ``az_file_out``.

    Raises:
        IndexError: when ``readline`` returns an empty string (the first
            character of every line is inspected).
    """
    # get the input text
    lines = []
    for _ in range(batch_size):
        line = data_file.readline()
        # this is a little bit of data cleaning for a common issue in open subtitles
        if line[0] == "-":
            line = line[2:]
        lines.append(line)

    # batch
    batch = tr_az_tokenizer.prepare_seq2seq_batch(
        src_texts=lines, return_tensors="pt").to('cuda')
    # generate
    gen = tr_az_model.generate(**batch).to('cuda')
    # decode
    words = tr_az_tokenizer.batch_decode(gen, skip_special_tokens=True)

    # write the output files
    for idx, source_line in enumerate(lines):
        tr_file_out.write(source_line)
        az_file_out.write(words[idx] + "\n")

    # clear cuda cache
    del lines, batch, gen, words
    cuda.empty_cache()
def evaluate_projectionss(self):
    """Link prediction evaluation helper function.

    Project all entities according to each relation and store the result in
    ``self.projected_entities``. Calling this method at the beginning of link
    prediction makes the process faster by computing projections only once;
    later calls return immediately.
    """
    if self.evaluated_projections:
        return

    entity_ids = tqdm(range(self.n_ent), unit='entities', desc='Projecting entities')
    for i in entity_ids:
        # One (rel_emb_dim x ent_emb_dim) matrix per relation.
        matrices = self.proj_mat.weight.data.view(
            self.n_rel, self.rel_emb_dim, self.ent_emb_dim)
        index = tensor([i], device=matrices.device).long()
        if matrices.is_cuda:
            empty_cache()

        ent_vec = self.ent_emb(index).view(self.ent_emb_dim)
        proj_ent = matmul(matrices, ent_vec)
        proj_ent = proj_ent.view(self.n_rel, self.rel_emb_dim, 1)
        self.projected_entities[:, i, :] = proj_ent.view(
            self.n_rel, self.rel_emb_dim)
        del proj_ent

    self.evaluated_projections = True
def train(self, data_source, logging_steps=10, **kwargs):
    """
    Performs a single epoch training on the passed `data_source`.

    :param data_source: keyword arguments forwarded to
        ``self.train_data_pipeline.iter``.
    :param logging_steps: how often to log training produced batch metrics.
    """
    empty_cache()
    logger.info("Training data source: %s" % data_source)

    total_batches = 0
    total_revs = 0
    started_at = time()

    chunks = self.train_data_pipeline.iter(**data_source)
    for indx, batch in enumerate(chunks, 1):
        # Both annealing schedules advance once per batch.
        c_lambd = self.c_kl_ann(increment_indx=True)
        z_lambd = self.z_kl_ann(increment_indx=True)
        metrics = self.imodel.train(batch, c_lambd=c_lambd, z_lambd=z_lambd)
        total_revs += len(batch[ModelF.REV])

        if indx % logging_steps == 0:
            logger.info(metrics_to_str(metrics, prefix="Chunk # %d" % indx))
        total_batches += 1

    logger.info("Epoch training time elapsed: %.2f (s)." % (time() - started_at))
    logger.info("Total reviews: %d." % total_revs)
def adapt_checkpointing(checkpoint_func: Callable[[Module, int], Module],
                        run_func: Callable[[Module], Any],
                        module: Module) -> Module:
    """Find the smallest checkpoint count for which ``run_func`` succeeds.

    Starting at zero, ``checkpoint_func(module, n)`` is called and the result
    passed to ``run_func``. An out-of-memory ``RuntimeError`` clears the
    module's gradients and the CUDA cache and retries with ``n + 1``.

    Args:
        checkpoint_func: builds the checkpointed module for a given count.
        run_func: exercises the checkpointed module.
        module: the module to checkpoint.

    Returns:
        The first checkpointed module for which ``run_func`` completed.

    Raises:
        RuntimeError: any ``RuntimeError`` whose message does not contain
            ``'out of memory'`` is re-raised instead of being retried.
    """
    # TODO: set a max before hard failure?
    # I'd use recursion here, but it would make it very easy to blow up the stack by accident
    num_checkpoints = 0
    while True:
        try:
            # create checkpoints and run the model
            checkpointed = checkpoint_func(module, num_checkpoints)
            run_func(checkpointed)
        except RuntimeError as err:
            if 'out of memory' not in str(err):
                # Adding checkpoints cannot fix this; retrying would loop forever.
                raise
            print(err)
            print('insufficient memory for ', num_checkpoints,
                  ' checkpoints, retrying with ', num_checkpoints + 1)
            # delete any params hanging around and clear the cache to have a clean slate to try again
            for param in module.parameters():
                if param.grad is not None:
                    param.grad = None
            cuda.empty_cache()
            num_checkpoints += 1
        else:
            print('sufficient memory for ', num_checkpoints, ' checkpoints')
            # TODO: need to adapt this check to work for checkpoint funcs that don't just operate on highest submodules
            if num_checkpoints > len(list(module.children()))**.5:
                print(
                    'WARNING: number of checkpoints above sqrt of layers, likely to incur high performance cost'
                )
            return checkpointed
def integratedForward_cls(model, sps, batchSize, nClasses, device='cpu', count_votes=False):
    """Run ``model.classifier`` over ``sps`` in slices and aggregate the rows.

    Args:
        model: module exposing a ``classifier`` callable; moved to ``device``.
        sps: tensor whose first dimension is sliced in steps of ``batchSize``.
        batchSize: number of rows per forward pass.
        nClasses: number of columns of the per-row output buffer.
        device: device the model and each slice are moved to.
        count_votes: when true, return per-column counts of row-wise maxima
            instead of the column mean.

    Returns:
        ``(feat, feats)``: the ``1 x nClasses`` aggregate and the per-row
        outputs collected on the CPU.
    """
    total = sps.size(0)
    feats = torch.empty(total, nClasses)
    model = model.to(device)

    with torch.no_grad():
        start = 0
        while start < total:
            cuda.empty_cache()
            stop = min(start + batchSize, total)
            chunk = sps[start:stop, :].to(device)
            feats[start:stop, :] = model.classifier(chunk).detach().to('cpu')
            start = stop

    if not count_votes:
        return torch.mean(feats, dim=0, keepdim=True), feats

    # A column gets one vote for every row in which it holds the row maximum.
    row_max, _ = torch.max(feats, dim=1, keepdim=True)
    votes = torch.sum(feats == row_max, dim=0, keepdim=True)
    return votes, feats
def train(self, lr=0.1, batch_size=0, epoch_num=1) -> [dict, float, float]:
    """
    Complete one round of training.

    :param lr: learning rate passed to SGD (momentum is fixed at 0.5)
    :param batch_size: forwarded to ``self._init_train_loader``
    :param epoch_num: number of epochs to run; losses and accuracies are
        averaged over it
    :return: model parameters, loss, training-set accuracy;
        ``(None, 0, 0)`` when ``self.sample_set`` is empty
    """
    if len(self.sample_set) == 0:
        return None, 0, 0

    self.model.train()
    if self.cfg.SOLVER.CUDA:
        self.model.cuda()

    loader = self._init_train_loader(batch_size)
    optimizer = torch.optim.SGD(self.model.parameters(), lr, momentum=0.5)

    loss_sum = 0
    acc_sum = 0
    for _ in range(epoch_num):
        epoch_loss, epoch_acc = self._train_one_epoch(loader, optimizer)
        self.logger.logger.info('{} Train\tloss: {}\tacc: {}'.format(
            self, epoch_loss, epoch_acc))
        loss_sum += epoch_loss
        acc_sum += epoch_acc
        cuda.empty_cache()

    # Move the model back to the CPU before snapshotting its parameters.
    self.model.cpu()
    model_params = copy.deepcopy(self.model.state_dict())
    mean_loss = loss_sum / epoch_num
    mean_acc = acc_sum / epoch_num
    cuda.empty_cache()
    return model_params, mean_loss, mean_acc
def train_seg(mutual_helper, model_seg_coarse, model_cls, train_loader_seg, optimizer_seg, epoch):
    """Train ``mutual_helper.model`` for one epoch with gradient accumulation.

    For each batch the coarse segmentation model and ``model_cls`` produce
    auxiliary features without gradients; ``mutual_helper.model`` then predicts
    logits whose sigmoid is scored by ``mutual_helper.criterions['seg_loss']``.
    The optimizer steps every ``config.update_every`` batches and on the last
    batch, after clipping gradients to ``config.clip``.

    Returns:
        dict holding the running averages under ``'train/seg_loss'`` and
        ``'train/seg_acc'``.
    """
    mutual_helper.model.train()
    loss_meter = Averagvalue()
    acc_meter = Averagvalue()
    optimizer_seg.zero_grad()

    config = mutual_helper.config
    batch_num = int(
        np.ceil(len(train_loader_seg.dataset) / float(config.train_seg_batch_size)))

    for i, batch in enumerate(train_loader_seg):
        images, labels, _ = mutual_helper.generate_batch(batch)

        # Auxiliary inputs are computed without tracking gradients.
        with torch.no_grad():
            coarse_logits, seg_backbone_out, _, _ = model_seg_coarse(images)
            probs_cls = torch.sigmoid(coarse_logits)
            cls_features_out = model_cls.get_backbone_out(
                images, probs_cls, seg_backbone_out)

        cam = None
        logits = mutual_helper.model(images, cam, cls_features_out, dua=False)
        probs = torch.sigmoid(logits)
        batch_loss = mutual_helper.criterions['seg_loss'](probs, labels)
        batch_acc = accuracy_check(probs, labels)
        batch_loss.backward()

        loss_meter.update(batch_loss.item())
        acc_meter.update(batch_acc)

        is_update_step = (i + 1) % config.update_every == 0 or i == batch_num - 1
        if is_update_step:
            trainable = (p for p in mutual_helper.model.parameters() if p.requires_grad)
            clip_grad_norm_(trainable, max_norm=config.clip)
            optimizer_seg.step()
            optimizer_seg.zero_grad()

    empty_cache()
    print("[Epoch %d] [%s loss: %f] [%s acc: %f]" %
          (epoch, 'seg', loss_meter.avg, 'seg', acc_meter.avg))
    return {
        'train/seg_loss': loss_meter.avg,
        'train/seg_acc': acc_meter.avg,
    }
def integratedForward(model, sps, batchSize, nClasses, device='cpu', voteMethod='avg_softmax'):
    """Run ``model`` over ``sps`` in slices and aggregate the per-row outputs.

    Args:
        model: callable module; moved to ``device``.
        sps: tensor whose first dimension is sliced in steps of ``batchSize``.
        batchSize: number of rows per forward pass.
        nClasses: number of columns of the per-row output buffer.
        device: device the model and each slice are moved to.
        voteMethod: ``'avg_feat'`` (column mean), ``'most_vote'`` (count of
            row-wise maxima per column), ``'weighted_feat'`` (mean times vote
            count) or ``'avg_softmax'`` (mean of row-wise softmax). Any other
            value behaves like ``'avg_softmax'``.

    Returns:
        ``(feat, feats)``: the ``1 x nClasses`` aggregate and the per-row
        outputs on the CPU (soft-maxed for the ``'avg_softmax'`` path).
    """
    total = sps.size(0)
    feats = torch.empty(total, nClasses)
    model = model.to(device)

    with torch.no_grad():
        start = 0
        while start < total:
            cuda.empty_cache()
            stop = min(start + batchSize, total)
            feats[start:stop, :] = model(sps[start:stop, :].to(device)).detach().to('cpu')
            start = stop

    if voteMethod == 'avg_feat':
        return torch.mean(feats, dim=0, keepdim=True), feats

    if voteMethod == 'most_vote':
        row_max, _ = torch.max(feats, dim=1, keepdim=True)
        return torch.sum(feats == row_max, dim=0, keepdim=True), feats

    if voteMethod == 'weighted_feat':
        mean_feat = torch.mean(feats, dim=0, keepdim=True)
        row_max, _ = torch.max(feats, dim=1, keepdim=True)
        votes = torch.sum(feats == row_max, dim=0, keepdim=True).float()
        return mean_feat * votes, feats

    # 'avg_softmax', which is also the fallback for unrecognised names.
    feats = nn.functional.softmax(feats, dim=1)
    return torch.mean(feats, dim=0, keepdim=True), feats
def get_cuda_memory(self):
    # type: () -> str
    """Return the peak CUDA memory held by the caching allocator, in MiB.

    The cache is emptied first, then the peak for ``self._device`` is read.
    ``cuda.max_memory_reserved`` is used when the installed torch provides
    it; ``cuda.max_memory_cached`` is its deprecated older name and is only
    used as a fallback.

    Returns:
        A string such as ``"123 MiB"``, or ``"??? MiB"`` when CUDA is not
        available.
    """
    if not cuda.is_available():
        return "??? MiB"

    cuda.empty_cache()
    peak_bytes = getattr(cuda, "max_memory_reserved", None) or cuda.max_memory_cached
    # Convert from B to MiB
    return "{:.0f} MiB".format(peak_bytes(device=self._device) // 1024**2)
def benchmark_stvae(dataset, log_name, cfg, **kwargs):
    """Train stVAE on ``dataset``, test it and dump the metrics as JSON.

    Selects genes with ``get_high_variance_genes`` from the log-transformed
    expression matrix, fills ``cfg.count_classes``, ``cfg.count_labels`` and
    ``cfg.input_dim``, trains with ``train`` and evaluates with ``test``. The
    result is written to ``<cfg.metrics_dir>/stVAE/<log_name>.json``; local
    references are then dropped and the CUDA cache is emptied.
    """
    ds = dataset
    n_genes = min(ds.X.shape[1], cfg.n_genes)
    expression = np.log(ds.X + 1.)
    scvai_genes, scvai_batches_ind, scvai_labels_ind = get_high_variance_genes(
        expression, ds.batch_indices, ds.labels, n_genes=n_genes, argmax=False)

    cfg.count_classes = np.unique(ds.batch_indices).shape[0]
    cfg.count_labels = np.unique(ds.labels).shape[0]
    cfg.input_dim = int(scvai_genes.shape[1])

    data = load_datasets(cfg, True, True,
                         (scvai_genes, scvai_batches_ind, scvai_labels_ind))
    dataloader_train = data[0]
    dataloader_val = data[1]
    dataloader_test = data[2]
    annot_train = data[3]
    annot_test = data[4]

    # Expression, class and cell-type arrays of the annotated test set.
    (styletransfer_test_expr,
     styletransfer_test_class,
     styletransfer_test_celltype) = (
        annot_test.dataset.tensors[k].cpu().numpy() for k in range(3))

    print('Training...')
    model, disc = train(dataloader_train, dataloader_val, cfg, None, None)

    print('Tests...')
    print('Dataset:', log_name)
    cfg.classifier_epochs = 1
    res = test(cfg, model, disc, annot_train,
               styletransfer_test_expr,
               styletransfer_test_class,
               styletransfer_test_celltype,
               dataset_name=log_name)

    (Path(cfg.metrics_dir) / 'stVAE').mkdir(parents=True, exist_ok=True)
    out_path = Path(cfg.metrics_dir) / 'stVAE/' / (log_name + '.json')
    with open(out_path, 'w') as file:
        json.dump(res, file, indent=4)

    # Drop every local reference so the collector and allocator can reclaim them.
    del ds, model, disc
    del styletransfer_test_expr, styletransfer_test_class, styletransfer_test_celltype
    del data, dataloader_train, dataloader_val, dataloader_test
    del annot_train, annot_test
    del scvai_genes, scvai_batches_ind, scvai_labels_ind
    gc.collect()
    cuda.empty_cache()
def train(model, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch and return the average loss and accuracy.

    Relies on the module-level ``mode``, ``fcn``, ``printfreq`` and
    ``N_epochs``. With ``fcn`` set, ``BCELoss`` / ``accuracy_check`` are used;
    otherwise ``CELoss`` / ``accuracy``. A progress line is printed every
    ``int(len(train_loader) / printfreq)`` batches.

    Returns:
        ``(losses.avg, acc.avg)`` from the running meters.
    """
    global net_vis
    model.train()
    dtype_float = torch.cuda.FloatTensor if mode == 'gpu' else torch.FloatTensor

    batch_started = time.time()
    print_started = time.time()
    batch_time = Averagvalue()
    printfreq_time = Averagvalue()
    losses = Averagvalue()
    acc = Averagvalue()

    optimizer.zero_grad()
    for i, (image, label) in enumerate(train_loader):
        image = dtype_float(to_cuda(image.float(), mode)).requires_grad_(False)
        label = to_cuda(label, mode).requires_grad_(False)
        pre_label = model(image)

        if fcn:
            loss = BCELoss(pre_label, label)
            prec1 = accuracy_check(pre_label, label)
            acc.update(prec1, 1)
        else:
            loss = CELoss(pre_label, label)
            prec1 = accuracy(pre_label, label)
            acc.update(prec1[0].item(), image.size(0))
        losses.update(loss.item(), image.size(0))

        batch_time.update(time.time() - batch_started)
        batch_started = time.time()

        if (i + 1) % (int(len(train_loader) / printfreq)) == 0:
            printfreq_time.update(time.time() - print_started)
            print_started = time.time()
            info = 'Epoch: [{0}/{1}][{2}/{3}] '.format(epoch, N_epochs, i, len(train_loader)) + \
                   'printfreq time {printfreq_time.val:.3f} (avg:{printfreq_time.avg:.3f}) '.format(
                       printfreq_time=printfreq_time)
            print(info)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    empty_cache()
    return losses.avg, acc.avg
def test_eval(benchmarks, model_name, opt_method, GDR=False, emb_dim=100, eval_b_size=256):
    """Evaluate a saved torchkge model on the test split of a benchmark.

    The model class ``<model_name>Model`` and the loader ``load_<benchmarks>``
    are resolved dynamically. When the checkpoint
    ``./checkpoint/<benchmarks>_<model_name>_<opt_method>.ckpt`` exists it is
    loaded and link-prediction and relation-prediction results are printed;
    otherwise a notice is printed.
    """
    ent_dim = emb_dim
    rel_dim = emb_dim
    # Checkpoint holding the best hits@k (ent) model.
    model_save_path = './checkpoint/' + benchmarks + '_' + model_name + '_' + opt_method + '.ckpt'
    device = 'cuda:0' if cuda.is_available() else 'cpu'

    # Load dataset
    module = getattr(import_module('torchkge.models'), model_name + 'Model')
    load_data = getattr(import_module('torchkge.utils.datasets'), 'load_' + benchmarks)
    print('Loading data...')
    kg_train, kg_val, kg_test = load_data(GDR=GDR)
    print(
        f'Train set: {kg_train.n_ent} entities, {kg_train.n_rel} relations, {kg_train.n_facts} triplets.'
    )
    print(
        f'Valid set: {kg_val.n_facts} triplets, Test set: {kg_test.n_facts} triplets.'
    )

    # Define the model
    if 'TransE' in model_name:
        model = module(emb_dim, kg_train.n_ent, kg_train.n_rel, dissimilarity_type='L2')
    else:
        model = module(ent_dim, rel_dim, kg_train.n_ent, kg_train.n_rel)

    # Move everything to CUDA if available
    if device == 'cuda:0':
        cuda.empty_cache()
        model.to(device)

    if not os.path.exists(model_save_path):
        print('No pretrain model found!')
        return

    # The checkpoint exists: load the model and evaluate it.
    load_ckpt(model_save_path, model, train=False)
    print(f'loading ckpt successful, start evaluate on test data...')
    print(model)
    model.eval()
    for evaluator_cls in (LinkPredictionEvaluator, RelationPredictionEvaluator):
        evaluator = evaluator_cls(model, kg_test)
        evaluator.evaluate(eval_b_size, verbose=True)
        evaluator.print_results()
def crabnet_mae(parameterization, train_val_df, n_splits=5, kf=None, verbose=False):
    """Compute the cross-validated mean absolute error of a CrabNet model.

    Parameters
    ----------
    parameterization : dict
        Dictionary of the parameters passed to `get_model()` after some slight
        modification by `correct_parameterization()`.
    train_val_df : DataFrame
        Data that is split into training and validation folds.
    n_splits : int, optional
        Number of folds used when `kf` is not given.
    kf : splitter, optional
        Object whose `split(train_val_df)` yields `(train_index, val_index)`
        pairs. Defaults to a shuffled `KFold` with a fixed random state.
    verbose : bool, optional
        Forwarded to `correct_parameterization()` and `get_model()`.

    Returns
    -------
    mae : float
        Mean absolute error averaged over the folds actually produced by `kf`.
    """
    parameterization = correct_parameterization(parameterization, verbose=verbose)
    if kf is None:
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=18012019)

    mae = 0.0
    n_folds = 0
    for train_index, val_index in kf.split(train_val_df):
        # Splitters yield positional indices, so select rows by position;
        # label-based lookup only matches on a default integer index.
        train_df = train_val_df.iloc[train_index]
        val_df = train_val_df.iloc[val_index]

        crabnet_model = get_model(mat_prop="expt_gap",
                                  train_df=train_df,
                                  learningcurve=False,
                                  force_cpu=False,
                                  verbose=verbose,
                                  **parameterization)
        val_true, val_pred, val_formulas, val_sigma = crabnet_model.predict(val_df)
        val_pred = np.nan_to_num(val_pred)
        mae += mean_absolute_error(val_true, val_pred)
        n_folds += 1

        # deallocate CUDA memory https://discuss.pytorch.org/t/how-can-we-release-gpu-memory-cache/14530/28
        del crabnet_model
        gc.collect()
        empty_cache()

    # Average over the folds actually run; a custom `kf` may not use `n_splits`.
    return mae / n_folds
def summ_eval(self, out_dir_path, data_source, **kwargs):
    """Runs evaluation of summaries.

    Results go to ``<out_dir_path>/<name>_eval.json`` where ``<name>`` is
    derived from ``data_source['data_path']``. Extra keyword arguments are
    bound to ``self.summ_gen_wrapper``.
    """
    assert self.eval_pipeline is not None
    empty_cache()

    summ_gen_func = partial(self.summ_gen_wrapper, **kwargs)
    output_fn = "%s_eval.json" % get_file_name(data_source['data_path'])
    out_file_path = comb_paths(out_dir_path, output_fn)

    logger.info("Performing summary evaluation on %s." % data_source)
    proc_kwargs = dict(
        summs_gen_func=summ_gen_func,
        rev_formatter_func=self.gen_seq_postproc,
        summ_formatter_func=self.summ_postproc,
        analytics_func=self.seq_analytics,
    )
    evaluator = SummEvalProc(self.eval_pipeline, **proc_kwargs)
    evaluator.eval(data_source, out_file_path=out_file_path)
def train(model, train_loader, test_loader, cfg):
    """Train ``model`` from ``model.epoch`` up to ``cfg.SOLVER.MAX_ITER`` epochs.

    Every epoch steps the LR scheduler, runs ``do_train``, empties the CUDA
    cache and saves the model with ``save(model, cfg)``.
    """
    first_epoch = model.epoch
    last_epoch = cfg.SOLVER.MAX_ITER
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    epoch = first_epoch
    while epoch < last_epoch:
        scheduler.step(epoch)
        do_train(epoch, model, optimizer, train_loader, test_loader, cfg)
        # Clear some unused variables.
        cuda.empty_cache()
        save(model, cfg)
        epoch += 1
def evaluate_projections(self):
    """Project all entities according to each relation.

    For every entity the per-relation projection matrix is built as the outer
    product of the relation and entity projection vectors plus an identity
    term, and applied to the entity embedding. Results are written to
    ``self.projected_entities[:, :, i]``. Later calls return immediately.

    All helper tensors are created on the device of the tensors they are
    combined with, so the method runs on the CPU as well as on any CUDA
    device.
    """
    # TODO turn this to batch computation
    if self.evaluated_projections:
        return

    print('Projecting entities in relations spaces.')

    rel_proj_vects = self.rel_proj_vects.data.view(
        self.number_relations, self.rel_emb_dim, 1)

    # The identity term is the same for every entity: build it once, on the
    # device of the projection vectors it is added to.
    id_mat = eye(n=self.rel_emb_dim, m=self.ent_emb_dim,
                 device=rel_proj_vects.device)
    id_mat = id_mat.view(1, self.rel_emb_dim, self.ent_emb_dim).expand(
        self.number_relations, self.rel_emb_dim, self.ent_emb_dim)

    emb_weight = self.entity_embeddings.weight
    on_cuda = emb_weight.is_cuda
    if on_cuda:
        assert self.projected_entities.is_cuda

    for i in tqdm(range(self.number_entities)):
        ent_proj_vect = self.ent_proj_vects.data[i].view(1, -1)
        projection_matrices = matmul(rel_proj_vects, ent_proj_vect)
        projection_matrices += id_mat

        if on_cuda:
            empty_cache()

        # Index tensor lives wherever the embedding table lives.
        mask = tensor([i], device=emb_weight.device).long()
        entity = self.entity_embeddings(mask)

        projected_entity = matmul(projection_matrices, entity.view(-1)).detach()
        self.projected_entities[:, :, i] = projected_entity.view(
            self.number_relations, self.rel_emb_dim)
        del projected_entity

    self.evaluated_projections = True
def translate_batch(model, tokenizer, src_text_list, cuda=True):
    """Translate a list of source strings with a seq2seq model.

    Args:
        model: object exposing ``generate(**batch)``.
        tokenizer: object exposing ``prepare_seq2seq_batch`` and
            ``batch_decode``.
        src_text_list: source strings to translate.
        cuda: when true, the batch and the generated ids are moved to
            ``'cuda'`` and the CUDA cache is emptied afterwards.

    Returns:
        The decoded strings returned by ``tokenizer.batch_decode``.
    """
    # Tokenise the texts that were actually passed in.
    batch = tokenizer.prepare_seq2seq_batch(src_texts=src_text_list,
                                            return_tensors="pt")
    if cuda:
        batch = batch.to('cuda')

    gen = model.generate(**batch)
    if cuda:
        gen = gen.to('cuda')

    words = tokenizer.batch_decode(gen, skip_special_tokens=True)
    if cuda:
        c.empty_cache()
    return words