def do_evaluate(net, test_dataset, batch_size, augment=[]): test_loader = DataLoader( test_dataset, sampler = SequentialSampler(test_dataset), batch_size = batch_size, drop_last = False, num_workers = 4, pin_memory = True, collate_fn = null_collate ) #---- start_timer = timer() test_num = 0 test_id = [] test_probability = [[],[],[]] test_truth = [[],[],[]] start_timer = timer() for t, (input, truth, infor) in enumerate(test_loader): batch_size,C,H,W = input.shape input = input.cuda() with torch.no_grad(): net.eval() num_augment=0 probability=[0,0,0] if 'null' in augment: #null logit = net(input) prob = logit_to_probability(logit) probability = [p+q**0.5 for p,q in zip(probability,prob)] num_augment += 1 probability = [p/num_augment for p in probability] batch_size = len(infor) for i in range(NUM_TASK): test_probability[i].append(probability[i].data.cpu().numpy()) test_truth[i].append(truth[i].data.cpu().numpy()) test_id.extend([i.image_id for i in infor]) test_num += batch_size print('\r %4d / %4d %s'%( test_num, len(test_loader.dataset), time_to_str((timer() - start_timer),'min') ),end='',flush=True) assert(test_num == len(test_loader.dataset)) print('') for i in range(NUM_TASK): test_probability[i] = np.concatenate(test_probability[i]) test_truth[i] = np.concatenate(test_truth[i]) print(time_to_str((timer() - start_timer),'sec')) return test_id, test_truth, test_probability
def train_one_epoch(train_loader, model, criterions, optimizer, epoch, meters, since, log=None):
    """Train `model` for one epoch and append the epoch averages to `meters`.

    Args:
        train_loader: iterable yielding (images, target) batches.
        model: network to optimize (already on CUDA, or movable via .cuda()).
        criterions: pair [bce_criterion, balance_criterion].
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index (logging only).
        meters: dict of history lists; keys 'loss' and 'f1' are appended to.
        since: timer() start timestamp used for elapsed-time logging.
        log: optional file-like object for print(..., file=log).

    Returns:
        The same `meters` dict, extended with this epoch's averages.
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()

    # Summaries of previous epochs, shown on every progress line.
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0

    for batch_id, (images, target) in enumerate(train_loader):
        batch_x = images.cuda(non_blocking=True)
        target = torch.Tensor(np.array(target)).float().cuda(non_blocking=True)

        output = model(batch_x)
        bce_criterion = criterions[0]
        balance_criterion = criterions[1]
        bce_loss = bce_criterion(output, target)
        balance_loss = balance_criterion(output, target)
        # Balance loss is heavily up-weighted for the gradient, but only the
        # plain BCE part is metered/reported.
        total_loss = bce_loss + 8.0 * balance_loss
        losses.update(bce_loss.item(), batch_x.size(0))

        # FIX: sklearn's f1_score cannot consume a CUDA tensor -- move the
        # target back to the CPU (the prediction side was already .cpu()).
        f1_batch = f1_score(target.cpu(),
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))

        optimizer.zero_grad()
        total_loss.backward()
        # gradient clip
        if cfg.grident_clip:
            # FIX: clip_grad_norm is deprecated (removed in recent PyTorch);
            # use the in-place variant clip_grad_norm_.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()

        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              # FIX: typo 'Besr F1' -> 'Best F1' in the progress line.
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)

    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)
    return meters
def train(train_loader, model, criterion, optimizer, epoch, valid_metrics, best_results, start):
    """Run one training epoch over `train_loader`.

    Args:
        train_loader: iterable of (images, target) batches.
        model: classification network; outputs per-class logits.
        criterion: loss taking (logits, long-target).
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used for the progress line.
        valid_metrics, best_results: previous validation / best stats, only
            echoed into the progress line.
        start: timer() start timestamp for elapsed-time display.

    Returns:
        [acc.avg, losses.avg, f1.avg] for the epoch.
    """
    losses = utils.AverageMeter()
    f1 = utils.AverageMeter()
    acc = utils.AverageMeter()
    model.train()
    for i, (images, target) in enumerate(train_loader):
        images = images.to(device)
        # FIX: removed unused local `indx_target = target.clone()`.
        target = torch.from_numpy(np.array(target)).long().to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)
        losses.update(loss.item(), images.size(0))

        # FIX: compute softmax predictions once (was duplicated for f1 and
        # accuracy) and pass an explicit dim= -- F.softmax without dim is
        # deprecated and its implicit axis choice is ambiguous.
        target_np = target.cpu().data.numpy()
        pred_np = np.argmax(F.softmax(output, dim=1).cpu().data.numpy(), axis=1)
        f1_batch = f1_score(target_np, pred_np, average='macro')
        acc_score = accuracy_score(target_np, pred_np)
        f1.update(f1_batch, images.size(0))
        acc.update(acc_score, images.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Overwrite the same console line each batch.
        print('\r', end='', flush=True)
        message = '%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.4f | %s %s %s | %s' % (\
            "train", i/len(train_loader) + epoch, epoch,
            acc.avg, losses.avg, f1.avg,
            valid_metrics[0], valid_metrics[1], valid_metrics[2],
            str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
            utils.time_to_str((timer() - start), 'min'))
        print(message, end='', flush=True)
    return [acc.avg, losses.avg, f1.avg]
def train(train_loader,model,loss_fn, optimizer,epoch,valid_loss,start):
    """Run a single training epoch and return the running average loss.

    Each batch: forward pass, loss, backward, optimizer step; a progress
    line is redrawn in place, and the final line is appended to the
    module-level `log` after the epoch finishes.
    """
    meter = utils.AverageMeter()
    model.train()
    n_batches = len(train_loader)
    for step, (x_batch, y_batch) in enumerate(train_loader):
        prediction = model(x_batch)
        batch_loss = loss_fn(prediction, y_batch)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        meter.update(batch_loss.item(), x_batch.shape[0])

        # Redraw the progress line in place.
        print('\r', end='', flush=True)
        message = '%s %5.1f %6.1f | %0.3f | %0.3f | %s' % (
            "train", step / n_batches + epoch, epoch+1,
            meter.avg, valid_loss,
            utils.time_to_str((timer() - start), 'min'))
        print(message, end='', flush=True)

    # Persist the last progress line of the epoch.
    log.write("\n")
    log.write(message)
    log.write("\n")
    return meter.avg
def evaluate(val_loader, model, criterion, epoch, train_loss, best_results, start):
    """Validate for one epoch; returns [losses.avg, f1.f1].

    When config.with_mse_loss is set, an auxiliary image-reconstruction
    MSE term is added to the classifier loss; the input tensor is then
    assumed to stack input channels first and reconstruction-target
    channels last -- TODO confirm channel layout against the dataset.
    """
    losses = AverageMeter()
    f1 = F1Meter()
    model.cuda()
    model.eval()
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images_var = images.cuda(non_blocking=True)
            # First config.in_channels channels feed the classifier.
            in_images = images_var[:, :config.in_channels, :, :]
            target = torch.from_numpy(np.array(target)).float().cuda(non_blocking=True)
            classifier_output = model(in_images)
            classifier_loss = criterion(classifier_output, target)
            if config.with_mse_loss:
                # Last config.out_channels channels are the reconstruction target.
                out_images = images_var[:, -config.out_channels:, :, :]
                reconstruct_output = model.reconstruct_layer(model.features(in_images))
                reconstruct_loss = nn.MSELoss().cuda()(reconstruct_output, out_images)
                loss = classifier_loss + reconstruct_loss
            else:
                loss = classifier_loss
            losses.update(loss.item(), images_var.size(0))
            # F1Meter accumulates thresholded sigmoid predictions vs. target.
            f1.update(classifier_output.sigmoid().cpu() > config.thresholds, target)
            if i % config.logging_every_n_steps == 0:
                message = logging_pattern % (
                    "val", i / len(val_loader) + epoch, epoch,
                    train_loss[0], train_loss[1],
                    losses.avg, f1.f1,
                    str(best_results[0])[:8], str(best_results[1])[:8],
                    time_to_str((timer() - start), 'min'))
                print(message, end='\n', flush=True)
    return [losses.avg, f1.f1]
def evaluate(val_loader, model, criterions, epoch, meters, start, log=None):
    """Validate for one epoch and append metrics to `meters`.

    Collects sigmoid predictions for the whole validation set, then lets
    eval_f1score() search per-class thresholds; appends 'val_loss',
    'val_aver_f1', 'val_std_f1', 'val_f1' and 'threshold' to `meters`.
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    # Best-so-far summaries for the progress line.
    if len(meters['val_f1']):
        best_f1_epoch = np.argmax(meters['val_f1'])
        best_f1_score = meters['val_f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['val_loss'])
        best_loss = meters['val_loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
    model.cuda()
    model.eval()
    preds = []
    targets = []
    with torch.no_grad():
        for batch_id, (images, target) in enumerate(val_loader):
            batch_x = images.cuda(non_blocking=True)
            batch_y = torch.Tensor(
                np.array(target)).float().cuda(non_blocking=True)
            output = model(batch_x)
            bce_criterion = criterions[0]
            balance_criterion = criterions[1]
            bce_loss = bce_criterion(output, batch_y)
            balance_loss = balance_criterion(output, batch_y)
            # NOTE(review): total_loss is computed but never used (only the
            # BCE part is metered) -- presumably kept to mirror the training
            # loop; confirm before removing.
            total_loss = bce_loss + balance_loss
            losses.update(bce_loss.item(), batch_x.size(0))
            pred_y = output.sigmoid().cpu().data.numpy()
            preds.append(pred_y)
            targets.append(target)
            # Fixed 0.15 threshold only for the per-batch running F1; the
            # final F1 below uses the searched per-class thresholds.
            f1_batch = f1_score(target, pred_y > 0.15, average='macro')
            f1.update(f1_batch, batch_x.size(0))
            print('Validate Epoch %3d\t' % epoch,
                  'Batch %4d|%4d\t' % (batch_id, len(val_loader)),
                  'Aver Loss: %6.5f\t' % losses.avg,
                  'Aver F1 Score: %6.5f' % f1.avg,
                  'Best Val loss:%10.5f, Epoch: %3d\t' % (best_loss, best_loss_epoch),
                  'Best Val F1:%10.5f, Epoch: %3d\t' % (best_f1_score, best_f1_epoch),
                  'Time: %s' % time_to_str((timer() - start), 'min'),
                  file=log)
    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    # Threshold search over the full validation predictions.
    threshold, best_score, std_score = eval_f1score(preds, targets, cfg.label_names, log=log)
    print("Average F1 Score is ", f1.avg, file=log)
    meters['val_loss'].append(losses.avg)
    meters['val_aver_f1'].append(f1.avg)
    meters['val_std_f1'].append(std_score)
    meters['val_f1'].append(best_score)
    meters['threshold'].append(threshold)
    return meters
def tieResponse(self):
    """Format the stored TIE file-hash reputation records as a string.

    Builds one "Provider / Creation Date / Reputation" paragraph per entry
    in self.content, headed by the file hash.
    """
    # FIX: removed the unused counter `i` and replaced repeated string
    # concatenation (quadratic) with a parts list joined once at the end.
    parts = ["File Hash " + self.filehash + " Reputation\n\n"]
    for entry in self.content:
        parts.append("Provider: " + entry['provider'] + "\n")
        parts.append("Creation Date: " + utils.time_to_str(entry['createDate']) + "\n")
        parts.append("Reputation: " + entry['reputation'] + "\n")
        parts.append("\n")
    return "".join(parts)
def update(self):
    """Refresh the HUD text surfaces: elapsed-time chrono, FPS counter,
    player coin count, and oxygen-bottle level, each rendered white-on-black
    (or a signal colour) with this HUD's font."""
    game = self.game
    render = self.font.render
    backdrop = pygame.Color("black")

    # Seconds elapsed since the game started.
    self.time = (pygame.time.get_ticks() - game.start_time) / 1000

    self.chrono = render(time_to_str(self.time), True,
                         pygame.Color("white"), backdrop)
    self.fps_info = render("FPS : " + float_to_str(game.clock.get_fps()), True,
                           pygame.Color("red"), backdrop)
    self.player_coins = render(str(game.player.coins), True,
                               pygame.Color("yellow"), backdrop)
    self.player_oxygen_bottle = render(float_to_str(game.player.oxygen_bottle), True,
                                       pygame.Color("lightblue"), backdrop)
def evaluate(val_loader, model, criterion, epoch, train_metrics, best_results, start):
    """Validate for one epoch and drive the two early-stopping trackers.

    Returns ([acc.avg, losses.avg, f1.avg], early_stop) where early_stop is
    True only when BOTH module-level trackers (`early_stopping` on loss and
    `early_stopping_f1` on negated F1) have tripped.
    """
    # only meter loss and f1 score
    losses = utils.AverageMeter()
    f1 = utils.AverageMeter()
    acc = utils.AverageMeter()
    valid_losses = []
    valid_f1s = []
    # switch mode for evaluation
    model.to(device)
    model.eval()
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            images_var = images.to(device)
            # NOTE(review): indx_target is never used afterwards.
            indx_target = target.clone()
            target = torch.from_numpy(np.array(target)).long().to(device)
            output = model(images_var)
            loss = criterion(output, target)
            losses.update(loss.item(), images_var.size(0))
            valid_losses.append(loss.item())
            # NOTE(review): F.softmax without dim= is deprecated; the
            # argmax over axis=1 implies per-row class probabilities.
            f1_batch = f1_score(target.cpu().data.numpy(),
                                np.argmax(F.softmax(output).cpu().data.numpy(), axis=1),
                                average='macro')
            acc_score = accuracy_score(
                target.cpu().data.numpy(),
                np.argmax(F.softmax(output).cpu().data.numpy(), axis=1))
            f1.update(f1_batch, images.size(0))
            # Negated so the early-stopping tracker (which minimizes) can
            # be fed F1 directly.
            valid_f1s.append(f1_batch.item() * -1)  # f1:biger is better
            acc.update(acc_score, images.size(0))
            # Overwrite the same console line each batch.
            print('\r', end='', flush=True)
            message = '%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.4f | %s %s %s | %s' % (\
                "val", i/len(val_loader) + epoch, epoch,
                acc.avg,losses.avg,f1.avg,
                train_metrics[0], train_metrics[1],train_metrics[2],
                str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                utils.time_to_str((timer() - start),'min'))
            print(message, end='', flush=True)
        valid_loss = np.average(valid_losses)
        valid_f1 = np.average(valid_f1s)
        print("\n")
    # Module-level early-stopping trackers (defined elsewhere in the file).
    early_stopping_f1(valid_f1, model)
    early_stopping(valid_loss, model)
    early_stop = early_stopping.early_stop and early_stopping_f1.early_stop
    return [acc.avg, losses.avg, f1.avg], early_stop
def train(task_name, model, optimizer, criterion, scheduler, train_loader, val_loader, mix_loder=None, log=None):
    """Full training driver: one train + validate cycle per epoch, with
    checkpointing on best validation loss and best validation F1.

    Args:
        task_name: label used for checkpoint files and log lines.
        mix_loder: optional second loader; when given, the mixup variant of
            the epoch trainer is used. (Name kept as-is for caller
            compatibility despite the 'loader' typo.)
        log: optional file-like target for print(..., file=log).

    Returns:
        The trained model (metrics history lives in the saved checkpoints).
    """
    meters = defaultdict(list)
    start = timer()
    for epoch in range(0, cfg.epochs):
        scheduler.step(epoch)
        cur_lr = get_learning_rate(optimizer)
        print('Learning rate is ', cur_lr, file=log)
        if mix_loder:
            meters = train_one_epoch_mixup(train_loader, mix_loder, model,
                                           criterion, optimizer, epoch, meters, start)
        else:
            meters = train_one_epoch(train_loader, model, criterion,
                                     optimizer, epoch, meters, start)
        meters = evaluate(val_loader, model, criterion, epoch, meters, start)
        # An epoch is "best" when it holds the argmin/argmax of the history.
        is_best_loss = np.argmin(meters['val_loss']) == epoch
        is_best_f1 = np.argmax(meters['val_f1']) == epoch
        state = {
            "state_dict": model.state_dict(),
            "epoch": epoch,
            "optimizer": optimizer.state_dict(),
            # Shallow copy: the lists inside are shared -- TODO confirm the
            # checkpoint writer serializes immediately.
            "meters": meters.copy()
        }
        save_checkpoint(state, task_name, is_best_loss, is_best_f1)
        print('Task Name: %s\t' % task_name,
              'Validate Epoch %3d\t' % epoch,
              'Train Loss: %6.5f\t' % meters['loss'][-1],
              'Train F1 Score: %6.5f\t' % meters['f1'][-1],
              'Val Loss: %6.5f\t' % meters['val_loss'][-1],
              'Val F1 Score: %6.5f\t' % meters['val_f1'][-1],
              'Val Std F1 Score: %6.5f\t' % meters['val_std_f1'][-1],
              'Val Aver F1 Score: %6.5f\t' % meters['val_aver_f1'][-1],
              'Best Val loss:%6.5f, Epoch: %3d\t' % (np.min(meters['val_loss']), np.argmin(meters['val_loss'])),
              'Best F1 Loss: %6.5f, Epoch: %3d\t' % (np.max(meters['val_f1']), np.argmax(meters['val_f1'])),
              'Time: %s' % time_to_str((timer() - start), 'min'),
              file=log)
        time.sleep(0.01)
    return model
def message(rate, iter, epoch, kaggle, valid_loss, train_loss, batch_loss, mode='print'):
    """Build one progress-log line.

    Args:
        rate: current learning rate.
        iter: iteration counter (thousands are displayed). Parameter name
            shadows the builtin but is kept for caller compatibility.
        kaggle: (per-task scores, overall score) pair.
        valid_loss: 6-tuple of validation losses.
        train_loss, batch_loss: 3-tuples; which one is shown depends on mode.
        mode: 'print' shows batch_loss; 'log' shows train_loss and stars
            iterations listed in the module-level `iter_save`.

    Raises:
        ValueError: for an unknown mode. FIX: previously an unknown mode
        fell through to a NameError on the undefined `asterisk`/`loss`.
    """
    if mode == 'print':
        asterisk = ' '
        loss = batch_loss
    elif mode == 'log':
        asterisk = '*' if iter in iter_save else ' '
        loss = train_loss
    else:
        raise ValueError("mode must be 'print' or 'log', got %r" % (mode,))

    text = \
        '%0.5f %5.1f%s %4.1f | '%(rate, iter/1000, asterisk, epoch,) +\
        '%0.4f : %0.4f %0.4f %0.4f | '%(kaggle[1],*kaggle[0]) +\
        '%4.4f, %4.4f, %4.4f : %4.4f, %4.4f, %4.4f | '%(*valid_loss,) +\
        '%4.4f, %4.4f, %4.4f |'%(*loss,) +\
        '%s' % (time_to_str((timer() - start_timer),'min'))
    return text
def evaluate(val_loader,model,loss_fn,epoch,train_loss,start_time): losses = utils.AverageMeter() # switch mode for evaluation model.cuda() model.eval() with torch.no_grad(): for i, (x_batch, y_batch) in enumerate(val_loader): y_pred = model(x_batch) loss = loss_fn(y_pred, y_batch) losses.update(loss.item(),x_batch.shape[0]) print('\r', end='', flush=True) message = '%s %5.1f %6.1f | %0.3f | %0.3f | %s' % ( \ "val", i / len(val_loader) + epoch, epoch+1, train_loss, losses.avg, utils.time_to_str((timer() - start_time), 'min')) print(message, end='', flush=True) # Concatenate all every batch if i == 0: total_output = y_pred total_target = y_batch else: total_output = torch.cat([total_output, y_pred], 0) total_target = torch.cat([total_target, y_batch], 0) # compute loss for the entire evaluation dataset # print("total_output:", total_output.shape) # print("total_target:", total_target.shape) log.write("\n") log.write(message) log.write("\n") return losses.avg, total_output
def train(args):
    """Full single-GPU training driver.

    Restores weights if a checkpoint exists, runs the train/validation
    loop, writes per-step and per-epoch CSV logs, and saves per-epoch,
    best, and final checkpoints.

    `args.args_in` bundles model, criterion, optimizer, data generators
    and paths; `opts`, `save_dir`, `UNet_3Plus`, `time_to_str` are
    module-level names -- assumed defined elsewhere in this file.
    """
    logs_temp_file = os.path.join(
        args.logs_dir,
        '_'.join(['steps_log', args.args_in.time_code]) + '.csv')
    epochs_temp_file = os.path.join(
        args.logs_dir,
        '_'.join(['epochs_log', args.args_in.time_code]) + '.csv')
    CHECKPOINT_PATH = os.path.join(args.logs_dir, args.args_in.model_name)

    model = args.args_in.model
    num_params = model.num_params
    model = model.cuda()
    criterion = args.args_in.criterion.cuda()
    optimizer = args.args_in.optimizer

    # Restore weights (and optimizer state when available) from a prior run.
    if os.path.exists(args.args_in.model_path) or os.path.exists(args.args_in.ckpt_path):
        print('load the existing model accurately')
        if os.path.exists(args.args_in.model_path):
            ckpt = torch.load(args.args_in.model_path)
        else:
            ckpt = torch.load(args.args_in.ckpt_path)
        if 'state_dict' in ckpt:
            model.load_state_dict(ckpt['state_dict'])
            optimizer.load_state_dict(ckpt['optimizer'])
        else:
            # Legacy checkpoints stored the bare state dict.
            model.load_state_dict(ckpt)
        del ckpt
        torch.cuda.empty_cache()
    else:
        print('Model does not exist in directory')

    start = datetime.now()
    verbose = args.args_in.verbose
    training_datagen = args.args_in.training_datagen
    validation_datagen = args.args_in.validation_datagen
    steps_per_epoch = (args.args_in.steps_per_epoch
                       if args.args_in.steps_per_epoch else len(training_datagen))
    steps_per_validation = (args.args_in.steps_per_validation
                            if args.args_in.steps_per_validation else len(validation_datagen))
    callbacks = args.args_in.callbacks
    metrics = args.args_in.metrics
    metrics_name = [metric.name for metric in metrics]

    # Log-dict skeletons: train metrics, their val_* mirrors, plus timing/lr.
    logs = {'loss': 0}
    logs.update({x: 0 for x in metrics_name})
    train_dict = logs.copy()
    validation_dict = {f'val_{key}': 0 for key in logs}
    logs.update(validation_dict)
    logs.update({'time': 0, 'lr': 0, 'epoch': 0})
    logs_df = pd.DataFrame(columns=logs.keys())
    epoch_str_width = len(str(opts.epochs))
    best_loss = np.inf

    for epoch in range(args.args_in.initial_epoch, opts.epochs):
        training_datagen.on_epoch_end()
        logs['lr'] = optimizer.param_groups[0]['lr']
        train_df = pd.DataFrame(columns=list(train_dict.keys()))
        validation_df = pd.DataFrame(columns=list(validation_dict.keys()))

        # ---------------- Training ----------------
        model.train()
        start_time = time.time()
        for step in range(steps_per_epoch):
            x, y = training_datagen.__getitem__(step)
            if not opts.normalize_data:
                # Generator yields normalized data; rescale to raw pixel range.
                x = [x_ * 255. for x_ in x]
                y *= 255.
            x = [x_.cuda(non_blocking=True) for x_ in x]
            y = y.cuda(non_blocking=True)

            optimizer.zero_grad()
            output = model(x)
            if not opts.normalize_data:
                output = output * 255.
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                for metric in metrics:
                    train_dict[metric.name] = metric(output, y).item()
            del x, y, output
            # NOTE(review): empty_cache() every step is costly -- kept to
            # preserve the original memory behaviour; confirm it is needed.
            torch.cuda.empty_cache()

            train_dict['loss'] = loss.item()
            # FIX: DataFrame.append was removed in pandas 2.0 -- use concat.
            train_df = pd.concat([train_df, pd.DataFrame([train_dict])],
                                 ignore_index=True)

            time_so_far = time.time() - start_time
            step_time = time_so_far / (step + 1)
            if verbose >= 1:
                time_spent_str = time_to_str(time_so_far)
                time_str = time_to_str(step_time * (steps_per_epoch - step))
                other_str = ' - '.join([
                    f"{key}: {value:0.5f}" for key, value in train_dict.items()
                ])
                print(
                    f'Epoch [{epoch+1}/{opts.epochs}] - Step [{step + 1}/{steps_per_epoch}] - ETA: '
                    f'[{time_spent_str}<{time_str}] - {other_str}',
                    end='\r')

            # Per-step CSV log (header written only on first creation).
            logs_temp = {'Epoch': epoch + 1, 'Step': step + 1}
            logs_temp.update(train_dict)
            logs_temp['city'] = opts.city.lower()
            logs_temp_df = pd.DataFrame([logs_temp])
            if os.path.exists(logs_temp_file):
                logs_temp_df.to_csv(logs_temp_file, mode='a', index=False, header=False)
            else:
                logs_temp_df.to_csv(logs_temp_file, mode='a', index=False)

        epoch_time = time.time() - start_time
        train_dict = train_df.mean(axis=0).to_dict()
        for key, value in train_dict.items():
            logs[key] = value
        logs['time'] = epoch_time
        logs['epoch'] = epoch + 1

        # ---------------- Validation ----------------
        model.eval()
        val_start_time = time.time()
        for step in range(steps_per_validation):
            # FIX: was `validation_datagen.__getitem__(gpus * step + gpu)`;
            # `gpus`/`gpu` were leftovers of the removed DDP path and are
            # undefined here -- index by plain step like the training loop.
            x, y = validation_datagen.__getitem__(step)
            if not opts.normalize_data:
                x = [x_ * 255. for x_ in x]
                y *= 255.
            x = [x_.cuda(non_blocking=True) for x_ in x]
            y = y.cuda(non_blocking=True)

            with torch.no_grad():
                # UNet_3Plus takes a single concatenated tensor; other models
                # take the list of inputs directly.
                output = model(torch.cat(x, dim=1)) if isinstance(
                    args.args_in.model, UNet_3Plus) else model(x)
                if not opts.normalize_data:
                    output = output * 255.
                val_loss = criterion(output, y)
                # FIX: dropped dist.all_reduce(..., group=group) and the
                # world_size division -- the process group was never
                # initialized in this single-GPU path (all DDP setup is
                # commented out), so the call raised at runtime.
                for metric in metrics:
                    validation_dict[f'val_{metric.name}'] = metric(output, y).item()
            del x, y, output
            torch.cuda.empty_cache()

            validation_dict['val_loss'] = val_loss.item()
            validation_df = pd.concat([validation_df, pd.DataFrame([validation_dict])],
                                      ignore_index=True)

        validation_dict = validation_df.mean(axis=0).to_dict()
        for key, value in validation_dict.items():
            logs[key] = value
        logs['val_time'] = time.time() - val_start_time
        logs['city'] = opts.city.lower()
        logs_df = pd.concat([logs_df, pd.DataFrame([logs])], ignore_index=True)

        # FIX: was `if not callbacks:` which only ever iterated an empty or
        # None collection (dead code); the inline comment said "is not None".
        if callbacks:
            for callback in callbacks:
                callback.step(logs['val_loss'])

        other_str = ' - '.join([
            f"{key}: {value:0.6f}" for key, value in logs.items()
            if not isinstance(value, str)
        ])
        # FIX: `epochs` was an undefined name; the configured total is opts.epochs.
        print(f'epoch {epoch + 1:0{epoch_str_width}d}/{opts.epochs} -- {other_str}')

        # Per-epoch CSV log.
        epochs_temp_df = pd.DataFrame([logs])
        if os.path.exists(epochs_temp_file):
            epochs_temp_df.to_csv(epochs_temp_file, mode='a', index=False, header=False)
        else:
            epochs_temp_df.to_csv(epochs_temp_file, mode='a', index=False)

        # Per-epoch checkpoint.
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        # NOTE(review): str.replace swaps EVERY '.' in the path; fine only
        # while ckpt_path contains a single dot (the extension) -- confirm.
        torch.save(checkpoint, args.args_in.ckpt_path.replace('.', f'_ckpt{epoch + 1}.'))

        # Best-so-far checkpoint, keyed on val_mse when that metric exists.
        present_best_loss = logs['val_mse'] if 'val_mse' in logs else logs['val_loss']
        if present_best_loss < best_loss:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, args.args_in.model_path)
            print(
                f'The model improves from {best_loss:0.6f} to {present_best_loss:0.6f} and has been saved in'
                f' {args.args_in.model_path}')
            best_loss = present_best_loss
        else:
            print(f'The model does not improve from {best_loss:0.6f}')

    # Persist the accumulated per-epoch logs next to the model file.
    if os.path.exists(args.args_in.model_path[:-3] + '.csv'):
        logs_df.to_csv(args.args_in.model_path[:-3] + '.csv', mode='a',
                       index=False, header=False)
    else:
        logs_df.to_csv(args.args_in.model_path[:-3] + '.csv', mode='a', index=False)

    # Save the last state of the model.
    checkpoint = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(checkpoint, CHECKPOINT_PATH)

    # Record the hyper-parameters used for this run.
    # NOTE(review): `save_dir` is not defined in this function -- assumed to
    # be a module-level constant; confirm.
    readme_file = os.path.join(save_dir, 'SedanionScaledReadMe.csv')
    opts_dict = vars(
        argparse.Namespace(
            **{
                'filename': args.args_in.model_name[:-3],
                'num_params': num_params,
                'val_mse': best_loss
            }, **vars(opts)))
    opts_df = pd.DataFrame([opts_dict])
    if os.path.exists(readme_file):
        opts_df.to_csv(readme_file, mode='a', index=False, header=False)
    else:
        opts_df.to_csv(readme_file, mode='a', index=False)

    print("Training complete in: " + str(datetime.now() - start))
def training(model, fold, args):
    """Train `model` on one stratified fold of the multilabel dataset.

    Restores an existing checkpoint if present, builds weighted-sampled
    train and sequential validation loaders from a single
    MultilabelStratifiedShuffleSplit, then alternates train()/evaluate()
    for config.epochs epochs, checkpointing on best loss and best F1.
    """
    # Restore from the last checkpoint: all model weights restored, but not
    # the learning rate schedule.
    checkpoint_file = os.path.join(config.weights, config.model_name,
                                   str(fold), "checkpoint.pth.tar")
    if os.path.exists(checkpoint_file):
        best_model = torch.load(checkpoint_file)
        model.load_state_dict(best_model["state_dict"])

    # logging issues
    log = Logger()
    log.open(os.path.join(config.logs_dir, "%s_log_train.txt" % config.model_name), mode="a")
    log.write(
        "\n---------------------------- [START %s] %s\n\n" %
        (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))
    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode iter epoch | loss f1_macro | loss f1_macro | loss f1_macro | time '
        ' |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # Optimizer / loss / LR schedule.
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_start,
                          momentum=0.9, weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))
    best_results = [np.inf, 0]  # [best val loss, best val macro-F1]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    # Load the label CSV and expand each 'Target' string into a multi-hot row.
    all_files = pd.read_csv(config.train_csv)
    image_names = all_files['Id']
    labels_strs = all_files['Target']
    image_labels = []
    for cur_label_str in labels_strs:
        # FIX: np.float was removed in NumPy 1.24 -- use the builtin float
        # (behaviourally identical dtype).
        cur_label = np.eye(config.num_classes, dtype=float)[
            np.array(list(map(int, cur_label_str.split(' '))))].sum(axis=0)
        image_labels.append(cur_label)
    image_labels = np.stack(image_labels, axis=0)

    # Single stratified split (n_splits=1): the loop binds the only fold.
    msss = MultilabelStratifiedShuffleSplit(n_splits=1,
                                            test_size=config.val_percent,
                                            random_state=0)
    for train_index, val_index in msss.split(image_names, image_labels):
        train_image_names = image_names[train_index]
        train_image_labels = image_labels[train_index]
        val_image_names = image_names[val_index]
        val_image_labels = image_labels[val_index]

    train_gen = HumanDataset(train_image_names, train_image_labels,
                             config.train_dir, mode="train")
    # Class-imbalance-aware sampling over the training subset.
    sampler = WeightedRandomSampler(weights=get_sample_weights()[train_index],
                                    num_samples=int(len(all_files) * (1 - config.val_percent)))
    train_loader = DataLoader(train_gen, batch_size=config.batch_size,
                              pin_memory=True, num_workers=4, sampler=sampler)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir,
                           augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size, shuffle=False,
                            pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)

        # check results: loss is best when lower, macro-F1 when higher.
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
def training(model, fold, log, train_image_names, train_image_labels, val_image_names, val_image_labels):
    """Train `model` for one fold given pre-split image names/labels.

    Builds plain shuffled train / sequential validation loaders, then
    alternates train()/evaluate() for config.epochs epochs, checkpointing
    on best validation loss and best macro-F1 via save_checkpoint().
    """
    # logging issues
    log.write(
        "\n---------------------------- [START %s] %s\n\n" %
        (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 20))
    log.write(
        '----------------------|--------- Train ---------|-------- Valid ---------|-------Best '
        'Results-------|----------|\n')
    log.write(
        'mode iter epoch | loss f1_macro | loss f1_macro | loss f1_macro | time '
        ' |\n')
    log.write(
        '----------------------------------------------------------------------------------------------------------'
        '----\n')

    # training params
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_start,
                          momentum=0.9, weight_decay=config.weight_decay)
    if config.loss_name == 'ce':
        criterion = nn.BCEWithLogitsLoss().cuda()
    elif config.loss_name == 'focal':
        criterion = FocalLoss().cuda()
    elif config.loss_name == 'f1':
        criterion = F1Loss().cuda()
    else:
        raise ValueError('unknown loss name {}'.format(config.loss_name))
    # best_results / val_metrics hold [loss, macro-F1].
    best_results = [np.inf, 0]
    val_metrics = [np.inf, 0]
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.learning_rate_decay_epochs,
                                    gamma=config.learning_rate_decay_rate)
    start = timer()

    train_gen = HumanDataset(train_image_names, train_image_labels,
                             config.train_dir, mode="train")
    train_loader = DataLoader(train_gen, batch_size=config.batch_size,
                              shuffle=True, pin_memory=True, num_workers=4)
    val_gen = HumanDataset(val_image_names, val_image_labels, config.train_dir,
                           augument=False, mode="train")
    val_loader = DataLoader(val_gen, batch_size=config.batch_size,
                            shuffle=False, pin_memory=True, num_workers=4)

    # train
    for epoch in range(0, config.epochs):
        # training & evaluating
        scheduler.step(epoch)
        get_learning_rate(optimizer)
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        val_metrics = evaluate(val_loader, model, criterion, epoch,
                               train_metrics, best_results, start)

        # check results: loss is best when lower, macro-F1 when higher.
        is_best_loss = val_metrics[0] < best_results[0]
        best_results[0] = min(val_metrics[0], best_results[0])
        is_best_f1 = val_metrics[1] > best_results[1]
        best_results[1] = max(val_metrics[1], best_results[1])

        # save model
        save_checkpoint({
            "epoch": epoch + 1,
            "model_name": config.model_name,
            "state_dict": model.state_dict(),
            "best_loss": best_results[0],
            "optimizer": optimizer.state_dict(),
            "fold": fold,
            "best_f1": best_results[1],
        }, is_best_loss, is_best_f1, fold)

        # print logs
        print('\r', end='', flush=True)
        log.write(
            logging_pattern % (
                "best", epoch, epoch,
                train_metrics[0], train_metrics[1],
                val_metrics[0], val_metrics[1],
                str(best_results[0])[:8], str(best_results[1])[:8],
                time_to_str((timer() - start), 'min')
            )
        )
        log.write("\n")
        time.sleep(0.01)
def main():
    """Train a feed-forward style-transfer generator network (TensorFlow 1.x).

    Loads batch/content images, precomputes style Gram matrices for one style
    image, builds the generator and VGG loss graphs, then runs 40k Adam steps
    with periodic (every 250 iters) loss reporting, checkpointing and a
    save/restore guard against loss divergence. Finally exports the trained
    generator as a pb graph.
    """
    # ---- data: generator inputs + content targets ------------------------
    input_images, content_input_images = utils.load_pictures_for_feed(
        "\\batch",
        recursive=True,
        gen_res=conf.INPUT_RESOLUTION,
        content_res=conf.VGG_INPUT_RESOLUTION)
    print("Shuffle inputs")
    # Re-seeding before each shuffle keeps the two lists aligned pairwise.
    random.seed(conf.SEED)
    random.shuffle(input_images)
    random.seed(conf.SEED)
    random.shuffle(content_input_images)
    print("Done")
    style_red, avg_style_red = utils.load_image("\\styles\\rain_princess.jpg",
                                                between_01=True,
                                                substract_mean=False)
    # Style Grams and content activations are fixed, so compute them once up front.
    pre_style_grams, pre_content_tensor = precompute_style_gram(
        style_red, content_input_images)

    # ---- build generator graph -------------------------------------------
    gen_graph, input_image, variables_gen_filter, variables_gen_bias, variables_scalars = gn.build_gen_graph_deep(
        tf, input_pictures=conf.BATCH_SIZE, width_res=conf.INPUT_RESOLUTION)
    gen_image = gen_graph['output']
    pre_content_tensor_shape = np.shape(pre_content_tensor)
    # Placeholder fed with the precomputed VGG content activations per batch.
    content_layer = tf.placeholder('float32', [
        conf.BATCH_SIZE, pre_content_tensor_shape[1],
        pre_content_tensor_shape[2], pre_content_tensor_shape[3]
    ], name="content_layer")
    #gen_shape = utils.tensorshape_to_int_array(gen_image.get_shape())
    #cut_1 = int((gen_shape[1] - conf.VGG_INPUT_RESOLUTION) / 2)
    #cut_2 = int((gen_shape[2] - conf.VGG_INPUT_RESOLUTION) / 2)
    #batch = tf.slice(gen_image, [0, cut_1, cut_2, 0], [gen_shape[0], conf.VGG_INPUT_RESOLUTION, conf.VGG_INPUT_RESOLUTION, gen_shape[3]])
    # Rescale generator output to [0, 1] before feeding it into VGG.
    batch = gen_image / 255.0
    print(utils.tensorshape_to_int_array(batch.get_shape()))
    graph = vn.load_vgg_input(tf, batch)

    # ---- losses: weighted content + style + total-variation ---------------
    content_loss = conf.CONTENT_WEIGHT * calc_content_loss(
        graph, content_layer)
    style_loss = conf.STYLE_WEIGHT * calc_style_loss_64(graph, pre_style_grams)
    tv_loss = conf.TV_WEIGHT * calc_tv_loss(gen_image)
    loss = content_loss + style_loss + tv_loss
    learning_rate = conf.LEARNING_RATE
    var_learning_rate = tf.placeholder("float32")

    # ---- initial feed dict -------------------------------------------------
    image_counter = 0
    assert len(input_images) >= conf.BATCH_SIZE
    feed = {}
    feed[input_image] = input_images[image_counter:image_counter + conf.BATCH_SIZE]
    #feed[content_input] = content_input_images[image_counter : image_counter + BATCH_SIZE]
    feed[content_layer] = pre_content_tensor[image_counter:image_counter + conf.BATCH_SIZE]
    # feed[style_image] = style_red.reshape(1, 224, 224,3)
    feed[var_learning_rate] = learning_rate
    # Advance the window; wrap (and avoid a short final batch) when near the end.
    image_counter = (image_counter + conf.BATCH_SIZE) % len(input_images)
    if image_counter + conf.BATCH_SIZE > len(input_images):
        image_counter = 0

    with tf.Session() as sess:
        # set log directory
        #summary_writer = tf.train.SummaryWriter(conf.project_path + conf.log_train, graph_def=sess.graph_def)
        #optimizer = tf.train.MomentumOptimizer(learning_rate=var_learning_rate, momentum=0.9)
        optimizer = tf.train.AdamOptimizer(learning_rate=var_learning_rate)
        #optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
        # Only generator variables are trained; the VGG weights stay frozen.
        variables = variables_gen_filter + variables_gen_bias + variables_scalars
        train_step = optimizer.minimize(loss, var_list=variables)
        print('number of variables : ' + str(len(tf.trainable_variables())))
        init = tf.global_variables_initializer()
        sess.run(init, feed)
        loading_directory = "\\version_61_k"
        saving_directory = "\\version_61_k"
        starting_pic_num = 0
        saver = nio.create_saver(tf, sess)
        nio.load_gen_last_checkpoint(tf, sess, saver, path=loading_directory)
        i = 0
        # Baselines for the per-report improvement ratios.
        last_l = sess.run(loss, feed_dict=feed)
        last_cl = sess.run(content_loss, feed_dict=feed)
        last_sl = sess.run(style_loss, feed_dict=feed)
        last_tvl = sess.run(tv_loss, feed_dict=feed)
        #last_wl = sess.run(weight_loss, feed_dict=feed)
        start_training_time = time.time()
        last_training_checkpoint_time = start_training_time
        # Divergence guard: count consecutive worsening reports; after 5, drop
        # the LR 10x and restore the last good checkpoint. `avoid_save_loss`
        # holds the loss to beat before checkpointing is allowed again.
        neg_loss_counter = 0
        avoid_save_loss = -1.0  # -1.0 means "saving allowed"
        restore = False
        last_saved_iteration = 0
        for i in range(40000):
            if (i % 10 == 0):
                print(i)
            # Every 250 iterations: report losses, save image/checkpoint,
            # and apply the divergence guard.
            if i % 250 == 0:
                l = sess.run(loss, feed_dict=feed)
                if (last_l - l) < 0 and i != 0:
                    # Loss got worse since the last report.
                    avoid_save_loss = last_l
                    neg_loss_counter += 1
                    print('neg loss -> counter increase :' + str(neg_loss_counter))
                    if neg_loss_counter == 5:
                        learning_rate /= 10.0
                        neg_loss_counter = 0
                        restore = True
                        print('neg loss -> reset counters to 0')
                        print("new learning rate : " + str(learning_rate))
                else:
                    if avoid_save_loss != -1.0:
                        if l < avoid_save_loss:
                            # Recovered past the previous best — re-enable saving.
                            avoid_save_loss = -1.0
                            neg_loss_counter = 0
                            print("loss reached best result again")
                            print("reset counter to 0")
                        else:
                            print(
                                "avoid saving until loss becomes smaller again:"
                                + str(l - avoid_save_loss))
                print('learning rate : ' + str(learning_rate))
                print('loss : ' + str(l))
                print('loss_improvement : ' + str((last_l - l) / last_l))
                last_l = l
                cl = sess.run(content_loss, feed_dict=feed)
                print('content_loss : ' + str(cl))
                print('content_loss_improvement : ' + str((last_cl - cl) / last_cl))
                last_cl = cl
                sl = sess.run(style_loss, feed_dict=feed)
                print('style_loss : ' + str(sl))
                print('style_loss_improvement : ' + str((last_sl - sl) / last_sl))
                last_sl = sl
                tvl = sess.run(tv_loss, feed_dict=feed)
                print('tv_loss : ' + str(tvl))
                print('tv_loss_improvement : ' + str((last_tvl - tvl) / last_tvl))
                last_tvl = tvl
                t = time.time()
                print('training time: ' + utils.time_to_str(t - start_training_time))
                print('training time since last checkpoint: '
                      + utils.time_to_str(t - last_training_checkpoint_time))
                last_training_checkpoint_time = t
                # Snapshot the current generated images for visual inspection.
                utils.save_image(saving_directory,
                                 '\\im' + str(i + starting_pic_num),
                                 sess.run(gen_image, feed_dict=feed),
                                 to255=False)
                if restore == False:
                    if avoid_save_loss == -1:
                        nio.save_gen_checkpoint(sess, saver, path=saving_directory)
                        last_saved_iteration = i
                else:
                    # Diverged: roll back to the last saved checkpoint.
                    print("Restoring last checkpoint -> iteration : "
                          + str(last_saved_iteration))
                    nio.load_gen_last_checkpoint(tf, sess, saver, path=saving_directory)
                    restore = False
            # One optimization step, then advance the input window for the next step.
            sess.run(train_step, feed_dict=feed)
            feed[input_image] = input_images[image_counter:image_counter + conf.BATCH_SIZE]
            feed[content_layer] = pre_content_tensor[
                image_counter:image_counter + conf.BATCH_SIZE]
            image_counter = (image_counter + conf.BATCH_SIZE) % len(input_images)
            if image_counter + conf.BATCH_SIZE > len(input_images):
                image_counter = 0
        # ---- final snapshot + export --------------------------------------
        utils.save_image(saving_directory,
                         '\\im' + str(i + starting_pic_num + 1),
                         sess.run(gen_image, feed_dict=feed),
                         to255=False)
        print(sess.run(loss, feed_dict=feed))
        if avoid_save_loss == -1:
            nio.save_gen_checkpoint(sess, saver, path=saving_directory)
            ai.export_gen_graph(tf, sess, variables_gen_filter,
                                variables_gen_bias, variables_scalars,
                                saving_directory)
        else:
            # Training ended while in a "worse than best" state — export the
            # last good checkpoint instead of the current weights.
            print("Restoring last checkpoint -> iteration : " + str(last_saved_iteration))
            nio.load_gen_last_checkpoint(tf, sess, saver, path=saving_directory)
            print("export pb-File")
            ai.export_gen_graph(tf, sess, variables_gen_filter,
                                variables_gen_bias, variables_scalars,
                                saving_directory)
def train_one_epoch_mixup(train_loader, mix_loader, model, criterion, optimizer,
                          epoch, meters, since, alpha=0.4, log=None):
    """Train `model` for one epoch with mixup augmentation.

    Each step draws one batch from `train_loader` and one from `mix_loader`,
    mixes inputs and targets with a Beta(alpha, alpha) weight, and optimizes
    `criterion` on the mixed batch. Running loss and macro-F1 averages are
    appended to `meters` and the updated dict is returned.

    Args:
        train_loader / mix_loader: iterables of (images, targets) batches;
            iteration stops at the shorter of the two (zip semantics).
        model: network to train (moved to CUDA by the caller).
        criterion: loss on (logits, mixed targets).
        optimizer: optimizer stepping `model`'s parameters.
        epoch: current epoch index (for logging only).
        meters: dict with 'loss' and 'f1' history lists; mutated in place.
        since: start timestamp used for the elapsed-time log column.
        alpha: Beta distribution parameter for the mixup weight.
        log: optional file-like object the per-batch line is printed to.

    Returns:
        The same `meters` dict with this epoch's averages appended.
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()
    # Seed the "previous/best" log columns from history (zeros on the first epoch).
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0
    for batch_id, ((x1, y1), (x2, y2)) in enumerate(zip(train_loader, mix_loader)):
        batch_x1 = x1.cuda(non_blocking=True)
        batch_x2 = x2.cuda(non_blocking=True)
        # Mixup: convex combination of the two batches (inputs and targets alike).
        lam = np.random.beta(alpha, alpha)
        batch_x = lam * batch_x1 + (1.0 - lam) * batch_x2
        batch_y1 = torch.Tensor(np.array(y1)).float().cuda(non_blocking=True)
        batch_y2 = torch.Tensor(np.array(y2)).float().cuda(non_blocking=True)
        batch_y = lam * batch_y1 + (1.0 - lam) * batch_y2
        output = model(batch_x)
        loss = criterion(output, batch_y)
        losses.update(loss.item(), batch_x.size(0))
        # Binarize mixed targets at 0.5 and predictions at 0.15 for the F1 metric.
        f1_batch = f1_score(batch_y.cpu() > 0.5,
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))
        optimizer.zero_grad()
        loss.backward()
        # gradient clip
        # FIX: clip_grad_norm (no underscore) is deprecated and removed in
        # modern PyTorch; clip_grad_norm_ is the in-place replacement.
        if cfg.grident_clip:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()
        # FIX: "Besr F1" typo in the log column header corrected to "Best F1".
        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)
    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)
    return meters
def draw(self):
    """Render the base screen, then blit the elapsed-time message.

    The text is horizontally centered on the window and drawn at y=250.
    """
    super().draw()
    # Renamed from `time` to avoid shadowing the stdlib module name.
    message = "Votre temps était : " + time_to_str(self.game.score.time)
    time_surface = self.normal_font.render(message, 1, self.color)
    window = self.game.window
    x = window.get_width() / 2 - time_surface.get_rect().centerx
    window.blit(time_surface, (x, 250))
def run(model, net, datapath, labelpath, csvpath, winsize=48):
    """Train `model` on the SAR dataset, evaluate each epoch with early
    stopping, checkpoint best acc/loss/f1 snapshots, then run the best-loss
    model on the test split and save its predictions.

    Args:
        model: network to train (moved to `device` below).
        net: passed through to SARDataset; meaning defined by that class —
            presumably selects the patch/feature variant (TODO confirm).
        datapath / labelpath: data and label locations for SARDataset.
        csvpath: directory containing train.csv / val.csv / test.csv.
        winsize: window size handed to SARDataset (default 48).
    """
    fold = 0
    # 4.1 mkdirs — ensure weight/best-model/result directories exist.
    if not os.path.exists(config.weights + config.model_name + os.sep + str(fold)):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.results):
        os.mkdir(config.results)
    #4.3 optim & criterion
    optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9,
                          weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().to(device)
    start_epoch = 0
    best_acc = 0
    best_loss = np.inf
    best_f1 = 0
    # best_results / val_metrics layout: [accuracy, loss, f1].
    best_results = [0, np.inf, 0]
    val_metrics = [0, np.inf, 0]
    model.to(device)
    train_lst = pd.read_csv(csvpath + "train.csv")
    train_gen = SARDataset(train_lst, datapath, labelpath, winsize, net)
    train_loader = DataLoader(
        train_gen,
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=0)  #num_worker is limited by shared memory in Docker!
    val_lst = pd.read_csv(csvpath + "val.csv")
    val_gen = SARDataset(val_lst, datapath, labelpath, winsize, net)
    val_loader = DataLoader(val_gen, batch_size=config.batch_size,
                            shuffle=False, pin_memory=True, num_workers=0)
    start = timer()
    #train
    for epoch in range(0, config.epochs):  #config.epochs
        # train
        train_metrics = train(train_loader, model, criterion, optimizer, epoch,
                              val_metrics, best_results, start)
        # val
        val_metrics, early_stop = evaluate(val_loader, model, criterion, epoch,
                                           train_metrics, best_results, start)
        if early_stop:
            print("Early stopping")
            break
        # check results — track bests per metric (acc/f1 maximized, loss minimized).
        is_best_acc = val_metrics[0] > best_results[0]
        best_results[0] = max(val_metrics[0], best_results[0])
        is_best_loss = val_metrics[1] < best_results[1]
        best_results[1] = min(val_metrics[1], best_results[1])
        is_best_f1 = val_metrics[2] > best_results[2]
        best_results[2] = max(val_metrics[2], best_results[2])
        # save model — flags tell save_checkpoint which "best" copies to refresh.
        utils.save_checkpoint(
            {
                "epoch": epoch + 1,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "best_acc": best_results[0],
                "best_loss": best_results[1],
                "optimizer": optimizer.state_dict(),
                "fold": fold,
                "best_f1": best_results[2],
            }, is_best_acc, is_best_loss, is_best_f1, fold)
        # Clear the in-place progress line, then print the epoch summary row.
        print('\r', end='', flush=True)
        print('%s %5.1f %6.1f | %0.3f %0.3f %0.3f | %0.3f %0.3f %0.3f | %s %s %s | %s' % (\
            "best", epoch, epoch,
            train_metrics[0], train_metrics[1], train_metrics[2],
            val_metrics[0], val_metrics[1], val_metrics[2],
            str(best_results[0])[:8], str(best_results[1])[:8], str(best_results[2])[:8],
            utils.time_to_str((timer() - start), 'min'))
        )
    # ---- test with the best checkpoint(s) ---------------------------------
    test_lst = pd.read_csv(csvpath + "test.csv")
    test_gen = SARDataset(test_lst, datapath, labelpath, winsize, net)
    test_loader = DataLoader(test_gen, 500, shuffle=False, pin_memory=True,
                             num_workers=0)
    for point in ['loss']:  #'loss','acc','f1'
        best_model = torch.load(
            "%s/%s_fold_%s_model_best_%s.pth.tar"
            % (config.best_models, config.model_name, str(fold), point))
        model.load_state_dict(best_model["state_dict"])
        plabel = test(test_loader, model)
        np.save(config.results + config.model_name + str(point) + '.npy', plabel)
if not opts.normalize_data: x_test = [x_ * 255. for x_ in x_test] # y_pred = model(x_test) y_pred = [] for i in range(x_test[0].shape[0]): if use_cuda: x_now = [x_[i:i + 1].cuda() for x_ in x_test] else: x_now = [x_[i:i + 1] for x_ in x_test] y_pred.append(model(x_now)) y_pred = torch.cat(y_pred, dim=0) # model.cpu() # move model to CPU # x_test = [x_.cpu() for x_ in x_test] y_out = testing_datagen.process_output(y_pred) testing_datagen.write_data(y_out, file_path) del x_test, y_pred # assert len(test_slots[filename.split('_')[0]]) == batch_size time_so_far = (time.time() - start_time) step_time = time_so_far / (step + 1) time_spent_str = time_to_str(time_so_far) time_str = time_to_str(step_time * (steps_per_testing - step)) print( f'[{itr+1}/{n_models}]: {model_name} : ETA [{time_spent_str}<{time_str}]: done - ' f'[{step + 1}/{steps_per_testing}]', end='\r')
def main():
    """Run 5-fold training of a text classifier (Quora-style pipeline).

    Loads and preprocesses data, builds an embedding matrix from GloVe /
    paragram / fastText, trains one model per StratifiedKFold split with
    LR scheduling, checkpointing, TensorBoard logging and early stopping,
    accumulates out-of-fold and averaged test predictions, then picks the
    best threshold and writes a submission CSV.
    """
    # 4.1 mkdirs — ensure all output directories exist before training.
    if not os.path.exists(config.submit):
        os.makedirs(config.submit)
    if not os.path.exists(config.weights + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.makedirs(config.weights + config.model_name + os.sep + 'fold_'+ str(config.fold))
    if not os.path.exists(config.best_models):
        os.mkdir(config.best_models)
    if not os.path.exists(config.logs):
        os.mkdir(config.logs)
    if not os.path.exists(config.best_models + config.model_name ):
        os.mkdir(config.best_models + config.model_name)
    if not os.path.exists(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold)):
        os.mkdir(config.best_models + config.model_name + os.sep + 'fold_'+str(config.fold))
    tqdm.pandas()
    start_time = time.time()
    # ---- data + embeddings -------------------------------------------------
    train_X, test_X, train_y, word_index = utils.load_and_prec(config)
    print("Start embedding matrix............")
    embedding_matrix_1 = utils.load_glove(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_2 = utils.load_para(word_index, config.embedding_dir, config.max_features)
    embedding_matrix_3 = utils.load_fasttext(word_index, config.embedding_dir, config.max_features)
    total_time = (time.time() - start_time) / 60
    print("Took {:.2f} minutes".format(total_time))
    # Combine the three embedding sources either by averaging or concatenation.
    if config.embed_method == "mean":
        embedding_matrix = np.mean([embedding_matrix_1, embedding_matrix_2, embedding_matrix_3], axis=0)
    elif config.embed_method =="concat":
        embedding_matrix = np.concatenate((embedding_matrix_1, embedding_matrix_2, embedding_matrix_3), axis=1)
    print(np.shape(embedding_matrix))
    # # del embedding_matrix_1, embedding_matrix_2
    # del embedding_matrix_1

    # -------------------------------------------------------
    # training
    # -------------------------------------------------------
    train_preds = np.zeros((len(train_X)))   # out-of-fold predictions
    test_preds = np.zeros((len(test_X)))     # fold-averaged test predictions
    x_test_cuda = torch.tensor(test_X, dtype=torch.long).cuda()
    test_dataset = torch.utils.data.TensorDataset(x_test_cuda)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=config.batch_size,
                                              shuffle=False)
    splits = list(StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED).split(train_X, train_y))
    sigmoid = nn.Sigmoid()
    loss_fn = torch.nn.BCEWithLogitsLoss(reduction="mean")
    # k-fold
    for fold, (train_idx, valid_idx) in enumerate(splits):
        print(f'Fold {fold + 1}')
        # tflogger — one TensorBoard log directory per fold.
        tflogger = utils.TFLogger(os.path.join('../results', 'TFlogs',
                                               config.model_name + "_fold{0}_{1}".format(config.fold, fold)))
        # initialize the early_stopping object
        early_stopping = utils.EarlyStopping(patience=7, verbose=True)
        x_train_fold = torch.tensor(train_X[train_idx], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(train_y[train_idx, np.newaxis], dtype=torch.float32).cuda()
        x_val_fold = torch.tensor(train_X[valid_idx], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(train_y[valid_idx, np.newaxis], dtype=torch.float32).cuda()
        # Model selection by config name; a fresh model is built for every fold.
        if config.model == "baseline_bidir_LSTM_GRU":
            model = baseline_bidir_LSTM_GRU.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_pytorch":
            model = baseline_pytorch.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_gru_attention":
            model = baseline_lstm_gru_attention.NeuralNet(config, embedding_matrix)
        elif config.model == "baseline_lstm_lstm_attention":
            model = baseline_lstm_lstm_attention.NeuralNet(config, embedding_matrix)
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
        # scheduler
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
        train_dataset = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid_dataset = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=config.batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=config.batch_size,
                                                   shuffle=False)
        valid_loss = np.inf
        # initialize best loss
        best_loss = np.inf
        start_time = timer()
        for epoch in range(config.epochs):
            scheduler.step(epoch)
            # train
            lr = utils.get_learning_rate(optimizer)
            train_loss = train(train_loader=train_loader,model=model,loss_fn=loss_fn,
                               optimizer=optimizer, epoch=epoch,valid_loss=valid_loss,start=start_time)
            # validate
            valid_loss, valid_output = evaluate(val_loader=valid_loader, model=model,
                                                loss_fn=loss_fn, epoch=epoch,
                                                train_loss=train_loss, start_time=start_time)
            test_preds_fold = np.zeros(len(test_X))
            # check results
            is_best_loss = valid_loss < best_loss
            if is_best_loss:
                best_epoch = epoch
                best_train_loss = train_loss
            # update best loss
            best_loss = min(valid_loss, best_loss)
            # save NeuralNet
            utils.save_checkpoint({
                "epoch": epoch,
                "model_name": config.model_name,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "fold": config.fold,
                "kfold": config.fold,
            },is_best_loss, config.fold, fold, config)
            # print logs — clear the in-place progress line, then one summary row.
            print('\r', end='', flush=True)
            message = '%s %5.1f %6.1f %.2E | %0.3f | %0.3f | %s' % ( \
                "best", best_epoch, best_epoch, Decimal(lr), best_train_loss, best_loss,
                utils.time_to_str((timer() - start_time), 'min'))
            log.write(message)
            log.write("\n")
            time.sleep(0.01)
            # ================================================================== #
            #                        Tensorboard Logging                         #
            # ================================================================== #
            # 1. Log scalar values (scalar summary)
            info = {'Train_loss': train_loss, 'Valid_loss': valid_loss, 'Learnging_rate': lr}
            for tag, value in info.items():
                tflogger.scalar_summary(tag, value, epoch)
            # 2. Log values and gradients of the parameters (histogram summary)
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag, value.data.cpu().numpy(), epoch)
                if not value.grad is None:
                    tflogger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), epoch)
            # -------------------------------------
            # end tflogger
            # ================================================================== #
            #                           Early stopping                           #
            # ================================================================== #
            # early_stopping needs the validation loss to check if it has decresed,
            # and if it has, it will make a checkpoint of the current NeuralNet
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                print("Early stopping")
                break
        # end looping all epochs
        # Out-of-fold predictions from the last validation pass of this fold.
        train_preds[valid_idx] = sigmoid(valid_output).cpu().data.numpy()[:, 0]
        # test — reload the best-loss checkpoint for this fold before predicting.
        checkpoint_path = os.path.join("{0}{1}/fold_{2}/fold_{3}_model_best_loss.pth.tar".
                                       format(config.best_models, config.model_name,
                                              str(config.fold), fold))
        best_model = torch.load(checkpoint_path)
        print("Test on epoch:", best_model['epoch'])
        model.load_state_dict(best_model["state_dict"])
        test_preds_fold = test(test_loader=test_loader, model=model)
        test_preds += test_preds_fold / len(splits)
    # end k-fold
    # Pick the decision threshold that maximizes F1 on out-of-fold predictions.
    search_result = threshold_search(train_y, train_preds)
    print(search_result)
    log.write("Threshold:{0}, f1:{1}".format(search_result['threshold'], search_result['f1']))
    sub = pd.read_csv('../input/sample_submission.csv')
    sub.prediction = test_preds > search_result['threshold']
    sub.to_csv("submission_{0}.csv".format(config.model_name), index=False)
    print('Test successful!')