def __init__(self, opt):
    model, parameters, mean, std = generate_model(opt)
    logging.info('loading checkpoint {}'.format(opt.checkpoint))
    checkpoint = torch.load(opt.checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    self.model = model
    self.transform = get_inference_transform(mean, std)
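# Usage sketch (the enclosing class is not shown in this snippet, so
# `Inferencer` below is a hypothetical stand-in; get_inference_transform is
# assumed to return a callable mapping a PIL image to a normalized CHW tensor):
#
#     inferencer = Inferencer(opt)
#     batch = inferencer.transform(Image.open('person.jpg')).unsqueeze(0)
#     with torch.no_grad():
#         logits = inferencer.model(batch)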
def erised_model_convertor(opt):
    if not os.path.exists(opt.out_path):
        os.makedirs(opt.out_path)
    analysis_path = os.path.join(opt.out_path, '{}_analysis.csv'.format(opt.out_model_name))
    prototxt_path = os.path.join(opt.out_path, '{}.prototxt'.format(opt.out_model_name))
    caffemodel_path = os.path.join(opt.out_path, '{}.caffemodel'.format(opt.out_model_name))

    # Rebuild the attribute model from the training options and load its weights.
    attr_opt = argparse.Namespace()
    attr_opt.model = opt.model
    attr_opt.conv = opt.conv
    attr_opt.checkpoint = opt.checkpoint
    attr_opt.pretrain = False
    model, parameters, mean, std = generate_model(attr_opt)
    checkpoint = torch.load(attr_opt.checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    # Unwrap nn.DataParallel and move to CPU for tracing.
    attributer_model = model.module.cpu()

    # Trace the network once with a dummy input to record its layers,
    # then export the analysis CSV and the Caffe definition/weights.
    input_tensor = torch.ones(1, 3, 224, 224)
    blob_dict, tracked_layers = pytorch_analyser.analyse(attributer_model, input_tensor)
    pytorch_analyser.save_csv(tracked_layers, analysis_path)
    print('Saved', analysis_path)

    input_var = Variable(input_tensor)
    pytorch_to_caffe.trans_net(attributer_model, input_var, opt.out_model_name)
    pytorch_to_caffe.save_prototxt(prototxt_path)
    print('Saved', prototxt_path)
    pytorch_to_caffe.save_caffemodel(caffemodel_path)
    print('Saved', caffemodel_path)
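# A minimal driver sketch for the converter above. The model/conv/checkpoint
# values mirror the conversion script later in this file; out_path and
# out_model_name are illustrative assumptions, not repository defaults.
if __name__ == '__main__':
    opt = argparse.Namespace(
        model='all_in_one', conv='resnet18',
        checkpoint='../models/save_36.pth',
        out_path='../models/caffe', out_model_name='all_resnet18')
    erised_model_convertor(opt)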
def run(opt):
    if opt.log_file is not None:
        logging.basicConfig(filename=opt.log_file, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    # logger.addHandler(logging.StreamHandler())
    logger = logger.info
    writer = SummaryWriter(log_dir=opt.log_dir)
    model_timer, data_timer = Timer(average=True), Timer(average=True)

    # Training variables
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt)

    # Learning configurations
    if opt.optimizer == 'sgd':
        optimizer = SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                        weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    elif opt.optimizer == 'adam':
        optimizer = Adam(parameters, lr=opt.lr, betas=opt.betas)
    else:
        raise Exception('Optimizer {} is not supported'.format(opt.optimizer))
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=opt.lr_patience)

    # Loading checkpoint: restore the epoch counter, weights and optimizer state.
    if opt.checkpoint:
        logger('loading checkpoint {}'.format(opt.checkpoint))
        checkpoint = torch.load(opt.checkpoint)
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    logger('Loading dataset')
    train_loader, val_loader, _ = get_data_market(opt, mean, std)

    device = 'cuda'
    trainer = create_supervised_trainer(
        model, optimizer,
        lambda pred, target: loss_market(pred, target, loss_fns=training_loss),
        device=device)
    evaluator = create_supervised_evaluator(
        model,
        metrics={'cosine_metric': CosineMetric(cmc_metric, testing_loss)},
        device=device)

    # Training timer handlers
    model_timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED, step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based iteration index within the current epoch
        # ('it' avoids shadowing the iter builtin).
        it = (engine.state.iteration - 1) % len(train_loader) + 1
        if it % opt.log_interval == 0:
            logger('Epoch[{}] Iteration[{}/{}] Loss: {:.2f} '
                   'Model Process: {:.3f}s/batch '
                   'Data Preparation: {:.3f}s/batch'.format(
                       engine.state.epoch, it, len(train_loader),
                       engine.state.output, model_timer.value(), data_timer.value()))
            writer.add_scalar('training/loss', engine.state.output, engine.state.iteration)

    # Log/Plot learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[-1]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar('learning_rate', lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(opt.result_path, 'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # Validation event handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics['cosine_metric']
        logger('Validation Results - Epoch: {}'.format(engine.state.epoch))
        for m, val in metrics.items():
            logger('{}: {:.4f}'.format(m, val))
        # Summary metrics get their own TensorBoard section.
        for m, val in metrics.items():
            if m in ['total_loss', 'cmc']:
                prefix = 'validation_summary/{}'
            else:
                prefix = 'validation/{}'
            writer.add_scalar(prefix.format(m), val, engine.state.epoch)
        # Update the learning rate from the CMC score ('max' mode above).
        scheduler.step(metrics['cmc'])

    # Kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)
    writer.close()
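# A minimal driver sketch for run(). Only the opt fields read directly in
# run() are set, with illustrative values (assumptions, not repository
# defaults); fields consumed inside get_data_market are omitted here.
if __name__ == '__main__':
    opt = argparse.Namespace(
        log_file=None, log_dir='runs/market', optimizer='sgd',
        lr=0.01, momentum=0.9, weight_decay=1e-4, nesterov=True,
        betas=(0.9, 0.999), lr_patience=5, checkpoint='',
        log_interval=10, save_interval=5, result_path='results',
        model='resnet18', n_epochs=60)
    run(opt)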
parser = argparse.ArgumentParser()
parser.add_argument('--attribute_model', default='all_in_one', type=str, help='all_in_one')
parser.add_argument('--attribute_conv', default='resnet18', type=str)
parser.add_argument('--attribute_checkpoint', default='', type=str,
                    help='Saved data (.pth) of a previous training run')
opt = parser.parse_args()

attr_opt = argparse.Namespace()
attr_opt.model = opt.attribute_model
attr_opt.conv = opt.attribute_conv
# Fall back to the bundled checkpoint when --attribute_checkpoint is not given.
attr_opt.checkpoint = opt.attribute_checkpoint or '../models/save_36.pth'
attr_opt.pretrain = False

model, parameters, mean, std = generate_model(attr_opt)
checkpoint = torch.load(attr_opt.checkpoint)
model.load_state_dict(checkpoint['state_dict'])
model.eval()
# Unwrap nn.DataParallel and move to CPU before conversion.
model = model.module.cpu()
convert_pytorch_to_tensorflow(model, (224, 224), 10, '../models/all_resnet18')
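# Optional sanity check (a sketch, not repository code): push a dummy batch
# through the CPU model to confirm the forward pass accepts the 224x224 RGB
# input shape the converter assumes.
dummy_input = torch.ones(1, 3, 224, 224)
with torch.no_grad():
    dummy_output = model(dummy_input)
print('Forward pass OK, output type:', type(dummy_output))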
def run(opt):
    if opt.log_file is not None:
        logging.basicConfig(filename=opt.log_file, level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    # logger.addHandler(logging.StreamHandler())
    logger = logger.info

    # Decide what tasks need to be performed given the datasets
    tasks = get_tasks(opt)

    # Generate the model based on the tasks
    logger('Loading models')
    model, parameters, mean, std = generate_model(opt, tasks)

    logger('Loading dataset')
    attrs, train_loader, val_loader, _ = get_personattr_data(opt, tasks, mean, std)
    multi_dataset = len(opt.dataset.split(',')) > 1
    writer = create_summary_writer(model, train_loader, opt.log_dir)

    # Learning configurations
    if opt.optimizer == 'sgd':
        optimizer = SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                        weight_decay=opt.weight_decay, nesterov=opt.nesterov)
    elif opt.optimizer == 'adam':
        optimizer = Adam(parameters, lr=opt.lr, betas=opt.betas)
    else:
        raise Exception('Optimizer {} is not supported'.format(opt.optimizer))
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=opt.lr_patience)

    # Loading checkpoint: restore the epoch counter, weights and optimizer state.
    if opt.checkpoint:
        logger('loading checkpoint {}'.format(opt.checkpoint))
        checkpoint = torch.load(opt.checkpoint)
        opt.begin_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    # Prepare losses and metrics for the trainer and evaluator
    device = 'cuda'
    loss_fns, metrics = get_losses_metrics(opt.categorical_loss, opt.output_recognizable)
    trainer = create_supervised_trainer(
        model, optimizer,
        lambda pred, target: multitask_loss(pred, target, loss_fns=loss_fns),
        device=device)
    attr_display_names = get_attribute_names(tasks)
    evaluator = create_supervised_evaluator(
        model,
        metrics={'multitask': MultiAttributeMetric(attr_display_names, metrics)},
        device=device)

    # Training timer handlers
    model_timer, data_timer = Timer(average=True), Timer(average=True)
    model_timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                       pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
    data_timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_COMPLETED,
                      pause=Events.ITERATION_STARTED, step=Events.ITERATION_STARTED)

    # Training log/plot handlers
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based iteration index within the current epoch
        # ('it' avoids shadowing the iter builtin).
        it = (engine.state.iteration - 1) % len(train_loader) + 1
        if it % opt.log_interval == 0:
            logger('Epoch[{}] Iteration[{}/{}] Loss: {:.2f} '
                   'Model Process: {:.3f}s/batch '
                   'Data Preparation: {:.3f}s/batch'.format(
                       engine.state.epoch, it, len(train_loader), engine.state.output,
                       model_timer.value(), data_timer.value()))
            writer.add_scalar('training/loss', engine.state.output, engine.state.iteration)

    # Log/Plot learning rate
    @trainer.on(Events.EPOCH_STARTED)
    def log_learning_rate(engine):
        lr = optimizer.param_groups[-1]['lr']
        logger('Epoch[{}] Starts with lr={}'.format(engine.state.epoch, lr))
        writer.add_scalar('learning_rate', lr, engine.state.epoch)

    # Checkpointing
    @trainer.on(Events.EPOCH_COMPLETED)
    def save_checkpoint(engine):
        if engine.state.epoch % opt.save_interval == 0:
            save_file_path = os.path.join(opt.result_path, 'save_{}.pth'.format(engine.state.epoch))
            states = {
                'epoch': engine.state.epoch,
                'arch': opt.model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)

    # Validation event handlers
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics['multitask']
        logger('Validation Results - Epoch: {}'.format(engine.state.epoch))
        for m, val in metrics['metrics'].items():
            writer.add_scalar('validation/{}'.format(m), val, engine.state.epoch)
        for m, val in metrics['summaries'].items():
            writer.add_scalar('validation_summary/{}'.format(m), val, engine.state.epoch)
        print_summar_table(opt.output_recognizable, logger, attr_display_names, metrics['logger'])
        # Update the learning rate from the validation mAP ('max' mode above).
        scheduler.step(metrics['summaries']['mAP'])

    # Kick everything off
    logger('Start training')
    trainer.run(train_loader, max_epochs=opt.n_epochs)
    writer.close()
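# For reference: ignite's create_supervised_trainer invokes the supplied loss
# as loss_fn(y_pred, y), which is why multitask_loss is wrapped in a lambda
# above. The stand-in below is only a sketch of that contract, assuming each
# task head contributes one scalar loss term; it is not the repository's
# multitask_loss.
def multitask_loss_sketch(preds, targets, loss_fns):
    # preds/targets are parallel sequences, one entry per task head.
    return sum(fn(p, t) for fn, p, t in zip(loss_fns, preds, targets))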