from settings import coefs
# number of training epochs, number of warm epochs, push start epoch, push epochs
from settings import num_train_epochs, num_warm_epochs, push_start, push_epochs

import copy

# train the model
log('start training')
for epoch in range(num_train_epochs):
    log('epoch: \t{0}'.format(epoch))

    if epoch < num_warm_epochs:
        tnt.warm_only(model=ppnet_multi, log=log)
        _ = tnt.train(model=ppnet_multi, dataloader=train_loader,
                      optimizer=warm_optimizer, class_specific=class_specific,
                      coefs=coefs, log=log)
    else:
        tnt.joint(model=ppnet_multi, log=log)
        joint_lr_scheduler.step()
        _ = tnt.train(model=ppnet_multi, dataloader=train_loader,
                      optimizer=joint_optimizer, class_specific=class_specific,
                      coefs=coefs, log=log)

    accu = tnt.test(model=ppnet_multi, dataloader=test_loader,
                    class_specific=class_specific, log=log)
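# For reference, a minimal sketch of what the tnt.warm_only / tnt.joint mode
# switches typically do in ProtoPNet-style code: the warm-up stage freezes the
# pretrained backbone and trains only the add-on layers and prototype vectors,
# while the joint stage makes everything trainable. The submodule names
# (features, add_on_layers, prototype_vectors, last_layer) follow the ProtoPNet
# convention and are assumptions about this codebase, not confirmed by it.

def warm_only_sketch(model):
    # model is a DataParallel wrapper, hence model.module
    for p in model.module.features.parameters():
        p.requires_grad = False
    for p in model.module.add_on_layers.parameters():
        p.requires_grad = True
    model.module.prototype_vectors.requires_grad = True
    for p in model.module.last_layer.parameters():
        p.requires_grad = True

def joint_sketch(model):
    # joint stage: unfreeze every parameter, including the backbone
    for p in model.module.parameters():
        p.requires_grad = True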
acc, _ = tnt.valid(model=vgg_multi, dataloader=valid_loader,
                   label2name=label2name, args=args,
                   class_specific=class_specific, log=log)

for epoch in range(warm_opt):
    log('epoch: \t{0}'.format(epoch))
    tnt.coarse_warm(model=vgg_multi, log=log)
    _ = tnt.train(model=vgg_multi, dataloader=train_loader,
                  label2name=label2name, optimizer=warm_optimizer, args=args,
                  class_specific=class_specific, log=log, warm_up=True)
    # if epoch == warm_opt - 1:
    #     log('Smart init of fine protos')
    #     vgg._smart_init_fine_protos()  # don't want these stuck in the corners immediately

log('optimize through protos')
for epoch in range(through_proto_opt):
    log('epoch: \t{0}'.format(epoch))
    # train
    tnt.up_to_protos(model=vgg_multi, log=log)
def run(cfg):
    data_path = cfg.root_path.path
    model_cfg = cfg.model.cfg
    model_path = cfg.model.path

    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    root_dir = '/home/zhuminqin/Code/DisentangleCNN'
    model_dir = os.path.join(root_dir, 'saved_model' + str(int(time.time())))
    makedir(model_dir)
    # snapshot the source files used for this run
    shutil.copy(os.path.join(root_dir, 'dis_model.py'),
                os.path.join(model_dir, 'dis_model.py'))
    shutil.copy(os.path.join(root_dir, 'train_and_test.py'),
                os.path.join(model_dir, 'train_and_test.py'))
    shutil.copy(os.path.join(root_dir, 'main.py'),
                os.path.join(model_dir, 'main.py'))

    log = SummaryWriter(log_dir=os.path.join(model_dir, 'log'))

    trans = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    data = CUB_dataset(data_path, trans=trans)
    train_data, test_data = data_split(
        data, os.path.join(data.root, 'CUB_200_2011/train_test_split.txt'))
    # note: the training batches are deliberately not shuffled here
    train_loader = DataLoader(train_data, batch_size=32, shuffle=False,
                              collate_fn=case_collete, num_workers=4)
    test_loader = DataLoader(test_data, batch_size=32, shuffle=False,
                             collate_fn=case_collete)

    # a visualization demo: log one batch of training images
    dataiter = iter(train_loader)
    _, images, labels = next(dataiter)  # iterator .next() was removed in recent PyTorch

    # create a grid of images and write it to TensorBoard
    img_grid = make_grid(images)
    log.add_image('cub_train_images', img_grid)

    net = Dis_features(model_cfg, model_path, pretrained=False,
                       batch_norm=True)
    net = net.cuda()
    log.add_graph(net, images.cuda())

    optimizer = torch.optim.Adam(params=net.parameters(), lr=0.0001,
                                 betas=(0.9, 0.999), eps=1e-8,
                                 weight_decay=0)

    for epoch in range(50):
        print('epoch: \t{0}'.format(epoch))
        acc = tnt.train(model=net, loader=train_loader, optimizer=optimizer,
                        log=log, epoch=epoch)
        log.add_scalar('pretrain_train/epoch_acc', acc * 100, epoch)
        acc = tnt.test(model=net, loader=test_loader, log=log, epoch=epoch)
        log.add_scalar('pretrain_test/epoch_acc', acc * 100, epoch)
        torch.save(net.state_dict(),
                   os.path.join(model_dir, 'cub{}_dis_adam.pth'.format(epoch)))
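# For reference, a minimal sketch of the data_split helper used above, under
# the assumption that it follows the CUB-200-2011 metadata format: each line
# of train_test_split.txt is "<image_id> <is_training_image>", with ids in the
# same order as the dataset samples. The helper name and return types mirror
# the call site; the real implementation may differ.
from torch.utils.data import Subset

def data_split_sketch(dataset, split_file):
    train_idx, test_idx = [], []
    with open(split_file) as f:
        for i, line in enumerate(f):
            _, is_train = line.strip().split()
            (train_idx if is_train == '1' else test_idx).append(i)
    return Subset(dataset, train_idx), Subset(dataset, test_idx)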
def main(args):
    with open("./configs/{}/{}_{}_{}.yaml".format(
            args.net, args.dataset, args.backbone, args.mode)) as fp:
        cfg = yaml.safe_load(fp)
    NET_ARGS = cfg['NET_ARGS']
    DATA_ARGS = cfg['DATA_ARGS']
    EXP_ARGS = cfg['EXP_ARGS']

    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    model_dir = os.path.join('./saved_models/', args.dataset, args.backbone,
                             args.net, args.mode)
    makedir(model_dir)
    log, logclose = create_logger(log_filename=os.path.join(
        model_dir, 'train_logger_{}.txt'.format(
            datetime.datetime.now().strftime("%H:%M:%S"))))
    img_dir = os.path.join(model_dir, 'img')
    makedir(img_dir)
    weight_matrix_filename = 'outputL_weights'
    prototype_img_filename_prefix = 'prototype-img'
    prototype_self_act_filename_prefix = 'prototype-self-act'
    proto_bound_boxes_filename_prefix = 'bb'
    log(pformat(cfg))

    # ---------------------------------------- Get DataLoaders ----------------------------------------------
    normalize = transforms.Normalize(mean=NET_ARGS['mean'],
                                     std=NET_ARGS['std'])
    train_transforms = transforms.Compose([
        transforms.Resize(size=(DATA_ARGS['img_size'],
                                DATA_ARGS['img_size'])),
        transforms.ToTensor(),
        normalize,
    ])
    train_dataset = datasets.ImageFolder(DATA_ARGS['train_dir'],
                                         train_transforms)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=EXP_ARGS['train_batch_size'],
        shuffle=True, num_workers=4, pin_memory=False)

    train_push_dataset = datasets.ImageFolder(
        DATA_ARGS['train_push_dir'],
        transforms.Compose([
            transforms.Resize(size=(DATA_ARGS['img_size'],
                                    DATA_ARGS['img_size'])),
            transforms.ToTensor(),
        ]))
    train_push_loader = torch.utils.data.DataLoader(
        train_push_dataset, batch_size=EXP_ARGS['train_push_batch_size'],
        shuffle=False, num_workers=4, pin_memory=False)

    test_dataset = datasets.ImageFolder(
        DATA_ARGS['test_dir'],
        transforms.Compose([
            transforms.Resize(size=(DATA_ARGS['img_size'],
                                    DATA_ARGS['img_size'])),
            transforms.ToTensor(),
            normalize,
        ]))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=EXP_ARGS['test_batch_size'],
        shuffle=False, num_workers=4, pin_memory=False)

    log('training set size: {0}'.format(len(train_loader.dataset)))
    log('push set size: {0}'.format(len(train_push_loader.dataset)))
    log('test set size: {0}'.format(len(test_loader.dataset)))
    log('batch size: {0}'.format(EXP_ARGS['train_batch_size']))

    # ------------------------------------ Model and Optimizer ----------------------------------------------
    ppnet = model_AttProto.construct_PPNet(
        base_architecture=NET_ARGS['base_architecture'],
        pretrained=True,
        img_size=DATA_ARGS['img_size'],
        prototype_shape=NET_ARGS['prototype_shape'],
        num_classes=DATA_ARGS['num_classes'],
        prototype_activation_function=NET_ARGS['prototype_activation_function'],
        add_on_layers_type=NET_ARGS['add_on_layers_type'],
        att_version=NET_ARGS['ATT_VERSION'])
    ppnet = ppnet.cuda()
    ppnet_multi = torch.nn.DataParallel(ppnet)
    class_specific = True

    if EXP_ARGS['RESUME']['iS_RESUME']:
        ppnet = torch.load(EXP_ARGS['RESUME']['PATH'])
        log(" Resumed from model: {}".format(EXP_ARGS['RESUME']['PATH']))
        ppnet_multi = torch.nn.DataParallel(ppnet)
        accu = tnt.test(model=ppnet_multi, dataloader=test_loader,
                        class_specific=True, log=log, EXP_ARGS=EXP_ARGS)
        log("\nInit Accuracy {:.2f} \n\n".format(accu))
        ppnet_multi = torch.nn.DataParallel(ppnet)

    warm_optimizer_lrs = EXP_ARGS['OPTIMIZER']['warm_optimizer_lrs']
    warm_optimizer_specs = [
        {
            'params': ppnet.add_on_layers.parameters(),
            'lr': warm_optimizer_lrs['add_on_layers'],
            'weight_decay': 1e-3
        },
        {
            'params': ppnet.prototype_vectors,
            'lr': warm_optimizer_lrs['prototype_vectors']
        },
        {
            'params': ppnet.att_layer.parameters(),
            'lr': warm_optimizer_lrs['att_layer'],
            'weight_decay': 1e-3
        },
    ]
    warm_optimizer = torch.optim.Adam(warm_optimizer_specs)

    joint_optimizer_lrs = EXP_ARGS['OPTIMIZER']['joint_optimizer_lrs']
    joint_optimizer_specs = [{
        'params': ppnet.features.parameters(),
        'lr': joint_optimizer_lrs['features'],
        'weight_decay': 1e-3
    }, {
        'params': ppnet.add_on_layers.parameters(),
        'lr': joint_optimizer_lrs['add_on_layers'],
        'weight_decay': 1e-3
    }, {
        'params': ppnet.prototype_vectors,
        'lr': joint_optimizer_lrs['prototype_vectors']
    }, {
        'params': ppnet.att_layer.parameters(),
        'lr': joint_optimizer_lrs['att_layer'],
        'weight_decay': 1e-3
    }]
    joint_optimizer = torch.optim.Adam(joint_optimizer_specs)
    joint_lr_scheduler = torch.optim.lr_scheduler.StepLR(
        joint_optimizer,
        step_size=int(joint_optimizer_lrs['joint_lr_step_size']),
        gamma=0.1)

    push_epochs = [
        i for i in range(EXP_ARGS['num_train_epochs']) if i % 10 == 0
    ]

    log('\n\n------------------------ Start Training ----------------------------\n\n')
    max_acc = 0.0
    max_acc_epoch = 0
    max_acc_iter = 0
    target_accu = 0.1
    for epoch in range(EXP_ARGS['start_epoch'],
                       EXP_ARGS['num_train_epochs']):
        log('------------------------- Epoch: {} -------------------------------------'.format(epoch))

        if epoch < EXP_ARGS['num_warm_epochs']:
            tnt.warm_only(model=ppnet_multi, log=log)
            _ = tnt.train(model=ppnet_multi, dataloader=train_loader,
                          optimizer=warm_optimizer,
                          class_specific=class_specific,
                          coefs=EXP_ARGS['LOSS']['loss_coefs_warm'],
                          log=log, EXP_ARGS=EXP_ARGS)
        else:
            tnt.joint(model=ppnet_multi, log=log)
            joint_lr_scheduler.step()
            _ = tnt.train(model=ppnet_multi, dataloader=train_loader,
                          optimizer=joint_optimizer,
                          class_specific=class_specific,
                          coefs=EXP_ARGS['LOSS']['loss_coefs_joint'],
                          log=log, EXP_ARGS=EXP_ARGS)

        accu = tnt.test(model=ppnet_multi, dataloader=test_loader,
                        class_specific=class_specific, log=log,
                        EXP_ARGS=EXP_ARGS)
        if accu > max_acc:
            max_acc = accu
            max_acc_iter = 0
            max_acc_epoch = epoch
            save.save_model_w_condition(model=ppnet, model_dir=model_dir,
                                        model_name='', accu=accu,
                                        target_accu=target_accu, log=log,
                                        best=True,
                                        stage='prepush_{}'.format(epoch))
        log("\nBest Accuracy {:.2f} at epoch {} and iter {}\n\n".format(
            max_acc, max_acc_epoch, max_acc_iter))

        if epoch >= EXP_ARGS['push_start'] and epoch in push_epochs:
            save.save_model_w_condition(model=ppnet, model_dir=model_dir,
                                        model_name='', accu=accu,
                                        target_accu=target_accu, log=log,
                                        best=True,
                                        stage='prepushfinal_{}'.format(epoch))
            log('\n------------------------- Push Prototypes -----------------------------')
            push.push_prototypes(
                train_push_loader,
                prototype_network_parallel=ppnet_multi,
                class_specific=class_specific,
                preprocess_input_function=preprocess_input_function,
                prototype_layer_stride=1,
                root_dir_for_saving_prototypes=img_dir,
                epoch_number=epoch,
                prototype_img_filename_prefix=prototype_img_filename_prefix,
                prototype_self_act_filename_prefix=prototype_self_act_filename_prefix,
                proto_bound_boxes_filename_prefix=proto_bound_boxes_filename_prefix,
                save_prototype_class_identity=True,
                log=log)
            accu = tnt.test(model=ppnet_multi, dataloader=test_loader,
                            class_specific=class_specific, log=log,
                            EXP_ARGS=EXP_ARGS)
            save.save_model_w_condition(model=ppnet, model_dir=model_dir,
                                        model_name='', accu=accu,
                                        target_accu=target_accu, log=log,
                                        best=True,
                                        stage='push_{}'.format(epoch))

            last_layer_optimizer_specs = [{
                'params': ppnet.last_layer.parameters(),
                'lr': EXP_ARGS['OPTIMIZER']['last_layer_optimizer_lrs']['last_layer_optimizer_lr']
            }]
            last_layer_optimizer = torch.optim.Adam(last_layer_optimizer_specs)
            last_lr_lr_scheduler = torch.optim.lr_scheduler.StepLR(
                last_layer_optimizer,
                step_size=EXP_ARGS['OPTIMIZER']['last_layer_optimizer_lrs']['last_lr_step_size'],
                gamma=0.1)

            log('\n------------------------- Last Layer Training -----------------------------------')
            if NET_ARGS['prototype_activation_function'] != 'linear':
                tnt.last_only(model=ppnet_multi, log=log)
                max_acc_post, max_acc_post_iter, max_acc_post_epoch = 0, 0, epoch
                for i in range(EXP_ARGS['OPTIMIZER']['last_layer_optimizer_lrs']['last_layer_optimizer_iters']):
                    log('Last layer optimization, Iteration: {0}'.format(i))
                    _ = tnt.train(model=ppnet_multi, dataloader=train_loader,
                                  optimizer=last_layer_optimizer,
                                  class_specific=class_specific,
                                  coefs=EXP_ARGS['LOSS']['loss_coefs_joint'],
                                  log=log, EXP_ARGS=EXP_ARGS)
                    last_lr_lr_scheduler.step()
                    accu = tnt.test(model=ppnet_multi,
                                    dataloader=test_loader,
                                    class_specific=class_specific, log=log,
                                    EXP_ARGS=EXP_ARGS)
                    if accu > max_acc_post:
                        max_acc_post = accu
                        max_acc_post_iter = i
                        max_acc_post_epoch = epoch
                        save.save_model_w_condition(
                            model=ppnet, model_dir=model_dir, model_name='',
                            accu=accu, target_accu=0.70, log=log, best=True,
                            stage='postpush_{}'.format(epoch))
                    log("Best Accuracy - PostPush {:.2f} at epoch {} and iter {}".format(
                        max_acc_post, max_acc_post_epoch, max_acc_post_iter))
                save.save_model_w_condition(
                    model=ppnet, model_dir=model_dir, model_name='',
                    accu=accu, target_accu=0.70, log=log, best=True,
                    stage='postpushfinal_{}'.format(epoch))
    logclose()
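# For reference, a minimal sketch of the save.save_model_w_condition helper
# called throughout main(). In the original ProtoPNet code it saves the whole
# model only when accuracy clears a threshold; the extra best/stage arguments
# used above are specific to this repo, so folding them into the filename is
# an assumption.
def save_model_w_condition_sketch(model, model_dir, model_name, accu,
                                  target_accu, log=print, best=False,
                                  stage=''):
    if accu > target_accu:
        log('\tabove {0:.2f}%'.format(target_accu * 100))
        torch.save(model, os.path.join(
            model_dir, '{}{}_{:.4f}.pth'.format(model_name, stage, accu)))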
start_time = time.time()
print('loading data...')
build_vocab(config.input_file, os.path.join(config.vocab_path, 'in_vocab'))
build_vocab(config.slot_file, os.path.join(config.vocab_path, 'slot_vocab'))
build_vocab(config.intent_file,
            os.path.join(config.vocab_path, 'intent_vocab'),
            pad=False, unk=False)
in_vocab = load_vocabulary(os.path.join(config.vocab_path, 'in_vocab'))
slot_vocab = load_vocabulary(os.path.join(config.vocab_path, 'slot_vocab'))
intent_vocab = load_vocabulary(os.path.join(config.vocab_path,
                                            'intent_vocab'))

train_data, dev_data, test_data = build_dataset(in_vocab['vocab'],
                                                slot_vocab['vocab'],
                                                intent_vocab['vocab'])
train_iter = build_iterator(train_data)
dev_iter = build_iterator(dev_data)
test_iter = build_iterator(test_data)
time_dif = get_time_dif(start_time)
print('time usage:', time_dif)

config.n_vocab = len(in_vocab['vocab'])
x = import_module(model_name)
model = x.Model(config).to(torch.device('cuda'))
init_network(model)
print(model.parameters)
train(config, model, train_iter, dev_iter, test_iter)
# test(config, model, test_iter)
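# For reference, a minimal sketch of a build_vocab helper consistent with the
# calls above: it collects whitespace-separated tokens from a text file and
# writes one token per line, optionally reserving <PAD>/<UNK> entries (the
# intent vocabulary above is built with pad=False, unk=False). The file format
# and special-token names are assumptions based on the call sites.
def build_vocab_sketch(input_path, output_path, pad=True, unk=True):
    vocab = []
    if pad:
        vocab.append('<PAD>')
    if unk:
        vocab.append('<UNK>')
    seen = set(vocab)
    with open(input_path, encoding='utf-8') as f:
        for line in f:
            for token in line.strip().split():
                if token not in seen:
                    seen.add(token)
                    vocab.append(token)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(vocab))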
# (the opening of this param-group list is truncated in the original excerpt;
# only the attention entry survives)
last_layer_optimizer_specs = [{
    'params': ppnet.attention_weights.parameters(),
    'lr': config.last_layer_optimizer_lr['attention']
}]
last_layer_optimizer = torch.optim.Adam(last_layer_optimizer_specs)

print('optimize last layer')
for i in range(40):
    step += 1
    print('iteration: \t{0}'.format(i))
    last_only(model=ppnet)
    train(model=ppnet, dataloader=train_loader,
          optimizer=last_layer_optimizer, config=config, step=step,
          weighting_attention=args.weighting_attention)
    accu = valid(model=ppnet, dataloader=valid_loader, config=config,
                 step=step, weighting_attention=args.weighting_attention)
    if current_push_best_accu < accu:
        print('New best score: {}, saving snapshot'.format(accu))
        save_train_state(
            os.path.join(model_dir,
                         original_model_name.split('push')[0] + 'prune'),
            ppnet, {}, step, mode, epoch, iteration, experiment_run_name,
            best_accu, accu, accu, config)
        current_push_best_accu = accu
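# For reference, a minimal sketch of the last_only switch used in this loop,
# assuming it mirrors ProtoPNet's convention of freezing everything except the
# classification head; keeping attention_weights trainable here is an
# assumption inferred from the optimizer spec above, not confirmed code.
def last_only_sketch(model):
    for p in model.parameters():
        p.requires_grad = False
    for p in model.last_layer.parameters():
        p.requires_grad = True
    for p in model.attention_weights.parameters():
        p.requires_grad = True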