def main(config):
    """Entry point: build the model, data loaders, optimizer and run training."""
    # Echo the full configuration when verbosity is high enough.
    if config.verbose >= 2:
        print(config)

    # Select the CPU or the requested CUDA device from the configuration.
    if config.gpu_id < 0:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:%d' % config.gpu_id)

    model = get_model(config).to(device)

    train_loader, valid_loader, test_loader = get_loaders(config)
    # Report the size of each dataset split.
    for label, loader in (("Train:", train_loader),
                          ("Valid:", valid_loader),
                          ("Test:", test_loader)):
        print(label, len(loader.dataset))

    optimizer = optim.Adam(model.parameters())
    crit = nn.MSELoss()

    # Dump the training components when verbosity is high enough.
    if config.verbose >= 2:
        for component in (model, optimizer, crit):
            print(component)

    Trainer(config).train(model, crit, optimizer, train_loader, valid_loader)
def main(argv=None):  # pylint: disable=unused-argument
    """Build the requested model, reset the summary directory, and evaluate."""
    model = get_model(FLAGS.model)

    # Start from an empty summary directory so stale event files do not mix
    # with the new evaluation run.
    summary_dir = FLAGS.summary_dir
    if tf.gfile.Exists(summary_dir):
        tf.gfile.DeleteRecursively(summary_dir)
    tf.gfile.MakeDirs(summary_dir)

    evaluate(model)
def main(argv=None):  # pylint: disable=unused-argument
    """Launch this process as either a parameter server or a worker.

    The cluster layout is read from FLAGS.ps_hosts / FLAGS.worker_hosts
    (comma-separated host:port lists); this process's role comes from
    FLAGS.job_name and FLAGS.task_index.

    Raises:
        ValueError: if FLAGS.job_name is neither 'ps' nor 'worker'.
    """
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        # Parameter servers only host variables; block forever serving them.
        print("\nps joining...\n")
        server.join()
    elif FLAGS.job_name == "worker":
        model = get_model(FLAGS.model)
        if not FLAGS.restore:
            # Only the chief worker (task 0) resets the summary directory,
            # so concurrent workers do not race on the filesystem.
            if FLAGS.task_index == 0:
                if tf.gfile.Exists(FLAGS.summary_dir):
                    tf.gfile.DeleteRecursively(FLAGS.summary_dir)
                tf.gfile.MakeDirs(FLAGS.summary_dir)
        print("\nworker " + str(FLAGS.task_index) + " start training...\n")
        train(model, cluster, server)
    else:
        # Bug fix: previously an unrecognized job name fell through both
        # branches and the process exited silently having done nothing.
        raise ValueError(
            "Unknown job_name: %r (expected 'ps' or 'worker')" % FLAGS.job_name)
def main(argv=None):  # pylint: disable=unused-argument
    """Build the model and train it, optionally restoring a previous session."""
    model = get_model(FLAGS.model)
    # Idiom fix: `FLAGS.restore_sess is False` identity-compares against the
    # False singleton and silently skips the cleanup for falsy non-bool flag
    # values (0, '', None); plain boolean negation expresses the intent.
    if not FLAGS.restore_sess:
        # Fresh run: wipe any summaries left over from an earlier run.
        if tf.gfile.Exists(FLAGS.summary_dir):
            print("Deleting existing summary files!!!\n")
            tf.gfile.DeleteRecursively(FLAGS.summary_dir)
        tf.gfile.MakeDirs(FLAGS.summary_dir)
    train(model)
def load(args, epoch):
    """Restore a pretrained model (and optionally its projector) and build the
    linear-evaluation head, optimizer, and optional adversarial attacker.

    Args:
        args: parsed command-line namespace (checkpoint path, dataset name,
            self-supervision flag ``ss``, adversarial settings, lr, ...).
        epoch: checkpoint epoch to restore; 0 selects the suffix-less file.

    Returns:
        (model, Linear, projector or 'None', loptim, attacker or 'None') —
        the string 'None' is kept as the historical placeholder value.
    """
    model = model_loader.get_model(args)

    # Checkpoints are saved either as '<path>' (epoch 0) or '<path>_epoch_N'.
    if epoch == 0:
        add = ''
    else:
        add = '_epoch_' + str(epoch)
    checkpoint_ = torch.load(args.load_checkpoint + add)

    # Debug dump of parameter names on both sides of the restore.
    print('MODELS')
    for name, param in model.named_parameters():
        print(name)

    # Checkpoints were written from a DataParallel wrapper, so every key
    # carries a 'module.' prefix; strip the first 7 chars before loading.
    new_state_dict = OrderedDict()
    print('checkpoint')
    for k, v in checkpoint_['model'].items():
        print(k)
        new_state_dict[k[7:]] = v
    model.load_state_dict(new_state_dict)

    if args.ss:
        # NOTE(review): `expansion` is not defined in this function — it is
        # presumably a module-level global set by the caller; confirm.
        projector = Projector(expansion=expansion)
        checkpoint_p = torch.load(args.load_checkpoint + '_projector' + add)
        new_state_dict = OrderedDict()
        for k, v in checkpoint_p['model'].items():
            new_state_dict[k[7:]] = v
        projector.load_state_dict(new_state_dict)

    # Linear evaluation head sized to the dataset's class count.
    if args.dataset == 'cifar-10':
        Linear = nn.Sequential(nn.Linear(512 * expansion, 10))
    elif args.dataset == 'cifar-100':
        Linear = nn.Sequential(nn.Linear(512 * expansion, 100))

    # Collect the parameter groups the optimizer should update; the backbone
    # (and projector) only join when fine-tuning / self-supervision is on.
    model_params = []
    if args.finetune:
        model_params += model.parameters()
    if args.ss:
        model_params += projector.parameters()
    model_params += Linear.parameters()
    loptim = torch.optim.SGD(model_params, lr=args.lr,
                             momentum=0.9, weight_decay=5e-4)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        ngpus_per_node = torch.cuda.device_count()
        model.cuda()
        Linear.cuda()
        model = nn.DataParallel(model)
        Linear = nn.DataParallel(Linear)
        if args.ss:
            projector.cuda()
            projector = nn.DataParallel(projector)
    else:
        # Bug fix: the original `assert ("Need to use GPU...")` asserted a
        # non-empty string literal, which is always truthy and never failed,
        # so the CPU path silently continued. Fail loudly as intended.
        raise AssertionError("Need to use GPU...")

    print_status('Using CUDA..')
    cudnn.benchmark = True

    # Optional adversarial-training attacker; 'None' placeholder otherwise.
    attacker = 'None'
    if args.adv_img:
        attack_info = 'Adv_train_epsilon_' + str(
            args.epsilon) + '_alpha_' + str(args.alpha) + '_min_val_' + str(
                args.min) + '_max_val_' + str(args.max) + '_max_iters_' + str(
                    args.k) + '_type_' + str(
                        args.attack_type) + '_randomstart_' + str(
                            args.random_start)
        print_status("Adversarial training info...")
        print_status(attack_info)

        attacker = FastGradientSignUntargeted(model,
                                              linear=Linear,
                                              epsilon=args.epsilon,
                                              alpha=args.alpha,
                                              min_val=args.min,
                                              max_val=args.max,
                                              max_iters=args.k,
                                              _type=args.attack_type)

    if args.ss:
        return model, Linear, projector, loptim, attacker
    return model, Linear, 'None', loptim, attacker
world_size=world_size, rank=args.local_rank, ) # Data print_status('==> Preparing data..') if not (args.train_type == 'contrastive'): assert ('wrong train phase...') else: trainloader, traindst, testloader, testdst, train_sampler = data_loader.get_dataset( args) # Model print_status('==> Building model..') torch.cuda.set_device(args.local_rank) model = model_loader.get_model(args) if args.model == 'ResNet18': expansion = 1 elif args.model == 'ResNet50': expansion = 4 else: assert ('wrong model type') projector = Projector(expansion=expansion) if 'Rep' in args.advtrain_type: Rep_info = 'Rep_attack_ep_' + str(args.epsilon) + '_alpha_' + str( args.alpha) + '_min_val_' + str(args.min) + '_max_val_' + str( args.max) + '_max_iters_' + str(args.k) + '_type_' + str( args.attack_type) + '_randomstart_' + str(args.random_start) args.name += Rep_info
# NOTE(review): fragment of a larger setup routine — the enclosing function
# header is not visible in this chunk, so the surrounding context (where
# `args` comes from) is assumed; confirm against the full file.
# Class count per dataset.
if args.dataset == 'cifar-10':
    num_outputs = 10
elif args.dataset == 'cifar-100':
    num_outputs = 100
# ResNet50 uses 4x channel expansion; every other model here gets 1.
if args.model == 'ResNet50':
    expansion = 4
else:
    expansion = 1

# Model
print_status('==> Building model..')
train_type = args.train_type
model = model_loader.get_model(args)#models.__dict__[args.model]()

# Linear evaluation head sized to the dataset's class count.
if args.dataset=='cifar-10':
    Linear = nn.Sequential(nn.Linear(512*expansion, 10))
elif args.dataset=='cifar-100':
    Linear = nn.Sequential(nn.Linear(512*expansion, 100))

# Restore pretrained weights. When args.module is set, the checkpoint was
# written from a DataParallel wrapper and each key carries a 'module.'
# prefix (7 chars) that must be stripped before load_state_dict.
checkpoint_ = torch.load(args.load_checkpoint)
new_state_dict = OrderedDict()
for k, v in checkpoint_['model'].items():
    if args.module:
        name = k[7:]
    else:
        name = k
    new_state_dict[name] = v
model.load_state_dict(new_state_dict)
train_inputs_path, train_masks_path, batch_size, one_hot_label, data_aug, change=change) val_generator = custom_data_generator.image_generator(val_samples, val_inputs_path, val_masks_path, batch_size, one_hot_label, change=change) ##-------- Model loading model = model_loader.get_model(model_name=model_name, input_size=input_size, one_hot_label=one_hot_label) # json_file = open("./models/unet_resnext_50_lovasz.json", 'r') # loaded_model_json = json_file.read() # json_file.close() # model = model_from_json(loaded_model_json) #print(model.summary()) model.save(model_name + '.h5') if model == None: print("Model ..[ %s ] definition not found." % model_name) exit(0) model_json = model.to_json() with open(checkpoint_dir + model_name + ".json", 'w') as json_file: json_file.write(model_json)