def __getitem__(self, idx):
    with self.count.get_lock():
        self.count.value += 1
        if self.count.value == self.num_workers:
            self.barrier.release()
    self.barrier.acquire()
    self.barrier.release()
    return torch.initial_seed()
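
# A minimal, self-contained sketch (names assumed, not taken from the snippet above) of the
# same idea without the barrier: every DataLoader worker process is assigned its own base
# seed, and torch.initial_seed() reports it from inside __getitem__.
import torch
from torch.utils.data import DataLoader, Dataset

class SeedProbeDataset(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        # per-worker seed assigned by the DataLoader (base_seed + worker_id)
        return torch.initial_seed()

if __name__ == '__main__':
    loader = DataLoader(SeedProbeDataset(), batch_size=2, num_workers=2)
    for batch in loader:
        print(batch)  # values differ between the two worker processes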

# build the dataset from the sample list in the config
myDataset = MyDataset()
for sampleInfo in config['samples']:
    if 'ignore' in sampleInfo and sampleInfo['ignore']:
        continue
    name = sampleInfo['name']
    myDataset.addSample(name, sampleInfo['path'], weight=sampleInfo['xsec'] / sampleInfo['ngen'])
    myDataset.setProcessLabel(name, sampleInfo['label'])
myDataset.initialize()
procNames = myDataset.sampleInfo['procName'].unique()

from torch.utils.data import DataLoader

# 60/20/20 train/validation/test split with a fixed seed
lengths = [int(0.6 * len(myDataset)), int(0.2 * len(myDataset))]
lengths.append(len(myDataset) - sum(lengths))
torch.manual_seed(config['training']['randomSeed1'])
trnDataset, valDataset, testDataset = torch.utils.data.random_split(myDataset, lengths)
torch.manual_seed(torch.initial_seed())

kwargs = {'num_workers': config['training']['nDataLoaders']}
allLoader = DataLoader(myDataset, batch_size=args.batch, shuffle=False, **kwargs)
trnLoader = DataLoader(trnDataset, batch_size=args.batch, shuffle=False, **kwargs)
valLoader = DataLoader(valDataset, batch_size=args.batch, shuffle=False, **kwargs)
testLoader = DataLoader(testDataset, batch_size=args.batch, shuffle=False, **kwargs)

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

bins = [None, None, None]
imgHist_val_sig = [np.zeros(nbinsx[i]) for i in range(3)]
imgHist_val_bkg = [np.zeros(nbinsx[i]) for i in range(3)]
imgSum_val_sig, imgSum_val_bkg = None, None
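
# The block above fixes the global RNG with manual_seed before random_split and then calls
# torch.manual_seed(torch.initial_seed()) to re-seed afterwards. A sketch of an alternative,
# assuming PyTorch >= 1.6 where random_split accepts a dedicated generator, so the global RNG
# is never touched (TensorDataset and the seed value here are illustrative only):
import torch
from torch.utils.data import TensorDataset, random_split

dataset = TensorDataset(torch.arange(100))
lengths = [60, 20, 20]
gen = torch.Generator().manual_seed(12345)
trn, val, tst = random_split(dataset, lengths, generator=gen)
print(len(trn), len(val), len(tst))  # 60 20 20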

def __getitem__(self, idx):
    return torch.initial_seed()

# the config object should only be used in this file, to keep an overview over the usage
config = get_config(args.config_file, args.config_overwrites)
run_folder = prepare_experiment(args, config)

logger = get_logger_to_file(run_folder, "main")
logger.info("Running: %s", str(sys.argv))

#
# random seeds
#
torch.manual_seed(config["random_seed"])
numpy.random.seed(config["random_seed"])
random.seed(config["random_seed"])
logger.info("Torch seed: %i ", torch.initial_seed())

# hardcode gpu usage
cuda_device = 0  # always take the first -> set others via cuda flag in bash

perf_monitor = PerformanceMonitor.get()
perf_monitor.start_block("startup")

#
# create (or load) model instance
# -------------------------------
#
# * vocab (pre-built, to make the embedding matrix smaller, see generate_vocab.py)
# * pre-trained embedding
# * network
# * optimizer & loss function
#
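
# The three seeding calls above form a common pattern. A small helper sketch that bundles them
# (set_all_seeds is a hypothetical name, not part of this codebase), including CUDA devices:
import random
import numpy
import torch

def set_all_seeds(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch RNGs from a single value."""
    random.seed(seed)
    numpy.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)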

def main():
    configs = prepare()
    if configs.evaluate is not None:
        configs.evaluate.fn(configs)
        return

    import numpy as np
    import tensorboardX
    import torch
    import torch.backends.cudnn as cudnn
    from torch.utils.data import DataLoader
    from tqdm import tqdm

    ################################
    # Train / Eval Kernel Function #
    ################################

    def adjust_learning_rate(optimizer, epoch, args_lr):
        """Sets the learning rate to the initial LR decayed by half every 5 or 10 epochs"""
        if epoch > 0:
            if epoch <= 30:
                lr = args_lr * (0.5 ** (epoch // 5))
            else:
                lr = args_lr * (0.5 ** (epoch // 10))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            writer.add_scalar('lr_dis', lr, epoch)

    # train kernel
    def train(model, source_loader, target_loader, criterion, optimizer_g,
              optimizer_cls, optimizer_dis, scheduler_g, scheduler_cls,
              current_step, writer, cons):
        model.train()
        loss_total = 0
        loss_adv_total = 0
        loss_node_total = 0
        data_total = 0
        batch_iterator = zip(loop_iterable(source_loader), loop_iterable(target_loader))
        for _ in trange(len(source_loader)):
            (inputs, targets), (inputs_t, _) = next(batch_iterator)
            if isinstance(inputs, dict):
                for k, v in inputs.items():
                    batch_size = v.size(0)
                    inputs[k] = v.to(configs.device, non_blocking=True)
            else:
                batch_size = inputs.size(0)
                inputs = inputs.to(configs.device, non_blocking=True)
            if isinstance(inputs_t, dict):
                for k, v in inputs_t.items():
                    batch_size = v.size(0)
                    inputs_t[k] = v.to(configs.device, non_blocking=True)
            else:
                batch_size = inputs_t.size(0)
                inputs_t = inputs_t.to(configs.device, non_blocking=True)
            if isinstance(targets, dict):
                for k, v in targets.items():
                    targets[k] = v.to(configs.device, non_blocking=True)
            else:
                targets = targets.to(configs.device, non_blocking=True)

            outputs = model(inputs)
            pred_t1, pred_t2 = model.module.inst_seg_net(
                {
                    'features': inputs_t['features'],
                    'one_hot_vectors': inputs_t['one_hot_vectors']
                },
                constant=cons,
                adaptation=True)
            loss_s = criterion(outputs, targets)
            # Adversarial loss
            loss_adv = -1 * discrepancy_loss(pred_t1, pred_t2)
            loss = loss_s + loss_adv
            loss.backward()
            optimizer_g.step()
            optimizer_cls.step()
            optimizer_g.zero_grad()
            optimizer_cls.zero_grad()

            # Local Alignment
            _, _, feat_node_s = model.module.inst_seg_net(
                {
                    'features': inputs['features'],
                    'one_hot_vectors': inputs['one_hot_vectors']
                },
                node_adaptation_s=True)
            _, _, feat_node_t = model.module.inst_seg_net(
                {
                    'features': inputs_t['features'],
                    'one_hot_vectors': inputs_t['one_hot_vectors']
                },
                node_adaptation_t=True)
            sigma_list = [0.01, 0.1, 1, 10, 100]
            loss_node_adv = 1 * mmd.mix_rbf_mmd2(feat_node_s, feat_node_t, sigma_list)
            loss = loss_node_adv
            loss.backward()
            optimizer_dis.step()
            optimizer_dis.zero_grad()

            loss_total += loss_s.item() * batch_size
            loss_adv_total += loss_adv.item() * batch_size
            loss_node_total += loss_node_adv.item() * batch_size
            data_total += batch_size
            writer.add_scalar('loss_s/train', loss_total / data_total, current_step)
            writer.add_scalar('loss_adv/train', loss_adv_total / data_total, current_step)
            writer.add_scalar('loss_node/train', loss_node_total / data_total, current_step)
            current_step += batch_size

        if scheduler_g is not None:
            scheduler_g.step()
        if scheduler_cls is not None:
            scheduler_cls.step()

    # evaluate kernel
    def evaluate(model, loader, split='test'):
        meters = {}
        for k, meter in configs.train.meters.items():
            meters[k.format(split)] = meter()
        model.eval()
        with torch.no_grad():
            for inputs, targets in tqdm(loader, desc=split, ncols=0):
                if isinstance(inputs, dict):
                    for k, v in inputs.items():
                        inputs[k] = v.to(configs.device, non_blocking=True)
                else:
                    inputs = inputs.to(configs.device, non_blocking=True)
                if isinstance(targets, dict):
                    for k, v in targets.items():
                        targets[k] = v.to(configs.device, non_blocking=True)
                else:
                    targets = targets.to(configs.device, non_blocking=True)
                outputs = model(inputs)
                for meter in meters.values():
                    meter.update(outputs, targets)
        for k, meter in meters.items():
            meters[k] = meter.compute()
        return meters

    ###########
    # Prepare #
    ###########

    if configs.device == 'cuda':
        cudnn.benchmark = True
        if configs.get('deterministic', False):
            cudnn.deterministic = True
            cudnn.benchmark = False
    if ('seed' not in configs) or (configs.seed is None):
        configs.seed = torch.initial_seed() % (2 ** 32 - 1)
    seed = configs.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    print(configs)

    #####################################################################
    # Initialize DataLoaders, Model, Criterion, LRScheduler & Optimizer #
    #####################################################################

    print(f'\n==> loading source dataset "{configs.source_dataset}"')
    source_dataset = configs.source_dataset()
    source_loaders = {
        "train": DataLoader(
            source_dataset["train"],
            shuffle=True,
            batch_size=configs.train.batch_size,
            drop_last=True,
            num_workers=configs.data.num_workers,
            pin_memory=True,
            worker_init_fn=lambda worker_id: np.random.seed(seed + worker_id))
    }

    print(f'\n==> loading target dataset "{configs.target_dataset}"')
    target_dataset = configs.target_dataset()
    target_loaders = {}
    for split in target_dataset:
        target_loaders[split] = DataLoader(
            target_dataset[split],
            shuffle=(split == 'train'),
            batch_size=configs.train.batch_size,
            drop_last=True,
            num_workers=configs.data.num_workers,
            pin_memory=True,
            worker_init_fn=lambda worker_id: np.random.seed(seed + worker_id))

    print(f'\n==> creating model "{configs.model}"')
    model = configs.model()
    if configs.device == 'cuda':
        model = torch.nn.DataParallel(model)
    model = model.to(configs.device)
    criterion = configs.train.criterion().to(configs.device)

    # params
    gen_params = [{'params': v} for k, v in model.module.inst_seg_net.g.named_parameters()
                  if 'pred_offset' not in k]
    cls_params = [{'params': model.module.inst_seg_net.c1.parameters()},
                  {'params': model.module.inst_seg_net.c2.parameters()},
                  {'params': model.module.center_reg_net.parameters()},
                  {'params': model.module.box_est_net.parameters()}]
    dis_params = [{'params': model.module.inst_seg_net.g.parameters()},
                  {'params': model.module.inst_seg_net.attention_s.parameters()},
                  {'params': model.module.inst_seg_net.attention_t.parameters()}]
    optimizer_g = configs.train.optimizer_g(gen_params)
    optimizer_cls = configs.train.optimizer_cls(cls_params)
    optimizer_dis = configs.train.optimizer_dis(dis_params)

    last_epoch, best_metrics = -1, {m: None for m in configs.train.metrics}
    if os.path.exists(configs.train.checkpoint_path):
        print(f'==> loading checkpoint "{configs.train.checkpoint_path}"')
        checkpoint = torch.load(configs.train.checkpoint_path)
        print(' => loading model')
        model.load_state_dict(checkpoint.pop('model'))
        if 'optimizer_g' in checkpoint and checkpoint['optimizer_g'] is not None:
            print(' => loading optimizer_g')
            optimizer_g.load_state_dict(checkpoint.pop('optimizer_g'))
        if 'optimizer_cls' in checkpoint and checkpoint['optimizer_cls'] is not None:
            print(' => loading optimizer_cls')
            optimizer_cls.load_state_dict(checkpoint.pop('optimizer_cls'))
        if 'optimizer_dis' in checkpoint and checkpoint['optimizer_dis'] is not None:
            print(' => loading optimizer_dis')
            optimizer_dis.load_state_dict(checkpoint.pop('optimizer_dis'))
        last_epoch = checkpoint.get('epoch', last_epoch)
        meters = checkpoint.get('meters', {})
        for m in configs.train.metrics:
            best_metrics[m] = meters.get(m + '_best', best_metrics[m])
        del checkpoint

    if 'scheduler_g' in configs.train and configs.train.scheduler_g is not None:
        configs.train.scheduler_g.last_epoch = last_epoch
        print(f'==> creating scheduler "{configs.train.scheduler_g}"')
        scheduler_g = configs.train.scheduler_g(optimizer_g)
    else:
        scheduler_g = None
    if 'scheduler_c' in configs.train and configs.train.scheduler_c is not None:
        configs.train.scheduler_c.last_epoch = last_epoch
        print(f'==> creating scheduler "{configs.train.scheduler_c}"')
        scheduler_c = configs.train.scheduler_c(optimizer_cls)
    else:
        scheduler_c = None

    ############
    # Training #
    ############

    if last_epoch >= configs.train.num_epochs:
        meters = dict()
        for split, loader in target_loaders.items():
            if split != 'train':
                meters.update(evaluate(model, loader=loader, split=split))
        for k, meter in meters.items():
            print(f'[{k}] = {meter:2f}')
        return

    with tensorboardX.SummaryWriter(configs.train.save_path) as writer:
        step_size = min(len(source_dataset['train']), len(target_dataset['train']))
        for current_epoch in range(last_epoch + 1, configs.train.num_epochs):
            current_step = current_epoch * step_size
            cons = math.sin((current_epoch + 1) / configs.train.num_epochs * math.pi / 2)
            adjust_learning_rate(optimizer_dis, current_epoch, configs.train.base_lr)
            writer.add_scalar('lr_g', scheduler_g.get_lr()[0], current_epoch)
            writer.add_scalar('lr_c', scheduler_c.get_lr()[0], current_epoch)

            # train
            print(f'\n==> training epoch {current_epoch}/{configs.train.num_epochs}')
            train(model,
                  source_loader=source_loaders['train'],
                  target_loader=target_loaders['train'],
                  criterion=criterion,
                  optimizer_g=optimizer_g,
                  optimizer_cls=optimizer_cls,
                  optimizer_dis=optimizer_dis,
                  scheduler_g=scheduler_g,
                  scheduler_cls=scheduler_c,
                  current_step=current_step,
                  writer=writer,
                  cons=cons)
            current_step += step_size

            # evaluate
            meters = dict()
            for split, loader in source_loaders.items():
                if split != 'train':
                    meters.update(evaluate(model, loader=loader, split=split))
            for k, meter in meters.items():
                print(f'Source [{k}] = {meter:2f}')
            meters = dict()
            for split, loader in target_loaders.items():
                if split != 'train':
                    meters.update(evaluate(model, loader=loader, split=split))

            # check whether it is the best
            best = {m: False for m in configs.train.metrics}
            for m in configs.train.metrics:
                if best_metrics[m] is None or best_metrics[m] < meters[m]:
                    best_metrics[m], best[m] = meters[m], True
                meters[m + '_best'] = best_metrics[m]

            # log in tensorboard
            for k, meter in meters.items():
                print(f'Target [{k}] = {meter:2f}')
                writer.add_scalar(k, meter, current_step)

            # save checkpoint
            torch.save(
                {
                    'epoch': current_epoch,
                    'model': model.state_dict(),
                    'optimizer_g': optimizer_g.state_dict(),
                    'optimizer_cls': optimizer_cls.state_dict(),
                    'optimizer_dis': optimizer_dis.state_dict(),
                    'meters': meters,
                    'configs': configs,
                }, configs.train.checkpoint_path)
            shutil.copyfile(configs.train.checkpoint_path,
                            configs.train.checkpoints_path.format(current_epoch))
            for m in configs.train.metrics:
                if best[m]:
                    shutil.copyfile(configs.train.checkpoint_path,
                                    configs.train.best_checkpoint_paths[m])
            if best.get(configs.train.metric, False):
                shutil.copyfile(configs.train.checkpoint_path,
                                configs.train.best_checkpoint_path)
            print(f'[save_path] = {configs.train.save_path}')
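
# The loaders above seed NumPy per worker with `seed + worker_id`. A commonly used alternative
# (sketch; seed_worker is a hypothetical helper) derives the NumPy/random seeds from the
# per-worker torch seed instead, so one base seed controls all libraries:
import random
import numpy as np
import torch

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2 ** 32  # DataLoader sets this to base_seed + worker_id
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# usage: DataLoader(dataset, num_workers=4, worker_init_fn=seed_worker)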

x_te = vec.transform(te_sentences)
x_te = np.array(
    lib.pad_sequence(x_te, maxlen=opt.maxlen, padding='post', truncating='post', value=0))
n_txt_feats = int(max(x_tr.max(), x_te.max()) + 10)
logger.info(" - txt train/test min/max: [{}|{}] [{}|{}]".format(
    x_tr.min(), x_tr.max(), x_te.min(), x_te.max()))
tr_data = [x_tr, np.array(tr_labels)]
te_data = [x_te, np.array(te_labels)]

torch.manual_seed(opt.seed)
print("Seed for random numbers: ", torch.initial_seed())

model = VDCNN(n_classes=n_classes,
              num_embedding=n_txt_feats,
              embedding_dim=16,
              depth=opt.depth,
              n_fc_neurons=2048,
              shortcut=opt.shortcut)
if opt.gpu:
    model.cuda()
if opt.class_weights:
    criterion = nn.CrossEntropyLoss(torch.cuda.FloatTensor(opt.class_weights))
else:
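
# The branch above passes the per-class weights positionally into nn.CrossEntropyLoss. A
# minimal stand-alone sketch of weighted cross-entropy on CPU (weights and shapes are made up):
import torch
import torch.nn as nn

class_weights = torch.tensor([1.0, 2.0, 0.5])        # one weight per class
criterion = nn.CrossEntropyLoss(weight=class_weights)
logits = torch.randn(4, 3)                           # batch of 4 samples, 3 classes
targets = torch.tensor([0, 2, 1, 1])
print(criterion(logits, targets))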

    annotation_path=args.annotation_path_test)
"""
train_iter = torch.utils.data.DataLoader(
    train_loader,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=8,  # 4, # change this part accordingly
    pin_memory=True)
"""
eval_iter = torch.utils.data.DataLoader(val_loader,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        num_workers=1,  # 4, # change this part accordingly
                                        pin_memory=True)
"""
iter_seed = torch.initial_seed() + 100

network = AnomalyDetector()
net = model(net=network,
            criterion=RegularizedLoss(network, custom_objective).cuda(),
            model_prefix=args.model_dir,
            step_callback_freq=5,
            save_checkpoint_freq=args.save_frequency,
            opt_batch_size=args.batch_size,  # optional, 60 in the paper
            )

if torch.cuda.is_available():
    net.net.cuda()
    torch.cuda.manual_seed(args.random_seed)
    net.net = torch.nn.DataParallel(net.net).cuda()

def get_plot_args():
    return dict(num_units_to_plot=5, seed=torch.initial_seed())

logger = get_logger(wkdir + "train.log")

use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)

set_random_seed(cnfg.seed, use_cuda)

td = h5py.File(cnfg.train_data, "r")
vd = h5py.File(cnfg.dev_data, "r")

ntrain = td["ndata"][:].item()
nvalid = vd["ndata"][:].item()
nword = td["nword"][:].tolist()
nwordi, nwordt = nword[0], nword[-1]

logger.info("Design models with seed: %d" % torch.initial_seed())
mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop,
              cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default,
              cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)

fine_tune_m = cnfg.fine_tune_m

tl = [str(i) for i in range(ntrain)]

mymodel = init_model_params(mymodel)
mymodel.apply(init_fixing)
if fine_tune_m is not None:
    logger.info("Load pre-trained model from: " + fine_tune_m)
    mymodel = load_model_cpu(fine_tune_m, mymodel)

#lw = torch.ones(nwordt).float()
#lw[0] = 0.0
#lossf = nn.NLLLoss(lw, ignore_index=0, reduction='sum')

def run():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch Environment")
    train_parser = parser.add_argument_group("Train Parameters")
    train_parser.add_argument("--epochs", type=int, default=160, metavar="E",
                              help="number of epochs to train (default: 160)")
    train_parser.add_argument("--batch-size", type=int, default=128, metavar="B",
                              help="input batch size for training (default: 128)")
    train_parser.add_argument("--test-batch-size", type=int, default=128, metavar="BT",
                              help="input batch size for testing (default: 128)")
    train_parser.add_argument("--lr_decay", type=float, default=0.1, metavar="LD",
                              help="learning rate decay rate")
    train_parser.add_argument("--schedule", type=int, nargs="*", default=[80, 120],
                              help="learning rate is decayed at these epochs")
    train_parser.add_argument("--warmup-epochs", type=int, default=5, metavar="WE",
                              help="number of warmup epochs")
    train_parser.add_argument("--no-cuda", action="store_true", default=False,
                              help="disables CUDA training")
    train_parser.add_argument("--seed", type=int, default=7186021514134990023, metavar="S",
                              help="random seed (default: 7186021514134990023)")

    simulator_parser = parser.add_argument_group("Simulator Parameters")
    simulator_parser.add_argument("--sim-size", type=int, default=16, metavar="N",
                                  help="size of simulator")
    simulator_parser.add_argument("--sim-gamma-shape", type=float, default=100, metavar="GSH",
                                  help="gamma shape parameter")
    simulator_parser.add_argument("--sim-gamma-scale", type=float, default=1.28, metavar="GSC",
                                  help="gamma scale parameter")

    optimizer_parser = parser.add_argument_group("Optimizer Parameters")
    optimizer_parser.add_argument("--lr", type=float, default=0.1, metavar="LR",
                                  help="learning rate (default: 0.1)")
    optimizer_parser.add_argument("--momentum", type=float, default=0.9, metavar="M",
                                  help="SGD momentum (default: 0.9)")
    optimizer_parser.add_argument("--dc", type=float, default=2, metavar="DC",
                                  help="Delay Compensation (default: 2)")
    optimizer_parser.add_argument("--weight-decay", type=float, default=1e-4, metavar="WD",
                                  help="SGD weight decay (default: 1e-4)")
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    random.seed(torch.initial_seed())

    print("*** Configuration ***")
    for k in vars(args):
        print(str(k), ":", str(getattr(args, k)))

    train_set, test_set = get_cifar_10_data_set()  # get CIFAR-10 train and test set
    args.train_loader = data_loader(train_set, is_train=True, args=args)
    args.test_loader = data_loader(test_set, is_train=False, args=args)
    args.model = resnet20_cifar()  # get ResNet-20 model
    if args.cuda:
        args.model = args.model.cuda()
    args.loss_fn = nn.CrossEntropyLoss()  # use cross-entropy loss

    # create optimizer
    args.optimizer = optim.SGD(args.model.parameters(), lr=args.lr,
                               momentum=args.momentum, weight_decay=args.weight_decay)
    assert len(args.optimizer.param_groups) == 1

    # initialize optimizer's momentum
    for p in args.model.parameters():
        args.optimizer.state[p]["momentum_buffer"] = torch.zeros_like(p.data)

    # clone weights for master
    args.master_weights = init_weights(args.model.parameters())
    # clone weights, one for each worker
    args.worker_weights = [init_weights(args.model.parameters()) for _ in range(args.sim_size)]
    # clone optimizer, one for each worker
    args.worker_momentum = [init_momentum(args.model.parameters()) for _ in range(args.sim_size)]
    # create the gamma distribution order
    args.worker_order = iter(GammaRandomWorkerSelection(args))
    # initialize dana
    args.momentum_sum = {id(p): torch.zeros_like(p) for p in args.model.parameters()}
    # initialize warmup
    args.warmup_lr = np.linspace(args.lr / args.sim_size, args.lr,
                                 len(args.train_loader) * args.warmup_epochs).tolist()

    print("*** Training with DANA-DC ***")
    for epoch in range(args.epochs):
        learning_rate_decay(epoch, args)
        train(epoch, args)
        evaluate(epoch, args)
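
# args.warmup_lr above is a per-iteration ramp from lr/sim_size up to lr over the warmup
# epochs. A small stand-alone sketch (names and values are illustrative only) of how such a
# list is typically consumed, popping one value per training step and writing it into the
# optimizer:
import numpy as np
import torch

model = torch.nn.Linear(4, 2)                          # stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
warmup_lr = np.linspace(0.1 / 16, 0.1, 5).tolist()     # e.g. sim_size=16, 5 warmup steps

for step in range(5):
    lr = warmup_lr.pop(0)
    for group in optimizer.param_groups:
        group['lr'] = lr
    print(step, lr)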

import seaborn as sns
matplotlib.use("agg")
from matplotlib import pyplot as plt

from model import *
from generative_utils import *
from utils import *
from arguments import *
from data import *

seed = 12  #12
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.initial_seed()

torch.set_printoptions(precision=2)
plt.rcParams["font.weight"] = "bold"
plt.rcParams["axes.labelweight"] = "bold"
plt.rcParams['lines.linewidth'] = 2.0


def get_df(traj_p):
    df = []
    columns = ['ix', 'x', 'y']
    #columns = ['ix', 't', 'x', 'y']
    for ix in range(traj_p.size(0)):
        #for t in range(traj_p.size(1)):
        #df.append([ix, t, traj_p[ix, t, 0].item(), traj_p[ix, t, 1].item()])
        df.append([ix, traj_p[ix, ..., 0].item(), traj_p[ix, ..., 1].item()])

def main():
    # # check/wait GPU is free
    # allocated_ids = [int(item) for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    # gpu_free = False
    # while not gpu_free:
    #     tmp_gpu_free = True
    #     for gpu_id in allocated_ids:
    #         # print(gpu_id, len(GPUtil.getGPUs()))
    #         mem_used = GPUtil.getGPUs()[gpu_id].memoryUsed
    #         if mem_used > 1000:
    #             # print('mem used', gpu_id, mem_used)
    #             tmp_gpu_free = False
    #             break
    #     gpu_free = tmp_gpu_free
    #     if not gpu_free:
    #         time.sleep(300)

    # parse args
    global args
    args = parse_args(sys.argv[1])
    args.test_size = 512

    # -------------------- default arg settings for this model --------------------
    # TODO: find better way for defining model-specific default args
    if hasattr(args, 'norm'):
        args.normD = args.norm
        args.normQ = args.norm
        args.normG = args.norm
    if hasattr(args, 'lambda_D_GAN') and args.lambda_D_GAN != 1.:
        """ process deprecated lambda_D_GAN """
        args.lambda_GAN = args.lambda_D_GAN
        assert args.lambda_D_GAN == args.lambda_G_GAN

    # add timestamp to ckpt_dir
    # if not args.debug:
    args.timestamp = time.strftime('%m%d%H%M%S', time.localtime())
    args.ckpt_dir += '_' + args.timestamp

    if args.lambda_G_recon > 0:
        args.display_ncols = 5 if args.lambda_dis > 0 else 3
        if args.lambda_dis > 0 and args.lambda_G_rand_recon > 0:
            args.display_ncols += 1
    else:
        args.display_ncols = 3 if args.lambda_dis > 0 else 2

    # !!! FINISH defining args before logging args
    # -------------------- init ckpt_dir, logging --------------------
    os.makedirs(args.ckpt_dir, mode=0o777, exist_ok=True)

    # -------------------- init visu --------------------
    visualizer = Visualizer(args)
    # logger = Logger(osp.join(args.ckpt_dir, 'log'))
    visualizer.logger.log('sys.argv:\n' + ' '.join(sys.argv))
    for arg in sorted(vars(args)):
        visualizer.logger.log('{:20s} {}'.format(arg, getattr(args, arg)))
    visualizer.logger.log('')

    # -------------------- code copy --------------------
    # TODO: find better approach
    # copy config yaml
    shutil.copyfile(sys.argv[1], osp.join(args.ckpt_dir, osp.basename(sys.argv[1])))
    repo_basename = osp.basename(osp.dirname(osp.abspath(__file__)))
    repo_path = osp.join(args.ckpt_dir, repo_basename)
    os.makedirs(repo_path, mode=0o777, exist_ok=True)
    walk_res = os.walk('.')
    useful_paths = [path for path in walk_res
                    if '.git' not in path[0]
                    and 'checkpoints' not in path[0]
                    and 'configs' not in path[0]
                    and '__pycache__' not in path[0]
                    and 'tee_dir' not in path[0]
                    and 'tmp' not in path[0]]
    # print('useful_paths', useful_paths)
    for p in useful_paths:
        for item in p[-1]:
            if not (item.endswith('.py') or item.endswith('.c')
                    or item.endswith('.h') or item.endswith('.md')):
                continue
            old_path = osp.join(p[0], item)
            new_path = osp.join(repo_path, p[0][2:], item)
            basedir = osp.dirname(new_path)
            os.makedirs(basedir, mode=0o777, exist_ok=True)
            shutil.copyfile(old_path, new_path)
    # if args.evaluate:
    #     shutil.copyfile(args.resume, osp.join(args.ckpt_dir, 'model_used.pth.tar'))
    # If cannot find file, will raise FileNotFoundError
    # The destination location must be writable; otherwise, an OSError exception will be raised.
    # If dst already exists, it will be replaced. Special files such as character or block
    # devices and pipes cannot be copied with this function.

    # -------------------- dataset & loader --------------------
    train_dataset = datasets.__dict__[args.dataset](
        train=True,
        transform=transforms.Compose([
            transforms.Resize(args.imageSize, Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        args=args
    )
    visualizer.logger.log('train_dataset: ' + str(train_dataset))
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) % (2 ** 32))
    )
    if not args.debug:
        args.html_iter_freq = len(train_loader) // args.html_per_train_epoch
        visualizer.logger.log('change args.html_iter_freq to %s' % args.html_iter_freq)
        args.save_iter_freq = len(train_loader) // args.html_per_train_epoch
        visualizer.logger.log('change args.save_iter_freq to %s' % args.html_iter_freq)

    test_dataset = datasets.__dict__[args.dataset](
        train=False,
        transform=transforms.Compose([
            transforms.Resize(args.imageSize, Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        args=args
    )
    visualizer.logger.log('test_dataset: ' + str(test_dataset))
    visualizer.logger.log('test img paths:')
    for anno in test_dataset.raw_annotations:
        visualizer.logger.log('%s %d %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f' %
                              (anno[0], anno[1], anno[2], anno[3], anno[4], anno[5],
                               anno[6], anno[7], anno[8], anno[9], anno[10], anno[11]))
    visualizer.logger.log('')
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
        worker_init_fn=lambda x: np.random.seed((torch.initial_seed()) % (2 ** 32)),
        drop_last=True
    )
    # assert len(test_loader) == 1
    print('test_loader has {} images'.format(len(test_loader)))
    # --------------------------------------------------------------------------------

    # -------------------- create model --------------------
    # visualizer.logger.log("=> creating model '{}'".format(args.arch))
    args.gpu_ids = list(range(len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))))
    args.device = torch.device('cuda:0') if args.gpu_ids else torch.device('cpu')

    model_dict = {}
    model_dict['D_nets'] = []
    model_dict['G_nets'] = []

    # D, Q
    if args.lambda_dis > 0:
        if args.recon_pair_GAN:
            infogan_func = models.define_infoGAN_pair_D
        else:
            infogan_func = models.define_infoGAN
        model_dict['D'], model_dict['Q'] = infogan_func(
            args.output_nc, args.ndf, args.which_model_netD,
            args.n_layers_D, args.n_layers_Q, 16, args.passwd_length // 4,
            args.normD, args.normQ, args.init_type, args.init_gain,
            args.gpu_ids, args.use_old_Q, args.use_minus_Q)
        model_dict['G_nets'].append(model_dict['Q'])
        if args.lambda_GAN == 0:
            del model_dict['D']
        else:
            model_dict['D_nets'].append(model_dict['D'])
    else:
        if args.lambda_GAN > 0:
            model_dict['D'] = models.define_D(args.input_nc, args.ndf, args.which_model_netD,
                                              args.n_layers_D, args.normD, args.no_lsgan,
                                              args.init_type, args.init_gain, args.gpu_ids)
            model_dict['D_nets'].append(model_dict['D'])

    # G
    if 'with_noise' in args.which_model_netG or args.lambda_dis == 0.:
        G_input_nc = args.input_nc
    else:
        G_input_nc = args.input_nc + args.passwd_length
    model_dict['G'] = models.define_G(G_input_nc, args.output_nc, args.ngf,
                                      args.which_model_netG, args.n_downsample_G,
                                      args.normG, not args.no_dropout,
                                      args.init_type, args.init_gain, args.gpu_ids,
                                      args.passwd_length,
                                      use_leaky=args.use_leakyG,
                                      use_resize_conv=args.use_resize_conv)
    model_dict['G_nets'].append(model_dict['G'])

    # D_pair
    if args.lambda_pair_GAN > 0:
        model_dict['pair_D'] = models.define_D(args.input_nc * 2, args.ndf,
                                               args.which_model_netD, args.n_layers_D,
                                               args.normD, args.no_lsgan,
                                               args.init_type, args.init_gain, args.gpu_ids)
        model_dict['D_nets'].append(model_dict['pair_D'])

    # FR
    netFR = models.sphere20a(feature=args.feature_layer)
    if len(args.gpu_ids) > 0:
        assert (torch.cuda.is_available())
        netFR.to(args.gpu_ids[0])
        netFR = torch.nn.DataParallel(netFR, args.gpu_ids)
        netFR.module.load_state_dict(torch.load('./pretrained_models/sphere20a_20171020.pth',
                                                map_location='cpu'))
    model_dict['FR'] = netFR
    model_dict['D_nets'].append(netFR)

    visualizer.logger.log('model_dict')
    for k, v in model_dict.items():
        visualizer.logger.log(k + ':')
        if isinstance(v, list):
            visualizer.logger.log('list, len: ' + str(len(v)))
            for item in v:
                visualizer.logger.log(item.module.__class__.__name__, end=' ')
            visualizer.logger.log('')
        else:
            visualizer.logger.log(v)

    # -------------------- criterions --------------------
    criterion_dict = {
        'GAN': models.GANLoss(args.gan_mode).to(args.device),
        'FR': models.AngleLoss().to(args.device),
        'L1': torch.nn.L1Loss().to(args.device),
        'DIS': torch.nn.CrossEntropyLoss().to(args.device),
        'Feat': torch.nn.CosineEmbeddingLoss().to(args.device)
                if args.feature_loss == 'cos' else torch.nn.MSELoss().to(args.device)
    }

    # -------------------- optimizers --------------------
    # considering separate optimizer for each network?
    optimizer_G_params = [{'params': model_dict['G'].parameters(), 'lr': args.lr}]
    if args.lambda_dis > 0:
        optimizer_G_params.append({'params': model_dict['Q'].parameters(), 'lr': args.lr})
    optimizer_G = torch.optim.Adam(optimizer_G_params, lr=args.lr,
                                   betas=(args.beta1, 0.999), weight_decay=args.weight_decay)

    optimizer_D_params = []
    if args.lambda_GAN > 0:
        optimizer_D_params.append({'params': model_dict['D'].parameters(), 'lr': args.lr})
    if not args.fix_FR and args.lambda_FR > 0:
        optimizer_D_params.append({'params': netFR.parameters(), 'lr': args.lr * 0.1})
    if args.lambda_pair_GAN > 0:
        optimizer_D_params.append({'params': model_dict['pair_D'].parameters(), 'lr': args.lr})
    if len(optimizer_D_params):
        optimizer_D = torch.optim.Adam(optimizer_D_params, betas=(args.beta1, 0.999),
                                       weight_decay=args.weight_decay)
    else:
        optimizer_D = None
    optimizer_dict = {
        'G': optimizer_G,
        'D': optimizer_D
    }

    fake_pool = ImagePool(args.pool_size)
    recon_pool = ImagePool(args.pool_size)
    fake_pair_pool = ImagePool(args.pool_size)
    WR_pair_pool = ImagePool(args.pool_size)

    if args.resume:
        if osp.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch'] + 1
            for name, net in model_dict.items():
                if isinstance(net, list):
                    continue
                if hasattr(args, 'not_resume_models') and (name in args.not_resume_models):
                    continue
                if isinstance(net, torch.nn.DataParallel):
                    net = net.module
                if 'state_dict_' + name in checkpoint:
                    try:
                        net.load_state_dict(checkpoint['state_dict_' + name])
                    except Exception as e:
                        visualizer.logger.log('fail to load model ' + name + ' ' + str(e))
                else:
                    visualizer.logger.log('model ' + name + ' not in checkpoints, just skip')
            if args.resume_optimizer:
                for name, optimizer in optimizer_dict.items():
                    if 'optimizer_' + name in checkpoint:
                        optimizer.load_state_dict(checkpoint['optimizer_' + name])
                    else:
                        visualizer.logger.log('optimizer ' + name + ' not in checkpoints, just skip')
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    gc.collect()
    # torch.cuda.empty_cache()
    torch.backends.cudnn.benchmark = True

    # -------------------- miscellaneous --------------------
    if args.lambda_dis > 0:
        fixed_z, fixed_dis_target, fixed_rand_z, fixed_rand_dis_target = generate_code(
            args.passwd_length, args.batch_size, args.device, inv=False)
        print(fixed_z)
    else:
        fixed_z, fixed_rand_z = None, None

    # for epoch in range(args.start_epoch, args.num_epochs):
    #     print('epoch', epoch)
    #     # train
    #     if args.lambda_dis > 0:
    #         model_dict['Q'].train()
    #     if args.lambda_GAN > 0:
    #         model_dict['D'].train()
    #     if args.lambda_pair_GAN > 0:
    #         model_dict['pair_D'].train()
    #     model_dict['G'].train()
    #     if not args.fix_FR:
    #         model_dict['FR'].train()
    #
    #     epoch_start_time = time.time()
    #     train(train_loader, model_dict, criterion_dict, optimizer_dict, fake_pool,
    #           recon_pool, fake_pair_pool, WR_pair_pool, visualizer, epoch, args,
    #           test_loader, fixed_z, fixed_rand_z)
    #     epoch_time = time.time() - epoch_start_time
    #     message = 'epoch %s total time %s\n' % (epoch, epoch_time)
    #     visualizer.logger.log(message)
    #     # gc.collect()
    #     # torch.cuda.empty_cache()
    #
    #     # save model
    #     if epoch % args.save_epoch_freq == 0:
    #         save_model(epoch, model_dict, optimizer_dict, args, iter=len(train_loader))
    #     # test visualization
    #     if epoch % args.html_epoch_freq == 0:
    test(test_loader, model_dict, criterion_dict, visualizer, 5, args,
         fixed_z, fixed_rand_z, 3069)

def init_fn(worker_id):
    random.seed((torch.initial_seed() + worker_id) % (2 ** 32))
    np.random.seed((torch.initial_seed() + worker_id) % (2 ** 32))
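
# Note for the helper above: inside a DataLoader worker, torch.initial_seed() already equals
# base_seed + worker_id, so adding worker_id again only shifts the seed further; each worker
# still gets a distinct value. A sketch of the simpler variant that relies on that property
# (init_fn_simple is a hypothetical name):
import random
import numpy as np
import torch

def init_fn_simple(worker_id):
    worker_seed = torch.initial_seed() % (2 ** 32)  # already differs per worker
    random.seed(worker_seed)
    np.random.seed(worker_seed)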