def test_multihead_performer_fast_attention():
    """Smoke-test the Performer multi-head attention module.

    A single-head ``XformerELSA`` layer is fed two random tensors of the same
    shape; the only property checked is that the output keeps the shape of
    the first input.
    """
    n_batch, seq_len, hidden = 1, 64, 128
    n_heads, n_features = 1, 50

    attention = XformerELSA(
        PerformerConfig(hidden_size=hidden,
                        num_attention_heads=n_heads,
                        nb_features=n_features,
                        kernel_estimator=softmax_plus_kernel,
                        qr_uniform=True))

    # Re-seed from a non-deterministic source so each run sees fresh inputs.
    torch.seed()
    in_shape = (n_batch, seq_len, hidden)
    first = torch.randn(in_shape, dtype=torch.float32)
    second = torch.randn(in_shape, dtype=torch.float32)

    result = attention(first, second)
    assert result.shape == first.shape
def reset_seed():
    """Re-seed torch's default RNG from a non-deterministic source.

    ``torch.seed()`` is retried until it succeeds; every ``RuntimeError`` is
    reported through ``logger`` before the next attempt.
    """
    seeded = False
    while not seeded:
        try:
            torch.seed()
            seeded = True
        except RuntimeError:
            logger.error("Error generating seed")
def init_weights(m):
    """Initialise convolution and batch-norm weights from N(0, 0.01).

    Modules whose class name contains neither ``Conv`` nor ``BatchNorm`` are
    left untouched, as are matching modules that carry no weight tensor.

    Args:
        m: The module to initialise in place.
    """
    # torch.seed() accepts no argument (it picks a random seed itself);
    # torch.manual_seed() is the call that installs a fixed seed.
    # NOTE(review): seeding on every call restarts the random stream, so
    # equally shaped modules receive identical weights - confirm intended.
    torch.manual_seed(777)  # for reproducibility
    classname = m.__class__.__name__
    if 'Conv' not in classname and 'BatchNorm' not in classname:
        return
    weight = getattr(m, 'weight', None)
    if weight is not None:  # e.g. BatchNorm built with affine=False
        weight.data.normal_(0.00, 0.01)
def seed_all(seed=None):
    """Seed the ``numpy``, ``random`` and ``torch`` generators in one call.

    Args:
        seed: Value handed to every generator. With ``None`` each library
            draws its own fresh, non-deterministic seed.
    """
    for seeder in (np.random.seed, random.seed):
        seeder(seed)

    # torch seeds both cpu and cuda, but has separate entry points for a
    # random and for a fixed seed.
    if seed is not None:
        torch.manual_seed(seed)
        return
    torch.seed()
def set_seed(seed: int) -> None:
    """Seed Python's, NumPy's and PyTorch's random generators.

    Args:
        seed: Seed to use.
    """
    random.seed(seed)
    np.random.seed(seed)
    # torch.seed() takes no argument and raised TypeError here;
    # torch.manual_seed() is the call that installs a fixed seed.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
def generate(self, max_len=1000):
    """Generate a document word by word from the language model.

    Inputs:
        max_len: max length of a generated document
    Outputs:
        the text form of a generated document
    """
    words = [SOS]
    i2w = self.args.vocab.i2w

    # index tensors of the words chosen so far, in generation order
    picked = []
    current = state = None
    for step in range(max_len):
        if step == 0:
            # The very first word is drawn at random; re-seeding first makes
            # each call start from a different word.
            torch.seed()
            current = torch.randint(4, len(i2w), (1, ))
            picked.append(current)
            state = self.model.init_hidden(1)

        # Feed the latest word back in and let the model pick the next one.
        logits, state = self.model.forward(current.reshape(1, -1), state)
        _, nxt = self.model.inference(logits)

        # Stop at the end-of-sentence token or once enough words are held.
        if i2w[nxt.item()] == '</s>' or len(picked) == max_len:
            break
        picked.append(nxt)
        current = nxt

    # Translate the collected indices back into words.
    words.extend(i2w[token.item()] for token in picked)
    words.append(EOS)
    return " ".join(words)
def test_softmax_plus_kernel():
    """Check the random features produced by ``softmax_plus_kernel``.

    Verified properties:
        - the feature tensors have shape ``[batch, heads, dim1, num_features]``;
        - the pairwise scores built from them are non-negative.

    Inputs are a query and a key of shape ``[batch, heads, dim1, qk_dim]`` and
    a projection matrix ``W`` of shape ``[num_features, w_dim]``.

    Note:
        MSE is not tested, so ``W`` is plain Gaussian noise rather than being
        drawn as squared chunks / orthogonalized.
    """
    n_batch, n_heads, seq_len = 2, 2, 128
    qk_dim = 512
    n_features = 50
    w_dim = 512  # =qk_dim in original paper's jax code

    lead_dims = (n_batch, n_heads, seq_len)

    # torch.randn draws from N(0, 1)
    torch.seed()
    query = torch.randn(lead_dims + (qk_dim, ), dtype=torch.float32)
    key = torch.randn(lead_dims + (qk_dim, ), dtype=torch.float32)
    W = torch.randn((n_features, w_dim), dtype=torch.float32)

    query_rff = softmax_plus_kernel(query, projection_matrix=W, is_query=True)
    key_rff = softmax_plus_kernel(key, projection_matrix=W, is_query=False)

    expected_shape = torch.Size(lead_dims + (n_features, ))
    assert query_rff.shape == expected_shape
    assert key_rff.shape == expected_shape

    rff_scores = torch.einsum('...ik, ...jk -> ...ij', query_rff, key_rff)
    assert torch.all(rff_scores >= 0)
def determinize(seed: Optional[int] = None,
                be_deterministic: bool = True) -> None:
    """Seed torch, NumPy (when installed) and ``random``, and configure cuDNN.

    Args:
        seed: Seed applied to every generator. When omitted, deterministic
            mode falls back to ``0`` while non-deterministic mode lets each
            library pick a fresh random seed.
        be_deterministic: ``True`` puts ``torch.backends.cudnn`` into
            deterministic mode with benchmarking off; ``False`` does the
            opposite.

    NumPy is optional: a missing installation is silently ignored.
    """
    import random

    cudnn = torch.backends.cudnn
    if be_deterministic:
        cudnn.deterministic, cudnn.benchmark = True, False
        seed = 0 if seed is None else seed
    else:
        cudnn.deterministic, cudnn.benchmark = False, True

    # torch has distinct entry points for random and for fixed seeding.
    has_cuda = torch.cuda.is_available()
    if seed is None:
        if has_cuda:
            torch.cuda.seed_all()
        torch.seed()
    else:
        if has_cuda:
            torch.cuda.manual_seed_all(seed)
        torch.manual_seed(seed)

    # numpy and random both treat a ``None`` seed as "pick one yourself".
    try:
        import numpy as np
    except ImportError:
        pass
    else:
        np.random.seed(seed)
    random.seed(seed)
def randn_seed(*shape, device=None, seed=0):
    """Draw a reproducible standard-normal tensor.

    The global torch RNG is left exactly as the caller had it.

    Args:
        *shape: Output dimensions, forwarded to ``torch.randn``.
        device: Device the sampled tensor is moved to.
        seed: Seed that fully determines the returned values.

    Returns:
        The sampled tensor, moved to ``device``.
    """
    # torch.seed() cannot be used to "remember" the current state: it
    # re-seeds the global generator with a random value, so restoring that
    # value afterwards still throws away the caller's seeding. A private
    # generator avoids touching global state at all.
    generator = torch.Generator()
    generator.manual_seed(seed)
    return torch.randn(*shape, generator=generator).to(device)
def __init__(self, dim1, dim2, numDataPoints, numLabels, numColorChannels=3, seedToUse=None):
    """Create random images and random integer labels.

    Args:
        dim1: Size of the third image dimension.
        dim2: Size of the fourth image dimension.
        numDataPoints: Number of images (and labels) to generate.
        numLabels: Exclusive upper bound of the generated labels.
        numColorChannels: Size of the second image dimension.
        seedToUse: Optional seed making the generated data reproducible;
            ``None`` leaves the torch RNG untouched.
    """
    self.dim1 = dim1
    self.dim2 = dim2
    self.numDataPoints = numDataPoints
    self.seed = seedToUse
    self.numLabels = numLabels
    # torch.seed() takes no argument and raised TypeError for every seed;
    # comparing against None also lets a seed of 0 take effect.
    if self.seed is not None:
        torch.manual_seed(self.seed)
    self.images = torch.randn(numDataPoints, numColorChannels, dim1, dim2)
    self.labels = torch.randint(0, numLabels, (numDataPoints, ))
def test_create_and_check_xformer_for_lm():
    """Run the Xformer language-model wrapper once and check the logits shape."""
    n_batch, n_tokens, n_labels = 13, 20, 3

    # use same default setting as BERT test in HuggingFace;
    config = PerformerConfig(
        vocab_size=100,
        hidden_size=32,
        num_hidden_layers=5,
        num_attention_heads=4,
        intermediate_size=40,
        hidden_act=None,
        hidden_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        initializer_range=0.02,
        nb_features=20,
        kernel_estimator=softmax_plus_kernel,
        qr_uniform=True,
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"

    def random_ids(upper):
        # integers in [0, upper) with one row per batch element
        return torch.randint(0, upper, (n_batch, n_tokens)).to(device)

    torch.seed()
    input_ids = random_ids(config.vocab_size - 1)
    token_type_ids = random_ids(config.type_vocab_size - 1)
    labels = random_ids(n_labels)

    model = FairseqWrapperXformerModelForLMDecoder(None, config).to(device)
    model.eval()
    results = model(
        input_ids=input_ids,
        token_type_ids=token_type_ids,
        labels=labels,
    )
    expected_shape = (n_batch, n_tokens, config.vocab_size)
    assert results.logits.shape == expected_shape
def main():
    """Train ``Net`` on the Balls_CF_Detection data, testing after each epoch.

    Batch sizes and the epoch bound come from the module-level names
    ``batch_size``, ``test_batch_size`` and ``epochs``.
    """
    # start every run from a fresh, non-deterministic seed
    torch.seed()

    train_data = Balls_CF_Detection("data\\train\\train\\")
    test_data = Balls_CF_Detection("data\\train\\val\\")
    make_loader = torch.utils.data.DataLoader
    train_loader = make_loader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = make_loader(test_data, batch_size=test_batch_size)

    model = Net().to("cuda")
    optimizer = optim.Adam(model.parameters(), lr=0.0025)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1)

    epoch = 1
    while epoch < epochs:
        train(model, train_loader, optimizer, epoch)
        test(model, test_loader)
        scheduler.step()
        epoch += 1
def set_seed(*args, **kwargs):
    """Call ``func``, seeding torch first when a ``seed`` keyword is given.

    With ``seed=<value>`` the torch RNG is seeded with that value for the
    duration of the call and re-seeded non-deterministically afterwards.
    All arguments, ``seed`` included, are forwarded to ``func`` unchanged.

    Returns:
        Whatever ``func`` returns.
    """
    if 'seed' not in kwargs:
        return func(*args, **kwargs)

    torch.manual_seed(kwargs['seed'])
    try:
        # Invoke func exactly once: previously it ran a second time after
        # the seed had been reset, and that un-seeded result was returned.
        return func(*args, **kwargs)
    finally:
        # torch.seed() already installs a fresh random seed on its own.
        torch.seed()
def _init_worker_process(parent_logging_config, out_dir: Optional[str]):
    """Prepare a freshly started worker process.

    Logging is configured per worker so that each process uses a different
    log file (to overcome file synchronization issues), the output directory
    is set when one is given, and all random generators are re-seeded.
    """
    # the process name ends in "-<n>"; that suffix identifies the worker
    worker_id = current_process().name.rsplit("-", 1)[-1]
    log.configure_for_subprocess(parent_logging_config, worker_id)

    if out_dir is not None:
        set_output_directory(out_dir)

    # freshly initialize the PRNGs such that different processes use
    # different randomness
    for reseed in (random.seed, np.random.seed):
        reseed()
    if torch is not None:
        torch.seed()
        torch.cuda.seed_all()
        torch_initialize()
def tng_dataloader(self):
    """Build the shuffled training ``DataLoader``.

    The RNG is re-seeded non-deterministically first: the CUDA generator
    when CUDA is available, the default torch generator otherwise.
    """
    print('tng data loader called')

    reseed = torch.cuda.seed if torch.cuda.is_available() else torch.seed
    reseed()

    return DataLoader(dataset=self.training_set,
                      batch_size=self.batch_size,
                      shuffle=True,
                      num_workers=4,
                      pin_memory=True,
                      drop_last=True)
def fit_one(self, Xc, Xe, y, idx):
    """Train one ensemble member and return it.

    Args:
        Xc: First input tensor, passed to the model as its first argument.
        Xe: Second input tensor, passed to the model as its second argument.
        y: Target tensor; its first two dimensions scale the L1 penalty.
        idx: Index of the member in ``self.models`` to copy as the start
            point when a complete ensemble already exists.

    Returns:
        The trained model, left in training mode.
    """
    torch.seed()  # fresh, non-deterministic seed for every member
    dataset = TensorDataset(Xc, Xe, y)
    loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
    if self.models is not None and len(self.models) == self.num_ensembles:
        # continue from a copy so the stored member is not modified
        model = deepcopy(self.models[idx])
    else:
        model = BaseNet(self.num_cont,
                        self.num_enum,
                        self.num_out,
                        noise_lb=1e-4,
                        num_uniqs=None if self.num_enum == 0 else self.conf['num_uniqs'],
                        num_layers=self.num_layers,
                        num_hiddens=self.num_hiddens,
                        output_noise=self.output_noise,
                        rand_prior=self.rand_prior)
    opt = torch.optim.Adam(model.parameters(), lr=self.lr)
    model.train()
    for epoch in range(self.num_epochs):
        epoch_loss = 0
        for bxc, bxe, by in loader:
            py = model(bxc, bxe)
            data_loss = self.loss(py, by)
            reg_loss = 0.
            for p in model.parameters():
                reg_loss += self.l1 * p.abs().sum() / (y.shape[0] * y.shape[1])
            loss = data_loss + reg_loss
            opt.zero_grad()
            loss.backward()
            opt.step()
            # XXX: Adversarial training not used
            # Detach before accumulating: the running total is only
            # reported, and keeping it attached would hold on to every
            # batch's autograd history for the whole epoch.
            epoch_loss += data_loss.detach() * bxc.shape[0]
        if epoch % self.print_every == 0:
            if self.verbose:
                print("Epoch %d, %s loss = %g" %
                      (epoch, self.loss_name, epoch_loss / Xc.shape[0]),
                      flush=True)
    return model
def fit_one(self, X: pd.DataFrame, y: pd.DataFrame, idx: int):
    """Train one ensemble member and return it.

    Args:
        X: Input features; converted by ``self.data_processing``.
        y: Targets; converted by ``self.data_processing``. Its shape scales
            the L1 penalty.
        idx: Index of the member in ``self.models`` to copy as the start
            point when a complete ensemble already exists.

    Returns:
        The trained model, left in training mode.
    """
    torch.seed()  # fresh, non-deterministic seed for every member
    self.X, self.y = self.data_processing(X=X, y=y)
    assert self.in_splits is not None
    dataset = TensorDataset(self.X, self.y)
    dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)
    if self.models is not None and len(self.models) == self.num_ensemble:
        # continue from a copy so the stored member is not modified
        model = deepcopy(self.models[idx])
    else:
        model = ConditionalNet(in_splits=self.in_splits,
                               num_out=self.num_out,
                               output_noise=self.output_noise)
    opt = torch.optim.Adam(model.parameters(), lr=self.lr)
    model.train()
    for epoch in range(self.num_epoch):
        epoch_loss = 0
        for bx, by in dataloader:
            py = model(bx)
            data_loss = self.loss(py, by)
            reg_loss = 0.
            for p in model.parameters():
                reg_loss += self.l1 * p.abs().sum() / (y.shape[0] * y.shape[1])
            loss = data_loss + reg_loss
            opt.zero_grad()
            loss.backward()
            opt.step()
            # XXX: Adversarial training not used
            # Detach before accumulating: the running total is only
            # reported, and keeping it attached would hold on to every
            # batch's autograd history for the whole epoch.
            epoch_loss += data_loss.detach() * bx.shape[0]
        if epoch % self.print_every == 0:
            if self.verbose:
                print("Epoch %d, %s loss = %g" %
                      (epoch, self.loss_name, epoch_loss / X.shape[0]),
                      flush=True)
    return model
def main():
    """Apply the model to the test split of the file named by ``test_file``.

    The data is split 80/10/10 under a fixed seed; only the last part is
    loaded, as one shuffled batch, and handed to ``apply_model``.
    """
    args = argparse.ArgumentParser().parse_args()
    args.device = torch.device('cpu')
    args.test_batch_size = 20
    args.test_file = 'anormal.npy'  # alternative input: 'normal.npy'

    samples = torch.as_tensor(np.load(args.test_file)).float()
    all_set = TensorDataset(samples)

    total = len(all_set)
    train_len = int(total * 0.8)
    val_len = int(total * 0.1)
    test_len = total - train_len - val_len

    # a fixed seed keeps the split identical from run to run
    torch.manual_seed(0)
    _, _, test_set = random_split(all_set, (train_len, val_len, test_len))
    torch.seed()  # disable manual seed

    loader = DataLoader(test_set, batch_size=len(test_set), shuffle=True)
    apply_model(loader, args)
def __init__(self, model_dir, hp=None, sigma_rec=None, dt=None):
    """
    Initializing the model with information from hp

    Args:
        model_dir: string, directory of the model
        hp: a dictionary or None; when None it is loaded from model_dir
        sigma_rec: if not None, overwrite the sigma_rec passed by hp
        dt: if not None, overwrite hp['dt'] and recompute hp['alpha']

    Raises:
        ValueError: if no hp can be found for model_dir, or if
            hp['in_type'] is not 'normal'.
    """
    if hp is None:
        hp = tools.load_hp(model_dir)
        if hp is None:
            raise ValueError(
                'No hp found for model_dir {:s}'.format(model_dir))

    # torch.seed() takes no argument and raised TypeError here;
    # torch.manual_seed() is the call that installs a fixed seed.
    torch.manual_seed(hp['seed'])
    self.rng = np.random.RandomState(hp['seed'])

    if sigma_rec is not None:
        print('Overwrite sigma_rec with {:0.3f}'.format(sigma_rec))
        hp['sigma_rec'] = sigma_rec

    if dt is not None:
        print('Overwrite original dt with {:0.1f}'.format(dt))
        hp['dt'] = dt
        hp['alpha'] = 1.0 * hp['dt'] / hp['tau']

    # Input, target output, and cost mask
    # Shape: [Time, Batch, Num_units]
    if hp['in_type'] != 'normal':
        # The message used to present the rejected value as the supported one.
        raise ValueError(
            "Only in_type 'normal' is supported, got {!r}".format(
                hp['in_type']))

    self._build(hp)

    self.model_dir = model_dir
    self.hp = hp
def set_seed(seed, logger=None):
    """Seed ``random`` and torch, and switch cuDNN to deterministic mode.

    seed: True, None, int
        - None (or any falsy value): nothing is seeded and None is returned
        - True: torch picks a seed via torch.seed(), which is then used
        - int: used as the manual seed

    Returns the seed that was applied, or None when seeding was skipped.
    """
    if not seed:
        return None

    # `True` asks torch to choose the seed; anything else is used verbatim.
    chosen = torch.seed() if seed is True else seed

    random.seed(chosen)
    torch.manual_seed(chosen)
    torch.backends.cudnn.deterministic = True

    if logger:
        logger.info("Manual seed %s", chosen)
        for note in ("Seed training is enabled with deterministic CUDNN.",
                     "Model training will slow down considerably.",
                     "Restarting from checkpoints is undefined behaviour."):
            logger.warning(note)
    return chosen
def main():
    """Train ``Network`` with ``ResTCN_trainer``, logging and checkpointing.

    A run number in ``[0, 20)`` derived from a fresh torch seed names both
    the TensorBoard directory and the checkpoint directory.
    """
    parse_args()
    seed = torch.seed() % 20
    run_id = str(seed)

    logger = Logger(cfg.TENSORBOARD_DIR + cfg.MODEL.NAME + "/" + run_id)
    model_dir = cfg.MODEL.DIR + cfg.MODEL.NAME + "/" + run_id
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    model = Network(cfg)
    dump_input = torch.rand((1, cfg.DATASET.NUM_JOINTS, cfg.DEFAULT_FRAMES))
    logger.add_graph(model, (dump_input, ))  # Log Model Architecture

    # Toggle to get model summary
    summary(model, dump_input.shape[1:], batch_size=32, device="cpu")

    trainer = ResTCN_trainer(model)
    optimizer = trainer.optimizer

    print("------STARTING TRAINING-------")
    separator = "-" * 50
    for epoch in range(cfg.EPOCHS):
        training_log = trainer.train()
        print(separator)
        print("Epoch: {} & Loss: {}".format(epoch, training_log["Loss"]))
        print(separator)
        logger.log_scalars(training_log, logger.step)
        logger.step += 1

        # checkpoint only on every SAVE_FREQUENCY-th epoch
        if epoch % cfg.SAVE_FREQUENCY != 0:
            continue
        perf_indicator = trainer.cal_accuracy('train')
        checkpoint = {
            'epoch': epoch + 1,
            'model': cfg.MODEL.NAME + run_id,
            'state_dict': model.state_dict(),
            'perf': perf_indicator,
            'optimizer': optimizer.state_dict(),
        }
        save_checkpoint(checkpoint, output_dir=model_dir)
def main():
    """ Training

    Parses the options, seeds the run, loads data and model, then runs
    inference, testing (when ``path_to_weights`` is set) or training,
    depending on ``opt.phase``.
    """
    opt = Options().parse()
    opt.print_freq = opt.batchsize
    seed(opt.manualseed)
    # Report the seed without changing it: torch.seed() would install a new
    # random seed here and print that one instead of the seed in effect.
    print("Seed:", str(torch.initial_seed()))
    if opt.phase == "inference":
        opt.batchsize = 1
    data = load_data(opt)
    model = load_model(opt, data)
    if opt.phase == "inference":
        model.inference()
    else:
        if opt.path_to_weights:
            model.test()
        else:
            train_start = time.time()
            model.train()
            train_time = time.time() - train_start
            print(f'Train time: {train_time} secs')
grid_img = torchvision.utils.make_grid(images, padding=2, normalize=True) # Save images torchvision.utils.save_image(grid_img, f'{save_path}/iter{batch_iters}.png') logger.add_image("fixed_inputs", grid_img, batch_iters) # Checkpoint model try: torch.save({ 'epoch': epoch, 'batch_iters': batch_iters, 'optimG_params': optimG.state_dict(), 'optimD_params': optimD.state_dict(), 'gen_params': netG.state_dict(), 'disc_params': netD.state_dict(), 'seed': torch.seed(), }, f'{save_path}/iter{batch_iters}.save') except: print("Error checkpointing model") # Print progress msg print(f'Epoch: {epoch}/{num_epochs}\tbatch_prog: {batch_num}/{len(dataloader)}\titer: {batch_iters}') # Increment batch_iters batch_iters += 1
import os
# NOTE(review): presumably works around the duplicate OpenMP runtime abort
# seen with some builds - confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

#****************************** Data ******************************
# Generate 1000 pairs of 14x14 size pictures
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)

# Normalization of the inputs
# Both sets are normalized in place with the statistics of the training set.
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

#****************************** Parameters ******************************
# Re-seed torch's RNG from a non-deterministic source: runs are not repeatable.
torch.seed()
nb_hidden = 100
mini_batch_size = 100
optimizer = 'SGD'
learning_rate = 1e-1
weight_sharing = True
auxiliary_loss = True

#****************************** Models error mean and standard deviation ******************************
# Filled in by the evaluation code that follows.
ErrorsMean = []
ErrorsSD = []

#****************************** FNN model with weight sharing and an auxiliary loss ******************************
print("Evaluation of the FNN model with weight sharing and an auxiliary loss")
def load_or_create_experiment(force_load=False):
    """Parse the command line and set up a new or resumed experiment.

    Chooses the checkpoint to resume from (if any), builds the configuration,
    seeds torch, numpy and ``random``, and optionally enables deterministic
    mode.

    Args:
        force_load: When true, ``--load_state`` becomes a required argument.

    Returns:
        Tuple ``(args, config, seed, DEBUG, load_state_path,
        checkpoint_file_prefix)``.

    Exits the process when ``--new_experiment`` and ``--load_state`` are
    combined, or when several checkpoint files match and none was selected.
    """
    matplotlib.use('Agg')

    # detect debugger
    gettrace = getattr(sys, 'gettrace', None)
    DEBUG = gettrace is not None and gettrace()
    if DEBUG:
        print('DEBUG MODE - parallelism disabled')

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        prog='', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-g',
                        '--gpuid',
                        type=int,
                        default=0,
                        help='CUDA device id to use')
    parser.add_argument('-d',
                        '--dataset',
                        type=str,
                        required=True,
                        choices=[
                            'mnist', 'colon_cancer', 'breast_cancer',
                            'camelyon', 'messidor'
                        ],
                        help='Select dataset')
    parser.add_argument(
        '-n',
        '--new_experiment',
        default=False,
        action='store_true',
        help=
        'Overwrite any saved state and start a new experiment (saved checkpoint will be lost)'
    )
    parser.add_argument(
        '-l',
        '--load_state',
        metavar='STATE_FILE',
        type=str,
        default=None,
        required=force_load,
        help=
        'Continue training from specified state file (saved checkpoint will be lost)'
    )
    parser.add_argument('-c',
                        '--run_name_prefix',
                        type=str,
                        default=None,
                        help='Prefix for the experiment name')
    parser.add_argument('-w',
                        '--weighting_attention',
                        default=False,
                        action='store_true')
    parser.add_argument('--deterministic',
                        type=str2bool,
                        default=True,
                        help='Use deterministic mode (slightly slower)')
    # every setting can also be overridden from the command line
    for param_name, param_type in Settings.as_params():
        parser.add_argument('--{}'.format(param_name), type=param_type)
    args = parser.parse_args()

    if args.new_experiment and args.load_state:
        print(
            'You cannot load state and start a new experiment at the same time'
        )
        sys.exit(-1)

    checkpoint_file_prefix = '{}.{}.{}'.format(CHECKPOINT_PREFIX,
                                               args.dataset,
                                               getpass.getuser())
    checkpoint_files = get_state_path_for_prefix(checkpoint_file_prefix)

    # An explicit --load_state wins; otherwise resume from the single saved
    # checkpoint unless a new experiment was requested.
    load_state_path = None
    if args.load_state:
        load_state_path = args.load_state
    elif not args.new_experiment and len(checkpoint_files) > 0:
        if len(checkpoint_files) > 1:
            print('Multiple checkpoint files detected: {}'.format(
                checkpoint_files))
            print(
                'Leave only one and remove all others to continue or specify which one to use with --load_state'
            )
            sys.exit(1)
        load_state_path = checkpoint_files[0]

    config = None
    if load_state_path:
        print('Loading state from: {}'.format(load_state_path))
        if args.deterministic:
            print(
                'WARNING: resuming from saved state is not fully deterministic!'
            )
        config = load_config_from_train_state(load_state_path)
    if config is None:
        print('Using default base settings for', args.dataset)
        config = get_settings(args.dataset)

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
    print('CUDA available:', torch.cuda.is_available())

    config = config.new_from_params(args)
    print(config)

    if config.random_seed_value is not None:
        seed = config.random_seed_value
    elif config.random_seed_id is not None and config.random_seed_id >= 0:
        seed = config.random_seed_presets[config.random_seed_id]
    else:
        # torch.seed() yields a 64-bit value, but numpy.random.seed() only
        # accepts seeds below 2**32 and raises ValueError otherwise.
        seed = torch.seed() % 2**32
    torch.manual_seed(seed)
    numpy.random.seed(seed)
    random.seed(seed)
    print('Seed: {}'.format(seed))

    if args.deterministic:
        # Make deterministic.
        os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
        # torch.set_deterministic was renamed; prefer the current name and
        # fall back to the old one on older PyTorch releases.
        enable_determinism = getattr(torch, 'use_deterministic_algorithms',
                                     None)
        if enable_determinism is None:
            enable_determinism = torch.set_deterministic
        enable_determinism(True)
        print('Deterministic mode enabled.')
    else:
        print('WARNING: proceeding with non-deterministic mode.')

    return args, config, seed, DEBUG, load_state_path, checkpoint_file_prefix
# flake8: noqa import torch # seed reveal_type(torch.seed()) # E: int # manual_seed reveal_type(torch.manual_seed(3)) # E: torch._C.Generator # initial_seed reveal_type(torch.initial_seed()) # E: int # get_rng_state reveal_type(torch.get_rng_state()) # E: {Tensor} # bernoulli reveal_type(torch.bernoulli(torch.empty(3, 3).uniform_(0, 1))) # E: {Tensor} # multinomial weights = torch.tensor([0, 10, 3, 0], dtype=torch.float) reveal_type(torch.multinomial(weights, 2)) # E: {Tensor} # normal reveal_type(torch.normal(2, 3, size=(1, 4))) # E: {Tensor} # poisson reveal_type(torch.poisson(torch.rand(4, 4) * 5)) # E: {Tensor} # rand reveal_type(torch.rand(4)) # E: {Tensor} reveal_type(torch.rand(2, 3)) # E: {Tensor}
def main():
    """Train a small graph model on four alcohols and test it on a longer one.

    The freshly drawn torch seed is printed first so it is on record.
    """
    print(torch.seed())

    # Define the atom / bond features of interest.
    atom_features = [
        AtomicElement(["C", "O", "H"]),
        AtomConnectivity(),
    ]
    bond_features = [
        BondOrder(),
    ]
    # Total length of the input atomic feature vector
    n_atom_features = sum(map(len, atom_features))

    def make_loader(smiles):
        # one unshuffled batch holding every molecule in `smiles`
        dataset = DGLMoleculeDataset.from_smiles(
            smiles,
            atom_features,
            bond_features,
            label_function,
        )
        return DGLMoleculeDataLoader(dataset,
                                     batch_size=len(smiles),
                                     shuffle=False)

    # Load in the training and test data
    training_loader = make_loader(["CO", "CCO", "CCCO", "CCCCO"])
    test_loader = make_loader(["CCCCCCCCCO"])

    # Define the model.
    n_gcn_layers, n_gcn_hidden_features = 5, 128
    n_am1_layers, n_am1_hidden_features = 2, 64
    learning_rate = 0.001

    convolution = ConvolutionModule(
        architecture="SAGEConv",
        in_feats=n_atom_features,
        hidden_feats=[n_gcn_hidden_features] * n_gcn_layers,
    )
    charge_readout = ReadoutModule(
        pooling_layer=PoolAtomFeatures(),
        readout_layers=SequentialLayers(
            in_feats=n_gcn_hidden_features,
            hidden_feats=[n_am1_hidden_features] * n_am1_layers + [2],
            activation=["ReLU"] * n_am1_layers + ["Identity"],
        ),
        postprocess_layer=ComputePartialCharges(),
    )
    model = DGLMoleculeLightningModel(
        convolution_module=convolution,
        # The keys of the readout modules should correspond to keys in the
        # label dictionary.
        readout_modules={"am1-charges": charge_readout},
        learning_rate=learning_rate,
    )
    print(model)

    # Train the model
    n_epochs = 100
    n_gpus = 1 if torch.cuda.is_available() else 0
    print(f"Using {n_gpus} GPUs")

    trainer = pl.Trainer(gpus=n_gpus, min_epochs=n_epochs, max_epochs=n_epochs)
    trainer.fit(model, train_dataloaders=training_loader)
    trainer.test(model, test_dataloaders=test_loader)
def __init__(self, **net_params):
    """Build the network: parameters, training tensors, layers and loss.

    Args:
        **net_params: Overrides for the entries of ``net_defaults``; every
            merged entry becomes an attribute of the instance.

    Raises:
        ValueError: if the weight or bias initialization type is not one of
            'default', 'uniform' or 'normal'.
        AssertionError: if no tree is configured.
    """
    super(FamilyTreeNet, self).__init__()

    # Merge default params with overrides and make them properties
    net_params = {**net_defaults, **net_params}
    for key, val in net_params.items():
        setattr(self, key, val)

    self.device, self.torchfp, self.zeros_fn = util.init_torch(self.device, self.torchfp)
    if self.device.type == 'cuda':
        print('Using CUDA')
    else:
        print('Using CPU')

    # Without a configured seed, let torch pick one and remember it on the
    # instance; otherwise install the configured seed.
    if self.seed is None:
        self.seed = torch.seed()
    else:
        torch.manual_seed(self.seed)

    # Get training tensors
    self.n_trees = len(self.trees)
    assert self.n_trees > 0, 'Must learn at least one tree'

    # Start from empty matrices and grow them one tree at a time.
    self.person1_mat = self.zeros_fn((0, 0))
    self.person2_mat = self.zeros_fn((0, 0))
    self.p2_tree_mask = self.zeros_fn((0, 0))
    self.rel_mat = self.zeros_fn((0, 12 if self.share_rel_units else 0))

    self.full_tree = familytree.FamilyTree([], [])
    self.each_tree = []

    for i, tree_name in enumerate(self.trees):
        this_tree = familytree.get_tree(name=tree_name)
        self.full_tree += this_tree
        self.each_tree.append(this_tree)

        this_p1, this_rels, this_p2 = this_tree.get_io_mats(zeros_fn=self.zeros_fn, cat_fn=torch.cat)
        # Each tree gets its own block on the diagonal of the person matrices.
        self.person1_mat = torch.block_diag(self.person1_mat, this_p1)
        self.person2_mat = torch.block_diag(self.person2_mat, this_p2)
        self.p2_tree_mask = torch.block_diag(self.p2_tree_mask, torch.ones_like(this_p2))

        # Shared relationship units stack rows over the same columns;
        # otherwise every tree gets its own relationship columns.
        if self.share_rel_units:
            self.rel_mat = torch.cat((self.rel_mat, this_rels), 0)
        else:
            self.rel_mat = torch.block_diag(self.rel_mat, this_rels)

        # Remember the dimensions of the first tree.
        if i == 0:
            self.n_inputs_first, self.n_people_first = this_p1.shape

    self.n_inputs, self.person1_units = self.person1_mat.shape
    self.rel_units = self.rel_mat.shape[1]
    self.person2_units = self.person2_mat.shape[1]

    # Make layers
    def make_layer(in_size, out_size):
        return nn.Linear(in_size, out_size, bias=self.use_biases).to(self.device)

    self.person1_to_repr = make_layer(self.person1_units, self.person1_repr_units)
    self.rel_to_repr = make_layer(self.rel_units, self.rel_repr_units)
    total_repr_units = self.person1_repr_units + self.rel_repr_units
    self.repr_to_hidden = make_layer(total_repr_units, self.hidden_units)
    # The optional pre-output layer sits between hidden and person2 output.
    if self.use_preoutput:
        self.hidden_to_preoutput = make_layer(self.hidden_units, self.preoutput_units)
        self.preoutput_to_person2 = make_layer(self.preoutput_units, self.person2_units)
    else:
        self.hidden_to_person2 = make_layer(self.hidden_units, self.person2_units)

    # Initialize with small random weights
    def init_uniform(param, offset, prange):
        # uniform over an interval of width prange centred on offset
        a = offset - prange/2
        b = offset + prange/2
        nn.init.uniform_(param.data, a=a, b=b)

    def init_normal(param, offset, prange):
        nn.init.normal_(param.data, mean=offset, std=prange/2)

    def init_default(*_):
        # keep whatever nn.Linear set up
        pass

    init_fns = {'default': init_default, 'uniform': init_uniform, 'normal': init_normal}

    with torch.no_grad():
        for layer in self.children():
            # An unknown type name surfaces as KeyError from the lookup.
            try:
                init_fns[self.weight_init_type](layer.weight, self.weight_init_offset, self.weight_init_range)
            except KeyError:
                raise ValueError('Weight initialization type not recognized')

            if layer.bias is not None:
                try:
                    init_fns[self.bias_init_type](layer.bias, self.bias_init_offset, self.bias_init_range)
                except KeyError:
                    raise ValueError('Bias initialization type notn recognized')

    # For simplicity, instead of using the "liberal" loss function described in the paper, make the targets
    # 0.1 (for false) and 0.9 (for true) and use regular mean squared error.
    # The offset is only applied for a sigmoid activation; other activations
    # train against the raw person2 matrix.
    if self.act_fn == torch.sigmoid:
        self.person2_train_target = (1-self.target_offset) * self.person2_mat + self.target_offset/2
    else:
        self.person2_train_target = self.person2_mat

    self.criterion = self.loss_fn(reduction=self.loss_reduction)
def calculate_metrics(nets, args, step, mode):
    """Generate images for every domain pair and report LPIPS and FID.

    For each (source, target) domain pair under ``args.val_img_dir`` the
    generated images are written to ``args.eval_dir/<src>2<trg>``, and two
    LPIPS figures are recorded: one over outputs generated from the same
    source batch ("LPIPS") and one over outputs generated from different
    source batches with a single style ("LPIPS^t"). All LPIPS values are
    saved as JSON; FID is then computed by ``calculate_fid_for_all_tasks``.

    Args:
        nets: Holder of the networks used here (``generator``,
            ``mapping_network``, ``style_encoder``, ``fan``).
        args: Run options; fields read are ``eval_seed``, ``val_img_dir``,
            ``eval_dir``, ``img_size``, ``val_batch_size``, ``w_hpf``,
            ``latent_dim`` and ``num_outs_per_domain``.
        step: Training step, used in the name of the JSON report.
        mode: 'latent' (styles from random codes) or 'reference' (styles
            from reference images of the target domain).
    """
    print('Calculating evaluation metrics...')
    assert mode in ['latent', 'reference']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Fixed seed for the evaluation; undone by torch.seed() at the very end.
    torch.manual_seed(args.eval_seed)

    domains = os.listdir(args.val_img_dir)
    domains.sort()
    num_domains = len(domains)
    print('Number of domains: %d' % num_domains)

    lpips_dict = OrderedDict()
    for trg_idx, trg_domain in enumerate(domains):
        src_domains = [x for x in domains if x != trg_domain]

        if mode == 'reference':
            path_ref = os.path.join(args.val_img_dir, trg_domain)
            loader_ref = get_eval_loader(root=path_ref,
                                         img_size=args.img_size,
                                         batch_size=args.val_batch_size,
                                         imagenet_normalize=False,
                                         drop_last=True)

        for _, src_domain in enumerate(src_domains):
            path_src = os.path.join(args.val_img_dir, src_domain)
            loader_src = get_eval_loader(root=path_src,
                                         img_size=args.img_size,
                                         batch_size=args.val_batch_size,
                                         imagenet_normalize=False)

            task = '%s2%s' % (src_domain, trg_domain)
            path_fake = os.path.join(args.eval_dir, task)
            # start from an empty output directory for this task
            shutil.rmtree(path_fake, ignore_errors=True)
            os.makedirs(path_fake)

            lpips_values = []
            print('Generating images and calculating LPIPS for %s...' % task)
            for i, x_src in enumerate(tqdm(loader_src, total=len(loader_src))):
                N = x_src.size(0)
                x_src = x_src.to(device)
                y_trg = torch.tensor([trg_idx] * N).to(device)
                masks = nets.fan.get_heatmap(x_src) if args.w_hpf > 0 else None

                # generate 10 outputs from the same input
                group_of_images = []
                for j in range(args.num_outs_per_domain):
                    if mode == 'latent':
                        z_trg = torch.randn(N, args.latent_dim).to(device)
                        s_trg = nets.mapping_network(z_trg, y_trg)
                    else:
                        # NOTE(review): iter_ref is not assigned before its
                        # first use, so the bare except is what creates it
                        # (and restarts it once exhausted); it also hides
                        # any other error raised by the first attempt.
                        try:
                            x_ref = next(iter_ref).to(device)
                        except:
                            iter_ref = iter(loader_ref)
                            x_ref = next(iter_ref).to(device)

                        # trim the reference batch to the source batch size
                        if x_ref.size(0) > N:
                            x_ref = x_ref[:N]
                        s_trg = nets.style_encoder(x_ref, y_trg)

                    x_fake = nets.generator(x_src, s_trg, masks=masks)
                    group_of_images.append(x_fake)

                    # save generated images to calculate FID later
                    for k in range(N):
                        filename = os.path.join(
                            path_fake,
                            '%.4i_%.2i.png' % (i*args.val_batch_size+(k+1), j+1))
                        utils.save_image(x_fake[k], ncol=1, filename=filename)

                lpips_value = calculate_lpips_given_images(group_of_images)
                lpips_values.append(lpips_value)

            # calculate LPIPS for each task (e.g. cat2dog, dog2cat)
            lpips_mean = np.array(lpips_values).mean()
            lpips_dict['LPIPS_%s/%s' % (mode, task)] = lpips_mean

            lpips_t_values = []
            print('Generating images and calculating LPIPS^t for %s...' % task)
            for i in range(len(loader_src)):
                # a fresh source iterator is opened on every pass
                iter_src = iter(loader_src)
                N = args.val_batch_size
                y_trg = torch.tensor([trg_idx] * N).to(device)
                # one style per pass, shared by all outputs of that pass
                if mode == 'latent':
                    z_trg = torch.randn(N, args.latent_dim).to(device)
                    s_trg = nets.mapping_network(z_trg, y_trg)
                else:
                    # NOTE(review): same bare-except pattern as above, here
                    # for iter_ref2.
                    try:
                        x_ref = next(iter_ref2).to(device)
                    except:
                        iter_ref2 = iter(loader_ref)
                        x_ref = next(iter_ref2).to(device)

                    if x_ref.size(0) > N:
                        x_ref = x_ref[:N]
                    s_trg = nets.style_encoder(x_ref, y_trg)

                # generate 10 outputs from the same input
                # NOTE(review): here each output uses the next source batch
                # with the same style - the comment above looks copied.
                group_of_images = []
                for j in range(args.num_outs_per_domain):
                    x_src = next(iter_src).to(device)
                    x_src = x_src.to(device)
                    masks = nets.fan.get_heatmap(x_src) if args.w_hpf > 0 else None
                    x_fake = nets.generator(x_src, s_trg, masks=masks)
                    group_of_images.append(x_fake)

                lpips_t_value = calculate_lpips_given_images(group_of_images)
                lpips_t_values.append(lpips_t_value)

                del iter_src
                if mode == 'reference':
                    del iter_ref2

            lpips_t_mean = np.array(lpips_t_values).mean()
            lpips_dict['LPIPS^t_%s/%s' % (mode, task)] = lpips_t_mean

        # delete dataloaders
        del loader_src
        if mode == 'reference':
            del loader_ref
            del iter_ref

    # calculate the average LPIPS for all tasks
    # (the mean runs over every entry recorded so far, LPIPS^t included)
    lpips_mean = 0
    for _, value in lpips_dict.items():
        lpips_mean += value / len(lpips_dict)
    lpips_dict['LPIPS_%s/mean' % mode] = lpips_mean

    # report LPIPS values
    filename = os.path.join(args.eval_dir, 'LPIPS_%.5i_%s.json' % (step, mode))
    utils.save_json(lpips_dict, filename)

    # calculate and report fid values
    calculate_fid_for_all_tasks(args, domains, step=step, mode=mode)
    # back to a non-deterministic seed once evaluation is finished
    torch.seed()
# Post-processing: # ---------------- config.dev = torch.device(config.dev) if config.pin_memory and config.dev != torch.device('cuda'): config = None raise ValueError( '--pin_memory should be set to `False` if not using CUDA.') if config.random_seed == -1: np.random.seed(None) if config.n_gpu > 1: torch.cuda.seed_all() # multi-GPU, ignored if CPU else: torch.seed() # single-processor, CUDA or CPU elif 0 <= config.random_seed < 2**32: np.random.seed(config.random_seed) if config.n_gpu > 1: torch.cuda.manual_seed_all( config.random_seed) # multi-GPU, ignored if CPU else: torch.manual_seed( config.random_seed) # single-processor, CUDA or CPU torch.backends.cudnn.deterministic = True if config.enable_cudnn_autotuner: config.enable_cudnn_autotuner = False print( 'To maintain determinism, the CuDNN auto-tuner has been disabled...' ) else: