def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, init_fn, worker_id):
    global _use_shared_memory
    _use_shared_memory = True

    # Initialize C side signal handlers for SIGBUS and SIGSEGV. Python signal
    # module's handlers are executed after Python returns from C low-level
    # handlers, likely when the same fatal signal happened again already.
    # https://docs.python.org/3/library/signal.html Sec. 18.8.1.1
    _set_worker_signal_handlers()

    torch.set_num_threads(1)
    torch.manual_seed(seed)
    np.random.seed(seed)

    if init_fn is not None:
        init_fn(worker_id)

    while True:
        r = index_queue.get()
        if r is None:
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
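# A minimal sketch of how the `init_fn` hook above (the DataLoader's
# `worker_init_fn`) is commonly used: the loop seeds torch and NumPy with the
# per-worker seed it receives, and a user-supplied init function can derive any
# additional per-worker randomness (e.g. for third-party augmentation code)
# from the same seed. The helper name below is illustrative, not part of the
# loader API.
import numpy as np
import torch
from torch.utils.data import DataLoader

def _seed_numpy_per_worker(worker_id):
    # Inside a worker, torch.initial_seed() already reflects the base seed plus
    # the worker offset; fold it into NumPy's 32-bit seed range.
    np.random.seed(torch.initial_seed() % 2**32)

# Usage (dataset assumed to exist):
# loader = DataLoader(dataset, num_workers=4, worker_init_fn=_seed_numpy_per_worker)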
def test_word_embed_lookup_d2_1(): """ 1 point(s) """ global test_sent, gold, word_to_ix, vocab torch.manual_seed(1) embedder = VanillaWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM) embeds = embedder(test_sent) assert len(embeds) == len(test_sent) assert isinstance(embeds, list) assert isinstance(embeds[0], ag.Variable) assert embeds[0].size() == (1, TEST_EMBEDDING_DIM) embeds_list = make_list(embeds) true = ([-1.8661, 1.4146, -1.8781, -0.4674], [-0.9596, 0.5489, -0.9901, -0.3826], [0.5237, 0.0004, -1.2039, 3.5283], [0.3056, 1.0386, 0.5206, -0.5006], [0.4434, 0.5848, 0.8407, 0.5510], [-0.7576, 0.4215, -0.4827, -1.1198], [0.3056, 1.0386, 0.5206, -0.5006], [-2.9718, 1.7070, -0.4305, -2.2820], [0.3863, 0.9124, -0.8410, 1.2282] ) pairs = zip(embeds_list, true) check_tensor_correctness(pairs)
def test_bilstm_word_embeds_d4_1(): """ 1 point(s) / 0.5 point(s) (section dependent) """ global test_sent, word_to_ix, vocab torch.manual_seed(1) embedder = BiLSTMWordEmbeddingLookup(word_to_ix, TEST_EMBEDDING_DIM, TEST_EMBEDDING_DIM, 1, 0.0) embeds = embedder(test_sent) assert len(embeds) == len(test_sent) assert isinstance(embeds, list) assert isinstance(embeds[0], ag.Variable) assert embeds[0].size() == (1, TEST_EMBEDDING_DIM) embeds_list = make_list(embeds) true = ( [ .4916, -.0168, .1719, .6615 ], [ .3756, -.0610, .1851, .2604 ], [ -.2655, -.1289, .1009, -.0016 ], [ -.1070, -.3971, .2414, -.2588 ], [ -.1717, -.4475, .2739, -.0465 ], [ 0.0684, -0.2586, 0.2123, -0.1832 ], [ -0.0775, -0.4308, 0.1844, -0.1146 ], [ 0.4366, -0.0507, 0.1018, 0.4015 ], [ -0.1265, -0.2192, 0.0481, 0.1551 ]) pairs = zip(embeds_list, true) check_tensor_correctness(pairs)
def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
          seed=None, mu=None, cov=None, A=None, b=None):
    '''Test the correctness of the batch implementation of mean().

    This function stacks `[1 * mu, 2 * mu, ..., batch * mu]` and then checks
    whether the batch output is accurate.

    Args:
        size: Tuple size of matrix A.
        batch: The batch size (> 0).
        dtype: Data type.
        device: Device on which tensors are allocated.
        seed: Seed for the random number generator.
        mu: To test a specific mean mu.
        cov: To test a specific covariance matrix.
        A: To test a specific A matrix.
        b: To test a specific bias b.
    '''
    if seed is not None:
        torch.manual_seed(seed)
    if A is None:
        A = torch.rand(size, dtype=dtype, device=device)
    if b is None:
        b = torch.rand(size[0], dtype=dtype, device=device)
    if mu is None:
        mu = torch.rand(size[1], dtype=dtype, device=device)
    if cov is None:
        cov = rand.definite(size[1], dtype=dtype, device=device,
                            positive=True, semi=False, norm=10**2)
    self.A = A
    self.b = b
    var = torch.diag(cov)
    self.batch_mean = torch.stack([(i + 1) * mu for i in range(batch)])
    self.batch_cov = torch.stack([(i + 1) * cov for i in range(batch)])
    self.batch_var = torch.stack([(i + 1) * var for i in range(batch)])
def main(): args = parser.parse_args() if args.seed is not None: random.seed(args.seed) torch.manual_seed(args.seed) cudnn.deterministic = True warnings.warn('You have chosen to seed training. ' 'This will turn on the CUDNN deterministic setting, ' 'which can slow down your training considerably! ' 'You may see unexpected behavior when restarting ' 'from checkpoints.') if args.gpu is not None: warnings.warn('You have chosen a specific GPU. This will completely ' 'disable data parallelism.') if args.dist_url == "env://" and args.world_size == -1: args.world_size = int(os.environ["WORLD_SIZE"]) args.distributed = args.world_size > 1 or args.multiprocessing_distributed ngpus_per_node = torch.cuda.device_count() if args.multiprocessing_distributed: # Since we have ngpus_per_node processes per node, the total world_size # needs to be adjusted accordingly args.world_size = ngpus_per_node * args.world_size # Use torch.multiprocessing.spawn to launch distributed processes: the # main_worker process function mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) else: # Simply call main_worker function main_worker(args.gpu, ngpus_per_node, args)
def set_seed(seed):
    """Sets random seed everywhere."""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
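# Note on set_seed above: torch.cuda.manual_seed(seed) seeds only the current
# CUDA device. A hedged variant for multi-GPU runs (same idea, not part of the
# original helper) uses manual_seed_all so every visible device gets the seed:
import random
import numpy as np
import torch

def set_seed_all_devices(seed):
    """Hypothetical multi-GPU variant of set_seed."""
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # seeds every visible GPU, not just the current one
    random.seed(seed)
    np.random.seed(seed)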
def setUp(self, length=3, factor=10, count=1000000, seed=None,
          dtype=torch.float64, device=None):
    '''Set up the test values.

    Args:
        length: Size of the vector.
        factor: Factor by which the mean and standard deviation are multiplied.
        count: Number of samples for Monte-Carlo estimation.
        seed: Seed for the random number generator.
        dtype: The data type.
        device: Device on which tensors are allocated.
    '''
    if seed is not None:
        torch.manual_seed(seed)
    # variables
    self.A = torch.randn(length, length, dtype=dtype, device=device)
    self.b = torch.randn(length, dtype=dtype, device=device)
    # input mean and covariance
    self.mu = torch.randn(length, dtype=dtype, device=device) * factor
    self.cov = rand.definite(length, dtype=dtype, device=device,
                             positive=True, semi=False, norm=factor**2)
    # Monte-Carlo estimation of the output mean and variance
    normal = torch.distributions.MultivariateNormal(self.mu, self.cov)
    samples = normal.sample((count,))
    out_samples = samples.matmul(self.A.t()) + self.b
    self.mc_mu = torch.mean(out_samples, dim=0)
    self.mc_var = torch.var(out_samples, dim=0)
    self.mc_cov = cov(out_samples)
def __init__(self, input_size, output_size, seed=1): super(NN, self).__init__() torch.manual_seed(seed) self.input_size = input_size self.output_size = output_size h_size = 50 # self.net = nn.Sequential( # nn.Linear(self.input_size,h_size), # nn.ReLU(), # nn.Linear(h_size,self.output_size) # ) # self.net = nn.Sequential( # nn.Linear(self.input_size,h_size), # # nn.Tanh(), # # nn.Linear(h_size,h_size), # nn.Tanh(), # nn.Linear(h_size,self.output_size), # # nn.Tanh(), # # nn.Linear(h_size,self.output_size) # ) self.net = nn.Sequential( nn.Linear(self.input_size,h_size), # nn.Tanh(), # nn.Linear(h_size,h_size), nn.LeakyReLU(), nn.Linear(h_size,h_size), nn.LeakyReLU(), nn.Linear(h_size,self.output_size) )
def prepare_environment(params: Params):
    """
    Sets random seeds for reproducible experiments. This may not work as expected
    if you use it from within a Python project in which you have already imported
    PyTorch. If you use the scripts/run_model.py entry point for training models
    with this library, your experiments should be reasonably reproducible. If you
    are using this from your own project, you will want to call this function
    before importing PyTorch. Complete determinism is very difficult to achieve
    with libraries doing optimized linear algebra due to massively parallel
    execution, which is exacerbated by using GPUs.

    Parameters
    ----------
    params: Params object or dict, required.
        A ``Params`` object or dict holding the JSON parameters.
    """
    seed = params.pop_int("random_seed", 13370)
    numpy_seed = params.pop_int("numpy_seed", 1337)
    torch_seed = params.pop_int("pytorch_seed", 133)

    if seed is not None:
        random.seed(seed)
    if numpy_seed is not None:
        numpy.random.seed(numpy_seed)
    if torch_seed is not None:
        torch.manual_seed(torch_seed)
        # Seed all GPUs with the same seed if available.
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(torch_seed)

    log_pytorch_version_info()
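# The docstring above notes that complete determinism is hard to achieve on GPU.
# A minimal sketch, assuming stock PyTorch, of the extra cuDNN switches usually
# flipped for stricter (but slower) reproducibility; this is an illustration,
# not part of prepare_environment itself.
import torch

def make_cudnn_deterministic():
    # Force cuDNN to choose deterministic kernels and disable the autotuner,
    # which otherwise selects convolution algorithms nondeterministically.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False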
def init_platform(): config_file = cfg_from_file('config.yml') default_file = cfg_from_file('default.yml') logger.info(pprint.pformat(default_file)) logger.info(pprint.pformat(config_file)) merge_a_into_b(config_file, config) merge_a_into_b(default_file, default) default.best_model_path = '' if default.gpu == '': default.gpu = None if default.gpu is not None: os.environ["CUDA_VISIBLE_DEVICES"] = default.gpu default.distributed = default.world_size > 1 if default.distributed: dist.init_process_group(backend=default.dist_backend, init_method=default.dist_url, world_size=default.world_size) default.lr_epoch = [int(ep) for ep in default.lr_step.split(',')] if default.seed is not None: seed = default.seed np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) cudnn.deterministic = True
def train_model(args): """Load the data, train the model, test the model, export / save the model """ torch.manual_seed(args.seed) # Open our dataset train_loader, test_loader = data_utils.load_data(args.test_split, args.batch_size) # Create the model net = model.SonarDNN().double() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, nesterov=False) # Train / Test the model for epoch in range(1, args.epochs + 1): train(net, train_loader, optimizer, epoch) test(net, test_loader) # Export the trained model torch.save(net.state_dict(), args.model_name) if args.model_dir: # Save the model to GCS data_utils.save_model(args.model_dir, args.model_name)
def main(argv): (opt, args) = parser.parse_args(argv) print(opt) config = get_config(opt.config) if opt.manualSeed is None: opt.manualSeed = random.randint(1, 10000) print('Random Seed: ', opt.manualSeed) random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) if opt.cuda: torch.cuda.manual_seed_all(opt.manualSeed) torch.cuda.set_device(opt.gpu_ids) cudnn.benchmark = True # loading data set transform = transforms.Compose([transforms.Resize((config['fineSizeH'], config['fineSizeW'])), transforms.ToTensor()]) dataset = Aligned_Dataset(config['dataPath'], direction='AtoB', transform=transform) train_loader = torch.utils.data.DataLoader(dataset, batch_size=config['batchSize'], shuffle=True, num_workers=int(4)) # setup model trainer = trainer_gan(config, train_loader, resume_epoch=opt.resume_epoch) if opt.cuda: trainer.cuda() if opt.resume_epoch: trainer.resume() # training for epoch in range(opt.resume_epoch, config['nepoch']): trainer.train(epoch) trainer.update_learning_rate(epoch) if epoch % 10 == 0: trainer.save(epoch)
def seed_everything(seed=1029):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
def __init__(self, hyper_config, seed=1): super(VAE, self).__init__() torch.manual_seed(seed) self.z_size = hyper_config['z_size'] self.x_size = hyper_config['x_size'] self.act_func = hyper_config['act_func'] self.q_dist = hyper_config['q_dist'](self, hyper_config=hyper_config) # for aaa in self.q_dist.parameters(): # # print (aaa) # print (aaa.size()) # # fasdfs if torch.cuda.is_available(): self.dtype = torch.cuda.FloatTensor self.q_dist.cuda() else: self.dtype = torch.FloatTensor #Decoder self.decoder_weights = [] for i in range(len(hyper_config['decoder_arch'])): self.decoder_weights.append(nn.Linear(hyper_config['decoder_arch'][i][0], hyper_config['decoder_arch'][i][1])) count =1 for i in range(len(self.decoder_weights)): self.add_module(str(count), self.decoder_weights[i]) count+=1
def __init__(self, seed=1): super(NN_drop, self).__init__() torch.manual_seed(seed) self.input_size = 1 self.output_size = 1 h_size = 50 # #this samples a mask for each datapoint in the batch # self.net = nn.Sequential( # nn.Linear(self.input_size,h_size), # nn.ReLU(), # nn.Dropout(p=0.5), # nn.Linear(h_size,self.output_size) # ) #want to keep mask constant for batch self.l1 = nn.Linear(self.input_size,h_size) self.a1 = nn.ReLU() # nn.Dropout(p=0.5), self.l2 = nn.Linear(h_size,self.output_size) self.optimizer = optim.Adam(self.parameters(), lr=.01)
def set_random_seed(seed=13370):
    if seed > 0:
        random.seed(seed)
        np.random.seed(int(seed / 10))
        torch.manual_seed(int(seed / 100))
        torch.cuda.manual_seed(int(seed / 100))
        torch.cuda.manual_seed_all(int(seed / 100))
def __init__(self, hyper_config, seed=1): super(VAE, self).__init__() torch.manual_seed(seed) self.z_size = hyper_config['z_size'] self.x_size = hyper_config['x_size'] self.act_func = hyper_config['act_func'] self.flow_bool = hyper_config['flow_bool'] self.q_dist = hyper_config['q_dist'](self, hyper_config=hyper_config) if torch.cuda.is_available(): self.dtype = torch.cuda.FloatTensor self.q_dist.cuda() else: self.dtype = torch.FloatTensor #Decoder self.fc4 = nn.Linear(self.z_size, 200) self.fc5 = nn.Linear(200, 200) self.fc6 = nn.Linear(200, self.x_size)
def test_bilstm_word_embeds_d4_1(): global test_sent, word_to_ix, vocab torch.manual_seed(1) embedder = BiLSTMWordEmbedding(word_to_ix, TEST_EMBEDDING_DIM, TEST_EMBEDDING_DIM, 1, 0.0) embeds = embedder(test_sent) assert len(embeds) == len(test_sent) assert isinstance(embeds, list) assert isinstance(embeds[0], ag.Variable) assert embeds[0].size() == (1, TEST_EMBEDDING_DIM) embeds_list = make_list(embeds) true = ( [0.09079286456108093, 0.06577987223863602, 0.26242679357528687, -0.004267544485628605], [0.16868481040000916, 0.2032647728919983, 0.23663431406021118, -0.11785736680030823], [0.35757705569267273, 0.3805052936077118, -0.006295515224337578, 0.0010524550452828407], [0.26692214608192444, 0.3241712749004364, 0.13473030924797058, -0.026079852133989334], [0.23157459497451782, 0.13698695600032806, 0.04000323265790939, 0.1107199415564537], [0.22783540189266205, -0.02211562544107437, 0.06239837780594826, 0.08553065359592438], [0.24633683264255524, 0.09283821284770966, 0.0987505242228508, -0.07646450400352478], [0.05530695244669914, -0.4060348570346832, -0.060150448232889175, -0.003920700401067734], [0.2099054455757141, -0.304738312959671, -0.01663055270910263, -0.05987118184566498] ) pairs = zip(embeds_list, true) check_tensor_correctness(pairs)
def test_suff_word_embeds_d4_2(): global test_sent, word_to_ix, vocab torch.manual_seed(1) test_suff_to_ix = build_suff_to_ix(word_to_ix) suff_word_embedder = SuffixAndWordEmbedding(word_to_ix, test_suff_to_ix, TEST_EMBEDDING_DIM) embeds = suff_word_embedder(test_sent) assert len(embeds) == len(test_sent) assert isinstance(embeds, list) assert isinstance(embeds[0], ag.Variable) assert embeds[0].size() == (1, TEST_EMBEDDING_DIM) embeds_list = make_list(embeds) true = ([-0.45190597, -0.16613023, 1.37900829, 2.5285573 ], [-1.02760863, -0.56305277, 1.59870028, -1.27700698], [-0.89229053, -0.05825018, 0.32550153, -0.47914493], [ 0.42241532, 0.267317 , 1.37900829, 2.5285573 ], [-1.5227685 , 0.38168392, 0.41074166, -0.98800713], [-0.42119515, -0.51069999, 0.11025489, -2.2590096 ], [ 0.42241532, 0.267317 , 1.37900829, 2.5285573 ], [-0.19550958, -0.96563596, -0.90807337, 0.54227364], [ 0.66135216, 0.26692411, 3.5869894 , -1.83129013]) pairs = zip(embeds_list, true) check_tensor_correctness(pairs)
def main():
    torch.manual_seed(1234)
    np.random.seed(1234)
    queryLen = 10
    docLen = 12
    embDim = 128
    encDim = 256

    print("Load Train Data")
    savePath = "./model_lstm"
    trainFile = "./data/min_word/train"
    devFile = "./data/min_word/dev"
    vocFile = "./data/min_word/vocab"
    trainData = SimDataset(trainFile, vocFile, queryLen, docLen, 2, 10000)
    trainLoader = DataLoader(trainData, 100)

    print("Load Dev Data")
    devData = SimDataset(devFile, vocFile, queryLen, docLen, 2)
    devLoader = DataLoader(devData, 10000)
    devData = None
    for batch in devLoader:
        devData = batch
        break

    print("Create Model")
    model, criterion, optimizer = SimLSTMPrj(trainData.getVocLen(), embDim, encDim, savePath)

    print("Train ...")
    train(model, trainLoader, criterion, optimizer, evalData=devData, epoch=50, savePath=savePath)
def main(argv): (opt, args) = parser.parse_args(argv) print(opt) config = get_config(opt.config) if opt.manualSeed is None: opt.manualSeed = random.randint(1, 10000) print('Random Seed: ', opt.manualSeed) random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) if opt.cuda: torch.cuda.manual_seed_all(opt.manualSeed) torch.cuda.set_device(opt.gpu_ids) cudnn.benchmark = True # loading data set transform = transforms.Compose([transforms.Resize((config['fineSizeH'], config['fineSizeW'])), transforms.ToTensor()]) dataset = Aligned_Dataset(config['dataPath'], subfolder='test', direction='AtoB', transform=transform) test_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=int(4)) # setup model trainer = trainer_gan(config, test_loader, resume_epoch=opt.resume_epoch) # load a model trainer.netG.load_state_dict(torch.load(opt.modeldir)) if opt.cuda: trainer.cuda() # testing trainer.test()
def __init__(self, input_size, output_size, seed=1, n_residual_blocks=3): super(NN3, self).__init__() torch.manual_seed(seed) self.input_size = input_size self.output_size = output_size h_size = 50 # self.net = nn.Sequential( # nn.Linear(self.input_size,h_size), # nn.BatchNorm1d(h_size), # # nn.Tanh(), # # nn.Linear(h_size,h_size), # nn.LeakyReLU(), # nn.Linear(h_size,h_size), # nn.BatchNorm1d(h_size), # # nn.Tanh(), # nn.LeakyReLU(), # nn.Linear(h_size,h_size), # nn.BatchNorm1d(h_size), # # nn.Tanh(), # nn.LeakyReLU(), # nn.Linear(h_size,self.output_size), # ) self.first_layer = nn.Linear(self.input_size,h_size) self.last_layer = nn.Linear(h_size,self.output_size) # n_residual_blocks = 5 model = [] # Residual blocks for _ in range(n_residual_blocks): model += [ResidualBlock(h_size)] self.part3 = nn.Sequential(*model)
def test_horovod_allreduce_inplace(self): """Test that the allreduce correctly sums 1D, 2D, 3D tensors.""" hvd.init() size = hvd.size() dtypes = [torch.IntTensor, torch.LongTensor, torch.FloatTensor, torch.DoubleTensor] if torch.cuda.is_available(): dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor, torch.cuda.FloatTensor, torch.cuda.DoubleTensor] dims = [1, 2, 3] for dtype, dim in itertools.product(dtypes, dims): torch.manual_seed(1234) tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100) tensor = tensor.type(dtype) multiplied = tensor * size hvd.allreduce_(tensor, average=False) max_difference = tensor.sub(multiplied).max() # Threshold for floating point equality depends on number of # ranks, since we're comparing against precise multiplication. if size <= 3 or dtype in [torch.IntTensor, torch.LongTensor, torch.cuda.IntTensor, torch.cuda.LongTensor]: threshold = 0 elif size < 10: threshold = 1e-4 elif size < 15: threshold = 5e-4 else: break assert max_difference <= threshold, 'hvd.allreduce produces incorrect results'
def test_horovod_allreduce_error(self): """Test that the allreduce raises an error if different ranks try to send tensors of different rank or dimension.""" hvd.init() rank = hvd.rank() size = hvd.size() # This test does not apply if there is only one worker. if size == 1: return # Same rank, different dimension torch.manual_seed(1234) dims = [17 + rank] * 3 tensor = torch.FloatTensor(*dims).random_(-100, 100) try: hvd.allreduce(tensor) assert False, 'hvd.allreduce did not throw error' except torch.FatalError: pass # Same number of elements, different rank torch.manual_seed(1234) if rank == 0: dims = [17, 23 * 57] else: dims = [17, 23, 57] tensor = torch.FloatTensor(*dims).random_(-100, 100) try: hvd.allreduce(tensor) assert False, 'hvd.allreduce did not throw error' except torch.FatalError: pass
def test_horovod_allreduce_grad(self): """Test the correctness of the allreduce gradient.""" hvd.init() size = hvd.size() dtypes = [torch.IntTensor, torch.LongTensor, torch.FloatTensor, torch.DoubleTensor] if torch.cuda.is_available(): dtypes += [torch.cuda.IntTensor, torch.cuda.LongTensor, torch.cuda.FloatTensor, torch.cuda.DoubleTensor] dims = [1, 2, 3] for dtype, dim in itertools.product(dtypes, dims): torch.manual_seed(1234) tensor = torch.FloatTensor(*([17] * dim)).random_(-100, 100) tensor = tensor.type(dtype) tensor = torch.autograd.Variable(tensor, requires_grad=True) summed = hvd.allreduce(tensor, average=False) summed.backward(torch.ones([17] * dim)) grad_out = tensor.grad.data.numpy() expected = np.ones([17] * dim) * size err = np.linalg.norm(expected - grad_out) self.assertLess(err, 0.00000001, "gradient %s differs from expected %s, " "error: %s" % (grad_out, expected, str(err)))
def __init__(self, seed=1): super(NN, self).__init__() torch.manual_seed(seed) self.input_size = 1 self.output_size = 1 h_size = 50 # self.net = nn.Sequential( # nn.Linear(self.input_size,h_size), # nn.ReLU(), # nn.Linear(h_size,self.output_size) # ) self.net = nn.Sequential( nn.Linear(self.input_size,h_size), # nn.Tanh(), # nn.Linear(h_size,h_size), nn.Tanh(), nn.Linear(h_size,self.output_size), # nn.Tanh(), # nn.Linear(h_size,self.output_size) ) # self.optimizer = optim.Adam(self.parameters(), lr=.01) self.optimizer = optim.Adam(self.parameters(), lr=.0004)
def train_step(self, sample, update_params=True, dummy_batch=False): """Do forward, backward and parameter update.""" # Set seed based on args.seed and the update number so that we get # reproducible results when resuming from checkpoints seed = self.args.seed + self.get_num_updates() torch.manual_seed(seed) torch.cuda.manual_seed(seed) if not dummy_batch: self.meters['train_wall'].start() # forward and backward pass sample = self._prepare_sample(sample) loss, sample_size, logging_output, oom_fwd = self._forward(sample) oom_bwd = self._backward(loss) # buffer stats and logging outputs self._buffered_stats['sample_sizes'].append(sample_size) self._buffered_stats['logging_outputs'].append(logging_output) self._buffered_stats['ooms_fwd'].append(oom_fwd) self._buffered_stats['ooms_bwd'].append(oom_bwd) # update parameters if update_params: agg_logging_output = self._update_params() else: agg_logging_output = None # buffering updates if not dummy_batch: self.meters['train_wall'].stop() return agg_logging_output
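# A minimal standalone sketch of the reseeding idea in train_step above: deriving
# the RNG seed from the base seed plus the update counter means that, after a
# checkpoint is restored, the same update number reproduces the same random state.
# The function and argument names here are illustrative.
import torch

def reseed_for_update(base_seed, num_updates):
    seed = base_seed + num_updates
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)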
def predict_fn(input_data, model): logger.info('Generating text based on input parameters.') corpus = model['corpus'] model = model['model'] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") logger.info('Current device: {}'.format(device)) torch.manual_seed(input_data['seed']) ntokens = len(corpus.dictionary) input = torch.randint(ntokens, (1, 1), dtype=torch.long).to(device) hidden = model.init_hidden(1) logger.info('Generating {} words.'.format(input_data['words'])) result = [] with torch.no_grad(): # no tracking history for i in range(input_data['words']): output, hidden = model(input, hidden) word_weights = output.squeeze().div(input_data['temperature']).exp().cpu() word_idx = torch.multinomial(word_weights, 1)[0] input.fill_(word_idx) word = corpus.dictionary.idx2word[word_idx] word = word if type(word) == str else word.decode() if word == '<eos>': word = '\n' elif i % 12 == 11: word = word + '\n' else: word = word + ' ' result.append(word) return ''.join(result)
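# The sampling step in predict_fn above is temperature sampling: the model's
# output scores are divided by the temperature before being exponentiated, and a
# token index is then drawn from the resulting multinomial. A minimal standalone
# sketch (names are illustrative, not taken from the snippet):
import torch

def sample_from_logits(logits, temperature=1.0):
    # logits: 1-D tensor of unnormalized scores over the vocabulary
    probs = torch.softmax(logits / temperature, dim=-1)
    return torch.multinomial(probs, num_samples=1).item()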
def main(argv): (opt, args) = parser.parse_args(argv) config = get_config(opt.config) print(opt) if opt.manualSeed is None: opt.manualSeed = random.randint(1, 10000) print("Random Seed: ", opt.manualSeed) random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) if config['cuda']: torch.cuda.manual_seed_all(opt.manualSeed) torch.cuda.set_device(opt.gpu_ids) cudnn.benchmark = True transform = transforms.Compose([transforms.Resize((512, 512)), transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))]) dataset = Aligned_Dataset(config['datapath'], subfolder='test', direction='AtoB', transform=transform) dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=int(2)) model_dir = '/media/scw4750/AIwalker/stackgan-like/checkpoints/generator_epoch_160.pkl' trainer = GAN_Trainer(config, dataloader) # load the model trainer.G.load_state_dict(torch.load(model_dir)) trainer.test() return
def __init__(self, seed=1): super(NN, self).__init__() torch.manual_seed(seed) self.action_size = 2 self.state_size = 4 self.value_size = 1 h_size = 50 self.actor = nn.Sequential( nn.Linear(self.state_size,h_size), nn.ReLU(), nn.Linear(h_size,self.action_size), # nn.log_softmax(dim=1) ) self.critic = nn.Sequential( nn.Linear(self.state_size,h_size), nn.ReLU(), nn.Linear(h_size,self.value_size) ) self.Q_func = nn.Sequential( nn.Linear(self.state_size + self.action_size,h_size), nn.ReLU(), nn.Linear(h_size,self.value_size) ) self.optimizer_actor = optim.Adam(self.actor.parameters(), lr=.0001) self.optimizer_critic = optim.Adam(self.critic.parameters(), lr=.0001) self.optimizer_qfunc = optim.Adam(self.Q_func.parameters(), lr=.0001)
def set_seed(seed):
    # Set the random seeds.
    np.random.seed(seed)
    # random.seed(seed)
    torch.manual_seed(seed)            # CPU
    torch.cuda.manual_seed_all(seed)   # GPU
    torch.backends.cudnn.deterministic = True
# You may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from parser import parse import torch import numpy as np if __name__ == '__main__': args = parse() """ Setting seed of the RNG in all packages.""" torch.manual_seed(args.seed) np.random.seed(args.seed) """ Import specified class with the experimental setup.""" exp_args = args.experiment.split(".") exp_path = ".".join(exp_args[:-1]) exp_name = exp_args[-1] runner_class = getattr(__import__(exp_path, fromlist=[exp_name]), exp_name) runner = runner_class(args) if not args.infer_only: runner.train() runner.infer()
        nn.init.xavier_normal_(self.lstm.weight_ih_l0)
        nn.init.orthogonal_(self.lstm.weight_hh_l0)
        nn.init.xavier_normal_(self.out.weight)

    def forward(self, x, states):
        x = self.embedding(x)
        h, states = self.lstm(x, states)
        y = self.out(h)
        return y, states


if __name__ == '__main__':
    np.random.seed(123)
    torch.manual_seed(123)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    '''
    1. Prepare the data
    '''
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    en_train_path = os.path.join(data_dir, 'train.en')
    en_val_path = os.path.join(data_dir, 'dev.en')
    en_test_path = os.path.join(data_dir, 'test.en')
    ja_train_path = os.path.join(data_dir, 'train.ja')
    ja_val_path = os.path.join(data_dir, 'dev.ja')
    ja_test_path = os.path.join(data_dir, 'test.ja')
import math import os import warnings import random; random.seed(1001) import torch try: torch.cuda.manual_seed(1001) except: warnings.warn('no NVIDIA driver found') torch.manual_seed(1001) from seqmod.hyper import Hyperband from seqmod.hyper.utils import make_sampler from seqmod.modules.lm import LM from seqmod import utils as u from seqmod.misc.trainer import Trainer from seqmod.misc.loggers import StdLogger from seqmod.misc.optimizer import Optimizer from seqmod.misc.dataset import Dict, BlockDataset from seqmod.misc.preprocess import text_processor from seqmod.misc.early_stopping import EarlyStopping # Load data def load_lines(path, processor=text_processor()): lines = [] if os.path.isfile(path):
def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
def compute_MB_proba(rule_ls, ls_rule_idx): rule_idx_cnt = Counter(ls_rule_idx) numerator = 0 for rule_idx in rule_idx_cnt: weight = rule_ls[rule_idx].weight cnt = rule_idx_cnt[rule_idx] numerator += math.exp(weight * cnt) return numerator / (numerator + 1.0) if __name__ == '__main__': random.seed(cmd_args.seed) np.random.seed(cmd_args.seed) torch.manual_seed(cmd_args.seed) train(cmd_args) x1 = [] x2 = [] m = 0 for i in loss_list: x1.append(m) m = m + 1 m = 0 for i in auc_list: x2.append(m) m = m + 1 plt.figure(1) print(loss_list) plt.plot(x1, loss_list, '.-', color='red')
def main_worker(rank, cfg): # Initialize the worker distributed = init_worker(rank, cfg) # Initialize the random seed if cfg.seed is not None: torch.manual_seed(cfg.seed) # Initialize the PyTorch device device_id = cfg.device_id + rank device = init_device(cfg, id=device_id) # Initialize the model model = get_model(cfg) model.to(device) if distributed: model = nn.parallel.DistributedDataParallel(model, device_ids=[device_id]) # Initialize the loss function criterion = get_loss_function(cfg) criterion.to(device) # Initialize the optimizer optimizer = optim.Adam(model.parameters(), lr=1) # Check whether the result already exists result_dir = get_result_dir(cfg) resume = os.path.isdir(result_dir) # Sync the workers (required due to the previous isdir check) if distributed: dist.barrier() # Start or resume training if resume: if rank == 0: print('Resuming result:', cfg.result) # Load and verify the config result_cfg = load_config(result_dir) if set(result_cfg.features) != set(cfg.features): error('input feature set mismatch') # Restore the latest checkpoint last_epoch = get_latest_checkpoint_epoch(result_dir) checkpoint = load_checkpoint(result_dir, device, last_epoch, model, optimizer) step = checkpoint['step'] else: if rank == 0: print('Result:', cfg.result) os.makedirs(result_dir) # Save the config save_config(result_dir, cfg) # Save the source code src_filenames = glob(os.path.join(os.path.dirname(sys.argv[0]), '*.py')) src_zip_filename = os.path.join(result_dir, 'src.zip') save_zip(src_zip_filename, src_filenames) last_epoch = 0 step = 0 # Make sure all workers have loaded the checkpoint if distributed: dist.barrier() start_epoch = last_epoch + 1 if start_epoch > cfg.num_epochs: exit() # nothing to do # Reset the random seed if resuming result if cfg.seed is not None and start_epoch > 1: seed = cfg.seed + start_epoch - 1 torch.manual_seed(seed) # Initialize the training dataset train_data = TrainingDataset(cfg, cfg.train_data) if len(train_data) > 0: if rank == 0: print('Training images:', train_data.num_images) else: error('no training images') train_loader, train_sampler = get_data_loader(rank, cfg, train_data, shuffle=True) train_steps_per_epoch = len(train_loader) # Initialize the validation dataset valid_data = ValidationDataset(cfg, cfg.valid_data) if len(valid_data) > 0: if rank == 0: print('Validation images:', valid_data.num_images) valid_loader, valid_sampler = get_data_loader(rank, cfg, valid_data, shuffle=False) valid_steps_per_epoch = len(valid_loader) # Initialize the learning rate scheduler lr_scheduler = optim.lr_scheduler.OneCycleLR( optimizer, max_lr=cfg.max_lr, total_steps=cfg.num_epochs, pct_start=cfg.lr_warmup, anneal_strategy='cos', div_factor=(25. 
if cfg.lr is None else cfg.max_lr / cfg.lr), final_div_factor=1e4, last_epoch=last_epoch-1) if lr_scheduler.last_epoch != last_epoch: error('failed to restore LR scheduler state') # Check whether AMP is enabled amp_enabled = cfg.precision == 'mixed' if amp_enabled: # Initialize the gradient scaler scaler = amp.GradScaler() # Initialize the summary writer log_dir = get_result_log_dir(result_dir) if rank == 0: summary_writer = SummaryWriter(log_dir) if step == 0: summary_writer.add_scalar('learning_rate', lr_scheduler.get_last_lr()[0], 0) # Training and evaluation loops if rank == 0: print() progress_format = '%-5s %' + str(len(str(cfg.num_epochs))) + 'd/%d:' % cfg.num_epochs total_start_time = time.time() for epoch in range(start_epoch, cfg.num_epochs+1): if rank == 0: start_time = time.time() progress = ProgressBar(train_steps_per_epoch, progress_format % ('Train', epoch)) # Switch to training mode model.train() train_loss = 0. # Iterate over the batches if distributed: train_sampler.set_epoch(epoch) for i, batch in enumerate(train_loader, 0): # Get the batch input, target = batch input = input.to(device, non_blocking=True) target = target.to(device, non_blocking=True) if not amp_enabled: input = input.float() target = target.float() # Run a training step optimizer.zero_grad() with amp.autocast(enabled=amp_enabled): output = model(input) loss = criterion(output, target) if amp_enabled: scaler.scale(loss).backward() scaler.step(optimizer) scaler.update() else: loss.backward() optimizer.step() # Next step step += 1 train_loss += loss if rank == 0: progress.next() # Get and update the learning rate lr = lr_scheduler.get_last_lr()[0] lr_scheduler.step() # Compute the average training loss if distributed: dist.all_reduce(train_loss, op=dist.ReduceOp.SUM) train_loss = train_loss.item() / (train_steps_per_epoch * cfg.num_devices) # Write summary if rank == 0: summary_writer.add_scalar('learning_rate', lr, epoch) summary_writer.add_scalar('loss', train_loss, epoch) # Print stats if rank == 0: duration = time.time() - start_time total_duration = time.time() - total_start_time images_per_sec = len(train_data) / duration eta = ((cfg.num_epochs - epoch) * total_duration / (epoch + 1 - start_epoch)) progress.finish('loss=%.6f, lr=%.6f (%.1f images/s, %s, eta %s)' % (train_loss, lr, images_per_sec, format_time(duration), format_time(eta, precision=2))) if ((cfg.num_valid_epochs > 0 and epoch % cfg.num_valid_epochs == 0) or epoch == cfg.num_epochs) \ and len(valid_data) > 0: # Validation if rank == 0: start_time = time.time() progress = ProgressBar(valid_steps_per_epoch, progress_format % ('Valid', epoch)) # Switch to evaluation mode model.eval() valid_loss = 0. 
# Iterate over the batches with torch.no_grad(): for _, batch in enumerate(valid_loader, 0): # Get the batch input, target = batch input = input.to(device, non_blocking=True).float() target = target.to(device, non_blocking=True).float() # Run a validation step loss = criterion(model(input), target) # Next step valid_loss += loss if rank == 0: progress.next() # Compute the average validation loss if distributed: dist.all_reduce(valid_loss, op=dist.ReduceOp.SUM) valid_loss = valid_loss.item() / (valid_steps_per_epoch * cfg.num_devices) # Write summary if rank == 0: summary_writer.add_scalar('valid_loss', valid_loss, epoch) # Print stats if rank == 0: duration = time.time() - start_time images_per_sec = len(valid_data) / duration progress.finish('valid_loss=%.6f (%.1f images/s, %.1fs)' % (valid_loss, images_per_sec, duration)) if (rank == 0) and ((cfg.num_save_epochs > 0 and epoch % cfg.num_save_epochs == 0) or epoch == cfg.num_epochs): # Save a checkpoint save_checkpoint(result_dir, epoch, step, model, optimizer) # Print final stats if rank == 0: total_duration = time.time() - total_start_time print('\nFinished (%s)' % format_time(total_duration)) # Cleanup cleanup_worker(cfg)
import argparse
import os
import time
from datetime import datetime
import shutil

args = utils.get_autoencoder_args()
print('%s_%s' % (args.dataset, args.model))
print(datetime.now(), args, '\n============================')

use_cuda = th.cuda.is_available() and not args.use_cpu
dtype = th.cuda.FloatTensor if use_cuda else th.FloatTensor
th.manual_seed(args.seed)

gids = args.gpuid.split(',')
gids = [int(x) for x in gids]
print('deploy on GPUs:', gids)
if use_cuda:
    if len(gids) == 1:
        th.cuda.set_device(gids[0])
    else:
        th.cuda.set_device(gids[0])
        print('use single GPU', gids[0])
    th.cuda.manual_seed(args.seed)

st_cfg = utils.get_dise_cfg(args.st_layers).split(',')
cnt_cfg = utils.get_dise_cfg(args.cnt_layers).split(',')
base_dep = utils.get_base_dep(args.base_mode)
def main(args): # start experiment report_step = 100 manualSeed = ID if args.seed == 0 else args.seed torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False random.seed(manualSeed) torch.manual_seed(manualSeed) np.random.seed(manualSeed) if args.cuda: torch.cuda.manual_seed_all(manualSeed) string = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) start_log("./log/%d-"%ID + string + LOG, args.log) if args.resume != -1: resume(args.resume) myprint(args) for d in config: myprint("%s: %s" % (d, str(config[d]))) args.batch_size = config["BATCH_SIZE"] # load data tokenizer = GPT2Tokenizer.from_pretrained("./%s/gpt"%DATASET) tokenizer.bos_token = '<BOS>' tokenizer.pad_token = "<PAD>" print(tokenizer.add_tokens(['<negative>'])) print(tokenizer.add_tokens(['<positive>'])) print(tokenizer.add_tokens(['<PAD>'])) print(tokenizer.add_tokens(['<BOS>'])) with open("./%s/%s-gpt.train.json"%(DATASET, STYLE_TYPE), "r") as f: data = json.load(f) dataloader = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda, shuffle=True, input_maxlen=30) with open("./%s/%s-gpt.dev.json"%(DATASET, STYLE_TYPE), "r") as f: data = json.load(f) dev_data = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda, shuffle=False) with open("./%s/%s-gpt.test.json"%(DATASET, STYLE_TYPE), "r") as f: data = json.load(f) if DATASET == "imdb": test_data = Dataloader.GPTLoader(data, tokenizer, args.batch_size, args.cuda) else: test_data = Dataloader.GPTRefLoader(data, tokenizer, args.batch_size, args.cuda) # build model generator = GPT2LMHeadModel.from_pretrained("./%s/gpt"%DATASET) generator.resize_token_embeddings(len(tokenizer)) language_model = GPT2LMHeadModel.from_pretrained("./%s/gpt"%DATASET) language_model.resize_token_embeddings(len(tokenizer)) language_model.load_state_dict(torch.load("./%s/result/language_model.pkl"%DATASET)) language_model.eval() if config["g_dir"] is not None: generator.load_state_dict(torch.load(config["g_dir"])) discriminator_a = classifier.AdvDisNet(word_num=len(tokenizer)) if config["a_dir"] is not None: discriminator_a.load_state_dict(torch.load(config["a_dir"])) discriminator_b = classifier.RNNDisNet(word_num=len(tokenizer), num_layers=1, dropout=0) sim_model = torch.load('sim/sim.pt', map_location='cpu') state_dict = sim_model['state_dict'] vocab_words = sim_model['vocab_words'] sim_args = sim_model['args'] sim_args.gpu = args.gpuid sim_model = WordAveraging(sim_args, vocab_words) sim_model.load_state_dict(state_dict, strict=True) L = nn.CrossEntropyLoss() BL = nn.BCELoss() if args.cuda: generator = generator.cuda() discriminator_a = discriminator_a.cuda() discriminator_b = discriminator_b.cuda() sim_model = sim_model.cuda() L = L.cuda() BL = BL.cuda() language_model = language_model.cuda() if args.critic: critic = critic.cuda() goptimizer = optim.Adam(generator.parameters(), lr=config["generator lr"]) if config["goptim_dir"] is not None: goptimizer.load_state_dict(torch.load(config["goptim_dir"], map_location=torch.device('cuda', args.gpuid))) for param_group in goptimizer.param_groups: param_group['lr'] = config["generator lr"] doptimizer_a = optim.Adam(discriminator_a.parameters(), lr=config["class lr"]) doptimizer_b = optim.Adam(discriminator_b.parameters(), lr=config["discriminator lr"]) if config["aoptim_dir"] is not None: doptimizer_a.load_state_dict(torch.load(config["aoptim_dir"], map_location=torch.device('cuda', args.gpuid))) for param_group in doptimizer_a.param_groups: param_group['lr'] = config["class lr"] EPOCH = config["EPOCH"] GBATCH 
= config["generator batch"] DBATCH = config["discriminator batch"] W_M = config["mle weight"] W_A = config["adv weight"] W_S = config["sim weight"] W_C = config["cycle weight"] W_L = config["language weight"] W_D = config["class weight"] GRAD_CLIP = config["grad clip"] PRETRAIN_BATCH = 0 accumulation_step = config["accumulation_step"] gloss_all, gloss_mle, gloss_adv, gloss_cycle, gloss_sim, dloss_a, dloss_b, gcnt, dcnt, avg_language_loss, avg_language_score, avg_adv_score, avg_language_diff = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 avg_fake_loss, avg_real_loss, avg_sim_score, avg_critic_loss = 0, 0, 0, 0 avg_cls_loss, avg_cls_score, gloss_class, avg_real_loss_cls, avg_fake_loss_cls = 0, 0, 0, 0, 0 best_record = 1000 if args.log: os.mkdir("./cache/%d"%(ID)) os.mkdir("./cache/%d/best/"%(ID)) best_gname = "./cache/%d/best/gen.dict" % ID best_a_dname = "./cache/%d/best/a_dis.dict" % ID best_b_dname = "./cache/%d/best/b_dis.dict" % ID best_goname = "./cache/%d/best/genopt.dict" % ID best_a_doname = "./cache/%d/best/a_disopt.dict" % ID best_b_doname = "./cache/%d/best/b_disopt.dict" % ID gscheduler = optim.lr_scheduler.StepLR(goptimizer, step_size=500, gamma=0.5) dscheduler = optim.lr_scheduler.StepLR(doptimizer_a, step_size=250, gamma=0.5) fine_tune_stage = args.reinforce language_loss_fct = nn.CrossEntropyLoss(reduce=False) prev_language_score = 0 print(classifier.classifer_test(discriminator_a, tokenizer, dev_data, args.batch_size)) one_tensor = torch.ones(1) if args.cuda: one_tensor = one_tensor.cuda() # pretrain_language_model(language_model, dataloader) for i in range(EPOCH): # generator training generator.train() discriminator_a.eval() step_cnt = 0 goptimizer.zero_grad() for j in range(GBATCH * accumulation_step): # print(gcnt) step_cnt += 1 batch = dataloader.get() # reconstruction loss rec_text = torch.cat((batch["src_text"], batch["style_tokens"].unsqueeze(1), batch["src_text"]), dim=1) outputs = generator(rec_text, labels=rec_text) mleloss = outputs[0] mleloss_ = F.threshold(mleloss, config["mle_threshold"], 0) # classifier loss transfer_text = torch.cat((batch["src_text"], batch["transfer_tokens"].unsqueeze(1)), dim=1) cur_len = transfer_text.size(1) _, probs = generate(generator, transfer_text, cur_len=cur_len, max_length=int(cur_len * 2 - 1), pad_token_id=tokenizer.pad_token_id, eos_token_ids=tokenizer.eos_token_id, batch_size=args.batch_size) probs = F.softmax(probs, dim=2) idx_probs, words = torch.max(probs, dim=2) style_pred = discriminator_a.approximate(probs, 1 - batch["style"]) style_pred = torch.squeeze(style_pred, 1) class_loss = - torch.log(style_pred + 0.0001).mean() # adv loss adv_pred = discriminator_b.approximate(probs) adv_pred = torch.squeeze(adv_pred, 1) advloss = - torch.log(adv_pred + 0.0001).mean() # sim loss if args.sim: wx1, wl1, wm1 = sim_model.torchify_batch([make_example(x, sim_model) for x in batch["tokens"]]) words_ = words.cpu().data.numpy().tolist() generate_sents = [tokenizer.decode(evaluate.clean(sent, tokenizer), skip_special_tokens=True, clean_up_tokenization_spaces=False).replace("' ", "'").lstrip() for sent in words_] wx2, wl2, wm2 = sim_model.torchify_batch([make_example(x, sim_model) for x in generate_sents]) with torch.no_grad(): sim_scores = sim_model.scoring_function(wx1, wm1, wl1, wx2, wm2, wl2) avg_sim_score += sim_scores.mean().item() if args.length_penalty: length_penalty = compute_length_penalty(wl1, wl2, 0.25) else: length_penalty = 1 simloss = torch.mul(- torch.mul(sim_scores, length_penalty), torch.log(idx_probs).mean(dim=1)).mean() 
else: simloss = torch.zeros(1).cuda() # language fluency loss with torch.no_grad(): outputs = language_model(words) true_outputs = language_model(batch["src_text"]) lm_logits = outputs[0] shift_logits = lm_logits[..., :-1, :].contiguous() shift_labels = words[..., 1:].contiguous() language_loss = language_loss_fct(shift_logits.transpose(1, 2), shift_labels) lengths = torch.LongTensor([evaluate.get_len(x, tokenizer) for x in words_]) - 1 lengths = lengths.cuda() if args.cuda else lengths mask = get_mask(lengths, language_loss.size(1)) if config["sentence_level"]: language_loss = torch.mul(mask, language_loss).sum(1) / (lengths.float() + 0.001) true_lm_logits = true_outputs[0] true_shift_logits = true_lm_logits[..., :-1, :].contiguous() true_shift_labels = batch["src_text"][..., 1:].contiguous() true_language_loss = language_loss_fct(true_shift_logits.transpose(1, 2), true_shift_labels) true_lengths = batch["length"] - 1 true_mask = get_mask(true_lengths, true_language_loss.size(1)) true_language_loss = torch.mul(true_mask, true_language_loss).sum(1) / (true_lengths.float() + 0.001) avg_language_diff += (language_loss.mean() - true_language_loss.mean()).item() now_language_score = language_loss.mean().item() if config["sentence_level"]: language_loss = torch.mul(language_loss - true_language_loss, torch.mul(mask, torch.log(idx_probs[:, 1:])).sum(1) / (lengths.float() + 0.001)).mean() else: language_loss = (torch.mul(torch.mul(language_loss, torch.log(idx_probs[:, 1:])), mask).sum(1) / (lengths.float() + 0.001)).mean() avg_language_loss += language_loss.item() avg_language_score += now_language_score # compute loss if gcnt < PRETRAIN_BATCH: loss = W_M * mleloss_ else: loss = W_M * mleloss_ + W_A * advloss + W_S * simloss + W_L * language_loss + W_D * class_loss gloss_all += loss.item() / accumulation_step gloss_mle += mleloss.item() gloss_adv += advloss.item() gloss_sim += simloss.item() gloss_class += class_loss.item() now_advloss = advloss.item() now_simloss = simloss.item() now_loss = loss.item() now_mleloss = mleloss.item() loss = loss / accumulation_step # normalizing loss.backward() if step_cnt % accumulation_step == 0: gcnt += 1 step_cnt = 0 nn.utils.clip_grad_norm_(generator.parameters(), GRAD_CLIP) goptimizer.step() goptimizer.zero_grad() if W_L < config["max_language_weight"]: # adjusting weights W_L += 1 del advloss, mleloss, mleloss_, loss, simloss torch.cuda.empty_cache() # discriminator training discriminator_b.train() discriminator_a.train() generator.eval() doptimizer_a.zero_grad() doptimizer_b.zero_grad() for j in range(DBATCH): if gcnt < PRETRAIN_BATCH: break batch = dataloader.get() transfer_text = torch.cat((batch["src_text"], batch["transfer_tokens"].unsqueeze(1)), dim=1) cur_len = transfer_text.size(1) with torch.no_grad(): _, probs = generate(generator, transfer_text, cur_len=cur_len, max_length=int(cur_len * 2 - 1), pad_token_id=tokenizer.pad_token_id, eos_token_ids=tokenizer.eos_token_id, batch_size=args.batch_size) probs = F.softmax(probs, dim=2) probs.detach_() # discriminator for naturalness if args.reinforce: probs, words = torch.max(probs, dim=2) style_pred = discriminator_b(words) else: style_pred = discriminator_b.approximate(probs) style_pred = torch.squeeze(style_pred, 1) real_style_pred_true = discriminator_b(batch["src_text"]) real_style_pred_ture = torch.squeeze(real_style_pred_true, 1) fake_loss_b = - torch.log(1 - style_pred).mean() real_loss_b = - torch.log(real_style_pred_true).mean() advloss_b = real_loss_b + fake_loss_b avg_fake_loss += 
fake_loss_b.item() avg_real_loss += real_loss_b.item() now_fake_loss = fake_loss_b.item() now_real_loss = real_loss_b.item() now_dis_loss = advloss_b.item() dloss_b += advloss_b.item() doptimizer_b.zero_grad() advloss_b.backward() nn.utils.clip_grad_norm_(discriminator_b.parameters(), GRAD_CLIP) doptimizer_b.step() # discriminator for style if args.update_style: if args.reinforce: style_pred = discriminator_a(words, 1 - batch["style"]) else: style_pred = discriminator_a.approximate(probs, 1 - batch["style"]) style_pred = torch.squeeze(style_pred, 1) real_style_pred_true = discriminator_a(batch["src_text"], batch["style"]) real_style_pred_ture = torch.squeeze(real_style_pred_true, 1) fake_loss_a = - torch.log(1 - style_pred).mean() real_loss_a = - torch.log(real_style_pred_true).mean() advloss_a = real_loss_a + fake_loss_a avg_fake_loss_cls += fake_loss_a.item() avg_real_loss_cls += real_loss_a.item() dloss_a += advloss_a.item() doptimizer_a.zero_grad() advloss_a.backward() nn.utils.clip_grad_norm_(discriminator_a.parameters(), GRAD_CLIP) doptimizer_a.step() else: real_loss_a = 0 fake_loss_a = 0 advloss_a = 0 dcnt += 1 del real_loss_b, fake_loss_b, advloss_b, real_loss_a, fake_loss_a, advloss_a torch.cuda.empty_cache() if gcnt % report_step == 0: myprint("task id: %d"%ID) myprint("generator training batch: %d"%gcnt) myprint("average loss: %.6f"%(gloss_all / report_step)) myprint("average adv loss: %.6f"%(gloss_adv / (report_step * accumulation_step))) myprint("average mle loss: %.6f"%(gloss_mle / (report_step * accumulation_step))) myprint("average cycle loss: %.6f"%(gloss_cycle / (report_step * accumulation_step))) myprint("average sim loss: %.6f"%(gloss_sim / (report_step * accumulation_step))) myprint("average sim score: %.6f"%(avg_sim_score / (report_step * accumulation_step))) myprint("avg class loss: %.6f"%(gloss_class / (report_step * accumulation_step))) myprint("avg class score: %.6f"%(avg_cls_score / (report_step * accumulation_step))) myprint("avg language score: %.6f"%(avg_language_score / (report_step * accumulation_step))) myprint("avg language loss: %.6f"%(avg_language_loss / (report_step * accumulation_step))) if config["sentence_level"]: myprint("avg language diff: %.6f"%(avg_language_diff / (report_step * accumulation_step))) myprint("avg adv score: %.6f"%(avg_adv_score / (report_step * accumulation_step))) avg_language_loss, avg_language_score, avg_adv_score, avg_language_diff = 0, 0, 0, 0 myprint() gloss_all, gloss_mle, gloss_adv, gloss_cycle, gloss_sim, avg_sim_score, gloss_class, avg_cls_score = 0, 0, 0, 0, 0, 0, 0, 0 if dcnt % report_step == 0 and dcnt != 0: myprint("discriminator training batch: %d"%dcnt) myprint("b average loss: %.6f"%(dloss_b / (report_step))) myprint("avg real loss: %.6f"%(avg_real_loss/(report_step))) myprint("avg fake loss: %.6f"%(avg_fake_loss/(report_step))) myprint("a average loss: %.6f"%(dloss_a / (report_step))) myprint("avg real cls loss: %.6f"%(avg_real_loss_cls/(report_step))) myprint("avg fake cls loss: %.6f"%(avg_fake_loss_cls/(report_step))) myprint() dloss_a, dloss_b, avg_real_loss, avg_fake_loss, avg_real_loss_cls, avg_fake_loss_cls = 0, 0, 0, 0, 0, 0 gscheduler.step() dscheduler.step() string = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) gname = "./cache/%d/gen-%s.dict" % (ID, string) a_dname = "./cache/%d/a_dis-%s.dict" % (ID, string) b_dname = "./cache/%d/b_dis-%s.dict" % (ID, string) goname = "./cache/%d/genopt-%s.dict" % (ID, string) a_doname = "./cache/%d/a_disopt-%s.dict" % (ID, string) b_doname = 
"./cache/%d/b_disopt-%s.dict" % (ID, string) if gcnt % 1000 == 0 and args.log: generator.eval() result = test(generator, "dev") acc_transfer = result["acc"] self_bleu = result["self_bleu"] dev_acc = acc_transfer dev_bleu = self_bleu dev_ppl = result["ppl"] myprint(f"gcnt: {gcnt}") myprint("dev set:") myprint("acc transfer: %.6f"%acc_transfer) myprint("self_bleu: %.6f"%self_bleu) myprint("ppl: %.6f"%dev_ppl) result = test(generator, "test") acc_transfer = result["acc"] self_bleu = result["self_bleu"] ppl = result["ppl"] myprint("test set:") myprint("acc transfer: %.6f"%acc_transfer) myprint("self_bleu: %.6f"%self_bleu) myprint("ppl: %.6f"%ppl) if DATASET != "imdb": bleu = result["bleu"] myprint("bleu: %.6f"%bleu) generator.train() generator.cpu() discriminator_a.cpu() f_score = 2 * dev_acc * dev_bleu / (dev_acc + dev_bleu) if dev_ppl < best_record and dev_acc > config["acc_threshold"] and gcnt > PRETRAIN_BATCH: best_record = dev_ppl myprint("best") myprint("acc transfer: %.6f"%acc_transfer) myprint("self_bleu: %.6f"%self_bleu) myprint("ppl: %.6f"%ppl) if DATASET != "imdb": myprint("bleu: %.6f"%bleu) myprint() torch.save(generator.state_dict(), best_gname) torch.save(discriminator_a.state_dict(), best_a_dname) torch.save(goptimizer.state_dict(), best_goname) torch.save(doptimizer_a.state_dict(), best_a_doname) if gcnt > PRETRAIN_BATCH: gname = "./cache/%d/gen-%d.dict" % (ID, gcnt) a_dname = "./cache/%d/a_dis-%d.dict" % (ID, gcnt) torch.save(generator.state_dict(), gname) torch.save(discriminator_a.state_dict(), a_dname) if args.cuda: generator.cuda() discriminator_a.cuda()
import torch as t import numpy as np import pickle import matplotlib.pyplot as plt from torch import nn from tqdm import tqdm t.manual_seed(13) device = t.device("cuda" if t.cuda.is_available() else "cpu") t.cuda.set_device(2) # device = t.device("cpu") def l2_regularize(array): loss = t.sum(array ** 2.0) return loss class MFModel(nn.Module): def __init__(self, n_users, n_items, n_factors = 10, dropout = 0, sparse = False): super(MFModel, self).__init__() self.n_users = n_users self.n_items = n_items self.user_biases = nn.Embedding(n_users, 1, sparse=sparse).to(device) self.item_biases = nn.Embedding(n_items, 1, sparse=sparse).to(device) self.user_embedding = nn.Embedding(n_users, n_factors, sparse = sparse).to(device) self.item_embedding = nn.Embedding(n_items, n_factors, sparse = sparse).to(device) # self.fc = nn.Linear(2 * n_factors, 1).to(device) t.nn.init.xavier_uniform_(self.user_embedding.weight) t.nn.init.xavier_uniform_(self.item_embedding.weight) self.user_biases.weight.data.fill_(0.) self.item_biases.weight.data.fill_(0.)
    threshold_init=-15,
    candidate_p=[50000, 30000, 20000],
)
opt = parser.parse_args(args=[])
opt = vars(opt)

# rename alias
opt['alias'] = '{}_{}_BaseDim{}_bsz{}_lr_{}_optim_{}_thresholdType{}_thres_init{}_{}-{}_l2_penalty{}'.format(
    opt['model'].upper(),
    opt['alias'],
    opt['latent_dim'],
    opt['batch_size_train'],
    opt['fm_lr'],
    opt['fm_optimizer'],
    opt['threshold_type'].upper(),
    opt['threshold_init'],
    opt['g_type'],
    opt['gk'],
    opt['l2_penalty'])
print(opt['alias'])

random.seed(opt['seed'])
# np.random.seed(opt['seed'])
torch.manual_seed(opt['seed'])
torch.cuda.manual_seed_all(opt['seed'])

engine = Engine(opt)
engine.train()
def main(args): if not os.path.isdir('CMDs'): os.mkdir('CMDs') with open('CMDs/train.cmd', 'a') as f: f.write(' '.join(sys.argv) + '\n') f.write('--------------------------------\n') # Set the seed value all over the place to make this reproducible. seed_val = args.seed random.seed(seed_val) np.random.seed(seed_val) torch.manual_seed(seed_val) torch.cuda.manual_seed_all(seed_val) # Choose device device = get_default_device() with open(args.train_data_path) as f: train_data = json.load(f) if args.train_data_half != 0: # Seed has already been set earlier random.shuffle(train_data) mid = len(train_data) // 2 if args.train_data_half == 1: train_data = train_data[:mid] elif args.train_data_half == 2: train_data = train_data[mid:] electra_base = "google/electra-base-discriminator" electra_large = "google/electra-large-discriminator" tokenizer = ElectraTokenizer.from_pretrained(electra_large, do_lower_case=True) labels = [] input_ids = [] token_type_ids = [] count = 0 for item in train_data: context = item["context"] question = item["question"] lab = item["label"] if lab == 3: # Remove unanswerable examples at training time continue labels.append(lab) three_inp_ids = [] three_tok_type_ids = [] three_answer_options = item["answers"][:3] for i, ans in enumerate(three_answer_options): combo = context + " [SEP] " + question + " " + ans inp_ids = tokenizer.encode(combo) if len(inp_ids)>512: inp_ids = [inp_ids[0]] + inp_ids[-511:] tok_type_ids = [0 if i<= inp_ids.index(102) else 1 for i in range(len(inp_ids))] three_inp_ids.append(inp_ids) three_tok_type_ids.append(tok_type_ids) three_inp_ids = pad_sequences(three_inp_ids, maxlen=MAXLEN, dtype="long", value=0, truncating="post", padding="post") three_tok_type_ids = pad_sequences(three_tok_type_ids, maxlen=MAXLEN, dtype="long", value=0, truncating="post", padding="post") input_ids.append(three_inp_ids) token_type_ids.append(three_tok_type_ids) # Create attention masks attention_masks = [] for sen in input_ids: sen_attention_masks = [] for opt in sen: att_mask = [int(token_id > 0) for token_id in opt] sen_attention_masks.append(att_mask) attention_masks.append(sen_attention_masks) # Convert to torch tensors labels = torch.tensor(labels) labels = labels.long().to(device) input_ids = torch.tensor(input_ids) input_ids = input_ids.long().to(device) token_type_ids = torch.tensor(token_type_ids) token_type_ids = token_type_ids.long().to(device) attention_masks = torch.tensor(attention_masks) attention_masks = attention_masks.long().to(device) # Create the DataLoader for training set. train_data = TensorDataset(input_ids, token_type_ids, attention_masks, labels) train_sampler = RandomSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.batch_size) if args.train_data_half != 2: model = ElectraForMultipleChoice.from_pretrained(electra_large).to(device) else: model = torch.load(args.model_path, map_location=device).to(device) optimizer = AdamW(model.parameters(), lr = args.learning_rate, eps = args.adam_epsilon # weight_decay = 0.01 ) loss_values = [] # Total number of training steps is number of batches * number of epochs. total_steps = len(train_dataloader) * args.n_epochs # Create the learning rate scheduler. scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0.1*total_steps, num_training_steps = total_steps) for epoch in range(args.n_epochs): # Perform one full pass over the training set. 
print("") print('======== Epoch {:} / {:} ========'.format(epoch + 1, args.n_epochs)) print('Training...') # Measure how long the training epoch takes. t0 = time.time() # Reset the total loss for this epoch. total_loss = 0 model.train() model.zero_grad() # For each batch of training data... for step, batch in enumerate(train_dataloader): # Progress update every 40 batches. if step % 40 == 0 and not step == 0: # Calculate elapsed time in minutes. elapsed = format_time(time.time() - t0) # Report progress. print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(step, len(train_dataloader), elapsed)) b_input_ids = batch[0].to(device) b_tok_typ_ids = batch[1].to(device) b_att_msks = batch[2].to(device) b_labs = batch[3].to(device) model.zero_grad() outputs = model(input_ids=b_input_ids, attention_mask=b_att_msks, token_type_ids=b_tok_typ_ids, labels=b_labs) loss = outputs[0] total_loss += loss.item() print(loss.item()) optimizer.zero_grad() loss.backward() optimizer.step() # Update the learning rate. scheduler.step() # model.zero_grad() # Calculate the average loss over the training data. avg_train_loss = total_loss / len(train_dataloader) # Store the loss value for plotting the learning curve. loss_values.append(avg_train_loss) print("") print(" Average training loss: {0:.2f}".format(avg_train_loss)) print(" Training epoch took: {:}".format(format_time(time.time() - t0))) # Save the model to a file file_path = args.save_path+'electra_QA_MC_seed'+str(args.seed)+'.pt' torch.save(model, file_path)
def main(num): # Generate configuration files depending on experiment being run utils.generate_config_files("conceptnet", num) # Loads the correct configuration file config_file = "config/conceptnet/config_{}.json".format(num) print(config_file) # Read config file to option config = cfg.read_config(cfg.load_config(config_file)) opt, meta = cfg.get_parameters(config) # config.gpu_mode = torch.cuda.is_available() # Set the random seeds torch.manual_seed(opt.train.static.seed) random.seed(opt.train.static.seed) if config.gpu_mode: torch.cuda.manual_seed_all(opt.train.static.seed) # Load the data splits = ["train", "dev", "test"] opt.train.dynamic.epoch = 0 print("Loading Data") # Initialize path to pre-set data loader path = "data/conceptnet/processed/{}/{}.pickle".format( opt.exp, utils.make_name_string(opt.data)) # Make data loader data_loader = data.make_data_loader(opt) loaded = data_loader.load_data(path) print(data_loader.sequences["train"]["total"].size(0)) data_loader.opt = opt data_loader.batch_size = opt.train.dynamic.bs print("Done.") text_encoder = TextEncoder(config.encoder_path, config.bpe_path) categories = data.conceptnet_data.conceptnet_relations special = [data.start_token, data.end_token] special += ["<{}>".format(cat) for cat in categories] if loaded: text_encoder.encoder = data_loader.vocab_encoder text_encoder.decoder = data_loader.vocab_decoder else: for special_token in special: text_encoder.decoder[len(text_encoder.encoder)] = special_token text_encoder.encoder[special_token] = len(text_encoder.encoder) data_loader.make_tensors(text_encoder, special) # Set max size of different parts of relation context_size_e1 = data_loader.max_e1 context_size_e2 = data_loader.max_e2 context_size_r = data_loader.max_r opt.data.maxr = context_size_r n_special = len(special) n_ctx = context_size_e1 + context_size_r + context_size_e2 n_vocab = len(text_encoder.encoder) + n_ctx print(data_loader.__dict__.keys()) opt.net.vSize = n_vocab # Build Model print("Building Model") model = models.make_model( opt, n_vocab, n_ctx, n_special, load=(opt.net.init=="pt")) print("Done.") print("Files will be logged at: {}".format( utils.make_name(opt, prefix="results/losses/", is_dir=True, eval_=True))) data_loader.reset_offsets("train", keys=["total"]) data.set_max_sizes(data_loader) # Push to GPU if config.gpu_mode: print("Pushing to GPU: {}".format(config.gpu_index)) cfg.device = config.gpu_index cfg.do_gpu = True torch.cuda.set_device(cfg.device) model.cuda(cfg.device) print("Done.") print("Training") optimizer = OpenAIAdam(model.parameters(), lr=opt.train.dynamic.lr, schedule=opt.train.static.lrsched, warmup=opt.train.static.lrwarm, t_total=meta.iterations, b1=opt.train.static.b1, b2=opt.train.static.b2, e=opt.train.static.e, l2=opt.train.static.l2, vector_l2=opt.train.static.vl2, max_grad_norm=opt.train.static.clip) trainer = train.make_trainer( opt, meta, data_loader, model, optimizer) print(data_loader.sequences["dev"]["total"].max()) trainer.set_generator(opt, model, data_loader) trainer.set_evaluator(opt, model, data_loader) trainer.run()
# fig = plt.figure(figsize=(32, 16)) # plt.suptitle(batched['sentence'][i], fontsize=30) # for j in range(min(len(sequence), 14)): # plt.subplot(3, 5, j+1) # partially_completed_img = clamp_array(sequence[j][:,:,-3:], 0, 255).astype(np.uint8) # partially_completed_img = partially_completed_img[:,:,::-1] # plt.imshow(partially_completed_img) # plt.axis('off') # plt.subplot(3, 5, 15) # plt.imshow(color[:,:,::-1]) # plt.axis('off') # fig.savefig(out_path, bbox_inches='tight') # plt.close(fig) break if __name__ == '__main__': config, unparsed = get_config() np.random.seed(config.seed) random.seed(config.seed) torch.manual_seed(config.seed) if(config.cuda): torch.cuda.manual_seed_all(config.seed) prepare_directories(config) test_puzzle_model(config)
def main(): # Training settings parser = argparse.ArgumentParser(description='PyTorch MNIST Example') parser.add_argument('--dataset', type=str, default="mnist", choices=["mnist", "cifar10"], metavar='D', help='training dataset (mnist or cifar10)') parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--percent', type=list, default=[0.8, 0.92, 0.991, 0.93], metavar='P', help='pruning percentage (default: 0.8)') parser.add_argument('--alpha', type=float, default=5e-4, metavar='L', help='l2 norm weight (default: 5e-4)') parser.add_argument('--rho', type=float, default=1e-2, metavar='R', help='cardinality weight (default: 1e-2)') parser.add_argument('--l1', default=False, action='store_true', help='prune weights with l1 regularization instead of cardinality') parser.add_argument('--l2', default=False, action='store_true', help='apply l2 regularization') parser.add_argument('--num_pre_epochs', type=int, default=3, metavar='P', help='number of epochs to pretrain (default: 3)') parser.add_argument('--num_epochs', type=int, default=10, metavar='N', help='number of epochs to train (default: 10)') parser.add_argument('--num_re_epochs', type=int, default=3, metavar='R', help='number of epochs to retrain (default: 3)') parser.add_argument('--lr', type=float, default=1e-3, metavar='LR', help='learning rate (default: 1e-2)') parser.add_argument('--adam_epsilon', type=float, default=1e-8, metavar='E', help='adam epsilon (default: 1e-8)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--save-model', action='store_true', default=False, help='For Saving the current Model') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {} if args.dataset == "mnist": train_loader = torch.utils.data.DataLoader( datasets.MNIST('data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader( datasets.MNIST('data', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=args.test_batch_size, shuffle=True, **kwargs) else: args.percent = [0.8, 0.92, 0.93, 0.94, 0.95, 0.99, 0.99, 0.93] args.num_pre_epochs = 5 args.num_epochs = 20 args.num_re_epochs = 5 train_loader = torch.utils.data.DataLoader( datasets.CIFAR10('data', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.49139968, 0.48215827, 0.44653124), (0.24703233, 0.24348505, 0.26158768)) ])), shuffle=True, batch_size=args.batch_size, **kwargs) test_loader = torch.utils.data.DataLoader( datasets.CIFAR10('data', train=False, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.49139968, 0.48215827, 0.44653124), (0.24703233, 0.24348505, 0.26158768)) ])), shuffle=True, batch_size=args.test_batch_size, **kwargs) model = LeNet().to(device) if args.dataset == "mnist" else AlexNet().to(device) 
optimizer = PruneAdam(model.named_parameters(), lr=args.lr, eps=args.adam_epsilon) train(args, model, device, train_loader, test_loader, optimizer) mask = apply_l1_prune(model, device, args) if args.l1 else apply_prune(model, device, args) print_prune(model) test(args, model, device, test_loader) retrain(args, model, mask, device, train_loader, test_loader, optimizer)
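# Hedged illustration only: apply_prune / apply_l1_prune are not shown in this file, so the
# sketch below is a generic magnitude-pruning mask in the same spirit -- zero the
# smallest-magnitude weights and return {name: 0/1 tensor} masks that retrain() could use
# to keep pruned weights at zero.
import torch

def magnitude_prune(model, percent=0.8):
    mask = {}
    with torch.no_grad():
        for name, param in model.named_parameters():
            if param.dim() < 2:          # skip biases / norm parameters
                continue
            k = int(percent * param.numel())
            if k == 0:
                continue
            threshold = param.abs().flatten().kthvalue(k).values
            keep = (param.abs() > threshold).float()
            param.mul_(keep)             # zero out pruned weights in place
            mask[name] = keep
    return mask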
def train(model, train_set, test_set, save, n_epochs=300, valid_size=5000, batch_size=64, lr=0.1, wd=0.0001, momentum=0.9, seed=None, info=''): save += info if not os.path.exists(save): os.makedirs(save) if not os.path.isdir(save): raise Exception('%s is not a dir' % save) best_error = 1 if seed is not None: torch.manual_seed(seed) # Data loaders test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=0) train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=(torch.cuda.is_available()), num_workers=0) # Model on cuda if torch.cuda.is_available(): model = model.cuda() # Wrap model for multi-GPUs, if necessary model_wrapper = model if torch.cuda.is_available() and torch.cuda.device_count() > 1: model_wrapper = torch.nn.DataParallel(model).cuda() # Optimizer optimizer = torch.optim.SGD(model_wrapper.parameters(), lr=lr, momentum=momentum, nesterov=True, weight_decay=wd) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs], gamma=0.1) # Start log with open(os.path.join(save, 'results.csv'), 'w') as f: f.write('epoch,train_loss,train_error,valid_loss,valid_error,test_error\n') # Train model for epoch in range(n_epochs): scheduler.step() _, train_loss, train_error = train_epoch( model=model_wrapper, loader=train_loader, optimizer=optimizer, epoch=epoch, n_epochs=n_epochs, info=info ) _, valid_loss, valid_error = test_epoch( model=model, loader=test_loader, is_test=True ) # Determine if model is the best if valid_error < best_error: best_error = valid_error print('New best error: %.4f' % best_error) torch.save(model.state_dict(), os.path.join(save, 'model.dat')) # Log results with open(os.path.join(save, 'results.csv'), 'a') as f: f.write('%03d,%0.6f,%0.6f,%0.5f,%0.5f,\n' % ( (epoch + 1), train_loss, train_error, valid_loss, valid_error, )) torch.save(model.state_dict(), os.path.join(save, 'current.dat')) # Final test of model on test set model.load_state_dict(torch.load(os.path.join(save, 'model.dat'))) test_results = test_epoch( model=model, loader=test_loader, is_test=True ) _, _, test_error = test_results with open(os.path.join(save, 'results.csv'), 'a') as f: f.write(',,,,,%0.5f\n' % (test_error)) print('Final test error: %.4f' % test_error) model.load_state_dict(torch.load(os.path.join(save, 'current.dat')))
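# Hedged usage sketch for train() above: the CIFAR-10 datasets and the torchvision ResNet
# are illustrative assumptions -- only the train() signature itself comes from the function above.
from torchvision import datasets, transforms, models

if __name__ == '__main__':
    tfm = transforms.Compose([transforms.ToTensor()])
    train_set = datasets.CIFAR10('data', train=True, download=True, transform=tfm)
    test_set = datasets.CIFAR10('data', train=False, download=True, transform=tfm)
    train(models.resnet18(num_classes=10), train_set, test_set,
          save='checkpoints/', n_epochs=10, batch_size=64, seed=0, info='_resnet18')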
def parse_option(): parser = argparse.ArgumentParser('S3DIS scene-segmentation training') parser.add_argument('--cfg', type=str, required=True, help='config file') parser.add_argument('--data_root', type=str, default='data', help='root director of dataset') parser.add_argument('--num_workers', type=int, default=4, help='num of workers to use') parser.add_argument('--batch_size', type=int, help='batch_size') parser.add_argument('--num_points', type=int, help='num_points') parser.add_argument('--num_steps', type=int, help='num_steps') parser.add_argument('--base_learning_rate', type=float, help='base learning rate') parser.add_argument('--epochs', type=int, help='number of training epochs') parser.add_argument('--start_epoch', type=int, help='used for resume') # io parser.add_argument('--load_path', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--print_freq', type=int, default=10, help='print frequency') parser.add_argument('--save_freq', type=int, default=10, help='save frequency') parser.add_argument('--val_freq', type=int, default=10, help='val frequency') parser.add_argument('--log_dir', type=str, default='log', help='log dir [default: log]') # misc parser.add_argument("--local_rank", type=int, help='local rank for DistributedDataParallel') parser.add_argument("--rng_seed", type=int, default=0, help='manual seed') args, unparsed = parser.parse_known_args() update_config(args.cfg) config.data_root = args.data_root config.num_workers = args.num_workers config.load_path = args.load_path config.print_freq = args.print_freq config.save_freq = args.save_freq config.val_freq = args.val_freq config.rng_seed = args.rng_seed config.local_rank = args.local_rank ddir_name = args.cfg.split('.')[-2].split('/')[-1] config.log_dir = os.path.join(args.log_dir, 's3dis', ddir_name) if args.batch_size: config.batch_size = args.batch_size if args.num_points: config.num_points = args.num_points if args.num_steps: config.num_steps = args.num_steps if args.base_learning_rate: config.base_learning_rate = args.base_learning_rate if args.epochs: config.epochs = args.epochs if args.start_epoch: config.start_epoch = args.start_epoch print(args) print(config) torch.manual_seed(args.rng_seed) torch.cuda.manual_seed_all(args.rng_seed) random.seed(args.rng_seed) np.random.seed(args.rng_seed) return args, config
) parser.add_argument( '--cls_feats_max', type=int, default= repeat1_attribute_prediction_exist_Tm_binary_utilize_new_attribute_link_egcnh_parameters_cls_feats_max ) opt = parser.parse_args() print(opt) if opt.manualSeed is None: opt.manualSeed = random.randint(1, 10000) print("Random Seed: ", opt.manualSeed) random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) opt.dataroot = InputDir if opt.cuda: torch.cuda.manual_seed_all(opt.manualSeed) opt.L = opt.init_L gcn_parameters = [ 'feats_per_node', 'feats_per_node_min', 'feats_per_node_max', 'layer_1_feats', 'layer_1_feats_min', 'layer_1_feats_max', 'layer_2_feats', 'layer_2_feats_same_as_l1', 'k_top_grcu', 'num_layers', 'lstm_l1_layers', 'lstm_l1_feats', 'lstm_l1_feats_min', 'lstm_l1_feats_max', 'lstm_l2_layers', 'lstm_l2_feats', 'lstm_l2_feats_same_as_l1', 'cls_feats', 'cls_feats_min', 'cls_feats_max', 'output_dim'
def main(args): SEED = 1234 torch.manual_seed(SEED) torch.cuda.manual_seed(SEED) train_data = p.load(open(args.train_data, 'rb')) dev_data = p.load(open(args.tune_data, 'rb')) test_data = p.load(open(args.test_data, 'rb')) word_embeddings = p.load(open(args.word_embeddings, 'rb')) vocab = p.load(open(args.vocab, 'rb')) BATCH_SIZE = 64 INPUT_DIM = len(vocab) EMBEDDING_DIM = len(word_embeddings[0]) HIDDEN_DIM = 100 OUTPUT_DIM = 1 N_LAYERS = 1 BIDIRECTIONAL = True DROPOUT = 0.5 context_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT) question_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT) answer_model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, HIDDEN_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT) utility_model = FeedForward(HIDDEN_DIM * 3, HIDDEN_DIM, OUTPUT_DIM) criterion = nn.BCEWithLogitsLoss() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') utility_model = utility_model.to(device) context_model = context_model.to(device) question_model = question_model.to(device) answer_model = answer_model.to(device) criterion = criterion.to(device) word_embeddings = autograd.Variable( torch.FloatTensor(word_embeddings).cuda()) context_model.embedding.weight.data.copy_(word_embeddings) question_model.embedding.weight.data.copy_(word_embeddings) answer_model.embedding.weight.data.copy_(word_embeddings) # Fix word embeddings context_model.embedding.weight.requires_grad = False question_model.embedding.weight.requires_grad = False answer_model.embedding.weight.requires_grad = False optimizer = optim.Adam(list([par for par in context_model.parameters() if par.requires_grad]) + \ list([par for par in question_model.parameters() if par.requires_grad]) + \ list([par for par in answer_model.parameters() if par.requires_grad]) + \ list([par for par in utility_model.parameters() if par.requires_grad])) N_EPOCHS = 300 train_data = prepare_data(train_data, vocab, 'train', args.cuda) dev_data = prepare_data(dev_data, vocab, 'dev', args.cuda) test_data = prepare_data(test_data, vocab, 'test', args.cuda) for epoch in range(N_EPOCHS): train_loss, train_acc = train_fn(context_model, question_model, answer_model, utility_model, \ train_data, optimizer, criterion, BATCH_SIZE) valid_loss, valid_acc = evaluate(context_model, question_model, answer_model, utility_model, \ dev_data, criterion, BATCH_SIZE) #valid_loss, valid_acc = evaluate(context_model, question_model, answer_model, utility_model, \ # test_data, criterion, BATCH_SIZE) print 'Epoch %d: Train Loss: %.3f, Train Acc: %.3f, Val Loss: %.3f, Val Acc: %.3f' % ( epoch, train_loss, train_acc, valid_loss, valid_acc)
def main(): logger.info("Running %s" % ' '.join(sys.argv)) parser = argparse.ArgumentParser() ## Required parameters parser.add_argument("--do_train", action='store_true', help="Whether to run training.") parser.add_argument("--do_eval", action='store_true', help="Whether to run eval on the dev set.") parser.add_argument( "--data_dir", default="data/", type=str, help= "The input data dir. Should contain the .tsv files (or other data files) for the task." ) parser.add_argument( "--output_dir", default="checkpoints/predictor/", type=str, help= "The output directory where the model predictions and checkpoints will be written." ) parser.add_argument( "--load_dir", type=str, help= "The output directory where the model checkpoints will be loaded during evaluation" ) parser.add_argument('--load_step', type=int, default=0, help="The checkpoint step to be loaded") parser.add_argument("--fact", default="first", choices=["first", "second"], type=str, help="Whether to put fact in front.") parser.add_argument("--test_set", default="dev", choices=[ "train", "dev", "test", "simple_test", "complex_test", "small_test" ], help="Which test set is used for evaluation", type=str) parser.add_argument("--train_batch_size", default=18, type=int, help="Total batch size for training.") parser.add_argument("--eval_batch_size", default=18, type=int, help="Total batch size for eval.") ## Other parameters parser.add_argument( "--bert_model", default="bert-base-uncased", type=str, help="Bert pre-trained model selected in the list: bert-base-uncased, " "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, " "bert-base-multilingual-cased, bert-base-chinese.") parser.add_argument("--task_name", default="QQP", type=str, help="The name of the task to train.") parser.add_argument('--period', type=int, default=500) parser.add_argument( "--cache_dir", default="", type=str, help= "Where do you want to store the pre-trained models downloaded from s3") parser.add_argument( "--max_seq_length", default=256, type=int, help= "The maximum total input sequence length after WordPiece tokenization. \n" "Sequences longer than this will be truncated, and sequences shorter \n" "than this will be padded.") parser.add_argument( "--do_lower_case", action='store_true', help="Set this flag if you are using an uncased model.") parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--num_train_epochs", default=20.0, type=float, help="Total number of training epochs to perform.") parser.add_argument( "--warmup_proportion", default=0.1, type=float, help= "Proportion of training to perform linear learning rate warmup for. " "E.g., 0.1 = 10%% of training.") parser.add_argument("--no_cuda", action='store_true', help="Whether not to use CUDA when available") parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus") parser.add_argument('--seed', type=int, default=42, help="random seed for initialization") parser.add_argument( '--gradient_accumulation_steps', type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass." ) parser.add_argument( '--fp16', action='store_true', help="Whether to use 16-bit float precision instead of 32-bit") parser.add_argument( '--loss_scale', type=float, default=0, help= "Loss scaling to improve fp16 numeric stability. 
Only used when fp16 set to True.\n" "0 (default value): dynamic loss scaling.\n" "Positive power of 2: static loss scaling value.\n") parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.") parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.") args = parser.parse_args() pprint(vars(args)) sys.stdout.flush() if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() processors = { "qqp": QqpProcessor, } output_modes = { "qqp": "classification", } if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='nccl') logging.basicConfig( format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN) logger.info( "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}". format(device, n_gpu, bool(args.local_rank != -1), args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(args.gradient_accumulation_steps)) args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) if not args.do_train and not args.do_eval: raise ValueError( "At least one of `do_train` or `do_eval` must be True.") logger.info( "Datasets are loaded from {}\n Outputs will be saved to {}".format( args.data_dir, args.output_dir)) # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: # raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir)) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) task_name = args.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() output_mode = output_modes[task_name] label_list = processor.get_labels() num_labels = len(label_list) tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) train_examples = None num_train_optimization_steps = None if args.do_train: train_examples = processor.get_train_examples(args.data_dir) # train_examples=processor.get_dev_examples(args.data_dir,'test') num_train_optimization_steps = int( len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs if args.local_rank != -1: num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size( ) cache_dir = args.cache_dir if args.cache_dir else os.path.join( str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format( args.local_rank)) if args.load_dir: load_dir = args.load_dir else: load_dir = args.bert_model model = BertForSequenceClassification.from_pretrained( load_dir, cache_dir=cache_dir, num_labels=Constants.act_len) if 
args.fp16: model.half() model.to(device) if args.local_rank != -1: try: from apex.parallel import DistributedDataParallel as DDP except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." ) model = DDP(model) elif n_gpu > 1: model = torch.nn.DataParallel(model) # Prepare optimizer if args.do_train: param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] if args.fp16: try: from apex.optimizers import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." ) optimizer = FusedAdam(optimizer_grouped_parameters, lr=args.learning_rate, bias_correction=False, max_grad_norm=1.0) if args.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) warmup_linear = WarmupLinearSchedule( warmup=args.warmup_proportion, t_total=num_train_optimization_steps) else: optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=num_train_optimization_steps) global_step = 0 tr_loss = 0 best_F1 = 0 if args.do_train: train_features = convert_examples_to_features(train_examples, label_list, args.max_seq_length, tokenizer, output_mode) logger.info("***** Running training *****") logger.info(" Num examples = %d", len(train_examples)) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.float) train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids) if args.local_rank == -1: train_sampler = RandomSampler(train_data) else: train_sampler = DistributedSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) model.train() for epoch in range(int(args.num_train_epochs)): logger.info("Training epoch {} ...".format(epoch)) nb_tr_examples, nb_tr_steps = 0, 0 for step, batch in enumerate(train_dataloader): batch = tuple(t.to(device) for t in batch) input_ids, input_mask, segment_ids, label_ids = batch # define a new function to compute loss values for both output_modes logits = model(input_ids, segment_ids, input_mask, labels=None) loss_fct = BCEWithLogitsLoss() loss = loss_fct(logits.view(-1, 1), label_ids.view(-1, 1)) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. 
if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: optimizer.backward(loss) else: loss.backward() tr_loss += loss.item() nb_tr_examples += input_ids.size(0) nb_tr_steps += 1 if (step + 1) % args.gradient_accumulation_steps == 0: if args.fp16: # modify learning rate with special warm up BERT uses # if args.fp16 is False, BertAdam is used that handles this automatically lr_this_step = args.learning_rate * warmup_linear.get_lr( global_step, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() model.zero_grad() global_step += 1 if (step + 1) % args.period == 0: # Save a trained model, configuration and tokenizer model_to_save = model.module if hasattr( model, 'module') else model # If we save using the predefined names, we can load using `from_pretrained` model.eval() torch.set_grad_enabled(False) # turn off gradient tracking precision, recall, F1 = evaluate(args, model, device, processor, label_list, num_labels, tokenizer, output_mode) if F1 > best_F1: output_dir = os.path.join( args.output_dir, 'pre_{}_recall_{}_F1_{}'.format( precision, recall, F1)) if not os.path.exists(output_dir): os.makedirs(output_dir) output_model_file = os.path.join( output_dir, WEIGHTS_NAME, ) output_config_file = os.path.join( output_dir, CONFIG_NAME) torch.save(model_to_save.state_dict(), output_model_file) model_to_save.config.to_json_file(output_config_file) tokenizer.save_vocabulary(output_dir) best_F1 = F1 model.train() # turn on train mode torch.set_grad_enabled(True) # start gradient tracking tr_loss = 0 # do eval before exit if args.do_eval: if not args.do_train: global_step = 0 output_dir = None save_dir = output_dir if output_dir is not None else args.load_dir load_step = args.load_step if args.load_dir is not None: load_step = print( os.path.split(args.load_dir)[1].replace('save_step_', '')) print("load_step = {}".format(load_step)) F1 = evaluate(args, model, device, processor, label_list, num_labels, tokenizer, output_mode) with open("test_result.txt", 'a') as f: print("load step: {} F1: {}".format(str(load_step), str(F1)), file=f)
def main(): args = parser.parse_args() cf = ConfigParser.ConfigParser() try: cf.read(args.conf) except: print("conf file not exists") sys.exit(1) try: seed = cf.get('Training', 'seed') seed = long(seed) except: seed = torch.cuda.initial_seed() cf.set('Training', 'seed', seed) cf.write(open(args.conf, 'w')) USE_CUDA = cf.getboolean("Training", "use_cuda") torch.manual_seed(seed) if USE_CUDA: torch.cuda.manual_seed(seed) logger = init_logger(os.path.join(args.log_dir, 'train_ctc_model.log')) #Define Model rnn_input_size = cf.getint('Model', 'rnn_input_size') rnn_hidden_size = cf.getint('Model', 'rnn_hidden_size') rnn_layers = cf.getint('Model', 'rnn_layers') rnn_type = supported_rnn[cf.get('Model', 'rnn_type')] bidirectional = cf.getboolean('Model', 'bidirectional') batch_norm = cf.getboolean('Model', 'batch_norm') rnn_param = { "rnn_input_size": rnn_input_size, "rnn_hidden_size": rnn_hidden_size, "rnn_layers": rnn_layers, "rnn_type": rnn_type, "bidirectional": bidirectional, "batch_norm": batch_norm } num_class = cf.getint('Model', 'num_class') drop_out = cf.getfloat('Model', 'drop_out') add_cnn = cf.getboolean('Model', 'add_cnn') cnn_param = {} layers = cf.getint('CNN', 'layers') channel = eval(cf.get('CNN', 'channel')) kernel_size = eval(cf.get('CNN', 'kernel_size')) stride = eval(cf.get('CNN', 'stride')) padding = eval(cf.get('CNN', 'padding')) pooling = eval(cf.get('CNN', 'pooling')) batch_norm = cf.getboolean('CNN', 'batch_norm') activation_function = supported_activate[cf.get('CNN', 'activation_function')] cnn_param['batch_norm'] = batch_norm cnn_param['activate_function'] = activation_function cnn_param["layer"] = [] for layer in range(layers): layer_param = [ channel[layer], kernel_size[layer], stride[layer], padding[layer] ] if pooling is not None: layer_param.append(pooling[layer]) else: layer_param.append(None) cnn_param["layer"].append(layer_param) model = CTC_Model(rnn_param=rnn_param, add_cnn=add_cnn, cnn_param=cnn_param, num_class=num_class, drop_out=drop_out) for idx, m in enumerate(model.children()): print(idx, m) logger.info(str(idx) + "->" + str(m)) dataset = cf.get('Data', 'dataset') data_dir = cf.get('Data', 'data_dir') feature_type = cf.get('Data', 'feature_type') out_type = cf.get('Data', 'out_type') n_feats = cf.getint('Data', 'n_feats') mel = cf.getboolean('Data', 'mel') batch_size = cf.getint("Training", 'batch_size') #Data Loader train_dataset = SpeechDataset(data_dir, data_set='train', feature_type=feature_type, out_type=out_type, n_feats=n_feats, mel=mel) dev_dataset = SpeechDataset(data_dir, data_set="dev", feature_type=feature_type, out_type=out_type, n_feats=n_feats, mel=mel) if add_cnn: train_loader = SpeechCNNDataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=False) dev_loader = SpeechCNNDataLoader(dev_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=False) else: train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=False) dev_loader = SpeechDataLoader(dev_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=False) #decoder for dev set decoder = GreedyDecoder(dev_dataset.int2class, space_idx=-1, blank_index=0) #Training init_lr = cf.getfloat('Training', 'init_lr') num_epoches = cf.getint('Training', 'num_epoches') end_adjust_acc = cf.getfloat('Training', 'end_adjust_acc') decay = cf.getfloat("Training", 'lr_decay') weight_decay = cf.getfloat("Training", 'weight_decay') params = { 'num_epoches': num_epoches, 
'end_adjust_acc': end_adjust_acc, 'mel': mel, 'seed': seed, 'decay': decay, 'learning_rate': init_lr, 'weight_decay': weight_decay, 'batch_size': batch_size, 'feature_type': feature_type, 'n_feats': n_feats, 'out_type': out_type } print(params) if USE_CUDA: model = model.cuda() loss_fn = CTCLoss() optimizer = torch.optim.Adam(model.parameters(), lr=init_lr, weight_decay=weight_decay) #visualization for training from visdom import Visdom viz = Visdom() if add_cnn: title = dataset + ' ' + feature_type + str(n_feats) + ' CNN_LSTM_CTC' else: title = dataset + ' ' + feature_type + str(n_feats) + ' LSTM_CTC' opts = [ dict(title=title + " Loss", ylabel='Loss', xlabel='Epoch'), dict(title=title + " Loss on Dev", ylabel='DEV Loss', xlabel='Epoch'), dict(title=title + ' CER on DEV', ylabel='DEV CER', xlabel='Epoch') ] viz_window = [None, None, None] count = 0 learning_rate = init_lr loss_best = 1000 loss_best_true = 1000 adjust_rate_flag = False stop_train = False adjust_time = 0 acc_best = 0 start_time = time.time() loss_results = [] dev_loss_results = [] dev_cer_results = [] while not stop_train: if count >= num_epoches: break count += 1 if adjust_rate_flag: learning_rate *= decay adjust_rate_flag = False for param in optimizer.param_groups: param['lr'] *= decay print("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate)) logger.info("Start training epoch: %d, learning_rate: %.5f" % (count, learning_rate)) loss = train(model, train_loader, loss_fn, optimizer, logger, add_cnn=add_cnn, print_every=20, USE_CUDA=USE_CUDA) loss_results.append(loss) acc, dev_loss = dev(model, dev_loader, loss_fn, decoder, logger, add_cnn=add_cnn, USE_CUDA=USE_CUDA) print("loss on dev set is %.4f" % dev_loss) logger.info("loss on dev set is %.4f" % dev_loss) dev_loss_results.append(dev_loss) dev_cer_results.append(acc) #adjust learning rate by dev_loss if dev_loss < (loss_best - end_adjust_acc): loss_best = dev_loss loss_best_true = dev_loss #acc_best = acc adjust_rate_count = 0 model_state = copy.deepcopy(model.state_dict()) op_state = copy.deepcopy(optimizer.state_dict()) elif (dev_loss < loss_best + end_adjust_acc): adjust_rate_count += 1 if dev_loss < loss_best and dev_loss < loss_best_true: loss_best_true = dev_loss #acc_best = acc model_state = copy.deepcopy(model.state_dict()) op_state = copy.deepcopy(optimizer.state_dict()) else: adjust_rate_count = 10 if acc > acc_best: acc_best = acc best_model_state = copy.deepcopy(model.state_dict()) best_op_state = copy.deepcopy(optimizer.state_dict()) print("adjust_rate_count:" + str(adjust_rate_count)) print('adjust_time:' + str(adjust_time)) logger.info("adjust_rate_count:" + str(adjust_rate_count)) logger.info('adjust_time:' + str(adjust_time)) if adjust_rate_count == 10: adjust_rate_flag = True adjust_time += 1 adjust_rate_count = 0 if loss_best > loss_best_true: loss_best = loss_best_true model.load_state_dict(model_state) optimizer.load_state_dict(op_state) if adjust_time == 8: stop_train = True time_used = (time.time() - start_time) / 60 print("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used)) logger.info("epoch %d done, cv acc is: %.4f, time_used: %.4f minutes" % (count, acc, time_used)) x_axis = range(count) y_axis = [ loss_results[0:count], dev_loss_results[0:count], dev_cer_results[0:count] ] for x in range(len(viz_window)): if viz_window[x] is None: viz_window[x] = viz.line( X=np.array(x_axis), Y=np.array(y_axis[x]), opts=opts[x], ) else: viz.line( X=np.array(x_axis), Y=np.array(y_axis[x]), win=viz_window[x], 
update='replace', ) print("End training, best dev loss is: %.4f, acc is: %.4f" % (loss_best, acc_best)) logger.info("End training, best dev loss is: %.4f, acc is: %.4f" % (loss_best, acc_best)) model.load_state_dict(best_model_state) optimizer.load_state_dict(best_op_state) best_path = os.path.join(args.log_dir, 'best_model' + '_dev' + str(acc_best) + '.pkl') cf.set('Model', 'model_file', best_path) cf.write(open(args.conf, 'w')) params['epoch'] = count torch.save( CTC_Model.save_package(model, optimizer=optimizer, epoch=params, loss_results=loss_results, dev_loss_results=dev_loss_results, dev_cer_results=dev_cer_results), best_path)
# First, the needed imports. import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader from torchvision import datasets, transforms from distutils.version import LooseVersion as LV import os from datetime import datetime from transformers import ViTFeatureExtractor, ViTForImageClassification from transformers import __version__ as transformers_version torch.manual_seed(42) import numpy as np if torch.cuda.is_available(): device = torch.device('cuda') else: device = torch.device('cpu') print('Using PyTorch version:', torch.__version__, 'Transformers version:', transformers_version, 'Device:', device) assert(LV(torch.__version__) >= LV("1.0.0")) # TensorBoard is a tool for visualizing progress during training. Although # TensorBoard was created for TensorFlow, it can also be used with PyTorch. It can be used to monitor metrics such as the training loss, as sketched below.
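# A minimal sketch of the PyTorch side of TensorBoard logging
# (torch.utils.tensorboard.SummaryWriter); the log-directory naming below is only an
# assumption for illustration, not taken from this notebook.
from torch.utils.tensorboard import SummaryWriter

logdir = os.path.join(os.getcwd(), "logs",
                      "vit-" + datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
print('TensorBoard log directory:', logdir)
writer = SummaryWriter(logdir)
# inside the training loop one would then call, for example:
#   writer.add_scalar('loss/train', loss.item(), global_step)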
def _test_integration_multiclass(device, output_dict): rank = idist.get_rank() torch.manual_seed(12) def _test(metric_device, n_classes, labels=None): classification_report = ClassificationReport(device=metric_device, output_dict=output_dict, labels=labels) n_iters = 80 s = 16 offset = n_iters * s y_true = torch.randint(0, n_classes, size=(offset * idist.get_world_size(), )).to(device) y_preds = torch.rand(offset * idist.get_world_size(), n_classes).to(device) def update(engine, i): return ( y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, :], y_true[i * s + rank * offset:(i + 1) * s + rank * offset], ) engine = Engine(update) classification_report.attach(engine, "cr") data = list(range(n_iters)) engine.run(data=data) assert "cr" in engine.state.metrics res = engine.state.metrics["cr"] res2 = classification_report.compute() assert res == res2 assert isinstance(res, dict if output_dict else str) if not output_dict: res = json.loads(res) from sklearn.metrics import classification_report as sklearn_classification_report sklearn_result = sklearn_classification_report( y_true.cpu().numpy(), torch.argmax(y_preds, dim=1).cpu().numpy(), output_dict=True) for i in range(n_classes): label_i = labels[i] if labels else str(i) assert pytest.approx(res[label_i]["precision"] == sklearn_result[ str(i)]["precision"]) assert pytest.approx( res[label_i]["f1-score"] == sklearn_result[str(i)]["f1-score"]) assert pytest.approx( res[label_i]["recall"] == sklearn_result[str(i)]["recall"]) assert pytest.approx(res["macro avg"]["precision"] == sklearn_result["macro avg"]["precision"]) assert pytest.approx(res["macro avg"]["recall"] == sklearn_result["macro avg"]["recall"]) assert pytest.approx(res["macro avg"]["f1-score"] == sklearn_result["macro avg"]["f1-score"]) for _ in range(5): # check multiple random inputs as random exact occurencies are rare metric_devices = ["cpu"] if device.type != "xla": metric_devices.append(idist.device()) for metric_device in metric_devices: _test(metric_device, 2, ["label0", "label1"]) _test(metric_device, 2) _test(metric_device, 3, ["label0", "label1", "label2"]) _test(metric_device, 3) _test(metric_device, 4, ["label0", "label1", "label2", "label3"]) _test(metric_device, 4)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--yaml-config', type=str, default='simg_bmi_regression_3.6.6.2_nfs.yaml') parser.add_argument('--run-train', type=str, default='True') parser.add_argument('--run-test', type=str, default='False') parser.add_argument('--run-grad-cam', type=str, default='False') parser.add_argument('--train-fold', type=int, default=0) args = parser.parse_args() SRC_ROOT = os.path.dirname(os.path.realpath(__file__)) + '/..' yaml_config = os.path.join(SRC_ROOT, f'src/yaml/{args.yaml_config}') logger.info(f'Read yaml file {yaml_config}') f = open(yaml_config, 'r').read() config = yaml.safe_load(f) out_folder = config['exp_dir'] learning_rate = config['learning_rate'] batch_size = config['batch_size'] epoch_num = config['epoch_num'] fold_num = config['fold_num'] mkdir_p(out_folder) # load CUDA cuda = torch.cuda.is_available() print(f'cuda: {cuda}') # cuda = False torch.manual_seed(1) # Create data loader train_loader_list, valid_loader_list, test_loader_list = get_data_loader_cv(config) # Create trainer list performance_array = [] for idx_fold in range(fold_num): # If train only one fold if args.train_fold != -1: # Only train on specified fold. if args.train_fold != idx_fold: continue # Create model model = create_model(config) if cuda: torch.cuda.manual_seed(1) model = model.cuda() # load optimizor optim = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999)) # Create trainer fold_out_folder = os.path.join(out_folder, f'fold_{idx_fold}') train_loader = train_loader_list[idx_fold] validate_loader = valid_loader_list[idx_fold] test_loader = test_loader_list[idx_fold] trainer_obj = Trainer( cuda, model, optimizer=optim, train_loader=train_loader, validate_loader=validate_loader, test_loader=test_loader, out=fold_out_folder, max_epoch=epoch_num, batch_size=batch_size, config=config ) # Train trainer_obj.epoch = config['start_epoch'] if args.run_train == 'True': trainer_obj.train_epoch() # Test if args.run_test == 'True': trainer_obj.run_test() performance_array.append(trainer_obj.test_performance) if args.run_grad_cam == 'True': trainer_obj.run_grad_cam() if args.run_test == 'True': mse_array = np.array([statics_dict['loss'] for statics_dict in performance_array]) rmse_array = np.sqrt(mse_array) rmse_mean = np.mean(rmse_array) rmse_std = np.std(rmse_array) perf_str = f'RMSE {rmse_mean:.5f} ({rmse_std:.5f})\n' print(f'Performance of cross-validation:') print(perf_str) perf_file = os.path.join(out_folder, 'perf') with open(perf_file, 'w') as fv: fv.write(perf_str) fv.close()
def main(): """ Main function. """ # Training settings parser = argparse.ArgumentParser(description='PyTorch MNIST Example') parser.add_argument('--img_size', type=int, default=28, help="size of image (default: 28)") parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--epochs', type=int, default=14, metavar='N', help='number of epochs to train (default: 14)') parser.add_argument('--lr', type=float, default=1.0, metavar='LR', help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--dry-run', action='store_true', default=False, help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--save-model', action='store_true', default=False, help='For Saving the current Model') parser.add_argument('--save_dir', default="experiments") parser.add_argument('--log_file', default="log.o") parser.add_argument('--ckpt_path') # for compatibility args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") kwargs = {'batch_size': args.batch_size} if use_cuda: kwargs.update({ 'num_workers': 1, 'pin_memory': True, 'shuffle': True }, ) transform = transforms.Compose([ transforms.Resize(args.img_size), transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, )) ]) dataset1 = datasets.MNIST('./data', train=True, download=True, transform=transform) dataset2 = datasets.MNIST('./data', train=False, transform=transform) train_loader = torch.utils.data.DataLoader(dataset1, **kwargs) test_loader = torch.utils.data.DataLoader(dataset2, **kwargs) model = Net(args.img_size).to(device) optimizer = optim.Adadelta(model.parameters(), lr=args.lr) scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) for epoch in range(1, args.epochs + 1): train(args, model, device, train_loader, optimizer, epoch) test(args, model, device, test_loader) scheduler.step() if args.save_model: torch.save( model.state_dict(), os.path.join(args.save_dir, "mnist_cnn_%d.pt" % args.img_size))
def _test_integration_multilabel(device, output_dict): rank = idist.get_rank() torch.manual_seed(12) def _test(metric_device, n_epochs, labels=None): classification_report = ClassificationReport(device=metric_device, output_dict=output_dict, is_multilabel=True) n_iters = 10 s = 16 n_classes = 7 offset = n_iters * s y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device) y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device) def update(engine, i): return ( y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...], y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...], ) engine = Engine(update) classification_report.attach(engine, "cr") data = list(range(n_iters)) engine.run(data=data, max_epochs=n_epochs) assert "cr" in engine.state.metrics res = engine.state.metrics["cr"] res2 = classification_report.compute() assert res == res2 assert isinstance(res, dict if output_dict else str) if not output_dict: res = json.loads(res) np_y_preds = to_numpy_multilabel(y_preds) np_y_true = to_numpy_multilabel(y_true) from sklearn.metrics import classification_report as sklearn_classification_report sklearn_result = sklearn_classification_report(np_y_true, np_y_preds, output_dict=True) for i in range(n_classes): label_i = labels[i] if labels else str(i) assert pytest.approx(res[label_i]["precision"] == sklearn_result[ str(i)]["precision"]) assert pytest.approx( res[label_i]["f1-score"] == sklearn_result[str(i)]["f1-score"]) assert pytest.approx( res[label_i]["recall"] == sklearn_result[str(i)]["recall"]) assert pytest.approx(res["macro avg"]["precision"] == sklearn_result["macro avg"]["precision"]) assert pytest.approx(res["macro avg"]["recall"] == sklearn_result["macro avg"]["recall"]) assert pytest.approx(res["macro avg"]["f1-score"] == sklearn_result["macro avg"]["f1-score"]) for _ in range(3): # check multiple random inputs as random exact occurencies are rare metric_devices = ["cpu"] if device.type != "xla": metric_devices.append(idist.device()) for metric_device in metric_devices: _test(metric_device, 1) _test(metric_device, 2) _test(metric_device, 1, ["0", "1", "2", "3", "4", "5", "6"]) _test(metric_device, 2, ["0", "1", "2", "3", "4", "5", "6"])
from Agents.DQN.DQNAsynER import DQNAsynERMaster, SharedAdam from Agents.Core.MLPNet import MultiLayerNetRegression import json import gym from torch import optim from copy import deepcopy import torch from Env.CustomEnv.MountainCarEnv import MountainCarEnvCustom torch.manual_seed(1) # first construct the neutral network config = dict() config['trainStep'] = 2000 config['epsThreshold'] = 0.3 config['epsilon_start'] = 0.3 config['epsilon_final'] = 0.05 config['epsilon_decay'] = 200 config['targetNetUpdateStep'] = 100 config['memoryCapacity'] = 200000 config['trainBatchSize'] = 32 config['gamma'] = 0.9 config['learningRate'] = 0.0001 config['netGradClip'] = 1 config['logFlag'] = False config['logFileName'] = '' config['logFrequency'] = 50 config['netUpdateOption'] = 'doubleQ' config['nStepForward'] = 3
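# Hedged illustration: epsilon_start / epsilon_final / epsilon_decay typically parameterize
# an exponential epsilon-greedy schedule such as the one below. How DQNAsynERMaster actually
# consumes these keys is not shown here, so treat this as an assumption rather than the
# agent's implementation.
import math

def epsilon_by_step(step):
    return config['epsilon_final'] + (config['epsilon_start'] - config['epsilon_final']) \
        * math.exp(-step / config['epsilon_decay'])

# e.g. epsilon_by_step(0) == 0.3, and the value decays toward 0.05 as step grows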
# @File : train_ALL_LSTM.py # @Last Modify Time : 2018/07/19 22:35 # @Contact : bamtercelboo@{gmail.com, 163.com} import os import sys import torch import torch.autograd as autograd import torch.nn.functional as F import torch.nn.utils as utils import torch.optim.lr_scheduler as lr_scheduler import shutil import random import numpy as np seed_num = 233 torch.manual_seed(seed_num) random.seed(seed_num) def train(train_iter, dev_iter, test_iter, model, args): if args.cuda: model.cuda() if args.Adam is True: print("Adam Training......") optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay) elif args.SGD is True: print("SGD Training.......") optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay, momentum=args.momentum_value) elif args.Adadelta is True:
print(x.type()) print(x.item()) print(x.size()) print("\n<<< Vector >>>") x = torch.FloatTensor([23, 24, 24.5, 26, 27.2, 23.0]) print(x.type()) print(x.data) print(x.size()) print("\n<<< Matrix >>>") np.random.seed(0) data = np.random.randn(3, 3) x = torch.from_numpy(data) print(x.type()) print(x.data) print(x.size()) print("\n<<< Some Operations >>>") torch.manual_seed(0) a = torch.rand(1, 2) b = torch.rand(1, 2) print("a : ", a.data) print("b : ", b) c = torch.add(a, b) print("a+b : ", c) d = torch.add(a, -b) # subtraction via add(a, -b); torch.sub(a, b) is equivalent print("a-b : ", d) e = torch.mul(a, b) print("axb : ", e)
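# Not part of the original demo: the same element-wise arithmetic is also available as
# dedicated functions and operator overloads.
f = torch.sub(a, b)        # equivalent to torch.add(a, -b) above
print("a-b : ", f)
print("a/b : ", a / b)     # element-wise division (torch.div)
print("a*b : ", a * b)     # same as torch.mul(a, b)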
def main(): parser = argparse.ArgumentParser() parser.add_argument('--workers', type=int, help='number of data loading workers', default=1) parser.add_argument('--batchSize', type=int, default=32, help='input batch size') parser.add_argument('--nz', type=int, default=8, help='size of the latent z vector') parser.add_argument('--ngf', type=int, default=64, help='size of the latent z vector') parser.add_argument('--ndf', type=int, default=64, help='size of the latent z vector') parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for') parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002') parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5') parser.add_argument('--outf', default='./out/', help='folder to output images and model checkpoints') parser.add_argument('--manualSeed', type=int, help='manual seed') INFO = 'X' opt = parser.parse_args() print(opt) ngpu = 1 if opt.manualSeed is None: opt.manualSeed = random.randint(1, 10000) print("Random Seed: ", opt.manualSeed) random.seed(opt.manualSeed) torch.manual_seed(opt.manualSeed) if (torch.cuda.is_available() and ngpu > 0): torch.cuda.manual_seed(opt.manualSeed) filename = '_'.join( [INFO, str(opt.manualSeed), str(opt.batchSize), str(opt.niter), str(opt.lr), str(opt.nz), str(opt.ngf), str(opt.ndf)]) try: os.makedirs(filename) except OSError: pass dataset = ds.MimicData() assert dataset dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize, shuffle=True, num_workers=int(opt.workers)) device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") print(device) nz = int(opt.nz) ngf = opt.ngf ndf = opt.ndf nc = 1 # custom weights initialization called on netG and netD # custom weights initialization called on netG and netD def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: nn.init.normal_(m.weight.data, 0, 0.02) elif classname.find('BatchNorm') != -1: nn.init.normal_(m.weight.data, 1, 0.02) nn.init.constant_(m.bias.data, 0) class Generator(nn.Module): def __init__(self): super(Generator, self).__init__() self.main = nn.Sequential( # input is Z, going into a convolution nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False), nn.BatchNorm2d(ngf * 8), nn.ReLU(True), # state size. (ngf*8) x 4 x 4 nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 4), nn.ReLU(True), # state size. (ngf*4) x 8 x 8 nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 2), nn.ReLU(True), # state size. (ngf*2) x 16 x 16 nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf), nn.ReLU(True), # state size. (ngf) x 32 x 32 nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), nn.Tanh() # state size. (nc) x 64 x 64 ) def forward(self, input): output = self.main(input) return output netG = Generator().to(device) # Handle multi-gpu if desired if (device.type == 'cuda') and (ngpu > 1): netG = nn.DataParallel(netG, list(range(ngpu))) netG.apply(weights_init) class Discriminator(nn.Module): def __init__(self): super(Discriminator, self).__init__() self.main = nn.Sequential( # input is (nc) x 64 x 64 nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf) x 32 x 32 nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 2), nn.LeakyReLU(0.2, inplace=True), # state size. 
(ndf*2) x 16 x 16 nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 4), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf*4) x 8 x 8 nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 8), nn.LeakyReLU(0.2, inplace=True), # state size. (ndf*8) x 4 x 4 nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid() ) def forward(self, input): output = self.main(input) return output.view(-1, 1).squeeze(1) netD = Discriminator().to(device) if (device.type == 'cuda') and (ngpu > 1): netD = nn.DataParallel(netD, list(range(ngpu))) netD.apply(weights_init) criterion = nn.BCELoss() fixed_noise = torch.randn(opt.batchSize, nz, 1, 1, device=device) real_label = 1 fake_label = 0 # setup optimizer optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) ldl = [] lgl = [] dxl = [] dgz1l = [] dgz2l = [] for epoch in range(opt.niter): eldl = [] elgl = [] edxl = [] edgz1l = [] edgz2l = [] for i, data in enumerate(dataloader, 0): data = data[2] data.unsqueeze_(1) data.add_(-0.5).mul_(2) # train with real netD.zero_grad() real_cpu = data.to(device) batch_size = real_cpu.size(0) label = torch.full((batch_size,), real_label, device=device) output = netD(real_cpu) errD_real = criterion(output, label) errD_real.backward() D_x = output.mean().item() # train with fake noise = torch.randn(batch_size, nz, 1, 1, device=device) fake = netG(noise) label.fill_(fake_label) output = netD(fake.detach()) errD_fake = criterion(output, label) errD_fake.backward() D_G_z1 = output.mean().item() errD = errD_real + errD_fake optimizerD.step() # Update G network: maximize log(D(G(z))) netG.zero_grad() label.fill_(real_label) # fake labels are real for generator cost output = netD(fake) errG = criterion(output, label) errG.backward() D_G_z2 = output.mean().item() optimizerG.step() eldl.append(errD.item()) elgl.append(errG.item()) edxl.append(D_x) edgz1l.append(D_G_z1) edgz2l.append(D_G_z2) if i % 100 == 0: print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f' % (epoch, opt.niter, i, len(dataloader), errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) vutils.save_image(real_cpu, '%s/real_samples.png' % filename, normalize=True,pad_value=0.5) fake = netG(fixed_noise) fake = fake.detach().gt(0) vutils.save_image(fake, '%s/fake_samples_epoch_%03d.png' % (filename, epoch), normalize=True,pad_value=0.5) ldl.append(np.mean(eldl)) lgl.append(np.mean(elgl)) dxl.append(np.mean(edxl)) dgz1l.append(np.mean(edgz1l)) dgz2l.append(np.mean(edgz2l)) plot_gan(ldl, lgl, dxl, dgz1l, dgz2l, filename)