def main():
    """Export a trained MNIST classifier as an NNP file.

    Loads the parameters produced by ``classification.py`` (file name is
    inferred from ``--net`` and ``--max-iter``), rebuilds the inference
    graph, and saves graph + parameters to ``<net>_<max_iter>.nnp`` for
    later use from the C++ inference runtime.

    Exits with status 1 if the parameter file is missing.
    """
    HERE = os.path.dirname(__file__)
    # Import MNIST data utilities from the vision example directory.
    sys.path.append(
        os.path.realpath(os.path.join(HERE, '..', '..', 'vision', 'mnist')))
    from mnist_data import data_iterator_mnist
    from args import get_args
    from classification import mnist_lenet_prediction, mnist_resnet_prediction

    args = get_args(description=__doc__)

    # Select the network architecture (LeNet unless resnet is requested).
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # Infer parameter file name and read it.
    model_save_path = os.path.join('../../vision/mnist', args.model_save_path)
    parameter_file = os.path.join(
        model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    try:
        nn.load_parameters(parameter_file)
    except IOError:
        # Parameters are produced by the training script; fail with a hint.
        logger.error("Run classification.py before running this script.")
        exit(1)

    # Create a computation graph to be saved.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    pred = mnist_cnn_prediction(image, test=True)

    # Save NNP file (used in C++ inference later).
    nnp_file = '{}_{:06}.nnp'.format(args.net, args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'runtime',
             'batch_size': args.batch_size,
             'outputs': {'y': pred},
             'names': {'x': image}}],
        'executors': [
            {'name': 'runtime',
             'network': 'runtime',
             'data': ['x'],
             'output': ['y']}]}
    nn.utils.save.save(nnp_file, runtime_contents)
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device
        training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size

    # Communicator and Context
    extension_module = "cuda.cudnn"
    ctx = extension_context(extension_module)
    # NOTE: "DataParalell" is the (historically misspelled) nnabla API name.
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    # Rebind the context to this process's own device after learning the rank.
    ctx = extension_context(extension_module, device_id=device_id)

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = cifar100_resnet23_prediction(
        image_train, ctx, test)
    loss_train = cifar100_resnet32_loss(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # add parameters to communicator
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctx, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    base_lr = args.learning_rate
    # Warmup length: (iterations per epoch per device) * warmup epochs.
    warmup_iter = int(
        1. * n_train_samples / args.batch_size / n_devices) * args.warmup_epoch
    warmup_slope = 1. * n_devices / warmup_iter

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop (iteration count divided across devices).
        for i in range(int(args.max_iter / n_devices)):
            # Validation (rank 0 only; other ranks keep training).
            if mpi_rank == 0:
                if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                    ve = 0.
                    for j in range(args.val_iter):
                        image, label = vdata.next()
                        input_image_valid["image"].d = image
                        pred_valid.forward()
                        ve += categorical_error(pred_valid.d, label)
                    ve /= args.val_iter
                    monitor_verr.add(i * n_devices, ve)
                if i % int(args.model_save_interval / n_devices) == 0:
                    nn.save_parameters(os.path.join(
                        args.model_save_path, 'params_%06d.h5' % i))

            # Forward/Zerograd/Backward
            image, label = tdata.next()
            input_image_train["image"].d = image
            input_image_train["label"].d = label
            loss_train.forward()
            solver.zero_grad()
            loss_train.backward()

            # In-place Allreduce (averages gradients across devices).
            comm.allreduce(division=True)

            # Solvers update
            solver.update()

            # Linear Warmup: ramp the LR from 0 to base_lr * n_devices.
            if i < warmup_iter:
                lr = base_lr * n_devices * warmup_slope * i
                solver.set_learning_rate(lr)
            else:
                lr = base_lr * n_devices
                solver.set_learning_rate(lr)

            if mpi_rank == 0:
                e = categorical_error(
                    pred_train.d, input_image_train["label"].d)
                monitor_loss.add(i * n_devices, loss_train.d.copy())
                monitor_err.add(i * n_devices, e)
                monitor_time.add(i * n_devices)
    # Final snapshot (rank 0 only).
    if mpi_rank == 0:
        nn.save_parameters(os.path.join(
            args.model_save_path,
            'params_%06d.h5' % (args.max_iter / n_devices)))
def main():
    """Model-based RL training loop with CEM-driven corrections.

    Alternates exploration (with a perturbed policy), dynamics-ensemble
    fitting, constraint construction from high-reward transitions, and
    branch-and-evaluate policy fitting; the best branch becomes the new
    sampling policy. Results and policies are pickled under ``results/``.
    """
    args = get_args()
    dir_name = "results/%s/%s-%s" % (args.env, "3setup",
                                     strftime("%m_%d_%H_%M", gmtime()))
    os.makedirs(dir_name, exist_ok=True)
    logfile = open(dir_name + "/log.txt", "w")
    # Persist the run configuration next to the logs for reproducibility.
    with open(os.path.join(dir_name, 'args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    env = gym.make(args.env)
    #env = outer_env.wrapped_env
    num_hidden = args.hidden_size
    VARIANCE = args.var
    iter_steps = args.iter_steps
    # has to be neural network policy
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Default constraint-set size scales with the observation dimension.
    N_SAMPLES = args.n_samples if args.n_samples > 0 else int(
        env.observation_space.shape[0] * 4)
    LOW_REW_SET = int(N_SAMPLES * 0.2)
    TOP_N_CONSTRIANTS = int(N_SAMPLES * 1.5)

    def make_policy(mean, var):
        # Build a fresh policy network; normalization stats (if given) are
        # moved to the compute device first.
        if mean is not None:
            mean = torch.Tensor(mean).to(device)
        if var is not None:
            var = torch.Tensor(var).to(device)
        return Policy_quad_norm(env.observation_space.shape[0],
                                env.action_space.shape[0],
                                num_hidden=num_hidden,
                                mean=mean, var=var).to(device)

    print('Using device:', device)
    # -1700 is the initial "beat this" evaluation score for the first policy.
    sample_policy, sample_eval = make_policy(None, None), -1700
    replay_buffer = Replay_buffer(args.gamma)
    dynamics = DynamicsEnsemble(args.env, num_models=3)
    ep_no_improvement = 0

    for i_episode in count(1):
        # hack: after 3 stagnant episodes, widen the search and shrink the
        # exploration variance.
        if ep_no_improvement > 3:
            N_SAMPLES = int(N_SAMPLES * 1.2)
            TOP_N_CONSTRIANTS = int(N_SAMPLES * 1.5)  # -1
            LOW_REW_SET = int(LOW_REW_SET * 1.2)
            iter_steps = TOP_N_CONSTRIANTS * 2
            if VARIANCE > 1e-4:
                VARIANCE = VARIANCE / 1.2
                print("Updated Var to: %.3f" % (VARIANCE))
            ep_no_improvement = 0
        print("constraints: {}, to correct: {}".format(N_SAMPLES,
                                                       TOP_N_CONSTRIANTS))

        # Exploration: roll out the current policy with action noise until
        # iter_steps environment steps are collected.
        num_steps = 0
        explore_episodes = 0
        explore_rew = 0
        state_action_rew = []   # worst transitions seen (for correction)
        lowest_rew = []         # their rewards, kept in sync
        while num_steps < iter_steps:
            state = env.reset()
            for t in range(1000):
                action = sample_policy.select_action(state, VARIANCE)
                action = action.flatten()
                name_str = "expl_var"
                #explore
                next_state, reward, done, _ = env.step(action)
                explore_rew += reward
                replay_buffer.push((state, next_state, action, reward, done,
                                    (name_str, explore_episodes, t)))
                if args.correct and i_episode > 0:
                    # Early termination in these envs is treated as the worst
                    # possible outcome so it is always collected below.
                    if (args.env == "Hopper-v2" or args.env == "Walker2d-v2") and done:
                        reward = float('-inf')
                    # Maintain the LOW_REW_SET lowest-reward transitions.
                    if len(state_action_rew) < LOW_REW_SET:# or (args.env == "Hopper-v2" or args.env == "Walker2d-v2" and done):
                        state_action_rew.append([state, action, reward])
                        lowest_rew.append(reward)
                    elif reward < max(lowest_rew):
                        state_action_rew = sorted(state_action_rew, key=lambda l: l[2])  #sort by reward
                        state_action_rew[-1] = [state, action, reward]
                        lowest_rew.remove(max(lowest_rew))
                        lowest_rew.append(reward)
                if done:
                    break
                state = next_state
            num_steps += (t - 1)
            explore_episodes += 1
        explore_rew /= explore_episodes
        print('\nEpisode {}\tExplore reward: {:.2f}\tAverage ep len: {:.1f}\n'.format(
            i_episode, explore_rew, num_steps / explore_episodes))

        # do corrections: replace the worst actions with CEM-optimized ones
        # under the learned dynamics model.
        low_rew_constraints_set = []
        if args.correct and i_episode > 1:
            print("exploring better actions", len(state_action_rew))
            #sample possible corrections
            for s, a, r in state_action_rew:
                max_a, _ = run_cem(dynamics, s)
                low_rew_constraints_set.append((s, max_a, "bad_states", 0, 0))

        # Train Dynamics on replayed transitions, topping up with data from
        # the previous episode when fewer than 1500 samples are available.
        X, Y, A, _, _, _ = replay_buffer.sample(-1)
        if i_episode != 1:
            print("Previous model evaluation:", dynamics.get_accuracy(X, Y, A))
            if len(X) < 1500:
                X = np.concatenate([X, prev_X])
                X = X if len(X) < 1500 else X[:1500]
                Y = np.concatenate([Y, prev_Y])
                Y = Y if len(Y) < 1500 else Y[:1500]
                A = np.concatenate([A, prev_A])
                A = A if len(A) < 1500 else A[:1500]
        dynamics.fit(X, Y, A, epoch=args.model_training_epoch)
        prev_X, prev_Y, prev_A = X, Y, A
        best_tuples = replay_buffer.best_state_actions_replace(
            top_n_constraints=TOP_N_CONSTRIANTS, by='rewards', discard=True)
        mean, var = replay_buffer.get_mean_var()

        # support: random states labelled either by CEM (model is confident)
        # or by the current policy (model is uncertain).
        num_support = int(N_SAMPLES * 0.7)
        support_states = np.random.uniform(
            low=-5, high=5, size=[num_support, env.observation_space.shape[0]])
        confidence = sorted(
            [(x, dynamics.get_uncertainty(x, sample_policy.select_action(x, 0)[0]))
             for x in support_states], key=lambda t: t[1])
        support_tuples = []
        print("confidence here:")
        print(confidence[:5])
        # NOTE(review): `sliice` is computed but never used below.
        sliice = int(len(confidence) / 2)
        for s, conf in confidence:
            if conf < 10:
                #arbitrary bound. for later
                max_a, _ = run_cem(dynamics, s)
                support_tuples.append((s, max_a, "model", 0, 0))
            else:
                a = sample_policy.select_action(s, 0)[0].tolist()
                support_tuples.append((s, a, "support", 0, 0))

        # sample and solve: fit several candidate policies on sampled
        # constraint sets and keep the best-evaluating one.
        max_policy, max_eval, max_set = sample_policy, sample_eval, best_tuples
        branch_buffer = Replay_buffer(args.gamma)
        print(TOP_N_CONSTRIANTS)
        print(len(best_tuples))
        print(len(low_rew_constraints_set))
        for branch in range(args.branches):
            branch_policy = make_policy(None, None)
            branch_buffer = Replay_buffer(args.gamma)
            if N_SAMPLES >= len(best_tuples):
                constraints = best_tuples
            else:
                constraints = random.sample(best_tuples + support_tuples, N_SAMPLES)
            # Get metadata of constraints
            states, actions, info, rewards, _ = zip(*constraints)
            print("ep %d b %d: %d constraints mean: %.3f std: %.3f max: %.3f" % (
                i_episode, branch, len(constraints),
                np.mean(rewards), np.std(rewards), max(rewards)))
            print(info)
            if isinstance(states[0], torch.Tensor):
                states = torch.cat(states)
            else:
                states = torch.Tensor(states)
            if isinstance(actions[0], torch.Tensor):
                actions = torch.cat(actions)
            else:
                actions = torch.Tensor(actions)
            # Supervised fit of the branch policy to the constraint pairs.
            branch_policy.train(states.to(device), actions.to(device),
                                epoch=args.training_epoch)

            # Evaluate the branch policy deterministically (variance 0).
            eval_rew = 0
            for i in range(EVAL_TRAJ):
                state, done = env.reset(), False
                step = 0
                while not done:
                    # Don't infinite loop while learning
                    action = branch_policy.select_action(state, 0)
                    action = action.flatten()
                    next_state, reward, done, _ = env.step(action)
                    eval_rew += reward
                    branch_buffer.push((state, next_state, action, reward, done,
                                        ("eval", i, step)))
                    state = next_state
                    step += 1
                    if args.render:
                        env.render()
                    if done:
                        break
            eval_rew /= EVAL_TRAJ

            #log
            print('Episode {}\tBranch: {}\tEval reward: {:.2f}\tExplore reward: {:.2f}'.format(
                i_episode, branch, eval_rew, explore_rew))
            logfile.write('Episode {}\tBranch: {}\tConstraints:{}\tEval reward: {:.2f}\n'.format(
                i_episode, branch, len(constraints), eval_rew))
            if eval_rew > max_eval:
                print("updated to this policy")
                max_eval, max_policy, max_set = eval_rew, branch_policy, constraints
                replay_buffer = branch_buffer

        # the end of branching: adopt the best branch if it improved, and
        # checkpoint both the constraint set and the policy weights.
        if max_eval > sample_eval:
            with open("%s/%d_constraints.p" % (dir_name, i_episode), "wb") as f:
                pickle.dump({"all": best_tuples, "constraints": max_set}, f)
            with open("%s/%d_policy.p" % (dir_name, i_episode), 'wb') as out:
                # Move tensors to CPU so the pickle loads on CPU-only hosts.
                policy_state_dict = OrderedDict(
                    {k: v.to('cpu') for k, v in max_policy.state_dict().items()})
                pickle.dump(policy_state_dict, out)
            sample_policy, sample_eval = max_policy, max_eval
            ep_no_improvement = 0
        else:
            ep_no_improvement += 1
        if i_episode > 50:
            break
def main():
    """Entry point: parse command-line arguments, persist them, then train."""
    cli_args = get_args()
    save_args(cli_args)
    train(cli_args)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size
    extension_module = args.context
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Pick architecture + data iterator from the requested dataset.
    # NOTE(review): an unrecognized args.net leaves `prediction` undefined
    # and fails later with NameError.
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=10, nmaps=64, act=F.relu)
        data_iterator = data_iterator_cifar10
    if args.net == "cifar100_resnet23":
        prediction = functools.partial(
            resnet23_prediction, ncls=100, nmaps=384, act=F.elu)
        data_iterator = data_iterator_cifar100

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = prediction(image_train, test)
    loss_train = loss_function(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = prediction(image_valid, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Data Iterator
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)

    # Training-loop
    for i in range(args.max_iter):
        # Validation once per epoch.
        if i % int(n_train_samples / args.batch_size) == 0:
            ve = 0.
            for j in range(args.val_iter):
                image, label = vdata.next()
                input_image_valid["image"].d = image
                pred_valid.forward()
                ve += categorical_error(pred_valid.d, label)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        if int(i % args.model_save_interval) == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))

        # Forward/Zerograd/Backward
        image, label = tdata.next()
        input_image_train["image"].d = image
        input_image_train["label"].d = label
        loss_train.forward()
        solver.zero_grad()
        loss_train.backward()

        # Solvers update
        solver.update()

        e = categorical_error(pred_train.d, input_image_train["label"].d)
        monitor_loss.add(i, loss_train.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final snapshot after the loop.
    nn.save_parameters(
        os.path.join(args.model_save_path, 'params_%06d.h5' % (args.max_iter)))
from pathlib import Path import torch import numpy as np from allennlp.models import Model from allennlp.data.vocabulary import Vocabulary from allennlp.common import JsonDict from pytorch_transformers.optimization import AdamW import args import readers import common from predictor import McScriptPredictor from util import example_input, is_cuda, train_model, load_data, print_args ARGS = args.get_args() common.set_args(ARGS) def make_prediction(model: Model, reader: readers.BaseReader, verbose: bool = False) -> JsonDict: "Create a predictor to run our model and get predictions." model.eval() predictor = McScriptPredictor(model, reader) if verbose: print() print('#' * 5, 'EXAMPLE', '#' * 5) passage, question, answer1, label1 = example_input(0)
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False, aug=args.augment_train)
    # Keep the prediction buffer alive so the training error can be read
    # after backward clears intermediate buffers.
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(args.batch_size, False)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                # Cast to float32 before host-side error computation
                # (e.g. when running in half precision).
                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))

        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        loss.data.cast(np.float32, ctx)
        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training completes.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    # Save the final parameters and an NNP runtime description of the
    # validation network.
    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x': vimage}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(os.path.join(args.model_save_path,
                           '{}_result.nnp'.format(args.net)), runtime_contents)
def main():
    """Entry point: parse arguments, save them under "match", run matching."""
    cli_args = get_args()
    save_args(cli_args, "match")
    match(cli_args)
def train():
    """Train a LeNet MNIST classifier, snapshotting parameters whenever the
    validation error improves, and save the final parameters at the end."""
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction

    # TRAIN
    reference = "reference"  # parameter scope name shared by both graphs
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create `reference` prediction graph.
    pred = mnist_cnn_prediction(image, scope=reference, test=False)
    # Keep the prediction buffer alive so the training error can be read
    # after backward.
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create reference prediction graph.
    vpred = mnist_cnn_prediction(vimage, scope=reference, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= args.val_iter
            monitor_verr.add(i, ve)
        # Snapshot only when the validation error improves on the best so
        # far (`ve` keeps its last value between validation intervals).
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve

        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training completes.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)
    parameter_file = os.path.join(
        args.model_save_path, 'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def infer():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for inference.
    * Load parameter variables to infer.
    * Create monitor instances for saving and displaying infering stats.
    """
    args = get_args()

    from numpy.random import seed
    seed(0)

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    if args.net == 'lenet':
        mnist_cnn_prediction = mnist_lenet_prediction
    elif args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction
    else:
        raise ValueError("Unknown network type {}".format(args.net))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True, aug=args.augment_test)

    # Create Solver.
    # NOTE(review): the solver and the training monitors below are created
    # but never used during inference; they look like leftovers from the
    # training script.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST (validation iterator has batch 1).
    from numpy.random import RandomState
    data = data_iterator_mnist(args.batch_size, True, rng=RandomState(1223))
    vdata = data_iterator_mnist(1, False)

    from nnabla.utils.nnp_graph import NnpLoader
    # Read a .nnp file.
    nnp = NnpLoader(args.pretrained)
    # Assume a graph `graph_a` is in the nnp file.
    net = nnp.get_network(nnp.get_network_names()[0], batch_size=1)
    # `x` is an input of the graph.
    x = net.inputs['x']
    # 'y' is an outputs of the graph.
    y = net.outputs['y']

    # Evaluate the loaded network over 10000 test samples, one at a time.
    ve = 0.0
    for j in range(10000):
        x.d, vlabel.d = vdata.next()
        y.forward(clear_buffer=True)
        ve += categorical_error(y.d, vlabel.d)
    #monitor_verr.add(1, ve / args.val_iter)
    print("acc=", 1 - ve / 10000, ".")

    # append F.Softmax to the prediction graph so users see intuitive outputs
    runtime_contents = {
        'networks': [{
            'name': 'Validation',
            'batch_size': args.batch_size,
            'outputs': {
                'y': F.softmax(vpred)
            },
            'names': {
                'x': vimage
            }
        }],
        'executors': [{
            'name': 'Runtime',
            'network': 'Validation',
            'data': ['x'],
            'output': ['y']
        }]
    }
"test_beg": opt['test_beg'], } data_setting = { 'path': opt['base_img_path'], 'protected_attribute': opt['protected_attribute'], 'attribute': opt['attribute'], 'data_params': data_params, 'batch_size': opt['batch_size'] } opt['data_setting'] = data_setting return opt if __name__ == "__main__": opt = args.get_args() opt = get_data_settings(opt) attr_list = utils.get_all_attr() ctx = get_extension_context(opt['context'], device_id=opt['device_id'], type_config=opt['type_config']) nn.set_default_context(ctx) batch_size = opt['data_setting']['batch_size'] test = dl.actual_celeba_dataset(opt['data_setting'], batch_size, augment=False, split='test', shuffle=False) AC = clf.attribute_classifier( model_load_path="{}/{}/best/best_acc.h5".format( opt['model_save_path'], attr_list[opt['attribute']]))
data = data_iterator_mnist(batch_size, train=False, shuffle=True, rng=rng) for i in range(10000 / batch_size): image_data, label_data = data.next() image.d = image_data / 255. feature.forward(clear_buffer=True) features.append(feature.d.copy()) labels.append(label_data.copy()) features = np.vstack(features) labels = np.vstack(labels) # Visualize f = plt.figure(figsize=(16, 9)) for i in range(10): c = plt.cm.Set1(i / 10.) plt.plot(features[labels.flat == i, 0].flatten(), features[labels.flat == i, 1].flatten(), '.', c=c) plt.legend(map(str, range(10))) plt.grid() plt.savefig(os.path.join(args.monitor_path, "embed.png")) if __name__ == '__main__': monitor_path = 'tmp.monitor.siamese' args = get_args(monitor_path=monitor_path, model_save_path=monitor_path, max_iter=5000) train(args) visualize(args)
def main():
    """Train a two-layer LSTM language model and report training, validation
    and test perplexity; snapshots are saved when validation improves."""
    args = get_args()
    state_size = args.state_size
    batch_size = args.batch_size
    num_steps = args.num_steps
    num_layers = args.num_layers
    max_epoch = args.max_epoch
    max_norm = args.gradient_clipping_max_norm
    num_words = 10000  # vocabulary size
    lr = args.learning_rate

    train_data, val_data, test_data = get_data()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    from nnabla.monitor import Monitor, MonitorSeries
    monitor = Monitor(args.work_dir)
    monitor_perplexity = MonitorSeries(
        "Training perplexity", monitor, interval=10)
    monitor_vperplexity = MonitorSeries(
        "Validation perplexity", monitor,
        interval=(len(val_data) // (num_steps * batch_size)))
    monitor_tperplexity = MonitorSeries(
        "Test perplexity", monitor,
        interval=(len(test_data) // (num_steps * 1)))

    l1 = LSTMWrapper(batch_size, state_size)
    l2 = LSTMWrapper(batch_size, state_size)

    # train graph
    x = nn.Variable((batch_size, num_steps))
    t = nn.Variable((batch_size, num_steps))
    w = I.UniformInitializer((-0.1, 0.1))
    b = I.ConstantInitializer(1)
    loss = get_loss(l1, l2, x, t, w, b, num_words, batch_size, state_size, True)
    l1.share_data()
    l2.share_data()

    # validation graph (shares the LSTM wrappers / parameters with training)
    vx = nn.Variable((batch_size, num_steps))
    vt = nn.Variable((batch_size, num_steps))
    vloss = get_loss(l1, l2, vx, vt, w, b, num_words, batch_size, state_size)

    solver = S.Sgd(lr)
    solver.set_parameters(nn.get_parameters())

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    best_val = 10000
    for epoch in range(max_epoch):
        # Reset recurrent state at each epoch boundary.
        l1.reset_state()
        l2.reset_state()
        for i in range(len(train_data) // (num_steps * batch_size)):
            x.d, t.d = get_batch(train_data, i * num_steps,
                                 batch_size, num_steps)
            solver.zero_grad()
            loss.forward()
            loss.backward(clear_buffer=True)
            solver.weight_decay(1e-5)
            # Clip gradients globally before the SGD step.
            gradient_clipping(nn.get_parameters().values(), max_norm)
            solver.update()
            perp = perplexity(loss.d.copy())
            monitor_perplexity.add(
                (len(train_data) // (num_steps * batch_size)) * (epoch) + i,
                perp)

        # Validation pass with freshly reset state.
        l1.reset_state()
        l2.reset_state()
        vloss_avg = 0
        for i in range(len(val_data) // (num_steps * batch_size)):
            vx.d, vt.d = get_batch(val_data, i * num_steps,
                                   batch_size, num_steps)
            vloss.forward()
            vloss_avg += vloss.d.copy()
        vloss_avg /= float((len(val_data) // (num_steps * batch_size)))
        vper = perplexity(vloss_avg)

        if vper < best_val:
            best_val = vper
            # Only snapshot once perplexity is below an absolute threshold.
            if vper < 200:
                save_name = "params_epoch_{:02d}.h5".format(epoch)
                nn.save_parameters(os.path.join(args.save_dir, save_name))
        else:
            # No improvement: decay the learning rate.
            solver.set_learning_rate(solver.learning_rate() * 0.25)
            logger.info("Decreased learning rate to {:05f}".format(
                solver.learning_rate()))
        monitor_vperplexity.add(
            (len(val_data) // (num_steps * batch_size)) * (epoch) + i, vper)

    # for final test split (batch size 1, separate LSTM wrappers sharing
    # the trained parameters)
    t_batch_size = 1
    tl1 = LSTMWrapper(t_batch_size, state_size)
    tl2 = LSTMWrapper(t_batch_size, state_size)
    tloss_avg = 0
    tx = nn.Variable((t_batch_size, num_steps))
    tt = nn.Variable((t_batch_size, num_steps))
    tloss = get_loss(tl1, tl2, tx, tt, w, b, num_words, 1, state_size)
    tl1.share_data()
    tl2.share_data()
    for i in range(len(test_data) // (num_steps * t_batch_size)):
        tx.d, tt.d = get_batch(test_data, i * num_steps, 1, num_steps)
        tloss.forward()
        tloss_avg += tloss.d.copy()
    tloss_avg /= float((len(test_data) // (num_steps * t_batch_size)))
    tper = perplexity(tloss_avg)
    monitor_tperplexity.add(
        (len(test_data) // (num_steps * t_batch_size)) * (epoch) + i, tper)
self.i = i def __getitem__(self, item): if self.i == 0: return self.texti.trainX[item], self.texti.trainY[item] elif self.i == 1: return self.texti.validX[item], self.texti.validY[item] elif self.i == 2: return self.texti.testX[item], self.texti.testY[item] def __len__(self): return self.texti.trainX.shape[0] if __name__ == "__main__": config = args.get_args() texti = TextIterator(config) trainDataLoader = DataLoader(dataset=MyDataSet(config, texti, 0), batch_size=config.batchSize, shuffle=True, num_workers=0, drop_last=True) testDataLoader = DataLoader(dataset=MyDataSet(config, texti, 2), batch_size=config.batchSize, shuffle=False, num_workers=0, drop_last=True) for epoch in range(2): for i, data in enumerate(trainDataLoader): inputs, labels = data print("epoch: ", epoch, " ", inputs, " ", inputs.shape, " ",
def main():
    """Entry point: read the configuration file and run the application."""
    cli_args = get_args()
    cfg = utils.read_config(cli_args.config)
    Application(cfg).process()
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing
    # (LeNet unless resnet is requested).
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    # Keep the prediction buffer alive so the training error can be read
    # after backward clears intermediate buffers.
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))

        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training completes.
    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    # Save the final model parameters.
    parameter_file = os.path.join(
        args.model_save_path,
        '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct computation graphs for training and one for validation.
    * Initialize solvers and set parameter variables to those.
    * Instantiate a communicator and set parameter variables.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprops
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size

    # Create contexts — one CUDA context per device.
    extension_module = args.context
    if extension_module != "cuda" and \
            extension_module != "cuda.cudnn":
        raise Exception("Use `cuda` or `cuda.cudnn` extension_module.")
    n_devices = args.n_devices
    ctxs = []
    for i in range(n_devices):
        ctx = extension_context(extension_module, device_id=i)
        ctxs.append(ctx)
    ctx = ctxs[-1]

    # Create training graphs — one replica per device, each under its own
    # "device{i}" parameter scope.
    input_image_train = []
    preds_train = []
    losses_train = []
    test = False
    for i in range(n_devices):
        image = nn.Variable((args.batch_size, 3, 32, 32))
        label = nn.Variable((args.batch_size, 1))
        device_scope_name = "device{}".format(i)
        pred = cifar100_resnet23_prediction(
            image, ctxs[i], device_scope_name, test)
        loss = cifar100_resnet32_loss(pred, label)
        input_image_train.append({"image": image, "label": label})
        preds_train.append(pred)
        losses_train.append(loss)

    # Create validation graph
    test = True
    device_scope_name = "device{}".format(0)
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    # NOTE(review): ctxs[i] here uses the loop-leftover i (== n_devices - 1)
    # while the parameter scope is "device0" — this looks like it should be
    # ctxs[0] so the validation graph runs on the same device whose
    # parameters it reads. Confirm before changing.
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctxs[i], device_scope_name, test)
    input_image_valid = {"image": image_valid}

    # Solvers — one Adam per device, bound to that device's parameter scope.
    solvers = []
    for i in range(n_devices):
        with nn.context_scope(ctxs[i]):
            solver = S.Adam()
            device_scope_name = "device{}".format(i)
            with nn.parameter_scope(device_scope_name):
                params = nn.get_parameters()
                solver.set_parameters(params)
            solvers.append(solver)

    # Communicator — registers each device's (context, parameters) pair so
    # allreduce() can average gradients across replicas in place.
    comm = C.DataParalellCommunicator(ctx)
    for i in range(n_devices):
        device_scope_name = "device{}".format(i)
        with nn.parameter_scope(device_scope_name):
            ctx = ctxs[i]
            params = nn.get_parameters()
            comm.add_context_and_parameters((ctx, params))
    comm.init()

    # Create threadpools with one thread — one worker thread per device so
    # the per-device forward/backward calls can run concurrently.
    pools = []
    for _ in range(n_devices):
        pool = ThreadPool(processes=1)
        pools.append(pool)

    # Once forward/backward to safely secure memory
    for device_id in range(n_devices):
        data, label = \
            (np.random.randn(*input_image_train[device_id]["image"].shape),
             (np.random.rand(*input_image_train[device_id]["label"].shape)
              * 10).astype(np.int32))
        ret = pools[device_id].apply_async(
            forward_backward,
            (input_image_train[device_id]["image"], data,
             input_image_train[device_id]["label"], label,
             losses_train[device_id], solvers[device_id]))
        ret.get()
        losses_train[device_id].d  # sync to host

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop (iteration count divided by n_devices since each
        # step consumes n_devices minibatches).
        for i in range(int(args.max_iter / n_devices)):
            # Validation — roughly once per epoch.
            if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                ve = 0.
                for j in range(args.val_iter):
                    image, label = vdata.next()
                    input_image_valid["image"].d = image
                    pred_valid.forward()
                    ve += categorical_error(pred_valid.d, label)
                ve /= args.val_iter
                monitor_verr.add(i * n_devices, ve)
            if i % int(args.model_save_interval / n_devices) == 0:
                nn.save_parameters(os.path.join(
                    args.model_save_path, 'params_%06d.h5' % i))

            # Forwards/Zerograd/Backwards — dispatched asynchronously, one
            # thread per device, then joined.
            fb_results = []
            for device_id in range(n_devices):
                image, label = tdata.next()
                res = pools[device_id].apply_async(
                    forward_backward,
                    (input_image_train[device_id]["image"], image,
                     input_image_train[device_id]["label"], label,
                     losses_train[device_id], solvers[device_id]))
                fb_results.append(res)
            for device_id in range(n_devices):
                fb_results[device_id].get()

            # In-place Allreduce
            comm.allreduce()

            # Solvers update
            for device_id in range(n_devices):
                solvers[device_id].update()

            # Training stats are taken from the last device's replica only.
            e = categorical_error(
                preds_train[-1].d, input_image_train[-1]["label"].d)
            monitor_loss.add(i * n_devices, losses_train[-1].d.copy())
            monitor_err.add(i * n_devices, e)
            monitor_time.add(i * n_devices)

    nn.save_parameters(os.path.join(
        args.model_save_path,
        'params_%06d.h5' % (args.max_iter / n_devices)))
def train():
    """Train a WaveNet vocoder on LibriSpeech with nnabla.

    Builds the mu-law one-hot input graph, an optional speaker-embedding
    branch, the softmax cross-entropy loss, and an Adam solver; then runs the
    training loop, periodically decoding and saving generated audio.
    """
    args = get_args()

    # Set context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in {}:{}".format(args.context, args.type_config))
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    data_iterator = data_iterator_librispeech(args.batch_size, args.data_dir)
    _data_source = data_iterator._data_source  # dirty hack...

    # model
    x = nn.Variable(
        shape=(args.batch_size, data_config.duration, 1))  # (B, T, 1)
    # One-hot over the mu-law quantization levels, then channels-first for
    # the convolutional WaveNet stack.
    onehot = F.one_hot(x, shape=(data_config.q_bit_len, ))  # (B, T, C)
    wavenet_input = F.transpose(onehot, (0, 2, 1))  # (B, C, T)

    # speaker embedding (global conditioning); disabled -> s_emb is None.
    if args.use_speaker_id:
        s_id = nn.Variable(shape=(args.batch_size, 1))
        with nn.parameter_scope("speaker_embedding"):
            s_emb = PF.embed(s_id, n_inputs=_data_source.n_speaker,
                             n_features=WavenetConfig.speaker_dims)
            s_emb = F.transpose(s_emb, (0, 2, 1))
    else:
        s_emb = None

    net = WaveNet()
    wavenet_output = net(wavenet_input, s_emb)

    pred = F.transpose(wavenet_output, (0, 2, 1))  # (B, T, 1)

    # Target waveform (quantized levels), same layout as x.
    t = nn.Variable(shape=(args.batch_size, data_config.duration, 1))

    loss = F.mean(F.softmax_cross_entropy(pred, t))

    # for generation
    prob = F.softmax(pred)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)

    # setup save env.
    audio_save_path = os.path.join(os.path.abspath(
        args.model_save_path), "audio_results")
    if audio_save_path and not os.path.exists(audio_save_path):
        os.makedirs(audio_save_path)

    # Training loop.
    for i in range(args.max_iter):
        # todo: validation

        x.d, _speaker, t.d = data_iterator.next()
        if args.use_speaker_id:
            s_id.d = _speaker.reshape(-1, 1)

        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.update()

        # Cast back to float32 on the device context before reading the
        # scalar loss to the host for monitoring.
        loss.data.cast(np.float32, ctx)
        monitor_loss.add(i, loss.d.copy())

        if i % args.model_save_interval == 0:
            # Greedy decode of the current predictions back to waveform.
            prob.forward()
            audios = mu_law_decode(
                np.argmax(prob.d, axis=-1),
                quantize=data_config.q_bit_len)  # (B, T)
            save_audio(audios, i, audio_save_path)
model_times = "model_1/" # 第几次保存的模型,主要是用来获取最佳结果 bert_vocab_file = "../bert-base-uncased/vocab.txt" bert_model_dir = "../bert-base-uncased" do_train = True do_test = True # map(lambda: x, y: os.path.join(x, y), from Processors.Yelp2Processor import Yelp2Processor if model_name == "BertOrigin": main( args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir, bert_vocab_file, bert_model_dir), model_times, Yelp2Processor) elif model_name == "BertCNN": from BertCNN import args_model main( args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir, bert_vocab_file, bert_model_dir), model_times, Yelp2Processor, args_model.get_args()) elif model_name == "BertATT": main( args.get_args(model_name, data_dir, output_dir, cache_dir, log_dir, bert_vocab_file, bert_model_dir), model_times, Yelp2Processor) elif model_name == "BertRCNN":
if len(all_scores) > 0 and topk == 1: index = torch.argmax(all_scores) output_data[q] = paths[index] elif len(all_scores) > 0 and topk > 1: sorted_scores, index = torch.sort(all_scores, descending=True) output_data[q] = [paths[i] for i in index[:topk]] else: print(q, 'no path') with open(fn_out, 'w') as f: json.dump(output_data, f, ensure_ascii=False) if __name__ == "__main__": args = get_args(mode='predict') os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu # tokenize tokenizer = BertTokenizer.from_pretrained(args.bert_vocab, do_lower_case=args.do_lower_case) bert_field = BertField('BERT', tokenizer=tokenizer) print("loaded tokenizer") # gpu os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu print('使用%s号GPU' % args.gpu) # model # model = Bert_Comparing(args)
monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) with nn.parameter_scope("gen"): nn.save_parameters( os.path.join(args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters( os.path.join(args.model_save_path, "discriminator_param_%06d.h5" % i)) if __name__ == '__main__': monitor_path = 'tmp.monitor.dcgan' args = get_args(monitor_path=monitor_path, model_save_path=monitor_path, max_iter=20000, learning_rate=0.0002, batch_size=64, weight_decay=0.0001) train(args)
def main():
    """Train/evaluate an HMM language model on PTB or WikiText-2.

    Parses args, builds torchtext iterators, constructs the model, then either
    runs evaluation only (``--eval_only``) or the full train/validate loop with
    optional LR scheduling and per-epoch state-count diagnostics.

    Fixes applied in this revision:
    * ``LambdaLR(..., verbse=True)`` was a typo (TypeError at construction);
      now ``verbose=True``.
    * The noam ``get_lr`` raised ZeroDivisionError at step 0 (LambdaLR calls
      the lambda with step 0 on construction); the step is now clamped to >= 1,
      which is the standard noam formulation.
    * Unknown ``--dataset`` / ``--optimizer`` values now raise ValueError
      instead of leaving a name undefined.
    """
    global WANDB_STEP
    args = get_args()
    print(args)
    set_seed(args.seed)

    # Main and auxiliary devices; negative ids select CPU.
    device = th.device("cpu" if args.devid < 0 else f"cuda:{args.devid}")
    args.device = device
    aux_device = th.device(
        "cpu" if args.aux_devid < 0 else f"cuda:{args.aux_devid}")
    args.aux_device = aux_device

    TEXT = torchtext.data.Field(batch_first=True)
    if args.dataset == "ptb":
        Dataset = PennTreebank
    elif args.dataset == "wikitext2":
        Dataset = WikiText2
    else:
        raise ValueError(f"Invalid dataset {args.dataset}")
    train, valid, test = Dataset.splits(
        TEXT,
        newline_eos=True,
    )

    TEXT.build_vocab(train)
    V = TEXT.vocab

    # batch_size_fn variants: size batches by max token length or by
    # sentence count (see torchtext Iterator semantics).
    def batch_size_tokens(new, count, sofar):
        return max(len(new.text), sofar)

    def batch_size_sents(new, count, sofar):
        return count

    if args.iterator == "bucket":
        train_iter, valid_iter, test_iter = BucketIterator.splits(
            (train, valid, test),
            batch_sizes=[args.bsz, args.eval_bsz, args.eval_bsz],
            device=device,
            sort_key=lambda x: len(x.text),
            batch_size_fn=(batch_size_tokens
                           if args.bsz_fn == "tokens" else batch_size_sents),
        )
    elif args.iterator == "bptt":
        train_iter, valid_iter, test_iter = BPTTIterator.splits(
            (train, valid, test),
            batch_sizes=[args.bsz, args.eval_bsz, args.eval_bsz],
            device=device,
            bptt_len=args.bptt,
            sort=False,
        )
    else:
        raise ValueError(f"Invalid iterator {args.iterator}")

    if args.no_shuffle_train:
        train_iter.shuffle = False

    name = get_name(args)
    import tempfile  # kept for the (disabled) wandb run directory below
    # wandb.init(project="hmm-lm", name=name, config=args,
    #            dir=tempfile.mkdtemp())
    args.name = name

    from models.factoredhmmlm import FactoredHmmLm
    model = FactoredHmmLm(V, args)
    model.to(device)
    print(model)
    num_params, num_trainable_params = count_params(model)
    print(f"Num params, trainable: {num_params:,}, {num_trainable_params:,}")

    def select_eval_fn():
        # Cached evaluation for (ms)hmm/factoredhmm; the mixed variant is
        # needed once the class count exceeds the int16 range.
        if args.model == "mshmm" or args.model == "factoredhmm":
            if args.num_classes > 2**15:
                return mixed_cached_eval_loop
            return cached_eval_loop
        elif args.model == "hmm":
            return cached_eval_loop
        return eval_loop

    if args.eval_only:
        # Load checkpoint, report valid/test perplexity, and exit.
        model.load_state_dict(th.load(args.eval_only)["model"])
        eval_fn = select_eval_fn()
        v_start_time = time.time()
        valid_losses, valid_n = eval_fn(args, V, valid_iter, model)
        report(valid_losses, valid_n, f"Valid perf", v_start_time)
        t_start_time = time.time()
        test_losses, test_n = eval_fn(args, V, test_iter, model)
        report(test_losses, test_n, f"Test perf", t_start_time)
        sys.exit()

    parameters = list(model.parameters())
    if args.optimizer == "adamw":
        optimizer = AdamW(
            parameters,
            lr=args.lr,
            betas=(args.beta1, args.beta2),
            weight_decay=args.wd,
        )
    elif args.optimizer == "sgd":
        optimizer = SGD(
            parameters,
            lr=args.lr,
        )
    else:
        raise ValueError(f"Invalid optimizer {args.optimizer}")

    if args.schedule == "reducelronplateau":
        scheduler = ReduceLROnPlateau(
            optimizer,
            factor=1. / args.decay,
            patience=args.patience,
            verbose=True,
            mode="max",
        )
    elif args.schedule == "noam":
        warmup_steps = args.warmup_steps

        def get_lr(step):
            # Noam schedule: linear warmup then inverse-sqrt decay.
            # Clamp to >= 1: LambdaLR evaluates the lambda at step 0 on
            # construction, and 0 ** -0.5 raises ZeroDivisionError.
            step = max(step, 1)
            scale = warmup_steps**0.5 * min(
                step**(-0.5), step * warmup_steps**(-1.5))
            return args.lr * scale

        scheduler = LambdaLR(
            optimizer,
            get_lr,
            last_epoch=-1,
            verbose=True,  # was the typo `verbse`
        )
    else:
        raise ValueError("Invalid schedule options")

    # training loop, factor out later if necessary
    for e in range(args.num_epochs):
        start_time = time.time()
        if args.log_counts > 0 and args.keep_counts > 0:
            # reset at START of epoch
            model.state_counts.fill_(0)
        train_losses, train_n = train_loop(
            args, V, train_iter, model,
            parameters, optimizer, scheduler,
            valid_iter=valid_iter if not args.overfit else None,
            verbose=True,
        )
        total_time = report(
            train_losses, train_n, f"Train epoch {e}", start_time)

        v_start_time = time.time()
        eval_fn = select_eval_fn()
        valid_losses, valid_n = eval_fn(args, V, valid_iter, model)
        report(valid_losses, valid_n, f"Valid epoch {e}", v_start_time)

        if args.schedule in valid_schedules:
            scheduler.step(valid_losses.evidence
                           if not args.overfit else train_losses.evidence)
        update_best_valid(
            valid_losses, valid_n, model, optimizer, scheduler, args.name)

        # NOTE: wandb metric logging was disabled (commented out) upstream;
        # re-add a wandb.log call here keyed on WANDB_STEP to restore it.

        if args.log_counts > 0 and args.keep_counts > 0:
            # Per-epoch diagnostics over normalized emission counts; the
            # first 4 vocab entries (specials) are excluded. These stats
            # feed the disabled wandb logging above when re-enabled.
            counts = (model.counts / model.counts.sum(0, keepdim=True))[:, 4:]
            c, v = counts.shape
            cg2 = counts > 1e-2
            # Histogram of how often each latent state was used this epoch.
            sc0 = (model.state_counts == 0).sum()
            sc1 = (model.state_counts == 1).sum()
            sc2 = (model.state_counts == 2).sum()
            sc3 = (model.state_counts == 3).sum()
            sc4 = (model.state_counts == 4).sum()
            sc5 = (model.state_counts >= 5).sum()
            del cg2
            del counts

    # won't use best model. Rerun with eval_only
    t_start_time = time.time()
    test_losses, test_n = eval_fn(args, V, test_iter, model)
    report(test_losses, test_n, f"Test perf", t_start_time)
import os
import sys

# Make the project's src/ importable when running from the repo root.
src_dir = os.path.join(os.getcwd(), 'src')
sys.path.append(src_dir)

from utils.doc_utils import *
from utils.searcher import *
from args import get_args
from shutil import copyfileobj

if __name__ == '__main__':
    # Build per-collection file paths (qrels, topics, output CSV) and load
    # the relevance judgments and queries.
    args, _ = get_args()
    collection = args.collection
    anserini_path = args.anserini_path
    data_path = args.data_path
    index_path = args.index_path
    dataset_path = os.path.join(args.data_path, 'datasets')
    if not os.path.exists(dataset_path):
        os.mkdir(dataset_path)
    # NOTE(review): output_fn and index_path are unused in this visible
    # portion — presumably consumed further down in the original script.
    output_fn = os.path.join(dataset_path, collection + '_sents.csv')
    fqrel = os.path.join(data_path, 'qrels',
                         'qrels.' + collection + '.txt')
    ftopic = os.path.join(data_path, 'topics',
                          'topics.' + collection + '.txt')
    if os.path.exists(fqrel):
        qid2docid = get_relevant_docids(fqrel)
    else:
        # No qrels, label all as 0.
        qid2docid = {}
    qid2text = get_query(ftopic, collection=collection)
def main():
    """CLI entry point: dispatch on the parsed subcommand.

    Currently only the "create" subcommand is handled; it builds a
    point-cloud dataset from the mesh data at ``--mesh_data_path``.
    """
    cli_args = get_args()
    if cli_args.command == "create":
        create_pcd_dataset_from_mesh(cli_args.mesh_data_path)
def train():
    """
    Main script.

    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Computate error rate for validation data (periodically)
      * Get a next minibatch.
      * Execute forwardprop
      * Set parameter gradients zero
      * Execute backprop.
      * Inplace allreduce (THIS IS THE MAIN difference from a single device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    args = get_args()
    if args.tiny_mode:
        n_train_samples = 100000
    else:
        n_train_samples = 1282167

    # Communicator and Context — one MPI process per device; the rank
    # selects the CUDA device id.
    from nnabla.ext_utils import get_extension_context
    extension_module = "cudnn"
    ctx = get_extension_context(extension_module,
                                type_config=args.type_config)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    ctx.device_id = str(device_id)
    nn.set_default_context(ctx)

    # workarond to start with the same parameters.
    rng = np.random.RandomState(device_id)
    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500 images
        # in training set. The image size is 64x64. To adapt ResNet into 64x64
        # image inputs, the input image size of ResNet is set as 56x56, and
        # the stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet.
        # (ImageNet, https://imagenet.herokuapp.com/)
        # ImageNet consists of 1000 categories, each category has 1280 images
        # in training set. The image size is various. To adapt ResNet into
        # 320x320 image inputs, the input image size of ResNet is set as
        # 224x224. We need to get tar file and create cache file(320x320 images).
        # Please check README.
        data = data_iterator_imagenet(args.batch_size,
                                      args.train_cachefile_dir, rng=rng)
        vdata = data_iterator_imagenet(args.batch_size,
                                       args.val_cachefile_dir)
        # Each worker validates a disjoint slice of the validation set.
        vdata = vdata.slice(rng=None, num_of_slices=n_devices,
                            slice_pos=device_id)
        num_classes = 1000

    # Workaround to start with the same initialized weights for all workers.
    np.random.seed(313)
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    # Unlinked copy so the top-n-error graph does not extend the loss graph.
    t_pred2 = t_model.pred.unlinked()
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward
    v_pred2 = v_model.pred.unlinked()
    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Add parameters to communicator.
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Setting warmup: ramp linearly from learning_rate / n_devices up to
    # learning_rate over warmup_epoch epochs.
    base_lr = args.learning_rate / n_devices
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / args.accum_grad /
                      n_devices) * args.warmup_epoch
    warmup_slope = base_lr * (n_devices - 1) / warmup_iter
    solver.set_learning_rate(base_lr)

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=1)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=1)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed(
        "Validation time", monitor, interval=1)

    # Training loop. vl/ve are scalar variables used only as allreduce
    # buffers for averaging validation stats across workers.
    vl = nn.Variable()
    ve = nn.Variable()
    for i in range(int(args.max_iter / n_devices)):
        # Save parameters (rank 0 only).
        if i % (args.model_save_interval // n_devices) == 0 \
                and device_id == 0:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % (args.val_interval // n_devices) == 0 and i != 0:
            ve_local = 0.
            vl_local = 0.
            val_iter_local = args.val_iter // n_devices
            for j in range(val_iter_local):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Casts move the host-filled buffers onto the device.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vl_local += v_model.loss.d.copy()
                ve_local += v_e.d.copy()
            # Average locally, then across workers via allreduce.
            vl_local /= val_iter_local
            vl.d = vl_local
            comm.all_reduce(vl.data, division=True, inplace=True)
            ve_local /= val_iter_local
            ve.d = ve_local
            comm.all_reduce(ve.data, division=True, inplace=True)
            if device_id == 0:
                monitor_vloss.add(i * n_devices, vl.d.copy())
                monitor_verr.add(i * n_devices, ve.d.copy())
                monitor_vtime.add(i * n_devices)

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        def accumulate_error(l, e, t_model, t_e):
            # Fold the previous forward's loss/error into the accumulators.
            l += t_model.loss.d
            e += t_e.d
            return l, e

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            if j != 0:
                # Update e and l according to previous results of forward
                # propagation.
                # The update of last iteration is performed
                # after solver update to avoid unnecessary CUDA
                # synchronization.
                # This is performed after data.next() in order to overlap
                # the data loading and graph execution.
                # TODO: Move this to the bottom of the loop when prefetch
                # data loader is available.
                l, e = accumulate_error(l, e, t_model, t_e)
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            # Accumulating gradients
            t_model.loss.backward(clear_buffer=True)
            t_e.forward(clear_buffer=True)

        # AllReduce (sum of gradients across workers).
        params = [x.grad for x in nn.get_parameters().values()]
        comm.all_reduce(params, division=False, inplace=False)

        # Update
        solver.weight_decay(args.weight_decay)
        solver.update()

        # Accumulate errors after solver update
        l, e = accumulate_error(l, e, t_model, t_e)

        # Linear Warmup
        if i <= warmup_iter:
            lr = base_lr + warmup_slope * i
            solver.set_learning_rate(lr)

        # Synchronize by averaging the weights over devices using allreduce
        if (i + 1) % args.sync_weight_every_itr == 0:
            weights = [x.data for x in nn.get_parameters().values()]
            comm.all_reduce(weights, division=True, inplace=True)

        if device_id == 0:
            monitor_loss.add(i * n_devices, l / args.accum_grad)
            monitor_err.add(i * n_devices, e / args.accum_grad)
            monitor_time.add(i * n_devices)

        # Learning rate decay at scheduled iter
        if i * n_devices in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    if device_id == 0:
        nn.save_parameters(
            os.path.join(args.model_save_path,
                         'param_%06d.h5' % (args.max_iter / n_devices)))
def main():
    """Dispatch on --mode: train, run inference, or cross-validation scoring.

    In the cross-validation branch, positional extras supply topK and the
    alpha/beta/gamma interpolation limits, the held-out fold index, and a
    sub-mode ('train' grid-search / 'test' / all-topics).
    """
    args, other = get_args()
    experiment = args.experiment
    anserini_path = args.anserini_path
    datasets_path = os.path.join(args.data_path, 'datasets')
    if not os.path.isdir('log'):
        os.mkdir('log')
    if args.mode == 'training':
        train(args)
    elif args.mode == 'inference':
        scores = test(args)
        print_scores(scores)
    else:
        folds_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'fine_tuning', args.folds_file)
        qrels_path = os.path.join(anserini_path, 'src', 'main', 'resources',
                                  'topics-and-qrels', args.qrels_file)
        # Positional extras: topK, interpolation limits, fold id, sub-mode.
        topK = int(other[0])
        alpha = float(other[1])
        beta = float(other[2])
        gamma = float(other[3])
        test_folder_set = int(other[4])
        mode = other[5]

        # Divide topics according to fold parameters
        train_topics, test_topics, all_topics = [], [], []
        with open(folds_path) as f:
            folds = json.load(f)
        for i in range(0, len(folds)):
            all_topics.extend(folds[i])
            if i != test_folder_set:
                train_topics.extend(folds[i])
            else:
                test_topics.extend(folds[i])

        if args.interactive:
            sentid2text = query_sents(args)
            test(args)  # inference over each sentence

        collection_path = os.path.join(
            datasets_path,
            args.collection + '.csv') if not args.interactive \
            else args.interactive_path
        predictions_path = os.path.join(
            args.data_path, 'predictions', 'predict.'
            + experiment) if not args.interactive else os.path.join(
            args.data_path, 'predictions', args.predict_path)
        top_doc_dict, doc_bm25_dict, sent_dict, q_dict, doc_label_dict = \
            eval_bm25(collection_path)
        score_dict = load_bert_scores(predictions_path, q_dict, sent_dict)

        if args.interactive:
            # Dump the top/bottom 100 ranked sentences with their scores and
            # which ranker (BM25 vs BERT) dominated.
            top_rank_docs = visualize_scores(collection_path, score_dict)
            with open(os.path.join(args.data_path,
                                   'query_sent_scores.csv'), 'w') as scores_file:
                for doc in top_rank_docs[:100]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))
                for doc in top_rank_docs[-100:]:
                    scores_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                        doc[0], sentid2text[doc[0]], doc[1], doc[2],
                        'BM25' if doc[3] > 0 else 'BERT'))

        if not os.path.isdir('runs'):
            os.mkdir('runs')
        if mode == 'train':
            topics = train_topics if not args.interactive else list(
                q_dict.keys())
            # Grid search for best parameters
            for a in np.arange(0.0, alpha, 0.1):
                for b in np.arange(0.0, beta, 0.1):
                    for g in np.arange(0.0, gamma, 0.1):
                        calc_q_doc_bert(score_dict,
                                        'run.' + experiment + '.cv.train',
                                        topics, top_doc_dict, doc_bm25_dict,
                                        topK, a, b, g)
                        base = 'runs/run.' + experiment + '.cv.train'
                        # NOTE(review): shell string built from args — paths
                        # with spaces/metacharacters would break this; fine
                        # for trusted local use.
                        os.system(
                            '{}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map {} {}> eval.base'
                            .format(anserini_path, qrels_path, base))
                        with open('eval.base', 'r') as f:
                            for line in f:
                                metric, qid, score = line.split('\t')
                                map_score = float(score)
                                print(test_folder_set, round(a, 2),
                                      round(b, 2), round(g, 2), map_score)
        elif mode == 'test':
            topics = test_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(
                score_dict,
                'run.' + experiment + '.cv.test.' + str(test_folder_set),
                topics, top_doc_dict, doc_bm25_dict, topK,
                alpha, beta, gamma)
        else:
            topics = all_topics if not args.interactive else list(
                q_dict.keys())
            calc_q_doc_bert(score_dict, 'run.' + experiment + '.cv.all',
                            topics, top_doc_dict, doc_bm25_dict,
                            topK, alpha, beta, gamma)
def train():
    """Fine-tune a masked (slim) CIFAR-10 ResNet-23 with nnabla.

    Loads pretrained parameters, applies a sparsity mask with the requested
    reduction rate, then trains with Adam, checkpointing whenever validation
    error improves.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    logger.info("Running in %s" % args.context)
    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # Pretrained weights are mandatory for the slimming procedure.
    if args.model_load_path == "":
        raise Exception("Set `model_load_path`")
    nn.load_parameters(args.model_load_path)
    model_prediction = cifar10_resnet23_slim_prediction

    # TRAIN
    maps = 64
    data_iterator = data_iterator_cifar10
    c = 3
    h = w = 32
    n_train = 50000
    n_valid = 10000

    # Create input variables.
    image = nn.Variable([args.batch_size, c, h, w])
    label = nn.Variable([args.batch_size, 1])
    # Create model_prediction graph.
    pred = model_prediction(image, maps=maps, test=False)
    # Keep pred's buffer through backward so training error can be read.
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, c, h, w])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create predition graph.
    vpred = model_prediction(vimage, maps=maps, test=True)

    # Set mask (zeroes out a fraction of parameters given reduction_rate).
    create_and_set_mask(nn.get_parameters(grad_only=False),
                        rrate=args.reduction_rate)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=1)

    # Initialize DataIterator
    data = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    best_ve = 1.0
    ve = 1.0
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation over the whole validation set.
            ve = 0.0
            for j in range(int(n_valid / args.batch_size)):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            ve /= int(n_valid / args.batch_size)
            monitor_verr.add(i, ve)
        # Checkpoint only when validation error improves (ve only changes
        # on validation iterations, so this is effectively per-validation).
        if ve < best_ve:
            nn.save_parameters(
                os.path.join(args.model_save_path, 'params_%06d.h5' % i))
            best_ve = ve
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    # Final validation pass after training (i holds the last index).
    ve = 0.0
    for j in range(int(n_valid / args.batch_size)):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    ve /= int(n_valid / args.batch_size)
    monitor_verr.add(i, ve)
    parameter_file = os.path.join(
        args.model_save_path,
        'params_{:06}.h5'.format(args.max_iter))
    nn.save_parameters(parameter_file)
def main():
    """Entry point for generation: parse args, persist them, interpolate.

    The parsed arguments are saved under the "generate" tag before the
    interpolation routine runs, so the run configuration is reproducible.
    """
    parsed = get_args()
    save_args(parsed, "generate")
    interpolate(parsed)
# ------------------------------------------- def print_info(peak_data): print("Significance = %.2f" % peak_data['nu']) print("dfG/dx, dfG/dy, dfG/dz = ", peak_data['f1']) print("xd = %.2f" % peak_data['xd'], "a12sq = %.2f" % peak_data['a12sq'], "a13sq = %.2f" % peak_data['a13sq']) print("Euler1: a1, b1, p1 = ", peak_data['Euler1']) print("vx,vy,vz (peak velocity in km/s) :", peak_data['v_peculiar']) print("epsilon = %.2f" % peak_data['epsilon'], "omega = %.2f" % peak_data['omega']) print("Euler2: a2, b2, p2 = ", peak_data['Euler2']) # load args # ------------------------------------------- args = get_args() Rdm_seed = args.Rdm_seed RG = args.RG Ng = args.Ng Lbox = args.Lbox print("Seed = ", Rdm_seed) print("Lbox = %.1f" % Lbox, "Ng = %d" % Ng, "RG = %.2f" % RG) # Choose cosmology # ------------------------------------------- cosmology = nbcosmos.WMAP9 mycosmo = Cosmos(FLRW=True, obj=cosmology) # generate linear density field at z=0 # -------------------------------------------
def train():
    """Train an ImageNet (or Tiny ImageNet) classifier.

    Reads all configuration from command-line arguments (``get_args``):
    context/device, dataset mode, batch size, gradient accumulation count,
    validation cadence, checkpointing and learning-rate schedule. Writes
    checkpoints, monitor logs and NNP snapshots under
    ``args.model_save_path`` / ``args.monitor_path``.

    Fixes vs. original: the ``accumulate_error`` helper was re-created as a
    fresh closure on every training iteration; it is now defined once before
    the loop. Ambiguous single-letter accumulators ``l``/``e`` were renamed.
    """
    args = get_args()

    # Get context.
    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = get_extension_context(
        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    if args.tiny_mode:
        # We use Tiny ImageNet from Stanford CS231N class.
        # (Tiny ImageNet, https://tiny-imagenet.herokuapp.com/)
        # Tiny ImageNet consists of 200 categories, each category has 500
        # images in the training set. The image size is 64x64. To adapt
        # ResNet to 64x64 inputs, the network input size is 56x56 and the
        # stride in the first conv and the first max pooling are removed.
        # Please check README.
        data = data_iterator_tiny_imagenet(args.batch_size, 'train')
        vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
        num_classes = 200
    else:
        # We use ImageNet: 1000 categories, 1280 training images each, of
        # various sizes. The network input size is 224x224; tar files must be
        # converted to cache files (320x320 images) beforehand. Please check
        # README.
        data = data_iterator_imagenet(args.batch_size, args.train_cachefile_dir)
        vdata = data_iterator_imagenet(args.batch_size, args.val_cachefile_dir)
        num_classes = 1000

    # Training graph. pred stays persistent so its buffer survives backward
    # and the top-n error can still be computed from it.
    t_model = get_model(args, num_classes, test=False, tiny=args.tiny_mode)
    t_model.pred.persistent = True
    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    t_pred2 = t_model.pred.get_unlinked_variable()
    t_pred2.need_grad = False
    t_e = F.mean(F.top_n_error(t_pred2, t_model.label))

    # Validation graph (test mode); same persistence trick for forward.
    v_model = get_model(args, num_classes, test=True, tiny=args.tiny_mode)
    v_model.pred.persistent = True
    # TODO: need_grad should be passed to get_unlinked_variable after v1.0.3 fix.
    v_pred2 = v_model.pred.get_unlinked_variable()
    v_pred2.need_grad = False
    v_e = F.mean(F.top_n_error(v_pred2, v_model.label))

    # Save an NNP snapshot of the untrained network (epoch 0).
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path,
                           'Imagenet_result_epoch0.nnp'), contents)

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    start_point = 0
    if args.checkpoint is not None:
        # Load weights and solver state info from the specified checkpoint.
        start_point = load_checkpoint(args.checkpoint, solver)

    # Create monitors.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)
    monitor_vtime = M.MonitorTimeElapsed("Validation time", monitor, interval=10)

    def accumulate_error(loss_sum, err_sum, model, err_var):
        # Fold the current minibatch loss/error into the running sums.
        # Defined once here — the original re-created this closure on every
        # training iteration.
        return loss_sum + model.loss.d, err_sum + err_var.d

    # Training loop.
    for i in range(start_point, args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            # Save checkpoint file (weights + solver state).
            save_checkpoint(args.model_save_path, i, solver)

        # Validation (skipped at iteration 0).
        if i % args.val_interval == 0 and i != 0:
            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()
            vloss_sum = 0.0
            verr_sum = 0.0
            for _ in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast to compact dtypes before the device transfer.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                v_e.forward(clear_buffer=True)
                vloss_sum, verr_sum = accumulate_error(
                    vloss_sum, verr_sum, v_model, v_e)
            monitor_vloss.add(i, vloss_sum / args.val_iter)
            monitor_verr.add(i, verr_sum / args.val_iter)
            monitor_vtime.add(i)
            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training: accumulate gradients over args.accum_grad minibatches,
        # then perform a single solver update (effective batch size is
        # batch_size * accum_grad).
        tloss_sum = 0.0
        terr_sum = 0.0
        solver.zero_grad()
        for _ in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            t_e.forward(clear_buffer=True)
            tloss_sum, terr_sum = accumulate_error(
                tloss_sum, terr_sum, t_model, t_e)
        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, tloss_sum / args.accum_grad)
        monitor_err.add(i, terr_sum / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    nn.save_parameters(
        os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))

    # Save the final network as NNP.
    contents = save_nnp({'x': v_model.image}, {'y': v_model.pred},
                        args.batch_size)
    save.save(os.path.join(args.model_save_path, 'Imagenet_result.nnp'),
              contents)
# NOTE(review): the statements up to monitor_time.add(i) are the tail of a
# DCGAN training loop whose setup (solvers, losses, monitors, loop variable i)
# precedes this excerpt — confirm against the full file.
# Generator update (generator forward/backward already started above).
loss_gen.forward(clear_no_need_grad=True)
loss_gen.backward(clear_buffer=True)
solver_gen.weight_decay(args.weight_decay)
solver_gen.update()
monitor_fake.add(i, fake)
monitor_loss_gen.add(i, loss_gen.d.copy())  # .copy(): buffer reused next step

# Discriminator update.
solver_dis.zero_grad()
loss_dis.forward(clear_no_need_grad=True)
loss_dis.backward(clear_buffer=True)
solver_dis.weight_decay(args.weight_decay)
solver_dis.update()
monitor_loss_dis.add(i, loss_dis.d.copy())
monitor_time.add(i)

# Save generator and discriminator weights into separate files, using the
# parameter scopes they were created under.
with nn.parameter_scope("gen"):
    nn.save_parameters(os.path.join(
        args.model_save_path, "generator_param_%06d.h5" % i))
with nn.parameter_scope("dis"):
    nn.save_parameters(os.path.join(
        args.model_save_path, "discriminator_param_%06d.h5" % i))


if __name__ == '__main__':
    # Monitor logs and model snapshots share one directory.
    monitor_path = 'tmp.monitor.dcgan'
    args = get_args(monitor_path=monitor_path, model_save_path=monitor_path,
                    max_iter=20000, learning_rate=0.0002, batch_size=64,
                    weight_decay=0.0001)
    train(args)
def __init__(self, n_states, actions, batch_size=128, epsilon=0.1, alpha=0.2, gamma=0.9):
    """Build the Q-learning agent: Q-network, frozen target network, loss,
    and solver.

    Args:
        n_states: dimensionality of the observation vector.
        actions: sequence of available actions.
        batch_size: minibatch size used by both computation graphs.
            (Original default was ``int(128)`` — simplified to ``128``,
            same value.)
        epsilon: exploration rate for the epsilon-greedy policy.
        alpha: Q-learning blend factor (consumed elsewhere in the class —
            TODO confirm).
        gamma: reward discount factor.

    NOTE(review): the original used Python-2 ``print`` statements; they are
    converted to ``print()`` calls here for consistency with the rest of the
    file (same output under Python 3).
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    args = get_args()
    print("weight_decay:", args.weight_decay)
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Q-learning parameters.
    self.epsilon = epsilon
    self.alpha = alpha
    self.gamma = gamma
    self.actions = actions
    self.n_actions = len(actions)
    self.n_states = n_states

    # Neural network's training parameters.
    self.learning_rate = 1e-3
    self.batch_size = batch_size
    self.model_save_path = 'models'
    self.model_save_interval = 1000
    self.weight_decay = 0

    # State-action plot's parameters. The position/velocity ranges match the
    # classic MountainCar environment — presumably that is the target task;
    # confirm against the surrounding code.
    self.plim = [-1.2, 0.6]
    self.vlim = [-0.07, 0.07]
    self.N_position = 27
    self.N_velocity = 27
    self.positions = np.linspace(
        self.plim[0], self.plim[1], num=self.N_position, endpoint=True)
    self.velocities = np.linspace(
        self.vlim[0], self.vlim[1], num=self.N_velocity, endpoint=True)

    # --------------------------------------------------
    print("Initializing the Neural Network.")
    # --------------------------------------------------
    # Hidden layer's neuron number.
    hn = 50

    # Preparing the computation graph for Q.
    self.Q_x = nn.Variable([self.batch_size, self.n_states])
    self.Q_y = nn.Variable([self.batch_size, self.n_actions])

    # Construct Q-Network for Q-learning.
    l1 = F.tanh(PF.affine(self.Q_x, hn, name='affine1'))
    self.Q_Network = PF.affine(l1, self.n_actions, name='affine2')
    self.Q_Network.persistent = True

    # Create loss function.
    # self.loss = F.mean(F.squared_error(self.train_model, self.yt))
    self.loss = F.mean(F.huber_loss(self.Q_Network, self.Q_y))

    # Preparing the computation graph for the target Q-network. Its weights
    # are plain Variables with need_grad=False so the solver never touches
    # them; they are refreshed explicitly by update_Q_target().
    self.Q_target_x = nn.Variable([self.batch_size, self.n_states])
    self.Q_target_w1 = nn.Variable([self.n_states, hn], need_grad=False)  # Weights
    self.Q_target_b1 = nn.Variable([hn], need_grad=False)  # Biases
    self.Q_target_w2 = nn.Variable([hn, self.n_actions], need_grad=False)  # Weights
    self.Q_target_b2 = nn.Variable([self.n_actions], need_grad=False)  # Biases

    # Construct target Q-Network for Q-learning.
    h1 = F.tanh(
        F.affine(self.Q_target_x, self.Q_target_w1, self.Q_target_b1))
    self.Q_target_Network = F.affine(h1, self.Q_target_w2, self.Q_target_b2)
    self.update_Q_target()

    # --------------------------------------------------
    print("Initializing the Solver.")
    # --------------------------------------------------
    # Create Solver
    # self.solver = S.Sgd(self.learning_rate)
    self.solver = S.RMSprop(self.learning_rate, 0.95)
    self.solver.set_parameters(nn.get_parameters())

    # Sync the target network every `update_Q` training iterations.
    self.update_Q = 100
    self.iter = 0
    # self.plot_reset = True
# NOTE(review): the lines down to the `return` are the tail of the `params()`
# helper (its `def` line and the bindings of args/devDays/testDays/validDays/
# timeWindow/Para_newsDayWindow precede this excerpt) — the original
# indentation was lost; confirm against the full file.
if args.dev:
    validDays.extend(devDays)
if args.test:
    validDays.extend(testDays)
validDays = sorted(validDays)
######################
# Select which day set is written out: default all valid days, or only the
# dev/test subset depending on args.output.
outputDays = validDays
if args.output == "d":
    outputDays = devDays
elif args.output == "t":
    outputDays = testDays
return devDays, testDays, validDays, outputDays, timeWindow, Para_newsDayWindow


####################################################
if __name__ == "__main__":
    # NOTE(review): Python-2 `print` statements below — this entry point only
    # runs under Python 2, unlike the print() calls used elsewhere in the file.
    print "Program starts at ", time.asctime()

    args = get_args()
    print "**Para setting"
    print args

    ##############
    devDays, testDays, validDays, outputDays, timeWindow, Para_newsDayWindow = params(args, dataSelect=1)
    print "validDays", validDays
    print "outputDays", outputDays

    # Derive the output directory name from the input data directory.
    fileSuf_data = os.path.basename(os.path.dirname(args.input+"/"))  # eg: "word201505"
    time_flag = "." + time.strftime("%Y%m%d%H%M%S", time.gmtime())  # eg: ".20170912035918"
    output_dir = "../ni_data/models/"+fileSuf_data+"/"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Encode the clustering hyper-parameter into a file-name fragment.
    if args.cluster == "dbscan":
        cluster_arg = "eps" + str(args.dbscan_eps)
    else:
        cluster_arg = "cnum" + str(args.num_cls)
def train():
    """
    Main script.

    Trains a classifier on Tiny ImageNet end to end: sets up the nnabla
    context from CLI args, builds training/validation models, and runs a
    monitored training loop with gradient accumulation, periodic validation,
    periodic snapshots, and scheduled learning-rate decay.
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')
    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.

    # Training and validation graphs; pred stays persistent so its buffer
    # survives forward/backward and error can be computed from it.
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation (note: also runs at i == 0, on the untrained model).
        if i % args.val_interval == 0:
            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()
            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                # Cast to compact dtypes before the device transfer.
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                l += v_model.loss.d
                e += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)
            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training: accumulate gradients over args.accum_grad minibatches
        # before a single solver update.
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            l += t_model.loss.d
            e += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()

        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)

    # Save the final parameters after the loop completes.
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))