def __init__(self, sensors, target, cmdargs):
    """Set up the deep-Q navigation agent.

    Args:
        sensors: dict of sensor objects; 'radar' and 'gps' keys are read.
        target: the navigation target object.
        cmdargs: parsed command-line arguments; `robot_speed` is read.
    """
    # Prefer GPU 1 and fall back to the CPU when no such GPU exists.
    # Use `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt/SystemExit still propagate.
    try:
        cntk.try_set_default_device(cntk.device.gpu(1))
    except Exception:
        cntk.try_set_default_device(cntk.device.cpu())
    self._sensors = sensors
    self._cmdargs = cmdargs
    self._target = target
    self._radar = self._sensors['radar']
    self._radar_data = None
    self._dynamic_radar_data = None
    self._gps = self._sensors['gps']
    self._normal_speed = float(cmdargs.robot_speed)
    self.debug_info = {}
    self._stepNum = 0
    self._need_q_start = True
    # Observation: two 360-degree scans with values in [0, 100];
    # action space: 8 discrete headings.
    self._o_space = gs.Box(low=0, high=100, shape=(2, 360))
    self._a_space = gs.Discrete(8)
    self._qlearner = cntk_deeprl.agent.qlearning.QLearning(
        '', self._o_space, self._a_space)
    self._get_observation()
    self._last_badness = self._get_badness()
    # Running average buffer and exploration rate.
    self._ravg = [0] * 50
    self._epsilon = 0.0
def test_use_default_device():
    """A child process must be able to acquire the default device."""
    # Release any lock held on the CPU device by earlier tests.
    C.try_set_default_device(C.cpu(), False)
    result_queue = Queue()
    worker = Process(target=_use_default_device, args=(result_queue, ))
    worker.start()
    worker.join()
    assert worker.exitcode == 0
    assert result_queue.get()
def test_set_cpu_as_default_device():
    """CPU can be set as default without a lock, but never locked exclusively."""
    cpu_dev = C.cpu()
    assert not is_locked(cpu_dev)
    # Requesting an exclusive lock on the CPU device must fail...
    assert not C.try_set_default_device(cpu_dev, True)
    assert not is_locked(cpu_dev)
    # ...while the unlocked forms succeed.
    assert C.try_set_default_device(cpu_dev)
    assert C.try_set_default_device(cpu_dev, False)
    assert not is_locked(cpu_dev)
    assert cpu_dev == C.use_default_device()
def test_set_excluded_devices():
    """Excluded devices must be rejected as default until re-allowed."""
    # Requires at least one GPU in addition to the CPU.
    if len(C.device.all_devices()) == 1:
        return
    assert C.try_set_default_device(C.cpu(), False)
    assert C.try_set_default_device(C.gpu(0), False)
    # Exclude the CPU: selecting it must now fail.
    C.set_excluded_devices([C.cpu()])
    assert not C.try_set_default_device(C.cpu(), False)
    # Clearing the exclusion list restores it.
    C.set_excluded_devices([])
    assert C.try_set_default_device(C.cpu(), False)
def test_use_default_device():
    """Spawn a subprocess and verify it can use the default device."""
    # Drop any device locks left over from previous tests.
    C.try_set_default_device(C.cpu(), False)
    outcome = Queue()
    child = Process(target=_use_default_device, args=(outcome,))
    child.start()
    child.join()
    assert child.exitcode == 0
    assert outcome.get()
def test_set_excluded_devices():
    """Exclusion list controls which devices may become the default."""
    # Skip on CPU-only installs (only one enumerated device).
    if len(C.device.all_devices()) == 1:
        return
    assert C.try_set_default_device(C.cpu(), False)
    assert C.try_set_default_device(C.gpu(0), False)
    # With the CPU excluded, it cannot be chosen as default.
    C.set_excluded_devices([C.cpu()])
    assert not C.try_set_default_device(C.cpu(), False)
    # Emptying the exclusion list makes the CPU selectable again.
    C.set_excluded_devices([])
    assert C.try_set_default_device(C.cpu(), False)
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    """Train a small fully-connected classifier on random data.

    Returns (losses, errors): the per-report-interval loss/error lists.
    NOTE(review): relies on module-level `input_dim`, `num_output_classes`,
    `fully_connected_classifier_net`, `generate_random_data_sample`,
    `cntk_device` and `print_training_progress` — defined elsewhere in the file.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    # Allow re-selecting the default device even after it was frozen.
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    # Fixed seed so the generated training data is reproducible.
    np.random.seed(0)
    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    hidden_layers_dim = 50
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)
    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])
    num_minibatches_to_train = int(num_samples / minibatch_size)
    training_progress_output_freq = 20
    losses = []
    errors = []
    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({
            inp: features,
            label: labels
        }, device=cntk_device(device_id))
        batchsize, loss, error = print_training_progress(
            trainer, i, training_progress_output_freq)
        # "NA" is returned between report intervals; only record real values.
        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)
    return losses, errors
def set_device(device):
    """Select the CNTK compute device by name.

    Args:
        device: 'CPU' for the CPU device, 'GPU'/'CUDA' for GPU 0
            (falling back to the default device when no GPU exists),
            anything else keeps the default device.
    """
    if device == 'CPU':
        C.try_set_default_device(C.device.cpu())
    elif device == 'GPU' or device == 'CUDA':
        # C.device.gpu(0) raises on CPU-only builds; fall back to the
        # default device. Catch Exception, not a bare `except:`, so
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            C.try_set_default_device(C.device.gpu(0))
        except Exception:
            C.use_default_device()
    else:
        C.use_default_device()
def __init__(self, model_fn, gpuid):
    """Load a CNTK model pinned to one physical GPU.

    Args:
        model_fn: path to the serialized CNTK model file.
        gpuid: physical GPU index to expose to the process.
    """
    # Restrict CUDA to the requested physical device *before* importing
    # cntk, so the framework sees exactly one GPU (enumerated as gpu(0)).
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpuid)
    import cntk
    cntk.try_set_default_device(cntk.gpu(0))
    cntk.use_default_device()
    self.model_fn = model_fn
    self.model = cntk.load_model(self.model_fn)
def __init__(self, sensors, target, cmdargs):
    """Set up the deep-Q IRL agent: config, device, spaces, learner, policy.

    Args:
        sensors: dict of sensor objects; the 'mdp' key is read.
        target: the navigation target object.
        cmdargs: parsed command-line arguments; `robot_speed` is read.
    """
    self._sensors = sensors
    self._cmdargs = cmdargs
    self._target = target
    # Start from class defaults, then overlay any keys found in the
    # optional local JSON config.
    self._config = DeepQIRLAlgorithm.default_config
    if os.path.isfile('local_configs/deep_q_irl_config.json'):
        with open('local_configs/deep_q_irl_config.json', 'r') as f:
            # Write keys in a loop to keep any defaults
            # that are not specified in the config file
            tmp_config = json.load(f)
            for key in tmp_config:
                self._config[key] = tmp_config[key]
    # Prefer the configured GPU; fall back to the CPU when unavailable.
    try:
        cntk.try_set_default_device(cntk.device.gpu(
            self._config['gpu_id']))
    except:
        cntk.try_set_default_device(cntk.device.cpu())
    #self._radar = self._sensors['radar'];
    #self._radar_data = None
    #self._dynamic_radar_data = None
    #self._gps = self._sensors['gps'];
    self._normal_speed = float(cmdargs.robot_speed)
    self.debug_info = {}
    self._stepNum = 0
    self._mdp = self._sensors['mdp']
    self._features = self._get_features()
    # needs at least 7 features for qlearning to work
    # Observation shape is derived from the feature vector of an
    # arbitrary sampled MDP state.
    self._o_space_shape = (1, self._features[random.sample(
        self._mdp.states(), 1)[0]].size)
    self._o_space = gs.Box(low=0, high=1, shape=self._o_space_shape)
    self._a_space = gs.Discrete(4)
    #self.learner = cntk_deeprl.agent.policy_gradient.ActorCritic  # actor critic trainer
    self.learner = cntk_deeprl.agent.qlearning.QLearning  # qlearning trainer
    # Instantiate whichever learner class was selected above.
    if self.learner == cntk_deeprl.agent.qlearning.QLearning:
        self._qlearner = self.learner('local_configs/deepq_1.ini',
                                      self._o_space, self._a_space)
    elif self.learner == cntk_deeprl.agent.policy_gradient.ActorCritic:
        self._qlearner = self.learner(
            'local_configs/polify_gradient_1.ini', self._o_space,
            self._a_space)
    else:
        raise TypeError("Invalid type for _qlearner")
    self.maxIter = self._config['max_iters']
    self._reward, self._reward_map = self.get_reward()
    self._policy = self.get_policy()
def mpi_worker(working_dir, mb_source, gpu):
    """BMUF worker: seed per MPI rank, optionally pin GPU 0, then train."""
    rank = cntk.distributed.Communicator.rank()
    # Different seed per rank so workers draw different data.
    np.random.seed(rank)
    if gpu:
        # test with only one GPU
        cntk.try_set_default_device(cntk.gpu(0))
    is_frame_mode = (mb_source == "ctf_frame")
    bmuf = SimpleBMUFTrainer(is_frame_mode)
    for step, minibatch in enumerate(get_minibatch(bmuf, working_dir, mb_source)):
        bmuf.trainer.train_minibatch(minibatch)
        # Periodic progress report.
        if step % 50 == 0:
            bmuf.trainer.summarize_training_progress()
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    """Train a small fully-connected classifier on random data.

    Returns (losses, errors) recorded at each progress-report interval.
    NOTE(review): uses module-level `input_dim`, `num_output_classes`,
    `fully_connected_classifier_net`, `generate_random_data_sample`,
    `cntk_device` and `print_training_progress` defined elsewhere.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    # Permit changing the default device even if already frozen.
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    # Deterministic data generation.
    np.random.seed(0)
    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    hidden_layers_dim = 50
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)
    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])
    num_minibatches_to_train = int(num_samples / minibatch_size)
    training_progress_output_freq = 20
    losses = []
    errors = []
    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))
        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)
        # "NA" means "no report this step"; only keep real numbers.
        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)
    return losses, errors
def test_set_gpu_as_default_device():
    """Each GPU can become the default unlocked, then locked exclusively."""
    # Skip on CPU-only installs.
    if len(C.device.all_devices()) == 1:
        return
    # this will release any previous held device locks
    C.try_set_default_device(C.cpu(), False)
    gpu_count = len(C.device.all_devices()) - 1
    for gpu_index in range(gpu_count):
        gpu_dev = C.gpu(gpu_index)
        assert C.try_set_default_device(gpu_dev, False)
        assert not is_locked(gpu_dev)
        assert gpu_dev == C.use_default_device()
        # If no other process holds this GPU, lock it exclusively.
        if not gpu_dev.is_locked():
            assert not is_locked(gpu_dev)
            assert C.try_set_default_device(gpu_dev, True)
            assert gpu_dev == C.use_default_device()
            assert is_locked(gpu_dev)
def test_set_gpu_as_default_device():
    """Each GPU can become the default unlocked, then be locked exclusively."""
    # Skip on CPU-only installs (only one enumerated device).
    # Stray trailing semicolon removed for consistency with the sibling test.
    if len(C.device.all_devices()) == 1:
        return
    # this will release any previous held device locks
    C.try_set_default_device(C.cpu(), False)
    for i in range(len(C.device.all_devices()) - 1):
        device = C.gpu(i)
        assert C.try_set_default_device(device, False)
        assert not is_locked(device)
        assert device == C.use_default_device()
        if not device.is_locked():
            assert not is_locked(device)
            assert C.try_set_default_device(device, True)
            assert device == C.use_default_device()
            assert is_locked(device)
def data_parallel_sgd_on_sparse(outdir, gpu):
    """Run one data-parallel SGD minibatch on sparse input and save the result.

    Args:
        outdir: directory where the per-rank parameter value is saved.
        gpu: when truthy, pin training to GPU 0.
    """
    if gpu:
        # test with only one GPU
        C.try_set_default_device(C.gpu(0))
    else:
        # CPU sparse aggregation is not implemented, so turn it off
        # note we only need to explicitly do this when running with CPU device on a GPU build
        # For CPU build it's disabled by default
        C.cntk_py.use_sparse_gradient_aggregation_in_data_parallel_sgd(False)
    trainer = SimpleTrainer()
    np.random.seed(C.Communicator.rank())
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    indices = (np.random.random(
        (trainer.batch_size, )) * (trainer.input_dim - 1)).astype(int)
    trainer.train_minibatch(indices)
    np.save(os.path.join(outdir, str(C.Communicator.rank())), trainer.p.value)
def train(self, X1_train, X2_train, Y_train, X1_val, X2_val, Y_val,
          batch_size=128, epochs=10):
    """Train the siamese network on paired image inputs.

    Args:
        X1_train/X2_train: paired training images (same shape).
        Y_train: binary same/different labels for each pair.
        X1_val/X2_val/Y_val: validation pairs/labels (shape-checked only).
        batch_size: minibatch size.
        epochs: number of passes over the training pairs.
    """
    assert X1_train.shape == X2_train.shape
    assert len(X1_train) == len(Y_train)
    assert X1_val.shape == X2_val.shape
    assert len(X1_val) == len(Y_val)
    # try_set_default_device returns True on success.
    if cntk.try_set_default_device(cntk.gpu(0)):
        print("GPU Training enabled")
    else:
        print("CPU Training :(")
    # assumes NHWC-style (channels-last) arrays — TODO confirm with caller.
    input_shape = (X1_train.shape[1], X1_train.shape[2], X1_train.shape[3])
    self.siamese_net = self.build_network(input_shape)
    lr_per_minibatch = cntk.learning_rate_schedule(0.1,
                                                   cntk.UnitType.minibatch)
    pp = cntk.logging.ProgressPrinter()
    out = input_variable((1))
    loss = cntk.binary_cross_entropy(self.out, out)
    learner = cntk.adam(self.out.parameters, lr=lr_per_minibatch,
                        momentum=0.9)
    trainer = cntk.Trainer(self.out, (loss, loss), [learner], [pp])
    cntk.logging.log_number_of_parameters(self.out)
    for epoch in range(epochs):
        # perm = np.random.permutation(len(Y_train))
        # Sequential (unshuffled) minibatches over the training pairs.
        for i in range(0, len(Y_train), batch_size):
            max_n = min(i + batch_size, len(Y_train))
            # x1 = X1_train[perm[i:max_n]]
            # x2 = X2_train[perm[i:max_n]]
            # y = Y_train[perm[i:max_n]]
            x1 = X1_train[i:max_n]
            x2 = X2_train[i:max_n]
            y = Y_train[i:max_n]
            trainer.train_minibatch({
                self.left_input: x1,
                self.right_input: x2,
                out: y
            })
            pp.update_with_trainer(trainer, with_metric=True)
        print('.')
        pp.epoch_summary(with_metric=False)
def mpi_worker_multi_learner(working_dir, checkpoint_dir, mb_source, gpu):
    """Multi-learner BMUF worker: train, report, and checkpoint once.

    Args:
        working_dir: directory holding the minibatch source data.
        checkpoint_dir: where to save/restore a checkpoint; "" disables it.
        mb_source: minibatch source name; "ctf_frame" selects frame mode.
        gpu: when truthy, pin training to GPU 0.
    """
    comm_rank = cntk.distributed.Communicator.rank()
    # Seed per rank so each worker draws different data.
    np.random.seed(comm_rank)
    if gpu:
        # test with only one GPU
        cntk.try_set_default_device(cntk.gpu(0))
    frame_mode = (mb_source == "ctf_frame")
    bmuf = MultiLearnerMUFTrainer(frame_mode)
    checkpoint_performed = False
    for i, data in enumerate(get_minibatch(bmuf, working_dir, mb_source)):
        bmuf.trainer.train_minibatch(data)
        if i % 50 == 0:
            bmuf.trainer.summarize_training_progress()
        # Save/restore exactly once, and only if a checkpoint dir was given.
        # (`!=` replaces the awkward `not ... == ""` comparison.)
        if not checkpoint_performed and checkpoint_dir != "":
            bmuf.trainer.save_checkpoint(checkpoint_dir)
            bmuf.trainer.restore_from_checkpoint(checkpoint_dir)
            checkpoint_performed = True
def distributed_worker(outdir, gpu, mode, config):
    """Train NUM_BATCHES minibatches with checkpointing, then save parameters.

    Args:
        outdir: output directory for checkpoints and the final parameter dump.
        gpu: when truthy, pin training to GPU 0.
        mode: distributed-training mode string, also used in file names.
        config: trainer configuration passed through to SimpleTrainer.
    """
    if gpu:
        # test with only one GPU
        C.try_set_default_device(C.gpu(0))
    else:
        # CPU sparse aggregation is not implemented, so turn it off
        # note we only need to explicitly do this when running with CPU device on a GPU build
        # For CPU build it's disabled by default
        C.cntk_py.use_sparse_gradient_aggregation_in_data_parallel_sgd(False)
    trainer = SimpleTrainer(mode, config)
    for batch in range(NUM_BATCHES):
        set_np_random_seed(C.Communicator.rank(), batch)
        # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
        indices = (np.random.random((BATCH_SIZE_PER_WORKER,))*(trainer.input_dim-1)).astype(int)
        trainer.train_minibatch(indices)
        checkpoint_file = os.path.join(outdir, mode+str(batch))
        trainer.trainer.save_checkpoint(checkpoint_file)
        trainer.trainer.restore_from_checkpoint(checkpoint_file)
    # save a checkpoint to force sync after last minibatch
    trainer.trainer.save_checkpoint(os.path.join(outdir, mode+'_last'))
    np.save(os.path.join(outdir, mode+str(C.Communicator.rank())), trainer.p.value)
def distributed_worker(outdir, gpu, mode, config):
    """Train NUM_BATCHES minibatches with per-batch checkpointing, then save.

    Args:
        outdir: output directory for checkpoints and the parameter dump.
        gpu: when truthy, pin training to GPU 0.
        mode: distributed-training mode string, also used in file names.
        config: trainer configuration passed through to SimpleTrainer.
    """
    if gpu:
        # test with only one GPU
        C.try_set_default_device(C.gpu(0))
    else:
        # CPU sparse aggregation is not implemented, so turn it off
        # note we only need to explicitly do this when running with CPU device on a GPU build
        # For CPU build it's disabled by default
        C.cntk_py.use_sparse_gradient_aggregation_in_data_parallel_sgd(False)
    trainer = SimpleTrainer(mode, config)
    for batch in range(NUM_BATCHES):
        set_np_random_seed(C.Communicator.rank(), batch)
        # `np.int` was removed in NumPy 1.24; the builtin `int` is equivalent.
        indices = (np.random.random((BATCH_SIZE_PER_WORKER, )) *
                   (trainer.input_dim - 1)).astype(int)
        trainer.train_minibatch(indices)
        checkpoint_file = os.path.join(outdir, mode + str(batch))
        trainer.trainer.save_checkpoint(checkpoint_file)
        trainer.trainer.restore_from_checkpoint(checkpoint_file)
    # save a checkpoint to force sync after last minibatch
    trainer.trainer.save_checkpoint(os.path.join(outdir, mode + '_last'))
    np.save(os.path.join(outdir, mode + str(C.Communicator.rank())),
            trainer.p.value)
from models import feature_predicter_GRP

### User inputs ###
network_list = ['action+', 'action', 'action_m', 'feature', 'GRP', 'GRP+', 'GRP_feature']
parser = argparse.ArgumentParser()
parser.add_argument('model_type',
                    type=str,
                    action='store',
                    choices=network_list,
                    help='The type of model to use')
parser.add_argument('--data-file',
                    dest='data_file',
                    type=str,
                    action='store',
                    default='data/training_human_data.json')
# (stray trailing semicolon removed)
parser.add_argument('--gpu-id',
                    dest='gpu_id',
                    type=int,
                    default=-2,
                    help="""The GPU to use. -1 for CPU, -2 for default.""")
cmdargs = parser.parse_args(sys.argv[1:])

# Set device to run on
if cmdargs.gpu_id >= 0:
    C.try_set_default_device(C.gpu(cmdargs.gpu_id))
elif cmdargs.gpu_id == -1:
    C.try_set_default_device(C.cpu())

network = cmdargs.model_type
data_file = cmdargs.data_file

######################
### DATA INPUT ###
#######################
# Target distribution parameters used by the data pipeline below.
target_dist = 30
target_var = 50000
#######################
import cntk
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras import backend as K
from scipy.io import wavfile
import numpy as np
from random import shuffle

# ## FRAMEWORK

# In[2]:

# Use the first enumerated CNTK device (GPU when available, else CPU).
cntk.try_set_default_device(cntk.all_devices()[0])

# ## DATA PREPARATION

# In[3]:

# Clip length in seconds and target image dimensions for the spectrograms.
sec = 3
img_rows = 28
img_cols = 28
input_shape = (img_rows, img_cols, 1)
num_classes = 2
# NOTE(review): `os` is used below but not imported in this chunk —
# presumably imported earlier in the file; verify.
root, _dirs, files = next(
    os.walk(os.path.join(os.getcwd(), os.path.join("dataset", "train"))))
train_paths = [os.path.join(root, file) for file in files]
from cntk.layers import default_options, Convolution, BatchNormalization, MaxPooling, Dense, For, Sequential, combine, \
    placeholder
from cntk.logging import ProgressPrinter, os, log_number_of_parameters
from datetime import datetime
from sklearn.metrics import fbeta_score, accuracy_score
import matplotlib.pyplot as plt
from Helpers import helpers
import xml.etree.cElementTree as et
import xml.dom.minidom

# from azureml.core.run import Run
# # get the Azure ML run object
# run = Run.get_submitted_run()

success = try_set_default_device(gpu(0))
print("Using GPU: {}".format(success))


def create_map_files_from_folders(data_dir, split=0.8, number_of_samples=800):
    """Write images.txt mapping each image path to its class index.

    Each immediate subdirectory of data_dir is treated as one class; up to
    number_of_samples png/jpg/jpeg files per class are listed, tab-separated
    as "<abs path>\t<class index>". NOTE(review): `split` is currently
    unused — kept for interface compatibility.
    """
    with open(os.path.join(data_dir, 'images.txt'), mode='w') as f:
        # Only the top level of data_dir is inspected.
        # Renamed the third element to avoid shadowing it with the loop
        # variable below (the original reused the name `file` for both).
        path, classes, _top_files = list(os.walk(data_dir))[0]
        for cls in classes:
            for image_path in [
                    x for x in glob.glob(os.path.join(path, cls, '*'))
                    if not x.endswith('txt')
            ][:number_of_samples]:
                if image_path.endswith(('png', 'jpg', 'jpeg')):
                    f.write("{}\t{}\n".format(os.path.abspath(image_path),
                                              classes.index(cls)))
            # (continuation of a scoring function whose `def` lies above this
            # chunk) Truncate the prediction matrix to the gold length...
            pp = pp[:g_len]
        else:
            # ...or zero-pad it up to the gold length.
            pad = np.zeros((g_len - p_len, voc_len))
            pp = np.vstack((pp, pad))
        # Per-position token accuracy between gold and prediction.
        labels = np.argmax(gg, axis=1).flatten()
        pred = np.argmax(pp, axis=1).flatten()
        # print('[FUNCTION] report: labels:{}, pred:{}'.format(labels, pred))
        pres = len(labels[labels == pred]) / g_len
        avg_pres += pres
        # print('[FUNCTION] report: precision:{}'.format(pres))
        # print('[FUNCTION] report: average precision:{}'.format(avg_pres/len(gts)))
    # Mean precision over all gold sequences.
    return avg_pres / len(gts)


# Quiet CNTK's GPU-memory trace and logging output.
C.cntk_py.set_gpumemory_allocation_trace_level(0)
C.logging.set_trace_level(C.logging.TraceLevel.Error)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='specify gpu id', default=0, type=int)
    parser.add_argument('--tensorboard',
                        help='tensorboard directory',
                        type=str,
                        default='.')
    args = parser.parse_args()
    C.try_set_default_device(C.gpu(args.gpu))
    s2smodel = create_model()
    train(myConfig, s2smodel, args, True)
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """Train repeatedly on one fixed minibatch and fail on sustained memory growth.

    Raises ValueError when the fraction of iterations with increased memory
    AND the absolute growth both exceed their tolerances.
    NOTE(review): relies on module-level `input_dim`, `num_output_classes`,
    `fully_connected_classifier_net`, `generate_random_data_sample`,
    `cntk_device`, `os_process` and `mem_used` defined elsewhere.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)
    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    hidden_layers_dim = 50
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)
    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])
    num_minibatches_to_train = int(num_samples / minibatch_size)
    mem = np.zeros(num_minibatches_to_train)
    features, labels = generate_random_data_sample(minibatch_size, input_dim,
                                                   num_output_classes)
    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024
    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)
        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1
    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    mem_diff = mem[-1] - mem[10]
    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                          nonlinearity, mem_changes))
#!/usr/local/bin/python
"""Quick diagnostic: report whether CNTK is installed and has GPU support."""
try:
    import cntk
except ImportError:
    # Catch only the import failure (a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit).
    print("You do not have CNTK")
    exit()

print(cntk.device.all_devices())

# try_set_default_device returns True when the GPU device is usable.
if cntk.try_set_default_device(cntk.device.gpu(0)):
    print("You have GPU Support in CNTK")
else:
    print("You DO NOT have GPU Support in CNTK")
import cntk as C
import numpy as np
from io_funcs.binary_io import BinaryIOCollection
from model_lf0_weight import SRU_MULTI_SPEAKER

# Pin all computation to GPU 3.
gpu_descriptor = C.gpu(3)
C.try_set_default_device(gpu_descriptor)

# Build the multi-speaker SRU model (87 inputs, 187 outputs; the last two
# args are presumably learning rate and dropout — confirm against the class).
proj = SRU_MULTI_SPEAKER(87, 187, 0.001, 0.5)
trainer = proj.trainer
# Restore the epoch-41 checkpoint and extract the inference model.
trainer.restore_from_checkpoint('net/16k/trainer_' + str(41))
output = trainer.model
# One-hot selector constant; presumably picks a speaker/emotion — verify.
index = C.Constant(value=np.asarray([0, 1, 0]).astype(np.float32))
input = C.sequence.input_variable(shape=87)
out = output(input, index)
# Save the composed graph for standalone inference.
out.save('extracted_model/16k/model_emo')
def __init__(self, sensors, target, cmdargs):
    """Set up the deep-Q IRL agent: config, device, IRL params, learner, demos.

    Args:
        sensors: dict of sensor objects; the 'mdp' key is read.
        target: the navigation target object.
        cmdargs: parsed command-line arguments; `robot_speed` is read.
    """
    self._sensors = sensors
    self._cmdargs = cmdargs
    self._target = target
    # Start from class defaults, then overlay keys from the optional
    # local JSON config file.
    self._config = DeepQIRLAlgorithm.default_config
    if os.path.isfile('local_configs/deep_q_irl_config.json'):
        with open('local_configs/deep_q_irl_config.json', 'r') as f:
            # Write keys in a loop to keep any defaults
            # that are not specified in the config file
            tmp_config = json.load(f)
            for key in tmp_config:
                self._config[key] = tmp_config[key]
    # Prefer the configured GPU; fall back to CPU when unavailable.
    try:
        cntk.try_set_default_device(cntk.device.gpu(
            self._config['gpu_id']))
    except:
        cntk.try_set_default_device(cntk.device.cpu())
    #self._radar = self._sensors['radar'];
    #self._radar_data = None
    #self._dynamic_radar_data = None
    #self._gps = self._sensors['gps'];
    self._normal_speed = float(cmdargs.robot_speed)
    self.debug_info = {}
    ##########################
    # ### IRL parameters ### #
    self._max_steps = 200  #200
    self._max_loops = 300  #300
    self._lr = 0.3
    self._decay = 0.9
    self._IRLmaxIter = 1
    # ### IRL parameters ### #
    ##########################
    self._stepNum = 0
    self._mdp = self._sensors['mdp']
    self._features_DQN = self._get_features_DQN()
    self._features_IRL = self._get_features_IRL()
    # Observation shape derived from the DQN feature vector of an
    # arbitrary sampled MDP state.
    self._o_space_shape = (1, self._features_DQN[random.sample(
        self._mdp.states(), 1)[0]].size)
    self._o_space = gs.Box(low=0, high=1, shape=self._o_space_shape)
    self._a_space = gs.Discrete(4)
    #self.learner = cntk_deeprl.agent.policy_gradient.ActorCritic  # actor critic trainer
    self.learner = cntk_deeprl.agent.qlearning.QLearning  # qlearning trainer
    # Instantiate whichever learner class was selected above.
    if self.learner == cntk_deeprl.agent.qlearning.QLearning:
        self._qlearner = self.learner('local_configs/deepq_1.ini',
                                      self._o_space, self._a_space)
    elif self.learner == cntk_deeprl.agent.policy_gradient.ActorCritic:
        self._qlearner = self.learner(
            'local_configs/polify_gradient_1.ini', self._o_space,
            self._a_space)
    else:
        raise TypeError("Invalid type for _qlearner")
    self.maxIter = self._config['max_iters']
    # Generate expert demonstrations via value iteration.
    self._valueIteration = ValueIterationNavigationAlgorithm(
        self._sensors, self._target, self._cmdargs)
    self._demonstrations = self._add_demonstration_loop(
        self._max_steps, self._max_loops)
    self._mdp.local = True
    # the following action set assumes that all
    # states have the same action set
    actions_set = self._mdp.actions(self._mdp._goal_state)
    self._actions = list([action for action in actions_set])
    self.IRL_network = IRL_network(
        self._features_IRL[:, 0].shape,
        self._lr,
        hidden_layers=[50, 40, 40, 10])  # [50,50,50,20,20,10])
    #self.IRL_network = IRL_network(self._features_IRL[:,0].shape,self._lr, hidden_layers = [1000,800,600,400,200,100,10])
    self._qlearner.set_as_best_model()
    self.main_loop()
def main():
    """Train PPO on LunarLander-v2 until solved or max_episodes is reached."""
    ############## Hyperparameters ##############
    env_name = "LunarLander-v2"
    # creating environment
    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = 4
    render = False  # True #
    # BUG FIX: `solved_reward` was commented out but is referenced below in
    # the early-stop test, which raised NameError. Restore its definition.
    solved_reward = 230  # stop training if avg_reward > solved_reward
    log_interval = 20  # print avg reward in the interval
    max_episodes = 50000  # max training episodes
    max_timesteps = 300  # max timesteps in one episode
    n_latent_var = 64  # number of variables in hidden layer
    update_timestep = 2000  # update policy every n timesteps
    lr = 0.002  # 1e-3
    betas = (0.9, 0.999)
    gamma = 0.99  # discount factor
    K_epochs = 4  # update policy for K epochs
    eps_clip = 0.2  # clip parameter for PPO
    random_seed = None
    #############################################
    C.try_set_default_device(C.gpu(0))
    if random_seed:
        env.seed(random_seed)
    memory = Memory()
    ppo = PPO(state_dim, action_dim, n_latent_var, lr, betas, gamma,
              K_epochs, eps_clip)
    print(lr, betas)
    # logging variables
    running_reward = 0
    avg_length = 0
    timestep = 0
    # training loop
    for i_episode in range(1, max_episodes + 1):
        state = env.reset()
        for t in range(max_timesteps):
            timestep += 1
            # Running policy_old:
            action = ppo.policy_old.act(state, memory)
            state, reward, done, _ = env.step(action)
            # Saving reward and is_terminal:
            memory.rewards.append(reward)
            memory.is_terminals.append(done)
            # update if its time
            if timestep % update_timestep == 0:
                ppo.update(memory)
                memory.clear_memory()
                timestep = 0
            running_reward += reward
            if render:
                env.render()
            if done:
                break
        avg_length += t
        _writer.add_scalar('Episode reward', running_reward, i_episode)
        # stop training if avg_reward > solved_reward
        if running_reward > (log_interval * solved_reward):
            print("########## Solved! ##########")
            # ppo.policy.action_layer.save('action_layer.model')
            # ppo.policy.value_layer.save('value_layer.model')
            break
        # logging
        if i_episode % log_interval == 0:
            avg_length = int(avg_length / log_interval)
            running_reward = int((running_reward / log_interval))
            print('Episode {} \t avg length: {} \t reward: {}'.format(
                i_episode, avg_length, running_reward))
            running_reward = 0
            avg_length = 0
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """Train repeatedly on one fixed minibatch and fail on sustained memory growth.

    Raises ValueError when both the fraction of iterations with increased
    memory AND the absolute growth exceed their tolerances.
    NOTE(review): relies on module-level `input_dim`, `num_output_classes`,
    `fully_connected_classifier_net`, `generate_random_data_sample`,
    `cntk_device`, `os_process` and `mem_used` defined elsewhere.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)
    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate)
    hidden_layers_dim = 50
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)
    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)
    learner = C.sgd(z.parameters, lr_schedule, minibatch_size = 0)
    trainer = C.Trainer(z, (loss, eval_error), [learner])
    num_minibatches_to_train = int(num_samples / minibatch_size)
    mem = np.zeros(num_minibatches_to_train)
    features, labels = generate_random_data_sample(minibatch_size, input_dim,
                                                   num_output_classes)
    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This tolerance should not be
    # exceeded when run as a standalone process (simply run this file with the
    # Python executable).
    MEM_INCREASE_TOLERANCE = 10*1024
    dev = cntk_device(device_id)
    i = 0
    proc = os_process()
    while i < num_minibatches_to_train:
        mem[i] = mem_used(proc)
        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1
    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train
    mem_diff = mem[-1] - mem[10]
    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100),
                          nonlinearity, mem_changes))
def train(data_path, model_path, log_file, config_file, restore=False,
          profiling=False, gen_heartbeat=False):
    """Train the PolyMath QA model with optional multi-GPU / restore / profiling.

    Args:
        data_path: root directory for training/validation data and logs.
        model_path: directory where model checkpoints are written.
        log_file: log file name (also used as the TensorBoard subdir).
        config_file: importable module name providing `training_config`.
        restore: resume from an existing checkpoint when True.
        profiling: enable the CNTK profiler when True.
        gen_heartbeat: forwarded to the ProgressPrinter.
    """
    training_config = importlib.import_module(config_file).training_config
    # config for using multi GPUs
    if training_config['multi_gpu']:
        gpu_pad = training_config['gpu_pad']
        gpu_cnt = training_config['gpu_cnt']
        my_rank = C.Communicator.rank()
        # Map each MPI rank onto a GPU, offset by gpu_pad.
        my_gpu_id = (my_rank + gpu_pad) % gpu_cnt
        print("rank = " + str(my_rank) + ", using gpu " + str(my_gpu_id) +
              " of " + str(gpu_cnt))
        C.try_set_default_device(C.gpu(my_gpu_id))
    else:
        C.try_set_default_device(C.gpu(0))
    # outputs while training
    normal_log = os.path.join(data_path, training_config['logdir'], log_file)
    # tensorboard files' dir
    tensorboard_logdir = os.path.join(data_path, training_config['logdir'],
                                      log_file)
    polymath = PolyMath(config_file)
    z, loss = polymath.model()
    max_epochs = training_config['max_epochs']
    log_freq = training_config['log_freq']
    progress_writers = [
        C.logging.ProgressPrinter(num_epochs=max_epochs,
                                  freq=log_freq,
                                  tag='Training',
                                  log_to_file=normal_log,
                                  rank=C.Communicator.rank(),
                                  gen_heartbeat=gen_heartbeat)
    ]
    # add tensorboard writer for visualize
    tensorboard_writer = C.logging.TensorBoardProgressWriter(
        freq=10,
        log_dir=tensorboard_logdir,
        rank=C.Communicator.rank(),
        model=z)
    progress_writers.append(tensorboard_writer)
    lr = C.learning_parameter_schedule(training_config['lr'],
                                       minibatch_size=None,
                                       epoch_size=None)
    # Exponential-moving-average shadow copies of the parameters; the
    # "dummy" reduce_sum/assign nodes force the EMA update on evaluation.
    ema = {}
    dummies_info = {}
    dummies = []
    for p in z.parameters:
        ema_p = C.constant(0,
                           shape=p.shape,
                           dtype=p.dtype,
                           name='ema_%s' % p.uid)
        ema[p.uid] = ema_p
        dummies.append(C.reduce_sum(C.assign(ema_p, p)))
        dummies_info[dummies[-1].output] = (p.name, p.shape)
    dummy = C.combine(dummies)
    learner = C.adadelta(z.parameters, lr)
    if C.Communicator.num_workers() > 1:
        learner = C.data_parallel_distributed_learner(learner)
    trainer = C.Trainer(z, (loss, None), learner, progress_writers)
    if profiling:
        C.debugging.start_profiler(sync_gpu=True)
    train_data_file = os.path.join(data_path, training_config['train_data'])
    train_data_ext = os.path.splitext(train_data_file)[-1].lower()
    model_file = os.path.join(model_path, model_name)
    model = C.combine(list(z.outputs) + [loss.output])
    label_ab = argument_by_name(loss, 'ab')
    epoch_stat = {
        'best_val_err': 100,
        'best_since': 0,
        'val_since': 0,
        'record_num': 0
    }
    if restore and os.path.isfile(model_file):
        trainer.restore_from_checkpoint(model_file)
        #after restore always re-evaluate
        epoch_stat['best_val_err'] = validate_model(
            os.path.join(data_path, training_config['val_data']), model,
            polymath, config_file)

    def post_epoch_work(epoch_stat):
        # Summarize, periodically validate using the EMA weights, and
        # checkpoint on improvement. Returns False to request early stop.
        trainer.summarize_training_progress()
        epoch_stat['val_since'] += 1
        if epoch_stat['val_since'] == training_config['val_interval']:
            epoch_stat['val_since'] = 0
            # Swap in the EMA weights for validation, restoring afterwards.
            temp = dict((p.uid, p.value) for p in z.parameters)
            for p in trainer.model.parameters:
                p.value = ema[p.uid].value
            val_err = validate_model(
                os.path.join(data_path, training_config['val_data']), model,
                polymath, config_file)
            if epoch_stat['best_val_err'] > val_err:
                epoch_stat['best_val_err'] = val_err
                epoch_stat['best_since'] = 0
                os.system("ls -la >> log.log")
                os.system("ls -la ./Models >> log.log")
                # Retry checkpointing up to 100 times (shared-FS flakiness,
                # presumably — verify).
                save_flag = True
                fail_cnt = 0
                while save_flag:
                    if fail_cnt > 100:
                        print("ERROR: failed to save models")
                        break
                    try:
                        trainer.save_checkpoint(model_file)
                        epoch_stat['record_num'] += 1
                        record_file = os.path.join(
                            model_path,
                            str(epoch_stat['record_num']) + '-' + model_name)
                        trainer.save_checkpoint(record_file)
                        save_flag = False
                    except:
                        fail_cnt = fail_cnt + 1
                for p in trainer.model.parameters:
                    p.value = temp[p.uid]
            else:
                epoch_stat['best_since'] += 1
                if epoch_stat['best_since'] > training_config['stop_after']:
                    return False
        if profiling:
            C.debugging.enable_profiler()
        return True

    if train_data_ext == '.ctf':
        mb_source, input_map = create_mb_and_map(loss, train_data_file,
                                                 polymath)
        minibatch_size = training_config['minibatch_size']  # number of samples
        epoch_size = training_config['epoch_size']
        for epoch in range(max_epochs):
            num_seq = 0
            while True:
                # After 'distributed_after' samples, read a partitioned,
                # scaled-up minibatch per worker.
                if trainer.total_number_of_samples_seen >= training_config[
                        'distributed_after']:
                    data = mb_source.next_minibatch(
                        minibatch_size * C.Communicator.num_workers(),
                        input_map=input_map,
                        num_data_partitions=C.Communicator.num_workers(),
                        partition_index=C.Communicator.rank())
                else:
                    data = mb_source.next_minibatch(minibatch_size,
                                                    input_map=input_map)
                trainer.train_minibatch(data)
                num_seq += trainer.previous_minibatch_sample_count
                # print_para_info(dummy, dummies_info)
                if num_seq >= epoch_size:
                    break
            if not post_epoch_work(epoch_stat):
                break
    else:
        if train_data_ext != '.tsv':
            raise Exception("Unsupported format")
        minibatch_seqs = training_config[
            'minibatch_seqs']  # number of sequences
        for epoch in range(max_epochs):  # loop over epochs
            tsv_reader = create_tsv_reader(loss, train_data_file, polymath,
                                           minibatch_seqs,
                                           C.Communicator.num_workers())
            minibatch_count = 0
            for data in tsv_reader:
                # Round-robin minibatches across workers by count.
                if (minibatch_count %
                        C.Communicator.num_workers()) == C.Communicator.rank():
                    trainer.train_minibatch(data)  # update model with it
                    dummy.eval()
                minibatch_count += 1
            if not post_epoch_work(epoch_stat):
                break
    if profiling:
        C.debugging.stop_profiler()
                        # (continuation of parse_arguments(), whose `def` and
                        # the '--input' argument lie above this chunk)
                        required=True,
                        default=None)
    parser.add_argument('--output',
                        help='The output ONNX model file.',
                        required=True,
                        default=None)
    parser.add_argument(
        '--end_node',
        help=
        'The end node of CNTK model. This is to remove error/loss related parts from input model.',
        default=None)
    parser.add_argument('--seq_len',
                        help='Test data sequence length.',
                        type=int,
                        default=0)
    parser.add_argument('--batch_size',
                        help='Test data batch size.',
                        type=int,
                        default=1)
    return parser.parse_args()


if __name__ == '__main__':
    # Model conversion runs on the CPU.
    C.try_set_default_device(C.cpu())
    args = parse_arguments()
    print('input model: ' + args.input)
    print('output model: ' + args.output)
    convert_model_and_gen_data(args.input, args.output, args.end_node,
                               args.seq_len, args.batch_size)
    print('Done!')
        # (tail of a method whose `def` lies above this chunk)
        self._loss = new_loss
        return self._model, self._loss, self._input_phs


# =============== test edition ==================
from cntk.debugging import debug_model


def test_model_part():
    """Smoke-test: build the RNet model and evaluate one small minibatch."""
    from train_pm import create_mb_and_map
    rnet = RNet('config')
    model, loss, input_phs = rnet.build_model()
    mb, input_map = create_mb_and_map(input_phs, 'dev.ctf', rnet)
    data = mb.next_minibatch(3, input_map=input_map)
    res = model.eval(data)
    print(res)


def _testcode():
    """Scratch fixture data; `data`/`inp` are built but not evaluated here."""
    data = [
        np.array([[1, 2, 3, 0], [1, 2, 3, 0]]),
        np.array([[1, 2, 0, 0], [2, 3, 0, 0]]),
        np.array([[4, 0, 0, 0], [5, 0, 0, 0], [6, 0, 0, 0]])
    ]
    inp = C.sequence.input_variable(4)


if __name__ == '__main__':
    C.try_set_default_device(C.gpu(2))
    test_model_part()
def test(test_data, model_path, model_file, config_file):
    """Evaluate a trained PolyMath span-prediction model on *test_data*.

    Writes one JSON object per query (keys ``query_id`` and ``answers``)
    to ``<model_file>_out.json``.

    NOTE(review): this file arrived with its indentation collapsed onto
    single physical lines; the nesting below was reconstructed from
    statement order — confirm loop boundaries against the original.
    """
    training_config = importlib.import_module(config_file).training_config
    # config for using multi GPUs
    if training_config['multi_gpu']:
        # Map worker rank -> GPU id, offset by gpu_pad and wrapped
        # modulo the GPU count, so workers spread across devices.
        gpu_pad = training_config['gpu_pad']
        gpu_cnt = training_config['gpu_cnt']
        my_rank = C.Communicator.rank()
        my_gpu_id = (my_rank + gpu_pad) % gpu_cnt
        print("rank = " + str(my_rank) + ", using gpu " + str(my_gpu_id) +
              " of " + str(gpu_cnt))
        C.try_set_default_device(C.gpu(my_gpu_id))
    else:
        C.try_set_default_device(C.gpu(0))
    polymath = PolyMath(config_file)
    # Fall back to the module-level default `model_name` when no explicit
    # model file is given.
    model = C.load_model(
        os.path.join(model_path, model_file if model_file else model_name))
    # The saved model exposes begin/end span logits plus the training loss.
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = C.as_composite(model.outputs[2].owner)
    # Gradient-enabled inputs so best_span_score.grad() can recover the
    # best span via backprop through the symbolic best-span operation.
    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_logits.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_logits.dynamic_axes[1], needs_gradient=True)
    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    # Running sum of (begin - delayed end) marks every position inside the
    # predicted span with a nonzero value.
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    batch_size = 32  # in sequences
    # create_tsv_reader fills `misc` with raw context / token / answer / uid
    # data as a side effect while yielding minibatches.
    misc = {'rawctx': [], 'ctoken': [], 'answer': [], 'uid': []}
    tsv_reader = create_tsv_reader(loss,
                                   test_data,
                                   polymath,
                                   batch_size,
                                   1,
                                   is_test=True,
                                   misc=misc)
    results = {}
    with open('{}_out.json'.format(model_file), 'w',
              encoding='utf-8') as json_output:
        for data in tsv_reader:
            out = model.eval(data,
                             outputs=[begin_logits, end_logits, loss],
                             as_numpy=False)
            # Gradient of the best-span score w.r.t. the prediction inputs
            # yields indicator-like values for the chosen begin/end.
            g = best_span_score.grad(
                {
                    begin_prediction: out[begin_logits],
                    end_prediction: out[end_logits]
                },
                wrt=[begin_prediction, end_prediction],
                as_numpy=False)
            other_input_map = {
                begin_prediction: g[begin_prediction],
                end_prediction: g[end_prediction]
            }
            span = predicted_span.eval((other_input_map))
            for seq, (raw_text, ctokens, answer, uid) in enumerate(
                    zip(misc['rawctx'], misc['ctoken'], misc['answer'],
                        misc['uid'])):
                # Nonzero span-indicator positions delimit the answer.
                seq_where = np.argwhere(span[seq])[:, 0]
                span_begin = np.min(seq_where)
                span_end = np.max(seq_where)
                predict_answer = get_answer(raw_text, ctokens, span_begin,
                                            span_end)
                # NOTE(review): `results` is reused across iterations — each
                # dump overwrites the same two keys in place.
                results['query_id'] = int(uid)
                results['answers'] = [predict_answer]
                json.dump(results, json_output)
                json_output.write("\n")
            # Reset the side-channel buffers for the next minibatch.
            misc['rawctx'] = []
            misc['ctoken'] = []
            misc['answer'] = []
            misc['uid'] = []
# ---- Checkpointing / logging CLI options and script entry point ----
# NOTE(review): `parser`, `main`, `cntk`, and `os` come from earlier in the
# file; this chunk only appends options, parses them, and dispatches.
parser.add_argument('-md', '--model_dir', default='chkpt',
                    help='Directory for logs and checkpoints')
parser.add_argument('-lf', '--log_freq', type=int, default=10,
                    help='The number of episodes between progress logs')
parser.add_argument('-cf', '--chkpt_freq', type=int, default=100,
                    help='The number of episodes between checkpoints')

args = parser.parse_args()

# Select the right target device when this notebook is being tested
if 'TEST_DEVICE' in os.environ:
    test_device = (cntk.device.cpu()
                   if os.environ['TEST_DEVICE'] == 'cpu'
                   else cntk.device.gpu(0))
    cntk.try_set_default_device(test_device)

main(args.env_name, args.episodes, args.gamma, args.learning_rate,
     args.batch_size, args.mem_cap, args.target_update, args.action_repeat,
     args.stack_frames, args.replay_period, args.replay_start_size,
     args.use_exp, args.min_epsilon, args.decay_exp, args.decay_lin,
     args.model_dir, args.log_freq, args.chkpt_freq)
def load_cnn_model(fn, gpu_id=0):
    """Load a CNTK model from file *fn*, preferring GPU ``gpu_id``.

    Falls back to the CPU device when the requested GPU cannot be set
    as the default (e.g. no GPU present or the device is locked),
    mirroring the GPU-with-CPU-fallback pattern used elsewhere in this
    file.

    Args:
        fn: Path to the serialized CNTK model.
        gpu_id: Index of the GPU device to try first (default 0).

    Returns:
        The loaded CNTK model function.
    """
    # Bug fix: the original discarded the boolean returned by
    # try_set_default_device, so a failed GPU selection went unnoticed;
    # it also made a dead cntk.use_default_device() call whose result
    # was never used. Check the return value and fall back to CPU.
    if not cntk.try_set_default_device(cntk.gpu(gpu_id)):
        cntk.try_set_default_device(cntk.cpu())
    return cntk.load_model(fn)