def __init__(self, max_pending=None, default_func_caller=None, cpu_only=False): num_gpus_available = len( tf.config.experimental.list_physical_devices('GPU')) if max_pending is None: max_pending = 4 if cpu_only else max(1, num_gpus_available) super().__init__(max_pending) worker_resources = { "num_gpus": 0 if debug_mode() or cpu_only or num_gpus_available == 0 else 1, } self.worker_cls = ray.remote(**worker_resources)(Worker) if not ray.is_initialized(): ray.init(local_mode=debug_mode(), ignore_reinit_error=True) self.max_pending = max_pending self.default_func_caller = default_func_caller if self.default_func_caller: self.caller_handle = ray.put(default_func_caller) self.scheduler = Scheduler(self.worker_cls.remote() for _ in range(max_pending)) self.last_receive_time = 0
def train(): tf.keras.backend.clear_session() with strategy.scope() if config['use_tpu'] else empty_context_mgr(): model = GAN_PG(**config) # Define optimizers optimizer_g = tf.train.AdamOptimizer( learning_rate=config['learning_rate'], beta1=0.0) optimizer_d = tf.train.AdamOptimizer( learning_rate=config['learning_rate'], beta1=0.0) # Compile the model model.compile_model(optimizer_g=optimizer_g, optimizer_d=optimizer_d, loss=config['gan_mode'], tpu_strategy=strategy, resolver=resolver, config=config['sess_config']) if config['restore']: model.load_weights('{}/weights'.format(config['folder'])) # Prepare inputs inputs = (X_train, y_train) if config['conditional'] else X_train # Train for stage in config['train_stages']: # Get training stage num stage_num = config['train_stages'].index(stage) print( '\nProcessing stage: {} with image size {} ==========================================' .format(stage_num, stage['size'])) # Define schedulers alpha_scheduler = Scheduler(stage['train_epochs'], [0, 0.5], [0, 1.0]) learning_rate_scheduler = Scheduler( stage['train_epochs'], [0, 0.5], [stage['lr'] * 0.1, stage['lr']]) model.fit_stage(inputs, config['batch_size'], stage_num=stage_num, alpha_scheduler=alpha_scheduler, learning_rate_scheduler=learning_rate_scheduler, folder=config['folder'], save_epoch=config['save_epoch'], seed_noise=seed_noise, seed_labels=seed_labels) make_gif( glob.iglob('{}/progress/*.png'.format(config['folder'])), '{}/progress/{}_{}.gif'.format(config['folder'], config['gan_mode'], 'progress'))
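# A minimal sketch of what a schedule like Scheduler(stage['train_epochs'], [0, 0.5], [0, 1.0])
# above might compute: a value interpolated over training progress within a stage. The class
# name PiecewiseScheduler and the get(epoch) method are assumptions for illustration,
# not the project's actual API.
import numpy as np

class PiecewiseScheduler:
    def __init__(self, n_epochs, fractions, values):
        # fractions are positions in [0, 1] of the stage, values are the targets there
        self.n_epochs = n_epochs
        self.fractions = np.asarray(fractions, dtype=np.float64)
        self.values = np.asarray(values, dtype=np.float64)

    def get(self, epoch):
        # np.interp clamps outside the given range, so the last value is held afterwards
        progress = epoch / max(self.n_epochs - 1, 1)
        return float(np.interp(progress, self.fractions, self.values))

# Example: alpha ramps from 0 to 1 over the first half of a 10-epoch stage, then stays at 1.
alpha = PiecewiseScheduler(10, [0, 0.5], [0, 1.0])
print([round(alpha.get(e), 2) for e in range(10)])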
def main(multithreading: bool = True):
    node_num = 300
    logger.info('Generating the wireless sensor network...')
    wsn = generate_rand_nodes(wsn=Wsn(),
                              wsn_width_x=100,
                              wsn_width_y=100,
                              node_num=node_num,
                              node_r_mu=10,
                              node_r_sigma=5,
                              node_power=100000000000,
                              node_pc_per_send=1)
    logger.info('Wireless sensor network generated')
    bystander = Bystander(wsn)
    logger.info('Bystander created')
    # Give node 1 its "soul" (seed its initial state)
    wsn.node_manager.nodes[0].teammate_num = node_num * 0.95
    wsn.node_manager.nodes[0].send_queue.append('Hello World!')
    if multithreading:
        Scheduler.schedule(bystander, EnumScheduleMode.MULTI_THREAD, [
            TerminationCondition.UserDriven(),
            TerminationCondition.NodeDriven(),
            TerminationCondition.RunningTime(300),
            TerminationCondition.SurvivalRate(0.6),
        ])
    else:
        Scheduler.schedule(bystander, EnumScheduleMode.SINGLE_THREAD, [
            TerminationCondition.UserDriven(),
            TerminationCondition.NodeDriven(),
            TerminationCondition.NumOfCycles(300),
            TerminationCondition.SurvivalRate(0.6),
        ])
    logger.info('Computing power-usage statistics...')
    power_usage = 0
    for node in wsn.node_manager.nodes:
        power_usage += (node.total_power - node.power)
    logger.warning(f'Total power consumed in this run: {power_usage} units')
    logger.info('Main thread finished...')
def search(self, load_from: str = None, save_every: int = None): if load_from: self.load_state(load_from) ray.init(local_mode=debug_mode()) trainer = ray.put(self.trainer) ss = ray.put(self.config.search_space) scheduler = Scheduler([ GPUTrainer.remote(ss, trainer) for _ in range(self.max_parallel_evaluations) ]) self.log.info( f"Searching with {self.max_parallel_evaluations} workers.") def should_submit_more(cap): return (len(self.history) + scheduler.pending_tasks() < cap) \ and scheduler.has_a_free_worker() def point_number(): return len(self.history) + scheduler.pending_tasks() + 1 while len(self.history) < self.initial_population_size: if should_submit_more(cap=self.initial_population_size): self.log.info(f"Populating #{point_number()}...") scheduler.submit(self.random_sample()) else: info = scheduler.await_any() self.population.append(info) self.history.append(info) self.maybe_save_state(save_every) while len(self.history) < self.rounds: if should_submit_more(cap=self.rounds): self.log.info(f"Searching #{point_number()}...") sample = np.random.choice(self.population, size=self.sample_size) parent = max(sample, key=self.get_mo_fitness_fn()) scheduler.submit(self.evolve(parent.point)) else: info = scheduler.await_any() self.population.append(info) while len(self.population) > self.population_size: self.population.pop(0) self.history.append(info) self.maybe_save_state(save_every) self.bounds_log()
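# A minimal sketch of a Ray worker-pool scheduler consistent with the calls used above
# (submit / await_any / pending_tasks / has_a_free_worker). It is an illustration of the
# pattern only, assuming each worker actor exposes an evaluate.remote(point) method;
# it is not the project's actual Scheduler.
import ray

class WorkerPoolScheduler:
    def __init__(self, workers):
        self.free_workers = list(workers)   # idle actor handles
        self.in_flight = {}                 # ObjectRef -> worker that produced it

    def has_a_free_worker(self):
        return len(self.free_workers) > 0

    def pending_tasks(self):
        return len(self.in_flight)

    def submit(self, point):
        worker = self.free_workers.pop()
        ref = worker.evaluate.remote(point)  # assumed worker method
        self.in_flight[ref] = worker

    def await_any(self):
        # Block until any in-flight task finishes, free its worker, return the result.
        done, _ = ray.wait(list(self.in_flight), num_returns=1)
        ref = done[0]
        worker = self.in_flight.pop(ref)
        self.free_workers.append(worker)
        return ray.get(ref)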
def train_episode(cnets, dTNet, device, args, lr, epoch, n_epochs, train_loader, test_loader, episode_idx, layers, stats=None, eps_init=0, balanced_loss=False, net_weights=[1]): if not isinstance(cnets,list): cnets = [cnets] for cnet in cnets: cnet.train() net = cnets[0].net relaxed_net = cnets[0].relaxed_net relu_ids = relaxed_net.get_relu_ids() eps = eps_init if "COLT" in args.train_mode: relu_stable = args.relu_stable elif "adv" in args.train_mode: relu_stable = None args.mix = False elif "natural" in args.train_mode: relu_stable = None args.nat_factor = 1 args.mix = False elif "diffAI" in args.train_mode: relu_stable = None else: raise RuntimeError(f"Unknown train mode {args.train_mode:}") print('Saving model to:', args.model_dir) print('Training layers: ', layers) for j in range(len(layers) - 1): opt, lr_scheduler = get_opt(net, args.opt, lr, args.lr_step, args.lr_factor, args.n_epochs, train_loader, args.lr_sched) curr_layer_idx = layers[j + 1] eps_old = eps eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j) if eps_old is None: eps_old = eps kappa_sched = Scheduler(0 if args.mix else 1, 1, args.train_batch * len(train_loader) * args.mix_epochs, 0 if not args.anneal else args.train_batch * len(train_loader)*args.anneal_warmup) beta_sched = Scheduler(args.beta_start if args.mix else args.beta_end, args.beta_end, args.train_batch * len(train_loader) * args.mix_epochs, 0) eps_sched = Scheduler(eps_old if args.anneal else eps, eps, args.train_batch * len(train_loader) * args.anneal_epochs, args.train_batch * len(train_loader)*args.anneal_warmup, power=args.anneal_pow) layer_dir = '{}/{}/{}'.format(args.model_dir, episode_idx, curr_layer_idx) if not os.path.exists(layer_dir): os.makedirs(layer_dir) print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.format(eps, lr, curr_layer_idx)) if balanced_loss: assert cnets[0].lossFn_test is None, "Unexpected lossFn" data_balance = np.array(train_loader.dataset.targets).astype(float).mean() balance_factor = (1 - data_balance) / (data_balance + 1e-3) cnets[0].update_loss_fn(balance_factor, device) for curr_epoch in range(n_epochs): if balanced_loss and args.sliding_loss_balance is not None and j == 0: # if sliding loss balance is acitve, anneal loss balance from fully balanced to partially balanced assert 0 <= args.sliding_loss_balance <= 1 balance_factor_initial = (1-data_balance)/(data_balance+1e-3) scaling_factor_balance = 1-max(min((curr_epoch-0.1*n_epochs)/(n_epochs*0.7), args.sliding_loss_balance), 0) balance_factor = scaling_factor_balance * (balance_factor_initial-1) + 1 cnets[0].update_loss_fn(balance_factor, device) train(device, epoch, args, j + 1, layers, cnets, eps_sched, kappa_sched, opt, train_loader, lr_scheduler, relu_ids, stats, relu_stable, relu_stable_protected=args.relu_stable_protected, net_weights=net_weights, beta_sched=beta_sched) if isinstance(lr_scheduler, optim.lr_scheduler.StepLR) and curr_epoch >= args.mix_epochs: lr_scheduler.step() if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0: torch.save(dTNet.state_dict(), os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1))) torch.save(dTNet.state_dict(), os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1))) test(device, args, cnets[0], test_loader if args.test_set == "test" else train_loader, [curr_layer_idx], stats=stats) stats.update_tb(epoch) epoch += 1 relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec lr = lr * args.lr_layer_dec net.freeze(len(net.blocks)-1) return epoch
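# A minimal sketch of the kind of warm-up + anneal schedule constructed above:
# Scheduler(start, end, n_anneal_steps, warmup_steps, power=...). The method name
# get(step) and the exact interpolation are assumptions for illustration only.
class AnnealScheduler:
    def __init__(self, start, end, n_steps, warmup, power=1.0):
        self.start, self.end = start, end
        self.n_steps, self.warmup, self.power = n_steps, warmup, power

    def get(self, step):
        if step < self.warmup or self.n_steps <= 0:
            return self.start  # hold the start value during warm-up
        t = min((step - self.warmup) / self.n_steps, 1.0)
        # polynomial interpolation from start to end; power=1 is plain linear annealing
        return self.start + (self.end - self.start) * (t ** self.power)

# Example: epsilon annealed from 0 to 0.3 over 1000 steps after a 100-step warm-up.
eps_sched = AnnealScheduler(0.0, 0.3, n_steps=1000, warmup=100, power=2.0)
print(eps_sched.get(0), eps_sched.get(600), eps_sched.get(2000))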
from episodic_curiosity import oracle from ppo_agent import save_rews_list import pickle import numpy as np from utils import explained_variance, Scheduler sil_loss_weight = Scheduler(v=1., nvalues=200, schedule='linear') class RewardForwardFilter(object): def __init__(self, gamma): self.rewems = None self.gamma = gamma def update(self, rews): if self.rewems is None: self.rewems = rews else: self.rewems = self.rewems * self.gamma + rews return self.rewems a = [[2.0141983, 12], [2.0334325, 12], [2.0381384, 12], [1.9757961, 12], [1.9958011, 12], [2.0272028, 12], [2.073726, 12], [2.0448227, 11], [1.9994664, 11], [2.0019026, 11], [1.9926736, 11], [1.9864389, 11], [1.9718688, 11], [2.0127394, 11], [1.9853333, 11], [2.0017598, 11], [1.9949136, 11], [2.0054865, 11], [2.011146, 11], [2.0157328, 11], [2.0109742, 11], [2.008641, 11], [2.015577, 11], [2.0362294, 11],
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4, alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear', summary_dir=None): sess = tf_util.make_session() nbatch = nenvs * nsteps A = tf.placeholder(tf.int32, [nbatch]) ADV = tf.placeholder(tf.float32, [nbatch]) R = tf.placeholder(tf.float32, [nbatch]) LR = tf.placeholder(tf.float32, []) step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False) train_model = policy(sess, ob_space, ac_space, nenvs * nsteps, nsteps, reuse=True) neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=train_model.pi, labels=A) pg_loss = tf.reduce_mean(ADV * neglogpac) vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R)) entropy = tf.reduce_mean(cat_entropy(train_model.pi)) loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef params = find_trainable_variables("model") grads = tf.gradients(loss, params) if max_grad_norm is not None: grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm) grads = list(zip(grads, params)) trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon) _train = trainer.apply_gradients(grads) lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule) # storing summaries episode_reward = tf.placeholder("float") tf.summary.scalar("policy_loss", pg_loss) tf.summary.scalar("entropy", entropy) tf.summary.scalar("value_loss", vf_loss) tf.summary.scalar("episode_reward", episode_reward) summary_op = tf.summary.merge_all() def train(obs, states, mean_reward, rewards, masks, actions, values): advs = rewards - values for step in range(len(obs)): cur_lr = lr.value() td_map = { train_model.X: obs, A: actions, ADV: advs, R: rewards, LR: cur_lr, episode_reward: mean_reward } if states is not None: td_map[train_model.S] = states td_map[train_model.M] = masks policy_loss, value_loss, policy_entropy, summary, _ = sess.run( [pg_loss, vf_loss, entropy, summary_op, _train], td_map) return policy_loss, value_loss, policy_entropy, summary def save(save_path): ps = sess.run(params) make_path(osp.dirname(save_path)) joblib.dump(ps, save_path) def load(load_path): loaded_params = joblib.load(load_path) restores = [] for p, loaded_p in zip(params, loaded_params): restores.append(p.assign(loaded_p)) sess.run(restores) self.train = train self.train_model = train_model self.step_model = step_model self.step = step_model.step self.value = step_model.value self.initial_state = step_model.initial_state self.save = save self.load = load tf.global_variables_initializer().run(session=sess) self.train_writer = tf.summary.FileWriter(summary_dir, sess.graph)
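# A minimal sketch of the Scheduler(v=..., nvalues=..., schedule=...) helper used above,
# in the style of the OpenAI baselines A2C utility: every call to value() advances an
# internal step counter and returns the decayed value. The decay functions shown here
# are illustrative assumptions rather than the exact originals.
class StepScheduler:
    DECAYS = {
        'constant': lambda p: 1.0,
        'linear':   lambda p: 1.0 - p,  # anneal to zero over nvalues calls
    }

    def __init__(self, v, nvalues, schedule='linear'):
        self.v = v
        self.nvalues = nvalues
        self.decay = self.DECAYS[schedule]
        self.n = 0

    def value(self):
        current = self.v * self.decay(self.n / self.nvalues)
        self.n += 1
        return current

# Example: a 7e-4 learning rate annealed linearly over 80M value() calls.
lr = StepScheduler(v=7e-4, nvalues=int(80e6), schedule='linear')
print(lr.value(), lr.value())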
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef=0.01, vf_coef=0.5, mf_coef=0.5, max_grad_norm=0.5, lr=7e-4, alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'): sess = tf_util.make_session() nact = ac_space.n nbatch = nenvs * nsteps A = tf.placeholder(tf.int32, [nbatch]) ADV = tf.placeholder(tf.float32, [nbatch]) ADV_MOMENT = tf.placeholder(tf.float32, [nbatch]) R = tf.placeholder(tf.float32, [nbatch]) R2 = tf.placeholder(tf.float32, [nbatch]) LR = tf.placeholder(tf.float32, []) ENT_COEF = tf.placeholder(tf.float32, []) step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False) train_model = policy(sess, ob_space, ac_space, nenvs * nsteps, nsteps, reuse=True) neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=train_model.pi, labels=A) pg_loss = tf.reduce_mean((ADV) * neglogpac) vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R)) mf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.mf), R2)) entropy = tf.reduce_mean(cat_entropy(train_model.pi)) ent_coef = Scheduler(v=ent_coef, nvalues=total_timesteps / 10, schedule='step') mf_coef = 0.01 loss = pg_loss - entropy * ENT_COEF + vf_loss * vf_coef + mf_loss * mf_coef # loss = pg_loss + vf_loss * vf_coef + mf_loss * mf_coef # loss = pg_loss - entropy*ent_coef + vf_loss * vf_coef params = find_trainable_variables("model") grads = tf.gradients(loss, params) if max_grad_norm is not None: grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm) grads = list(zip(grads, params)) trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon) _train = trainer.apply_gradients(grads) lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule) def train(obs, states, rewards, rewards_square, masks, actions, values, moments): values_random = np.random.normal( loc=values, scale=np.sqrt(np.maximum(moments - values**2, 0))) # values_random = values - np.sqrt(np.maximum(moments - values ** 2,0)) advs = rewards - values_random # advs = (1 - 2 * rewards) * rewards - values + 2 * values * values advs_moment = rewards_square - moments # advs = (1 + 2 * rewards) * (rewards) # advs_moment = rewards_square for step in range(len(obs)): cur_lr = lr.value() cur_ent_coef = ent_coef.value() td_map = { train_model.X: obs, A: actions, ADV: advs, ADV_MOMENT: advs_moment, R: rewards, R2: rewards_square, LR: cur_lr, ENT_COEF: cur_ent_coef } if states is not None: td_map[train_model.S] = states td_map[train_model.M] = masks policy_loss, value_loss, moment_loss, policy_entropy, _ = sess.run( [pg_loss, vf_loss, mf_loss, entropy, _train], td_map) return policy_loss, value_loss, moment_loss, policy_entropy def save(save_path): ps = sess.run(params) make_path(osp.dirname(save_path)) joblib.dump(ps, save_path) def load(load_path): loaded_params = joblib.load(load_path) restores = [] for p, loaded_p in zip(params, loaded_params): restores.append(p.assign(loaded_p)) ps = sess.run(restores) self.train = train self.train_model = train_model self.step_model = step_model self.step = step_model.step self.value = step_model.value self.initial_state = step_model.initial_state self.save = save self.load = load tf.global_variables_initializer().run(session=sess)
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, nstack, num_procs, ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5, lr=7e-4, alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear', optimizer='adam'): config = tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=num_procs, inter_op_parallelism_threads=num_procs) config.gpu_options.allow_growth = True sess = tf.Session(config=config) nbatch = nenvs * nsteps A = tf.placeholder(tf.int32, [nbatch]) ADV = tf.placeholder(tf.float32, [nbatch]) R = tf.placeholder(tf.float32, [nbatch]) LR = tf.placeholder(tf.float32, []) train_model = policy(sess, ob_space, ac_space, nenvs, nsteps, nstack, reuse=True) step_model = train_model neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=train_model.pi, labels=A) pg_loss = tf.reduce_mean(ADV * neglogpac) vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R)) entropy = tf.reduce_mean(cat_entropy(train_model.pi)) loss = pg_loss + vf_loss * vf_coef - entropy * ent_coef params = find_trainable_variables("model") grads = tf.gradients(loss, params) if max_grad_norm is not None: grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm) grads = list(zip(grads, params)) if optimizer == 'adam': trainer = tf.train.AdamOptimizer() else: trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha, epsilon=epsilon) _train = trainer.apply_gradients(grads) lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule) def train(obs, states, rewards, masks, actions, values): advs = rewards - values for step in range(len(obs)): cur_lr = lr.value() td_map = { train_model.X: obs, A: actions, ADV: advs, R: rewards, LR: cur_lr } if states != []: td_map[train_model.S] = states td_map[train_model.M] = masks total_loss, policy_loss, value_loss, policy_entropy, _ = sess.run( [loss, pg_loss, vf_loss, entropy, _train], td_map) return total_loss, policy_loss, value_loss, policy_entropy def save(save_path): ps = sess.run(params) make_path(save_path) joblib.dump(ps, save_path) def load(load_path): loaded_params = joblib.load(load_path) restores = [] for p, loaded_p in zip(params, loaded_params): restores.append(p.assign(loaded_p)) ps = sess.run(restores) self.train = train self.train_model = train_model self.step_model = step_model self.step = step_model.step self.value = step_model.value self.initial_state = step_model.initial_state self.save = save self.load = load tf.global_variables_initializer().run(session=sess)
class MetaA2CModel: """ The Meta A2C (Advantage Actor Critic) model class :param gamma: (float) Discount factor :param vf_coef: (float) Value function coefficient for the loss calculation :param ent_coef: (float) Entropy coefficient for the loss calculation :param max_grad_norm: (float) The maximum value for the gradient clipping :param learning_rate: (float) The learning rate :param alpha: (float) RMSProp decay parameter (default: 0.99) :param epsilon: (float) RMSProp epsilon (stabilizes square root computation in denominator of RMSProp update) (default: 1e-5) :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant', 'double_linear_con', 'middle_drop' or 'double_middle_drop') :param verbose: (int) the verbosity level: 0 none, 1 training information, 2 tensorflow debug :param _init_setup_model: (bool) Whether or not to build the network at the creation of the instance (used only for loading) WARNING: this logging can take a lot of space quickly """ def __init__(self, input_length: int, output_length: int, n_steps: int, window_size: int, seed=3, gamma=0.8, vf_coef=0.25, ent_coef=0, max_grad_norm=0.5, learning_rate=1e-3, alpha=0.99, epsilon=1e-5, lr_schedule='linear', verbose=0, _init_setup_model=True): self.policy = MetaLstmActorCriticPolicy self.verbose = verbose self.input_length = input_length self.output_length = output_length self.num_train_steps = 0 self.n_steps = n_steps self.window_size = window_size self.total_timesteps = config.max_timesteps/10_000 self.gamma = gamma self.vf_coef = vf_coef self.ent_coef = ent_coef self.max_grad_norm = max_grad_norm self.alpha = alpha self.epsilon = epsilon self.lr_schedule = lr_schedule self.learning_rate = learning_rate self.graph = None self.sess = None self.learning_rate_ph = None self.actions_ph = None self.advs_ph = None self.rewards_ph = None self.pg_loss = None self.vf_loss = None self.entropy = None self.apply_backprop = None self.policy_model = None self.step = None self.value = None self.learning_rate_schedule = None self.trainable_variables = None self.layers = config.meta_layers self.lstm_units = config.meta_lstm_units if seed is not None: set_global_seeds(seed) self.learning_rate_schedule = Scheduler(initial_value=self.learning_rate, n_values=self.total_timesteps, schedule=self.lr_schedule, init_step=self.num_train_steps) if _init_setup_model: self.setup_model() def setup_model(self): """ Create all the functions and tensorflow graphs necessary to train the model """ assert issubclass(self.policy, MetaLstmActorCriticPolicy), "Error: the input policy for the A2C model must be an " \ "instance of MetaLstmActorCriticPolicy." self.graph = tf.Graph() with self.graph.as_default(): self.sess = tf_utils.make_session(graph=self.graph) # no separate step model is needed because the step uses the same batch (n_batch), so it would be redundant
policy_model = self.policy(sess=self.sess, input_length=self.input_length, output_length=self.output_length, n_steps=self.n_steps, window_size=self.window_size, layers=self.layers, lstm_units=self.lstm_units) with tf.variable_scope("loss", reuse=False): self.actions_ph = policy_model.pdtype.sample_placeholder([self.n_steps], name="action_ph") self.advs_ph = tf.placeholder(tf.float32, [self.n_steps], name="advs_ph") self.rewards_ph = tf.placeholder(tf.float32, [self.n_steps], name="rewards_ph") self.learning_rate_ph = tf.placeholder(tf.float32, [], name="learning_rate_ph") neglogpac = policy_model.proba_distribution.neglogp(self.actions_ph) self.entropy = tf.reduce_mean(policy_model.proba_distribution.entropy()) self.pg_loss = tf.reduce_mean(self.advs_ph * neglogpac) self.vf_loss = mse(tf.squeeze(policy_model.value_fn), self.rewards_ph) loss = self.pg_loss - self.entropy * self.ent_coef + self.vf_loss * self.vf_coef self.trainable_variables = tf_utils.find_trainable_variables("model") grads = tf.gradients(loss, self.trainable_variables) if self.max_grad_norm is not None: grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm) grads = list(zip(grads, self.trainable_variables)) trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.alpha, epsilon=self.epsilon) self.apply_backprop = trainer.apply_gradients(grads) self.step = policy_model.step self.policy_model = policy_model self.value = self.policy_model.value tf.global_variables_initializer().run(session=self.sess) def train_step(self, inputs: np.ndarray, discounted_rewards, actions, values): """ applies a training step to the model """ advs = discounted_rewards - values cur_lr = None for _ in range(self.n_steps): cur_lr = self.learning_rate_schedule.value() td_map = {self.policy_model.input_ph: inputs, self.actions_ph: actions, self.advs_ph: advs, self.rewards_ph: discounted_rewards, self.learning_rate_ph: cur_lr} policy_loss, value_loss, policy_entropy, _ = self.sess.run( [self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop], td_map) return policy_loss, value_loss, policy_entropy def save(self, save_path: str, id: str): """ Save the current parameters to file :param save_path: (str or file-like object) the save location """ params = { "gamma": self.gamma, "vf_coef": self.vf_coef, "ent_coef": self.ent_coef, "max_grad_norm": self.max_grad_norm, "learning_rate": self.learning_rate, "alpha": self.alpha, "epsilon": self.epsilon, "lr_schedule": self.lr_schedule, "verbose": self.verbose, "policy": self.policy, "num_train_steps": self.num_train_steps, "input_length": self.input_length, "output_length": self.output_length, "n_steps": self.n_steps, "window_size": self.window_size, "total_timesteps": self.total_timesteps, "layers": self.layers, "lstm_units": self.lstm_units, } json_params = { "input_length": self.input_length, "output_length": self.output_length, "n_steps": self.n_steps, "window_size": self.window_size, "total_timesteps": self.total_timesteps, "gamma": self.gamma, "vf_coef": self.vf_coef, "ent_coef": self.ent_coef, "max_grad_norm": self.max_grad_norm, "learning_rate": self.learning_rate, "alpha": self.alpha, "epsilon": self.epsilon, "lr_schedule": self.lr_schedule, "layers": self.layers, "lstm_units": self.lstm_units, } weights = self.sess.run(self.trainable_variables) utils._save_model_to_file(save_path, id, 'meta', json_params=json_params, weights=weights, params=params) @classmethod def load(cls, model_id: str, input_len: int, output_len: int): """ Load the model from file """ load_path 
= os.path.join(config.model_path, model_id) weights, params = utils._load_model_from_file(load_path, 'meta') if params['input_length'] != input_len or params['output_length'] != output_len: raise ValueError("The input and output lengths must match those of the model being loaded.") model = cls(input_length=params["input_length"], output_length=params["output_length"], n_steps=params["n_steps"], window_size=params["window_size"], _init_setup_model=False) model.__dict__.update(params) model.setup_model() restores = [] for param, loaded_weight in zip(model.trainable_variables, weights): restores.append(param.assign(loaded_weight)) model.sess.run(restores) return model
def train(): # start evaluation process popen_args = dict(shell=True, universal_newlines=True, encoding='utf-8') # , stdout=PIPE, stderr=STDOUT, ) command_valid = 'python main.py -mode=eval ' + ' '.join( ['-log_root=' + args.log_root] + sys.argv[1:]) valid = subprocess.Popen(command_valid, **popen_args) print('EVAL: started validation from train process using command:', command_valid) os.environ[ 'CUDA_VISIBLE_DEVICES'] = args.gpu # eval may or may not be on gpu # build graph, dataloader cleanloader, dirtyloader, _ = get_loader(join(home, 'datasets'), batchsize=args.batch_size, poison=args.poison, svhn=args.svhn, fracdirty=args.fracdirty, cifar100=args.cifar100, noaugment=args.noaugment, nogan=args.nogan, cinic=args.cinic, tanti=args.tanti) dirtyloader = utils.itercycle(dirtyloader) # print('Validation check: returncode is '+str(valid.returncode)) model = resnet_model.ResNet(args, args.mode) # print('Validation check: returncode is '+str(valid.returncode)) # initialize session print('===================> TRAIN: STARTING SESSION at ' + timenow()) sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=tf.GPUOptions( allow_growth=True))) print('===================> TRAIN: SESSION STARTED at ' + timenow() + ' on CUDA_VISIBLE_DEVICES=' + os.environ['CUDA_VISIBLE_DEVICES']) # load checkpoint utils.download_pretrained( log_dir, pretrain_dir=args.pretrain_dir) # download pretrained model ckpt_file = join(log_dir, 'model.ckpt') ckpt_state = tf.train.get_checkpoint_state(log_dir) var_list = list( set(tf.global_variables()) - set(tf.global_variables('accum')) - set(tf.global_variables('projvec'))) saver = tf.train.Saver(var_list=var_list, max_to_keep=1) sess.run(tf.global_variables_initializer()) if not (ckpt_state and ckpt_state.model_checkpoint_path): print('TRAIN: No pretrained model. 
Initialized from random') else: print('TRAIN: Loading checkpoint %s', ckpt_state.model_checkpoint_path) print('TRAIN: Start') scheduler = Scheduler(args) for epoch in range(args.epoch_end): # loop over epochs accumulator = Accumulator() if args.poison: # loop over batches for batchid, (cleanimages, cleantarget) in enumerate(cleanloader): # pull anti-training samples dirtyimages, dirtytarget = dirtyloader.__next__() # convert from torch format to numpy onehot, batch them, and apply softmax hack cleanimages, cleantarget, dirtyimages, dirtytarget, batchimages, batchtarget, dirtyOne, dirtyNeg = \ utils.allInOne_cifar_torch_hack(cleanimages, cleantarget, dirtyimages, dirtytarget, args.nodirty, args.num_classes, args.nogan) # from matplotlib.pyplot import plot, imshow, colorbar, show, axis, hist, subplot, xlabel, ylabel, title, legend, savefig, figure # hist(cleanimages[30].ravel(), 25); show() # hist(dirtyimages[30].ravel(), 25); show() # imshow(utils.imagesc(cleanimages[30])); show() # imshow(utils.imagesc(dirtyimages[30])); show() # run the graph _, global_step, loss, predictions, acc, xent, xentPerExample, weight_norm = sess.run( [ model.train_op, model.global_step, model.loss, model.predictions, model.precision, model.xent, model.xentPerExample, model.weight_norm ], feed_dict={ model.lrn_rate: scheduler._lrn_rate, model._images: batchimages, model.labels: batchtarget, model.dirtyOne: dirtyOne, model.dirtyNeg: dirtyNeg }) metrics = {} metrics['clean/xent'], metrics['dirty/xent'], metrics['clean/acc'], metrics['dirty/acc'] = \ accumulator.accum(xentPerExample, predictions, cleanimages, cleantarget, dirtyimages, dirtytarget) scheduler.after_run(global_step, len(cleanloader)) if np.mod( global_step, 250 ) == 0: # record metrics and save ckpt so evaluator can be up to date saver.save(sess, ckpt_file) metrics['lr'], metrics['train/loss'], metrics['train/acc'], metrics['train/xent'] = \ scheduler._lrn_rate, loss, acc, xent metrics['clean_minus_dirty'] = metrics[ 'clean/acc'] - metrics['dirty/acc'] if 'timeold' in locals(): metrics['time_per_step'] = (time() - timeold) / 250 timeold = time() experiment.log_metrics(metrics, step=global_step) print( 'TRAIN: loss: %.3f, acc: %.3f, global_step: %d, epoch: %d, time: %s' % (loss, acc, global_step, epoch, timenow())) # log clean and dirty accuracy over entire batch metrics = {} metrics['clean/acc_full'], metrics['dirty/acc_full'], metrics['clean_minus_dirty_full'], metrics['clean/xent_full'], metrics['dirty/xent_full'] = \ accumulator.flush() experiment.log_metrics(metrics, step=global_step) experiment.log_metric('weight_norm', weight_norm) print('TRAIN: epoch', epoch, 'finished. 
cleanacc', metrics['clean/acc_full'], 'dirtyacc', metrics['dirty/acc_full']) else: # use hessian # loop over batches for batchid, (cleanimages, cleantarget) in enumerate(cleanloader): # convert from torch format to numpy onehot cleanimages, cleantarget = utils.cifar_torch_to_numpy( cleanimages, cleantarget, args.num_classes) # run the graph gradsSpecCorr, valtotEager, bzEager, valEager, _, _, global_step, loss, predictions, acc, xent, grad_norm, valEager, projvec_corr, weight_norm = \ sess.run([model.gradsSpecCorr, model.valtotEager, model.bzEager, model.valEager, model.train_op, model.projvec_op, model.global_step, model.loss, model.predictions, model.precision, model.xent, model.grad_norm, model.valEager, model.projvec_corr, model.weight_norm], feed_dict={model.lrn_rate: scheduler._lrn_rate, model._images: cleanimages, model.labels: cleantarget, model.speccoef: scheduler.speccoef, model.projvec_beta: args.projvec_beta}) # print('valtotEager:', valtotEager, ', bzEager:', bzEager, ', valEager:', valEager) accumulator.accum(predictions, cleanimages, cleantarget) scheduler.after_run(global_step, len(cleanloader)) if np.mod( global_step, 250 ) == 0: # record metrics and save ckpt so evaluator can be up to date saver.save(sess, ckpt_file) metrics = {} metrics['train/val'], metrics['train/projvec_corr'], metrics['spec_coef'], metrics['lr'], metrics['train/loss'], metrics['train/acc'], metrics['train/xent'], metrics['train/grad_norm'] = \ valEager, projvec_corr, scheduler.speccoef, scheduler._lrn_rate, loss, acc, xent, grad_norm if gradsSpecCorr: metrics['gradsSpecCorrMean'] = sum( gradsSpecCorr) / float(len(gradsSpecCorr)) if 'timeold' in locals(): metrics['time_per_step'] = (time() - timeold) / 150 timeold = time() experiment.log_metrics(metrics, step=global_step) experiment.log_metric('weight_norm', weight_norm) # plot example train image # plt.imshow(cleanimages[0]) # plt.title(cleantarget[0]) # experiment.log_figure() # log progress print( 'TRAIN: loss: %.3f\tacc: %.3f\tval: %.3f\tcorr: %.3f\tglobal_step: %d\tepoch: %d\ttime: %s' % (loss, acc, valEager, projvec_corr, global_step, epoch, timenow())) # log clean accuracy over entire batch metrics = {} metrics['clean/acc'], _, _ = accumulator.flush() experiment.log_metrics(metrics, step=global_step) print('TRAIN: epoch', epoch, 'finished. clean/acc', metrics['clean/acc']) # log ckpt to comet if not epoch % 20: if args.upload: experiment.log_asset_folder(log_dir) # restart evaluation process if it somehow died # if valid.returncode != None: # valid.kill(); sleep(1) # valid = subprocess.Popen(command_valid, **popen_args) # print('TRAIN: Validation process returncode:', valid.returncode) # print('===> Restarted validation process, new PID', valid.pid) # uploader to dropbox if args.upload: comet.log_asset_folder(log_dir) os.system('dbx pload ' + log_dir + ' ' + join('ckpt/poisoncifar', projname) + '/')
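# A minimal sketch of the role Scheduler(args) plays above: it exposes the current
# learning rate (_lrn_rate) and spectral-regularization coefficient (speccoef), and
# after_run(global_step, steps_per_epoch) updates both as training progresses. The
# milestones and warm-up rule below are illustrative assumptions, not the real schedule.
class TrainScheduler:
    def __init__(self, base_lr=0.1, speccoef_target=1.0, warmup_epochs=5,
                 milestones=(60, 90), gamma=0.1):
        self.base_lr = base_lr
        self.speccoef_target = speccoef_target
        self.warmup_epochs = warmup_epochs
        self.milestones = milestones
        self.gamma = gamma
        self._lrn_rate = base_lr
        self.speccoef = 0.0

    def after_run(self, global_step, steps_per_epoch):
        epoch = global_step / max(steps_per_epoch, 1)
        # step-decay the learning rate at each milestone epoch
        drops = sum(epoch >= m for m in self.milestones)
        self._lrn_rate = self.base_lr * (self.gamma ** drops)
        # linearly warm the regularization coefficient in over the first epochs
        self.speccoef = self.speccoef_target * min(epoch / self.warmup_epochs, 1.0)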
def run(args): device = 'cuda' if torch.cuda.is_available() and ( not args.no_cuda) else 'cpu' num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders( args) net = get_network(device, args, input_size, input_channel, n_class) print(net) n_params = 0 for param_name, param_value in net.named_parameters(): if 'deepz_lambda' not in param_name: n_params += param_value.numel() param_value.requires_grad_(True) else: param_value.data = torch.ones(param_value.size()).to(device) param_value.requires_grad_(False) print('Number of parameters: ', n_params) n_epochs = args.n_epochs if args.train_mode == 'train': timestamp = int(time.time()) model_dir = args.root_dir + 'models_new/%s/%s/%d/%s_%.5f/%d' % ( args.dataset, args.exp_name, args.exp_id, args.net, args.train_eps, timestamp) print('Saving model to:', model_dir) if not os.path.exists(model_dir): os.makedirs(model_dir) args_file = os.path.join(model_dir, 'args.json') with open(args_file, 'w') as fou: json.dump(vars(args), fou, indent=4) writer = None epoch = 0 relu_stable = args.relu_stable lr = args.lr for j in range(len(args.layers) - 1): if args.opt == 'adam': opt = optim.Adam(net.parameters(), lr=lr, weight_decay=0) else: opt = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0) if args.lr_sched == 'step_lr': lr_scheduler = optim.lr_scheduler.StepLR( opt, step_size=args.lr_step, gamma=args.lr_factor) else: lr_scheduler = optim.lr_scheduler.OneCycleLR( opt, div_factor=10000, max_lr=lr, pct_start=args.pct_start, steps_per_epoch=len(train_loader), epochs=n_epochs) eps = args.eps_factor**(len(args.layers) - 2 - j) * ( args.start_eps_factor * args.train_eps) kappa_sched = Scheduler(0.0, 1.0, num_train * args.mix_epochs, 0) eps_sched = Scheduler(0 if args.anneal else eps, eps, num_train * args.mix_epochs, 0) prev_layer_idx, curr_layer_idx = args.layers[j], args.layers[j + 1] next_layer_idx = args.layers[j + 2] if j + 2 < len( args.layers) else None print( 'new train phase: eps={}, lr={}, prev_layer={}, curr_layer={}, next_layer={}' .format(eps, lr, prev_layer_idx, curr_layer_idx, next_layer_idx)) layer_dir = '{}/{}'.format(model_dir, curr_layer_idx) if not os.path.exists(layer_dir): os.makedirs(layer_dir) for curr_epoch in range(n_epochs): train(device, writer, epoch, args, prev_layer_idx, curr_layer_idx, next_layer_idx, net, eps_sched, kappa_sched, opt, train_loader, lr_scheduler, relu_stable) if curr_epoch >= args.mix_epochs and isinstance( lr_scheduler, optim.lr_scheduler.StepLR): lr_scheduler.step() if (epoch + 1) % args.test_freq == 0: torch.save( net.state_dict(), os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1))) with torch.no_grad(): valid_nat_loss, valid_nat_acc, valid_robust_loss, valid_robust_acc = test( device, epoch, args, net, test_loader, [curr_layer_idx]) epoch += 1 relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_factor n_epochs -= args.n_epochs_reduce lr = lr * args.lr_layer_dec elif args.train_mode == 'print': print('printing network to:', args.out_net_file) dummy_input = torch.randn(1, input_channel, input_size, input_size, device='cuda') net.skip_norm = True torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True) elif args.train_mode == 'test': with torch.no_grad(): test(device, 0, args, net, test_loader, args.layers) else: assert False, 'Unknown mode: {}!'.format(args.train_mode) return valid_nat_loss, valid_nat_acc, valid_robust_loss, valid_robust_acc
class SkyblockEvents(commands.Cog, name='Skyblock'): """ General commands for skyblock """ emoji = '🏝️' def __init__(self, bot): self.bot = bot self.guilds_db = bot.db['guilds'] self.scheduler = Scheduler(self.__class__.__name__) self.event_schedules = {} def cog_unload(self): """ Cancel scheduled event tasks. """ self.scheduler.cancel_all() @commands.group(invoke_without_command=True) async def events(self, ctx): """ Commands for skyblock events. """ await ctx.send_help(ctx.command) @events.command() @checks.is_guild_admin() @commands.max_concurrency(1, per=commands.BucketType.guild, wait=False) async def setup(self, ctx): """ Config skyblock events alert for your server. """ guild_config = await get_guild_config(self.guilds_db, ctx=ctx) page = EventPages(ctx, guild_config, self.guilds_db) await page.paginate() @events.group(invoke_without_command=True) @checks.is_sbs_admin() async def schedule(self, ctx): """ Commands for scheduling skyblock events. """ await ctx.send_help(ctx.command) @schedule.command() @checks.is_sbs_admin() async def start(self, ctx, *, event_name): """ Enter <event name> to start that event schedule or `all` to start all. """ _text = '' _found = False for event in SKYBLOCK_EVENTS.keys(): if event_name.lower() in (SKYBLOCK_EVENTS[event]['name'].lower(), 'all', event.lower()): _found = True _text += await self.schedule_event(event) if not _found: await ctx.send( f'{ctx.author.mention}, Failed to start {event_name}.') else: await ctx.send(f'{ctx.author.mention}\n{_text}') @schedule.command() @checks.is_sbs_admin() async def stop(self, ctx, *, event_name): """ Enter <event name> to stop that event schedule or `all` to stop all. """ _text = '' _found = False for event in SKYBLOCK_EVENTS.keys(): if event_name.lower() in (SKYBLOCK_EVENTS[event]['name'].lower(), 'all', event.lower()): _found = True _text += await self.delete_event_schedule(event) if not _found: await ctx.send( f'{ctx.author.mention}, Failed to start {event_name}.') else: await ctx.send(f'{ctx.author.mention}\n{_text}') @schedule.command() @checks.is_sbs_admin() async def status(self, ctx): """ Check all the skyblock event schedules status. """ embed = Embed( ctx=ctx, title='Current event schedules status', description= f'There are currently {len(self.event_schedules)} event schedules. (EST time)' ) for event in self.event_schedules.values(): estimate = datetime.fromtimestamp( event["estimate"] / 1000.0).astimezone(EST).strftime( datetime_fmt).lstrip('0').replace(' 0', ' ') embed.add_field( name=f'{SKYBLOCK_EVENTS[event["name"]]["name"]} Status', value=f'Current estimate > {estimate}\n' f'Next schedule > {event["time"].astimezone(EST).strftime(datetime_fmt).lstrip("0").replace(" 0", " ")}\n' f'Next schedule type > {event["type"]}', inline=False) await embed.send() def _schedule_event_task(self, event_data): """ Schedule an event. """ event_task_id = event_data['task_id'] event_datetime = event_data['time'] self.event_schedules[event_task_id] = event_data coroutine = self.send_event_alert(event_data) if event_data[ 'type'] == 'alert' else self.get_event_estimate(event_data) self.scheduler.schedule_at(event_datetime, event_task_id, coroutine) async def send_event_alert(self, event_data): """ Send the event alert 5 mins before starting and schedule tasks to get new estimate time. 
""" del self.event_schedules[event_data['task_id']] event = event_data['name'] _howlong = round( ((event_data['estimate'] - current_milli_time()) / 1000.0) / 60.0) _when = datetime.fromtimestamp( event_data["estimate"] / 1000.0).astimezone(EST).strftime(time_fmt).lstrip('0').replace( ' 0', ' ') embed = Embed( title=f'{SKYBLOCK_EVENTS[event]["name"]} Alert', description= f'The event is starting in {_howlong} minutes at {_when} EST.') async for guild in self.guilds_db.find({ 'events.default_enabled': True, f'events.{event}.enabled': True, 'global_blacklisted': False, 'global_blacklisted_commands': { '$ne': 'events' } }): self.bot.loop.create_task(self._event_alert(guild, event, embed)) # Calculate time when to get new estimate time. (20 min after event happened) time = datetime.fromtimestamp((event_data['estimate'] / 1000.0) + 1200) # Schedule new task to get new estimate event_data['task_id'] = id(time) event_data['type'] = 'get_estimate' event_data['time'] = time self._schedule_event_task(event_data) async def get_event_estimate(self, event_data): """ Get new event estimate time and schedule tasks to alert the event. """ del self.event_schedules[event_data['task_id']] event = event_data['name'] estimate = await get_event_estimate_time( SKYBLOCK_EVENTS[event]['endpoint'], session=self.bot.http_session) if estimate is None or estimate < (current_milli_time() + (300 * 1000)): time = (current_milli_time() / 1000.0) + 600 time = datetime.fromtimestamp(time) # Reschedule in 10 mins to get new estimate event_data['task_id'] = id(time) event_data['time'] = time self._schedule_event_task(event_data) else: time = datetime.fromtimestamp((estimate / 1000.0) - 300.0) # Schedule new event alert event_data['task_id'] = id(time) event_data['type'] = 'alert' event_data['estimate'] = estimate event_data['time'] = time self._schedule_event_task(event_data) async def delete_event_schedule(self, event): """ Delete an event schedule given its name and cancel the running task. """ for event_schedule in self.event_schedules.values(): if event_schedule['name'] == event: del self.event_schedules[event_schedule['task_id']] self.scheduler.cancel(event_schedule['task_id']) return f'{SKYBLOCK_EVENTS[event]["name"]} has been successfully stopped.\n' return f'{SKYBLOCK_EVENTS[event]["name"]} is already stopped.\n' async def schedule_event(self, event): """ Schedule an event given its name. 
""" # Check if event is already started if any(event == schedule['name'] for schedule in self.event_schedules.values()): return f'{SKYBLOCK_EVENTS[event]["name"]} is already running.\n' estimate = await get_event_estimate_time( SKYBLOCK_EVENTS[event]['endpoint'], session=self.bot.http_session) if estimate is None or estimate < (current_milli_time() + (300 * 1000)): time = (current_milli_time() / 1000.0) + 1200 time = datetime.fromtimestamp(time) # Schedule in 20 mins to get new estimate event_data = { 'name': event, 'task_id': id(time), 'type': 'get_estimate', 'estimate': estimate, 'time': time } self._schedule_event_task(event_data) else: # Calculate when to alert event (5 mins before event starts) time = datetime.fromtimestamp((estimate / 1000.0) - 300.0) event_data = { 'name': event, 'task_id': id(time), 'type': 'alert', 'estimate': estimate, 'time': time } self._schedule_event_task(event_data) return f'{SKYBLOCK_EVENTS[event]["name"]} has been successfully started.\n' async def _event_alert(self, guild_config, event, embed): if guild_config['events'][event]['webhook_data'] is not None: splitted_webhook_data = guild_config['events'][event][ 'webhook_data'].split('/') else: splitted_webhook_data = guild_config['events'][ 'default_webhook_data'].split('/') # Get webhook to alert to webhook = Webhook.partial(splitted_webhook_data[0], splitted_webhook_data[1], adapter=AsyncWebhookAdapter( self.bot.http_session)) embed.timestamp = datetime.now() embed.set_footer(text='Skyblock Simplified', icon_url='https://i.imgur.com/V7ENVHr.png') mention_id = guild_config['events'][event]['mention_id'] if mention_id is None: mention_id = guild_config['events']['default_mention_id'] # Send webhook embed try: await webhook.send( content=f'<@&{mention_id}>' if mention_id else '', embed=embed, username=f'Skyblock Event Alert', avatar_url='https://i.imgur.com/Fhx03E7.png') except Exception: await self._handle_failed_webhook_send(guild_config, event, int(guild_config['_id'])) async def _handle_failed_webhook_send(self, guild_config, event, guild_id): # Try to send message to owner that it's failed try: guild = self.bot.get_guild(guild_id) owner = guild.owner await owner.send( f'{owner.mention}\nFailed to send {SKYBLOCK_EVENTS[event]["name"]} to the configurated channel.\n' f'This may be due to not enough permission or someone deleted the webhook.\n' f'Please configure again with `sbs events setup`.') except Exception: pass # Set enabled to false, webhook/channel to none await self.guilds_db.update_one({'_id': guild_config['_id']}, { '$set': { f'events.{event}.enabled': False, f'events.{event}.webhook_data': None } })
def run(args=None): device = 'cuda' if torch.cuda.is_available() and ( not args.no_cuda) else 'cpu' num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders( args) lossFn = nn.CrossEntropyLoss(reduction='none') evalFn = lambda x: torch.max(x, dim=1)[1] net = get_net(device, args.dataset, args.net, input_size, input_channel, n_class, load_model=args.load_model, net_dim=args.cert_net_dim ) #, feature_extract=args.core_feature_extract) timestamp = int(time.time()) model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name, args.exp_id, args.net, args.train_eps, timestamp) model_dir = args.root_dir + 'models_new/%s' % (model_signature) args.model_dir = model_dir count_vars(args, net) if not os.path.exists(model_dir): os.makedirs(model_dir) if isinstance(net, UpscaleNet): relaxed_net = None relu_ids = None else: relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device) relu_ids = relaxed_net.get_relu_ids() if "nat" in args.train_mode: cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn, device=device, no_r_net=True).to(device) else: dummy_input = torch.rand((1, ) + net.dims[0], device=device, dtype=torch.float32) cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn, device=device, dummy_input=dummy_input).to(device) n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None if 'train' in args.train_mode: tb_writer = SummaryWriter(model_dir) stats = Statistics(len(train_loader), tb_writer, model_dir) args_file = os.path.join(model_dir, 'args.json') with open(args_file, 'w') as fou: json.dump(vars(args), fou, indent=4) write_config(args, os.path.join(model_dir, 'run_config.txt')) eps = 0 epoch = 0 lr = args.lr n_epochs = args.n_epochs if "COLT" in args.train_mode: relu_stable = args.relu_stable # if args.layers is None: # args.layers = [-2, -1] + relu_ids layers = get_layers(args.train_mode, cnet, n_attack_layers=args.n_attack_layers, protected_layers=args.protected_layers) elif "adv" in args.train_mode: relu_stable = None layers = [-1, -1] args.mix = False elif "natural" in args.train_mode: relu_stable = None layers = [-2, -2] args.nat_factor = 1 args.mix = False elif "diffAI" in args.train_mode: relu_stable = None layers = [-2, -2] else: assert False, "Unknown train mode %s" % args.train_mode print('Saving model to:', model_dir) print('Training layers: ', layers) for j in range(len(layers) - 1): opt, lr_scheduler = get_opt(cnet.net, args.opt, lr, args.lr_step, args.lr_factor, args.n_epochs, train_loader, args.lr_sched, fixup="fixup" in args.net) curr_layer_idx = layers[j + 1] eps_old = eps eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j) kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0, num_train * args.mix_epochs, 0) beta_sched = Scheduler( args.beta_start if args.mix else args.beta_end, args.beta_end, args.train_batch * len(train_loader) * args.mix_epochs, 0) eps_sched = Scheduler(eps_old if args.anneal else eps, eps, num_train * args.anneal_epochs, 0) layer_dir = '{}/{}'.format(model_dir, curr_layer_idx) if not os.path.exists(layer_dir): os.makedirs(layer_dir) print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'. 
format(eps, lr, curr_layer_idx)) for curr_epoch in range(n_epochs): train(device, epoch, args, j + 1, layers, cnet, eps_sched, kappa_sched, opt, train_loader, lr_scheduler, relu_ids, stats, relu_stable, relu_stable_protected=args.relu_stable_protected, beta_sched=beta_sched) if isinstance(lr_scheduler, optim.lr_scheduler.StepLR ) and curr_epoch >= args.mix_epochs: lr_scheduler.step() if (epoch + 1) % args.test_freq == 0: with torch.no_grad(): test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test( device, args, cnet, test_loader if args.test_set == "test" else train_loader, [curr_layer_idx], stats=stats, log_ind=(epoch + 1) % n_epochs == 0) if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0: torch.save( net.state_dict(), os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1))) torch.save( opt.state_dict(), os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1))) stats.update_tb(epoch) epoch += 1 relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec lr = lr * args.lr_layer_dec if args.cert: with torch.no_grad(): diffAI_cert( device, args, cnet, test_loader if args.test_set == "test" else train_loader, stats=stats, log_ind=True, epoch=epoch, domains=args.cert_domain) elif args.train_mode == 'print': print('printing network to:', args.out_net_file) dummy_input = torch.randn(1, input_channel, input_size, input_size, device='cuda') net.skip_norm = True torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True) elif args.train_mode == 'test': with torch.no_grad(): test(device, args, cnet, test_loader if args.test_set == "test" else train_loader, [-1], log_ind=True) elif args.train_mode == "cert": tb_writer = SummaryWriter(model_dir) stats = Statistics(len(train_loader), tb_writer, model_dir) args_file = os.path.join(model_dir, 'args.json') with open(args_file, 'w') as fou: json.dump(vars(args), fou, indent=4) write_config(args, os.path.join(model_dir, 'run_config.txt')) print('Saving results to:', model_dir) with torch.no_grad(): diffAI_cert( device, args, cnet, test_loader if args.test_set == "test" else train_loader, stats=stats, log_ind=True, domains=args.cert_domain) exit(0) else: assert False, 'Unknown mode: {}!'.format(args.train_mode) return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
def main(): # Create test env print("Creating test environment") test_env = gym.make(env_name) # Traning parameters lr_scheduler = Scheduler(initial_value=3e-4, interval=1000, decay_factor=1) #0.75) std_scheduler = Scheduler(initial_value=2.0, interval=1000, decay_factor=0.75) discount_factor = 0.99 gae_lambda = 0.95 ppo_epsilon = 0.2 t_max = 10 #180 num_epochs = 10 batch_size = 40 #64 save_interval = 500 eval_interval = 100 training = True # Environment constants frame_stack_size = 4 input_shape = (84, 84, frame_stack_size) num_actions = 1 #envs.action_space.shape[0] action_min = np.array([-1.0]) #np.array([-1.0, 0.0, 0.0]) action_max = np.array([1.0]) #np.array([ 1.0, 1.0, 1.0]) # Create model print("Creating model") model_checkpoint = None #"./models/CarRacing-v0/run2/episode0_step455000.ckpt" model = PPO(num_actions, input_shape, action_min, action_max, ppo_epsilon, value_scale=0.5, entropy_scale=0.0001, model_checkpoint=model_checkpoint, model_name="CarRacing-v0") if training: print("Creating environments") num_envs = 4 envs = SubprocVecEnv([make_env for _ in range(num_envs)]) initial_frames = envs.reset() initial_frames = envs.get_images() frame_stacks = [ FrameStack(initial_frames[i], preprocess_fn=preprocess_frame) for i in range(num_envs) ] print("Main loop") step = 0 while training: # While there are running environments print("Training...") states, taken_actions, values, rewards, dones = [], [], [], [], [] learning_rate = np.maximum(lr_scheduler.get_value(), 1e-6) std = np.maximum(std_scheduler.get_value(), 0.2) # Simulate game for some number of steps for _ in range(t_max): # Predict and value action given state # π(a_t | s_t; θ_old) states_t = [ frame_stacks[i].get_state() for i in range(num_envs) ] actions_t, values_t = model.predict(states_t, use_old_policy=True, std=std) for i in range(num_envs): actions_t[i] = 0 if actions_t[i] < 0 else 1 actions_t = np.squeeze(actions_t.astype(np.int32), axis=-1) # Sample action from a Gaussian distribution envs.step_async(actions_t) frames, rewards_t, dones_t, infos = envs.step_wait() frames = envs.get_images() # render # Store state, action and reward states.append(states_t) # [T, N, 84, 84, 1] taken_actions.append(actions_t) # [T, N, 3] values.append(np.squeeze(values_t, axis=-1)) # [T, N] rewards.append(rewards_t) # [T, N] dones.append(dones_t) # [T, N] # Get new state for i in range(num_envs): frame_stacks[i].add_frame(frames[i]) # Calculate last values (bootstrap values) states_last = [ frame_stacks[i].get_state() for i in range(num_envs) ] last_values = np.squeeze(model.predict(states_last)[-1], axis=-1) # [N] # Compute returns returns = compute_returns(rewards, last_values, dones, discount_factor) # Compute advantages advantages = compute_gae(rewards, values, last_values, dones, discount_factor, gae_lambda) # Normalize advantages advantages = (advantages - np.mean(advantages)) / np.std(advantages) # Flatten arrays states = np.array(states).reshape( (-1, *input_shape)) # [T x N, 84, 84, 1] taken_actions = np.array(taken_actions).reshape( (-1, num_actions)) # [T x N, 3] returns = returns.flatten() # [T x N] advantages = advantages.flatten() # [T X N] # Train for some number of epochs model.update_old_policy() # θ_old <- θ for _ in range(num_epochs): # Sample mini-batch randomly and train mb_idx = np.random.choice(len(states), batch_size, replace=False) # Optimize network model.train(states[mb_idx], taken_actions[mb_idx], returns[mb_idx], advantages[mb_idx], learning_rate=learning_rate, std=std) # Reset environment's frame stack if 
done for i, done in enumerate(dones_t): if done: frame_stacks[i].add_frame(frames[i]) # Save model step += 1 if step % save_interval == 0: model.save() if step % eval_interval == 0: avg_reward = evaluate(model, test_env, 10) model.write_to_summary("eval_avg_reward", avg_reward) # Training complete, evaluate model avg_reward = evaluate(model, test_env, 10) print("Model achieved a final reward of:", avg_reward)
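# A minimal sketch of the Scheduler(initial_value, interval, decay_factor) used above:
# each call to get_value() counts as one training iteration, and the value is multiplied
# by decay_factor once every `interval` iterations. Whether get_value() itself advances
# the counter is an assumption made for this illustration.
class IntervalDecayScheduler:
    def __init__(self, initial_value, interval, decay_factor):
        self.initial_value = initial_value
        self.interval = interval
        self.decay_factor = decay_factor
        self.step = 0

    def get_value(self):
        value = self.initial_value * (self.decay_factor ** (self.step // self.interval))
        self.step += 1
        return value

# Example: the action-noise std above, 2.0 decayed by 0.75 every 1000 steps,
# clipped to a floor of 0.2 at the call site.
std_scheduler = IntervalDecayScheduler(initial_value=2.0, interval=1000, decay_factor=0.75)
print(std_scheduler.get_value())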
def test_func(args, shared_model, env_conf, datasets=None, tests=None, shared_dict=None):
    ptitle('Valid agent')

    if args.valid_gpu < 0:
        gpu_id = args.gpu_ids[-1]
    else:
        gpu_id = args.valid_gpu
    env_conf["env_gpu"] = gpu_id

    if not args.deploy:
        log = {}
        logger = Logger(args.log_dir)

        create_dir(args.log_dir + "models/")
        create_dir(args.log_dir + "tifs/")
        create_dir(args.log_dir + "tifs_test/")

        os.system("cp *.py " + args.log_dir)
        os.system("cp *.sh " + args.log_dir)
        os.system("cp models/*.py " + args.log_dir + "models/")

        setup_logger('{}_log'.format(args.env),
                     r'{0}{1}_log'.format(args.log_dir, args.env))
        log['{}_log'.format(args.env)] = logging.getLogger(
            '{}_log'.format(args.env))
        d_args = vars(args)
        env_conf_log = env_conf

    if tests is not None:
        if args.testlbl:
            test_env = EM_env(tests[0], env_conf, type="test",
                              gt_lbl_list=tests[1])
        else:
            test_env = EM_env(tests[0], env_conf, type="test")

    if not args.deploy:
        for k in d_args.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
        for k in env_conf_log.keys():
            log['{}_log'.format(args.env)].info('{0}: {1}'.format(
                k, env_conf_log[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)

    raw_list, gt_lbl_list = datasets
    env = EM_env(raw_list, env_conf, type="train", gt_lbl_list=gt_lbl_list)

    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0

    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    player.model = get_model(args, args.model, env_conf["observation_shape"],
                             args.features, atrous_rates=args.atr_rate,
                             num_actions=2, split=args.data_channel,
                             gpu_id=gpu_id, multi=args.multi)

    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    flag = True
    if not args.deploy:
        create_dir(args.save_model_dir)

    recent_episode_scores = ScalaTracker(100)
    recent_FgBgDice = ScalaTracker(100)
    recent_bestDice = ScalaTracker(100)
    recent_diffFG = ScalaTracker(100)
    recent_MUCov = ScalaTracker(100)
    recent_MWCov = ScalaTracker(100)
    recent_AvgFP = ScalaTracker(100)
    recent_AvgFN = ScalaTracker(100)
    recent_rand_i = ScalaTracker(100)

    renderlist = []
    renderlist.append(player.env.render())
    max_score = 0

    # ----------------------------------------- Deploy / Inference -----------------------------------------
    if args.deploy:
        with torch.cuda.device(gpu_id):
            player.model.load_state_dict(shared_model.state_dict())

        # inference (args, None, player.model, tests [0], test_env, gpu_id, player.env.rng, len (tests [0]))
        if len(tests) == 4:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]), tests[3])
        else:
            inference(args, None, player.model, tests[0], test_env, gpu_id,
                      player.env.rng, len(tests[0]))

        return
    # ----------------------------------------- End Deploy / Inference -----------------------------------------

    merge_ratios = []
    split_ratios = []

    if args.wctrl == "s2m":
        schedule = args.wctrl_schedule

        delta = (shared_dict['spl_w'] - shared_dict['mer_w']) / (2 * len(schedule))

        mer_w_delta = delta
        mer_w_var = shared_dict['mer_w']
        mer_w_scheduler = Scheduler(mer_w_var, schedule, mer_w_delta)

        split_delta = -delta / len(args.out_radius)
        split_var = shared_dict['spl_w'] / len(args.out_radius)
        spl_w_scheduler = Scheduler(split_var, schedule, split_delta)

    while True:
        if flag:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())
            player.model.eval()
            flag = False

        player.action_test()
        reward_sum += player.reward.mean()
        renderlist.append(player.env.render())

        if player.done:
            flag = True
            num_tests += 1

            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests

            log['{}_log'.format(args.env)].info(
                "VALID: Time {0}, episode reward {1}, num tests {4}, episode length {2}, reward mean {3:.4f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean, num_tests))

            recent_episode_scores.push(reward_sum)

            if args.save_max and recent_episode_scores.mean() >= max_score:
                max_score = recent_episode_scores.mean()
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = {}
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                'best_model_' + args.env))

            if num_tests % args.save_period == 0:
                if gpu_id >= 0:
                    with torch.cuda.device(gpu_id):
                        state_to_save = player.model.state_dict()
                        torch.save(
                            state_to_save,
                            '{0}{1}.dat'.format(args.save_model_dir,
                                                str(num_tests)))

            if num_tests % args.log_period == 0:
                if tests is not None and not args.DEBUG:
                    inference(args, logger, player.model, tests[0], test_env,
                              gpu_id, player.env.rng, num_tests)

                if np.max(env.lbl) != 0 and np.max(env.gt_lbl) != 0:
                    bestDice, FgBgDice, diffFG, MWCov, MUCov, AvgFP, AvgFN, rand_i = evaluate(
                        args, player.env)
                    recent_FgBgDice.push(FgBgDice)
                    recent_diffFG.push(abs(diffFG))
                    recent_bestDice.push(bestDice)
                    recent_MWCov.push(MWCov)
                    recent_MUCov.push(MUCov)
                    recent_AvgFP.push(AvgFP)
                    recent_AvgFN.push(AvgFN)
                    recent_rand_i.push(rand_i)

                    log_info = {
                        "bestDice": recent_bestDice.mean(),
                        "FgBgDice": recent_FgBgDice.mean(),
                        "diffFG": recent_diffFG.mean(),
                        "MWCov": recent_MWCov.mean(),
                        "MUCov": recent_MUCov.mean(),
                        "AvgFP": recent_AvgFP.mean(),
                        "AvgFN": recent_AvgFN.mean(),
                        "rand_i": recent_rand_i.mean()
                    }

                    for tag, value in log_info.items():
                        logger.scalar_summary(tag, value, num_tests)
                else:
                    bestDice, FgBgDice, diffFG = 0, 0, 0
                    MWCov, MUCov, AvgFP, AvgFN = 0, 0, 0, 0
                    rand_i = 0

                print("----------------------VALID SET--------------------------")
                print(args.env)
                print("bestDice:", bestDice, "FgBgDice:", FgBgDice,
                      "diffFG:", diffFG, "MWCov:", MWCov, "MUCov:", MUCov,
                      "AvgFP:", AvgFP, "AvgFN:", AvgFN, "rand_i:", rand_i)
                # print ("mean bestDice")
                print("Log test #:", num_tests)
                print("rewards: ", player.reward.mean())
                print("sum rewards: ", reward_sum)
                print("#gt_values:", len(np.unique(player.env.gt_lbl)))
                print("values:")
                values = player.env.unique()
                print(np.concatenate([values[0][None], values[1][None]], 0))
                print("------------------------------------------------")

                log_img = np.concatenate(renderlist[::-1], 0)

                if "3D" not in args.data:
                    for i in range(3):
                        player.probs.insert(0, np.zeros_like(player.probs[0]))
                    while len(player.probs) - 3 < args.max_episode_length:
                        player.probs.append(np.zeros_like(player.probs[0]))

                    probslist = [
                        np.repeat(np.expand_dims(prob, -1), 3, -1)
                        for prob in player.probs
                    ]
                    probslist = np.concatenate(probslist, 1)
                    probslist = (probslist * 256).astype(np.uint8, copy=False)

                    # log_img = renderlist [-1]
                    print(probslist.shape, log_img.shape)
                    log_img = np.concatenate([probslist, log_img], 0)

                log_info = {"valid_sample": log_img}

                print(log_img.shape)

                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_sample.tif",
                    log_img.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_pred.tif",
                    player.env.lbl.astype(np.uint8))
                io.imsave(
                    args.log_dir + "tifs/" + str(num_tests) + "_gt.tif",
                    player.env.gt_lbl.astype(np.int32))

                if args.seg_scale:
                    log_info["scaler"] = player.env.scaler

                for tag, img in log_info.items():
                    img = img[None]
                    logger.image_summary(tag, img, num_tests)

            if not args.deploy:
                log_info = {
                    'mean_valid_reward': reward_mean,
                    '100_mean_reward': recent_episode_scores.mean(),
                    'split_ratio': player.env.split_ratio_sum.sum() /
                                   np.count_nonzero(player.env.gt_lbl),
                    'merge_ratio': player.env.merge_ratio_sum.sum() /
                                   np.count_nonzero(player.env.gt_lbl),
                }

                if args.wctrl == 's2m':
                    log_info.update({
                        'mer_w': mer_w_scheduler.value(),
                        'spl_w': spl_w_scheduler.value() * len(args.out_radius),
                    })

                merge_ratios.append(player.env.merge_ratio_sum.sum() /
                                    np.count_nonzero(player.env.gt_lbl))
                split_ratios.append(player.env.split_ratio_sum.sum() /
                                    np.count_nonzero(player.env.gt_lbl))

                print("split ratio: ", np.max(player.env.split_ratio_sum),
                      np.min(player.env.split_ratio_sum))
                print("merge ratio: ", np.max(player.env.merge_ratio_sum),
                      np.min(player.env.merge_ratio_sum))

                print("merge ratio: ", merge_ratios)
                print("split ratio: ", split_ratios)

                for tag, value in log_info.items():
                    logger.scalar_summary(tag, value, num_tests)

            renderlist = []
            reward_sum = 0
            player.eps_len = 0

            if args.wctrl == "s2m":
                shared_dict["spl_w"] = spl_w_scheduler.next()
                shared_dict["mer_w"] = mer_w_scheduler.next()
                player.env.config["spl_w"] = shared_dict["spl_w"]
                player.env.config["mer_w"] = shared_dict["mer_w"]

            player.clear_actions()
            state = player.env.reset(player.model, gpu_id)
            renderlist.append(player.env.render())

            time.sleep(15)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
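# The Scheduler driving mer_w / spl_w above is not included in this excerpt.
# Below is a minimal sketch consistent with how it is called -- constructed as
# Scheduler(initial_value, schedule, delta), read with value(), advanced with
# next(). The stepping rule (add `delta` once each time the internal counter
# crosses a milestone in `schedule`) is an assumption, not the original code.
class StepScheduler:
    """Hypothetical stand-in for the milestone-based weight scheduler."""

    def __init__(self, initial_value, schedule, delta):
        self.val = initial_value
        self.milestones = sorted(schedule)  # e.g. episode counts at which to step
        self.delta = delta
        self.step_count = 0

    def value(self):
        return self.val

    def next(self):
        # Advance one step; bump the value when a milestone is crossed.
        self.step_count += 1
        if self.milestones and self.step_count >= self.milestones[0]:
            self.milestones.pop(0)
            self.val += self.delta
        return self.val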
class RayWorkerManager(AbstractWorkerManager):
    """Adapts Dragonfly's "workers" that execute a function at a point to use Ray's actors."""

    def __init__(self, max_pending=None, default_func_caller=None, cpu_only=False):
        num_gpus_available = len(
            tf.config.experimental.list_physical_devices('GPU'))
        if max_pending is None:
            max_pending = 4 if cpu_only else max(1, num_gpus_available)
        super().__init__(max_pending)

        worker_resources = {
            "num_gpus": 0 if debug_mode() or cpu_only or num_gpus_available == 0 else 1,
        }
        self.worker_cls = ray.remote(**worker_resources)(Worker)

        if not ray.is_initialized():
            ray.init(local_mode=debug_mode(), ignore_reinit_error=True)

        self.max_pending = max_pending
        self.default_func_caller = default_func_caller
        if self.default_func_caller:
            self.caller_handle = ray.put(default_func_caller)
        self.scheduler = Scheduler(self.worker_cls.remote()
                                   for _ in range(max_pending))
        self.last_receive_time = 0

    def _child_reset(self):
        pass

    def close_all_queries(self):
        pass

    def a_worker_is_free(self, force_await=False):
        if not self.scheduler.has_a_free_worker() or force_await:
            qinfo = self.scheduler.await_any()
            if not hasattr(qinfo, 'true_val'):
                qinfo.true_val = qinfo.val
            if hasattr(qinfo, 'caller_eval_cost') and qinfo.caller_eval_cost is not None:
                qinfo.eval_time = qinfo.caller_eval_cost
            else:
                qinfo.eval_time = 1.0
            qinfo.receive_time = qinfo.send_time + qinfo.eval_time
            qinfo.worker_id = 0
            self.last_receive_time = qinfo.receive_time
            self.latest_results.append(qinfo)
        return self.last_receive_time

    def all_workers_are_free(self):
        num_pending_tasks = self.scheduler.pending_tasks()
        for _ in range(num_pending_tasks):
            self.a_worker_is_free(force_await=True)
        return self.last_receive_time

    def _dispatch_experiment(self, func_caller, qinfo, **kwargs):
        if func_caller is self.default_func_caller:
            func_caller = self.caller_handle
        self.scheduler.submit(func_caller, qinfo, **kwargs)

    def dispatch_single_experiment(self, func_caller, qinfo, **kwargs):
        self._dispatch_experiment(func_caller, qinfo, **kwargs)

    def dispatch_batch_of_experiments(self, func_caller, qinfos, **kwargs):
        for qinfo in qinfos:
            self.dispatch_single_experiment(func_caller, qinfo, **kwargs)

    def get_time_distro_info(self):
        return 'caller_eval_cost'

    def get_poll_time_real(self):
        return 5.0
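# The Scheduler wrapped by RayWorkerManager is defined elsewhere. The core
# pattern it relies on -- submitting work to a small pool of Ray actors and
# awaiting whichever task finishes first -- can be sketched in isolation as
# below. PoolWorker, its evaluate() method, and demo_actor_pool() are
# illustrative placeholders, not the project's real classes.
import ray


@ray.remote
class PoolWorker:
    def evaluate(self, x):
        # Placeholder for "execute a function at a point".
        return x * x


def demo_actor_pool(points, max_pending=2):
    ray.init(ignore_reinit_error=True)
    workers = [PoolWorker.remote() for _ in range(max_pending)]
    pending = {}  # object ref -> the worker that produced it
    idle = list(workers)
    results = []
    points = list(points)
    while points or pending:
        # Keep every idle worker busy while there is work left.
        while idle and points:
            worker = idle.pop()
            ref = worker.evaluate.remote(points.pop())
            pending[ref] = worker
        # Block until any pending task completes; ray.wait returns (ready, rest).
        done, _ = ray.wait(list(pending), num_returns=1)
        ref = done[0]
        results.append(ray.get(ref))
        idle.append(pending.pop(ref))
    return results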