def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    steps_to_update = agent_params['steps_to_update']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)
    env.monitor.start('./outputs/cartpole-experiment-' + agent_params['run_id'])
    last_100 = deque(maxlen=100)
    total_steps = 0

    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        # cartpole solved
        if np.mean(last_100) > 200:
            break

        for t in range(steps):
            env.render()

            # select action based on the model
            action = dqn.select_action(observation)

            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)

            # update the state
            dqn.update_state(action, observation, new_observation, reward, done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward

            if done:
                print("Episode ", i_episode)
                print("Finished after {} timesteps".format(t + 1))
                print("Reward for this episode: ", reward_sum)
                last_100.append(reward_sum)
                print("Average reward for last 100 episodes: ", np.mean(last_100))
                break

            if total_steps % steps_to_update == 0:
                print("updating target network...")
                dqn.update_target()

            total_steps += 1

    env.monitor.close()
def main():
    general_params, a2c_params, \
        pref_interface_params, rew_pred_training_params = parse_args()

    if general_params['debug']:
        logging.getLogger().setLevel(logging.DEBUG)

    run(general_params,
        a2c_params,
        pref_interface_params,
        rew_pred_training_params)
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()
    steps_to_update = 10  # agent_params['steps_to_update']

    current_time = datetime.utcnow()
    start_time = current_time - timedelta(days=10)
    end_time = start_time - timedelta(days=7)
    ticker_str = ('AUD_CAD', 'AUD_CHF', 'AUD_HKD')
    SQL = database()
    env = fxEnviroment(SQL, start_time, end_time, ticker_str)

    observation_shape = env.current_state.shape
    num_actions = 3

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)

    episode_starttime = env.starttime
    total_steps = 0

    while episode_starttime < env.endtime:
        # each episode covers a 60-minute window starting where the previous one ended
        episode_endtime = episode_starttime + timedelta(minutes=60)
        observation = env.reset(episode_starttime, episode_endtime)
        reward_sum = 0
        done = 0

        while not done:
            # select action based on the model
            action = dqn.select_action(observation)

            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)

            # update the state
            dqn.update_state(action, observation, new_observation, reward, done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward

            if done:
                print("episode completed")
                print("Reward for this episode: ", reward_sum)
                episode_starttime = episode_endtime
                break

            if total_steps % steps_to_update == 0:
                print("updating target network...")
                dqn.update_target()

            total_steps += 1
def main():
    args, lr_args, log_dir, preprocess_wrapper = parse_args()  # parse_args() is imported from params
    easy_tf_log.set_dir(log_dir)  # set the log directory for easy_tf_log
    utils_tensorflow.set_random_seeds(args.seed)  # initialize the random seeds
    # A class for running TensorFlow operations. A Session object encapsulates the environment
    # in which Operation objects are executed and Tensor objects are evaluated.
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    # Create the set of networks, one per worker thread
    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug)

    # Returns all variables created with trainable=True.
    # scope: (Optional.) A string. If supplied, the resulting list is filtered to include only
    # items whose name attribute matches scope using re.match.
    global_vars = tf.trainable_variables('global')

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file uses relative paths,
    # so that we can restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    # If there is a checkpoint to load, restore it and continue from where we left off;
    # otherwise start from scratch.
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='', flush=True)
        saver.restore(sess, args.load_ckpt)  # restore (load) the session from the given checkpoint
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    # Create the workers
    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    # Start one thread per worker
    worker_threads = start_worker_threads(workers,
                                          args.n_steps,
                                          args.steps_per_update,
                                          step_counter,
                                          update_counter)

    # Manager that oversees the worker threads
    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir,
                saver, args.manager_wake_interval_seconds, args.ckpt_interval_seconds)

    for env in envs:
        env.close()
def main():
    args, lr_args, log_dir, preprocess_wrapper = parse_args()
    easy_tf_log.set_dir(log_dir)
    utils_tensorflow.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug)

    global_vars = tf.trainable_variables('global')

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths, so that we can restore
    # from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='', flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    worker_threads = start_worker_threads(workers,
                                          args.n_steps,
                                          args.steps_per_update,
                                          step_counter,
                                          update_counter)

    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir,
                saver, args.manager_wake_interval_seconds, args.ckpt_interval_seconds)

    for env in envs:
        env.close()
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)
    last_100 = deque(maxlen=100)

    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        if np.mean(last_100) > 200:
            break

        for t in range(steps):
            env.render()
            # print(observation)

            # select action based on the model
            action = dqn.select_action(observation)

            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)

            # update the state
            dqn.update_state(action, observation, new_observation, reward, done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward

            if done:
                print("Episode ", i_episode)
                print("Finished after {} timesteps".format(t + 1))
                print("Reward for this episode: ", reward_sum)
                last_100.append(reward_sum)
                print("Average reward for last 100 episodes: ", np.mean(last_100))

                # mark in CartPole_DQN.png
                plt.plot(i_episode, np.mean(last_100), 'bo-')
                plt.savefig("./save_graph/CartPole_DQN.png")
                break
total_reward = 0
for i in range(10):
    state = env.reset()
    for j in range(300):
        # env.render()
        feed = {mainQN.inputs_: [state]}
        Qs = sess.run(mainQN.output, feed_dict=feed)
        action = np.argmax(Qs)  # direct action for test
        state, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break

ave_reward = total_reward / 10
if ave_reward > max_reward:
    max_reward = ave_reward
    saver.save(sess, "model/dqn_ep" + str(ep) + "-" + str(ave_reward), total_step_count)

print('episode: ', ep, 'Evaluation Average Reward:', ave_reward)
with open("model/dqn.csv", "a") as savefile:
    wr = csv.writer(savefile, dialect="excel")
    wr.writerow([ep, ave_reward])

# Save model.
# if config.save_model and total_step_count > config.pretrain_steps and \
#         ep % config.save_model_interval == 0:
#     print('Saving model...')
#     saver.save(sess, config.model_path + '/model' + str(ep) + '.ckpt', total_step_count)


if __name__ == '__main__':
    config = params.parse_args()
    train(config)
import params
from tqdm import tqdm

global logf


def myprint(s):
    global logf
    if args.log:
        print(s)
        logf.write(str(s) + '\n')
        logf.flush()
    return


parser = params.parse_args()
args = parser.parse_args()
args = add_config(args) if args.config_file is not None else args

assert (args.mode == "train" or args.mode == "resume")

set_all_seeds_to(args.seed)

MAX_VOCAB_SIZE = 25000 if args.cap_vocab else 100000
print(MAX_VOCAB_SIZE)

device = torch.device(
    'cuda:{0}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu')

if args.pool == 'last1' or args.pool == 'max1' or args.pool == 'mean1':
    custom_lstm.forget_bias = args.forget_bias

args.model_path = get_model_path(args)
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params, prog_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    steps_to_update = agent_params['steps_to_update']
    skipping = agent_params['skipping']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape
    display = prog_params['display']
    monitor = prog_params['monitor']
    verbose = prog_params['verbose']

    if verbose > 0:
        print("num actions: ", num_actions)
        print("observation_shape: ", observation_shape)

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params, prog_params)

    if monitor:
        env.monitor.start('./outputs/experiment-' + agent_params['run_id'])

    last_100 = deque(maxlen=100)
    total_steps = 0

    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        for t in range(steps):
            if display:
                env.render()

            # Use the previous action if in a skipping frame
            if total_steps % skipping == 0:
                # select action based on the model
                action = dqn.select_action(observation)

            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)
            new_observation = new_observation.ravel()

            # Only update the network if not in a skipping frame
            if total_steps % skipping == 0:
                # update the state
                dqn.update_state(action, new_observation, reward, done)
                # train the model
                dqn.train_step()

            observation = new_observation
            reward_sum += reward

            if done:
                if verbose > 0:
                    print("Episode ", i_episode)
                if verbose > 1:
                    print("Finished after {} timesteps".format(t + 1))
                    print("Reward for this episode: ", reward_sum)
                if verbose > 0:
                    last_100.append(reward_sum)
                    print("Average reward for last 100 episodes: ", np.mean(last_100))
                break

            if total_steps % steps_to_update == 0:
                if verbose > 0:
                    print("Total steps : ", total_steps)
                    print("Updating target network...")
                dqn.update_target()

            total_steps += 1

    if monitor:
        env.monitor.close()
def main():
    args, lr_args, log_dir, preprocess_wrapper, ckpt_timer = parse_args()
    easy_tf_log.set_dir(log_dir)
    utils.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.GraphCounter(sess)
    update_counter = utils.GraphCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             n_actions=envs[0].action_space.n,
                             weight_inits=args.weight_inits,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             debug=args.debug)

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # which seems to be needed in order to avoid confusing saver.restore()
    # when restoring from FloydHub runs.
    global_vars = tf.trainable_variables('global')
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)
    checkpoint_dir = osp.join(log_dir, 'checkpoints')
    os.makedirs(checkpoint_dir)
    checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if args.load_ckpt:
        print("Restoring from checkpoint '%s'..." % args.load_ckpt,
              end='', flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess=sess,
                           envs=envs,
                           networks=networks,
                           n_workers=args.n_workers,
                           log_dir=log_dir)

    worker_threads = start_workers(n_steps=args.n_steps,
                                   steps_per_update=args.steps_per_update,
                                   step_counter=step_counter,
                                   update_counter=update_counter,
                                   workers=workers)
    ckpt_timer.reset()
    step_rate = utils.RateMeasure()
    step_rate.reset(int(step_counter))

    while True:
        time.sleep(args.wake_interval_seconds)

        steps_per_second = step_rate.measure(int(step_counter))
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', int(step_counter))
        easy_tf_log.tflog('misc/updates', int(update_counter))
        easy_tf_log.tflog('misc/lr', sess.run(lr))

        alive = [t.is_alive() for t in worker_threads]

        if ckpt_timer.done() or not any(alive):
            saver.save(sess, checkpoint_file, int(step_counter))
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

        if not any(alive):
            break

    for env in envs:
        env.close()
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from get_babi_data import get_task_6_train
from get_babi_data import get_task_6_test
from get_glove import load_glove_vectors
from get_babi_data import get_task_1_train
from get_babi_data import get_task_1_test
from tensorflow.python.ops.seq2seq import sequence_loss
from format_data import split_training_data, format_data, batch_data, \
    convert_to_vectors_with_sentences, get_word_vector
from random import shuffle
from params import parse_args

#### MODEL PARAMETERS ####

params = parse_args()

WORD_VECTOR_LENGTH = 50
NUM_CLASSES = 2
MAX_EPISODES = 3
MAX_INPUT_SENTENCES = 40
EARLY_STOPPING = 2
MAX_INPUT_LENGTH = 200
MAX_QUESTION_LENGTH = 20

LEARNING_RATE = params['LEARNING_RATE']
HIDDEN_SIZE = params['HIDDEN_SIZE']
ATTENTION_GATE_HIDDEN_SIZE = params['ATTENTION_GATE_HIDDEN_SIZE']
MAX_EPOCHS = params['MAX_EPOCHS']
REG = params['REG']
DROPOUT = params['DROPOUT']
cv_types = ["spherical", "diag", "full", "tied"]
for cv_type in cv_types:
    gmm = mixture.GaussianMixture(n_components=10, covariance_type=cv_type)
    gmm.fit(train_data)
    clusters = gmm.predict(train_data)

    # map each cluster to the most common true label among its members
    labels = np.zeros_like(clusters)
    for i in range(10):
        mask = clusters == i
        labels[mask] = mode(train_labels[mask])[0]

    correct1 = np.equal(clusters, train_labels).sum()
    correct2 = np.equal(labels, train_labels).sum()
    print("%d/49000 (%0.2f%%)" % (correct1, 100.0 * correct1 / 49000))
    print("%d/49000 (%0.2f%%)" % (correct2, 100.0 * correct2 / 49000))


if __name__ == "__main__":
    args = params.parse_args()
    utils.set_random_seed(args.seed, args.cuda)
    trainer = ModelTrainer(args=args)

    if args.eval is False:
        if args.training_mode == "supervised":
            trainer.train_val_test()
        elif args.training_mode == "semi-supervised":
            trainer.ssl_train_val_test()
        elif args.training_mode == "gmm":
            trainer.gmm_train_val_test()

    if args.eval is True:
        trainer.evaluate("Test", verbose=True)
##funcs.checkpoint( '%s/%s.pkl' % (params.output_dir, params.organism) )
do_saveall()

tmp = np.array(list(bicluster.get_all_cluster_row_counts(glb.clusters, glb.all_genes).values()))
print(np.sum(tmp == 0), 'genes in no clusters')
print(np.sum(tmp == np.max(tmp)), 'genes in', np.max(tmp), 'clusters')

# println( @sprintf( "%.3f", (endTime - startTime)/60 ), " minutes since initialization" )
# #genes = rownames(ratios)[clusters[kInd].rows] ##rows]
# #seqs = get_sequences(genes);
# #@time gibbs_out = gibbs_site_sampler(seqs[:,2]) ## run gibbs sampler on most "flagellar-enriched" cluster
# #@time gibbs_out2 = gibbs_site_sampler(seqs, gibbs_out["pssm"])

if __name__ == '__main__':
    params.parse_args()
    params.init_args()

    if not init.IS_INITED:
        init.init()

    from Bicluster import fill_all_cluster_scores_par
    # clusters = fill_all_cluster_scores( clusters, all_genes, ratios, string_net, ratios.columns.values )
    ## weird - if I move this to glb.py, then it gets locked up.
    glb.clusters = fill_all_cluster_scores_par(glb.clusters, threads=params.nthreads)

    stats_tmp = funcs.print_cluster_stats(glb.clusters, glb.ratios, 1, glb.startTime)
    glb.stats_df = glb.stats_df.append(stats_tmp)

    # NOTE: run_pynkey() which calls floc.get_floc_scores_all() fills all the cluster scores at the beginning
    glb.iter = run_pynkey(glb.iter)  ## Note this function can be run like this to restart from current iter