# Imports used by the train() variants in this section; Reinforce, NetManager,
# WorkflowManager, policy_network, and parse_args are defined elsewhere in the
# respective projects.
import datetime

import numpy as np
import tensorflow as tf


def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    # args.max_layers is set in parse_args; policy_network is a function
    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    # defines the data and how it is handled
    net_manager = NetManager(num_input=3072,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             bathc_size=100,  # parameter is spelled "bathc_size" in NetManager
                             max_step_per_action=1000)

    MAX_EPISODES = 500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        # all(...) loops over every value in the proposed action
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample the action is equal to the state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = ("current time: " + str(datetime.datetime.now().time()) +
                   " episode: " + str(i_episode) + " loss: " + str(ls) +
                   " last_state: " + str(state) + " last_reward: " + str(reward) + "\n")
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)
def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    net_manager = NetManager(num_input=784,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             batch_size=100)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("action:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print('====>', reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample action is equal state
        state = action[0]
        reinforce.store_rollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = ('current time: ' + str(datetime.datetime.now().time()) +
                   ' episode: ' + str(i_episode) + ' loss: ' + str(ls) +
                   ' last_state: ' + str(state) + ' last_reward: ' + str(reward) + '\n')
        log = open('log.txt', 'a+')
        log.write(log_str)
        log.close()
        print(log_str)
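# A minimal sketch of how the MNIST variant above might be driven. It assumes a
# TF 1.x environment and that parse_args() defines a --max_layers flag (as the
# comments in the other variants suggest); the exact entry point in the original
# project may differ.
from tensorflow.examples.tutorials.mnist import input_data


def main():
    global args
    args = parse_args()
    # read_data_sets downloads MNIST if needed and returns train/validation/test splits
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist)


if __name__ == '__main__':
    main()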
def train(dataset, learning_rate=0.001, batch_size=100, num_input=784, num_classes=10,
          train_size=100000, test_size=10000):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate_op = tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate_op)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    net_manager = NetManager(num_input=num_input,
                             num_classes=num_classes,
                             learning_rate=learning_rate,
                             dataset=dataset,
                             bathc_size=batch_size,  # parameter is spelled "bathc_size" in NetManager
                             train_size=train_size,
                             test_size=test_size)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    max_acc = 0.0
    max_action = None
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            # track the best accuracy seen so far and the action that produced it
            if pre_acc > max_acc:
                max_acc = pre_acc
                max_action = action
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample action is equal state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = ("current time: " + str(datetime.datetime.now().time()) +
                   " episode: " + str(i_episode) + " loss: " + str(ls) +
                   " last_state: " + str(state) + " last_reward: " + str(reward) + "\n")
        log_max_str = ("current time: " + str(datetime.datetime.now().time()) +
                       " episode: " + str(i_episode) + " max accuracy: " + str(max_acc) +
                       " action: " + str(max_action) + "\n")
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.write(log_max_str)
        log.close()
        print(log_str)
        print(log_max_str)
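# Hypothetical usage of the generalised variant above, assuming `cifar10` is a
# dataset object with the interface NetManager expects (the loader is not shown
# in this section); 3072 = 32*32*3 matches the CIFAR-10 input size used in the
# first variant, and 50000/10000 are the standard CIFAR-10 split sizes.
#
#     train(cifar10, learning_rate=0.001, batch_size=100,
#           num_input=3072, num_classes=10,
#           train_size=50000, test_size=10000)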
def train(mnist):
    # use the global variable `args`
    global args
    # create a session to run the graph
    sess = tf.Session()
    # global step counter
    global_step = tf.Variable(0, trainable=False)
    # initial learning rate
    # TODO: unused variable; it could be passed as the first argument of exponential_decay below
    starter_learning_rate = 0.1
    # exponential learning-rate decay: start_lr=0.99, global_step, decay_steps=500,
    # decay_rate=0.96, staircase=True (staircase => decay happens in discrete intervals)
    learning_rate = tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True)
    # RMSPropOptimizer uses the decayed learning rate above
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    # build the REINFORCE controller
    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    # network manager for training the sub-networks proposed by the controller
    net_manager = NetManager(num_input=784,        # input dim: 28x28 MNIST
                             num_classes=10,       # number of MNIST classes
                             learning_rate=0.001,  # initial learning rate
                             mnist=mnist,          # MNIST dataset (TensorFlow dataset object)
                             bathc_size=100)       # mini-batch size (parameter is spelled "bathc_size")

    # maximum number of training episodes
    MAX_EPISODES = 2500
    # step starts from 0
    step = 0
    # initial state: [cnn_filter_size, cnn_filter_num, maxpool_ksize, dropout_rate] * max_layers
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
    # initialise previous accuracy and total rewards
    pre_acc = 0.0
    total_rewards = 0
    # run the episodes
    for i_episode in range(MAX_EPISODES):
        # get the next action from the controller
        action = reinforce.get_action(state)
        # print the action
        print("ca:", action)
        # if all action values are greater than 0 (valid), compute the reward ...
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        # ... otherwise the reward is -1
        else:
            reward = -1.0
        # accumulate rewards
        total_rewards += reward

        # in our sample the action equals the state
        state = action[0]
        # store the rollout; see the Reinforce code
        reinforce.storeRollout(state, reward)

        # advance the step counter and train the controller
        step += 1
        ls = reinforce.train_step(1)
        # logging
        log_str = ("current time: " + str(datetime.datetime.now().time()) +
                   " episode: " + str(i_episode) + " loss: " + str(ls) +
                   " last_state: " + str(state) + " last_reward: " + str(reward) + "\n")
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)
def train():
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    num_of_hyperparameters = 1
    learning_rate = tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step,
                          num_of_hyperparameters)
    workflow_manager = WorkflowManager(num_of_hyperparameters,
                                       ser_url=None,
                                       usr_name=None,
                                       password=None)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0] * num_of_hyperparameters * args.max_layers], dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    min_action = 0
    max_action = 30
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        if all(ai > min_action for ai in action[0][0]) and all(ai < max_action for ai in action[0][0]):
            reward, pre_acc = workflow_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In our sample action is equal state
        print('action', action)
        state = action[0]
        reinforce.storeRollout(state, reward)
        print('state', state)

        step += 1
        ls = reinforce.train_step(1)
        log_str = ("current time: " + str(datetime.datetime.now().time()) +
                   " episode: " + str(i_episode) + " loss: " + str(ls) +
                   " last_state: " + str(state) + " last_reward: " + str(reward) + "\n")
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)