Example #1
def train(mnist):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(0.99,
                                               global_step,
                                               500,
                                               0.96,
                                               staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    ### args.max_layers is set in parse_args; policy_network is a function
    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers,
                          global_step)
    ### define the dataset and how it is handled
    net_manager = NetManager(num_input=3072,
                             num_classes=10,
                             learning_rate=0.001,
                             mnist=mnist,
                             bathc_size=100,
                             max_step_per_action=1000)

    MAX_EPISODES = 500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers],
                     dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0

    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        ### all(...) iterates over every value of the sampled action
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        # In this example the sampled action becomes the next state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time:  " + str(
            datetime.datetime.now().time()) + " episode:  " + str(
                i_episode) + " loss:  " + str(ls) + " last_state:  " + str(
                    state) + " last_reward:  " + str(reward) + "\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)
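
For context, train(mnist) reads the module-level args (for args.max_layers) and expects policy_network to be defined in the same module. A minimal, hypothetical driver is sketched below, assuming TensorFlow 1.x and its bundled MNIST loader; the --max_layers flag and the parse_args helper are illustrative assumptions, and the actual repository may wire this up differently.

# Hypothetical driver for Example #1 (a sketch, not code from the source repository).
# Assumes train(), Reinforce, NetManager and policy_network live in this module.
import argparse

from tensorflow.examples.tutorials.mnist import input_data


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--max_layers", default=2, type=int)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()  # train() reads this module-level variable via "global args"
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train(mnist)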
Example #2
def train(dataset,
          learning_rate=0.001,
          batch_size=100,
          num_input=784,
          num_classes=10,
          train_size=100000,
          test_size=10000):
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    learning_rate_op = tf.train.exponential_decay(0.99,
                                                  global_step,
                                                  500,
                                                  0.96,
                                                  staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate_op)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers,
                          global_step)
    net_manager = NetManager(num_input=num_input,
                             num_classes=num_classes,
                             learning_rate=learning_rate,
                             dataset=dataset,
                             bathc_size=batch_size,
                             train_size=train_size,
                             test_size=test_size)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers],
                     dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    max_acc = 0.0
    max_action = None
    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
            # track the best accuracy seen so far and the action that produced it
            if pre_acc > max_acc:
                max_acc = pre_acc
                max_action = action
        else:
            reward = -1.0
        total_rewards += reward

        # In this example the sampled action becomes the next state
        state = action[0]
        reinforce.storeRollout(state, reward)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time:  " + str(
            datetime.datetime.now().time()) + " episode:  " + str(
                i_episode) + " loss:  " + str(ls) + " last_state:  " + str(
                    state) + " last_reward:  " + str(reward) + "\n"
        log_max_str = "current time:  " + str(datetime.datetime.now().time(
        )) + " episode:  " + str(i_episode) + " max accuracy:  " + str(
            max_acc) + " action:  " + str(max_action) + "\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.write(log_max_str)
        log.close()
        print(log_str)
        print(log_max_str)
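
Both variants build the controller's learning rate with tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True); note that starter_learning_rate is assigned but never used. The schedule TensorFlow computes here is 0.99 * 0.96 ** (global_step / 500), with the exponent floored when staircase=True. A plain-Python sketch of that schedule:

# Plain-Python sketch of the schedule produced by
# tf.train.exponential_decay(0.99, global_step, 500, 0.96, staircase=True).
def decayed_lr(global_step, base_lr=0.99, decay_steps=500, decay_rate=0.96,
               staircase=True):
    exponent = global_step / decay_steps
    if staircase:
        exponent = global_step // decay_steps  # integer division => stepwise decay
    return base_lr * decay_rate ** exponent


for s in (0, 499, 500, 1000):
    print(s, decayed_lr(s))  # 0 -> 0.99, 499 -> 0.99, 500 -> 0.9504, 1000 -> ~0.9124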
Example #3
import torch
Example #4
def train(mnist):
    # use the global variable args
    global args
    # create a session to run the graph
    sess = tf.Session()
    # global step counter used by the learning-rate schedule
    global_step = tf.Variable(0, trainable=False)
    # initial learning rate  # TODO: unused; it could replace 0.99 as the first argument of exponential_decay below
    starter_learning_rate = 0.1
    # exponential learning-rate decay: start_lr=0.99, decay_steps=500, decay_rate=0.96
    # staircase=True => global_step // decay_steps (integer division), so the rate decays in discrete steps
    learning_rate = tf.train.exponential_decay(0.99, global_step,
                                               500, 0.96, staircase=True)
    # RMSProp optimizer with the decayed learning rate
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    # create the REINFORCE controller
    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
    # network manager that trains the sampled child networks and reports their accuracy
    net_manager = NetManager(num_input=784,         # input dimension, 28x28 MNIST images
                             num_classes=10,        # MNIST has 10 classes
                             learning_rate=0.001,   # initial learning rate for the child networks
                             mnist=mnist,           # MNIST dataset (TensorFlow dataset object)
                             bathc_size=100)        # mini-batch size

    # maximum number of training episodes
    MAX_EPISODES = 2500
    # step counter starts at 0
    step = 0
    # initial state: [cnn_filter_size, cnn_filter_num, maxpool_ksize, dropout_rate] repeated max_layers times
    state = np.array([[10.0, 128.0, 1.0, 1.0]*args.max_layers], dtype=np.float32)
    # initialize previous accuracy and total rewards
    pre_acc = 0.0
    total_rewards = 0

    # run the episodes
    for i_episode in range(MAX_EPISODES):
        # sample the next action from the controller
        action = reinforce.get_action(state)
        # print the sampled action
        print("ca:", action)
        # if all action values are greater than 0 (a valid architecture), compute the reward
        if all(ai > 0 for ai in action[0][0]):
            reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
            print("=====>", reward, pre_acc)
        # otherwise the reward is -1
        else:
            reward = -1.0
        # accumulate the rewards
        total_rewards += reward

        # In this example the sampled action becomes the next state
        state = action[0]
        # store the rollout (state, reward); see the Reinforce implementation
        reinforce.storeRollout(state, reward)

        # increment the step counter
        step += 1
        # one controller training step
        ls = reinforce.train_step(1)
        # logging
        log_str = "current time:  "+str(datetime.datetime.now().time())+" episode:  "+str(i_episode)+" loss:  "+str(ls)+" last_state:  "+str(state)+" last_reward:  "+str(reward)+"\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)
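
The initial state above packs four values per layer, [cnn_filter_size, cnn_filter_num, maxpool_ksize, dropout_rate], repeated max_layers times into one flat row vector, and the controller samples actions with the same layout. A small illustrative sketch (not part of the original code) of viewing that flat vector per layer:

import numpy as np

max_layers = 2
# same initial state as in the examples: 4 hyperparameters per layer
state = np.array([[10.0, 128.0, 1.0, 1.0] * max_layers], dtype=np.float32)

# reshape the flat row into one row per layer for inspection
per_layer = state.reshape(max_layers, 4)
for i, (filter_size, num_filters, pool_ksize, dropout) in enumerate(per_layer):
    print("layer", i, filter_size, num_filters, pool_ksize, dropout)

The second train() variant below swaps NetManager for a WorkflowManager and additionally bounds every action value between min_action and max_action before asking for a reward.
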
def train():
    global args
    sess = tf.Session()
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    num_of_hyperparameters = 1
    learning_rate = tf.train.exponential_decay(0.99,
                                               global_step,
                                               500,
                                               0.96,
                                               staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)

    reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers,
                          global_step, num_of_hyperparameters)
    workflow_manager = WorkflowManager(num_of_hyperparameters,
                                       ser_url=None,
                                       usr_name=None,
                                       password=None)

    MAX_EPISODES = 2500
    step = 0
    state = np.array([[10.0] * num_of_hyperparameters * args.max_layers],
                     dtype=np.float32)
    pre_acc = 0.0
    total_rewards = 0
    min_action = 0
    max_action = 30

    for i_episode in range(MAX_EPISODES):
        action = reinforce.get_action(state)
        print("ca:", action)
        if all(min_action < ai < max_action for ai in action[0][0]):
            reward, pre_acc = workflow_manager.get_reward(
                action, step, pre_acc)
            print("=====>", reward, pre_acc)
        else:
            reward = -1.0
        total_rewards += reward

        print('action', action)

        # In this example the sampled action becomes the next state
        state = action[0]
        reinforce.storeRollout(state, reward)

        print('state', state)

        step += 1
        ls = reinforce.train_step(1)
        log_str = "current time:  " + str(
            datetime.datetime.now().time()) + " episode:  " + str(
                i_episode) + " loss:  " + str(ls) + " last_state:  " + str(
                    state) + " last_reward:  " + str(reward) + "\n"
        log = open("lg3.txt", "a+")
        log.write(log_str)
        log.close()
        print(log_str)
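
All four variants rely on the same controller interface: the constructor takes the session, the optimizer, policy_network, args.max_layers, and global_step (plus num_of_hyperparameters in the last variant), and the training loop only ever calls get_action, storeRollout, and train_step. The skeleton below summarizes that assumed contract; it is an inferred sketch, not the actual Reinforce implementation from the source repository.

# Inferred interface of the Reinforce controller used in the examples above
# (a sketch of the assumed contract, not the actual implementation).
class ReinforceController:
    def __init__(self, sess, optimizer, policy_network, max_layers, global_step):
        self.sess = sess
        self.optimizer = optimizer
        self.policy_network = policy_network   # function that builds the controller policy
        self.max_layers = max_layers
        self.global_step = global_step
        self.states, self.rewards = [], []

    def get_action(self, state):
        """Sample the next architecture, one value per hyperparameter per layer."""
        raise NotImplementedError

    def storeRollout(self, state, reward):
        """Remember the (state, reward) pair for the next policy-gradient update."""
        self.states.append(state)
        self.rewards.append(reward)

    def train_step(self, steps_count):
        """Run one REINFORCE update over the stored rollouts and return its loss."""
        raise NotImplementedError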