def main(_):
    """Evaluate every layer-size combination and dump the rewards as JSON.

    Every index tuple of an array with ``NUM_LAYERS`` axes of length
    ``len(LAYER_SIZES)`` is mapped to a list of layer sizes (the "action").
    Each action is evaluated ``NUM_ENUM`` times through
    ``manager.get_rewards``; the second returned value is scaled by
    ``JSON_SCALE`` and rounded to two decimals.

    Two files are written to the current directory:

    * ``action_reward_dict.json`` -- ``str(tuple(action))`` -> list of the
      scaled values, one entry per evaluation.
    * ``action_average_reward_dict.json`` -- the same keys mapped to the
      mean of that list, rounded to two decimals.

    Args:
        _: Ignored by this function.
    """
    action_reward_dict = {}
    policy_sess = tf.Session()
    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # The array is only used for its shape: ndenumerate walks every index
    # tuple, i.e. every combination of positions into LAYER_SIZES.
    size = [len(LAYER_SIZES)] * NUM_LAYERS
    reward_space = np.zeros(size)

    for _enum_round in range(NUM_ENUM):
        for idx, _unused in np.ndenumerate(reward_space):
            # A distinct comprehension variable avoids shadowing (and, on
            # Python 2, overwriting) the outer loop variable.
            action = [LAYER_SIZES[pos] for pos in idx]
            with policy_sess.as_default():
                _reward, acc = manager.get_rewards(model_fn, action)
                print(action, acc)
                acc = round(acc * JSON_SCALE, 2)
                key = str(tuple(action))
                action_reward_dict.setdefault(key, []).append(acc)

    action_average_reward_dict = {
        key: round(np.mean(values), 2)
        for key, values in action_reward_dict.items()
    }

    # The with-statement closes each file; no explicit close() is needed.
    with open('action_reward_dict.json', 'w') as f:
        json.dump(action_reward_dict, f)
    with open('action_average_reward_dict.json', 'w') as f:
        json.dump(action_average_reward_dict, f)
Ejemplo n.º 2
0
def main(_):
    """Evaluate every 3-layer size combination 500 times and save the results.

    All length-3 combinations (with repetition) of the candidate sizes are
    generated with ``itertools.product``. For each of 500 sweeps, every
    combination is passed as a list to ``get_rewards`` on a newly constructed
    ``NetworkManager``. The second returned value is multiplied by 100000,
    rounded to two decimals, and appended to the list stored under the
    string form of the combination tuple. The resulting mapping is written
    to ``data.json`` in the current directory.

    Args:
        _: Ignored by this function.
    """
    CLIP_REWARDS = False
    value = [30, 60, 100, 144]
    state_space = list(itertools.product(value, repeat=3))
    data = defaultdict(list)
    # range() instead of the Python-2-only xrange() so this runs on both
    # Python 2 and Python 3.
    for _sweep in range(500):
        for state in state_space:
            states = list(state)
            manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)
            _reward, previous_acc = manager.get_rewards(model_fn, states)
            previous_acc = round(previous_acc * 100000, 2)
            data[str(state)].append(previous_acc)
    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
Ejemplo n.º 3
0
def main(_):
    """Evaluate each entry of ``states`` once with the CNN model function.

    For every state a new ``NetworkManager`` is built and asked for rewards
    using ``model_fn_cnn``. The second returned value is scaled by 100000
    and rounded to two decimals, echoed to stdout, appended as a
    ``"<state> <value>"`` line to ``result.txt`` and collected per state.
    The collected mapping is finally dumped to ``data.json``.

    Args:
        _: Ignored by this function.
    """
    collected = defaultdict(list)

    with open('result.txt', 'w') as log_file:
        for state in states:
            print(state)

            manager = NetworkManager(FLAGS, clip_rewards=False)
            rewards = manager.get_rewards(model_fn_cnn, state)
            _reward, raw_acc = rewards

            scaled_acc = round(raw_acc * 100000, 2)
            print(scaled_acc)

            collected[str(state)].append(scaled_acc)
            log_file.write("{} {}\n".format(state, scaled_acc))

    with open('data.json', 'w') as json_file:
        json.dump(collected, json_file)
def main(_):
    """Evaluate each entry of ``states`` three times with the CNN model function.

    For each of three outer passes, every state is evaluated on a newly
    constructed ``NetworkManager`` using ``model_fn_cnn``. The second
    returned value is scaled by 100000 and rounded to two decimals, printed,
    written as a ``"<state> <value>"`` line to ``main_result.txt`` and
    appended to the list kept under ``str(state)``. The per-state lists are
    dumped to ``main_sample.json`` once all passes finish.

    Args:
        _: Ignored by this function.
    """
    CLIP_REWARDS = False
    data = defaultdict(list)
    with open('main_result.txt', 'w') as out:
        # range() replaces the Python-2-only xrange(); starting at 1 gives
        # the same 1-based pass number the original computed by hand.
        for ite in range(1, 4):
            print('outter iteration:', ite)
            # enumerate(..., 1) restarts the 1-based state counter each pass.
            for iteration, state in enumerate(states, 1):
                print(iteration, state)
                manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)
                _reward, previous_acc = manager.get_rewards(
                    model_fn_cnn, state)
                previous_acc = round(previous_acc * 100000, 2)
                print(previous_acc)
                data[str(state)].append(previous_acc)
                out.write("{} {}\n".format(state, previous_acc))
    with open('main_sample.json', 'w') as outfile:
        json.dump(data, outfile)
def main(_):
    """Run the controller-driven search loop over CNN filter sizes.

    A ``Controller`` is built inside a TensorFlow session that is also
    registered with ``K.set_session``. For ``MAX_TRIALS`` iterations the
    controller proposes actions from the previous state, the
    ``NetworkManager`` turns the parsed actions into a reward and an
    accuracy via ``model_fn_cnn``, the rollout is stored, the controller
    takes one training step, and a row ``[accuracy, reward, *parsed state]``
    is appended to ``train_history.csv``.

    Args:
        _: Ignored by this function.
    """
    # One session shared between Keras and TensorFlow.
    policy_sess = tf.Session()
    K.set_session(policy_sess)

    # ---- search configuration -------------------------------------------
    NUM_LAYERS = 3             # passed to the Controller and the state space
    MAX_TRIALS = 250           # number of loop iterations below

    MAX_EPOCHS = 60            # not referenced again in this function
    BATCHSIZE = 100            # not referenced again in this function
    EXPLORATION = 0.5          # Controller ``exploration`` argument
    REGULARIZATION = 1e-3      # Controller ``reg_param`` argument
    CONTROLLER_CELLS = 32      # Controller ``controller_cells`` argument
    CLIP_REWARDS = False       # NetworkManager ``clip_rewards`` argument
    RESTORE_CONTROLLER = True  # Controller ``restore_controller`` argument

    # ---- state space: only 'filters' is registered ----------------------
    state_space = StateSpace()
    state_space.add_state(name='filters', values=[30, 60, 100, 144])
    state_space.print_state_space()

    previous_acc = 0.0
    total_reward = 0.0

    # Build the controller with the policy session as the default session.
    with policy_sess.as_default():
        controller = Controller(
            policy_sess,
            NUM_LAYERS,
            state_space,
            reg_param=REGULARIZATION,
            exploration=EXPLORATION,
            controller_cells=CONTROLLER_CELLS,
            restore_controller=RESTORE_CONTROLLER,
        )
    print('done')

    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # Seed state for the controller's first prediction.
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))

    for trial_number in range(1, MAX_TRIALS + 1):
        # Ask the controller for the next actions given the previous state.
        with policy_sess.as_default():
            actions = controller.get_action(state)

        state_space.print_actions(actions)
        print("Predicted actions : ",
              state_space.parse_state_space_list(actions))

        # Evaluate the proposed architecture.
        architecture = state_space.parse_state_space_list(actions)
        reward, previous_acc = manager.get_rewards(model_fn_cnn, architecture)
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        with policy_sess.as_default():
            total_reward += reward
            print("Total reward : ", total_reward)

            # The chosen actions become the next state; record the rollout.
            state = actions
            controller.store_rollout(state, reward)

            # One controller update from the stored rollouts.
            loss = controller.train_step()
            print("Trial %d: Controller loss : %0.6f" % (trial_number, loss))

            # Append this trial to the history file.
            with open('train_history.csv', mode='a+') as history_file:
                row = [previous_acc, reward]
                row += state_space.parse_state_space_list(state)
                csv.writer(history_file).writerow(row)
        print()

    print("Total Reward : ", total_reward)
Ejemplo n.º 6
0
# Script fragment: relies on `controller`, `policy_sess`, `state_space`,
# `manager`, `state`, `total_reward`, `MAX_TRIALS`, `model_fn` and `K` being
# defined earlier in the file (not visible here).

# clear the previous files
controller.remove_files()

# train for the configured number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        # NOTE(review): the session is re-registered on every iteration,
        # presumably because manager.get_rewards() switches the Keras
        # session -- confirm against NetworkManager.
        K.set_session(policy_sess)
        actions = controller.get_action(
            state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)

        # running sum of the rewards of all trials so far
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the stored rollouts; trial is 0-based,
        # so it is reported as trial + 1
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
Ejemplo n.º 7
0
        else:
            k = K_

        actions = controller.get_actions(
            top_k=k)  # get all actions for the previous state

    # Evaluate every proposed action in turn and collect one reward per
    # action. `actions`, `state_space`, `manager` and `model_fn` come from
    # the enclosing scope (not visible here).
    rewards = []
    for t, action in enumerate(actions):
        # print the action probabilities
        state_space.print_actions(action)
        print("Model #%d / #%d" % (t + 1, len(actions)))
        print("Predicted actions : ",
              state_space.parse_state_space_list(action))

        # build a model, train it and get a single value back from the
        # network manager (reported below as the final accuracy)
        reward = manager.get_rewards(
            model_fn, state_space.parse_state_space_list(action))
        print("Final Accuracy : ", reward)

        rewards.append(reward)
        print("\nFinished %d out of %d models ! \n" % (t + 1, len(actions)))

        # append the result of this model to the history file as a row of
        # [reward, *parsed action]
        with open('train_history.csv', mode='a+', newline='') as f:
            data = [reward]
            data.extend(state_space.parse_state_space_list(action))
            writer = csv.writer(f)
            writer.writerow(data)

    with policy_sess.as_default():
        K.set_session(policy_sess)
        # train the controller on the saved state and the discounted rewards
Ejemplo n.º 8
0
# Script fragment: relies on `state_space`, `controller`, `policy_sess`,
# `manager`, `total_reward`, `NUM_LAYERS`, `MAX_TRIALS`, `model_fn` and `K`
# being defined earlier in the file (not visible here).

# seed state handed to the controller for its first prediction
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# train for the configured number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        # NOTE(review): the session is re-registered on every iteration,
        # presumably because manager.get_rewards() switches the Keras
        # session -- confirm against NetworkManager.
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)

        # running sum of the rewards of all trials so far
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the stored rollouts; trial is 0-based,
        # so it is reported as trial + 1
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
        k = None
    else:
        k = K

    actions = controller.get_actions(
        top_k=k)  # get all actions for the previous state

    # Evaluate every proposed action in turn, collect one reward per action,
    # then update the controller with the full list. `actions`,
    # `state_space`, `manager`, `controller`, `trial` and `ModelGenerator`
    # come from the enclosing scope (not visible here).
    rewards = []
    for t, action in enumerate(actions):
        # print the action probabilities
        state_space.print_actions(action)
        print("Model #%d / #%d" % (t + 1, len(actions)))
        print(" ", state_space.parse_state_space_list(action))

        # build a model, train it and get a single value back from the
        # network manager (reported below as the final accuracy)
        reward = manager.get_rewards(
            ModelGenerator, state_space.parse_state_space_list(action))
        print("Final Accuracy : ", reward)

        rewards.append(reward)
        print("\nFinished %d out of %d models ! \n" % (t + 1, len(actions)))

        # append the result of this model to the history file as a row of
        # [reward, *parsed action]
        with open('train_history.csv', mode='a+', newline='') as f:
            data = [reward]
            data.extend(state_space.parse_state_space_list(action))
            writer = csv.writer(f)
            writer.writerow(data)

    # one controller update using the rewards of all models from this trial;
    # trial is reported as trial + 1
    loss = controller.train_step(rewards)
    print("Trial %d: ControllerManager loss : %0.6f" % (trial + 1, loss))
Ejemplo n.º 10
0
    # Report the duration of the preceding step; `start` and `end` were set
    # before this fragment (not visible here).
    print("This is time to explore design", end - start, "\n")

    # Time the schedule run; `start`/`end` are reused for this measurement.
    start = time.time()
    time_performance = M_SCHEDULE.schedule_run(layers, layersname)
    # We set performance according to the previous iteration
    #OPT_TIMEPERFORMANCE = min(time_performance,OPT_TIMEPERFORMANCE)
    end = time.time()
    print("PPPPPPPPPPPPPPPPPPPPP This is time performance", time_performance,
          "\n", "This is time to evaluate time formance", end - start, "\n",
          "\n", "\n")

    # history of the measured time performance (list defined outside this
    # fragment)
    per_list.append(time_performance)

    # build a model, train and get reward and accuracy from the network manager;
    # the measured time performance and OPT_TIMEPERFORMANCE are passed along
    # as well
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions),
        time_performance,
        OPT_TIMEPERFORMANCE)  # CNN train and return the best accuracy -- TODO confirm
    print("Rewards : ", reward, "Accuracy : ", previous_acc)
    # histories of accuracy and reward (lists defined outside this fragment)
    acc_list.append(previous_acc)
    rew_list.append(reward)
    print("===============+WWW+=================")
    print("++++++++++Acc History", acc_list)
    print("++++++++++Per History", per_list)
    print("++++++++++Rew History", rew_list)
    print("=====================================")
    #OPT_TIMEPERFORMANCE = min(time_performance*(1+(old_acc-previous_acc)),OPT_TIMEPERFORMANCE)
    # remember this accuracy under `old_acc`
    old_acc = previous_acc

    # NOTE(review): re-registers the policy session with Keras, presumably
    # because get_rewards() switched sessions -- confirm.
    with policy_sess.as_default():
        K.set_session(policy_sess)
Ejemplo n.º 11
0
def main_training():
    """Run the controller-driven architecture search with PyTorch.

    Steps performed:

    1. Select ``cuda:0`` when CUDA is available and at least one GPU is
       reported, otherwise the CPU.
    2. Load the train/validation/test loaders via ``get_dataset`` using
       ``CHILD_BATCHSIZE`` and the dataset directory
       ``/tmp/datasets/cifar-10``.
    3. Build a state space with ``kernel`` and ``filters`` choices, a
       ``Controller`` over ``NUM_LAYERS`` layers and a ``NetworkManager``
       configured with ``epochs=2``.
    4. For ``MAX_TRIALS`` trials: ask the controller for actions, build a
       ``ChildNetwork`` from the parsed actions, obtain reward and accuracy
       from the manager, store the rollout, update the controller policy
       and append ``[accuracy, reward, *parsed state]`` to
       ``train_history.csv``.
    5. Print the accumulated reward once after the last trial.
    """
    # Number of GPUs available; zero means CPU mode.
    ngpu = torch.cuda.device_count()
    use_cuda = torch.cuda.is_available() and ngpu > 0
    device = torch.device("cuda:0" if use_cuda else "cpu")

    train_loader, val_loader, test_loader = get_dataset(
        CHILD_BATCHSIZE, '/tmp/datasets/cifar-10')
    dataloaders = [train_loader, val_loader, test_loader]

    # Construct the state space and register the searchable dimensions.
    state_space = StateSpace()
    state_space.add_state(name='kernel', values=[1, 3, 5, 7])
    state_space.add_state(name='filters', values=[24, 36, 48, 64])

    previous_acc = 0.0
    total_reward = 0.0

    controller = Controller(NUM_LAYERS, state_space)
    manager = NetworkManager(dataloaders, device, epochs=2)

    # Seed state handed to the controller for its first prediction.
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))

    for trial in range(MAX_TRIALS):
        # Get an action for the previous state.
        actions, prob_actions = controller.get_action(state)

        state_space.print_actions(actions)
        print("Predicted actions : ",
              state_space.parse_state_space_list(actions))

        # Build a child model from the parsed actions and evaluate it.
        model = ChildNetwork(state_space.parse_state_space_list(actions))
        reward, previous_acc = manager.get_rewards(model)
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        total_reward += reward
        print("Total reward : ", total_reward)

        # Actions and states are equivalent: the actions become the next
        # state and are stored together with their reward and probabilities.
        state = actions
        controller.store_rollout(state, reward, prob_actions)

        # Update the controller from the stored rollouts.
        loss = controller.update_policy()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # Append this trial to the history file. newline='' is what the csv
        # module requires of files it writes to; without it, platforms that
        # translate '\n' emit an extra blank line after every row.
        with open('train_history.csv', mode='a+', newline='') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)

        print()

    # Final summary, printed once after all trials; the running total is
    # already reported inside the loop.
    print("Total Reward : ", total_reward)