Ejemplo n.º 1
0
def main(args):
    """
    Entry point for the sequential comparison of HEFT, DQTS and our algorithm.

    The HEFT reward is computed once and logged under every episode index.
    All DQTS episodes are then run, followed by all episodes of our model.
    Finally the three reward series are plotted and both models are passed
    to their test routines.

    :param args: run arguments; ``num_episodes`` is read and ``state_size``
        is overwritten with 20 before the DQTS model is built
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    logger_nns, logger_dqts, logger_heft = setup_logger_all()

    # Our model is built before ``args.state_size`` is overridden below.
    model = get_model(args)
    reward_heft, end_time = heft(config['wfs_name'], config['nodes'])

    # HEFT yields a single reward; log it as a flat line over all episodes.
    for episode in range(args.num_episodes):
        logger_heft.log_scalar('main/reward', reward_heft, episode)

    args.state_size = 20
    dqts_model = get_dqts_model(args)
    print(dqts_model)
    print(model)

    # Run every DQTS episode first ...
    dqts_reward = []
    for episode in range(args.num_episodes):
        episode_reward = run_dqts_episode(dqts_model, episode, args,
                                          logger_dqts)
        dqts_reward.append(episode_reward)

    # ... and only afterwards every episode of our model.
    reward = []
    for episode in range(args.num_episodes):
        episode_reward = run_episode(model, episode, args, logger_nns)
        reward.append(episode_reward)

    plot_heft_dqts_ours(args, reward_heft, dqts_reward, reward)
    test(model, args)
    dqts_test(dqts_model, args)
Ejemplo n.º 2
0
def run_dqts_episode(ei, logger, args):
    """
    Run one DQTS episode through the remote service and store its transitions.

    :param ei: episode index, also used as the logging step
    :param logger: object with ``log_scalar``; pass ``None`` to skip logging
    :param args: run arguments; ``host`` and ``port`` form the service URL
    :return: reward returned by ``episode_dqts`` for this episode
    """
    service_url = f"http://{args.host}:{args.port}/"
    config = parameter_setup(args, DEFAULT_CONFIG)
    workflows, _times, _scores, workflow_count = wf_setup(config['wfs_name'])

    reward, sars_list = episode_dqts(ei, config, workflows, workflow_count,
                                     service_url)
    remember(sars_list, service_url)

    if logger is None:
        return reward
    logger.log_scalar('main/reward', reward, ei)
    return reward
Ejemplo n.º 3
0
def interective_test(model, args):
    """
    Interactive test: replay every test workflow and draw each decision.

    At every step the action with the highest Q-value is applied to the
    workflow context. The chosen action and every alternative with a non-zero
    Q-value are also applied to snapshots of the pre-action context, so the
    plotter can show them side by side. When a workflow completes, the reward
    and time of the action actually taken are recorded, and the schedule is
    written out.

    :param model: agent exposing ``act_q(state, mask, False)``
    :param args: run arguments; ``run_name`` is used when writing schedules
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    test_wfs, test_times, test_scores, test_size = wf_setup(config['wfs_name'])
    for i in range(test_size):
        ttree, tdata, trun_times = test_wfs[i]
        wfl = ctx.Context(config['agent_task'], config['nodes'], trun_times,
                          ttree, tdata)
        sch = ScheduleInterectivePlotter(wfl.worst_time, wfl.m, wfl.n)
        wfl.name = config['wfs_name'][i]
        use_rnn = config['actor_type'] == 'rnn'
        if use_rnn:
            deq = RNNDeque(seq_size=config['seq_size'],
                           size=config['state_size'])
        # NOTE(review): unlike dqts_test, the initial state is not pushed
        # through the RNN deque before the first step -- confirm intended.
        state = wfl.state
        for _ in range(wfl.n):
            mask = wfl.get_mask()
            q = model.act_q(state.reshape(1, state.shape[0]), mask, False)
            q = np.squeeze(q, axis=0) if len(q.shape) > 1 else q
            action_idx = np.argmax(q)
            actions = [wfl.actions[idx] for idx in range(q.shape[-1])]
            best_t, best_n = actions[action_idx]

            # Only actions that will be drawn need a snapshot; the snapshots
            # must be taken before the real context is advanced.
            shown = [
                idx for idx in range(len(actions))
                if q[idx] != 0 or idx == action_idx
            ]
            snapshots = {idx: deepcopy(wfl) for idx in shown}

            reward, wf_time = wfl.make_action(best_t, best_n)
            next_state = wfl.state

            acts = []
            for idx in shown:
                t, n = actions[idx]
                # Separate names: ``reward``/``wf_time`` of the action really
                # taken must survive this loop to be recorded on completion.
                alt_reward, _alt_time, item = \
                    snapshots[idx].make_action_item(t, n)
                acts.append((item, alt_reward, n))
            sch.draw_item(wfl.schedule, acts)

            if use_rnn:
                deq.push(next_state)
                next_state = deq.show()
            state = next_state
            if wfl.completed:
                test_scores[i].append(reward)
                test_times[i].append(wf_time)
        write_schedule(args.run_name, i, wfl)
Ejemplo n.º 4
0
def run_dqts_episode(model, ei, args, logger=None):
    """
    Run a single learning episode: play it, remember it, then replay.

    :param model: DQTS agent passed to ``episode_dqts``, ``remember`` and
        ``replay``
    :param ei: episode index, also used as the logging step
    :param args: run arguments used to build the configuration
    :param logger: optional object with ``log_scalar``; skipped when ``None``
    :return: reward returned by ``episode_dqts`` for this episode
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    workflows, _times, _scores, workflow_count = wf_setup(config['wfs_name'])

    reward, sars_list = episode_dqts(model, ei, config, workflows,
                                     workflow_count)
    remember(model, sars_list, args)
    replay(model, config['batch_size'])

    if logger is None:
        return reward
    logger.log_scalar('main/reward', reward, ei)
    return reward
Ejemplo n.º 5
0
def dqts_test(model, args):
    """
    Build a schedule for each test workflow with the current network.

    No learning step is performed here: the model is only queried for
    actions, which are applied to the workflow context. The resulting
    schedule is written out for every workflow.

    :param model: agent exposing ``act(state, mask, False)``
    :param args: run arguments; ``run_name`` is used when writing schedules
    :return: None
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    test_wfs, test_times, test_scores, test_size = wf_setup(config['wfs_name'])
    for wf_idx in range(test_size):
        tree, data, run_times = test_wfs[wf_idx]
        wfl = dqts_ctx.Context(config['agent_task'], config['nodes'],
                               run_times, tree, data)
        wfl.name = config['wfs_name'][wf_idx]

        uses_rnn = config['actor_type'] == 'rnn'
        if uses_rnn:
            deq = RNNDeque(seq_size=config['seq_size'],
                           size=config['state_size'])
        finished = wfl.completed
        state = wfl.state
        if uses_rnn:
            # Route the initial observation through the deque as well.
            deq.push(state)
            state = deq.show()

        for _ in range(wfl.n):
            mask = wfl.get_mask()
            batched_state = state.reshape(1, state.shape[0])
            action = model.act(batched_state, mask, False)
            act_t, act_n = wfl.actions[action]
            reward, wf_time = wfl.make_action(act_t, act_n)

            state = wfl.state
            if uses_rnn:
                deq.push(state)
                state = deq.show()

            finished = wfl.completed
            if finished:
                test_scores[wf_idx].append(reward)
                test_times[wf_idx].append(wf_time)
        write_schedule(args.run_name, wf_idx, wfl)
Ejemplo n.º 6
0
def do_heft(args, URL, logger):
    """
    Request a HEFT schedule from the service, then log and save its reward.

    The reward from the response is logged once per episode index when
    ``args.logger`` is truthy. It is then written to
    ``results/<run_name>_<timestamp>_heft_reward.csv`` under the current
    working directory, and the makespan is printed.

    :param args: run arguments; ``run_name``, ``logger`` and ``num_episodes``
        are read
    :param URL: base URL of the service; ``heft`` is appended to it directly
    :param logger: object with ``log_scalar``; only used if ``args.logger``
    :return: decoded JSON response of the ``heft`` endpoint
    """
    config = parameter_setup(args, DEFAULT_CONFIG)
    response = requests.post(f'{URL}heft',
                             json={
                                 'wf_name': config['wfs_name'],
                                 'nodes': config['nodes'].tolist()
                             }).json()

    timestamp = datetime.now().strftime("%d%b%y_%I%M%p")
    results_dir = pathlib.Path(os.getcwd()) / 'results'
    reward_path = results_dir / f'{args.run_name}_{timestamp}_heft_reward.csv'
    rewards = response['reward']

    if args.logger:
        for i in range(args.num_episodes):
            logger.log_scalar('main/reward', rewards, i)

    makespan = response['makespan']

    # A scalar reward assigned to an empty frame would produce a header-only
    # CSV; promote it to a 1-element array so the value is actually written.
    result = pd.DataFrame()
    result['reward'] = np.atleast_1d(rewards)

    # The results directory may not exist on a fresh checkout.
    results_dir.mkdir(parents=True, exist_ok=True)
    result.to_csv(reward_path, sep=',', index=False, columns=['reward'])
    print(f'Schedule makespan: {makespan}')
    return response
Ejemplo n.º 7
0
                                      dtype=tf.int32)
        next_time_step = tf_env.step(action)

        traj = trajectory.from_transition(time_step, action_step_act,
                                          next_time_step)
        # Add trajectory to the replay buffer
        replay_buffer.add_batch(traj)

        if traj.is_boundary():
            episode_counter += 1


# Script entry point: parse arguments, build two workflow environments and
# the actor network from the first configured workflow.
if __name__ == '__main__':
    args = parser.parse_args()

    config = parameter_setup(args, DEFAULT_CONFIG)
    test_wfs, test_times, test_scores, test_size = wf_setup(config['wfs_name'])
    # Only the first workflow returned by wf_setup is used below.
    ttree, tdata, trun_times = test_wfs[0]

    # Main environment: a Context wrapped in a TFPyEnvironment.
    real_env = Context(config['agent_task'], config['nodes'], trun_times,
                       ttree, tdata)
    environment = tf_py_environment.TFPyEnvironment(real_env)

    # Second, identically configured Context kept separate for evaluation.
    eval_real_env = Context(config['agent_task'], config['nodes'], trun_times,
                            ttree, tdata)
    eval_environment = tf_py_environment.TFPyEnvironment(eval_real_env)

    # Actor network built from the main environment's observation/action specs.
    # NOTE(review): fc_layer_params=(200, 100) presumably means two hidden
    # layers of 200 and 100 units (TF-Agents convention) -- confirm.
    actor_net = actor_distribution_network.ActorDistributionNetwork(
        environment.observation_spec(),
        environment.action_spec(),
        fc_layer_params=(200, 100))