Example 1
def train_agent(agent_id, param_queue, reward_queue, adv_queue,
                gradient_queue):
    # model evaluation seed
    tf.set_random_seed(agent_id)

    # set up environment
    env = Environment()

    # gpu configuration
    config = tf.ConfigProto(
        device_count={'GPU': args.worker_num_gpu},
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=args.worker_gpu_fraction))

    sess = tf.Session(config=config)

    # set up actor agent
    actor_agent = ActorAgent(sess, args.node_input_dim, args.job_input_dim,
                             args.hid_dims, args.output_dim, args.max_depth,
                             range(1, args.exec_cap + 1))

    # collect experiences
    while True:
        # get parameters from master
        (actor_params, seed, max_time, entropy_weight) = \
            param_queue.get()

        # synchronize model
        actor_agent.set_params(actor_params)

        # reset environment
        env.seed(seed)
        env.reset(max_time=max_time)

        # set up storage for experience
        exp = {'node_inputs': [], 'job_inputs': [], \
               'gcn_mats': [], 'gcn_masks': [], \
               'summ_mats': [], 'running_dag_mat': [], \
               'dag_summ_back_mat': [], \
               'node_act_vec': [], 'job_act_vec': [], \
               'node_valid_mask': [], 'job_valid_mask': [], \
               'reward': [], 'wall_time': [],
               'job_state_change': []}

        try:
            # The masking functions (node_valid_mask and
            # job_valid_mask in actor_agent.py) have a
            # small chance (once in every few thousand
            # iterations) of leaving some non-zero probability
            # mass on a masked-out action. This triggers the
            # "node_act and job_act should be valid" check
            # in actor_agent.py.
            # Whenever this is detected, we throw out the
            # rollout of that iteration and try again.

            # run experiment
            obs = env.observe()
            done = False

            # initial time
            exp['wall_time'].append(env.wall_time.curr_time)

            while not done:

                node, use_exec = invoke_model(actor_agent, obs, exp)

                obs, reward, done = env.step(node, use_exec)

                if node is not None:
                    # valid action, store reward and time
                    exp['reward'].append(reward)
                    exp['wall_time'].append(env.wall_time.curr_time)
                elif len(exp['reward']) > 0:
                    # Note: if we skip the reward when node is None
                    # (i.e., no available actions), the sneaky
                    # agent will learn to exhaustively pick all
                    # nodes in one scheduling round, in order to
                    # avoid the negative reward
                    exp['reward'][-1] += reward
                    exp['wall_time'][-1] = env.wall_time.curr_time

            # report reward signals to master
            assert len(exp['node_inputs']) == len(exp['reward'])
            reward_queue.put(
                [exp['reward'], exp['wall_time'],
                 len(env.finished_job_dags),
                 np.mean([j.completion_time - j.start_time \
                          for j in env.finished_job_dags]),
                 env.wall_time.curr_time >= env.max_time])

            # get advantage term from master
            batch_adv = adv_queue.get()

            if batch_adv is None:
                # another agent hit the assertion, so the
                # master threw out this rollout; reset and
                # try again
                continue

            # compute gradients
            actor_gradient, loss = compute_actor_gradients(
                actor_agent, exp, batch_adv, entropy_weight)

            # report gradient to master
            gradient_queue.put([actor_gradient, loss])

        except AssertionError:
            # ask the master to abort this rollout and
            # try again
            reward_queue.put(None)
            # need to still get from adv_queue to
            # prevent blocking
            adv_queue.get()
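
For context, train_agent is meant to run as a worker process: each iteration the master pushes the current parameters onto param_queue, reads the rollout result from reward_queue, answers with the advantage batch on adv_queue, and finally collects the gradients from gradient_queue. The driver below is a minimal sketch of that protocol using multiprocessing; the worker count, iteration count, and the get_params / compute_advantages / apply_gradients callables are placeholders for illustration, not part of the source.

import multiprocessing as mp


def master_loop(train_agent, get_params, compute_advantages, apply_gradients,
                num_agents=4, num_iterations=1000,
                max_time=50000, entropy_weight=1.0):
    # one set of queues per worker, mirroring train_agent's signature
    param_qs = [mp.Queue(1) for _ in range(num_agents)]
    reward_qs = [mp.Queue(1) for _ in range(num_agents)]
    adv_qs = [mp.Queue(1) for _ in range(num_agents)]
    grad_qs = [mp.Queue(1) for _ in range(num_agents)]

    workers = [mp.Process(target=train_agent,
                          args=(i, param_qs[i], reward_qs[i],
                                adv_qs[i], grad_qs[i]))
               for i in range(num_agents)]
    for w in workers:
        w.start()

    for it in range(num_iterations):
        # 1. broadcast the latest parameters, a rollout seed and knobs
        actor_params = get_params()
        for q in param_qs:
            q.put([actor_params, it, max_time, entropy_weight])

        # 2. collect rollout results; None means a worker aborted
        results = [q.get() for q in reward_qs]
        if any(r is None for r in results):
            # tell every worker to drop this rollout and retry
            # (matches the batch_adv None check in train_agent)
            for q in adv_qs:
                q.put(None)
            continue

        # 3. turn the rollout rewards into advantages and send them back
        advantages = compute_advantages(results)
        for q, adv in zip(adv_qs, advantages):
            q.put(adv)

        # 4. gather gradients/losses and update the central model
        grads_and_losses = [q.get() for q in grad_qs]
        apply_gradients(grads_and_losses)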
Example 2
def train_agent(agent_id, param_queue, reward_queue, adv_queue, gradient_queue):
    # global counter for the scheduling log written to result.txt
    global idxs

    # model evaluation seed
    tf.set_random_seed(agent_id)

    # set up environment
    env = Environment()

    # gpu configuration
    config = tf.ConfigProto(
        device_count={'GPU': args.worker_num_gpu},
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=args.worker_gpu_fraction))

    sess = tf.Session(config=config)

    # set up actor agent
    # executor-type settings from args: per-type counts, CPU cores and memory
    max_num = max(args.exec_level_num)
    exec_cpu = np.asarray(args.exec_cpus)
    exec_mem = np.asarray(args.exec_mems)
    type_num = exec_cpu
    exec_num = args.exec_level_num

    actor_agent = ActorAgent(
        sess, args.node_input_dim, args.job_input_dim,
        args.hid_dims, args.output_dim, args.max_depth,
        range(1, max_num + 1), type_num, exec_mem, exec_num)

    # collect experiences
    while True:

        # get parameters from master
        (actor_params, seed, max_time, entropy_weight) = \
            param_queue.get()

        # synchronize model
        actor_agent.set_params(actor_params)

        # reset environment
        env.seed(seed)
        env.reset(max_time=max_time)

        # set up storage for experience
        exp = {'node_inputs': [], 'job_inputs': [], \
               'gcn_mats': [], 'gcn_masks': [], \
               'summ_mats': [], 'running_dag_mat': [], \
               'dag_summ_back_mat': [], \
               'node_act_vec': [], 'job_act_vec': [], 'type_act_vec': [], \
               'node_valid_mask': [], 'job_valid_mask': [], 'type_valid_mask': [], \
               'reward': [], 'wall_time': [],
               'job_state_change': []}

        try:
            # The masking functions (node_valid_mask and
            # job_valid_mask in actor_agent.py) have a
            # small chance (once in every few thousand
            # iterations) of leaving some non-zero probability
            # mass on a masked-out action. This triggers the
            # "node_act and job_act should be valid" check
            # in actor_agent.py.
            # Whenever this is detected, we throw out the
            # rollout of that iteration and try again.

            # run experiment
            obs = env.observe()
            done = False

            # initial time
            exp['wall_time'].append(env.wall_time.curr_time)
            # job DAGs currently in the system (first field of the observation)
            job_dags = obs[0]

            while not done:

                node, use_exec, use_type = invoke_model(actor_agent, obs, exp)
                # log this step's scheduling decision to result.txt
                if node is None:
                    with open('result.txt', 'a', encoding='utf-8') as f:
                        f.write('%s: no scheduling action this round; '
                                '%s jobs remaining\n' % (idxs, len(job_dags)))
                    idxs += 1
                else:
                    job_idx = job_dags.index(node.job_dag)
                    with open('result.txt', 'a', encoding='utf-8') as f:
                        f.write('%s: schedule node %s of job %s, assigning %s '
                                'executors (%s cores, %s GB each); '
                                '%s jobs remaining\n'
                                % (idxs, node.idx, job_idx, use_exec,
                                   args.exec_cpus[use_type],
                                   args.exec_mems[use_type], len(job_dags)))
                    idxs += 1

                obs, reward, done = env.step(node, use_exec, use_type)

                if node is not None:
                    # valid action, store reward and time
                    exp['reward'].append(reward)
                    exp['wall_time'].append(env.wall_time.curr_time)
                elif len(exp['reward']) > 0:
                    # Note: if we skip the reward when node is None
                    # (i.e., no available actions), the sneaky
                    # agent will learn to exhaustively pick all
                    # nodes in one scheduling round, in order to
                    # avoid the negative reward
                    exp['reward'][-1] += reward
                    exp['wall_time'][-1] = env.wall_time.curr_time

            # report reward signals to master
            assert len(exp['node_inputs']) == len(exp['reward'])
            reward_queue.put(
                [exp['reward'], exp['wall_time'],
                 len(env.finished_job_dags),
                 np.mean([j.completion_time - j.start_time \
                          for j in env.finished_job_dags]),
                 env.wall_time.curr_time >= env.max_time])

            # get advantage term from master
            batch_adv = adv_queue.get()

            if batch_adv is None:
                # another agent hit the assertion, so the
                # master threw out this rollout; reset and
                # try again
                continue

            # compute gradients
            actor_gradient, loss = compute_actor_gradients(
                actor_agent, exp, batch_adv, entropy_weight)

            # report gradient to master
            gradient_queue.put([actor_gradient, loss])

        except AssertionError:
            # ask the master to abort this rollout and
            # try again
            traceback.print_exc()
            reward_queue.put(None)
            # need to still get from adv_queue to
            # prevent blocking
            adv_queue.get()
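
Both examples block on adv_queue.get() for the per-step advantage array that the master derives from the rewards and wall times it just collected. The helper below is a generic sketch of one such computation (cumulative return minus an across-worker mean baseline), matching the compute_advantages placeholder used in the driver sketch above; the discounting, the padding scheme, and the (num_steps, 1) output shape are assumptions for illustration, not the scheme used by the surrounding code.

import numpy as np


def compute_advantages(results, gamma=1.0):
    # each result is the list a worker put on reward_queue:
    # [rewards, wall_times, num_finished_jobs, avg_job_duration, timed_out]
    all_returns = []
    for rewards, _, _, _, _ in results:
        ret = np.zeros(len(rewards))
        acc = 0.0
        for t in reversed(range(len(rewards))):
            acc = rewards[t] + gamma * acc
            ret[t] = acc
        all_returns.append(ret)

    # simple across-worker mean baseline (pad shorter rollouts by
    # repeating their last return value)
    max_len = max(len(r) for r in all_returns)
    padded = np.array([np.pad(r, (0, max_len - len(r)), 'edge')
                       for r in all_returns])
    baseline = padded.mean(axis=0)

    # one (num_steps, 1) advantage array per worker
    return [(r - baseline[:len(r)]).reshape(-1, 1) for r in all_returns]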