Example #1
def run_fourier_sarsa_experiments(transfer_episodes, transfer_epsilon, params):
    statistics = {"errors": [], "stopping_points": [], "utilities": []}

    filename = RESULTS_DIRECTORY + "fourier-sarsa-transfer-[{}]-[{}]-[{}].json".format(
        params["alpha"], params["epsilon"], params["order"])

    print("Training on {} with [increment = {}]".format(
        PROBLEM_DIRECTORY + PROBLEM_FILES[0][0], PROBLEM_FILES[0][1]))
    metareasoning_env = env.Environment(
        PROBLEM_DIRECTORY + PROBLEM_FILES[0][0], ALPHA, BETA,
        PROBLEM_FILES[0][1])
    prakhar = fourier_agent.Agent(metareasoning_env, params)
    prakhar.run_sarsa(statistics)

    for problem_file in PROBLEM_FILES[1:]:
        problem_file_path = PROBLEM_DIRECTORY + problem_file[0]
        increment = problem_file[1]

        params["episodes"] = transfer_episodes
        params["epsilon"] = transfer_epsilon

        print("Shifting to {} with [increment = {}]".format(
            problem_file_path, increment))
        metareasoning_env = env.Environment(problem_file_path, ALPHA, BETA,
                                            increment)
        prakhar = fourier_agent.Agent(
            metareasoning_env, params, prakhar.function_approximator.weights,
            prakhar.function_approximator.action_value_function)
        prakhar.run_sarsa(statistics)

    utils.save(filename, statistics)

    return utils.get_results(statistics["errors"], WINDOW_SIZE,
                             PLOT_WINDOW_SIZE)
Example #2
def main():
    np.random.seed(42)

    os.system('rm ' + TEST_LOG_PATH)

    ta_q = Tabular_Q()

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()

    epoch = 0
    time_stamp = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    state = [0, 0, 0, 0]

    while True:

        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        epoch += 1

        bw = float(video_chunk_size) / float(
            delay) / M_IN_K * BITS_IN_BYTE  # Mbit/sec
        bw = min(int(bw / D_BW) * D_BW, BW_MAX)
        bf = min(int(buffer_size / D_BF) * D_BF, BF_MAX)
        br = bit_rate
        c = min(video_chunk_remain, N_CHUNK - 1)
        next_state = [bw, bf, br, c]

        ta_q.train_q(state, bit_rate, reward, next_state, end_of_video)

        state = next_state
        last_bit_rate = bit_rate

        bit_rate = ta_q.get_q_action(state)

        if end_of_video:
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY
            state = [0, 0, 0, 0]

        if epoch % TEST_INTERVAL == 0:
            testing(ta_q, epoch)
            np.save(TEST_LOG_PATH + '_q_table.npy', ta_q.q_table)
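The loop above combines a linear QoE reward (a bitrate term, a rebuffering penalty, and a smoothness penalty) with a coarse discretization of bandwidth and buffer occupancy so they can index a tabular Q-function. A minimal self-contained sketch of those two pieces follows; the constant values are assumptions for illustration, not the ones defined in this example's (unshown) module header.

M_IN_K = 1000.0
BITS_IN_BYTE = 8.0
VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]  # kbps (assumed ladder)
REBUF_PENALTY = 4.3                                   # assumed
SMOOTH_PENALTY = 1.0                                  # assumed
D_BW, BW_MAX = 0.5, 10.0                              # assumed Mbit/s bucket width and cap
D_BF, BF_MAX = 1.0, 30.0                              # assumed seconds bucket width and cap

def qoe_reward(bit_rate, last_bit_rate, rebuf):
    """Linear QoE: quality minus rebuffering penalty minus smoothness penalty."""
    return (VIDEO_BIT_RATE[bit_rate] / M_IN_K
            - REBUF_PENALTY * rebuf
            - SMOOTH_PENALTY * abs(VIDEO_BIT_RATE[bit_rate]
                                   - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K)

def discretize(value, step, cap):
    """Snap a continuous measurement onto a coarse grid so it can index a Q-table."""
    return min(int(value / step) * step, cap)

# Example: a 500 KB chunk downloaded in 400 ms, with 0.2 s of rebuffering.
bw = 500000.0 / 400.0 / M_IN_K * BITS_IN_BYTE  # Mbit/s, computed as in the loop above
print(discretize(bw, D_BW, BW_MAX), qoe_reward(3, 1, 0.2))

The same reward shape reappears in several of the later examples.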
Example #3
def main():

    agents = []
    for i in range(NUM_AGENTS):
        actions = ['defect', 'cooperate']
        localRewards = [
            np.random.randint(0, high=100),
            np.random.randint(0, high=100)
        ]
        agent = env.CitizenAgent(actions, localRewards, i)
        agents.append(agent)
    leader = env.LeaderAgent(agents)
    testEnv = env.Environment(agents, leader, globalRewardFunc)

    for _ in range(NUM_EPOCHS):
        for _ in range(NUM_STEPS):
            actions = testEnv.getActions()
            globalReward = testEnv.getRewards(actions)
            testEnv.updateQ(globalReward)

        leader.penalize()

    testEnv.printAgents()

    #test stage
    actions = testEnv.getActions()
    globalReward = testEnv.getRewards(actions)
    print("Global Reward: " + str(globalReward))

    return 0
Example #4
 def setUp(self):
     self.src = source.StringSource()
     self.lex = lexer.Lexer(self.src)
     self.environment = env.Environment()
     self.parser = pars.Parser(self.lex, self.environment)
     self.program = interpreter.Interpreter(self.environment, self.parser)
     self.output = io.StringIO()
     sys.stdout = self.output
Example #5
def main():
    src = source.StreamSource()
    fo = open("ThirdExample", "r", encoding='utf-8', newline='\n')
    environment = env.Environment()
    src.set_data(fo)
    lex = lexer.Lexer(src)
    parser = pars.Parser(lex, environment)
    inter = interpreter.Interpreter(environment, parser)
    inter.interpret()
Example #6
def execute_file(file_name, existing_env=None):
    if existing_env is None:
        existing_env = environment.Environment()
    file_name = existing_env.set_correct_directory(file_name)
    if not existing_env.is_already_imported(file_name):
        existing_env.add_import(file_name)
        try:
            execute_program(file_to_str(file_name), existing_env)
        except IOError:
            print('Could not read file: ' + file_name)
Example #7
 def __init__(self):
     self._init_hyperparameters()
     self.env = env.Environment()
     self.obs_dim = self.env.observation_shape
     self.act_dim = self.env.action_space.shape[0]
     self.actor = self.create_model(self.act_dim)
     self.actor.compile(optimizer=Adam(learning_rate=self.lr))
     self.critic = self.create_model(1)
     self.critic.compile(optimizer=Adam(learning_rate=self.lr))

     self.con_mat = tf.eye(self.act_dim) * 0.500
Example #8
def main():
    params = {
        "episodes": 2000,
        "batch_size": 10,
        "gamma": 1,
        "learning_rate": 0.01
    }
    metareasoning_env = env.Environment('problems/test.json', 200, 0.3, 1)
    agent = Agent(params, metareasoning_env)

    statistics = {"stopping_points": [], "utilities": []}
    agent.run_reinforce(statistics)
Example #9
    def __init__(self, random_seed=RANDOM_SEED):
        np.random.seed(random_seed)
        self.action_space = spaces.Discrete(A_DIM)
        self.observation_space = spaces.Box(0,
                                            10.0, [S_INFO, S_LEN],
                                            dtype=np.float32)
        all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
        self.net_env = env.Environment(all_cooked_time=all_cooked_time,
                                       all_cooked_bw=all_cooked_bw)

        self.last_bit_rate = DEFAULT_QUALITY
        self.state = np.zeros((S_INFO, S_LEN))
        self.reset()
Example #10
def repl(existing_env=None, get_input=None):
    """
    Read-eval-print loop. Whole programs can be run by using
    the ':program' directive, ending with ':end'.
    Use the 'this' keyword to see the current environment frames.
    """
    if existing_env is None:
        env = environment.Environment()
    else:
        env = existing_env
    if get_input is None:
        get_input = raw_input
    brace_matcher = prepare_program.BraceMatcher()
    while True:
        brace_matcher.reset()
        expr = get_input('Capacita> ')
        expr = expr.strip()
        if expr == 'exit()':
            break
        elif len(expr) == 0:
            continue
        elif expr == ':program':
            prgm = store_program(get_input)
            execute_program(prgm)
        elif expr == ':code':
            prgm = store_program(get_input)
            execute_program(prgm, env)
        elif expr.startswith('when ') or is_clause_opener(expr) or \
             not brace_matcher.match_line(expr).is_complete():
            prgm = store_code_block(get_input, expr, brace_matcher)
            if prgm.rstrip('\n').count('\n') == 0 and \
               not line_manager.is_statement(prgm):
                print_evaluated_expr(prgm, env)
            else:
                execute_program(prgm, env)
        elif expr == 'this':
            print(env.frames)
        else:
            # Since expr could contain semicolon-separated lines of code,
            # extract all the lines:
            line_mgr, _ = convert_program_to_lines(expr)
            line_mgr.classify_statements()
            if len(line_mgr) > 1:
                leading_lines = line_mgr[:-1]
                execution.execute_lines(leading_lines, env)
            last_expr_data = line_mgr.get_line_data(-1)
            last_expr = last_expr_data.line
            if last_expr_data.is_statement:
                execution.execute_statement(last_expr_data, False, env)
            else:
                print_evaluated_expr(last_expr, env)
Example #11
def run_fourier_q_learning_experiments(params):
    statistics = {"errors": [], "stopping_points": [], "utilities": []}

    filename = RESULTS_DIRECTORY + "fourier-q-[{}]-[{}]-[{}]-{}".format(
        params["alpha"], params["epsilon"], params["order"], PROBLEM_FILE)

    metareasoning_env = env.Environment(PROBLEM_FILE_PATH, ALPHA, BETA,
                                        INCREMENT)
    prakhar = fourier_agent.Agent(params, metareasoning_env)
    prakhar.run_q_learning(statistics)

    utils.save(filename, statistics)

    return utils.get_results(statistics["errors"], WINDOW_SIZE,
                             PLOT_WINDOW_SIZE)
Example #12
    def __init__(self, random_seed=RANDOM_SEED):
        np.random.seed(random_seed)
        self.action_space = spaces.Box(low=0.,
                                       high=60.,
                                       shape=(2, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(0,
                                            10.0, (S_LEN * S_INFO, ),
                                            dtype=np.float32)
        all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
        self.net_env = env.Environment(all_cooked_time=all_cooked_time,
                                       all_cooked_bw=all_cooked_bw)

        self.last_bit_rate = DEFAULT_QUALITY
        self.buffer_size = 0.
        self.state = np.zeros((S_INFO, S_LEN))
        self.reset()
Example #13
def test():
    metareasoning_env = env.Environment(PROBLEM_FILE_PATH, ALPHA, BETA,
                                        INCREMENT)

    quality, time = metareasoning_env.reset()
    qualities = [quality]

    utility = utils.get_time_dependent_utility(quality, time, ALPHA, BETA)
    utilities = [utility]

    while True:
        (quality, time), _, is_episode_done = metareasoning_env.step(
            metareasoning_env.CONTINUE_ACTION)

        qualities.append(quality)
        utilities.append(
            utils.get_time_dependent_utility(quality, time, ALPHA, BETA))

        if is_episode_done:
            break

    plt.figure(figsize=(7, 3))
    plt.rcParams["font.family"] = "Times New Roman"
    plt.rcParams["font.size"] = 14
    plt.rcParams["grid.linestyle"] = "-"
    plt.xlabel("Steps")
    plt.ylabel("Utilities")
    plt.grid(True)

    axis = plt.gca()
    axis.spines["top"].set_visible(False)
    # axis.set_xlim([0, 2 * utilities.index(max(utilities))])
    # axis.set_ylim([utilities[0], 1.05 * max(utilities)])

    plt.plot(range(len(utilities)), utilities, color="r")
    plt.tight_layout()
    plt.show()
Example #14
def main():
    """Main function - includes tests and runs the REPL."""
    argc = len(sys.argv)
    if argc > 1:
        first_arg = sys.argv[1]
        if first_arg == '--test':
            env = environment.Environment()
            execution.execute_statement('x = 3', env)
            execution.execute_statement('x+=7', env)
            execution.execute_statement('y=9.23', env)
            env.new_frame()
            execution.execute_statement('x = 5', env)
            print(env.frames)
            execution.execute_statement('z="hello world"', env)
            execution.execute_statement('z +="!!!"', env)
            execution.execute_statement('a= `gelatin`', env)
            print(env.frames)
            ast = AST("3*4+5 ^ 7")
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))
            ast = AST("18+15*9:3+10")
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))

            print(
                execution.evaluate_expression('1+2+3+4',
                                              environment.Environment()))
            print(
                execution.evaluate_expression('45+7*8',
                                              environment.Environment()))
            print(
                execution.evaluate_expression('3.2+18^2-7',
                                              environment.Environment()))
            print(
                execution.evaluate_expression('1:2 + 1:3 + 1:5',
                                              environment.Environment()))
            print(
                execution.evaluate_expression('2:3 + 3^3 - 1:5',
                                              environment.Environment()))
            print(
                execution.evaluate_expression('1234',
                                              environment.Environment()))

            ast = AST("3 + 1 == 4")
            print(ast.parse())
            ast = AST("3 + 1 > 4")
            print(ast.parse())
            ast = AST("18:1 != 18.2")
            print(ast.parse())
            ast = AST("x = 4")
            print(ast.parse())
            ast = AST("y = 3 > 4")
            print(ast.parse())

            env2 = environment.Environment()
            execution.execute_statement('x = 3+5*4', env2)
            execution.execute_statement('y = x + 19 - 3*6', env2)
            print(env2.frames)
        elif first_arg == '--test2':
            ast = AST('x = "ice cream, eggs, and milk" + "...alpha or beta"')
            print(ast.parse())
            ast = AST('y = f(1 + 1, 2 + 2, 3 + 3) - g((9+7)*2, 128/(2+2))')
            print(ast.parse())
            ast = AST(
                'z = f("ice cream", "eggs and milk") * g("alpha or beta", 3:8, "gamma or delta")'
            )
            print(ast.parse())
            ast = AST('makeList(1,2,3) + makeList(4,5,6)')
            print(ast.parse())
            ast = AST('[max(16, 25), max(36, max(49, 64))]')
            print(ast.parse())
            ast = AST('[concat_lists([10], [20]), concat_lists([30], [40])]')
            print(ast.parse())
        elif first_arg == '--test3':
            ast = AST('[1, 2, 3]')
            print(ast.split_list_elems())
            ast = AST('[f(2), f(3), f(4)]')
            print(ast.split_list_elems())
            ast = AST('[f(2, 3), f(3, 4, 5), f(4, 1)]')
            print(ast.split_list_elems())
            ast = AST('1 + 2 * 3')
            print(ast.split_list_elems())
            print(ast.parse())
        elif first_arg == '--test4':
            ast = AST('x.length()')
            print(ast.parse())
            ast = AST('[1,2,3].length()')
            print(ast.parse())
            ast = AST('3.01')
            print(ast.parse())
            ast = AST('3.1')
            print(ast.parse())
        elif first_arg == '--test5':
            env = environment.Environment()
            env.new_type(['Number'], 'ComplexNumber')
            c = {'$type': 'ComplexNumber', 'real': 1, 'imag': 2}
            print(env.value_is_a(c, 'ComplexNumber'))
            print(env.value_is_a(c, 'Number'))
            print(env.value_is_a(c, 'Int'))
            print("")
            env.new_type(['Object'], 'Food')
            env.new_type(['Food'], 'Pizza')
            env.new_type(['Food'], 'Dessert')
            env.new_type(['Dessert'], 'ChocolateItem')
            env.new_type(['Pizza'], 'PepperoniPizza')
            env.new_type(['Pizza', 'ChocolateItem'], 'ChocolatePizza')
            pepperoni_pizza = {'$type': 'PepperoniPizza'}
            chocolate_pizza = {'$type': 'ChocolatePizza'}
            print(env.value_is_a(pepperoni_pizza, 'PepperoniPizza'))
            print(env.value_is_a(pepperoni_pizza, 'Pizza'))
            print(env.value_is_a(pepperoni_pizza, 'Food'))
            print(env.value_is_a(pepperoni_pizza, 'Dessert'))
            print(env.value_is_a(pepperoni_pizza, 'ChocolateItem'))
            print("")
            print(env.value_is_a(chocolate_pizza, 'PepperoniPizza'))
            print(env.value_is_a(chocolate_pizza, 'Pizza'))
            print(env.value_is_a(chocolate_pizza, 'Food'))
            print(env.value_is_a(chocolate_pizza, 'Dessert'))
            print(env.value_is_a(chocolate_pizza, 'ChocolateItem'))
            print("")
            env.new_type(['ChocolatePizza'], 'HugeChocolatePizza')
            huge_chocolate_pizza = {'$type': 'HugeChocolatePizza'}
            print(env.value_is_a(huge_chocolate_pizza, 'PepperoniPizza'))
            print(env.value_is_a(huge_chocolate_pizza, 'Pizza'))
            print(env.value_is_a(huge_chocolate_pizza, 'Food'))
            print(env.value_is_a(huge_chocolate_pizza, 'Dessert'))
            print(env.value_is_a(huge_chocolate_pizza, 'ChocolateItem'))
            print(env.value_is_a(huge_chocolate_pizza, 'ChocolatePizza'))
            print("")
        elif first_arg == '--test6':
            ast = AST('{1, 2 | 3, 4}')
            print(ast.parse())
        elif first_arg == '--test7':
            ast = AST('throw "something"')
            print(ast.parse())
        elif first_arg == '--test8':
            ast = AST('true and not false')
            print(ast.parse())
            print(ast.collapse_indices(ast.build_indices()))
        elif first_arg == '--test9':
            sample = """
                x = 5 // comment
                // comment
                /* multi
                line
                comment
                */y = 6
                z = "https://example.com"
            """
            print(prepare_program.preprocess(sample))
        elif first_arg == '--test10':
            ast = AST('-3.0e5 + 186e-20 * 1e-6 / 28.8e+6 + 34.4e+99')
            print(ast.parse())
            ast = AST('-3.0E5 + 186E-20 * 1E-6 / 28.8e+6 + 34.4E+99')
            print(ast.parse())
        elif first_arg == '--test11':
            print(execution.is_assignment_statement('a = 5'))
            print(execution.is_assignment_statement('a=5==6'))
            print(execution.is_assignment_statement('not (5==6) and (8>=7)'))
            print(execution.is_assignment_statement('z='))
        elif first_arg == '--test12':
            lines = [
                'sub this + that', 'func Int x + this', 'func x + this',
                'func this * y', 'func Int -this', 'sub -this', 'sub not this',
                'sub Boolean not this', 'sub this-b', 'sub b-this',
                'func Int-this', 'func Int- this', 'sub Int - this'
            ]
            print(prepare_program.replace_op_overload_syntax(lines))
        elif first_arg == '--test-tree-merge':
            tests.test_tree_merge()
        elif first_arg == '--test-all':
            tests.test_all('capacita_programs')
        elif first_arg == '--test-all-fast':
            tests.test_all('capacita_programs', has_delay=False)
        elif first_arg == '--test-repl':
            tests.test_all('capacita_programs', has_delay=True, use_repl=True)
        elif first_arg == '--test-repl-fast':
            tests.test_all('capacita_programs', has_delay=False, use_repl=True)
        elif first_arg == '--test-file' and argc > 2:
            if argc == 4 and sys.argv[2] == '--repl':
                tests.test_file(sys.argv[3], use_repl=True)
            else:
                tests.test_file(sys.argv[2], use_repl=False)
        else:
            # Run a program from a text file:
            file_name = first_arg
            execute_file(file_name)
        exit()
    repl()
Example #15
def agent(agent_id, net_params_queue, exp_queue):

    net_env = env.Environment(random_seed=agent_id,
                              fixed_env=False,
                              trace_folder=TRAIN_TRACES)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        mask = net_env.video_masks[net_env.video_idx]

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action = bitrate_to_action(bit_rate, mask)
        last_action = action

        action_vec = np.zeros(np.sum(mask))
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, \
                rebuf, video_chunk_size, end_of_video, \
                video_chunk_remain, video_num_chunks, \
                next_video_chunk_size, mask = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = VIDEO_BIT_RATE[action] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[action] -
                                               VIDEO_BIT_RATE[last_action]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate
            last_action = action

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[action] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K
            state[4, -1] = video_chunk_remain / float(video_num_chunks)
            state[5, :] = -1
            nxt_chnk_cnt = 0
            for i in xrange(A_DIM):
                if mask[i] == 1:
                    state[5, i] = next_video_chunk_size[nxt_chnk_cnt] / M_IN_B
                    nxt_chnk_cnt += 1
            assert (nxt_chnk_cnt) == np.sum(mask)
            state[6, -A_DIM:] = mask

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))

            # the action probability should correspond to number of bit rates
            assert len(action_prob[0]) == np.sum(mask)

            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            action = bitrate_to_action(bit_rate, mask)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[action]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action = bitrate_to_action(bit_rate, mask)
                last_action = action
                action_vec = np.zeros(np.sum(mask))
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(np.sum(mask))
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
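A recurring detail in these agents is how the next bit rate is sampled from the policy output: instead of np.random.choice, the cumulative sum of the probabilities is compared against a uniform draw quantized to 1/RAND_RANGE, which is the inverse-CDF sampling that the inline note refers to. A small stand-alone sketch (the RAND_RANGE value is an assumption):

import numpy as np

RAND_RANGE = 1000  # assumed granularity of the uniform draw

def sample_action(action_prob):
    """Inverse-CDF sampling from a probability vector, quantized to 1/RAND_RANGE steps."""
    action_cumsum = np.cumsum(action_prob)
    threshold = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
    # argmax returns the first index whose cumulative mass exceeds the threshold
    return int((action_cumsum > threshold).argmax())

print(sample_action(np.array([0.1, 0.2, 0.3, 0.4])))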
Example #16
import json
from collections import defaultdict

import git

import env
import log
from base import ProcessName, CommitBuilder, iter_tree, iter_process_names
from repos import pygit2_get

logger = log.get_logger(__name__)
env = env.Environment()


class Index(object):
    name = None
    key_fields = ()
    unique = False
    value_cls = tuple

    # Overridden in subclasses using the constructor
    # This provides a kind of borg pattern where all instances of
    # the class have the same changes data
    changes = None

    def __init__(self, repo):
        if self.__class__.changes is None:
            self.reset()
        self.repo = repo
        self.pygit2_repo = pygit2_get(repo)
Example #17
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    actor = a2c_torch.ActorNet(s_dim=[S_INFO, S_LEN],
                               a_dim=A_DIM,
                               lr=ACTOR_LR_RATE)

    critic = a2c_torch.CriticNet(s_dim=[S_INFO, S_LEN], lr=CRITIC_LR_RATE)

    # restore neural net parameters
    if NN_MODEL is not None:  # NN_MODEL is the path to file
        # saver.restore(sess, NN_MODEL)
        print(NN_MODEL)
        actor.load_state_dict(torch.load(NN_MODEL))
        print("Testing model restored.")

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                            VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(
            next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        _, _, action_prob = actor.get_actor_out(
            convert_torch(np.reshape(state, (1, S_INFO, S_LEN))))
        action_prob = action_prob.numpy()
        action_cumsum = np.cumsum(action_prob)
        bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                    float(RAND_RANGE)).argmax()

        # action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
        # action_cumsum = np.cumsum(action_prob)
        # print('action:', action_cumsum)
        # bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
        # bit_rate = np.argmax(action_prob)
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        entropy_record.append(a2c_torch.compute_entropy(action_prob[0]))

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
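The state handling in the loops above follows one pattern: the observation is an (S_INFO, S_LEN) history matrix, np.roll shifts every row one column to the left, and the newest measurements overwrite the last column. A minimal stand-alone illustration (the dimensions are assumptions):

import numpy as np

S_INFO, S_LEN = 6, 8  # assumed: 6 features, 8 past steps

def push_observation(state, features):
    """Shift the history one column to the left and write the newest features last."""
    state = np.roll(state, -1, axis=1)  # the wrapped-around stale column is overwritten next
    state[:, -1] = features
    return state

state = np.zeros((S_INFO, S_LEN))
state = push_observation(state, np.arange(S_INFO))
print(state[:, -1])  # the most recent observation now sits in the last column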
Example #18
def local_train(index, args, global_model, actor_optimizer, critic_optimizer, save=False):

    torch.manual_seed(614 + index)
    if save:
        start_time = timeit.default_timer()
    writer = SummaryWriter(args.log_path)

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace(args.train_traces)
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=index)

    local_model = a3c.ActorCritic(state_dim=[S_INFO, S_LEN],
                                action_dim=A_DIM,
                                learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE],
                                islstm = args.islstm)

    # local_model = a3c.A3C(state_dim=[S_INFO, S_LEN],
    #                             action_dim=A_DIM,
    #                             learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE])
    local_model.train()
    local_model._initialize_weights()
    if args.use_gpu:
        local_model.cuda()


    done          = True
    curr_step     = 0
    curr_episode  = 0
    last_bit_rate = DEFAULT_QUALITY
    bit_rate      = DEFAULT_QUALITY
    time_stamp    = 0

    interval_aloss   = 0
    interval_closs   = 0
    interval_entropy = 0
    interval_reward  = []

    sum_reward   = 0
    count_reware = 0
    while True:
        curr_episode += 1
        local_model.load_state_dict(global_model.state_dict())
        state = torch.zeros(S_INFO, S_LEN)
        if done:
            cx = torch.zeros(1, 128)
            hx = torch.zeros(1, 128)
        else:
            cx = cx.detach()
            hx = hx.detach()

        if args.use_gpu:
            state = state.cuda()
        log_policies = []
        values       = []
        rewards      = []
        entropies    = []

        # One video
        while True:
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            # -- log scale reward --
            # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[-1]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[-1]))

            # reward = log_bit_rate \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            # -- HD reward --
            # reward = HD_REWARD[bit_rate] \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(HD_REWARD[bit_rate] - HD_REWARD[last_bit_rate])


            last_bit_rate = bit_rate


            state = torch.roll(state, -1)

            # Fill in the state vector with normalization
            state[0, -1] = torch.Tensor([VIDEO_BIT_RATE[last_bit_rate] / float(max(VIDEO_BIT_RATE))])  # last quality
            state[1, -1] = torch.Tensor([buffer_size / BUFFER_NORM_FACTOR])  # buffer size
            state[2, -1] = torch.Tensor([float(video_chunk_size) / float(delay) / M_IN_K])  # kilo byte / ms
            state[3, -1] = torch.Tensor([float(delay) / M_IN_K / BUFFER_NORM_FACTOR])  # /10 sec
            state[4, :A_DIM] = torch.Tensor([next_video_chunk_sizes]) / M_IN_K / M_IN_K  # mega byte
            # remaining chunk number
            state[5, -1] = torch.Tensor([min(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)])
            if args.islstm == 0:
                logits, value = local_model(state.unsqueeze(dim=0))
            else:
                logits, value, hx, cx = local_model((state.unsqueeze(dim=0),hx,cx))
            # print(f"index {index}, state {state}, logits {logits}, value {value}",sep="\n")
            # print(state,logits)
            try:
                cate         = Categorical(logits)
                bit_rate     = cate.sample().item()
            except Exception as e:
                print(e)
                print(f"walking into an error of all null distribution in step {curr_step}")
                print(logits, state)
                exit()
            policy       = logits
            log_policy   = torch.log(logits)
            entropy      = (policy * log_policy).sum(1, keepdim=True)

            if curr_step > args.num_global_steps:
                done = True

            curr_step += 1
            values.append(value)
            rewards.append(reward)
            log_policies.append(log_policy[0, bit_rate])
            entropies.append(entropy)

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                break

        score = torch.zeros((1, 1), dtype=torch.float)
        if args.use_gpu:
            score = score.cuda()
        if not done:
            _, score = local_model(state.unsqueeze(dim=0))

        gae = torch.zeros((1, 1), dtype=torch.float)
        if args.use_gpu:
            gae = gae.cuda()
        actor_loss   = 0
        critic_loss  = 0
        entropy_loss = 0
        next_value   = score

        # for value, log_policy, reward, entropy in list(zip(values, log_policies, rewards, entropies))[::-1]:
        #     gae = gae * args.gamma * args.tau
        #     gae = gae + reward + args.gamma * next_value.detach() - value.detach()
        #     next_value = value
        #     actor_loss = actor_loss + log_policy * gae
        #     score = score * args.gamma + reward
        #     critic_loss = critic_loss + (score - value) ** 2 / 2
        #     entropy_loss = entropy_loss + entropy

        for value, log_policy, reward, entropy in list(zip(values, log_policies, rewards, entropies))[::-1]:
            gae = gae * args.gamma * args.tau
            gae = gae + reward + args.gamma * next_value.detach() - value.detach()
            next_value = value
            actor_loss = actor_loss + log_policy * gae
            score = score * args.gamma + reward
            critic_loss = critic_loss + (score - value) ** 2 / 2
            entropy_loss = entropy_loss + entropy

        entropy_loss = args.beta * (entropy_loss )
        actor_loss   = -actor_loss + args.beta * entropy_loss
        # total_loss   = -actor_loss + critic_loss - entropy_loss
        writer.add_scalar("Train_{}/Loss".format(index), actor_loss, critic_loss, curr_episode)
        actor_optimizer.zero_grad()
        critic_optimizer.zero_grad()
        actor_loss.backward()
        critic_loss.backward()

        # gradient clipping
        torch.nn.utils.clip_grad_norm_(global_model.parameters(), args.max_grad_norm)
        # total_loss.backward()
        # (-critic_loss).backward()
        # (actor_loss+args.beta*entropy_loss).backward()

        for local_param, global_param in zip(local_model.parameters(), global_model.parameters()):
            if global_param.grad is not None:
                break
            global_param._grad = local_param.grad


        actor_optimizer.step()
        critic_optimizer.step()

        interval_aloss   += actor_loss.data.item()
        interval_closs   += critic_loss.data.item()
        interval_entropy += entropy_loss.data.item()
        interval_reward.append(np.sum(rewards))

        if curr_episode % print_interval == 0 :
            print("---------")
            print(f"Process {index}, episode {curr_episode}\n"+
                f"actor_loss [{interval_aloss/print_interval:4f}]  "
                f"critic_loss [{interval_closs/print_interval:4f}]  "
                f"entropy [{interval_entropy/print_interval:4f}]\n"
                f"reward [{interval_reward}]")

            if save and curr_episode % args.save_interval == 0 and curr_episode > 0:
                torch.save(global_model.state_dict(),
                           f"{args.saved_path}/a3c_{curr_episode}_reward_{sum_reward/count_reware:4f}.pkl")
            sum_reward += np.sum(interval_reward)
            count_reware += 1
            interval_aloss   = 0
            interval_closs   = 0
            interval_entropy = 0
            interval_reward  = []


        if curr_episode == int(args.num_global_steps / args.num_local_steps):
            print("Training process {} terminated".format(index))
            if save:
                end_time = timeit.default_timer()
                print('The code runs for %.2f s ' % (end_time - start_time))
            return
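The reversed loop near the end of local_train accumulates a generalized advantage estimate (gae) and a discounted return (score) in one backward pass over the episode. The sketch below reproduces that recursion with plain floats instead of torch tensors; the gamma and tau values are assumptions standing in for args.gamma and args.tau.

gamma, tau = 0.99, 0.95  # assumed discount factor and GAE smoothing factor

def gae_backward_pass(rewards, values, bootstrap_value):
    """Return per-step GAE advantages and discounted returns for one episode."""
    gae, ret, next_value = 0.0, bootstrap_value, bootstrap_value
    advantages, returns = [], []
    for reward, value in zip(reversed(rewards), reversed(values)):
        gae = gae * gamma * tau + reward + gamma * next_value - value
        ret = ret * gamma + reward
        next_value = value
        advantages.append(gae)
        returns.append(ret)
    # reverse back into chronological order
    return advantages[::-1], returns[::-1]

adv, ret = gae_backward_pass(rewards=[1.0, 0.0, 2.0], values=[0.5, 0.4, 0.3], bootstrap_value=0.0)
print(adv, ret)

In the training code the advantage weights the log-probability term of the actor loss, while (score - value) ** 2 / 2 accumulates into the critic loss.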
Example #19
                        c_ent=opts["c_ent"])

# OPTIONAL, LOAD AGENT
if "load" in opts.keys():
    model.load(path=opts["load"], ext="_last")

# INITIALIZE ROSNODE
rospy.init_node("training_node", anonymous=True)
rate = rospy.Rate(100)
rospy.sleep(1.0)
robot = torobo_wrapper.Torobo()
rospy.sleep(1.0)

# INITIALIZE ENVIRONMENT
world = env.Environment(robot=robot,
                        objects=opts["objects"],
                        rng_ranges=opts["ranges"])
rospy.sleep(0.5)
world.initialize()

print("=" * 10 + "POLICY NETWORK" + "=" * 10)
print(model.policy)
print("=" * 10 + "VALUE NETWORK" + "=" * 10)
print(model.value)
print("Training starts...")
reward_history = []
temp_history = []
it = 0
update_count = 0
while update_count < opts["episode"]:
Example #20
random_seed = 2
video_count = 0
FPS = 25
frame_time_len = 0.04
#init the environment
#setting one:
#     1,all_cooked_time : timestamp
#     2,all_cooked_bw   : throughput
#     3,all_cooked_rtt  : rtt
#     4,agent_id        : random_seed
#     5,logfile_path    : logfile_path
#     6,VIDEO_SIZE_FILE : Video Size File Path
#     7,Debug Setting   : Debug
net_env = env.Environment(all_cooked_time=all_cooked_time,
                          all_cooked_bw=all_cooked_bw,
                          random_seed=random_seed,
                          logfile_path=LogFile_Path,
                          VIDEO_SIZE_FILE=video_size_file,
                          Debug=DEBUG)

BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kbps
TARGET_BUFFER = [2.0, 3.0]  # seconds
# ABR setting
RESEVOIR = 0.5
CUSHION = 2

cnt = 0
# default setting
last_bit_rate = 0
bit_rate = 0
target_buffer = 0
# QOE setting
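RESEVOIR and CUSHION are the two thresholds of a buffer-based (BBA-style) rate map: below the reservoir the client requests the lowest bitrate, above reservoir plus cushion the highest, and in between the bitrate index grows with buffer occupancy. The decision logic of this particular example is not shown, so the following is only an illustrative sketch of such a mapping.

BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kbps, as above
RESEVOIR = 0.5                             # seconds of buffer reserved at the lowest rate
CUSHION = 2                                # seconds over which the rate ramps to the maximum

def bba_bit_rate(buffer_size):
    """Map buffer occupancy (seconds) to a bitrate index, buffer-based-ABR style."""
    if buffer_size < RESEVOIR:
        return 0
    if buffer_size >= RESEVOIR + CUSHION:
        return len(BIT_RATE) - 1
    # linear interpolation across the cushion region
    frac = (buffer_size - RESEVOIR) / float(CUSHION)
    return int(frac * (len(BIT_RATE) - 1))

print([bba_bit_rate(b) for b in (0.2, 1.0, 2.0, 3.5)])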
Example #21
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue, epoch_queue):
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # 1. Sync the latest model parameters from the coordinator (initial synchronization of the network parameters)
        actor_net_params, critic_net_params = net_params_queue.get()
        epoch_num = epoch_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)  # initialize the one-hot action vector over the A_DIM actions
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # Interact with the environment: the action is from the last decision
            # this is to make the framework similar to the real
            # delay, sleep_time, buffer_size, rebuf, \
            # video_chunk_size, next_video_chunk_sizes, \
            # end_of_video, video_chunk_remain = \
            #     net_env.get_video_chunk(bit_rate)

            assert bit_rate >= 0
            assert bit_rate < A_DIM
            bitrate_send_last, lossrate_recv_last, bitrate_real_recovery,\
            bitrate_send_last_probe, lossrate_recv_last_probe, bitrate_real_recovery_probe,\
             end_of_video \
            = net_env.action_dispatch_and_report_svr(VIDEO_BIT_RATE[bit_rate])

            time_stamp += 2
            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            #print '1', net_env.netbw
            #print '2', bitrate_send_last_probe * (1 - lossrate_recv_last_probe)
            x_funtion_top = (bitrate_send_last_probe *
                             (1 - lossrate_recv_last_probe) -
                             VIDEO_BIT_RATE[bit_rate]) / M_IN_K
            reward = -x_funtion_top * x_funtion_top  # 0.1 0.2 ... 1.1 1.2

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            #state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            #state[0, -1] = bitrate_send_last / 1000.0  # last quality
            #state[1, -1] = lossrate_recv_last  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
            #state[2, -1] = bitrate_real_recovery / 1000.0  # kilo byte / ms

            state = np.roll(state, -1, axis=1)
            state[0, -1] = bitrate_send_last_probe / 1000.0  # last quality
            state[1, -1] = lossrate_recv_last_probe  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
            state[2,
                  -1] = bitrate_real_recovery_probe / 1000.0  # kilo byte / ms

            state[3, :A_DIM] = np.array(
                VIDEO_BIT_RATE[:]) / 1000.0  # kilo byte / ms
            state[4, -1] = bitrate_send_last / 1000.0  # kilo byte / ms
            # print state[3, :A_DIM]

            # ================== Predict BandWidth =========================

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)

            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()

            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(bitrate_send_last) + '\t' + str(lossrate_recv_last) +
                '\t' + str(bitrate_real_recovery) + '\t' + str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)
                epoch_num = epoch_queue.get()

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
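Unlike the QoE-style rewards in the other examples, the reward here scores how closely the selected bitrate tracks the probed deliverable bandwidth: it is the negative squared gap, in Mbps, between the probe's goodput and the chosen rate. A one-function sketch of that computation (the input values are illustrative):

M_IN_K = 1000.0

def bandwidth_match_reward(probe_send_kbps, probe_loss_rate, chosen_bitrate_kbps):
    """Negative squared error between estimated goodput and the chosen bitrate (in Mbps)."""
    gap = (probe_send_kbps * (1 - probe_loss_rate) - chosen_bitrate_kbps) / M_IN_K
    return -gap * gap

print(bandwidth_match_reward(2500.0, 0.1, 1850.0))  # roughly -0.16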
Example #22
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'wb') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR,
                                       sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once

                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),  # ignore the first chunk
                                          a_batch=np.vstack(a_batch[1:]),  # since we don't have the
                                          r_batch=np.vstack(r_batch[1:]),  # control over it
                                          terminal=end_of_video, actor=actor, critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print "===="
                print "Epoch", epoch
                print "TD_loss", td_loss, "Avg_reward", np.mean(
                    r_batch), "Avg_entropy", np.mean(entropy_record)
                print "===="

                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: td_loss,
                                           summary_vars[1]: np.mean(r_batch),
                                           summary_vars[2]:
                                           np.mean(entropy_record)
                                       })

                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)
                    # assembled_actor_gradient = actor_gradient_batch[0]
                    # assembled_critic_gradient = critic_gradient_batch[0]
                    # assert len(actor_gradient_batch) == len(critic_gradient_batch)
                    # for i in xrange(len(actor_gradient_batch) - 1):
                    #     for j in xrange(len(actor_gradient)):
                    #         assembled_actor_gradient[j] += actor_gradient_batch[i][j]
                    #         assembled_critic_gradient[j] += critic_gradient_batch[i][j]
                    # actor.apply_gradients(assembled_actor_gradient)
                    # critic.apply_gradients(assembled_critic_gradient)

                    for i in xrange(len(actor_gradient_batch)):
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, SUMMARY_DIR + "/nn_model_ep_" + str(epoch) +
                            ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
Example #23
import env 
import numpy as np
 
e = env.Environment(8, 8)

# 4(a)
def valueIteration(e, pe, l):
    pi = dict()
    V = np.zeros((e.L, e.W, 12))
    threshold = 1e-4  # convergence tolerance on the value updates
    while True:
        delta = 0
        for s in e.getStates():
            vmax = -np.inf
            for a in e.getActions():
                v = env.expectation(e, s, a, V, pe, l)
                if v > vmax:
                    vmax = v
                    pi[s] = a
            delta = max(delta, np.abs(V[s] - vmax))
            V[s] = vmax
        print(delta)  # monitor convergence of the value function
        if delta < threshold:
            break
    return pi, V
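
# Note: env.expectation(e, s, a, V, pe, l) is assumed here to return the one-step
# Bellman backup for state s under action a, i.e. roughly
#     sum over s' of P(s' | s, a, pe) * (R(s') + l * V[s'])
# where pe is interpreted as the probability of an action error and l as the
# discount factor; the actual definition lives in the env module.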

# 4(b)
pistar, V = valueIteration(e, 0, 0.9)
tr = env.gen_traj(e, pistar, (1, 6, 6), 0)

# 4(c)
Ejemplo n.º 24
0
parser.add_argument('--mode', '-m')
parser.add_argument('--model')
# number of training episodes
parser.add_argument('--eps')

parser.add_argument('--render')

args = parser.parse_args()


writer = None  # guard against a NameError below when --tensorboard is not given
if args.tensorboard:
    writer = SummaryWriter()

    # launch a TensorBoard server pointed at the requested log directory
    write_proc = subprocess.Popen(['tensorboard', '--logdir', '{}'.format(args.tensorboard)])

env = env.Environment(args.env)  # note: rebinds the name `env` from the module to the instance

if args.alg == 'DQN':
    agent = agent.DQNAgent(env, args.mode, args.model, writer)

try:
    if args.mode == 'train':
        agent.train(int(args.eps), args.render)
    elif args.mode == 'play':
        agent.play(int(args.eps))
except KeyboardInterrupt:
    print('PROCESS KILLED BY USER')
finally:
    env.close()
    if args.tensorboard:
        write_proc.terminate()
Ejemplo n.º 25
0
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue):

    net_env = env.Environment(time=all_cooked_time,
                              bandwidth=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []
        # needs to be initialized here and fetched before each simulation step
        track_index = []
        hm = head_movement.move_prediction()

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real system
            # xgw 20180918: need to modify here

            estimate_track_index = hm.get_head_movement_prediction()
            # actual_track_index = hm.get_head_movement_current()
            actual_track_index = [2, 3, 5, 6]  # hard-coded placeholder for the actual viewport tiles


            delay, rebuf, buffer_size, sleep_time, video_chunk_size, end_of_video = \
                net_env.get_video_chunk(bit_rate, estimate_track_index)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            # xgw 20180918: the reward still needs to be modified to add the quality
            #               consistency in the viewport and the buffer
            # the consistency of quality in the viewport really reflects the head-movement
            # prediction error, so it is not clear whether a "quality consistency" term
            # should be added here, or how to model the QP as the first input
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            # bit_rate_log_reward = np.log((bit_rate + 1) / A_DIM) * BIT_RATE_REWARD_PARAMETER
            # smooth_p = np.exp(np.abs(last_bit_rate - bit_rate) / A_DIM) * SMOOTH_PENALTY
            # reward = bit_rate - REBUF_PENALTY * rebuf - smooth_p
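
            # A hypothetical way to fold the viewport-consistency term discussed above
            # into the reward: penalize the fraction of actually viewed tiles that the
            # head-movement prediction missed (VIEWPORT_PENALTY would be a new, assumed
            # constant, not part of the original code):
            # missed = len(set(actual_track_index) - set(estimate_track_index))
            # viewport_error = missed / float(len(actual_track_index))
            # reward -= VIEWPORT_PENALTY * viewport_error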

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            # state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            # state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 6 sec
            # state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            # state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            # state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            # state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            state[0, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 6 sec
            state[2, :4] = np.array(actual_track_index)
            state[3, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last chunk's bitrate

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states
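            # (up to that discretization, this is equivalent to sampling
            #  bit_rate = np.random.choice(A_DIM, p=action_prob[0]))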

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write('time_stamp: ' + str(time_stamp) + '\t' +
                           'VIDEO_BIT_RATE: ' + str(VIDEO_BIT_RATE[bit_rate]) +
                           '\t' + 'buffer_size: ' + str(buffer_size) + '\t' +
                           'rebuf: ' + str(rebuf) + '\t' +
                           'video_chunk_size: ' + str(video_chunk_size) +
                           '\t' + 'delay: ' + str(delay) + '\t' +
                           'avg throughput: ' +
                           str(video_chunk_size / delay) + '\t' + 'reward: ' +
                           str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([
                    s_batch[1:],  # ignore the first chunk
                    a_batch[1:],  # since we don't have the
                    r_batch[1:],  # control over it
                    end_of_video,
                    {
                        'entropy': entropy_record
                    }
                ])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write(
                    '\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
Ejemplo n.º 26
0
parser = argparse.ArgumentParser("Record states.")
parser.add_argument("-s", help="state file", type=str, required=True)
parser.add_argument("-o", help="output folder", type=str, required=True)
args = parser.parse_args()

if not os.path.exists(args.o):
    os.makedirs(args.o)

# INITIALIZE ROSNODE
rospy.init_node("test_node", anonymous=True)
rate = rospy.Rate(100)
rospy.sleep(1.0)
robot = torobo_wrapper.Torobo()
rospy.sleep(1.0)

# INITIALIZE ENVIRONMENT
objects = ["target_plate", "small_cube"]
random_ranges = {
    "target_plate": np.array([[0.32, 0.52], [0.30, 0.50], [1.125, 1.125]]),
    "small_cube": np.array([[0.32, 0.52], [0.0, 0.15], [1.155, 1.155]]),
}
world = env.Environment(robot=robot, objects=objects, rng_ranges=random_ranges)
rospy.sleep(0.5)

states = torch.load(args.s)
for i, s in enumerate(states):
    s = s.tolist()
    # each saved state is assumed to hold, per object, a position (x, y, z)
    # followed by an orientation quaternion
    world.set_model_state("target_plate", s[:3], s[3:7])
    world.set_model_state("small_cube", s[7:10], s[10:14])
    # grab a screenshot of the Gazebo window with ImageMagick's `import`
    os.system("import -window Gazebo %s/%d.jpg" % (args.o, i))
Ejemplo n.º 27
0
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue, exp_queue):

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id), 'w') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        time_stamp = 0
        while True:  # experience video streaming forever

            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # -- linear reward --
            # reward is video quality - rebuffer penalty - smoothness
            # reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
            #                                    VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            # -- log scale reward --
            # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[-1]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[-1]))

            # reward = log_bit_rate \
            #          - REBUF_PENALTY * rebuf \
            #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

            # -- HD reward --
            reward = HD_REWARD[bit_rate] \
                      - REBUF_PENALTY * rebuf \
                      - SMOOTH_PENALTY * np.abs(HD_REWARD[bit_rate] - HD_REWARD[last_bit_rate])

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # report experience to the coordinator
            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:
                exp_queue.put([s_batch[1:],  # ignore the first chunk
                               a_batch[1:],  # since we don't have the
                               r_batch[1:],  # control over it
                               end_of_video,
                               {'entropy': entropy_record}])

                # synchronize the network parameters from the coordinator
                actor_net_params, critic_net_params = net_params_queue.get()
                actor.set_network_params(actor_net_params)
                critic.set_network_params(critic_net_params)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]
                del entropy_record[:]

                log_file.write('\n')  # so that in the log we know where video ends

            # store the state and action into batches
            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
Ejemplo n.º 28
0
def agent(agent_id, all_cooked_time, all_cooked_bw, net_params_queue,
          exp_queue, model_type):
    torch.set_num_threads(1)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id)

    with open(LOG_FILE + '_agent_' + str(agent_id), 'w') as log_file:

        net = A3C(NO_CENTRAL, model_type, [S_INFO, S_LEN], A_DIM,
                  ACTOR_LR_RATE, CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator

        time_stamp = 0
        for epoch in range(TOTALEPOCH):
            actor_net_params = net_params_queue.get()
            net.hardUpdateActorNetwork(actor_net_params)
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY
            s_batch = []
            a_batch = []
            r_batch = []
            entropy_record = []
            state = torch.zeros((1, S_INFO, S_LEN))

            # the action is from the last decision
            # this is to make the framework similar to the real system
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            while not end_of_video and len(s_batch) < TRAIN_SEQ_LEN:
                last_bit_rate = bit_rate

                state = state.clone().detach()

                state = torch.roll(state, -1, dims=-1)

                state[0, 0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                    np.max(VIDEO_BIT_RATE))  # last quality
                state[0, 1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
                state[0, 2, -1] = float(video_chunk_size) / float(
                    delay) / M_IN_K  # kilo byte / ms
                state[0, 3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
                state[0, 4, :A_DIM] = torch.tensor(
                    next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
                state[0, 5, -1] = min(
                    video_chunk_remain,
                    CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

                bit_rate = net.actionSelect(state)
                # action selection (including any sampling) is handled inside net.actionSelect here

                delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                    net_env.get_video_chunk(bit_rate)
                reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                         - REBUF_PENALTY * rebuf \
                         - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                   VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

                s_batch.append(state)
                a_batch.append(bit_rate)
                r_batch.append(reward)
                entropy_record.append(3)  # constant placeholder; the actual policy entropy is not computed here

                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
                    '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
                    str(video_chunk_size) + '\t' + str(delay) + '\t' +
                    str(reward) + '\n')
                log_file.flush()

            exp_queue.put([
                s_batch,  # the full batch (including the first
                a_batch,  # chunk) is reported to the coordinator
                r_batch,  # in this variant
                end_of_video,
                {
                    'entropy': entropy_record
                }
            ])

            log_file.write('\n')  # so that in the log we know where video ends
Ejemplo n.º 29
0
    args = parser.parse_args()

    # loading/problem
    load = HalfBeam(nelx, nely)

    # optimizer
    verbose = True
    fesolver = CooFESolver(verbose=verbose)

    optimizer = None
    density_constraint = None

    if args.optimizer:
        if str(args.optimizer) == 'mas':
            optimizer = env.Environment(fesolver,
                                        young,
                                        poisson,
                                        verbose=verbose)
            # constraints
            density_constraint = DensityConstraint(volume_frac=1.0,
                                                   density_min=xmin,
                                                   density_max=xmax)
        if str(args.optimizer) == 'mas_ke':
            optimizer = env_ke.Environment(fesolver,
                                           young,
                                           poisson,
                                           verbose=verbose)
            # constraints
            density_constraint = DensityConstraint(volume_frac=1.0,
                                                   density_min=xmin,
                                                   density_max=xmax)
        if str(args.optimizer) == 'oc':
Ejemplo n.º 30
0
        if done:
            self.dispatch.stop()
            self.env.reset()

            self.solver.experience_replay(iteration, self.episodes)

            self.solver.save_model()
            if self.episodes % 2 == 0:  # presumably refreshes the target model every other episode
                self.solver.update_model()

            self.episodes += 1


# #######################
# # Main control center #
# #######################

# This object centralizes everything
theDispatcher = dispatcher.Dispatcher()

# Provide a new environment (maze + agent)
theDispatcher.setEnvironment(env.Environment(12, 8, 15))

# Provide also the simulation stepper, which needs access to the
# agent and maze in the dispatcher.
theDispatcher.setStepper(TrainSolver(theDispatcher))

# Start the GUI and run it until quit is selected
# (Remember Ctrl+\ forces python to quit, in case it is necessary)
theDispatcher.run()