    def step(self, env: Environment) -> Tuple[EpisodeStep, Reward]:
        state = env.get_observation()
        action_vector = self.net(torch.FloatTensor(state))
        softmax = nn.Softmax(dim=0)
        act_probs = softmax(action_vector).detach().numpy()
        action = np.random.choice(act_probs.size, p=act_probs)
        episode_step = EpisodeStep(state=state, action=action)
        reward: Reward = env.action(action)
        return (episode_step, reward)
    def play_episode(self, env: Environment) -> Episode:
        env.reset()
        episode_steps = []
        total_reward: Reward = 0.0

        while not env.is_done():
            episode_step, reward = self.step(env)
            episode_steps.append(episode_step)
            total_reward += reward

        episode = Episode(steps=episode_steps, reward=total_reward)
        return episode
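The two methods above assume a policy network in self.net plus Environment, EpisodeStep, Episode, and Reward types defined elsewhere in the project. A minimal self-contained sketch of the same softmax-sampling loop, where every stub below is a hypothetical stand-in rather than the project's real API:

import numpy as np
import torch
import torch.nn as nn

class StubEnvironment:
    """Hypothetical stand-in with the interface the methods assume."""
    def __init__(self, obs_size=4, horizon=10):
        self.obs_size, self.horizon, self.t = obs_size, horizon, 0
    def reset(self):
        self.t = 0
    def get_observation(self):
        return np.random.rand(self.obs_size).astype(np.float32)
    def action(self, a):
        self.t += 1
        return 1.0  # constant reward, just for the sketch
    def is_done(self):
        return self.t >= self.horizon

env = StubEnvironment()
net = nn.Linear(env.obs_size, 2)   # toy policy network
softmax = nn.Softmax(dim=0)
env.reset()
total_reward = 0.0
while not env.is_done():
    obs = torch.from_numpy(env.get_observation())
    probs = softmax(net(obs)).detach().numpy().astype(np.float64)
    probs /= probs.sum()           # renormalize for np.random.choice
    action = np.random.choice(probs.size, p=probs)
    total_reward += env.action(action)
print("episode reward:", total_reward)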
Example #3
def train():

    env = Environment()
    player = Player()

    for episode in range(2):
        env.start()

        while True:
            actions = player.act(env.state)
            reward = env.apply(actions)
            player.learn(reward)
            if env.done:
                break
    return
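train() only exercises a small Environment/Player protocol (start, state, apply, done; act, learn). A hedged sketch of stubs that satisfy it, with every body below hypothetical:

class Environment:
    def __init__(self):
        self.state, self.done, self._t = None, False, 0
    def start(self):
        self.state, self.done, self._t = 0, False, 0
    def apply(self, actions):
        self._t += 1
        self.done = self._t >= 5
        return 1.0  # reward, constant for the sketch

class Player:
    def act(self, state):
        return 0     # a real player would pick an action from a policy
    def learn(self, reward):
        pass         # a real player would update its policy here

train()  # runs two 5-step episodes against the stubs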
Example #4
    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate,
                 grad_applier,
                 show_env=False,
                 local_t_max=20,
                 max_global_time_step=10 * 10**7,
                 gamma=0.99,
                 save_interval_step=100 * 1000,
                 env='Breakout-v0',
                 device='/cpu:0'):

        self.thread_index = thread_index
        self.learning_rate = learning_rate
        self.env = env

        # Whether to render the environment
        # or not during training (default is
        # True for one of the agents) - change
        # this in main.py
        self.show_env = show_env

        # Discount factor for the reward
        self.gamma = gamma

        # Number of "epochs"
        self.max_global_time_step = max_global_time_step

        # Number of steps for the LSTM
        self.local_t_max = local_t_max

        # Number of actions the agent can take
        self.action_size = Environment.get_action_size(env)

        self.local_network = A3C(self.action_size, self.thread_index, device)

        self.global_network = global_network

        # Build computational graph
        self.local_network._create_network()

        # Build computational graph for the losses
        # and gradients
        self.local_network.prepare_a3c_loss()
        self.apply_gradients = grad_applier.minimize_local(
            self.local_network.a3c_loss, global_network.get_vars(),
            self.local_network.get_vars())

        # Sync the weights of the local network with those
        # of the main network
        self.sync = self.local_network.sync_from(global_network)

        # Initialize time step, learning rate, etc
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate
        self.episode_reward = 0
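initial_learning_rate and max_global_time_step feed the linear schedule implemented in Example #19's _decay_learning_rate; a small worked sketch of that arithmetic:

def linear_decay(initial_lr, t, t_max):
    # same clipped linear schedule as _decay_learning_rate
    return max(initial_lr * (t_max - t) / t_max, 0.0)

# e.g. with initial_lr=4e-4 and t_max=1e8, a quarter of the way through
# training the rate has dropped to 3e-4:
assert abs(linear_decay(4e-4, 2.5e7, 1e8) - 3e-4) < 1e-12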
Example #5
class Core:
    def __init__(self):
        self.game_count = 0
        self.events = Events()
        self.text_renderer = TextRenderer()

        # empty declarations for linting
        self.balls = None
        self.env = None
        return

    def new_game(self):
        self.game_count += 1
        self.balls = self.new_balls()
        self.env = Environment(self.balls)

    def new_balls(self):
        return [Ball() for _ in range(settings.num_balls)]

    def update(self):
        self.events.update()
        settings.update(self.events)
        # only cycle through balls alive in the environment for optimization
        for ball in self.env.balls:
            ball.update(self.events)
        self.env.update(self.events)

    def game_over(self):
        return self.env.game_over()

    def draw(self):
        surface = self.env.get_surface()
        if self.events.info:
            surface.blit(self.get_info_surface(), (0, 0))
        return surface

    def get_info_surface(self):
        texts = [
            " Game: {}".format(self.game_count),
            " Score: {}".format(self.env.score),
            " Alive: {}".format(self.env.num_alive)
        ]

        return self.text_renderer.texts_to_surface(texts)
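Core pulls in Events, TextRenderer, settings, Ball, and Environment from the surrounding project. A hedged set of minimal stand-ins (all hypothetical, not the project's real classes) lets the game loop above run headless:

class Events:
    def __init__(self):
        self.info = False
    def update(self):
        pass

class TextRenderer:
    def texts_to_surface(self, texts):
        return "\n".join(texts)  # stands in for a rendered surface

class Ball:
    def update(self, events):
        pass

class _Settings:
    num_balls = 3
    def update(self, events):
        pass

settings = _Settings()

class Environment:
    def __init__(self, balls):
        self.balls, self.score, self.num_alive = balls, 0, len(balls)
        self._t = 0
    def update(self, events):
        self._t += 1
    def game_over(self):
        return self._t >= 10
    def get_surface(self):
        return "frame"  # stands in for a drawable surface

core = Core()
core.new_game()
while not core.game_over():
    core.update()
print(core.get_info_surface())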
Example #6
    def __init__(self, size=0, mountpoint=False, logger=False, environ=False):
        """
        Initialize with an optional logger and environment; defaults are
        created when neither is passed in.
        """
        #####
        # Version/timestamp is
        # <YYYY><MM><DD>.<HH><MM><SS>.<microseconds>
        # in UTC time
        self.module_version = '20160224.032043.009191'
        if not logger:
            self.logger = CyLogger()
        else:
            self.logger = logger
        if not environ:
            self.environ = Environment()
        else:
            self.environ = environ
        self.chkApp = CheckApplicable(self.environ, self.logger)
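The inject-or-default pattern above is common for testability. A terser equivalent sketch, assuming the same CyLogger/Environment imports as the original module (same behavior for the falsy defaults used here):

    def __init__(self, size=0, mountpoint=False, logger=False, environ=False):
        self.logger = logger or CyLogger()
        self.environ = environ or Environment()
        self.chkApp = CheckApplicable(self.environ, self.logger)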
Example #7
def deploy_bcf(config, fuel_cluster_id):
    # Deploy setup node
    Helper.safe_print("Start to prepare setup node\n")
    env = Environment(config, fuel_cluster_id)
    Helper.common_setup_node_preparation(env)

    # Generate detailed node information
    Helper.safe_print("Start to setup Big Cloud Fabric\n")
    nodes_yaml_config = config['nodes'] if 'nodes' in config else None
    node_dic = Helper.load_nodes(nodes_yaml_config, env)

    # Generate scripts for each node
    for hostname, node in node_dic.iteritems():
        if node.os == const.CENTOS:
            Helper.generate_scripts_for_centos(node)
        elif node.os == const.UBUNTU:
            Helper.generate_scripts_for_ubuntu(node)
        with open(const.LOG_FILE, "a") as log_file:
            log_file.write(str(node))
        if node.skip:
            Helper.safe_print("skip node %(hostname)s due to %(error)s\n" % {
                'hostname': hostname,
                'error': node.error
            })
            continue
        node_q.put(node)

    # Use multiple threads to setup nodes
    for i in range(const.MAX_WORKERS):
        t = threading.Thread(target=worker_setup_node)
        t.daemon = True
        t.start()
    node_q.join()
    Helper.safe_print(
        "Big Cloud Fabric deployment finished! Check %(log)s on each node for details.\n"
        % {'log': const.LOG_FILE})
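worker_setup_node is not shown in this excerpt; for this Queue/daemon-thread/join pattern the consumer usually follows the sketch below, where only the get/task_done protocol is essential and the per-node setup call is hypothetical:

def worker_setup_node():
    while True:
        node = node_q.get()           # blocks until a node is queued
        try:
            Helper.setup_node(node)   # hypothetical per-node setup step
        finally:
            node_q.task_done()        # lets node_q.join() return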
Example #8
    def __init__(self, conf, parent=None):
        """
        Initialization method...

        @author: Roy Nielsen
        """
        super(VirtualMachineBuilder, self).__init__(parent)

        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)

        #####
        # initialization of class variables.
        self.conf = conf
        self.conf.loggerSelf()
        self.logger = self.conf.getLogger()
        self.environ = Environment()
        #self.logger = self.conf.get_logger()
        self.logger.log(lp.DEBUG, str(self.logger))
        self.runWith = RunWith(self.logger)
        self.libc = getLibc(self.logger)

        #####
        # Set label states
        self.ui.packerLabel.setText(
            "( <a href='https://www.packer.io'>https://www.packer.io</a> - Download and install packer separately )"
        )
        self.ui.boxcutterLabel.setText(
            "( <a href='https://github.com/boxcutter'>https://github.com/boxcutter</a> - Clone repos separately )"
        )

        #####
        # Handle button box
        #
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Close).clicked.connect(
                self.closeApplication)
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Ok).clicked.connect(self.processVm)

        #####
        # Rename Save button
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Save).setText("Configure Repos")
        btn = self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Save)
        btn.clicked.connect(self.configureRepos)

        #####
        # Rename Apply button
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Apply).setText("Install packer")
        btnTwo = self.ui.buttonBox.button(QtWidgets.QDialogButtonBox.Apply)
        btnTwo.clicked.connect(self.installPacker)
        btnTwo.hide()

        self.chkApp = CheckApplicable(self.environ, self.logger)
        self.macOsBlackListApplicable = {
            'type': 'black',
            'os': {
                'Mac OS X': ['10.0.0', 'r', '20.12.10']
            }
        }
        self.linuxWhitelistApplicable = {'type': 'white', 'family': 'linux'}
        self.freebsdWhitelistApplicable = {
            'type': 'white',
            'family': 'freebsd'
        }
        self.macosWhitelistApplicable = {'type': 'white', 'family': 'darwin'}
        #openbsdWhitelistApplicable = {}
        #windowsWhitelistApplicable = {}

        #####
        # Set up the configure dialog
        self.configRepos = ConfigureRepos(self.conf)
        self.configRepos.setWindowTitle("Configure Repos")

        #####
        # Connect the configure 'done' signal to the refreshFamilyComboBox slot
        self.configRepos.doneConfigure.connect(self.refreshFamilyComboBox)

        #####
        # Signal/slot to deal with osFamily combo box change
        self.ui.osFamily.currentIndexChanged.connect(self.osFamilySelected)

        self.refreshFamilyComboBox()
        self.osFamilySelected(0)

        self.logger.log(lp.DEBUG, "Done with VirtualMachineBuilder init...")
Example #9
    def build_environment(self):
        """ Create the environment """
        self.environment = Environment(self.env, show_env=self.show_env)
Example #10
    def __init__(self, conf, parent=None):
        """
        Initialization method...

        @author: Roy Nielsen
        """
        super(ConfigureRepos, self).__init__(parent)

        self.ui = Ui_Dialog()
        self.ui.setupUi(self)

        #####
        # initialization of class variables.
        self.conf = conf
        self.environ = Environment()
        self.conf.loggerSelf()
        self.logger = self.conf.getLogger()
        #self.logger = self.conf.get_logger()
        self.logger.log(lp.DEBUG, str(self.logger))
        self.runWith = RunWith(self.logger)
        self.libc = getLibc(self.logger)
        self.chkApp = CheckApplicable(self.environ, self.logger)
        macOsWhiteListApplicable = {
            'type': 'white',
            'os': {
                'Mac OS X': ['10.0.0', 'r', '20.12.10']
            }
        }

        #####
        # Handle button box
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Cancel).clicked.connect(self.close)
        self.ui.buttonBox.button(
            QtWidgets.QDialogButtonBox.Ok).clicked.connect(self.okDone)

        #####
        # Handle other buttons
        self.ui.downloadReposButton.clicked.connect(self.downloadRepos)
        self.ui.gitResetHardButton.clicked.connect(self.resetRepos)
        self.ui.gitPullButton.clicked.connect(self.updateRepos)

        if self.chkApp.isApplicable(macOsWhiteListApplicable):
            self.ui.prepareIsoButton.clicked.connect(self.prepareIso)
        else:
            self.ui.prepareIsoButton.hide()
            self.ui.macosCheckBox.hide()

        #####
        # default boxcutter repo path
        self.reposRoot = self.conf.getRepoRoot()

        #####
        # Future features
        self.ui.winCheckBox.hide()
        self.ui.label_2.hide()
        self.ui.leReposPath.hide()
        self.ui.proxyButton.hide()

        self.git = "/usr/bin/git"

        #####
        # repos
        self.repos2process = []

        self.getSelected()
Example #11
def deploy_bcf(config, mode, fuel_cluster_id, rhosp, tag, cleanup, verify,
               verify_only, skip_ivs_version_check, certificate_dir,
               certificate_only, generate_csr, support, upgrade_dir,
               offline_dir, sriov):
    # Deploy setup node
    safe_print("Start to prepare setup node\n")
    env = Environment(config, mode, fuel_cluster_id, rhosp, tag, cleanup,
                      skip_ivs_version_check, certificate_dir, upgrade_dir,
                      offline_dir, sriov)
    Helper.common_setup_node_preparation(env)
    controller_nodes = []

    # Generate detailed node information
    safe_print("Start to setup Big Cloud Fabric\n")
    nodes_yaml_config = config['nodes'] if 'nodes' in config else None
    node_dic = Helper.load_nodes(nodes_yaml_config, env)

    if upgrade_dir:
        return upgrade_bcf(node_dic)

    if sriov:
        return setup_sriov(node_dic)

    if generate_csr:
        safe_print("Start to generate csr for virtual switches.\n")
        # create ~/csr and ~/key directory
        Helper.run_command_on_local("mkdir -p %s" % const.CSR_DIR)
        Helper.run_command_on_local("mkdir -p %s" % const.KEY_DIR)
        for hostname, node in node_dic.iteritems():
            if node.skip:
                safe_print("skip node %(fqdn)s due to %(error)s\n" % {
                    'fqdn': node.fqdn,
                    'error': node.error
                })
                continue

            if node.tag != node.env_tag:
                safe_print("skip node %(fqdn)s due to mismatched tag\n" %
                           {'fqdn': node.fqdn})
                continue
            if node.deploy_mode == const.T6 and node.role == const.ROLE_COMPUTE:
                Helper.generate_csr(node)
        safe_print("Finish generating csr for virtual switches.\n")
        return

    # copy neutron config from neutron server to setup node
    for hostname, node in node_dic.iteritems():
        if node.role == const.ROLE_NEUTRON_SERVER:
            controller_nodes.append(node)
    Helper.copy_neutron_config_from_controllers(controller_nodes)

    # check if vlan is the tenant network type for fuel environment
    if not Helper.check_if_vlan_is_used(controller_nodes):
        safe_print("tenant network type is not vlan. Stop deploying.\n")
        return

    # prepare keystone client from /etc/neutron/api-paste.ini
    #Helper.prepare_keystone_client(controller_nodes)

    # Generate scripts for each node
    for hostname, node in node_dic.iteritems():
        if support:
            support_node_q.put(node)

        if node.skip:
            safe_print("skip node %(fqdn)s due to %(error)s\n" % {
                'fqdn': node.fqdn,
                'error': node.error
            })
            continue

        if node.tag != node.env_tag:
            safe_print("skip node %(fqdn)s due to mismatched tag\n" %
                       {'fqdn': node.fqdn})
            continue

        if node.os == const.CENTOS:
            Helper.generate_scripts_for_centos(node)
        elif node.os == const.UBUNTU:
            Helper.generate_scripts_for_ubuntu(node)
        elif node.os == const.REDHAT:
            Helper.generate_scripts_for_redhat(node)

        if node.role == const.ROLE_NEUTRON_SERVER:
            controller_node_q.put(node)
        else:
            # python doesn't have deep copy for Queue, hence add to all
            node_q.put(node)
            verify_node_q.put(node)
            if node.deploy_mode == const.T6 and node.role == const.ROLE_COMPUTE:
                certify_node_q.put(node)

        if node.rhosp:
            Helper.chmod_node(node)

    with open(const.LOG_FILE, "a") as log_file:
        version = Helper.run_command_on_local("pip show bosi")
        log_file.write(str(version))
        for hostname, node in node_dic.iteritems():
            log_file.write(str(node))

    if support:
        safe_print("Start to collect logs.\n")
        # copy installer logs to ~/support
        Helper.run_command_on_local("mkdir -p %s" % const.SUPPORT_DIR)
        Helper.run_command_on_local("cp -r %(src)s %(dst)s" % {
            "src": const.LOG_FILE,
            "dst": const.SUPPORT_DIR
        })
        Helper.run_command_on_local(
            "cp -r %(setup_node_dir)s/%(generated_script_dir)s %(dst)s" % {
                "setup_node_dir": env.setup_node_dir,
                "generated_script_dir": const.GENERATED_SCRIPT_DIR,
                "dst": const.SUPPORT_DIR
            })

        for i in range(const.MAX_WORKERS):
            t = threading.Thread(target=support_node_setup,
                                 args=(support_node_q, ))
            t.daemon = True
            t.start()
        support_node_q.join()
        # compress ~/support
        Helper.run_command_on_local("cd /tmp; tar -czf support.tar.gz support")
        safe_print(
            "Finish collecting logs. logs are at /tmp/support.tar.gz.\n")
        return

    # in case of verify_only or certificate_only, do not deploy
    if (not verify_only) and (not certificate_only):
        # Use single thread to setup controller nodes
        t = threading.Thread(target=worker_setup_node,
                             args=(controller_node_q, ))
        t.daemon = True
        t.start()
        controller_node_q.join()

        # Use multiple threads to setup compute nodes
        for i in range(const.MAX_WORKERS):
            t = threading.Thread(target=worker_setup_node, args=(node_q, ))
            t.daemon = True
            t.start()
        node_q.join()

        sorted_time_dict = OrderedDict(
            sorted(time_dict.items(), key=lambda x: x[1]))
        for fqdn, h_time in sorted_time_dict.items():
            safe_print("node: %(fqdn)s, time: %(time).2f\n" % {
                'fqdn': fqdn,
                'time': h_time
            })

        safe_print("Big Cloud Fabric deployment finished! "
                   "Check %(log)s on each node for details.\n" %
                   {'log': const.LOG_FILE})

    if certificate_dir or certificate_only:
        # certify each node
        safe_print("Start to certify virtual switches.\n")
        for i in range(const.MAX_WORKERS):
            t = threading.Thread(target=certify_node_setup,
                                 args=(certify_node_q, ))
            t.daemon = True
            t.start()
        certify_node_q.join()
        safe_print('Certifying virtual switches done.\n')

    if verify or verify_only:
        # verify each node and post results
        safe_print("Verifying deployment for all compute nodes.\n")
        for i in range(const.MAX_WORKERS):
            t = threading.Thread(target=verify_node_setup,
                                 args=(verify_node_q, ))
            t.daemon = True
            t.start()
        verify_node_q.join()
        # print status
        # success nodes
        safe_print('Deployed successfully to: \n')
        for node_element in node_pass:
            safe_print(node_element + '\n')
        # failed nodes
        safe_print('Deployment to following failed: \n')
        for node_element in node_fail:
            safe_print(
                str(node_element) + ' : ' + str(node_fail[node_element]) +
                '\n')
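The timing report above sorts a {fqdn: seconds} dict by value; the same idiom in isolation:

from collections import OrderedDict

time_dict = {"node-a": 12.5, "node-b": 3.2, "node-c": 7.9}
sorted_time_dict = OrderedDict(sorted(time_dict.items(), key=lambda x: x[1]))
# iteration order is now node-b (3.2), node-c (7.9), node-a (12.5)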
Example #12
    def new_game(self):
        self.game_count += 1
        self.balls = self.new_balls()
        self.env = Environment(self.balls)
Example #13
from lib.agent import Agent
from lib.environment import Environment

num_actions = 12
agent = Agent(num_actions)
environment = Environment()

done = 0
environment.start()

while done != 1:
    action = agent.choose_action()
    environment.sendAction(action)
    reward, done = environment.getState()

environment.exit()
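lib.agent and lib.environment are not shown here; hedged stand-ins that satisfy the loop's protocol (all bodies hypothetical):

import random

class Agent:
    def __init__(self, num_actions):
        self.num_actions = num_actions
    def choose_action(self):
        return random.randrange(self.num_actions)  # random policy stub

class Environment:
    def __init__(self):
        self._t = 0
    def start(self):
        self._t = 0
    def sendAction(self, action):
        self._t += 1
    def getState(self):
        done = 1 if self._t >= 100 else 0
        return 1.0, done   # (reward, done) as the loop expects
    def exit(self):
        pass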
Example #14
    def __init__(self, **kwargs):
        """
        Variables that can be passed in:
        logDispatcher
        userName
        userShell
        userComment
        userUid
        userPriGid
        userHomeDir
        """
        if 'logDispatcher' not in kwargs:
            raise ValueError(
                "Variable 'logDispatcher' a required parameter for " +
                str(self.__class__.__name__))
        else:
            self.logger = kwargs.get('logDispatcher')

        if 'userName' not in kwargs:
            self.userName = ""
        else:
            self.userName = kwargs.get('userName')

        if 'userShell' not in kwargs:
            self.userShell = "/bin/bash"
        else:
            self.userShell = kwargs.get('userShell')

        if 'userComment' not in kwargs:
            self.userComment = ""
        else:
            self.userComment = kwargs.get('userComment')

        if 'userUid' not in kwargs:
            self.userUid = 10000
        else:
            self.userUid = kwargs.get('userUid')

        if 'userPriGid' not in kwargs:
            self.userPriGid = 20
        else:
            self.userPriGid = kwargs.get('userPriGid')

        if 'userHomeDir' not in kwargs:
            self.userHomeDir = ""
        else:
            self.userHomeDir = kwargs.get('userHomeDir')

        self.module_version = '20160225.125554.540679'

        #####
        # Acquire the environment
        self.environ = Environment()

        #####
        # THIS IS A LIBRARY, SO LOGS SHOULD BE INITIALIZED ELSEWHERE...
        # self.logger.initializeLogs()
        self.logger.log(lp.INFO, "Logger: " + str(self.logger))

        #####
        # Initialize the RunWith helper for executing shelled out commands.
        self.runWith = RunWith(self.logger)
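Each if/else pair above can collapse to dict.get with a default; an equivalent sketch of the same assignments (which also sidesteps the self.userShell bug fixed above):

        self.userName = kwargs.get('userName', "")
        self.userShell = kwargs.get('userShell', "/bin/bash")
        self.userComment = kwargs.get('userComment', "")
        self.userUid = kwargs.get('userUid', 10000)
        self.userPriGid = kwargs.get('userPriGid', 20)
        self.userHomeDir = kwargs.get('userHomeDir', "")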
Example #15
    def run(self):
        """ Run the model """

        with tf.device(self.device):
            # The learning rate is sampled from a
            # log-uniform distribution between
            # 0.0001 and 0.005. Then, it is
            # decayed linearly to 0 progressively
            # during training
            initial_learning_rate = log_uniform(self.initial_alpha_low,
                                                self.initial_alpha_high,
                                                0.5)

            # Whether to terminate, pause or keep training
            self.stop = False
            self.terminate = False

            # Initialize global time step
            self.global_t = 0

            # Number of actions the agent can take
            action_size = Environment.get_action_size(self.env)

            # Initialize the shared/global network
            self.global_network = A3C(action_size,
                                      thread_index=-1,
                                      device=self.device)

            # Build computational graph
            self.global_network._create_network()

            # Placeholder for the Trainers
            self.trainers = []

            learning_rate_input = tf.placeholder("float")

            # Initialize the RMSPROP object for the updates
            grad_applier = RMSPropApplier(learning_rate_input)

            # Build the agents
            for i in range(self.parallel_size):
                trainer = Trainer(thread_index=i,
                                  global_network=self.global_network,
                                  initial_learning_rate=initial_learning_rate,
                                  grad_applier=grad_applier,
                                  learning_rate=learning_rate_input)
                if i == 0:
                    trainer.show_env = True

                self.trainers.append(trainer)

            # Initialize Session
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())

            # Params for logging scores in Tensorboard
            self.score_input = tf.placeholder(tf.int32)
            tf.summary.scalar("score", self.score_input)
            self.summary_op = tf.summary.merge_all()

            # sess.graph contains the graph definition;
            # that enables the Graph Visualizer. To start
            # Tensorboard run the following command:
            # $ tensorboard --logdir=path/to/LOG_FILE
            self.summary_writer = tf.summary.FileWriter(LOG_FILE,
                                                        graph=self.sess.graph)


            # Parameters for saving the global network params
            self.saver = tf.train.Saver(var_list=self.global_network.get_vars(),
                                        max_to_keep=1)

            # Set next checkpoint
            self.next_checkpoint = self.checkpoint_interval

            # Set next log point
            self.next_log = self.logging_interval

            # -----------
            # RUN THREADS
            # -----------

            self.train_threads = []
            for i in range(self.parallel_size):
                self.train_threads.append(threading.Thread(target=self.train,
                                                           args=(i, True)))
            for t in self.train_threads:
                t.start()
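log_uniform is not defined in this excerpt; a common implementation interpolates in log space, per the sketch below (assumed signature). Note that with rate fixed at 0.5, as run() passes it, this returns the geometric mean of the bounds rather than a random sample; sampling would pass rate=random.random():

import math

def log_uniform(lo, hi, rate):
    # interpolate between log(lo) and log(hi), then exponentiate
    v = math.log(lo) * (1.0 - rate) + math.log(hi) * rate
    return math.exp(v)

# log_uniform(0.0001, 0.005, 0.5) ~= 7.07e-4, i.e. sqrt(1e-4 * 5e-3)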
Example #16
# A file is created at the path given by the path parameter.
#op.createFile(path="lib",fileName="test.swift",content="hello")

# A folder is created at the default path given in init.
#op.createFolder(folderName="hello/1")

#path is valid
#print(op.isExist("/Users/umut/Desktop/Architecture/CodeGenerationCore/lib"))

#append file add content
#op.appendFile(fileName="test.swift",content="\nworld")

# logs are disabled now
#Environment.Shared().online()

print(type(Environment.Shared().online()))

#remove file
#op.removeFile(fileName=fileName)
#op.createFile(fileName=fileName, content="hello")

#print(MESSAGE.ERROR)
#print(DEV_ENV.LOCAL)
#print(CODE.SLASH)

#log samples
#Log.i(message=MESSAGE.INFO)
#Log.s(MESSAGE.SUCCESS)
#Log.e(MESSAGE.ERROR)
Example #17
import numpy as np
import sys
import tensorflow as tf
from lib.agent import DeepQLearningAgent
from lib.environment import Environment
from lib.experience import Experience

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
f = open("trace.txt", "w")

print(f'starting training')
accuracy = 0
num_actions = 5
num_episodes = 100000
environment = Environment()
agent = DeepQLearningAgent(environment.frameset_size, num_actions, environment.hot_encode_action_size)
agent.saveModel(0)
agent.loadModel('policy_network_model_0.h5')

for episode in range(1, num_episodes):

    done = 0
    rewards_current_episode = 0
    position_current_episode = 0
    level_position = 0
    last_frameset = np.zeros(environment.frameset_size) 
    last_last_actions = np.zeros(environment.hot_encode_action_size)
    environment.start()

    first_random_action = np.random.choice(range(agent.num_actions))
    environment.sendAction(first_random_action)
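The hot_encode_action_size bookkeeping above suggests one-hot action vectors; a small sketch of that encoding (helper name hypothetical):

import numpy as np

def one_hot(action, size):
    v = np.zeros(size, dtype=np.float32)
    v[action] = 1.0
    return v

# one_hot(2, 5) -> array([0., 0., 1., 0., 0.], dtype=float32)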
Example #18
# Author: Umut Boz
# Copyright (c) 2020, OneframeMobile, KoçSistem
# Email: [email protected]
############################################################
# Version: 0.1.0
############################################################
from lib.enums import MessageType
from lib.enums import CodeLine
from lib.log import Log
from lib.environment import Environment
from lib.httpOperation3 import HttpOperation3

MESSAGE = MessageType()

CODE = CodeLine()

# close log
Environment.Shared().online()

url = "https://petstore.swagger.io/v2/swagger.json"

op = HttpOperation3()
jsonData = op.request(url=url).jsonParse()

op2 = HttpOperation3(url=url)
jsonData = op2.request().jsonParse()

op3 = HttpOperation3()
print(op3.fetch(url=url))

print(jsonData["swagger"])
Example #19
class Trainer(object):
    """ Class for Training a Local Network / ONE agent """
    def __init__(self,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 learning_rate,
                 grad_applier,
                 show_env=False,
                 local_t_max=20,
                 max_global_time_step=10 * 10**7,
                 gamma=0.99,
                 save_interval_step=100 * 1000,
                 env='Breakout-v0',
                 device='/cpu:0'):

        self.thread_index = thread_index
        self.learning_rate = learning_rate
        self.env = env

        # Whether to render the environment
        # or not during training (default is
        # True for one of the agents) - change
        # this in main.py
        self.show_env = show_env

        # Discount factor for the reward
        self.gamma = gamma

        # Number of "epochs"
        self.max_global_time_step = max_global_time_step

        # Number of steps for the LSTM
        self.local_t_max = local_t_max

        # Number of actions the agent can take
        self.action_size = Environment.get_action_size(env)

        self.local_network = A3C(self.action_size, self.thread_index, device)

        self.global_network = global_network

        # Build computational graph
        self.local_network._create_network()

        # Build computational graph for the losses
        # and gradients
        self.local_network.prepare_a3c_loss()
        self.apply_gradients = grad_applier.minimize_local(
            self.local_network.a3c_loss, global_network.get_vars(),
            self.local_network.get_vars())

        # Sync the weights of the local network with those
        # of the main network
        self.sync = self.local_network.sync_from(global_network)

        # Initialize time step, learning rate, etc
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate
        self.episode_reward = 0

    def build_environment(self):
        """ Create the environment """
        self.environment = Environment(self.env, show_env=self.show_env)

    def stop(self):
        """ Terminate the environment """
        self.environment.stop()

    def _record_score(self, sess, summary_writer, summary_op, score_input,
                      score, global_t):
        """ Save Score to Tensorboard """
        summary_str = sess.run(summary_op, feed_dict={score_input: score})
        summary_writer.add_summary(summary_str, global_t)

        # Write to disk
        summary_writer.flush()

    def choose_action(self, pi_values):
        """
        Sample from the learned policy
        distribution

        :param pi_values:
            Probability distribution for
            every action
        """
        return np.random.choice(range(len(pi_values)), p=pi_values)

    def concat_action_reward(self, action, action_size, reward):
        """
        Return one hot vectored action and reward.
        """
        action_reward = np.zeros([action_size + 1], dtype='float32')
        action_reward[action] = 1.0
        action_reward[-1] = float(reward)
        return action_reward

    def _decay_learning_rate(self, global_time_step):
        """ Decay the learning rate linearly """
        time_left = self.max_global_time_step - global_time_step
        learning_rate = self.initial_learning_rate * time_left \
                        / self.max_global_time_step

        # Clip learning rate at 0.0
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def _process_a3c(self, sess, global_t, summary_writer, summary_op,
                     score_input):
        """
        Process max_local_t steps/frames in the
        A3C network

        :param sess:
            TensorFlow session object

        :param global_t:
            Global time step (number of steps
            processed by the global/shared network)
        """
        # States of the LSTM
        states = []
        last_action_rewards = []
        actions = []
        rewards = []
        values = []

        # Synchronize with global network
        sess.run(self.sync)

        # Initial local time step
        self.local_t = 0

        # Whether we hit a terminal state or not
        terminal_end = False
        start_lstm_state = self.local_network.lstm_state_out

        # Loops local_t_max time steps
        for _ in range(self.local_t_max):
            last_action = self.environment.last_action
            last_reward = self.environment.last_reward
            last_action_reward = self.concat_action_reward(
                last_action, self.action_size, last_reward)

            # Compute policy and value function
            pi_, value_ = self.local_network.run_pi_value(
                sess, self.environment.last_state, last_action_reward)

            # Pick an action given the new computed policy
            action = self.choose_action(pi_)

            # Append results to placeholders...
            states.append(self.environment.last_state)
            last_action_rewards.append(last_action_reward)
            actions.append(action)
            values.append(value_)

            # Process next action
            new_state, reward, terminal = self.environment.process(action)

            rewards.append(reward)
            self.episode_reward += reward

            self.local_t += 1

            if terminal:
                # Environment hit a terminal state
                terminal_end = True

                # ----------------
                # PRINT STATISTICS
                # ----------------

                print('Time step: %5d k - Score: %3d' %
                      (global_t / 1000, self.episode_reward))

                self._record_score(sess, summary_writer, summary_op,
                                   score_input, self.episode_reward, global_t)

                # If we hit a terminal state, then the
                # reward is set to 0, else, it is set
                # to the value function
                self.episode_reward = 0
                self.environment.reset()
                self.local_network.reset_state()
                break

        # ---------
        # BACK-PROP
        # ---------

        # We discount the rewards from t - 1 to t_start. At
        # time step t the reward is either 0 (if terminal state)
        # or V (non terminal state)
        R = 0.0
        if not terminal_end:
            R = self.local_network.run_last_value(sess, new_state,
                                                  last_action_reward)

        # Reverse placeholders
        actions.reverse()
        states.reverse()
        rewards.reverse()
        values.reverse()

        # To compute the gradients we compute a minibatch of
        # length local_t_max
        batch_s = []
        batch_a = []
        batch_adv = []
        batch_R = []

        # For printing
        R_non_discounted = R

        # Discounting...
        for (ai, ri, si, Vi) in zip(actions, rewards, states, values):
            R = ri + self.gamma * R
            adv = R - Vi
            a = np.array([0] * self.action_size)
            a[ai] = 1.0

            batch_s.append(si)
            batch_a.append(a)

            # Convert np.array -> float because
            # the advantage and reward placeholders
            # expect shape [None, ] not [None, 1]
            batch_adv.append(float(adv))
            batch_R.append(float(R))

        batch_s.reverse()
        batch_a.reverse()
        batch_adv.reverse()
        batch_R.reverse()

        # Decay learning rate
        cur_learning_rate = self._decay_learning_rate(global_t)

        # Create feed_dict for gradient_applier
        feed_dict = {
            self.local_network.input: batch_s,
            self.local_network.last_action_reward: last_action_rewards,
            self.local_network.a: batch_a,
            self.local_network.adv: batch_adv,
            self.local_network.R: batch_R,
            self.local_network.lstm_state: start_lstm_state,
            self.learning_rate: cur_learning_rate
        }

        # compute gradients and update weights
        sess.run(self.apply_gradients, feed_dict=feed_dict)
        """
        # ----------------
        # PRINT STATISTICS
        # ----------------

        # Compute losses
        total_loss, policy_loss, value_loss = self.local_network.run_losses(sess,
                                                                            feed_dict)

        total_loss = np.mean(total_loss)
        policy_loss = np.mean(policy_loss)
        value_loss = np.mean(value_loss)

        if global_t % 1000 == 0:
            print('Time Step: %6d k Reward: %3d - Total Loss: %.4f - '
                  'Policy Loss: %.4f - Value Loss: %.4f' %
                  (global_t / 1000, float(R_non_discounted), total_loss,
                   policy_loss, value_loss))

            # Save to log file
            with open(LOG_FILE, 'a') as f:
                f.write('Reward: %3d - Total Loss: %.4f - Policy Loss: %.4f '
                  '- Value Loss: %.4f \n' %
                  (float(R), total_loss, policy_loss, value_loss))
        """

        # Return the number of steps taken
        # to update global_time_steps
        return self.local_t
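The discounting loop in _process_a3c implements R_t = r_t + gamma * R_{t+1}, walking the trajectory backwards; a tiny worked sketch:

gamma = 0.99
rewards = [0.0, 0.0, 1.0]   # forward order; episode ended, so R starts at 0
R = 0.0
returns = []
for r in reversed(rewards):
    R = r + gamma * R
    returns.append(R)
returns.reverse()
# returns -> [0.9801, 0.99, 1.0] (up to float rounding)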