Example No. 1
import krpc
import time
from ksp_env import GameEnv
import math
import numpy as np
from config import ip

conn = krpc.connect(name='Tracker', address=ip)
env = GameEnv(conn)
vessel = env.vessel

frame = vessel.orbit.body.reference_frame
vert_speed = conn.add_stream(getattr, vessel.flight(frame), 'vertical_speed')
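# The stream is a callable: vert_speed() returns the latest vertical speed pushed
# by the kRPC server, avoiding a separate RPC round-trip on every read.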


def states():
    # Snapshot of the vessel attitude: heading and heading+roll projected onto
    # sin/cos components, scaled by how far the nose is from vertical (pitch 90 -> 0).
    tilt = (90 - env.pitch()) / 90
    message = ('sin', round(math.sin(math.radians(env.heading())) * tilt, 2),
               'cos', round(math.cos(math.radians(env.heading())) * tilt, 2),
               'sinr', round(math.sin(math.radians(env.heading() + env.roll())) * tilt, 2),
               'cosr', round(math.cos(math.radians(env.heading() + env.roll())) * tilt, 2),
               'p', round(env.pitch(), 2),
               'h', round(env.heading(), 2),
               'r', round(env.roll(), 2))  # the source is truncated here; the roll value is assumed
    return message
Example No. 2
class Worker(object):
    def __init__(self, name, globalAC, sess, conn):
        self.conn = conn
        self.env = GameEnv(conn=self.conn)
        self.name = name
        self.AC = ACNet(name, sess, globalAC)
        self.sess = sess

    def work(self):
        global global_rewards, global_episodes
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not coord.should_stop():
            s = self.env.reset(self.conn)
            ep_r = 0
            self.env.activate_engine()
            for ep_t in range(MAX_EP_STEP):

                a = self.AC.choose_action(s)  # sample a stochastic action from the current policy
                s_, r, done, info = self.env.step(a)  # take one step in the environment

                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done:  # update global and assign to local net
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        v_s_ = self.sess.run(self.AC.v, {self.AC.states: [s_]})[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()
                    buffer_s, buffer_a, buffer_v_target = np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(
                        buffer_v_target)
                    feed_dict = {
                        self.AC.states: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    self.AC.update_global(feed_dict)  # actual training step, update global ACNet
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()  # copy the global parameters into this worker's local ACNet

                s = s_
                total_step += 1
                if done:
                    global_rewards.append(ep_r)
                    self.save_results(ep_r, global_episodes, global_rewards)
                    global_episodes += 1
                    break

    def save_results(self, ep_r, global_episodes, global_rewards):
        altitude = self.env.get_altitude()
        with open(result_file, 'a', newline='') as csvf:
            wri = csv.DictWriter(csvf, fieldnames=fieldnames)
            wri.writerow({'counter': global_episodes,
                          'altitude': altitude,
                          'reward': round(ep_r, 2)})
        print(
            self.name,
            "Episode: {:4}".format(global_episodes),
            "| Reward: {:7.1f}".format(global_rewards[-1]),
            "| Altitude: {:7.1f}".format(altitude)
        )
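The value targets assembled inside work() are bootstrapped n-step returns: the critic's estimate of the state after the last step (or 0 on termination) is propagated backwards through the reward buffer with discount GAMMA. The helper below restates that computation in isolation; the function name discounted_value_targets is illustrative and not part of the project.

import numpy as np

def discounted_value_targets(rewards, bootstrap_value, gamma):
    # v_target[t] = r[t] + gamma * v_target[t+1], seeded with the critic's
    # value of the state after the final step (0 if the episode terminated).
    v = bootstrap_value
    targets = []
    for r in reversed(rewards):
        v = r + gamma * v
        targets.append(v)
    targets.reverse()
    return np.vstack(targets)

# e.g. rewards [1.0, 0.5, 2.0] with bootstrap 1.0 and gamma 0.9
# -> [[3.80], [3.11], [2.90]] (to two decimals)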
Example No. 3
    def __init__(self, name, globalAC, sess, conn):
        self.conn = conn
        self.env = GameEnv(conn=self.conn)
        self.name = name
        self.AC = ACNet(name, sess, globalAC)
        self.sess = sess
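Example No. 3 repeats the Worker constructor from Example No. 2. In the surrounding script (see Example No. 4), one Worker is typically created per kRPC connection, all sharing a single global ACNet. The sketch below shows that wiring under those assumptions; the names sess, GLOBAL_AC and workers are illustrative, while tf, ACNet, Worker, connections, N_WORKERS and GLOBAL_NET_SCOPE come from the other examples.

# Illustrative wiring (sess, GLOBAL_AC and workers are assumed names):
sess = tf.Session()
with tf.device('/cpu:0'):
    GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE, sess)  # shared global network
    workers = [Worker('W_%i' % i, GLOBAL_AC, sess, connections[i])
               for i in range(N_WORKERS)]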
Example No. 4
import numpy as np
import csv
import os
import shutil
import tensorflow as tf
import krpc

from config import OUTPUT_GRAPH, LOG_DIR, result_file, fieldnames, N_WORKERS, MAX_EP_STEP, GLOBAL_NET_SCOPE, \
    UPDATE_GLOBAL_ITER, GAMMA, ENTROPY_BETA, LR_A, LR_C, conns
from ksp_env import GameEnv

print(conns)
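# Each entry of `conns` is a dict of krpc.connect() keyword arguments, one per worker,
# e.g. {'name': 'W_0', 'address': '127.0.0.1', 'rpc_port': 50000, 'stream_port': 50001}
# (illustrative values; 50000/50001 are kRPC's default RPC and stream ports).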
connections = [krpc.connect(**conns[i]) for i in range(N_WORKERS)]
# connections = [krpc.connect()]

env = GameEnv(conn=connections[0])
env.reset(connections[0])

NUM_STATES = env.observation_space.shape[0]
NUM_ACTIONS = env.action_space.shape[0]
ACTION_BOUND = [env.action_space.low, env.action_space.high]

# Actor-critic network: one global copy plus a local copy per worker
class ACNet(object):
    def __init__(self, scope, sess, globalAC=None):
        self.sess = sess
        self.actor_optimizer = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA')
        self.critic_optimizer = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC')

        if scope == GLOBAL_NET_SCOPE:
            with tf.variable_scope(scope):