Example #1
    def __init__(self, randomness=0.5, neural_net_filename='model.h5'):
        Agent.__init__(self)
        self.config = self.read_config()
        self.analyzer = Analyzer(self.config)
        self.input = None

        self.shape = (320, 480, 5)

        self.randomness = randomness

        self.neural_net = NeuralNet(24,
                                    self.shape,
                                    filename=neural_net_filename)
Example #2
    def __init__(self, neural_net, generator, shape=(320, 480), frame_count=5):
        self.gamma = 0.99
        self.punishment_for_moving = 0.1
        self.neural_net = neural_net
        self.neural_net_old = NeuralNet(filename=self.neural_net.filename)

        self.width = shape[0]
        self.height = shape[1]
        self.frame_count = frame_count

        self.dataset = tf.data.Dataset.from_generator(
            generator, (tf.int32, tf.bool, tf.float32, tf.float32, tf.float32))

        self.dataset = self.dataset.repeat().shuffle(buffer_size=1000).batch(32)

        actions, terminals, rewards, inputs, inputs_next = self.dataset.make_one_shot_iterator().get_next()

        computed = self.evaluate_input(inputs)
        computed_next = self.evaluate_input(inputs_next)
        computed_next_old = self.evaluate_input_old(inputs_next)

        actions_one_hot = tf.one_hot(actions, 3, axis=2)

        # predicted Q-value of the action that was actually taken
        q_old = tf.reduce_sum(actions_one_hot * computed, axis=2)

        # target: the old network picks the greedy next action, the current
        # network supplies its value; zeroed on terminal states
        argmax_old = tf.one_hot(
            tf.argmax(computed_next_old, axis=2), 3, axis=2)
        second_term = self.gamma * \
            tf.reduce_sum(computed_next * argmax_old, axis=2)
        q_new = tf.stop_gradient(
            rewards + tf.where(terminals, tf.zeros_like(second_term), second_term))

        # Huber loss between target and prediction
        loss = tf.losses.huber_loss(q_new, q_old)

        self.train_step = tf.train.AdamOptimizer(1e-5).minimize(loss)
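
The generator passed to tf.data.Dataset.from_generator above is only constrained by the declared output types (tf.int32, tf.bool, tf.float32, tf.float32, tf.float32), i.e. it must yield tuples of (actions, terminal flag, rewards, current input, next input) in that order. A minimal sketch of a compatible generator; the shapes below are illustrative assumptions, not values taken from the project:

import numpy as np
import tensorflow as tf

def replay_generator():
    # yields (actions, terminal, rewards, inputs, inputs_next); the shapes
    # (8 rods, 3 actions, 320x480x5 frame stacks) are assumptions
    while True:
        actions = np.random.randint(0, 3, size=8).astype(np.int32)
        terminal = bool(np.random.rand() < 0.05)
        rewards = np.random.rand(8).astype(np.float32)
        inputs = np.random.rand(320, 480, 5).astype(np.float32)
        inputs_next = np.random.rand(320, 480, 5).astype(np.float32)
        yield actions, terminal, rewards, inputs, inputs_next

dataset = tf.data.Dataset.from_generator(
    replay_generator,
    (tf.int32, tf.bool, tf.float32, tf.float32, tf.float32))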
Example #3
File: trainer.py Project: ishay2b/Ki-cker
    def __init__(self, neural_net, shape=(320, 480), frame_count=5):
        self.gamma = 0.99
        self.punishment_for_moving = 0.1
        self.neural_net = neural_net
        self.neural_net_old = NeuralNet(filename=self.neural_net.filename)

        self.width = shape[0]
        self.height = shape[1]
        self.frame_count = frame_count

        self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()

        self.writer = tf.summary.FileWriter(logdir='tensorboard_logdir',
                                            graph=K.get_session().graph)
        self.writer.flush()
        self.learning_rate = 1e-4

        self.observations_img = self.build_image_processor()

        self.debugger = False
Example #4
class NeuralNetAgent(Agent):
    def __init__(self, randomness=0.5, neural_net_filename='model.h5'):
        Agent.__init__(self)
        self.config = self.read_config()
        self.analyzer = Analyzer(self.config)
        self.input = None

        self.shape = (320, 480, 5)

        self.randomness = randomness

        self.neural_net = NeuralNet(24,
                                    self.shape,
                                    filename=neural_net_filename)

    def read_config(self):
        with open('config.yml', 'r') as f:
            return yaml.load(f)

    def new_frame(self, frame):
        frame_color = self.analyzer.extract_table(frame, (320, 480))
        frame = frame_color[:, :, 1]
        first_frame = np.swapaxes(frame, 0, 1).reshape(320, 480, 1)
        if self.input is None:
            self.input = first_frame
            return
        self.input = np.concatenate((self.input, first_frame), axis=2)
        if self.input.shape[2] < 6:
            return
        self.input = self.input[:, :, 1:]

        res = self.neural_net.predict_single(self.input)
        self.prediction = res
        self.inputs = convert_neural_net_result_to_actions(res)

        if random.random() < self.randomness:
            self.inputs = [random.randint(0, 2) - 1 for k in range(0, 8)]

        self.inputs_changed = True

        return frame_color
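
The agent above collects five processed frames before it produces a prediction; new_frame returns the extracted table image only once that stack is full. A minimal driving loop, assuming frames come from an OpenCV capture and that config.yml and model.h5 exist; the camera index and randomness value are illustrative:

import cv2

agent = NeuralNetAgent(randomness=0.1)
cap = cv2.VideoCapture(0)  # assumed camera source

while True:
    ok, frame = cap.read()
    if not ok:
        break
    result = agent.new_frame(frame)
    if result is not None:
        # eight rod commands in {-1, 0, 1}
        print(agent.inputs)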
Example #5
File: trainer.py Project: ishay2b/Ki-cker
class Trainer:
    def __init__(self, neural_net, shape=(320, 480), frame_count=5):
        self.gamma = 0.99
        self.punishment_for_moving = 0.1
        self.neural_net = neural_net
        self.neural_net_old = NeuralNet(filename=self.neural_net.filename)

        self.width = shape[0]
        self.height = shape[1]
        self.frame_count = frame_count

        self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()

        self.writer = tf.summary.FileWriter(logdir='tensorboard_logdir',
                                            graph=K.get_session().graph)
        self.writer.flush()
        self.learning_rate = 1e-4

        self.observations_img = self.build_image_processor()

        self.debugger = False

    def build_image_processor(self):
        observations = tf.placeholder(tf.string,
                                      shape=[None, self.frame_count + 1],
                                      name='observations')
        observations_img = tf.cast(
            tf.map_fn(lambda i: self.convert_images(i),
                      observations,
                      dtype=tf.uint8), tf.float32)
        observations_img.set_shape(
            [None, self.width, self.height, self.frame_count + 1])

        return observations_img

    def decode(self, images):
        sess = K.get_session()

        return sess.run(self.observations_img,
                        feed_dict={'observations:0': images},
                        options=self.options,
                        run_metadata=self.run_metadata)

    def compute(self, actions, inputs, inputs_next, rewards, terminals):
        computed = self.evaluate_input(inputs)
        computed_next = self.evaluate_input(inputs_next)
        computed_next_old = self.evaluate_input_old(inputs_next)
        # computed_actions = tf.stop_gradient(tf.argmax(computed, axis=2))
        actions_one_hot = tf.one_hot(actions, 3, axis=2)
        q_old = tf.reduce_sum(actions_one_hot * computed, axis=2)
        argmax_old = tf.one_hot(tf.argmax(computed_next_old, axis=2),
                                3,
                                axis=2)
        second_term = self.gamma * \
            tf.reduce_sum(computed_next * argmax_old, axis=2)
        # second_term = self.gamma * tf.reduce_max(computed_next, axis=2)
        q_new = tf.stop_gradient(
            rewards +
            tf.where(terminals, tf.zeros_like(second_term), second_term))

        loss = tf.losses.huber_loss(q_new, q_old, delta=50.0)
        # loss = loss + 0.01 * tf.reduce_mean(tf.where(computed_actions == tf.ones_like(computed_actions), tf.zeros_like(q_new), tf.ones_like(q_new)))
        # loss = loss + 0.1 * tf.reduce_mean(tf.nn.relu(computed[:,:,0] - computed[:,:,1]))
        # loss = loss + 0.1 * tf.reduce_mean(tf.nn.relu(computed[:,:,2] - computed[:,:,1]))
        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(loss)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('diff', tf.reduce_mean(tf.abs(q_new - q_old)))
        tf.summary.scalar('maximal_reward', tf.reduce_max(q_new))
        tf.summary.scalar('mean_reward', tf.reduce_mean(q_new))
        tf.summary.scalar('minimal_reward', tf.reduce_min(q_new))
        merged = tf.summary.merge_all()

        return train_step, loss, tf.abs(q_new - q_old), tf.argmax(
            computed, axis=2), merged

    def convert_images(self, inputs):
        return tf.transpose(
            tf.map_fn(lambda i: tf.image.decode_jpeg(i),
                      inputs,
                      dtype=tf.uint8)[:, :, :, 0], [1, 2, 0])

    def train_step(self, batch):
        if self.debugger:
            sess = debug.TensorBoardDebugWrapperSession(
                K.get_session(), 'localhost:6004')
            K.set_session(sess)
            self.debugger = False

        sess = K.get_session()

        return sess.run(self.tf_train_step,
                        feed_dict=self.build_feed_dict(batch),
                        options=self.options,
                        run_metadata=self.run_metadata)

    def evaluate_input(self, input):
        return tf.reshape(self.neural_net.model(input), [32, 8, 3])

    def build_feed_dict(self, batch):
        return {
            'rewards:0': [[
                s['score'],
            ] * 8 for s in batch],
            'actions:0': [s['action'] for s in batch],
            # 'observations:0': [s['observations'] for s in batch],
            'terminal:0': [s['terminal'] for s in batch],
            'inputs:0': [s['images'] for s in batch],
            'inputs_next:0': [s['images_next'] for s in batch]
        }

    def evaluate_input_old(self, input):
        return tf.reshape(self.neural_net_old.model(input), [32, 8, 3])
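
Note that the targets in compute() are wrapped in tf.stop_gradient, so the optimizer never updates neural_net_old; it is only loaded once from the same weight file. If a periodic target-network refresh is wanted, a sketch under the assumption that NeuralNet exposes its Keras model as .model (as evaluate_input suggests) could be:

def sync_target_network(trainer):
    # hypothetical helper: copy the current weights into the "old" network;
    # the original project may handle this elsewhere
    weights = trainer.neural_net.model.get_weights()
    trainer.neural_net_old.model.set_weights(weights)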
Example #6
# d = DataProvider()

import tensorflow as tf
import keras.backend as K
from tensorflow.python.client import timeline

# assumption: MemoryDataProvider (used below) lives alongside Trainer
from kicker.train import Trainer, MemoryDataProvider
from kicker.neural_net import NeuralNet

import logging
logging.basicConfig(
    filename='train.log',
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s %(lineno)d %(levelname)s %(message)s')

nn = NeuralNet()

t = Trainer(nn)

memory = MemoryDataProvider()
dataset = memory.load_as_dataset()

next_item = dataset.repeat().shuffle(1000).batch(32).prefetch(
    1).make_one_shot_iterator().get_next()

a, i, i_n, s, ter = next_item
step, loss, diff, computed, merged = t.compute(a, i, i_n, s, ter)

sess = K.get_session()
sess.run(tf.global_variables_initializer())
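
The script stops after initialising the variables. Assuming training simply runs the ops returned by compute() in a loop (the dataset iterator feeds the graph, so no feed_dict is needed), a minimal continuation might be:

for i in range(10000):  # illustrative number of updates
    _, loss_value, summary = sess.run([step, loss, merged])
    t.writer.add_summary(summary, i)
    if i % 100 == 0:
        logging.info('step %d, loss %s', i, loss_value)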
Example #7
import keras
import keras.backend as K
import tensorflow as tf

import uff

from kicker.neural_net import NeuralNet

output_names = ['dense_2/BiasAdd']
frozen_graph_filename = 'frozen_model.pb'

nn = NeuralNet()

sess = K.get_session()
tf.summary.FileWriter('tensorboard_logdir', sess.graph)

graph_def = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph_def, output_names)

graph_def = tf.graph_util.remove_training_nodes(graph_def)

# write frozen graph to file
with open(frozen_graph_filename, 'wb') as f:
    f.write(graph_def.SerializeToString())

# convert frozen graph to uff
uff.from_tensorflow_frozen_model(frozen_graph_filename,
                                 output_names,
                                 output_filename='model.uff')
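
To sanity-check the frozen graph around the UFF conversion, it can be re-imported into a fresh graph with plain TensorFlow 1.x calls; this step is not part of the original script:

check_graph_def = tf.GraphDef()
with open(frozen_graph_filename, 'rb') as f:
    check_graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(check_graph_def, name='')
    # the output declared in output_names should now be resolvable
    print(g.get_tensor_by_name('dense_2/BiasAdd:0'))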
Example #8
import cv2
import numpy as np
import time
import json
import base64

from kicker.train import DataProvider, Parser
from kicker.neural_net import NeuralNet
from kicker.visualize import Figure

# d = DataProvider(return_observations=True, filename='train/training_data_new.h5')
# s = d.get_batch()
#
nn = NeuralNet(23, (320, 480, 5), filename='model.h5')

fig = Figure(wait_for_button_press=False, show_images=True)

import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)


def show_prediction(frames, position):
    observation = np.concatenate(
        [(f[:, :, 1]).reshape((320, 480, 1)) for f in frames], axis=2)
    prediction = nn.predict_single(observation).reshape(8, 3)

    print(
        np.argmax(
            prediction,
            axis=1) -
        np.ones(8),