def __init__(self, randomness=0.5, neural_net_filename='model.h5'):
    """Set up the agent: config, table analyzer and the action network.

    Args:
        randomness: probability of taking a random action instead of the
            network's prediction.
        neural_net_filename: path of the saved Keras model to load.
    """
    Agent.__init__(self)
    # Static configuration drives the table analyzer.
    self.config = self.read_config()
    self.analyzer = Analyzer(self.config)
    # Rolling stack of frames; filled lazily as frames arrive.
    self.input = None
    # Width x height x stacked-frame-count fed to the network.
    self.shape = (320, 480, 5)
    self.randomness = randomness
    self.neural_net = NeuralNet(24, self.shape, filename=neural_net_filename)
def __init__(self, neural_net, generator, shape=(320, 480), frame_count=5):
    """Build a double-DQN style training graph fed from `generator`.

    Args:
        neural_net: live network being trained (provides `.model` and
            `.filename`).
        generator: Python generator for `tf.data.Dataset.from_generator`;
            per the dtype tuple below it must yield
            (actions int32, terminals bool, rewards float32,
             inputs float32, inputs_next float32).
        shape: (width, height) of the observation frames.
        frame_count: number of stacked frames per observation.
    """
    # Discount factor for future rewards.
    self.gamma = 0.99
    self.punishment_for_moving = 0.1
    self.neural_net = neural_net
    # Target network: a second copy reloaded from the same weights file.
    self.neural_net_old = NeuralNet(filename=self.neural_net.filename)
    self.width = shape[0]
    self.height = shape[1]
    self.frame_count = frame_count
    self.dataset = tf.data.Dataset.from_generator(
        generator, (tf.int32, tf.bool, tf.float32, tf.float32, tf.float32))
    # NOTE(review): batch size 32 is hard-coded here; evaluate_input is
    # presumably shaped to match — confirm before changing either.
    self.dataset = self.dataset.repeat().shuffle(buffer_size=1000).batch(32)
    actions, terminals, rewards, inputs, inputs_next = self.dataset.make_one_shot_iterator().get_next()
    # Q-values from the online net for current and next observations, and
    # from the target net for the next observations (double-DQN evaluation).
    computed = self.evaluate_input(inputs)
    computed_next = self.evaluate_input(inputs_next)
    computed_next_old = self.evaluate_input_old(inputs_next)
    # One-hot over the 3 possible values per action slot.
    actions_one_hot = tf.one_hot(actions, 3, axis=2)
    # Q-value of the action actually taken.
    q_old = tf.reduce_sum(actions_one_hot * computed, axis=2)
    # Action selected by the target network, evaluated under the online net.
    argmax_old = tf.one_hot(
        tf.argmax(computed_next_old, axis=2), 3, axis=2)
    second_term = self.gamma * \
        tf.reduce_sum(computed_next * argmax_old, axis=2)
    # Bellman target; zero future value on terminal transitions.  Gradients
    # must not flow through the target.
    q_new = tf.stop_gradient(
        rewards + tf.where(terminals, tf.zeros_like(second_term), second_term))
    loss = tf.losses.huber_loss(q_new, q_old)
    self.train_step = tf.train.AdamOptimizer(1e-5).minimize(loss)
def __init__(self, neural_net, shape=(320, 480), frame_count=5):
    """Initialize the trainer: hyperparameters, target net and TF profiling.

    Args:
        neural_net: live network being trained (provides `.model` and
            `.filename`).
        shape: (width, height) of the observation frames.
        frame_count: number of stacked frames per observation.
    """
    # Hyperparameters.
    self.gamma = 0.99
    self.punishment_for_moving = 0.1
    self.learning_rate = 1e-4

    # Online network plus a target copy reloaded from the same weights file.
    self.neural_net = neural_net
    self.neural_net_old = NeuralNet(filename=self.neural_net.filename)

    # Observation geometry.
    width, height = shape[0], shape[1]
    self.width = width
    self.height = height
    self.frame_count = frame_count

    # Full tracing metadata for profiling session runs.
    self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    self.run_metadata = tf.RunMetadata()

    # Dump the current graph for TensorBoard inspection.
    self.writer = tf.summary.FileWriter(logdir='tensorboard_logdir',
                                        graph=K.get_session().graph)
    self.writer.flush()

    # JPEG-decoding subgraph for raw observations.
    self.observations_img = self.build_image_processor()
    self.debugger = False
class NeuralNetAgent(Agent):
    """Agent that picks kicker actions with a neural net over stacked frames.

    Frames are accumulated into a rolling stack of 5 grayscale channels;
    once the stack is full, each new frame triggers a prediction which is
    converted to per-rod inputs, optionally replaced by random actions.
    """

    def __init__(self, randomness=0.5, neural_net_filename='model.h5'):
        """Load config, analyzer and the saved model.

        Args:
            randomness: probability of acting randomly instead of using the
                network's prediction.
            neural_net_filename: path of the saved Keras model.
        """
        Agent.__init__(self)
        self.config = self.read_config()
        self.analyzer = Analyzer(self.config)
        # Rolling (320, 480, k) frame stack; None until the first frame.
        self.input = None
        self.shape = (320, 480, 5)
        self.randomness = randomness
        self.neural_net = NeuralNet(24, self.shape, filename=neural_net_filename)

    def read_config(self):
        """Return the parsed `config.yml`.

        Uses `yaml.safe_load` — `yaml.load` without a Loader is deprecated
        and can execute arbitrary Python from the file.
        """
        with open('config.yml', 'r') as f:
            return yaml.safe_load(f)

    def new_frame(self, frame):
        """Consume a raw camera frame; predict actions once the stack is full.

        Returns the color-extracted table image when a prediction was made,
        otherwise None (while the frame stack is still filling up).
        """
        frame_color = self.analyzer.extract_table(frame, (320, 480))
        # Use only the green channel as the network's grayscale input.
        frame = frame_color[:, :, 1]
        first_frame = np.swapaxes(frame, 0, 1).reshape(320, 480, 1)
        if self.input is None:
            self.input = first_frame
            return
        self.input = np.concatenate((self.input, first_frame), axis=2)
        if self.input.shape[2] < 6:
            # Not enough history yet (needs 6 so that dropping the oldest
            # leaves the 5 frames the network expects).
            return
        # Drop the oldest frame to keep a 5-frame window.
        self.input = self.input[:, :, 1:]
        res = self.neural_net.predict_single(self.input)
        self.prediction = res
        self.inputs = convert_neural_net_result_to_actions(res)
        if random.random() < self.randomness:
            # Explore: one random action in {-1, 0, 1} for each of 8 rods.
            self.inputs = [random.randint(0, 2) - 1 for k in range(0, 8)]
        self.inputs_changed = True
        return frame_color
class Trainer:
    """Builds and runs a double-DQN training graph for the kicker agent.

    The online network (`neural_net`) is trained against targets computed
    with a frozen copy (`neural_net_old`).  Observations arrive as JPEG
    strings and are decoded by a dedicated TF subgraph.
    """

    def __init__(self, neural_net, shape=(320, 480), frame_count=5):
        """Set up hyperparameters, the target network and TF profiling.

        Args:
            neural_net: live network being trained (provides `.model` and
                `.filename`).
            shape: (width, height) of the observation frames.
            frame_count: number of stacked frames per observation.
        """
        self.gamma = 0.99  # discount factor
        self.punishment_for_moving = 0.1
        self.neural_net = neural_net
        # Target network: second copy reloaded from the same weights file.
        self.neural_net_old = NeuralNet(filename=self.neural_net.filename)
        self.width = shape[0]
        self.height = shape[1]
        self.frame_count = frame_count
        # Full tracing for profiling session runs.
        self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        self.run_metadata = tf.RunMetadata()
        self.writer = tf.summary.FileWriter(logdir='tensorboard_logdir',
                                            graph=K.get_session().graph)
        self.writer.flush()
        self.learning_rate = 1e-4
        self.observations_img = self.build_image_processor()
        self.debugger = False

    def build_image_processor(self):
        """Return a float32 tensor decoding a batch of JPEG observation strings."""
        observations = tf.placeholder(tf.string,
                                      shape=[None, self.frame_count + 1],
                                      name='observations')
        observations_img = tf.cast(
            tf.map_fn(lambda i: self.convert_images(i), observations,
                      dtype=tf.uint8), tf.float32)
        observations_img.set_shape(
            [None, self.width, self.height, self.frame_count + 1])
        return observations_img

    def decode(self, images):
        """Run the image-decoding subgraph on a batch of JPEG strings."""
        sess = K.get_session()
        return sess.run(self.observations_img,
                        feed_dict={'observations:0': images},
                        options=self.options,
                        run_metadata=self.run_metadata)

    def compute(self, actions, inputs, inputs_next, rewards, terminals):
        """Build the double-DQN loss, optimizer step and summaries.

        Returns:
            (train_step_op, loss, |q_new - q_old|, argmax of the online
            Q-values, merged summaries).
        """
        computed = self.evaluate_input(inputs)
        computed_next = self.evaluate_input(inputs_next)
        computed_next_old = self.evaluate_input_old(inputs_next)
        # One-hot over the 3 possible values per action slot.
        actions_one_hot = tf.one_hot(actions, 3, axis=2)
        # Q-value of the action actually taken.
        q_old = tf.reduce_sum(actions_one_hot * computed, axis=2)
        # Double DQN: action chosen by the target net, value from online net.
        argmax_old = tf.one_hot(tf.argmax(computed_next_old, axis=2), 3,
                                axis=2)
        second_term = self.gamma * \
            tf.reduce_sum(computed_next * argmax_old, axis=2)
        # Bellman target; zero future value on terminal transitions, no
        # gradient through the target.
        q_new = tf.stop_gradient(
            rewards + tf.where(terminals, tf.zeros_like(second_term),
                               second_term))
        loss = tf.losses.huber_loss(q_new, q_old, delta=50.0)
        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(loss)
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('diff', tf.reduce_mean(tf.abs(q_new - q_old)))
        tf.summary.scalar('maximal_reward', tf.reduce_max(q_new))
        tf.summary.scalar('mean_reward', tf.reduce_mean(q_new))
        tf.summary.scalar('minimal_reward', tf.reduce_min(q_new))
        merged = tf.summary.merge_all()
        return train_step, loss, tf.abs(q_new - q_old), tf.argmax(
            computed, axis=2), merged

    def convert_images(self, inputs):
        """Decode a vector of JPEG strings into a (W, H, frames) uint8 tensor."""
        return tf.transpose(
            tf.map_fn(lambda i: tf.image.decode_jpeg(i), inputs,
                      dtype=tf.uint8)[:, :, :, 0], [1, 2, 0])

    def train_step(self, batch):
        """Run one optimizer step on `batch`.

        NOTE(review): this runs `self.tf_train_step`, which is never assigned
        in this class — presumably the caller stores the op returned by
        `compute()` there.  Verify against the training script.
        """
        if self.debugger:
            # One-shot: wrap the session in the TensorBoard debugger.
            sess = debug.TensorBoardDebugWrapperSession(
                K.get_session(), 'localhost:6004')
            K.set_session(sess)
            self.debugger = False
        sess = K.get_session()
        return sess.run(self.tf_train_step,
                        feed_dict=self.build_feed_dict(batch),
                        options=self.options,
                        run_metadata=self.run_metadata)

    def evaluate_input(self, input):
        """Online-net Q-values reshaped to (batch, 8 rods, 3 actions).

        Uses -1 for the batch dimension instead of a hard-coded 32 so the
        graph works with any batch size.
        """
        return tf.reshape(self.neural_net.model(input), [-1, 8, 3])

    def build_feed_dict(self, batch):
        """Map a list of transition dicts onto the graph's placeholders."""
        return {
            'rewards:0': [[
                s['score'],
            ] * 8 for s in batch],
            'actions:0': [s['action'] for s in batch],
            'terminal:0': [s['terminal'] for s in batch],
            'inputs:0': [s['images'] for s in batch],
            'inputs_next:0': [s['images_next'] for s in batch]
        }

    def evaluate_input_old(self, input):
        """Target-net Q-values reshaped to (batch, 8 rods, 3 actions)."""
        return tf.reshape(self.neural_net_old.model(input), [-1, 8, 3])
"""Training entry script: wire the memory dataset into the Trainer graph."""
import tensorflow as tf
import keras.backend as K
from tensorflow.python.client import timeline

from kicker.train import Trainer
from kicker.neural_net import NeuralNet
# NOTE(review): MemoryDataProvider was used below but never imported in the
# original script; module path assumed to match DataProvider — verify.
from kicker.train import MemoryDataProvider

import logging

logging.basicConfig(
    filename='train.log',
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s %(lineno)d %(levelname)s %(message)s')

nn = NeuralNet()
t = Trainer(nn)
memory = MemoryDataProvider()
dataset = memory.load_as_dataset()
# Endless shuffled batches of 32 transitions, prefetched one batch ahead.
next_item = dataset.repeat().shuffle(1000).batch(32).prefetch(
    1).make_one_shot_iterator().get_next()
a, i, i_n, s, ter = next_item
# (actions, inputs, inputs_next, rewards/scores, terminals) — the unpack
# order must match what load_as_dataset yields; verify against the provider.
step, loss, diff, computed, merged = t.compute(a, i, i_n, s, ter)
sess = K.get_session()
sess.run(tf.global_variables_initializer())
"""Freeze the Keras model's graph and convert it to a TensorRT UFF file."""
import keras
import keras.backend as K
import tensorflow as tf
import uff

from kicker.neural_net import NeuralNet

# Name of the graph's output node; must match the Keras model's last layer.
output_names = ['dense_2/BiasAdd']
frozen_graph_filename = 'frozen_model.pb'

nn = NeuralNet()
sess = K.get_session()
# Dump the graph for TensorBoard inspection.
tf.summary.FileWriter('tensorboard_logdir', sess.graph)

# Bake variables into constants and strip training-only ops.
graph_def = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph_def, output_names)
graph_def = tf.graph_util.remove_training_nodes(graph_def)

# Write the frozen graph to file; `with` closes the file, so no explicit
# close() is needed.
with open(frozen_graph_filename, 'wb') as f:
    f.write(graph_def.SerializeToString())

# Convert the frozen graph to UFF for TensorRT.
uff.from_tensorflow_frozen_model(frozen_graph_filename,
                                 output_names,
                                 output_filename='model.uff')
import cv2 import numpy as np import time import json import base64 from kicker.train import DataProvider, Parser from kicker.neural_net import NeuralNet from kicker.visualize import Figure # d = DataProvider(return_observations=True, filename='train/training_data_new.h5') # s = d.get_batch() # nn = NeuralNet(23, (320, 480, 5), filename='model.h5') fig = Figure(wait_for_button_press=False, show_images=True) import socket sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) def show_prediction(frames, position): observation = np.concatenate( [(f[:, :, 1]).reshape((320, 480, 1)) for f in frames], axis=2) prediction = nn.predict_single(observation).reshape(8, 3) print( np.argmax( prediction, axis=1) - np.ones(8),