def build_tree(self):
    """Build the soft decision tree breadth-first and collect leaf outputs.

    `disable_eager_execution`, `placeholder`, and `constant` are assumed to be
    imported from the TF1-compatible API elsewhere in the module.
    """
    disable_eager_execution()
    self.tf_X = placeholder(tf.float32, [None, self.params.n_features])
    self.tf_y = placeholder(tf.float32, [None, self.params.n_classes])
    leafs = []
    self.root = Node(id='0', depth=0, pathprob=constant(1.0, shape=(1,)), tree=self)
    leafs.append(self.root)
    # Breadth-first walk: children are appended to `leafs` while it is iterated.
    for node in leafs:
        self.n_nodes += 1
        node.build(x=self.tf_X, tree=self)
        self.loss += node.get_loss(y=self.tf_y, tree=self)
        self.add_node()
        self.add_leaf(node)
        if node.isLeaf:
            # self.n_leafs += 1
            self.output.append(node.prob)
            self.leafs_distribution.append(node.pathprob)
        else:
            leafs.append(node.leftChild)
            leafs.append(node.rightChild)
    self.output = tf.concat(self.output, axis=1)
    self.leafs_distribution = tf.concat(self.leafs_distribution, axis=1)
    print('Tree has {} leafs and {} nodes'.format(self.n_leafs, self.n_nodes))
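# The loop above works because Python allows appending to a list while
# iterating it, which turns a plain `for` into a breadth-first traversal.
# A minimal standalone sketch of the same pattern; `SimpleNode` is a
# hypothetical stand-in for the tree's Node class, not from the original code.
class SimpleNode:
    def __init__(self, depth, max_depth):
        self.isLeaf = depth == max_depth
        if not self.isLeaf:
            self.leftChild = SimpleNode(depth + 1, max_depth)
            self.rightChild = SimpleNode(depth + 1, max_depth)

nodes = [SimpleNode(0, max_depth=2)]
for node in nodes:  # the list grows while it is being iterated
    if not node.isLeaf:
        nodes.append(node.leftChild)
        nodes.append(node.rightChild)
print(len(nodes))  # 7: a full binary tree of depth 2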
def _setup_actor_critic_loss(self, actor, critic, num_actions):
    actions_one_hot = tf.placeholder(tf.float32, [None, num_actions])
    # Probability the policy assigned to the action that was actually taken.
    action_probability = tf.reduce_sum(actor * actions_one_hot, axis=1)
    # Clamp before the log to avoid log(0).
    log_prob = tf.log(tf.maximum(action_probability, self._log_noise))
    # Stop the gradient so the actor loss does not backpropagate into the critic.
    advantage = self._R - tf.stop_gradient(critic)
    # Negative entropy of the policy: sum(p * log p).
    entropy = tf.reduce_sum(tf.log(tf.maximum(actor, self._log_noise)) * actor, axis=1)
    actor_loss = -(tf.reduce_sum(log_prob * advantage, axis=0)
                   + tf.reduce_sum(-self._entropy_beta * entropy, axis=0))
    critic_loss = tf.reduce_sum(tf.square(self._R - critic), axis=0)
    loss = 0.5 * critic_loss + actor_loss
    return loss, actions_one_hot
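# A rough NumPy sketch of the same loss arithmetic, useful for sanity-checking
# the sign conventions. All batch values and hyperparameters below are made up.
import numpy as np

R = np.array([1.0, 0.5])                      # discounted returns
critic = np.array([0.8, 0.6])                 # value estimates
actor = np.array([[0.7, 0.3], [0.4, 0.6]])    # policy probabilities
actions_one_hot = np.array([[1., 0.], [0., 1.]])
log_noise, entropy_beta = 1e-6, 0.01

action_prob = np.sum(actor * actions_one_hot, axis=1)
log_prob = np.log(np.maximum(action_prob, log_noise))
advantage = R - critic                         # critic treated as a constant here
entropy = np.sum(np.log(np.maximum(actor, log_noise)) * actor, axis=1)
actor_loss = -(np.sum(log_prob * advantage) + np.sum(-entropy_beta * entropy))
critic_loss = np.sum((R - critic) ** 2)
print(0.5 * critic_loss + actor_loss)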
def __init__(self, environment_actions, observation_space, conv2d_layers,
             mlp_layers, learning_rate=0.0003, rms_decay=0.99, rms_momentum=0.0,
             rms_epsilon=0.1, log_noise=1e-6, entropy_beta=0.01, device="gpu:0"):
    self._log_noise = log_noise
    self._entropy_beta = entropy_beta
    self._device = device
    self._lock = RLock()
    self._actor_critics = dict()
    self._graph = tf.Graph()
    with self._graph.as_default():
        with tf.device(device):
            self._optimizer = tf.train.RMSPropOptimizer(
                learning_rate, rms_decay, rms_momentum, rms_epsilon)
            # Shared convolutional/MLP trunk used by every environment.
            self._setup_input_network_head(observation_space, conv2d_layers, mlp_layers)
            self._environment_names = list(environment_actions.keys())
            self._num_actions = list(environment_actions.values())
            self._observation_space = observation_space
            self._R = tf.placeholder(tf.float32, [None], name='R')
            # One actor-critic tail per environment on top of the shared head.
            for environment, num_actions in environment_actions.items():
                self._setup_actor_critic_tail(environment, num_actions)
            self._tensorflow_initialization()
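# A minimal runnable sketch of the graph/device/optimizer setup pattern used
# above, written against the TF1 compatibility API (an assumption; the original
# snippet imports a TF1-era `tf` directly). The toy loss is illustrative only.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

graph = tf.Graph()
with graph.as_default():
    with tf.device('/cpu:0'):
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=0.0003, decay=0.99, momentum=0.0, epsilon=0.1)
        x = tf.Variable(1.0)
        loss = tf.square(x)
        train_op = optimizer.minimize(loss)
        init = tf.global_variables_initializer()

with tf.Session(graph=graph) as sess:
    sess.run(init)
    sess.run(train_op)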
# coding=utf-8
from BBDATA import *
import tensorflow as tf
from cnn_utils import save_model
import matplotlib.pyplot as plt

train_times = 50000
base_path = "/Users/coorchice/Desktop/ML/model/ml/BreadBasket/"
save_path = base_path + str(train_times) + "/"

BBDATA = read_datas('data/')

x_data = tf.placeholder(tf.float32, [None, 135])
# Shaped [None, 1] to match y below; a flat [None] placeholder would make
# tf.square(y - y_data) broadcast to [None, None] and silently corrupt the loss.
y_data = tf.placeholder(tf.float32, [None, 1])

W = tf.Variable(tf.truncated_normal([135, 1], stddev=0.1))
b = tf.Variable(tf.constant(0.1, shape=[1]))
y = tf.nn.relu(tf.matmul(x_data, W) + b)

# Loss: the cross-entropy formula is commented out; mean squared error is used instead.
with tf.name_scope('loss'):
    # cross_entropy = -tf.reduce_sum(y_data * tf.log(y))
    cross_entropy = tf.reduce_mean(tf.square(y - y_data))
    tf.summary.scalar('loss', cross_entropy)

# init_lr = 0.00001
lr = tf.Variable(0.00005, trainable=False)
# global_step = tf.Variable(0., trainable=False)
# lr = tf.train.exponential_decay(init_lr, global_step=global_step, decay_steps=10000, decay_rate=0.5, staircase=True)

# Use gradient descent to keep adjusting the variables, seeking the minimal cross-entropy.
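# The snippet stops at the gradient-descent comment; a minimal sketch of how
# the training step might continue. The optimizer choice and the BBDATA
# attribute names below are assumptions, not from the original script.
train_step = tf.train.GradientDescentOptimizer(lr).minimize(cross_entropy)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(train_times):
        sess.run(train_step, feed_dict={x_data: BBDATA.train.data,
                                        y_data: BBDATA.train.label})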
def input(shape, name):
    return tf.placeholder(tf.float32, shape, name)
def init_inputs(input_size):
    inputs = tf.placeholder(tf.float32, shape=(None, input_size), name='inputs')
    return inputs
def init_labels(output_size):
    labels = tf.placeholder(tf.int32, shape=(None, output_size), name='labels')
    return labels
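# A quick runnable check of the two helpers above under the TF1 compatibility
# API (an assumption about the intended `tf`); the feed values are made up.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

inputs = init_inputs(4)    # float32 placeholder, shape (None, 4)
labels = init_labels(3)    # int32 placeholder, shape (None, 3)

with tf.Session() as sess:
    x, y = sess.run([inputs, labels], feed_dict={
        inputs: np.zeros((2, 4), dtype=np.float32),
        labels: np.zeros((2, 3), dtype=np.int32),
    })
print(x.shape, y.shape)    # (2, 4) (2, 3)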