def tf_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ Specifying a fully-connected network in TensorFlow.
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        a TfMap object used to serialize, inputs, outputs, and loss.
    """
    # n_layers counts all layers including the output layer, hence the +1.
    n_layers = 2 if 'n_layers' not in network_config else network_config['n_layers'] + 1
    # BUGFIX: copy the configured hidden sizes before appending. The original
    # appended dim_output directly onto network_config['dim_hidden'], mutating
    # the shared config list and growing it on every call to this constructor.
    if 'dim_hidden' in network_config:
        dim_hidden = list(network_config['dim_hidden'])
    else:
        dim_hidden = (n_layers - 1) * [40]
    dim_hidden.append(dim_output)

    nn_input, action, precision = get_input_layer(dim_input, dim_output)
    mlp_applied, weights_FC, biases_FC = get_mlp_layers(
        nn_input, n_layers, dim_hidden, network_config['batch_norm'])
    fc_vars = weights_FC + biases_FC
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out]), fc_vars, []
def example_tf_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example of how one might want to specify a network in tensorflow.

    NOTE(review): a later definition with the same name appears further down
    this file and shadows this one at import time.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
    Returns:
        a TfMap object used to serialize, inputs, outputs, and loss.
    """
    n_layers = 4
    # Three hidden layers of 200 units, then the linear output layer.
    dim_hidden = [200] * (n_layers - 1) + [dim_output]

    nn_input, action, precision = get_input_layer(dim_input, dim_output)
    mlp_applied = get_mlp_layers(nn_input, n_layers, dim_hidden)
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)

    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out])
def fully_connected_tf_network_leaky_relu(dim_input, dim_output, batch_size=25, network_config=None):
    """Fully-connected policy network with leaky-ReLU hidden activations.

    Hidden sizes come from network_config['dim_hidden']; a linear layer of
    size dim_output is appended as the final layer.

    Returns:
        a TfMap object holding inputs, outputs, and loss.
    """
    dim_hidden = network_config['dim_hidden'] + [dim_output]
    n_layers = len(dim_hidden)
    nn_input, action, precision = get_input_layer()

    # Build one (weight, bias) pair per layer.
    weights, biases = [], []
    prev_dim = dim_input
    for idx, cur_dim in enumerate(dim_hidden):
        weights.append(init_weights([prev_dim, cur_dim], name='w_' + str(idx)))
        biases.append(init_bias([cur_dim], name='b_' + str(idx)))
        prev_dim = cur_dim

    # Forward pass: leaky ReLU on every layer except the final (linear) one.
    cur_top = nn_input
    for idx in range(n_layers):
        pre_act = tf.matmul(cur_top, weights[idx]) + biases[idx]
        cur_top = pre_act if idx == n_layers - 1 else tf.nn.leaky_relu(pre_act)

    loss_out = get_loss_layer(mlp_out=cur_top, action=action,
                              precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [cur_top], [loss_out])
def tf_vae_network(dim_input=27, dim_output=7, batch_size=25, network_config=None, dim_latent=7):
    """VAE-style policy network: prior, encoder and decoder subnetworks.

    The prior conditions on state only; the encoder conditions on state and
    action; the decoder maps the latent back to an action distribution.

    Returns:
        (TfMap with policy_type "vae", list of all trainable variables, []).
    """
    state_input = tf.placeholder('float', [None, dim_input], name='state_input')
    action_input = tf.placeholder('float', [None, dim_output], name='action_input')

    # Three subnetworks; each returns its distribution params and variables.
    priori_mean, priori_sigma, priori_variables = priori_network(state_input, dim_latent)
    latent_mean, latent_sigma, encoder_variables = encoder_network(
        state_input, action_input, dim_latent)
    (decoder_mean, decoder_sigma, policy_mean, policy_sigma,
     decoder_variables) = decoder_network(state_input, latent_mean, latent_sigma,
                                          priori_mean, priori_sigma, dim_output)

    loss_op = get_vae_loss(action_input, priori_mean, priori_sigma,
                           latent_mean, latent_sigma, decoder_mean, decoder_sigma)
    all_variables = priori_variables + encoder_variables + decoder_variables

    tf_map = TfMap.init_from_lists([state_input, action_input, None],
                                   [policy_mean, policy_sigma], [loss_op],
                                   policy_type="vae")
    return tf_map, all_variables, []
def tf_gmm_network(dim_input=27, dim_output=7, batch_size=25, n_comp=3, network_config=None):
    """Fully-connected network whose head parameterizes a Gaussian mixture.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        n_comp: Number of mixture components.
        network_config: dictionary of network structure parameters
    Returns:
        (TfMap with policy_type "gmm", list of FC variables, []).
    """
    n_layers = 2 if 'n_layers' not in network_config else network_config['n_layers'] + 1
    # BUGFIX: copy the configured hidden sizes before appending. The original
    # appended onto network_config['dim_hidden'] itself, mutating the shared
    # config list on every call.
    if 'dim_hidden' in network_config:
        dim_hidden = list(network_config['dim_hidden'])
    else:
        dim_hidden = (n_layers - 1) * [40]
    # for each component, outputs weight(dim 1), mean(dim_output) and diag_covar (dim_output)
    dim_hidden.append(n_comp * (1 + dim_output * 2))

    nn_input, action, precision = get_input_layer(dim_input, dim_output)
    mlp_applied, weights_FC, biases_FC = get_mlp_layers(nn_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC

    weight, mean, pre = get_gmm_coef(mlp_applied, dim_output, n_comp)
    loss_out = get_gmm_loss_layer(weight=weight, mean=mean, pre=pre,
                                  action=action, n_comp=n_comp, dim_output=dim_output)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [weight, mean, pre], [loss_out],
                                 policy_type="gmm"), fc_vars, []
def first_derivative_network_swish(dim_input, dim_output, batch_size=25, network_config=None):
    """Network that applies each scalar layer weight to a whole param_dim-wide
    slice of the input, with swish activations on hidden layers.

    Each "unit" of layer l is a weighted sum of param_dim-wide column slices
    of the previous layer's output (the same scalar weight multiplies every
    element of a slice), so the output of a layer with k units is k*param_dim
    columns wide. The final layer is linear (no swish).

    NOTE(review): assumes nn_input has (history_len*2 + 1) * param_dim
    columns — one param_dim slice per input "channel"; confirm against
    get_input_layer and the caller.
    """
    nn_input, action, precision = get_input_layer()
    weights = []
    biases = []
    # One input slice per history entry of two quantities, plus the current one.
    in_shape = network_config['history_len']*2 + 1
    dim_hidden = [in_shape] + network_config['dim_hidden'] + [1]
    n_layers = len(dim_hidden) - 1
    param_dim = network_config['param_dim']
    # Optional warm-start values for weights/biases (None -> fresh init).
    weights_prev = network_config['weights_prev']
    biases_prev = network_config['biases_prev']
    for layer_step in range(n_layers):
        if weights_prev is None:
            w_prev = None
            b_prev = None
        else:
            w_prev = weights_prev[layer_step]
            b_prev = biases_prev[layer_step]
        cur_weight = init_weights([in_shape, dim_hidden[layer_step+1]],
                                  w_prev, name='w_'+str(layer_step))
        cur_bias = init_bias([dim_hidden[layer_step+1]],
                             b_prev, name='b_'+str(layer_step))
        in_shape = dim_hidden[layer_step+1]
        weights.append(cur_weight)
        biases.append(cur_bias)
    dim0 = batch_size
    cur_top = nn_input
    for layer_step in range(n_layers):
        top = {}
        # Each output unit i is a weighted sum over the input slices j, where
        # weights[layer_step][j, i] is a scalar applied to a whole slice.
        for i in range(dim_hidden[layer_step+1]):
            top['slice_'+str(i)] = weights[layer_step][0, i]*cur_top[:, 0:param_dim]
            for j in range(1, dim_hidden[layer_step]):
                loc = cur_top[:, param_dim*j: param_dim*(j+1)]
                top['slice_'+str(i)] = tf.add(top['slice_'+str(i)],
                                              weights[layer_step][j,i]*loc)
            # Scalar bias broadcast over the whole slice.
            top['slice_'+str(i)] = top['slice_'+str(i)] + biases[layer_step][i]
        # Concatenate the per-unit slices back into one wide tensor.
        cur_top = top['slice_'+str(0)]
        for i in range(1, dim_hidden[layer_step+1]):
            cur_top = tf.concat([cur_top, top['slice_'+str(i)]], axis = 1)
        if layer_step != n_layers - 1:  # final layer stays linear
            cur_top = tf.nn.swish(cur_top)
    mlp_applied = cur_top
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out])
def example_tf_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example of how one might want to specify a network in tensorflow.
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
    Returns:
        a TfMap object used to serialize, inputs, outputs, and loss.
    """
    n_layers = 2
    # One hidden layer of 40 units followed by the linear output layer.
    dim_hidden = [40] * (n_layers - 1) + [dim_output]

    nn_input, action, precision = get_input_layer(dim_input, dim_output)
    mlp_applied = get_mlp_layers(nn_input, n_layers, dim_hidden)
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)

    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out])
def tf_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ Specifying a fully-connected network in TensorFlow.

    NOTE(review): duplicates the name of an earlier definition in this file;
    this later one shadows it at import time.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        a TfMap object used to serialize, inputs, outputs, and loss.
    """
    n_layers = 2 if 'n_layers' not in network_config else network_config['n_layers'] + 1
    # BUGFIX: copy the configured hidden sizes before appending; the original
    # mutated network_config['dim_hidden'] in place, growing it on every call.
    if 'dim_hidden' in network_config:
        dim_hidden = list(network_config['dim_hidden'])
    else:
        dim_hidden = (n_layers - 1) * [40]
    dim_hidden.append(dim_output)

    nn_input, action, precision = get_input_layer(dim_input, dim_output)
    mlp_applied, weights_FC, biases_FC = get_mlp_layers(nn_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out]), fc_vars, []
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in tf that has both state and image inputs, with the feature
    point architecture (spatial softmax + expectation).
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i + dim))
        else:
            x_idx = x_idx + list(range(i, i + dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    # State columns come first in the observation, image columns after.
    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    # Convert to NHWC layout for conv2d.
    image_input = tf.transpose(image_input, perm=[0, 3, 2, 1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size) * num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            'wc1': init_weights(
                [filter_size, filter_size, num_channels, num_filters[0]],
                name='wc1'),  # 5x5 conv, 1 input, 32 outputs
            'wc2': init_weights(
                [filter_size, filter_size, num_filters[0], num_filters[1]],
                name='wc2'),  # 5x5 conv, 32 inputs, 64 outputs
            'wc3': init_weights(
                [filter_size, filter_size, num_filters[1], num_filters[2]],
                name='wc3'),  # 5x5 conv, 32 inputs, 64 outputs
        }
        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'],
                          strides=[1, 2, 2, 1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]

    # Normalized pixel-coordinate maps for the spatial-softmax expectation.
    x_map = np.empty([num_rows, num_cols], np.float32)
    y_map = np.empty([num_rows, num_cols], np.float32)
    for i in range(num_rows):
        for j in range(num_cols):
            x_map[i, j] = (i - num_rows / 2.0) / num_rows
            y_map[i, j] = (j - num_cols / 2.0) / num_cols
    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)
    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0, 3, 1, 2]),
                          [-1, num_rows * num_cols])
    softmax = tf.nn.softmax(features)
    # BUGFIX: tf.mul, tf.concat(axis_first, values) and the concat_dim= kwarg
    # were removed in TF 1.0; the rest of this file already uses the TF 1.x
    # API (tf.concat(values, axis=...), tf.summary, tf.nn.leaky_relu).
    fp_x = tf.reduce_sum(tf.multiply(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.multiply(y_map, softmax), [1], keep_dims=True)
    fp = tf.reshape(tf.concat([fp_x, fp_y], 1), [-1, num_fp * 2])

    fc_input = tf.concat([fp, state_input], axis=1)
    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output],
                                 [loss], fp=fp)
    last_conv_vars = fc_input
    return nnet, fc_vars, last_conv_vars
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in tf that has both state and image inputs.
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i + dim))
        else:
            x_idx = x_idx + list(range(i, i + dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size) * num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights(
            [filter_size, filter_size, num_channels, num_filters[0]],
            (pool_size, pool_size)),  # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights(
            [filter_size, filter_size, num_filters[0], num_filters[1]],
            (pool_size, pool_size)),  # 5x5 conv, 32 inputs, 64 outputs
    }
    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])
    # BUGFIX: the concat_dim= keyword was removed in TF 1.0; the rest of this
    # file already uses the TF 1.x API (tf.concat(values, axis=...)).
    fc_input = tf.concat([conv_out_flat, state_input], axis=1)

    fc_output, _, _ = get_mlp_layers(fc_input, n_layers, dim_hidden)
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
def recurrent_neural_network_multilayers(dim_input, dim_output, batch_size=25, network_config=None):
    """Hand-unrolled recurrent network over a fixed history of length T.

    At each step t the inner network (fcn4rnn with the 'inner' weights) maps
    [grad_t, loc_t, h] -> new hidden state h (out_shape slices wide), then a
    final step consumes the current grad/loc, and the outer network maps the
    last hidden state to a single output slice.

    NOTE(review): assumes nn_input is laid out as T gradient slices, then T
    location slices, then the current gradient and location slices, each
    param_dim columns wide — confirm against get_input_layer and the caller.
    """
    nn_input, action, precision = get_input_layer()
    weights = []
    biases = []
    T = network_config['history_len']
    # Hidden state is out_shape slices; inner input adds grad + loc slices.
    out_shape = 3
    in_shape = out_shape + 2
    dim_hidden = [in_shape] + network_config['dim_hidden'] + [out_shape]
    n_layers = len(dim_hidden) - 1
    param_dim = network_config['param_dim']
    # Optional warm-start values (None -> fresh init).
    weights_prev = network_config['weights_prev']
    biases_prev = network_config['biases_prev']
    in_shape1 = in_shape
    # Build the inner (recurrent-step) network parameters.
    for layer_step in range(n_layers):
        if weights_prev is None:
            w_prev = None
            b_prev = None
        else:
            w_prev = weights_prev[layer_step]
            b_prev = biases_prev[layer_step]
        cur_weight = init_weights([in_shape1, dim_hidden[layer_step+1]],
                                  w_prev, name='w_'+str(layer_step))
        cur_bias = init_bias([dim_hidden[layer_step+1]],
                             b_prev, name='b_'+str(layer_step))
        in_shape1 = dim_hidden[layer_step+1]
        weights.append(cur_weight)
        biases.append(cur_bias)
    # Build the outer (readout) network parameters: hidden state -> 1 slice.
    weights_output = []
    biases_output = []
    dim_hidden_output = [out_shape] + network_config['dim_hidden_output'] + [1]
    n_layers_output = len(dim_hidden_output)-1
    weights_prev_output = network_config['weights_prev_output']
    biases_prev_output = network_config['biases_prev_output']
    out_shape1 = out_shape
    for layer_step in range(n_layers_output):
        if weights_prev_output is None:
            w_prev = None
            b_prev = None
        else:
            w_prev = weights_prev_output[layer_step]
            b_prev = biases_prev_output[layer_step]
        cur_weight = init_weights([out_shape1, dim_hidden_output[layer_step+1]],
                                  w_prev, name='w_o_'+str(layer_step))
        cur_bias = init_bias([dim_hidden_output[layer_step+1]],
                             b_prev, name='b_o_'+str(layer_step))
        out_shape1 = dim_hidden_output[layer_step+1]
        weights_output.append(cur_weight)
        biases_output.append(cur_bias)
    # Zero initial hidden state with the right width (out_shape*param_dim).
    h = nn_input[:, 0:out_shape*param_dim]*0.0
    # Unroll over the history: feed [grad_t, loc_t, h] through the inner net.
    for t in range(T):
        grad_t = nn_input[:, t*param_dim:(t+1)*param_dim]
        loc_t = nn_input[:, (T+t)*param_dim:(T+t+1)*param_dim]
        nn_input_t = tf.concat([grad_t, loc_t], axis=1)
        nn_input_t = tf.concat([nn_input_t, h], axis=1)
        h = fcn4rnn(nn_input_t, param_dim, weights, biases, dim_hidden, 'inner')
    # One more inner step on the current (non-history) grad/loc slices.
    grad_cur = nn_input[:, (2*T)*param_dim:(2*T+1)*param_dim]
    loc_cur = nn_input[:, (2*T+1)*param_dim:(2*T+2)*param_dim]
    nn_input_cur = tf.concat([grad_cur, loc_cur], axis=1)
    nn_input_cur = tf.concat([nn_input_cur, h], axis=1)
    h = fcn4rnn(nn_input_cur, param_dim, weights, biases, dim_hidden, 'inner')
    # Readout: map the final hidden state to the output.
    y = fcn4rnn(h, param_dim, weights_output, biases_output, dim_hidden_output, 'outer')
    mlp_applied = y
    loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                              precision=precision, batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision],
                                 [mlp_applied], [loss_out])
def first_derivative_network(dim_input, dim_output, batch_size=25, network_config=None):
    """Slice-weighted network (ReLU variant) with multi-GPU tower training.

    Builds the same per-slice weight-sharing architecture as the swish
    variant in this file, then replicates the graph across the configured
    GPUs, averaging gradients across towers.

    Returns:
        (TfMap over the full-batch graph, train op applying averaged tower
        gradients, merged summary op, mean tower loss, concatenated tower
        outputs for probing).
    """
    nn_input, action, precision = get_input_layer()
    gpu_ids = network_config['gpu_ids']
    solver_type = network_config['solver_type']
    # Create the variables once under the "train" scope; towers reuse them.
    with tf.variable_scope("train"):
        weights = []
        biases = []
        in_shape = network_config['history_len']*2 + 1
        dim_hidden = [in_shape] + network_config['dim_hidden'] + [1]
        n_layers = len(dim_hidden) - 1
        param_dim = network_config['param_dim']
        # Optional warm-start values (None -> fresh init).
        weights_prev = network_config['weights_prev']
        biases_prev = network_config['biases_prev']
        for layer_step in range(n_layers):
            if weights_prev is None:
                w_prev = None
                b_prev = None
            else:
                w_prev = weights_prev[layer_step]
                b_prev = biases_prev[layer_step]
            cur_weight = init_weights([in_shape, dim_hidden[layer_step+1]],
                                      w_prev, name='w_'+str(layer_step))
            cur_bias = init_bias([dim_hidden[layer_step+1]],
                                 b_prev, name='b_'+str(layer_step))
            in_shape = dim_hidden[layer_step+1]
            weights.append(cur_weight)
            biases.append(cur_bias)
    dim0 = batch_size

    def my_net(nn_input, n_layers, dim_hidden, param_dim, weights, biases):
        # Forward pass: each unit is a weighted sum of param_dim-wide slices
        # of the previous layer (scalar weight per slice), ReLU on hidden layers.
        cur_top = nn_input
        for layer_step in range(n_layers):
            top = {}
            for i in range(dim_hidden[layer_step+1]):
                top['slice_'+str(i)] = weights[layer_step][0, i]*cur_top[:, 0:param_dim]
                for j in range(1, dim_hidden[layer_step]):
                    loc = cur_top[:, param_dim*j: param_dim*(j+1)]
                    top['slice_'+str(i)] = tf.add(top['slice_'+str(i)],
                                                  weights[layer_step][j,i]*loc)
                # Scalar bias broadcast over the whole slice.
                top['slice_'+str(i)] = top['slice_'+str(i)] + biases[layer_step][i]
            # Concatenate per-unit slices into one wide tensor.
            cur_top = top['slice_'+str(0)]
            for i in range(1, dim_hidden[layer_step+1]):
                cur_top = tf.concat([cur_top, top['slice_'+str(i)]], axis = 1)
            if layer_step != n_layers - 1:  # final layer stays linear
                cur_top = tf.nn.relu(cur_top)
        mlp_applied = cur_top
        return mlp_applied

    # Full-batch graph (used for the returned TfMap), reusing the variables.
    with tf.variable_scope("train", reuse=True):
        mlp_applied = my_net(nn_input, n_layers, dim_hidden, param_dim, weights, biases)
        loss_out = get_loss_layer(mlp_out=mlp_applied, action=action,
                                  precision=precision, batch_size=batch_size)
    # Split the batch evenly across GPUs for the training towers.
    nn_input_splits = tf.split(nn_input, num_or_size_splits=len(gpu_ids), axis=0)
    action_splits = tf.split(action, num_or_size_splits=len(gpu_ids), axis=0)
    precision_splits = tf.split(precision, num_or_size_splits=len(gpu_ids), axis=0)
    tower_grads = []
    tower_loss = []
    mlp_applied_all = []
    optimizer = get_optimizer(solver_type, network_config)
    # One tower per GPU, all reusing the shared "train" variables.
    with tf.variable_scope("train", reuse=True):
        for i in range(len(gpu_ids)):
            gpu_id = gpu_ids[i]
            with tf.device('/gpu:%d' % gpu_id):
                mlp_applied_each = my_net(nn_input_splits[i], n_layers,
                                          dim_hidden, param_dim, weights, biases)
                loss_out_each = get_loss_layer(mlp_out=mlp_applied_each,
                                               action=action_splits[i],
                                               precision=precision_splits[i],
                                               batch_size=int(batch_size/len(gpu_ids)))
                tf.summary.scalar('loss_out_gpu%d' % gpu_id, loss_out_each)
                grads = optimizer.compute_gradients(loss_out_each)
                tower_grads.append(grads)
                tower_loss.append(loss_out_each)
                mlp_applied_all.append(mlp_applied_each)
    avg_tower_loss = tf.reduce_mean(tower_loss, axis=0)
    tf.summary.scalar('avg_tower_loss', avg_tower_loss)
    # Average the per-tower gradients and apply them once.
    grads_avg = average_gradients(tower_grads)
    summary_op = tf.summary.merge_all()
    solver_op = optimizer.apply_gradients(grads_avg)
    # Re-assemble tower outputs in batch order for probing.
    mlp_applied_4prob = mlp_applied_all[0]
    for i in range(1, len(gpu_ids)):
        mlp_applied_4prob = tf.concat([mlp_applied_4prob, mlp_applied_all[i]], axis=0)
    return (TfMap.init_from_lists([nn_input, action, precision],
                                  [mlp_applied], [loss_out]),
            solver_op, summary_op, avg_tower_loss, mlp_applied_4prob)
def multi_modal_network(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in theano that has both state and image inputs.

    NOTE(review): duplicates the name of an earlier definition in this file;
    this later one shadows it at import time.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 2
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, im_width, im_height, num_channels])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                  (pool_size, pool_size)),  # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                  (pool_size, pool_size)),  # 5x5 conv, 32 inputs, 64 outputs
    }
    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_0 = max_pool(conv_layer_0, k=pool_size)
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_1 = max_pool(conv_layer_1, k=pool_size)

    conv_out_flat = tf.reshape(conv_layer_1, [-1, conv_out_size])
    # BUGFIX: the concat_dim= keyword was removed in TF 1.0; the rest of this
    # file already uses the TF 1.x API (tf.concat(values, axis=...)).
    fc_input = tf.concat([conv_out_flat, state_input], axis=1)

    # NOTE(review): other versions of get_mlp_layers in this file return a
    # (output, weights, biases) triple — confirm which one is in scope here.
    fc_output = get_mlp_layers(fc_input, n_layers, dim_hidden)
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
def multi_modal_network_spatial_softmax(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in theano that has both state and image inputs.
    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    # Convert to NHWC layout for conv2d.
    image_input = tf.transpose(image_input, perm=[0,3,2,1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                  (pool_size, pool_size)),  # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                  (pool_size, pool_size)),  # 5x5 conv, 32 inputs, 64 outputs
    }
    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])

    # Pixel-coordinate columns for the spatial-softmax expectation.
    full_y = np.tile(np.arange(im_width), (im_height,1))
    full_x = np.tile(np.arange(im_height), (im_width,1)).T
    full_x = tf.convert_to_tensor(np.reshape(full_x, [-1,1]), dtype=tf.float32)
    full_y = tf.convert_to_tensor(np.reshape(full_y, [-1,1]), dtype=tf.float32)

    # One (x, y) expected feature point per filter channel.
    feature_points = []
    for filter_number in range(num_filters[1]):
        conv_filter_chosen = conv_layer_1[:,:,:,filter_number]
        conv_filter_chosen = tf.reshape(conv_filter_chosen, [-1, im_width*im_height])
        conv_softmax = tf.nn.softmax(conv_filter_chosen)
        feature_points_x = tf.matmul(conv_softmax, full_x)
        feature_points_y = tf.matmul(conv_softmax, full_y)
        feature_points.append(feature_points_x)
        feature_points.append(feature_points_y)
    # BUGFIX: the concat_dim= keyword was removed in TF 1.0; the rest of this
    # file already uses the TF 1.x API (tf.concat(values, axis=...)).
    full_feature_points = tf.concat(feature_points, axis=1)

    fc_input = tf.concat([full_feature_points, state_input], axis=1)

    # NOTE(review): other versions of get_mlp_layers in this file return a
    # (output, weights, biases) triple — confirm which one is in scope here.
    fc_output = get_mlp_layers(fc_input, n_layers, dim_hidden)
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])
def multi_modal_network_fp(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in tf that has both state and image inputs, with the feature
    point architecture (spatial softmax + expectation).

    NOTE(review): duplicates the name of an earlier definition in this file;
    this later one shadows it at import time.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1)*[layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 5

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i+dim))
        else:
            x_idx = x_idx + list(range(i, i+dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1]+1]
    image_input = nn_input[:, x_idx[-1]+1:img_idx[-1]+1]

    # image goes through 3 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    # Convert to NHWC layout for conv2d.
    image_input = tf.transpose(image_input, perm=[0,3,2,1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width/(2.0*pool_size)*im_height/(2.0*pool_size)*num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    with tf.variable_scope('conv_params'):
        weights = {
            'wc1': init_weights([filter_size, filter_size, num_channels, num_filters[0]],
                                name='wc1'),  # 5x5 conv, 1 input, 32 outputs
            'wc2': init_weights([filter_size, filter_size, num_filters[0], num_filters[1]],
                                name='wc2'),  # 5x5 conv, 32 inputs, 64 outputs
            'wc3': init_weights([filter_size, filter_size, num_filters[1], num_filters[2]],
                                name='wc3'),  # 5x5 conv, 32 inputs, 64 outputs
        }
        biases = {
            'bc1': init_bias([num_filters[0]], name='bc1'),
            'bc2': init_bias([num_filters[1]], name='bc2'),
            'bc3': init_bias([num_filters[2]], name='bc3'),
        }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'], strides=[1,2,2,1])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])
    conv_layer_2 = conv2d(img=conv_layer_1, w=weights['wc3'], b=biases['bc3'])

    _, num_rows, num_cols, num_fp = conv_layer_2.get_shape()
    num_rows, num_cols, num_fp = [int(x) for x in [num_rows, num_cols, num_fp]]

    # Normalized pixel-coordinate maps for the spatial-softmax expectation.
    x_map = np.empty([num_rows, num_cols], np.float32)
    y_map = np.empty([num_rows, num_cols], np.float32)
    for i in range(num_rows):
        for j in range(num_cols):
            x_map[i, j] = (i - num_rows / 2.0) / num_rows
            y_map[i, j] = (j - num_cols / 2.0) / num_cols
    x_map = tf.convert_to_tensor(x_map)
    y_map = tf.convert_to_tensor(y_map)
    x_map = tf.reshape(x_map, [num_rows * num_cols])
    y_map = tf.reshape(y_map, [num_rows * num_cols])

    # rearrange features to be [batch_size, num_fp, num_rows, num_cols]
    features = tf.reshape(tf.transpose(conv_layer_2, [0,3,1,2]),
                          [-1, num_rows*num_cols])
    softmax = tf.nn.softmax(features)
    # BUGFIX: tf.mul, tf.concat(axis_first, values) and the concat_dim= kwarg
    # were removed in TF 1.0; the rest of this file already uses the TF 1.x
    # API (tf.concat(values, axis=...), tf.summary, tf.nn.leaky_relu).
    fp_x = tf.reduce_sum(tf.multiply(x_map, softmax), [1], keep_dims=True)
    fp_y = tf.reduce_sum(tf.multiply(y_map, softmax), [1], keep_dims=True)
    fp = tf.reshape(tf.concat([fp_x, fp_y], 1), [-1, num_fp*2])

    fc_input = tf.concat([fp, state_input], axis=1)
    fc_output, weights_FC, biases_FC = get_mlp_layers(fc_input, n_layers, dim_hidden)
    fc_vars = weights_FC + biases_FC
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    nnet = TfMap.init_from_lists([nn_input, action, precision], [fc_output],
                                 [loss], fp=fp)
    last_conv_vars = fc_input
    return nnet, fc_vars, last_conv_vars
def multi_modal_network_spatial_softmax(dim_input=27, dim_output=7, batch_size=25, network_config=None):
    """ An example a network in theano that has both state and image inputs.

    NOTE(review): duplicates the name of an earlier definition in this file;
    this later one shadows it at import time.

    Args:
        dim_input: Dimensionality of input.
        dim_output: Dimensionality of the output.
        batch_size: Batch size.
        network_config: dictionary of network structure parameters
    Returns:
        A tfMap object that stores inputs, outputs, and scalar loss.
    """
    n_layers = 3
    layer_size = 20
    dim_hidden = (n_layers - 1) * [layer_size]
    dim_hidden.append(dim_output)
    pool_size = 2
    filter_size = 3

    # List of indices for state (vector) data and image (tensor) data in observation.
    x_idx, img_idx, i = [], [], 0
    for sensor in network_config['obs_include']:
        dim = network_config['sensor_dims'][sensor]
        if sensor in network_config['obs_image_data']:
            img_idx = img_idx + list(range(i, i + dim))
        else:
            x_idx = x_idx + list(range(i, i + dim))
        i += dim

    nn_input, action, precision = get_input_layer(dim_input, dim_output)

    state_input = nn_input[:, 0:x_idx[-1] + 1]
    image_input = nn_input[:, x_idx[-1] + 1:img_idx[-1] + 1]

    # image goes through 2 convnet layers
    num_filters = network_config['num_filters']
    im_height = network_config['image_height']
    im_width = network_config['image_width']
    num_channels = network_config['image_channels']
    image_input = tf.reshape(image_input, [-1, num_channels, im_width, im_height])
    # Convert to NHWC layout for conv2d.
    image_input = tf.transpose(image_input, perm=[0, 3, 2, 1])

    # we pool twice, each time reducing the image size by a factor of 2.
    conv_out_size = int(im_width / (2.0 * pool_size) * im_height / (2.0 * pool_size) * num_filters[1])
    first_dense_size = conv_out_size + len(x_idx)

    # Store layers weight & bias
    weights = {
        'wc1': get_xavier_weights(
            [filter_size, filter_size, num_channels, num_filters[0]],
            (pool_size, pool_size)),  # 5x5 conv, 1 input, 32 outputs
        'wc2': get_xavier_weights(
            [filter_size, filter_size, num_filters[0], num_filters[1]],
            (pool_size, pool_size)),  # 5x5 conv, 32 inputs, 64 outputs
    }
    biases = {
        'bc1': init_bias([num_filters[0]]),
        'bc2': init_bias([num_filters[1]]),
    }

    conv_layer_0 = conv2d(img=image_input, w=weights['wc1'], b=biases['bc1'])
    conv_layer_1 = conv2d(img=conv_layer_0, w=weights['wc2'], b=biases['bc2'])

    # Pixel-coordinate columns for the spatial-softmax expectation.
    full_y = np.tile(np.arange(im_width), (im_height, 1))
    full_x = np.tile(np.arange(im_height), (im_width, 1)).T
    full_x = tf.convert_to_tensor(np.reshape(full_x, [-1, 1]), dtype=tf.float32)
    full_y = tf.convert_to_tensor(np.reshape(full_y, [-1, 1]), dtype=tf.float32)

    # One (x, y) expected feature point per filter channel.
    feature_points = []
    for filter_number in range(num_filters[1]):
        conv_filter_chosen = conv_layer_1[:, :, :, filter_number]
        conv_filter_chosen = tf.reshape(conv_filter_chosen, [-1, im_width * im_height])
        conv_softmax = tf.nn.softmax(conv_filter_chosen)
        feature_points_x = tf.matmul(conv_softmax, full_x)
        feature_points_y = tf.matmul(conv_softmax, full_y)
        feature_points.append(feature_points_x)
        feature_points.append(feature_points_y)
    # BUGFIX: the concat_dim= keyword was removed in TF 1.0; the rest of this
    # file already uses the TF 1.x API (tf.concat(values, axis=...)).
    full_feature_points = tf.concat(feature_points, axis=1)

    fc_input = tf.concat([full_feature_points, state_input], axis=1)

    # NOTE(review): other versions of get_mlp_layers in this file return a
    # (output, weights, biases) triple — confirm which one is in scope here.
    fc_output = get_mlp_layers(fc_input, n_layers, dim_hidden)
    loss = euclidean_loss_layer(a=action, b=fc_output, precision=precision,
                                batch_size=batch_size)
    return TfMap.init_from_lists([nn_input, action, precision], [fc_output], [loss])