def _build_layers_v2(self, input_dict, num_outputs, options):
    """Conv tower over the observation (tf.layers variant).

    Builds SAME-padded convs for all but the last filter spec, then a
    VALID "fully-connected" conv (fc1) and a 1x1 output conv (fc2).

    Returns:
        (flatten(fc2), flatten(fc1)): output logits and feature layer.
    """
    net = input_dict["obs"]
    conv_specs = options.get("conv_filters")
    if not conv_specs:
        # Derive a default filter layout from the (H, W, C) obs shape.
        conv_specs = _get_filter_config(net.shape.as_list()[1:])
    act_fn = get_activation_fn(options.get("conv_activation"))
    with tf.name_scope("vision_net"):
        layer_idx = 1
        for out_size, kernel, stride in conv_specs[:-1]:
            net = tf.layers.conv2d(
                net, out_size, kernel, stride,
                activation=act_fn, padding="same",
                name="conv{}".format(layer_idx))
            layer_idx += 1
        # Final spec acts as a fully-connected layer via a VALID conv.
        out_size, kernel, stride = conv_specs[-1]
        fc1 = tf.layers.conv2d(
            net, out_size, kernel, stride,
            activation=act_fn, padding="valid", name="fc1")
        fc2 = tf.layers.conv2d(
            fc1, num_outputs, [1, 1],
            activation=None, padding="same", name="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Conv tower over the observation (TF-Slim variant).

    All filter specs except the last become slim conv layers; the last
    spec is applied with VALID padding as a conv-style dense layer (fc1),
    followed by a 1x1 conv producing the logits (fc2).

    Returns:
        (flatten(fc2), flatten(fc1)): output logits and feature layer.
    """
    net = input_dict["obs"]
    specs = options.get("conv_filters")
    if not specs:
        # No explicit layout supplied — ask the helper for a default.
        specs = get_filter_config(net)
    act_fn = get_activation_fn(options.get("conv_activation"))
    with tf.name_scope("vision_net"):
        for idx, spec in enumerate(specs[:-1], 1):
            out_size, kernel, stride = spec
            net = slim.conv2d(
                net, out_size, kernel, stride,
                activation_fn=act_fn,
                scope="conv{}".format(idx))
        out_size, kernel, stride = specs[-1]
        fc1 = slim.conv2d(
            net, out_size, kernel, stride,
            activation_fn=act_fn,
            padding="VALID",
            scope="fc1")
        fc2 = slim.conv2d(
            fc1, num_outputs, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Standard vision network: stacked slim convs + conv-style head.

    Returns:
        (flatten(fc2), flatten(fc1)): logits of size num_outputs and the
        penultimate feature layer, both flattened to [BATCH, -1].
    """
    hidden = input_dict["obs"]
    filter_specs = options.get("conv_filters")
    if not filter_specs:
        # Default layout chosen from the per-sample obs shape (drop batch dim).
        filter_specs = _get_filter_config(hidden.shape.as_list()[1:])
    act = get_activation_fn(options.get("conv_activation"))
    with tf.name_scope("vision_net"):
        n = 1
        for out_size, kernel, stride in filter_specs[:-1]:
            hidden = slim.conv2d(
                hidden, out_size, kernel, stride,
                activation_fn=act, scope="conv{}".format(n))
            n += 1
        # Head: last spec as a VALID conv, then 1x1 conv to num_outputs.
        out_size, kernel, stride = filter_specs[-1]
        fc1 = slim.conv2d(
            hidden, out_size, kernel, stride,
            activation_fn=act, padding="VALID", scope="fc1")
        fc2 = slim.conv2d(
            fc1, num_outputs, [1, 1],
            activation_fn=None, normalizer_fn=None, scope="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Transformer encoder/decoder over obs + prev_actions, conv head.

    The observation is encoded with a 2D-attention transformer encoder;
    the previous actions tensor is used as the decoder "targets". The
    decoder output then goes through a VALID conv (fc1) and a 1x1 conv
    producing the logits (fc2).

    NOTE(review): hparams come from options["custom_options"]["hparams"];
    their exact schema (num_encoder_layers, enc/dec attention types, ...)
    is defined by the tensor2tensor `cia` module — not visible here.

    Returns:
        (flatten(fc2), flatten(fc1)): logits and feature layer.
    """
    # Shallow-copy so any hparam mutation below doesn't leak to the caller.
    hparams = copy.copy(options["custom_options"]["hparams"])
    targets = input_dict["prev_actions"]
    inputs = input_dict["obs"]
    with tf.name_scope('enc_prep'):
        encoder_input = cia.prepare_encoder(inputs, hparams)
    with tf.name_scope('enc_layers'):
        encoder_output = cia.transformer_encoder_layers(
            encoder_input,
            hparams.num_encoder_layers,
            hparams,
            attention_type=hparams.enc_attention_type,
            name="encoder")
    with tf.name_scope('dec_prep'):
        # rows/cols from prepare_decoder are only needed when rebuilding the
        # image output (cia.create_output), which this model does not do.
        decoder_input, _, _ = cia.prepare_decoder(targets, hparams)
    with tf.name_scope('dec_layers'):
        decoder_output = cia.transformer_decoder_layers(
            decoder_input,
            encoder_output,
            hparams.num_decoder_layers,
            hparams,
            attention_type=hparams.dec_attention_type,
            name="decoder")
    # Conv head on top of the decoder output.
    out_size, kernel, stride = 32, [3, 3], 2
    activation = get_activation_fn(options.get("conv_activation"))
    fc1 = slim.conv2d(
        decoder_output, out_size, kernel, stride,
        activation_fn=activation,
        padding="VALID",
        scope="fc1")
    fc2 = slim.conv2d(
        fc1, num_outputs, [1, 1],
        activation_fn=None,
        normalizer_fn=None,
        scope="fc2")
    return flatten(fc2), flatten(fc1)
def _build_layers(self, inputs, num_outputs, options):
    """Vision network with optional CoordConv-style coordinate channels.

    If custom_options.add_coordinates is set, coordinate channels (and an
    optional radius channel via add_coords_with_r) are appended to the
    input before the conv stack.

    Returns:
        (flatten(fc2), flatten(fc1)): logits of size num_outputs and the
        penultimate feature layer, both flattened.
    """
    if options.get('custom_options', {}).get('add_coordinates'):
        with_r = False
        if options.get('custom_options', {}).get('add_coords_with_r'):
            with_r = True
        # NOTE(review): both x_dim and y_dim use np.shape(inputs)[1] —
        # this assumes square inputs; for non-square obs y_dim should
        # presumably be np.shape(inputs)[2]. TODO confirm.
        addcoords = AddCoords(
            x_dim=int(np.shape(inputs)[1]),
            y_dim=int(np.shape(inputs)[1]),
            with_r=with_r)
        inputs = addcoords(inputs)
        print("visionnet: Added coordinate filters tensor size is now {}".
              format(np.shape(inputs)))
    filters = options.get("conv_filters")
    if not filters:
        # NOTE(review): unlike sibling models this passes `options`, not the
        # obs shape, to the filter-config helper — verify the helper's API.
        filters = get_filter_config(options)
    activation = get_activation_fn(options.get("conv_activation", "relu"))
    with tf.name_scope("vision_net"):
        # All but the last filter spec: SAME-padded slim conv layers.
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            inputs = slim.conv2d(
                inputs, out_size, kernel, stride,
                activation_fn=activation,
                scope="conv{}".format(i))
        # Last spec: VALID conv acting as a fully-connected layer.
        out_size, kernel, stride = filters[-1]
        fc1 = slim.conv2d(
            inputs, out_size, kernel, stride,
            activation_fn=activation,
            padding="VALID",
            scope="fc1")
        # TF 1.4.0 compatibility: that release needed a scalar kernel size
        # here; later versions take the [1, 1] list form.
        if tf.__version__ == '1.4.0':
            fc2 = slim.conv2d(
                fc1, num_outputs, 1,
                activation_fn=None,
                normalizer_fn=None,
                scope="fc2")
        else:
            fc2 = slim.conv2d(
                fc1, num_outputs, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers(self, inputs, num_outputs, options):
    """Small fixed conv+dense model (two convs, one hidden FC layer).

    Returns:
        (output, last_layer): logits of size num_outputs and the 256-wide
        hidden layer used as the feature layer.
    """
    with tf.name_scope("KhanElibolModel"):
        conv1 = layers.conv2d(inputs, 16, (4, 4), activation=tf.nn.relu)
        conv2 = layers.conv2d(conv1, 32, (2, 2), activation=tf.nn.relu)
        flat = flatten(conv2)
        hidden = layers.dense(
            flat,
            256,
            kernel_initializer=normc_initializer(0.01),
            activation=tf.nn.relu)
        logits = layers.dense(
            hidden,
            num_outputs,
            kernel_initializer=normc_initializer(0.01),
            activation=None)
        return logits, hidden
def _build_layers(self, inputs, num_outputs, options):
    """Vision network with a fixed filter layout selected by input dim.

    Args:
        inputs: image observation tensor.
        num_outputs: size of the logits output.
        options: model options; must contain "dim" in {84, 42, 21}.

    Returns:
        (flatten(fc2), flatten(fc1)): logits and feature layer.

    Raises:
        ValueError: if options["dim"] is not one of the supported sizes.
    """
    dim = options["dim"]
    # First layer kernel/stride scales with resolution; the shared final
    # [256, 11x11, 1] VALID conv collapses the remaining spatial dims.
    if dim == 84:
        filters = [
            [16, [8, 8], 4],
            [32, [4, 4], 2],
            [256, [11, 11], 1],
        ]
    elif dim == 42:
        filters = [
            [16, [4, 4], 2],
            [32, [4, 4], 2],
            [256, [11, 11], 1],
        ]
    elif dim == 21:
        filters = [
            [16, [2, 2], 1],
            [32, [4, 4], 2],
            [256, [11, 11], 1],
        ]
    else:
        # Previously an unsupported dim fell through silently and later
        # crashed with NameError on `filters`; fail fast instead.
        raise ValueError(
            "Unsupported dim {}: expected 84, 42 or 21".format(dim))
    activation = tf.nn.relu
    with tf.name_scope("vision_net"):
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            inputs = slim.conv2d(
                inputs, out_size, kernel, stride,
                activation_fn=activation,
                scope="conv{}".format(i))
        out_size, kernel, stride = filters[-1]
        fc1 = slim.conv2d(
            inputs, out_size, kernel, stride,
            activation_fn=activation,
            padding="VALID",
            scope="fc1")
        fc2 = slim.conv2d(
            fc1, num_outputs, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope="fc2")
        return flatten(fc2), flatten(fc1)
def _build_layers(self, inputs, num_outputs, options):
    """Fully-connected network over the flattened observation.

    Hidden sizes come from options["fcnet_hiddens"] (default [256, 256]);
    the hidden activation from options["fcnet_activation"] (default relu).

    Returns:
        (output, last_layer): logits and the last hidden layer.
    """
    hidden_sizes = options.get("fcnet_hiddens", [256, 256])
    act_fn = get_activation_fn(options.get("fcnet_activation", "relu"))
    with tf.name_scope("fc_net"):
        net = flatten(inputs)
        for width in hidden_sizes:
            net = layers.dense(
                net,
                width,
                kernel_initializer=normc_initializer(1.0),
                activation=act_fn)
        logits = layers.dense(
            net,
            num_outputs,
            kernel_initializer=normc_initializer(1.0),
            activation=None)
        return logits, net
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Small conv + FC network; optionally appends smoothed rewards.

    When the obs is a list, element 0 is the image and element 1 is a
    smoothed-rewards tensor that gets concatenated onto the output.

    Returns:
        (output, last_layer): logits (possibly extended with the smoothed
        rewards) and the last hidden FC layer.
    """
    obs = input_dict["obs"]
    smoothed_rews = None
    if isinstance(obs, list):
        # [image, smoothed_rewards] packed observation.
        smoothed_rews = obs[1]
        obs = obs[0]
    hidden_sizes = [32, 32]
    with tf.name_scope("custom_net"):
        conv_out = slim.conv2d(
            obs, 6, [3, 3], 1,
            activation_fn=tf.nn.relu,
            scope="conv")
        net = flatten(conv_out)
        for idx, width in enumerate(hidden_sizes, 1):
            net = slim.fully_connected(
                net,
                width,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.relu,
                scope="fc{}".format(idx))
        logits = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        if smoothed_rews is not None:
            logits = tf.concat([logits, smoothed_rews], axis=-1)
        return logits, net
def _init(self, inputs, num_outputs, options):
    """Conv stack + single-layer LSTM policy head.

    Four strided ELU convs feed an LSTM run over the time dimension; the
    LSTM output is projected to action logits. Exposes recurrent state via
    self.state_init / self.state_in / self.state_out for the rollout loop.

    Returns:
        (logits, x): action logits and the flattened LSTM output features.
    """
    # TF >= 1.0 moved the RNN cell classes; pick the matching API below.
    use_tf100_api = (distutils.version.LooseVersion(tf.VERSION) >=
                     distutils.version.LooseVersion("1.0.0"))
    self.x = x = inputs
    for i in range(4):
        x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
    # Introduce a "fake" batch dimension of 1 after flatten so that we can
    # do LSTM over the time dim.
    x = tf.expand_dims(flatten(x), [0])
    size = 256
    if use_tf100_api:
        lstm = rnn.BasicLSTMCell(size, state_is_tuple=True)
    else:
        lstm = rnn.rnn_cell.BasicLSTMCell(size, state_is_tuple=True)
    # Number of timesteps = leading dim of the original input batch.
    step_size = tf.shape(self.x)[:1]
    # Zero initial state, batch size 1 (matches the fake batch dim above).
    c_init = np.zeros((1, lstm.state_size.c), np.float32)
    h_init = np.zeros((1, lstm.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    # Placeholders so the caller can feed recurrent state between rollouts.
    c_in = tf.placeholder(tf.float32, [1, lstm.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm.state_size.h])
    self.state_in = [c_in, h_in]
    if use_tf100_api:
        state_in = rnn.LSTMStateTuple(c_in, h_in)
    else:
        state_in = rnn.rnn_cell.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(
        lstm, x,
        initial_state=state_in,
        sequence_length=step_size,
        time_major=False)
    lstm_c, lstm_h = lstm_state
    # Drop the fake batch dim: back to [time, size] for the linear head.
    x = tf.reshape(lstm_out, [-1, size])
    logits = linear(x, num_outputs, "action", normc_initializer(0.01))
    # Keep only the (single) batch row of the final state for the next step.
    self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
    return logits, x
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Experimental model: conv stem -> img2img transformer -> vision net.

    The obs goes through a 3x3 conv stem, an image transformer encoder
    (tensor2tensor), batch norm, and finally the standard conv vision
    network. Contains live debug prints — looks work-in-progress.

    Returns:
        (flatten(fc2), flatten(fc1)): logits and feature layer.
    """
    inputs = input_dict["obs"]
    filters = options.get("conv_filters")
    if not filters:
        filters = _get_filter_config(inputs.shape.as_list()[1:])
    activation = get_activation_fn(options.get("conv_activation"))
    # Conv stem producing 16 channels for the transformer.
    inputs = slim.conv2d(inputs, 16, (3, 3), 1,
                         activation_fn=activation,
                         scope="conv_trans_in")
    # NOTE(review): the result of this pooling op is discarded — it adds a
    # node to the graph but does NOT feed the transformer below. Either
    # assign it back to `inputs` or delete it; as written it is a no-op in
    # the dataflow. TODO confirm intent.
    tf.layers.max_pooling2d(
        inputs, (2, 2),
        strides=1,
        padding='same',
        # data_format='channels_last',
        name="pooling")
    """ Begin Transformer"""
    hparams = image_transformer_2d.img2img_transformer2d_tiny()
    hparams.data_dir = ""
    hparams.img_len = IMAGE
    hparams.num_channels = 16
    hparams.hidden_size = 8
    p_hparams = Img2imgCeleba().get_hparams(hparams)
    p_hparams.modality = {
        "inputs": modalities.ModalityType.IMAGE,
        "targets": modalities.ModalityType.IMAGE,
    }
    p_hparams.vocab_size = {
        "inputs": IMAGE,
        "targets": IMAGE,
    }
    features = {
        "inputs": inputs,
        # "targets": target,
        # "target_space_id": tf.constant(1, dtype=tf.int32),
    }
    # Encoder-only wrapper (instead of the full Img2imgTransformer).
    model = ImgEncTransformer(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    trans_logits, trans_losses = model(features)
    print("trans_logits", trans_logits)
    print("inputs", inputs)
    """ End Transformer"""
    # NOTE(review): transformer output is batch-normed before the vision
    # net; the raw `trans_logits` path (commented out upstream) was the
    # alternative. 'NHWC' layout is assumed here — TODO confirm.
    inputs = tf.contrib.layers.batch_norm(
        trans_logits,
        data_format='NHWC',
        center=True,
        scale=True,
        # is_training=training,
        scope='cnn-batch_norm')
    with tf.name_scope("vision_net"):
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            inputs = slim.conv2d(inputs, out_size, kernel, stride,
                                 activation_fn=activation,
                                 scope="conv{}".format(i))
            print(i, inputs)
        out_size, kernel, stride = filters[-1]
        fc1 = slim.conv2d(inputs, out_size, kernel, stride,
                          activation_fn=activation,
                          padding="VALID",
                          scope="fc1")
        fc2 = slim.conv2d(fc1, num_outputs, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          scope="fc2")
        print(fc1, fc2)
        print(flatten(fc1), flatten(fc2))
        return flatten(fc2), flatten(fc1)
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Two-branch model over a dict observation space.

    Branch 1 ("sensors") concatenates the flat readings: robot_theta,
    robot_velocity, target and ckpts. Branch 2 ("conv_net") runs a
    4-conv (+max-pool) tower over the 'maps' image. Both are merged and
    passed through 128- and 64-wide FC layers.

    Returns:
        (output, feature_layer): logits [BATCH, num_outputs] and the
        64-wide hidden layer [BATCH, 64].
    """
    obs = input_dict["obs"]

    # Flat sensor branch: concatenate the low-dimensional readings.
    with tf.name_scope("sensors"):
        sensor_vec = tf.concat(
            [
                obs['robot_theta'],
                obs['robot_velocity'],
                obs['target'],
                obs['ckpts'],
            ],
            axis=1,
            name='concat')

    # Conv branch over the 2D map observation.
    with tf.name_scope("conv_net"):
        net = slim.conv2d(
            obs['maps'], 16, kernel_size=(3, 3), stride=2,
            activation_fn=tf.nn.relu, padding="SAME", scope="conv1")
        net = slim.conv2d(
            net, 32, kernel_size=(3, 3), stride=2,
            activation_fn=tf.nn.relu, scope="conv2")
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(
            net, 32, kernel_size=(3, 3), stride=2,
            activation_fn=tf.nn.relu, scope="conv3")
        net = slim.conv2d(
            net, 32, kernel_size=(3, 3), stride=1,
            activation_fn=tf.nn.relu, scope="conv4")
        conv_vec = flatten(net)

    # Merge both branches and apply the FC head.
    with tf.name_scope("combined"):
        joint = tf.concat([sensor_vec, conv_vec], axis=1)
        hidden = slim.fully_connected(
            joint,
            128,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.relu)
        feature_layer = slim.fully_connected(
            hidden,
            64,
            weights_initializer=normc_initializer(1.0),
            activation_fn=tf.nn.relu)
        logits = slim.fully_connected(
            feature_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None)
        return logits, feature_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Conv + FC network; optionally appends smoothed rewards to output.

    When the obs arrives as a list, element 0 is the image and element 1
    a smoothed-rewards tensor that is concatenated onto the final logits.
    (Cleaned up: removed large amounts of commented-out debug prints;
    logic is unchanged.)

    Returns:
        (output, last_layer): logits (possibly extended with the smoothed
        rewards) and the last hidden FC layer.
    """
    inputs = input_dict["obs"]
    smoothed_rews = None
    if isinstance(inputs, list):
        # [image, smoothed_rewards] packed observation.
        smoothed_rews = inputs[1]
        inputs = inputs[0]
    hiddens = [64, 64]
    with tf.name_scope("custom_net"):
        inputs = slim.conv2d(
            inputs, 16, [3, 3], 1,
            activation_fn=tf.nn.relu,
            scope="conv")
        last_layer = flatten(inputs)
        for i, size in enumerate(hiddens, 1):
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.relu,
                scope="fc{}".format(i))
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        if smoothed_rews is not None:
            # Expose the smoothed rewards alongside the logits.
            output = tf.concat([output, smoothed_rews], axis=-1)
        return output, last_layer
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Vision net for (optionally dict-structured) Doom observations.

    The image is normalized from [0, 255], run through a conv stack, and
    optionally fused with a 2-layer FC embedding of the 'measurements'
    vector (health, ammo, etc.) before the final FC head.

    Fixes vs. original:
      * The fc-hidden loop fed `all_input` into every layer instead of
        chaining layer outputs — harmless with one hidden layer, wrong
        for more. Layers are now chained.
      * Removed extraneous f-prefixes on constant name strings (F541).

    Returns:
        (fc_final, last_hidden): logits and the last hidden layer (used
        later for the value function).
    """
    if isinstance(input_dict['obs'], dict):
        # Doom-style dict observation: the image lives under the 'obs' key.
        obs = input_dict['obs']['obs']
    else:
        obs = input_dict['obs']
    obs = tf_normalize(obs, self.obs_space, low=0, high=255)

    if isinstance(input_dict['obs'], dict) and 'measurements' in input_dict['obs']:
        # health, ammo, etc.
        measurements = input_dict['obs']['measurements']
    else:
        measurements = None

    filters = options.get('conv_filters')
    if not filters:
        filters = _get_filter_config(obs.shape.as_list()[1:])
    activation = get_activation_fn(options.get('conv_activation'))
    fcnet_activation = get_activation_fn(options.get('fcnet_activation'))

    with tf.name_scope('vision_net'):
        for i, (out_size, kernel, stride) in enumerate(filters, 1):
            obs = tf.layers.conv2d(
                obs, out_size, kernel, stride,
                activation=activation,
                padding='same',
                name='conv{}'.format(i),
            )
        vis_input_flat = flatten(obs)

        if measurements is not None:
            # Embed the measurement vector before fusing with vision features.
            measurements_fc = tf.layers.dense(
                measurements, 128, activation=fcnet_activation, name='m_fc1')
            measurements_fc = tf.layers.dense(
                measurements_fc, 128, activation=fcnet_activation, name='m_fc2')
            all_input = tf.concat([vis_input_flat, measurements_fc], axis=1)
        else:
            all_input = vis_input_flat

        fc_hiddens = [512]
        last_hidden = all_input
        for i, fc_hidden in enumerate(fc_hiddens, 1):
            # Chain hidden layers (original re-used all_input every pass).
            last_hidden = tf.layers.dense(
                last_hidden, fc_hidden, activation=fcnet_activation,
                name=f'fc{i}')

        fc_final = tf.layers.dense(
            last_hidden, num_outputs, activation=None, name='fc_final')
        return fc_final, last_hidden