def create_somenet_from_feature(self,
                                hidden,
                                netname,
                                elu,
                                lstm,
                                initialized_as_zero=False,
                                nolu_at_final=False,
                                batch_normalization=None):
    """Build a ConvApplier head called `netname` on the multi-view features.

    The head consumes `self.cur_mvfeatvec`; per the original author's note,
    the flattened multi-view feature vector [B, 1, F*V] should be used.
    When `lstm` is exactly `True`, the features are first routed through
    `self.get_lstm_featvec('LSTM', ...)`.

    Args:
        hidden: layer sizes handed to `vision.ConvApplier`.
        netname: name of the ConvApplier; also the scope used to collect
            the returned variables.
        elu: forwarded positionally to `vision.ConvApplier`.
        lstm: only the literal `True` enables the LSTM stage.
        initialized_as_zero: forwarded to `vision.ConvApplier`.
        nolu_at_final: forwarded to `vision.ConvApplier`.
        batch_normalization: forwarded to `vision.ConvApplier`.

    Returns:
        Tuple `(out, params, [net])`: the second element of
        `net.infer(...)`, the global variables collected under scope
        `netname`, and a one-element list holding the ConvApplier.
    """
    head_input = self.cur_mvfeatvec
    if lstm is True:
        head_input = self.get_lstm_featvec('LSTM', head_input)

    applier_options = {
        'initialized_as_zero': initialized_as_zero,
        'nolu_at_final': nolu_at_final,
        'batch_normalization': batch_normalization,
    }
    applier = vision.ConvApplier(None, hidden, netname, elu,
                                 **applier_options)
    _infer_params, head_output = applier.infer(head_input)

    # TODO: Check if this returns LSTM as well (probably not)
    collected = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=netname)
    print('{} Params {}'.format(netname, collected))
    print('LSTM Params {}'.format(self.acquire_lstm_params()))
    return head_output, collected, [applier]
def get_forward_model(self, jointfw=False, output_fn=-1):
    """Build the forward model once and return `(params, out)`.

    The pipeline uses a [None, V, N] feature tensor so that per-view and
    combined-view tensors share one 3D layout.

    Args:
        jointfw: when true, all V views are predicted jointly from a
            [-1, 1, V*N] reshaped feature vector and the raw action tensor.
            Otherwise the action is first converted by
            `self.get_local_action` and each view is predicted individually.
        output_fn: width of the final layer. Values <= 0 mean "same as the
            last dimension of the input feature vector".

    Returns:
        On the first call: the global variables collected under the model's
        scope name, and the output tensor. On later calls: the cached
        `self.forward_model_params` and `self.forward_output_tensor`.
    """
    # NOTE(review): the cached path returns the params produced by
    # ConvApplier.infer, while the first call returns the collection gathered
    # by tf.get_collection below. Confirm which one callers expect before
    # unifying them.
    if self.forward_output_tensor is not None:
        return self.forward_model_params, self.forward_output_tensor

    if jointfw:
        # Joint prediction of V views.
        # Expecting better accuracy with V^2 size of network.
        V = int(self.cur_featvec.shape[1])
        N = int(self.cur_featvec.shape[2])
        input_featvec = tf.reshape(self.cur_featvec, [-1, 1, V * N])
        atensor = self.action_tensor
        name = 'JointForwardModelNet'
    else:
        # Without --jointfw, get_local_action generates the action in the
        # local coordinate system and each view is predicted individually.
        atensor = self.get_local_action(self.action_tensor)
        input_featvec = self.cur_featvec
        name = 'ForwardModelNet'

    if output_fn <= 0:
        final_width = int(input_featvec.shape[-1])
    else:
        final_width = output_fn
    # Copy before extending: the original `featnums += [...]` appended the
    # output width to self.fwhidden_params itself, leaking it into every
    # other user of that list.
    featnums = list(self.fwhidden_params) + [final_width]

    featvec_plus_action = tf.concat([atensor, input_featvec], 2)
    self.forward_fc_applier = vision.ConvApplier(None,
                                                 featnums,
                                                 name,
                                                 self.elu,
                                                 nolu_at_final=True)
    # FIXME: ConvApplier.infer returns tuples, which is unsuitable for Optimizer
    params, out = self.forward_fc_applier.infer(featvec_plus_action)
    if jointfw and output_fn <= 0:
        # Restore the per-view layout; with an explicit output_fn the
        # [-1, 1, output_fn] tensor is returned as requested.
        out = tf.reshape(out, [-1, V, N])

    self.forward_model_params = params
    self.forward_output_tensor = out
    print('FWD Params {}'.format(params))
    params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
    print('FWD Collected Params {}'.format(params))
    return params, out
def __init__(self, learning_rate, args, batch_normalization=None):
    """Create the input placeholders, the vision network and the finder head.

    Args:
        learning_rate: accepted but not used inside this constructor.
        args: parsed options; this constructor reads `debug_flags`, `res`,
            `EXPLICIT_BATCH_SIZE`, `sharedmultiview`, `ferev`, `fehidden`,
            `featnum`, `elu` and `polhidden`, and passes `args` to
            `rlutil.get_view_cfg`.
        batch_normalization: stored on the instance and forwarded to the
            feature extractor.
    """
    self._debug_trans_only = 'tunnel_finder_trans_only' in args.debug_flags
    self.view_num, self.views = rlutil.get_view_cfg(args)
    self.args = args
    self.batch_normalization = batch_normalization

    # A negative EXPLICIT_BATCH_SIZE means the batch dimension is left open.
    if args.EXPLICIT_BATCH_SIZE < 0:
        batch_size = None
    else:
        batch_size = args.EXPLICIT_BATCH_SIZE
    self.batch_size = batch_size
    # tf.reshape does not accept None as dimension
    self.reshape_batch_size = batch_size if batch_size is not None else -1

    self.action_space_dimension = 6  # Magic number, 3D + Axis Angle
    predictions = self.get_number_of_predictions()
    self.pred_action_size = self.action_space_dimension * predictions

    side = args.res
    image_shape = [batch_size, self.view_num, side, side]
    self.action_tensor = tf.placeholder(
        tf.float32,
        shape=self._get_action_placeholder_shape(batch_size),
        name='CActionPh')
    self.rgb_tensor = tf.placeholder(tf.float32,
                                     shape=image_shape + [3],
                                     name='RgbPh')
    self.dep_tensor = tf.placeholder(tf.float32,
                                     shape=image_shape + [1],
                                     name='DepPh')

    assert self.view_num == 1 or args.sharedmultiview, "must be shared multiview, or single view"
    assert args.ferev in [
        11, 13
    ], 'Assumes --ferev 11 or 13 (implied by --visionformula )'

    # Let's try ResNet 18 True
    extractor_layers = args.fehidden + [args.featnum]
    self.feature_extractor = vision.FeatureExtractorResNet(
        config.SV_RESNET18_TRUE,
        extractor_layers,
        'VisionNetRev13',
        args.elu,
        batch_normalization=batch_normalization)
    self._vision_params, self.featvec = self.feature_extractor.infer(
        self.rgb_tensor, self.dep_tensor)

    # Merge the view axis into the feature axis: [B, V, N] -> [-1, 1, V*N].
    _batch_dim, view_dim, feat_dim = self.featvec.shape
    self.joint_featvec = tf.reshape(
        self.featvec, [-1, 1, int(view_dim) * int(feat_dim)])

    finder_layers = args.polhidden + [self.pred_action_size]
    self._finder_net = vision.ConvApplier(None, finder_layers,
                                          'TunnelFinderNet', args.elu)
    self._finder_params, self.finder_pred = self._finder_net.infer(
        self.joint_featvec)
def get_inverse_model(self):
    """Build the inverse model once and return `(params, out)`.

    The model consumes the current and next feature vectors concatenated
    along axis 2 and ends in a layer of width `self.action_space_dimension`.
    Its raw output is passed through `self.vote` before being cached.

    Returns:
        Tuple `(params, out)`: the params returned by `ConvApplier.infer`,
        and the voted output tensor. Every call returns the same pair.
    """
    if self.inverse_output_tensor is not None:
        return self.inverse_model_params, self.inverse_output_tensor

    input_featvec = tf.concat([self.cur_featvec, self.next_featvec], 2)
    print('inverse_model input {}'.format(input_featvec))
    featnums = self.imhidden_params + [self.action_space_dimension]
    print('inverse_model featnums {}'.format(featnums))
    self.inverse_fc_applier = vision.ConvApplier(None, featnums,
                                                 'InverseModelNet', self.elu)
    params, out = self.inverse_fc_applier.infer(input_featvec)
    self.inverse_model_params = params
    self.inverse_output_tensor = self.vote(out)
    # Return the cached (voted) tensor. The original returned the raw `out`
    # here, so the first caller and every later caller received different
    # tensors.
    return self.inverse_model_params, self.inverse_output_tensor
def create_somenet_from_feature(self, hidden, netname, elu=None):
    """Build one ConvApplier per view under `netname` and sum their softmaxes.

    For each of the `self.view_num` views, a ConvApplier named `View_<i>`
    is created inside `tf.variable_scope(netname)` and applied to that
    view's `icm.cur_featvec`. The per-view outputs are passed through
    softmax and added together.

    Args:
        hidden: layer sizes handed to each `vision.ConvApplier`.
        netname: enclosing variable scope; also the scope used to collect
            the returned variables.
        elu: forwarded positionally to `vision.ConvApplier`. When `None`,
            each view falls back to its own `icm.elu`.

    Returns:
        Tuple `(out, params, nets)`: the summed softmax tensor, the global
        variables collected under scope `netname`, and the list of per-view
        ConvAppliers.
    """
    outs = []
    nets = []
    for i in range(self.view_num):
        icm = self.icms[i]
        # NOTE(review): the original referenced a bare `elu` that is not
        # bound in this function. Falling back to `icm.elu` assumes each
        # ICM exposes that attribute - confirm against the ICM class.
        view_elu = icm.elu if elu is None else elu
        with tf.variable_scope(netname):
            net = vision.ConvApplier(None, hidden, 'View_{}'.format(i),
                                     view_elu)
            nets.append(net)
            _, out = net.infer(icm.cur_featvec)
            outs.append(tf.nn.softmax(out))
    # The original also called `paramss.append()` with no argument, which
    # raised TypeError on the first view; that unused list is removed.
    out = tf.add_n(outs)
    params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=netname)
    return out, params, nets
def __init__(self, learning_rate, args, batch_normalization=None):
    """Extend the base finder with a second, "fine" prediction head.

    The coarse and fine inputs are aliases of the placeholders created by
    the parent constructor. The coarse head is the parent's finder net; the
    fine head is a new ConvApplier fed with the same joint feature vector.

    Args:
        learning_rate: forwarded to the parent constructor.
        args: forwarded to the parent constructor; `polhidden` and `elu`
            are also read here.
        batch_normalization: forwarded to the parent constructor.
    """
    super(TunnelFinderTwin1, self).__init__(
        learning_rate=learning_rate,
        args=args,
        batch_normalization=batch_normalization)

    # Both stages read from the very same input placeholders.
    self.coarse_action = self.action_tensor
    self.fine_action = self.action_tensor
    self.coarse_rgb = self.rgb_tensor
    self.fine_rgb = self.rgb_tensor
    self.coarse_dep = self.dep_tensor
    self.fine_dep = self.dep_tensor

    # Coarse stage: reuse what the parent already built.
    self._coarse_net = self._finder_net
    self.coarse_pred = self.finder_pred

    # Fine stage: a separate head that outputs a single action.
    fine_layers = args.polhidden + [self.action_space_dimension]
    self._fine_net = vision.ConvApplier(None, fine_layers,
                                        'TunnelFinderNet_Fine', args.elu)
    self._fine_params, self.fine_pred = self._fine_net.infer(
        self.joint_featvec)
adists[i, 0, actions[i]] = 1.0 return adists # GT Input action_tensor = tf.placeholder(tf.float32, shape=[BSize, 1, action_space_dimension], name='ActionPh') V_tensor = tf.placeholder(tf.float32, shape=[BSize], name='VPh') # Feature Vector featvec = tf.placeholder(tf.float32, shape=[BSize, 1, FEAT], name='ActionPh') # pi and value net pi_net = vision.ConvApplier(None, [HIDDEN, HIDDEN, action_space_dimension], 'PiNet', elu=True, initialized_as_zero=False, nolu_at_final=True, batch_normalization=None) _, raw_pi = pi_net.infer(featvec) print("raw_pi {}".format(raw_pi.shape)) value_net = vision.ConvApplier(None, [HIDDEN, HIDDEN, 1], 'ValueNet', elu=True, initialized_as_zero=False, nolu_at_final=True, batch_normalization=None) _, raw_value = value_net.infer(featvec) print("raw_value {}".format(raw_value.shape)) if len(actionset) != action_space_dimension: # Selective softmax
def __init__(self, learning_rate, args, master=None, shadowid=-1):
    """Build the tabular (or `mani`) policy/value graph and a forward model.

    Args:
        learning_rate: accepted but not used inside this constructor.
        args: parsed options; `res`, `mani`, `debug` and `cr` are read.
        master: optional peer object. When given, ops copying its weights
            into this instance are created; otherwise placeholders and ops
            for applying weight deltas are created.
        shadowid: when >= 0, variable and net names get a
            `Shadow<shadowid>_` prefix.
    """
    super(TabularRL, self).__init__()
    res = args.res
    side = res * 2 + 1  # [-res, -res] to [res, res]
    self.h = side
    self.w = side
    self.mani = args.mani

    self.action_tensor = tf.placeholder(tf.float32,
                                        shape=[None, 1, 4],
                                        name='ActionPh')
    self.rgb_1_tensor = tf.placeholder(tf.int32,
                                       shape=[None, 2],
                                       name='Rgb1Ph')
    self.dep_1_tensor = tf.placeholder(tf.int32,
                                       shape=[None],
                                       name='Dep1Ph')
    self.rgb_2_tensor = tf.placeholder(tf.int32,
                                       shape=[None, 2],
                                       name='Rgb2Ph')
    self.dep_2_tensor = tf.placeholder(tf.int32,
                                       shape=[None],
                                       name='Dep2Ph')

    vprefix = 'Shadow{}_'.format(shadowid) if shadowid >= 0 else ''
    frgb_1 = tf.cast(self.rgb_1_tensor, tf.float32)
    frgb_2 = tf.cast(self.rgb_2_tensor, tf.float32)

    if self.mani:
        # Function-approximator variant: two small nets on the float input.
        base_layers = [64, 64]
        self.pol_applier = vision.ConvApplier(None,
                                              base_layers + [4],
                                              vprefix + 'PolMani',
                                              elu=True,
                                              nolu_at_final=True)
        self.polparams, self.polout = self.pol_applier.infer(frgb_1)
        self.val_applier = vision.ConvApplier(None,
                                              base_layers + [1],
                                              vprefix + 'ValMani',
                                              elu=True,
                                              nolu_at_final=True)
        self.valparams, self.valout = self.val_applier.infer(frgb_1)
        self.smpol = tf.nn.softmax(tf.reshape(self.policy, [-1, 1, 4]))
    else:
        # Tabular variant: one zero-initialized grid each for policy and
        # value, looked up by the input shifted by `res`.
        self.polparams = tf.get_variable(vprefix + 'polgrid',
                                         shape=[self.w, self.h, 4],
                                         dtype=tf.float32,
                                         initializer=tf.zeros_initializer())
        self.valparams = tf.get_variable(vprefix + 'valgrid',
                                         shape=[self.w, self.h],
                                         dtype=tf.float32,
                                         initializer=tf.zeros_initializer())
        self.smpolparams = tf.nn.softmax(logits=self.polparams)
        self.indices = tf.reshape(self.rgb_1_tensor, [-1, 2]) + args.res
        self.polout = tf.gather_nd(self.smpolparams, indices=self.indices)
        print('polout shape {}'.format(self.polout.shape))
        self.valout = tf.gather_nd(self.valparams, indices=self.indices)
        self.smpol = tf.reshape(self.policy, [-1, 1, 4])

    # NOTE(review): the grid and forward model below are treated as common
    # to both branches above - confirm against the original indentation.
    # Every integer (x, y) in the [-res, res] square, x-major order.
    span = range(-res, res + 1)
    self.pvgrid = np.array([[x, y] for x in span for y in span],
                           dtype=np.int32)

    flat_action = tf.reshape(self.action_tensor, [-1, 4])
    fwd_input = tf.concat([flat_action, frgb_1], 1)
    self.forward_applier = vision.ConvApplier(None, [64, 2],
                                              vprefix + 'ForwardModelNet',
                                              elu=True,
                                              nolu_at_final=True)
    self.forward_params, self.forward = self.forward_applier.infer(
        fwd_input)

    # Per action curiosity
    print('FWD {}'.format(self.forward.shape))
    print('FRGB_2 {}'.format(frgb_2.shape))
    self.sqdiff = tf.squared_difference(self.forward, frgb_2)
    print('SQDIFF {}'.format(self.sqdiff.shape))
    self.curiosity = tf.reduce_mean(self.sqdiff, axis=[1], keepdims=False)
    print('> Polout {} Valout {} Curiosity {}'.format(
        self.polout.shape, self.valout.shape, self.curiosity.shape))

    self.loss = None
    self.debug = args.debug
    self.cr = args.cr
    self.master = master

    ws = self.get_weights()
    if master is None:
        # No master: expose placeholder pairs and add (in2 - in1) to each
        # weight.
        self.grads_in1 = [
            tf.placeholder(weight.dtype, shape=weight.shape) for weight in ws
        ]
        self.grads_in2 = [
            tf.placeholder(weight.dtype, shape=weight.shape) for weight in ws
        ]
        self.grads_op = [
            tf.assign_add(weight, after - before) for weight, before, after
            in zip(ws, self.grads_in1, self.grads_in2)
        ]
    else:
        # With a master: ops that overwrite our weights with the master's.
        mws = self.master.get_weights()
        self.download_op = [
            tf.assign(mine, theirs) for mine, theirs in zip(ws, mws)
        ]
h = r.pbufferHeight # Action taken self.a3c_batch_a_tensor = tf.placeholder(tf.float32, shape=[None, self.action_size]) self.rgb1_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 3]) self.rgb2_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 3]) self.dep1_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 1]) self.dep2_tensor = tf.placeholder(tf.float32, shape=[None, view_num, w, h, 1]) self.icm = icm.IntrinsicCuriosityModule( self.a3c_batch_a_tensor, self.rgb1_tensor, self.dep1_tensor, self.rgb2_tensor, self.dep2_tensor, svconfdict, mvconfdict, mv_featnum) self.policy_applier = vision.ConvApplier(None, config.POLICY_HIDDEN_LAYERS + [output_number]) params,out = self.policy_applier.infer(self.icm.cur_featvec) self.policy_fc_params = params self.policy_params = self.cur_nn_params + params self.policy_out = out print('RLDRIVER POLICY PARAMS {}'.format(self.policy_out)) print('RLDRIVER POLICY OUTPUT {}'.format(self.policy_out)) self.value_applier = vision.ConvApplier(None, config.VALUE_HIDDEN_LAYERS + [1]) params,out = self.value_applier.infer(self.icm.cur_featvec) self.value_fc_params = params self.value_params = self.cur_nn_params + params self.value_out = out print('RLDRIVER VALUE PARAMS {}'.format(self.value_out)) print('RLDRIVER VALUE OUTPUT {}'.format(self.value_out))