def __init__(self, dU, obs_tensor, act_op, feat_op, var, sess, device_string, copy_param_scope=None):
    """Wrap a TensorFlow policy network for action sampling.

    Args:
        dU: Action dimensionality.
        obs_tensor: Placeholder tensor fed with observations.
        act_op: Graph op producing the mean action.
        feat_op: Graph op producing learned features.
        var: Per-dimension action noise variance (length dU).
        sess: TensorFlow session used to run the ops.
        device_string: Device spec for running the graph.
        copy_param_scope: Optional variable scope whose parameters can be
            copied in via the created placeholders/assign ops.
    """
    Policy.__init__(self)
    self.dU = dU
    self.obs_tensor = obs_tensor
    self.act_op = act_op
    self.feat_op = feat_op
    self.sess = sess
    self.device_string = device_string
    # Cholesky factor of the (diagonal) action covariance.
    self.chol_pol_covar = np.diag(np.sqrt(var))
    # Observation normalization; must be set from elsewhere based on observations.
    self.scale = None
    self.bias = None
    self.x_idx = None
    if copy_param_scope:
        self.copy_params = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope=copy_param_scope)
        self.copy_params_assign_placeholders = [
            tf.placeholder(tf.float32, shape=p.get_shape())
            for p in self.copy_params
        ]
        # One assign op per parameter, fed through the matching placeholder.
        self.copy_params_assign_ops = [
            tf.assign(param, placeholder)
            for param, placeholder in zip(
                self.copy_params, self.copy_params_assign_placeholders)
        ]
def __init__(self, test_net, deploy_net, var):
    """Wrap a pair of networks (test/deploy) as a policy.

    Args:
        test_net: Network used at test time.
        deploy_net: Network used at deployment.
        var: Per-dimension action noise variance.
    """
    Policy.__init__(self)
    self.net = test_net
    self.deploy_net = deploy_net
    # Cholesky factor of the diagonal action covariance.
    self.chol_pol_covar = np.diag(np.sqrt(var))
    # Observation normalization; must be set from elsewhere based on observations.
    self.scale = None
    self.bias = None
def __init__(self, agent, learning_rate, cond, noise_var=None):
    """Initialize an agent-driven policy.

    Args:
        agent: Agent instance this policy acts through.
        learning_rate: Step size used by the policy update.
        cond: Condition index (cond, not m).
        noise_var: Optional exploration noise variance.
    """
    Policy.__init__(self)
    self.agent = agent
    self.learning_rate = learning_rate
    # NOTE(review): sqrt_noise_var is only defined when noise_var is given;
    # callers presumably check for it (or always pass noise_var) — confirm.
    if noise_var is not None:
        self.sqrt_noise_var = np.sqrt(noise_var)
    self.cond = cond
def __init__(self, algorithm, history_length):
    """Initialize a policy that keeps a rolling state/action history.

    Args:
        algorithm: Algorithm object providing T, dX, dU.
        history_length: Number of past steps the policy conditions on.
    """
    Policy.__init__(self)
    self.dU = algorithm.dU  # TODO: this should live in the super class
    self.algorithm = algorithm
    # Uninitialized buffers for one trajectory of states and actions.
    self.X = np.empty((algorithm.T, algorithm.dX))
    self.U = np.empty((algorithm.T, algorithm.dU))
    self.history_length = history_length
def __init__(self, dU, dX, var, gcm):
    """Initialize a policy backed by a gcm model.

    Args:
        dU: Action dimensionality.
        dX: State dimensionality.
        var: Per-dimension action noise variance.
        gcm: Model object the policy queries for actions.
    """
    Policy.__init__(self)
    self.gcm = gcm
    self.dU = dU
    self.dX = dX
    # Cholesky factor of the diagonal action covariance.
    self.chol_pol_covar = np.diag(np.sqrt(var))
    # Observation normalization; must be set from elsewhere based on observations.
    self.scale = None
    self.bias = None
    self.x_idx = None
def __init__(self, dU, obs_tensor, act_op, var, sess, device_string):
    """Wrap a TensorFlow policy network (no feature op, no copy scope).

    Args:
        dU: Action dimensionality.
        obs_tensor: Placeholder tensor fed with observations.
        act_op: Graph op producing the mean action.
        var: Per-dimension action noise variance (length dU).
        sess: TensorFlow session used to run the ops.
        device_string: Device spec for running the graph.
    """
    Policy.__init__(self)
    self.dU = dU
    self.obs_tensor = obs_tensor
    self.act_op = act_op
    self.sess = sess
    self.device_string = device_string
    # Cholesky factor of the diagonal action covariance.
    self.chol_pol_covar = np.diag(np.sqrt(var))
    # Observation normalization; must be set from elsewhere based on observations.
    self.scale = None
    self.bias = None
def __init__(self, K, k, pol_covar, chol_pol_covar, inv_pol_covar):
    """Initialize a time-varying linear-Gaussian policy.

    Args:
        K: Feedback gains, shape (T, dU, dX); defines T, dU, dX.
        k: Open-loop terms, shape (T, dU).
        pol_covar: Action covariances, shape (T, dU, dU).
        chol_pol_covar: Cholesky factors of pol_covar, same shape.
        inv_pol_covar: Inverses of pol_covar, same shape.
    """
    Policy.__init__(self)
    # K is assumed correct; every other argument must match its dimensions.
    self.T = K.shape[0]
    self.dU = K.shape[1]
    self.dX = K.shape[2]
    check_shape(k, (self.T, self.dU))
    for covar in (pol_covar, chol_pol_covar, inv_pol_covar):
        check_shape(covar, (self.T, self.dU, self.dU))
    self.K = K
    self.k = k
    self.pol_covar = pol_covar
    self.chol_pol_covar = chol_pol_covar
    self.inv_pol_covar = inv_pol_covar
def __init__(self, dU, obs_tensor, act_op, feat_op, var, sess, device_string, copy_param_scope=None):
    """Wrap a TensorFlow policy network for action sampling.

    Args:
        dU: Action dimensionality.
        obs_tensor: Placeholder tensor fed with observations.
        act_op: Graph op producing the mean action.
        feat_op: Graph op producing learned features.
        var: Per-dimension action noise variance (length dU).
        sess: TensorFlow session used to run the ops.
        device_string: Device spec for running the graph.
        copy_param_scope: Optional variable scope whose parameters can be
            copied in via the created placeholders/assign ops.
    """
    Policy.__init__(self)
    self.dU = dU
    self.obs_tensor = obs_tensor
    self.act_op = act_op
    self.feat_op = feat_op
    self.sess = sess
    self.device_string = device_string
    # Cholesky factor of the (diagonal) action covariance.
    self.chol_pol_covar = np.diag(np.sqrt(var))
    # Observation normalization; must be set from elsewhere based on observations.
    self.scale = None
    self.bias = None
    self.x_idx = None
    if copy_param_scope:
        # Fix: tf.GraphKeys.VARIABLES is a deprecated alias; use GLOBAL_VARIABLES
        # for consistency with the identical constructor elsewhere in this file.
        self.copy_params = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope=copy_param_scope)
        self.copy_params_assign_placeholders = [
            tf.placeholder(tf.float32, shape=param.get_shape())
            for param in self.copy_params
        ]
        # One assign op per parameter, fed through the matching placeholder.
        self.copy_params_assign_ops = [
            tf.assign(self.copy_params[i], self.copy_params_assign_placeholders[i])
            for i in range(len(self.copy_params))
        ]
def __init__(self, dU):
    """Initialize a minimal policy.

    Args:
        dU: Action dimensionality.
    """
    Policy.__init__(self)
    self.dU = dU
def __init__(self, const=0.5):
    """Initialize a constant-action policy with CMA sigma adaptation.

    Args:
        const: Constant value the policy is built around (default 0.5).
    """
    Policy.__init__(self)
    # CSA step-size adaptation state for CMA-ES.
    self.adapt_sigma = CMAAdaptSigmaCSA()
    self.const = const
def __init__(self, T=50):
    """Initialize a teacher policy with CMA sigma adaptation.

    Args:
        T: Time horizon (default 50).
    """
    Policy.__init__(self)
    # Teacher index is fixed to 0 for now; was originally sampled as
    # np.random.choice([0, 1]).
    self.teacher = 0
    self.T = T
    # CSA step-size adaptation state for CMA-ES.
    self.adapt_sigma = CMAAdaptSigmaCSA()
def __init__(self, test_net, deploy_net, var):
    """Wrap a pair of networks (test/deploy) as a policy.

    Args:
        test_net: Network used at test time.
        deploy_net: Network used at deployment.
        var: Per-dimension action noise variance.
    """
    Policy.__init__(self)
    self.net = test_net
    self.deploy_net = deploy_net
    # Cholesky factor of the diagonal action covariance.
    # NOTE(review): unlike the similar net-policy constructor in this file,
    # scale/bias normalization attributes are not initialized here — confirm
    # this class does not rely on them.
    self.chol_pol_covar = np.diag(np.sqrt(var))