def validate_probtype(probtype, pdparam):
    """Monte-Carlo sanity check of a probability-distribution type.

    Verifies two identities within 3 standard errors:
      1. mean negative log-likelihood of samples == analytic entropy
      2. analytic KL(p, q) == -ent[p] - E_p[log q] for a perturbed q

    :param probtype: project distribution-type object exposing
        param_placeholder / sample_placeholder / pdclass (TF1-style API)
    :param pdparam: 1-D numpy array of distribution parameters
    :raises AssertionError: if either estimate is outside 3 sigmas
    """
    N = 100000  # number of Monte Carlo samples
    # Check to see if mean negative log likelihood == differential entropy
    Mval = np.repeat(pdparam[None, :], N, axis=0)  # same params for every row
    M = probtype.param_placeholder([N])
    X = probtype.sample_placeholder([N])
    pd = probtype.pdclass()(M)
    calcloglik = U.function([X, M], pd.logp(X))
    calcent = U.function([M], pd.entropy())
    Xval = U.eval(pd.sample(), feed_dict={M: Mval})
    logliks = calcloglik(Xval, Mval)
    entval_ll = -logliks.mean()  #pylint: disable=E1101
    entval_ll_stderr = logliks.std() / np.sqrt(N)  #pylint: disable=E1101
    entval = calcent(Mval).mean()  #pylint: disable=E1101
    assert np.abs(entval - entval_ll) < 3 * entval_ll_stderr  # within 3 sigmas

    # Check to see if kldiv[p,q] = - ent[p] - E_p[log q]
    M2 = probtype.param_placeholder([N])
    pd2 = probtype.pdclass()(M2)
    # q: slightly perturbed copy of p's parameters
    q = pdparam + np.random.randn(pdparam.size) * 0.1
    Mval2 = np.repeat(q[None, :], N, axis=0)
    calckl = U.function([M, M2], pd.kl(pd2))
    klval = calckl(Mval, Mval2).mean()  #pylint: disable=E1101
    # E_p[log q]: samples Xval drawn from p, scored under q's params (Mval2)
    logliks = calcloglik(Xval, Mval2)
    klval_ll = -entval - logliks.mean()  #pylint: disable=E1101
    klval_ll_stderr = logliks.std() / np.sqrt(N)  #pylint: disable=E1101
    assert np.abs(klval - klval_ll) < 3 * klval_ll_stderr  # within 3 sigmas
    def __init__(self, epsilon=1e-2, shape=()):
        """Track a running mean/std of a data stream with TF1 variables.

        :param epsilon: small constant seeding the sum-of-squares and count
            accumulators, avoiding division by zero before any data arrives
        :param shape: shape of one data element
        """
        def _accumulator(name, var_shape, init_val):
            # All accumulators are float64, non-trainable graph state.
            return tf.get_variable(
                dtype=tf.float64,
                shape=var_shape,
                initializer=tf.constant_initializer(init_val),
                name=name, trainable=False)

        self._sum = _accumulator("runningsum", shape, 0.0)
        self._sumsq = _accumulator("runningsumsq", shape, epsilon)
        self._count = _accumulator("count", (), epsilon)
        self.shape = shape

        self.mean = tf.to_float(self._sum / self._count)
        # Variance floored at 1e-2 so std never collapses toward zero.
        variance = tf.to_float(self._sumsq / self._count) - tf.square(self.mean)
        self.std = tf.sqrt(tf.maximum(variance, 1e-2))

        sum_ph = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
        sumsq_ph = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
        count_ph = tf.placeholder(shape=[], dtype=tf.float64, name='count')
        # Callable that folds a batch's (sum, sumsq, count) into the state.
        self.incfiltparams = U.function(
            [sum_ph, sumsq_ph, count_ph], [],
            updates=[tf.assign_add(self._sum, sum_ph),
                     tf.assign_add(self._sumsq, sumsq_ph),
                     tf.assign_add(self._count, count_ph)])
    def __init__(self, *args, **kwargs):
        """Build the model via the subclass's _initialize, then cache its
        variable collections and flat parameter get/set helpers.

        The ctor args are stored so the object can later be rebuilt with
        the same configuration.
        """
        self.args, self.kwargs = args, kwargs
        # _initialize builds the TF graph and returns its variable scope.
        self.scope = self._initialize(*args, **kwargs)
        self.all_variables = tf.get_collection(tf.GraphKeys.VARIABLES, self.scope.name)

        self.trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name)
        # Total scalar parameter count across all trainable variables.
        self.num_params = sum(int(np.prod(v.get_shape().as_list())) for v in self.trainable_variables)
        self._setfromflat = U.SetFromFlat(self.trainable_variables)
        self._getflat = U.GetFlat(self.trainable_variables)

        logger.info('Trainable variables ({} parameters)'.format(self.num_params))
        for v in self.trainable_variables:
            shp = v.get_shape().as_list()
            logger.info('- {} shape:{} size:{}'.format(v.name, shp, np.prod(shp)))
        logger.info('All variables')
        for v in self.all_variables:
            shp = v.get_shape().as_list()
            logger.info('- {} shape:{} size:{}'.format(v.name, shp, np.prod(shp)))

        # One placeholder per variable lets set_all_vars load a complete
        # snapshot of the model state in a single session call.
        placeholders = [tf.placeholder(v.value().dtype, v.get_shape().as_list()) for v in self.all_variables]
        self.set_all_vars = U.function(
            inputs=placeholders,
            outputs=[],
            updates=[tf.group(*[v.assign(p) for v, p in zip(self.all_variables, placeholders)])]
        )
Esempio n. 4
0
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        """Build an MLP actor-critic with separate value and policy heads.

        :param ob_space: observation space; must be a gym.spaces.Box
        :param ac_space: action space; determines the pd type and param size
        :param hid_size: units in each hidden layer
        :param num_hid_layers: number of tanh hidden layers per head
        :param gaussian_fixed_var: for Box action spaces, use a single
            learned state-independent logstd instead of predicting it
        """
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None  # leading batch dimension left unspecified

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))
        #obz = ob

        # Observation normalization is disabled here; raw obs feed both heads.
        #with tf.variable_scope("obfilter"):
        #    self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        #obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
        # Value-function head: tanh MLP ending in a scalar V(s) per sample.
        last_out = ob
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "vffc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        self.vpred = U.dense(last_out,
                             1,
                             "vffinal",
                             weight_init=U.normc_initializer(1.0))[:, 0]

        # Policy head: an independent tanh MLP producing pd parameters.
        last_out = ob
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "polfc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # First half of the params is the predicted mean; second half is
            # a learned logstd variable (mean * 0.0 broadcasts it per-batch).
            mean = U.dense(last_out,
                           pdtype.param_shape()[0] // 2, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer)
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = U.dense(last_out,
                              pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        # No recurrent state in this policy.
        self.state_in = []
        self.state_out = []

        # Sample stochastically or take the distribution mode, selected at
        # call time via the boolean placeholder.
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
Esempio n. 5
0
File: I3.py Progetto: hcch0912/I3
def make_update_exp(vals, target_vals):
    """Return a callable that Polyak-averages `vals` into `target_vals`.

    Variables are paired by sorted name; each invocation moves every
    target variable a small step (1 - polyak) toward its source twin.
    """
    polyak = 1.0 - 1e-2
    by_name = lambda v: v.name
    assigns = [
        tgt.assign(polyak * tgt + (1.0 - polyak) * src)
        for src, tgt in zip(sorted(vals, key=by_name),
                            sorted(target_vals, key=by_name))
    ]
    return U.function([], [], updates=[tf.group(*assigns)])
Esempio n. 6
0
	def train(self, policy, S, A, epochs, batch_size):
		"""Behavior-clone `policy` toward expert actions with minibatch MSE.

		:param policy: (ob_placeholder, actor_output) pair of TF tensors
		:param S: observation array, shape (N, ...)
		:param A: expert action array; actor output is reshaped to
			(-1, A.shape[1], A.shape[2]) to match it
		:param epochs: passes over the shuffled dataset
		:param batch_size: minibatch size
		:return: tf_util.function mapping observations to predicted actions
		"""
		ac = tf.placeholder(name='expected_actions', dtype=tf.float32, shape=(None,A.shape[1],A.shape[2]))
		ob, actor = policy
		actor = tf.reshape(actor, shape=np.array([-1, A.shape[1], A.shape[2]]))
		error = tf.reduce_mean(0.5 * tf.square(actor - ac))
		opt = tf.train.AdamOptimizer(learning_rate=3e-4).minimize(error)

		sess = tf.get_default_session()
		# NOTE(review): re-initializes ALL global variables, which would
		# clobber any previously restored weights in this session — confirm
		# this is intended.
		sess.run(tf.global_variables_initializer())


		# Trailing partial batch (S.shape[0] % batch_size samples) is dropped.
		number_of_batches = S.shape[0]//batch_size
		sample_index = np.arange(S.shape[0])
		for i in range(epochs):
			np.random.shuffle(sample_index)
			pbar = tqdm(range(number_of_batches))
			for k in pbar:
				batch_index = sample_index[batch_size*k:batch_size*(k+1)]
				s_batch = S[batch_index,:]
				a_batch = A[batch_index,:]
				_, mse_run = sess.run([opt, error], feed_dict={ob: s_batch, ac: a_batch})
				pbar.set_description("Loss %s" % str(mse_run))


		return tf_util.function([ob], actor)
Esempio n. 7
0
def load_policy(filename):
    """Load a pickled 'GaussianPolicy' snapshot and rebuild it in TF.

    :param filename: path to a pickle holding 'nonlin_type' plus a
        'GaussianPolicy' dict with keys logstdevs_1_Da/hidden/obsnorm/out
    :return: tf_util.function mapping a (batch, obs_dim) float32 input to
        the policy's deterministic action output
    :raises AssertionError: if the pickle does not match the expected schema
    """
    # NOTE(review): pickle.loads on an arbitrary file is unsafe for
    # untrusted input — only load trusted snapshots.
    with open(filename, 'rb') as f:
        data = pickle.loads(f.read())

    # assert len(data.keys()) == 2
    nonlin_type = data['nonlin_type']
    # The single key besides 'nonlin_type' names the policy class.
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]

    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
    policy_params = data[policy_type]

    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}

    # Keep track of input and output dims (i.e. observation and action dims) for the user

    def build_policy(obs_bo):
        # Unpack one serialized affine layer into (W, b) float32 arrays.
        def read_layer(l):
            assert list(l.keys()) == ['AffineLayer']
            assert sorted(l['AffineLayer'].keys()) == ['W', 'b']
            return l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32)

        def apply_nonlin(x):
            if nonlin_type == 'lrelu':
                return tf_util.lrelu(x, leak=.01) # openai/imitation nn.py:233
            elif nonlin_type == 'tanh':
                return tf.tanh(x)
            else:
                raise NotImplementedError(nonlin_type)

        # Build the policy. First, observation normalization.
        assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
        obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D']
        obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D']
        # std = sqrt(E[x^2] - E[x]^2), clipped at 0 for numerical safety.
        obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
        print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)
        normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation

        curr_activations_bd = normedobs_bo

        # Hidden layers next
        assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
        layer_params = policy_params['hidden']['FeedforwardNet']
        # Layers applied in sorted-name order — assumes names sort in
        # network order (TODO confirm against the serializer).
        for layer_name in sorted(layer_params.keys()):
            l = layer_params[layer_name]
            W, b = read_layer(l)
            print(layer_name,W.shape,b.shape,nonlin_type)
            curr_activations_bd = apply_nonlin(tf.matmul(curr_activations_bd, W) + b)

        # Output layer
        W, b = read_layer(policy_params['out'])
        output_bo = tf.matmul(curr_activations_bd, W) + b
        print('out',W.shape,b.shape,'None')
        return output_bo

    obs_bo = tf.placeholder(tf.float32, [None, None])
    a_ba = build_policy(obs_bo)
    policy_fn = tf_util.function([obs_bo], a_ba)
    return policy_fn
Esempio n. 8
0
    def _initialize(self, ob_space, ac_space, ac_bins, ac_noise_std,
                    nonlin_type, hidden_dims, connection_type):
        """Build the policy graph and return the variable scope holding it.

        :param ob_space: 1-D observation space
        :param ac_space: 1-D bounded action space
        :param ac_bins: action-discretization setting (used by _make_net)
        :param ac_noise_std: action noise std (stored for subclass use)
        :param nonlin_type: one of 'tanh'/'relu'/'lrelu'/'elu'
        :param hidden_dims: hidden layer sizes (used by _make_net)
        :param connection_type: network wiring option (used by _make_net)
        """
        self.ac_space = ac_space
        self.ac_bins = ac_bins
        self.ac_noise_std = ac_noise_std
        self.hidden_dims = hidden_dims
        self.connection_type = connection_type

        assert len(ob_space.shape) == len(self.ac_space.shape) == 1
        assert (np.all(np.isfinite(self.ac_space.low)) and np.all(
            np.isfinite(self.ac_space.high))), "Action bounds required"

        self.nonlin = {
            'tanh': tf.tanh,
            'relu': tf.nn.relu,
            'lrelu': U.lrelu,
            'elu': tf.nn.elu
        }[nonlin_type]

        with tf.variable_scope(type(self).__name__) as scope:
            # Observation normalization.
            # Stats start as NaN and must be loaded via _set_ob_mean_std
            # before acting; otherwise the network input is NaN.
            ob_mean = tf.get_variable('ob_mean',
                                      ob_space.shape,
                                      tf.float32,
                                      tf.constant_initializer(np.nan),
                                      trainable=False)
            ob_std = tf.get_variable('ob_std',
                                     ob_space.shape,
                                     tf.float32,
                                     tf.constant_initializer(np.nan),
                                     trainable=False)
            in_mean = tf.placeholder(tf.float32, ob_space.shape)
            in_std = tf.placeholder(tf.float32, ob_space.shape)
            self._set_ob_mean_std = U.function([in_mean, in_std], [],
                                               updates=[
                                                   tf.assign(ob_mean, in_mean),
                                                   tf.assign(ob_std, in_std),
                                               ])

            # Policy network.
            # Observations are standardized and clipped to [-5, 5] first.
            o = tf.placeholder(tf.float32, [None] + list(ob_space.shape))
            a = self._make_net(
                tf.clip_by_value((o - ob_mean) / ob_std, -5.0, 5.0))
            self._act = U.function([o], a)
        return scope
Esempio n. 9
0
    def getPolicy(_weights, _biases):
        """Build a two-hidden-layer sigmoid MLP from stored weight dicts.

        :param _weights: dict with matrices under keys 'h1', 'h2', 'out'
        :param _biases: dict with vectors under keys 'b1', 'b2', 'out'
        :return: tf_util.function mapping observation batches to actions
        """
        obs_bo = tf.placeholder(tf.float32, [None, None])
        hidden = obs_bo
        # Two sigmoid-activated affine layers, then a linear output head.
        for w_key, b_key in (('h1', 'b1'), ('h2', 'b2')):
            hidden = tf.nn.sigmoid(
                tf.add(tf.matmul(hidden, _weights[w_key]), _biases[b_key]))
        a_ba = tf.matmul(hidden, _weights['out']) + _biases['out']

        return tf_util.function([obs_bo], a_ba)
Esempio n. 10
0
def load_policy(filename):
    """Load a pickled 'GaussianPolicy' snapshot and rebuild it in TF.

    Same schema as openai/imitation snapshots: the pickle holds
    'nonlin_type' plus a 'GaussianPolicy' dict with keys
    logstdevs_1_Da / hidden / obsnorm / out.

    :param filename: path to the pickle file
    :return: tf_util.function mapping a (batch, obs_dim) float32 input to
        the policy's deterministic action output
    :raises AssertionError: if the pickle does not match the schema
    """
    # NOTE(review): pickle.loads is unsafe on untrusted input.
    with open(filename, 'rb') as f:
        data = pickle.loads(f.read())

    # assert len(data.keys()) == 2
    nonlin_type = data['nonlin_type']
    # The single key besides 'nonlin_type' names the policy class.
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]

    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
    policy_params = data[policy_type]

    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}

    # Keep track of input and output dims (i.e. observation and action dims) for the user

    def build_policy(obs_bo):
        # Unpack one serialized affine layer into (W, b) float32 arrays.
        def read_layer(l):
            assert list(l.keys()) == ['AffineLayer']
            assert sorted(l['AffineLayer'].keys()) == ['W', 'b']
            return l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32)

        def apply_nonlin(x):
            if nonlin_type == 'lrelu':
                return tf_util.lrelu(x, leak=.01) # openai/imitation nn.py:233
            elif nonlin_type == 'tanh':
                return tf.tanh(x)
            else:
                raise NotImplementedError(nonlin_type)

        # Build the policy. First, observation normalization.
        assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
        obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D']
        obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D']
        # std = sqrt(E[x^2] - E[x]^2), clipped at 0 for numerical safety.
        obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
        # print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)
        normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation

        curr_activations_bd = normedobs_bo

        # Hidden layers next
        assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
        layer_params = policy_params['hidden']['FeedforwardNet']
        # Layers applied in sorted-name order — assumes names sort in
        # network order (TODO confirm against the serializer).
        for layer_name in sorted(layer_params.keys()):
            l = layer_params[layer_name]
            W, b = read_layer(l)
            curr_activations_bd = apply_nonlin(tf.matmul(curr_activations_bd, W) + b)

        # Output layer
        W, b = read_layer(policy_params['out'])
        output_bo = tf.matmul(curr_activations_bd, W) + b
        return output_bo

    obs_bo = tf.placeholder(tf.float32, [None, None])
    a_ba = build_policy(obs_bo)
    policy_fn = tf_util.function([obs_bo], a_ba)
    return policy_fn
    def _initialize(self, policy_dir, ob_space, ac_space, ac_bins, ac_noise_std, nonlin_type, hidden_dims, connection_type):
        """Build the policy graph, optionally naming its scope after an
        existing .h5 snapshot so variables can be restored into it.

        :param policy_dir: path to a .h5 snapshot whose 'name' attribute
            supplies the scope name, or falsy to generate a fresh scope
        :param ob_space: observation space (overridden below — see note)
        :param ac_space: action space
        :param ac_bins: action-discretization setting (used by _make_net)
        :param ac_noise_std: action noise std (stored for subclass use)
        :param nonlin_type: one of 'sigmoid'/'tanh'/'relu'/'lrelu'/'elu'
        :param hidden_dims: hidden layer sizes (used by _make_net)
        :param connection_type: network wiring option (used by _make_net)
        """
        self.policy_dir = policy_dir
        self.ac_space = ac_space
        self.ac_bins = ac_bins
        self.ac_noise_std = ac_noise_std
        self.hidden_dims = hidden_dims
        self.connection_type = connection_type

        if policy_dir:
            assert self.policy_dir.endswith('.h5')
            # Reuse the scope name recorded in the snapshot so restored
            # variable names line up.
            with h5py.File(self.policy_dir, 'r') as f:
                self.scope_name = f.attrs['name']
            #print('scope_name: {}'.format(self.scope_name))
        else:
            # Timestamp suffix keeps concurrently-built scopes unique.
            self.scope_name = type(self).__name__ + str(time.time())

        # HACK: the passed-in ob_space is discarded and replaced by a
        # fixed 732-dim dummy whose .shape sizes the placeholders below —
        # TODO confirm 732 matches the real observation dimension.
        ob_space = np.ones(732)
        '''
        assert len(ob_space.shape) == len(self.ac_space.shape) == 1
        assert np.all(np.isfinite(self.ac_space.low)) and np.all(np.isfinite(self.ac_space.high)), \
            'Action bounds required'
        '''
        self.nonlin = {'sigmoid': tf.nn.sigmoid, 'tanh': tf.tanh, 'relu': tf.nn.relu, 'lrelu': U.lrelu, 'elu': tf.nn.elu}[nonlin_type]

        with tf.variable_scope(self.scope_name) as scope:
            # Observation normalization
            # Stats start as NaN and must be set via _set_ob_mean_std.
            ob_mean = tf.get_variable(
                'ob_mean', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False)
            ob_std = tf.get_variable(
                'ob_std', ob_space.shape, tf.float32, tf.constant_initializer(np.nan), trainable=False)
            in_mean = tf.placeholder(tf.float32, ob_space.shape)
            in_std = tf.placeholder(tf.float32, ob_space.shape)
            self._set_ob_mean_std = U.function([in_mean, in_std], [], updates=[
                tf.assign(ob_mean, in_mean),
                tf.assign(ob_std, in_std),
            ])

            # Policy network
            # NOTE(review): normalization/clipping is commented out here —
            # the net sees raw observations even though the stats exist.
            o = tf.placeholder(tf.float32, [None] + list(ob_space.shape))
            a = self._make_net(o) #tf.clip_by_value((o - ob_mean) / ob_std, -5.0, 5.0))
            self._act = U.function([o], a)
        return scope
Esempio n. 12
0
File: I3.py Progetto: hcch0912/I3
    def __init__(self, num_features, num_actions, timestep, action_space, scope):
        """Build an LSTM-embedding Q-learner over action trajectories.

        :param num_features: unused in this body — TODO confirm needed
        :param num_actions: size of the action set (LSTM output / one-hot)
        :param timestep: trajectory length of the action-history input
        :param action_space: per-step action dimension of the history input
        :param scope: variable-scope name prefix for all sub-models
        """
        self.scope = scope
        self._lr = 0.5
        self.discount = 1.
        self.replay_buffer = ReplayBuffer(1e4)

        with tf.variable_scope(self.scope):
            # Inputs: a window of past actions, a scalar TD target, and the
            # integer action actually taken.
            self.act_trajectory = tf.placeholder(tf.float32, shape = ((None, timestep, action_space)))
            self.target = tf.placeholder(tf.float32, shape = ((None, )))
            self.act = tf.placeholder(tf.int32, shape = ((None,)))

            # tau embeds the action history; it feeds the Q networks.
            self.tau = lstm_model(self.act_trajectory, num_actions, scope = "tau_model_{}".format(scope))
            self.q_input = self.tau
            #train network
            self.q = mlp_model(self.q_input, 2, scope = "q_model_{}".format(scope))
            q_func_vars = U.scope_vars(U.absolute_scope_name( "q_model_{}".format(scope)))
            #target network
            self.target_q = mlp_model(self.q_input, 2, scope = "target_q_model_{}".format(scope))
            target_q_func_vars = U.scope_vars(U.absolute_scope_name( "target_q_model_{}".format(scope)))

            # take action
            self.softmax = tf.nn.softmax(self.target_q)
            self.pred = tf.argmax(self.softmax, axis = 1)

            #calculate the loss
            self.q_t_selected = tf.reduce_mean(self.q * tf.one_hot(self.act, num_actions), 1)
            # NOTE(review): q_tp1_best / q_tp1_best_masked are computed but
            # never used below — dead code or unfinished bootstrapping?
            q_tp1_best = tf.reduce_max(self.q, 1)
            q_tp1_best_masked =  q_tp1_best
            # stop_gradient on a placeholder is a no-op; kept as written.
            td_error = self.q_t_selected - tf.stop_gradient(self.target)
            self.errors = U.huber_loss(td_error)
            self.q_opt_op = tf.train.AdamOptimizer(self._lr).minimize(self.errors, var_list = q_func_vars)

            # tau is also trained to predict the taken action (aux loss).
            self.tau_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.tau, labels=self.act))
            self.tau_opt_op = tf.train.AdamOptimizer(self._lr).minimize(self.tau_loss)

            self.get_pred = U.function(inputs = [self.act_trajectory] , outputs = [self.softmax])
            self.train_q = U.function(inputs = [self.act_trajectory] + [self.target] +[self.act] , outputs = [self.errors, self.q], updates = [self.q_opt_op])
            self.train_tau = U.function(inputs =[ self.act] + [self.act_trajectory], outputs = [self.tau_loss], updates =[ self.tau_opt_op ])
            # Polyak-style copy of the train Q vars into the target net.
            self.update_model = make_update_exp(q_func_vars, target_q_func_vars)
Esempio n. 13
0
def run(args):
    """Restore the trained model for args.env_name and roll it out in gym."""
    env_name = args.env_name
    dataset_path = os.path.join('expert_data', env_name + ".pkl")
    data, in_dim, out_dim = load_data(dataset_path)
    net = Model(in_dim, out_dim, configs[env_name])
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Checkpoints live under models/<env_name>.
        saver.restore(sess, os.path.join('models', env_name))
        policy_fn = tf_util.function([net.input], net.output)
        gym_util.run_gym(env_name, policy_fn, num_rollouts=10)
Esempio n. 14
0
def main():
  """Restore a behavior-cloned policy from a checkpoint and run rollouts.

  CLI: env name (positional), optional --model_checkpoint path, --render,
  --max_timesteps cap, and --num_rollouts count. Prints return statistics.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('env', type=str)
  parser.add_argument('--model_checkpoint', type=str)
  # NOTE(review): type=bool on argparse does not parse 'False' — any
  # non-empty string is truthy; consider action='store_true'. Left as-is.
  parser.add_argument('--render', type=bool, default=True)
  parser.add_argument('--max_timesteps', type=int)
  parser.add_argument('--num_rollouts', type=int, default=10)
  args = parser.parse_args()

  with tf.Session() as sess:
    # Scope by env name so checkpoint variable names match.
    with tf.variable_scope(args.env):
      input_dim, output_dim = helper.input_output_shape(args.env)
      model = helper.build_model(input_dim, output_dim)
      input_ph, output_pred = model['input_ph'], model['output_pred']

      policy_fn = tf_util.function([input_ph], output_pred)

      if args.model_checkpoint:
        checkpoint_path = args.model_checkpoint
      else:
        checkpoint_path = helper.checkpoint_path(args.env)

      saver = tf.train.Saver()
      saver.restore(sess, checkpoint_path)

      env = gym.make(helper.envname(args.env))
      max_steps = args.max_timesteps or env.spec.timestep_limit

      returns = []
      observations = []
      actions = []
      for i in range(args.num_rollouts):
        print('iter', i)
        obs = env.reset()
        done = False
        totalr = 0
        steps = 0
        while not done:
          # Policy expects a batch dimension; obs[None, :] adds it.
          action = policy_fn(obs[None, :])
          observations.append(obs)
          actions.append(action)
          obs, r, done, _ = env.step(action)
          totalr += r
          steps += 1
          if args.render:
            env.render()
          if steps >= max_steps:
            break
        returns.append(totalr)

      helper.print_returns_stats(returns)
Esempio n. 15
0
    def __init__(self, epsilon=1e-2, shape=(), name=None):
        """
        Calculates the running mean and std of a data stream
        https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm

        :param epsilon: (float) helps with arithmetic issues
        :param shape: (tuple) the shape of the data stream's output
        :param name: (str) variable-scope name for the created variables
        """
        with tf.variable_scope(name):
            # Accumulators are float64, non-trainable graph state.
            self._sum = tf.get_variable(
                dtype=tf.float64,
                shape=shape,
                initializer=tf.constant_initializer(0.0),
                name="runningsum",
                trainable=False)
            self._sumsq = tf.get_variable(
                dtype=tf.float64,
                shape=shape,
                initializer=tf.constant_initializer(epsilon),
                name="runningsumsq",
                # BUG FIX: was `trainable=FCalse` — a NameError at graph
                # construction time.
                trainable=False)
            self._count = tf.get_variable(
                dtype=tf.float64,
                shape=(),
                initializer=tf.constant_initializer(epsilon),
                name="count",
                trainable=False)
            self.shape = shape

            self.mean = tf.to_float(self._sum / self._count)
            # Variance is floored at 1e-2 so std never collapses to ~0.
            self.std = tf.sqrt(
                tf.maximum(
                    tf.to_float(self._sumsq / self._count) -
                    tf.square(self.mean), 1e-2))

            newsum = tf.placeholder(shape=self.shape,
                                    dtype=tf.float64,
                                    name='sum')
            newsumsq = tf.placeholder(shape=self.shape,
                                      dtype=tf.float64,
                                      name='var')
            newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
            # Callable that folds a batch's (sum, sumsq, count) into state.
            self.incfiltparams = tf_util.function(
                [newsum, newsumsq, newcount], [],
                updates=[
                    tf.assign_add(self._sum, newsum),
                    tf.assign_add(self._sumsq, newsumsq),
                    tf.assign_add(self._count, newcount)
                ])
Esempio n. 16
0
def policy_fn(obs):
    """Run the pickled behavior-cloning MLP on one observation batch.

    :param obs: observation batch of shape (batch, obs_dim)
    :return: the MLP's action output for `obs`

    NOTE(review): this reloads both pickle files and rebuilds the TF graph
    on EVERY call, leaking graph nodes — consider building once and
    caching. Left as written.
    """
    with open('bc_policy/bc_weights.pkl', 'rb') as f:
        bc_weights = pickle.loads(f.read())
    with open('bc_policy/bc_biases.pkl', 'rb') as f:
        bc_biases = pickle.loads(f.read())

    # Two sigmoid hidden layers, linear output head.
    obs_bo = tf.placeholder(tf.float32, [None, None])
    layer_1 = tf.nn.sigmoid(
        tf.add(tf.matmul(obs_bo, bc_weights['h1']), bc_biases['b1']))
    layer_2 = tf.nn.sigmoid(
        tf.add(tf.matmul(layer_1, bc_weights['h2']), bc_biases['b2']))
    a_ba = tf.matmul(layer_2, bc_weights['out']) + bc_biases['out']

    policy_fn = tf_util.function([obs_bo], a_ba)
    return policy_fn(obs)
Esempio n. 17
0
def train(sess, data, model, curr_epoch, batch_size=32, debug=False, checkpoint_path=None):
  """Run one training epoch of an LSTM behavior-cloning model.

  :param sess: active TF session
  :param data: list of rollout dicts with 'observations' and 'actions'
  :param model: dict of graph handles (placeholders, loss, opt, lstm state)
  :param curr_epoch: epoch number, used only for logging
  :param batch_size: minibatch length fed to the LSTM (rollouts are padded
      to a multiple of 32 — note the hard-coded 32, not batch_size)
  :param debug: unused here — TODO confirm intended
  :param checkpoint_path: if set, save a checkpoint after the epoch
  :return: (policy_fn, initial_state, mean) — a stateful policy callable,
      the LSTM initial state, and the observation mean
  """
  # Concatenate observations across rollouts to compute dataset stats.
  obs = None
  for _data in data:
    if obs is None:
      obs = _data['observations']
    else:
      obs = np.concatenate((obs, _data['observations']))
  mean, stdev = helper.mean_and_stdev(obs)
  # Padding action: 17 zeros — presumably the env's action dim; verify.
  empty_action = np.array([0] * 17)

  m = model
  input_ph, output_ph     = m['input_ph'], m['output_ph']
  mean_v, stdev_v         = m['mean_v'], m['stdev_v']
  output_pred, mse, opt   = m['output_pred'], m['mse'], m['opt']
  S, initial_state, state = m['S'], m['initial_state'], m['state']

  # Push normalization stats into the graph's variables.
  mean_v.load(mean, session=sess)
  stdev_v.load(stdev, session=sess)

  if checkpoint_path:
    saver = tf.train.Saver()

  for _data in data:
    idx = 0
    # LSTM state carries across batches within a rollout.
    lstm_state = initial_state
    # Pad the rollout front with mean observations / empty actions so its
    # length is a multiple of 32.
    if len(_data['observations']) % 32 > 0:
      rep = 32 - (len(_data['observations']) % 32)
      mean_stk = np.tile(mean[None, :], [rep, 1])
      output_stk = np.tile(empty_action[None, :], [rep, 1])
      _data['observations'] = np.concatenate((mean_stk, _data['observations']))
      _data['actions'] = np.concatenate((output_stk, _data['actions']))
    while idx < len(_data['observations']):
      input_batch = _data['observations'][idx : idx+batch_size]
      output_batch = _data['actions'][idx : idx+batch_size]

      _, mse_run, lstm_state = sess.run([opt, mse, state], feed_dict={input_ph: input_batch, output_ph: output_batch, S: lstm_state})

      idx += batch_size

  # mse_run here is from the final batch only, not an epoch average.
  print('epoch: {0:03d} mse: {1:.4f}'.format(curr_epoch, mse_run))
  if checkpoint_path:
    saver.save(sess, checkpoint_path)

  policy_fn = tf_util.function([input_ph, S], [output_pred, state])
  return policy_fn, initial_state, mean
Esempio n. 18
0
def load_policy(filename):
    """Load a pickled 'GaussianPolicy' snapshot and build it via build_policy.

    :param filename: path to a pickle holding 'nonlin_type' plus a
        'GaussianPolicy' dict with keys logstdevs_1_Da/hidden/obsnorm/out
    :return: tf_util.function mapping a (batch, obs_dim) float32 input to
        the policy's action output
    :raises AssertionError: if the pickle does not match the schema
    """
    # NOTE(review): pickle.loads is unsafe on untrusted input.
    with open(filename, 'rb') as f:
        data = pickle.loads(f.read())
        # data should be a dict with 2 keys: 'GaussianPolicy' and 'nonlin_type'
#         print(data)

    nonlin_type = data['nonlin_type']
    # The single key besides 'nonlin_type' names the policy class.
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]
    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
    policy_params = data[policy_type]
#     print(policy_params.keys())
#     print(policy_params['obsnorm'])
    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}

    obs_bo = tf.placeholder(tf.float32, [None, None])
    # build_policy (defined elsewhere in this module) assembles the graph.
    a_ba = build_policy(obs_bo, policy_params, nonlin_type)
    policy_fn = tf_util.function([obs_bo], a_ba) 
    return policy_fn
Esempio n. 19
0
    def _init(self, ob_space, ac_space):
        """Build a CNN actor-critic over image observations.

        :param ob_space: observation space; must be a gym.spaces.Box
            (pixel values — inputs are scaled by 1/255 below)
        :param ac_space: action space; determines the pd param size
        """
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None  # leading batch dimension left unspecified

        sy_ob = U.get_placeholder(name="sy_ob",
                                  dtype=tf.float32,
                                  shape=[sequence_length] +
                                  list(ob_space.shape))

        # Scale raw pixel values into [0, 1].
        obscaled = sy_ob / 255.0

        # Policy head: 2 conv layers -> dense -> pd logits.
        with tf.variable_scope("pol"):
            x = obscaled
            x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
            x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
            x = U.flattenallbut0(x)
            x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
            logits = U.dense(x,
                             pdtype.param_shape()[0], "logits",
                             U.normc_initializer(0.01))
            self.pd = pdtype.pdfromflat(logits)
        # Value head: same architecture, separate weights.
        with tf.variable_scope("vf"):
            x = obscaled
            x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
            x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
            x = U.flattenallbut0(x)
            x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
            self.vpred = U.dense(x, 1, "value", U.normc_initializer(1.0))
            self.vpredz = self.vpred

        # No recurrent state in this policy.
        self.state_in = []
        self.state_out = []

        # NOTE(review): the `stochastic` placeholder is accepted but ignored
        # — actions are always sampled (see the XXX below).
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        sy_ac = self.pd.sample()  # XXX
        self._act = U.function([stochastic, sy_ob], [sy_ac, self.vpred])
Esempio n. 20
0
    def train(self, policy, S, A, epochs, batch_size):
        """Optimize a pre-built policy graph on (S, A) with minibatch SGD.

        :param policy: (ob, ac, opt, error, actor) tuple of graph handles;
            the loss/optimizer are built by the caller, unlike the other
            train() variant in this file
        :param S: observation array, shape (N, ...)
        :param A: expert action array aligned with S
        :param epochs: passes over the shuffled dataset
        :param batch_size: minibatch size (trailing partial batch dropped)
        :return: tf_util.function mapping observations to actor output
        """
        ob, ac, opt, error, actor = policy

        sess = tf.get_default_session()

        number_of_batches = S.shape[0] // batch_size
        sample_index = np.arange(S.shape[0])
        for i in range(epochs):
            np.random.shuffle(sample_index)
            pbar = tqdm(range(number_of_batches))
            for k in pbar:
                batch_index = sample_index[batch_size * k:batch_size * (k + 1)]
                s_batch = S[batch_index, :]
                a_batch = A[batch_index, :]
                _, mse_run = sess.run([opt, error],
                                      feed_dict={
                                          ob: s_batch,
                                          ac: a_batch
                                      })
                pbar.set_description("Loss %s" % str(mse_run))

        return tf_util.function([ob], actor)
Esempio n. 21
0
def build_act(make_obs_ph, q_func, num_actions, scope="deepq", reuse=None):
    """Creates the act function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that take a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select and action given observation.
        See the top of the file for details.
    """
    with tf.variable_scope(scope, reuse=reuse):
        observations_ph = make_obs_ph("observation")
        stochastic_ph = tf.placeholder(tf.bool, (), name="stochastic")
        update_eps_ph = tf.placeholder(tf.float32, (), name="update_eps")

        # Exploration rate, kept as graph state so it persists across calls.
        eps = tf.get_variable("eps", (),
                              initializer=tf.constant_initializer(0))

        q_values = q_func(observations_ph.get(), num_actions, scope="q_func")
        deterministic_actions = tf.argmax(q_values, axis=1)

        # Epsilon-greedy: with probability eps pick a uniformly random
        # action, otherwise the argmax action, independently per batch row.
        batch_size = tf.shape(observations_ph.get())[0]
        random_actions = tf.random_uniform(tf.stack([batch_size]),
                                           minval=0,
                                           maxval=num_actions,
                                           dtype=tf.int64)
        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)

        output_actions = tf.cond(stochastic_ph, lambda: stochastic_actions,
                                 lambda: deterministic_actions)
        # update_eps < 0 means "leave eps unchanged" (the default given).
        update_eps_expr = eps.assign(
            tf.cond(update_eps_ph >= 0, lambda: update_eps_ph, lambda: eps))
        _act = U.function(
            inputs=[observations_ph, stochastic_ph, update_eps_ph],
            outputs=output_actions,
            givens={
                update_eps_ph: -1.0,
                stochastic_ph: True
            },
            updates=[update_eps_expr])

        def act(ob, stochastic=True, update_eps=-1):
            # Thin wrapper giving the compiled function keyword defaults.
            return _act(ob, stochastic, update_eps)

        return act
Esempio n. 22
0
def learn(
    env,
    policy_func,
    *,
    timesteps_per_batch,  # timesteps per actor per update
    clip_param,
    entcoeff,  # clipping parameter epsilon, entropy coeff
    optim_epochs,
    optim_stepsize,
    optim_batchsize,  # optimization hypers
    gamma,
    lam,  # advantage estimation
    max_timesteps=0,
    max_episodes=0,
    max_iters=0,
    max_seconds=0,  # time constraint
    callback=None,  # you can do anything in the callback, since it takes locals(), globals()
    adam_epsilon=1e-5,
    schedule='constant'  # annealing for stepsize parameters (epsilon and adam)
):
    """Train a policy on `env` with PPO (clipped surrogate objective).

    Builds a fresh policy network "pi" and a frozen copy "oldpi" via
    `policy_func`, constructs the clipped-surrogate + value-function +
    entropy loss, then alternates rollout collection (via
    `traj_segment_generator`) with minibatch MpiAdam updates until exactly
    one of the max_* limits is reached (enforced by the assert below).

    NOTE(review): parameters are restored from and saved to the hard-coded
    path "save/Humanoid-v1" — confirm this is intended outside the
    Humanoid experiment.
    """
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space,
                     ac_space)  # Construct network for new policy
    oldpi = policy_func("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed clipping parameter epsilon

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = U.mean(kloldnew)
    meanent = U.mean(ent)
    pol_entpen = (-entcoeff) * meanent  # entropy bonus term

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = U.clip(ratio, 1.0 - clip_param, 1.0 + clip_param) * atarg  # clipped surrogate
    pol_surr = -U.mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = U.mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    # One call returns every loss plus the flattened gradient of total_loss.
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    # Op that copies current policy parameters into the "old" network.
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(oldpi.get_variables(), pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    # Restore previously saved parameters (hard-coded experiment path).
    U.load_state("save/Humanoid-v1")

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     timesteps_per_batch,
                                     stochastic=True)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards

    assert sum(
        [max_iters > 0, max_timesteps > 0, max_episodes > 0,
         max_seconds > 0]) == 1, "Only one time constraint permitted"

    while True:
        if callback: callback(locals(), globals())
        # Stop on whichever single limit was configured.
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)

        seg = seg_gen.__next__()
        add_vtarg_and_adv(seg, gamma, lam)  # adds advantages and lambda-returns to seg

        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before update
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    shuffle=not pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        #if hasattr(pi, "ob_rms"): pi.ob_rms.update(ob) # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        logger.log(fmt_row(13, loss_names))
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)
            logger.log(fmt_row(13, np.mean(losses, axis=0)))

        # Re-evaluate losses after the update, averaged across MPI workers.
        logger.log("Evaluating losses...")
        losses = []
        for batch in d.iterate_once(optim_batchsize):
            newlosses = compute_losses(batch["ob"], batch["ac"],
                                       batch["atarg"], batch["vtarg"],
                                       cur_lrmult)
            losses.append(newlosses)
        meanlosses, _, _ = mpi_moments(losses, axis=0)
        logger.log(fmt_row(13, meanlosses))
        for (lossval, name) in zipsame(meanlosses, loss_names):
            logger.record_tabular("loss_" + name, lossval)
        logger.record_tabular("ev_tdlam_before",
                              explained_variance(vpredbefore, tdlamret))
        # Gather episode stats from all MPI workers for logging.
        lrlocal = (seg["ep_lens"], seg["ep_rets"])  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews = map(flatten_lists, zip(*listoflrpairs))
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)
        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens)
        iters_so_far += 1
        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.dump_tabular()
        U.save_state("save/Humanoid-v1")
Esempio n. 23
0
def test_net(model,
             img_dir,
             max_iter=1000000,
             check_every_n=500,
             loss_check_n=10,
             save_model_freq=1000,
             batch_size=128):
    """Run latent-traversal testing for a (siamese) VAE model.

    Despite the name and the training-related parameters, the training loop
    below is commented out and `training` is hard-coded to False: after
    restoring a checkpoint, this function only decodes one test image,
    perturbs latent features 25..31 over a sweep of offsets, and saves the
    reconstructed images via `Img_Saver`.

    Fix in this revision: the four Python-2-style `print x` statements were
    converted to `print(x)` calls — the rest of this file uses Python-3-only
    syntax (keyword-only arguments), so the old statements were syntax errors.
    """
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    # Testing placeholders: raw test image in, latent vector to decode out.
    img_test = U.get_placeholder_cached(name="img_test")
    reconst_tp = U.get_placeholder_cached(name="reconst_tp")

    vae_loss = U.mean(model.vaeloss)

    latent_z1_tp = model.latent_z1
    latent_z2_tp = model.latent_z2

    losses = [
        U.mean(model.vaeloss),
        U.mean(model.siam_loss),
        U.mean(model.kl_loss1),
        U.mean(model.kl_loss2),
        U.mean(model.reconst_error1),
        U.mean(model.reconst_error2),
    ]

    tf.summary.scalar('Total Loss', losses[0])
    tf.summary.scalar('Siam Loss', losses[1])
    tf.summary.scalar('kl1_loss', losses[2])
    tf.summary.scalar('kl2_loss', losses[3])
    tf.summary.scalar('reconst_err1', losses[4])
    tf.summary.scalar('reconst_err2', losses[5])

    decoded_img = [model.reconst1, model.reconst2]

    weight_loss = [1, 1, 1]

    # NOTE(review): the train/loss ops below are unused while the training
    # loop is commented out, but they are kept because building them adds
    # optimizer variables to the graph that the checkpoint may reference.
    compute_losses = U.function([img1, img2], vae_loss)
    lr = 0.00005
    optimizer = tf.train.AdamOptimizer(learning_rate=lr,
                                       epsilon=0.01 / batch_size)

    all_var_list = model.get_trainable_variables()

    img1_var_list = all_var_list
    #[v for v in all_var_list if v.name.split("/")[1].startswith("proj1") or v.name.split("/")[1].startswith("unproj1")]
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
    merged = tf.summary.merge_all()
    train = U.function([img1, img2], [
        losses[0], losses[1], losses[2], losses[3], losses[4], losses[5],
        latent_z1_tp, latent_z2_tp, merged
    ],
                       updates=[optimize_expr1])
    get_reconst_img = U.function(
        [img1, img2],
        [model.reconst1, model.reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    # [testing -> ] encode a single test image / decode a latent vector
    test = U.function([img_test], model.latent_z_test)
    test_reconst = U.function([reconst_tp], [model.reconst_test])
    # [testing <- ]

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, "chk1")
    log_save_dir = os.path.join(cur_dir, "log")
    validate_img_saver_dir = os.path.join(cur_dir, "validate_images")
    test_img_saver_dir = os.path.join(cur_dir, "test_images")
    testing_img_dir = os.path.join(cur_dir, "dataset/test_img")

    train_writer = U.summary_writer(dir=log_save_dir)

    U.initialize()

    # Restore the latest checkpoint; chk_file_num is the restored step.
    saver, chk_file_num = U.load_checkpoints(load_requested=True,
                                             checkpoint_dir=chk_save_dir)
    validate_img_saver = Img_Saver(validate_img_saver_dir)

    # [testing -> ]
    test_img_saver = Img_Saver(test_img_saver_dir)
    # [testing <- ]

    meta_saved = False

    iter_log = []
    loss1_log = []
    loss2_log = []

    loss3_log = []

    training_images_list = read_dataset(img_dir)
    n_total_train_data = len(training_images_list)

    testing_images_list = read_dataset(testing_img_dir)
    n_total_testing_data = len(testing_images_list)

    training = False
    testing = True

    # if training == True:
    # 	for num_iter in range(chk_file_num+1, max_iter):
    # 		header("******* {}th iter: *******".format(num_iter))

    # 		idx = random.sample(range(n_total_train_data), 2*batch_size)
    # 		batch_files = [training_images_list[i] for i in idx]
    # 		# print batch_files
    # 		[images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
    # 		img1, img2 = images1, images2
    # 		[l1, l2, _, _] = get_reconst_img(img1, img2)

    # 		[loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

    # 		warn("Total Loss: {}".format(loss0))
    # 		warn("Siam loss: {}".format(loss1))
    # 		warn("kl1_loss: {}".format(loss2))
    # 		warn("kl2_loss: {}".format(loss3))
    # 		warn("reconst_err1: {}".format(loss4))
    # 		warn("reconst_err2: {}".format(loss5))

    # 		# warn("num_iter: {} check: {}".format(num_iter, check_every_n))
    # 		# warn("Total Loss: {}".format(loss6))
    # 		if num_iter % check_every_n == 1:
    # 			header("******* {}th iter: *******".format(num_iter))
    # 			idx = random.sample(range(len(training_images_list)), 2*5)
    # 			validate_batch_files = [training_images_list[i] for i in idx]
    # 			[images1, images2] = load_image(dir_name = img_dir, img_names = validate_batch_files)
    # 			[reconst1, reconst2, _, _] = get_reconst_img(images1, images2)
    # 			for img_idx in range(len(images1)):
    # 				sub_dir = "iter_{}".format(num_iter)

    # 				save_img = np.squeeze(images1[img_idx])
    # 				save_img = Image.fromarray(save_img)
    # 				img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
    # 				validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

    # 				save_img = np.squeeze(reconst1[img_idx])
    # 				save_img = Image.fromarray(save_img)
    # 				img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
    # 				validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

    # 		if num_iter % loss_check_n == 1:
    # 			train_writer.add_summary(summary, num_iter)

    # 		if num_iter > 11 and num_iter % save_model_freq == 1:
    # 			if meta_saved == True:
    # 				saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = False)
    # 			else:
    # 				saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = True)
    # 				meta_saved = True

    # Testing: traverse each of latent features 25..31 for one fixed image.
    print(testing_images_list)
    if testing == True:
        test_file_name = testing_images_list[6]  # fixed test image (7th entry)
        print(test_file_name)
        test_img = load_single_img(dir_name=testing_img_dir,
                                   img_name=test_file_name)
        test_features = np.arange(25, 32)
        for test_feature in test_features:
            test_variation = np.arange(-10, 10, 0.1)

            z = test(test_img)
            print(np.shape(z))
            print(z)
            for idx in range(len(test_variation)):
                # Perturb a single latent coordinate and decode.
                z_test = np.copy(z)
                z_test[0, test_feature] = z_test[
                    0, test_feature] + test_variation[idx]
                reconst_test = test_reconst(z_test)
                test_save_img = np.squeeze(reconst_test[0])
                test_save_img = Image.fromarray(test_save_img)
                img_file_name = "test_feat_{}_var_({}).png".format(
                    test_feature, test_variation[idx])
                test_img_saver.save(test_save_img, img_file_name, sub_dir=None)
            # Also save the unperturbed reconstruction for reference.
            reconst_test = test_reconst(z)
            test_save_img = np.squeeze(reconst_test[0])
            test_save_img = Image.fromarray(test_save_img)
            img_file_name = "test_feat_{}_var_original.png".format(
                test_feature)
            test_img_saver.save(test_save_img, img_file_name, sub_dir=None)
        obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
        print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)
        normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation

        curr_activations_bd = normedobs_bo

        # Hidden layers next
        assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
        layer_params = policy_params['hidden']['FeedforwardNet']
        for layer_name in sorted(layer_params.keys()):
            l = layer_params[layer_name]
            W, b = read_layer(l)
            curr_activations_bd = apply_nonlin(tf.matmul(curr_activations_bd, W) + b)
            print("reading layer ", layer_name, " weights ", W.shape, " bias ", b.shape)

        # Output layer
        W, b = read_layer(policy_params['out'])
        output_bo = tf.matmul(curr_activations_bd, W) + b
        print("reading output layer weights ", W.shape, " bias ", b.shape)
        return output_bo

    obs_bo = tf.placeholder(tf.float32, [None, None])
    a_ba = build_policy(obs_bo)
    policy_fn = tf_util.function([obs_bo], a_ba)
    return policy_fn
Esempio n. 25
0
def train_net(model, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    """Train (mode='train') or latent-test (mode='test') a siamese VAE.

    Supported datasets: 'chairs', 'celeba' (image files via read_dataset /
    load_image) and 'dsprites' (npz archive via DataManager). Training
    minimizes the mean VAE loss with Adam, periodically saving validation
    reconstructions and TensorBoard summaries, and checkpoints once per
    epoch. Test mode sweeps one latent feature and saves reconstructions.

    Fix in this revision: the Python-2-style `print "Save  meta graph"`
    statement was converted to a `print(...)` call — the rest of this file
    uses Python-3-only syntax, so the old statement was a syntax error.

    NOTE(review): `manager` is only created for dataset == 'dsprites', but
    `num_batch = manager.get_len()` runs for every dataset — 'chairs' and
    'celeba' training would raise NameError; confirm intended usage.
    NOTE(review): the 'test' branch references `test`, `test_reconst` and
    `test_img_saver`, which are not defined in this function — presumably
    copied from test_net; verify before using mode='test'.
    """
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    vae_loss = U.mean(model.vaeloss)

    latent_z1_tp = model.latent_z1
    latent_z2_tp = model.latent_z2

    losses = [U.mean(model.vaeloss),
            U.mean(model.siam_loss),
            U.mean(model.kl_loss1),
            U.mean(model.kl_loss2),
            U.mean(model.reconst_error1),
            U.mean(model.reconst_error2),
            ]

    # Per-feature-normalized and max variants of the siamese loss, for logging.
    siam_normal = losses[1]/entangled_feat
    siam_max = U.mean(model.max_siam_loss)

    tf.summary.scalar('Total Loss', losses[0])
    tf.summary.scalar('Siam Loss', losses[1])
    tf.summary.scalar('kl1_loss', losses[2])
    tf.summary.scalar('kl2_loss', losses[3])
    tf.summary.scalar('reconst_err1', losses[4])
    tf.summary.scalar('reconst_err2', losses[5])
    tf.summary.scalar('Siam Normal', siam_normal)
    tf.summary.scalar('Siam Max', siam_max)

    compute_losses = U.function([img1, img2], vae_loss)
    optimizer=tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)

    all_var_list = model.get_trainable_variables()

    img1_var_list = all_var_list
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
    merged = tf.summary.merge_all()
    # One training step: returns all six losses, both latents, and the merged summary.
    train = U.function([img1, img2],
                        [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])
    get_reconst_img = U.function([img1, img2], [model.reconst1, model.reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    train_writer = U.summary_writer(dir = log_save_dir)

    U.initialize()

    # Restore the latest checkpoint; chk_file_epoch_num is the restored epoch.
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")

    warn(img_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")

    meta_saved = False

    if mode == 'train':
        # Resume from the epoch after the restored checkpoint.
        for epoch_idx in range(chk_file_epoch_num+1, max_epoch):
            t_epoch_start = time.time()
            num_batch = manager.get_len()

            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2*batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)

                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))

                # Periodically save validation originals + reconstructions.
                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2*5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name = img_dir, img_names = validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()

                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)

                    if dataset == 'chairs':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'celeba':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'dsprites':
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}".format(batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)

            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                t_check = manager.sample_size / t_epoch_run

                warn("==========================================")
                warn("Run {} th epoch in {} sec: {} images / sec".format(epoch_idx+1, t_epoch_run, t_check))
                warn("==========================================")

            # Checkpoint every epoch; write the meta graph only once.
            if meta_saved == True:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = False)
            else:
                print("Save  meta graph")
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = True)
                meta_saved = True

    # Testing: sweep latent feature 31 over offsets and save reconstructions.
    elif mode == 'test':
        test_file_name = testing_images_list[0]
        test_img = load_single_img(dir_name = testing_img_dir, img_name = test_file_name)
        test_feature = 31
        test_variation = np.arange(-5, 5, 0.1)

        z = test(test_img)
        for idx in range(len(test_variation)):
            z_test = np.copy(z)
            z_test[0, test_feature] = z_test[0, test_feature] + test_variation[idx]
            reconst_test = test_reconst(z_test)
            test_save_img = np.squeeze(reconst_test[0])
            test_save_img = Image.fromarray(test_save_img)
            img_file_name = "test_feat_{}_var_({}).png".format(test_feature, test_variation[idx])
            test_img_saver.save(test_save_img, img_file_name, sub_dir = None)
        reconst_test = test_reconst(z)
        test_save_img = np.squeeze(reconst_test[0])
        test_save_img = Image.fromarray(test_save_img)
        img_file_name = "test_feat_{}_var_original.png".format(test_feature)
        test_img_saver.save(test_save_img, img_file_name, sub_dir = None)
Esempio n. 26
0
def mgpu_classifier_train_net(models, num_gpus, cls_batch_per_gpu, cls_L, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    """Train only the feature classifier of a multi-GPU siamese VAE.

    `models` holds one per-GPU model tower; batches are split evenly across
    towers, per-tower losses are rebuilt and averaged, and only the
    classifier variables (scope prefix "cls") are optimized with Adagrad —
    the VAE optimizer construction is commented out. The training loop at
    the bottom runs `cls_train_iter` classifier steps, but only implements
    the 'dsprites' dataset (other datasets fall through and do nothing per
    iteration).

    NOTE(review): the final `elif mode == 'test'`-style branch of the
    sibling functions is absent here; `mode`, `max_epoch`, `batch_size`
    and several other parameters are unused in this function — presumably
    kept for signature parity with train_net; confirm.
    """
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be multiples of ntowers (# of GPUs)
    ntowers = len(models)
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)

    # Split each input batch evenly across the GPU towers (axis 0).
    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    # Per-tower collections, concatenated/averaged after the loop.
    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):

                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2),
                          ]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)

                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)

                tf.summary.scalar('Cls Loss', cls_loss)

    # Average scalar losses across towers; concatenate per-sample outputs.
    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)

    # Transpose tower_losses (list of per-tower loss lists) and average
    # each loss kind across towers. `losses` here still holds the last
    # tower's list, which only provides the length.
    losses = [[] for _ in range(len(losses))]
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)

    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat

    tf.summary.scalar('total/cls_loss', cls_loss)

    compute_losses = U.function([img1, img2], vae_loss)

    # Partition variables: "vae" scope (frozen here) vs "cls" scope (trained).
    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]
    warn("{}".format(all_var_list))
    warn("=======================")
    warn("{}".format(vae_var_list))
    warn("=======================")
    warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    # optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)
    # optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)

    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)

    merged = tf.summary.merge_all()
    # train = U.function([img1, img2],
    #                     [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])

    # One classifier training step (updates only cls variables).
    classifier_train = U.function([img1, img2, feat_cls],
                        [cls_loss, latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr2])

    get_reconst_img = U.function([img1, img2], [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    cls_logfile_name = 'cls_{}'.format(logfile_name)
    cls_log_save_dir = os.path.join(cur_dir, cls_logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    cls_train_writer = U.summary_writer(dir = cls_log_save_dir)

    U.initialize()

    # Restore the latest checkpoint (VAE weights trained elsewhere).
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    # Classifier training loop (dsprites only).
    cls_train_iter = 10000
    for cls_train_i in range(cls_train_iter):
        # warn("Train:{}".format(cls_train_i))
        if dataset == 'dsprites':
            # At every epoch, train classifier and check result
            # (1) Load images: for each tower batch entry, fix one random
            # latent factor and sample cls_L image pairs sharing it.
            num_img_pair = cls_L * num_gpus * cls_batch_per_gpu
            # warn("{} {} {}".format(len(manager.latents_sizes)-1, num_gpus, cls_batch_per_gpu))
            feat = np.random.randint(len(manager.latents_sizes)-1, size = num_gpus * cls_batch_per_gpu)
            [images1, images2] = manager.get_image_fixed_feat_batch(feat, num_img_pair)

            # warn("images shape:{}".format(np.shape(images1)))

            # (2) Run one classifier update on the batch.
            [classification_loss, _, _, summary] = classifier_train(images1, images2, feat)
            if cls_train_i % 100 == 0:
                warn("cls loss {}: {}".format(cls_train_i, classification_loss))

            cls_train_writer.add_summary(summary, cls_train_i)
Esempio n. 27
0
def mgpu_train_net(models, num_gpus, mode, img_dir, dataset, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_epoch = 300, check_every_n = 500, loss_check_n = 10, save_model_freq = 5, batch_size = 512, lr = 0.001):
    """Build a multi-GPU VAE training graph and, in 'train' mode, run the epoch loop.

    Each entry of ``models`` is one tower replica. The input batch is split
    evenly across ``len(models)`` towers, per-tower losses are averaged for
    the summaries, and only the combined VAE loss is optimized by ``train``.

    Parameters
    ----------
    models : list
        One model replica per GPU; each exposes vaeloss, siam_loss,
        kl_loss1/2, reconst_error1/2, max_siam_loss, cls_loss,
        latent_z1/2, reconst1/2 and get_trainable_variables().
    num_gpus : int
        Number of GPUs (len(models) is what actually drives the split).
    mode : str
        Only 'train' runs the training loop.
    img_dir : str
        Training-image directory for 'chairs'/'celeba'; replaced with the
        dsprites .npz path when dataset == 'dsprites'.
    dataset : str
        One of 'chairs', 'celeba', 'dsprites'.
    chkfile_name, logfile_name, validatefile_name : str
        Sub-directory names (under the current working dir) for checkpoints,
        TF summaries and validation images respectively.
    entangled_feat :
        Divisor used to normalize the siamese loss for its summary scalar.
    max_epoch, check_every_n, loss_check_n, save_model_freq, batch_size, lr
        Training hyper-parameters; batch_size must be a multiple of the
        number of towers (asserted on the graph below).
    """
    # Placeholders are cached elsewhere and shared with the model builders.
    img1 = U.get_placeholder_cached(name="img1")
    img2 = U.get_placeholder_cached(name="img2")

    feat_cls = U.get_placeholder_cached(name="feat_cls")

    # batch size must be multiples of ntowers (# of GPUs)
    ntowers = len(models)
    # NOTE(review): these assert ops are created but never run via
    # control_dependencies or session.run — confirm they are intended to
    # take effect.
    tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
    tf.assert_equal(tf.floormod(tf.shape(img1)[0], ntowers), 0)

    # Split the incoming batch evenly across towers along the batch axis.
    img1splits = tf.split(img1, ntowers, 0)
    img2splits = tf.split(img2, ntowers, 0)

    # Per-tower collections, later averaged/concatenated into whole-batch ops.
    tower_vae_loss = []
    tower_latent_z1_tp = []
    tower_latent_z2_tp = []
    tower_losses = []
    tower_siam_max = []
    tower_reconst1 = []
    tower_reconst2 = []
    tower_cls_loss = []
    # Build one tower per GPU; each tower also records its own summaries.
    for gid, model in enumerate(models):
        with tf.name_scope('gpu%d' % gid) as scope:
            with tf.device('/gpu:%d' % gid):

                vae_loss = U.mean(model.vaeloss)
                latent_z1_tp = model.latent_z1
                latent_z2_tp = model.latent_z2
                losses = [U.mean(model.vaeloss),
                          U.mean(model.siam_loss),
                          U.mean(model.kl_loss1),
                          U.mean(model.kl_loss2),
                          U.mean(model.reconst_error1),
                          U.mean(model.reconst_error2),
                          ]
                siam_max = U.mean(model.max_siam_loss)
                cls_loss = U.mean(model.cls_loss)

                tower_vae_loss.append(vae_loss)
                tower_latent_z1_tp.append(latent_z1_tp)
                tower_latent_z2_tp.append(latent_z2_tp)
                tower_losses.append(losses)
                tower_siam_max.append(siam_max)
                tower_reconst1.append(model.reconst1)
                tower_reconst2.append(model.reconst2)
                tower_cls_loss.append(cls_loss)

                tf.summary.scalar('Total Loss', losses[0])
                tf.summary.scalar('Siam Loss', losses[1])
                tf.summary.scalar('kl1_loss', losses[2])
                tf.summary.scalar('kl2_loss', losses[3])
                tf.summary.scalar('reconst_err1', losses[4])
                tf.summary.scalar('reconst_err2', losses[5])
                tf.summary.scalar('Siam Max', siam_max)

    # Average scalar losses across towers; concatenate per-tower tensors
    # back into full-batch order along axis 0.
    vae_loss = U.mean(tower_vae_loss)
    siam_max = U.mean(tower_siam_max)
    latent_z1_tp = tf.concat(tower_latent_z1_tp, 0)
    latent_z2_tp = tf.concat(tower_latent_z2_tp, 0)
    model_reconst1 = tf.concat(tower_reconst1, 0)
    model_reconst2 = tf.concat(tower_reconst2, 0)
    cls_loss = U.mean(tower_cls_loss)

    # `losses` here still refers to the last tower's list from the loop
    # above; only its length is used to size the transposed structure.
    losses = [[] for _ in range(len(losses))]
    for tl in tower_losses:
        for i, l in enumerate(tl):
            losses[i].append(l)

    losses = [U.mean(l) for l in losses]
    siam_normal = losses[1] / entangled_feat

    tf.summary.scalar('total/Total Loss', losses[0])
    tf.summary.scalar('total/Siam Loss', losses[1])
    tf.summary.scalar('total/kl1_loss', losses[2])
    tf.summary.scalar('total/kl2_loss', losses[3])
    tf.summary.scalar('total/reconst_err1', losses[4])
    tf.summary.scalar('total/reconst_err2', losses[5])
    tf.summary.scalar('total/Siam Normal', siam_normal)
    tf.summary.scalar('total/Siam Max', siam_max)

    compute_losses = U.function([img1, img2], vae_loss)

    # NOTE(review): `model` is the last tower's replica — this assumes
    # trainable variables are shared across towers; confirm.
    all_var_list = model.get_trainable_variables()
    vae_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("vae")]
    cls_var_list = [v for v in all_var_list if v.name.split("/")[2].startswith("cls")]

    warn("{}".format(all_var_list))
    warn("==========================")
    warn("{}".format(vae_var_list))
    # warn("==========================")
    # warn("{}".format(cls_var_list))

    # with tf.device('/cpu:0'):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, epsilon = 0.01/batch_size)
    optimize_expr1 = optimizer.minimize(vae_loss, var_list=vae_var_list)

    # Separate optimizer for the classifier head; its update op is built
    # here but not attached to the `train` function below.
    feat_cls_optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
    optimize_expr2 = feat_cls_optimizer.minimize(cls_loss, var_list=cls_var_list)


    merged = tf.summary.merge_all()
    train = U.function([img1, img2],
                        [losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])


    get_reconst_img = U.function([img1, img2], [model_reconst1, model_reconst2, latent_z1_tp, latent_z2_tp])
    get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])

    cur_dir = get_cur_dir()
    chk_save_dir = os.path.join(cur_dir, chkfile_name)
    log_save_dir = os.path.join(cur_dir, logfile_name)
    validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
    if dataset == 'chairs' or dataset == 'celeba':
        test_img_saver_dir = os.path.join(cur_dir, "test_images")
        testing_img_dir = os.path.join(cur_dir, "dataset/{}/test_img".format(dataset))

    train_writer = U.summary_writer(dir = log_save_dir)

    U.initialize()

    # Restore the latest checkpoint (if any); returns the epoch it stopped at.
    saver, chk_file_epoch_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
    if dataset == 'chairs' or dataset == 'celeba':
        validate_img_saver = Img_Saver(Img_dir = validate_img_saver_dir)
    elif dataset == 'dsprites':
        validate_img_saver = BW_Img_Saver(Img_dir = validate_img_saver_dir) # Black and White, temporary usage
    else:
        warn("Unknown dataset Error")
        # break

    warn("dataset: {}".format(dataset))
    if dataset == 'chairs' or dataset == 'celeba':
        training_images_list = read_dataset(img_dir)
        n_total_train_data = len(training_images_list)
        testing_images_list = read_dataset(testing_img_dir)
        n_total_testing_data = len(testing_images_list)
    elif dataset == 'dsprites':
        cur_dir = osp.join(cur_dir, 'dataset')
        cur_dir = osp.join(cur_dir, 'dsprites')
        img_dir = osp.join(cur_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        manager = DataManager(img_dir, batch_size)
    else:
        warn("Unknown dataset Error")
        # break

    meta_saved = False

    if mode == 'train':
        # Resume from the epoch after the loaded checkpoint.
        for epoch_idx in range(chk_file_epoch_num+1, max_epoch):
            t_epoch_start = time.time()
            # NOTE(review): `manager` is only defined for 'dsprites'; this
            # line would raise NameError for 'chairs'/'celeba' — confirm.
            num_batch = manager.get_len()

            for batch_idx in range(num_batch):
                if dataset == 'chairs' or dataset == 'celeba':
                    idx = random.sample(range(n_total_train_data), 2*batch_size)
                    batch_files = [training_images_list[i] for i in idx]
                    [images1, images2] = load_image(dir_name = img_dir, img_names = batch_files)
                elif dataset == 'dsprites':
                    [images1, images2] = manager.get_next()
                img1, img2 = images1, images2
                [l1, l2, _, _] = get_reconst_img(img1, img2)

                [loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)

                # Periodic console logging of the component losses.
                if batch_idx % 50 == 1:
                    header("******* epoch: {}/{} batch: {}/{} *******".format(epoch_idx, max_epoch, batch_idx, num_batch))
                    warn("Total Loss: {}".format(loss0))
                    warn("Siam loss: {}".format(loss1))
                    warn("kl1_loss: {}".format(loss2))
                    warn("kl2_loss: {}".format(loss3))
                    warn("reconst_err1: {}".format(loss4))
                    warn("reconst_err2: {}".format(loss5))

                # Periodic validation: save original/reconstructed image pairs.
                if batch_idx % check_every_n == 1:
                    if dataset == 'chairs' or dataset == 'celeba':
                        idx = random.sample(range(len(training_images_list)), 2*5)
                        validate_batch_files = [training_images_list[i] for i in idx]
                        [images1, images2] = load_image(dir_name = img_dir, img_names = validate_batch_files)
                    elif dataset == 'dsprites':
                        [images1, images2] = manager.get_next()

                    [reconst1, reconst2, _, _] = get_reconst_img(images1, images2)

                    if dataset == 'chairs':
                        # Grayscale PNGs named after the source file.
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img)
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'celeba':
                        # RGB PNGs named after the source file.
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            save_img = np.squeeze(images1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_ori.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = Image.fromarray(save_img, 'RGB')
                            img_file_name = "{}_rec.png".format(validate_batch_files[img_idx].split('.')[0])
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)
                    elif dataset == 'dsprites':
                        # float32 arrays handed to the BW saver, indexed by position.
                        for img_idx in range(len(images1)):
                            sub_dir = "iter_{}_{}".format(epoch_idx, batch_idx)

                            # save_img = images1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(images1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_ori.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                            # save_img = reconst1[img_idx].reshape(64, 64)
                            save_img = np.squeeze(reconst1[img_idx])
                            save_img = save_img.astype(np.float32)
                            img_file_name = "{}_rec.jpg".format(img_idx)
                            validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

                if batch_idx % loss_check_n == 1:
                    train_writer.add_summary(summary, batch_idx)

            t_epoch_end = time.time()
            t_epoch_run = t_epoch_end - t_epoch_start
            if dataset == 'dsprites':
                # Throughput in images/sec for this epoch.
                t_check = manager.sample_size / t_epoch_run

                warn("==========================================")
                warn("Run {} th epoch in {} sec: {} images / sec".format(epoch_idx+1, t_epoch_run, t_check))
                warn("==========================================")


            # Write the meta graph only once; later checkpoints reuse it.
            if meta_saved == True:
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = False)
            else:
                # NOTE: Python 2 print statement — this file targets Python 2.
                print "Save  meta graph"
                saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = epoch_idx, write_meta_graph = True)
                meta_saved = True
Esempio n. 28
0
def build_act_with_param_noise(make_obs_ph,
                               q_func,
                               num_actions,
                               scope="deepq",
                               reuse=None,
                               param_noise_filter_func=None):
    """Creates the act function with support for parameter space noise exploration (https://arxiv.org/abs/1706.01905):

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that take a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    param_noise_filter_func: tf.Variable -> bool
        function that decides whether or not a variable should be perturbed. Only applicable
        if param_noise is True. If set to None, default_param_noise_filter is used by default.

    Returns
    -------
    act: (tf.Variable, bool, float, bool, float, bool) -> tf.Variable
        function to select an action given observation.
        See the top of the file for details.
    """
    if param_noise_filter_func is None:
        param_noise_filter_func = default_param_noise_filter

    with tf.variable_scope(scope, reuse=reuse):
        observations_ph = make_obs_ph("observation")
        # Control placeholders: all have passive defaults via `givens` below,
        # so a plain act() call performs no updates.
        stochastic_ph = tf.placeholder(tf.bool, (), name="stochastic")
        update_eps_ph = tf.placeholder(tf.float32, (), name="update_eps")
        update_param_noise_threshold_ph = tf.placeholder(
            tf.float32, (), name="update_param_noise_threshold")
        update_param_noise_scale_ph = tf.placeholder(
            tf.bool, (), name="update_param_noise_scale")
        reset_ph = tf.placeholder(tf.bool, (), name="reset")

        # eps: probability of taking a uniform random action (epsilon-greedy).
        eps = tf.get_variable("eps", (),
                              initializer=tf.constant_initializer(0))
        param_noise_scale = tf.get_variable(
            "param_noise_scale", (),
            initializer=tf.constant_initializer(0.01),
            trainable=False)
        param_noise_threshold = tf.get_variable(
            "param_noise_threshold", (),
            initializer=tf.constant_initializer(0.05),
            trainable=False)

        # Unmodified Q.
        q_values = q_func(observations_ph.get(), num_actions, scope="q_func")

        # Perturbable Q used for the actual rollout.
        q_values_perturbed = q_func(observations_ph.get(),
                                    num_actions,
                                    scope="perturbed_q_func")

        # We have to wrap this code into a function due to the way tf.cond() works. See
        # https://stackoverflow.com/questions/37063952/confused-by-the-behavior-of-tf-cond for
        # a more detailed discussion.
        def perturb_vars(original_scope, perturbed_scope):
            # Copy every variable from original_scope into perturbed_scope,
            # adding Gaussian noise (stddev = param_noise_scale) to the
            # variables selected by param_noise_filter_func.
            all_vars = scope_vars(absolute_scope_name(original_scope))
            all_perturbed_vars = scope_vars(
                absolute_scope_name(perturbed_scope))
            assert len(all_vars) == len(all_perturbed_vars)
            perturb_ops = []
            for var, perturbed_var in zip(all_vars, all_perturbed_vars):
                if param_noise_filter_func(perturbed_var):
                    # Perturb this variable.
                    op = tf.assign(
                        perturbed_var,
                        var + tf.random_normal(shape=tf.shape(var),
                                               mean=0.,
                                               stddev=param_noise_scale))
                else:
                    # Do not perturb, just assign.
                    op = tf.assign(perturbed_var, var)
                perturb_ops.append(op)
            assert len(perturb_ops) == len(all_vars)
            return tf.group(*perturb_ops)

        # Set up functionality to re-compute `param_noise_scale`. This perturbs yet another copy
        # of the network and measures the effect of that perturbation in action space. If the perturbation
        # is too big, reduce scale of perturbation, otherwise increase.
        q_values_adaptive = q_func(observations_ph.get(),
                                   num_actions,
                                   scope="adaptive_q_func")
        perturb_for_adaption = perturb_vars(original_scope="q_func",
                                            perturbed_scope="adaptive_q_func")
        # KL(softmax(q) || softmax(q_adaptive)), averaged over the batch,
        # measures how much the perturbation changed the policy.
        kl = tf.reduce_sum(tf.nn.softmax(q_values) *
                           (tf.log(tf.nn.softmax(q_values)) -
                            tf.log(tf.nn.softmax(q_values_adaptive))),
                           axis=-1)
        mean_kl = tf.reduce_mean(kl)

        def update_scale():
            # Re-perturb the adaptive copy, then grow/shrink the noise scale
            # depending on whether the policy KL stayed under the threshold.
            with tf.control_dependencies([perturb_for_adaption]):
                update_scale_expr = tf.cond(
                    mean_kl < param_noise_threshold,
                    lambda: param_noise_scale.assign(param_noise_scale * 1.01),
                    lambda: param_noise_scale.assign(param_noise_scale / 1.01),
                )
            return update_scale_expr

        # Functionality to update the threshold for parameter space noise.
        update_param_noise_threshold_expr = param_noise_threshold.assign(
            tf.cond(update_param_noise_threshold_ph >= 0,
                    lambda: update_param_noise_threshold_ph,
                    lambda: param_noise_threshold))

        # Put everything together.
        deterministic_actions = tf.argmax(q_values_perturbed, axis=1)
        batch_size = tf.shape(observations_ph.get())[0]
        random_actions = tf.random_uniform(tf.stack([batch_size]),
                                           minval=0,
                                           maxval=num_actions,
                                           dtype=tf.int64)
        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)

        output_actions = tf.cond(stochastic_ph, lambda: stochastic_actions,
                                 lambda: deterministic_actions)
        update_eps_expr = eps.assign(
            tf.cond(update_eps_ph >= 0, lambda: update_eps_ph, lambda: eps))
        updates = [
            update_eps_expr,
            tf.cond(
                reset_ph,
                lambda: perturb_vars(original_scope="q_func",
                                     perturbed_scope="perturbed_q_func"),
                lambda: tf.group(*[])),
            tf.cond(update_param_noise_scale_ph, lambda: update_scale(),
                    lambda: tf.Variable(0., trainable=False)),
            update_param_noise_threshold_expr,
        ]
        # Defaults (givens): no eps update, stochastic policy, no reset,
        # no threshold/scale update — so act() is side-effect free unless asked.
        _act = U.function(inputs=[
            observations_ph, stochastic_ph, update_eps_ph, reset_ph,
            update_param_noise_threshold_ph, update_param_noise_scale_ph
        ],
                          outputs=output_actions,
                          givens={
                              update_eps_ph: -1.0,
                              stochastic_ph: True,
                              reset_ph: False,
                              update_param_noise_threshold_ph: False,
                              update_param_noise_scale_ph: False
                          },
                          updates=updates)

        def act(ob,
                reset,
                update_param_noise_threshold,
                update_param_noise_scale,
                stochastic=True,
                update_eps=-1):
            # Thin wrapper reordering arguments into _act's input order.
            return _act(ob, stochastic, update_eps, reset,
                        update_param_noise_threshold, update_param_noise_scale)

        return act
Esempio n. 29
0
def load_policy(filename):
    '''
    loading and building expert policy
    '''
    print('################ Env: ', filename, '###################')
    with open(filename, 'rb') as f:
        data = pickle.loads(f.read())
        #print(type(data)) #<class 'dict'>

    # assert len(data.keys()) == 2
    nonlin_type = data['nonlin_type']
    #print(nonlin_type) # tanh
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]
    #print(policy_type) # GaussianPolicy

    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type) #assert type(t) is int, '정수 아닌 값이 있네'
    
    policy_params = data[policy_type]
    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}
    ''' 집합 자료형
    >>> s1 = set([1,2,3])
    >>> s1
    {1, 2, 3}
    '''

    # Keep track of input and output dims (i.e. observation and action dims) for the user

    # Encapsulation
    def build_policy(obs_bo):
        def read_layer(layer_data):
            '''
            Extract Weight, bias from layer <class 'dict'>
            '''
            assert list(layer_data.keys()) == ['AffineLayer']
            assert sorted(layer_data['AffineLayer'].keys()) == ['W', 'b']
            return layer_data['AffineLayer']['W'].astype(np.float32), layer_data['AffineLayer']['b'].astype(np.float32)
            '''
            numpy.ndarray.astype
                Copy of the array, cast to a specified type.
            '''

        def apply_nonlin(x):
            '''
            Apply the nonlinear activation function such as leack relu, tanh
            '''
            if nonlin_type == 'lrelu':
                return tf_util.lrelu(x, leak=.01) # openai/imitation nn.py:233
            elif nonlin_type == 'tanh':
                return tf.tanh(x)
            else:
                raise NotImplementedError(nonlin_type)

        # Build the policy. First, observation normalization.
        assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
        obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D'] # <class 'numpy.ndarray'>
        obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D'] # <class 'numpy.ndarray'>
        obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean))) # <class 'numpy.ndarray'>, standard deviation = \sqrt{E( X^2 ) - ( E(X) )^2}
        print('observation mean, standard deviation shape: ', obsnorm_mean.shape, obsnorm_stdev.shape) #(1, 11)
        
        normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation
        ''' Nomalized observation _ behavior observation (Standard score \frac{X-\mu}{\sigma})
        obs_bo          = X
        obsnorm_mean    = /mu
        obsnorm_stdev   = \sigma
        ----------------------------
        normedobs_bo   = normalized data
        '''

        curr_activations_bd = normedobs_bo

        # Hidden layers next
        assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']

        layer_params = policy_params['hidden']['FeedforwardNet'] # < class 'dict'>, layer_0, layer_2        
        
        # 2 layers
        for layer_name in sorted(layer_params.keys()): # <class 'str'>, , layer_name = layer_0, layer_2
            '''
            Pass the layers given from expert, 
            '''
            layer_data = layer_params[layer_name]  # < class 'dict'>, layer_data = {'W', 'b'}
            W, b = read_layer(layer_data) # layer_0:  (11, 64) (1, 64), layer_2: (64, 64) (1, 64)
            print(W.shape, b.shape)
            curr_activations_bd = apply_nonlin(tf.matmul(curr_activations_bd, W) + b) # current activation behavior data + nonlinear activation funtion

        print('----end---')
        # Output layer, 1 layer
        W, b = read_layer(policy_params['out']) # (64, 3) (1, 3)
        # print(W.shape, b.shape)
        
        output_bo = tf.matmul(curr_activations_bd, W) + b # (?, 3), ?은 위의 과정에서 (1, 11)과 브로팅캐스팅 진행
        #print(output_bo.shape)
        
        return output_bo # Output behavior output

    #we create pairs of <observation, action>
    obs_bo = tf.placeholder(tf.float32, [None, None])  # <class 'tensorflow.python.framework.ops.Tensor'>, Tensor("Placeholder:0", shape=(?, ?), dtype=float32) 
    a_ba = build_policy(obs_bo) # Output behavior, <class 'tensorflow.python.framework.ops.Tensor'>, Tensor("add_2:0", shape=(?, 3), dtype=float32)
    
    policy_fn = tf_util.function([obs_bo], a_ba) # <class 'function'>, 
    '''
    function(inputs, outputs, updates=None, givens=None)
    [obs_bo]: list, [<tf.Tensor 'Placeholder:0' shape=(?, ?) dtype=float32>]
    a_ba: <class 'tensorflow.python.framework.ops.Tensor'>, Tensor("add_2:0", shape=(?, 3), dtype=float32)
    '''

    return policy_fn
Esempio n. 30
0
def build_train(make_obs_ph,
                q_func,
                num_actions,
                optimizer,
                grad_norm_clipping=None,
                gamma=1.0,
                double_q=True,
                scope="deepq",
                reuse=None,
                param_noise=False,
                param_noise_filter_func=None):
    """Creates the train function:

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        a function that takes a name and creates a placeholder of input with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model that takes the following inputs:
            observation_in: object
                the output of observation placeholder
            num_actions: int
                number of actions
            scope: str
            reuse: bool
                should be passed to outer variable scope
        and returns a tensor of shape (batch_size, num_actions) with values of every action.
    num_actions: int
        number of actions
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
        clip gradient norms to this value. If None no clipping is performed.
    gamma: float
        discount rate.
    double_q: bool
        if true will use Double Q Learning (https://arxiv.org/abs/1509.06461).
        In general it is a good idea to keep it enabled.
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse the scope must be given.
    param_noise: bool
        whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905)
    param_noise_filter_func: tf.Variable -> bool
        function that decides whether or not a variable should be perturbed. Only applicable
        if param_noise is True. If set to None, default_param_noise_filter is used by default.

    Returns
    -------
    act: (tf.Variable, bool, float) -> tf.Variable
        function to select an action given observation.
        See the top of the file for details.
    train: (object, np.array, np.array, object, np.array, np.array) -> np.array
        optimize the error in Bellman's equation.
        See the top of the file for details.
    update_target: () -> ()
        copy the parameters from optimized Q function to the target Q function.
        See the top of the file for details.
    debug: {str: function}
        a bunch of functions to print debug data like q_values.
    """
    # Action-selection graph; the param-noise variant adds extra placeholders
    # and perturbation machinery (see build_act_with_param_noise).
    if param_noise:
        act_f = build_act_with_param_noise(
            make_obs_ph,
            q_func,
            num_actions,
            scope=scope,
            reuse=reuse,
            param_noise_filter_func=param_noise_filter_func)
    else:
        act_f = build_act(make_obs_ph,
                          q_func,
                          num_actions,
                          scope=scope,
                          reuse=reuse)

    with tf.variable_scope(scope, reuse=reuse):
        # set up placeholders
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        # Per-sample importance weights (e.g. for prioritized replay).
        importance_weights_ph = tf.placeholder(tf.float32, [None],
                                               name="weight")

        # q network evaluation
        q_t = q_func(obs_t_input.get(),
                     num_actions,
                     scope="q_func",
                     reuse=True)  # reuse parameters from act
        q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope=tf.get_variable_scope().name +
                                        "/q_func")

        # target q network evaluation
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # q scores for actions which we know were selected in the given state.
        q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions),
                                     1)

        # compute estimate of best possible value starting from state at t + 1
        if double_q:
            # Double DQN: online net picks the argmax action, target net scores it.
            q_tp1_using_online_net = q_func(obs_tp1_input.get(),
                                            num_actions,
                                            scope="q_func",
                                            reuse=True)
            q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
            q_tp1_best = tf.reduce_sum(
                q_tp1 * tf.one_hot(q_tp1_best_using_online_net, num_actions),
                1)
        else:
            q_tp1_best = tf.reduce_max(q_tp1, 1)
        # Zero out the bootstrap value for terminal transitions.
        q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best

        # compute RHS of bellman equation
        q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked

        # compute the error (potentially clipped)
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        errors = U.huber_loss(td_error)
        weighted_error = tf.reduce_mean(importance_weights_ph * errors)

        # compute optimization op (potentially with gradient clipping)
        if grad_norm_clipping is not None:
            gradients = optimizer.compute_gradients(weighted_error,
                                                    var_list=q_func_vars)
            for i, (grad, var) in enumerate(gradients):
                if grad is not None:
                    gradients[i] = (tf.clip_by_norm(grad,
                                                    grad_norm_clipping), var)
            optimize_expr = optimizer.apply_gradients(gradients)
        else:
            optimize_expr = optimizer.minimize(weighted_error,
                                               var_list=q_func_vars)

        # update_target_fn will be called periodically to copy Q network to target Q network
        # (pairing variables by sorted name, which assumes matching architectures).
        update_target_expr = []
        for var, var_target in zip(
                sorted(q_func_vars, key=lambda v: v.name),
                sorted(target_q_func_vars, key=lambda v: v.name)):
            update_target_expr.append(var_target.assign(var))
        update_target_expr = tf.group(*update_target_expr)

        # Create callable functions
        train = U.function(inputs=[
            obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph,
            importance_weights_ph
        ],
                           outputs=td_error,
                           updates=[optimize_expr])
        update_target = U.function([], [], updates=[update_target_expr])

        q_values = U.function([obs_t_input], q_t)

        return act_f, train, update_target, {'q_values': q_values}
Esempio n. 31
0
def train_net(model, manager, chkfile_name, logfile_name, validatefile_name, entangled_feat, max_iter = 6000001, check_every_n = 1000, loss_check_n = 10, save_model_freq = 5000, batch_size = 32):
	"""Train the siamese VAE `model` on image pairs supplied by `manager`.

	Builds the loss/summary graph, restores the latest checkpoint from
	`chkfile_name` (relative to the current directory), then loops up to
	`max_iter` iterations: sample a batch of image pairs, run one Adagrad
	step, and periodically write TensorBoard summaries (`logfile_name`),
	save reconstruction images (`validatefile_name`), and checkpoint the
	session every `save_model_freq` iterations.

	NOTE(review): Python 2 code (bare `print` statements below). `model`
	and `manager` are project types; their interfaces (vaeloss, siam_loss,
	latent_z1/2, reconst*_mean, get_trainable_variables, imgs, get_images)
	are assumed from usage here — confirm against their definitions.
	"""
	# Input placeholders are created elsewhere by the model and fetched
	# from the tf_util placeholder cache by name.
	img1 = U.get_placeholder_cached(name="img1")
	img2 = U.get_placeholder_cached(name="img2")


	# Testing
	# img_test = U.get_placeholder_cached(name="img_test")
	# reconst_tp = U.get_placeholder_cached(name="reconst_tp")


	# Scalar training objective: mean VAE loss over the batch.
	vae_loss = U.mean(model.vaeloss)

	latent_z1_tp = model.latent_z1
	latent_z2_tp = model.latent_z2

	# Individual loss components, each reduced to a batch mean.
	# Order matters: indices [0..5] are referenced by name in the
	# summaries and the train() outputs below.
	losses = [U.mean(model.vaeloss),
			U.mean(model.siam_loss),
			U.mean(model.kl_loss1), 
			U.mean(model.kl_loss2), 
			U.mean(model.reconst_error1), 
			U.mean(model.reconst_error2), 
			]

	# Siamese loss normalized by the number of entangled features.
	siam_normal = losses[1]/entangled_feat		
	siam_max = U.mean(model.max_siam_loss)

	tf.summary.scalar('Total Loss', losses[0])
	tf.summary.scalar('Siam Loss', losses[1])
	tf.summary.scalar('kl1_loss', losses[2])
	tf.summary.scalar('kl2_loss', losses[3])
	tf.summary.scalar('reconst_err1', losses[4])
	tf.summary.scalar('reconst_err2', losses[5])
	tf.summary.scalar('Siam Normal', siam_normal)
	tf.summary.scalar('Siam Max', siam_max)

	# decoded_img = [model.reconst1, model.reconst2]


	compute_losses = U.function([img1, img2], vae_loss)
	lr = 0.005
	optimizer=tf.train.AdagradOptimizer(learning_rate=lr)

	all_var_list = model.get_trainable_variables()

	# print all_var_list
	# All trainable variables are optimized jointly (the filtered
	# sub-list below was disabled).
	img1_var_list = all_var_list
	#[v for v in all_var_list if v.name.split("/")[1].startswith("proj1") or v.name.split("/")[1].startswith("unproj1")]
	optimize_expr1 = optimizer.minimize(vae_loss, var_list=img1_var_list)
	merged = tf.summary.merge_all()
	# train(): one optimization step; returns all loss components, the
	# latent codes, and the merged summary proto for TensorBoard.
	train = U.function([img1, img2], 
						[losses[0], losses[1], losses[2], losses[3], losses[4], losses[5], latent_z1_tp, latent_z2_tp, merged], updates = [optimize_expr1])
	get_reconst_img = U.function([img1, img2], [model.reconst1_mean, model.reconst2_mean, latent_z1_tp, latent_z2_tp])
	get_latent_var = U.function([img1, img2], [latent_z1_tp, latent_z2_tp])


	# testing
	# test = U.function([img_test], model.latent_z_test)
	# test_reconst = U.function([reconst_tp], [model.reconst_test])

	# Resolve checkpoint/log/validation-image directories under the
	# current working directory.
	cur_dir = get_cur_dir()
	chk_save_dir = os.path.join(cur_dir, chkfile_name)
	log_save_dir = os.path.join(cur_dir, logfile_name)
	validate_img_saver_dir = os.path.join(cur_dir, validatefile_name)
	# test_img_saver_dir = os.path.join(cur_dir, "test_images")
	# testing_img_dir = os.path.join(cur_dir, "dataset/test_img")
	
	train_writer = U.summary_writer(dir = log_save_dir)


	U.initialize()

	# Restore the most recent checkpoint (if any); chk_file_num is the
	# iteration the restored checkpoint was saved at.
	saver, chk_file_num = U.load_checkpoints(load_requested = True, checkpoint_dir = chk_save_dir)
	validate_img_saver = BW_Img_Saver(validate_img_saver_dir)

	# testing
	# test_img_saver = Img_Saver(test_img_saver_dir)

	# The meta graph is written only once (first save); later saves
	# store weights only.
	meta_saved = False

	iter_log = []
	loss1_log = []
	loss2_log = []

	loss3_log = []

	training_images_list = manager.imgs
	# read_dataset(img_dir)
	n_total_train_data = len(training_images_list)

	# testing_images_list = read_dataset(testing_img_dir)
	# n_total_testing_data = len(testing_images_list)

	training = True
	testing = False

	if training == True:
		# Resume counting from the restored checkpoint's iteration.
		for num_iter in range(chk_file_num+1, max_iter):
			header("******* {}th iter: *******".format(num_iter))

			# Sample 2*batch_size distinct indices; the manager splits
			# them into two image batches (one per siamese branch).
			idx = random.sample(range(n_total_train_data), 2*batch_size)
			batch_files = idx
			# print batch_files
			[images1, images2] = manager.get_images(indices = idx)
			# NOTE(review): rebinds img1/img2 from placeholders to numpy
			# batches; safe only because the U.function callables bound
			# the placeholders above.
			img1, img2 = images1, images2
			# NOTE(review): l1/l2 are computed but never used here.
			[l1, l2, _, _] = get_reconst_img(img1, img2)

			[loss0, loss1, loss2, loss3, loss4, loss5, latent1, latent2, summary] = train(img1, img2)	

			warn("Total Loss: {}".format(loss0))
			warn("Siam loss: {}".format(loss1))
			warn("kl1_loss: {}".format(loss2))
			warn("kl2_loss: {}".format(loss3))
			warn("reconst_err1: {}".format(loss4))
			warn("reconst_err2: {}".format(loss5))

			# warn("num_iter: {} check: {}".format(num_iter, check_every_n))
			# warn("Total Loss: {}".format(loss6))
			# Every check_every_n iterations: save original and
			# reconstructed validation images (5 pairs) as 64x64 JPEGs.
			if num_iter % check_every_n == 1:
				header("******* {}th iter: *******".format(num_iter))
				idx = random.sample(range(len(training_images_list)), 2*5)
				[images1, images2] = manager.get_images(indices = idx)
				[reconst1, reconst2, _, _] = get_reconst_img(images1, images2)
				# for i in range(len(latent1[0])):
				# 	print "{} th: {:.2f}".format(i, np.mean(np.abs(latent1[:, i] - latent2[:, i])))
				for img_idx in range(len(images1)):
					sub_dir = "iter_{}".format(num_iter)

					save_img = images1[img_idx].reshape(64, 64)
					save_img = save_img.astype(np.float32)
					img_file_name = "{}_ori.jpg".format(img_idx)				
					validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

					save_img = reconst1[img_idx].reshape(64, 64)
					save_img = save_img.astype(np.float32)
					img_file_name = "{}_rec.jpg".format(img_idx)				
					validate_img_saver.save(save_img, img_file_name, sub_dir = sub_dir)

			# Every loss_check_n iterations: flush summaries to TensorBoard.
			if num_iter % loss_check_n == 1:
				train_writer.add_summary(summary, num_iter)

			# Periodic checkpointing; the meta graph is written only on
			# the first save of this run.
			if num_iter > 11 and num_iter % save_model_freq == 1:
				if meta_saved == True:
					saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = False)
				else:
					print "Save  meta graph"
					saver.save(U.get_session(), chk_save_dir + '/' + 'checkpoint', global_step = num_iter, write_meta_graph = True)
					meta_saved = True