def __init__(self, T, S, layers, n_comp, batch_size, C=1., data_dim=3,
             keep_latest_k=None, lr=1e-3, reg_scale=0., maximum=1e+3):
    """
    MLE trainer setup: builds the Hawkes model, the input placeholder, the
    negative-log-likelihood cost, and an Adam optimizer with decaying lr.

    Params:
    - T:             the maximum time of the sequences
    - S:             the space of location
    - layers:        layer configuration passed to SpatialTemporalHawkes
    - n_comp:        number of components passed to SpatialTemporalHawkes
    - batch_size:    batch size of the training data
    - C:             the constant in diffusion kernel
    - data_dim:      data dimension (=3 by default)
    - keep_latest_k: only compute latest k points in log-likelihood calculation
    - lr:            learning rate for the optimizer
    - reg_scale:     scale of the l1 regularizer (penalty currently excluded from cost)
    - maximum:       upper bound of the conditional intensity
                     (generalized: was hard-coded to 1e+3 despite being documented)
    """
    self.batch_size = batch_size
    # Hawkes process whose likelihood will be maximized.
    self.hawkes = SpatialTemporalHawkes(
        T, S, layers=layers, n_comp=n_comp, C=C, maximum=maximum, verbose=False)
    # l1 regularization over the model weights; computed but intentionally
    # left out of the cost below (see the trailing "+ penalty_term" note).
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=reg_scale, scope=None)
    penalty_term = tf.contrib.layers.apply_regularization(
        l1_regularizer, self.hawkes.Wss)
    # input tensor: expert sequences [batch_size, seq_len, data_dim]
    # (time, location, marks); zero rows are treated as padding downstream.
    self.input_seqs = tf.placeholder(tf.float32, [batch_size, None, data_dim])
    # negative average log-likelihood as the training cost
    self.cost = -1 * self.log_likelihood(
        S, keep_latest_k=keep_latest_k) / batch_size  # + penalty_term
    # Adam optimizer with exponentially decaying learning rate
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        lr, global_step, decay_steps=100, decay_rate=0.99, staircase=True)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate, beta1=0.6, beta2=0.9).minimize(
            self.cost, global_step=global_step)
def __init__(self, T, S, C=1., maximum=1e+4):
    """
    Set up the generator's hyper-parameters and its underlying Hawkes model.

    Params:
    - T: the maximum time of the sequences
    - S: the space of location
    - C: the constant in diffusion kernel
    - maximum: upper bound of the conditional intensity
    """
    # time horizon and spatial region kept as model hyper-parameters
    self.T, self.S = T, S
    # sequence generator: a spatio-temporal Hawkes process
    self.hawkes = SpatialTemporalHawkes(C=C, maximum=maximum)
def __init__(self, T, S, layers, n_comp, batch_size, C=1., maximum=1e+3,
             keep_latest_k=None, lr=1e-5, eps=0.2):
    """
    RL trainer setup: builds the Hawkes generator, input placeholders, the
    per-point log-likelihood graph, and the policy optimizer.

    Params:
    - T:             the maximum time of the sequences
    - S:             the space of location
    - layers:        layer configuration passed to SpatialTemporalHawkes
    - n_comp:        number of components passed to SpatialTemporalHawkes
    - batch_size:    batch size of the training data
    - C:             the constant in diffusion kernel
    - maximum:       upper bound of the conditional intensity
    - keep_latest_k: only compute latest k points in log-likelihood calculation
    - lr:            initial learning rate of the policy optimizer
    - eps:           coaching probability (coaching currently disabled below)
    """
    # model hyper-parameters
    self.T = T                    # time space
    self.S = S                    # location space
    self.batch_size = batch_size  # batch size
    self.maximum = maximum        # upper bound of the conditional intensity
    # Hawkes process generator
    # bug fix: forward `maximum` (was hard-coded to 1e+3, silently ignoring
    # the constructor argument that is stored in self.maximum above)
    self.hawkes = SpatialTemporalHawkes(
        T, S, layers=layers, n_comp=n_comp, C=C, maximum=maximum, verbose=False)
    # input tensors: expert and learner sequences (time, location, mark)
    self.input_expert_seqs = tf.placeholder(tf.float32, [batch_size, None, 3])
    self.input_learner_seqs = tf.placeholder(tf.float32, [batch_size, None, 3])
    # TODO: make esp decay exponentially
    # coaching (disabled)
    # self.coached_learner_seqs = self._coaching(self.input_learner_seqs, self.input_expert_seqs, eps=eps)
    self.learner_seqs_loglik = self._log_likelihood(
        learner_seqs=self.input_learner_seqs, keep_latest_k=keep_latest_k)
    # build policy optimizer
    self._policy_optimizer(expert_seqs=self.input_expert_seqs,
                           learner_seqs=self.input_learner_seqs,
                           learner_seqs_loglik=self.learner_seqs_loglik,
                           lr=lr)
class MLE_Hawkes_Generator(object):
    """
    Point process generator trained by Maximum Likelihood Estimation.

    NOTE(review): the original docstring said "Reinforcement Learning Based",
    but this class minimizes the negative log-likelihood (plain MLE).
    """

    def __init__(self, T, S, layers, n_comp, batch_size, C=1., data_dim=3,
                 keep_latest_k=None, lr=1e-3, reg_scale=0., maximum=1e+3):
        """
        Params:
        - T:             the maximum time of the sequences
        - S:             the space of location
        - layers:        layer configuration passed to SpatialTemporalHawkes
        - n_comp:        number of components passed to SpatialTemporalHawkes
        - batch_size:    batch size of the training data
        - C:             the constant in diffusion kernel
        - data_dim:      data dimension (=3 by default)
        - keep_latest_k: only compute latest k points in log-likelihood calculation
        - lr:            learning rate for the optimizer
        - reg_scale:     scale of the l1 regularizer (penalty currently excluded)
        - maximum:       upper bound of the conditional intensity
                         (generalized: was hard-coded to 1e+3 despite being documented)
        """
        self.batch_size = batch_size
        # Hawkes process
        self.hawkes = SpatialTemporalHawkes(
            T, S, layers=layers, n_comp=n_comp, C=C, maximum=maximum, verbose=False)
        # l1 regularization; computed but intentionally not added to the cost
        l1_regularizer = tf.contrib.layers.l1_regularizer(scale=reg_scale, scope=None)
        penalty_term = tf.contrib.layers.apply_regularization(
            l1_regularizer, self.hawkes.Wss)
        # input tensor: expert sequences [batch_size, seq_len, data_dim]
        self.input_seqs = tf.placeholder(tf.float32, [batch_size, None, data_dim])
        # negative average log-likelihood as the training cost
        self.cost = -1 * self.log_likelihood(
            S, keep_latest_k=keep_latest_k) / batch_size  # + penalty_term
        # Adam optimizer with exponentially decaying learning rate
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            lr, global_step, decay_steps=100, decay_rate=0.99, staircase=True)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate, beta1=0.6, beta2=0.9).minimize(
                self.cost, global_step=global_step)

    def log_likelihood(self, S, keep_latest_k):
        """
        Compute the log-likelihood of the input data given the Hawkes point
        process. `S` is accepted for interface compatibility but unused here.
        """
        loglikli = 0.
        # bug fix: was bare `batch_size` (undefined in this scope -> NameError);
        # the value is stored on the instance by __init__.
        for b in range(self.batch_size):
            seq = self.input_seqs[b, :, :]
            # rows whose time component is 0 are treated as padding
            mask_t = tf.cast(seq[:, 0] > 0, tf.float32)
            trunc_seq = tf.boolean_mask(seq, mask_t)
            seq_len = tf.shape(trunc_seq)[0]
            # accumulate the log conditional pdf of each point given its history,
            # from the first point to the last point
            loglikli += tf.reduce_sum(
                tf.scan(
                    lambda a, i: self.hawkes.log_conditional_pdf(
                        trunc_seq[:i, :], keep_latest_k=keep_latest_k),
                    tf.range(1, seq_len + 1),
                    initializer=np.array(0., dtype=np.float32)))
        return loglikli

    def train(self,
              sess,
              epoches,      # number of epoches (how many times the dataset is trained)
              expert_seqs,  # [n, seq_len, data_dim=3]
              pretrained=False):
        """Train the point process generator given expert sequences."""
        # initialization
        if not pretrained:
            # initialize network parameters
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            print("[%s] parameters are initialized." % arrow.now(), file=sys.stderr)
        # data configurations
        # - number of expert sequences
        n_data = expert_seqs.shape[0]
        # - number of batches
        # bug fix: was bare `batch_size` (undefined in this scope)
        n_batches = int(n_data / self.batch_size)
        # training over epoches
        for epoch in range(epoches):
            # shuffle indices of the training samples
            shuffled_ids = np.arange(n_data)
            np.random.shuffle(shuffled_ids)
            # training over batches
            avg_train_cost = []
            for b in range(n_batches):
                idx = np.arange(self.batch_size * b, self.batch_size * (b + 1))
                # training indices selected in current batch
                batch_train_ids = shuffled_ids[idx]
                # training batch data
                batch_train_seqs = expert_seqs[batch_train_ids, :, :]
                # optimization procedure
                sess.run(self.optimizer, feed_dict={self.input_seqs: batch_train_seqs})
                # cost for train batch
                train_cost = sess.run(
                    self.cost, feed_dict={self.input_seqs: batch_train_seqs})
                print("[%s] batch training cost: %.2f." % (arrow.now(), train_cost),
                      file=sys.stderr)
                # record cost for each batch
                avg_train_cost.append(train_cost)
            # training log output
            avg_train_cost = np.mean(avg_train_cost)
            print('[%s] Epoch %d (n_train_batches=%d, batch_size=%d)'
                  % (arrow.now(), epoch, n_batches, self.batch_size), file=sys.stderr)
            print('[%s] Training cost:\t%f' % (arrow.now(), avg_train_cost),
                  file=sys.stderr)
class RL_Hawkes_Generator(object):
    """
    Reinforcement Learning Based Point Process Generator.

    Trains a SpatialTemporalHawkes generator by minimizing a kernel-based
    (MMD-style) reward weighted by the learner sequences' log-likelihood.
    The cost/optimizer graph is rebuilt for every training batch (see
    `_rebulid_policy_optimizer`), because learner sequences are sampled
    from the current model via a session run.
    """

    def __init__(self, T, S, C=1., maximum=1e+4):
        """
        Params:
        - T: the maximum time of the sequences
        - S: the space of location
        - C: the constant in diffusion kernel
        - maximum: upper bound of the conditional intensity
        """
        # model hyper-parameters
        self.T = T  # maximum time
        self.S = S  # location space
        # Hawkes process generator
        self.hawkes = SpatialTemporalHawkes(C=C, maximum=maximum)

    def _rebulid_policy_optimizer(self, sess, batch_size, lr=1e-2):
        """
        Rebuild `self.cost` and `self.optimizer` for the current batch.

        NOTE(review): the method name contains a typo ("rebulid"); kept as-is
        to avoid changing the class interface. Requires `self.input_seq_t` /
        `self.input_seq_l`, which are created in `train` before this is called.
        """
        # generated tensors: learner sequences (time, location, loglikelihood)
        # sampled from the current model state (runs the session inside
        # get_learner_seqs — presumably shaped [batch_size, seq_len, .];
        # verify against SpatialTemporalHawkes).
        learner_seq_t, learner_seq_l, learner_seq_loglik = self.hawkes.get_learner_seqs(
            sess, self.T, self.S, batch_size)
        # concatenate batches in the sequences: [batch_size, seq_len, d] ->
        # [batch_size*seq_len, d]
        expert_seq_t, expert_seq_l = \
            self.__concatenate_batch(self.input_seq_t), \
            self.__concatenate_batch(self.input_seq_l)
        learner_seq_t, learner_seq_l, learner_seq_loglik = \
            self.__concatenate_batch(learner_seq_t), \
            self.__concatenate_batch(learner_seq_l), \
            self.__concatenate_batch(learner_seq_loglik)
        print("[%s] rebuiding reward." % arrow.now(), file=sys.stderr)
        # calculate average rewards over expert vs. learner sequences
        reward = self._reward(batch_size, self.T[0], self.T[1],\
            expert_seq_t, expert_seq_l, learner_seq_t, learner_seq_l)  # [batch_size*seq_len, 1]
        print("[%s] rebuiding optimizer." % arrow.now(), file=sys.stderr)
        # cost and optimizer: policy-gradient style objective,
        # reward-weighted log-likelihood averaged over the batch
        self.cost = tf.reduce_sum(tf.multiply(reward, learner_seq_loglik), axis=0) / batch_size
        # global_step = tf.Variable(0, trainable=False)
        # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, decay_step, decay_rate, staircase=True)
        # self.optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.6, beta2=0.9).minimize(self.cost, global_step=global_step)
        self.optimizer = tf.train.GradientDescentOptimizer(lr).minimize(
            self.cost)

    def _reward(
            self, batch_size, t0, T,
            expert_seq_t, expert_seq_l,    # expert sequences
            learner_seq_t, learner_seq_l,  # learner sequences
            kernel_bandwidth=0.5):
        """
        Reward function: per-point signed kernel discrepancy between learner
        and expert sequences, using only the location component (see TODO).
        Returns a [batch_size*seq_len, 1] tensor.
        """
        # get mask for concatenated expert and learner sequences
        # (1 where t0 < t < T, else 0)
        expert_seq_mask = self.__get_mask_truncate_by_T(
            expert_seq_t, T, t0)   # [batch_size*seq_len, 1]
        learner_seq_mask = self.__get_mask_truncate_by_T(
            learner_seq_t, T, t0)  # [batch_size*seq_len, 1]
        # calculate mask for kernel matrix (outer product of the point masks)
        learner_learner_kernel_mask = tf.matmul(learner_seq_mask, tf.transpose(learner_seq_mask))
        expert_learner_kernel_mask = tf.matmul(expert_seq_mask, tf.transpose(learner_seq_mask))
        # concatenate each data dimension for both expert sequence and learner sequence
        # TODO: Add mark to the sequences
        # expert_seq  = tf.concat([expert_seq_t, expert_seq_l], axis=1)  # [batch_size*seq_len, t_dim+l_dim+m_dim]
        # learner_seq = tf.concat([learner_seq_t, learner_seq_l], axis=1)  # [batch_size*seq_len, t_dim+l_dim+m_dim]
        # NOTE(review): only the location component enters the kernel here;
        # the time component is used for masking only.
        expert_seq = tf.concat([expert_seq_l], axis=1)    # [batch_size*seq_len, t_dim]
        learner_seq = tf.concat([learner_seq_l], axis=1)  # [batch_size*seq_len, t_dim]
        # calculate upper-half kernel matrix
        learner_learner_kernel, expert_learner_kernel = self.__kernel_matrix(
            learner_seq, expert_seq, kernel_bandwidth)  # 2 * [batch_size*seq_len, batch_size*seq_len]
        # zero out entries involving masked (out-of-window) points
        learner_learner_kernel = tf.multiply(learner_learner_kernel, learner_learner_kernel_mask)
        expert_learner_kernel = tf.multiply(expert_learner_kernel, expert_learner_kernel_mask)
        # calculate reward for each of data point in learner sequence
        # (the *2 presumably compensates for using only one triangle of the
        # symmetric kernel matrix — TODO confirm)
        emp_ll_mean = tf.reduce_sum(learner_learner_kernel, axis=0) * 2  # batch_size*seq_len
        emp_el_mean = tf.reduce_sum(expert_learner_kernel, axis=0) * 2   # batch_size*seq_len
        return tf.expand_dims(emp_ll_mean - emp_el_mean, -1)  # [batch_size*seq_len, 1]

    @staticmethod
    def __get_mask_truncate_by_T(seq_t, T, t_0=0):
        """Masking time, location and mark sequences for the entries before the maximum time T."""
        # get basic mask: 1 where t_0 < t < T, else 0 (also zeroes padding at t == 0)
        mask_t = tf.multiply(tf.cast(seq_t < T, tf.float32),
                             tf.cast(seq_t > t_0, tf.float32))
        return mask_t  # [batch_size*seq_len, 1] or [batch_size, seq_len, 1]

    @staticmethod
    def __concatenate_batch(seqs):
        """Concatenate each batch of the sequences into a single sequence."""
        array_seq = tf.unstack(seqs, axis=0)  # [batch_size, seq_len, data_dim]
        seq = tf.concat(array_seq, axis=0)    # [batch_size*seq_len, data_dim]
        return seq

    @staticmethod
    def __kernel_matrix(learner_seq, expert_seq, kernel_bandwidth):
        """
        Construct kernel matrix based on learn sequence and expert sequence,
        each entry of the matrix is the distance between two data points in
        learner_seq or expert_seq. return two matrix, left_mat is the
        distances between learn sequence and learn sequence, right_mat is
        the distances between learn sequence and expert sequence.
        """
        # calculate l2 distances
        learner_learner_mat = utils.l2_norm(
            learner_seq, learner_seq)  # [batch_size*seq_len, batch_size*seq_len]
        expert_learner_mat = utils.l2_norm(
            expert_seq, learner_seq)   # [batch_size*seq_len, batch_size*seq_len]
        # exponential kernel
        learner_learner_mat = tf.exp(-learner_learner_mat / kernel_bandwidth)
        expert_learner_mat = tf.exp(-expert_learner_mat / kernel_bandwidth)
        return learner_learner_mat, expert_learner_mat

    def train(
            self,
            sess,
            batch_size,
            epoches,         # number of epoches (how many times is the entire dataset going to be trained)
            expert_seq_t,    # [n, seq_len, 1]
            expert_seq_l,    # [n, seq_len, 2]
            trainplot=True,  # plot the change of intensity over epoches
            lr=1e-2,         # learning rate
            pretrained=False):
        """Train the point process generator given expert sequences."""
        # input tensors: expert sequences (time, location); created here
        # because `_rebulid_policy_optimizer` reads them per batch
        self.input_seq_t = tf.placeholder(tf.float32, [batch_size, None, 1])
        self.input_seq_l = tf.placeholder(tf.float32, [batch_size, None, 2])
        # check the consistency of the shape of the expert sequences
        assert expert_seq_t.shape[:-1] == expert_seq_l.shape[:-1], \
            "inconsistant 'number of sequences' or 'sequence length' of input expert sequences"
        # initialization
        if not pretrained:
            print("[%s] parameters are initialized." % arrow.now(), file=sys.stderr)
            # initialize network parameters
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
        # data configurations
        # - number of expert sequences
        n_data = expert_seq_t.shape[0]
        # - number of batches
        n_batches = int(n_data / batch_size)
        if trainplot:
            ppim = utils.PointProcessIntensityMeter(self.T[1], batch_size)
        # training over epoches
        for epoch in range(epoches):
            # shuffle indices of the training samples
            shuffled_ids = np.arange(n_data)
            np.random.shuffle(shuffled_ids)
            # shuffled_train_ids = shuffled_ids[:n_train]
            # shuffled_test_ids  = shuffled_ids[-n_test:]
            # training over batches
            avg_train_cost = []
            for b in range(n_batches):
                idx = np.arange(batch_size * b, batch_size * (b + 1))
                # training and testing indices selected in current batch
                batch_train_ids = shuffled_ids[idx]
                # batch_test_ids = shuffled_test_ids[:batch_size]
                # training and testing batch data
                batch_train_expert_t = expert_seq_t[batch_train_ids, :, :]
                batch_train_expert_l = expert_seq_l[batch_train_ids, :, :]
                # rebuild cost/optimizer against freshly sampled learner
                # sequences (NOTE(review): adds new graph ops every batch)
                self._rebulid_policy_optimizer(sess, batch_size, lr)
                # optimization procedure
                sess.run(self.optimizer,
                         feed_dict={
                             self.input_seq_t: batch_train_expert_t,
                             self.input_seq_l: batch_train_expert_l
                         })
                # cost for train batch and test batch
                train_cost = sess.run(self.cost,
                                      feed_dict={
                                          self.input_seq_t: batch_train_expert_t,
                                          self.input_seq_l: batch_train_expert_l
                                      })
                print("[%s] batch training cost: %.2f." % (arrow.now(), train_cost), file=sys.stderr)
                # record cost for each batch
                avg_train_cost.append(train_cost)
                if trainplot:
                    # update intensity plot with newly sampled learner sequences
                    learner_seq_t, learner_seq_l, _ = self.hawkes.get_learner_seqs(
                        sess, self.T, self.S, batch_size)
                    ppim.update_time_intensity(batch_train_expert_t, learner_seq_t)
                    ppim.update_location_intensity(batch_train_expert_l, learner_seq_l)
            # training log output
            avg_train_cost = np.mean(avg_train_cost)
            print('[%s] Epoch %d (n_train_batches=%d, batch_size=%d)' % (arrow.now(), epoch, n_batches, batch_size), file=sys.stderr)
            print('[%s] Training cost:\t%f' % (arrow.now(), avg_train_cost), file=sys.stderr)
class RL_Hawkes_Generator(object):
    """
    Reinforcement Learning Based Point Process Generator.

    Trains a SpatialTemporalHawkes generator by minimizing a kernel-based
    (MMD-style) discrepancy between expert sequences and sequences sampled
    from the model, weighted by the learner sequences' log-likelihood.

    NOTE(review): a class with the same name is defined earlier in this
    file; this later definition shadows it at import time.
    """

    def __init__(self, T, S, layers, n_comp, batch_size, C=1., maximum=1e+3,
                 keep_latest_k=None, lr=1e-5, eps=0.2):
        """
        Params:
        - T:             the maximum time of the sequences
        - S:             the space of location
        - layers:        layer configuration passed to SpatialTemporalHawkes
        - n_comp:        number of components passed to SpatialTemporalHawkes
        - batch_size:    batch size of the training data
        - C:             the constant in diffusion kernel
        - maximum:       upper bound of the conditional intensity
        - keep_latest_k: only compute latest k points in log-likelihood calculation
        - lr:            initial learning rate of the policy optimizer
        - eps:           coaching probability (coaching currently disabled)
        """
        # model hyper-parameters
        self.T = T                    # time space
        self.S = S                    # location space
        self.batch_size = batch_size  # batch size
        self.maximum = maximum        # upper bound of the conditional intensity
        # Hawkes process generator
        # bug fix: forward `maximum` (was hard-coded to 1e+3, silently
        # ignoring the constructor argument stored above)
        self.hawkes = SpatialTemporalHawkes(
            T, S, layers=layers, n_comp=n_comp, C=C, maximum=maximum, verbose=False)
        # input tensors: expert and learner sequences (time, location, mark)
        self.input_expert_seqs = tf.placeholder(tf.float32, [batch_size, None, 3])
        self.input_learner_seqs = tf.placeholder(tf.float32, [batch_size, None, 3])
        # TODO: make esp decay exponentially
        # coaching (disabled)
        # self.coached_learner_seqs = self._coaching(self.input_learner_seqs, self.input_expert_seqs, eps=eps)
        self.learner_seqs_loglik = self._log_likelihood(
            learner_seqs=self.input_learner_seqs, keep_latest_k=keep_latest_k)
        # build policy optimizer
        self._policy_optimizer(expert_seqs=self.input_expert_seqs,
                               learner_seqs=self.input_learner_seqs,
                               learner_seqs_loglik=self.learner_seqs_loglik,
                               lr=lr)

    def _log_likelihood(self, learner_seqs, keep_latest_k):
        """
        Compute the per-point log-likelihood of the input sequences given the
        Hawkes point process. Returns [batch_size, max_len, 1]; positions past
        a sequence's true length are zero-padded.
        """
        # max length of the sequence in learner_seqs
        max_len = tf.shape(learner_seqs)[1]
        # log-likelihoods
        logliklis = []
        for b in range(self.batch_size):
            seq = learner_seqs[b, :, :]
            # rows whose time component is 0 are treated as padding
            mask_t = tf.cast(seq[:, 0] > 0, tf.float32)
            trunc_seq = tf.boolean_mask(seq, mask_t)
            seq_len = tf.shape(trunc_seq)[0]
            # log conditional pdf for each point given its history,
            # from the first point to the last point
            loglikli = tf.scan(
                lambda a, i: self.hawkes.log_conditional_pdf(
                    trunc_seq[:i, :], keep_latest_k=keep_latest_k),
                tf.range(1, seq_len + 1),
                initializer=np.array(0., dtype=np.float32))
            # pad zeros to max_len
            # bug fix: pass the shape as rank-1 ([...]) — tf.zeros expects a
            # 1-D shape, not a scalar tensor
            paddings = tf.zeros([max_len - seq_len], dtype=tf.float32)
            loglikli = tf.concat([loglikli, paddings], axis=0)
            logliklis.append(loglikli)
        logliklis = tf.expand_dims(tf.stack(logliklis, axis=0), -1)
        return logliklis

    def _policy_optimizer(self, expert_seqs, learner_seqs, learner_seqs_loglik, lr):
        """Build `self.cost` and `self.optimizer` (policy-gradient objective)."""
        # concatenate batches in the sequences
        concat_expert_seq = self.__concatenate_batch(
            expert_seqs)            # [batch_size * expert_seq_len, data_dim]
        concat_learner_seq = self.__concatenate_batch(
            learner_seqs)           # [batch_size * learner_seq_len, data_dim]
        concat_learner_seq_loglik = self.__concatenate_batch(
            learner_seqs_loglik)    # [batch_size * learner_seq_len, 1]
        # calculate average rewards
        print("[%s] building reward." % arrow.now(), file=sys.stderr)
        reward = self._reward(concat_expert_seq, concat_learner_seq)
        # TODO: record the discrepency
        # cost and optimizer
        print("[%s] building optimizer." % arrow.now(), file=sys.stderr)
        # per-sequence reward times per-sequence log-likelihood, averaged
        # over the batch
        # self.cost = tf.reduce_sum(tf.multiply(reward, concat_learner_seq_loglik), axis=0) / self.batch_size
        self.cost = tf.reduce_sum( \
            tf.reduce_sum(tf.reshape(reward, [self.batch_size, tf.shape(learner_seqs)[1]]), axis=1) * \
            tf.reduce_sum(tf.reshape(concat_learner_seq_loglik, [self.batch_size, tf.shape(learner_seqs)[1]]), axis=1)) / self.batch_size
        # Adam optimizer with exponentially decaying learning rate
        global_step = tf.Variable(0, trainable=False)
        learning_rate = tf.train.exponential_decay(
            lr, global_step, decay_steps=100, decay_rate=0.99, staircase=True)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate, beta1=0.6, beta2=0.9).minimize(
                self.cost, global_step=global_step)

    def _reward(self, expert_seq, learner_seq, kb=5):
        """
        Reward function: per-point signed kernel discrepancy between learner
        and expert points. `kb` is the kernel bandwidth. Returns
        [batch_size * learner_seq_len, 1].
        """
        # get mask for concatenated expert and learner sequences
        # (time component == 0 marks padding)
        learner_mask_t = tf.expand_dims(
            tf.cast(learner_seq[:, 0] > 0, tf.float32), -1)
        expert_mask_t = tf.expand_dims(
            tf.cast(expert_seq[:, 0] > 0, tf.float32), -1)
        # calculate mask for kernel matrix (outer product of the point masks)
        learner_learner_kernel_mask = tf.matmul(learner_mask_t,
                                                tf.transpose(learner_mask_t))
        expert_learner_kernel_mask = tf.matmul(expert_mask_t,
                                               tf.transpose(learner_mask_t))
        # calculate upper-half kernel matrix
        # - [learner_seq_len, learner_seq_len], [expert_seq_len, learner_seq_len]
        learner_learner_kernel, expert_learner_kernel = self.__kernel_matrix(
            learner_seq, expert_seq, kb)
        # zero out entries involving padded points
        learner_learner_kernel = tf.multiply(learner_learner_kernel,
                                             learner_learner_kernel_mask)
        expert_learner_kernel = tf.multiply(expert_learner_kernel,
                                            expert_learner_kernel_mask)
        # calculate reward for each of data point in learner sequence
        emp_ll_mean = tf.reduce_sum(
            learner_learner_kernel, axis=0) / self.batch_size  # [batch_size * learner_seq_len]
        emp_el_mean = tf.reduce_sum(
            expert_learner_kernel, axis=0) / self.batch_size   # [batch_size * learner_seq_len]
        return tf.expand_dims(emp_ll_mean - emp_el_mean, -1)   # [batch_size * learner_seq_len, 1]

    def _coaching(self, learner_seqs, expert_seqs, eps):
        """
        Coach the learner by replacing part of generated learner sequences
        with the expert sequence for the (greedy) exploration. Each sequence
        in the batch is replaced wholesale with probability `eps`.
        """
        # align learner and expert sequences
        learner_seqs, expert_seqs, seq_len = self.__align_learner_expert_seqs(
            learner_seqs, expert_seqs)
        # coaching and retain mask
        p = tf.random_uniform([self.batch_size, 1, 1], 0, 1)  # [batch_size, 1, 1]
        coaching_mask = tf.tile(tf.cast(p <= eps, dtype=tf.float32),
                                [1, seq_len, 3])
        retain_mask = 1. - coaching_mask
        # replace part of learner sequences by expert sequences
        learner_seqs = tf.multiply(learner_seqs, retain_mask) + tf.multiply(
            expert_seqs, coaching_mask)
        return learner_seqs

    @staticmethod
    def __align_learner_expert_seqs(learner_seqs, expert_seqs):
        """
        Align learner sequences and expert sequences, i.e., make two batches
        of sequences have the same sequence length by padding zeros to the tail.
        """
        batch_size = tf.shape(learner_seqs)[0]
        learner_seq_len = tf.shape(learner_seqs)[1]
        expert_seq_len = tf.shape(expert_seqs)[1]
        max_seq_len = tf.cond(tf.less(learner_seq_len, expert_seq_len),
                              lambda: expert_seq_len,
                              lambda: learner_seq_len)
        learner_paddings = tf.zeros(
            [batch_size, max_seq_len - learner_seq_len, 3])
        expert_paddings = tf.zeros(
            [batch_size, max_seq_len - expert_seq_len, 3])
        learner_seqs = tf.concat([learner_seqs, learner_paddings], axis=1)
        expert_seqs = tf.concat([expert_seqs, expert_paddings], axis=1)
        return learner_seqs, expert_seqs, max_seq_len

    @staticmethod
    def __concatenate_batch(seqs):
        """Concatenate each batch of the sequences into a single sequence."""
        array_seq = tf.unstack(seqs, axis=0)  # [batch_size, seq_len, data_dim]
        seq = tf.concat(array_seq, axis=0)    # [batch_size*seq_len, data_dim]
        return seq

    @staticmethod
    def __kernel_matrix(learner_seq, expert_seq, kernel_bandwidth):
        """
        Construct kernel matrix based on learn sequence and expert sequence,
        each entry of the matrix is the distance between two data points in
        learner_seq or expert_seq. Return two matrices: left_mat is the
        distances between learn sequence and learn sequence, right_mat is the
        distances between learn sequence and expert sequence.
        """
        # calculate l2 distances
        learner_learner_mat = utils.l2_norm(
            learner_seq, learner_seq
        )  # [batch_size*learner_seq_len, batch_size*learner_seq_len]
        expert_learner_mat = utils.l2_norm(
            expert_seq, learner_seq
        )  # [batch_size*expert_seq_len, batch_size*learner_seq_len]
        # exponential kernel
        learner_learner_mat = tf.exp(-learner_learner_mat / kernel_bandwidth)
        expert_learner_mat = tf.exp(-expert_learner_mat / kernel_bandwidth)
        return learner_learner_mat, expert_learner_mat

    def mmd(self, sess, expert_seqs, learner_seqs):
        """
        Evaluate the average kernel discrepancy (reward) between a batch of
        expert sequences and a batch of learner sequences, both numpy arrays
        shaped [n, seq_len, 3].
        """
        # bug fix: the number of sequences is shape[0]; shape[1] is the
        # sequence length (was erroneously used as the divisor)
        batch_size = expert_seqs.shape[0]
        # convert to tensors
        expert_seqs = tf.constant(expert_seqs, dtype=tf.float32)
        learner_seqs = tf.constant(learner_seqs, dtype=tf.float32)
        # concatenate batches in the sequences
        concat_expert_seq = self.__concatenate_batch(
            expert_seqs)   # [batch_size * expert_seq_len, data_dim]
        concat_learner_seq = self.__concatenate_batch(
            learner_seqs)  # [batch_size * learner_seq_len, data_dim]
        # calculate the reward (mmd)
        reward = tf.reduce_sum(
            self._reward(concat_expert_seq, concat_learner_seq)) / batch_size
        return sess.run(reward)

    def train(
            self,
            sess,
            epoches,         # number of epoches (how many times is the entire dataset going to be trained)
            expert_seqs,     # [n, seq_len, 3]
            trainplot=True,  # plot the change of intensity over epoches
            pretrained=False):
        """Train the point process generator given expert sequences."""
        # initialization
        if not pretrained:
            print("[%s] parameters are initialized." % arrow.now(), file=sys.stderr)
            # initialize network parameters
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
        # data configurations
        # - number of expert sequences
        n_data = expert_seqs.shape[0]
        # - number of batches
        n_batches = int(n_data / self.batch_size)
        # training over epoches
        all_train_cost = []
        for epoch in range(epoches):
            # shuffle indices of the training samples
            shuffled_ids = np.arange(n_data)
            np.random.shuffle(shuffled_ids)
            # training over batches
            avg_train_cost = []
            for b in range(n_batches):
                idx = np.arange(self.batch_size * b, self.batch_size * (b + 1))
                # training indices selected in current batch
                batch_train_ids = shuffled_ids[idx]
                # expert batch data, plus learner sequences sampled from the
                # current model state
                batch_train_expert = expert_seqs[batch_train_ids, :, :]
                batch_train_learner = self.hawkes.sampling(sess, self.batch_size)
                # optimization procedure
                sess.run(self.optimizer,
                         feed_dict={
                             self.input_expert_seqs: batch_train_expert,
                             self.input_learner_seqs: batch_train_learner
                         })
                # cost for train batch
                train_cost = sess.run(self.cost,
                                      feed_dict={
                                          self.input_expert_seqs: batch_train_expert,
                                          self.input_learner_seqs: batch_train_learner
                                      })
                print("[%s] batch training cost: %.2f." % (arrow.now(), train_cost), file=sys.stderr)
                # record cost for each batch
                avg_train_cost.append(train_cost)
                all_train_cost.append(train_cost)
            # training log output
            avg_train_cost = np.mean(avg_train_cost)
            print('[%s] Epoch %d (n_train_batches=%d, batch_size=%d)' % \
                (arrow.now(), epoch, n_batches, self.batch_size), file=sys.stderr)
            print('[%s] Training cost:\t%f' % (arrow.now(), avg_train_cost), file=sys.stderr)
        # save all training cost into numpy file.
        np.savetxt("results/robbery_rl_train_cost.txt", all_train_cost, delimiter=",")