def _set_variables(session: tf.Session, values: Dict[str, Any]) -> None:
    """Assign new values to global variables of the session's graph, by name.

    Args:
        session: Session whose graph holds the variables and which runs the
            assignments.
        values: Maps a variable name (without the ``":0"`` suffix) to the
            value to assign; each value is converted with ``np.array``.

    Raises:
        KeyError: If ``name + ":0"`` matches no global variable of the graph.
    """
    by_name = {}
    for var in session.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
        by_name[var.name] = var

    for key in values:
        target = by_name[key + ":0"]
        new_value = np.array(values[key])
        # One assign op is built and executed per entry.
        session.run(tf.assign(target, new_value))
def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH,
          y_size=len(CAPTCHA_LIST) * CAPTCHA_LEN):
    """Train the captcha CNN, checkpointing as accuracy improves.

    Every 100 steps the accuracy is measured on a fresh batch of 100
    samples. Whenever it exceeds the current threshold (initially 0.95) a
    checkpoint is written to ``./model/captcha.model`` and the threshold is
    raised by 0.005. Training stops once the threshold reaches 1.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        y_size: Length of the flattened label vector.
    """
    acc_rate = 0.95
    x = placeholder(float32, [None, height * width])
    y = placeholder(float32, [None, y_size])
    keep_prob = placeholder(float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    optimizer = optimize_graph(y, y_conv)
    accuracy = accuracy_graph(y, y_conv)
    saver = Saver()
    sess = Session()
    # try/finally so the session is released even when a training step,
    # the data generator or the checkpoint write raises.
    try:
        sess.run(global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            sess.run(optimizer,
                     feed_dict={x: batch_x, y: batch_y, keep_prob: 0.75})
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                # Dropout is disabled (keep_prob 1.0) for evaluation.
                acc = sess.run(accuracy,
                               feed_dict={x: batch_x_test, y: batch_y_test,
                                          keep_prob: 1.0})
                print(datetime.now().strftime('%c'), ' step:', step,
                      ' accuracy:', acc)
                if acc > acc_rate:
                    if not isdir('./model'):
                        mkdir('./model')
                    print('Saving to model/captcha.model')
                    saver.save(sess, './model/captcha.model',
                               global_step=step)
                    print('Saved to model/captcha.model')
                    # Raise the bar so only better models are saved later.
                    acc_rate += 0.005
                    if acc_rate >= 1:
                        break
            step += 1
    finally:
        sess.close()
def __call__(self, session: tf.Session) -> Iterable[Dict[str, np.ndarray]]:
    """Yield the result of running ``self._step`` until it is exhausted.

    ``self._reset`` is run first. Iteration ends when
    ``tf.errors.OutOfRangeError`` is raised, at which point the number of
    batches produced is asserted to equal ``self.n_batch``.
    """
    session.run(self._reset)
    produced = 0
    try:
        while True:
            yield session.run(self._step)
            # Only counted once the consumer has resumed the generator.
            produced += 1
    except tf.errors.OutOfRangeError:
        assert produced == self.n_batch
def __call__(self, session: tf.Session) -> Iterable[Dict[str, np.ndarray]]:
    """Run the loop once and yield ``self.n_batch`` per-batch dictionaries.

    On the first call the stored initializer is run and then cleared. After
    ``self._run_loop`` has run, ``self._dequeue`` is fetched once and every
    yielded dictionary holds, for each key, entry ``n`` of the fetched
    value.
    """
    initializer = self._initializer
    if initializer is not None:
        session.run(initializer)
        self._initializer = None

    session.run(self._run_loop)
    dequeued = session.run(self._dequeue)

    for index in range(self.n_batch):
        batch = {}
        for key in dequeued.keys():
            batch[key] = dequeued[key][index]
        yield batch
def create_tf_var(tensor: np.ndarray, name: str, session: tf.Session):
    """Create and initialize a zero-filled variable shaped like *tensor*.

    Args:
        tensor: Array providing the dtype and shape of the new variable.
        name: Name passed to ``tf.get_variable``.
        session: Session used to run the initializer.

    Returns:
        The newly created variable.
    """
    variable = tf.get_variable(
        dtype=tf.dtypes.as_dtype(tensor.dtype),
        shape=tensor.shape,
        name=name,
        initializer=tf.zeros_initializer(),
    )
    init_op = tf.variables_initializer([variable])
    session.run(init_op)
    # The variable is evaluated once; the fetched value is not used.
    session.run(variable)
    return variable
class ImageEncoder(object):
    """Run a frozen graph to turn images into feature vectors.

    The serialized graph is imported under the ``net`` name scope and its
    input / output tensors are looked up by name.
    """

    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        """Load the graph and resolve the input and output tensors.

        Args:
            checkpoint_filename: Path of the serialized ``GraphDef``.
            input_name: Name of the input tensor inside the graph.
            output_name: Name of the output tensor inside the graph.
        """
        self.session = Session()

        graph_def = GraphDef()
        with GFile(checkpoint_filename, "rb") as file_handle:
            serialized = file_handle.read()
            graph_def.ParseFromString(serialized)
        import_graph_def(graph_def, name="net")

        input_tensor_name = "net/%s:0" % input_name
        self.input_var = get_default_graph().get_tensor_by_name(
            input_tensor_name)
        output_tensor_name = "net/%s:0" % output_name
        self.output_var = get_default_graph().get_tensor_by_name(
            output_tensor_name)

        # The output must be rank 2 and the input rank 4.
        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4

        output_dims = self.output_var.get_shape().as_list()
        self.feature_dim = output_dims[-1]
        input_dims = self.input_var.get_shape().as_list()
        self.image_shape = input_dims[1:]

    def __call__(self, data_x, batch_size=32):
        """Encode *data_x* in batches and return a float32 feature matrix.

        Returns:
            Array of shape ``(len(data_x), self.feature_dim)``.
        """
        features = np.zeros((len(data_x), self.feature_dim), np.float32)

        def encode(feed):
            return self.session.run(self.output_var, feed_dict=feed)

        inputs = {self.input_var: data_x}
        _run_in_batches(encode, inputs, features, batch_size)
        return features
def herding_selection(self, sess: tf.Session, model: Union[Ader]) -> int:
    """
    This method selects exemplars for every item using herding.

    For each item, all of its stored sequences are run through the model
    and the resulting representations and logits are handed to
    ``self.herding`` together with the number of exemplars to keep.

    Args:
        sess (tf.Session): Tensorflow session.
        model (object): Trained model for evaluate.
    Returns:
        saved_num (int): Total number of exemplars saved for all items at
            current cycle (the sum of the values returned by
            ``self.herding``).
    """
    saved_num = 0
    for item in tqdm(self.sess_by_item, ncols=70, leave=False, unit='b',
                     desc='Selecting exemplar'):
        m = self.item_count[item - 1]
        seq = np.array(self.sess_by_item[item])
        # The last column of every sequence is left out of the model input.
        input_seq = seq[:, :-1]
        rep, logits = sess.run(
            [model.rep, model.logits], {
                model.input_seq: input_seq,
                model.dropout_rate: self.dropout_rate,
                model.max_item: self.max_item,
                model.is_training: False
            })
        # Never ask for more exemplars than there are sequences.
        saved_num += self.herding(np.array(rep), np.array(logits), seq, item,
                                  min(m, len(seq)))
    return saved_num
def out(self,
        sess: 'tf.Session',
        X: 'np.ndarray[np.float32]',
        batch_size: int = 64) -> 'np.ndarray[np.float32]':
    """Evaluate ``self.F`` on every row of *X*, batch by batch.

    Args:
        sess: Session used to evaluate ``self.F``.
        X: 2-D input array with one sample per row.
        batch_size: Maximum number of rows fed per ``sess.run`` call.

    Returns:
        Float32 array of shape ``(X.shape[0], self._k)`` holding the value
        of ``self.F`` for each input row.
    """
    n_row, _ = X.shape
    # One output row per input ROW. Sizing the buffer by the column count
    # breaks for every non-square input.
    F = np.zeros((n_row, self._k), dtype=np.float32)
    for start in range(0, n_row, batch_size):
        # Slicing clips at the end of the array, so the last batch may be
        # shorter than batch_size.
        stop = start + batch_size
        F[start:stop, :] = sess.run(self.F,
                                    feed_dict={self.x: X[start:stop]})
    return F
def gen_league_data(session: tf1.Session, n_archetypes, n_rounds, n_matches,
                    wait_time=10, matchup_prior=priors.matchup(),
                    ind_decks=0, ind_matches=0, ind_winners=0,
                    winner_win_count=6, winner_loss_count=2, field=None):
    """Sample a field and matchup matrix and simulate league observations.

    When *field* is None it is drawn from ``priors.field(n_archetypes)``.
    The field, the matchups, the free matchup parameters and the expected
    value ``matchups @ field`` are evaluated in one ``session.run`` call
    and then fed to the league, match and winner generators.

    Returns:
        A pair ``(sample, obs_data)``: the evaluated parameters (plus
        ``wait_time``) and the dictionary of observed counts.
    """
    if field is None:
        field = priors.field(n_archetypes).sample()
    p_win, matchups = gen_matchups(n_archetypes, matchup_prior)

    field_column = tf.expand_dims(field, -1)
    ev = tf.reshape(tf.linalg.matmul(matchups, field_column), [-1])

    fetches = {
        'field': field,
        'matchups': matchups,
        'matchups_free': p_win,
        'ev': ev
    }
    sample = session.run(fetches)
    sample['wait_time'] = wait_time

    sampled_field = sample['field']
    sampled_matchups = sample['matchups']
    sim_data = simulation.generate_league(sampled_field, sampled_matchups,
                                          n_rounds, n_matches,
                                          tries=wait_time)
    matchup_data = gen_matches(sampled_field, sampled_matchups, ind_matches)
    ind_data = gen_winners(sampled_field, sample['ev'], ind_winners,
                           winner_win_count, winner_loss_count)

    obs_data = {}
    obs_data['pairing_counts'] = np.array(sim_data['pairing_counts'])
    obs_data['record_counts'] = np.array(sim_data['record_counts'])
    obs_data['n_rounds'] = n_rounds
    obs_data['n_archetypes'] = n_archetypes
    for key in ('deck_counts', 'win_counts', 'loss_counts'):
        obs_data[key] = ind_data[key]
    for key in ('matchup_counts', 'matchup_wins'):
        obs_data[key] = matchup_data[key]
    return sample, obs_data
def fit(self,
        sess: 'tf.Session',
        X: 'np.ndarray[np.float32]',
        Y: 'np.ndarray[np.float32]',
        batch_size: int = 64) -> float:
    """Run one pass of mini-batch updates over a random permutation of X.

    Args:
        sess: Session used to run ``self.solver`` and ``self.obj``.
        X: 2-D input array with one sample per row.
        Y: Targets aligned with the rows of *X*. It is indexed with an
            integer index array, so it must be an array, not a scalar.
        batch_size: Maximum number of rows per update.

    Returns:
        Sum of the per-batch objective values (``0.0`` for empty input).
    """
    n_row, _ = X.shape
    ridxs = np.random.permutation(n_row)
    # Float start value so the declared return type also holds for empty X.
    obj = 0.0
    for start in range(0, n_row, batch_size):
        # Slicing clips at the end, so the last batch may be shorter.
        batch_idx = ridxs[start:start + batch_size]
        batch_obj, _ = sess.run([self.obj, self.solver],
                                feed_dict={
                                    self.x: X[batch_idx],
                                    self.y: Y[batch_idx]
                                })
        obj += batch_obj
    return obj
def gen_sample_data(session: tf1.Session, n_archetypes, n_matches,
                    matchup_prior=priors.matchup(), field=None):
    """Sample a field, matchups, pairings and wins and evaluate them.

    When *field* is None it is drawn from ``priors.field(n_archetypes)``.

    Returns:
        The dictionary produced by ``session.run`` with the keys
        ``field``, ``matchups``, ``matchups_free``, ``match_counts`` and
        ``match_wins``.
    """
    if field is None:
        field = priors.field(n_archetypes).sample()
    p_win, matchups = gen_matchups(n_archetypes, matchup_prior)

    # Pairing counts are reshaped to the shape of the matchup matrix.
    raw_counts = rv_match_counts(field, n_matches).sample()
    pairings = tf.reshape(raw_counts, matchups.shape)

    outcomes = rv_outcomes(pairings, matchups).sample()
    win_counts = outcomes_to_wins(outcomes)

    fetches = {
        'field': field,
        'matchups': matchups,
        'matchups_free': p_win,
        'match_counts': pairings,
        'match_wins': win_counts
    }
    return session.run(fetches)
def Query(image_bgr, curses: tf.Session = None, netin=None, netout=None):
    """Feed an input image to the network and return its first two scores.

    Args:
        image_bgr: The input image (in BGR colorspace).
        curses: The TensorFlow session. When None, the module-level
            ``Loadedses``, ``Loadedinp`` and ``Loadedout`` are used and
            *netin* / *netout* are ignored.
        netin: Tensor acting as the input of the network.
        netout: Tensor acting as the output of the network.

    Returns:
        Tuple ``(y[0, 0], y[0, 1])`` taken from the network output ``y``.
    """
    if curses is None:
        curses, netin, netout = Loadedses, Loadedinp, Loadedout

    # Single-image batch with one channel, Res x Res pixels.
    batch = np.zeros((1, Res, Res, 1))
    gray = cv.cvtColor(image_bgr, cv.COLOR_BGR2GRAY)
    batch[0, :, :, 0] = cv.resize(gray, (Res, Res))

    scores = curses.run(netout, {netin: batch})
    return (scores[0, 0], scores[0, 1])
def randomly_selection(self, sess: tf.Session, model: Union[Ader]) -> int:
    """
    Pick exemplars for every item uniformly at random.

    Items whose entry in ``self.item_count`` is not positive are skipped.
    For the others, sequences are drawn without replacement, evaluated by
    the model, and appended to ``self.exemplars[item]`` together with
    their logits.

    Args:
        sess (tf.Session): Tensorflow session.
        model (object): Trained model for evaluate.
    Returns:
        saved_num (int): Total number of exemplars saved for all items at
            current cycle.
    """
    total_saved = 0
    progress = tqdm(self.sess_by_item, ncols=70, leave=False, unit='b',
                    desc='Selecting exemplar')
    for item in progress:
        sequences = np.array(self.sess_by_item[item])
        available = len(sequences)
        quota = self.item_count[item - 1]
        if not quota > 0:
            continue

        picked = np.random.choice(available, min(quota, available),
                                  replace=False)
        chosen = sequences[picked]
        feed = {
            model.input_seq: chosen[:, :-1],
            model.dropout_rate: self.dropout_rate,
            model.max_item: self.max_item,
            model.is_training: False
        }
        item_logits = np.array(sess.run(model.logits, feed))

        for row, row_logits in zip(chosen, item_logits):
            # Zero entries are dropped from the stored sequence.
            entry = [row[row != 0].tolist(), row_logits.tolist()]
            self.exemplars[item].append(entry)
            total_saved += 1
    return total_saved
def loss_selection(self, sess: tf.Session, model: Union[Ader]) -> int:
    """
    Pick, for every item, the exemplars with the smallest loss.

    Items whose entry in ``self.item_count`` is below 0.5 are skipped. For
    the others, ``self.exemplars[item]`` is replaced by the lowest-loss
    sequences together with their logits.

    Args:
        sess (tf.Session): Tensorflow session.
        model (object): Trained model for evaluate.
    Returns:
        saved_num (int): Total number of exemplars saved for all items at
            current cycle.
    """
    total_saved = 0
    progress = tqdm(self.sess_by_item, ncols=70, leave=False, unit='b',
                    desc='Selecting exemplar')
    for item in progress:
        quota = self.item_count[item - 1]
        if quota < 0.5:
            continue

        raw_sequences = self.sess_by_item[item]
        available = len(raw_sequences)
        sequences = np.array(raw_sequences)

        feed = {
            model.input_seq: sequences[:, :-1],
            model.pos: sequences[:, -1],
            model.dropout_rate: self.dropout_rate,
            model.max_item: self.max_item,
            model.is_training: False
        }
        losses, item_logits = sess.run([model.loss, model.logits], feed)
        losses = np.array(losses)
        item_logits = np.array(item_logits)

        # Ascending sort: the first indices belong to the smallest losses.
        keep = int(min(quota, available))
        best = losses.argsort()[:keep]

        chosen = []
        for idx in best:
            row = sequences[idx]
            chosen.append([row[row != 0].tolist(), item_logits[idx].tolist()])
        self.exemplars[item] = chosen
        total_saved += len(best)
    return total_saved
# -- induction -- # f(x) = ax^2 +bx + c fx = a * tf.square(x) + b * x + c # -- loss -- # but we want f(x) to equal y y = placeholder(dtype=tf.float32, shape=[None]) # so we calculate a loss accordingly # loss = tf.reduce_mean(tf.square(fx - y)) loss = tf.sqrt(tf.reduce_mean(tf.square(fx - y))) learn = GradientDescentOptimizer(.0001).minimize(loss) # start a session sess = Session() # initialize the variables sess.run(global_variables_initializer()) data_x = [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0 ] data_y = [ 1.0, 2.0, 4.0, 4.0, 6.0, 5.0, 7.0, 7.0, 9.0, 10.0, 9.50, 8.00, 7.00, 6.00, 4.00, 5.00, 3.00, 2.00, 1.00 ] print(f""" We are using tensorflow to find the best equation that maps x = {data_x} to y = {data_y}
def train_mfmodel_without_ipw(sess: tf.Session,
                              model: MFMODEL,
                              data: str,
                              train: np.ndarray,
                              val: np.ndarray,
                              test: np.ndarray,
                              max_iters: int = 500,
                              batch_size: int = 2**9,
                              model_name: str = 'mf',
                              seed: int = 0) -> Tuple:
    """Train and evaluate the MF model without propensity weighting.

    Every record is fed a propensity score of 1. After each update the
    validation loss and the test MSE / MAE are recorded, and the test
    scores of the iteration with the lowest validation loss are reported.

    Args:
        sess: Session used for all graph executions. It is CLOSED before
            this function returns.
        model: Model whose ops and placeholders are run and fed.
        data: Not used by this function.
        train: Training records; columns 0, 1 and 2 are fed as users,
            items and labels.
        val: Validation records with the same column layout.
        test: Test records; only the 95% part of a random split is
            evaluated.
        max_iters: Number of mini-batch updates.
        batch_size: Number of records sampled (with replacement) per update.
        model_name: Not used by this function.
        seed: Not used by this function (see the review note in the body).

    Returns:
        Tuple ``(best validation loss, test MSE at that iteration, test MAE
        at that iteration, user embeddings, item embeddings, user bias,
        item bias, global bias)``.

    Raises:
        ValueError: If ``max_iters`` is 0, as there is no loss to minimise.
    """
    train_loss_list = []
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    num_train = train.shape[0]
    # NOTE(review): `rand_seed_val` is not defined in this function and the
    # `seed` parameter is never read - presumably a module-level constant;
    # confirm which one is meant to drive the split and the sampling.
    _, test = train_test_split(test,
                               test_size=0.95,
                               random_state=rand_seed_val)

    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)

    # We just use 1 as propensity score for all records. The arrays are
    # constant, so they are built once. The builtin int replaces np.int,
    # which was removed from NumPy.
    ones_batch = np.ones((int(batch_size), 1))
    ones_val = np.ones((len(labels_val), 1))

    # Start training a recommender
    np.random.seed(rand_seed_val)
    for _ in np.arange(max_iters):
        # Sample mini-batch
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, labels_batch = train[idx], labels_train[idx]

        # Update user-item latent factors
        _, loss, _ = sess.run(
            [model.apply_grads, model.loss, model.weighted_mse],
            feed_dict={
                model.users: train_batch[:, 0],
                model.items: train_batch[:, 1],
                model.labels: labels_batch,
                model.scores: ones_batch
            })
        train_loss_list.append(loss)

        # Calculate validation loss
        val_loss = sess.run(model.loss,
                            feed_dict={
                                model.users: val[:, 0],
                                model.items: val[:, 1],
                                model.labels: labels_val,
                                model.scores: ones_val
                            })
        val_loss_list.append(val_loss)

        # Calculate test loss
        mse_score, mae_score = sess.run(
            [model.mse, model.mae],
            feed_dict={
                model.users: test[:, 0],
                model.items: test[:, 1],
                model.labels: labels_test
            })
        test_mse_list.append(mse_score)
        test_mae_list.append(mae_score)

    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Report the test scores of the iteration with the best validation loss.
    best_iter = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_iter],
            test_mae_list[best_iter], u_emb, i_emb, u_bias, i_bias, g_bias)
# -- induction -- # f(x) = a * x + b fx = tf.add(tf.multiply(a, x), b) # -- loss -- # but we want f(x) to equal y y = placeholder(dtype=tf.float32) # so we calculate a loss accordingly loss = tf.square(fx - y) learn = GradientDescentOptimizer(.001).minimize(loss) # start a session sess = Session() # initialize the variables sess.run(global_variables_initializer()) data_x = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] data_y = [0.1, 0.2, 0.4, 0.4, 0.6, 0.5, 0.7, 0.7, 0.9] # let's calculate the total loss print(f""" We are using tensorflow to find the best equation that maps x = {data_x} to y = {data_y} """) def printTotalLoss(): total_loss = 0
class face_utils_cls():
    """Face feature, detection, landmark and attribute models (TensorFlow).

    Each model is a frozen graph loaded into its own graph and session.
    """

    def __init__(self):
        """Create one session per model and load the four frozen graphs.

        The dlib predictor and detector are left disabled below, so
        ``self.face_detector_dlib`` is NOT set by this constructor.
        """
        self.path = './face_models'
        # self.face_landmark_dlib = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
        # self.face_detector_dlib = dlib.get_frontal_face_detector()

        # When a GPU is used, allocate its memory on demand.
        gpu_options = GPUOptions(allow_growth=True)
        session_config = ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False,
                                     gpu_options=gpu_options)

        # Initialise the face feature, face detection, face landmark and
        # face attribute models, each in its own graph.
        self.face_feature_sess = Session(graph=tf.Graph(),
                                         config=session_config)
        self.face_detection_sess = Session(graph=tf.Graph(),
                                           config=session_config)
        self.face_landmark_sess = Session(graph=tf.Graph(),
                                          config=session_config)
        self.face_attribute_sess = Session(graph=tf.Graph(),
                                           config=session_config)

        self.ff_pb_path = self.path + "/face_recognition_model.pb"
        self.init_feature_face()

        self.detect_pb_path = self.path + "/face_detection_model.pb"
        self.init_detection_face_tf()

        self.landmark_pb_path = self.path + "/landmark.pb"
        self.init_face_landmark_tf()

        self.attribute_pb_path = self.path + "/face_attribute.pb"
        self.init_face_attribute()

    @staticmethod
    def _import_frozen_graph(sess, pb_path):
        """Import the frozen graph at *pb_path* into *sess*'s graph.

        Returns:
            The graph the definition was imported into.
        """
        with sess.as_default(), sess.graph.as_default():
            graph_def = sess.graph_def
            with GFile(pb_path, 'rb') as fid:
                graph_def.ParseFromString(fid.read())
            tf.import_graph_def(graph_def, name='')
            return get_default_graph()

    def init_feature_face(self):
        """Load the face feature model and look up its tensors."""
        graph = self._import_frozen_graph(self.face_feature_sess,
                                          self.ff_pb_path)
        self.ff_images_placeholder = graph.get_tensor_by_name("input:0")
        self.ff_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        self.ff_embeddings = graph.get_tensor_by_name("embeddings:0")

    def init_detection_face_tf(self):
        """Load the face detection model and collect its output tensors.

        Only the outputs that exist in the graph are stored in
        ``self.detection_tensor_dict``.
        """
        graph = self._import_frozen_graph(self.face_detection_sess,
                                          self.detect_pb_path)
        all_tensor_names = {
            output.name
            for op in graph.get_operations() for output in op.outputs
        }
        self.detection_tensor_dict = {}
        for key in ['num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes']:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                self.detection_tensor_dict[key] = graph.get_tensor_by_name(
                    tensor_name)
        self.detection_image_tensor = graph.get_tensor_by_name(
            'image_tensor:0')

    def init_face_landmark_tf(self):
        """Load the face landmark model and look up its output tensor."""
        graph = self._import_frozen_graph(self.face_landmark_sess,
                                          self.landmark_pb_path)
        self.face_landmark_tensor = graph.get_tensor_by_name(
            "fully_connected_9/Relu:0")

    def init_face_attribute(self):
        """Load the face attribute model and look up its tensors."""
        graph = self._import_frozen_graph(self.face_attribute_sess,
                                          self.attribute_pb_path)
        self.pred_eyeglasses = graph.get_tensor_by_name("ArgMax:0")
        self.pred_young = graph.get_tensor_by_name("ArgMax_1:0")
        self.pred_male = graph.get_tensor_by_name("ArgMax_2:0")
        self.pred_smiling = graph.get_tensor_by_name("ArgMax_3:0")
        self.face_attribute_image_tensor = graph.get_tensor_by_name(
            "Placeholder:0")

    def detection_face_by_dlib(self, im_data):
        """Detect the first face with dlib, in normalised coordinates.

        Requires ``self.face_detector_dlib``, which ``__init__`` does not
        set; without it this method raises ``AttributeError``.

        Returns:
            ``(x1, y1, x2, y2)`` as fractions of the image width / height,
            with the top edge moved up by 30% of the box height (clamped
            at 0), or four ``None`` values when no face is found.
        """
        sp = im_data.shape
        im_data = cv2.cvtColor(im_data, cv2.COLOR_BGR2GRAY)
        rects = self.face_detector_dlib(im_data, 1)
        if len(rects) == 0:
            return None, None, None, None
        # Only the first face is used.
        face = rects[0]
        x1 = face.left() * 1.0 / sp[1]
        y1 = face.top() * 1.0 / sp[0]
        x2 = face.right() * 1.0 / sp[1]
        y2 = face.bottom() * 1.0 / sp[0]
        # Adjust the face region. The coordinates are fractions of the
        # image size, so they must stay floats: truncating with int()
        # would turn every value below 1 into 0.
        y1 = max(y1 - 0.3 * (y2 - y1), 0.0)
        return x1, y1, x2, y2

    def detection_face_by_tf(self, im_data):
        """Detect a face with the TensorFlow detection model.

        The image is resized to 256x256 before detection.

        Returns:
            ``(x1, y1, x2, y2)`` of the first detection whose score is
            above 0.1, or four ``None`` values when there is none.
        """
        im_data_re = cv2.resize(im_data, (256, 256))
        output_dict = self.face_detection_sess.run(
            self.detection_tensor_dict,
            feed_dict={
                self.detection_image_tensor: np.expand_dims(im_data_re, 0)
            })
        # Only boxes and scores of the single input image are needed.
        boxes = output_dict['detection_boxes'][0]
        scores = output_dict['detection_scores'][0]
        for i, score in enumerate(scores):
            if score > 0.1:
                bbox = boxes[i]
                # Each box is stored as (y1, x1, y2, x2).
                y1, x1, y2, x2 = bbox[0], bbox[1], bbox[2], bbox[3]
                return x1, y1, x2, y2
        return None, None, None, None

    def prewhiten(self, x):
        """Standardise image data to zero mean and unit variance.

        The standard deviation is bounded below by ``1 / sqrt(x.size)`` so
        that a constant image does not cause a division by zero.
        """
        mean = np.mean(x)
        std = np.std(x)
        std_adj = np.maximum(std, 1.0 / np.sqrt(x.size))
        return np.multiply(np.subtract(x, mean), 1 / std_adj)

    def face_feature(self, face_data):
        """Return the embedding of a face image.

        The image is prewhitened and resized to 160x160 first.
        """
        im_data = self.prewhiten(face_data)  # preprocessing
        im_data = cv2.resize(im_data, (160, 160))
        im_data1 = np.expand_dims(im_data, axis=0)
        # Face feature extraction.
        emb1 = self.face_feature_sess.run(self.ff_embeddings,
                                          feed_dict={
                                              self.ff_images_placeholder:
                                              im_data1,
                                              self.ff_train_placeholder: False
                                          })
        return emb1

    def face_landmark_tf(self, face_data):
        """Return the landmark prediction for one face image."""
        print("begin ... landmark")
        pred = self.face_landmark_sess.run(
            self.face_landmark_tensor,
            {"Placeholder:0": np.expand_dims(face_data, 0)})
        print("success ... landmark")
        return pred[0]

    def face_attribute(self, im_data):
        """Return the (eyeglasses, young, male, smiling) predictions."""
        [eye_glass, young, male, smiling] = self.face_attribute_sess.run(
            [
                self.pred_eyeglasses, self.pred_young, self.pred_male,
                self.pred_smiling
            ],
            feed_dict={
                self.face_attribute_image_tensor: np.expand_dims(im_data, 0)
            })
        return eye_glass, young, male, smiling

    def load_fea_from_str(self, fea_path):
        """Read a comma-separated feature vector from the file's first line.

        Raises:
            IndexError: If the file is empty.
            ValueError: If an entry is not a valid float.
        """
        # The with-block closes the file; no explicit close() is needed.
        with open(fea_path) as f:
            first_line = f.readlines()[0]
        return np.array([float(ss) for ss in first_line.split(",")])
def train_mfmodel_with_at(sess: tf.Session,
                          model: MFMODEL,
                          mfmodel1: MFMODEL,
                          mfmodel2: MFMODEL,
                          data: str,
                          train: np.ndarray,
                          val: np.ndarray,
                          test: np.ndarray,
                          epsilon: float,
                          pre_iters: int = 500,
                          post_iters: int = 50,
                          post_steps: int = 5,
                          batch_size: int = 2**9,
                          model_name: str = 'naive-at',
                          seed: int = 0) -> Tuple:
    """Train and evaluate the MF-IPS model with asymmetric tri-training.

    Pre-training updates all three models on sampled mini-batches: *model*
    with a propensity of 1, *mfmodel1* and *mfmodel2* with the estimated
    propensity scores. Afterwards, user-item pairs on which the two
    auxiliary models agree within *epsilon* are pseudo-labeled with
    ``mfmodel1``'s prediction and used to update all three models.

    Args:
        sess: Session used for all graph executions. It is CLOSED before
            this function returns.
        model: Final prediction model.
        mfmodel1: First auxiliary model; provides the pseudo-labels.
        mfmodel2: Second auxiliary model.
        data: Not used by this function.
        train: Training records; columns 0, 1 and 2 are fed as users,
            items and labels.
        val: Validation records with the same column layout.
        test: Test records; a 5% split goes to the propensity estimation
            and the remaining 95% is evaluated.
        epsilon: Largest prediction difference for a pair to be labeled.
        pre_iters: Number of pre-training updates.
        post_iters: Number of outer pseudo-labeling iterations.
        post_steps: Number of pseudo-labeling steps per outer iteration.
        batch_size: Number of records sampled (with replacement) per update.
        model_name: Forwarded to ``estimate_pscore``.
        seed: Not used by this function (see the review note in the body).

    Returns:
        Tuple ``(best validation loss, test MSE at that step, test MAE at
        that step, user embeddings, item embeddings, user bias, item bias,
        global bias)`` of *model*.

    Raises:
        ValueError: If no pseudo-labeling step is performed, or if a step
            finds no pair within *epsilon* (nothing to sample from).
    """
    train_loss_list = []
    val_loss_list = []
    test_mse_list = []
    test_mae_list = []

    # Initialise all the TF variables
    sess.run(tf.global_variables_initializer())

    # Count the num of training data and estimate the propensity scores
    num_train = train.shape[0]
    # NOTE(review): `rand_seed_val` is not defined in this function and the
    # `seed` parameter is never read - presumably a module-level constant;
    # confirm which one is meant to be used.
    train_mcar, test = train_test_split(test,
                                        test_size=0.95,
                                        random_state=rand_seed_val)
    pscore_train, pscore_val = estimate_pscore(train=train,
                                               train_mcar=train_mcar,
                                               val=val,
                                               model_name=model_name)
    labels_train = np.expand_dims(train[:, 2], 1)
    labels_val = np.expand_dims(val[:, 2], 1)
    labels_test = np.expand_dims(test[:, 2], 1)
    # Constant propensity of 1, shared by every feed that needs it.
    pscore_model_all_1 = np.ones((batch_size, 1))

    ### Start training a recommender
    np.random.seed(rand_seed_val)

    ## Start pre-training step
    for _ in np.arange(pre_iters):
        # Sample mini-batch (one independent draw per model)
        idx = np.random.choice(np.arange(num_train), size=batch_size)
        idx1 = np.random.choice(np.arange(num_train), size=batch_size)
        idx2 = np.random.choice(np.arange(num_train), size=batch_size)
        train_batch, train_batch1, train_batch2 = (train[idx], train[idx1],
                                                   train[idx2])
        labels_batch, labels_batch1, labels_batch2 = (labels_train[idx],
                                                      labels_train[idx1],
                                                      labels_train[idx2])
        pscore_batch1, pscore_batch2 = pscore_train[idx1], pscore_train[idx2]

        # Update user-item latent factors
        sess.run([model.apply_grads, model.loss, model.weighted_mse],
                 feed_dict={
                     model.users: train_batch[:, 0],
                     model.items: train_batch[:, 1],
                     model.labels: labels_batch,
                     model.scores: pscore_model_all_1
                 })
        sess.run(
            [mfmodel1.apply_grads, mfmodel1.loss, mfmodel1.weighted_mse],
            feed_dict={
                mfmodel1.users: train_batch1[:, 0],
                mfmodel1.items: train_batch1[:, 1],
                mfmodel1.labels: labels_batch1,
                mfmodel1.scores: pscore_batch1
            })
        sess.run(
            [mfmodel2.apply_grads, mfmodel2.loss, mfmodel2.weighted_mse],
            feed_dict={
                mfmodel2.users: train_batch2[:, 0],
                mfmodel2.items: train_batch2[:, 1],
                mfmodel2.labels: labels_batch2,
                mfmodel2.scores: pscore_batch2
            })

    ## Start psuedo-labeling and final prediction steps
    # Cast to integer to avoid an error
    train = train.astype(int)
    val = val.astype(int)
    # Enumerate every (user, item) pair up to the largest training ids.
    all_data = pd.DataFrame(
        np.zeros((train[:, 0].max() + 1, train[:, 1].max() + 1)))
    all_data = all_data.stack().reset_index().values[:, :2]

    # NOTE(review): every statement below is treated as part of the inner
    # loop; the original indentation was not available - confirm.
    for _ in np.arange(post_iters):
        for _ in np.arange(post_steps):
            idx = np.random.choice(np.arange(all_data.shape[0]),
                                   size=num_train * 5)
            batch_data = all_data[idx]

            # Create psuedo-labeled dataset
            preds1 = sess.run(mfmodel1.preds,
                              feed_dict={
                                  mfmodel1.users: batch_data[:, 0],
                                  mfmodel1.items: batch_data[:, 1]
                              })
            preds2 = sess.run(mfmodel2.preds,
                              feed_dict={
                                  mfmodel2.users: batch_data[:, 0],
                                  mfmodel2.items: batch_data[:, 1]
                              })
            # Extract records whose prediction difference between model1
            # and model2 are less than or equal to epsilon
            idx = np.array(np.abs(preds1 - preds2) <= epsilon).flatten()
            target_users, target_items, pseudo_labels = (batch_data[idx, 0],
                                                         batch_data[idx, 1],
                                                         preds1[idx])
            target_data = np.c_[target_users, target_items, pseudo_labels]
            # Store information during the pseudo-labeleing step
            num_target = target_data.shape[0]

            # Sample mini-batch for the pseudo-labeleing step
            idx = np.random.choice(np.arange(num_target), size=batch_size)
            idx1 = np.random.choice(np.arange(num_target), size=batch_size)
            idx2 = np.random.choice(np.arange(num_target), size=batch_size)
            pseudo_train_batch = target_data[idx]
            pseudo_train_batch1 = target_data[idx1]
            pseudo_train_batch2 = target_data[idx2]

            # Update user-item latent factors of the final prediction model
            _, train_loss = sess.run(
                [model.apply_grads, model.loss],
                feed_dict={
                    model.users: pseudo_train_batch[:, 0],
                    model.items: pseudo_train_batch[:, 1],
                    model.labels: np.expand_dims(pseudo_train_batch[:, 2],
                                                 1),
                    # The shared array replaces a per-step np.ones call that
                    # used np.int, which was removed from NumPy.
                    model.scores: pscore_model_all_1
                })

            # Calculate validation loss during the psuedo-labeleing step
            val_loss = sess.run(model.loss,
                                feed_dict={
                                    model.users: val[:, 0],
                                    model.items: val[:, 1],
                                    model.scores: pscore_val,
                                    model.labels: labels_val
                                })

            # Calculate test losses during the psuedo-labeleing step
            mse_score, mae_score = sess.run(
                [model.mse, model.mae],
                feed_dict={
                    model.users: test[:, 0],
                    model.items: test[:, 1],
                    model.labels: labels_test
                })

            train_loss_list.append(train_loss)
            val_loss_list.append(val_loss)
            test_mse_list.append(mse_score)
            test_mae_list.append(mae_score)

            # Re-update the model parameters of pre-trained models using
            # pseudo-labeled data
            sess.run(mfmodel1.apply_grads,
                     feed_dict={
                         mfmodel1.users: pseudo_train_batch1[:, 0],
                         mfmodel1.items: pseudo_train_batch1[:, 1],
                         mfmodel1.labels: np.expand_dims(
                             pseudo_train_batch1[:, 2], 1),
                         mfmodel1.scores: pscore_model_all_1
                     })
            sess.run(mfmodel2.apply_grads,
                     feed_dict={
                         mfmodel2.users: pseudo_train_batch2[:, 0],
                         mfmodel2.items: pseudo_train_batch2[:, 1],
                         mfmodel2.labels: np.expand_dims(
                             pseudo_train_batch2[:, 2], 1),
                         mfmodel2.scores: pscore_model_all_1
                     })

    # Obtain user-item embeddings
    u_emb, i_emb, u_bias, i_bias, g_bias = sess.run([
        model.user_embeddings, model.item_embeddings, model.user_bias,
        model.item_bias, model.global_bias
    ])

    sess.close()

    # Report the test scores of the step with the best validation loss.
    best_step = np.argmin(val_loss_list)
    return (np.min(val_loss_list), test_mse_list[best_step],
            test_mae_list[best_step], u_emb, i_emb, u_bias, i_bias, g_bias)
# Squared pairwise distances between the data points and the grid points,
# built as |a|^2 - 2 * a.b + |b|^2.
rA = tf.compat.v1.reshape(
    tf.compat.v1.reduce_sum(tf.compat.v1.square(x_data), 1), [-1, 1])
# Row-wise squared norms of the grid points. The reduction has to run over
# axis 1, exactly as for rA; without the axis everything collapses into a
# single scalar and the same constant is added to every distance.
rB = tf.compat.v1.reshape(
    tf.compat.v1.reduce_sum(tf.compat.v1.square(prediction_grid), 1),
    [-1, 1])
pred_sq_dist = tf.compat.v1.add(
    tf.compat.v1.subtract(
        rA,
        tf.compat.v1.multiply(
            2.,
            tf.compat.v1.matmul(x_data,
                                tf.compat.v1.transpose(prediction_grid)))),
    tf.transpose(rB))
pred_kernel = tf.compat.v1.exp(
    tf.compat.v1.multiply(gamma, tf.compat.v1.abs(pred_sq_dist)))

prediction_output = tf.compat.v1.matmul(
    tf.compat.v1.multiply(tf.compat.v1.transpose(y_target), b), pred_kernel)
# The sign of the mean-centred output is the predicted class.
prediction = tf.compat.v1.sign(
    prediction_output - tf.compat.v1.reduce_mean(prediction_output))
accuracy = tf.compat.v1.reduce_mean(
    tf.compat.v1.cast(
        tf.compat.v1.equal(tf.compat.v1.squeeze(prediction),
                           tf.compat.v1.squeeze(y_target)), tf.float32))

# Train
optimizer = tf.compat.v1.train.GradientDescentOptimizer(LEARNING_RATE)
train_step = optimizer.minimize(loss)

# Start training
init = tf.compat.v1.global_variables_initializer()
session.run(init)

# Really start training
print("Training...")
loss_vec = []
batch_accuracy = []
for i in range(NUMER_OF_BATCHES):
    # Random mini-batch, drawn with replacement.
    rand_indices = np.random.choice(df.text.size, size=BATCH_SIZE)
    #print(f"Step {i}: selected items with indices \n{rand_indices}")
    #print(f"Step {i}: pre-selected items \n{df.text[rand_indices]}")
    rand_x = np.transpose(np.dstack(df.text[rand_indices].to_numpy())[0])
    #print(f"Step {i}: selected items \n{rand_x}\n with shape {rand_x.shape}")
    rand_y = np.transpose([df.type_Request[rand_indices]])
    #print(f"Step {i}: corresponding true labels are \n{rand_y}")
    session.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})

    # Loss on the same batch, measured after the update.
    temp_loss = session.run(loss,
                            feed_dict={x_data: rand_x, y_target: rand_y})
def test_fw_iter(IteratorClass, args):
    """Time how fast *IteratorClass* delivers batches from RN50 pipelines.

    One pipeline is built per GPU. For the TensorFlow plugin
    (``nvidia.dali.plugin.tf.DALIIterator``) the DALI op is placed on every
    GPU and fetched through a session; every other iterator class is
    constructed from the pipelines and iterated directly. The first epoch
    is reported as warm-up.

    Args:
        IteratorClass: Iterator (or TF op) class under test.
        args: Parsed options; reads ``batch_size``, ``workers``, ``gpus``,
            ``fp16``, ``nhwc``, ``iters``, ``epochs`` and ``print_freq``.
    """
    iterator_name = IteratorClass.__module__ + "." + IteratorClass.__name__
    print("Start testing {}".format(iterator_name))
    is_tf_plugin = iterator_name == "nvidia.dali.plugin.tf.DALIIterator"
    sess = None
    images = []
    labels = []

    pipes = [
        RN50Pipeline(batch_size=args.batch_size,
                     num_threads=args.workers,
                     device_id=n,
                     num_gpus=args.gpus,
                     data_paths=data_paths,
                     prefetch=PREFETCH,
                     fp16=args.fp16,
                     nhwc=args.nhwc) for n in range(args.gpus)
    ]
    for pipe in pipes:
        pipe.build()

    iters = args.iters
    if args.iters < 0:
        iters = pipes[0].epoch_size("Reader")
        assert (all(pipe.epoch_size("Reader") == iters for pipe in pipes))
    # Samples -> batches -> iterations per GPU, rounding up both times
    # (-(-a // b) is ceiling division).
    iters = -(-iters // args.batch_size)
    iters = -(-iters // args.gpus)

    def report(j, data_time):
        """Print the running timing statistics for iteration *j*."""
        print(
            "{} {}/ {}, avg time: {} [s], worst time: {} [s], speed: {} [img/s]"
            .format(iterator_name, j + 1, iters, data_time.avg,
                    data_time.max_val,
                    args.gpus * args.batch_size / data_time.avg))

    if is_tf_plugin:
        daliop = IteratorClass()
        for dev in range(args.gpus):
            with tf.device('/gpu:%i' % dev):
                out_type = tf.float16 if args.fp16 else tf.float32
                image, label = daliop(pipeline=pipes[dev],
                                      shapes=[(args.batch_size, 3, 224, 224),
                                              ()],
                                      dtypes=[out_type, tf.int32])
                images.append(image)
                labels.append(label)
        gpu_options = GPUOptions(per_process_gpu_memory_fraction=0.8)
        config = ConfigProto(gpu_options=gpu_options)
        sess = Session(config=config)

    # try/finally so the session is released even if a run fails.
    try:
        end = time.time()
        for i in range(args.epochs):
            if i == 0:
                print("Warm up")
            else:
                print("Test run " + str(i))
            data_time = AverageMeter()

            if is_tf_plugin:
                assert sess is not None
                for j in range(iters):
                    sess.run([images, labels])
                    data_time.update(time.time() - end)
                    if j % args.print_freq == 0:
                        report(j, data_time)
                    end = time.time()
            else:
                dali_train_iter = IteratorClass(pipes,
                                                pipes[0].epoch_size("Reader"))
                for j, _ in enumerate(dali_train_iter):
                    data_time.update(time.time() - end)
                    if j % args.print_freq == 0:
                        report(j, data_time)
                    end = time.time()
                    # Stop once more than `iters` batches were consumed.
                    if j + 1 > iters:
                        break
    finally:
        if sess is not None:
            sess.close()
class DqlTensorFlow(LearningModel):
    """Deep Q-Learning agent backed by a TensorFlow session."""

    #by default learning rate should not decay at all, since this is not the default behavior
    #of Deep-Q Learning
    def __init__(self,
                 action_wrapper: ActionWrapper,
                 state_builder: StateBuilder,
                 learning_rate=0.0002,
                 learning_rate_min=0.00002,
                 learning_rate_decay=1,
                 learning_rate_decay_ep_cutoff=0,
                 gamma=0.95,
                 name='DQN',
                 build_model=ModelBuilder.DEFAULT_BUILD_MODEL,
                 epsilon_start=1.0,
                 epsilon_min=0.5,
                 epsilon_decay=0.995,
                 per_episode_epsilon_decay=False,
                 use_memory=False,
                 memory_maxlen=10000,
                 batch_training=False,
                 batch_size=32,
                 min_memory_size=5000,
                 seed_value=None,
                 cpu_only=False,
                 epsilon_linear_decay=False,
                 lr_linear_decay=False):
        """Set up the base model, build the network and the replay memory.

        The replay memory attributes (``memory``, ``memory_maxlen``,
        ``min_memory_size``) are only created when *use_memory* is true.
        *batch_training* is accepted but not read here.
        """
        super().__init__(action_wrapper, state_builder, gamma, learning_rate,
                         learning_rate_min, learning_rate_decay,
                         epsilon_start, epsilon_min, epsilon_decay,
                         per_episode_epsilon_decay,
                         learning_rate_decay_ep_cutoff, name, seed_value,
                         cpu_only, epsilon_linear_decay, lr_linear_decay)
        # Defining the model's layers. Tensorflow's objects are stored into self.model_layers
        self.batch_size = batch_size
        self.build_model = build_model
        self.make_model()

        self.use_memory = use_memory
        if self.use_memory:
            self.memory = deque(maxlen=memory_maxlen)
            self.memory_maxlen = memory_maxlen
            self.min_memory_size = min_memory_size

    def learn(self, s, a, r, s_, done):
        """Learn from one transition, with or without replay memory."""
        if self.use_memory:
            self.memory_learn(s, a, r, s_, done)
        else:
            self.no_memory_learn(s, a, r, s_, done)

    def memory_learn(self, s, a, r, s_, done):
        """Store the transition and train on a random batch from memory.

        Nothing is trained until the memory holds at least
        ``min_memory_size`` transitions and enough of them to draw a full
        batch.
        """
        self.memorize(s, a, r, s_, done)
        # random.sample raises if asked for more items than are stored, so
        # also wait until a whole batch is available.
        if len(self.memory) < max(self.min_memory_size, self.batch_size):
            return

        batch = random.sample(self.memory, self.batch_size)
        states = np.array([val[0] for val in batch])
        states = np.squeeze(states)
        # A missing next state is replaced by an all-zero state.
        next_states = np.array([
            (np.zeros(self.state_size) if val[3] is None else val[3])
            for val in batch
        ])
        next_states = np.squeeze(next_states)

        # predict Q(s,a) given the batch of states
        q_s_a = self.sess.run(self.model_layers[-1],
                              feed_dict={self.model_layers[0]: states})
        # predict Q(s',a') - so that we can do gamma * max(Q(s'a')) below
        q_s_a_d = self.sess.run(self.model_layers[-1],
                                feed_dict={self.model_layers[0]: next_states})

        # setup training arrays
        x = np.zeros((len(batch), self.state_size))
        y = np.zeros((len(batch), self.action_size))
        for i, (state, action, reward, next_state,
                done) in enumerate(batch):
            # get the current q values for all actions in state
            current_q = q_s_a[i]
            if done:
                # last step: there is no future max q value, so the new q
                # is just the reward
                current_q[action] = reward
            else:
                # reward at that step + discount factor * the max q-value
                # for the next_state
                current_q[action] = reward + self.gamma * np.amax(
                    q_s_a_d[i])
            x[i] = state
            y[i] = current_q

        self.sess.run(self.optimizer,
                      feed_dict={
                          self.model_layers[0]: x,
                          self.tf_qsa: y
                      })

    def no_memory_learn(self, s, a, r, s_, done):
        """Train on the single transition ``(s, a, r, s_, done)``."""
        qsa_values = self.sess.run(self.model_layers[-1],
                                   feed_dict={self.model_layers[0]: s})

        if done:
            current_q = r
        else:
            current_q = r + self.gamma * self.__maxq(s_)
        qsa_values[0, a] = current_q

        self.sess.run(self.optimizer,
                      feed_dict={
                          self.model_layers[0]: s,
                          self.tf_qsa: qsa_values
                      })
        # The original ran one more forward pass here and discarded the
        # result; it was removed because nothing read it.

    def __maxq(self, state):
        """Return the largest Q-value the network predicts for *state*."""
        values = self.sess.run(self.model_layers[-1],
                               feed_dict={self.model_layers[0]: state})
        return np.amax(values[0])

    def choose_action(self, state, excluded_actions=[], is_testing=False):
        """Pick an action with an epsilon-greedy policy.

        Args:
            state: Current state, forwarded to ``self.predict``.
            excluded_actions: Actions that must not be returned. The
                default list is only read, never modified.
            is_testing: When true, always use the network's prediction and
                skip exploration and epsilon decay.

        Returns:
            The chosen action.

        Raises:
            ValueError: If exploration is selected but every available
                action is excluded.
        """
        if is_testing:
            return self.predict(state, excluded_actions)

        expl_expt_tradeoff = np.random.rand()
        if self.epsilon_greedy > expl_expt_tradeoff:
            # Without this check the rejection loop below never terminates.
            if all(act in excluded_actions for act in self.actions):
                raise ValueError(
                    "choose_action: every available action is excluded")
            # Unused draw, kept only so that the `random` stream (and with
            # it every seeded run) stays identical to earlier versions.
            random.randint(0, len(self.actions) - 1)
            action = random.choice(self.actions)
            # Removing excluded actions
            while action in excluded_actions:
                action = random.choice(self.actions)
        else:
            action = self.predict(state, excluded_actions)

        if not self.per_episode_epsilon_decay:
            self.decay_epsilon()

        return action
def predict(self, state, excluded_actions=None):
    """Return the index of the best-scoring action that is not excluded.

    The Q-values for ``state`` are obtained by running the last model layer
    with ``state`` fed into the first model layer.

    :param state: value fed to the input layer (``self.model_layers[0]``).
    :param excluded_actions: optional container of action indices that must
        not be returned. ``None`` (the default) excludes nothing.
    :return: the chosen action index as a plain ``int``.
    :raises ValueError: if there is no selectable action (every index is
        excluded, or the network returned no values).
    """
    q_values = self.sess.run(self.model_layers[-1],
                             feed_dict={self.model_layers[0]: state})
    # np.argmax works on the flattened array, so flatten explicitly.
    scores = np.asarray(q_values).ravel()
    banned = () if excluded_actions is None else excluded_actions

    # Select among the *original* indices. Deleting entries from the array
    # (as the previous implementation did) shifts every later index, so the
    # argmax of the shrunken array no longer identifies an action.
    candidates = [idx for idx in range(scores.size) if idx not in banned]
    if not candidates:
        raise ValueError("predict: no selectable action remains")
    best = candidates[int(np.argmax(scores[candidates]))]
    return int(best)


def save_extra(self, persist_path):
    """Write the TensorFlow session state to the checkpoint path.

    :param persist_path: passed to
        ``get_full_persistance_tensorflow_path`` to obtain the save path.
    """
    self.saver.save(self.sess,
                    self.get_full_persistance_tensorflow_path(persist_path))


def load_extra(self, persist_path):
    """Rebuild the model and restore its variables if a checkpoint exists.

    :param persist_path: passed to
        ``get_full_persistance_tensorflow_path`` to obtain the checkpoint
        prefix; the checkpoint counts as present when ``<prefix>.meta`` is a
        file.
    """
    # The graph (and the saver) must exist before anything can be restored.
    self.make_model()

    checkpoint_prefix = self.get_full_persistance_tensorflow_path(
        persist_path)
    if os.path.isfile(checkpoint_prefix + ".meta"):
        self.saver.restore(self.sess, checkpoint_prefix)
    # NOTE(review): seeds are re-applied whether or not a checkpoint was
    # found; the collapsed source does not show the nesting, so confirm.
    self.set_seeds()


def make_model(self):
    """Build the network described by ``self.build_model`` in a new session.

    Resets the default graph, disables eager execution, creates
    ``self.sess``, turns every entry of ``self.build_model`` into a layer
    appended to ``self.model_layers``, then creates the target placeholder
    ``self.tf_qsa``, the mean-squared-error ``self.loss``, the Adam
    ``self.optimizer`` and ``self.saver``, and initialises all variables.

    :raises IncoherentBuildModelError: if an input layer appears anywhere
        but first.
    :raises UnsupportedBuildModelLayerTypeError: if a layer has an unknown
        ``'type'``.
    """
    ops.reset_default_graph()
    tf.compat.v1.disable_eager_execution()

    # Initializing TensorFlow session
    self.sess = Session(config=ConfigProto(allow_soft_placement=True))

    # When the description is framed by an input and an output layer, size
    # both ends from the agent's state and action dimensions.
    # NOTE(review): only these two assignments are treated as conditional;
    # the layer loop below always runs. Confirm against the original layout.
    first_type = self.build_model[0]['type']
    last_type = self.build_model[-1]['type']
    if (first_type == ModelBuilder.LAYER_INPUT
            and last_type == ModelBuilder.LAYER_OUTPUT):
        self.build_model[0]['shape'] = [None, self.state_size]
        self.build_model[-1]['length'] = self.action_size

    # Load each layer
    self.model_layers = []
    for position, layer_model in enumerate(self.build_model):
        layer_type = layer_model['type']
        if layer_type == ModelBuilder.LAYER_INPUT:
            # Use the real position: list.index() returns the first *equal*
            # dict, so a duplicated input entry would wrongly report 0.
            if position != 0:
                raise IncoherentBuildModelError(
                    "Input Layer must be the first one.")
            self.model_layers.append(
                placeholder(dtype=tf.float32,
                            shape=layer_model['shape'],
                            name='inputs_'))
        elif layer_type == ModelBuilder.LAYER_FULLY_CONNECTED:
            self.model_layers.append(
                layers.dense(inputs=self.model_layers[-1],
                             units=layer_model['nodes'],
                             activation=tf.nn.relu,
                             name=layer_model['name']))
        elif layer_type == ModelBuilder.LAYER_OUTPUT:
            self.model_layers.append(
                layers.dense(inputs=self.model_layers[-1],
                             units=self.action_size,
                             activation=None))
        else:
            # str() keeps the intended error from turning into a TypeError
            # when the layer type is not a string.
            raise UnsupportedBuildModelLayerTypeError(
                "Unsuported Layer Type " + str(layer_type))

    # Setup output qsa layer and loss
    self.tf_qsa = placeholder(shape=[None, self.action_size],
                              dtype=tf.float32)
    self.loss = tf.losses.mean_squared_error(self.tf_qsa,
                                             self.model_layers[-1])
    self.optimizer = train.AdamOptimizer(self.learning_rate).minimize(
        self.loss)

    self.sess.run(global_variables_initializer())
    self.saver = train.Saver()


def memorize(self, state, action, reward, next_state, done):
    """Append one ``(state, action, reward, next_state, done)`` tuple to
    ``self.memory``."""
    self.memory.append((state, action, reward, next_state, done))
# -- loss -- # In machine learning, we often call the error function the 'loss'. # We want c to equal the target so we calculate a loss accordingly. # In this case we will use the square of the difference as the error. loss = tf.square(c - args.target) # We want tensorflow to learn from the loss (the error) and # this is how we do that optimizer = GradientDescentOptimizer(args.learning_rate) learn = optimizer.minimize(loss) # start a session sess = Session() # initialize the variables sess.run(global_variables_initializer()) # let's do the multiplication print("The result of ", sess.run(a), "x", sess.run(b), "is", sess.run(c), ", but we want it to =", args.target) print() print("We will use tensorflow to 'learn' the variable a.") print() print('-' * 40) print('Iteration | Result a*b=c') print('-' * 40) for iteration in range(1, args.iterations + 1): # learn a
# Our code will be really limited in use if all we can use is constants
# in tensorflow.
# Here we introduce how we can pass different values using placeholders,
# supplying the values through the feed_dict argument of sess.run.

# -- imports --
import tensorflow as tf
from tensorflow.compat.v1 import placeholder, Session

# -- variables --
# a and b have no value of their own; one must be fed on every run
a = placeholder(dtype=tf.float32)
b = placeholder(dtype=tf.float32)

# -- induction --
# Multiply a by b
c = tf.multiply(a, b)

# Start a session. Using it as a context manager closes it (and releases the
# resources it holds) when the block ends, even if a run raises.
with Session() as sess:
    # let's do the multiplication, feeding a different pair each time
    print("The result of 5x7 is", sess.run(c, feed_dict={a: 5, b: 7}))
    print("The result of 2x3 is", sess.run(c, feed_dict={a: 2, b: 3}))
def vectorize_sentences(cls, sentences: List[str], session: tf.Session,
                        embedded_text,
                        text_input: tf.placeholder) -> list:
    """Run ``embedded_text`` for ``sentences`` and return plain lists.

    ``sentences`` is fed to ``text_input``; every item of the result of
    ``session.run`` is converted with its ``tolist()`` method.
    """
    embeddings = session.run(embedded_text,
                             feed_dict={text_input: sentences})
    as_lists = []
    for embedding in embeddings:
        as_lists.append(embedding.tolist())
    return as_lists
# notice that when we print them out we do not get their values,
# that is because we need to evaluate them

# -- imports --
import tensorflow as tf
from tensorflow.compat.v1 import Session

# -- constants --
a = tf.constant(5.0)
b = tf.constant(7.0)

# -- induction --
# Multiply a by b
c = tf.multiply(a, b)

# Start a session. Using it as a context manager closes it (and releases the
# resources it holds) when the block ends, even if a run raises.
with Session() as sess:
    # let's check out a, b, and c (printed as objects, not evaluated)
    print("a =", a)
    print("b =", b)
    print("c =", c)

    # let's do the multiplication
    print("The result of 5x7 is", sess.run(c))  # that evaluates c

    # let's evaluate a and b
    print("The value of a is", sess.run(a))
    print("The value of b is", sess.run(b))
# -- induction --
# the model: f(x) = a * x + b
fx = tf.add(tf.multiply(a, x), b)

# -- loss --
# y receives the values that f(x) is supposed to produce
y = placeholder(dtype=tf.float32, shape=[None])

# the loss is the mean of the squared differences between f(x) and y
loss = tf.reduce_mean(tf.square(fx - y))
learn = GradientDescentOptimizer(.01).minimize(loss)

# open a session and give every variable its initial value
sess = Session()
sess.run(global_variables_initializer())

# training data: the same nine points are fed on every step
data_x = [float(n) for n in range(1, 10)]
data_y = [0.1, 0.2, 0.4, 0.4, 0.6, 0.5, 0.7, 0.7, 0.9]
training_feed = {x: data_x, y: data_y}

for iteration in range(1, 1001):
    sess.run(learn, feed_dict=training_feed)

    # report at steps 1, 10, 100 and at every multiple of 1000
    if iteration in (1, 10, 100) or iteration % 1000 == 0:
        current_loss = sess.run(loss, feed_dict=training_feed)
        print("iteration", iteration, ", total loss =", current_loss)

slope = sess.run(a)
intercept = sess.run(b)
print("The equation is approximately f(x) =", slope, "* x +", intercept)
def restore_or_initialize(self, session: tf.Session, model_filename: Path,
                          global_step: Optional[int]) -> Optional[int]:
    """
    Tries to find a checkpoint file from which to restore variables in the
    graph. If no checkpoint file is found, the variables are initialized
    using their respective initializers.

    This method searches for checkpoint files in the directory containing
    `model_filename`. It is assumed that model files have the same base
    filename as `model_filename`, followed by a dash, followed by the global
    step number. If, for example, `model_filename` were to be "./log/model",
    valid checkpoint filenames would be, for instance, "./log/model-10",
    "./log/model-100". The base filename may itself contain dots (for
    example "./log/model.ckpt"). Files that match the pattern but whose
    step part is not a decimal number are ignored.

    If the parameter `global_step` is not given, this method scans all
    checkpoint index files matching the pattern described above, and selects
    the latest checkpoint. If `global_step` is given, this method tries to
    restore variables from a checkpoint file for that specific global step,
    and fails if such a file does not exist.

    Parameters
    ----------
    session: tf.Session
        The Tensorflow session in which to restore variables
    model_filename: pathlib.Path
        The name of model files, without extension. Tensorflow saves models
        in several files per checkpoint, and appends, for example, the
        global step number to filenames. This parameter should indicate the
        common prefix for these filenames, analogous to the `save_path`
        parameter of the `tf.train.Saver.save` method.
    global_step: int, optional
        If given, restore variables values at the specified global step.
        Otherwise, restore variables from the latest checkpoint.

    Returns
    -------
    int or None
        The global step number from which variables were restored, or None
        if no checkpoint files were found and variables were initialized
        using their initializers
    """
    if global_step is None:
        self.log.debug(
            "no global step specified - searching for checkpoints")

        global_step = -1  # sentinel: no checkpoint found
        prefix = model_filename.name + "-"

        for file in model_filename.parent.glob("%s-*.index" %
                                               model_filename.name):
            # `stem` drops only the trailing ".index", so a dot inside the
            # model name cannot cut the step number short.
            step_text = file.stem[len(prefix):]
            if not step_text.isdecimal():
                self.log.debug("ignoring non-checkpoint file %s", file)
                continue

            step = int(step_text)
            self.log.debug("found checkpoint for global step %d at %s",
                           step, file)
            global_step = max(global_step, step)

    # Compare by value: `is -1` tests object identity and only works by
    # accident of the interpreter's small-integer cache.
    if global_step == -1:
        self.log.info("initializing variables")
        session.run(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, "model")))
        return None

    filename = model_filename.with_name("%s-%d" %
                                        (model_filename.name, global_step))
    self.log.info("restoring variables from %s", filename)
    self.saver.restore(session, str(filename))
    return global_step
parser.add_argument('--plot', action='store_true', help="Plot the data")
parser.add_argument('--iterations',
                    nargs='?',
                    type=int,
                    default=10000,
                    help="The number of iterations")
args = parser.parse_args()

# -- get the model and data --
# both are modules inside the lesson package, selected by name on the
# command line
data_directory = "lesson_09"
model = import_module(".".join((data_directory, args.model)))
data = import_module(".".join((data_directory, args.data)))

print()
print(f"Using model {args.model} on the dataset {args.data}")
print()

# open a session and initialise the variables
sess = Session()
sess.run(global_variables_initializer())

# the same x/y arrays are fed on every step
training_feed = {model.x: data.x, model.y: data.y}

for iteration in range(1, args.iterations + 1):
    # one learning step
    sess.run(model.learn, feed_dict=training_feed)

    # feedback at steps 1, 10, 100 and at every multiple of 1000
    if iteration in (1, 10, 100) or iteration % 1000 == 0:
        rms_error = sess.run(model.rms_error, feed_dict=training_feed)
        print(f"iteration {iteration:5}, RMS error = {rms_error:.2f}")

print("\ndone training\n")
print("The equation is f(x) =")
model.printEquation(sess)
class PpoGraph:
    """
    Proximal Policy Optimization implementation in tensorflow.

    https://arxiv.org/abs/1707.06347
    ("Proximal Policy Optimization Algorithms", J. Schulman et al, 2017)

    This class encapsulates all tensorflow interactions: it owns its own
    graph and session, builds the policy/value networks and their losses in
    the constructor, and exposes prediction, training, pre-training and
    persistence as methods.
    """
    def __init__(self, observation_size, net_arch, initializer, activation,
                 clip_range, value_coef, entropy_coef, learning_rate,
                 pre_training_learning_rate, action_bounds, policy):
        """
        Store the hyper-parameters, build the graph and initialise variables.

        :param observation_size: size of the second dimension enforced on
            the decoded observation tensor
        :param net_arch: passed to ``net_core`` for both the policy and the
            value network
        :param initializer: passed to ``net_core`` for both networks and used
            as kernel initializer of the value output layer
        :param activation: passed to ``net_core`` for both networks
        :param clip_range: the probability ratio is clipped to
            ``[1 - clip_range, 1 + clip_range]`` in the policy loss
        :param value_coef: weight of the value loss in the total loss
        :param entropy_coef: weight of the entropy loss in the total loss
        :param learning_rate: default value of the learning-rate placeholder
            used by the Adam optimizer
        :param pre_training_learning_rate: default value of the learning-rate
            placeholder used by the pre-training gradient-descent optimizer
        :param action_bounds: two-element sequence ``[low, high]`` used to
            clip actions; ``None`` selects ``[0.0, 1.5]``
        :param policy: policy object; ``None`` selects ``GaussFull()``. The
            graph uses its ``construct``, ``action``,
            ``neg_logits_from_actions``, ``entropy``, ``dist_params``,
            ``det_action`` and ``neg_logits`` members
        """
        """Set class constants"""
        self.observation_size = observation_size
        self.net_arch = net_arch
        self.initializer = initializer
        self.activation = activation
        self.clip_range = clip_range
        self.value_coef = value_coef
        self.entropy_coef = entropy_coef
        if action_bounds is None:
            action_bounds = [0.0, 1.5]
        self.action_bounds = action_bounds
        self.learning_rate = learning_rate
        self.pre_training_learning_rate = pre_training_learning_rate
        if policy is None:
            policy = GaussFull()
        self.policy = policy
        """Set up the tensorflow graph"""
        # Everything below is created inside this instance's private graph.
        self.graph = Graph()
        with self.graph.as_default():
            self.sess = Session(graph=self.graph)
            """ core """
            # place holders
            self.observation_string_ph = placeholder(
                shape=(None, 1), dtype=string, name="observation_string_ph")
            self.action_ph = placeholder(dtype=float32,
                                         shape=(None, 1),
                                         name="action_ph")
            self.old_neg_logits = placeholder(dtype=float32,
                                              shape=(None, 1),
                                              name="old_neg_logits")
            self.advantage_ph = placeholder(dtype=float32,
                                            shape=(None, 1),
                                            name="advantage_ph")
            self.value_target_ph = placeholder(dtype=float32,
                                               shape=(None, 1),
                                               name="value_target_ph")
            # learning rate tensors: fall back to the constructor values
            # unless a rate is fed explicitly
            self.learning_rate_ph = placeholder_with_default(
                input=self.learning_rate, shape=())
            self.pre_training_learning_rate_ph = placeholder_with_default(
                input=self.pre_training_learning_rate, shape=())
            # observation tensor: string observations have "/" replaced by
            # "_" and "+" by "-", are base64-decoded and reinterpreted as
            # raw float32 values.
            # NOTE(review): the replacements look like a conversion to the
            # URL-safe base64 alphabet expected by the decoder - confirm.
            replaced1 = regex_replace(self.observation_string_ph, "/", "_")
            replaced2 = regex_replace(replaced1, r"\+", "-")
            byte_tensor = decode_base64(replaced2)
            decoded = decode_raw(byte_tensor, out_type=float32)
            squeezed = squeeze(decoded, axis=1)
            self.observation_input = ensure_shape(
                squeezed,
                shape=(None, self.observation_size),
                name="observation_input")
            # policy net
            latent_policy = net_core(self.observation_input, self.net_arch,
                                     self.initializer, self.activation)
            self.policy.construct(latent_policy=latent_policy)
            self.clipped_action = clip_by_value(
                cast(self.policy.action, float32), self.action_bounds[0],
                self.action_bounds[1], "clipped_action")
            # value net: a second net_core call on the same observations,
            # followed by a single linear output unit
            latent_value = net_core(self.observation_input, self.net_arch,
                                    self.initializer, self.activation)
            self.value = identity(
                input=Dense(units=1,
                            activation=None,
                            kernel_initializer=self.initializer)(latent_value),
                name="value")
            """loss calculation"""
            # policy loss: clipped surrogate objective
            self.neg_logits = self.policy.neg_logits_from_actions(
                self.action_ph)
            # ratio between the current and the old policy, computed from
            # the difference of the two negative-logit tensors
            ratio = exp(self.old_neg_logits - self.neg_logits)
            # advantages are standardised over the fed batch; the 1e-8 keeps
            # the division defined when their standard deviation is zero
            standardized_adv = (self.advantage_ph - reduce_mean(
                self.advantage_ph)) / (reduce_std(self.advantage_ph) + 1e-8)
            raw_policy_loss = -standardized_adv * ratio
            clipped_policy_loss = -standardized_adv * clip_by_value(
                ratio, 1 - self.clip_range, 1 + self.clip_range)
            # element-wise maximum of the two losses, averaged
            self.policy_loss = reduce_mean(
                maximum(raw_policy_loss, clipped_policy_loss))
            # value loss: mean squared error against the value targets
            self.value_loss = mean_squared_error(self.value_target_ph,
                                                 self.value)
            # entropy loss: negated mean entropy, so minimising the total
            # loss pushes the entropy up
            self.entropy_loss = -reduce_mean(self.policy.entropy)
            # total loss
            self.total_loss = self.policy_loss + self.value_coef * self.value_loss + self.entropy_coef * self.entropy_loss
            # optimizer
            optimizer = AdamOptimizer(learning_rate=self.learning_rate_ph)
            # training ops
            self.training_op = optimizer.minimize(self.total_loss)
            # pre training: regress the policy's distribution parameters
            # onto fed targets with plain gradient descent
            self.dist_param_target_ph = placeholder(
                dtype=float32,
                shape=(None, self.policy.dist_params.shape[1]),
                name="dist_param_label_ph")
            self.pre_training_loss = mean_squared_error(
                self.dist_param_target_ph, self.policy.dist_params)
            pre_training_optimizer = GradientDescentOptimizer(
                learning_rate=self.pre_training_learning_rate_ph)
            self.pre_training_op = pre_training_optimizer.minimize(
                self.pre_training_loss)
            """utility nodes"""
            # inspect model weights
            self.trainable_variables = trainable_variables()
            # saver used by save() and restore()
            self.saver = Saver()
            # tensorboard summaries fetched by train_step()
            self.summary = merge([
                histogram("values", self.value),
                histogram("advantages", standardized_adv),
                histogram("actions", self.clipped_action),
                histogram("det_actions",
                          replace_nan(self.policy.det_action, 0.0)),
                histogram("value_targets", self.value_target_ph),
                scalar("policy_loss", self.policy_loss),
                scalar("value_loss", self.value_loss),
                scalar("entropy_loss", self.entropy_loss)
            ])
            # tensorboard summaries fetched by pre_train_step()
            self.pre_summary = merge([
                histogram("pretraining_actions", self.clipped_action),
                scalar("pretraining_loss", self.pre_training_loss)
            ])
            # initialization
            init = global_variables_initializer()
            self.sess.run(init)

    def predict(self, observation):
        """
        Evaluate the policy and value networks for the given observations.

        The observation is fed straight into ``observation_input``, i.e. the
        string-decoding part of the graph is bypassed.

        :param observation: input environment state
        :return: clipped action, distribution parameters of the policy,
            negative logits of the policy, value prediction
        """
        fetches = [
            self.clipped_action, self.policy.dist_params,
            self.policy.neg_logits, self.value
        ]
        action, dist_params, neg_logit, value = self.sess.run(
            fetches, {self.observation_input: observation})
        return action, dist_params, neg_logit, value

    def train_step(self,
                   observations,
                   actions,
                   old_neg_logits,
                   value_targets,
                   advantages,
                   obs_as_string=False,
                   learning_rate=None,
                   additional_fetches=None):
        """
        Run one optimisation step on the total loss.

        :param observations: fed to ``observation_string_ph`` when
            ``obs_as_string`` is true, otherwise to ``observation_input``
        :param actions: fed to ``action_ph``
        :param old_neg_logits: fed to the ``old_neg_logits`` placeholder
        :param value_targets: fed to ``value_target_ph``
        :param advantages: fed to ``advantage_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default learning rate for this
            step when not ``None``
        :param additional_fetches: optional list of extra tensors/ops to
            evaluate in the same run
        :return: the result of ``sess.run`` for the fetch list
            ``[training_op, summary, *additional_fetches]``
        """
        fetches = [self.training_op, self.summary] + (
            [] if additional_fetches is None else additional_fetches)
        obs_tensor = self.observation_string_ph if obs_as_string else self.observation_input
        feed_dict = {
            obs_tensor: observations,
            self.action_ph: actions,
            self.old_neg_logits: old_neg_logits,
            self.value_target_ph: value_targets,
            self.advantage_ph: advantages
        }
        if learning_rate is not None:
            feed_dict.update({self.learning_rate_ph: learning_rate})
        return self.sess.run(fetches, feed_dict)

    def pre_train_step(self,
                       observations,
                       dist_param_targets,
                       obs_as_string=False,
                       learning_rate=None,
                       additional_fetches=None):
        """
        Run one optimisation step on the pre-training loss.

        :param observations: fed to ``observation_string_ph`` when
            ``obs_as_string`` is true, otherwise to ``observation_input``
        :param dist_param_targets: fed to ``dist_param_target_ph``
        :param obs_as_string: selects which observation tensor is fed
        :param learning_rate: overrides the default pre-training learning
            rate for this step when not ``None``
        :param additional_fetches: optional list of extra tensors/ops to
            evaluate in the same run
        :return: the result of ``sess.run`` for the fetch list
            ``[pre_training_op, pre_summary, *additional_fetches]``
        """
        fetches = [self.pre_training_op, self.pre_summary] + (
            [] if additional_fetches is None else additional_fetches)
        obs_tensor = self.observation_string_ph if obs_as_string else self.observation_input
        feed_dict = {
            obs_tensor: observations,
            self.dist_param_target_ph: dist_param_targets
        }
        if learning_rate is not None:
            feed_dict.update(
                {self.pre_training_learning_rate_ph: learning_rate})
        return self.sess.run(fetches, feed_dict)

    def simple_save(self, path):
        """
        Export the session to ``path`` with ``observation_input`` as input
        "obs" and ``clipped_action`` as output "action".
        """
        with self.graph.as_default():
            # inside the method body this name refers to the function of
            # the same name visible at module level, not to this method
            simple_save(self.sess,
                        path,
                        inputs={"obs": self.observation_input},
                        outputs={"action": self.clipped_action})

    def save(self, path):
        """Save the session's variables to ``path`` using the saver."""
        with self.graph.as_default():
            self.saver.save(sess=self.sess, save_path=path)

    def restore(self, path):
        """Restore the session's variables from ``path`` using the saver."""
        with self.graph.as_default():
            self.saver.restore(sess=self.sess, save_path=path)

    def close_session(self):
        """Close the tensorflow session owned by this instance."""
        self.sess.close()

    def get_trainable_variables(self):
        """Return the current values of the graph's trainable variables."""
        return self.sess.run(self.trainable_variables)