def _read_latest_config_files(self, run_path_pairs):
    """Reads and returns the projector config files in every run directory.

    If no run-specific config exists, falls back to the default config
    provided in the root log directory.
    """
    default_config_fpath = os.path.join(
        self.logdir, metadata.PROJECTOR_FILENAME)
    default_config = ProjectorConfig()
    if tf.io.gfile.exists(default_config_fpath):
        with tf.io.gfile.GFile(default_config_fpath, "r") as f:
            file_content = f.read()
        text_format.Merge(file_content, default_config)
        # Relative metadata paths do not work with subdirs, so convert
        # any metadata paths to absolute paths.
        for embedding in default_config.embeddings:
            embedding.metadata_path = _rel_to_abs_asset_path(
                embedding.metadata_path, default_config_fpath)

    configs = {}
    config_fpaths = {}
    for run_name, assets_dir in run_path_pairs:
        config = ProjectorConfig()
        config_fpath = os.path.join(assets_dir, metadata.PROJECTOR_FILENAME)
        if tf.io.gfile.exists(config_fpath):
            with tf.io.gfile.GFile(config_fpath, "r") as f:
                file_content = f.read()
            text_format.Merge(file_content, config)
        elif tf.io.gfile.exists(default_config_fpath):
            config = default_config

        has_tensor_files = False
        for embedding in config.embeddings:
            if embedding.tensor_path:
                if not embedding.tensor_name:
                    embedding.tensor_name = os.path.basename(
                        embedding.tensor_path)
                has_tensor_files = True
                break

        if not config.model_checkpoint_path:
            # See if you can find a checkpoint file in the logdir.
            logdir = _assets_dir_to_logdir(assets_dir)
            ckpt_path = _find_latest_checkpoint(logdir)
            if not ckpt_path and not has_tensor_files:
                continue
            if ckpt_path:
                config.model_checkpoint_path = ckpt_path

        # Sanity check that the checkpoint file exists.
        if (config.model_checkpoint_path
                and _using_tf()
                and not tf.io.gfile.glob(config.model_checkpoint_path + "*")):
            logger.warning(
                'Checkpoint file "%s" not found',
                config.model_checkpoint_path,
            )
            continue
        configs[run_name] = config
        config_fpaths[run_name] = config_fpath
    return configs, config_fpaths
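# For reference, a minimal sketch of the projector_config.pbtxt shape that the
# reader above parses. Field names come from the ProjectorConfig proto in
# tensorboard.plugins.projector; the tensor name and metadata path below are
# hypothetical values used only for illustration.
from google.protobuf import text_format
from tensorboard.plugins.projector.projector_config_pb2 import ProjectorConfig

config = ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = "embedding/weights"  # hypothetical tensor name
embedding.metadata_path = "metadata.tsv"     # hypothetical metadata file
print(text_format.MessageToString(config))
# embeddings {
#   tensor_name: "embedding/weights"
#   metadata_path: "metadata.tsv"
# }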
def _read_latest_config_files(self, run_path_pairs):
    """Reads and returns the projector config files in every run directory."""
    configs = {}
    config_fpaths = {}
    for run_name, assets_dir in run_path_pairs:
        config = ProjectorConfig()
        config_fpath = os.path.join(assets_dir, metadata.PROJECTOR_FILENAME)
        if tf.io.gfile.exists(config_fpath):
            with tf.io.gfile.GFile(config_fpath, "r") as f:
                file_content = f.read()
            text_format.Merge(file_content, config)

        has_tensor_files = False
        for embedding in config.embeddings:
            if embedding.tensor_path:
                if not embedding.tensor_name:
                    embedding.tensor_name = os.path.basename(
                        embedding.tensor_path
                    )
                has_tensor_files = True
                break

        if not config.model_checkpoint_path:
            # See if you can find a checkpoint file in the logdir.
            logdir = _assets_dir_to_logdir(assets_dir)
            ckpt_path = _find_latest_checkpoint(logdir)
            if not ckpt_path and not has_tensor_files:
                continue
            if ckpt_path:
                config.model_checkpoint_path = ckpt_path

        # Sanity check that the checkpoint file exists.
        if (
            config.model_checkpoint_path
            and _using_tf()
            and not tf.io.gfile.glob(config.model_checkpoint_path + "*")
        ):
            logger.warning(
                'Checkpoint file "%s" not found',
                config.model_checkpoint_path,
            )
            continue
        configs[run_name] = config
        config_fpaths[run_name] = config_fpath
    return configs, config_fpaths
def _read_latest_config_files(self, run_path_pairs):
    """Reads and returns the projector config files in every run directory."""
    configs = {}
    config_fpaths = {}
    for run_name, assets_dir in run_path_pairs:
        config = ProjectorConfig()
        config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
        if tf.gfile.Exists(config_fpath):
            with tf.gfile.GFile(config_fpath, 'r') as f:
                file_content = f.read()
            text_format.Merge(file_content, config)

        has_tensor_files = False
        for embedding in config.embeddings:
            if embedding.tensor_path:
                if not embedding.tensor_name:
                    embedding.tensor_name = os.path.basename(embedding.tensor_path)
                has_tensor_files = True
                break

        if not config.model_checkpoint_path:
            # See if you can find a checkpoint file in the logdir.
            logdir = _assets_dir_to_logdir(assets_dir)
            ckpt_path = _find_latest_checkpoint(logdir)
            if not ckpt_path and not has_tensor_files:
                continue
            if ckpt_path:
                config.model_checkpoint_path = ckpt_path

        # Sanity check for the checkpoint file.
        if (config.model_checkpoint_path and
                not tf.train.checkpoint_exists(config.model_checkpoint_path)):
            tf.logging.warning('Checkpoint file "%s" not found',
                               config.model_checkpoint_path)
            continue
        configs[run_name] = config
        config_fpaths[run_name] = config_fpath
    return configs, config_fpaths
def _latest_checkpoints_changed(configs, run_path_pairs):
    """Returns True if the latest checkpoint has changed in any of the runs."""
    for run_name, assets_dir in run_path_pairs:
        if run_name not in configs:
            config = ProjectorConfig()
            config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
            if tf.io.gfile.exists(config_fpath):
                with tf.io.gfile.GFile(config_fpath, 'r') as f:
                    file_content = f.read()
                text_format.Merge(file_content, config)
        else:
            config = configs[run_name]

        # See if you can find a checkpoint file in the logdir.
        logdir = _assets_dir_to_logdir(assets_dir)
        ckpt_path = _find_latest_checkpoint(logdir)
        if not ckpt_path:
            continue
        if config.model_checkpoint_path != ckpt_path:
            return True
    return False
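# A minimal sketch of how the two helpers above might cooperate in a reload
# path. refresh_configs and the reader argument are hypothetical names; only
# _latest_checkpoints_changed and _read_latest_config_files come from this
# module.
def refresh_configs(reader, configs, config_fpaths, run_path_pairs):
    """Re-reads projector configs only when a newer checkpoint appeared."""
    if _latest_checkpoints_changed(configs, run_path_pairs):
        return reader._read_latest_config_files(run_path_pairs)
    return configs, config_fpaths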
def add_embedding(self, mat, metadata=None, label_img=None,
                  global_step=None, tag='default', metadata_header=None):
    """Add embedding projector data to summary.

    Args:
        mat (torch.Tensor or numpy.array): A matrix where each row is the
            feature vector of a data point
        metadata (list): A list of labels; each element will be converted
            to a string
        label_img (torch.Tensor): Images corresponding to each data point
        global_step (int): Global step value to record
        tag (string): Name for the embedding
        metadata_header (list): Optional column headers for the metadata TSV

    Shape:
        mat: :math:`(N, D)`, where N is the number of data points and D is
        the feature dimension

        label_img: :math:`(N, C, H, W)`

    Examples::

        import keyword
        import torch
        meta = []
        while len(meta) < 100:
            meta = meta + keyword.kwlist  # get some strings
        meta = meta[:100]

        for i, v in enumerate(meta):
            meta[i] = v + str(i)

        label_img = torch.rand(100, 3, 10, 32)
        for i in range(100):
            label_img[i] *= i / 100.0

        writer.add_embedding(torch.randn(100, 5), metadata=meta, label_img=label_img)
        writer.add_embedding(torch.randn(100, 5), label_img=label_img)
        writer.add_embedding(torch.randn(100, 5), metadata=meta)
    """
    torch._C._log_api_usage_once("tensorboard.logging.add_embedding")
    mat = make_np(mat)
    if global_step is None:
        global_step = 0
        # clear pbtxt?
    # Maybe we should encode the tag so slashes don't trip us up?
    # I don't think this will mess us up, but better safe than sorry.
    subdir = "%s/%s" % (str(global_step).zfill(5), self._encode(tag))
    save_path = os.path.join(self._get_file_writer().get_logdir(), subdir)
    fs = tf.io.gfile.get_filesystem(save_path)
    if fs.exists(save_path):
        if fs.isdir(save_path):
            print(
                'warning: Embedding dir exists, did you set global_step for add_embedding()?'
            )
        else:
            raise Exception(
                "Path: `%s` exists, but is a file. Cannot proceed." % save_path)
    else:
        fs.makedirs(save_path)

    if metadata is not None:
        assert mat.shape[0] == len(metadata), \
            '#labels should equal #data points'
        make_tsv(metadata, save_path, metadata_header=metadata_header)

    if label_img is not None:
        assert mat.shape[0] == label_img.shape[0], \
            '#images should equal #data points'
        make_sprite(label_img, save_path)

    assert mat.ndim == 2, \
        'mat should be 2D, where mat.size(0) is the number of data points'
    make_mat(mat, save_path)

    # The filesystem doesn't necessarily have append semantics, so we keep an
    # internal buffer and re-write the whole file after each embedding is
    # added.
    if not hasattr(self, "_projector_config"):
        self._projector_config = ProjectorConfig()
    embedding_info = get_embedding_info(
        metadata, label_img, fs, subdir, global_step, tag)
    self._projector_config.embeddings.extend([embedding_info])

    from google.protobuf import text_format
    config_pbtxt = text_format.MessageToString(self._projector_config)
    write_pbtxt(self._get_file_writer().get_logdir(), config_pbtxt)
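# For orientation, a small sketch of the on-disk layout that repeated
# add_embedding() calls produce: one "<zero-padded step>/<tag>" subdirectory
# per call (typically holding tensors.tsv, metadata.tsv, and sprite.png),
# plus a single projector_config.pbtxt at the logdir root. The logdir value
# here is hypothetical.
import os

logdir = "runs/exp1"  # hypothetical
for global_step, tag in [(0, "default"), (1, "default")]:
    subdir = "%s/%s" % (str(global_step).zfill(5), tag)
    print(os.path.join(logdir, subdir))
# runs/exp1/00000/default
# runs/exp1/00001/default
# runs/exp1/projector_config.pbtxt references every subdirectory above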
def train(self, dataset_list, config):
    """
    Args:
        dataset_list (list of <StockDataSet>)
        config (tf.app.flags.FLAGS)
    """
    assert len(dataset_list) > 0
    self.merged_sum = tf.compat.v1.summary.merge_all()

    # Set up the logs folder
    self.writer = tf.compat.v1.summary.FileWriter(
        os.path.join("./logs", self.model_name))
    self.writer.add_graph(self.sess.graph)

    if self.use_embed:
        # Set up embedding visualization
        # Format: tensorflow/tensorboard/plugins/projector/projector_config.proto
        projector_config = ProjectorConfig()

        # You can add multiple embeddings. Here we add only one.
        added_embed = projector_config.embeddings.add()
        added_embed.tensor_name = self.embed_matrix.name
        # Link this tensor to its metadata file (e.g. labels).
        shutil.copyfile(os.path.join(self.logs_dir, "metadata.tsv"),
                        os.path.join(self.model_logs_dir, "metadata.tsv"))
        added_embed.metadata_path = "metadata.tsv"

        # The next line writes a projector_config.pbtxt in the LOG_DIR.
        # TensorBoard will read this file during startup.
        projector.visualize_embeddings(self.writer, projector_config)

    tf.compat.v1.global_variables_initializer().run()

    # Merged test data of different stocks.
    merged_test_X = []
    merged_test_y = []
    merged_test_labels = []

    for label_, d_ in enumerate(dataset_list):
        merged_test_X += list(d_.test_X)
        merged_test_y += list(d_.test_y)
        merged_test_labels += [[label_]] * len(d_.test_X)

    merged_test_X = np.array(merged_test_X)
    merged_test_y = np.array(merged_test_y)
    merged_test_labels = np.array(merged_test_labels)

    print("len(merged_test_X) =", len(merged_test_X))
    print("len(merged_test_y) =", len(merged_test_y))
    print("len(merged_test_labels) =", len(merged_test_labels))

    test_data_feed = {
        self.learning_rate: 0.0,
        self.keep_prob: 1.0,
        self.inputs: merged_test_X,
        self.targets: merged_test_y,
        self.symbols: merged_test_labels,
    }

    global_step = 0

    num_batches = sum(len(d_.train_X) for d_ in dataset_list) // config.batch_size
    random.seed(time.time())

    # Select samples for plotting.
    sample_labels = range(min(config.sample_size, len(dataset_list)))
    sample_indices = {}
    for l in sample_labels:
        sym = dataset_list[l].stock_sym
        target_indices = np.array([
            i for i, sym_label in enumerate(merged_test_labels)
            if sym_label[0] == l
        ])
        sample_indices[sym] = target_indices
    print(sample_indices)

    print("Start training for stocks:", [d.stock_sym for d in dataset_list])
    for epoch in range(config.max_epoch):
        epoch_step = 0
        learning_rate = config.init_learning_rate * (
            config.learning_rate_decay ** max(
                float(epoch + 1 - config.init_epoch), 0.0))

        for label_, d_ in enumerate(dataset_list):
            for batch_X, batch_y in d_.generate_one_epoch(config.batch_size):
                global_step += 1
                epoch_step += 1
                batch_labels = np.array([[label_]] * len(batch_X))
                train_data_feed = {
                    self.learning_rate: learning_rate,
                    self.keep_prob: config.keep_prob,
                    self.inputs: batch_X,
                    self.targets: batch_y,
                    self.symbols: batch_labels,
                }
                train_loss, _, train_merged_sum = self.sess.run(
                    [self.loss, self.optim, self.merged_sum],
                    train_data_feed)
                self.writer.add_summary(train_merged_sum,
                                        global_step=global_step)

                if np.mod(global_step,
                          len(dataset_list) * 200 / config.input_size) == 1:
                    test_loss, test_pred = self.sess.run(
                        [self.loss_test, self.pred], test_data_feed)

                    print("Step:%d [Epoch:%d] [Learning rate: %.6f] "
                          "train_loss:%.6f test_loss:%.6f" % (
                              global_step, epoch, learning_rate,
                              train_loss, test_loss))

                    # Plot samples
                    for sample_sym, indices in sample_indices.items():
                        image_path = os.path.join(
                            self.model_plots_dir,
                            "{}_epoch{:02d}_step{:04d}.png".format(
                                sample_sym, epoch, epoch_step))
                        sample_preds = test_pred[indices]
                        sample_truth = merged_test_y[indices]
                        self.plot_samples(sample_preds, sample_truth,
                                          image_path, stock_sym=sample_sym)

                    self.save(global_step)

    final_pred, final_loss = self.sess.run([self.pred, self.loss],
                                           test_data_feed)

    # Save the final model
    self.save(global_step)
    return final_pred
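# A small sketch of the learning-rate schedule used in train() above, with
# hypothetical values for the three config fields: the rate stays at
# init_learning_rate for the first init_epoch epochs, then decays
# geometrically per epoch.
init_learning_rate, learning_rate_decay, init_epoch = 0.001, 0.99, 5
for epoch in (0, 4, 5, 20):
    lr = init_learning_rate * learning_rate_decay ** max(
        float(epoch + 1 - init_epoch), 0.0)
    print(epoch, lr)  # 0.001, 0.001, 0.00099, then ~0.000852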