Example #1
    def _read_latest_config_files(self, run_path_pairs):
        """Reads and returns the projector config files in every run
        directory."""
        """If no specific config exists, use the default config provided in
        the root directory."""
        default_config_fpath = os.path.join(self.logdir,
                                            metadata.PROJECTOR_FILENAME)
        default_config = ProjectorConfig()
        if tf.io.gfile.exists(default_config_fpath):
            with tf.io.gfile.GFile(default_config_fpath, "r") as f:
                file_content = f.read()
            text_format.Merge(file_content, default_config)
            # Relative metadata paths do not work with subdirs, so convert
            # any metadata paths to absolute paths.
            for embedding in default_config.embeddings:
                embedding.metadata_path = _rel_to_abs_asset_path(
                    embedding.metadata_path, default_config_fpath)
        configs = {}
        config_fpaths = {}
        for run_name, assets_dir in run_path_pairs:
            config = ProjectorConfig()
            config_fpath = os.path.join(assets_dir,
                                        metadata.PROJECTOR_FILENAME)
            if tf.io.gfile.exists(config_fpath):
                with tf.io.gfile.GFile(config_fpath, "r") as f:
                    file_content = f.read()
                text_format.Merge(file_content, config)
            elif tf.io.gfile.exists(default_config_fpath):
                config = default_config
            has_tensor_files = False
            for embedding in config.embeddings:
                if embedding.tensor_path:
                    if not embedding.tensor_name:
                        embedding.tensor_name = os.path.basename(
                            embedding.tensor_path)
                    has_tensor_files = True
                    break

            if not config.model_checkpoint_path:
                # See if you can find a checkpoint file in the logdir.
                logdir = _assets_dir_to_logdir(assets_dir)
                ckpt_path = _find_latest_checkpoint(logdir)
                if not ckpt_path and not has_tensor_files:
                    continue
                if ckpt_path:
                    config.model_checkpoint_path = ckpt_path

            # Sanity check for the checkpoint file existing.
            if (config.model_checkpoint_path and _using_tf() and
                    not tf.io.gfile.glob(config.model_checkpoint_path + "*")):
                logger.warning(
                    'Checkpoint file "%s" not found',
                    config.model_checkpoint_path,
                )
                continue
            configs[run_name] = config
            config_fpaths[run_name] = config_fpath
        return configs, config_fpaths
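For context, projector_config.pbtxt holds a ProjectorConfig message in protobuf text format, which is what the text_format.Merge call above parses. Below is a minimal round-trip sketch, not part of the original example; the tensor name, metadata path, and checkpoint prefix are placeholders.

# Hedged sketch: serialize a ProjectorConfig to the text format stored in
# projector_config.pbtxt, then parse it back with text_format.Merge,
# mirroring the reader above. All names and paths are placeholders.
from google.protobuf import text_format
from tensorboard.plugins.projector import ProjectorConfig

config = ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = "word_embedding"         # placeholder tensor name
embedding.metadata_path = "metadata.tsv"         # per-row labels, written separately
config.model_checkpoint_path = "model.ckpt-123"  # placeholder checkpoint prefix

pbtxt = text_format.MessageToString(config)      # contents of projector_config.pbtxt

parsed = ProjectorConfig()
text_format.Merge(pbtxt, parsed)                 # same call used in the reader
assert parsed.embeddings[0].metadata_path == "metadata.tsv"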
Example #2
    def _read_latest_config_files(self, run_path_pairs):
        """Reads and returns the projector config files in every run
        directory."""
        configs = {}
        config_fpaths = {}
        for run_name, assets_dir in run_path_pairs:
            config = ProjectorConfig()
            config_fpath = os.path.join(assets_dir, metadata.PROJECTOR_FILENAME)
            if tf.io.gfile.exists(config_fpath):
                with tf.io.gfile.GFile(config_fpath, "r") as f:
                    file_content = f.read()
                text_format.Merge(file_content, config)
            has_tensor_files = False
            for embedding in config.embeddings:
                if embedding.tensor_path:
                    if not embedding.tensor_name:
                        embedding.tensor_name = os.path.basename(
                            embedding.tensor_path
                        )
                    has_tensor_files = True
                    break

            if not config.model_checkpoint_path:
                # See if you can find a checkpoint file in the logdir.
                logdir = _assets_dir_to_logdir(assets_dir)
                ckpt_path = _find_latest_checkpoint(logdir)
                if not ckpt_path and not has_tensor_files:
                    continue
                if ckpt_path:
                    config.model_checkpoint_path = ckpt_path

            # Sanity check for the checkpoint file existing.
            if (
                config.model_checkpoint_path
                and _using_tf()
                and not tf.io.gfile.glob(config.model_checkpoint_path + "*")
            ):
                logger.warning(
                    'Checkpoint file "%s" not found',
                    config.model_checkpoint_path,
                )
                continue
            configs[run_name] = config
            config_fpaths[run_name] = config_fpath
        return configs, config_fpaths
Example #3
    def add_embedding(self,
                      mat,
                      metadata=None,
                      label_img=None,
                      global_step=None,
                      tag='default',
                      metadata_header=None):
        torch._C._log_api_usage_once("tensorboard.logging.add_embedding")
        mat = make_np(mat)
        if global_step is None:
            global_step = 0
            # clear pbtxt?

        # Maybe we should encode the tag so slashes don't trip us up?
        # I don't think this will mess us up, but better safe than sorry.
        subdir = "%s/%s" % (str(global_step).zfill(5), self._encode(tag))
        save_path = os.path.join(self._get_file_writer().get_logdir(), subdir)

        fs = tf.io.gfile.get_filesystem(save_path)
        if fs.exists(save_path):
            if fs.isdir(save_path):
                print(
                    'warning: Embedding dir exists, did you set global_step for add_embedding()?'
                )
            else:
                raise Exception(
                    "Path: `%s` exists, but is a file. Cannot proceed." %
                    save_path)
        else:
            fs.makedirs(save_path)

        if metadata is not None:
            assert mat.shape[0] == len(
                metadata), '#labels should equal with #data points'
            make_tsv(metadata, save_path, metadata_header=metadata_header)

        if label_img is not None:
            assert mat.shape[0] == label_img.shape[
                0], '#images should equal with #data points'
            make_sprite(label_img, save_path)

        assert mat.ndim == 2, 'mat should be 2D, where mat.size(0) is the number of data points'
        make_mat(mat, save_path)

        # Filesystem doesn't necessarily have append semantics, so we store an
        # internal buffer to append to and re-write whole file after each
        # embedding is added
        if not hasattr(self, "_projector_config"):
            self._projector_config = ProjectorConfig()
        embedding_info = get_embedding_info(metadata, label_img, fs, subdir,
                                            global_step, tag)
        self._projector_config.embeddings.extend([embedding_info])

        from google.protobuf import text_format
        config_pbtxt = text_format.MessageToString(self._projector_config)
        write_pbtxt(self._get_file_writer().get_logdir(), config_pbtxt)
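A hedged usage sketch for the add_embedding method above, using the public torch.utils.tensorboard API; the log directory, tags, and data are placeholders. Because the method re-writes projector_config.pbtxt after every call (see the comment about append semantics), repeated calls accumulate entries in the same file.

# Hedged usage sketch; data, tags, and the run directory are placeholders.
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/embedding_demo")  # placeholder log directory
features = torch.randn(100, 16)                # (N, D) feature matrix
labels = [str(i % 10) for i in range(100)]     # one label string per row

# Each call appends an EmbeddingInfo entry and re-writes projector_config.pbtxt.
writer.add_embedding(features, metadata=labels, tag="digits", global_step=0)
writer.add_embedding(features * 2.0, metadata=labels, tag="digits_scaled", global_step=0)
writer.close()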
Example #4
  def _read_latest_config_files(self, run_path_pairs):
    """Reads and returns the projector config files in every run directory."""
    configs = {}
    config_fpaths = {}
    for run_name, assets_dir in run_path_pairs:
      config = ProjectorConfig()
      config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
      if tf.gfile.Exists(config_fpath):
        with tf.gfile.GFile(config_fpath, 'r') as f:
          file_content = f.read()
        text_format.Merge(file_content, config)
      has_tensor_files = False
      for embedding in config.embeddings:
        if embedding.tensor_path:
          if not embedding.tensor_name:
            embedding.tensor_name = os.path.basename(embedding.tensor_path)
          has_tensor_files = True
          break

      if not config.model_checkpoint_path:
        # See if you can find a checkpoint file in the logdir.
        logdir = _assets_dir_to_logdir(assets_dir)
        ckpt_path = _find_latest_checkpoint(logdir)
        if not ckpt_path and not has_tensor_files:
          continue
        if ckpt_path:
          config.model_checkpoint_path = ckpt_path

      # Sanity check for the checkpoint file.
      if (config.model_checkpoint_path and
          not tf.train.checkpoint_exists(config.model_checkpoint_path)):
        tf.logging.warning('Checkpoint file "%s" not found',
                           config.model_checkpoint_path)
        continue
      configs[run_name] = config
      config_fpaths[run_name] = config_fpath
    return configs, config_fpaths
def _latest_checkpoints_changed(configs, run_path_pairs):
  """Returns true if the latest checkpoint has changed in any of the runs."""
  for run_name, assets_dir in run_path_pairs:
    if run_name not in configs:
      config = ProjectorConfig()
      config_fpath = os.path.join(assets_dir, PROJECTOR_FILENAME)
      if tf.io.gfile.exists(config_fpath):
        with tf.io.gfile.GFile(config_fpath, 'r') as f:
          file_content = f.read()
        text_format.Merge(file_content, config)
    else:
      config = configs[run_name]

    # See if you can find a checkpoint file in the logdir.
    logdir = _assets_dir_to_logdir(assets_dir)
    ckpt_path = _find_latest_checkpoint(logdir)
    if not ckpt_path:
      continue
    if config.model_checkpoint_path != ckpt_path:
      return True
  return False
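_assets_dir_to_logdir and _find_latest_checkpoint are referenced but not shown in these snippets. The sketch below is an assumption about what such helpers could look like, built on tf.train.latest_checkpoint; the real implementations may differ.

# Hedged sketch of the helpers referenced above; an assumption, not the
# actual implementation.
import os
import tensorflow as tf

def _assets_dir_to_logdir(assets_dir):
    # Assets conventionally live under <logdir>/plugins/<plugin_name>, so
    # stripping everything from the plugins directory onward recovers the logdir.
    sub_path = os.path.sep + "plugins" + os.path.sep
    if sub_path in assets_dir:
        return assets_dir.split(sub_path)[0]
    return assets_dir

def _find_latest_checkpoint(dir_path):
    try:
        # Reads the "checkpoint" state file in dir_path and returns the most
        # recent checkpoint prefix, or None if there is none.
        return tf.train.latest_checkpoint(dir_path)
    except tf.errors.NotFoundError:
        return None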
Example #6
    def add_embedding(self,
                      mat,
                      metadata=None,
                      label_img=None,
                      global_step=None,
                      tag='default',
                      metadata_header=None):
        """Add embedding projector data to summary.

        Args:
            mat (torch.Tensor or numpy.array): A matrix where each row is the feature vector of a data point
            metadata (list): A list of labels; each element will be converted to a string
            label_img (torch.Tensor): Images corresponding to each data point
            global_step (int): Global step value to record
            tag (string): Name for the embedding
        Shape:
            mat: :math:`(N, D)`, where N is the number of data points and D is the feature dimension

            label_img: :math:`(N, C, H, W)`

        Examples::

            import keyword
            import torch
            meta = []
            while len(meta)<100:
                meta = meta+keyword.kwlist # get some strings
            meta = meta[:100]

            for i, v in enumerate(meta):
                meta[i] = v+str(i)

            label_img = torch.rand(100, 3, 10, 32)
            for i in range(100):
                label_img[i]*=i/100.0

            writer.add_embedding(torch.randn(100, 5), metadata=meta, label_img=label_img)
            writer.add_embedding(torch.randn(100, 5), label_img=label_img)
            writer.add_embedding(torch.randn(100, 5), metadata=meta)
        """
        torch._C._log_api_usage_once("tensorboard.logging.add_embedding")
        mat = make_np(mat)
        if global_step is None:
            global_step = 0
            # clear pbtxt?

        # Maybe we should encode the tag so slashes don't trip us up?
        # I don't think this will mess us up, but better safe than sorry.
        subdir = "%s/%s" % (str(global_step).zfill(5), self._encode(tag))
        save_path = os.path.join(self._get_file_writer().get_logdir(), subdir)

        fs = tf.io.gfile.get_filesystem(save_path)
        if fs.exists(save_path):
            if fs.isdir(save_path):
                print(
                    'warning: Embedding dir exists, did you set global_step for add_embedding()?'
                )
            else:
                raise Exception(
                    "Path: `%s` exists, but is a file. Cannot proceed." %
                    save_path)
        else:
            fs.makedirs(save_path)

        if metadata is not None:
            assert mat.shape[0] == len(
                metadata), '#labels should equal with #data points'
            make_tsv(metadata, save_path, metadata_header=metadata_header)

        if label_img is not None:
            assert mat.shape[0] == label_img.shape[
                0], '#images should equal with #data points'
            make_sprite(label_img, save_path)

        assert mat.ndim == 2, 'mat should be 2D, where mat.size(0) is the number of data points'
        make_mat(mat, save_path)

        # Filesystem doesn't necessarily have append semantics, so we store an
        # internal buffer to append to and re-write whole file after each
        # embedding is added
        if not hasattr(self, "_projector_config"):
            self._projector_config = ProjectorConfig()
        embedding_info = get_embedding_info(metadata, label_img, fs, subdir,
                                            global_step, tag)
        self._projector_config.embeddings.extend([embedding_info])

        from google.protobuf import text_format
        config_pbtxt = text_format.MessageToString(self._projector_config)
        write_pbtxt(self._get_file_writer().get_logdir(), config_pbtxt)
Example #7
    def train(self, dataset_list, config):
        """
        Args:
            dataset_list (<StockDataSet>)
            config (tf.app.flags.FLAGS)
        """
        assert len(dataset_list) > 0
        self.merged_sum = tf.compat.v1.summary.merge_all()

        # Set up the logs folder
        self.writer = tf.compat.v1.summary.FileWriter(
            os.path.join("./logs", self.model_name))
        self.writer.add_graph(self.sess.graph)

        if self.use_embed:
            # Set up embedding visualization
            # Format: tensorflow/tensorboard/plugins/projector/projector_config.proto
            projector_config = ProjectorConfig()

            # You can add multiple embeddings. Here we add only one.
            added_embed = projector_config.embeddings.add()
            added_embed.tensor_name = self.embed_matrix.name
            # Link this tensor to its metadata file (e.g. labels).
            shutil.copyfile(os.path.join(self.logs_dir, "metadata.tsv"),
                            os.path.join(self.model_logs_dir, "metadata.tsv"))
            added_embed.metadata_path = "metadata.tsv"

            # The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
            # read this file during startup.
            projector.visualize_embeddings(self.writer, projector_config)

        tf.compat.v1.global_variables_initializer().run()

        # Merged test data of different stocks.
        merged_test_X = []
        merged_test_y = []
        merged_test_labels = []

        for label_, d_ in enumerate(dataset_list):
            merged_test_X += list(d_.test_X)
            merged_test_y += list(d_.test_y)
            merged_test_labels += [[label_]] * len(d_.test_X)

        merged_test_X = np.array(merged_test_X)
        merged_test_y = np.array(merged_test_y)
        merged_test_labels = np.array(merged_test_labels)

        print("len(merged_test_X) =", len(merged_test_X))
        print("len(merged_test_y) =", len(merged_test_y))
        print("len(merged_test_labels) =", len(merged_test_labels))

        test_data_feed = {
            self.learning_rate: 0.0,
            self.keep_prob: 1.0,
            self.inputs: merged_test_X,
            self.targets: merged_test_y,
            self.symbols: merged_test_labels,
        }

        global_step = 0

        num_batches = sum(len(d_.train_X)
                          for d_ in dataset_list) // config.batch_size
        random.seed(time.time())

        # Select samples for plotting.
        sample_labels = range(min(config.sample_size, len(dataset_list)))
        sample_indices = {}
        for l in sample_labels:
            sym = dataset_list[l].stock_sym
            target_indices = np.array([
                i for i, sym_label in enumerate(merged_test_labels)
                if sym_label[0] == l
            ])
            sample_indices[sym] = target_indices
        print(sample_indices)

        print("Start training for stocks:",
              [d.stock_sym for d in dataset_list])
        for epoch in range(config.max_epoch):
            epoch_step = 0
            learning_rate = config.init_learning_rate * (
                config.learning_rate_decay**max(
                    float(epoch + 1 - config.init_epoch), 0.0))

            for label_, d_ in enumerate(dataset_list):
                for batch_X, batch_y in d_.generate_one_epoch(
                        config.batch_size):
                    global_step += 1
                    epoch_step += 1
                    batch_labels = np.array([[label_]] * len(batch_X))
                    train_data_feed = {
                        self.learning_rate: learning_rate,
                        self.keep_prob: config.keep_prob,
                        self.inputs: batch_X,
                        self.targets: batch_y,
                        self.symbols: batch_labels,
                    }
                    train_loss, _, train_merged_sum = self.sess.run(
                        [self.loss, self.optim, self.merged_sum],
                        train_data_feed)
                    self.writer.add_summary(train_merged_sum,
                                            global_step=global_step)

                    if np.mod(global_step,
                              len(dataset_list) * 200 /
                              config.input_size) == 1:
                        test_loss, test_pred = self.sess.run(
                            [self.loss_test, self.pred], test_data_feed)

                        print(
                            "Step:%d [Epoch:%d] [Learning rate: %.6f] train_loss:%.6f test_loss:%.6f"
                            % (global_step, epoch, learning_rate, train_loss,
                               test_loss))

                        # Plot samples
                        for sample_sym, indices in sample_indices.items():
                            image_path = os.path.join(
                                self.model_plots_dir,
                                "{}_epoch{:02d}_step{:04d}.png".format(
                                    sample_sym, epoch, epoch_step))
                            sample_preds = test_pred[indices]
                            sample_truth = merged_test_y[indices]
                            self.plot_samples(sample_preds,
                                              sample_truth,
                                              image_path,
                                              stock_sym=sample_sym)

                        self.save(global_step)

        final_pred, final_loss = self.sess.run([self.pred, self.loss],
                                               test_data_feed)

        # Save the final model
        self.save(global_step)
        return final_pred
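The embedding-visualization block inside train() above can be distilled into a standalone sketch. This is a hedged, minimal TF1-compat example with placeholder names, shapes, and paths, not the original project's code; note that depending on the TensorBoard version, projector.visualize_embeddings accepts either a FileWriter or a logdir string.

# Hedged standalone sketch of the projector setup used in train() above.
# Variable names, shapes, and paths are placeholders.
import os
import tensorflow as tf
from tensorboard.plugins import projector

tf.compat.v1.disable_eager_execution()

log_dir = "./logs/projector_demo"
embedding_var = tf.compat.v1.get_variable(
    "stock_embedding", shape=[50, 8],
    initializer=tf.compat.v1.random_normal_initializer())

writer = tf.compat.v1.summary.FileWriter(log_dir)
config = projector.ProjectorConfig()
added_embed = config.embeddings.add()
added_embed.tensor_name = embedding_var.name
added_embed.metadata_path = "metadata.tsv"  # one label per row, written separately
projector.visualize_embeddings(writer, config)  # writes projector_config.pbtxt in log_dir

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    # TensorBoard reads the embedding values from a checkpoint in log_dir.
    tf.compat.v1.train.Saver([embedding_var]).save(
        sess, os.path.join(log_dir, "model.ckpt"), global_step=0)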