Example #1
    def visualisation(self):
        """
        Main process of visualisation
        Returns:

        """
        # get embedding_arrays, sprite_images, meta_data
        embedding, imgs, labels = self.process_embedding()
        self.create_sprite_image(imgs)
        self.create_meta_data(labels)

        emb = tf.Variable(embedding, name=TENSOR_NAME)

        # embedding configs
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = emb.name

        embedding.metadata_path = META_FILE
        embedding.sprite.image_path = SPRITE_FILE
        embedding.sprite.single_image_dim.extend([32, 32])

        # save data
        summary_writer = tf.summary.FileWriter(LOG_DIR)
        projector.visualize_embeddings(summary_writer, config)

        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.save(sess, os.path.join(LOG_DIR, "emb"),
                   self.predictor.global_step)

        summary_writer.close()

    def initialize_training(self):
        optimizer = tf.train.AdamOptimizer()
        self.train = optimizer.minimize(self.loss)
        self.sess = tf.Session()
        with self.sess.as_default():
            tf.global_variables_initializer().run()

        variable_summaries('alpha', self.alpha)
        with tf.name_scope('objective'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('priors', self.log_prior)
            tf.summary.scalar('ll_pos', self.ll_pos)
            tf.summary.scalar('ll_neg', self.ll_neg)
        self.summaries = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)
        self.saver = tf.train.Saver()
        config = projector.ProjectorConfig()
        alpha = config.embeddings.add()
        alpha.tensor_name = 'model/embeddings/alpha'
        alpha.metadata_path = '../vocab.tsv'
        if not self.dynamic:
            rho = config.embeddings.add()
            rho.tensor_name = 'model/embeddings/rho'
            rho.metadata_path = '../vocab.tsv'
        else:
            for t in range(self.T):
                rho = config.embeddings.add()
                rho.tensor_name = 'model/embeddings/rho_'+str(t)
                rho.metadata_path = '../vocab.tsv'
        projector.visualize_embeddings(self.train_writer, config)
Example #3
def register_embedding(weights, labels, log_dir) -> None:
    """Saves a metadata file (labels) and a checkpoint (derived from weights)
    and configures the Embedding Projector to read from the appropriate locations.

    Args:
      weights: tf.Variable with the weights of the embedding layer to be displayed.
      labels: list of labels corresponding to the weights.
      log_dir: Directory into which to store the config file, as a `str`.
    """

    # Create a checkpoint from the embedding; the checkpoint key ("embedding")
    # becomes part of the tensor name that TensorBoard looks up.
    checkpoint = tf.train.Checkpoint(embedding=weights)
    checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

    # Save labels separately in a line-by-line manner.
    with open(os.path.join(log_dir, METADATA_FNAME), "w") as f:
        for label in labels:
            f.write("{}\n".format(label))

    # Set up config
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = METADATA_FNAME
    projector.visualize_embeddings(log_dir, config)
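A minimal usage sketch for the function above, assuming TensorFlow 2.x with os, numpy as np, tensorflow as tf, and the projector plugin already imported; the sample data and constant values below are illustrative assumptions, not part of the original example.

# Hypothetical usage of register_embedding (all values below are assumptions).
LOG_DIR = "logs/emb"
METADATA_FNAME = "metadata.tsv"  # assumed value of the module-level constant
os.makedirs(LOG_DIR, exist_ok=True)
weights = tf.Variable(np.random.rand(100, 16).astype("float32"))
labels = [f"item_{i}" for i in range(100)]
register_embedding(weights, labels, LOG_DIR)
# View with: tensorboard --logdir logs/emb (open the PROJECTOR tab).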
def visualisation(vectors):
    """
    param: collection of high-dimensional vectors to visualize, list, [SAMPLE_NUM, dim]
    return: None
    """
    # PROJECTOR visualizes TensorFlow variables, so wrap the vectors in one.
    y = tf.Variable(vectors)
    checkpoint = tf.train.Checkpoint(embedding=y)
    checkpoint.save(os.path.join(LOG_DIR, "embedding.ckpt"))

    # projector.ProjectorConfig helps generate the log file.
    config = projector.ProjectorConfig()
    # Add one embedding result to visualize.
    embedding = config.embeddings.add()
    # Name of the TensorFlow variable this embedding corresponds to.
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"

    # Metadata file with a label for each point. This setting is optional; if
    # it is not provided, every point is rendered in the same color.
    # embedding.metadata_path = META_FILE

    # Sprite image. Also optional; if no sprite image is provided, each point
    # is drawn as a small dot rather than an image thumbnail.
    embedding.sprite.image_path = SPRITE_FILE
    # Used to crop the correct original image out of the sprite image.
    embedding.sprite.single_image_dim.extend([IMAGE_SIZE, IMAGE_SIZE])

    # Write everything PROJECTOR needs into the log directory.
    projector.visualize_embeddings(LOG_DIR, config)
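A hypothetical call for the function above; LOG_DIR, SPRITE_FILE, and IMAGE_SIZE are module-level constants assumed to be defined elsewhere, and the vectors are random stand-ins.

# Hypothetical call (the vector data is an assumption).
vectors = np.random.rand(100, 128).astype("float32")
visualisation(vectors)
# Then point TensorBoard at LOG_DIR: tensorboard --logdir <LOG_DIR>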

    def write_embeddings(self, Wv, name="WordVectors"):
        """Write embedding matrix to the right place.

        Args:
          Wv: (numpy.ndarray) |V| x d matrix of word embeddings
          name: (str) name for the embedding variable in the checkpoint
        """
        with tf.Graph().as_default(), tf.Session() as session:
            ##
            # Feed embeddings to tf, and save.
            embedding_var = tf.Variable(Wv, name=name, dtype=tf.float32)
            session.run(tf.global_variables_initializer())

            saver = tf.train.Saver()
            saver.save(session, self.CHECKPOINT_FILE, 0)

            ##
            # Save metadata
            summary_writer = tf.summary.FileWriter(self.LOGDIR)
            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            embedding.tensor_name = embedding_var.name
            embedding.metadata_path = self.VOCAB_FILE_BASE
            projector.visualize_embeddings(summary_writer, config)

        msg = "Saved {s0:d} x {s1:d} embedding matrix '{name}'"
        msg += " to LOGDIR='{logdir}'"
        print(msg.format(s0=Wv.shape[0], s1=Wv.shape[1], name=name,
                         logdir=self.LOGDIR))

        print("To view, run:")
        print("\n  tensorboard --logdir=\"{logdir}\"\n".format(logdir=self.LOGDIR))
        print("and navigate to the \"Embeddings\" tab in the web interface.")
Example #6
def _visualize():
    log.info("Visualizing all samples with tensorboard/t-sne...")
    samples = glob(cfg.Paths.samples + "/*sample*.npz")
    metadata = os.path.join(cfg.Paths.latent_space, 'metadata.txt')
    with open(metadata, 'w') as m:
        vecs = []
        for sample in samples:
            sample_name = sample.split(os.sep)[-1]
            composer = sample_name.split("_")[0]
            m.write(f"{composer}\n")
            npz = np.load(sample)
            npz = npz['sample']
            npz = dataset.preprocess_single(npz)
            encoding = best_encoder.predict(npz)
            vecs.append(np.squeeze(encoding))
    c = projector.ProjectorConfig()
    vecs = np.asarray(vecs)
    vecs = tf.Variable(vecs, trainable=False, name='vectors')
    embed = c.embeddings.add()
    embed.tensor_name = vecs.name
    embed.metadata_path = metadata
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(cfg.Paths.latent_space, 'meta.ckpt'))
    writer = tf.summary.FileWriter(cfg.Paths.latent_space, sess.graph)
    projector.visualize_embeddings(writer, c)
    sess.close()

def _get_projector_data(user_embedding, user_indices, item_embedding,
                        item_indices):
    """Samples the given embeddings, joins them, and creates a projector config.

    Args:
      user_embedding: a (num_users x embedding_dim) embedding of users.
      user_indices: the indices to take from the user embedding.
      item_embedding: a (num_items x embedding_dim) embedding of items.
      item_indices: the indices to take from the item embedding.

    Returns:
      A tuple of (sample, config):
        sample: a tensor of samples of the user and item embeddings.
        config: a ProjectorConfig for the sample.
    """
    user_sample = tf.gather(user_embedding, user_indices)
    item_sample = tf.gather(item_embedding, item_indices)
    combined_samples = tf.concat([user_sample, item_sample], 0)
    sample = tf.get_variable(constants.PROJECTOR_NAME, combined_samples.shape)
    sample = tf.assign(sample, combined_samples)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = constants.PROJECTOR_NAME
    embedding.metadata_path = constants.PROJECTOR_PATH
    return sample, config
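A hedged sketch of consuming the returned pair in a TF1-style session; log_dir and the four input tensors are assumptions, and constants.PROJECTOR_NAME / constants.PROJECTOR_PATH must already be defined as in the original.

# Hypothetical TF1-style wiring (inputs and log_dir are assumptions).
sample, config = _get_projector_data(user_emb, user_idx, item_emb, item_idx)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(sample)  # copies the gathered rows into the projector variable
    tf.train.Saver().save(sess, os.path.join(log_dir, "projector.ckpt"))
    projector.visualize_embeddings(tf.summary.FileWriter(log_dir), config)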
def log_tensorboard(embedding_vectors, embedding_name, log_dir, metadata=None):

    # The original snippet referenced an undefined `metadata_file`; a
    # conventional .tsv filename is assumed here.
    metadata_file = 'metadata.tsv'

    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)

    tensor_embedding = tf.Variable(embedding_vectors, name=embedding_name)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = tensor_embedding.name

    # set metadata
    if metadata is not None:
        with open(os.path.join(log_dir, metadata_file), 'w') as f:
            for label in metadata:
                f.write(f'{label}\n')

        embedding.metadata_path = metadata_file

    # Specify sprite for image embeddings
    # embedding.sprite.image_path = path_for sprites image
    # embedding.sprite.single_image_dim.extend([28, 28]) sprite thumbnail size

    projector.visualize_embeddings(log_dir, config)
    saver = tf.compat.v1.train.Saver([tensor_embedding])  # must pass a list or dict
    # Save the checkpoint inside log_dir so TensorBoard can find it (the
    # original passed log_dir itself as save_path, which writes beside it).
    saver.save(sess=None, global_step=0,
               save_path=os.path.join(log_dir, 'embedding.ckpt'))
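A hypothetical call for the function above; the embedding matrix, name, and paths are illustrative assumptions (eager TF2 execution is assumed so the compat Saver can run without a session).

# Hypothetical call (data, name, and paths are assumptions).
vectors = np.random.rand(50, 8).astype("float32")
labels = [f"label_{i}" for i in range(50)]
log_tensorboard(vectors, "my_embedding", "logs/projector", metadata=labels)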
Example #9
def generate_embedding_op(vec, N, spritesize=50):
    """
    Add a Variable to the graph to store embeddings for some of your data points.
    
    :vec: tensor in your graph that outputs embedded vectors
    :N: number of data points you're storing
    :spritesize: number of pixels for your sprites
    
    Returns
    :store_embeddings: graph op to update your embedding vector
    :config: projector config
    """
    embed_dummy = tf.get_variable("dense_embeddings",
                                  shape=[N, vec.get_shape().as_list()[1]],
                                  initializer=tf.initializers.random_uniform())
    store_embeddings = tf.assign(embed_dummy, vec)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embed_dummy.name
    embedding.metadata_path = "metadata.tsv"
    embedding.sprite.image_path = "sprites.png"
    embedding.sprite.single_image_dim.extend([spritesize, spritesize])

    return store_embeddings, config
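A hedged sketch of wiring the returned op and config into a session; vec, log_dir, and the checkpoint path are assumptions, and vec is assumed computable without extra feeds.

# Hypothetical wiring (log_dir is an assumption; vec must be computable here).
store_op, config = generate_embedding_op(vec, N=1000, spritesize=50)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(store_op)  # writes the embedded vectors into the dummy variable
    tf.train.Saver().save(sess, os.path.join(log_dir, "embed.ckpt"))
    projector.visualize_embeddings(tf.summary.FileWriter(log_dir), config)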
Example #10
def register_embedding(feature, img_data, labels, log_dir) -> None:
    # register_embedding does two things: 1. save the feature variable in a
    # ckpt file, 2. write the projector_config.pbtxt file.
    # Prepare a folder for the config files.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Prepare a variable to hold the feature part of the embedding triple.
    # The name must be "embedding" (don't ask why, or how I found out).
    embedding_name = "embedding"
    embedding_var = K.variable(feature, name=embedding_name)
    # 1. Save the embedding variable in the ckpt file, using a checkpoint
    # whose `embedding` key is the variable we just created.
    checkpoint = tf.train.Checkpoint(embedding=embedding_var)
    checkpoint.save(os.path.join(log_dir, "em.ckpt"))
    # Create the sprite image file and the label metadata file.
    sprite_and_meta_writer(img_data, labels, log_dir)
    # 2. Configure the projector (projector_config.pbtxt).
    config = projector.ProjectorConfig()
    # `embeddings` is a repeated field, so call add() to create a new entry.
    embedding = config.embeddings.add()
    # The name of the tensor must carry the `/.ATTRIBUTES/VARIABLE_VALUE`
    # suffix; do not forget it!
    embedding.tensor_name = embedding_name + "/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = METADATA_FNAME
    embedding.sprite.image_path = SPRITES_FNAME  # e.g. 'mnistdigits.png'
    embedding.sprite.single_image_dim.extend(
        [img_data.shape[1], img_data.shape[2]])
    projector.visualize_embeddings(log_dir, config)
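A hypothetical call for the function above; the array shapes, labels, and log directory are assumptions (img_data is treated as [N, H, W] thumbnails, matching the single_image_dim lines).

# Hypothetical call (shapes and paths are assumptions, not from the original).
feature = np.random.rand(100, 64).astype("float32")   # 100 vectors of dim 64
img_data = np.random.rand(100, 28, 28)                # one thumbnail per vector
labels = [str(i % 10) for i in range(100)]
register_embedding(feature, img_data, labels, "logs/projector")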
Example #11
def save_embeddings_metadata(log_dir,
                             variable_name,
                             vocabulary_file,
                             num_oov_buckets=1):
    """Registers an embedding variable for visualization in TensorBoard.

  This function registers :obj:`variable_name` in the ``projector_config.pbtxt``
  file and generates metadata from :obj:`vocabulary_file` to attach a label
  to each word ID.

  Args:
    log_dir: The active log directory.
    variable_name: The variable name in the checkpoint.
    vocabulary_file: The associated vocabulary file.
    num_oov_buckets: The number of additional unknown tokens.
  """
    # Assume it ends with /.ATTRIBUTES/VALUE
    filename = "%s.txt" % "_".join(variable_name.split("/")[:-2])
    metadata_path = os.path.join(log_dir, filename)

    with tf.io.gfile.GFile(vocabulary_file, mode="rb") as src, \
         tf.io.gfile.GFile(metadata_path, mode="wb") as dst:
        ws_index = 0
        for line in src:
            # The TensorBoard code checks line.trim().length == 0 when loading the
            # metadata file so make sure lines are not dropped.
            if not line.decode("utf-8").replace(u"\uFEFF", u"").strip():
                dst.write(tf.compat.as_bytes("<whitespace%d>\n" % ws_index))
                ws_index += 1
            else:
                dst.write(line)
        if num_oov_buckets == 1:
            dst.write(b"<unk>\n")
        else:
            for i in range(num_oov_buckets):
                dst.write(tf.compat.as_bytes("<unk%d>\n" % i))

    config = projector.ProjectorConfig()

    # If the projector file exists, load it.
    config_path = os.path.join(log_dir, "projector_config.pbtxt")
    if tf.io.gfile.exists(config_path):
        with tf.io.gfile.GFile(config_path, mode="rb") as config_file:
            text_format.Merge(config_file.read(), config)

    # If this embedding is already registered, just update the metadata path.
    exists = False
    for meta in config.embeddings:
        if meta.tensor_name == variable_name:
            meta.metadata_path = filename
            exists = True
            break

    if not exists:
        embedding = config.embeddings.add()
        embedding.tensor_name = variable_name
        embedding.metadata_path = filename

    with tf.io.gfile.GFile(config_path, "w") as config_file:
        config_file.write(text_format.MessageToString(config))
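A hedged usage sketch for save_embeddings_metadata; the log directory, variable name, and vocabulary path are assumptions chosen to match the "/.ATTRIBUTES/VALUE" convention the function expects.

# Hypothetical call (paths and the variable name are assumptions).
save_embeddings_metadata(
    log_dir="logs/train",
    variable_name="model/embeddings/alpha/.ATTRIBUTES/VALUE",
    vocabulary_file="vocab.txt",
    num_oov_buckets=1)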
Example #12
def tb_projector(X_test, y_test, log_dir):
    """
    TensorBoard projector.
    :param X_test: data
    :param y_test: labels, numeric
    :param log_dir: log directory
    :return: None; writes the logs
    """
    print("data to display: %s" % str(X_test.shape))
    print("labels to display: %s" % str(y_test.shape))
    print("log directory: %s" % str(log_dir))

    metadata = os.path.join(log_dir, 'metadata.tsv')

    images = tf.Variable(X_test)

    # Write the labels into the metadata file.
    with open(metadata, 'w') as metadata_file:
        for row in y_test:
            metadata_file.write('%d\n' % row)

    with tf.Session() as sess:
        saver = tf.train.Saver([images])  # store the data as a matrix

        sess.run(images.initializer)  # initialize the variable
        saver.save(sess, os.path.join(log_dir, 'images.ckpt'))  # checkpoint in images.ckpt

        config = projector.ProjectorConfig()  # projector configuration
        # One can add multiple embeddings.
        embedding = config.embeddings.add()  # add one embedding entry
        embedding.tensor_name = images.name  # name of the tensor
        # Link this tensor to its metadata file (e.g. labels).
        embedding.metadata_path = metadata  # path to the metadata
        # Saves a config file that TensorBoard will read during startup.
        projector.visualize_embeddings(tf.summary.FileWriter(log_dir), config)
Example #13
def visualize_in_tensorboard(embedding_model, output_dirname=None, host="127.0.0.1"):
    tf.compat.v1.disable_eager_execution()
    if output_dirname is None:
        output_dirname = embedding_model.model_name.split('.')[0]

    meta_file = f"{output_dirname}_metadata.tsv"
    output_path = os.path.join(os.getcwd(), output_dirname)
    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    with open(os.path.join(output_path, meta_file), "wb") as file_metadata:
        for word in embedding_model.wv.index2word:
            file_metadata.write(gensim.utils.to_utf8(word) + gensim.utils.to_utf8("\n"))

    embedding = tf.Variable(embedding_model.wv.vectors, trainable=False, name=f"{output_dirname}_tensor")
    init_op = tf.compat.v1.global_variables_initializer()

    saver = tf.compat.v1.train.Saver()
    with tf.compat.v1.Session() as sess:
        sess.run(init_op)
        writer = tf.compat.v1.summary.FileWriter(output_path, sess.graph)

        # Add the embedding to the projector config.
        config = projector.ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = f"{output_dirname}_tensor"
        embed.metadata_path = meta_file

        projector.visualize_embeddings(writer, config)
        saver.save(sess, os.path.join(output_path,f"{output_dirname}_metadata.ckpt"))

    print(f"Run `tensorboard --logdir={output_path} --host {host}` to run visualize result on tensorboard")
Example #14
def visualize_embeddings(lexicon, embed_lookup):
    embeds = []
    labels = []
    for i, label in enumerate(lexicon):
        labels.append(label)
        embeds.append(embed_lookup[i])
        if i > 5000:
            break

    if not os.path.exists(os.path.join('log', 'projector')):
        os.makedirs(os.path.join('log', 'projector'))

    embeddings = tf.Variable(np.array(embeds), name='embeddings')
    meta_path = os.path.join('log', 'projector', 'metadata.tsv')
    embeddings_path = os.path.join('log', 'projector', 'embeddings.ckpt')

    with open(meta_path, 'w') as f:
        for label in labels:
            f.write('%s\n' % label)

    with tf.Session() as sess:
        saver = tf.train.Saver([embeddings])
        sess.run(embeddings.initializer)
        saver.save(sess, embeddings_path)

        writer = tf.summary.FileWriter(os.path.join('log', 'projector'))
        config = projector.ProjectorConfig()

        embed = config.embeddings.add()
        embed.tensor_name = embeddings.name
        embed.metadata_path = 'metadata.tsv'
        projector.visualize_embeddings(writer, config)

    print('embeddings visualised in tensorboard')
Example #15
def register_embeddings(embeddings_name=EMBEDDINGS_TENSOR_NAME,
                        meta_data_fname=META_DATA_FNAME,
                        log_dir=LOG_DIR):
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = embeddings_name
    embedding.metadata_path = meta_data_fname
    projector.visualize_embeddings(log_dir, config)
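register_embeddings only writes the projector config; a matching checkpoint still has to exist. A minimal sketch of that pairing, assuming EMBEDDINGS_TENSOR_NAME is the TF2 checkpoint-style name "embedding/.ATTRIBUTES/VARIABLE_VALUE".

# Hypothetical pairing (variable contents and dimensions are assumptions).
weights = tf.Variable(np.random.rand(200, 32).astype("float32"))
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(LOG_DIR, "embedding.ckpt"))
register_embeddings()  # writes projector_config.pbtxt pointing at that tensor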
def save_embeddings(images_features_labels, save_dir):
  '''
  Function to save embeddings (with corresponding labels and images) to a
      specified directory. Point tensorboard to that directory with
      tensorboard --logdir=<save_dir> and your embeddings will be viewable.
  Arguments:
  images_features_labels : dict
      each key in the dict should be the desired name for that embedding, and
      each element should be a list of [images, embeddings, labels] where
      images are a numpy array of images between 0. and 1. of shape [N*W*H*D]
      or [N*H*W] if grayscale (or None if no images), embeddings is a numpy
      array of shape [N*D], and labels is a numpy array of something that can
      be converted to string of shape D (or None if no labels available)
  save_dir : str
      path to save tensorboard checkpoints
  '''
  assert len(list(images_features_labels.keys())), 'Nothing in dictionary!'

  # Make directory if necessary
  if not os.path.exists(save_dir):
    os.makedirs(save_dir)

  # Reset graph and initialise file writer and session
  tf.reset_default_graph()
  writer = tf.summary.FileWriter(os.path.join(save_dir), graph=None)
  sess = tf.Session()
  config = projector.ProjectorConfig()

  # For each embedding name in the provided dictionary of embeddings
  for name in list(images_features_labels.keys()):

    [ims, fts, labs] = images_features_labels[name]

    # Save sprites and metadata
    if labs is not None:
      metadata_path = os.path.join(save_dir, name + '-metadata.tsv')
      save_metadata(labs, metadata_path)
    if ims is not None:
      sprites_path = os.path.join(save_dir, name + '.png')
      save_sprite_image(ims, path=sprites_path, invert=len(ims.shape) < 4)

    # Make a variable with the embeddings we want to visualise
    embedding_var = tf.Variable(fts, name=name, trainable=False)

    # Add this to our config with the image and metadata properties
    embedding = config.embeddings.add()
    embedding.tensor_name = embedding_var.name
    if labs is not None:
      embedding.metadata_path = name + '-metadata.tsv'
    if ims is not None:
      embedding.sprite.image_path = name + '.png'
      embedding.sprite.single_image_dim.extend(ims[0].shape)

    # Save the embeddings
    projector.visualize_embeddings(writer, config)
  saver = tf.train.Saver(max_to_keep=1)
  sess.run(tf.global_variables_initializer())
  saver.save(sess, os.path.join(save_dir, 'ckpt'))
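A hypothetical call for save_embeddings; the arrays below are random stand-ins for real images, features, and labels.

# Hypothetical call (arrays are assumptions; images may also be None).
ims = np.random.rand(100, 28, 28)             # grayscale images in [0, 1]
fts = np.random.rand(100, 64)                 # embedding vectors
labs = np.array([i % 10 for i in range(100)]) # one label per vector
save_embeddings({'mnist': [ims, fts, labs]}, 'logs/emb')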
Example #17
def export_projector_data(embeddings, meta_path, logs_path):
    embeddings_var = tf.Variable(embeddings, name='embeddings')
    checkpoint = tf.train.Checkpoint(embedding=embeddings_var)
    checkpoint.save(os.path.join(logs_path, 'embeddings.ckpt'))
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # The checkpoint stores the variable under the `embedding` key, so the
    # lookup name needs the `/.ATTRIBUTES/VARIABLE_VALUE` suffix.
    embedding.tensor_name = 'embedding/.ATTRIBUTES/VARIABLE_VALUE'
    embedding.metadata_path = meta_path
    projector.visualize_embeddings(logs_path, config)
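A hypothetical call for export_projector_data; the vector matrix and paths are illustrative assumptions.

# Hypothetical call (data and paths are assumptions).
vectors = np.random.rand(300, 64).astype("float32")
export_projector_data(vectors, meta_path="metadata.tsv", logs_path="logs/emb")
# Then: tensorboard --logdir logs/emb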
Example #18
    def _visualize(embedding, vocab_file, num_oov_buckets=1):
        text_inputter.save_embeddings_metadata(
            log_dir, embedding, vocab_file, num_oov_buckets=num_oov_buckets
        )
        projector_config = projector.ProjectorConfig()
        projector_config_path = os.path.join(log_dir, "projector_config.pbtxt")
        self.assertTrue(os.path.exists(projector_config_path))
        with open(projector_config_path) as projector_config_file:
            text_format.Merge(projector_config_file.read(), projector_config)
        return projector_config
Example #19
  def testVisualizeEmbeddings(self):
    # Create a dummy configuration.
    config = projector.ProjectorConfig()
    config.model_checkpoint_path = 'test'
    emb1 = config.embeddings.add()
    emb1.tensor_name = 'tensor1'
    emb1.metadata_path = 'metadata1'

    # Call the API method to save the configuration to a temporary dir.
    temp_dir = self.get_temp_dir()
    self.addCleanup(shutil.rmtree, temp_dir)
    writer = tf.summary.FileWriter(temp_dir)
    projector.visualize_embeddings(writer, config)

    # Read the configuration from disk and make sure it matches the original.
    with tf.gfile.GFile(os.path.join(temp_dir, 'projector_config.pbtxt')) as f:
      config2 = projector.ProjectorConfig()
      text_format.Parse(f.read(), config2)
      self.assertEqual(config, config2)
Example #20
def create_dummy_config():
    return projector.ProjectorConfig(
        model_checkpoint_path="test",
        embeddings=[
            projector.EmbeddingInfo(
                tensor_name="tensor1",
                metadata_path="metadata1",
            ),
        ],
    )

  def test_visualize_embeddings_with_logdir(self):
    logdir = self.get_temp_dir()
    config = create_dummy_config()
    projector.visualize_embeddings(logdir, config)

    # Read the configuration from disk and make sure it matches the original.
    with tf.io.gfile.GFile(os.path.join(logdir, 'projector_config.pbtxt')) as f:
      config2 = projector.ProjectorConfig()
      text_format.Parse(f.read(), config2)

    self.assertEqual(config, config2)
Example #22
def main():

    print(pg_model)
    bg = bottle_neck_graph(pg_model, None)

    # for all documents compute doc id
    keys, sample = load_data(raw_data)

    query_sample = "Having issues with SDB start blt --project --sdb-go"
    query_sample = query_sample.split()
    sample.append(query_sample)

    total_documents = len(sample)

    # write words and ids to metadata for tensorboard
    with open(metadata_path, 'w') as f:
        for key in keys:
            f.write("doc_id_" + str(key) + '\n')
        f.write("doc_id_query" + '\n')

    # collect all document vectors
    embeds = list()
    for id, sam in enumerate(sample):
        doc_vec = get_document_vector(bg, sam)
        embeds.append(doc_vec)
    embeds = np.asarray(embeds)
    embeds = np.reshape(embeds, [total_documents, embedding_size])
    print(embeds.shape)
    # save it to a json file
    # {doc_id: doc_vec, .. }

    # setup tensorbord for visualizing
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:

        input = tf.placeholder(tf.int32, shape=[None], name="input")
        with tf.name_scope('embeddings'):
            embeddings = tf.Variable(initial_value=embeds,
                                     name="doc_embeddings")
            #embed = tf.nn.embedding_lookup(embeddings, input, name="embed_look_up")

        writer = tf.summary.FileWriter(log_path, sess.graph)
        tf.global_variables_initializer().run()

        saver = tf.train.Saver()
        saver.save(sess, ckptdata_path)

        print("setting up tensorboard projector...")
        config = projector.ProjectorConfig()
        embedding_config = config.embeddings.add()
        embedding_config.tensor_name = embeddings.name
        embedding_config.metadata_path = metadata_path
        projector.visualize_embeddings(tf.summary.FileWriter(model_path),
                                       config)

    def output(self):
        x_test_embeddings = self.embedding_model.predict(self.x_test)
        test_emb_tensor = tf.Variable(x_test_embeddings)
        checkpoint = tf.train.Checkpoint(embedding=test_emb_tensor)
        checkpoint.save(os.path.join(self.log_dir, "embedding.ckpt"))

        # Set up config
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
        embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
        embedding.metadata_path = self.metadata
        projector.visualize_embeddings(self.log_dir, config)
Example #24
    def register_embedding(self):
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = EMBEDDINGS
        embedding.metadata_path = METAFILE

        # this adds the sprite images
        if self.show_images:
            embedding.sprite.image_path = SPRITESFILE
            embedding.sprite.single_image_dim.extend(
                (self.image_size, self.image_size))

        projector.visualize_embeddings(self.log_dir, config)
Example #25
    def _add_emb_vis(self, embedding_var):
        """Do setup so that we can view word embedding visualization in Tensorboard, as described here:
        https://www.tensorflow.org/get_started/embedding_viz
        Make the vocab metadata file, then make the projector config file pointing to it."""
        train_dir = os.path.join(FLAGS.log_root, "train")
        vocab_metadata_path = os.path.join(train_dir, "vocab_metadata.tsv")
        self._vocab.write_metadata(vocab_metadata_path)  # write metadata file
        summary_writer = tf.summary.FileWriter(train_dir)
        config = projector.ProjectorConfig()
        embedding = config.embeddings.add()
        embedding.tensor_name = embedding_var.name
        embedding.metadata_path = vocab_metadata_path
        projector.visualize_embeddings(summary_writer, config)
Example #26
def visualize_data(data: CSVData,
                   ids: List[str],
                   name: str,
                   log_directory: str = LOG_DIRECTORY):
    """Visualize the given data by creating files for the Tensorboard projector.

    Creates `.tsv` and `.pbtxt` files in the log directory. To use it in Tensorboard,
    run "tensorboard --logdir <log directory path>" and choose PROJECTOR from the dropdown list."

    Parameters
    ----------
    data (CSVData): np array containing the data to be visualized

    ids (List[str]): ID for each vector contained in the data array
        (i.e. `len(ids) == `data.shape[0]`)

    name (str): Name for the dataset contained in the data array ()

    log_directory (str, optional): Directory name to store the created Tensorboard logfiles in.
        Defaults to LOG_DIRECTORY
    """
    Path(log_directory).mkdir(parents=True, exist_ok=True)

    if " " in name:
        print("Visualization error: Please specify a name without whitespaces")
        return

    data_file_name = f"{name}.tsv"
    metadata_file_name = f"{name}_metadata.tsv"

    np.savetxt(path.join(log_directory, data_file_name),
               data,
               delimiter="\t",
               fmt="%f")

    with open(path.join(log_directory, metadata_file_name),
              "w") as metadata_writer:
        for data_id in ids:
            metadata_writer.write(f"{data_id}\n")

    config = projector.ProjectorConfig()
    embedding = cast(Any, config).embeddings.add()
    embedding.tensor_path = data_file_name
    embedding.metadata_path = metadata_file_name
    embedding.tensor_name = name

    projector.visualize_embeddings(log_directory, config)
    print(
        f'Run "tensorboard --logdir {log_directory}" and choose PROJECTOR',
        "to see the data visualization\n",
    )
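A hypothetical call for visualize_data; the data and IDs are random stand-ins.

# Hypothetical call (data and ids are assumptions).
data = np.random.rand(100, 10)
ids = [f"row_{i}" for i in range(100)]
visualize_data(data, ids, "my_dataset")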
Example #27
def embedding_projector_files(source_tokenizer,
                              target_tokenizer,
                              model,
                              sentence_pair,
                              log_dir,
                              agg='mean'):
    source_sentence_vector = []
    target_sentence_vector = []
    source_sentences = []
    target_sentences = []
    # Remove start and end token embedding
    target_embedding_layer = model.layers[1].get_weights()[0][1:-1, :]
    source_embedding_layer = model.layers[0].get_weights()[0][1:-1, :]
    with open(os.path.join(log_dir, 'metadata_source.tsv'),
              "w",
              encoding='utf-8') as out_meta_source:
        with open(os.path.join(log_dir, 'metadata_target.tsv'),
                  "w",
                  encoding='utf-8') as out_meta_target:
            out_meta_source.write('source' + "\t" + 'target' + "\n")
            out_meta_target.write('source' + "\t" + 'target' + "\n")
            # Remove tabs, newlines and spaces from the paragraph
            for source, target in sentence_pair:
                source_embedding_vector = tokenize_and_aggregate(
                    source, source_tokenizer, agg, source_embedding_layer)
                target_embedding_vector = tokenize_and_aggregate(
                    target, target_tokenizer, agg, target_embedding_layer)
                out_meta_source.write(source + "\t" + target + "\n")
                out_meta_target.write(source + "\t" + target + "\n")
                source_sentences.append(source)
                target_sentences.append(target)
                source_sentence_vector.append(source_embedding_vector)
                target_sentence_vector.append(target_embedding_vector)
            source_sentence_vector = display_embedding_shape(
                source_sentence_vector, source_sentences, 'source')
            target_sentence_vector = display_embedding_shape(
                target_sentence_vector, target_sentences, 'target')
            checkpoint = tf.train.Checkpoint(
                source_embedding=tf.Variable(source_sentence_vector),
                target_embedding=tf.Variable(target_sentence_vector))
            config = projector.ProjectorConfig()
            config = save_checkpoint_create_config(checkpoint, config,
                                                   'source', log_dir)
            config = save_checkpoint_create_config(checkpoint, config,
                                                   'target', log_dir)
            projector.visualize_embeddings(log_dir, config)

    return (source_sentences, target_sentences, source_sentence_vector,
            target_sentence_vector)
Example #28
def projector_add(embedding, writer, SPRITES=None, LABELS=None, IMG_DIM=None):
    # config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
    config = projector.ProjectorConfig()
    embedding_config = config.embeddings.add()
    embedding_config.tensor_name = embedding.name
    if SPRITES:
        embedding_config.sprite.image_path = SPRITES
    if LABELS:
        embedding_config.metadata_path = LABELS
    if IMG_DIM:
        # Specify the width and height of a single thumbnail.
        embedding_config.sprite.single_image_dim.extend(IMG_DIM)  # like [28, 28]
    #tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
    projector.visualize_embeddings(writer, config)

def embedding_tensorboard(model, lookup_dict, embedd):
    # Set up a logs directory, so Tensorboard knows where to look for files
    log_dir = 'logs/%s' % embedd
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    if embedd == "type":
        column_string = "combined_type"
    else:
        column_string = "Gen"
    layer_type = model.get_layer('embedding-%s' % embedd)
    output_embeddings_type = layer_type.get_weights()
    output_embeddings_type_df = pd.DataFrame(output_embeddings_type[0])
    output_embeddings_type_df = output_embeddings_type_df.reset_index()
    output_embeddings_type_df.columns = [
        column_string, 'embedding_1', 'embedding_2', 'embedding_3'
    ]
    m = output_embeddings_type_df.iloc[:, 1:].values
    labels = output_embeddings_type_df.iloc[:, 0:1].values
    if embedd == "type":

        def get_key(val):
            for key, value in lookup_dict.items():
                if val == value:
                    return key

        labels = [get_key(label) for label in labels]
    else:
        labels += 1
        labels = [y for x in labels for y in x]

    # Save labels separately in a line-by-line manner.
    with open(os.path.join(log_dir, 'metadata.tsv'), "w") as f:
        for label in labels:
            f.write("{}\n".format(label))

    weights = tf.Variable(
        model.get_layer("embedding-%s" % embedd).get_weights()[0][0:])
    # Create a checkpoint from embedding, the filename and key are
    # name of the tensor.
    checkpoint = tf.train.Checkpoint(embedding=weights)
    checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

    # Set up config
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    # The name of the tensor will be suffixed by `/.ATTRIBUTES/VARIABLE_VALUE`
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = 'metadata.tsv'
    projector.visualize_embeddings(log_dir, config)
Example #30
def _create_projector_config(
    path_metadata: str, path_sprite: Optional[str]
) -> projector.ProjectorConfig:
    """Creates a projector-config as needed to show the embedding in Tensorboard."""
    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
    embedding.metadata_path = path_metadata

    if path_sprite is not None:
        embedding.sprite.image_path = path_sprite
        embedding.sprite.single_image_dim.extend(IMAGE_SIZE_IN_SPRITE)

    return config
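The helper above only builds the config object; a hedged sketch of writing it out, assuming a log directory that already holds the matching checkpoint and metadata file.

# Hypothetical follow-up (the log directory and file name are assumptions).
config = _create_projector_config("metadata.tsv", path_sprite=None)
projector.visualize_embeddings("logs/projector", config)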