Exemple #1
0
def main(name="spaCy_vectors"):
    """Export spaCy word vectors so the TensorBoard Projector can load them.

    Loads the spaCy model stored at ``vectors_spacy``, collects every vocab
    string that has a vector, and writes into ``<cwd>/tensorboard_out``:

    * ``<name>.tsv`` -- one metadata row (text, frequency) per vector,
    * ``<name>.ckpt`` -- a checkpoint holding the vector matrix,
    * the projector configuration registered through ``visualize_embeddings``.

    Args:
        name: Base name used for the TensorFlow variable, the metadata file
            and the checkpoint.
    """
    vectors_loc = 'vectors_spacy'
    meta_file = "{}.tsv".format(name)
    # Join path components instead of concatenating '\\' so the output
    # directory is created correctly on every operating system.
    out_loc = path.join(os.getcwd(), 'tensorboard_out')
    pathlib.Path(out_loc).mkdir(parents=True, exist_ok=True)
    out_meta_file = path.join(out_loc, meta_file)

    print('Loading spaCy vectors model: {}'.format(vectors_loc))
    model = spacy.load(vectors_loc)
    print('Finding lexemes with vectors attached: {}'.format(vectors_loc))
    strings_stream = tqdm.tqdm(model.vocab.strings, total=len(model.vocab.strings), leave=False)
    queries = [w for w in strings_stream if model.vocab.has_vector(w)]
    vector_count = len(queries)

    print('Building Tensorboard Projector metadata for ({}) vectors: {}'.format(vector_count, out_meta_file))

    # Dense matrix (one row per vector) that later initialises the TF variable
    tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))

    # Write a tab-separated file that contains information about the vectors for visualization
    #
    # Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
    with open(out_meta_file, 'wb') as file_metadata:
        # Define columns in the first row
        file_metadata.write("Text\tFrequency\n".encode('utf-8'))
        # Write out a row for each vector that we add to the matrix
        for vec_index, text in enumerate(tqdm.tqdm(queries, total=vector_count, leave=False)):
            # Look up the lexeme and vector with the ORIGINAL string; only the
            # label written to the metadata file is substituted below.
            lex = model.vocab[text]
            tf_vectors_variable[vec_index] = model.vocab.get_vector(text)

            # Whitespace-only labels break the projector:
            # https://github.com/tensorflow/tensorflow/issues/9094
            label = '<Space>' if text.lstrip() == '' else text
            file_metadata.write("{}\t{}\n".format(label, math.exp(lex.prob) * vector_count).encode('utf-8'))

    print('Running Tensorflow Session...')
    sess = tf.InteractiveSession()
    writer = None
    try:
        tf.Variable(tf_vectors_variable, trainable=False, name=name)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(out_loc, sess.graph)

        # Link the embeddings into the config
        config = ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = name
        embed.metadata_path = meta_file

        # Tell the projector about the configured embeddings and metadata file
        visualize_embeddings(writer, config)

        # Save session and print run command to the output
        print('Saving Tensorboard Session...')
        saver.save(sess, path.join(out_loc, '{}.ckpt'.format(name)))
    finally:
        # Flush the event file and release the session's resources
        if writer is not None:
            writer.close()
        sess.close()
    print('Done. Run `tensorboard --logdir={0}` to view in Tensorboard'.format(out_loc))
Exemple #2
0
def visualize(model, output_path):
    """Export a word-vector model for the TensorBoard embedding projector.

    Writes ``w2x_metadata.tsv`` (one word per line) and a
    ``w2x_metadata.ckpt`` checkpoint containing the embedding matrix into
    ``output_path``, and registers the embedding with the projector.

    Args:
        model: Object exposing ``model.wv`` with ``index2word``,
            ``vector_size`` and word lookup by indexing
            (presumably a gensim < 4 Word2Vec model -- confirm with caller).
        output_path: Existing directory that receives all output files.
    """
    meta_file = "w2x_metadata.tsv"
    keyed_vectors = model.wv
    vocab = keyed_vectors.index2word
    # Size the matrix from the model instead of assuming 100 dimensions
    placeholder = np.zeros((len(vocab), keyed_vectors.vector_size))

    with open(os.path.join(output_path, meta_file), 'wb') as file_metadata:
        for i, word in enumerate(vocab):
            placeholder[i] = keyed_vectors[word]
            # temporary solution for https://github.com/tensorflow/tensorflow/issues/9094
            if word == '':
                print(
                    "Emply Line, should replecaed by any thing else, or will cause a bug of tensorboard"
                )
                label = '<Empty Line>'
            else:
                label = word
            file_metadata.write("{0}".format(label).encode('utf-8') + b'\n')

    # Run on CPU only; the session just has to hold and save one variable
    session_config = tf.ConfigProto(device_count={'GPU': 0})

    # define the model without training
    sess = tf.InteractiveSession(config=session_config)
    writer = None
    try:
        tf.Variable(placeholder, trainable=False, name='w2x_metadata')
        tf.global_variables_initializer().run()

        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(output_path, sess.graph)

        # adding into projector
        projector_config = ProjectorConfig()
        embed = projector_config.embeddings.add()
        embed.tensor_name = 'w2x_metadata'
        embed.metadata_path = meta_file

        # Register the embedding and its metadata file with the projector
        visualize_embeddings(writer, projector_config)
        saver.save(sess, os.path.join(output_path, 'w2x_metadata.ckpt'))
    finally:
        # Flush the event file and release the session's resources
        if writer is not None:
            writer.close()
        sess.close()
    print(
        'Run `tensorboard --logdir={0}` to run visualize result on tensorboard'
        .format(output_path))
Exemple #3
0
def main(vectors_loc="/Users/edugallardopardo/OneDrive/Documentos/TFM/python/SPACY/modelos/EMT_models/model5",
         out_loc="/Users/edugallardopardo/OneDrive/Documentos/TFM/python/SPACY/modelos/EMT_models/model5/TensorFlow model5 2",
         name="spaCy_vectors"):
    """Export selected spaCy word vectors for the TensorBoard Projector.

    Candidate words are read from ``palabras_count.csv`` in the current
    working directory: the first two lines are skipped, and a word (first
    column) is kept when the integer in the third column is at least 3.
    Words without a vector in the model are dropped. The function then
    writes ``<name>.tsv`` (metadata) and ``<name>.ckpt`` (vector matrix)
    into ``out_loc`` and registers the embedding with the projector.

    Args:
        vectors_loc: Path or name passed to ``spacy.load``.
        out_loc: Directory receiving the metadata, checkpoint and event
            files. It is not created here.
        name: Base name for the TensorFlow variable, metadata file and
            checkpoint.
    """
    meta_file = "{}.tsv".format(name)
    out_meta_file = path.join(out_loc, meta_file)

    print("Loading spaCy vectors model: {}".format(vectors_loc))
    model = spacy.load(vectors_loc)
    print("Finding lexemes with vectors attached: {}".format(vectors_loc))

    strings_stream = []
    # The context manager guarantees the CSV handle is closed
    with open('palabras_count.csv', 'r', encoding='utf-8') as csvin:
        for cnt, line in enumerate(csvin):
            if cnt <= 1:
                # Lines 0 and 1 are skipped, as in the original filter
                continue
            fields = line.split(',')
            if int(fields[2]) >= 3:
                strings_stream.append(fields[0])

    print(strings_stream)
    queries = [w for w in strings_stream if model.vocab.has_vector(w)]
    vector_count = len(queries)

    print(
        "Building Tensorboard Projector metadata for ({}) vectors: {}".format(
            vector_count, out_meta_file
        )
    )

    # Dense matrix (one row per vector) that later initialises the TF variable
    tf_vectors_variable = numpy.zeros((vector_count, model.vocab.vectors.shape[1]))

    # Write a tab-separated file that contains information about the vectors for visualization
    #
    # Reference: https://www.tensorflow.org/programmers_guide/embedding#metadata
    with open(out_meta_file, "wb") as file_metadata:
        # Define columns in the first row
        file_metadata.write("Text\tFrequency\n".encode("utf-8"))
        # Write out a row for each vector that we add to the matrix
        for vec_index, text in enumerate(tqdm.tqdm(queries, total=vector_count, leave=False)):
            # Look up the lexeme and vector with the ORIGINAL string; only the
            # label written to the metadata file is substituted below.
            lex = model.vocab[text]
            tf_vectors_variable[vec_index] = model.vocab.get_vector(text)

            # Whitespace-only labels break the projector:
            # https://github.com/tensorflow/tensorflow/issues/9094
            label = "<Space>" if text.lstrip() == "" else text
            file_metadata.write(
                "{}\t{}\n".format(label, math.exp(lex.prob) * vector_count).encode(
                    "utf-8"
                )
            )

    print("Running Tensorflow Session...")
    sess = tf.InteractiveSession()
    writer = None
    try:
        tf.Variable(tf_vectors_variable, trainable=False, name=name)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(out_loc, sess.graph)

        # Link the embeddings into the config
        config = ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = name
        embed.metadata_path = meta_file

        # Tell the projector about the configured embeddings and metadata file
        visualize_embeddings(writer, config)

        # Save session and print run command to the output
        print("Saving Tensorboard Session...")
        saver.save(sess, path.join(out_loc, "{}.ckpt".format(name)))
    finally:
        # Flush the event file and release the session's resources
        if writer is not None:
            writer.close()
        sess.close()

    print("Done. Run `tensorboard --logdir={0}` to view in Tensorboard".format(out_loc))
def main(args):
    """Export precomputed feature vectors for the TensorBoard Projector.

    Reads a metadata CSV and a ``.npy`` feature matrix, optionally samples
    ``args.n_rows`` rows, optionally builds an image sprite, and writes the
    metadata TSV, sprite, checkpoint and projector config to ``args.out_dir``.

    Attributes read from ``args``:
        in_csv: CSV file with one metadata row per feature row.
        in_npy: ``.npy`` file holding the feature matrix.
        out_dir: Output directory (created if missing).
        out_prefix: Optional prefix for the metadata file and tensor name.
        meta_cols: Optional comma-separated column names (or a list of
            names) to keep in the metadata file; ``None`` keeps all columns.
        n_rows: Optional number of rows to sample at random.
        img_col: Optional CSV column holding image file names.
        img_datapath: Directory containing those images (used with img_col).
        img_size: Side length of each square sprite thumbnail.

    Raises:
        FileNotFoundError: If an image listed in ``img_col`` cannot be read.
    """
    df = pd.read_csv(args.in_csv)
    os.makedirs(args.out_dir, exist_ok=True)

    meta_file = (f"{args.out_prefix}_meta.tsv"
                 if args.out_prefix is not None else "meta.tsv")
    out_meta_file = path.join(args.out_dir, meta_file)

    features = np.load(args.in_npy, mmap_mode="r")

    if args.n_rows is not None:
        # NOTE(review): np.random.choice samples WITH replacement by default,
        # so rows may repeat -- confirm this is intended.
        rows_ids = np.random.choice(np.arange(0, len(features)),
                                    size=args.n_rows)
        features = features[rows_ids, :]
        df = df.iloc[rows_ids]

    # Write the metadata only AFTER subsampling so that row i of the TSV
    # describes row i of the saved tensor. A local is used instead of
    # overwriting args.meta_cols, so repeated calls with the same args work.
    meta_cols = args.meta_cols
    if isinstance(meta_cols, str):
        meta_cols = meta_cols.split(",")
    df_meta = df if meta_cols is None else df[meta_cols]

    # The header row is emitted only when there is more than one column
    df_meta.to_csv(out_meta_file,
                   sep="\t",
                   index=False,
                   header=len(df_meta.columns) > 1)

    if args.img_col is not None:
        thumb_size = (args.img_size, args.img_size)
        thumbnails = []
        for file_name in df[args.img_col].values:
            img_path = path.join(args.img_datapath, file_name)
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                raise FileNotFoundError(f"Could not read image: {img_path}")
            thumbnails.append(
                cv2.resize(image, thumb_size,
                           interpolation=cv2.INTER_NEAREST))
        img_data = np.stack(thumbnails, axis=0).reshape(
            -1, args.img_size, args.img_size, 3).astype(np.float32)
        sprite = images_to_sprite(img_data)
        cv2.imwrite(path.join(args.out_dir, "sprite.png"), sprite)

    print(
        f"Building Tensorboard Projector metadata for ({len(features)}) vectors: {out_meta_file}"
    )

    print("Running Tensorflow Session...")
    sess = tf.InteractiveSession()
    writer = None
    try:
        name = args.out_prefix or "tensors"
        tf.Variable(features, trainable=False, name=name)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(args.out_dir, sess.graph)

        # Link the embeddings into the config
        config = ProjectorConfig()
        embed = config.embeddings.add()
        embed.tensor_name = name
        embed.metadata_path = meta_file

        if args.img_col is not None:
            # Sprite path is given relative to the log directory
            embed.sprite.image_path = "sprite.png"
            embed.sprite.single_image_dim.extend(
                [img_data.shape[1], img_data.shape[1]])

        # Tell the projector about the configured embeddings and metadata file
        visualize_embeddings(writer, config)

        # Save session and print run command to the output
        print("Saving Tensorboard Session...")
        saver.save(sess, path.join(args.out_dir, f"{name}.ckpt"))
    finally:
        # Flush the event file and release the session's resources
        if writer is not None:
            writer.close()
        sess.close()
    print(
        f"Done. Run `tensorboard --logdir={args.out_dir}` to view in Tensorboard"
    )
Exemple #5
0
        # Define columns in the first row
        # file_metadata.write("Text\n".encode('utf-8'))
        # Write out a row for each vector that we add to the tensorflow variable we created
        for word in vocab:
            # https://github.com/tensorflow/tensorflow/issues/9094

            # Store vector data and metadata
            file_metadata.write("{}\n".format(word).encode('utf-8'))

    print('Running Tensorflow Session...')
    sess = tf.InteractiveSession()
    tf.Variable(matrix, trainable=False, name=name)
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(out_loc, sess.graph)

    # Link the embeddings into the config
    config = ProjectorConfig()
    embed = config.embeddings.add()
    embed.tensor_name = name
    embed.metadata_path = meta_file

    # Tell the projector about the configured embeddings and metadata file
    visualize_embeddings(writer, config)

    # Save session and print run command to the output
    print('Saving Tensorboard Session...')
    saver.save(sess, path.join(out_loc, '{}.ckpt'.format(name)))
    print('Done. Run `tensorboard --logdir={0}` to view in Tensorboard'.format(
        out_loc))