# --- Captioning model: training setup (flat script) ---
# Reads hyperparameters from a ConfigParser-style `cp` object, builds the
# tokenizer and the augmented training data generator, and loads the medical
# word2vec wrapper. Names defined here (tokenizer_wrapper, data_generator,
# medical_w2v, steps) are presumably consumed further down the file — verify.

# Model hyperparameters from the [Captioning_Model] config section.
BUFFER_SIZE = cp["Captioning_Model"].getint("buffer_size")
embedding_dim = cp["Captioning_Model"].getint("embedding_dim")
units = cp["Captioning_Model"].getint("units")

# Training-run settings: checkpoint directory and resume flag.
checkpoint_path = cp["Captioning_Model_Train"].get("ckpt_path")
continue_from_last_ckpt = cp["Captioning_Model_Train"].getboolean(
    "continue_from_last_ckpt")

# compute steps
# Steps per epoch; int() truncates, so a final partial batch is dropped.
steps = int(training_counts / batch_size)
print(f"** train_steps: {steps} **")

print("** load training generator **")
# NOTE(review): the tokenizer is fit on the *full* data CSV rather than only the
# training split — presumably so the vocabulary also covers val/test captions;
# confirm this is intentional.
tokenizer_wrapper = TokenizerWrapper(os.path.join(data_dir, all_data_csv),
                                     class_names[0], max_sequence_length,
                                     tokenizer_vocab_size)

# Batched image/caption sequence over the training split, with augmentation
# and reshuffling at the end of every epoch.
data_generator = AugmentedImageSequence(
    dataset_csv_file=os.path.join(data_dir, training_csv),
    class_names=class_names,
    tokenizer_wrapper=tokenizer_wrapper,
    source_image_dir=image_source_dir,
    batch_size=batch_size,
    target_size=(image_dimension, image_dimension),
    augmenter=augmenter,
    steps=steps,
    shuffle_on_epoch_end=True,
)

# Medical-domain word2vec helper (embeddings are fetched from it elsewhere).
medical_w2v = Medical_W2V_Wrapper()
# --- Caption-prediction evaluation script: n-gram statistics over saved output ---

from configs import argHandler
from caption_evaluation import get_evalutation_scores
from tokenizer_wrapper import TokenizerWrapper
import re
import collections
from copy import deepcopy

# Run configuration (paths, CSV column names, tokenizer sizes).
FLAGS = argHandler()
FLAGS.setDefaults()

# predictions.csv pairs ground-truth captions ('real') with model output
# ('prediction'), one row per image/report.
# NOTE(review): `pd` is used here but not imported in this span — presumably
# `import pandas as pd` appears earlier in the file; verify.
df = pd.read_csv('predictions.csv')
labels = df['real']
preds = df['prediction']

# Tokenizer fit on the full dataset so its vocabulary matches the one used at
# training/inference time.
tokenizer_wrapper = TokenizerWrapper(FLAGS.all_data_csv,
                                     FLAGS.csv_label_columns[0],
                                     FLAGS.max_sequence_length,
                                     FLAGS.tokenizer_vocab_size)


def tokenize(string):
    """Convert string to lowercase and split into words (ignoring
    punctuation), returning list of words.
    """
    return re.findall(r'\w+', string.lower())


def count_ngrams(lines, min_length=3, max_length=3):
    """Iterate through given lines iterator (file object or list of
    lines) and return n-gram frequencies. The return value is a dict
    mapping the length of the n-gram to a collections.Counter object of
    n-gram tuple and number of times that n-gram occurred.
# --- GPT-2 captioning model: training data pipeline + embedding setup ---
# NOTE(review): argHandler, tf, TokenizerWrapper, get_enqueuer and
# Medical_W2V_Wrapper are used below but not imported in this span —
# presumably imported earlier in the file; verify.

import os
import json
from augmenter import augmenter
from gpt2.gpt2_model import TFGPT2LMHeadModel
from test import evaluate_enqueuer
import pandas as pd
from glob import glob
import shutil

# tf.keras.mixed_precision.experimental.set_policy('mixed_float16')

# Run configuration (paths, batch size, worker counts, tags, ...).
FLAGS = argHandler()
FLAGS.setDefaults()

# Put Keras globally into training phase (affects dropout/batch-norm layers).
tf.keras.backend.set_learning_phase(1)

# Tokenizer fit on the full data CSV so train/test share one vocabulary.
tokenizer_wrapper = TokenizerWrapper(FLAGS.all_data_csv,
                                     FLAGS.csv_label_columns[0],
                                     FLAGS.max_sequence_length,
                                     FLAGS.tokenizer_vocab_size)

# Three data enqueuers: batched training, per-sample test (batch size 1), and
# batched test. Each call also returns the number of steps per epoch.
train_enqueuer, train_steps = get_enqueuer(FLAGS.train_csv, FLAGS.batch_size, FLAGS, tokenizer_wrapper)
test_enqueuer, test_steps = get_enqueuer(FLAGS.test_csv, 1, FLAGS, tokenizer_wrapper)
batch_test_enqueuer, batch_test_steps = get_enqueuer(FLAGS.test_csv, FLAGS.batch_size, FLAGS, tokenizer_wrapper)

# Start background workers pre-filling the training queue.
train_enqueuer.start(workers=FLAGS.generator_workers, max_queue_size=FLAGS.generator_queue_length)

# Medical-domain word2vec; only the tag embeddings are materialized here
# (the word-token embedding path is kept commented out below).
medical_w2v = Medical_W2V_Wrapper()
# medical_w2v.save_embeddings(tokenizer_wrapper.get_word_tokens_list(),FLAGS.tags)
# embeddings = medical_w2v.get_embeddings_matrix_for_words(tokenizer_wrapper.get_word_tokens_list(),
#                                                          FLAGS.tokenizer_vocab_size)
tags_embeddings = medical_w2v.get_embeddings_matrix_for_tags(FLAGS.tags)

# print(f"Embeddings shape: {embeddings.shape}")
print(f"Tags Embeddings shape: {tags_embeddings.shape}")
# --- Captioning model: inference/test setup (flat script) ---
# Reads hyperparameters from a ConfigParser-style `cp` object, builds the
# tokenizer and a deterministic (non-shuffled, non-augmented) test generator,
# then instantiates the encoder-decoder model and its optimizer. Checkpoint
# restoration presumably happens further down the file.

# Model hyperparameters from the [Captioning_Model] config section.
BUFFER_SIZE = cp["Captioning_Model"].getint("buffer_size")
embedding_dim = cp["Captioning_Model"].getint("embedding_dim")
units = cp["Captioning_Model"].getint("units")
checkpoint_path = cp["Captioning_Model_Train"].get("ckpt_path")
output_images_folder = cp["Captioning_Model_Inference"].get(
    "output_images_folder")

# compute steps
# Steps over the test set; int() truncates, so a final partial batch is dropped.
steps = int(testing_counts / batch_size)
print(f"** test: {steps} **")

print("** load test generator **")
# FIX: pass tokenizer_vocab_size as the fourth argument, matching every other
# TokenizerWrapper construction in this project
# (csv path, label column, max sequence length, vocab size). Without it the
# call either fails or builds a tokenizer whose vocabulary differs from the
# one the model was trained with.
tokenizer_wrapper = TokenizerWrapper(os.path.join(data_dir, all_data_csv),
                                     class_names[0], max_sequence_length,
                                     tokenizer_vocab_size)

# Test-split generator: no augmenter and no shuffling, so predictions stay
# aligned with the CSV row order.
data_generator = AugmentedImageSequence(
    dataset_csv_file=os.path.join(data_dir, testing_csv),
    class_names=class_names,
    tokenizer_wrapper=tokenizer_wrapper,
    source_image_dir=image_source_dir,
    batch_size=batch_size,
    target_size=(image_dimension, image_dimension),
    steps=steps,
    shuffle_on_epoch_end=False,
)

# Encoder-decoder captioning model; decoder output size matches the tokenizer
# vocabulary. Optimizer is needed to rebuild the checkpoint object.
encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, tokenizer_vocab_size)
optimizer = tf.keras.optimizers.Adam()