def extract_features(path, model_type):
    if model_type == 'inceptionv3':
        from keras.applications.inception_v3 import preprocess_input
        target_size = (299, 299)
    elif model_type == 'vgg16':
        from keras.applications.vgg16 import preprocess_input
        target_size = (224, 224)
    else:
        raise ValueError('Unsupported model_type: {}'.format(model_type))
    # Get CNN Model from model.py
    model = CNNModel(model_type)
    features = dict()
    # Extract features from each photo
    for name in tqdm(os.listdir(path)):
        # Load and resize the image
        filename = path + name
        image = load_img(filename, target_size=target_size)
        # Convert the image pixels to a numpy array
        image = img_to_array(image)
        # Reshape data for the model
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        # Prepare the image for the CNN Model
        image = preprocess_input(image)
        # Pass image into model to get encoded features
        feature = model.predict(image, verbose=0)
        # Store encoded features for the image
        image_id = name.split('.')[0]
        features[image_id] = feature
    return features
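# For context, a hypothetical driver for extract_features(): it assumes the
# module-level imports already used above (os, tqdm, the Keras image utilities
# and CNNModel from model.py), plus pickle for persistence. The directory name
# and 'features.pkl' are illustrative placeholders, not paths from the repo.
if __name__ == '__main__':
    from pickle import dump

    dataset_dir = 'data/Flicker8k_Dataset/'  # assumed folder; note the trailing slash, since path + name is concatenated directly
    features = extract_features(dataset_dir, 'inceptionv3')
    print('Extracted features for {} images'.format(len(features)))
    # Persist the encodings so training/inference can skip the CNN pass
    with open('features.pkl', 'wb') as f:
        dump(features, f)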
def __init__(self):
    # Load the tokenizer fitted during training
    tokenizer_path = get_absolute_path(config['tokenizer_path'])
    with open(tokenizer_path, 'rb') as f:
        self._tokenizer = load(f)
    # Max sequence length (from training)
    self._max_length = config['max_length']
    # Trained decoder RNN (caption) model
    self._caption_model = load_model(
        get_absolute_path(config['model_load_path']))
    # CNN encoder used to extract image features
    self._image_model = CNNModel(config['model_type'])
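# The constructor reads everything from the module-level `config` mapping.
# A sketch of the keys it expects (values below are placeholders for
# illustration, not the repo's actual settings):
#
#   config = {
#       'tokenizer_path': 'model_data/tokenizer.pkl',   # pickled Keras Tokenizer
#       'max_length': 40,                               # max caption length from training
#       'model_load_path': 'model_data/model.hdf5',     # trained decoder weights
#       'model_type': 'inceptionv3',                    # or 'vgg16'
#   }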
def run_captioning(image_file):
    # import datetime
    # today = datetime.date.today()
    # image_file = '/home/lab02/imgdesc/django/media/' + '{:/%Y%m/%d/}'.format(today) + image_file_name
    # Load the tokenizer
    tokenizer_path = config['tokenizer_path']
    tokenizer = load(open(tokenizer_path, 'rb'))
    # Max sequence length (from training)
    max_length = config['max_length']
    # Load the model
    caption_model = load_model(config['model_load_path'])
    image_model = CNNModel(config['model_type'])
    # Load and prepare the image
    # if (image_file.split('.')[1] == 'jpg' or image_file.split('.')[1] == 'jpeg'):
    try:
        # Encode image using CNN Model
        image = extract_features(image_file, image_model, config['model_type'])
    except Exception:
        return config['errmsg_imgopen'] + '.'
    try:
        if len(image) == 0:
            return config['errmsg_imgopen']
        # Generate caption using Decoder RNN Model + BEAM search
        generated_caption = generate_caption_beam_search(
            caption_model, tokenizer, image, max_length,
            beam_index=config['beam_search_k'])
    except Exception:
        return config['errmsg_imgopen'] + '..'
    try:
        # Remove startseq and endseq
        desc_en = generated_caption.split()[1].capitalize()
        for x in generated_caption.split()[2:len(generated_caption.split()) - 1]:
            desc_en = desc_en + ' ' + x
        desc_en += '.'
    except Exception:
        return config['errmsg_imgopen'] + '...'
    try:
        # Show the caption
        print('BEAM Search with k=', config['beam_search_k'])
        print(desc_en)
        return desc_en
        # print(desc_ko, '\n\n')
    except Exception:
        return config['errmsg_imgopen'] + '....'
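# Hypothetical quick check of run_captioning (e.g. from a shell or a Django
# view); the image path below is illustrative, not a real file in the repo.
if __name__ == '__main__':
    result = run_captioning('media/sample.jpg')
    # On success `result` is the caption; on failure it is errmsg_imgopen
    # suffixed with a varying number of trailing dots marking which stage failed.
    print(result)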
    # Tail of the extract_features(filename, model, model_type) helper used by
    # this test script; the preceding load/resize steps mirror those shown earlier.
    image = preprocess_input(image)
    # Pass image into model to get encoded features
    features = model.predict(image, verbose=0)
    return features

# Load the tokenizer
tokenizer_path = config['tokenizer_path']
tokenizer = load(open(tokenizer_path, 'rb'))
# Max sequence length (from training)
max_length = config['max_length']
# Load the model
caption_model = load_model(config['model_load_path'])
image_model = CNNModel(config['model_type'])
# Load and prepare the image
for image_file in os.listdir(config['test_data_path']):
    if image_file.split('--')[0] == 'output':
        continue
    if image_file.split('.')[1] == 'jpg' or image_file.split('.')[1] == 'jpeg':
        print('Generating caption for {}'.format(image_file))
        # Encode image using CNN Model
        image = extract_features(config['test_data_path'] + image_file,
                                 image_model, config['model_type'])
        # Generate caption using Decoder RNN Model + BEAM search
        generated_caption = generate_caption_beam_search(
            caption_model, tokenizer, image, max_length,
            beam_index=config['beam_search_k'])
        # Remove startseq and endseq
        caption = 'Caption: ' + generated_caption.split()[1].capitalize()
        for x in generated_caption.split()[2:len(generated_caption.split()) - 1]:
            caption = caption + ' ' + x
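        # Hedged sketch of how this test loop could finish. The 'output--'
        # prefix skipped above suggests a captioned copy of each image is
        # written back to the test folder; the matplotlib rendering and the
        # output file naming below are assumptions, not confirmed repo code.
        caption += '.'
        print(caption)

        import matplotlib.pyplot as plt
        from keras.preprocessing.image import load_img

        img = load_img(config['test_data_path'] + image_file)
        plt.imshow(img)
        plt.axis('off')
        plt.title(caption)
        plt.savefig(config['test_data_path'] + 'output--' + image_file)
        plt.close()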