def get_conv_features(image_file, model_type, feature_layer):
    """Extract CNN features for a single image file.

    Builds a fresh VGG16 or ResNet graph (448px input), runs the image
    through the requested feature layer, closes the session, and returns
    the feature batch (batch size 1).
    """
    # Pick the backbone; for VGG, the fully-connected head is only kept
    # when fc7 features were requested.
    if model_type == "vgg":
        net = vgg16.create_vgg_model(448, only_conv=feature_layer != 'fc7')
    else:
        net = resnet.create_resnet_model(448)

    session = net['session']
    input_placeholder = net['images_placeholder']
    target_tensor = net[feature_layer]
    img_dim = 448

    # ResNet preprocesses the raw image inside the graph via 'pre_image';
    # VGG expects an already-resized array.
    if model_type == 'resnet':
        arr = session.run(
            net['processed_image'],
            feed_dict={
                net['pre_image']: utils.load_image_array(image_file, img_dim=None)
            })
    else:
        arr = utils.load_image_array(image_file, img_dim=img_dim)

    features = session.run(target_tensor, feed_dict={input_placeholder: [arr]})
    session.close()
    return features
def get_minibatches(input_set, batchsize):
    """Yield `(image_batch, n_images)` tuples of at most `batchsize` images.

    Generator over COCO image ids in `input_set`; builds file paths from the
    module-level `args` (data_dir / mode). The final batch may be partial.
    """
    buffer = np.ndarray((batchsize, 224, 224, 3))
    filled = 0
    for seen, image_id in enumerate(input_set, start=1):
        image_path = os.path.join(
            args.data_dir,
            '%s2014/COCO_%s2014_%.12d.jpg' % (args.mode, args.mode, image_id))
        buffer[filled, :, :, :] = utils.load_image_array(image_path)
        filled += 1
        # Flush on a full buffer, or once every input has been consumed.
        if filled >= batchsize or seen >= len(input_set):
            yield buffer[0:filled, :, :, :], filled
            filled = 0
def test():
    """Run Mask R-CNN detection on a single image and print each detection."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='mask_rcnn_coco.h5')
    parser.add_argument('--image', type=str, default='example.jpg')
    args = parser.parse_args()

    detector = Detector(args.model)
    result = detector.detect(load_image_array(args.image))

    # One line per detected object: bounding box plus class label.
    for index, (roi, class_id) in enumerate(zip(result['rois'],
                                                result['class_ids'])):
        print('Object #{}, roi: {}, class: {}'.format(
            index + 1, roi, CLASS_NAMES[class_id]))
def main():
    """Extract CNN conv features (fc7 / pool5 / block4) for a VQA image split.

    Builds a VGG16 or ResNet model, streams images through it in batches,
    and stores the features plus the image-id list in per-split HDF5 files
    under Data/conv_features_<split>_<model>/.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train',
                        help='train/val/test')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='Batch Size')
    parser.add_argument('--feature_layer', type=str, default="block4",
                        help='CONV FEATURE LAYER, fc7, pool5 or block4')
    parser.add_argument('--model', type=str, default="resnet", help='vgg')
    args = parser.parse_args()

    # NOTE(review): the 'train' split reads Data/annotations/test.json —
    # looks like a leftover from debugging; confirm the intended file.
    if args.split == "train":
        with open('Data/annotations/test.json') as f:
            images = json.loads(f.read())['images']
    else:
        with open('Data/annotations/captions_val2014.json') as f:
            images = json.loads(f.read())['images']
    # Deduplicate image ids via a dict, then snapshot the key order.
    image_ids = {image['image_id']: 1 for image in images}
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    # Recreate the output directory from scratch.
    # NOTE(review): bare except silently ignores all errors, not just a
    # missing directory.
    try:
        shutil.rmtree('Data/conv_features_{}_{}'.format(
            args.split, args.model))
    except:
        pass
    os.makedirs('Data/conv_features_{}_{}'.format(args.split, args.model))

    # Build the CNN; for VGG the fc head is kept only when fc7 is requested.
    if args.model == "vgg":
        cnn_model = vgg16.create_vgg_model(
            448, only_conv=args.feature_layer != 'fc7')
    else:
        cnn_model = resnet.create_resnet_model(448)

    # Persist the image-id ordering so feature rows can be mapped back.
    image_id_file_name = "Data/conv_features_{}_{}/image_id_list_{}.h5".format(
        args.split, args.model, args.feature_layer)
    h5f_image_id_list = h5py.File(image_id_file_name, 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()

    conv_file_name = "Data/conv_features_{}_{}/conv_features_{}.h5".format(
        args.split, args.model, args.feature_layer)
    hdf5_conv_file = h5py.File(conv_file_name, 'w')

    # Feature shape and input resolution depend on layer/model:
    # fc7 -> flat 4096-d at 224px; conv layers -> 14x14 maps at 448px.
    # NOTE(review): conv_features is assigned None in every branch and
    # never used afterwards — dead variable.
    if args.feature_layer == "fc7":
        conv_features = None
        feature_shape = (len(image_id_list), 4096)
        img_dim = 224
    else:
        if args.model == "vgg":
            conv_features = None
            feature_shape = (len(image_id_list), 14, 14, 512)
            img_dim = 448
        else:
            conv_features = None
            feature_shape = (len(image_id_list), 14, 14, 2048)
            img_dim = 448
    print("it's done!!!")
    hdf5_data = hdf5_conv_file.create_dataset('conv_features',
                                              shape=feature_shape, dtype='f')

    sess = cnn_model['session']
    images = cnn_model['images_placeholder']
    image_feature_layer = cnn_model[args.feature_layer]
    idx = 0
    # Stream batches; `count` tracks actual batch fill (last batch may be
    # partial), `idx` tracks overall progress.
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, img_dim, img_dim, 3))
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = ('Data/images/abstract_v002_%s2015_%.12d.jpg' %
                          (args.split, image_id_list[idx]))
            # ResNet preprocesses the raw image inside the graph; VGG takes
            # a pre-resized array.
            if args.model == 'resnet':
                image_array = sess.run(cnn_model['processed_image'], feed_dict={
                    cnn_model['pre_image']: utils.load_image_array(image_file,
                                                                   img_dim=None)
                })
            else:
                image_array = utils.load_image_array(image_file,
                                                     img_dim=img_dim)
            image_batch[i, :, :, :] = image_array
            idx += 1
            count += 1
        feed_dict = {images: image_batch[0:count, :, :, :]}
        conv_features_batch = sess.run(image_feature_layer,
                                       feed_dict=feed_dict)
        #np.reshape not needed
        #conv_features_batch = np.reshape(conv_features_batch, ( conv_features_batch.shape[0], -1 ))
        # Write this batch's rows into the on-disk dataset.
        hdf5_data[(idx - count):idx] = conv_features_batch[0:count]
        end = time.clock()
        print("Time for batch of photos", end - start)
        print("Hours Remaining", ((len(image_id_list) - idx) * 1.0) *
              (end - start) / 60.0 / 60.0 / args.batch_size)
        print("Images Processed", idx)
    hdf5_conv_file.close()
    print("Done!")
def main(): print "Total Images" parser = argparse.ArgumentParser() parser.add_argument('--split', type=str, default='train', help='train/val') parser.add_argument('--data_dir', type=str, default='Data', help='Data directory') parser.add_argument('--batch_size', type=int, default=10, help='Batch Size') print "Total Images" args = parser.parse_args() print "Total Images" data_loader.prepare_training_data(version=1); all_data = data_loader.load_questions_answers(version=1); if args.split == "train": qa_data = all_data['training'] else: qa_data = all_data['validation'] image_ids = {} for qa in qa_data: image_ids[qa['image_id']] = 1 image_id_list = [img_id for img_id in image_ids] print "Total Images", len(image_id_list) model = VGG16(weights='imagenet', include_top=False, outputs=base_model.get_layer('Conv2D').output) fc7 = np.ndarray((len(image_id_list), 4096)) idx = 0 while idx < len(image_id_list): start = time.clock() image_batch = np.ndarray((args.batch_size, 224, 224, 3)) count = 0 for i in range(0, args.batch_size): if idx >= len(image_id_list): break image_file = join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg' % (args.split, args.split, image_id_list[idx])) image_batch[i, :, :, :] = utils.load_image_array(image_file) x = np.expand_dims(image_batch[i, :, :, :], axis=0) x = preprocess_input(x) features = model.predict(x) fc7_batch[i, :] = features idx += 1 count += 1 fc7[(idx - count):idx, :] = fc7_batch[0:count, :] end = time.clock() print "Time for batch 10 photos", end - start print "Hours For Whole Dataset", (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0 print "Images Processed", idx print "Saving fc7 features" h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w') h5f_fc7.create_dataset('fc7_features', data=fc7) h5f_fc7.close() print "Saving image id list" h5f_image_id_list = h5py.File(join(args.data_dir, args.split + '_image_id_list.h5'), 'w') h5f_image_id_list.create_dataset('image_id_list', data=image_id_list) 
h5f_image_id_list.close() print "Done!"
def main(): parser = argparse.ArgumentParser() parser.add_argument('--split', type=str, default='train', help='train/val') parser.add_argument('--model_path', type=str, default='Data/vgg16.tfmodel', help='Pretrained VGG16 Model') parser.add_argument('--data_dir', type=str, default='Data', help='Data directory') parser.add_argument('--batch_size', type=int, default=10, help='Batch Size') args = parser.parse_args() vgg_file = open(args.model_path) vgg16raw = vgg_file.read() vgg_file.close() graph_def = tf.GraphDef() graph_def.ParseFromString(vgg16raw) images = tf.placeholder("float", [None, 224, 224, 3]) tf.import_graph_def(graph_def, input_map={ "images": images }) graph = tf.get_default_graph() for opn in graph.get_operations(): print "Name", opn.name, opn.values() all_data = data_loader.load_questions_answers(args) if args.split == "train": qa_data = all_data['training'] else: qa_data = all_data['validation'] image_ids = {} for qa in qa_data: image_ids[qa['image_id']] = 1 image_id_list = [img_id for img_id in image_ids] print "Total Images", len(image_id_list) sess = tf.Session() fc7 = np.ndarray( (len(image_id_list), 4096 ) ) idx = 0 while idx < len(image_id_list): start = time.clock() image_batch = np.ndarray( (args.batch_size, 224, 224, 3 ) ) count = 0 for i in range(0, args.batch_size): if idx >= len(image_id_list): break image_file = join(args.data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(args.split, args.split, image_id_list[idx]) ) image_batch[i,:,:,:] = utils.load_image_array(image_file) idx += 1 count += 1 feed_dict = { images : image_batch[0:count,:,:,:] } fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0") fc7_batch = sess.run(fc7_tensor, feed_dict = feed_dict) fc7[(idx - count):idx, :] = fc7_batch[0:count,:] end = time.clock() print "Time for batch 10 photos", end - start print "Hours For Whole Dataset" , (len(image_id_list) * 1.0)*(end - start)/60.0/60.0/10.0 print "Images Processed", idx print "Saving fc7 features" h5f_fc7 = h5py.File( 
join(args.data_dir, args.split + '_fc7.h5'), 'w') h5f_fc7.create_dataset('fc7_features', data=fc7) h5f_fc7.close() print "Saving image id list" h5f_image_id_list = h5py.File( join(args.data_dir, args.split + '_image_id_list.h5'), 'w') h5f_image_id_list.create_dataset('image_id_list', data=image_id_list) h5f_image_id_list.close() print "Done!"
def main():
    """Answer a single question about a single image with a trained VQA model.

    Extracts CNN features for the image, tokenizes and pads the question,
    runs the attention VQA model, prints the predicted answer, and writes
    the input image plus both attention-blend maps to Data/gen_samples/
    (also zipped).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--residual_channels', type=int, default=512,
                        help='residual_channels')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--version', type=int, default=1,
                        help='VQA data version')
    parser.add_argument('--model_path', type=str, default=None,
                        help='Trained Model Path')
    parser.add_argument('--feature_layer', type=str, default="block4",
                        help='CONV FEATURE LAYER, fc7, pool5 or block4')
    parser.add_argument('--cnn_model', type=str, default="resnet",
                        help='CNN model')
    parser.add_argument('--text_model', type=str, default="bytenet",
                        help='bytenet/lstm')
    parser.add_argument('--question', type=str,
                        default="What animal is shown in the picture",
                        help='question about the image')
    parser.add_argument('--image_file', type=str,
                        default="Image File path for the question",
                        help='Image File path')
    args = parser.parse_args()

    # Extract image features first; get_conv_features builds its own graph,
    # so reset before constructing the VQA model below.
    conv_features_batch = get_conv_features(args.image_file, args.cnn_model,
                                            args.feature_layer)
    tf.reset_default_graph()

    meta_data = data_loader.load_meta_data(args.version, args.data_dir)
    ans_vocab_rev = meta_data['index_to_ans']
    ques_vocab_rev = meta_data['index_to_qw']
    qw_to_index = meta_data['qw_to_index']

    # Tokenize the question, map unknown words to 'UNK', zero-pad to the
    # model's maximum question length.
    question_words = data_loader.tokenize_mcb(args.question)
    question_indices = [qw_to_index[qw] if qw in qw_to_index
                        else qw_to_index['UNK'] for qw in question_words]
    question_indices += [0 for i in range(len(question_indices),
                                          meta_data['max_question_length'])]
    sentence_batch = np.ndarray((1, meta_data['max_question_length']),
                                dtype='int32')
    sentence_batch[0] = question_indices

    model_options = {
        'question_vocab_size': len(meta_data['index_to_qw']),
        'residual_channels': args.residual_channels,
        'ans_vocab_size': len(meta_data['index_to_ans']),
        'filter_width': 3,
        'img_dim': 14,
        'img_channels': 2048,
        'dilations': [1, 2, 4, 8,
                      1, 2, 4, 8, ],
        'text_model': args.text_model,
        'dropout_keep_prob': 0.6,
        'max_question_length': meta_data['max_question_length'],
        'num_answers': 10
    }
    model = VQA_model_attention.VQA_model(model_options)
    model.build_generator()
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()

    saver = tf.train.Saver()
    if args.model_path:
        saver.restore(sess, args.model_path)

    # Recreate the output directory; bare except silently ignores a
    # missing directory (and any other error).
    try:
        shutil.rmtree('Data/gen_samples')
    except:
        pass
    os.makedirs('Data/gen_samples')

    # Run the generator: predicted answer plus two attention maps.
    pred_answer, prob1, prob2 = sess.run(
        [model.g_predictions, model.g_prob1, model.g_prob2],
        feed_dict={
            model.g_question: sentence_batch,
            model.g_image_features: conv_features_batch
        })
    pred_ans_text = utils.answer_indices_to_text(pred_answer, ans_vocab_rev)

    sample_data = []
    print "Actual vs Prediction"
    for sample_i in range(len(pred_ans_text)):
        print pred_ans_text[sample_i]
        # Blend each attention map over the (224px) input image.
        image_array = utils.load_image_array(args.image_file, 224)
        blend1 = utils.get_blend_map(image_array, prob1[sample_i],
                                     overlap=True)
        blend2 = utils.get_blend_map(image_array, prob2[sample_i],
                                     overlap=True)
        sample_data.append({
            'question': args.question,
            'predicted_answer': pred_ans_text[sample_i],
            'batch_index': sample_i
        })
        misc.imsave('Data/gen_samples/{}_actual_image.jpg'.format(sample_i),
                    image_array)
        misc.imsave('Data/gen_samples/{}_blend1.jpg'.format(sample_i), blend1)
        misc.imsave('Data/gen_samples/{}_blend2.jpg'.format(sample_i), blend2)

    f = open('Data/gen_samples/sample.json', 'wb')
    f.write(json.dumps(sample_data))
    f.close()
    shutil.make_archive('Data/gen_samples', 'zip', 'Data/gen_samples')
def demo_process():
    """Serve the demo endpoint: compress the uploaded image, decode it,
    build a JPEG baseline at matched size, and return URLs plus metrics."""
    # Get the uploaded file object.
    file = request.files['file']
    file = File(file)
    feature_model = request.form['feature_model']
    quality_level = request.form['quality_level']
    if model.quality_level != quality_level:
        model.switch_quality_level(quality_level)
    # Convert the binary upload into a tensor (GPU).
    input = file.load_tensor().cuda()
    # Run the model: encode, then decode from the encoded parts.
    e_data = model.encode(input)
    d_data = model.decode(feat=e_data['feat'], tex=e_data['tex'],
                          intervals=e_data['intervals'],
                          recon=e_data['recon'])
    data = {**e_data, **d_data}
    # Save the compressed (.fic) payload.
    fic_path = get_path(f'{file.name}.fic')
    File.save_binary(
        {
            'feat': data['feat'],
            'tex': data['tex'],
            'intervals': data['intervals'],
            'ext': file.ext,
        }, fic_path)
    # fic size / bits-per-pixel.
    fic_size = path.getsize(fic_path)
    fic_bpp = get_bpp(fic_size)
    # Save the feature part alone, to split feature vs texture sizes.
    feat_path = get_path(f'{file.name}_feat.fic')
    File.save_binary({
        'feat': data['feat'],
    }, feat_path)
    # Feature size / bpp.
    feat_size = path.getsize(feat_path)
    feat_bpp = get_bpp(feat_size)
    # Texture size is the remainder of the fic payload.
    tex_size = fic_size - feat_size
    tex_bpp = get_bpp(tex_size)
    # Images to persist for display.
    imgs = {
        'input': data['input'],
        'recon': data['recon'],
        'resi': data['resi'],
        'resi_decoded': data['resi_decoded'],
        'resi_norm': data['resi_norm'],
        'resi_decoded_norm': data['resi_decoded_norm'],
        'output': data['output'],
    }
    # Save each image and collect its URL.
    img_urls = {}
    for key, value in imgs.items():
        # Save the image.
        file_name = file.name_suffix(key, ext='.bmp')
        file_path = get_path(file_name)
        save_image(value, file_path)
        # Record the image's URL.
        img_urls[key] = get_url(file_name)
    # Compression ratio relative to the saved input BMP.
    input_name = file.name_suffix('input', ext='.bmp')
    input_path = get_path(input_name)
    input_size = path.getsize(input_path)
    fic_compression_ratio = fic_size / input_size
    # JPEG baseline: binary-search quality until it matches the texture size.
    jpeg_name = file.name_suffix('jpeg', ext='.jpg')
    jpeg_path = get_path(jpeg_name)
    dichotomy_compress(input_path, jpeg_path, target_size=tex_size)
    img_urls['jpeg'] = get_url(jpeg_name)
    # JPEG metrics.
    jpeg_size = path.getsize(jpeg_path)
    jpeg_compression_ratio = jpeg_size / input_size
    jpeg_bpp = get_bpp(jpeg_size)
    # Arrays for PSNR/SSIM comparison.
    input_arr = tensor_to_array(data['input'])
    output_arr = tensor_to_array(data['output'])
    jpeg_arr = load_image_array(jpeg_path)
    # Response payload.
    ret = {
        'image': img_urls,
        'data': get_url(f'{file.name}.fic'),
        'eval': {
            'fic_bpp': fic_bpp,
            'feat_bpp': feat_bpp,
            'tex_bpp': tex_bpp,
            'jpeg_bpp': jpeg_bpp,
            'fic_compression_ratio': fic_compression_ratio,
            'jpeg_compression_ratio': jpeg_compression_ratio,
            'fic_psnr': psnr(input_arr, output_arr),
            'fic_ssim': ssim(input_arr, output_arr),
            'jpeg_psnr': psnr(input_arr, jpeg_arr),
            'jpeg_ssim': ssim(input_arr, jpeg_arr),
        },
        'size': {
            'fic': fic_size,
            'input': input_size,
            # 'output': fic_size,
            'output': tex_size,
            'feat': feat_size,
            'tex': tex_size,
            'jpeg': jpeg_size,
        }
    }
    # Respond to the request.
    response = jsonify(ret)
    return response
def main():
    """Extract 2048-d ResNet-v2-101 features for a COCO split into HDF5.

    Rebuilds the TF graph and session every 500 images (outer while loop)
    — presumably to bound graph growth, since resnet_v2_101 is re-declared
    per batch; TODO confirm. Features land in <data_dir>/<split>_res5c.h5.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train',
                        help='train/val/test')
    parser.add_argument('--model_path', type=str,
                        default='./Data/ResNet/resnet_v2_101.ckpt',
                        help='Pretrained RESNET Model')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='Batch Size')
    args = parser.parse_args()

    slim = tf.contrib.slim
    resnet = nets.resnet_v2

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    # NOTE(review): this session is immediately shadowed by the per-chunk
    # sessions created below and is never used for feature extraction.
    sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())

    if args.split == 'test':
        all_data = data_loader.load_test_questions()
        qa_data = all_data['testing']
    else:
        all_data = data_loader.load_questions_answers(args)
        if args.split == "train":
            qa_data = all_data['training']
        else:
            qa_data = all_data['validation']

    # Deduplicate image ids referenced by the QA pairs.
    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    # Test split is capped to the first 100 images.
    length = 100 if args.split == 'test' else len(image_id_list)
    res5c = np.ndarray((length, 2048))
    idx = 0
    SIZE = 299
    # `flag` marks whether the checkpoint has been loaded into the current
    # session; reset whenever the graph/session is rebuilt.
    flag = 0
    while idx < length:
        if idx % 500 == 0:
            flag = 0
        tf.reset_default_graph()
        with tf.Graph().as_default():
            with tf.Session() as sess:
                while idx < length:
                    start = time.clock()
                    image_batch = np.ndarray(
                        (args.batch_size, SIZE, SIZE, 3), dtype=np.float32)
                    count = 0
                    for i in range(0, args.batch_size):
                        # NOTE(review): compares idx against the full
                        # image_id_list, not `length` — for the capped test
                        # split these differ; confirm which is intended.
                        if idx >= len(image_id_list):
                            break
                        if args.split == 'test':
                            image_file = join(
                                args.data_dir,
                                '%s2015/COCO_%s2015_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        else:
                            image_file = join(
                                args.data_dir,
                                '%s2014/COCO_%s2014_%.12d.jpg' %
                                (args.split, args.split, image_id_list[idx]))
                        image_batch[i, :, :, :] = utils.load_image_array(
                            image_file, size=SIZE)
                        idx += 1
                        count += 1
                    # The network is re-declared every batch; AUTO_REUSE
                    # shares variables within the current graph.
                    with slim.arg_scope(resnet.resnet_arg_scope()):
                        logits, end_points = resnet.resnet_v2_101(
                            image_batch[0:count, :, :, :],
                            num_classes=None,
                            is_training=False,
                            reuse=tf.AUTO_REUSE)
                    # Load checkpoint weights once per session.
                    if not flag:
                        vals = slim.get_model_variables('resnet_v2_101')
                        init_fn = slim.assign_from_checkpoint_fn(
                            args.model_path, vals)
                        init_fn(sess)
                        flag = 1
                    res5c_batch = sess.run([logits])
                    # NOTE(review): reshape size is chosen from idx % 10 /
                    # idx % batch_size rather than `count`; this only lines
                    # up when batch_size == 10 — verify for other sizes.
                    if idx % 10 == 0:
                        res5c_batch = res5c_batch[0].reshape(
                            (args.batch_size, 2048))
                    else:
                        res5c_batch = res5c_batch[0].reshape(
                            (idx % args.batch_size, 2048))
                    res5c[(idx - count):idx, :] = res5c_batch[0:count, :]
                    end = time.clock()
                    print("Time for batch 10 photos", end - start)
                    print("Hours For Whole Dataset",
                          (len(image_id_list) * 1.0) *
                          (end - start) / 60.0 / 60.0 / 10.0)
                    print("Images Processed", idx)
                    # Tear down this session every 500 images.
                    if idx % 500 == 0:
                        break

    print("Saving res5c features")
    h5f_res5c = h5py.File(join(args.data_dir, args.split + '_res5c.h5'), 'w')
    h5f_res5c.create_dataset('res5c_features', data=res5c)
    h5f_res5c.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
fic_compression_ratio = fic_size / input_size # jpeg对照组处理 jpeg_name = file.name_suffix('jpeg', ext='.jpg') jpeg_path = get_path(jpeg_name) dichotomy_compress(input_path, jpeg_path, target_size=tex_size) # jpeg 相关参数计算 jpeg_size = path.getsize(jpeg_path) jpeg_compression_ratio = jpeg_size / input_size jpeg_bpp = jpeg_size / conf.IMAGE_PIXEL_NUM # 其他数据 input_arr = tensor_to_array(input) output_arr = tensor_to_array(output) jpeg_arr = load_image_array(jpeg_path) print(json.dumps({ 'eval': { # 'fic_bpp': fic_bpp, # 'feat_bpp': feat_bpp, 'tex_bpp': tex_bpp, 'jpeg_bpp': jpeg_bpp, # 'fic_compression_ratio': fic_compression_ratio, # 'jpeg_compression_ratio': jpeg_compression_ratio, 'fic_psnr': psnr(input_arr, output_arr), 'fic_ssim': ssim(input_arr, output_arr), 'jpeg_psnr': psnr(input_arr, jpeg_arr), 'jpeg_ssim': ssim(input_arr, jpeg_arr), }, 'size': {
def main():
    """Extract VGG16 fc7 features for 20000 VizWiz images into HDF5.

    Image ids are simply 0..19999 (no QA data is consulted); features and
    the id list are written to <data_dir>/<split>_fc7.h5 and
    <data_dir>/<split>_image_id_list.h5.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train',
                        help='train/val')
    parser.add_argument(
        '--model_path',
        type=str,
        default='Data/train2014/Tri Training 2/vgg16-20160129.tfmodel',
        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir', type=str, default='Data/',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=10,
                        help='Batch Size')
    args = parser.parse_args()

    # The serialized GraphDef is binary protobuf, hence mode='rb'.
    vgg_file = io.open(args.model_path, mode='rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)

    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})

    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())

    # Fixed range of ids — assumes images are named 0..19999; TODO confirm.
    image_id_list = [img_id for img_id in range(20000)]
    #print(image_id_list[:5])
    print("Total Images", len(image_id_list))
    #print(0/0)

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0
    # `count` tracks actual batch fill; the final batch may be partial.
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir, '%s_2014/VizWiz_%s_%.12d.jpg' %
                (args.split, args.split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1
        feed_dict = {images: image_batch[0:count, :, :, :]}
        # fc7 activations live on the second ReLU of the imported graph.
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset", (len(image_id_list) * 1.0) *
              (end - start) / 60.0 / 60.0 / 10.0)
        print("Images Processed", idx)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
def main():
    """End-to-end pipeline: prepare QA data, extract VGG16 fc7 features,
    then train the Vis-LSTM VQA model, checkpointing once per epoch.

    NOTE(review): most parsed arguments (rnn_size, epochs, learning_rate,
    batch_size, ...) are shadowed by hard-coded values (batch 10, lr 0.001,
    100 epochs) further down — confirm which should win.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_lstm_layers', type=int, default=2,
                        help='num_lstm_layers')
    parser.add_argument('--fc7_feature_length', type=int, default=4096,
                        help='fc7_feature_length')
    parser.add_argument('--rnn_size', type=int, default=512,
                        help='rnn_size')
    parser.add_argument('--embedding_size', type=int, default=512,
                        help='embedding_size'),
    parser.add_argument('--word_emb_dropout', type=float, default=0.5,
                        help='word_emb_dropout')
    parser.add_argument('--image_dropout', type=float, default=0.5,
                        help='image_dropout')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=200,
                        help='Batch Size')
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Batch Size')
    parser.add_argument('--epochs', type=int, default=100, help='Epochs')
    parser.add_argument('--debug', type=bool, default=False, help='Debug')
    parser.add_argument('--resume_model', type=str, default=None,
                        help='Trained Model Path')
    parser.add_argument('--version', type=int, default=2,
                        help='VQA data version')
    args = parser.parse_args()

    print("Creating QuestionAnswer data")
    prepare_training_data('trainquestions.json', 'trainannotations.json',
                          'valquestions.json', 'valannotations.json')
    print("Prepared given data")
    print("Reading QuestionAnswer data")
    qa_data = load_questions_answers('newqadata.pkl', 'Data')
    print(qa_data['answer_vocab'])
    print("Creating Image features")
    ################################################
    # --- Phase 1: extract fc7 features with a frozen VGG16 graph ---
    split = 'train'
    vgg_file = open('Data/vgg16.tfmodel', 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())
    all_data = load_questions_answers()
    if split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']
    # Deduplicate image ids referenced by the QA pairs.
    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))
    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0
    while idx < len(image_id_list):
        start = time.clock()
        # NOTE(review): buffer is sized 10 but the inner loop runs
        # args.batch_size (default 200) iterations — out-of-bounds writes
        # unless batch_size is 10; confirm intended batch size.
        image_batch = np.ndarray((10, 224, 224, 3))
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join('Data',
                              '%snew/%.1d.jpg' % (split, image_id_list[idx]))
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1
        feed_dict = {images: image_batch[0:count, :, :, :]}
        # fc7 activations live on the second ReLU of the imported graph.
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset", (len(image_id_list) * 1.0) *
              (end - start) / 60.0 / 60.0 / 10.0)
        print("Images Processed", idx)
    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join('Data', 'fc7new.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(join('Data', 'image_id_listnew.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
    ##################################################33
    # --- Phase 2: train the Vis-LSTM model on the extracted features ---
    print("Reading image features")
    fc7_features, image_id_list = load_fc7_features('Data', 'train')
    print("FC7 features", fc7_features.shape)
    print("image_id_list", image_id_list.shape)
    qa_data = load_questions_answers('newqadata.pkl', 'Data')
    print(qa_data['answer_vocab'])
    # Map image id -> row index in the feature matrix.
    image_id_map = {}
    for i in range(len(image_id_list)):
        image_id_map[image_id_list[i]] = i
    # Reverse answer vocabulary: index -> answer text.
    ans_map = {
        qa_data['answer_vocab'][ans]: ans
        for ans in qa_data['answer_vocab']
    }
    model_options = {
        'num_lstm_layers': 2,
        'rnn_size': 512,
        'embedding_size': 512,
        'word_emb_dropout': 0.5,
        'image_dropout': 0.5,
        'fc7_feature_length': 4096,
        'lstm_steps': qa_data['max_question_length'] + 1,
        'q_vocab_size': len(qa_data['question_vocab']),
        'ans_vocab_size': len(qa_data['answer_vocab'])
    }
    model = vis_lstm_model.Vis_lstm_model(model_options)
    input_tensors, t_loss, t_accuracy, t_p = model.build_model()
    train_op = tf.train.AdamOptimizer(0.001).minimize(t_loss)
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()
    #model.summary()
    #plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    if args.resume_model:
        saver.restore(sess, args.resume_model)
    # 100 epochs, mini-batches of 10 QA pairs.
    for i in range(100):
        batch_no = 0
        while (batch_no * 10) < len(qa_data['training']):
            sentence, answer, fc7 = get_training_batch(batch_no, 10,
                                                       fc7_features,
                                                       image_id_map, qa_data,
                                                       'train')
            _, loss_value, accuracy, pred = sess.run(
                [train_op, t_loss, t_accuracy, t_p],
                feed_dict={
                    input_tensors['fc7']: fc7,
                    input_tensors['sentence']: sentence,
                    input_tensors['answer']: answer
                })
            batch_no += 1
            if args.debug:
                for idx, p in enumerate(pred):
                    print(ans_map[p], ans_map[np.argmax(answer[idx])])
                print("Loss", loss_value, batch_no, i)
                print("Accuracy", accuracy)
                print("---------------")
                skplt.metrics.plot_roc_curve(answer[idx], ans_map[p])
                plt.show()
            else:
                print("Loss", loss_value, batch_no, i)
                print("Training Accuracy", accuracy)
                #skplt.metrics.plot_roc_curve(answer[0], pred[0])
                #plt.show()
        # Checkpoint once per epoch.
        save_path = saver.save(sess, "Data/Models/modelnew{}.ckpt".format(i))
def main():
    """Extract ResNet-v2-152 image embeddings for a VQA split into HDF5.

    Builds the slim ResNet graph once, restores the checkpoint once, then
    streams COCO-2017 images through it in batches, saving the embeddings
    and image-id list under FLAGS.data_dir.
    """
    with tf.Graph().as_default():
        utils.prepare_training_data(FLAGS.data_dir)
        all_data = utils.load_questions_answers(FLAGS.data_dir)
        if FLAGS.split == "train":
            qa_data = all_data['training']
        else:
            qa_data = all_data['validation']

        # Deduplicate image ids referenced by the QA pairs.
        image_ids = {}
        for qa in qa_data:
            image_ids[qa['image_id']] = 1
        image_id_list = [img_id for img_id in image_ids]
        print("Total Images", len(image_id_list))

        images = tf.placeholder("float", [None, 224, 224, 3])
        with slim.arg_scope(resnet.resnet_arg_scope()):
            net, _ = resnet.resnet_v2_152(images, FLAGS.output_size,
                                          is_training=False)
        restorer = tf.train.Saver()
        results = np.ndarray((len(image_id_list), FLAGS.output_size))
        idx = 0
        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            # Fix: restore the checkpoint ONCE before the loop — the
            # original reloaded all weights from disk on every batch.
            checkpoint = join(FLAGS.data_dir, FLAGS.checkpoint_path)
            restorer.restore(sess, checkpoint)
            print("Model Restored")
            while idx < len(image_id_list):
                start = time.clock()
                image_batch = np.ndarray((FLAGS.batch_size, 224, 224, 3))
                count = 0
                for i in range(0, FLAGS.batch_size):
                    if idx >= len(image_id_list):
                        break
                    image_file = join(
                        FLAGS.data_dir,
                        '%s2017/%.12d.jpg' % (FLAGS.split,
                                              image_id_list[idx]))
                    image_batch[i, :, :, :] = utils.load_image_array(
                        image_file)
                    idx += 1
                    count += 1
                feed_dict = {images: image_batch[0:count, :, :, :]}
                pred_batch = sess.run(net, feed_dict=feed_dict)
                # Squeeze the spatial 1x1 dims down to (batch, output_size).
                results[(idx - count):idx, :] = np.squeeze(
                    pred_batch)[0:count, :]
                end = time.clock()
                print("Time for batch 10 photos", end - start)
                print("Hours For Whole Dataset",
                      (len(image_id_list) * 1.0) *
                      (end - start) / 60.0 / 60.0 / 10.0)
                print("Images Processed", idx)

        print("Saving image features")
        h5f_img_embed = h5py.File(
            join(FLAGS.data_dir, FLAGS.split + '_img_embed.h5'), 'w')
        h5f_img_embed.create_dataset('img_features', data=results)
        h5f_img_embed.close()
        print("Saving image id list")
        h5f_image_id_list = h5py.File(
            join(FLAGS.data_dir, FLAGS.split + '_image_id_list.h5'), 'w')
        h5f_image_id_list.create_dataset('image_id_list',
                                         data=image_id_list)
        h5f_image_id_list.close()
        print("Done!")
def get_style_features():
    """Compute VGG16 conv4_3 Gram-matrix style features for a BAM subset.

    Loads a frozen VGG16, runs each image (batch size 1) through conv4_3,
    forms the 512x512 Gram matrix, saves per-image .npy files, writes the
    processed image-name list, and returns the flattened feature matrix.

    NOTE(review): paths (/scratch/bam_subset, /scratch/mohsin/...) and the
    image cap (lim=10000 allocated, but only 1000 processed — see the
    while loop) are hard-coded.
    """
    lim = 10000
    model_path = './Data/vgg16.tfmodel'
    split = 'train'
    data_dir = './Data'
    batch_size = 1
    # Binary protobuf GraphDef, hence 'rb'.
    vgg_file = open(model_path, 'rb')
    vgg16raw = vgg_file.read()
    vgg_file.close()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    print("VGG done successfully")
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())
    # Sorted directory listing, truncated to the first `lim` names.
    image_names1 = os.listdir('/scratch/bam_subset/')
    image_names1.sort()
    image_names1 = image_names1[:lim]
    print("No of images", len(image_names1))
    image_names = []
    no_of_images = len(image_names1)
    for i in range(no_of_images):
        im = image_names1[i]
        image_names.append(im)
    print("Images extracted", no_of_images)
    # Synthetic sequential ids, one per image.
    image_id_list = []
    for i in range(len(image_names)):
        image_id_list.append(i)
    print("Total Images", len(image_id_list))
    sess = tf.Session()
    #fc7 = np.ndarray( (len(image_id_list), 4096 ) )
    # One flattened 512x512 Gram matrix per image.
    conv4_3_n = np.ndarray((len(image_id_list), 512 * 512))
    conv4_3 = []
    idx = 0
    from_start = time.clock()
    image_name_list = []
    # NOTE(review): only the first 1000 images are processed even though
    # conv4_3_n is allocated for len(image_id_list) — confirm the cap.
    while idx < 1000:
        start = time.clock()
        image_batch = np.ndarray((batch_size, 224, 224, 3))
        count = 0
        for i in range(0, batch_size):
            if idx >= len(image_id_list):
                break
            image_name_list.append(image_names[idx])
            #image_file = join(data_dir, '%s2014/COCO_%s2014_%.12d.jpg'%(split, split, image_id_list[idx]) )
            image_file = "/scratch/bam_subset/" + image_names[idx]
            print("Image name", image_file)
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1
        #print(image_batch.shape)
        #print(image_batch[0:count,:,:,:].shape)
        #print(image_batch[0:count,:,:,:])
        feed_dict2 = {images: image_batch[0:count, :, :, :]}
        #fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        conv4_3_tensor = graph.get_tensor_by_name("import/conv4_3/Relu:0")
        #fc7_batch = sess.run(fc7_tensor, feed_dict = feed_dict)
        conv4_3_batch = sess.run(conv4_3_tensor, feed_dict=feed_dict2)
        # Gram matrix: flatten the 28x28 spatial grid, then F^T.F (512x512).
        conv4_3_batch = conv4_3_batch.reshape((1, 28 * 28, 512))
        conv4_3_batch = np.matmul(conv4_3_batch[0, :, :].T,
                                  conv4_3_batch[0, :, :])
        temp = np.ndarray((1, 512 * 512))
        temp[0, :] = conv4_3_batch.reshape(512 * 512)
        conv4_3_batch = temp
        #fc7[(idx - count):idx, :] = fc7_batch[0:count,:]
        conv4_3_n[(idx - count):idx, :] = conv4_3_batch[0:count, :]
        #conv4_3.append( conv4_3_batch[0:count,:])
        end = time.clock()
        #print ("Time for batch 1 photos", end - start)
        print("Hours For Whole Dataset", (len(image_id_list) * 1.0) *
              (end - start) / 60.0 / 60.0 / 10.0)
        # NOTE(review): prints the absolute clock value, not elapsed time —
        # should presumably be (end - from_start) / 60.
        print("Time Elapsed:", (from_start) / 60, "Minutes")
        print("Images Processed", idx)
        # Per-image feature dump, keyed by the original file name.
        np.save('/scratch/mohsin/final_features/temp' +
                image_names[idx - 1] + '.npy', conv4_3_batch[0:count, :])
    #np.savetxt('/scratch/sid_imp/conv4_3_features_vgg16.txt', conv4_3)
    # Record the names actually processed, one per line.
    f = open('image_names_list_vgg_conv.txt', 'w')
    for name in image_name_list:
        f.write(name + '\n')
    f.close()
    return conv4_3_n
def main():
    """Extract L2-normalized VGG16 pool5 feature maps ("cnn7", shape
    (n_images, 512, 49)) for the abstract-scenes VQA images of the chosen
    split and save them to HDF5 together with the image-id row ordering.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path', type=str, help='VGGNet')  # serialized VGG16 graph file
    parser.add_argument('--data_dir', type=str, default='Data', help='Data directory')
    parser.add_argument('--batch_size', type=int, default=100)
    args = parser.parse_args()

    # Load the pretrained VGG16 graph into the default tf graph.
    with open(args.model_path, 'rb') as vgg_file:
        vgg16raw = vgg_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("[VGG16] Name", opn.name, list(opn.values()))

    # Collect the unique image ids referenced by the chosen QA split.
    all_data = data_loader.load_questions_answers()
    print(args)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']
    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))
    print(image_id_list[0:10])

    sess = tf.Session()
    idx = 0
    cnn7 = np.ndarray((len(image_id_list), 512, 49))
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))
        # Fill up to batch_size images; the last batch may be partial.
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = join(
                args.data_dir,
                '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[idx]))
            # Keep only the first 3 channels (presumably drops an alpha
            # channel — confirm against utils.load_image_array).
            image_batch[i, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            idx += 1
            count += 1
        feed_dict = {images: image_batch[0:count, :, :, :]}
        cnn7_tensor = graph.get_tensor_by_name("import/pool5:0")
        cnn7_batch = sess.run(cnn7_tensor, feed_dict=feed_dict)
        # Channels-first, spatial dims flattened: (count, 512, 49).
        cnn7_batch = np.transpose(cnn7_batch, [0, 3, 1, 2])
        cnn7_batch = cnn7_batch.reshape(count, 512, -1)
        # BUG FIX: normalize only the `count` rows actually present — the
        # original iterated range(args.batch_size), which raises IndexError
        # on the final partial batch (cnn7_batch has just `count` rows).
        for i in range(count):
            cnn7_batch[i, :, :] = cnn7_batch[i, :, :] / np.linalg.norm(
                cnn7_batch[i, :, :], axis=0, keepdims=True)
        cnn7[(idx - count):idx, ...] = cnn7_batch[0:count, ...]
        end = time.clock()
        print("Time for batch 10 photos", end - start)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)
        print("Images Processed", idx)

    print("Saving cnn7 features")
    h5f_cnn7 = h5py.File(join(args.data_dir, args.split + '_cnn7.h5'), 'w')
    h5f_cnn7.create_dataset('cnn7_features', data=cnn7)
    h5f_cnn7.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
def main(image_path="test.jpg", question="what is in the image?"):
    """Answer a free-form question about an image (VQA inference).

    Pipeline:
      1. Run the image through pretrained ResNet-152 v2 -> 1001-d feature.
      2. Tokenize the question into vocabulary ids (right-aligned so the
         zero padding sits at the front; unknown words map to 'UNK').
      3. Feed both into the trained Vis_lstm_model generator and print
         the best answer plus the top-5 answers.

    Returns (ans_map, answer_probab_tuples): ans_map maps answer ids to
    answer strings; answer_probab_tuples is a list of (-probability,
    answer_id) sorted by descending probability.
    """
    slim = tf.contrib.slim
    resnet = nets.resnet_v2

    # Former tf.app.flags configuration, kept as plain locals.
    # (The dead commented-out FLAGS block and the unused qvocab/q_map
    # locals from the original were removed.)
    flags_image_path = image_path
    flags_question = question
    flags_img_checkpoint_path = "./data/pretrain/resnet152/resnet_v2_152.ckpt"
    flags_checkpoint_path = "./data/pretrain/model"
    flags_num_lstm_layers = 2
    flags_img_feat_len = 1001
    flags_rnn_size = 300
    flags_que_feat_len = 300
    flags_word_dropout = 0.5
    flags_img_dropout = 0.5
    flags_data_dir = "./data"

    vocab_data = utils.get_question_answer_vocab(flags_data_dir)

    # --- Stage 1: image feature from ResNet-152 v2 ---
    with tf.Graph().as_default():
        images = tf.placeholder("float32", [None, 224, 224, 3])
        with slim.arg_scope(resnet.resnet_arg_scope()):
            net, _ = resnet.resnet_v2_152(images, 1001, is_training=False)
        restorer = tf.train.Saver()
        with tf.Session() as sess:
            start = time.clock()
            image_array = utils.load_image_array(flags_image_path)
            image_feed = np.ndarray((1, 224, 224, 3))
            image_feed[0] = image_array  # single-image batch
            restorer.restore(sess, flags_img_checkpoint_path)
            print("Image Model loaded")
            feed_dict = {images: image_feed}
            img_feature = sess.run(net, feed_dict=feed_dict)
            img_feature = np.squeeze(img_feature)
            end = time.clock()
            print("Time elapsed", end - start)
            print("Image processed")

    model_options = {
        'num_lstm_layers': flags_num_lstm_layers,
        'rnn_size': flags_rnn_size,
        'embedding_size': flags_que_feat_len,
        'word_emb_dropout': flags_word_dropout,
        'image_dropout': flags_img_dropout,
        'img_feature_length': flags_img_feat_len,
        'lstm_steps': vocab_data['max_question_length'] + 1,
        'q_vocab_size': len(vocab_data['question_vocab']),
        'ans_vocab_size': len(vocab_data['answer_vocab'])
    }

    # --- Stage 2: encode the question ---
    question_vocab = vocab_data['question_vocab']
    word_regex = re.compile(r'\w+')
    question_ids = np.zeros(
        (1, vocab_data['max_question_length']), dtype='int32')
    question_words = re.findall(word_regex, flags_question)
    # Right-align the tokens; leading slots stay 0 (padding).
    base = vocab_data['max_question_length'] - len(question_words)
    for i in range(0, len(question_words)):
        if question_words[i] in question_vocab:
            question_ids[0][base + i] = question_vocab[question_words[i]]
        else:
            question_ids[0][base + i] = question_vocab['UNK']

    # Reverse map: answer id -> answer string.
    ans_map = {vocab_data['answer_vocab'][ans]: ans
               for ans in vocab_data['answer_vocab']}

    # --- Stage 3: run the VQA model ---
    with tf.Graph().as_default():
        model = vis_lstm_model.Vis_lstm_model(model_options)
        input_tensors, t_prediction, t_ans_probab = model.build_generator()
        restorer = tf.train.Saver()
        with tf.Session() as sess:
            checkpoint = tf.train.latest_checkpoint(flags_checkpoint_path)
            restorer.restore(sess, checkpoint)
            pred, answer_probab = sess.run(
                [t_prediction, t_ans_probab],
                feed_dict={
                    input_tensors['img']: np.reshape(img_feature, [1, 1001]),
                    input_tensors['sentence']: question_ids,
                })
            print("Ans:", ans_map[pred[0]])
            # Negate so an ascending sort yields descending probability.
            answer_probab_tuples = [(-answer_probab[0][idx], idx)
                                    for idx in range(len(answer_probab[0]))]
            answer_probab_tuples.sort()
            print("Top Answers")
            for i in range(5):
                print(ans_map[answer_probab_tuples[i][1]])
    return (ans_map, answer_probab_tuples)
def main():
    """Extract VGG16 fc7 features for all COCO images referenced by the
    QA data of the chosen split and save them to HDF5, together with the
    image-id row ordering.

    Images that fail to load are logged to err.txt (one image id per
    line) and a cocodataset.org download URL is printed for each; their
    rows in the feature matrix keep uninitialized memory, so downstream
    code must treat the ids listed in err.txt as invalid.
    """
    # Close the config handle (the original leaked json.load(open(...))).
    with open('config.json') as config_file:
        config = json.load(config_file)
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default=config['split'],
                        help='train/val')
    parser.add_argument('--model_path', type=str, default=config['model_path'],
                        help='Pretrained VGG16 Model')
    parser.add_argument('--qa_dir', type=str, default=config['qa_dir'],
                        help='QA Data directory')
    parser.add_argument('--data_dir', type=str, default=config['data_dir'],
                        help='Common Data directory')
    parser.add_argument('--batch_size', type=int, default=10, help='Batch Size')
    args = parser.parse_args()

    # Load the serialized VGG16 graph and import it into the default graph.
    with open(args.model_path, 'rb') as vgg_file:
        vgg16raw = vgg_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("Name", opn.name, list(opn.values()))

    # Unique image ids referenced by the chosen split.
    all_data = data_loader.load_questions_answers(args.qa_dir)
    if args.split == "train":
        qa_data = all_data['training']
    else:
        qa_data = all_data['validation']
    image_ids = {}
    for qa in qa_data:
        image_ids[qa['image_id']] = 1
    image_id_list = [img_id for img_id in image_ids]
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0
    err_file = open('err.txt', 'w', encoding='utf-8')
    try:
        while idx < len(image_id_list):
            start = time.clock()
            image_batch = np.ndarray((args.batch_size, 224, 224, 3))
            count = 0
            for i in range(0, args.batch_size):
                if idx >= len(image_id_list):
                    break
                filename = 'COCO_%s2014_%.12d.jpg' % (args.split,
                                                      image_id_list[idx])
                image_file = join(args.data_dir, '%s2014' % args.split,
                                  filename)
                try:
                    image_batch[i, :, :, :] = utils.load_image_array(image_file)
                except (ValueError, FileNotFoundError, OSError):
                    # NOTE(review): the failed slot keeps uninitialized
                    # memory and is still counted; err.txt records the id
                    # so callers can drop the corresponding feature row.
                    print("http://images.cocodataset.org/%s2014/%s"
                          % (args.split, filename))
                    err_file.write(str(image_id_list[idx]) + '\n')
                idx += 1
                count += 1
            err_file.flush()
            feed_dict = {images: image_batch[0:count, :, :, :]}
            fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
            fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
            fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
            end = time.clock()
            print("Time for batch 10 photos", end - start)
            print("Hours For Whole Dataset",
                  (len(image_id_list) * 1.0) * (end - start) / 60.0 / 60.0 / 10.0)
            print("Images Processed", idx)
    finally:
        err_file.close()  # the original only flushed, never closed

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
def main():
    """Extract VGG16 fc7 features for every image in ./Images/ (sorted by
    filename) and save the (n_images, 4096) matrix with np.savetxt.

    Fixes relative to the original:
      * model file opened in binary mode ('rb') — ParseFromString needs
        raw bytes, and text mode breaks on Python 3 / Windows;
      * Python-2 print statements converted to print() calls, consistent
        with the rest of this file (which is Python-3-only code);
      * "Time Elapsed" now reports end - from_start instead of printing
        the constant from_start / 60.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path', type=str, default='Data/vgg16.tfmodel',
                        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')
    args = parser.parse_args()

    # Load the serialized VGG16 graph ('rb': binary protobuf file).
    with open(args.model_path, 'rb') as vgg_file:
        vgg16raw = vgg_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(vgg16raw)
    print("VGG done successfully")
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for opn in graph.get_operations():
        print("Name", opn.name, opn.values())

    # Deterministic ordering; row i of fc7 corresponds to image_names[i].
    image_names = os.listdir('./Images/')
    image_names.sort()
    image_id_list = list(range(len(image_names)))
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    idx = 0
    from_start = time.clock()
    while idx < len(image_id_list):
        start = time.clock()
        image_batch = np.ndarray((args.batch_size, 224, 224, 3))
        count = 0
        for i in range(0, args.batch_size):
            if idx >= len(image_id_list):
                break
            image_file = "Images/" + image_names[idx]
            image_batch[i, :, :, :] = utils.load_image_array(image_file)
            idx += 1
            count += 1
        feed_dict = {images: image_batch[0:count, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feed_dict)
        fc7[(idx - count):idx, :] = fc7_batch[0:count, :]
        end = time.clock()
        print("Time for batch 1 photos", end - start)
        # BUG FIX: measure elapsed time against from_start.
        print("Time Elapsed:", (end - from_start) / 60, "Minutes")
        print("Images Processed", idx)
    np.savetxt('FC7_Features_Animation', fc7)
def main():
    """Dump VGG16 fc7 features for the abstract-scenes VQA images.

    Runs every image referenced by the chosen QA split through the
    imported VGG16 graph and writes two HDF5 files: <split>_fc7.h5
    (the 4096-d fc7 features) and <split>_image_id_list.h5 (the image
    ids in the same row order).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--split', type=str, default='train', help='train/val')
    parser.add_argument('--model_path', type=str, default='Data/vgg16.tfmodel',
                        help='Pretrained VGG16 Model')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data directory')
    parser.add_argument('--batch_size', type=int, default=10, help='Batch Size')
    args = parser.parse_args()

    # Read the serialized VGG16 graph and splice it into the default graph.
    with open(args.model_path, 'rb') as model_file:
        raw_graph = model_file.read()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(raw_graph)
    images = tf.placeholder("float", [None, 224, 224, 3])
    tf.import_graph_def(graph_def, input_map={"images": images})
    graph = tf.get_default_graph()
    for op in graph.get_operations():
        print("Name", op.name, list(op.values()))

    all_data = data_loader.load_questions_answers()
    print(args)
    qa_data = all_data['training'] if args.split == "train" else all_data['validation']

    # De-duplicate image ids while keeping first-seen order.
    unique_ids = {entry['image_id']: 1 for entry in qa_data}
    image_id_list = list(unique_ids)
    print("Total Images", len(image_id_list))

    sess = tf.Session()
    fc7 = np.ndarray((len(image_id_list), 4096))
    cursor = 0
    while cursor < len(image_id_list):
        tick = time.clock()
        batch_buf = np.ndarray((args.batch_size, 224, 224, 3))
        filled = 0
        for slot in range(0, args.batch_size):
            if cursor >= len(image_id_list):
                break
            image_file = join(
                args.data_dir,
                '%s2015/abstract_v002_%s2015_%.12d.png' %
                (args.split, args.split, image_id_list[cursor]))
            # Keep only the first 3 channels (presumably dropping an
            # alpha channel — confirm against utils.load_image_array).
            batch_buf[slot, :, :, :] = utils.load_image_array(
                image_file)[:, :, :3]
            cursor += 1
            filled += 1
        feeds = {images: batch_buf[0:filled, :, :, :]}
        fc7_tensor = graph.get_tensor_by_name("import/Relu_1:0")
        fc7_batch = sess.run(fc7_tensor, feed_dict=feeds)
        fc7[(cursor - filled):cursor, :] = fc7_batch[0:filled, :]
        tock = time.clock()
        print("Time for batch 10 photos", tock - tick)
        print("Hours For Whole Dataset",
              (len(image_id_list) * 1.0) * (tock - tick) / 60.0 / 60.0 / 10.0)
        print("Images Processed", cursor)

    print("Saving fc7 features")
    h5f_fc7 = h5py.File(join(args.data_dir, args.split + '_fc7.h5'), 'w')
    h5f_fc7.create_dataset('fc7_features', data=fc7)
    h5f_fc7.close()
    print("Saving image id list")
    h5f_image_id_list = h5py.File(
        join(args.data_dir, args.split + '_image_id_list.h5'), 'w')
    h5f_image_id_list.create_dataset('image_id_list', data=image_id_list)
    h5f_image_id_list.close()
    print("Done!")
return (r_ssim + g_ssim + b_ssim) / 3 # 均值 mu1 = img1.mean() mu2 = img2.mean() # 方差 sigma1 = np.sqrt(((img1 - mu1)**2).mean()) sigma2 = np.sqrt(((img2 - mu2)**2).mean()) # 协方差 sigma12 = ((img1 - mu1) * (img2 - mu2)).mean() # 超参数 k1, k2, L = 0.01, 0.03, 255 c1 = (k1 * L)**2 c2 = (k2 * L)**2 c3 = c2 / 2 # 按照SSIM公式计算 l12 = (2 * mu1 * mu2 + c1) / (mu1**2 + mu2**2 + c1) c12 = (2 * sigma1 * sigma2 + c2) / (sigma1**2 + sigma2**2 + c2) s12 = (sigma12 + c3) / (sigma1 * sigma2 + c3) ssim_val = l12 * c12 * s12 return ssim_val if __name__ == "__main__": sys.path.append(path.dirname(path.dirname(path.realpath(__file__)))) from utils import load_image_array input = load_image_array(sys.argv[1]) output = load_image_array(sys.argv[2]) print(psnr(input, output)) print(ssim(input, output))