# Variant 1: run the three UNet encoders (acoustic, audio, video) over the
# dataset and save the extracted feature maps, labels and scenarios as .npy files.
def plotdecodeimages():
    dataset = FLAGS.train_file.split('/')[-1]
    dataset = dataset.split('.')[0]
    s = FLAGS.init_checkpoint.split('/')[-1]
    name = (s.split('_')[1]).split('.ckpt')[0]
    nameac = '{}_{}_{}'.format(dataset, 'Ac', name)
    nameaudio = '{}_{}_{}'.format(dataset, 'Audio', name)
    nameimages = '{}_{}_{}'.format(dataset, 'Video', name)
    data_dirac = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [nameac])
    data_diraudio = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [nameaudio])
    data_dirimages = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [nameimages])
    random_pick = True
    build_spectrogram = FLAGS.model in ('AudioCoefficients', 'ResNet50',
                                        'HearNet', 'UNet', 'ResNet18_v1')
    normalize = FLAGS.model == 'HearNet'

    # Create data loaders according to the received program arguments
    print('{} - Creating data loaders'.format(datetime.now()))
    modalities = [0, 1, 2]  # use all three modalities
    with tf.device('/cpu:0'):
        train_data = ActionsDataLoader(FLAGS.train_file, 'inference',
                                       batch_size=FLAGS.batch_size,
                                       num_epochs=1, sample_length=1,
                                       datakind='outdoor', buffer_size=10,
                                       shuffle=False, normalize=normalize,
                                       build_spectrogram=build_spectrogram,
                                       correspondence=0,
                                       random_pick=random_pick,
                                       modalities=modalities,
                                       nr_frames=FLAGS.nr_frames)
    data_size = train_data.num_samples

    # Build model
    print('{} - Building model'.format(datetime.now()))
    with tf.device('/gpu:0'):
        modelimages = UNet(input_shape=[224, 298, 3])
        modelaudio = UNetSound(input_shape=[99, 257, 1])
        modelac = UNetAc(input_shape=[36, 48, 12])

    handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_data.data.output_types, train_data.data.output_shapes)
    train_iterat = train_data.data.make_initializable_iterator()
    next_batch = iterator.get_next()

    logenergy = tf.slice(next_batch[0], [0, 0, 0, 0, 0], [-1, 1, 36, 48, 1])
    logenergy = tf.reshape(logenergy, shape=[-1, 36, 48, 1])
    mfcc = tf.reshape(next_batch[1], shape=[-1, 99, 257, 1])
    images = tf.reshape(next_batch[2], shape=[-1, 224, 298, 3])
    acoustic = tf.reshape(next_batch[0], shape=[-1, 36, 48, 12])

    # Min-max normalize the log-energy map per sample over the spatial axes
    logenergy = logenergy - tf.reduce_min(logenergy, axis=[1, 2], keep_dims=True)
    logenergy = logenergy / tf.reduce_max(logenergy, axis=[1, 2], keep_dims=True)

    if FLAGS.datatype == 'music':
        num_actions = 9
        num_locations = 11  # maximum number of videos for a class
    else:  # datakind == 'outdoor'
        num_actions = 10
        num_locations = 61
    num_embedding = 128
    labels = tf.reshape(next_batch[3], shape=[-1, num_actions])
    scenario = tf.reshape(next_batch[4], shape=[-1, num_locations])

    modelac._build_model(acoustic)
    modelaudio._build_model(mfcc)
    modelimages._build_model(images)

    extractedac = modelac.network['features']
    extractedaudio = modelaudio.network['features']
    extractedvideo = modelimages.network['features']

    var_listac = slim.get_variables(modelac.scope + '/')
    var_listaudio = slim.get_variables(modelaudio.scope + '/')
    var_listimages = slim.get_variables(modelimages.scope + '/')

    # makedirs creates all intermediate directories (mkdir creates only one)
    for directory in (data_dirac, data_diraudio, data_dirimages):
        if os.path.exists(directory):
            print("Features already computed!")
        else:
            os.makedirs(directory)

    total_size = 0
    batch_count = 0
    dataset_list_featuresac = np.zeros([data_size, 9, 12, num_embedding], dtype=float)
    dataset_list_featuresaudio = np.zeros([data_size, 6, 16, num_embedding], dtype=float)
    dataset_list_featuresimages = np.zeros([data_size, 14, 18, num_embedding], dtype=float)
    dataset_labels = np.zeros([data_size, num_actions], dtype=int)
    dataset_scenario = np.zeros([data_size, num_locations], dtype=int)

    print('{} - Starting'.format(datetime.now()))
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        train_handle = session.run(train_iterat.string_handle())
        # Restore all three encoders from the same checkpoint
        saver = tf.train.Saver(var_list=var_listac + var_listaudio + var_listimages)
        saver.restore(session, FLAGS.init_checkpoint)
        print('{} - Done'.format(datetime.now()))

        session.run(train_iterat.initializer)
        while True:
            try:
                labels_data, scenario_data, featuresac, featuresaudio, featuresimages = session.run(
                    [labels, scenario, extractedac, extractedaudio, extractedvideo],
                    feed_dict={
                        handle: train_handle,
                        modelac.network['keep_prob']: 1.0,
                        modelac.network['is_training']: 0,
                        modelaudio.network['keep_prob']: 1.0,
                        modelaudio.network['is_training']: 0,
                        modelimages.network['keep_prob']: 1.0,
                        modelimages.network['is_training']: 0
                    })
                batchnum = labels_data.shape[0]
                # Copy this batch into the preallocated arrays
                dataset_list_featuresimages[total_size:total_size + batchnum, :] = featuresimages
                dataset_list_featuresaudio[total_size:total_size + batchnum, :] = featuresaudio
                dataset_list_featuresac[total_size:total_size + batchnum, :] = featuresac
                dataset_labels[total_size:total_size + batchnum, :] = labels_data
                dataset_scenario[total_size:total_size + batchnum, :] = scenario_data
                # Increase the number of processed samples
                total_size += batchnum
                print('{} samples'.format(total_size))
            except tf.errors.OutOfRangeError:
                break
            batch_count += 1

    print('{}'.format(data_size))
    print('{} - Completed, got {} samples'.format(datetime.now(), total_size))
    np.save('{}/{}_data.npy'.format(data_dirac, dataset), dataset_list_featuresac)
    np.save('{}/{}_labels.npy'.format(data_dirac, dataset), dataset_labels)
    np.save('{}/{}_scenario.npy'.format(data_dirac, dataset), dataset_scenario)
    np.save('{}/{}_data.npy'.format(data_diraudio, dataset), dataset_list_featuresaudio)
    np.save('{}/{}_labels.npy'.format(data_diraudio, dataset), dataset_labels)
    np.save('{}/{}_scenario.npy'.format(data_diraudio, dataset), dataset_scenario)
    np.save('{}/{}_data.npy'.format(data_dirimages, dataset), dataset_list_featuresimages)
    np.save('{}/{}_labels.npy'.format(data_dirimages, dataset), dataset_labels)
    np.save('{}/{}_scenario.npy'.format(data_dirimages, dataset), dataset_scenario)
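
# --- Usage sketch (not part of the original script) -------------------------
# A minimal example of how the .npy files written above could be read back for
# a downstream task. `data_dir` and `dataset` are placeholders that must match
# the names this extraction function generated; relies on the module's
# `numpy as np` import.
def load_extracted_features(data_dir, dataset):
    features = np.load('{}/{}_data.npy'.format(data_dir, dataset))
    labels = np.load('{}/{}_labels.npy'.format(data_dir, dataset))
    scenario = np.load('{}/{}_scenario.npy'.format(data_dir, dataset))
    # labels and scenario are stored one-hot; argmax recovers class indices
    return features, np.argmax(labels, axis=1), np.argmax(scenario, axis=1)
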
# Variant 2: reconstruct acoustic images through the joint (multimodal VAE)
# associator and save original/reconstructed pairs as figures.
def plotdecodeimages():
    dataset = FLAGS.train_file.split('/')[-1]
    dataset = dataset.split('.')[0]
    s = FLAGS.init_checkpoint.split('/')[-1]
    name = (s.split('_')[1]).split('.ckpt')[0]
    name = '{}_{}_{}_{}'.format(FLAGS.model, dataset, 'Ac', name)
    data_dir = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [name])
    random_pick = True
    build_spectrogram = FLAGS.model in ('AudioCoefficients', 'ResNet50',
                                        'HearNet', 'UNet', 'ResNet18_v1')
    normalize = FLAGS.model == 'HearNet'

    # Create data loaders according to the received program arguments
    print('{} - Creating data loaders'.format(datetime.now()))
    modalities = [0, 1, 2]  # use all three modalities
    with tf.device('/cpu:0'):
        train_data = ActionsDataLoader(FLAGS.train_file, 'inference',
                                       batch_size=FLAGS.batch_size,
                                       num_epochs=1, sample_length=1,
                                       datakind='outdoor', buffer_size=10,
                                       shuffle=False, normalize=normalize,
                                       build_spectrogram=build_spectrogram,
                                       correspondence=0,
                                       random_pick=random_pick,
                                       modalities=modalities,
                                       nr_frames=FLAGS.nr_frames)
    data_size = train_data.num_samples

    # Build model
    print('{} - Building model'.format(datetime.now()))
    with tf.device('/gpu:0'):
        modelimages = UNet(input_shape=[224, 298, 3])
        modelaudio = UNetSound(input_shape=[99, 257, 1])
        modelac = UNetAc(input_shape=[36, 48, 12])
        if FLAGS.fusion:
            model_associator = JointTwomvae2()
        elif FLAGS.onlyaudiovideo:
            model_associator = JointTwomvae()
        else:
            model_associator = Jointmvae()

    handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_data.data.output_types, train_data.data.output_shapes)
    train_iterat = train_data.data.make_initializable_iterator()
    next_batch = iterator.get_next()

    mfcc = tf.reshape(next_batch[1], shape=[-1, 99, 257, 1])
    mfcc = tf.image.resize_bilinear(mfcc, [193, 257], align_corners=False)
    video = tf.reshape(next_batch[2], shape=[-1, 224, 298, 3])
    acoustic = tf.reshape(next_batch[0], shape=[-1, 36, 48, 12])

    if FLAGS.datatype == 'music':
        num_actions = 9
        num_locations = 11  # maximum number of videos for a class
    else:  # datakind == 'outdoor'
        num_actions = 10
        num_locations = 61
    labels = tf.reshape(next_batch[3], shape=[-1, num_actions])
    scenario = tf.reshape(next_batch[4], shape=[-1, num_locations])

    output = modelac._build_network(acoustic)
    outputvideo = modelimages._build_network(video)
    outputaudio = modelaudio._build_network(mfcc)
    # Fuse the per-modality feature maps and obtain new feature maps
    if FLAGS.fusion or FLAGS.onlyaudiovideo:
        model_associator._build_model(outputvideo, outputaudio)
    else:
        model_associator._build_model(output, outputvideo, outputaudio)
    # The acoustic decoder is always built; audio/video decoders only when
    # all three modalities are associated
    modelac._build_model(model_associator.outputac)
    if not FLAGS.onlyaudiovideo:
        modelaudio._build_model(model_associator.outputaudio)
        modelimages._build_model(model_associator.outputvideo)

    var_listac = slim.get_variables(modelac.scope + '/')
    var_listaudio = slim.get_variables(modelaudio.scope + '/')
    var_listimages = slim.get_variables(modelimages.scope + '/')
    var_listassociator = slim.get_variables(model_associator.scope + '/')

    # makedirs creates all intermediate directories (mkdir creates only one)
    if os.path.exists(data_dir):
        print("Features already computed!")
    else:
        os.makedirs(data_dir)

    num = 0
    total_size = 0
    batch_count = 0
    print('{} - Starting'.format(datetime.now()))
    namesimage = ['Acoustic image', 'Reconstructed']
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        train_handle = session.run(train_iterat.string_handle())
        saver = tf.train.Saver(var_list=var_listac + var_listaudio +
                               var_listimages + var_listassociator)
        saver.restore(session, FLAGS.init_checkpoint)
        print('{} - Done'.format(datetime.now()))

        session.run(train_iterat.initializer)
        while True:
            try:
                data, reconstructed = session.run(
                    [acoustic, modelac.output],
                    feed_dict={
                        handle: train_handle,
                        modelac.keep_prob: 1.0,
                        modelac.is_training: 0,
                        modelaudio.keep_prob: 1.0,
                        modelaudio.is_training: 0,
                        modelimages.keep_prob: 1.0,
                        modelimages.is_training: 0
                    })
                total_size += reconstructed.shape[0]
                print('{} samples'.format(total_size))
                imagesvideo = np.stack((data, reconstructed), 0)
                for h in range(reconstructed.shape[0]):
                    # Plot original and reconstructed acoustic images:
                    # the 12 channels are shown as four RGB triplets
                    fig, axs = plt.subplots(4, 2, figsize=(6, 2.9 * 4))
                    plt.tight_layout(pad=1.0)
                    fig.suptitle('Reconstructed image')
                    for i in range(2):
                        for j in range(4):
                            axs[j, i].imshow(imagesvideo[i, h, :, :, j * 3:(j + 1) * 3])
                            axs[j, i].axis('off')
                            axs[j, i].set_title('{}'.format(namesimage[i]))
                    outImage_path = '{}/{}_images_{}.png'.format(data_dir, dataset, num)
                    plt.savefig(outImage_path)
                    plt.close(fig)  # release the figure to avoid leaking memory
                    num += 1
            except tf.errors.OutOfRangeError:
                break
            batch_count += 1

    print('{}'.format(data_size))
    print('{} - Completed, got {} samples'.format(datetime.now(), total_size))
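
# --- Illustrative helper (assumption, not in the original code) -------------
# The 4x2 plotting grid above shows each 12-channel acoustic map as four
# pseudo-RGB panels (channels 0-2, 3-5, 6-8, 9-11). The same slicing, isolated
# as a small numpy helper for a single [H, W, 12] map:
def acoustic_map_to_panels(ac_map):
    return [ac_map[:, :, j * 3:(j + 1) * 3] for j in range(4)]
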
# Variant 3: reconstruct a single modality (video, audio, acoustic image or
# energy) with the encoder selected by FLAGS.encoder_type and plot
# original/reconstructed pairs.
def plotdecodeimages():
    dataset = FLAGS.train_file.split('/')[-1]
    dataset = dataset.split('.')[0]
    s = FLAGS.init_checkpoint.split('/')[-1]
    name = (s.split('_')[1]).split('.ckpt')[0]
    name = '{}_{}_{}_{}'.format(FLAGS.model, dataset, FLAGS.encoder_type, name)
    data_dir = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [name])
    random_pick = True
    build_spectrogram = FLAGS.model in ('AudioCoefficients', 'ResNet50',
                                        'HearNet', 'UNet', 'ResNet18_v1')
    normalize = FLAGS.model == 'HearNet'

    # Create data loaders according to the received program arguments
    print('{} - Creating data loaders'.format(datetime.now()))
    modalities = [0, 1, 2]  # use all three modalities
    with tf.device('/cpu:0'):
        train_data = ActionsDataLoader(FLAGS.train_file, 'inference',
                                       batch_size=FLAGS.batch_size,
                                       num_epochs=1, sample_length=1,
                                       datakind='outdoor', buffer_size=10,
                                       shuffle=False, normalize=normalize,
                                       build_spectrogram=build_spectrogram,
                                       correspondence=0,
                                       random_pick=random_pick,
                                       modalities=modalities,
                                       nr_frames=FLAGS.nr_frames)

    # Build model
    print('{} - Building model'.format(datetime.now()))
    with tf.device('/gpu:0'):
        if FLAGS.encoder_type == 'Video':
            model = UNet(input_shape=[224, 298, 3])
        elif FLAGS.encoder_type == 'Audio':
            model = UNetSound(input_shape=[99, 257, 1])
        elif FLAGS.encoder_type == 'Ac':
            model = UNetAc(input_shape=[36, 48, 12])
        else:
            model = UNetE(input_shape=[36, 48, 1])

    handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_data.data.output_types, train_data.data.output_shapes)
    train_iterat = train_data.data.make_initializable_iterator()
    next_batch = iterator.get_next()

    logenergy = tf.slice(next_batch[0], [0, 0, 0, 0, 0], [-1, 1, 36, 48, 1])
    logenergy = tf.reshape(logenergy, shape=[-1, 36, 48, 1])
    mfcc = tf.reshape(next_batch[1], shape=[-1, 99, 257, 1])
    mfcc = tf.image.resize_bilinear(mfcc, [193, 257], align_corners=False)
    images = tf.reshape(next_batch[2], shape=[-1, 224, 298, 3])
    acoustic = tf.reshape(next_batch[0], shape=[-1, 36, 48, 12])

    # Min-max normalize the log-energy map per sample over the spatial axes
    logenergy = logenergy - tf.reduce_min(logenergy, axis=[1, 2], keep_dims=True)
    logenergy = logenergy / tf.reduce_max(logenergy, axis=[1, 2], keep_dims=True)

    if FLAGS.encoder_type == 'Video':
        considered_modality = images
    elif FLAGS.encoder_type == 'Audio':
        considered_modality = mfcc
    elif FLAGS.encoder_type == 'Ac':
        considered_modality = acoustic
    else:
        considered_modality = logenergy

    model._build_model(considered_modality)
    output = model.output
    var_list2 = slim.get_variables(model.scope + '/')

    # makedirs creates all intermediate directories (mkdir creates only one)
    if os.path.exists(data_dir):
        print("Features already computed!")
    else:
        os.makedirs(data_dir)

    total_size = 0
    batch_count = 0
    num = 0
    print('{} - Starting'.format(datetime.now()))
    if FLAGS.encoder_type == 'Video':
        namesimage = ['RGB', 'Reconstructed']
    elif FLAGS.encoder_type == 'Audio':
        namesimage = ['Spectrogram', 'Reconstructed']
    elif FLAGS.encoder_type == 'Ac':
        namesimage = ['Acoustic image', 'Reconstructed']
    else:
        namesimage = ['Energy', 'Reconstructed']

    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        train_handle = session.run(train_iterat.string_handle())
        # Initialize the student model, or restore it from the checkpoint
        if FLAGS.init_checkpoint is None:
            print('{} - Initializing student model'.format(datetime.now()))
            model.init_model(session, FLAGS.init_checkpoint)
            print('{} - Done'.format(datetime.now()))
        else:
            print('{} - Restoring student model'.format(datetime.now()))
            saver = tf.train.Saver(var_list=var_list2)
            saver.restore(session, FLAGS.init_checkpoint)
            print('{} - Done'.format(datetime.now()))

        session.run(train_iterat.initializer)
        # The three original loops differed only in how each sample is plotted,
        # so they are folded into one loop with a plotting branch.
        while True:
            try:
                data, reconstructed = session.run(
                    [considered_modality, output],
                    feed_dict={
                        handle: train_handle,
                        model.network['keep_prob']: 1.0,
                        model.network['is_training']: 0
                    })
                total_size += reconstructed.shape[0]
                imagesvideo = np.stack((data, reconstructed), 0)
                for h in range(reconstructed.shape[0]):
                    if FLAGS.encoder_type == 'Ac':
                        # 12 channels shown as four RGB triplets
                        fig, axs = plt.subplots(4, 2, figsize=(6, 2.9 * 4))
                        plt.tight_layout(pad=1.0)
                        fig.suptitle('Reconstructed image')
                        for i in range(2):
                            for j in range(4):
                                axs[j, i].imshow(imagesvideo[i, h, :, :, j * 3:(j + 1) * 3])
                                axs[j, i].axis('off')
                                axs[j, i].set_title('{}'.format(namesimage[i]))
                    else:
                        fig, axs = plt.subplots(1, 2, figsize=(6, 2.9))
                        plt.tight_layout(pad=1.0)
                        fig.suptitle('Reconstructed image')
                        for i in range(2):
                            if FLAGS.encoder_type == 'Video':
                                axs[i].imshow(imagesvideo[i, h, :, :, :])
                            else:  # 'Audio' or 'Energy': single channel
                                axs[i].imshow(imagesvideo[i, h, :, :, 0])
                            axs[i].axis('off')
                            axs[i].set_title('{}'.format(namesimage[i]))
                    outImage_path = '{}/{}_images_{}.png'.format(data_dir, dataset, num)
                    plt.savefig(outImage_path)
                    plt.close(fig)  # release the figure to avoid leaking memory
                    num += 1
                print(total_size)
            except tf.errors.OutOfRangeError:
                break
            batch_count += 1

    print('{} - Completed, got {} samples'.format(datetime.now(), total_size))
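
# --- Illustrative helper (assumption, not in the original code) -------------
# The log-energy input above is min-max normalized per sample over the spatial
# axes; this numpy sketch mirrors that computation for a [N, H, W, 1] batch:
def minmax_normalize(batch):
    shifted = batch - batch.min(axis=(1, 2), keepdims=True)
    return shifted / shifted.max(axis=(1, 2), keepdims=True)
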
# Variant 4: reconstruct acoustic images while conditioning the acoustic
# decoder on a latent vector sampled from the video and/or audio encoders
# (VAE-style reparameterization).
def plotdecodeimages():
    encoder_type = FLAGS.encoder_type
    dataset = FLAGS.train_file.split('/')[-1]
    dataset = dataset.split('.')[0]
    s = FLAGS.init_checkpoint.split('/')[-1]
    name = (s.split('_')[1]).split('.ckpt')[0]
    if FLAGS.fusion:
        name2 = '{}_Ac{}_{}'.format(dataset, 'VideoAudio', name)
    else:
        name2 = '{}_Ac{}_{}'.format(dataset, encoder_type, name)
    data_dir = '/'.join(FLAGS.init_checkpoint.split('/')[:-1] + [name2])
    random_pick = True
    build_spectrogram = True
    normalize = False

    # Create data loaders according to the received program arguments
    print('{} - Creating data loaders'.format(datetime.now()))
    modalities = [0, 1, 2]  # use all three modalities
    with tf.device('/cpu:0'):
        train_data = ActionsDataLoader(FLAGS.train_file, 'inference',
                                       batch_size=FLAGS.batch_size,
                                       num_epochs=1, sample_length=1,
                                       datakind='outdoor', buffer_size=10,
                                       shuffle=False, normalize=normalize,
                                       build_spectrogram=build_spectrogram,
                                       correspondence=0,
                                       random_pick=random_pick,
                                       modalities=modalities,
                                       nr_frames=FLAGS.nr_frames)
    data_size = train_data.num_samples

    # Build model
    print('{} - Building model'.format(datetime.now()))
    handle = tf.placeholder(tf.string, shape=())
    iterator = tf.data.Iterator.from_string_handle(
        handle, train_data.data.output_types, train_data.data.output_shapes)
    train_iterat = train_data.data.make_initializable_iterator()
    next_batch = iterator.get_next()

    mfcc = tf.reshape(next_batch[1], shape=[-1, 99, 257, 1])
    images = tf.reshape(next_batch[2], shape=[-1, 224, 298, 3])
    acoustic = tf.reshape(next_batch[0], shape=[-1, 36, 48, 12])

    if FLAGS.datatype == 'music':
        num_actions = 9
        num_locations = 11  # maximum number of videos for a class
    else:  # datakind == 'outdoor'
        num_actions = 10
        num_locations = 61
    labels = tf.reshape(next_batch[3], shape=[-1, num_actions])
    scenario = tf.reshape(next_batch[4], shape=[-1, num_locations])

    with tf.device('/gpu:0'):
        modelac = UNetAc(input_shape=[36, 48, 12])
        if FLAGS.fusion:
            modelimages = UNet(input_shape=[224, 298, 3])
            modelimages._build_model(images)
            modelaudio = UNetSound(input_shape=[99, 257, 1])
            modelaudio._build_model(mfcc)
            meanimages = modelimages.mean
            varianceimages = modelimages.variance
            meanaudio = modelaudio.mean
            varianceaudio = modelaudio.variance
            # Reparameterization trick over the fused statistics of both encoders
            samples = tf.random_normal(
                [tf.shape(varianceimages)[0], tf.shape(varianceimages)[1]],
                0, 1, dtype=tf.float32)
            z = meanimages + meanaudio + ((varianceaudio + varianceimages) * samples)
            var_list = (slim.get_variables(modelaudio.scope + '/') +
                        slim.get_variables(modelimages.scope + '/'))
        else:
            if FLAGS.encoder_type == 'Video':
                model = UNet(input_shape=[224, 298, 3])
                model._build_model(images)
            elif FLAGS.encoder_type == 'Audio':
                model = UNetSound(input_shape=[99, 257, 1])
                model._build_model(mfcc)
            mean = model.mean
            variance = model.variance
            # Reparameterization trick: z = mean + variance * eps, eps ~ N(0, 1)
            samples = tf.random_normal(
                [tf.shape(variance)[0], tf.shape(variance)[1]],
                0, 1, dtype=tf.float32)
            z = mean + (variance * samples)
            var_list = slim.get_variables(model.scope + '/')
        modelac._build_model(acoustic, z)
    output = modelac.output
    var_listac = slim.get_variables(modelac.scope + '/')

    # makedirs creates all intermediate directories (mkdir creates only one)
    if os.path.exists(data_dir):
        print("Features already computed!")
    else:
        os.makedirs(data_dir)

    total_size = 0
    batch_count = 0
    num = 0
    print('{} - Starting'.format(datetime.now()))
    namesimage = ['Acoustic image', 'Reconstructed']
    with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(allow_growth=True))) as session:
        train_handle = session.run(train_iterat.string_handle())
        saver = tf.train.Saver(var_list=var_listac + var_list)
        saver.restore(session, FLAGS.init_checkpoint)
        print('{} - Done'.format(datetime.now()))

        session.run(train_iterat.initializer)
        # The fusion and single-encoder loops differ only in the feed_dict,
        # so it is built once up front.
        feed_dict = {
            handle: train_handle,
            modelac.network['keep_prob']: 1.0,
            modelac.network['is_training']: 0
        }
        if FLAGS.fusion:
            feed_dict.update({
                modelaudio.network['keep_prob']: 1.0,
                modelaudio.network['is_training']: 0,
                modelimages.network['keep_prob']: 1.0,
                modelimages.network['is_training']: 0
            })
        else:
            feed_dict.update({
                model.network['keep_prob']: 1.0,
                model.network['is_training']: 0
            })
        while True:
            try:
                data, reconstructed = session.run([acoustic, output], feed_dict=feed_dict)
                total_size += reconstructed.shape[0]
                imagesvideo = np.stack((data, reconstructed), 0)
                for h in range(reconstructed.shape[0]):
                    # Plot original and reconstructed acoustic images:
                    # the 12 channels are shown as four RGB triplets
                    fig, axs = plt.subplots(4, 2, figsize=(6, 2.9 * 4))
                    plt.tight_layout(pad=1.0)
                    fig.suptitle('Reconstructed image')
                    for i in range(2):
                        for j in range(4):
                            axs[j, i].imshow(imagesvideo[i, h, :, :, j * 3:(j + 1) * 3])
                            axs[j, i].axis('off')
                            axs[j, i].set_title('{}'.format(namesimage[i]))
                    outImage_path = '{}/{}_images_{}.png'.format(data_dir, dataset, num)
                    plt.savefig(outImage_path)
                    plt.close(fig)  # release the figure to avoid leaking memory
                    num += 1
                print('{} samples'.format(total_size))
            except tf.errors.OutOfRangeError:
                break
            batch_count += 1

    print('{}'.format(data_size))
    print('{} - Completed, got {} samples'.format(datetime.now(), total_size))
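
# --- Illustrative helper (assumption, not in the original code) -------------
# The fusion branch above samples the latent vector with the reparameterization
# trick, summing the statistics of the video and audio encoders:
#   z = (mu_v + mu_a) + (sigma_v + sigma_a) * eps,  eps ~ N(0, I)
# A numpy sketch of the same computation (argument names are illustrative):
def sample_fused_latent(mean_video, var_video, mean_audio, var_audio):
    eps = np.random.normal(0.0, 1.0, size=np.shape(mean_video))
    return (mean_video + mean_audio) + (var_video + var_audio) * eps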