import os
import sys
import argparse

import numpy as np
import scipy.misc
import imageio

# Project modules (import paths may need adjusting to match the repository layout):
import mfa
import mfa_sgd_training
import mfa_utils
import image_batch_provider
import ndb


def flatten_hierarchic_model(root_gmm, model_folder):
    """Collect all trained sub-components into a single flat MFA model.

    Each sub-component's mixing weight ('pi') is scaled by the weight of its root
    component, so the flattened weights still sum to 1. The result is saved as
    'final_flat_model' in model_folder.
    """
    num_comps = len(root_gmm.components)
    all_comps = {}
    for i in range(num_comps):
        # Load the sub-model that was trained for root component i.
        comp_gmm = mfa.MFA()
        comp_folder = os.path.join(model_folder, 'hierarchic_model', 'comp_{}'.format(i))
        comp_gmm.load(os.path.join(comp_folder, 'final_gmm'))
        num_sub_comps = len(comp_gmm.components)
        for j in range(num_sub_comps):
            comp_num = len(all_comps)
            all_comps[comp_num] = comp_gmm.components[j]
            # Scale the sub-component weight by the root component weight.
            all_comps[comp_num]['pi'] *= root_gmm.components[i]['pi']
            print('Component', i, '/', j, 'pi=', all_comps[comp_num]['pi'])
    flat_gmm = mfa.MFA(all_comps)
    total_pi = sum([c['pi'] for c in flat_gmm.components.values()])
    assert abs(total_pi - 1.0) < 1e-5
    # Absorb any numerical drift into component 0 so the weights sum to exactly 1.
    flat_gmm.components[0]['pi'] = 1.0 - (total_pi - flat_gmm.components[0]['pi'])
    flat_gmm.save(os.path.join(model_folder, 'final_flat_model'))
    print('Total number of components:', len(flat_gmm.components))
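# A minimal sketch (not part of the original code) of the weight bookkeeping that
# flatten_hierarchic_model performs: each flattened component's mixing weight is the
# product of its root component's 'pi' and its own 'pi' within that root component,
# so the flattened weights remain normalized. Plain dictionaries stand in for the
# MFA component structures here.
def _check_flattened_weights_example():
    root_pis = {0: 0.6, 1: 0.4}                     # root-level mixing weights
    sub_pis = {0: [0.5, 0.5], 1: [0.25, 0.75]}      # per-root sub-component weights
    flat_pis = [root_pis[i] * p for i in root_pis for p in sub_pis[i]]
    assert abs(sum(flat_pis) - 1.0) < 1e-9          # flattened weights still sum to 1
    return flat_pis                                 # [0.3, 0.3, 0.1, 0.3]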
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_dir',
                        default='../../Datasets/CelebA/img_align_celeba')
    parser.add_argument('--output_dir',
                        help='Parent directory for storing all trained models',
                        default='./results')
    parser.add_argument('--num_components',
                        help='Number of (root level) mixture components',
                        type=int, default=200)
    parser.add_argument('--samples_per_sub_component',
                        help='For hierarchical (two-level) training, target number of '
                             'samples per final component',
                        type=int, default=400)
    parser.add_argument('--latent_dimension',
                        help='Dimension of input factors z',
                        type=int, default=10)
    args = parser.parse_args()

    image_shape = (64, 64)
    batch_size = 256
    test_size = batch_size * 10

    image_provider = image_batch_provider.ImageBatchProvider(
        args.dataset_dir,
        output_size=image_shape,
        crop_bbox=(25, 50, 128, 128),
        flatten=True,
        batch_size=batch_size,
        list_file=os.path.join(args.dataset_dir, '../list_eval_partition.txt'))

    output_folder = os.path.join(
        args.output_dir,
        'celeba_mfa_{}c_{}l'.format(args.num_components, args.latent_dimension))
    print('Running MFA training. Output folder is', output_folder)
    os.makedirs(output_folder, exist_ok=True)

    # Train the root-level MFA model, or load it if it was already trained.
    if not os.path.isfile(os.path.join(output_folder, 'final_gmm.pkl')):
        gmm_model = mfa_sgd_training.train(
            num_components=args.num_components,
            latent_dimension=args.latent_dimension,
            out_folder=output_folder,
            image_shape=image_shape,
            init_method='km',
            image_provider=image_provider,
            batch_size=batch_size,
            test_size=test_size,
            learning_rate=5e-5,
            max_iters=10000)
    else:
        print('Loading pre-trained root model...')
        gmm_model = mfa.MFA()
        gmm_model.load(os.path.join(output_folder, 'final_gmm'))

    # Hierarchical training: split each root component into sub-components
    # (see the sketch of split_data_by_model_components after this function).
    if args.samples_per_sub_component > 0:
        print('Now splitting each root component to sub-components...')
        if not os.path.isdir(os.path.join(output_folder, 'component_lists')):
            split_data_by_model_components(gmm_model, output_folder, image_provider,
                                           image_shape, batch_size)
        for comp_num in range(args.num_components):
            list_file = os.path.join(output_folder, 'component_lists',
                                     'comp_{}.txt'.format(comp_num))
            comp_image_provider = image_batch_provider.ImageBatchProvider(
                args.dataset_dir,
                output_size=image_shape,
                crop_bbox=(25, 50, 128, 128),
                flatten=True,
                batch_size=batch_size,
                mirror=False,
                list_file=list_file)
            comp_out_folder = os.path.join(output_folder, 'hierarchic_model',
                                           'comp_{}'.format(comp_num))
            if os.path.isfile(os.path.join(comp_out_folder, 'final_gmm.pkl')):
                print('Skipping component {} - already learned.'.format(comp_num))
            else:
                os.makedirs(comp_out_folder, exist_ok=True)
                num_sub_comps = (comp_image_provider.num_train_images //
                                 args.samples_per_sub_component)
                if num_sub_comps < 2:
                    # Too few samples to split - keep the root component as-is.
                    print('No sub-components for component number {}.'.format(comp_num))
                    comp_gmm = mfa.MFA({0: gmm_model.components[comp_num]})
                    comp_gmm.components[0]['pi'] = 1.0
                    comp_gmm.save(os.path.join(comp_out_folder, 'final_gmm'))
                else:
                    print('Training {} sub-components for root component {}...'.format(
                        num_sub_comps, comp_num))
                    # Retry training up to 3 times before giving up on this component.
                    for tries in range(3):
                        try:
                            mfa_sgd_training.train(
                                num_components=num_sub_comps,
                                latent_dimension=args.latent_dimension,
                                out_folder=comp_out_folder,
                                image_shape=image_shape,
                                init_method='km',
                                image_provider=comp_image_provider,
                                batch_size=batch_size,
                                test_size=comp_image_provider.num_test_images,
                                learning_rate=5e-5,
                                max_iters=5000)
                        except Exception:
                            print('An error occurred.')
                        else:
                            break
                    else:
                        print('Training of component {} failed!!!'.format(comp_num))

        print('Creating the final flat model...')
        flatten_hierarchic_model(gmm_model, output_folder)
    print('Done')
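# split_data_by_model_components is called above but its definition is not included in
# this excerpt. The sketch below illustrates the intent only: assign every training
# image to its most likely root component and write one list file per component under
# 'component_lists/'. The attribute `train_file_names` and the methods
# `get_samples_by_indices` / `responsibilities` are HYPOTHETICAL placeholders, not
# confirmed APIs of this repository.
def split_data_by_model_components_sketch(gmm_model, output_folder, image_provider,
                                          image_shape, batch_size):
    lists_dir = os.path.join(output_folder, 'component_lists')
    os.makedirs(lists_dir, exist_ok=True)
    files_per_component = {c: [] for c in range(len(gmm_model.components))}
    num_images = image_provider.num_train_images
    for start in range(0, num_images, batch_size):
        indices = list(range(start, min(start + batch_size, num_images)))
        batch = image_provider.get_samples_by_indices(indices)      # hypothetical API
        responsibilities = gmm_model.responsibilities(batch)        # hypothetical API
        assignments = np.argmax(responsibilities, axis=1)
        for idx, comp in zip(indices, assignments):
            # train_file_names is a hypothetical list of per-sample file names.
            files_per_component[int(comp)].append(image_provider.train_file_names[idx])
    for comp, file_names in files_per_component.items():
        with open(os.path.join(lists_dir, 'comp_{}.txt'.format(comp)), 'w') as list_fp:
            list_fp.write('\n'.join(file_names))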
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_dir',
                        default='../../Datasets/CelebA/img_align_celeba')
    parser.add_argument('--output_dir',
                        help='Parent directory for storing all trained models',
                        default='./results')
    parser.add_argument('--ndb_dir',
                        help='NDB cache directory',
                        default='./results/celeba_ndb_cache')
    args = parser.parse_args()

    model_name = 'celeba_mfa_200c_10l'
    model_dir = os.path.join(args.output_dir, model_name)
    image_shape = (64, 64)
    num_train = 80000
    num_test = 20000

    # Load the pre-trained model (run mfa_train_celeba.py first to train)
    gmm_model = mfa.MFA()
    gmm_model.load(os.path.join(model_dir, 'final_flat_model'))
    print('Loaded MFA model with {} components, data and latent dimensions of {}'.format(
        len(gmm_model.components), gmm_model.components[0]['A'].shape))

    # First generate some random mosaics - just for fun
    print('Generating mosaic images...')
    mosaic_dir = os.path.join(model_dir, 'final_flat_mosaic')
    os.makedirs(mosaic_dir, exist_ok=True)
    for i in range(10):
        samples = gmm_model.draw_samples(16 * 9, add_noise=False)
        images = mfa_utils.to_images(samples, image_shape[0], image_shape[1])
        scipy.misc.imsave(os.path.join(mosaic_dir, '{}.jpeg'.format(i)),
                          mfa_utils.images_to_mosaic(images))

    # Now generate images for evaluation
    print('Generating {} random images for evaluation...'.format(num_test))
    samples = gmm_model.draw_samples(num_test, add_noise=False)
    output_dir = os.path.join(model_dir, 'final_flat_generated')
    os.makedirs(output_dir, exist_ok=True)
    for i in range(num_test):
        image = mfa_utils.to_image_8u(samples[i], image_shape[0], image_shape[1])
        imageio.imwrite(os.path.join(output_dir, '{}.png'.format(i)), image)

    # Perform NDB evaluation of the trained model
    image_provider = image_batch_provider.ImageBatchProvider(
        args.dataset_dir,
        output_size=image_shape,
        crop_bbox=(25, 50, 128, 128),
        flatten=True,
        list_file=os.path.join(args.dataset_dir, '../list_eval_partition.txt'))

    print('Reading train samples')
    train_samples = image_provider.get_random_samples(num_train)
    os.makedirs(args.ndb_dir, exist_ok=True)

    images_folder = os.path.join(model_dir, 'final_flat_generated')
    mfa_provider = image_batch_provider.ImageBatchProvider(images_folder,
                                                           flatten=True,
                                                           mirror=False,
                                                           test_set_ratio=0)

    for num_bins in (100, 200, 300):
        print('Performing NDB evaluation for K={}'.format(num_bins))
        # Initializing NDB
        celeba_ndb = ndb.NDB(training_data=train_samples,
                             number_of_bins=num_bins,
                             whitening=True,
                             cache_folder=args.ndb_dir)
        # Evaluating MFA samples
        celeba_ndb.evaluate(mfa_provider.get_random_samples(num_test),
                            model_label=model_name)
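# A hedged addition, not present in the original evaluation script: scoring a held-out
# batch of real images with the same NDB object gives a reference point for the
# generated-sample results. Only calls that already appear above are reused
# (ImageBatchProvider.get_random_samples and NDB.evaluate).
def evaluate_real_baseline(ndb_object, provider, num_samples, label='real_baseline'):
    real_samples = provider.get_random_samples(num_samples)
    ndb_object.evaluate(real_samples, model_label=label)

# Example usage (inside the num_bins loop above):
#   evaluate_real_baseline(celeba_ndb, image_provider, num_test, label='celeba_real')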
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_dir', default='../../Datasets/MNIST')
    parser.add_argument('--output_dir',
                        help='Parent directory for storing all trained models',
                        default='./results')
    parser.add_argument('--ndb_dir',
                        help='NDB cache directory',
                        default='./results/mnist_ndb_cache')
    args = parser.parse_args()

    model_name = 'mnist_mfa_150c_5l'
    model_dir = os.path.join(args.output_dir, model_name)
    image_shape = (28, 28, 1)
    num_train = 50000
    num_test = 10000

    # Load the pre-trained model (run mfa_train_mnist.py first to train)
    gmm_model = mfa.MFA()
    gmm_model.load(os.path.join(model_dir, 'final_gmm'))
    print('Loaded MFA model with {} components, data and latent dimensions of {}'.format(
        len(gmm_model.components), gmm_model.components[0]['A'].shape))

    # First generate some random mosaics - just for fun
    print('Generating mosaic images...')
    mosaic_dir = os.path.join(model_dir, 'final_mosaic')
    os.makedirs(mosaic_dir, exist_ok=True)
    for i in range(10):
        samples = gmm_model.draw_samples(16 * 9, add_noise=False)
        images = mfa_utils.to_images(samples, *image_shape)
        scipy.misc.imsave(os.path.join(mosaic_dir, '{}.jpeg'.format(i)),
                          mfa_utils.images_to_mosaic(images))

    # Now generate images for evaluation
    print('Generating {} random images for evaluation...'.format(num_test))
    samples = gmm_model.draw_samples(num_test, add_noise=False)
    # Clip sample values to the valid [0, 1] pixel range.
    samples = np.maximum(0.0, np.minimum(1.0, samples))
    output_dir = os.path.join(model_dir, 'old_generated')
    os.makedirs(output_dir, exist_ok=True)
    for i in range(num_test):
        image = mfa_utils.to_image_8u(samples[i], *image_shape)
        imageio.imwrite(os.path.join(output_dir, '{}.png'.format(i)), image)

    # Perform NDB evaluation of the trained model
    image_provider = image_batch_provider.ImageBatchProvider(args.dataset_dir,
                                                             flatten=True,
                                                             batch_size=512,
                                                             mirror=False)
    print('Reading train samples')
    train_samples = image_provider.get_random_samples(num_train)
    os.makedirs(args.ndb_dir, exist_ok=True)

    images_folder = os.path.join(model_dir, 'old_generated')
    mfa_provider = image_batch_provider.ImageBatchProvider(images_folder,
                                                           flatten=True,
                                                           mirror=False,
                                                           test_set_ratio=0,
                                                           read_as_gray=True)

    for num_bins in (50, 100, 200):
        print('Performing NDB evaluation for K={}'.format(num_bins))
        # Initializing NDB
        mnist_ndb = ndb.NDB(training_data=train_samples,
                            number_of_bins=num_bins,
                            whitening=False,
                            cache_folder=args.ndb_dir)
        # Evaluating MFA samples
        mnist_ndb.evaluate(mfa_provider.get_random_samples(num_test),
                           model_label=model_name)
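# Standard script entry point, assuming each of the main() functions above lives in its
# own file (the training scripts mfa_train_celeba.py / mfa_train_mnist.py are named in
# the comments above; the evaluation file names below are placeholders). argv is unused
# because argparse reads sys.argv itself.
if __name__ == '__main__':
    main(sys.argv)

# Example invocations (paths are placeholders):
#   python mfa_train_celeba.py --dataset_dir /path/to/img_align_celeba --output_dir ./results
#   python mfa_evaluate_mnist.py --dataset_dir /path/to/MNIST --output_dir ./results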