def compress_seq_gibbs(passes=PASSES): ''' Compress image via sequentiall-initialized gibbs sampling. ''' assert passes >= 1 assert os.path.exists(SAMPLES), 'first create dataset' print 'seq+gibbs start {} passes'.format(passes) model = ImageModel() mixture = ImageModel.Mixture() mixture.init(model) scores = numpy.zeros(1, dtype=numpy.float32) assignments = {} for i, xy in enumerate(json_stream_load(SAMPLES)): scores.resize(len(mixture)) mixture.score_value(model, xy, scores) groupid = sample_discrete_log(scores) mixture.add_value(model, groupid, xy) assignments[i] = mixture.id_tracker.packed_to_global(groupid) print 'seq+gibbs init with {} components'.format(len(mixture)) for _ in xrange(passes - 1): for i, xy in enumerate(json_stream_load(SAMPLES)): groupid = mixture.id_tracker.global_to_packed(assignments[i]) mixture.remove_value(model, groupid, xy) scores.resize(len(mixture)) mixture.score_value(model, xy, scores) groupid = sample_discrete_log(scores) mixture.add_value(model, groupid, xy) assignments[i] = mixture.id_tracker.packed_to_global(groupid) print 'seq+gibbs found {} components'.format(len(mixture)) image = synthesize_image(model, mixture) scipy.misc.imsave(os.path.join(RESULTS, 'seq_gibbs.png'), image)
def get_subprobs(size, max_size): ''' Compute probabilities of shapes of partial assignment vectors. Inputs: size = sample_size max_size = dataset_size Returns: dict : shape -> prob ''' assert 0 <= size assert size <= max_size cache_file = '{}/subprobs.{}.{}.json.bz2'.format(TEMP, size, max_size) if cache_file not in CACHE: if os.path.exists(cache_file): flat = json_stream_load(cache_file) small_probs = {tuple(key): val for key, val in flat} else: if size == max_size: small_probs = get_probs(size) else: small_counts = get_counts(size) large_counts = get_counts(size + 1) large_probs = get_subprobs(size + 1, max_size) small_probs = get_smaller_probs( small_counts, large_counts, large_probs) print 'caching', cache_file json_stream_dump(small_probs.iteritems(), cache_file) CACHE[cache_file] = small_probs return CACHE[cache_file]
def get_counts(size): ''' Count partition shapes of a given sample size. Inputs: size = sample_size Returns: dict : shape -> count ''' assert 0 <= size cache_file = '{}/counts.{}.json.bz2'.format(TEMP, size) if cache_file not in CACHE: if os.path.exists(cache_file): flat = json_stream_load(cache_file) large = {tuple(key): val for key, val in flat} else: if size == 0: large = {(): 1.0} else: small = get_counts(size - 1) large = get_larger_counts(small) print 'caching', cache_file json_stream_dump(large.iteritems(), cache_file) CACHE[cache_file] = large return CACHE[cache_file]
def create_dataset(sample_count=SAMPLE_COUNT): ''' Extract dataset from image. ''' scipy.misc.imsave(os.path.join(RESULTS, 'original.png'), IMAGE) print 'sampling {} points from image'.format(sample_count) samples = sample_from_image(IMAGE, sample_count) json_stream_dump(samples, SAMPLES) image = visualize_dataset(json_stream_load(SAMPLES)) scipy.misc.imsave(os.path.join(RESULTS, 'samples.png'), image)
def compress_sequential(): ''' Compress image via sequential initialization. ''' assert os.path.exists(SAMPLES), 'first create dataset' print 'sequential start' model = ImageModel() mixture = ImageModel.Mixture() mixture.init(model) scores = numpy.zeros(1, dtype=numpy.float32) for xy in json_stream_load(SAMPLES): scores.resize(len(mixture)) mixture.score_value(model, xy, scores) groupid = sample_discrete_log(scores) mixture.add_value(model, groupid, xy) print 'sequential found {} components'.format(len(mixture)) image = synthesize_image(model, mixture) scipy.misc.imsave(os.path.join(RESULTS, 'sequential.png'), image)
def json_loop_load(filename):
    '''
    Cycle through a json stream forever, yielding (index, item) pairs.

    The index restarts at 0 on each pass over the file.
    '''
    while True:
        position = 0
        for item in json_stream_load(filename):
            yield position, item
            position += 1