def get_subprobs(size, max_size):
    '''
    Compute probabilities of shapes of partial assignment vectors.
    Inputs:
        size = sample_size
        max_size = dataset_size
    Returns:
        dict : shape -> prob
    '''
    assert 0 <= size
    assert size <= max_size
    cache_file = '{}/subprobs.{}.{}.json.bz2'.format(TEMP, size, max_size)
    if cache_file not in CACHE:
        if os.path.exists(cache_file):
            # JSON cannot key dicts by tuples, so shapes are stored as
            # (key, value) pairs and the tuple keys are rebuilt on load.
            flat = json_stream_load(cache_file)
            small_probs = {tuple(key): val for key, val in flat}
        else:
            if size == max_size:
                # Base case: at full dataset size, shape probabilities
                # come directly from get_probs.
                small_probs = get_probs(size)
            else:
                # Downward recursion: compute probs at size + 1, then
                # marginalize out one sample.
                small_counts = get_counts(size)
                large_counts = get_counts(size + 1)
                large_probs = get_subprobs(size + 1, max_size)
                small_probs = get_smaller_probs(
                    small_counts,
                    large_counts,
                    large_probs)
            print 'caching', cache_file
            json_stream_dump(small_probs.iteritems(), cache_file)
        CACHE[cache_file] = small_probs
    return CACHE[cache_file]
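# Hypothetical usage sketch, not part of the original module. For a
# sample of size 2 the only partition shapes are (2,) and (1, 1), so
# get_subprobs should map each of those tuples to a probability.
# Assumes TEMP, CACHE, and the helpers used above are defined.
def demo_subprobs(size=2, max_size=10):
    probs = get_subprobs(size, max_size)
    for shape in sorted(probs):
        print shape, probs[shape]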
def get_counts(size):
    '''
    Count partition shapes of a given sample size.
    Inputs:
        size = sample_size
    Returns:
        dict : shape -> count
    '''
    assert 0 <= size
    cache_file = '{}/counts.{}.json.bz2'.format(TEMP, size)
    if cache_file not in CACHE:
        if os.path.exists(cache_file):
            # Rebuild tuple keys, which JSON stores as lists.
            flat = json_stream_load(cache_file)
            large = {tuple(key): val for key, val in flat}
        else:
            if size == 0:
                # Base case: the empty sample has exactly one (empty) shape.
                large = {(): 1.0}
            else:
                # Upward recursion: grow each shape of size - 1 by one sample.
                small = get_counts(size - 1)
                large = get_larger_counts(small)
            print 'caching', cache_file
            json_stream_dump(large.iteritems(), cache_file)
        CACHE[cache_file] = large
    return CACHE[cache_file]
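# Both functions above share a two-level caching idiom: an in-memory
# dict keyed by filename, backed by a bz2-compressed JSON file on disk.
# Below is a minimal self-contained sketch of that idiom. The name
# cached() is hypothetical, and unlike the streaming json_stream_load /
# json_stream_dump helpers, this sketch loads and dumps in one shot and
# assumes compute() returns plain JSON-serializable data (the real code
# stores tuple-keyed dicts as streams of (key, value) pairs instead).
import bz2
import json
import os

_DEMO_CACHE = {}

def cached(cache_file, compute):
    # Look in memory first, then on disk, else compute and persist.
    if cache_file not in _DEMO_CACHE:
        if os.path.exists(cache_file):
            with bz2.BZ2File(cache_file, 'r') as f:
                _DEMO_CACHE[cache_file] = json.loads(f.read())
        else:
            _DEMO_CACHE[cache_file] = compute()
            with bz2.BZ2File(cache_file, 'w') as f:
                f.write(json.dumps(_DEMO_CACHE[cache_file]))
    return _DEMO_CACHE[cache_file]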
def create_dataset(sample_count=SAMPLE_COUNT):
    '''
    Sample a point dataset from an image, saving the original image and
    a visualization of the sampled points.
    '''
    scipy.misc.imsave(os.path.join(RESULTS, 'original.png'), IMAGE)
    print 'sampling {} points from image'.format(sample_count)
    samples = sample_from_image(IMAGE, sample_count)
    json_stream_dump(samples, SAMPLES)
    # Round-trip through the samples file to visualize exactly what was written.
    image = visualize_dataset(json_stream_load(SAMPLES))
    scipy.misc.imsave(os.path.join(RESULTS, 'samples.png'), image)
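# A plausible sketch of what sample_from_image might do, treating pixel
# intensity of a grayscale image as an unnormalized density over pixel
# coordinates. This is hypothetical: the module's actual implementation
# may differ, e.g. by inverting intensities to favor dark pixels or by
# jittering points within pixels.
import numpy
import numpy.random

def sample_from_image_sketch(image, sample_count):
    intensity = numpy.asarray(image, dtype=float)
    # Normalize intensities into a probability vector over flat indices.
    probs = intensity.ravel() / intensity.sum()
    flat = numpy.random.choice(intensity.size, size=sample_count, p=probs)
    rows, cols = numpy.unravel_index(flat, intensity.shape)
    # Return (x, y) pairs in image coordinates.
    return zip(cols.tolist(), rows.tolist())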