def get_subprobs(size, max_size):
    '''
    Compute probabilities of shapes of partial assignment vectors.

    Inputs:
        size = sample_size
        max_size = dataset_size
    Returns:
        dict : shape -> prob
    '''
    assert 0 <= size
    assert size <= max_size
    cache_file = '{}/subprobs.{}.{}.json.bz2'.format(TEMP, size, max_size)
    if cache_file not in CACHE:
        if os.path.exists(cache_file):
            flat = json_stream_load(cache_file)
            small_probs = {tuple(key): val for key, val in flat}
        else:
            if size == max_size:
                small_probs = get_probs(size)
            else:
                small_counts = get_counts(size)
                large_counts = get_counts(size + 1)
                large_probs = get_subprobs(size + 1, max_size)
                small_probs = get_smaller_probs(
                    small_counts,
                    large_counts,
                    large_probs)
            print 'caching', cache_file
            json_stream_dump(small_probs.iteritems(), cache_file)
        CACHE[cache_file] = small_probs
    return CACHE[cache_file]
def get_counts(size):
    '''
    Count partition shapes of a given sample size.

    Inputs:
        size = sample_size
    Returns:
        dict : shape -> count
    '''
    assert 0 <= size
    cache_file = '{}/counts.{}.json.bz2'.format(TEMP, size)
    if cache_file not in CACHE:
        if os.path.exists(cache_file):
            flat = json_stream_load(cache_file)
            large = {tuple(key): val for key, val in flat}
        else:
            if size == 0:
                large = {(): 1.0}
            else:
                small = get_counts(size - 1)
                large = get_larger_counts(small)
            print 'caching', cache_file
            json_stream_dump(large.iteritems(), cache_file)
        CACHE[cache_file] = large
    return CACHE[cache_file]
# Example #3
# 0
def create_dataset(sample_count=SAMPLE_COUNT):
    '''
    Extract dataset from image.
    '''
    scipy.misc.imsave(os.path.join(RESULTS, 'original.png'), IMAGE)
    print 'sampling {} points from image'.format(sample_count)
    samples = sample_from_image(IMAGE, sample_count)
    json_stream_dump(samples, SAMPLES)
    image = visualize_dataset(json_stream_load(SAMPLES))
    scipy.misc.imsave(os.path.join(RESULTS, 'samples.png'), image)
# Example #4
# 0
def create_dataset(sample_count=SAMPLE_COUNT):
    '''
    Extract dataset from image.
    '''
    scipy.misc.imsave(os.path.join(RESULTS, 'original.png'), IMAGE)
    print 'sampling {} points from image'.format(sample_count)
    samples = sample_from_image(IMAGE, sample_count)
    json_stream_dump(samples, SAMPLES)
    image = visualize_dataset(json_stream_load(SAMPLES))
    scipy.misc.imsave(os.path.join(RESULTS, 'samples.png'), image)