Exemple #1
0
def discretize(M, w):
    """Count, for each entry of ``M``, how often its score is at most ``w``.

    Every row and every column of ``M`` is passed through ``scale_score``
    (presumably a rank transform; confirm against its definition).  An
    entry is incremented once if its row-wise score is ``<= w`` and once
    more if its column-wise score is ``<= w``.

    Parameters:
        M: two-dimensional array.
        w: threshold compared against the scores.

    Returns:
        A float32 array with the shape of ``M`` whose entries are 0, 1 or 2.
    """
    X = np.zeros(M.shape, dtype='float32')
    # Row pass.
    for i in range(M.shape[0]):
        score = scale_score(M[i, :])
        which, = np.nonzero(score <= w)
        X[i, which] += 1
    # Column pass: iterate over the number of columns.  The previous bound
    # (the number of rows) was only correct for square matrices.
    for j in range(M.shape[1]):
        score = scale_score(M[:, j])
        which, = np.nonzero(score <= w)
        X[which, j] += 1
    return X
def report_statistics(id_sub, stats):
    """Build the 'stats-<id_sub>' report relating distance to delta.

    Reads the 'distance' and 'delta' fields of ``stats['records']`` and
    produces a scatter plot, two error-display plots (raw distance and
    ``scale_score`` of the distance divided by the number of samples) and
    one histogram of the distance for every distinct delta value.

    Returns:
        The populated Report.
    """
    records = stats['records']
    distance = records['distance']
    delta = records['delta']
    # scale_score output, divided by the number of samples
    ranks = scale_score(distance)
    order = ranks / float(ranks.size)

    report = Report('stats-%s' % id_sub)
    report.data('records', records)

    overview = report.figure()

    with overview.plot('scatter') as pylab:
        pylab.scatter(delta, distance)
        pylab.xlabel('delta')
        pylab.ylabel('distance')
        x_limits = (-1, np.max(delta) + 1)
        y_limits = (-0.05, np.max(distance))
        pylab.axis(x_limits + y_limits)

    with overview.plot('with_stats', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, distance, 'g')

    with overview.plot('distance_order', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, order, color='k')

    # One histogram per distinct delta, all sharing the same bin edges.
    histograms = report.figure(cols=1)
    bins = np.linspace(0, np.max(distance), 100)
    for index, value in enumerate(set(delta)):
        with histograms.plot('conditional%d' % index) as pylab:
            pylab.hist(distance[delta == value], bins)

    return report
Exemple #3
0
def report_statistics(id_sub, stats):
    """Create a report of the distance statistics as a function of delta.

    The report is named 'stats-<id_sub>', stores ``stats['records']`` as
    data, and contains:

    * 'scatter': distance against delta;
    * 'with_stats': error display of the distance;
    * 'distance_order': error display of ``scale_score(distance)``
      divided by the sample count;
    * 'conditional<i>': a 100-edge histogram of the distance for the i-th
      distinct delta value.

    Returns:
        The Report instance.
    """
    records = stats['records']
    distance = records['distance']
    delta = records['delta']
    order = scale_score(distance)
    n_samples = float(order.size)
    order = order / n_samples

    out = Report('stats-%s' % id_sub)
    out.data('records', records)
    fig = out.figure()

    with fig.plot('scatter') as pylab:
        pylab.scatter(delta, distance)
        pylab.xlabel('delta')
        pylab.ylabel('distance')
        right = np.max(delta) + 1
        top = np.max(distance)
        pylab.axis((-1, right, -0.05, top))

    with fig.plot('with_stats', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, distance, 'g')

    with fig.plot('distance_order', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, order, color='k')

    # Second figure, single column: conditional histograms.
    fig = out.figure(cols=1)
    edges = np.linspace(0, np.max(distance), 100)
    for i, d in enumerate(set(delta)):
        plot_name = 'conditional%d' % i
        with fig.plot(plot_name) as pylab:
            mask = delta == d
            pylab.hist(distance[mask], edges)

    return out
Exemple #4
0
    def get_S(self, dimensions=2, pub=None):
        """Embed the similarity statistic with ``mds``.

        The similarity obtained from ``self.get_similarity(self.statistic)``
        is optionally passed through ``scale_score`` and normalised by its
        maximum (when ``self.scale_score`` is true), turned into a distance
        matrix scaled so that its largest entry is pi, and handed to
        ``mds``.

        Parameters:
            dimensions: forwarded to ``mds`` as ``ndim``.
            pub: optional publisher; when given, intermediate arrays and
                spectra are published to it.

        Returns:
            Whatever ``mds`` returns for the distance matrix.
        """
        publish = pub is not None

        similarity = self.get_similarity(self.statistic)
        if publish:
            pub.array_as_image('similarity', similarity,
                               caption='Similarity statistic')
            plot_spectrum(pub, 'similarity', similarity)

        if not self.scale_score:
            R = similarity
        else:
            R = scale_score(similarity).astype('float32')
            R = R / R.max()
            if publish:
                pub.array_as_image('scale_score', R)

        # Distances: largest entry rescaled to pi, zero on the diagonal.
        D = 1 - R
        D = D * np.pi / D.max()
        np.fill_diagonal(D, 0)

        if publish:
            # Spectrum of the double-centered squared distances.
            squared = D * D
            plot_spectrum(pub, 'B', double_center(squared))

        return mds(D, ndim=dimensions)
def report_predstats(id_discdds, id_subset, id_distances, records):
    """Build a report comparing several distance measures against delta.

    Parameters:
        id_discdds, id_subset: identifiers interpolated into the report
            name ('predistats-<id_discdds>-<id_subset>').
        id_distances: names of the fields of ``records`` to compare; each
            name is also used as the label of its series.  At most 50 are
            supported because the colour list holds 50 entries.
        records: array indexed by field name (presumably a NumPy
            structured array) with a 'delta' field and one field per entry
            of ``id_distances``.

    Returns:
        The populated Report, with the plots 'values_order' (scale_score
        of each distance divided by ``size - 1``) and 'values' (raw
        distances).
    """
    r = Report('predistats-%s-%s' % (id_discdds, id_subset))
    # Parenthesised so that the code also parses under Python 3; with a
    # single argument the Python 2 output is unchanged.
    print(records.dtype)
    r.data('records', records)
    f = r.figure()

    colors = list(islice(cycle(['r', 'g', 'b', 'k', 'y', 'm']), 50))
    delta = records['delta']
    W = 0.2
    # step goes from 0 to 1 across the series, so the horizontal offset of
    # a series lies in [-W, W]; the max() guards the single-series case.
    n_steps = max(len(id_distances) - 1, 1)

    # Save the raw values
    for id_d in id_distances:
        r.data(id_d, records[id_d])

    with f.plot('values_order', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)

        for i, id_d in enumerate(id_distances):
            distance = records[id_d]
            distance_order = scale_score(distance) / (float(distance.size) - 1)

            step = float(i) / n_steps
            xstep = W * 2 * (step - 0.5)
            fancy_error_display(ax, delta + xstep, distance_order,
                                colors[i], perc=10, label=id_d)

        ieee_spines(pylab)
        ticks = sorted(list(set(list(delta))))
        pylab.xlabel('interval length')
        pylab.ylabel('normalized distance')
        pylab.xticks(ticks, ticks)
        pylab.yticks((0, 1), (0, 1))
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, 1.2))
        legend_put_below(ax)

    with f.plot('values', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)

        for i, id_d in enumerate(id_distances):
            distance = records[id_d]

            step = float(i) / n_steps
            xstep = W * 2 * (step - 0.5)
            fancy_error_display(ax, delta + xstep, distance,
                                colors[i], perc=10, label=id_d)

        ieee_spines(pylab)
        ticks = sorted(list(set(list(delta))))
        pylab.xlabel('interval length')
        pylab.ylabel('distance')
        pylab.xticks(ticks, ticks)
        # Keep the automatically chosen upper y limit.
        a = pylab.axis()
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, a[3]))
        legend_put_below(ax)

    return r
def check_scale_score_variants_test():
    """Check every entry of ``algos`` against ``scale_score`` on 'v5'.

    The reference result and each variant's result are validated with
    ``check_scale_score``; each variant must additionally match the
    reference according to ``assert_allclose``.
    """
    print('Original scale_score')
    sample = data['v5']
    reference = scale_score(sample)
    check_scale_score(sample, reference)

    for name in algos:
        variant = algos[name]
        print('Variant %s' % name)
        result = variant(sample)
        check_scale_score(sample, result)
        assert_allclose(result, reference)
def report_statistics_all(id_sub, stats, perc=10, W=0.2):
    """Build the 'statsall-<id_sub>' report, one series per distance id.

    ``stats['records']`` is split by its 'id_distance' field.  For every
    distinct id the 'distance' field is drawn against 'delta' twice: once
    as ``scale_score`` of the distance divided by the sample count (plot
    'distance_order') and once unchanged (plot 'distance').

    Parameters:
        id_sub: identifier interpolated into the report name.
        stats: mapping with a 'records' entry.
        perc: forwarded to ``fancy_error_display``.
        W: half-width of the horizontal band over which the series are
            spread, so that they do not overlap.

    Returns:
        The populated Report.
    """
    records = stats['records']

    report = Report('statsall-%s' % id_sub)
    report.data('records', records)
    figure = report.figure()

    id_distances = sorted(set(records['id_distance']))

    summary = (id_sub, len(stats), id_distances, len(records))
    logger.info('%s: %s %s reo %s' % summary)

    palette = list(islice(cycle(['r', 'g', 'b', 'k', 'y', 'm']), 50))
    # Divisor mapping the series index onto [0, 1]; never zero.
    spread = max(len(id_distances) - 1, 1)

    with figure.plot('distance_order', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)
        for index, id_d in enumerate(id_distances):
            selected = records[records['id_distance'] == id_d]
            delta = selected['delta']
            ranks = scale_score(selected['distance'])
            order = ranks / float(ranks.size)

            xstep = W * 2 * (float(index) / spread - 0.5)
            fancy_error_display(ax, delta + xstep, order,
                                palette[index], perc=perc, label=id_d)

        ieee_spines(pylab)
        # Ticks come from the delta values of the last series drawn.
        ticks = sorted(set(delta))
        pylab.xlabel('plan length')
        pylab.ylabel('normalized distance')
        pylab.xticks(ticks, ticks)
        pylab.yticks((0, 1), (0, 1))
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, 1.2))
        legend_put_below(ax)

    with figure.plot('distance', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)
        for index, id_d in enumerate(id_distances):
            selected = records[records['id_distance'] == id_d]
            delta = selected['delta']
            distance = selected['distance']

            xstep = W * 2 * (float(index) / spread - 0.5)
            fancy_error_display(ax, delta + xstep, distance,
                                palette[index], perc=perc, label=id_d)

        ieee_spines(pylab)
        ticks = sorted(set(delta))
        pylab.xlabel('plan length')
        pylab.ylabel('distance')
        pylab.xticks(ticks, ticks)
        # Reuse the automatically chosen upper y limit.
        y_top = pylab.axis()[3]
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, y_top))
        legend_put_below(ax)

    return report
def report_predstats(id_discdds, id_subset, id_distances, records):
    """Report how each distance in ``id_distances`` varies with delta.

    Parameters:
        id_discdds, id_subset: identifiers used to name the report
            ('predistats-<id_discdds>-<id_subset>').
        id_distances: field names of ``records`` holding the distances;
            each is stored as report data and drawn as one labelled
            series.  The colour list has 50 entries, so longer lists are
            not supported.
        records: array indexed by field name (presumably a NumPy
            structured array) providing 'delta' and every field named in
            ``id_distances``.

    Returns:
        The populated Report with the plots 'values_order' and 'values'.
    """
    r = Report('predistats-%s-%s' % (id_discdds, id_subset))
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print(records.dtype)
    r.data('records', records)
    f = r.figure()

    colors = list(islice(cycle(['r', 'g', 'b', 'k', 'y', 'm']), 50))
    delta = records['delta']
    W = 0.2
    n_steps = max(len(id_distances) - 1, 1)

    def shifted_delta(i):
        # Offset series i horizontally within [-W, W] so that the
        # series do not overlap.
        step = float(i) / n_steps
        xstep = W * 2 * (step - 0.5)
        return delta + xstep

    # Save the raw values
    for id_d in id_distances:
        r.data(id_d, records[id_d])

    with f.plot('values_order', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)

        for i, id_d in enumerate(id_distances):
            distance = records[id_d]
            distance_order = scale_score(distance) / (float(distance.size) - 1)
            fancy_error_display(ax,
                                shifted_delta(i),
                                distance_order,
                                colors[i],
                                perc=10,
                                label=id_d)

        ieee_spines(pylab)
        ticks = sorted(list(set(list(delta))))
        pylab.xlabel('interval length')
        pylab.ylabel('normalized distance')
        pylab.xticks(ticks, ticks)
        pylab.yticks((0, 1), (0, 1))
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, 1.2))
        legend_put_below(ax)

    with f.plot('values', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)

        for i, id_d in enumerate(id_distances):
            distance = records[id_d]
            fancy_error_display(ax,
                                shifted_delta(i),
                                distance,
                                colors[i],
                                perc=10,
                                label=id_d)

        ieee_spines(pylab)
        ticks = sorted(list(set(list(delta))))
        pylab.xlabel('interval length')
        pylab.ylabel('distance')
        pylab.xticks(ticks, ticks)
        # Keep the automatically chosen upper y limit.
        a = pylab.axis()
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, a[3]))
        legend_put_below(ax)

    return r
from bootstrapping_olympics.utils import assert_allclose
import numpy as np
from boot_agents.utils import scale_score_scipy
import itertools
from bootstrapping_olympics.utils.in_a_while import InAWhile




def _two_sort_variant(first, second):
    """Return a one-argument wrapper around scale_score(x, first, second)."""
    return lambda x: scale_score(x, first, second)


# (label, sort kind) pairs combined pairwise into the '<a>-<b>' entries.
_SORT_KINDS = (
    ('quick', 'quicksort'),
    ('merge', 'mergesort'),
    ('heap', 'heapsort'),
)

# Implementations under comparison, keyed by a short label; every value
# takes the input array as its only argument.  The plain argsort and the
# single-sort scale_score2 variants are deliberately not included.
algos = {'scipy': lambda x: scale_score_scipy(x)}
algos.update(
    ('%s-%s' % (label_a, label_b), _two_sort_variant(kind_a, kind_b))
    for (label_a, kind_a), (label_b, kind_b)
    in itertools.product(_SORT_KINDS, repeat=2)
)

data = {
        'v5': np.random.rand(5).astype('float32'),
        'v10': np.random.rand(10, 10).astype('float32'),
        'v50': np.random.rand(50, 50).astype('float32'),
        'v100': np.random.rand(100, 100).astype('float32'),
Exemple #10
0
def report_statistics_all(id_sub, stats, perc=10, W=0.2):
    """Compare all distance ids found in ``stats['records']``.

    Two plots are added to a report named 'statsall-<id_sub>':

    * 'distance_order': for each id, ``scale_score`` of its distances
      divided by the number of samples, against delta;
    * 'distance': for each id, the distances themselves against delta.

    Parameters:
        id_sub: identifier used in the report name.
        stats: mapping holding the 'records' array.
        perc: passed through to ``fancy_error_display``.
        W: maximum horizontal offset applied to a series (the offsets
            range over [-W, W]).

    Returns:
        The Report instance.
    """
    records = stats['records']

    out = Report('statsall-%s' % id_sub)
    out.data('records', records)
    fig = out.figure()

    id_distances = sorted(set(records['id_distance']))

    logger.info('%s: %s %s reo %s' %
                (id_sub, len(stats), id_distances, len(records)))

    colors = list(islice(cycle(['r', 'g', 'b', 'k', 'y', 'm']), 50))
    denominator = max(len(id_distances) - 1, 1)

    def offset(i):
        # Horizontal shift of series i, spread evenly over [-W, W].
        step = float(i) / denominator
        return W * 2 * (step - 0.5)

    with fig.plot('distance_order', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)
        for i, id_d in enumerate(id_distances):
            mask = records['id_distance'] == id_d
            delta = records[mask]['delta']
            distance = records[mask]['distance']
            order = scale_score(distance)
            order = order / float(order.size)

            fancy_error_display(ax, delta + offset(i), order, colors[i],
                                perc=perc, label=id_d)

        ieee_spines(pylab)
        # delta still refers to the last series drawn above.
        ticks = sorted(set(delta))
        pylab.xlabel('plan length')
        pylab.ylabel('normalized distance')
        pylab.xticks(ticks, ticks)
        pylab.yticks((0, 1), (0, 1))
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, 1.2))
        legend_put_below(ax)

    with fig.plot('distance', **dp_predstats_fig) as pylab:
        ax = pylab.subplot(111)
        for i, id_d in enumerate(id_distances):
            mask = records['id_distance'] == id_d
            delta = records[mask]['delta']
            distance = records[mask]['distance']

            fancy_error_display(ax, delta + offset(i), distance, colors[i],
                                perc=perc, label=id_d)

        ieee_spines(pylab)
        ticks = sorted(set(delta))
        pylab.xlabel('plan length')
        pylab.ylabel('distance')
        pylab.xticks(ticks, ticks)
        # Only the x range and the lower y bound are overridden.
        limits = pylab.axis()
        pylab.axis((0.5, 0.5 + np.max(delta), -0.024, limits[3]))
        legend_put_below(ax)

    return out