Example #1
0
def graph_rocchio(fname):
    """Show one 3-D surface of mean recall over (alpha, beta) per percent.

    The pickled object is read as a nested mapping,
    ``results[percent][alpha][beta]['recalls']``, where ``recalls`` is a
    sequence of numbers. For every ``percent`` a new figure is opened with
    alpha on the x axis, beta on the y axis and the mean recall on the z axis.

    Parameters:
    fname - path of the pickle file holding the results mapping
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this
    # at result files you produced yourself.
    # Pickle data is binary, so the file is opened in 'rb' mode, and it is
    # closed again before any (blocking) plot window is shown.
    with open(fname, 'rb') as f:
        results = pickle.load(f)

    for percent in results:
        by_alpha = results[percent]
        x = []
        y = []
        z = []
        for alpha in by_alpha:
            by_beta = by_alpha[alpha]
            for beta in by_beta:
                recalls = by_beta[beta]['recalls']
                if not recalls:
                    # No measurements for this (alpha, beta): there is no
                    # mean to plot, so leave the point out.
                    continue
                x.append(alpha)
                y.append(beta)
                # float() keeps the mean exact under Python 2 integer division.
                z.append(sum(recalls) / float(len(recalls)))

        if not z:
            # Nothing to interpolate for this percent.
            continue

        fig = plt.figure()
        # add_subplot(..., projection='3d') works on old and new Matplotlib;
        # fig.gca(projection=...) was removed in recent releases.
        ax = fig.add_subplot(111, projection='3d')
        xi = np.linspace(min(x), max(x))
        yi = np.linspace(min(y), max(y))
        X, Y = np.meshgrid(xi, yi)
        # NOTE(review): the 5-positional-argument call matches
        # matplotlib.mlab.griddata -- confirm against this file's imports.
        Z = griddata(x, y, z, xi, yi)

        surf = ax.plot_surface(X, Y, Z, rstride=5, cstride=5, cmap=cm.jet,
                               linewidth=1, antialiased=True)
        ax.set_zlim3d(np.min(Z), np.max(Z))
        fig.colorbar(surf)

        ax.set_xlabel('alpha')
        ax.set_ylabel('beta')
        ax.set_zlabel('recall')

        plt.show()
Example #2
0
def submit_time_histogram(arr):
    """
    Use Matplotlib to plot a normalized histogram of submit times

    On top of the histogram this draws the Bayesian estimate of the mean
    (with its confidence interval as a shaded span), the median, and a
    Gaussian kernel density estimate.

    Parameters:
    arr - sequence of submit times; at least two values are required

    Raises:
    ValueError - if arr holds fewer than two values

    If Matplotlib or Prettyplotlib cannot be imported, a message is printed
    and nothing is plotted.
    """
    from math import ceil, log
    try:
        # mlab is only used by the disabled normal-PDF overlay below.
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print(
            'You must have Matplotlib and Prettyplotlib installed to plot a histogram.'
        )
        # Without plt nothing below can run, so stop here.
        return

    if len(arr) < 2:
        # log(0) and bayes_mvs both fail on fewer than two points; fail early
        # with a message that names the actual problem.
        raise ValueError('submit_time_histogram needs at least two submit times')

    # Use Sturges' formula for number of bins: k = ceiling(log2 n + 1)
    # (int() because ceil returns a float on Python 2 and hist needs a count)
    k = int(ceil(log(len(arr), 2) + 1))
    # NOTE(review): `normed` was replaced by `density` in newer Matplotlib;
    # kept as-is for the older releases prettyplotlib targets.
    n, bins, patches = plt.hist(arr,
                                k,
                                normed=1,
                                facecolor='green',
                                alpha=0.75)
    # throw a PDF plot on top of it
    #y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    #l = plt.plot(bins, y, 'r--', linewidth=1)

    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(arr)

    # drop a line in at the mean for fun, and shade its confidence interval
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(arr), color='y', alpha=0.5)

    # Calculate a Kernel Density Estimate with a fixed bandwidth factor of
    # 0.25 (the public bw_method argument replaces patching
    # covariance_factor and calling the private _compute_covariance()).
    density = gaussian_kde(arr, bw_method=.25)
    xs = np.arange(0., np.max(arr), 0.1)
    plt.plot(xs, density(xs), color='m')

    #FIXME: come up with better legend names
    #plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))

    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)

    plt.show()
Example #3
0
    def draw_color_mesh(self):
        '''
        Draws a heatmap of pairs of heroes which co-occur
        in the winning and in the losing teams, useful to
        visualize the relationship between strong pairs of
        heroes which lead to victories vs. weak pairs of
        heroes which don't have much synergy
        '''
        red_yellow = brewer2mpl.get_map('YlGnBu', 'Sequential', 9).mpl_colormap

        fig, ax = plt.subplots(1, figsize=(13, 10))
        ax.set_xlim([0, self.c])
        ax.set_ylim([0, self.c])

        mesh = np.zeros((self.c, self.c), dtype=float)
        for i in range(0, self.c):
            for j in range(0, self.c):
                if i >= j:
                    # Same hero cannot be picked twice
                    continue

                if (i, j) in self.dwstat:
                    if self.ddstat[(i, j)] != 0:
                        k = round(
                            self.dwstat[(i, j)] /
                            float(self.ddstat[(i, j)] + self.dwstat[(i, j)]),
                            2)
                        mesh[i][j] = k
                        mesh[j][i] = k

        # *************************************************************** #
        # Code to calculate the max ratios in the heatmap
        # and obtain their hero indices too
        # Get the indices for the largest `num_largest` values.
        num_largest = 8
        indices = mesh.argpartition(mesh.size - num_largest,
                                    axis=None)[-num_largest:]
        x, y = np.unravel_index(indices, mesh.shape)
        print "full:"
        print "x =", x
        print "y =", y
        print "Largest values:", mesh[x, y]
        # print "Compare to:    ", np.sort(mesh, axis=None)[-num_largest:]
        # **************************************************************** #

        ppl.pcolormesh(fig, ax, mesh, cmap=red_yellow)
        fig.savefig('../Figures/HeatMap-heroPairs.png')
        plt.show()
        plt.clf()
Example #4
0
def show_timeorder_info(Dt, mesh_sizes, errors):
    '''Print an error/order table per mesh size and plot errors against dt.

    For every key of `errors` the numerical order of convergence is computed
    from `Dt` and that key's error table. The table is printed to the
    screen, then one log-log figure per key is drawn with one curve per mesh
    size plus grey reference lines for orders 0 through 6. Useful for
    debugging.
    '''
    # Numerical order of convergence for each error label.
    orders = {
        name: _compute_numerical_order_of_convergence(Dt, errors[name])
        for name in errors
    }

    # Text table: one block per mesh size, alternating error and order rows.
    for row, mesh_size in enumerate(mesh_sizes):
        print
        print('Mesh size %d:' % mesh_size)
        print('dt = %e' % Dt[0]),
        for label, e in errors.items():
            print('   err_%s = %e' % (label, e[row][0])),
        print
        for step in range(1, len(Dt)):
            print('                 '),
            for label, o in orders.items():
                print('   ord_%s = %e' % (label, o[row][step - 1])),
            print
            print('dt = %e' % Dt[step]),
            for label, e in errors.items():
                print('   err_%s = %e' % (label, e[row][step])),
            print

    # One figure per error label.
    for label, err in errors.items():
        pp.figure()
        ax = pp.axes()
        # Measured errors, one curve per mesh size.
        for row, mesh_size in enumerate(mesh_sizes):
            pp.loglog(Dt, err[row], '-o', label=mesh_size)
        # Freeze the axis limits so the reference lines cannot rescale them.
        pp.autoscale(False)
        # Reference lines start at the first error of the last mesh.
        e0 = err[-1][0]
        dt_ends = [Dt[0], Dt[-1]]
        dt_ratio = Dt[-1] / Dt[0]
        for order in range(7):
            pp.loglog(dt_ends,
                      [e0, e0 * dt_ratio ** order],
                      color='0.7')
        pp.xlabel('dt')
        pp.ylabel('||%s-%s_h||' % (label, label))
        # pp.title('Method: %s' % method['name'])
        ppl.legend(ax, loc=4)
    pp.show()
Example #5
0
def _count_improvements(original, updated, ranges):
    """Tally int(updated - original) into half-open [lower, upper) ranges.

    Pairs whose score got worse (original > updated) are counted under the
    special key (-1, -1). Returns a dict mapping each range to its count.
    """
    counts = dict.fromkeys(ranges, 0)
    counts[(-1, -1)] = 0
    for o, u in zip(original, updated):
        if o > u:
            counts[(-1, -1)] += 1
            continue
        gain = int(u - o)
        for lower, upper in ranges:
            if lower <= gain < upper:
                counts[(lower, upper)] += 1
                break
    return counts


def histogram(original, updated, bins=None, main="", save=None, log=False):
    """Plot a histogram of score improvements (updated-original)

    Input:
    original - list of original scores
    updated - list of updated scores in same order as original
    bins - number of equal-width bins used to cover the observed
           improvements; when None (or not positive) a fixed set of
           ranges from 0 up to 10000 is used
    main - title of the plot
    save - file name to save the figure to; the plot is shown when None
    log - use a logarithmic frequency axis

    Scores that got worse are all counted in a single bar labelled -1.

    Raises:
    ValueError - if original and updated differ in length
    """
    # Lengths of score lists must be identical, assume in same order
    if len(original) != len(updated):
        raise ValueError("original and updated must have the same length")

    # Set up bins:
    if bins is not None and bins > 0:
        # Spread `bins` equal-width ranges over [0, largest improvement].
        gains = [int(u - o) for o, u in zip(original, updated) if not o > u]
        top = max(gains) if gains else 0
        # Ceiling division so the last range still contains `top`.
        width = max(1, -(-(top + 1) // int(bins)))
        ranges = [(k * width, (k + 1) * width) for k in range(int(bins))]
    else:
        # (-5, 0) can never match because worse scores are counted under
        # (-1, -1); it is kept so the default axis layout does not change.
        # (400, 500) closes a gap that silently dropped those improvements.
        ranges = [(-5, 0), (0, 1), (1, 25), (25, 50), (50, 75), (75, 100),
                  (100, 125), (125, 150), (150, 200), (200, 300),
                  (300, 400), (400, 500), (500, 10000)]

    # Calculate improvements
    improvements = _count_improvements(original, updated, ranges)
    keys = sorted(improvements, key=lambda r: r[0])
    values = [improvements[r] for r in keys]

    fig, ax = plt.subplots()
    ax.set_title(main)
    ax.set_xlabel("Improvement (updated-original) bitscores")
    # Only claim a log axis when one is actually requested.
    ax.set_ylabel("log(Frequency)" if log else "Frequency")

    # Each bar is labelled with the lower bound of its range.
    bar(ax, np.arange(len(keys)), values, log=log,
        annotate=True, grid='y', xticklabels=[lower for lower, _ in keys])

    if save is None:
        plt.show()
    else:
        plt.savefig(save)
Example #6
0
    def hero_stats_bar(self, thresh):
        '''
        Draw and save a bar chart of hero win ratios.

        The ratio for a hero k is wstat[k] / (dstat[k] + wstat[k]); only
        heroes whose ratio is at least `thresh` get a bar. Bars are
        annotated with the ratio rounded to two decimals and labelled with
        the hero id.
        '''
        # Refresh the per-hero statistics before reading them.
        self.hero_stats()

        fig, ax = plt.subplots(1, figsize=(9, 7))

        # Wins over games played is used rather than a W/L ratio or raw
        # wins: all of them can be statistically insignificant in edge
        # cases, but this one the least.
        hero_ids = []
        ratios = []
        for hero in self.wstat:
            wins = self.wstat[hero]
            ratio = wins / float(self.dstat[hero] + wins)
            if ratio >= thresh:
                hero_ids.append(hero)
                ratios.append(ratio)

        plt.title('Hero ID vs. Win Ratio (Matches from 01/23 - 02/24)')
        plt.xlabel('Hero ID')
        plt.ylabel('Win Ratio')
        ax.set_xlim([0, len(ratios)])

        # Text drawn on top of each bar.
        annotations = [round(ratio, 2) for ratio in ratios]

        ppl.bar(ax,
                np.arange(len(ratios)),
                ratios,
                annotate=annotations,
                xticklabels=hero_ids,
                grid='y',
                color=ppl.colors.set2[2])
        fig.savefig('../Figures/HIDvs#Wins#Games.png')
        plt.show()
        plt.clf()
  
    # Fragment of a function body (its header is not part of this excerpt).
    # Draws dense_eps, sparse_eps and particle_eps as three stacked heatmaps
    # that share one color range.

    # Row/column indices of every NaN entry in sparse_eps.
    xs,ys = np.where(np.isnan(sparse_eps))

    # Replace each NaN with 0.5 (presumably so the max/min below are not
    # NaN -- TODO confirm why 0.5 was chosen).
    for x,y in zip(xs,ys):
        sparse_eps[x,y] = 0.5

    # Largest value across the three arrays.
    max1 = np.max(sparse_eps)
    max2 = np.max(dense_eps)
    max3 = np.max(particle_eps)

    maxtotal = np.max([max1,max2,max3])


    # Smallest value across the three arrays.
    min1 = np.min(sparse_eps)
    min2 = np.min(dense_eps)
    min3 = np.min(particle_eps)

    mintotal = np.min([min1,min2,min3])


    # Three rows, one column: dense on top, sparse in the middle,
    # particle at the bottom.
    fig, (ax1,ax2,ax3) = ppl.subplots(3,1)

    p1 = ppl.pcolormesh(fig,ax1,dense_eps)
    p2 = ppl.pcolormesh(fig,ax2,sparse_eps)
    p3 = ppl.pcolormesh(fig,ax3,particle_eps)

    # Give all three meshes the same color limits so they are comparable
    # (the list comprehension is used only for its side effect).
    [p.set_clim(vmin=mintotal,vmax=maxtotal) for p in [p1,p2,p3]]


    plt.show()
#print len(c)

# Script fragment: c, e, k and z are defined before this excerpt.
# Regroup the flat sequence c into rows: row b takes c[b + len(k) * a] for
# every index a of e except the last one, so each row has len(e) - 1 values.
for b in range(len(k)):
    za = []
    for a in range(len(e)):
        if a != len(e) - 1:
            za.append(c[b + len(k) * a])
    z.append(za)

z = np.array(z)

#print z

# x and y (as matrices) could also be obtained directly with:
#X,Y = np.meshgrid(e,k)

#   print z

fig, ax = plt.subplots()

# Heatmap of z over the e (x) and k (y) coordinates, colors fixed to the
# range [-25, 175].
cax = ax.pcolor(e, k, z, vmin=-25, vmax=175., cmap=('hot_r'))
plt.xscale('log')
# Axis limits as [xmin, xmax, ymin, ymax]; x is listed from 1 down to
# ~0.0022, i.e. larger value first.
plt.axis([1, 0.00223872113857, 2, 21])
plt.ylabel('<k>', rotation='vertical')
plt.xlabel(u'\u03B5')
plt.yticks([2, 4, 6, 8, 10, 12, 14, 16, 18, 20])
# Colorbar ticks every 25; the top tick (175) is relabelled 'N.C.' below.
cbar = fig.colorbar(cax, ticks=[0, 25, 50, 75, 100, 125, 150, 175])
cbar.ax.set_yticklabels([0, 25, 50, 75, 100, 125, 150, 'N.C.'])

plt.show()
Example #9
0
 # Fragment of a function body (its header is not part of this excerpt).
 # Checks entry by entry that u and svT reproduce `train`.
 print 'Testing raw SVD => exact reconstruction'
 # Expand the singular values s into a (u.shape[0], vT.shape[1]) diagonal
 # matrix and multiply it by vT.
 svT = scipy.linalg.diagsvd(s, u.shape[0], vT.shape[1]).dot(vT)
 for y in xrange(train.shape[0]):
   for x in xrange(train.shape[1]):
     # NOTE(review): colU and rowV are not used in the visible lines;
     # single_dot is handed the full matrices plus the indices instead.
     colU = u[y, :]
     rowV = svT[:, x]
     # single_dot is defined elsewhere -- presumably the dot product of row
     # y of u with column x of svT; confirm against its definition.
     assert np.allclose(train[y, x], single_dot(u, svT, x, y))
 """
   ##
   plt.title('SVD reconstruction error on {}x{} matrix'.format(*train.shape))
   plt.xlabel('Low rank approximation (k)')
   plt.ylabel('Frobenius norm')
   plt.ylim(0, max(svdY))
   plt.legend(loc='best')
   plt.savefig('reconstruct_fro_{}x{}.pdf'.format(*train.shape))
   plt.show(block=True)
   ##
   plt.plot(orthoX,
            orthoY,
            label="SVD",
            color='black',
            linewidth=2,
            linestyle='--')
   for label, X, Y in incr_ortho:
       plt.plot(X, Y, label=label)
   plt.title('SVD orthogonality error on {}x{} matrix'.format(*train.shape))
   plt.xlabel('Low rank approximation (k)')
   plt.ylabel('Deviation from orthogonality')
   plt.semilogy()
   #plt.ylim(0, max(orthoY))
   plt.legend(loc='best')
    np.random.shuffle(nodes)

  # Fragment of a function body (its header is not part of this excerpt).
  # Displays graph G and graph approxG, then plots the rerrs and werrs
  # series and a per-edge comparison of weight errors.
  print 'Pageviews from "real" edge weights'
  print '-=-=-=-=-'
  display_graph(G)
  print
  print 'Pageviews from evenly distributed edge weights'
  print '-=-=-=-=-'
  display_graph(approxG)

  # rerrs plotted against its index; saved as a PDF, then shown in a
  # blocking window.
  plt.plot(np.arange(0, len(rerrs)), rerrs, label='Relative error over time')
  plt.xlabel('Iteration')
  plt.ylabel('Average pageview relative error per node')
  plt.legend()
  plt.savefig('error_over_time.pdf')
  plt.show(block=True)

  # Same layout for werrs.
  plt.plot(np.arange(0, len(werrs)), werrs, label='Weight error over time')
  plt.xlabel('Iteration')
  plt.ylabel('Average weight error per edge')
  plt.legend()
  plt.savefig('weight_over_time.pdf')
  plt.show(block=True)

  # Two overlaid bar charts: orig_weight_data (computed before this excerpt
  # and unpacked as the positional bar arguments) against the absolute
  # difference between the 'weight' of each edge in G and in approxG.
  fig, ax = plt.subplots(1)
  ppl.bar(ax, *orig_weight_data, alpha=0.5, color='black', label='Weight error before')
  ppl.bar(ax, np.arange(0, G.number_of_edges()), [abs(G[u][v]['weight'] - approxG[u][v]['weight']) for u, v in G.edges()], alpha=0.8, label='Weight error after')
  #plt.ylim(-1, 1)
  plt.legend(loc='best')
  plt.show(block=True)
Example #11
0
def _round_up_to_step(value, step):
    """Return the smallest positive multiple of step that is >= value."""
    # -(-a // b) is ceiling division; max() keeps at least one full step.
    return max(step, int(-(-value // step)) * step)


def scatter(original, updated, main="", save=None):
    """Plot a scatterplot of updated bitscores vs. original bitscores

    Hits whose integer scores are equal are not drawn as points; they are
    only counted in the label of the diagonal "no improvement" line.

    Input:
    original - list of original scores
    updated - list of updated scores in same order as original
    main - title of the plot
    save - path of a PDF file to write; the plot is shown when None
    """
    # Split the hits into positive improvement (updated > original) and
    # negative improvement (updated < original), and count the hits with no
    # improvement.
    print(len(original))
    positiveImprovement = []
    negativeImprovement = []
    noImprovement = 0
    for o, u in zip(original, updated):
        if int(o) == int(u):
            noImprovement += 1
        elif u > o:
            positiveImprovement.append((o, u))
        elif u < o:
            negativeImprovement.append((o, u))
        else:
            noImprovement += 1

    print(positiveImprovement)
    print(negativeImprovement)
    print(noImprovement)

    # Set dimensions: axis limits are rounded UP to the next multiple of
    # `sep` so the largest point is never clipped, and an empty plot still
    # gets a non-degenerate axis.
    sep = 500
    plotted = positiveImprovement + negativeImprovement
    if plotted:
        xMax = _round_up_to_step(max(o for o, _ in plotted), sep)
        yMax = _round_up_to_step(max(u for _, u in plotted), sep)
    else:
        xMax = yMax = sep
    color_cycle = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors

    fig, ax = plt.subplots()
    ax.set_title(main)
    ax.set_xlabel("Original Bitscores")
    ax.set_ylabel("Updated Bitscores")

    # Plot positive improvement (first Set2 color); skipped when empty
    if positiveImprovement:
        pos_x, pos_y = zip(*positiveImprovement)
        ppl.scatter(ax, pos_x, pos_y,
                    label="Positive Improvement ({} seqs)".format(len(positiveImprovement)),
                    color=color_cycle[0])

    # Draw no improvement line
    ppl.plot(ax, (0, xMax), (0, xMax), color='k', linestyle='-', linewidth=2,
             label="No Improvement ({} seqs)".format(noImprovement))

    # Plot negative improvement (second Set2 color); skipped when empty
    if negativeImprovement:
        neg_x, neg_y = zip(*negativeImprovement)
        ppl.scatter(ax, neg_x, neg_y,
                    label="Negative Improvement ({} seqs)".format(len(negativeImprovement)),
                    color=color_cycle[1])

    # Draw labels
    ppl.legend(ax)

    # Set axis
    ax.set_ylim([0, yMax])
    ax.set_xlim([0, xMax])

    if save is None:
        plt.show()
    else:
        pdf = PdfPages(save)
        try:
            pdf.savefig(fig)
        finally:
            # Always finalize the PDF, even if saving the figure fails.
            pdf.close()