def _save_hourly_bars(fig_num, values, length, ylabel, title, out_path):
    """Draw one bar per hour on figure ``fig_num`` and save it to ``out_path``."""
    pylab.figure(fig_num)
    pos = pylab.arange(length) + .5
    pylab.bar(pos, values[:length], aa=True, ecolor='r')
    pylab.ylabel(ylabel)
    pylab.xlabel('Hour')
    pylab.title(title)
    pylab.grid(True)
    pylab.savefig(out_path, bbox_inches='tight', pad_inches=1)


def main(argv=None):
    """Copy a report's raw TSV into the graph directory and plot it per hour.

    The first ``part*`` file found in the report directory is copied to
    ``<graph dir>/<report name>.tsv`` and read as tab-separated
    ``hour, Requests, Bytes`` rows.  Two PDFs are written next to it:
    ``<report name>_requests.pdf`` and ``<report name>_bytes.pdf``.

    Parameters
    ----------
    argv : list of str, optional
        ``[program, report name, report dir, graph dir]``.  Defaults to
        ``sys.argv``.

    Returns
    -------
    int or None
        1 when the argument count is wrong or no ``part*`` file exists,
        None once both graphs have been saved.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        print("Usage: " + argv[0] + " <report name> <report dir> <graph dir>")
        return 1

    report, report_dir, graph_dir = argv[1:4]

    file_list = glob(os.path.join(report_dir, 'part*'))
    if not file_list:
        # Previously this fell through to file_list[0] and raised IndexError.
        print("No 'part*' files found in " + report_dir)
        return 1

    # Copy the raw data file to the graph_dir
    raw_file = os.path.join(graph_dir, report + '.tsv')
    shutil.copyfile(file_list[0], raw_file)

    # Start with every hour at (0, 0) so hours missing from the data plot as 0.
    length = 24
    requests_dict = dict(('%02d' % num, (0, 0)) for num in range(length))

    # Overlay the values we actually have; the handle is closed on exit.
    # NOTE: 'rb' is what the csv module expects on Python 2, which the rest
    # of this file targets.
    with open(raw_file, 'rb') as raw:
        data_file = csv.DictReader(raw,
                                   fieldnames=['hour', 'Requests', 'Bytes'],
                                   delimiter="\t")
        for row in data_file:
            requests_dict[row['hour']] = (int(row['Requests']),
                                          int(row['Bytes']))

    # Order by hour label so list position matches the bar position.
    ordered = sorted(requests_dict.items(), key=lambda req: req[0])
    requests = [counts[0] for _, counts in ordered]
    num_bytes = [counts[1] for _, counts in ordered]

    _save_hourly_bars(1, requests, length, 'Requests', 'Request per hour',
                      os.path.join(graph_dir, report + '_requests.pdf'))
    _save_hourly_bars(2, num_bytes, length, 'Bytes', 'Bytes per hour',
                      os.path.join(graph_dir, report + '_bytes.pdf'))
예제 #2
0
def stackedHistogram(data, bins=10, range=None, log = False, normed = False,
                     filename = None, labels = None,
                     **formating):
    """Overlay outline histograms of several datasets on shared bins.

    Parameters
    ----------
    data : sequence of array-like
        One entry per dataset; must contain at least one dataset.
    bins, range, normed
        Passed to ``numpy.histogram`` for the first dataset.  The resulting
        bin edges are then reused for every other dataset.
    log : bool
        Passed to ``pylab.bar`` to request a log-scaled axis.
    filename : str, optional
        If given, the figure is saved there and then cleared.
    labels
        Accepted for interface compatibility; not used by this function.
    **formating
        Forwarded unchanged to ``doFormating``.
    """
    histograms = []
    # The first dataset fixes the bin edges.
    curHist, bins = numpy.histogram(data[0], bins = bins, range = range,
                                    normed = normed)
    histograms.append(curHist)

    for d in data[1:]:
        curHist, bins = numpy.histogram(d, bins = bins, normed = normed)
        histograms.append(curHist)

    width = bins[1] - bins[0]

    palette = 'b r k g c m y w'.split()
    for idx, histo in enumerate(histograms):
        # Cycle through the palette.  The old `colors = colors.extend(colors)`
        # rebound colors to None and crashed once there were >= 8 datasets.
        color = palette[idx % len(palette)]
        # histo has exactly len(bins) - 1 entries, one per left edge; the old
        # `histo[:-1]` dropped the last bin and mismatched bins[:-1].
        pylab.bar(bins[:-1], histo, width = width, log=log,
                  edgecolor = color, facecolor = None)

    doFormating(**formating)
    pylab.show()
    if filename is not None:
        pylab.savefig(filename)
        pylab.clf()
예제 #3
0
def plot_tuning_curves(direction_rates, title):
    """
    Plot a tuning curve as a bar chart and as a polar curve.

    Column 0 of direction_rates supplies the x-values (directions, used as
    degrees) and column 1 the y-values (firing rates in spikes/s).  Both
    panels are drawn in a new figure and carry the given title.
    """
    directions = direction_rates[:, 0]
    rates = direction_rates[:, 1]

    plt.figure()

    # Panel 1: bar chart of rate per direction
    plt.subplot(2, 2, 1)
    plt.bar(directions, rates, width=45, align='center')
    plt.xlim(-22.5, 337.5)
    plt.xticks(directions)
    plt.xlabel('Direction of Motion (degrees)')
    plt.ylabel('Firing Rate (spikes/s)')
    plt.title(title)

    # Panel 2: polar curve, closed by repeating the first sample at the end
    plt.subplot(2, 2, 2, polar=True)
    closed_rates = np.append(rates, rates[0])
    closed_angles = np.deg2rad(np.append(directions, directions[0]))
    plt.polar(closed_angles, closed_rates, label='Firing Rate (spikes/s)')
    plt.legend(loc=8)
    plt.title(title)
예제 #4
0
def distributionValues(y,bins=None):
    """Draw a histogram of ``y`` as a bar chart on the current axes.

    Parameters
    ----------
    y : sequence
        Values to histogram.
    bins : int or sequence, optional
        Passed to ``np.histogram``.  When omitted, ``int(sqrt(len(y)))``
        bins are used, with a minimum of one.
    """
    # `is None` rather than `== None`: the latter is element-wise (and
    # ambiguous in a boolean context) when bins is an array of edges.
    if bins is None:
        bins = max(1, int(sqrt(len(y))))

    hist, edges = np.histogram(y, bins=bins)

    # Anchor each bar on the LEFT edge of its bin.  The previous code placed
    # bars at edges[1:], shifting the whole histogram one bin to the right.
    plt.bar(edges[:-1], hist, width=edges[1] - edges[0], align='edge')
예제 #5
0
파일: model.py 프로젝트: PabloHN/htmd
    def eqDistribution(self, plot=True):
        """ Obtain and plot the equilibrium probabilities of each macrostate

        Each macrostate's probability is the sum of the MSM stationary
        distribution over the microstates assigned to it.

        Parameters
        ----------
        plot : bool, optional, default=True
            Disable plotting of the probabilities by setting it to False

        Returns
        -------
        eq : ndarray
            An array of equilibrium probabilities of the macrostates

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(100, 5)
        >>> model.eqDistribution()
        """
        self._integrityCheck(postmsm=True)

        # Pre-filled with -1; every slot is overwritten in the loop below.
        macroeq = -np.ones(self.macronum)
        for macro in range(self.macronum):
            members = self.macro_ofmicro == macro
            macroeq[macro] = np.sum(self.msm.stationary_distribution[members])

        if not plot:
            return macroeq

        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        plt.bar(range(self.macronum), macroeq)
        plt.ylabel('Equilibrium probability')
        plt.xlabel('Macrostates')
        tick_positions = np.arange(0.4, self.macronum+0.4, 1)
        plt.xticks(tick_positions, range(self.macronum))
        plt.show()
        return macroeq
예제 #6
0
def plot_fullstack( binning = np.linspace(0,10,1), myquery='', plotvar = default_plot_variable, \
                    scalefactor = 1., user_ylim = None):
    """Draw the histograms returned by ``gen_histos`` stacked on one figure.

    Each histogram is drawn on top of the running sum of the previous ones.
    Colour and legend text come from the module-level ``colors`` and
    ``labels`` mappings.  ``user_ylim`` is accepted but not used here.
    """
    plt.figure(figsize=(10,6))
    plt.grid(True)

    stack_base = 0
    histos = gen_histos(binning=binning, myquery=myquery,
                        plotvar=plotvar, scalefactor=scalefactor)
    for key, (hist, bins) in histos.iteritems():
        bin_width = bins[1] - bins[0]
        legend_text = '%s: %d Events' % (labels[key], sum(hist))
        plt.bar(bins[:-1], hist,
                width=bin_width,
                color=colors[key],
                bottom=stack_base,
                edgecolor='k',
                label=legend_text)
        # The next histogram starts where this one ended.
        stack_base += hist

    plt.title('CCSingleE Stacked Backgrounds',fontsize=25)
    plt.ylabel('Events',fontsize=20)

    # Human-readable x label for known variables, the raw name otherwise.
    if plotvar in ('_e_nuReco', '_e_nuReco_better'):
        xstring = 'Reconstructed Neutrino Energy [GeV]'
    elif plotvar == '_e_CCQE':
        xstring = 'CCQE Energy [GeV]'
    else:
        xstring = plotvar
    plt.xlabel(xstring,fontsize=20)

    plt.legend()
    # Make sure the lower edge of the binning gets its own tick.
    current_ticks = list(plt.xticks()[0])
    plt.xticks(current_ticks + [binning[0]])
    plt.xlim([binning[0],binning[-1]])
예제 #7
0
def plot_call_rate(c):
    """Plot call-rate statistics and save them as two PNG files.

    Figure 1 is a normalised histogram of column 1 of ``c`` (the call rate).
    Figure 2 is the mean call rate per bin of column 0 (binned into 20
    equal-width bins over [0, 0.5]).  Both images are written below the
    directory named by the ``OBER`` environment variable.

    Parameters
    ----------
    c : ndarray
        2-D array; column 0 is binned, column 1 is averaged and histogrammed.

    Raises
    ------
    KeyError
        If the ``OBER`` environment variable is not set.
    """
    # Histogram
    P.clf()
    P.figure(1)
    P.hist(c[:,1], normed=True)
    P.xlabel('Call Rate')
    P.ylabel('Portion of Variants')
    P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate.png')

####################################################################################
#if __name__ == '__main__':
#    # Input parameters
#    file_name = sys.argv[1]  # Name of data file with MAF, call rates
#
#    # Load data
#    c = np.loadtxt(file_name, dtype=np.float16)
#
#    # Breakdown by call rate (proportional to the #samples, 1415)
#    plot_call_rate(c)
#    h = np.histogram(c[:,1])
#    a = np.flipud(np.cumsum(np.flipud(h[0])))/float(c.shape[0])
#    print np.concatenate((h[1][:-1][newaxis].transpose(), a[newaxis].transpose()), axis=1)

    # Breakdown by minor allele frequency
    maf_n = 20
    maf_bins = np.linspace(0, 0.5, maf_n + 1)
    maf_bin = np.digitize(c[:,0], maf_bins)
    # Average in float64 regardless of the input dtype.
    d = c.astype(np.float64)
    mean_call_rate = np.array([(1.*np.mean(d[maf_bin == i,1])) for i in range(len(maf_bins))])

    # Bin width.  It has to be computed BEFORE the bar call; the previous code
    # used `h` one statement before assigning it, and that stray call also
    # drew on figure 1 after it had already been saved.
    h = (maf_bins[-1] - maf_bins[0]) / maf_n

    P.figure(2)
    P.bar(maf_bins - h, mean_call_rate, width=h)
    P.savefig(os.environ['OBER'] + '/doc/imputation/cgi/call_rate_maf.png')
예제 #8
0
def plot_tuning_curves(direction_rates, title):
    """
    Draw a tuning curve as a bar chart and as a polar plot, then show it.

    Column 0 of direction_rates holds the directions (used as degrees) and
    column 1 the firing rates in spikes/s.  The given title is appended to
    each panel's heading.  Always returns 0.
    """
    # Split the two columns; row order keeps them in correspondence.
    angles_deg = direction_rates[:, 0]
    firing = direction_rates[:, 1]

    # Bar chart panel
    plt.subplot(2, 2, 1)
    plt.title('Histogram ' + title)
    plt.xlabel('Directions (in degrees)')
    plt.ylabel('Average Firing Rate (in spikes/s)')
    plt.axis([0, 360, 0, 70])
    plt.xlim(-22.5,337.5)
    plt.bar(angles_deg, firing, width=45, align='center')
    plt.xticks(angles_deg)

    # Polar panel: repeat the first rate so the curve closes on itself.
    plt.subplot(2,2,2,polar=True)
    plt.title('Polar plot ' + title)
    closed_firing = np.append(firing, firing[0])
    theta = np.arange(0,361,45)*np.pi/180
    plt.polar(theta, closed_firing)

    plt.show()
    return 0
예제 #9
0
def plotDist(subplot, X, Y, label):
    """Draw a narrow-bar chart of ``Y`` against ``X`` in the given subplot.

    Parameters
    ----------
    subplot
        Subplot specification passed to ``pylab.subplot`` (e.g. ``211``).
    X, Y : sequence
        Bar positions and heights.
    label : str
        Y-axis label.
    """
    # Select the subplot FIRST.  The grid call used to run before this line
    # and therefore toggled the grid of whichever axes was current before.
    pylab.subplot(subplot)
    pylab.grid(True)
    pylab.bar(X, Y, 0.05)
    pylab.ylabel(label)
    pylab.xticks(arange(len(X)), X)
    pylab.yticks(arange(0,1,0.1))
예제 #10
0
def Importance_Plot(data,label=None):
    '''
    Fit an ExtraTreesClassifier and plot the ranked feature importances.

    The ranking is printed as a DataFrame and drawn as a bar chart whose
    error bars are the standard deviation of each feature's importance
    across the individual trees.

    :param data: pandas DataFrame of features (column names are reported)
    :param label: target vector passed to ``fit``
    :return: None; the figure is shown with ``plt.show()``
    '''
    import numpy as np
    import matplotlib.pylab as plt
    from sklearn.ensemble import ExtraTreesClassifier
    import pandas as pd

    model = ExtraTreesClassifier()
    features = np.array(data)
    model.fit(features, label)
    importance = model.feature_importances_

    # Spread across trees.  The previous code repeated the forest-level
    # `importance` once per tree, so the standard deviation was always 0.
    std = np.std([tree.feature_importances_ for tree in model.estimators_],
                 axis=0)
    # Feature indices from most to least important.
    indices = np.argsort(importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    importa = pd.DataFrame({'importance': list(importance[indices]),
                            'Feature name': list(data.columns[indices])})
    print(importa)

    # Plot the feature importances of the forest
    n_features = features.shape[1]
    plt.figure(figsize=(20, 8))
    plt.title("Feature importances")
    plt.bar(range(n_features), importance[indices],
            color="g", yerr=std[indices], align="center")
    plt.xticks(range(n_features), indices)
    plt.xlim([-1, n_features])
    plt.grid(True)
    plt.show()
예제 #11
0
def test_probabilities(exp, n=1000):
    """Call ``rp.parsex(exp)`` ``n`` times and bar-plot the outcome counts.

    Parameters
    ----------
    exp
        Expression handed to ``rp.parsex`` on every draw.
    n : int
        Number of draws; also reported in the plot title.
    """
    counts = {}
    for _ in range(n):
        outcome = rp.parsex(exp)
        counts[outcome] = counts.get(outcome, 0) + 1

    plt.xlabel("String length")
    plt.ylabel("Occurence")
    # Report the real sample size; the title used to say N=1000 regardless.
    plt.title("Union: %s P=0.3 N=%d" % (exp, n))

    # For bar chart (use on Union)
    # See for labeling: https://stackoverflow.com/a/30229062
    if counts:  # zip(*[]) cannot be unpacked when nothing was drawn (n == 0)
        outcomes, freqs = zip(*sorted(counts.items()))
        plt.bar(range(len(freqs)), freqs, align="center")
        plt.xticks(range(len(outcomes)), outcomes)

    plt.show()
예제 #12
0
    def show(self,H):
        """
        Display the histogram of the data, together with the mixture model

        The histogram is normalised to sum to one and drawn as bars.  The
        model curve is a Lambda-weighted mix of two terms built from
        ``self._bcoef``, ``self.r0`` and ``self.r1``, also normalised to
        sum to one.

        Parameters
        ----------
        H : ndarray
            The histogram of the data.
        """
        xmax = np.size(H)
        nH = H.astype(float) / np.sum(H)
        ndraw = xmax - 1

        def component(rate, i):
            # exp(bcoef + i*log(rate) + (ndraw-i)*log(1-rate))
            return np.exp(self._bcoef(ndraw,i)+i*np.log(rate)+ \
                          (ndraw-i)*np.log(1-rate))

        L = np.zeros(xmax)
        for i in range(xmax):
            L0 = component(self.r0, i)
            L1 = component(self.r1, i)
            L[i] = self.Lambda*L0 + (1-self.Lambda)*L1
        L = L/L.sum()

        import matplotlib.pylab as mp
        positions = np.arange(xmax)
        mp.figure()
        mp.bar(positions, nH)
        mp.plot(positions+0.5, L, 'k', linewidth=2)
def plotHousing(impression):
    """
    Plot housing prices over time as a bar chart.

    Reads ``midWestHousingPrices.txt``, where every line holds a year, a
    quarter and a price, and draws one bar per line (price in $1000).

    Parameters
    ----------
    impression : str
        One of ``'flat'`` (log y-axis, limits 10..1000), ``'volatile'``
        (limits 180..220) or ``'fair'`` (limits 150..250).

    Raises
    ------
    ValueError
        If ``impression`` is none of the values above.  The check happens
        after the bars have been drawn.
    """
    labels, prices = [], []
    # `with` closes the file; it used to be left open.
    with open("midWestHousingPrices.txt", 'r') as f:
        # each line of the file is: year quarter price
        for line in f:
            year, quarter, price = line.split()
            label = year[2:4] + "\n Q" + quarter[1]
            labels.append(label)
            prices.append(float(price)/1000)
    # x coordinates of the bars
    quarters = np.arange(len(labels))
    # bar width
    width = 0.5
    if impression == 'flat':
        plt.semilogy()
    plt.bar(quarters, prices, width, color='r')
    plt.xticks(quarters + width / 2.0, labels)
    plt.title("美国中西部各州房价")
    plt.xlabel("季度")
    plt.ylabel("平均价格($1000)")

    if impression == 'flat':
        plt.ylim(10, 10**3)
    elif impression == "volatile":
        plt.ylim(180, 220)
    elif impression == "fair":
        plt.ylim(150, 250)
    else:
        raise ValueError("Invalid input.")
예제 #14
0
    def plot_histogram(self, main="", numrows=1, numcols=1, fignum=1):
        """Draw the choice counts next to the probability sums as paired bars.

        The number of alternatives is taken from the second dimension of the
        array returned by ``get_probabilities()``, which must have at least
        two dimensions.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probs = self.get_probabilities()
        if probs.ndim < 2:
            raise StandardError("probabilities must have at least 2 dimensions.")

        alts = probs.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()

        subplot(numrows, numcols, fignum)
        positions = arange(alts)
        # counts first, probability sums shifted right by one bar width
        bar(positions, choice_counts, width=width_par)
        bar(positions + width_par, sum_probs, width=width_par, color="g")
        xticks(positions)
        title(main)
        axis()
        caption = "\nchoices histogram (blue),\nprobabilities sum (green)"
        text(alts + 0.5, -0.1, caption,
             horizontalalignment="right",
             verticalalignment="top")
예제 #15
0
    def plot_histogram_with_capacity(self, capacity, main=""):
        """Plot histogram of choices and capacities. The number of alternatives is determined
        from the second dimension of probabilities.

        ``capacity`` supplies one bar height per alternative and is drawn in
        red next to the choice counts.  Raises StandardError when the
        probabilities have fewer than two dimensions.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError("probabilities must have at least 2 dimensions.")
        # Use the array that was just fetched and validated.  This used to
        # read self.probabilities, unlike the sibling plot_histogram method.
        alts = probabilities.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        # NOTE(review): the result is not used here; the call is kept in case
        # get_probabilities_sum has side effects -- confirm and drop.
        self.get_probabilities_sum()

        subplot(212)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, capacity, width=width_par, color="r")
        xticks(arange(alts))
        title(main)
        axis()
        text(
            alts + 0.5,
            -0.1,
            "\nchoices histogram (blue),\ncapacities (red)",
            horizontalalignment="right",
            verticalalignment="top",
        )
예제 #16
0
def infer_latent_dim(X, verbose=0, maxr=-1):
    """
    r = infer_latent_dim(X, verbose=0)
    Infer the latent dimension of an array assuming data+gaussian noise mixture

    The criterion ``_linear_dim_criterion_`` is evaluated on the singular
    values of X for every candidate dimension below maxr; the candidate
    with the largest value is returned.

    Parameters
    ----------
    array X, data whose dimension has to be inferred
    verbose=0, int, verbosity level
             (>0 plots the mean-centred criterion, >1 also prints the
             singular values)
    maxr=-1, int, maximum dimension that can be achieved
             if maxr = -1, this is equal to min(X.shape)

    Returns
    -------
    r, int, the inferred dimension
    """
    if maxr == -1:
        maxr = np.minimum(X.shape[0], X.shape[1])

    U, S, V = nl.svd(X, 0)
    if verbose > 1:
        # Single %-formatted argument: same output as the old Python 2
        # print statement, and valid on Python 3 as well.
        print("Singular Values %s" % (S,))

    L = np.array([
        _linear_dim_criterion_(S, k, X.shape[1], X.shape[0]) / X.shape[0]
        for k in range(maxr)
    ])
    rank = np.argmax(L)

    if verbose:
        import matplotlib.pylab as mp
        mp.figure()
        mp.bar(np.arange(maxr), L - L.mean())

    return rank
예제 #17
0
 def plot_height(self):
     """Bar-plot the sorted heights of all nodes that are not leaves."""
     import matplotlib.pylab as mp
     mp.figure()
     # `== False` selects the entries where isleaf() is false
     sorted_heights = np.sort(self.height[self.isleaf() == False])
     n_internal = np.sum(self.isleaf() == False)
     positions = np.arange(n_internal)
     mp.bar(positions, sorted_heights)
예제 #18
0
 def eqDistribution(self, plot=True):
     """Return the HMM stationary distribution, optionally bar-plotting it.

     When plotting, one bar is drawn per macrostate and the ticks are
     labelled with the entries of ``self.hmm.active_set``.
     """
     if not plot:
         return self.hmm.stationary_distribution

     from matplotlib import pylab as plt
     plt.bar(range(self.macronum), self.hmm.stationary_distribution)
     tick_names = [str(a) for a in self.hmm.active_set]
     tick_positions = np.arange(self.macronum)+0.4
     plt.xticks(tick_positions, tick_names)
     return self.hmm.stationary_distribution
예제 #19
0
def barplot(labels,data,error=None,bar_opts={},ticks_opts={}):
    """Draw a labelled bar chart on the current axes.

    Parameters
    ----------
    labels : sequence
        Tick labels, one per bar.
    data : sequence
        Bar heights.
    error : sequence, optional
        Passed to ``bar`` as ``yerr``.
    bar_opts : dict
        Extra keyword arguments for ``bar``; ``width`` defaults to 0.5.
        The dict is not modified.
    ticks_opts : dict
        Extra keyword arguments for ``xticks``.

    Returns
    -------
    list of float
        The x position of each bar.
    """
    from matplotlib.pylab import bar,yticks,xticks,xlim
    # Work on a copy: setdefault used to mutate the shared default dict (or
    # the caller's dict), leaking 'width' into later calls.
    bar_opts = dict(bar_opts)
    bar_opts.setdefault('width',0.5)
    xlocations = [0.5+x for x in range(len(data))]
    bar(xlocations, data, yerr=error, **bar_opts)
    xticks([bar_opts['width']/2.0+x for x in xlocations], labels, **ticks_opts)
    if xlocations:  # nothing to frame when data is empty
        xlim(0, xlocations[-1]+1)
    return xlocations
예제 #20
0
def LE(G, dim, verbose=0, maxiter=1000):
    """
    Laplacian Embedding of the data
    returns the dim-dimensional LE of the graph G

    Parameters
    ----------
    G, nipy.neurospin.graph.WeightedGraph instance that represents the data
    dim=1, int, number of dimensions
    verbose=0, verbosity level
    maxiter=1000, int, maximum number of iterations of the algorithm

    Returns
    -------
    chart, array of shape(G.V,dim)

    Note
    ----
    In fact the current implementation returns
    what is now referred to a diffusion map at time t=1
    """
    n = G.V
    # the embedding cannot have more dimensions than there are vertices
    dim = np.minimum(dim,n)
    # random start with dim+2 columns
    chart = nr.randn(G.V,dim+2)
    f = ff.Field(G.V,G.edges,G.weights,chart)
    LNorm,RNorm = local_sym_normalize(G)
    # note : normally Rnorm = Lnorm
    if verbose:
        # relative difference between the two normalisations
        print np.sqrt(np.sum((LNorm-RNorm)**2))/np.sum(LNorm)
    eps = 1.e-7

    # Repeat: diffuse, orthonormalise, write back -- until the field changes
    # by less than eps between two iterations or maxiter is reached.
    f1 = np.zeros((G.V,dim+2))
    for i in range(maxiter):
        f.diffusion(10)
        f0 = Orthonormalize(f.field)
        f.field = f0
        if nl.norm(f1-f0)<eps:
            break
        else:
            f1 = f0

    if verbose:
        # NOTE(review): `i` is unbound here when maxiter == 0 -- confirm
        # callers never pass that.
        print i,nl.norm(f1-f0)
    # one more diffusion step on the resulting field before the SVD
    f.diffusion()
    f0 = f.field

    U,S,V = nl.svd(f0,0)
    RNorm = np.reshape(np.sqrt(RNorm),(n,1))
    # skip the first left singular vector; scale by singular values and 1/sqrt(RNorm)
    chart = S[:dim]*np.repeat(1./RNorm,dim,1)*U[:,1:dim+1]
    # give every column unit Euclidean norm
    chart = chart/np.sqrt(np.sum(chart**2,0))

    if verbose:
        import matplotlib.pylab as mp
        mp.figure()
        mp.bar(np.arange(np.size(S)-1),np.sqrt(1-S[1:]))
        print "laplacian eigenvalues: ",1-S

    return chart
예제 #21
0
 def showSkyline(self, l):
     """Draw every (start, end, height) entry of ``l`` as one bar and show it."""
     pylab.cla()
     for building in l:
         left, right, top = building[0], building[1], building[2]
         # bar anchored at `left`, as wide as the interval
         pylab.bar(left, top, (right - left))
     pylab.show()
     print("Done")
예제 #22
0
def plot_parm_imp(parms, equation):
    """Bar-plot the ranking that ``loop_diff`` returns for one set of variables."""
    rank = loop_diff(parms, equation)
    plot.figure()
    names = list(rank)
    # values are looked up by the string form of each key
    scores = [rank[str(name)] for name in rank]
    plot.bar(names, height=scores, width=0.8)
    plot.show()
예제 #23
0
파일: nlecw.py 프로젝트: Flav13/NLECW
def plot_results(results, title, xlabels, ylabel="Success Rate"):
    '''Draw one teal bar per result, labelled with xlabels, y-axis capped at 100.'''
    bar_width = 0.4
    positions = np.arange(len(results))
    plt.bar(positions, results, bar_width, color="#1AADA4")
    plt.ylabel(ylabel)
    plt.ylim(ymax=100)
    # ticks are offset by half a bar width
    tick_positions = positions + bar_width/2.0
    plt.xticks(tick_positions, xlabels)
    plt.title(title)
예제 #24
0
    def do_plot(stat):
        """Draw grouped bars of ``stat``: one group per CL entry, one bar per context."""
        bar_width = 0.15
        group_start = np.arange(len(CL))
        for slot, context in enumerate(contexts):
            heights = []
            for cl in CL:
                heights.append(select(data, context, cl, stat))
            # each context is shifted right by one bar width
            plt.bar(group_start + bar_width*slot, heights, bar_width,
                    color = COLORS[slot])

        plt.xticks(group_start + 0.3, CL, rotation=0)
        plt.ylabel(stat)
예제 #25
0
 def show_time_difference(self,font,portfolioSize,timeSpent):
     """Bar-plot ``timeSpent`` against ``portfolioSize`` using ``font`` for all text."""
     font.set_size(15)
     sns.set(style="ticks")
     pylab.figure(figsize=(12,8))
     pylab.bar(portfolioSize, timeSpent, color='r', width=300)
     pylab.grid(True)
     # both axis labels share the same font settings
     axis_text = dict(fontproperties=font, fontsize=15)
     pylab.title(u'期权计算时间耗时(单位:秒)', fontproperties=font, fontsize=18)
     pylab.ylabel(u'时间(s)', **axis_text)
     pylab.xlabel(u'组合数量', **axis_text)
def plot(letter_frequency):
    """Show a bar chart of the letter_frequency values, ticks labelled by its keys.

    One bar position is created per entry of the module-level ``alfa``.
    """
    n_letters = len(alfa)
    plt.bar(range(n_letters),
            letter_frequency.values(),
            align='center',
            tick_label=letter_frequency.keys())
    plt.xlim([0, n_letters - 1])

    plt.show()
def getTweets(filename):
    """Count tweets per hour from a JSON-lines file and plot the counts.

    Every line of ``filename`` is parsed as one JSON object carrying a
    numeric ``firstpost_date`` timestamp in seconds.  Lines that are not
    valid JSON are reported and skipped.  Timestamps are grouped into
    consecutive one-hour bins starting at the earliest one.  A timestamp
    exactly on a bin boundary is counted once, in the later bin; the latest
    timestamp always lands in the last bin.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    ndarray
        Number of tweets in each one-hour bin (at least one bin).

    Raises
    ------
    ValueError
        If the file contains no parsable tweet (``min`` of an empty list).
    """
    tweets = []
    with open(filename) as f:  # closed on exit; the old close() was unreachable
        for line in f:
            try:
                tweets.append(json.loads(line))
            except ValueError:
                # malformed JSON line: report it and keep going
                print("error when reading tweets.txt")

    # first-post timestamps and their wall-clock form (used as tick labels)
    firstpo_date = [tweet['firstpost_date'] for tweet in tweets]
    tmlb = [datetime.datetime.fromtimestamp(float(stamp)).strftime('%H:%M:%S')
            for stamp in firstpo_date]

    intvl = tmlb[::100]
    mintime = min(firstpo_date)
    maxtime = max(firstpo_date)
    delta = maxtime - mintime

    print('Mintime:  %s' % mintime)
    print('Maxtime:  %s' % maxtime)

    # Number of one-hour bins, rounded up.  At least one, so that tweets
    # sharing a single timestamp are still counted.
    period = int(delta // 3600)
    if delta % 3600 != 0:
        period += 1
    period = max(period, 1)

    tweetphour = np.zeros(period)
    for stamp in firstpo_date:
        # Direct bin lookup instead of scanning every bin.  The old
        # `>= low and <= high` test counted boundary timestamps twice.
        slot = min(int((stamp - mintime) // 3600), period - 1)
        tweetphour[slot] += 1

    index = np.arange(len(tweetphour))

    ax = plt.subplot(111)
    plt.bar(index, tweetphour)
    plt.xlabel('Time')
    plt.ylabel('Number of Tweets')
    plt.title('Number of Tweets Per Hour')
    ax.set_xticklabels(intvl, rotation=20, rotation_mode="anchor", ha="right")
    plt.legend()
    plt.tight_layout()
    plt.show()

    return tweetphour
예제 #28
0
def plot_data(hist):
    """Show the mapping ``hist`` as a bar chart with vertical tick labels."""
    positions = np.arange(len(hist))
    plt.bar(positions, hist.values(), align='center', width=0.5)
    plt.xticks(positions, hist.keys())
    # fetch the tick label objects back so they can be rotated
    _, tick_labels = plt.xticks()
    plt.setp(tick_labels, rotation=90)
    # leave a little headroom above the tallest bar
    plt.ylim(0, max(hist.values()) + 0.1)
    plt.show()
예제 #29
0
def showSkyline(l):
    """Draw every (start, end, height) entry of ``l`` as one bar and show it."""
    pylab.cla()
    for building in l:
        left, right, top = building[0], building[1], building[2]
        # bar anchored at `left`, as wide as the interval
        pylab.bar(left, top, (right - left))
    pylab.show()
    print("Done")
예제 #30
0
def plot_choice_patterns(choice_probabilities, task):
    """ Save a stacked bar chart of choice shares over 40 periods.

    Columns 3, 2, 0 and 1 of ``choice_probabilities`` are stacked in that
    order and the figure is written to ``choices_<task>.png``.
    """
    n_periods = 40
    periods = range(n_periods)
    bar_width = 0.9
    palette = ['blue', 'yellow', 'orange', 'red']
    labels = ['Home', 'School', 'Occupation A', 'Occupation B']

    # Initialize plot
    ax = plt.figure(figsize=(12, 8)).add_subplot(111)

    # Stack one layer per choice on top of the running total.
    stack_top = [0] * n_periods
    for layer, column in enumerate([3, 2, 0, 1]):
        shares = choice_probabilities[:, column]
        plt.bar(periods, shares, bar_width,
                bottom=stack_top, color=palette[layer], alpha=0.70)
        stack_top = [share + base for share, base in zip(shares, stack_top)]

    # Both axes: outward ticks, none on top/right, no frame
    ax.tick_params(labelsize=16, direction='out', axis='both',
                   top='off', right='off')
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    # X axis
    ax.set_xlabel('Period', fontsize=16)
    ax.set_xlim([0, 40])

    # Y axis (first major tick hidden)
    ax.set_ylabel('Share of Population', fontsize=16)
    ax.yaxis.get_major_ticks()[0].set_visible(False)

    # Legend below the plot, one row
    plt.legend(labels, loc='upper center', bbox_to_anchor=(0.5, -0.10),
               fancybox=False, frameon=False, shadow=False,
               ncol=4, fontsize=20)

    # Write out to
    out_name = 'choices_' + task + '.png'
    plt.savefig(out_name, bbox_inches='tight', format='png')
예제 #31
0
def histogram(args):
    """ Make a histogram over kmer overlaps for reads over reference, to
    evaluate a proper cutoff value.

    Reads ``args.reference`` and ``args.fastq`` with ``readfq``, using
    k-mers of length ``args.k``.  For every read, the number of distinct
    k-mers shared with the reference is expressed as a rounded percentage
    of the read length.  The distribution and its cumulative curve are
    saved as ``<fastq basename>_kmer_histogram.png`` in the current
    directory.
    """
    import matplotlib.pylab as plt
    from matplotlib.ticker import FuncFormatter

    k = args.k

    with open(args.reference) as fh:
        contams = [t[1] for t in readfq(fh)]

    # All k-mers of all reference sequences.  `len - k + 1` start positions:
    # the previous `len - k` silently dropped the last k-mer of every
    # sequence.
    kmers = set()
    for contam in contams:
        kmers.update(str(contam[i:i + k])
                     for i in range(len(contam) - k + 1))
    kmers = frozenset(kmers)

    overlap = defaultdict(int)
    # `with` guarantees the fastq handle is closed even if parsing fails.
    with open(args.fastq) as fh:
        for _, seq, _ in readfq(fh):
            read_kmers = frozenset(seq[i:i + k]
                                   for i in range(len(seq) - k + 1))
            shared = len(read_kmers.intersection(kmers))
            overlap[round(100 * float(shared) / len(seq), 0)] += 1

    out_file = os.path.basename(args.fastq).replace('.fastq', '_kmer_histogram.png')
    keys = sorted(overlap.keys())
    nreads = sum(overlap.values())
    percentages = [float(overlap[key]) / nreads for key in keys]

    # Running sum; also copes with an empty input file.
    cumulative = []
    running = 0.0
    for share in percentages:
        running += share
        cumulative.append(running)

    plt.bar(keys, percentages, ec='none', label='histogram')
    plt.plot(keys, cumulative, label='cumulative')

    plt.gca().yaxis.set_major_formatter(FuncFormatter(prc_format))
    plt.grid(True)

    title = \
    """kmer overlap
    k = {}
    number of reads: {}"""

    plt.title(title.format(k, nreads))
    plt.xlabel('Overlapment')
    plt.ylabel('% of reads')
    plt.legend()

    plt.tight_layout()
    plt.savefig(out_file)
예제 #32
0
def get_random_mean_edges(G, gene_set, sims):
    """Sample random gene sets and record the edge count of each sample.

    For each of ``sims`` simulations, as many genes as ``gene_set`` shares
    with the network are drawn at random and ``get_edges_size`` is evaluated
    on them.  A bar chart of how often each value occurred is saved to
    ``edges_final.pdf``.

    Parameters
    ----------
    G
        NOTE(review): this argument is overwritten immediately; the graph is
        rebuilt from the module-level ``network_file``.  Confirm whether the
        caller's graph was meant to be used instead.
    gene_set : set
        Seed genes; only those present in the network are counted.
    sims : int
        Number of random simulations.

    Returns
    -------
    list
        The ``get_edges_size`` result of every simulation, in order.
    """
    # ============================================
    G = nx.Graph()
    with open(network_file, 'r') as handle:  # closed on exit
        for line in handle:
            # lines starting with '#' will be ignored
            if line[0] == '#':
                continue
            # The first two tab-separated columns are interpreted as an
            # interaction gene1 <=> gene2.  strip() removes the trailing
            # newline before the split.
            line_data = line.strip().split('\t')
            if len(line_data) < 2:
                # blank or malformed line: no edge to add
                continue
            G.add_edge(line_data[0], line_data[1])
    tools.remove_self_links(G)
    # =============================================
    # A real list: random.sample needs a sequence, not a node view or set.
    all_genes = list(G.nodes())
    number_of_seed_genes = len(gene_set & set(all_genes))
    edges_list = []
    print("")
    for i in range(1, sims + 1):
        if i % 100 == 0:
            sys.stdout.write("> random simulation [%s of %s]\r" % (i, sims))
            sys.stdout.flush()
        rand_seeds = set(random.sample(all_genes, number_of_seed_genes))
        edges = get_edges_size(G, rand_seeds)
        edges_list.append(edges)

    # =============================================
    # Occurrences of each distinct value, counted in a single pass.
    occurrences = {}
    for edges in edges_list:
        occurrences[edges] = occurrences.get(edges, 0) + 1

    x = list(occurrences.keys())
    y = list(occurrences.values())
    plt.xlabel('edges')
    plt.ylabel('Frequency_percentage')
    plt.bar(x, y)
    plt.title('The histogram of the edges-Frequency_percentage')
    plt.savefig('edges_final.pdf')
    plt.close()
    # =============================================
    return edges_list
예제 #33
0
def display_lines_length(data):
    """Show how many entries of ``data['lines']`` have each space-separated word count."""
    length_counts = {}
    for line in data['lines']:
        n_words = len(line.split(' '))
        length_counts[n_words] = length_counts.get(n_words, 0) + 1
    # keys and values come out in matching order
    x = list(length_counts.keys())
    y = list(length_counts.values())
    plt.bar(x, y)
    plt.show()
예제 #34
0
def plot_dropout(model, sort=True):
    """Bar-plot the flattened ``model.dropout`` values in a new figure.

    With ``sort`` set (the default) the values are drawn in ascending order.
    """
    rates = model.dropout.detach().numpy().reshape(-1)
    rates = np.sort(rates) if sort else rates
    plt.figure()
    positions = range(len(rates))
    plt.bar(positions, rates)
    plt.suptitle(
        f'Dropout probability of {model.lat_dim} fitted latent dimensions in Sparse Model'
    )
예제 #35
0
 def eqDistribution(self, plot=True):
     """Return the HMM stationary distribution, optionally bar-plotting it.

     When plotting, one bar is drawn per macrostate and the ticks are
     labelled with the entries of ``self.hmm.active_set``.
     """
     if not plot:
         return self.hmm.stationary_distribution

     from matplotlib import pylab as plt
     plt.bar(range(self.macronum), self.hmm.stationary_distribution)
     tick_names = [str(a) for a in self.hmm.active_set]
     tick_positions = np.arange(self.macronum) + 0.4
     plt.xticks(tick_positions, tick_names)
     return self.hmm.stationary_distribution
예제 #36
0
def plottrends(searchterm, tf, booRegion, booCreateCSV, secondtermforseries):
    """Fetch Google Trends data through pytrends and plot it.

    Parameters
    ----------
    searchterm : str
        Main search term.
    tf : str
        Timeframe string passed to ``build_payload``; also shown in the title.
    booRegion : bool
        True draws a bar chart of interest by region (last 25 rows after an
        ascending sort); False draws interest over time as a line plot.
    booCreateCSV : bool
        True additionally writes the data frame to
        ``c:\\temp\\googletrends.csv``.
    secondtermforseries : str
        Optional second term.  It is only queried and plotted in the
        over-time mode; pass an empty string to omit it.
    """

    print('Importing data from Google Trends')
    pytrend = TrendReq()

    # The region view only ever queries the main term.
    if booRegion or len(secondtermforseries) == 0:
        search_list = [searchterm]
    else:
        search_list = [searchterm, secondtermforseries]

    pytrend.build_payload(search_list, cat=0, timeframe=tf, geo='', gprop='')

    register_matplotlib_converters()
    plt.figure(figsize=(10, 6))
    # NOTE(review): assumes the figure created above is figure number 1 -- confirm.
    fig = plt.figure(1)

    if booRegion:
        df = pytrend.interest_by_region()
        df.sort_values(searchterm, inplace=True, ascending=True)
        df.reset_index(level=0, inplace=True)
        # ascending sort + tail(25) keeps the 25 largest values
        df = df.tail(25)

        ax = fig.add_subplot(111)
        ax.tick_params(axis='x', which='major', labelsize=6)
        ax.tick_params(axis='x', which='minor', labelsize=6)
        # NOTE(review): tick labels are set before the bars are drawn -- verify
        # that they still line up with the bars afterwards.
        ax.set_xticklabels(df['geoName'], rotation=50, ha="right")

        plt.bar(df['geoName'], df[searchterm], color='grey')
        plt.title("'" + searchterm + "'" +
                  ' google searches by country over time : ' + tf)

    else:
        df = pytrend.interest_over_time()
        df.reset_index(level=0, inplace=True)
        plt.ylabel('Index')
        plt.plot(df['date'], df[searchterm], color='black', label=searchterm)

        if len(secondtermforseries) > 0:
            plt.plot(df['date'],
                     df[secondtermforseries],
                     color='grey',
                     label=secondtermforseries)
            # from here on searchterm is only used for the title text
            searchterm = searchterm + ' \\ ' + secondtermforseries

        plt.legend(loc="upper left")
        plt.title("'" + searchterm + "'" + ' google searches through time : ' +
                  tf)

    print(df.head())

    if booCreateCSV:
        print('Creating csv file')
        df.to_csv('c:\\temp\\googletrends.csv')

    plt.show()
예제 #37
0
def makeBarGraph(assm1_chrom_list, assm1_dict, assm1_name, assm2_chrom_list, assm2_dict, assm2_name, out_fi, data_type):
	"""Save a paired bar chart comparing per-sequence lengths of two assemblies.

	Parameters
	----------
	assm1_chrom_list, assm2_chrom_list : list
		Sequence names of each assembly; both must hold the same names.
	assm1_dict, assm2_dict : dict
		Map a sequence name to an object carrying the length attributes
		``sp_len``, ``sp_only_len``, ``nohit_len`` and ``ungap_nohit_len``.
	assm1_name, assm2_name : str
		Legend and title labels.
	out_fi : str
		Path of the image to write.
	data_type : str
		One of ``collapse``, ``expand``, ``no_hit`` or ``ungap_nohit``.

	Returns
	-------
	None
		An error is logged and nothing is drawn when the sequence lists
		differ or ``data_type`` is unknown.
	"""
	# Check that the chrom lists are the same - they should be, but better to
	# check.  The old test only looked for ANY common name and referenced the
	# comprehension variable outside its scope when logging.
	mismatched = [item for item in assm1_chrom_list if item not in assm2_chrom_list]
	mismatched += [item for item in assm2_chrom_list if item not in assm1_chrom_list]
	for item in mismatched:
		logging.critical("%s not in both lists" % item)
	if mismatched:
		logging.error("Chromosome lists not the same, not making graphic")
		return

	# data_type -> (length attribute to read, title template)
	layouts = {
		"collapse": ("sp_len", "Collapse sequence: %s and %s"),
		"expand": ("sp_only_len", "Expanded sequence: %s and %s"),
		"no_hit": ("nohit_len", "NoHit sequence: %s and %s"),
		"ungap_nohit": ("ungap_nohit_len", "Ungapped NoHit sequence: %s and %s"),
	}
	if data_type not in layouts:
		logging.error("Unknown data type, not making image: %s" % data_type)
		return
	attr, title_template = layouts[data_type]
	title = title_template % (assm1_name, assm2_name)

	# Set up lists for graphing
	assm1_list = [getattr(assm1_dict[seq], attr) for seq in assm1_chrom_list]
	assm2_list = [getattr(assm2_dict[seq], attr) for seq in assm1_chrom_list]

	# Set up plot
	sns.set_style("ticks")
	sns.set_context("talk")
	plt.figure(figsize=(20,10), dpi=100)
	ax = plt.gca()
	ax.get_xaxis().get_major_formatter().set_scientific(False)
	X = np.arange(len(assm1_chrom_list))
	width = 0.5
	# The second assembly's bars sit directly to the right of the first's.
	plt.bar(X, assm1_list, width=width, facecolor='seagreen', edgecolor='none', label=assm1_name)
	plt.bar(X+width, assm2_list, width=width, facecolor="blue", edgecolor='none', label=assm2_name)
	plt.xticks(np.arange(len(assm1_list))+width, assm1_chrom_list, ha='center', size='22')
	plt.yticks(size="22")
	plt.xlabel('sequences', size='36')
	plt.ylabel('number of bases', size='36')
	plt.title(title, size='36')
	plt.legend(loc='upper left', prop={'size':24})
	sns.despine(top=True, right=True)
	plt.savefig(out_fi, dpi=100)
예제 #38
0
def analyse_data(table, showMode=False, img_prefix=''):
    '''Count labels, articles, authors and tokens in table and plot two summaries.

    Rows whose 'label' is missing are skipped for the label, article and
    author counts; tokens are counted over every entry of table['body'].
    Two figures are saved under STAT_PATH, prefixed with img_prefix:
    'nonBelievers.png' (per-claim counts of rows labelled -1) and
    'comments.png' (per-author counts of labelled rows).
    When showMode is true the counts are printed and each figure is shown.

    Returns (labels, articles, authors, count_tokens).  authors is an
    OrderedDict sorted by descending count; the other three are Counters.
    '''
    labels, articles = Counter(), Counter()
    authors, count_tokens = Counter(), Counter()

    for position, label in enumerate(table['label']):
        if pandas.isna(label):
            continue
        labels[label] += 1
        # Fetch the row once; the claim id and author both come from it.
        row = table.iloc[position]
        if label == -1:
            articles[row['claim_id']] += 1
        author = row['author']
        if not pandas.isna(author):
            authors[author] += 1

    for comment in table['body']:
        count_tokens.update(tokenize(comment))

    if showMode:
        print('For category "non-believer", we have', labels[-1], 'comments.')
        print('For category "believer", we have', labels[1], 'comments.')
        print('For category "none of the above", we have', labels[0],
              'comments.')

        print('Number of labelized articles', len(articles))
        print('Number of authors on labelized data', len(authors))
        print('Number of tokens in data', len(count_tokens))

    def by_count(pair):
        # Sort key: the count half of a (key, count) pair.
        return pair[1]

    nonBelievers = OrderedDict(
        sorted(articles.items(), key=by_count, reverse=True))
    authors = OrderedDict(
        sorted(authors.items(), key=by_count, reverse=True))

    # Figure 1: number of non-believer rows per claim, most frequent first.
    non_believer_counts = list(nonBelievers.values())
    plt.figure()
    plt.xlabel('Claims', fontsize=15)
    plt.ylabel('Number of non-believers', fontsize=15)
    plt.plot(range(len(nonBelievers)), non_believer_counts)
    plt.xticks([])
    plt.savefig(STAT_PATH + img_prefix + 'nonBelievers.png')
    if showMode:
        plt.show()

    # Figure 2: number of labelled comments per author, most active first.
    author_names = list(authors.keys())
    author_counts = list(authors.values())
    plt.figure()
    plt.xlabel('Authors', fontsize=15)
    plt.ylabel('Number of comments', fontsize=15)
    plt.bar(author_names, author_counts, width=.9)
    plt.title("Author histogram")
    plt.xticks([])
    plt.savefig(STAT_PATH + img_prefix + 'comments.png')
    if showMode:
        plt.show()

    return labels, articles, authors, count_tokens
예제 #39
0
def plot(results):
    ''' Draw the component distribution from a 'perform' result as a bar
      chart and save it to submission/components.png.

      results is a 3-item sequence; the first item is ignored, the second is
      the size of the largest component and the third holds one bar height
      per component size from 1 up to that largest size. '''

    _unused, largest_size, distribution = results
    component_sizes = range(1, largest_size + 1)

    plt.bar(component_sizes, distribution)
    plt.title("Component Distribution")
    plt.xlabel("Size of Component")
    plt.ylabel("Fraction of Nodes %")
    plt.savefig("submission/components.png")
예제 #40
0
def lines_per_character(data):
    """Show a bar chart of how many times each value occurs in data['character'].

    Bars appear in order of first occurrence of each value.
    """
    line_counts = {}
    for character in data['character']:
        line_counts[character] = line_counts.get(character, 0) + 1

    names = list(line_counts.keys())
    totals = list(line_counts.values())

    plt.xlabel("Character")
    plt.ylabel("Amount of lines")
    plt.bar(names, totals)
    plt.show()
예제 #41
0
def plot(data, index):
    """Show data as a bar chart in the figure numbered index.

    Bars are placed at 0..len(data)-1 and the legend entry names the
    rating star given by index.
    """
    plt.figure(index)
    positions = np.arange(len(data))
    legend_text = "Rating Star = " + str(index)
    plt.bar(positions, data, align='center', alpha=0.5, label=legend_text)
    plt.xlabel('Sentence Count')
    plt.ylabel('Review Count')
    plt.legend()
    plt.show()
예제 #42
0
    def plotProbabilityDetectionBar(self, results, confInterval):
        """Draw a grouped bar chart of detection probability for K = 1..5.

        Columns 0, 1 and 2 of results are drawn as the 'Node', '1 Hop Away'
        and '2 Hops Away' series, with error bars taken from
        confInterval['hop0'], confInterval['hop1'] and confInterval['hop2'].
        The figure is built but neither shown nor saved here.
        """
        group_count = 5
        positions = numpy.arange(group_count)
        bar_width = 0.2
        opacity = 0.7

        fig, ax = plt.subplots(figsize=(13,8))

        # One entry per series:
        # (x offset, results column, confInterval key, colour, legend label)
        series = (
            (-bar_width, 0, 'hop0', 'lightslategrey', 'Node'),
            (0, 1, 'hop1', 'cadetblue', '1 Hop Away'),
            (bar_width, 2, 'hop2', 'seagreen', '2 Hops Away'),
        )
        for offset, column, interval_key, colour, legend_label in series:
            plt.bar(positions + offset, results[:, column], bar_width,
                    alpha=opacity,
                    color=colour,
                    yerr=confInterval[interval_key],
                    ecolor='black',
                    label=legend_label)

        plt.xlabel('K', fontsize=fontlabel)
        plt.ylabel('Detection Probability', fontsize=fontlabel)
        plt.xticks(positions + bar_width/2, ('1', '2', '3', '4', '5'))
        ax.set_xlim((-2*bar_width, positions[4]+3*bar_width))
        ax.set_ylim((0,1))
        ax.grid(c='gray')

        inner_legend = plt.legend(loc=1, fontsize=fontlabel)
        inner_legend.get_frame().set_alpha(0.5)

        # Shrink the axes to 90% height, keeping the top edge, to free room below.
        bounds = ax.get_position()
        ax.set_position([bounds.x0, bounds.y0 + bounds.height * 0.1,
                         bounds.width, bounds.height * 0.9])

        # Put a legend below current axis
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.10),
                  fancybox=False, shadow=False, ncol=5, fontsize=fontlabel, frameon=False)
예제 #43
0
def dataAnalysis(cid,schoolYear,semester) :
    """Plot a class's score history and its per-indicator scores from the 'cims' database.

    Parameters:
        cid: class id matched against tb_class_score_info.cid.
        schoolYear: school year used to filter the indicator scores.
        semester: semester used to filter the indicator scores.

    Two subplots are shown side by side: a line of score over
    "<school_year>-<semester>" for the class, and a bar chart of score per
    indicator name for the requested school year and semester.

    The queries use driver-side parameter binding instead of string
    concatenation, so the arguments cannot alter the SQL text.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    database = 'cims'
    port = 3306

    history_sql = ("SELECT score,school_year,semester FROM tb_class_score_info "
                   "WHERE cid = %s ORDER BY school_year,semester")
    history_args = (cid,)

    indicator_sql = ("SELECT b.indicator_name,a.score "
                     "FROM tb_first_index_score a,tb_evaluation_template b "
                     "WHERE a.first_indicator_id = b.indicator_id "
                     "and school_year = %s and semester = %s")
    indicator_args = (schoolYear, semester)

    connection = pymysql.connect(host=host,user = user,password=password,database=database,port=port)
    try:
        history_cursor = connection.cursor()
        try:
            history_cursor.execute(history_sql, history_args)
            # Each row is (score, school_year, semester).
            history_rows = history_cursor.fetchall()
        finally:
            history_cursor.close()

        indicator_cursor = connection.cursor()
        try:
            # Keep the debug output of the final statement, rendered by the driver.
            print(indicator_cursor.mogrify(indicator_sql, indicator_args))
            indicator_cursor.execute(indicator_sql, indicator_args)
            # Each row is (indicator_name, score).
            indicator_rows = indicator_cursor.fetchall()
        finally:
            indicator_cursor.close()
    finally:
        # Release the connection even when a query fails.
        connection.close()

    x1 = []
    y1 = []
    for score, school_year, term in history_rows:
        y1.append(score)
        x1.append(school_year + '-' + str(term))

    x2 = []
    y2 = []
    for indicator_name, score in indicator_rows:
        x2.append(indicator_name)
        y2.append(score)

    # Avoid garbled CJK text in the figure.
    pyl.rcParams['font.sans-serif'] = ['KaiTi']  # set the default font
    pyl.rcParams['axes.unicode_minus'] = False
    pyl.subplot(1,2,1)  # rows, columns, current region
    pyl.title('历史对比')
    pyl.xlabel('时间')
    pyl.ylabel('得分')
    pyl.plot(x1,y1)
    pyl.gcf().autofmt_xdate()
    pyl.subplot(1,2,2)
    pyl.title("指标得分")
    pyl.xlabel("指标名称")
    pyl.ylabel("得分")
    pyl.bar(x2,y2,align='center')
    pyl.show()
예제 #44
0
def ver_hist():
    """Show a bar chart of nine fixed counts labelled H, B, E, G, I, T, S, U and NF."""

    import matplotlib.pylab as plt
    import numpy as np

    # (tick label, bar height) pairs, drawn left to right at x = 1..9.
    bars = [
        ("H", 4265),
        ("B", 536),
        ("E", 3632),
        ("G", 587),
        ("I", 3),
        ("T", 1765),
        ("S", 1478),
        ("U", 4694),
        ("NF", 196),
    ]
    xlabels = [label for label, _ in bars]
    y = [height for _, height in bars]
    x = list(range(1, len(bars) + 1))

    plt.bar(x, y)
    plt.xticks(x, xlabels)
    plt.show()
예제 #45
0
def plot_psds(data, rate, subject, condition, label_set, title):
    """
    Plot the normalized power spectral density of every channel in data.

    Each channel's PSD (m.psd with NFFT=512, Fs=rate) is divided by its own
    sum and drawn as a bar chart in a 4 x 3 subplot grid.  All subplots share
    the axis range x = 0..70 and y = 0..(largest normalized value over all
    channels).

    Parameters:
        data: indexable collection of per-channel signals; at most 9 channels
            can be placed because the subplot table has 9 entries.
        rate: sampling frequency, passed to m.psd as Fs.
        subject, condition, title: strings concatenated into the figure title.
        label_set: per-channel labels, indexed like data.
    """
    fig = plt.figure()
    # common title
    fig.suptitle('Frequency Response ('+title
            +')'+subject+' '+condition,
            fontsize=14, fontweight='bold')
    # common ylabel
    fig.text(0.06, 0.5, 'Normalized Power Spectral Density',
             ha='center', va='center', rotation='vertical',
             fontsize=14, fontweight='bold')
    # common xlabel
    fig.text(0.5, 0.05,'Frequency (Hz)',
             ha='center', va='center',fontsize=14, fontweight='bold')
    # use this to stack EEG, EOG, EMG on top of each other
    sub_order = [1,4,7,10,2,5,3,6,9]

    # Compute each channel's normalized PSD once; the results are needed both
    # to find the common y-axis maximum and to draw the bars.
    spectra = []
    maxY = 0
    for i in range(len(data)):
        Pxx, freqs = m.psd(data[i], NFFT=512, Fs=rate)
        normalizedPxx = Pxx/sum(Pxx)
        spectra.append((freqs, normalizedPxx))
        peak = normalizedPxx.max()
        if peak > maxY:
            maxY = peak

    # plot all subplots
    for i, (freqs, normalizedPxx) in enumerate(spectra):
        plt.subplot(4, 3, sub_order[i])  # 4 x 3 layout
        plt.bar(freqs, normalizedPxx, label=label_set[i],width=0.2)
        plt.axis([0,70,0,maxY])
        plt.title(label_set[i])

    # adds space between subplots; one call after the loop is enough
    plt.subplots_adjust(hspace=.6)
예제 #46
0
	def plot_score_ratio(self, keyword):
		"""Plot how many sentences matching keyword have score 0 versus score 1.

		Rows of self.data whose 'sentence' matches keyword (via str.contains)
		are selected; the two counts are drawn as 'negative' and 'positive'
		bars, saved to scoreRatio.png and shown.
		"""
		matches = self.data[self.data['sentence'].str.contains(keyword)]
		negative_count = len(matches[matches['score'] == 0])
		positive_count = len(matches[matches['score'] == 1])

		plt.bar(['negative', 'positive'], [negative_count, positive_count])
		plt.title(f"keyword: {keyword}")
		plt.xlabel("score")
		plt.ylabel("ratio")
		plt.savefig("scoreRatio.png")
		plt.show()
예제 #47
0
def compare_psds2(data1, data2, rate, subject, condition1, condition2,
                 channel, stage):
    """
    Plot the PSDs of two datasets and their difference in a two-row figure.

    Both datasets go through runFFT with a cutoff of 55, presumably a
    low-pass threshold against 60 Hz noise (confirm in runFFT).  The top
    subplot overlays data1 (bars) and data2 (red line); the bottom subplot
    shows data2's normalized PSD minus data1's.
    """
    Fcutoff = 55  # threshold handed to runFFT

    fig = plt.figure()

    # Figure-wide title and axis captions shared by both subplots.
    heading = ('Frequency Response ('+channel
               +') - Subject #'+subject+' '+condition1+
               ' vs. '+condition2)
    fig.suptitle(heading, fontsize=14, fontweight='bold')
    fig.text(0.06, 0.5, 'Normalized Power Spectral Density',
             ha='center', va='center', rotation='vertical',
             fontsize=14, fontweight='bold')
    fig.text(0.5, 0.05,'Frequency (Hz)',
             ha='center', va='center',fontsize=14, fontweight='bold')

    # Spectrum of the first dataset sets the initial y-axis ceiling.
    first = runFFT(data1, rate, Fcutoff)
    maxY = first.nPxx.max()

    # Spectrum of the second dataset may raise the ceiling.
    second = runFFT(data2, rate, Fcutoff)
    second_peak = second.nPxx.max()
    if second_peak > maxY:
        maxY = second_peak

    # Top row: both spectra on the same axes.
    plt.subplot(2, 1, 1)
    plt.bar(first.freqs, first.nPxx, label=condition1, width=0.2,
            edgecolor = 'none')
    plt.plot(second.freqs, second.nPxx, label=condition2, color='r')
    plt.axis([0,Fcutoff,0,maxY])
    plt.legend()
    plt.title(stage)
    fig.subplots_adjust(hspace=.35)

    # Bottom row: difference between the two spectra.
    # NOTE(review): the bars are data2 minus data1, while the title lists
    # condition1 before condition2 - confirm the intended sign.
    delta = second.nPxx - first.nPxx
    plt.subplot(2, 1, 2)
    plt.bar(first.freqs, delta,
            label='', width=0.2, color='r',
            edgecolor = 'none')
    plt.axis([0,Fcutoff,min(delta),maxY])
    plt.title(str(stage)+' Differential ('+str(condition1)+' - '
                +str(condition2) +' )')
    plt.show()
예제 #48
0
    def barRecallPrecisionvsK2(self, recall, falseRate, precision, errorsRecall, errorprecision, errorFalse):
        """Draw grouped bars of precision, recall and false rate per K_T value.

        Parameters:
            recall, falseRate, precision: one bar height per group; the number
                of groups is len(recall).
            errorsRecall, errorprecision, errorFalse: error-bar sizes for the
                matching series.

        Groups are labelled 1..len(recall) on the x axis.  The labels used to
        be a fixed five-entry tuple, which only matched five-group input.
        The figure is built but neither shown nor saved here.
        """
        n_groups = len(recall)

        fig, ax = plt.subplots(figsize=figsize)
        index = numpy.arange(n_groups)
        bar_width = 0.2
        opacity = 0.7

        # One entry per series, drawn left to right within each group:
        # (x offset, heights, error bars, colour, legend label, hatch)
        series = (
            (-bar_width, precision, errorprecision, 'goldenrod', 'Precision', "//"),
            (0, recall, errorsRecall, 'darkorange', 'Recall', "xx"),
            (bar_width, falseRate, errorFalse, 'firebrick', 'False Rate', "--"),
        )
        for offset, heights, errors, colour, legend_label, hatch in series:
            plt.bar(index + offset, heights, bar_width,
                    alpha=opacity,
                    color=colour,
                    yerr=errors,
                    ecolor='black',
                    edgecolor='black',
                    label=legend_label,
                    hatch=hatch)

        plt.xlabel('$K_{T}$', fontsize=fontlabel)
        # Derive the tick labels from the group count so they always match the ticks.
        tick_labels = [str(k) for k in range(1, n_groups + 1)]
        plt.xticks(index + bar_width/2, tick_labels)
        ax.set_xlim((-2*bar_width, index[-1]+3*bar_width))
        ax.set_ylim((0,1))
        ax.grid(c='gray')

        # Shrink the axes to 90% height, keeping the top edge.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * 0.1,
                         box.width, box.height * 0.9])

        # Legend anchored above the axes (bbox y = 1.35 in axes coordinates).
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, +1.35),
                  fancybox=False, shadow=False, ncol=5, fontsize=fontlabel, frameon=False)
예제 #49
0
def plot_distribution(N, n1, n2):
    '''Illustration: plot the PDF of the intersection size of two sets.

    Builds PairIntersectionSizePdf(N, n1, n2), evaluates its pdf at every
    size k from 0 to min(n1, n2) inclusive, and shows the values as a bar
    chart in figure 1.

    Uses range instead of xrange so the function runs on Python 2 and 3.
    '''
    p = PairIntersectionSizePdf(N, n1, n2)
    sizes = list(range(0, min(n1, n2) + 1))
    pdf = [p.pdf(k) for k in sizes]
    P.figure(1)
    P.clf()
    P.bar(sizes, pdf)
    P.xlabel('Intersection Size (k)')
    P.ylabel('Probability')
    P.title('PDF of Intersection Size of Two Sets of Sizes %d,%d Out Of %d' % (n1, n2, N))
    P.show()
예제 #50
0
def plotRecordDistribution(df, timeline='M'):
    """Plot the number of records uploaded per day, week or month.

    Arguments:
    - df: a dataFrame containing the column 'uploadDate'; missing values are
      dropped and the rest are parsed with pd.to_datetime
    - timeline: the resampling unit on the time axis
      {'M': Month, 'W': Week, 'D': Day}; any other value is passed to
      resample as-is and labelled like months

    Nothing is returned.  A missing 'uploadDate' column is reported with a
    printed message.  Any other ordinary error while building the plot is
    also printed, with its cause, instead of being raised; interpreter-level
    exits and interrupts are no longer swallowed.
    """
    try:
        raw_dates = df['uploadDate']
    except KeyError:
        print('Given table does not have column uploadDate')
        return

    try:
        # Get the uploadDates and make em' a dataframe
        uploadDate = pd.DataFrame()
        uploadDate['date'] = pd.to_datetime(raw_dates.dropna())
        uploadDate.index = uploadDate['date']
        uploadDate['score'] = 1  # one score to each upload

        # Sum the per-upload scores within each period of the requested frequency
        resampled_dates = uploadDate.resample(timeline).sum()

        # Tick date format, tick stride and axis caption per frequency.
        # Daily plots label only every 10th bar to keep the axis readable.
        if timeline == 'D':
            date_format, stride, unit = '%d %B %Y', 10, 'Day'
        elif timeline == 'W':
            date_format, stride, unit = '%d %B %Y', 1, 'Week'
        else:
            date_format, stride, unit = '%B %Y', 1, 'month'
        tick_labels = [
            stamp.strftime(date_format) for stamp in resampled_dates.index
        ]

        # plotting stuff
        x_pos = np.arange(len(resampled_dates))
        plt.figure(figsize=(16, 9))
        plt.bar(x_pos, resampled_dates['score'])
        plt.xticks(x_pos[::stride], tick_labels[::stride],
                   rotation='vertical')
        plt.xlabel(unit, fontsize=15)
        plt.ylabel('Number of submissions', fontsize=15)
        plt.title('Submissions per ' + unit, fontsize=18)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.show()

    except Exception as exc:
        # Best effort, as before, but report what actually went wrong.
        print('Could not plot record distribution: %s' % exc)
예제 #51
0
def plot(data):
    """Show a bar chart of sizes labelled by file name.

    data is a pair (names, sizes); bar k has height sizes[k] and tick
    label names[k].
    """
    names, sizes = data
    positions = np.arange(len(names))

    plt.bar(positions, sizes)
    plt.xlabel('File', fontsize=5)
    plt.ylabel('Size', fontsize=5)
    plt.xticks(positions, names, fontsize=5, rotation=30)
    plt.title('LRU files and size')
    plt.show()
예제 #52
0
def PredictNSteps(NSteps):
    """Extend the module-level series by NSteps predictions and plot the result.

    Each step calls GetPrediction(), prints "<next x> : <prediction>", and
    appends the point both to XVal/YVal and to DeltaXVal/DeltaYVal.  The full
    series is then drawn as bars with the Delta points overdrawn in red.
    """
    for _ in range(NSteps):
        prediction = GetPrediction()
        next_x = XVal[-1] + 1
        print(str(next_x) + ' : ' + str(prediction))

        DeltaXVal.append(next_x)
        predicted_y = float(prediction)
        DeltaYVal.append(predicted_y)
        XVal.append(next_x)
        YVal.append(predicted_y)

    plt.bar(XVal, YVal)
    plt.bar(DeltaXVal, DeltaYVal, color='red')
    plt.show()
def plot_class_frequencies(labels):
    """Show a bar chart of the table returned by group(labels).

    Column 0 of that table is plotted on the 'ClassID' axis and column 1 on
    the 'Frequency' axis.  Ticks sit at 0.5, 1.5, ..., 42.5 and are named via
    get_label_map('signnames.csv', ...) for the unique labels.
    """
    freqs = group(labels)
    class_ids = freqs[:, 0]
    counts = freqs[:, 1]

    plt.figure(figsize=(15, 5))
    plt.bar(class_ids, counts)
    plt.xlabel('ClassID')
    plt.ylabel('Frequency')

    tick_positions = np.arange(0.5, 43.5)
    tick_names = get_label_map('signnames.csv', np.unique(labels))
    plt.xticks(tick_positions, tick_names, ha='right', rotation=45)
    plt.show()
예제 #54
0
def plot_attack(data, class_distrs_clean, class_distrs_attacked, node_id,
                retrain_iters):
    """Show predicted class probabilities for one node before and after an attack.

    The left panel plots class_distrs_clean and the right panel
    class_distrs_attacked.  Each column of the transposed array is drawn as
    one group of bars; the group whose index equals data.y[node_id] is dark
    green, the others grey.  Tick labels come from info["num_classes"] and
    the right-hand title from info_attack["n_perturbations"], both read from
    module scope.
    """
    def make_xlabel(ix, correct):
        template = "Class {}\n(correct)" if ix == correct else "Class {}"
        return template.format(ix)

    def draw_panel(position, class_distrs):
        # Draw one bar group per class in subplot `position`; return the axes
        # and the x centre of every group.
        plt.subplot(1, 2, position)
        centers = []
        for ix, block in enumerate(class_distrs.T):
            # Groups are len(block) wide and separated by a gap of 2.
            x_ixs = np.arange(len(block)) + ix * (len(block) + 2)
            centers.append(np.mean(x_ixs))
            is_correct = ix == data.y[node_id].item()
            plt.bar(x_ixs, block,
                    color='darkgreen' if is_correct else '#555555')
        axes = plt.gca()
        plt.ylim((-.05, 1.05))
        return axes, centers

    def class_tick_labels():
        # One label per class, marking the node's true class.
        return [
            make_xlabel(k, data.y[node_id].item())
            for k in range(info["num_classes"])
        ]

    plt.figure(figsize=(12, 4))

    # Left panel: clean data (the only panel with a y-axis caption).
    ax, center_ixs_clean = draw_panel(1, class_distrs_clean)
    plt.ylabel("Predicted probability")
    ax.set_xticks(center_ixs_clean)
    ax.set_xticklabels(class_tick_labels())
    ax.set_title(
        "Predicted class probabilities for node {} on clean data\n({} re-trainings)"
        .format(node_id, retrain_iters))

    # Right panel: after the attack.
    ax, center_ixs_attacked = draw_panel(2, class_distrs_attacked)
    ax.set_xticks(center_ixs_attacked)
    ax.set_xticklabels(class_tick_labels())
    ax.set_title(
        "Predicted class probabilities for node {} after {} perturbations\n({} re-trainings)"
        .format(node_id, info_attack["n_perturbations"], retrain_iters))

    plt.tight_layout()
    plt.show()
예제 #55
0
def barChartPlot(index, coefficients, expected=None):
    """
    Plots a bar chart with Zernike coefficients, `:math: C_{i}`.

    Parameters
    ----------
    index : list of ints
        Indices of the polynomial coefficients.  Plain lists are accepted as
        well as arrays; the positions are converted to an array internally.
    coefficients : array
        Coefficients of the Zernike polynomials.
    expected : array, optional
        Expected values of the coefficients.  When given and non-empty they
        are drawn as semi-transparent bars shifted left by a third of the bar
        width.  The default is None (no expected bars) rather than a shared
        mutable list.
    """
    # Local import so the block does not depend on a module-level alias.
    import numpy as np

    width = 0.6
    # Prepare the labels of the x ticks.
    xticklist = ['Z' + str(i) for i in index]
    # Array form so the arithmetic offset below also works for plain lists.
    positions = np.asarray(index)

    plt.figure(figsize=(9, 6), dpi=80)
    plt.bar(positions,
            coefficients,
            width,
            color='#2E9AFE',
            edgecolor='#2E9AFE',
            label='Measured')
    if expected is not None and len(expected) > 0:
        plt.bar(positions - width / 3,
                expected,
                width,
                color='#882255',
                edgecolor='#882255',
                label='Input',
                alpha=0.5)

    # Add a legend.
    plt.legend(loc=0, fancybox=True)

    # Set the ticks and their labels.  The previous offset `width // 2` was
    # 0.0 for width = 0.6, so the ticks already sat exactly at the index
    # positions; that is now written directly.
    plt.xticks(positions, xticklist, rotation=90)
    plt.xlabel('Zernike polynomials', fontsize=18)
    plt.ylabel('Coefficient', fontsize=18)
    plt.title('Fitted Zernike polynomial coefficients', fontsize=18)

    # Inward ticks on all sides; minor ticks on the y axis only.
    axes = plt.gca()
    axes.minorticks_on()
    axes.tick_params('both', direction='in', top=True, right=True)
    axes.tick_params('y',
                     which='minor',
                     direction='in',
                     left=True,
                     right=True)
    axes.tick_params('x', which='minor', bottom=False)
def plot_trigger_timmdiff():
    """Histogram the differences between consecutive trigger values in a file.

    The file named by sys.argv[1] is read line by line and its first line is
    skipped.  From every other line the trigger value is the text between
    the first '(' and the following ',' of the first whitespace-separated
    token.  Each difference to the previous trigger is divided by 1000000
    (presumably a unit conversion - confirm against the file format),
    printed, and collected.  Lines that cannot be parsed are skipped.

    The differences are then binned with edges from 0 to the largest
    difference and shown as a bar chart.
    """
    triggers = []
    last_trigger = None  # None until the first parseable line is seen

    # The with-block closes the file even if reading fails.
    with open(sys.argv[1]) as f:
        next(f, None)  # skip the first line
        for line in f:
            try:
                this_trigger = line.split()[0].split('(')[1].split(',')[0]
                if last_trigger is None:
                    # First usable line only seeds the reference value.
                    last_trigger = this_trigger
                    continue
                difference = (float(this_trigger) -
                              float(last_trigger)) / 1000000
            except (IndexError, ValueError):
                # Blank or malformed line: ignore it, keep the reference value.
                continue

            print(difference)

            triggers.append(difference)
            last_trigger = this_trigger

    xmax = max(triggers)
    # linspace needs an integer sample count; xmax is a float.
    pulsebins = n.linspace(0, xmax, int(xmax + 1))
    hist0 = n.histogram(n.array(triggers), bins=pulsebins)

    baredges0 = n.linspace(0, hist0[1][-1], len(hist0[0]))
    p.ylim(ymax=1.2 * max(hist0[0]))
    p.bar(baredges0, hist0[0], width=pulsebins[1] - pulsebins[0],
          color='b', label='triggers')
    p.grid()
    p.legend()
    p.xlabel('Triggerdistance')
    p.ylabel('Events')
    p.show()
예제 #57
0
def plotData(Data, nObsPlot=5000):
  ''' Plot data items, at most nObsPlot distinct points (for quick rendering)

      Only objects whose type is exactly bnpy.data.XData are drawn; anything
      else is silently ignored.  Data with more than one dimension is shown
      as a scatter of its first two columns for a random subset of at most
      nObsPlot rows (the generator is seeded with nObsPlot).  One-dimensional
      data is shown as a 25-bin histogram of all of Data.X, normalized to
      sum to one.
  '''
  if type(Data) != bnpy.data.XData:
    return

  PRNG = np.random.RandomState(nObsPlot)
  pIDs = PRNG.permutation(Data.nObs)[:nObsPlot]

  if Data.dim > 1:
    pylab.plot(Data.X[pIDs,0], Data.X[pIDs,1], 'k.')
    return

  hist, bin_edges = pylab.histogram(Data.X, bins=25)
  bin_width = bin_edges[1]-bin_edges[0]
  lefts = bin_edges[:-1]
  fractions = np.asarray(hist, dtype=np.float32) / np.sum(hist)
  # Bars take 80% of the bin width so neighbouring bins stay distinguishable.
  pylab.bar(lefts, fractions, width=0.8*bin_width, color='k')
예제 #58
0
def plot_hist(centers, i):
    """Save a bar chart of centers, normalized by their sum, as cluster_<i>.eps.

    Bars are placed at x = 0..19, so centers must hold 20 values.
    """
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()

    shares = centers / np.sum(centers)
    plt.bar(np.arange(20), shares)

    plt.xlabel('cluster')
    plt.xticks(np.arange(0, 20, 1))
    plt.ylabel('Probability')
    plt.title('Histogram for the cluster centers %d' % i)
    # NOTE(review): fixed annotation at (23, 45); looks like leftover example
    # text - confirm it is wanted.
    plt.text(23, 45, r'$\mu=15, b=3$')
    plt.savefig('cluster_' + str(i) + '.eps')
예제 #59
0
def draw_state_frequency_percentage(values):
    """Show what percentage of values[:, 0] exceeds each threshold 0..199.

    Bar k has height 100 * (number of first-column entries greater than k)
    / (number of entries) and is drawn at x = k + 0.1.
    """
    max_frequency = 200
    figures = values[:, 0]
    percentages = [
        len(np.argwhere(figures > threshold)) / len(figures) * 100
        for threshold in range(max_frequency)
    ]
    bar_positions = np.arange(max_frequency) + 0.1

    plt.bar(bar_positions, percentages)
    plt.title("State Frequency Percentage")
    plt.xlabel("State Frequency")
    plt.ylabel("Percentage")

    plt.show()