def ks_metric(y_true, y_scores, bins, path):
    '''
    :param y_true: array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.
    :param y_scores: array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.
    :param bins: number of bins used for the score CDFs
    :param path: if path equals 0, show the KS plot; if path is a file path string,
        the KS plot is saved to that path.
    :return: KS value
    '''
    df = pd.DataFrame({'y': y_true, 'score': y_scores})
    cdf_data1 = df[df['y'] == 0]['score']
    cdf_data2 = df[df['y'] == 1]['score']
    # use the same bin limits for both classes so the two CDFs are comparable bin by bin
    limits = (df['score'].min(), df['score'].max())
    cdf1 = stats.cumfreq(cdf_data1, numbins=bins, defaultreallimits=limits)
    cdf2 = stats.cumfreq(cdf_data2, numbins=bins, defaultreallimits=limits)
    y_0 = cdf1[0] / cdf1[0][-1]
    y_1 = cdf2[0] / cdf2[0][-1]
    cdf_data = pd.DataFrame({'y_0': y_0, 'y_1': y_1})
    # plot
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(cdf_data)
    ax.legend(list(cdf_data.columns))
    plt.ylabel('Cumulative probability')
    plt.xlabel('Predicted score')
    if path == 0:
        plt.show()
    else:
        plt.savefig(path, dpi=150)
    # KS value: maximum absolute distance between the two class CDFs
    ks = np.max(np.abs(y_0 - y_1))
    return ks
def plotCDF(forecast, validation, title): ax1 = plt.figure(figsize=(7, 5)) vals, x1, x2, x3 = cumfreq(forecast['modelled'], len(forecast['modelled'])) ax1 = plt.plot(np.linspace(np.min(forecast['modelled']), np.max(forecast['modelled']), len(forecast['modelled'])), vals / len(forecast['modelled']), "r", label=str(config.get('Main options', 'RunName'))) vals, x1, x2, x3 = cumfreq(validation['modelled'], len(validation['modelled'])) ax2 = plt.plot(np.linspace(np.min(validation['modelled']), np.max(validation['modelled']), len(validation['modelled'])), vals / len(validation['modelled']), "b", label=str(config.get('Reference options', 'RunName'))) vals, x1, x2, x3 = cumfreq(validation['observations'], len(validation['observations'])) ax3 = plt.plot(np.linspace(np.min(validation['observations']), np.max(validation['observations']), len(validation['observations'])), vals / len(validation['observations']), "black", label="Observations") ax3 = plt.legend(prop={'size': 10}, loc=2) ax1 = plt.title(title) ax1 = plt.xlabel("Discharge (m3/s)") ax1 = plt.ylabel("ECDF") ax1 = plt.gcf().set_tight_layout(True) pdf.savefig() plt.clf()
def HM_color_transfer(style, content): """ Color transfer the content image to the style image using cumulative distribution of both images, Args: style: target style in RGB space. content: content image in RGB space. Returns: Color transfer image in RGB space. """ #copy style image and content image then convert them from 0:1 to 0:255 scale. transfered = np.copy(content) style = np.copy(style) transfered *=255 style *= 255 #calculate normalized cumulative histogram then update the content image based on the calculated values. for h in range (0,3): content_c = transfered[:,:,h] style_c = style[:,:,h] height , width = content_c.shape contentValues,_,_,_ = stats.cumfreq(content_c, numbins=256) contentValues /= contentValues[-1] styleValues,_,_,_ = stats.cumfreq(style_c, numbins=256) styleValues /= styleValues[-1] K=256 new_values=np.zeros((K)) for a in np.arange(K): j=K-1 while True: new_values[a]=j j=j-1 if j<0 or contentValues[a]>styleValues[j]: break for i in np.arange(height): for j in np.arange(width): a=content_c.item(i,j) b=new_values[int(a)] transfered[:,:,h].itemset((i,j),b) #transfered[:,:,h] = gaussian(transfered[:,:,h]) #return the image to 0:1 scale transfered = transfered /255 return transfered
def compare_cdfs(data, A, num_bins=100):
    cdfs = {}
    assert len(np.unique(A)) == 2

    limits = (min(data), max(data))
    s = 0.5 * (limits[1] - limits[0]) / (num_bins - 1)
    limits = (limits[0] - s, limits[1] + s)

    for a in np.unique(A):
        subset = data[A == a]
        cdfs[a] = cumfreq(subset, numbins=num_bins, defaultreallimits=limits)

    lower_limits = [v.lowerlimit for _, v in cdfs.items()]
    bin_sizes = [v.binsize for _, v in cdfs.items()]
    actual_num_bins = [v.cumcount.size for _, v in cdfs.items()]

    assert len(np.unique(lower_limits)) == 1
    assert len(np.unique(bin_sizes)) == 1
    assert np.all([num_bins == v.cumcount.size for _, v in cdfs.items()])

    xs = lower_limits[0] + np.linspace(0, bin_sizes[0] * num_bins, num_bins)

    disparities = np.zeros(num_bins)
    for i in range(num_bins):
        cdf_values = np.clip(
            [v.cumcount[i] / len(data[A == k]) for k, v in cdfs.items()], 0, 1)
        disparities[i] = max(cdf_values) - min(cdf_values)

    return xs, cdfs, disparities
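A minimal usage sketch for compare_cdfs, assuming numpy and cumfreq are imported as in the function above; the scores and group labels are synthetic and purely illustrative.

# Hypothetical usage sketch for compare_cdfs (synthetic data, illustrative only).
import numpy as np

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0.4, 0.1, 500), rng.normal(0.6, 0.1, 500)])
groups = np.array([0] * 500 + [1] * 500)

xs, cdfs, disparities = compare_cdfs(scores, groups, num_bins=100)
# the largest per-bin gap between the two group CDFs, a KS-style disparity measure
print("max CDF disparity:", disparities.max())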
def visualize_cumulative_sum():
    ssandtss = [ss_mins, tss_mins, ss3end_mins]
    plt.rcParams["font.size"] = 16
    for index in range(3):
        plt.figure()
        dists = []
        for dis in ssandtss[index]:
            if abs(int(dis)) > args.xlimit:
                dists.append(args.xlimit + 2)
                continue
            dists.append(abs(int(dis)))
        cums = stats.cumfreq(dists, numbins=args.xlimit + 2)
        plt.xlabel('distance [bp]')
        x = pd.Series(cums.cumcount)
        plt.xlim(0, args.xlimit)
        ax = sns.lineplot(data=x)
        if index == 0:
            plt.ylabel('Number of splice sites')
            plt.savefig("ss_cumulative_plot.png", dpi=500, bbox_inches='tight')
        elif index == 1:
            plt.ylabel('Number of TSSs')
            plt.savefig("tss_cumulative_plot.png", dpi=500, bbox_inches='tight')
        else:
            plt.ylabel('Number of 3-prime ends')
            plt.savefig("ss3end_cumulative_plot.png", dpi=500, bbox_inches='tight')
def create_cdf(X):
    """Create the cumulative density function of a continuous random variable,
    e.g. observed data.

    arguments
    X    -    Observed data from which a cdf should be constructed; please
              provide the data as a vector (Nx1 NumPy array or list).

    returns
    (bins, cdf)
    bins -    The sorted values in X, used as bins for the cdf.
    cdf  -    The cumulative density for each value in bins.
    """
    # convert the data to a NumPy array
    data = numpy.array(X)
    # the bins are the sorted values in the data
    bins = copy.deepcopy(data)
    bins.sort()
    # calculate the cumulative frequency for each value in the data
    cumfreq, lowerlim, binsize, extra = stats.cumfreq(data, numbins=len(bins))
    # transform the cumulative frequencies to a cdf
    cdf = cumfreq / numpy.max(cumfreq)

    return bins, cdf
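A minimal usage sketch for create_cdf, assuming the function's own imports (numpy, copy, scipy.stats) are available; the observed data are synthetic and purely illustrative.

# Hypothetical usage sketch for create_cdf (illustrative only).
import numpy
observed = numpy.random.randn(200)
bins, cdf = create_cdf(observed)
# cdf[i] is the cumulative density at the i-th sorted value in bins
print(bins[:3], cdf[:3])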
def cumdist(vec, nbins=100):
    # density=True replaces the deprecated normed=True histogram argument
    hist(vec, color='g', bins=nbins, density=True, align='mid')
    # hist(vec, bins=nbins, density=False, align='mid')
    # figure(2)
    disc = cumfreq(vec, numbins=nbins)
    plot(disc[0] / len(vec))
    show()
def plot_percent_percentile_plot(test_pred_do, test_y1): test_pred_do = np.swapaxes(test_pred_do, 0, 1) percentile = np.zeros((test_pred_do.shape[0], test_pred_do.shape[1])) z_score = np.zeros_like(percentile) for i in range(test_pred_do.shape[0]): for j in range(test_pred_do.shape[1]): if test_y1[i, j, 1] == 0: percentile[i, j] = np.nan z_score[i, j] = np.nan continue temp = np.append(test_pred_do[i, j, :], test_y1[i, j, 0]) temp = np.sort(temp) ix = np.where(temp == test_y1[i, j, 0]) percentile[i, j] = ix[0][0] / (len(temp) - 1) * 100 z_score[i, j] = (test_y1[i, j, 0] - np.mean( test_pred_do[i, j, :])) / np.std(test_pred_do[i, j, :]) mask = test_y1[:, :, 1].reshape((-1, )) ix = np.where(mask == 1) percentile = percentile.reshape((-1, ))[ix] #pyplot.figure(); #pyplot.hist(percentile,bins=100); res = stats.cumfreq(percentile, numbins=100) x = res.lowerlimit + np.linspace(0, res.binsize * res.cumcount.size, res.cumcount.size) pyplot.figure() pyplot.bar(x, res.cumcount / np.count_nonzero(mask) * 100, width=res.binsize) pyplot.plot(x, x, '-r', label='y=x') pyplot.show() return z_score
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.'''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0] / len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    setFonts(36)

    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the start
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1], width=0.05,
              length_includes_head=True, head_length=0.04, head_width=0.4, color='k')
    plt.arrow(arrowStart[0], arrowStart[1] + arrowDelta[1], 0, -arrowDelta[1], width=0.05,
              length_includes_head=True, head_length=0.04, head_width=0.4, color='k')

    outFile = 'KS_Example.png'
    showData(outFile)
def cdf_vals_from_data(data, numbins=None, maxbins=None):
    # make sure data is a numpy array
    data = numpy.array(data)

    # by default, use numbins equal to number of distinct values
    # TODO: shouldn't this be one per possible x val?
    if numbins is None:
        numbins = numpy.unique(data).size
    if maxbins is not None and numbins > maxbins:
        numbins = maxbins

    # bin the data and count fraction of points in each bin (for PDF)
    rel_bin_counts, min_bin_x, bin_size, _ =\
        stats.relfreq(data, numbins, (data.min(), data.max()))

    # bin the data and count each bin (cumulatively) (for CDF)
    cum_bin_counts, min_bin_x, bin_size, _ =\
        stats.cumfreq(data, numbins, (data.min(), data.max()))

    # normalize bin counts so rightmost count is 1
    cum_bin_counts /= cum_bin_counts.max()

    # make array of x-vals (lower end of each bin)
    x_vals = numpy.linspace(min_bin_x, min_bin_x + bin_size*numbins, numbins)

    # CDF always starts at y=0
    cum_bin_counts = numpy.insert(cum_bin_counts, 0, 0)  # y = 0
    cdf_x_vals = numpy.insert(x_vals, 0, x_vals[0])      # x = min x

    return cum_bin_counts, cdf_x_vals, rel_bin_counts, x_vals
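A minimal usage sketch for cdf_vals_from_data, assuming numpy and scipy.stats are imported as the function requires; the latency data are synthetic and purely illustrative.

# Hypothetical usage sketch for cdf_vals_from_data (illustrative only).
import numpy
latencies = numpy.random.exponential(scale=10.0, size=1000)
cdf_y, cdf_x, pdf_y, pdf_x = cdf_vals_from_data(latencies, numbins=50)
# cdf_x/cdf_y can be passed directly to a step or line plot of the empirical CDF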
def cdfs(valueses, xlabel='value', labels=None, title='CDF', n_bins=500):
    """
    Plot one or more cumulative density functions
    :param valueses: list of value arrays, one per CDF
    :param xlabel: label for the x axis
    :param labels: optional list of series labels
    :param title: plot title
    :param n_bins: number of bins used for each CDF
    :return: the multiline plot of all CDFs
    """
    x_valueses = []
    y_valueses = []
    logger.debug("cdfs")
    for values in valueses:
        freq = cumfreq(values, n_bins)
        x_values = [freq.lowerlimit + x * freq.binsize for x in range(0, n_bins)]
        y_values = freq.cumcount / len(values)
        logger.debug("binsize: %f" % freq.binsize)
        logger.debug("range: %f" % (freq.binsize * n_bins))
        logger.debug("y range: %f - %f" % (min(y_values), max(y_values)))
        x_valueses.append(x_values)
        y_valueses.append(y_values)
    return multiline(x_valueses, y_valueses, title=title, xlabel=xlabel,
                     ylabel='density', labels=labels)
def plotRECCurve(self, nbins=20, highlight_error=None, linestyle='-', linewidth=1.0):
    """
    Plot a Regression Error Characteristic (REC) curve.
    The resulting REC curve shows the cumulative distribution of errors over the
    dataset, where the error is measured as the distance of the mode of the mixture
    distribution from the target value, in standard deviations.
    TODO: Use the true mode rather than the kernel with the largest mixing coefficient.
    """
    if self.y is None:
        self.update()
    alpha, sigma2, mu = mdn.getMixtureParams(self.y, self.module.M, self.module.c)
    #maxidxs = np.argmax(alpha, axis=1)
    maxidxs = self.getMaxKernel(alpha, sigma2)
    N = len(mu)
    mu = mu[np.arange(0, N), maxidxs]
    sigma2 = sigma2[np.arange(0, N), maxidxs]
    dist = np.sum(np.abs(mu - self.tgts), axis=1)
    dist /= np.sqrt(sigma2)
    h, _, _, _ = cumfreq(dist, nbins, [0, 10])
    h /= N
    plt.plot(np.linspace(0, 10, nbins), h, linestyle, linewidth=linewidth)
    if highlight_error:
        plt.vlines(highlight_error, 0, 1, linestyles='-.')
    plt.xlabel(r'$\epsilon$ [n std deviations]')
    plt.ylabel('accuracy')
    return dist
def main(): # Univariate data ------------------------- # Generate data that are normally distributed x = randn(500) # Set the fonts the way I like them sns.set_context('poster') sns.set_style('ticks') #mystyle.set() # Scatter plot scatter(arange(len(x)), x) xlim([0, len(x)]) mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter') # Histogram hist(x) mystyle.printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings') hist(x,25) mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins') # Cumulative probability density numbins = 20 plot(stats.cumfreq(x,numbins)[0]) mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='Cumulative Frequency') # Boxplot # The ox consists of the first, second (middle) and third quartile boxplot(x, sym='*') mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot') boxplot(x, sym='*', vert=False) title('Boxplot, horizontal') xlabel('Values') show() # Errorbars x = arange(5) y = x**2 errorBar = x/2 errorbar(x,y, yerr=errorBar, fmt='o', capsize=5, capthick=3) xlim([-0.2, 4.2]) ylim([-0.2, 19]) mystyle.printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars') # Violinplot nd = stats.norm data = nd.rvs(size=(100)) nd2 = stats.norm(loc = 3, scale = 1.5) data2 = nd2.rvs(size=(100)) # Use pandas and the seaborn package for the violin plot df = pd.DataFrame({'Girls':data, 'Boys':data2}) #sns.violinplot(df, color = ["#999999", "#DDDDDD"]) sns.violinplot(df) mystyle.printout('violinplot.png')
def main(): # Univariate data ------------------------- # Generate data that are normally distributed x = randn(500) # Set the fonts the way I like them sns.set_context('paper') sns.set_style('white') mystyle.set() # Scatter plot plot(x, '.') mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter') # Histogram hist(x, color='#999999') mystyle.printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings') hist(x, 25, color='#999999') mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins') # Cumulative probability density numbins = 20 plot(stats.cumfreq(x, numbins)[0]) mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='Cumulative Freuqency') # Boxplot # The ox consists of the first, second (middle) and third quartile boxplot(x, sym='*') mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot') boxplot(x, sym='*', vert=False) title('Boxplot, horizontal') xlabel('Values') show() # Violinplot nd = stats.norm data = nd.rvs(size=(100)) nd2 = stats.norm(loc=3, scale=1.5) data2 = nd2.rvs(size=(100)) # Use pandas and the seaborn package for the violin plot df = pd.DataFrame({'Girls': data, 'Boys': data2}) sns.violinplot(df, color=["#999999", "#DDDDDD"]) mystyle.printout('violinplot.png')
def hist_eq(b):
    bf = b.flatten()
    min_, max_ = nanmin(bf), nanmax(bf)
    cumfreqs, lowlim, binsize, extrapoints = cumfreq(
        bf, numbins=256, defaultreallimits=(min_, max_))
    cumfreqs = (255.99 * cumfreqs / cumfreqs[-1]).astype('u1')
    result = (255.99 * (b - min_) / (max_ - min_)).clip(0, 255).astype('u1')
    return cumfreqs[result]
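A minimal usage sketch for hist_eq, assuming its dependencies (numpy's nanmin/nanmax and scipy.stats.cumfreq) are imported in the defining module; the low-contrast image is synthetic and purely illustrative.

# Hypothetical usage sketch for hist_eq (illustrative only): equalize a synthetic,
# low-contrast image stored as floats.
import numpy as np
img = np.random.normal(loc=120, scale=10, size=(64, 64))
equalized = hist_eq(img)
print(equalized.dtype, equalized.min(), equalized.max())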
def __init__(self, data, numBins=None):
    if not numBins:
        numBins = int(len(data) / 5)
    res = stats.cumfreq(data, numbins=numBins)
    self.cdistr = res.cumcount / len(data)
    self.loLim = res.lowerlimit
    self.upLim = res.lowerlimit + res.binsize * res.cumcount.size
    self.binWidth = res.binsize
def getPercentile(self): self.percentile16 = [] self.percentile84 = [] for i in range(0, 6): res = stats.cumfreq(self.fileData[i], numbins=400) self.percentile16.append( self.findPercentile(res, 0.16, len(self.fileData[i]))) self.percentile84.append( self.findPercentile(res, 0.84, len(self.fileData[i])))
def frequency(self, frequencyMap):
    arrayOfkeys = []
    weights = []
    for key in frequencyMap:
        arrayOfkeys.append(int(key))
        weights.append(frequencyMap[key])
    cumcount, lowerlimit, binsize, extrapoints = stats.cumfreq(
        arrayOfkeys, numbins=10, weights=weights)
    return cumcount
def draw_cdf(e, cap, subplot): subplot.set_title(e['dist'].__name__ + ' capacity=' + str(cap)) samples = sorted(e['dist'](cap)) res = stats.cumfreq(samples, numbins=cap, defaultreallimits=e['section']) x = min(e['section']) + np.linspace(0, res.binsize * res.cumcount.size, res.cumcount.size) subplot.bar(x, res.cumcount / cap, width=res.binsize) subplot.set_ylim([0, 1.2]) subplot.set_xlim([min(e['section']) - 1, max(e['section']) + 1])
def main(filename):
    counts = get_data(filename)
    sorted_counts = sorted([v for v in counts.values()])
    cumfreqs, lowlim, binsize, extrapoints = cumfreq(sorted_counts, max(sorted_counts))
    norm_cumfreqs = cumfreqs / max(cumfreqs)

    plot.plot(norm_cumfreqs[:500], linewidth=1.5)
    plot.xlabel("mapped reads")
    plot.ylabel("splice junction")
    plot.show()
def plot_cdf(hist_list, bins, norm_factor, min_spike_threshold, max_spike_threshold, plt_handle): res1 = stats.cumfreq(hist_list, numbins=len(bins), defaultreallimits=(min_spike_threshold, max_spike_threshold)) total_count = res1.cumcount[-1] cum_count = total_count - res1.cumcount plt_handle.plot(bins, cum_count * norm_factor)
def plotCDF(forecast, validation, title, xlims=[-1, 1]): vals, x1, x2, x3 = cumfreq(forecast, len(forecast)) ax1 = plt.plot(np.linspace(np.min(forecast), np.max(forecast), len(forecast)), vals / len(forecast), label='Simulation') vals, x1, x2, x3 = cumfreq(validation, len(validation)) ax2 = plt.plot(np.linspace(np.min(validation), np.max(validation), len(validation)), vals / len(validation), label='Reference') ax2 = plt.legend(prop={'size': 10}) ax1 = plt.title(title) ax1 = plt.xlabel("Value") ax1 = plt.ylabel("ECDF") ax1 = plt.xlim(xlims[0], xlims[1]) ax1 = plt.ylim(0, 1) pdf.savefig() plt.clf()
def plot(self, weights, row, col, shape, ix): full_lstm = np.zeros(shape) ix_lstm = np.zeros(shape) full_lstm[(row, col)] = weights ix_lstm[(row, col)] = 1 plt.imshow(-ix_lstm, cmap=plt.get_cmap('binary')) plt.savefig('{}/{}.png'.format(self.Dir, ix)) plt.clf() ng = shape[-1] // self.nh ix_lstm_p = np.reshape(ix_lstm, [-1, self.nh, ng]) reduce_row = np.sum(ix_lstm_p, axis=(0, -1)) from scipy import stats cumcount, lower, binsize, _ = stats.cumfreq(reduce_row, numbins=30) x = lower + np.linspace(0, binsize * cumcount.size, cumcount.size) plt.bar(x, cumcount / (len(reduce_row)), width=binsize) plt.xlim(0, 400) plt.xlabel('Parameters per Neuron') plt.ylabel('Cumulative %') plt.savefig('{}/cml{}.png'.format(self.Dir, ix)) plt.clf() ng = shape[-1] // self.nh input_list = [] rec_list = [] for i in range(ng): input_list.append( np.sum(ix_lstm[:self.ni, i * self.nh:(i + 1) * self.nh])) rec_list.append( np.sum(ix_lstm[self.ni:, i * self.nh:(i + 1) * self.nh])) print("ratio:", np.sum(input_list) / np.sum(rec_list), "true_ratio:", shape[0] / self.nh - 1) inds = np.arange(ng) width = 0.35 p1 = plt.bar(inds, input_list, width) p2 = plt.bar(inds, rec_list, width, bottom=input_list) if ng == 3: plt.xticks(inds, ('R-gate', 'Z-gate', 'O-gate')) elif ng == 4: plt.xticks(inds, ('I-gate', 'J-gate', 'F-gate', 'O-gate')) plt.ylabel('Number of Connections') plt.title("Remaining Weights by Gate and Type") plt.legend((p1[0], p2[0]), ('Input Parameters', 'Recurrent Parameters')) plt.savefig('{}/bar{}.png'.format(self.Dir, ix))
def GetDistributions(sample1, sample2, nbins):
    # For consistency between CDFs
    lower_limit = min(min(sample1), min(sample2))
    upper_limit = max(max(sample1), max(sample2))

    # Create objects (H1, H2) that include the cumulative frequency and surrounding information
    H1 = stats.cumfreq(sample1, numbins=nbins, defaultreallimits=(lower_limit, upper_limit))
    H2 = stats.cumfreq(sample2, numbins=nbins, defaultreallimits=(lower_limit, upper_limit))

    cumdist1 = H1.cumcount
    cumdist2 = H2.cumcount
    binsize = H1.binsize

    return lower_limit, upper_limit, H1, H2, cumdist1, cumdist2, binsize
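A minimal usage sketch for GetDistributions, assuming numpy and scipy.stats are imported; the two samples are synthetic, and the KS-style distance derived from the returned counts is purely illustrative.

# Hypothetical usage sketch for GetDistributions (illustrative only).
import numpy as np
s1 = np.random.normal(0.0, 1.0, 400)
s2 = np.random.normal(0.3, 1.0, 400)
lo, hi, H1, H2, cum1, cum2, binsize = GetDistributions(s1, s2, nbins=50)
# normalize the cumulative counts and take the maximum gap between the two CDFs
ks_distance = np.max(np.abs(cum1 / len(s1) - cum2 / len(s2)))
print(ks_distance)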
def cumulativePlot(samples, save_file=None): #fig = plt.figure(figsize=(8, 6)) res = stats.cumfreq(samples, numbins=25) x = res.lowerlimit + np.linspace(0, res.binsize * res.cumcount.size, res.cumcount.size) plt.bar(x, res.cumcount / res.cumcount[-1], width=res.binsize) plt.title('Cumulative histogram') plt.xlim([x.min(), x.max()]) plt.xlabel("Wind Speed m/s") if save_file is not None: plt.savefig(save_file, dpi=300, pad_inches=0, bbox_inches='tight') plt.show()
def plot_cumulative_frequency(vul_list): percent_fixing_commits = sorted([vul['num_fix_commits']/vul['num_release_commits']*100 for vul in vul_list]) cumulative_frequency = stats.cumfreq(percent_fixing_commits, defaultreallimits = (-1, 101), numbins=len(percent_fixing_commits)) trace = go.Scatter( name = 'Fixing release', x = [0] + percent_fixing_commits, y = [0] + list(map(lambda w: w/len(percent_fixing_commits) * 100, cumulative_frequency.cumcount)) ) layout = go.Layout( showlegend = False, yaxis = dict( title = 'Cumulative Frequency Distribution<br>(Fixing releases)', titlefont = dict(size=16), range = [0, 100], ticksuffix = '%' ), xaxis = dict( title = 'Fixing commits (%)', titlefont = dict(size=16), range = [0, 20], ticksuffix = '%' ), shapes = [ dict( type = 'line', x0 = 14.28, x1 = 14.28, y0 = 0, y1 = 110, line = dict( color = 'black', dash = 'dash' ) ), dict( type = 'line', x0 = 0, x1 = 110, y0 = 91.77, y1 = 91.77, line = dict( color = 'black', dash = 'dash' ) ) ] ) fig = go.Figure(data=[trace], layout=layout) fig.write_html('cum_freq_dist.html') fig.write_image('cum_freq_dist.pdf', height=400, width=600)
def plotCDF(forecast, validation, title, xlims=[-1, 1]): forecast[forecast < -1.01] = -1.01 vals, x1, x2, x3 = cumfreq(forecast, len(forecast)) ax1 = plt.plot(np.linspace(np.min(forecast), np.max(forecast), len(forecast)), vals / len(forecast), label=str(config.get('Main options', 'RunName'))) validation[validation < -1.01] = -1.01 vals, x1, x2, x3 = cumfreq(validation, len(validation)) ax2 = plt.plot(np.linspace(np.min(validation), np.max(validation), len(validation)), vals / len(validation), label=str(config.get('Reference options', 'RunName'))) ax2 = plt.legend(prop={'size': 10}, loc=2) ax1 = plt.title(title) ax1 = plt.xlabel("Value") ax1 = plt.ylabel("ECDF") ax1 = plt.xlim(xlims[0], xlims[1]) ax1 = plt.ylim(0, 1) ax1 = plt.gcf().set_tight_layout(True) pdf.savefig() plt.clf()
def iecdf(x, p, nbins=10):
    """f = iecdf(x, p, nbins=10)

    returns the inverse of the empirical cumulative distribution function
    at ordinate p
    """
    # if (p > 1 or p < 0):
    #     print("Error : Percentile p must be between 0 and 1.")
    #     exit
    cum = stats.cumfreq(x, nbins)
    a = cum[0] / len(x)
    lowlim = cum[1]
    bsize = cum[2]
    uplim = lowlim + bsize * nbins
    bins = np.linspace(lowlim + bsize / 2, uplim - bsize / 2, nbins)
    freqs = interpolate.interp1d(a, bins)
    f = freqs(p)
    return f
def computeCDF(data, precision=1000):
    from scipy.stats import cumfreq, scoreatpercentile

    freqs, _, _, _ = cumfreq(data, precision)
    # normalize the cumulative counts by the total count so the CDF ends at 1
    freqsNormalized = [f / freqs[-1] for f in freqs]

    values = []
    step = 100. / precision
    scores = numpy.arange(0, 100 + step, step)
    for s in scores:
        values.append(scoreatpercentile(data, s))

    return values, freqs, freqsNormalized
def ecdfSyt(df, Group, Conc, threshold):
    tmp = df[(df['Conc'] == Conc) & (df['Group'] == Group)].copy()
    tmp['time'] = np.round(tmp['time'], 1)
    tmp = tmp.sort_values('time')
    nbins = np.unique(tmp['time']).size
    tmp1 = cumfreq(tmp['time'].values, numbins=nbins)[0]
    time = np.unique(tmp['time'])
    tmparray = np.zeros([nbins, 4])
    DF = pd.DataFrame(tmparray, columns=["Group", "Conc", "time", "cumfreq"])
    DF['Group'] = [Group] * nbins
    DF['Conc'] = [Conc] * nbins
    DF['time'] = time
    DF['cumfreq'] = tmp1
    DF['cumfreq'] = DF['cumfreq'] / DF['cumfreq'].max()
    DF = DF[DF['cumfreq'] > threshold]
    # min-max rescale the remaining cumulative frequencies to [0, 1]
    DF['cumfreq'] = (DF['cumfreq'] - DF['cumfreq'].min()) / (DF['cumfreq'].max() - DF['cumfreq'].min())
    DF['time'] = DF['time'] - DF['time'].min()
    return DF
def cumulative_histogram(X, axis, bins=30):
    info = calculate_descriptive_stats(X)
    res = sps.cumfreq(X, numbins=bins)
    x = res.lowerlimit + np.linspace(0, res.binsize * res.cumcount.size,
                                     res.cumcount.size)
    axis.bar(
        x,
        res.cumcount / np.max(res.cumcount),
        width=res.binsize,
        color='b',
        label="Count: %d\nMin: %.4f\nMax: %.4f\nMean: %.4f\nStd: %.4f\nSkew: %.4f\nKurt: %.4f" % info)
    axis.set_title('Cumulative Histogram of Data')
    axis.set_xlim([x.min(), x.max()])
    axis.legend(loc='lower right')
def make_cdf(img, n_bins=256):
    """
    Creates CDF of input image (map from [0,255] to [0,1])
    Inputs:
        - img: input image
        - n_bins: Number of bins used
    Output:
        - Dictionary containing cumulative frequencies (CDF) of pixel values,
          contained in array, and number of items (pixels) used to compute CDF
    """
    cdf = stats.cumfreq(img, n_bins, (0, 255))[0]
    cdf_ = {'cdf': np.array(cdf) / int(max(cdf)), 'n_items': int(max(cdf))}
    return cdf_
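A minimal usage sketch for make_cdf, assuming numpy and scipy.stats are imported as the function requires; the random 8-bit image is synthetic and purely illustrative.

# Hypothetical usage sketch for make_cdf (illustrative only).
import numpy as np
img = np.random.randint(0, 256, size=(32, 32))
cdf_ = make_cdf(img)
# number of pixels used, and the cumulative fraction of pixels up to roughly value 128
print(cdf_['n_items'], cdf_['cdf'][128])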
def histogram(self, data_dict, file_name=False, save=False, resolution=None): if type(data_dict) == dict: data = [] for i in data_dict: data.extend(data_dict[i]) elif type(data_dict) == list: data = data_dict else: print("Input must be dictionary or list") return for j, mape in enumerate(data): if mape > 50: data[j] = 50 res = stats.cumfreq(data, numbins=15, defaultreallimits=(0, 50)) x = res.lowerlimit + np.linspace(0, res.binsize * res.cumcount.size, res.cumcount.size) cum_y = [ i / (max(res.cumcount) - min(res.cumcount)) * 100 for i in res.cumcount ] fig = plt.figure(figsize=(10, 4)) ax1 = fig.add_subplot(1, 2, 1) ax2 = fig.add_subplot(1, 2, 2) ax1.hist(data, bins=15, histtype='bar', ec='black') ax1.set_title('Histogram') ax1.set_xlabel('MAPE(%)', fontsize=12) ax1.set_ylabel('Frequency (Days)', fontsize=12) # ax2.bar(x, res.cumcount, width=res.binsize) ax2.plot(x, cum_y, '-o') ax2.set_title('Cumulative Histogram') ax2.set_xlim([x.min(), x.max()]) ax2.set_xlabel('MAPE(%)', fontsize=12) ax2.set_ylabel('Dataset Percentage (%)', fontsize=12) if save is True: plt.savefig(file_name + '.jpg', format='jpg', dpi=resolution, bbox_inches='tight')
def get_null_reference_cdf( lowerlimit: np.float32, upperlimit: np.float32, numbins: int = 1000, ) -> ModifiedECDF: """ This function will return a CDF to be used as a null reference. :param lowerlimit: lower bound for the CDF :param upperlimit: upperbound for the CDF :param numbins: How many bins should be used for the reference :returns: ModifiedECDF of all zeros for the specified range """ return ModifiedECDF( stats.cumfreq([], numbins=numbins, defaultreallimits=(lowerlimit, upperlimit)))
def p(hinj=[], hrec=[], s=[], psrname='', detname='', style=sd.default_style, methods=[]): for method in methods: # First Calculate the interquartile range #(http://comments.gmane.org/gmane.comp.python.scientific.user/19755) data = np.sort(hrec) upperQuartile = stats.scoreatpercentile(data, .75) lowerQuartile = stats.scoreatpercentile(data, .25) IQR = upperQuartile - lowerQuartile # Get ideal bin size #(http://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule) # fdsize = 3.49*np.std(data)*len(data)**(-1./3.) fdsize = 2 * IQR * len(data)**(-1. / 3.) #Get number of bins #(http://stats.stackexchange.com/questions/798/calculating-optimal-number-of-bins-in-a-histogram-for-n-where-n-ranges-from-30) num_bins = int((np.amax(data) - np.amin(data)) / fdsize) cumfreqs, lowlim, binsize, _ = stats.cumfreq(data, num_bins) pv = [1. - cdf / max(cumfreqs) for cdf in cumfreqs] bins = np.linspace(lowlim, num_bins * binsize, num_bins) plt.plot(bins, pv, style, color=sd.sd.pltcolor[method], label=method) plt.yscale('log') plt.title(detname + ' PSR ' + psrname) plt.xlabel('$h_{rec}$') plt.ylabel('1 - CDF (log scale)') plt.legend(numpoints=1) plt.savefig('plots/p_' + detname + '_' + psrname, bbox_inches='tight') print 'Plotted and saved in: ', print 'plots/p_' + detname + '_' + psrname plt.close()
def p_original(detector, psr, location='files/remote/source/'): d = pd.HDFStore(location + 'dataPitkin_' + detector + '.hdf5', 'r') a = d[psr].tolist() b = [abs(x) for x in a] # First Calculate the interquartile range #(http://comments.gmane.org/gmane.comp.python.scientific.user/19755) data = np.sort(d[psr].tolist()) upperQuartile = stats.scoreatpercentile(data,.75) lowerQuartile = stats.scoreatpercentile(data,.25) IQR = upperQuartile - lowerQuartile # Get ideal bin size #(http://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule) # fdsize = 3.49*np.std(data)*len(data)**(-1./3.) fdsize = 2 * IQR * len(data)**(-1./3.) #Get number of bins #(http://stats.stackexchange.com/questions/798/calculating-optimal-number-of-bins-in-a-histogram-for-n-where-n-ranges-from-30) num_bins = int((np.amax(data) - np.amin(data))/fdsize) cumfreqs, lowlim, binsize, _ = stats.cumfreq(data, num_bins) pv = [1. - cdf/max(cumfreqs) for cdf in cumfreqs] bins = np.linspace(lowlim, num_bins*binsize, num_bins) plt.plot(bins, pv, style, color=sd.pltcolor[method], label=method) plt.yscale('log') plt.title(detname + ' PSR ' + psrname) plt.xlabel('$h_{rec}$') plt.ylabel('1 - CDF (log scale)') plt.legend(numpoints=1) plt.savefig('plots/p_' + detname + '_' + psrname, bbox_inches='tight') print 'Plotted and saved in: ', print 'plots/p_' + detname + '_' + psrname plt.close()
def plot_cdfs(benchmark, benchmark_experiments, os_type, results_dir, output_dir, output_extension): print "Parsing and plotting runtime results for %d %s experiments...\n" % (len(benchmark_experiments), benchmark) runtime_results = parse_runtime_results(benchmark, benchmark_experiments, os_type, aggregate=False) if len(runtime_results) == 0: print "Not enough results found for %s. Skipping..." % benchmark return keyed_by_mem_size = defaultdict(list) for jvm_count, memsize_to_results in sorted(runtime_results.iteritems(), key=lambda t: t[0]): for memsize, runtimes in memsize_to_results.iteritems(): keyed_by_mem_size[memsize].append((jvm_count, runtimes)) for mem_size, jvm_to_runtimes in sorted(keyed_by_mem_size.iteritems(), key=lambda t: t[0]): plt.clf() ax = plt.subplot(111) longest_time = max(reduce(lambda x,y: x + y, [t[1] for t in jvm_to_runtimes])) shortest_time = min(reduce(lambda x,y: x + y, [t[1] for t in jvm_to_runtimes])) for jvm_count, runtime_list in jvm_to_runtimes: cum_freqs, ll, binsize, xp = cumfreq(runtime_list, numbins=len(runtime_list)) normed_cum_freqs = map(lambda x: x/max(cum_freqs), cum_freqs) padded_x = [shortest_time*0.8, min(runtime_list)] + sorted(runtime_list) + [longest_time*1.1] padded_y = [0, 0] + normed_cum_freqs + [1] ax.plot(padded_x, padded_y, label="%d JVMs" % jvm_count) # Apply labels and bounds plt.title("%s Mean Iteration Runtime CDF (%d MB Heap)" % (benchmark, mem_size)) plt.ylabel("Fraction of Jobs Completed") plt.xlabel("Time (ms)") plt.xlim(shortest_time*0.8, longest_time*1.1) plt.ylim(-0.025, 1.025) # Move legend to the right box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.85, box.height]) ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) save_or_show_current(output_dir, 'cdfs', benchmark, output_extension, suffix='%03dMB' % mem_size)
def gmm_test(X,k0,k1,nboot): nsample = X.shape[0] gmm0 = mixture.GMM(n_components=k0, covariance_type='full') gmm0.fit(X) L0 = sum(gmm0.score(X)) gmm1 = mixture.GMM(n_components=k1, covariance_type='full') gmm1.fit(X) L1 = sum(gmm1.score(X)) LRstat = -2*(L1 - L0) LRstat0 = [] for i in range(nboot): Xboot = gmm0.sample(n_samples=nsample) gmm0_boot = mixture.GMM(n_components=k0, covariance_type = 'full') gmm0_boot.fit(Xboot) L0_boot = sum(gmm0_boot.score(Xboot)) gmm1_boot = mixture.GMM(n_components=k1, covariance_type = 'full') gmm1_boot.fit(Xboot) L1_boot = sum(gmm1_boot.score(Xboot)) LRstat0.append(-2*(L1_boot - L0_boot)) ecdf, lowlim, binsize, extrapoints = cumfreq(LRstat0) ecdf = ecdf/len(LRstat0) bin = np.mean([lowlim,lowlim+binsize]) bins = [] for i in range(len(ecdf)): bins.append(bin) bin = bin + binsize p = max(ecdf[bins<=LRstat]) return p
#------------------------------------------------------------------------------------------------- ################################################################################################## #load data data = array([4.92, 6.52,7.33, 5.75]) #bootstrapping, assumes that the data here completely describe (are completely representative thereof) the underlying distribution. sample_count = data.shape[0] variable_count = 1 jitter_count = 1000 replicates = tile(data,(jitter_count,1)) replicates += (random(replicates.shape)*(max(data)-min(data))+min(data)) map(shuffle,replicates) distribution = ravel(diff(replicates,axis=1)) cdf = cumfreq(distribution) overview = plt.figure(figsize =(8.27,11.69)) #Thus instructeth PDM ax = overview.add_subplot(111) xvals = linspace(cdf[1],cdf[1]+10*cdf[2],num=10) #By default cumfreq divides into 10 bins n,bins,patches=ax.hist(distribution, normed=True) plt.setp(patches, 'facecolor', 'k', 'alpha', 0.75) tech.adjust_spines(ax,['left','bottom']) overview.text(0.5, 0.08, r'Weekly Change in Length of Stay', ha='center', va='top', fontsize=30, weight='bold') #xlabel overview.text(0.02875, 0.5, r'Chance of Occurrence', ha='center', va='center', rotation='vertical', fontsize=30, weight='bold') ax.annotate(r'April 21, 2010',xy=(1.1,.18), xytext=(1.2,.2),arrowprops=dict(facecolor='black', shrink=0.05), fontsize=20) plt.subplots_adjust(top=0.95, bottom =0.18, left=0.15) plt.savefig('cdf_LOS.jpg',dpi=600)
def simplePlots(): '''Demonstrate the generation of different statistical standard plots''' # Univariate data ------------------------- # Make sure that always the same random numbers are generated np.random.seed(1234) # Generate data that are normally distributed x = np.random.randn(500) # Other graphics settings sns.set(context='poster', style='ticks', palette=sns.color_palette('muted')) # Set the fonts the way I like them setFonts(32) # Scatter plot plt.scatter(np.arange(len(x)), x) plt.xlim([0, len(x)]) # Save and show the data, in a systematic format printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter') # Histogram plt.hist(x) printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings') plt.hist(x,25) printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins') # Cumulative probability density numbins = 20 plt.plot(stats.cumfreq(x,numbins)[0]) printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='CumFreq', title='Cumulative Frequency') # KDE-plot sns.kdeplot(x) printout('kde.png', xlabel='Data Values', ylabel='Density', title='KDE_plot') # Boxplot # The ox consists of the first, second (middle) and third quartile plt.boxplot(x, sym='*') printout('boxplot.png', xlabel='Values', title='Boxplot') plt.boxplot(x, sym='*', vert=False) plt.title('Boxplot, horizontal') plt.xlabel('Values') plt.show() # Errorbars x = np.arange(5) y = x**2 errorBar = x/2 plt.errorbar(x,y, yerr=errorBar, fmt='o', capsize=5, capthick=3) plt.xlim([-0.2, 4.2]) plt.ylim([-0.2, 19]) printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars') # Violinplot nd = stats.norm data = nd.rvs(size=(100)) nd2 = stats.norm(loc = 3, scale = 1.5) data2 = nd2.rvs(size=(100)) # Use pandas and the seaborn package for the violin plot df = pd.DataFrame({'Girls':data, 'Boys':data2}) sns.violinplot(df) printout('violinplot.png', title='Violinplot') # Barplot # The font-size is set such that the legend does not overlap with the data np.random.seed(1234) setFonts(20) df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd']) df.plot(kind='bar', grid=False, color=sns.color_palette('muted')) showData('barplot.png') setFonts(28) # Bivariate Plots df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c']) df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500); plt.axhline(0, ls='--', color='#999999') plt.axvline(0, ls='--', color='#999999') printout('bivariate.png') # Grouped Boxplot sns.set_style('whitegrid') sns.boxplot(df) setFonts(28) printout('groupedBoxplot.png', title='sns.boxplot') sns.set_style('ticks') # Pieplot txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others' fractions = [45, 30, 15, 10] offsets =(0, 0.05, 0, 0) plt.pie(fractions, explode=offsets, labels=txtLabels, autopct='%1.1f%%', shadow=True, startangle=90, colors=sns.color_palette('muted') ) plt.axis('equal') printout('piePlot.png', title=' ')
density = gaussian_kde(list_merged_by_ball_id) xs = numpy.linspace(0,8,200) density.covariance_factor = lambda : .25 density._compute_covariance() plt.plot(xs,density(xs)) plt.xlabel('KDE,number of appear time by blue ball number') plt.ylabel('KDE,counter of appear time by blue ball number') plt.show() ##CDF(The Cumulative Distribution Function from scipy.stats import cumfreq idx_max = max(dfs_blue_balls_count_values) hi = idx_max a = numpy.arange(hi) ** 2 # for nbins in ( 2, 20, 100 ): for nbins in dfs_blue_balls_count_values: cf = cumfreq(a, nbins) # bin values, lowerlimit, binsize, extrapoints w = hi / nbins x = numpy.linspace( w/2, hi - w/2, nbins ) # care # print x, cf plt.plot( x, cf[0], label=str(nbins) ) plt.legend() plt.xlabel('CDF,number of appear time by blue ball number') plt.ylabel('CDF,counter of appear time by blue ball number') plt.show() ###Optional: Comparing Distributions with Probability Plots and QQ Plots ###Quantile plot of the server data. A quantile plot is a graph of the CDF with the x and y axes interchanged. ###Probability plot for the data set shown,a standard normal distribution: ###@see: http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.probplot.html import scipy.stats as stats
# Histogram hist(x) xlabel('Data Values') ylabel('Frequency') title('Histogram, default settings') show() hist(x,25) xlabel('Data Values') ylabel('Frequency') title('Histogram, 25 bins') show() # Cumulative probability density numbins = 20 cdf = stats.cumfreq(x,numbins) plot(cdf[0]) xlabel('Data Values') ylabel('Cumulative Frequency') title('Cumulative probablity density function') show() # Boxplot # The error bars indiacte the range, and the box consists of the # first, second (middle) and third quartile boxplot(x) title('Boxplot') ylabel('Values') show() boxplot(x, vert=False)
def execute(self, sqr_nodes, connectivity, randomize_boot, sec_before_inject, sec_after_inject, inject_node, k, distance, filenamebase): print "="*40 print "Executing HistGraph:" print "filenamebase\t\t", filenamebase print "="*40 node_re = 'DEBUG \((\d+)\):' node_re_c = re.compile(node_re) time_re = '(\d+):(\d+):(\d+.\d+)' time_re_c = re.compile(time_re) consist = np.zeros((sqr_nodes, sqr_nodes)) f = open(filenamebase+".log", "r") for line in f: #print line, if line.find("inconsistent") >= 0: #print line, node_obj = node_re_c.search(line) node = int(node_obj.group(1)) time_obj = time_re_c.search(line) #print "\t", time_obj.group(0), t = Time(time_obj.group(1), time_obj.group(2), time_obj.group(3)) #print t.in_second() #print "id", node, (x, y) = id2xy(node, sqr_nodes) #print "->", x, y consist[x][y] = t.in_second() - sec_before_inject f.close() LOW_TIME = 0 HIGH_TIME = 50 BINS = 100 #print consist.flatten() cdf = stats.cumfreq(consist.flatten(), BINS, (LOW_TIME, HIGH_TIME)) #print cdf #, max(cdf[0]), cdf[0]/max(cdf[0]) #print floatRange(LOW_TIME, HIGH_TIME, cdf[2]) fig = plt.figure(figsize=(10, 8)) ax = fig.add_subplot(111) plt.plot(floatRange(LOW_TIME, HIGH_TIME, cdf[2]), cdf[0]/max(cdf[0]), ls='steps') # plt.hist(consist.flatten(), # bins = 100, # cumulative=True, # normed=True, # histtype='step') plt.grid() plt.title('Model Time to Consistency (cdf)') text = str(sqr_nodes) + "x" + str(sqr_nodes) + "\n" + \ "Distance: " + str(distance) + "\n" + \ "K: " + str(k) # "Connectivity: " + str(connectivity) + "\n" + \ plt.text(.5, .1, text, horizontalalignment='center', verticalalignment='center', transform = ax.transAxes, bbox=dict(facecolor='red', alpha=0.2)) plt.ylim(0, 1) #plt.xlim(0, # 50) plt.xlabel("Model Time [s]") plt.ylabel("Nodes consistent [%]") plt.savefig(filenamebase+"_hist.png")
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive)
        cols.append(max)
    elif test_id.strip() == "tvar":
        if nf == 0 and mf == 0:
            var = stats.tvar(map(float, sample_one))
        else:
            var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
        cols.append(var)
    elif test_id.strip() == "tstd":
        if nf == 0 and mf == 0:
            std = stats.tstd(map(float, sample_one))
        else:
            std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
        cols.append(std)
    elif test_id.strip() == "tsem":
        if nf == 0 and mf == 0:
            s = stats.tsem(map(float, sample_one))
        else:
            s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2))
        cols.append(s)
    elif test_id.strip() == "scoreatpercentile":
        if nf == 0 and mf == 0:
            s = stats.scoreatpercentile(
                map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation
            )
        else:
            s = stats.scoreatpercentile(
                map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation
            )
        for item in s:
            cols.append(item)
    elif test_id.strip() == "relfreq":
        if nf == 0 and mf == 0:
            rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b)
        else:
            rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf))
        for item in rel:
            cols.append(item)
        cols.append(low_range)
        cols.append(binsize)
        cols.append(ex)
    elif test_id.strip() == "binned_statistic":
        if nf == 0 and mf == 0:
            st, b_edge, b_n = stats.binned_statistic(
                map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b
            )
        else:
            st, b_edge, b_n = stats.binned_statistic(
                map(float, sample_one),
                map(float, sample_two),
                statistic=args.statistic,
                bins=args.b,
                range=(mf, nf),
            )
        cols.append(st)
        cols.append(b_edge)
        cols.append(b_n)
    elif test_id.strip() == "threshold":
        # stats.threshold has been removed from current SciPy; this branch assumes an older release.
        if nf == 0 and mf == 0:
            o = stats.threshold(map(float, sample_one), newval=args.new)
        else:
            o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new)
        for item in o:
            cols.append(item)
    elif test_id.strip() == "trimboth":
        o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut)
        for item in o:
            cols.append(item)
    elif test_id.strip() == "trim1":
        t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
        for item in t1:
            cols.append(item)
    elif test_id.strip() == "histogram":
        # stats.histogram has been removed from current SciPy; this branch assumes an older release.
        if nf == 0 and mf == 0:
            hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b)
        else:
            hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf))
        cols.append(hi)
        cols.append(low_range)
        cols.append(binsize)
        cols.append(ex)
    elif test_id.strip() == "cumfreq":
        if nf == 0 and mf == 0:
            cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b)
        else:
            cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf))
        cols.append(cum)
        cols.append(low_range)
        cols.append(binsize)
        cols.append(ex)
    elif test_id.strip() == "boxcox_normmax":
        if nf == 0 and mf == 0:
            ma = stats.boxcox_normmax(map(float, sample_one))
        else:
            ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method)
        cols.append(ma)
    elif test_id.strip() == "boxcox":
        if imbda == 0:
            box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha)
            cols.append(box)
            cols.append(ma)
            cols.append(ci)
        else:
            box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha)
            cols.append(box)
    elif test_id.strip() == "histogram2":
        # stats.histogram2 has been removed from current SciPy; this branch assumes an older release.
        h2 = stats.histogram2(map(float, sample_one), map(float, sample_two))
        for item in h2:
            cols.append(item)
    elif test_id.strip() == "ranksums":
        z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two))
        cols.append(z_statistic)
        cols.append(p_value)
    elif test_id.strip() == "ttest_1samp":
        t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two))
        for item in t:
            cols.append(item)
        for item in prob:
            cols.append(item)
    elif test_id.strip() == "ansari":
        AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two))
        cols.append(AB)
        cols.append(p_value)
    elif test_id.strip() == "linregress":
        slope, intercept, r_value, p_value, stderr = stats.linregress(map(float, sample_one), map(float, sample_two))
        cols.append(slope)
        cols.append(intercept)
        cols.append(r_value)
        cols.append(p_value)
        cols.append(stderr)
    elif test_id.strip() == "pearsonr":
        cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two))
        cols.append(cor)
        cols.append(p_value)
    elif test_id.strip() == "pointbiserialr":
        r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two))
        cols.append(r)
        cols.append(p_value)
    elif test_id.strip() == "ks_2samp":
        d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two))
        cols.append(d)
        cols.append(p_value)
    elif test_id.strip() == "mannwhitneyu":
        mw_stats_u, p_value = stats.mannwhitneyu(
            map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity
        )
        cols.append(mw_stats_u)
        cols.append(p_value)
    elif test_id.strip() == "zmap":
        z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
        for item in z:
            cols.append(item)
    elif test_id.strip() == "ttest_ind":
        t_stat, p_value = stats.ttest_ind(map(float, sample_one), map(float, sample_two), equal_var=args.equal_var)
        cols.append(t_stat)
        cols.append(p_value)
    elif test_id.strip() == "ttest_rel":
        t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis)
        cols.append(t)
        cols.append(prob)
    elif test_id.strip() == "mood":
        z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis)
        cols.append(z)
        cols.append(p_value)
    elif test_id.strip() == "shapiro":
        W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta)
        cols.append(W)
        cols.append(p_value)
        for item in a:
            cols.append(item)
    elif test_id.strip() == "kendalltau":
        k, p_value = stats.kendalltau(
            map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort
        )
        cols.append(k)
        cols.append(p_value)
    elif test_id.strip() == "entropy":
        s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base)
        cols.append(s)
    elif test_id.strip() == "spearmanr":
        if sample2 == 1:
            rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two))
        else:
            rho, p_value = stats.spearmanr(map(float, sample_one))
        cols.append(rho)
        cols.append(p_value)
    elif test_id.strip() == "wilcoxon":
        if sample2 == 1:
            T, p_value = stats.wilcoxon(
                map(float, sample_one),
                map(float, sample_two),
                zero_method=args.zero_method,
                correction=args.correction,
            )
        else:
            T, p_value = stats.wilcoxon(map(float, sample_one), zero_method=args.zero_method, correction=args.correction)
        cols.append(T)
        cols.append(p_value)
    elif test_id.strip() == "chisquare":
        if sample2 == 1:
            rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof)
        else:
            rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof)
        cols.append(rho)
        cols.append(p_value)
    elif test_id.strip() == "power_divergence":
        if sample2 == 1:
            stat, p_value = stats.power_divergence(
                map(float, sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_
            )
        else:
            stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_)
        cols.append(stat)
        cols.append(p_value)
    elif test_id.strip() == "theilslopes":
        if sample2 == 1:
            mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha)
        else:
            mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha)
        cols.append(mpe)
        cols.append(met)
        cols.append(lo)
        cols.append(up)
    elif test_id.strip() == "combine_pvalues":
        if sample2 == 1:
            stat, p_value = stats.combine_pvalues(
                map(float, sample_one), method=args.med, weights=map(float, sample_two)
            )
        else:
            stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med)
        cols.append(stat)
        cols.append(p_value)
    elif test_id.strip() == "obrientransform":
        ob = stats.obrientransform(*b_samples)
        for row in ob:
            elements = ",".join(map(str, row))
            cols.append(elements)
    elif test_id.strip() == "f_oneway":
        f_value, p_value = stats.f_oneway(*b_samples)
        cols.append(f_value)
        cols.append(p_value)
    elif test_id.strip() == "kruskal":
        h, p_value = stats.kruskal(*b_samples)
        cols.append(h)
        cols.append(p_value)
    elif test_id.strip() == "friedmanchisquare":
        fr, p_value = stats.friedmanchisquare(*b_samples)
        cols.append(fr)
        cols.append(p_value)
    elif test_id.strip() == "fligner":
        xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
        cols.append(xsq)
        cols.append(p_value)
    elif test_id.strip() == "bartlett":
        T, p_value = stats.bartlett(*b_samples)
        cols.append(T)
        cols.append(p_value)
    elif test_id.strip() == "levene":
        w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples)
        cols.append(w)
        cols.append(p_value)
    elif test_id.strip() == "median_test":
        stat, p_value, m, table = stats.median_test(
            ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples
        )
        cols.append(stat)
        cols.append(p_value)
        cols.append(m)
        cols.append(table)
        for row in table:
            elements = ",".join(map(str, row))
            cols.append(elements)
    outfile.write("%s\n" % "\t".join(map(str, cols)))
outfile.close()
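# The "threshold", "histogram", and "histogram2" branches above call functions that were
# removed from SciPy some releases ago. If this dispatcher has to run against a current
# SciPy, roughly equivalent results can be built on NumPy. A minimal sketch; the helper
# names below are hypothetical and not part of the original tool.
import numpy as np

def threshold_like(a, threshmin=None, threshmax=None, newval=0):
    # Replacement for the removed stats.threshold: values outside
    # [threshmin, threshmax] are replaced by newval.
    a = np.asarray(a, dtype=float).copy()
    mask = np.zeros(a.shape, dtype=bool)
    if threshmin is not None:
        mask |= a < threshmin
    if threshmax is not None:
        mask |= a > threshmax
    a[mask] = newval
    return a

def histogram_like(a, numbins=10, defaultlimits=None):
    # np.histogram returns (counts, edges); repackage to mimic the old
    # (counts, low_range, binsize, extrapoints) tuple. Points outside the
    # limits are not tracked here, so extrapoints is reported as 0.
    counts, edges = np.histogram(a, bins=numbins, range=defaultlimits)
    return counts, edges[0], edges[1] - edges[0], 0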
def main():
    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = randn(500)

    # Scatter plot
    plot(x, '.')
    title('Scatter Plot')
    xlabel('X')
    ylabel('Y')
    draw()
    show()

    # Histogram
    hist(x)
    xlabel('Data Values')
    ylabel('Frequency')
    title('Histogram, default settings')
    show()

    hist(x, 25)
    xlabel('Data Values')
    ylabel('Frequency')
    title('Histogram, 25 bins')
    show()

    # Cumulative probability density
    numbins = 20
    cdf = stats.cumfreq(x, numbins)
    plot(cdf[0])
    xlabel('Data Values')
    ylabel('Cumulative Frequency')
    title('Cumulative probability density function')
    show()

    # Boxplot
    # The error bars indicate the range, and the box consists of the
    # first, second (middle) and third quartile
    boxplot(x)
    title('Boxplot')
    ylabel('Values')
    show()

    boxplot(x, vert=False)
    title('Boxplot, horizontal')
    xlabel('Values')
    show()

    # Check for normality
    _ = stats.probplot(x, plot=plt)
    title('Probplot - check for normality')
    show()

    # Bivariate data -------------------------
    # Generate data
    x = randn(200)
    y = 10 + 0.5 * x + randn(len(x))

    # Scatter plot
    scatter(x, y)  # This one is quite similar to "plot(x,y,'.')"
    title('Scatter plot of data')
    xlabel('X')
    ylabel('Y')
    show()

    # LineFit
    M = vstack((ones(len(x)), x)).T
    pars = linalg.lstsq(M, y)[0]
    intercept = pars[0]
    slope = pars[1]
    scatter(x, y)
    plot(x, intercept + slope * x, 'r')
    show()
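# The demo above plots the cumulative counts against bin index. A minimal sketch,
# assuming only NumPy/SciPy/matplotlib, that reconstructs the bin positions from the
# values cumfreq returns so the empirical CDF is drawn against the data values:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

x = np.random.randn(500)
res = stats.cumfreq(x, numbins=20)
counts, lowerlimit, binsize = res[0], res[1], res[2]
bin_centers = lowerlimit + binsize * (np.arange(counts.size) + 0.5)
plt.step(bin_centers, counts / counts[-1], where='mid')
plt.xlabel('Data Values')
plt.ylabel('Empirical CDF')
plt.show()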
def main():
    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    # mystyle.set()

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])
    mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter')

    # Histogram
    plt.hist(x)
    mystyle.printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency',
                     title='Histogram, default settings')

    plt.hist(x, 25)
    mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
                     title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
                     ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density', title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x / 2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    mystyle.printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements',
                     title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))
    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    # sns.violinplot(df, color=["#999999", "#DDDDDD"])
    sns.violinplot(df)
    mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot
    sns.set_style('whitegrid')
    sns.boxplot(df)
    mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c'] * 300)
    mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'], name='series')
    sns.set_palette("husl")
    series.plot(kind='pie', figsize=(6, 6))
    mystyle.printout('piePlot.png', title='pie-plot')
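# Newer seaborn releases are stricter about how a wide-form DataFrame is handed to the
# categorical plotting functions; if the sns.violinplot(df) call above errors, passing the
# frame through the data keyword is the safer spelling. A minimal, self-contained sketch
# of the same kind of plot (the Girls/Boys frame here is synthetic example data):
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df_wide = pd.DataFrame({'Girls': np.random.randn(100), 'Boys': np.random.randn(100) + 3})
sns.violinplot(data=df_wide)
plt.show()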
def plot_acceleration_or_instantaneous_curves(number_of_repacks): def check_for_zeros(latitude, longitude, latitude_index, longitude_index, current_value): if current_value == 0: return 1 return 0 def or_function(this_value, other_value): return numpy.logical_or(this_value, other_value) def calculate_population_of_zerows(datamap, populationmap): population = 0 for i in range(400): for j in range(600): if datamap.get_value_by_index(i, j) == 1: population += populationmap.get_value_by_index(i, j) return population colors = {7: 'b', 14: 'r', 22: 'g', 25: 'm'} for num_channels_removed in [25]: zerows_map = west.data_map.DataMap2DContinentalUnitedStates.create(400, 600) zerows_map.reset_all_values(0) datamap_spec = west.data_management.SpecificationDataMap(west.data_map.DataMap2DContinentalUnitedStates, 400, 600) is_in_region_map_spec = west.data_management.SpecificationRegionMap(west.boundary.BoundaryContinentalUnitedStates, datamap_spec) is_in_region_map = is_in_region_map_spec.fetch_data() population_map_spec = west.data_management.SpecificationPopulationMap(is_in_region_map_spec, west.population.PopulationData) population_map = population_map_spec.fetch_data() instantaneous_values = numpy.zeros(number_of_repacks) acceleration_values = numpy.zeros(number_of_repacks) num_repacks_index = numpy.arange(number_of_repacks) if num_channels_removed == 25: repack_file_list = os.listdir(os.path.join("data", "Pickled Files - Whitespace Maps", "A-%dChannelsRemoved"%num_channels_removed, "Only UHF")) repack_file_list = repack_file_list[1:] else: repack_file_list = [] for i in range(number_of_repacks): repack_file_list.append("%dUHFnewUSMinimumStationstoRemove_OnlyUHF_PLMRS_FCCcontours%d.pcl"%(num_channels_removed, i)) for i in range(number_of_repacks): print i print repack_file_list[i] if num_channels_removed == 25: wsmap = west.data_map.DataMap2DContinentalUnitedStates.from_pickle(os.path.join("data", "Pickled Files - Whitespace Maps", "A-%dChannelsRemoved"%num_channels_removed, "Only UHF", repack_file_list[i])) else: wsmap = west.data_map.DataMap2DContinentalUnitedStates.from_pickle(os.path.join("data", "Pickled Files - Whitespace Maps", "A-%dChannelsREmoved"%num_channels_removed, repack_file_list[i])) wsmap.update_all_values_via_function(check_for_zeros) zerows_map = zerows_map.combine_datamaps_with_function(wsmap, or_function) instantaneous_values[i] = calculate_population_of_zerows(wsmap, population_map) acceleration_values[i] = calculate_population_of_zerows(zerows_map, population_map) print instantaneous_values[i], acceleration_values[i] from scipy.stats import cumfreq num_bins = 100 inst_values_cdf = cumfreq(instantaneous_values, num_bins) xaxis = numpy.linspace(0, max(instantaneous_values), num_bins) plt.plot(xaxis, inst_values_cdf[0]/number_of_repacks) plt.xlabel("Population that sees zero whitespace after repack") plt.ylabel("CDF") plt.show()
# plot the histogram
plt.clf()
plt.bar(bins[:-1], n, width=0.4, color='red')
plt.xlabel('X', fontsize=20)
plt.ylabel('number of data points in the bin', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/hist.png')

# compute and plot the relfreq
relfreqs, lowlim, binsize, extrapoints = st.relfreq(x)
plt.clf()
plt.bar(bins[:-1], relfreqs, width=0.4, color='magenta')
plt.xlabel('X', fontsize=20)
plt.ylabel('Relative frequencies', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/relfreq.png')

# compute and plot pdf
plt.clf()
n, bins, patches = plt.hist(x, 10, normed=1, facecolor='yellow', alpha=0.5)
plt.xlabel('X', fontsize=15)
plt.ylabel('PDF', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/pdf.png')

# compute and plot cdf
cumfreqs, lowlim, binsize, extrapoints = st.cumfreq(x)
plt.clf()
plt.bar(bins[:-1], cumfreqs / cumfreqs[-1], width=0.4, color='black', alpha=0.45)
plt.xlabel('X', fontsize=15)
plt.ylabel('CDF', fontsize=15)
plt.savefig('/home/tomer/my_books/python_in_hydrology/images/cdf.png')
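# The CDF bars above reuse the bin edges from the earlier plt.hist call. A minimal
# sketch, assuming the same x array and the st/plt imports of this script, that places
# the bars at the positions cumfreq itself used:
import numpy as np

cumfreqs, lowlim, binsize, extrapoints = st.cumfreq(x)
cdf_centers = lowlim + binsize * (np.arange(len(cumfreqs)) + 0.5)
plt.clf()
plt.bar(cdf_centers, cumfreqs / cumfreqs[-1], width=binsize, align='center', color='black', alpha=0.45)
plt.xlabel('X', fontsize=15)
plt.ylabel('CDF', fontsize=15)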
label_files = [open(filename).readlines() for filename in os.listdir(".") if filename.endswith(label_extension)]

# bootstrapping; assumes that the data here completely describe (are completely
# representative of) the underlying distribution.
sample_count = data_nodates.shape[0]
variable_count = data_nodates.shape[1]
jitter_count = 1000
distributions = reshape(array([diff(shuffler(data_nodates), axis=0) for jitter in range(jitter_count)]),
                        ((sample_count - 1) * jitter_count, variable_count))

# generate cdfs; (sample_count - 1) because the list of differences of n samples has n - 1 members
cdfs = array([cumfreq(distribution)[0:3] for distribution in distributions.transpose()])
print cdfs[1]

overview = plt.figure()
ax = overview.add_subplot(111)
for cdf in cdfs:
    xvals = array([cdf[1] + i * cdf[2] for i in range(10)])
    h, = ax.plot(xvals, cdf[0] / max(cdf[0]), '--.', markersize=30)
    h.set_clip_on(False)
# plt.legend(('CXR','ABD CT','ABD + Chest CT'), 'lower right', numpoints=1, fancybox=True, frameon=False, bbox_to_anchor=(1.1, 0.2))
tech.adjust_spines(ax, ['left', 'bottom'])
overview.text(0.5, 0.08, r'Weekly Change in Cases', ha='center', va='top', fontsize=30, weight='bold')  # xlabel
overview.text(0.02875, 0.5, r'Frequency of Occurrence', ha='center', va='center', rotation='vertical', fontsize=30, weight='bold')
plt.subplots_adjust(top=0.95, bottom=0.18, left=0.15)
plt.savefig('cdf_CXRT_s.png', dpi=300)
def main():
    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = randn(500)

    # Scatter plot
    plot(x, '.')
    title('Scatter Plot')
    xlabel('X')
    ylabel('Y')
    draw()
    show()

    # Histogram
    hist(x)
    xlabel('Data Values')
    ylabel('Frequency')
    title('Histogram, default settings')
    show()

    hist(x, 25)
    xlabel('Data Values')
    ylabel('Frequency')
    title('Histogram, 25 bins')
    show()

    # Cumulative probability density
    numbins = 20
    cdf = stats.cumfreq(x, numbins)
    plot(cdf[0])
    xlabel('Data Values')
    ylabel('Cumulative Frequency')
    title('Cumulative probability density function')
    show()

    # Boxplot
    # The error bars indicate the range, and the box consists of the
    # first, second (middle) and third quartile
    boxplot(x)
    title('Boxplot')
    ylabel('Values')
    show()

    boxplot(x, vert=False)
    title('Boxplot, horizontal')
    xlabel('Values')
    show()

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))
    nd2 = stats.norm(loc=0.5, scale=1.2)
    data2 = nd2.rvs(size=(100))

    # Use the seaborn package for the violin plot, and set the context for "poster"
    sns.set(context='poster')
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(df)
    show()

    # Check for normality
    _ = stats.probplot(x, plot=plt)
    title('Probplot - check for normality')
    show()

    # Bivariate data -------------------------
    # Generate data
    x = randn(200)
    y = 10 + 0.5 * x + randn(len(x))

    # Scatter plot
    scatter(x, y)  # This one is quite similar to "plot(x,y,'.')"
    title('Scatter plot of data')
    xlabel('X')
    ylabel('Y')
    show()

    # LineFit
    M = vstack((ones(len(x)), x)).T
    pars = linalg.lstsq(M, y)[0]
    intercept = pars[0]
    slope = pars[1]
    scatter(x, y)
    plot(x, intercept + slope * x, 'r')
    show()
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import cumfreq

P = 0.4
Q = 1 - P

# Three independent simulations of 1000 geometric draws each
a = np.random.geometric(P, size=1000)
b = np.random.geometric(P, size=1000)
c = np.random.geometric(P, size=1000)

common_params = dict(bins=[x for x in range(1, 14)], normed=1, range=(0, 15))
plt.title('Comparative histogram of simulations and theoretical probability')
plt.ylabel('Relative frequency')
plt.xlabel('Values')
plt.hist([a, b, c], **common_params)

# Theoretical geometric pmf, P(X = k) = P * Q**(k - 1)
gteo = [P * pow(Q, i - 1) for i in range(1, 14)]
g = cumfreq(gteo, 15)
plt.show()
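# The script computes the theoretical pmf (gteo) but only shows the histogram. A minimal
# sketch, assuming the arrays defined above, that overlays the empirical CDF of one
# simulation with the closed-form geometric CDF F(k) = 1 - (1 - P)**k:
plt.figure()
ks = np.arange(1, 14)
emp = cumfreq(a, numbins=13, defaultreallimits=(0.5, 13.5))
plt.step(ks, emp[0] / len(a), where='mid', label='empirical CDF')
plt.plot(ks, 1 - Q ** ks, 'o--', label='theoretical CDF')
plt.legend(loc='lower right')
plt.show()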
def histplot1D(datain, **kwargs):
    datatype = kwargs.get('datatype', 'df')
    if datatype == 'df':
        histvals = datain.values
        binrange = kwargs.get('binrange', [datain.min(), datain.max()])
    elif datatype == 'histdict':
        counts = datain['counts']
        edges = datain['edges']
        centers = datain['centers']
        step = np.diff(edges)
    numbins = kwargs.get('numbins', 100)
    missinglowval = kwargs.get('missinglowval', -99999)
    missinghighval = kwargs.get('missinghighval', 99999)
    normalize = kwargs.get('normalize', True)
    cumulative = kwargs.get('cumulative', False)
    doplot = kwargs.get('doplot', True)
    showplot = kwargs.get('showplot', False)
    saveplot = kwargs.get('saveplot', False)
    plotfilename = kwargs.get('plotfilename', '1Dhist_test.png')
    fsize = kwargs.get('fsize', 32)  # baseline font size
    ar = kwargs.get('ar', 1.0)  # aspect ratio
    figheight = kwargs.get('figheight', 12)  # inches
    dpi = kwargs.get('dpi', 100)
    fignum = kwargs.get('fignum', 0)
    xlog = kwargs.get('xlog', False)
    ylog = kwargs.get('ylog', False)
    xlimits = kwargs.get('xlimits', None)
    ylimits = kwargs.get('ylimits', None)
    xlabel = kwargs.get('xlabel', None)
    if ylog:
        ylabel = kwargs.get('ylabel', 'Log (Counts)')
    else:
        ylabel = kwargs.get('ylabel', 'Counts')

    if datatype == 'histdict':
        dictout = datain
    else:
        if not cumulative:
            counts, edges = np.histogram(datain.values, numbins, range=binrange, normed=normalize)
            step = np.diff(edges)
            centers = edges[:-1] + step * 0.5
            dictout = {'counts': counts, 'centers': centers, 'edges': edges}
        else:
            counts, lowlim, barwidths, extrapoints = cumfreq(datain.values, numbins=numbins,
                                                             defaultreallimits=binrange)
            if normalize:
                totcounts = ((datain.values > missinglowval) & (datain.values < missinghighval)).sum()
                counts = counts / totcounts
            step = (binrange[1] - binrange[0]) / numbins
            binvals = np.arange(binrange[0], binrange[1], step)
            centers = [v + step * 0.5 for v in binvals]
            edges = np.hstack((binvals, binvals[-1] + step))
            dictout = {'counts': counts, 'centers': centers, 'edges': edges}

    if doplot:
        plt.rc('font', family='serif', size=fsize)
        fig1 = plt.figure(fignum)
        if ar:
            fig1.set_size_inches(figheight * ar, figheight)
        ax1 = fig1.add_subplot(111)
        barwidths = step
        # log-scale the requested axes; bar() itself only takes a log flag for the y axis
        if xlog:
            plt.xscale('log')
        if ylog:
            plt.yscale('log')
        logplot = ylog
        ax1.set_aspect(ar)
        ax1.bar(centers, counts, width=barwidths, align='center', log=logplot)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if ylimits:
            plt.ylim(ylimits)
        if xlimits:
            plt.xlim(xlimits)
        if saveplot:
            fig1.canvas.print_figure(plotfilename, dpi=dpi, edgecolor='b', bbox_inches='tight')
        if showplot:
            fig1.canvas.draw()
    return dictout
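# Possible call for the helper above; a sketch only, assuming the module-level imports
# (np, plt, cumfreq) used inside histplot1D and a pandas Series as input, which is what
# the default 'df' datatype expects via datain.values:
import numpy as np
import pandas as pd

ser = pd.Series(np.random.lognormal(mean=0.0, sigma=1.0, size=10000))
out = histplot1D(ser, numbins=50, cumulative=True, normalize=True,
                 xlabel='Value', ylabel='Cumulative fraction', showplot=True)
# out['counts'], out['centers'], and out['edges'] hold the binned CDF for reuse.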
def _calcCMC(self, size):
    cumfreqs = (cumfreq(self.matching_order, numbins=size)[0] / size) * 100.
    self.CMC = cumfreqs.astype(np.float32)
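# Standalone sketch of the same CMC (Cumulative Match Characteristic) computation, with
# hypothetical example data: matching_order holds, for each probe, the rank at which its
# correct gallery entry was retrieved (1 = best match), so the k-th cumulative value is
# the top-k identification rate.
import numpy as np
from scipy.stats import cumfreq

matching_order = np.array([1, 1, 2, 1, 3, 1, 2, 5, 1, 4])
gallery_size = 5
cmc = cumfreq(matching_order, numbins=gallery_size)[0] / len(matching_order) * 100.0
# cmc[k - 1] is the percentage of probes whose correct match appears within the top k ranks.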
## plot
# countMax = max(overlapSer)
# bins = np.arange(countMax+1)
# plt.hist(overlapSer,bins)
# plt.ylabel('freq',fontweight='bold')
# plt.xlabel('number of cell lines',fontweight='bold')
# plt.title('summlySpace DOS compounds (' + str(overlapCount) + ') - cell lines is_gold')
# plt.xticks(bins)
# outF = os.path.join(wkdir, 'DOS_summly_cell_line_distribution.png')
# plt.savefig(outF, bbox_inches='tight',dpi=200)
# plt.close()

from scipy.stats import cumfreq
num_bins = 20
b, lowlim, binsize, extrapoints = cumfreq(passSer, num_bins)
outF = os.path.join(wkdir, 'cdf_test.png')
plt.plot(b)
plt.savefig(outF, bbox_inches='tight', dpi=200)
plt.close()

# cumsum
dx = .01
X = np.arange(-2, 2, dx)
Y = pylab.exp(-X**2)

# Normalize the data to a proper PDF
Y /= (dx * Y).sum()

# Compute the CDF
CY = np.cumsum(Y * dx)
build_count_graph([(name, [s.rtt / 1000 for s in data], color) for name, data, color in datasets], counts_of='ms')
build_legend()
save_graph("rtt")

"""
This is a CDF of the bandwidth, which is very useful for comparing the
overall response of multiple versions/setups
"""
if "cdf" in args.graphs:
    build_graph("Cumulative Distribution of Throughput")
    # set up our x axis based on 1Gbps operation.
    hist_xpoints = range(0, 1000000000, 1000000)
    hist_xticks = xrange(0, 1000000000, 100000000)
    hist_xlabels = ["{:4.0f}".format(t / 1e6) for t in hist_xticks]
    for name, data, color in datasets:
        # cumfreq expects an integer bin count, so bin onto one point per x value
        # over the fixed (0, 1 Gbps) range.
        cdf = stats.cumfreq([s.bw for s in data], numbins=len(hist_xpoints) - 1, defaultreallimits=(0, 1e9))[0]
        plt.plot(hist_xpoints[:-1], cdf / len(data), label=name, color=color)
    plt.xticks(hist_xticks, hist_xlabels)
    hist_yticks = np.arange(0, 101, 10) / 100.0
    plt.yticks(hist_yticks, ["{:2.0%}".format(float(t)) for t in hist_yticks])
    plt.xlabel("Mbps")
    plt.ylabel("percentile")
    build_legend(loc=4)
    save_graph("cdf")
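# Sketch, assuming the datasets list used above: read a single percentile (here the
# median throughput) off the same per-run samples that feed the CDF plot.
for name, data, color in datasets:
    bw = sorted(s.bw for s in data)
    median_bw = bw[len(bw) // 2]
    print("%s median throughput: %.1f Mbps" % (name, median_bw / 1e6))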
loop_time = time.time() - start_time if sdOutput == "NA": print "randomSDFun while loop did not run in time" else: sd_list.append(sdOutput) print("--- %s seconds ---" % (time.time() - start_time)) if len(sd_list) == numBoots: minList= min(sd_list) maxList= max(sd_list) cumFreq = cumfreq(sd_list, numBins, defaultreallimits=(minList, maxList)) lowerLimit = cumFreq[1] countValues = cumFreq[0] freq_interval = cumFreq[2] upperLimit = lowerLimit + (freq_interval*numBins) xaxis = np.arange(lowerLimit, (upperLimit), freq_interval) if len(xaxis) > numBins: print "xaxis too long, length:", len(xaxis) del_index = numBins xaxis = np.delete(xaxis, del_index) result = (sd_data -minList) / freq_interval myIndex = int(round(result)) -1