예제 #1
0
def plothist(x, bin=None, nbins=None, xrange=None, yrange=None, min=None,
             max=None, overplot=False, color='black', xlog=False, ylog=False,
             nan=False, weights=None, norm=False, kernel=None, retpoints=False,
             adaptive=False, adaptive_thresh=30, adaptive_depth=[2,10],
             weight_norm=False, apply_func=None, **kw):
    """
    Plot the 1D histogram
    Example:
    >> plothist(dat, bin=0.1, min=0, max=3)

    Keyword parameters:
    ------------------
    bin
        the binsize(float)
    nbins
        number of bins(integer)
        If both bin= and nbins= are given a RuntimeWarning is issued and
        both values are used as passed. If neither is given, 100 bins
        are used.
    xrange, yrange
        accepted but not used by this function; the plotted ranges are
        controlled through the xr= and yr= keywords forwarded in **kw
    xlog, ylog
        log the appropriate axis
    weights
        the 1-D array of weights used in the histogram creation.
        A scalar is also accepted, in which case the bin counts are
        multiplied by it.
    nan
        boolean flag to ignore non-finite values (in x and, if it is
        given, in weights)
    norm
        boolean flag to normalize the histogram by the peak value
    min,max
        range of data for which the histogram is constructed.
        They default to the smallest/largest non-NaN data value. When
        bin= is given without max=, max is moved up so that the range
        is a whole number of bins.
    kernel
        if not None, a kernel density estimate is drawn instead of a
        binned histogram (the value is forwarded to statistics.pdf with
        a bandwidth of bin/2)
    retpoints
        boolean parameter controlling whether to return or not the
        computed histogram.
        If yes the tuple with two arrays (bin centers, Number of points
        in bins) is returned. In kernel mode the tuple is instead
        (evaluation grid, un-normalised kernel estimate).
    overplot
        boolean parameter for overplotting
    adaptive
        boolean for turning on/off the adaptive regime of
        histogramming (adaptive bin size).
        Only consulted when kernel is None; if True the weights, nbins,
        bin and weight_norm parameters are ignored
    adaptive_thresh
        the limiting number of points in the bin for the adaptive
        histogramming (default 30)
    adaptive_depth
        the list of two integers for the detalisation levels of
        adaptive histogramming (default [2,10])
    weight_norm
        if True the value in each bin is mean weight of points within
        the bin (empty bins become NaN)
    apply_func
        optional callable invoked as apply_func(xs, ys) on the curve
        that is about to be drawn; its return value is plotted in
        place of ys
    **kw
        forwarded to the plotting routine. 'ps' defaults to 0, 'yr' to
        the extrema of the curve and 'xr' to [min, max].

    Returns None unless retpoints is True.
    """
    if nan:
        ind = numpy.isfinite(x)
        if weights is not None:
            ind = numpy.isfinite(weights) & ind
        dat = x[ind]
        # A scalar weight has no per-point entries to mask and cannot be
        # indexed, so only array-like weights are filtered.
        if weights is not None and not numpy.isscalar(weights):
            weights = weights[ind]
    else:
        dat = x

    max_given = max is not None
    # NaN-aware extrema: a stray NaN must not turn the whole range into NaN.
    if min is None:
        min = numpy.nanmin(dat)
    if max is None:
        max = numpy.nanmax(dat)

    if bin is None and nbins is None:
        nbins = 100
        bin = (max - min) * 1. / nbins
    elif nbins is None:
        nbins = int(math.ceil((max - min) * 1. / bin))
        if not max_given:
            # Stretch the automatic upper edge to a whole number of bins.
            max = min + nbins * bin
    elif bin is None:
        bin = (max - min) * 1. / nbins
    else:
        warnings.warn("both bin= and nbins= keywords were specified in the plothist call",RuntimeWarning)
        # if both nbins and bin are defined I don't do anything
        # it may be non-intuitive if kernel option is used, because
        # it uses both nbins and bin options

    if kernel is None:
        if not adaptive:
            if not np.isscalar(weights):
                # Covers both weights=None and per-point weight arrays.
                hh, loc = numpy.histogram(dat, range=(min, max), bins=nbins,
                                          weights=weights)
            else:
                hh, loc = numpy.histogram(dat, range=(min, max), bins=nbins)
                hh = hh * weights

            if weight_norm:
                # Divide the summed weights by the raw counts -> mean weight.
                hh1, loc = numpy.histogram(dat, range=(min, max), bins=nbins,
                                           weights=None)
                hh = hh * 1. / hh1
        else:
            import adabinner
            hh, loc = adabinner.hist(dat, xmin=min, xmax=max,
                                     hi=adaptive_depth,
                                     thresh=adaptive_thresh)

        # Duplicate every bin value and every inner edge so that the line
        # drawn through the points forms a staircase.
        hh1 = np.repeat(hh, 2)
        loc1 = np.concatenate(([loc[0]], np.repeat(loc[1:-1], 2), [loc[-1]]))
    else:
        loc1 = numpy.linspace(min, max, nbins * 5)
        # NOTE(review): the standard-library `statistics` module has no
        # module-level pdf(); presumably a project-local module of the same
        # name is first on the path -- verify.
        import statistics
        if weights is not None:
            hh1 = statistics.pdf(dat, loc1, h=bin/2., kernel=kernel,
                                 weight=weights) * bin * len(dat)
        else:
            hh1 = statistics.pdf(dat, loc1, h=bin/2.,
                                 kernel=kernel) * bin * len(dat)
        # Remembered for retpoints: the rebinding below must not affect it.
        kernel_curve = hh1

    func = oplot if overplot else plot
    if norm:
        # nanmax: empty bins are NaN under weight_norm and would otherwise
        # turn the whole normalised curve into NaN.
        hh1 = hh1 * 1. / numpy.nanmax(hh1)
    kw['ps'] = kw.get('ps') or 0
    if 'yr' not in kw:
        kw['yr'] = [numpy.nanmin(hh1), numpy.nanmax(hh1)]
    if 'xr' not in kw:
        kw['xr'] = [min, max]
    # Note: the default 'yr' above is taken before apply_func transforms
    # the curve.
    if apply_func is not None:
        hh1 = apply_func(loc1, hh1)
    func(loc1, hh1, color=color,
         xlog=xlog, ylog=ylog, **kw)
    if retpoints:
        if kernel is None:
            return 0.5 * (loc[1:] + loc[:-1]), hh
        # No bins exist in kernel mode; hand back the evaluated curve.
        return loc1, kernel_curve
예제 #2
0
def plothist(x,
             bin=None,
             nbins=None,
             xrange=None,
             yrange=None,
             min=None,
             max=None,
             overplot=False,
             color='black',
             xlog=False,
             ylog=False,
             nan=False,
             weights=None,
             norm=False,
             kernel=None,
             retpoints=False,
             adaptive=False,
             adaptive_thresh=30,
             adaptive_depth=[2, 10],
             weight_norm=False,
             apply_func=None,
             statistic=None,
             knuth=False,
             cumulative=False,
             **kw):
    """
    Plot the 1D histogram
    Example:
    >> plothist(dat, bin=0.1, min=0, max=3)

    Keyword parameters:
    ------------------
    bin
        the binsize(float)
    nbins
        number of bins(integer)
        If both bin= and nbins= are given a RuntimeWarning is issued and
        both values are used as passed.
    xrange, yrange
        accepted but not used by this function; the plotted ranges are
        controlled through the xr= and yr= keywords forwarded in **kw
    xlog, ylog
        log the appropriate axis
    weights
        the 1-D array of weights used in the histogram creation.
        A scalar is also accepted and is applied to every point.
    nan
        boolean flag to ignore non-finite values (in x and, if it is
        given, in weights)
    norm
        boolean flag to normalize the histogram by the peak value
        (NaN bins are skipped when locating the peak)
    min,max
        range of data for which the histogram is constructed.
        They default to the smallest/largest non-NaN data value. When
        bin= is given without max=, max is moved up so that the range
        is a whole number of bins.
    kernel
        if not None, a kernel density estimate (sklearn KernelDensity
        with bandwidth=bin) is drawn instead of a binned histogram;
        weights are ignored in this mode
    retpoints
        boolean parameter controlling whether to return or not the
        computed histogram. If yes the tuple with two arrays
        (bin centers, Number of points in bins) is returned.
        In kernel mode the tuple is instead
        (evaluation grid, un-normalised density estimate).
    overplot
        boolean parameter for overplotting
    adaptive
        boolean for turning on/off the adaptive regime of
        histogramming (adaptive bin size).
        Only consulted when kernel is None; if True the weights, nbins,
        bin, statistic and weight_norm parameters are ignored
    adaptive_thresh
        the limiting number of points in the bin for the adaptive
        histogramming (default 30)
    adaptive_depth
        the list of two integers for the detalisation levels of
        adaptive histogramming (default [2,10])
    weight_norm
        if True the value in each bin is mean weight of points within
        the bin (empty bins become NaN)
    apply_func
        optional callable invoked as apply_func(xs, ys) on the curve
        that is about to be drawn; its return value is plotted in
        place of ys
    statistic
        if not None, each bin holds
        scipy.stats.binned_statistic(dat, weights, statistic, ...)
        instead of a (weighted) count; weights supplies the values
    knuth
        if True and neither bin= nor nbins= is given, the number of
        bins comes from __findKnuth(dat, min, max) instead of the
        default of 100
    cumulative
        if True the running sum of the bin values is plotted.
        Raises RuntimeError when combined with weights, kernel or
        adaptive.
    **kw
        forwarded to the plotting routine. 'ps' defaults to 0, 'yr' to
        the extrema of the curve and 'xr' to [min, max].

    Returns None unless retpoints is True.
    """
    if nan:
        ind = np.isfinite(x)
        if weights is not None:
            ind = np.isfinite(weights) & ind
        dat = x[ind]
        # A scalar weight has no per-point entries to mask and cannot be
        # indexed, so only array-like weights are filtered.
        if weights is not None and not np.isscalar(weights):
            weights = weights[ind]
    else:
        dat = x

    max_given = max is not None
    if min is None:
        min = np.nanmin(dat)
    if max is None:
        max = np.nanmax(dat)

    if bin is None and nbins is None:
        nbins = __findKnuth(dat, min, max) if knuth else 100
        bin = (max - min) * 1. / nbins
    elif nbins is None:
        nbins = int(math.ceil((max - min) * 1. / bin))
        if not max_given:
            # Stretch the automatic upper edge to a whole number of bins.
            max = min + nbins * bin
    elif bin is None:
        bin = (max - min) * 1. / nbins
    else:
        warnings.warn(
            'both bin= and nbins= keywords were specified in ' +
            'the plothist call', RuntimeWarning)
        # if both nbins and bin are defined I don't do anything
        # it may be non-intuitive if kernel option is used, because
        # it uses both nbins and bin options

    if cumulative and (kernel is not None or adaptive
                       or weights is not None):
        raise RuntimeError(
            'cumulative is incompatible with weights, kernel ' +
            'or adaptive options')

    if kernel is None:
        if not adaptive:
            if np.isscalar(weights) and weights is not None:
                # Expand a scalar weight to one entry per data point.
                weights = np.zeros_like(dat) + weights
            if statistic is None:
                hh, loc = np.histogram(dat,
                                       range=(min, max),
                                       bins=nbins,
                                       weights=weights)

                if weight_norm:
                    # Summed weights divided by raw counts -> mean weight.
                    hh1, loc = np.histogram(dat,
                                            range=(min, max),
                                            bins=nbins,
                                            weights=None)
                    hh = hh * 1. / hh1
            else:
                S = scipy.stats.binned_statistic(dat,
                                                 weights,
                                                 statistic,
                                                 range=[min, max],
                                                 bins=nbins)
                hh = S.statistic
                loc = S.bin_edges
        else:
            import adabinner
            hh, loc = adabinner.hist(dat,
                                     xmin=min,
                                     xmax=max,
                                     hi=adaptive_depth,
                                     thresh=adaptive_thresh)
        if cumulative:
            hh = np.cumsum(hh)
        # Duplicate every bin value and every inner edge so that the line
        # drawn through the points forms a staircase.
        hh1 = np.repeat(hh, 2)
        loc1 = np.concatenate(([loc[0]], np.repeat(loc[1:-1], 2), [loc[-1]]))
    else:
        loc1 = np.linspace(min, max, nbins * 10)
        import sklearn.neighbors
        xind = np.isfinite(dat)
        kde = sklearn.neighbors.KernelDensity(bandwidth=bin, kernel=kernel)
        kde.fit(np.asarray(dat[xind]).flatten().reshape(-1, 1))
        # score_samples returns log-density; exponentiate to get density.
        hh1 = np.exp(kde.score_samples(loc1.reshape(-1, 1)))
        if weights is not None:
            print('WARNING weights ignored for KDE !')
        # Remembered for retpoints: the rebinding below must not affect it.
        kernel_curve = hh1

    func = oplot if overplot else plot
    if norm:
        # nanmax, matching the NaN-aware 'yr' below: bins can be NaN
        # (empty bins under statistic= or weight_norm) and a plain max()
        # would turn the whole normalised curve into NaN.
        hh1 = hh1 * 1. / np.nanmax(hh1)
    kw['ps'] = kw.get('ps') or 0
    if 'yr' not in kw:
        kw['yr'] = [np.nanmin(hh1), np.nanmax(hh1)]
    if 'xr' not in kw:
        kw['xr'] = [min, max]
    # Note: the default 'yr' above is taken before apply_func transforms
    # the curve.
    if apply_func is not None:
        hh1 = apply_func(loc1, hh1)
    func(loc1, hh1, color=color, xlog=xlog, ylog=ylog, **kw)
    if retpoints:
        if kernel is None:
            return 0.5 * (loc[1:] + loc[:-1]), hh
        # No bins exist in kernel mode; hand back the evaluated curve.
        return loc1, kernel_curve