def calculateMetrics(self, time_array, value_array, start_time_utc, end_time_utc, num_bins):

        mean_metric_tuple = stats.binned_statistic(x=time_array,
                                                   values=value_array,
                                                   statistic='mean',
                                                   bins=num_bins,
                                                   range=[(start_time_utc, end_time_utc)])

        min_metric_tuple = stats.binned_statistic(x=time_array,
                                                  values=value_array,
                                                  statistic='min',
                                                  bins=num_bins,
                                                  range=[(start_time_utc, end_time_utc)])

        max_metric_tuple = stats.binned_statistic(x=time_array,
                                                  values=value_array,
                                                  statistic='max',
                                                  bins=num_bins,
                                                  range=[(start_time_utc, end_time_utc)])

        std_metric_tuple = stats.binned_statistic(x=time_array,
                                                  values=value_array,
                                                  statistic='std',
                                                  bins=num_bins,
                                                  range=[(start_time_utc, end_time_utc)])

        return {'mean': self.populateFinalBucket(mean_metric_tuple),
                'minimum': self.populateFinalBucket(min_metric_tuple),
                'maximum': self.populateFinalBucket(max_metric_tuple),
                'standard_deviation': self.populateFinalBucket(std_metric_tuple)}
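The four calls above differ only in the statistic name. Below is a minimal self-contained sketch of the same pattern collapsed into one loop; `binned_metrics` is a hypothetical standalone adaptation, and the `populateFinalBucket` post-processing is specific to the class above, so this sketch returns the raw per-bin arrays instead.

import numpy as np
from scipy import stats

def binned_metrics(time_array, value_array, start_time_utc, end_time_utc, num_bins):
    # one binned_statistic call per statistic; identical bin layout for all four
    results = {}
    for key, statistic in [('mean', 'mean'), ('minimum', 'min'),
                           ('maximum', 'max'), ('standard_deviation', 'std')]:
        res = stats.binned_statistic(x=time_array, values=value_array,
                                     statistic=statistic, bins=num_bins,
                                     range=[(start_time_utc, end_time_utc)])
        results[key] = res.statistic  # res.bin_edges holds the shared edges
    return results

# synthetic usage: 1000 samples over a 60-second window, 6 bins
t = np.sort(np.random.uniform(0, 60, 1000))
v = np.sin(t) + 0.1 * np.random.randn(1000)
metrics = binned_metrics(t, v, 0, 60, num_bins=6)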
Example 2
    def _computeXvsAngle(self, var, stat='mean', scale=6, ratio=.5,
                         tolerance=.1, nbins=15, **kwargs):

        # angle at which elongate becomes collapsed
        # changed fixed 0.25 to ratio/2 so ratio can vary
        transition_angle = np.arccos(ratio/2) / np.pi * 180
        N_low_bins = int(np.floor(transition_angle / 180 * nbins))
        coll_bins = np.linspace(0, transition_angle, num=N_low_bins)
        elong_bins = np.linspace(transition_angle, 180, num=nbins - N_low_bins)

        collapsed_angles = self._computeAngularBins(collapsed=True)
        elongated_angles = self._computeAngularBins(collapsed=False)
        isRightSize = (np.exp(self.kkk.logr) * self.kkk.u > scale * ratio - scale * ratio * tolerance) & (
            np.exp(self.kkk.logr) * self.kkk.u < scale * ratio + scale * ratio * tolerance)
        isCollapsed = (((self.kkk.u * np.abs(self.kkk.v)) * np.exp(self.kkk.logr) + np.exp(self.kkk.logr) > scale - scale * tolerance)
                       & ((self.kkk.u * np.abs(self.kkk.v)) * np.exp(self.kkk.logr) + np.exp(self.kkk.logr) < scale + scale * tolerance))
        isElongated = ((np.exp(self.kkk.logr) > scale - scale * tolerance)
                       & (np.exp(self.kkk.logr) < scale + scale * tolerance))
        out1, bins1, _ = binned_statistic(elongated_angles[np.where(isRightSize & isElongated)], var[np.where(
            isRightSize & isElongated)], bins=elong_bins, statistic=stat)
        out2, bins2, _ = binned_statistic(collapsed_angles[np.where(isRightSize & isCollapsed)], var[np.where(
            isRightSize & isCollapsed)], bins=coll_bins, statistic=stat)
        full_var = np.concatenate((out2, out1))
        bins1 += (bins1[1] - bins1[0]) / 2  # make edges centers
        bins2 += (bins2[1] - bins2[0]) / 2
        full_bins = np.concatenate((bins2[:-1], bins1[:-1]))
        return full_var, full_bins
Example 3
def plot_percentile_fill(ms, gs, c):
    sig2m, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[0]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig1m, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[1]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    med, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[2]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig1p, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[3]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig2p, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[4]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )

    mids = (edges[1:] + edges[:-1]) / 2
    plt.fill_between(mids, sig2m, sig2p, facecolor=c, alpha=0.3)
    plt.fill_between(mids, sig1m, sig1p, facecolor=c, alpha=0.3)
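The snippet above relies on module-level globals (`ps`, `hist_bins`, `m_low`, `m_high`). Here is a self-contained sketch of the same percentile-band pattern, with assumed placeholder values standing in for those globals:

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

ps = [2.5, 16, 50, 84, 97.5]   # 2-sigma, 1-sigma, and median percentiles
hist_bins = 20
m_low, m_high = 1e10, 1e14     # mass range (arbitrary placeholder units)

ms = np.random.uniform(np.log10(m_low), np.log10(m_high), 5000)
gs = ms + np.random.randn(5000)

# one percentile curve per entry in ps, all on the same bin layout
curves = [
    stats.binned_statistic(ms, gs, lambda xs, p=p: np.percentile(xs, p),
                           bins=hist_bins,
                           range=(np.log10(m_low), np.log10(m_high)))[0]
    for p in ps
]
edges = np.linspace(np.log10(m_low), np.log10(m_high), hist_bins + 1)
mids = (edges[1:] + edges[:-1]) / 2
plt.fill_between(mids, curves[0], curves[4], alpha=0.3)  # 2-sigma band
plt.fill_between(mids, curves[1], curves[3], alpha=0.3)  # 1-sigma band
plt.plot(mids, curves[2])                                # median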
Example 4
def plot_binned_range(xs, ys, c, bins, label, low=bin_low, high=bin_high):
    ys = np.array(ys)
    """
    mean, edges, _ = stats.binned_statistic(
        xs, ys, "mean", bins=bins, range=(bin_low, bin_high),
    )
    sqr, _, _ = stats.binned_statistic(
        xs, ys*ys, "mean", bins=bins, range=(bin_low, bin_high),
    )
    std = np.sqrt(sqr - mean*mean)
    bin_xs = (edges[1:] + edges[:-1]) / 2

    low, high, mid = mean-std, mean+std, mean
    """

    mid, edges, _ = stats.binned_statistic(
        xs, ys, "median", bins=bins, range=(low, high),
    )
    low_curve, _, _ = stats.binned_statistic(
        xs, ys, lambda y: np.percentile(y, 16),
        bins=bins, range=(low, high),
    )
    high_curve, _, _ = stats.binned_statistic(
        xs, ys, lambda y: np.percentile(y, 84),
        bins=bins, range=(low, high),
    )

    bin_xs = (edges[1:] + edges[:-1]) / 2

    plt.plot(bin_xs, high_curve, c, lw=1)
    plt.plot(bin_xs, low_curve, c, lw=1)
    plt.fill_between(bin_xs, low_curve, high_curve, facecolor=c, alpha=0.3)
    plt.plot(xs, ys, "%s." % c)
    plt.plot(bin_xs, mid, c, lw=3, label=label)
Example 5
def plot_percentile_line(ms, gs, c):
    sig2m, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[0]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig1m, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[1]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    med, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[2]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig1p, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[3]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )
    sig2p, edges, _ = stats.binned_statistic(
        ms, gs, lambda xs: np.percentile(xs, ps[4]), bins=hist_bins,
        range=(np.log10(m_low), np.log10(m_high)),
    )

    mids = (edges[1:] + edges[:-1]) / 2
    plt.plot(mids, sig2m, c=c, lw=2)
    plt.plot(mids, sig1m, c=c, lw=2)
    plt.plot(mids, med, c=c, lw=2)
    plt.plot(mids, sig1p, c=c, lw=2)
    plt.plot(mids, sig2p, c=c, lw=2)
Example 6
def ring_blur_mask(img, geometry, alpha, bins=None, mask=None):
    if mask is None:
        mask = np.zeros(img.shape).astype(bool)
    r = geometry.rArray(img.shape)
    int_r = np.around(r / geometry.pixel1).astype(int)
    if bins is None:
        bins = int_r.max() + 1
    mr = dc(r)
    mr[mask] = -1

    # integration
    mean = sts.binned_statistic(mr.ravel(), img.ravel(), bins=bins,
                                range=[0, mr.max()], statistic='mean')[0]
    std = sts.binned_statistic(mr.ravel(), img.ravel(), bins=bins,
                               range=[0, mr.max()], statistic=np.std)[0]

    threshold = alpha * std
    lower = mean - threshold
    upper = mean + threshold

    # single out the too low and too high pixels
    too_low = img < lower[int_r]
    too_hi = img > upper[int_r]

    mask = mask | too_low | too_hi
    return mask.astype(int)
Example 7
def plot_psf_1d_new(ax, psf, r_lm, settings, r_max, color='b', label=None):
    r_lm = r_lm.flatten()
    order = numpy.argsort(r_lm)
    r_lm = r_lm[order]
    psf_1d = psf.flatten()[order]

    wavelength_m = 299792458.0 / settings['freq_hz']
    psf_hwhm = (wavelength_m / (r_max * 2.0)) / 2.0  # FIXME(BM) better expression?
    x = r_lm / psf_hwhm

    num_bins = 500
    bin_mean, edges, number = \
        stats.binned_statistic(x, psf_1d, statistic='mean', bins=num_bins)
    bin_x = (edges[1:] + edges[:-1]) / 2

    def bin_max(values):
        return numpy.abs(values).max()

    bin_max, edges, number = \
        stats.binned_statistic(x, psf_1d, statistic=bin_max, bins=num_bins)

    # ax.plot(x, psf_1d, 'k.', markersize=3.0, alpha=0.5)
    ax.plot(bin_x, bin_mean, '--', color=color, linewidth=1.0, alpha=0.6)
    ax.plot(bin_x, bin_max, '-', color=color, linewidth=1.0, alpha=0.6,
            label=label)
    ax.grid(True)
def plot_psf_1d(psf, r_lm, r_peak, mask_r, settings, plot_name='psf_1d.png'):
    r_lm = r_lm.flatten()
    order = np.argsort(r_lm)
    r_lm = r_lm[order]
    psf_1d = psf.flatten()[order]

    num_bins = 500
    bin_mean, edges, number = \
        stats.binned_statistic(r_lm, psf_1d, statistic='mean', bins=num_bins)
    bin_x = (edges[1:] + edges[:-1]) / 2

    def bin_max(values):
        return values.max()

    bin_max, edges, number = \
        stats.binned_statistic(r_lm, psf_1d, statistic=bin_max, bins=num_bins)

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111)
    # ax.plot(r_lm, psf_1d, 'k.', markersize=3.0, alpha=0.5)
    ax.plot(bin_x, bin_mean, 'b--', linewidth=1.0)
    ax.plot(bin_x, bin_max, 'b-', linewidth=1.0)
    ax.grid(True)
    # ax.set_yscale('symlog', linthresh=0.05)
    y_max = psf_1d[r_lm > mask_r].max()
    y_min = psf_1d[r_lm > mask_r].min()
    # ax.set_ylim(y_min, y_max * 1.1)
    ax.set_ylim(-0.1, y_max * 1.1)
    ax.plot([r_peak, r_peak], ax.get_ylim(), 'g-', alpha=0.5, linewidth=3.0)
    ax.plot([mask_r, mask_r], ax.get_ylim(), 'r-', alpha=0.5, linewidth=3.0)
    ax.plot(r_peak, y_max, 'x', color='g', markersize=10.0, alpha=0.8)
    plt.savefig(join(settings['results_dir'], plot_name))
    # plt.show()
    plt.close(fig)
Example 9
def histogram_data(data, nbins=10, histrange=(0,10), edges=None, weights=None):
    """ Histogram a 1-d Array, integral normalized

    Histogram is integral normalized. A stdev is outputted as 
    sqrt(N)/norm where N is the total counts of each bin and
    norm is the normalization factor the whole histogram is
    divided by.
    
    Must specify range and number of bins
    Or a list of edges to histogram inside
    
    Weights is optional and is assumed equal weighting if None
    
    """
    
    if weights is None:
        weights = np.ones(np.shape(data)[0])
    
    if edges is None:
        hist, edges, slices = stats.binned_statistic(data, weights, statistic="sum", range=[histrange], bins=nbins)
    else:
        hist, edges, slices = stats.binned_statistic(data, weights, statistic="sum", bins=edges)

    normalization = abs(integrate_simple(hist, edges))
    
    stdev = np.sqrt(hist) / normalization
    hist = hist / normalization
    bincenters = 0.5*(edges[1:] + edges[:-1])
    
    return hist, stdev, bincenters, edges, slices
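`integrate_simple` above is an external helper. A self-contained sketch of the same integral normalization, assuming a rectangle-rule integral (sum of bin value times bin width) in its place:

import numpy as np
from scipy import stats

data = np.random.exponential(2.0, 10000)
hist, edges, _ = stats.binned_statistic(data, np.ones_like(data),
                                        statistic="sum", bins=20, range=[(0, 10)])
norm = abs(np.sum(hist * np.diff(edges)))  # rectangle-rule integral
stdev = np.sqrt(hist) / norm
hist = hist / norm
# the normalized histogram now integrates to ~1 over the binned range
print(np.sum(hist * np.diff(edges)))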
Example 10
def ring_blur_mask(img, q, alpha, bins, mask=None):
    """
    Apply an annular mask: check the ring statistics and mask any pixels
    whose value is more than alpha * std away from the ring mean.

    Parameters
    ----------
    img: 2darray
        The image
    q: 2darray
        The array which maps pixels to Q space
    alpha: float or tuple or 1darray
        The number of acceptable standard deviations; if a tuple, a linear
        distribution of alphas from alpha[0] to alpha[1] is used, if an
        array, it is used directly as the distribution of alphas
    bins: list
        The bin edges in Q
    mask: 1darray
        A starting flattened mask

    Returns
    -------
    2darray:
        The mask
    """

    if mask is None:
        mask = np.ones(img.shape).astype(bool)
    if mask.shape != img.shape:
        mask = mask.reshape(img.shape)
    msk_img = img[mask]
    msk_q = q[mask]

    int_q = np.zeros(q.shape, dtype=int)
    for i in range(len(bins) - 1):
        t_array = (bins[i] <= q) & (q < bins[i + 1])
        int_q[t_array] = i - 1
    # integration
    mean = sts.binned_statistic(msk_q, msk_img, bins=bins[1:],
                                statistic='mean')[0]
    std = sts.binned_statistic(msk_q, msk_img, bins=bins[1:],
                               statistic=np.std)[0]
    if type(alpha) is tuple:
        alpha = np.linspace(alpha[0], alpha[1], len(std))
    threshold = alpha * std
    lower = mean - threshold
    upper = mean + threshold

    # single out the too low and too high pixels
    too_low = img < lower[int_q]
    too_hi = img > upper[int_q]

    mask = mask * ~too_low * ~too_hi
    return mask.astype(bool)
Example 11
def computeXvsAngle(var):
    out1,bins1,_ = binned_statistic(elongated_angles[np.where(isRightSize & isElongated)],var[np.where(isRightSize & isElongated)],bins=nbins,statistic='mean')
    out2,bins2,_ = binned_statistic(collapsed_angles[np.where(isRightSize & isCollapsed)],var[np.where(isRightSize & isCollapsed)],bins=nbins,statistic='mean')
    full_var = np.concatenate((out2,out1))
    #make edges centers
    bins1 += (bins1[1]-bins1[0])/2
    bins2 += (bins2[1]-bins2[0])/2
    full_bins = np.concatenate((bins2[:-1],bins1[:-1]))
    return full_var, full_bins
    def test_1d_median(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)

        assert_array_almost_equal(stat1, stat2)
        assert_array_almost_equal(edges1, edges2)
    def test_1d_std(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, "std", bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.std, bins=10)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example 14
def binPhi(df):
  bins = np.linspace(0,2*np.pi,19)
  ratio_mean, bins, foo = binned_statistic(df['phi']+df['phi0'],df['ratio'],bins=bins,statistic=np.mean,range=None)
  ratio_std, bins, foo = binned_statistic(df['phi']+df['phi0'],df['ratio'],bins=bins,statistic=np.std,range=None)
  df2 = pd.DataFrame()
  df2['phi'] = (bins[1:]+bins[:-1])/2.
  df2['ratio'] = ratio_mean
  df2['ratio_std'] = ratio_std
  return df2
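A short usage sketch for `binPhi` with a synthetic DataFrame, assuming pandas is imported as `pd` and `binned_statistic` is in scope as in the snippet above:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'phi': np.random.uniform(0, 2 * np.pi, 1000),  # phase angle in radians
    'phi0': 0.0,                                   # zero offset for this sketch
    'ratio': np.random.normal(1.0, 0.1, 1000),
})
binned = binPhi(df)  # columns: phi (bin centers), ratio (per-bin mean), ratio_std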
    def test_1d_median(self):
        x = self.x
        v = self.v

        stat1, edges1, bc = binned_statistic(x, v, "median", bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example 16
    def test_1d_max(self):
        x = self.x
        v = self.v
        
        stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10)
        stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10)

        assert_allclose(stat1, stat2)
        assert_allclose(edges1, edges2)
Example 17
def plotgen(S_2, deltaX=30, SHADING=False):
	"""Generates and saves a 1D plot of the average structure function versus 
	   radius.

	   Argument format: (S_2, deltaX, SHADING=False). Plots are 
	   created using the resulting matrices from structgen, requiring the same
	   deltaX that was used in structgen.

	   NOTE: If SHADING is set to "True", then a shaded plot will be generated.
	   There may be some bugs, however; so False (default) is recommended."""

	dX = deltaX                      # This is simply the maximum absolute value of "dx". So if dX = 1, then dx = {-1,0,1}.
	dY = np.copy(dX)                      # Same as above, but for "dy". For simplicity, let it be the same as dX.
	nmax = abs(2*dX)+1

	# Goal: Create a 1D plot of the average value of structure function (inside a thin ring
	#       at radius r) versus radius. One plot includes a shaded region indicating standard deviation.
	x = np.linspace(-dX,dX,nmax)
	y = np.linspace(-dY,dY,nmax)
	xx, yy = np.meshgrid(x,y)

	maxradius = math.floor( (dX**2 + dY**2)**0.5 )
	mult = 1                        # Increases or decreases the numbers of bins used. Most accurate results at mult=1.
	reselements = math.floor(mult*maxradius)
		                        # This is the number of "resolution elements" (i.e. the number of points
		                        #      on the struct_funct vs. radius plot) that we're dealing with.

	radiusmap = (xx**2 + yy**2)**0.5
	struct_funct, edges, counts = ss.binned_statistic(
	    radiusmap[radiusmap<maxradius], S_2[radiusmap<maxradius], statistic=np.nanmean, bins = reselements)
	std, edges, counts = ss.binned_statistic(
	    radiusmap[radiusmap<maxradius], S_2[radiusmap<maxradius], statistic=np.std, bins = reselements)

		# PLOTTING
	# No shading
	if SHADING==False:
		plt.figure(2)
		plt.plot(np.arange(reselements)/mult,struct_funct,'r.',label='S_2 Average')
		plt.plot(np.arange(reselements)/mult,struct_funct+std,'r:')
		plt.plot(np.arange(reselements)/mult,struct_funct-std,'r:')
		plt.title('Average Structure Function vs. Radial "Distance" from Center of S_2 Plots')
		plt.xlabel(' "Radius" ')
		plt.ylabel('Average S_2')
		plt.legend(loc='upper left')
		plt.savefig('plot2D_noshading.png')
	else:
	# Yes, shading
		plt.figure(3)
		plt.plot(np.arange(reselements)/mult,struct_funct,'k.',label='S_2 Average')

		plt.fill_between(np.arange(reselements)/mult, struct_funct+std, struct_funct-std, facecolor='pink')

		plt.title('Average Structure Function vs. Radial "Distance" from Center of S_2 Plots')
		plt.xlabel(' "Radius" ')
		plt.ylabel('Average S_2')
		plt.legend(loc='upper left')
		plt.savefig('plot2D_yesshading.png')
    def binned(self, bins):
        '''
        Bin up by the bins given. The imagelist will no longer make any sense.
        '''
        med_flux, _, _ = stats.binned_statistic(self.mjd % 1, self.flux,
                                                bins=bins, statistic='median')
        med_mjd, _, _ = stats.binned_statistic(self.mjd % 1, self.mjd % 1,
                                               bins=bins, statistic='median')
        return self.__class__(med_mjd, med_flux, self.x, self.y, self.meta)
Example 19
    def binned_std(self, bins):
        # binned_statistic returns (statistic, edges, binnumber); the original
        # two-value unpacking raised a ValueError
        mean, edges, _ = stats.binned_statistic(
            self._angles, self._rs, statistic="mean",
            bins=bins, range=(0, 2 * np.pi),
        )
        # mean of the squared values, needed for std = sqrt(<x^2> - <x>^2);
        # the original binned self._rs again, which made the std identically zero
        sqr, _, _ = stats.binned_statistic(
            self._angles, self._rs ** 2, statistic="mean",
            bins=bins, range=(0, 2 * np.pi),
        )
        return (edges[1:] + edges[:-1]) / 2, np.sqrt(sqr - mean * mean)
Example 20
def RunningAverage(x,y, bins=6):
	"""
	Input: X and Y and number of bins.
	Output: Xbin_centres, Xerr, Ybin_centres, Yerr
	"""
	mean, binedges, num = ss.binned_statistic(x, y, bins=bins)
	error, binedges, num = ss.binned_statistic(x, y, statistic=np.std, bins=bins)
	xerr = binedges[1] - binedges[0]
	x = (binedges[:-1] + binedges[1:])/2.0
	return x, xerr, mean, error
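A brief usage sketch for `RunningAverage` with synthetic data, assuming the function above is in scope with `ss` bound to `scipy.stats`. Note that the returned `xerr` is the full bin width, so halve it for symmetric error bars about the bin centers:

import numpy as np
import matplotlib.pyplot as plt

x = np.random.uniform(0, 10, 500)
y = 2 * x + np.random.randn(500)

xc, xerr, ymean, ystd = RunningAverage(x, y, bins=6)
plt.errorbar(xc, ymean, xerr=xerr / 2, yerr=ystd, fmt='o')
plt.show()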
Example 21
def compute_binned_errors(x, y, nbins):
    std, ledges, _ = stats.binned_statistic(x, y, statistic='std',
            bins=nbins)
    count, _, _ = stats.binned_statistic(x, y, statistic='count',
            bins=ledges)

    error = std / np.sqrt(count)
    error = np.append(error, error[-1])

    return ledges, error
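The returned error is the standard error of the mean per bin (std / sqrt(count)), with the last value duplicated so its length matches the edge array for step-style plotting. A brief usage sketch, assuming `stats` is `scipy.stats` as in the snippet above:

import numpy as np
import matplotlib.pyplot as plt

x = np.random.uniform(0, 1, 2000)
y = x + 0.2 * np.random.randn(2000)
ledges, err = compute_binned_errors(x, y, nbins=10)
plt.step(ledges, err, where='post')
plt.show()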
def ring_blur_mask(img, r, int_r, alpha, bins=None, mask=None):
    """
    Apply an annular mask: check the ring statistics and mask any pixels
    whose value is more than alpha * std away from the ring mean.

    Parameters
    ----------
    img: 2darray
        The image
    r: 2darray
        The array which maps pixels to radii
    int_r: 2darray
        The array which maps pixels to integer radial bin indices
    alpha: float or tuple or 1darray
        The number of acceptable standard deviations; if a tuple, a linear
        distribution of alphas from alpha[0] to alpha[1] is used, if an
        array, it is used directly as the distribution of alphas
    bins: int, optional
        Number of bins used in the integration; defaults to the max integer
        radius + 1
    mask: 1darray
        A starting flattened mask

    Returns
    -------
    2darray:
        The mask
    """

    if mask is None:
        mask = np.ones(img.shape).astype(bool)
    if bins is None:
        bins = int_r.max() + 1
    if mask.shape != img.shape:
        mask = mask.reshape(img.shape)
    msk_img = img[mask]
    msk_r = r[mask]

    # integration
    mean = sts.binned_statistic(msk_r, msk_img, bins=bins,
                                # range=[0, r.max()],
                                statistic='mean')[0]
    std = sts.binned_statistic(msk_r, msk_img, bins=bins,
                               # range=[0, r.max()],
                               statistic=np.std)[0]
    if type(alpha) is tuple:
        alpha = np.linspace(alpha[0], alpha[1], len(std))
    threshold = alpha * std
    lower = mean - threshold
    upper = mean + threshold

    print(len(mean), np.max(int_r), np.min(int_r))
    # single out the too low and too high pixels
    too_low = img < lower[int_r]
    too_hi = img > upper[int_r]

    mask = mask * ~too_low * ~too_hi
    return mask.astype(bool)
Example 23
	def structure_function(self, bins):
		"""
		compute the structure function of the light curve at given time lags
		"""
		dt =  np.subtract.outer(self.t,self.t)[np.tril_indices(self.t.shape[0], k=-1)]
		dm =  np.subtract.outer(self.y,self.y)[np.tril_indices(self.y.shape[0], k=-1)]
		sqrsum, bins, _ = binned_statistic(dt, dm**2, bins=bins, statistic='sum')
		n, _, _ = binned_statistic(dt, dm**2, bins=bins, statistic='count')
		SF = np.sqrt(sqrsum/n)
		lags = 0.5*(bins[1:] + bins[:-1])

		return lags, SF
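A standalone sketch of the same structure-function computation on a synthetic light curve, assuming times `t` and magnitudes `y`:

import numpy as np
from scipy.stats import binned_statistic

t = np.sort(np.random.uniform(0, 100, 300))
y = np.random.randn(300)

# all pairwise lags and magnitude differences (lower triangle, i > j)
tril = np.tril_indices(t.shape[0], k=-1)
dt = np.subtract.outer(t, t)[tril]
dm = np.subtract.outer(y, y)[tril]

bins = np.linspace(dt.min(), dt.max(), 21)
sqrsum, edges, _ = binned_statistic(dt, dm**2, bins=bins, statistic='sum')
n, _, _ = binned_statistic(dt, dm**2, bins=bins, statistic='count')
SF = np.sqrt(sqrsum / n)               # RMS magnitude difference per lag bin
lags = 0.5 * (edges[1:] + edges[:-1])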
def make_binned_transit(time, detrended_flux, nb=200):
    import scipy.stats as ss
    from statsmodels.robust.scale import mad

    bins = np.linspace(min(time), max(time), num=nb)
    binned_time = np.array([0.5*(bins[i+1] + bins[i]) 
        for i in range(len(bins)-1)])
    binned_flux = ss.binned_statistic(time, detrended_flux, bins=bins)[0]
    binned_err = 1.4826*ss.binned_statistic(time, detrended_flux, bins=bins, 
            statistic=mad)[0]

    return binned_time, binned_flux, binned_err
Example 25
def binned_outlier(img, r, alpha, bins, mask=None):
    """
    Generates a mask by identifying outlier pixels in bins and masks any
    pixels which have a value greater or less than alpha * std away from the
    mean

    Parameters
    ----------
    img: 2darray
        The  image
    r: 2darray
        The  array which maps pixels to bins
    alpha: float or tuple or 1darray
        The number of acceptable standard deviations; if a tuple, a linear
        distribution of alphas from alpha[0] to alpha[1] is used, if an
        array, it is used directly as the distribution of alphas
    bins: list
        The bin edges
    mask: 1darray
        A starting flattened mask

    Returns
    -------
    2darray:
        The mask
    """

    if mask is None:
        working_mask = np.ones(img.shape).astype(bool)
    else:
        working_mask = mask.copy()
    if working_mask.shape != img.shape:
        working_mask = working_mask.reshape(img.shape)
    msk_img = img[working_mask]
    msk_r = r[working_mask]

    int_r = np.digitize(r, bins[:-1], True) - 1
    # integration
    mean = sts.binned_statistic(msk_r, msk_img, bins=bins,
                                statistic='mean')[0]
    std = sts.binned_statistic(msk_r, msk_img, bins=bins,
                               statistic=np.std)[0]
    if type(alpha) is tuple:
        alpha = np.linspace(alpha[0], alpha[1], len(std))
    threshold = alpha * std
    lower = mean - threshold
    upper = mean + threshold

    # single out the too low and too high pixels
    working_mask *= img > lower[int_r]
    working_mask *= img < upper[int_r]

    return working_mask.astype(bool)
    def test_dd_binnumbers_unraveled(self):
        X = self.X
        v = self.v

        stat, edgesx, bcx = binned_statistic(X[:, 0], v, "mean", bins=10)
        stat, edgesy, bcy = binned_statistic(X[:, 1], v, "mean", bins=10)
        stat, edgesz, bcz = binned_statistic(X[:, 2], v, "mean", bins=10)

        stat2, edges2, bc2 = binned_statistic_dd(X, v, "mean", bins=10, expand_binnumbers=True)

        assert_allclose(bcx, bc2[0])
        assert_allclose(bcy, bc2[1])
        assert_allclose(bcz, bc2[2])
Example 27
    def test_1d_multi_values(self):
        x = self.x
        v = self.v
        w = self.w

        stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10)
        stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10)
        stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10)

        assert_allclose(stat2[0], stat1v)
        assert_allclose(stat2[1], stat1w)
        assert_allclose(edges1v, edges2)
        assert_allclose(bc1v, bc2)
	def fitData(self, n = 10, quality = 1):
		self.n = n
		if not self.loaded:
			self.loadData(quality)
		bins, _, _ = self.dataset.getBinnedData(quality, 100)
		mu, _, _ = stats.binned_statistic(self.data[0], self.data[1], bins=n)
		sig, _, _ = stats.binned_statistic(self.data[0], self.data[1], statistic=
										lambda x: np.var(x), bins=n)
		self.best = np.concatenate((mu, sig))
		print(self.best)
		start = time.perf_counter()  # time.clock() was removed in Python 3.8
		self.trainDifferentialEvolution(fun=self.evalTrainCost, nGenerations = 1000)
		print("Done in", np.ceil(time.perf_counter() - start), "s")
Example 29
def plotBias(paramPaths,checkPaths,correct,y,identifier): #plot bias data for path with option corrected/not corrected
  res, dfParam = parametrizeBias(paramPaths,level=1)
  qualityCuts = getQuCuts()
  dfAll = getFitData(checkPaths,qualityCuts)
  if correct: dfAll = correctBias(dfAll,res.x)
  rbins = np.linspace(round(dfAll['r'].min()),round(dfAll['r'].max()),9)
  rmeans = (rbins[1:]+rbins[:-1])/2.
  fig= plt.figure(figsize=(8,5))
  ax= fig.add_subplot(111)
  colors = plt.cm.Set1(np.linspace(0, 1, 5))
  ax.axhline(0,c='k',linestyle='--',lw=2)
  offset = -10
  dfAll['phiRel'] = np.array(dfAll['phi']+dfAll['phi0'])
  for phi, philabel, c in zip(['all',0,90],[r'$\varphi \in [0^{\circ},360^{\circ}]$',r'$\varphi = 0^{\circ} \pm 15^{\circ}$',r'$\varphi = 90^{\circ} \pm 15^{\circ}$'],colors):
    dPhi = 15
    if phi == 'all': 
      df = dfAll
    elif phi == 0:
      print "!0",  phi
      sel1 = (0<=degrees_(dfAll['phiRel']))*(degrees_(dfAll['phiRel'])<=dPhi)+(360-dPhi<=degrees_(dfAll['phiRel']))*(degrees_(dfAll['phiRel'])<=360)
      sel2 = (math.radians(180-dPhi)<=np.array(dfAll['phiRel']))*(np.array(dfAll['phiRel'])<=math.radians(180+dPhi))
      df = dfAll[sel1+sel2]
    elif phi == 90:
      print(phi)
      sel1 = (90-dPhi<=degrees_(dfAll['phiRel']))*(degrees_(dfAll['phiRel'])<=90+dPhi)
      sel2 = (270-dPhi<=degrees_(dfAll['phiRel']))*(degrees_(dfAll['phiRel'])<=270+dPhi)
      df = dfAll[sel1+sel2]
    if y == 'sqrt_nmc':
      y_mean, bins, foo = binned_statistic(df['r'],(df['nmumd']-df['nmumc'])/np.sqrt(df['nmumc']),bins=rbins,statistic=np.mean,range=None)
      y_std, bins, foo = binned_statistic(df['r'],(df['nmumd']-df['nmumc'])/np.sqrt(df['nmumc']),bins=rbins,statistic=np.std,range=None)
      ylabel = '$(N_\mathrm{Rec}-N_\mathrm{MC})/\sqrt{N_\mathrm{MC}}$'
    elif y == 'nmc':
      y_mean, bins, foo = binned_statistic(df['r'],(df['nmumd']-df['nmumc'])/df['nmumc'],bins=rbins,statistic=np.mean,range=None)
      y_std, bins, foo = binned_statistic(df['r'],(df['nmumd']-df['nmumc'])/df['nmumc'],bins=rbins,statistic=np.std,range=None)
      ylabel = '$(N_\mathrm{Rec}-N_\mathrm{MC})/N_\mathrm{MC}$'
    x_mean = rmeans+offset
    xlabel = r"$r/\mathrm{m}$"
    xlim = [rbins[0],rbins[-1]]
    ylim=[-0.61,0.61]
    ax.errorbar(x_mean,y_mean,yerr=y_std,marker='o',mfc="None",mec=c,ecolor=c,ms=9,mew=2,elinewidth=2,capsize=0,linewidth=0,zorder=1,label=philabel)   
    offset += 10
  customAx(ax,xlabel=xlabel,ylabel=ylabel,legloc=1,xlim=xlim,ylim=ylim,ncol=1,bbox=None,
    fontS=18,frameon=True,xscale_log=False,yscale_log=False,ticklabelsize=18,labelsize=22,legend=True,
    handleL=1,colSp=.2,labelSp=0.001,handleTp=.1,borderP=.2,yaxis_labelpad=5)    
  ax.minorticks_on()
  figpath = 'pics/RecEff'
  if correct: correctStr = '-corrected'
  else: correctStr = '-uncorrected'
  figname = 'bias-%(identifier)s%(correctStr)s-%(y)s.pdf'%locals()
  print('save fig', figpath, figname)
  fig.savefig('%(figpath)s/%(figname)s'%locals(),bbox_inches='tight')
Example 30
    def test_1d_range_keyword(self):
        # Regression test for gh-3063, range can be (min, max) or [(min, max)]
        np.random.seed(9865)
        x = np.arange(30)
        data = np.random.random(30)

        mean, bins, _ = binned_statistic(x[:15], data[:15])
        mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)])
        mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14))

        assert_array_almost_equal(mean, mean_range)
        assert_array_almost_equal(bins, bins_range)
        assert_array_almost_equal(mean, mean_range2)
        assert_array_almost_equal(bins, bins_range2)
Example 31
y = np.nan_to_num(tmpdata)

with open('parameters.json') as data_file:
    dic = json.load(data_file)

model = dic['model']

bin_num = dic['num_bins']
parameter = dic['parameter']

#MODULO2
#PROCESS THE DATA(BINNING)

#binning model matrix,binned matrix==stat
stat, bin_edges, binnum = stats.binned_statistic(range(X.shape[1]),
                                                 X,
                                                 'median',
                                                 bins=int(bin_num))

#MODULO3
#APPLY THE MODEL AND PRINT THE RESULT

if model == 'svm.LinearSVC()':
    clf = svm.LinearSVC(C=parameter)
elif model == 'RandomForestClassifier()':
    clf = RandomForestClassifier(n_estimators=parameter, n_jobs=-1)
elif model == 'LinearDiscriminantAnalysis()':
    clf = LinearDiscriminantAnalysis()

clf.fit(stat, y)

joblib.dump(clf, "model.pkl")
Example 32
def plot_histograms(data, sims, types, figname):
    cols = [
        "x1",
        "c",
        "zHD",
        "FITPROB",
        "FITCHI2",
        "cERR",
        "x1ERR",
        "PKMJDERR",
        "HOST_LOGMASS",
        "SNRMAX1",
        "SNRMAX2",
        "SNRMAX3",
        #"SNRMAX_g",
        #"SNRMAX_r",
        #"SNRMAX_i",
        #"SNRMAX_z",
        ["zHD", "c"],
        ["zHD", "x1"],
        ["zHD", "HOST_LOGMASS"],
        "NDOF",
        #"chi2_g",
        #"chi2_r",
        #"chi2_i",
        #"chi2_z",
        "__MUDIFF",
    ]
    restricted = [
        "FITCHI2", "SNRMAX1", "SNRMAX2", "SNRMAX3", "SNRMAX_g", "SNRMAX_r",
        "SNRMAX_i", "SNRMAX_z", "chi2_g", "chi2_r", "chi2_i", "chi2_z"
    ]
    logs = [
        "SNRMAX1", "SNRMAX2", "SNRMAX3", "SNRMAX_g", "SNRMAX_r", "SNRMAX_i",
        "SNRMAX_z", "FITCHI2", "chi2_g", "chi2_r", "chi2_i", "chi2_z",
        "__MUDIFF"
    ]

    cs = [
        "#1976D2", "#FB8C00", "#8BC34A", "#E53935", "#4FC3F7", "#43A047",
        "#F2D026", "#673AB7", "#FFB300", "#E91E63", "#F2D026"
    ] * 3
    usecols = []
    for c in cols:
        if isinstance(c, list):
            if (c[0] in data[0][0].columns) & (c[1] in data[0][0].columns):
                usecols.append(c)
        else:
            if c in data[0][0].columns:
                usecols.append(c)

    for c in restricted:
        for x in data + sims:
            if c in cols:
                x[0].loc[x[0][c] < -10, c] = -9

    ncols = (len(cols) + 3) // 3
    fig, axes = plt.subplots(3,
                             ncols,
                             figsize=(1 + 2.5 * ncols, 8),
                             gridspec_kw={
                                 "wspace": 0.13,
                                 "hspace": 0.4
                             })
    for ax in axes.flatten():
        ax.set_axis_off()

    for c, ax in zip(cols, axes.flatten()):
        ax.set_axis_on()
        u = 0.95 if c in restricted else 0.99

        #HISTOGRAM
        if not isinstance(c, list):
            minv = min([x[0][c].quantile(0.01) for x in data + sims])
            maxv = max([x[0][c].quantile(u) for x in data + sims])
            bins = np.linspace(minv, maxv, 20)  # Keep binning uniform.
            bc = 0.5 * (bins[1:] + bins[:-1])

            for i, (d, n) in enumerate(data):
                hist, _ = np.histogram(d[c], bins=bins)
                err = np.sqrt(hist)
                area = (bins[1] - bins[0]) * hist.sum()
                delta = (bc[1] - bc[0]) / 20
                ax.errorbar(bc + i * delta,
                            hist / area,
                            yerr=err / area,
                            fmt="o",
                            ms=2,
                            elinewidth=0.75,
                            label=n)

            lw = 1 if len(sims) < 3 else 0.5
            for index, (s, n) in enumerate(sims):
                mask = np.isin(s["TYPE"], types)
                ia = s[mask]
                nonia = s[~mask]
                if len(np.unique(s[c])) == 1:
                    continue
                hist, _ = np.histogram(s[c], bins=bins)
                area = (bins[1] - bins[0]) * hist.sum()

                ax.hist(s[c],
                        bins=bins,
                        histtype="step",
                        weights=np.ones(s[c].shape) / area,
                        label=n,
                        linewidth=lw,
                        color=cs[index])
                if len(sims) == 1 and nonia.shape[0] > 10 and len(data) == 1:
                    logging.info(f"Nonia shape is {nonia.shape}")
                    ax.hist(ia[c],
                            bins=bins,
                            histtype="step",
                            weights=np.ones(ia[c].shape) / area,
                            linestyle="--",
                            label=n + " Ia only",
                            linewidth=1)
                    ax.hist(nonia[c],
                            bins=bins,
                            histtype="step",
                            weights=np.ones(nonia[c].shape) / area,
                            linestyle=":",
                            label=n + " CC only",
                            linewidth=1)

            if "MUDIFF" in c:
                ax.set_xlabel("FAKE MUDIFF")
            else:
                ax.set_xlabel(c)
            if c in logs:
                ax.set_yscale("log")
                ax.tick_params(axis="y", which="both", labelsize=2)
                labels = ["" for item in ax.get_yticklabels()]
                ax.set_yticklabels(labels)

            # Add the reduced chi2 value if there are only one data and one sim
            if len(sims) < 3 and len(data) == 1:
                data_col = data[0][0][c]
                data_hist, _ = np.histogram(data_col, bins=bins)
                data_err = 1 / np.sqrt(data_hist)
                data_dist, _ = np.histogram(data_col, bins=bins, density=True)

                for i, (s, n) in enumerate(sims):
                    sim_col = s[c]

                    sim_hist, _ = np.histogram(sim_col, bins=bins)
                    sim_err = 1 / np.sqrt(data_hist)
                    sim_dist, _ = np.histogram(sim_col,
                                               bins=bins,
                                               density=True)

                    dist_error = np.sqrt((data_dist * data_err)**2 +
                                         (sim_dist * sim_err)**2)
                    dist_diff = data_dist - sim_dist

                    chi2 = np.nansum(((dist_diff / dist_error)**2))
                    ndof = len(bc)
                    red_chi2 = chi2 / ndof

                    ax.text(
                        0.99,
                        0.99 - 0.1 * i,
                        f"{chi2:0.1f}/{ndof:d}={red_chi2:0.1f}",
                        horizontalalignment="right",
                        verticalalignment="top",
                        transform=ax.transAxes,
                        color=cs[i],
                        fontsize=8,
                    )
            ax.set_yticklabels([])
        else:
            minv = min([x[0][c[0]].quantile(0.01) for x in data + sims])
            maxv = max([x[0][c[0]].quantile(u) for x in data + sims])
            bins = np.linspace(minv, maxv, 20)

            for i, (s, n) in enumerate(sims):
                sim_xcol = s[c[0]]
                sim_ycol = s[c[1]]
                bin_medians, bin_edges, binnumber = binned_statistic(
                    sim_xcol, sim_ycol, statistic='median', bins=bins)
                bincenters = (bin_edges[:-1] + bin_edges[1:]) / 2.
                ax.plot(bincenters,
                        bin_medians,
                        label=n,
                        alpha=.9,
                        color=cs[i])

            for i, (d, n) in enumerate(data):
                data_xcol = d[c[0]]
                data_ycol = d[c[1]]
                try:
                    bin_medians, bin_edges, binnumber = binned_statistic(
                        data_xcol, data_ycol, statistic='median', bins=bins)
                    bin_stds, bin_edges, binnumber = binned_statistic(
                        data_xcol, data_ycol, statistic='std', bins=bins)
                    bin_counts, bin_edges, binnumber = binned_statistic(
                        data_xcol, data_ycol, statistic='count', bins=bins)

                    bincenters = (bin_edges[:-1] + bin_edges[1:]) / 2.
                    ax.errorbar(bincenters,
                                bin_medians,
                                yerr=bin_stds / np.sqrt(bin_counts),
                                fmt='o',
                                label=n,
                                alpha=.9)
                except:
                    pass
            ax.set_xlabel(c[0])
            ax.set_ylabel(c[1])
            #ax.legend()
    handles, labels = ax.get_legend_handles_labels()
    bb = (fig.subplotpars.left, fig.subplotpars.top + 0.02,
          fig.subplotpars.right - fig.subplotpars.left, 0.1)

    #for ax in axes.flatten():
    #    ax.set_yticklabels([])

    fig.legend(handles,
               labels,
               loc="upper center",
               ncol=4,
               mode="expand",
               frameon=False,
               bbox_to_anchor=bb,
               borderaxespad=0.0,
               bbox_transform=fig.transFigure)
    # plt.legend(bbox_to_anchor=(-3, 2.3, 4.0, 0.2), loc="lower left", mode="expand", ncol=3, frameon=False)
    # plt.tight_layout(rect=[0, 0, 0.75, 1])
    fig.savefig(figname, bbox_inches="tight", dpi=600)
Example 33
    def fit_data(self, x, y, p0=None, bounds=None):
        #nParams=5
        ## Bounds
        #self.setup_bounds(bounds,nParams) # TODO
        ## Initial conditions
        #self.setup_guess(p0,bounds,nParams) # TODO

        # Cleaning data, and store it in object
        x, y = self.clean_data(x, y)

        I = np.argsort(x)
        x = x[I]
        y = y[I]

        # Estimating deltas
        xMin, xMax = np.min(x), np.max(x)
        yMin, yMax = np.min(y), np.max(y)
        DeltaX = (xMax - xMin) * 0.02
        DeltaY = (yMax - yMin) * 0.02

        # Binning data
        x_bin = np.linspace(xMin, xMax, min(200, len(x)))
        x_lin = x_bin[0:-1] + np.diff(x_bin) / 2  # bin centers
        #y_lin=np.interp(x_lin,x,y) # TODO replace by binning
        y_lin = np.histogram(x, x_bin, weights=y)[0] / np.histogram(x,
                                                                    x_bin)[0]
        y_lin, _, _ = stats.binned_statistic(x,
                                             y,
                                             statistic='mean',
                                             bins=x_bin)
        x_lin, _, _ = stats.binned_statistic(x,
                                             x,
                                             statistic='mean',
                                             bins=x_bin)
        bNaN = ~np.isnan(y_lin)
        y_lin = y_lin[bNaN]
        x_lin = x_lin[bNaN]

        # --- Find good guess of parameters based on data
        # SpdGenOn
        iOn = np.where(y > 0)[0][0]
        SpdGenOn_0 = x[iOn]
        SpdGenOn_Bnds = (max(x[iOn] - DeltaX,
                             xMin), min(x[iOn] + DeltaX, xMax))
        # Slpc
        Slpc_0 = 5
        Slpc_Bnds = (0, 10)
        # RtTq
        RtTq_0 = yMax
        RtTq_Bnds = (yMax - DeltaY, yMax + DeltaY)
        # RtGnSp
        iCloseRt = np.where(y > yMax * 0.50)[0][0]
        RtGnSp_0 = x[iCloseRt]
        RtGnSp_Bnds = (RtGnSp_0 - DeltaX * 2, RtGnSp_0 + DeltaX * 2)
        # Rgn2K
        #print('>>>',SpdGenOn_0, RtGnSp_0)
        bR2 = np.logical_and(x > SpdGenOn_0, x < RtGnSp_0)
        exponents = [2]
        _, pfit, _ = fit_polynomial_discrete(x[bR2], y[bR2], exponents)
        #print(pfit)
        Rgn2K_0 = pfit[0]
        Rgn2K_Bnds = (pfit[0] / 2, pfit[0] * 2)

        #         import matplotlib.pyplot as plt
        #         fig,ax = plt.subplots(1, 1, sharey=False, figsize=(6.4,4.8)) # (6.4,4.8)
        #         fig.subplots_adjust(left=0.12, right=0.95, top=0.95, bottom=0.11, hspace=0.20, wspace=0.20)
        #         ax.plot(x,y ,'-'   , label='')
        #         ax.plot(x[bR2],y[bR2],'ko', label='')
        #         ax.plot(x_lin,y_lin,'bd', label='')
        #         ax.set_xlabel('')
        #         ax.set_ylabel('')
        #         ax.tick_params(direction='in')
        #         plt.show()
        def minimize_me(p):
            RtGnSp, RtTq, Rgn2K, SlPc, SpdGenOn = p
            y_model = np.array(
                [gentorque(x_lin, (RtGnSp, RtTq, Rgn2K, SlPc, SpdGenOn))])
            eps = np.mean((y_lin - y_model)**2)
            #             print(eps,p)
            return eps

        bounds = (RtGnSp_Bnds, RtTq_Bnds, Rgn2K_Bnds, Slpc_Bnds, SpdGenOn_Bnds)
        p0 = [RtGnSp_0, RtTq_0, Rgn2K_0, Slpc_0, SpdGenOn_0]
        #print('Bounds',bounds)
        #print('p0',p0)
        res = so.minimize(minimize_me, x0=p0, bounds=bounds, method='SLSQP')
        pfit = res.x

        # --- Reporting information about the fit (after the fit)
        y_fit = gentorque(x, pfit)
        self.store_fit_info(y_fit, pfit)
        # --- Return a fitted function
        self.model['fitted_function'] = lambda x: gentorque(x, pfit)
Example 34
youwe = b[:,4]
ind = np.where(youwe <= -7.0)[0]
print(len(ind))
b = b[ind]
print(np.shape(b))

opt = b[:,1]
inf = b[:,2]
mssfr = b[:,3]
uvssfr = b[:,4]
glug = np.vectorize(log10)
mass = glug(b[:,5])
nn = b[:,6]
res = np.asarray([mssfr[i] - uvssfr[i] for i in range(len(mssfr))])

env = [log10(nn[i]) for i in range(len(nn))]

ind = np.where(res<=-5.0)[0]
print(b[:,0][ind])

h = binned_statistic(res,nn, statistic = 'mean',bins = 100)
b = [(h[1][i] + h[1][i+1])*0.5 for i in range(100)]
envz = np.log10(h[0])

plt.scatter(res, env)
plt.show()

plt.scatter(b,envz)
plt.show()
Example 35
x = sim["/PartType0/Coordinates"][:,0]
y = sim["/PartType0/Coordinates"][:,1]
vx = sim["/PartType0/Velocities"][:,0]
vy = sim["/PartType0/Velocities"][:,1]
u = sim["/PartType0/InternalEnergy"][:]
S = sim["/PartType0/Entropy"][:]
P = sim["/PartType0/Pressure"][:]
rho = sim["/PartType0/Density"][:]

r = np.sqrt((x-1)**2 + (y-1)**2)
v = -np.sqrt(vx**2 + vy**2)

# Bin the data
r_bin_edge = np.arange(0., 1., 0.02)
r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1])
rho_bin,_,_ = stats.binned_statistic(r, rho, statistic='mean', bins=r_bin_edge)
v_bin,_,_ = stats.binned_statistic(r, v, statistic='mean', bins=r_bin_edge)
P_bin,_,_ = stats.binned_statistic(r, P, statistic='mean', bins=r_bin_edge)
S_bin,_,_ = stats.binned_statistic(r, S, statistic='mean', bins=r_bin_edge)
u_bin,_,_ = stats.binned_statistic(r, u, statistic='mean', bins=r_bin_edge)
rho2_bin,_,_ = stats.binned_statistic(r, rho**2, statistic='mean', bins=r_bin_edge)
v2_bin,_,_ = stats.binned_statistic(r, v**2, statistic='mean', bins=r_bin_edge)
P2_bin,_,_ = stats.binned_statistic(r, P**2, statistic='mean', bins=r_bin_edge)
S2_bin,_,_ = stats.binned_statistic(r, S**2, statistic='mean', bins=r_bin_edge)
u2_bin,_,_ = stats.binned_statistic(r, u**2, statistic='mean', bins=r_bin_edge)
rho_sigma_bin = np.sqrt(rho2_bin - rho_bin**2)
v_sigma_bin = np.sqrt(v2_bin - v_bin**2)
P_sigma_bin = np.sqrt(P2_bin - P_bin**2)
S_sigma_bin = np.sqrt(S2_bin - S_bin**2)
u_sigma_bin = np.sqrt(u2_bin - u_bin**2)
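The scatter above comes from the moment identity sigma^2 = <x^2> - <x>^2, which needs only 'mean' passes over the data. A quick self-contained check of that identity against statistic=np.std (both use ddof=0):

import numpy as np
from scipy import stats

r = np.random.uniform(0, 1, 10000)
rho = np.exp(-r) + 0.05 * np.random.randn(10000)
edges = np.arange(0., 1., 0.02)

m, _, _ = stats.binned_statistic(r, rho, statistic='mean', bins=edges)
m2, _, _ = stats.binned_statistic(r, rho**2, statistic='mean', bins=edges)
s, _, _ = stats.binned_statistic(r, rho, statistic=np.std, bins=edges)
# the two routes to the per-bin standard deviation agree
assert np.allclose(np.sqrt(m2 - m**2), s)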
Example 36
    def sample(self, group):
        # TODO: implement progressive averaging to handle very long trajs
        # TODO: implement memory cleanup
        if not isinstance(group, AtomGroup):
            raise TypeError("The first argument passed to "
                            "Profile.sample() must be an AtomGroup.")
        box = group.universe.trajectory.ts.dimensions[:3]
        if self._range is None:
            self._determine_range(box)
            self._determine_bins()
        v = np.prod(box)
        self._totvol.append(v)

        if self.interface is None:
            pos = group.positions[::, self._dir]
        else:
            deltabin = 1 + (self._nbins - 1) // 2
            pos = IntrinsicDistance(self.interface,
                                    symmetry=self.symmetry).compute(group)

            if self._MCnorm is False:
                rnd_accum = np.ones(self._nbins)

            else:
                rnd_accum = np.array(0)
                try:
                    size = kargs['MCpoints']
                except:
                    # assume atomic volumes of ~ 30 A^3 and sample
                    # 10 points per atomic volume as a rule of thumb
                    size1 = int(np.prod(box) / 3.)
                    # just in case 'unphysical' densities are used:
                    size2 = 10 * len(group.universe.atoms)
                    size = np.max([size1, size2])
                rnd = np.random.random((size, 3))
                rnd *= self.interface.universe.dimensions[:3]
                rnd_pos = IntrinsicDistance(
                    self.interface, symmetry=self.symmetry).compute(rnd)
                rnd_accum, bins, _ = stats.binned_statistic(rnd_pos,
                                                            np.ones(
                                                                len(rnd_pos)),
                                                            range=self._range,
                                                            statistic='sum',
                                                            bins=self._nbins)

        values = self.observable.compute(group)
        accum, bins, _ = stats.binned_statistic(pos,
                                                values,
                                                range=tuple(self._range),
                                                statistic='sum',
                                                bins=self._nbins)
        accum[~np.isfinite(accum)] = 0.0
        if self.interface is not None:
            accum[deltabin] = np.inf

        if self.sampled_values is None:
            self.sampled_values = accum.copy()
            if self.interface is not None:
                self.sampled_rnd_values = rnd_accum.copy()
            # stores the midpoints
            self.sampled_bins = bins[1:] - self.binsize / 2.
        else:
            self.sampled_values += accum
            if self.interface is not None:
                self.sampled_rnd_values += rnd_accum
        self._counts += 1
Example 37
sfe = cloud_irlum / mlum
R0 = 8.5e3
rgal = (R0**2 + t['distance']**2 -
        2 * R0 * t['distance'] * np.cos(t['x_coor'] * np.pi / 180))**0.5

rho = t['mlum_msun'].data * u.M_sun / (4 * np.pi *
                                       (t['radius'].data * u.pc)**3 / 3)

x = (((3 * np.pi / (32 * con.G * rho))**0.5).to(u.Myr)).value
y = sfe.data

fig, (ax1) = plt.subplots(1)
fig.set_size_inches(5, 4)
idx = mlum > 3e3
val, edges, _ = ss.binned_statistic(np.log10(x[idx]),
                                    np.log10(y[idx]),
                                    statistic=np.nanmedian,
                                    bins=10)

histdata, xedge, yedge = np.histogram2d(np.log10(x[idx]),
                                        np.log10(y[idx]),
                                        range=[[0, 2], [-2, 2]],
                                        bins=40)
ax1.scatter(x[idx], y[idx], edgecolor='k', facecolor='none', zorder=-99)
ax1.plot(1e1**(0.5 * (edges[1:] + edges[0:-1])), 1e1**val, color='green', lw=3)
ax1.set_xscale('log')
ax1.set_yscale('log')

ax1.set_xlabel(r'$t_{\mathrm{ff}}$' + r' (Myr)', size=16)
ax1.set_ylabel(r'$L_{\mathrm{IR}}/M_{\mathrm{CO}}\ (L_{\odot}/M_{\odot})$',
               size=16)
# ax1.set_xlim(1e1**])
Example 38
def fit_SED_lor(x,y,**keywords):
    # keywords are
    # use_range --- an n-length tuple of frequencies to use while fitting
    # Example: [[1,57],[63,117],[123,177]] here we fit from 1 to 57 Hz, 63 to 117 Hz, and 123 to 177 Hz, avoiding 60 Hz and its harmonics
    # x0    --- initial guess for the fit; this can be very important because the least-squares space over all the parameters is complex
    # There are two ways the function is fit. One is without binning: an error for the fit is calculated
    # by taking the standard deviation of the surrounding points, and the data below 10 Hz then has its error artificially
    # lowered so that the fitter doesn't ignore it when comparing it to the many more points at higher frequencies. The other way is the
    # log keyword, which log-bins the data and calculates the error for each bin, so there are around the same number of points
    # at low frequency as at high frequency.
    # Since there are fewer points at low frequencies than at high frequencies, I artificially increase the accuracy of the low-frequency points
    # below sigma_increase_cutoff by scaling the error that the fitter uses by sigma_increase_factor
    if ('sigma_increase_cutoff' in keywords):
        sigma_increase_cutoff = keywords['sigma_increase_cutoff']
    else:
        #define default cutoff
        sigma_increase_cutoff = 2. #(Hz)
    if ('sigma_increase_factor' in keywords):
        sigma_increase_factor = keywords['sigma_increase_factor']
    else:
        #define default factor
        sigma_increase_factor = 5.

    # bounds with out these some paramter might converge to non physical values
    if ('bounds' in keywords):
        bounds = keywords['bounds']
    else:
        #define default bounds
        print("default bounds used")
        bounds = ([10**-20,10**-20,0,0.00001],[10**-10,10**-10,3,0.001]) 
    if ('use_range' in keywords):
        use_range = keywords['use_range']
        # create an index of the values you want to fit
        index = np.where((x>use_range[0][0]) & (x<use_range[0][1]) )[0]
        for i in range(1,len(use_range)):
            index2 = np.where((x>use_range[i][0]) & (x<use_range[i][1]) )
            index = np.hstack((index,index2[0]))  
    else:
        index = range(0,x.shape[0])

    # initial conditions    
    if ('x0' in keywords):
        x0 = keywords['x0']
    else:
        #define default initial guess
        print("default initial guess used")        
        x0  = np.array([1.*10**(-15.75), 1.*10**(-17),1,0.00001]) # default values that work OK for superspec

    # log bin the data first or not
    if ('log' in keywords):
        print("SED will be log binned before fitting")
        log = 1
        bins = np.logspace(np.log10(x[0]),np.log10(x[x.shape[0]-1]),100) #100 logspaced bins 
    else:
        log = 0

    if log == 1:
        binnedfreq_temp =  binned_statistic(x[index], x[index], bins=bins)[0]
        binnedvals_temp = binned_statistic(x[index], y[index], bins=bins)[0]
        binnedvals_std = binned_statistic(x[index], y[index], bins=bins, statistic = std_of_mean)[0]
        binnedfreq = binnedfreq_temp[~np.isnan(binnedfreq_temp)]
        binnedvals = binnedvals_temp[~np.isnan(binnedfreq_temp)]
        binnedstd = binnedvals_std[~np.isnan(binnedfreq_temp)]

    freqs = x[index]
    vals = y[index]

    if log ==0: #when fitting, there are so many points at high frequencies compared to low frequencies that the fit would almost ignore the low-frequency end
        # I get an estimate of the noise by taking the standard deviation of each 100 consecutive points (there will be some error for the last 100 points)
        std_pts = 100 # if this number is too low it seems to bias the fits to the lower side
        low_freq_index = np.where(freqs<sigma_increase_cutoff)
        temp = np.zeros((vals.shape[0],std_pts))
        # here I estimate the error by looking at the 100 surrounding points and calculating the std
        for i in range(0,std_pts):
            temp[:,i] = np.roll(vals,-i)
        sigma = np.std(temp,axis = 1)
        sigma[low_freq_index] = sigma[low_freq_index]/sigma_increase_factor # artificially pretend the noise at low frequencies is 5 times lower than everywhere else
        fit = optimization.curve_fit(noise_profile_lor, freqs, vals, x0 , sigma,bounds = bounds)
    else:
        sigma = binnedstd
        fit = optimization.curve_fit(noise_profile_lor, binnedfreq, binnedvals, x0 ,sigma,bounds = bounds)

    return fit
Example 39
     for lgal in lensedBackgroundGalaxies]) * u.arcsec
e1 = np.array([lgal.e1.to_value("") for lgal in lensedBackgroundGalaxies])
e2 = np.array([lgal.e2.to_value("") for lgal in lensedBackgroundGalaxies])
phi = np.array([lgal.phi.to_value(u.rad) for lgal in lensedBackgroundGalaxies])

# Calculate radial position theta and magnitude of ellipticity epsilon
theta = np.sqrt(theta_x**2 + theta_y**2)
epsilon = -e1 * np.cos(2 * phi) - e2 * np.sin(2 * phi)
# Create theta bins
bin_start = 10 * u.arcsec
bin_stop = viewSize / np.sqrt(2)
N_bins = 40
theta_bin_edges = np.linspace(bin_start, bin_stop, N_bins)
# Bin into annuli and calculate mean and standard deviation
epsilon_mean, bin_edges, binnumber = binned_statistic(theta,
                                                      epsilon,
                                                      statistic="mean",
                                                      bins=theta_bin_edges)
epsilon_sigma, bin_edges, binnumber = binned_statistic(theta,
                                                       epsilon,
                                                       statistic=np.std,
                                                       bins=theta_bin_edges)
# Calculate number of galaxies in each bin
N_in_bin, bin_edges = np.histogram(theta, bin_edges)
# Calculate centers of bins
theta_bin_centers = theta_bin_edges[:-1] + (theta_bin_edges[1] -
                                            theta_bin_edges[0]) / 2
# Save out bin centers, ellipticities, and uncertainties for analysis in Mathematica
np.savetxt(
    "data.csv",
    np.stack(
        [theta_bin_centers, epsilon_mean, epsilon_sigma / np.sqrt(N_in_bin)],
Example 40
def main():
    # assuming 'theFile' contains one name per line, read the file

    if getpass.getuser() == 'frenchd':
        #         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/pilotData2.p'
        #         resultsFilename = '/Users/frenchd/inclination/git_inclination/LG_correlation_combined5_11_25cut_edit4.csv'
        #         saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/pilot_paper_code/plots6/'
        #         WS09data = '/Users/frenchd/Research/inclination/git_inclination/WS2009_lya_data.tsv'

        #         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/pickleSALT.p'
        #         saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/rotation_paper/figures/'

        #         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALTcut.p'
        pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALT_14.p'
        gtPickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pickleGT.p'
        saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/plotting_code/figs/'

    else:
        print('Could not determine username. Exiting.')
        sys.exit()

    # use the old pickle file to get the full galaxy dataset info
    pickleFile = open(pickleFilename, 'rU')
    fullDict = pickle.load(pickleFile)
    pickleFile.close()

    # for the whole galaxy table:
    gtPickleFile = open(gtPickleFilename, 'rU')
    gtDict = pickle.load(gtPickleFile)
    gtPickleFile.close()

    # save each plot?
    save = False

    #     results = open(resultsFilename,'rU')
    #     reader = csv.DictReader(results)

    #     WS = open(WS09data,'rU')
    #     WSreader = csv.DictReader(WS,delimiter=';')

    virInclude = False
    cusInclude = False
    finalInclude = 1

    maxEnv = 100
    minL = 0.001
    maxLyaW = 1500

    # if match, then the include flags in the file have to MATCH the flags set
    # above: e.g., with virInclude = False, cusInclude = True, finalInclude =
    # False, only systems whose flags match those values are included.
    # Otherwise (match = False), a system is included if ANY enabled flag above
    # is set for it (e.g., cusInclude = True picks up every system with
    # include_custom set, regardless of the other flags)
    match = False

    # all the lists to be used for associated lines
    raList = []
    decList = []
    lyaVList = []
    lyaWList = []
    lyaErrList = []
    naList = []
    bList = []
    impactList = []
    azList = []
    incList = []
    fancyIncList = []
    cosIncList = []
    cosFancyIncList = []
    paList = []
    vcorrList = []
    majList = []
    difList = []
    envList = []
    morphList = []
    m15List = []
    virList = []
    likeList = []
    likem15List = []

    AGNnameList = []
    nameList = []

    # for ambiguous lines (include = 0)
    lyaVAmbList = []
    lyaWAmbList = []
    envAmbList = []
    ambAGNnameList = []

    # for include = 2 lines
    lyaV_2List = []
    lyaW_2List = []
    env_2List = []
    vir_2List = []
    impact_2List = []
    like_2List = []

    # for include = 3 lines
    lyaV_3List = []
    lyaW_3List = []
    env_3List = []
    vir_3List = []
    impact_3List = []
    like_3List = []

    # for all lines with a galaxy within 500 kpc
    lyaV_nearestList = []
    lyaW_nearestList = []
    env_nearestList = []
    impact_nearestList = []
    diam_nearestList = []
    vir_nearestList = []
    cus_nearestList = []

    # WS lists
    #     WSvcorr = []
    #     WSdiam = []
    #     WSimpact =[]
    #     WSew = []
    #     WSvel = []
    #     WSlya = []
    #     WSvel_dif = []
    #     WSvir = []
    #     WSlike = []
    #
    #     l_min = 0.001
    #
    #     for w in WSreader:
    #         vcorr = w['HV']
    #         diam = w['Diam']
    #         rho = w['rho']
    #         ew = w['EWLya']
    #         vel = w['LyaVel']
    #         lya = w['Lya']
    #
    #         if lya == 'Lya  ' and isNumber(diam) and isNumber(ew) and isNumber(rho):
    #             if float(rho) <=500.0:
    #                 # this is a single galaxy association
    #                 vir = calculateVirialRadius(float(diam))
    #
    #                 vel_dif = float(vcorr) - float(vel)
    #
    #                 # try this "sphere of influence" value instead
    #                 m15 = float(diam)**1.5
    #
    #                 # first for the virial radius
    #                 likelihood = math.exp(-(float(rho)/vir)**2) * math.exp(-(vel_dif/200.)**2)
    #
    #                 if vir>= float(rho):
    #                     likelihood = likelihood*2
    #
    #                 # then for the second 'virial like' m15 radius
    #                 likelihoodm15 = math.exp(-(float(rho)/m15)**2) * math.exp(-(vel_dif/200.)**2)
    #
    #                 if m15>= float(rho):
    #                     likelihoodm15 = likelihoodm15*2
    #
    #                 if likelihood <= likelihoodm15:
    #                     likelihood = likelihoodm15
    #
    #                 WSlike.append(likelihood)
    #
    # #                 l_min=0
    #
    #                 if likelihood >= l_min:
    #
    #                     WSvcorr.append(float(vcorr))
    #                     WSdiam.append(float(diam))
    #                     WSvir.append(vir)
    #                     WSimpact.append(float(rho))
    #                     WSew.append(float(ew))
    #                     WSvel.append(float(vel))
    #                     WSlya.append(lya)
    #                     WSvel_dif.append(vel_dif)

    targetNameL = fullDict['targetName']
    galaxyNameL = fullDict['galaxyName']
    environmentL = fullDict['environment']
    RA_agnL = fullDict['RA_agn']
    Dec_agnL = fullDict['Dec_agn']
    RA_galL = fullDict['RA_gal']
    Dec_galL = fullDict['Dec_gal']
    likelihoodL = fullDict['likelihood']
    likelihood_cusL = fullDict['likelihood_cus']
    virialRadiusL = fullDict['virialRadius']
    cusL = fullDict['cus']
    impactParameterL = fullDict['impact']
    vcorrL = fullDict['vcorr']
    radialVelocityL = fullDict['radialVelocity']
    vel_diffL = fullDict['vel_diff']
    distGalaxyL = fullDict['distGalaxy']
    majorAxisL = fullDict['majorAxis']
    minorAxisL = fullDict['minorAxis']
    inclinationL = fullDict['inclination']
    positionAngleL = fullDict['PA']
    azimuthL = fullDict['azimuth']
    RC3flagL = fullDict['RC3flag']
    RC3typeL = fullDict['RC3type']
    RC3incL = fullDict['RC3inc']
    RC3paL = fullDict['RC3pa']
    final_morphologyL = fullDict['final_morphology']
    includeL = fullDict['include']
    include_virL = fullDict['include_vir']
    include_customL = fullDict['include_custom']
    Lya_vL = fullDict['Lya_v']
    vlimitsL = fullDict['vlimits']
    Lya_WL = fullDict['Lya_W']
    NaL = fullDict['Na']
    bL = fullDict['b']
    identifiedL = fullDict['identified']
    sourceL = fullDict['source']

    print 'initial len(Lya_vL): ', len(Lya_vL)
    #     print
    #     print 'type(includeL): ',type(includeL)
    #     print 'type(includeL[0]): ',type(includeL[0])
    includeL = [int(i) for i in includeL]
    Lya_WL = [int(i) for i in Lya_WL]
    Lya_vL = [int(i) for i in Lya_vL]
    #     impactParameterL = [float(i) for i in impactParameterL]
    #     virialRadiusL = [float(i) for i in virialRadiusL]

    i = -1
    for include, include_vir, include_cus in zip(includeL, include_virL,
                                                 include_customL):
        i += 1
        go = False
        if match:
            if virInclude == include_vir and cusInclude == include_cus:
                go = True
            else:
                go = False

        else:
            if virInclude and include_vir:
                go = True
            elif cusInclude and include_cus:
                go = True
            elif finalInclude and include:
                go = True
            else:
                go = False

        galaxyName = galaxyNameL[i]
        targetName = targetNameL[i]
        RA_agn = RA_agnL[i]
        Dec_agn = Dec_agnL[i]
        RA_gal = RA_galL[i]
        Dec_gal = Dec_galL[i]
        lyaV = Lya_vL[i]
        lyaW = Lya_WL[i]
        lyaW_err = lyaW * 0.1
        env = environmentL[i]
        impact = impactParameterL[i]
        galaxyDist = distGalaxyL[i]
        pa = positionAngleL[i]
        RC3pa = RC3paL[i]
        morph = final_morphologyL[i]
        vcorr = vcorrL[i]
        maj = majorAxisL[i]
        minor = minorAxisL[i]
        inc = inclinationL[i]
        az = azimuthL[i]
        b = bL[i]
        b_err = b * 0.1
        na = NaL[i]
        na_err = na * 0.1
        likelihood = likelihoodL[i]
        likelihoodm15 = likelihood_cusL[i]
        virialRadius = virialRadiusL[i]
        m15 = cusL[i]
        vel_diff = vel_diffL[i]
        source = sourceL[i]

        AGNnameList.append(targetName)

        # for ambiguous lines
        if include == 0:
            lyaVAmbList.append(float(lyaV))
            lyaWAmbList.append(float(lyaW))
            envAmbList.append(float(env))
            ambAGNnameList.append(targetName)

        print 'include = ', include
        if include == 2:
            print 'include2 = ', include
            # for include = 2 lines
            lyaV_2List.append(float(lyaV))
            lyaW_2List.append(float(lyaW))
            env_2List.append(float(env))
            vir_2List.append(float(virialRadius))
            impact_2List.append(float(impact))
            like_2List.append(float(likelihood))

        if include == 3:
            # for include = 3 lines
            lyaV_3List.append(float(lyaV))
            lyaW_3List.append(float(lyaW))
            env_3List.append(float(env))
            vir_3List.append(float(virialRadius))
            impact_3List.append(float(impact))
            like_3List.append(float(likelihood))

        # for all absorbers with a galaxy within 500kpc
        if isNumber(impact):
            lyaV_nearestList.append(float(lyaV))
            lyaW_nearestList.append(float(lyaW))
            env_nearestList.append(float(env))
            impact_nearestList.append(float(impact))
            diam_nearestList.append(float(maj))
            nameList.append(galaxyName)
            vir_nearestList.append(float(virialRadius))
            cus_nearestList.append(float(m15))

#         if go and source == 'salt':
#         if go and source == 'pilot':
        if go and env <= maxEnv:
            #         if go:
            if isNumber(RC3pa) and not isNumber(pa):
                pa = RC3pa

            if isNumber(inc):
                cosInc = cos(float(inc) * pi / 180.)

                if isNumber(maj) and isNumber(minor):
                    q0 = 0.2
                    fancyInc = calculateFancyInclination(maj, minor, q0)
                    cosFancyInc = cos(fancyInc * pi / 180)
                else:
                    fancyInc = -99
                    cosFancyInc = -99
            else:
                cosInc = -99
                inc = -99
                fancyInc = -99
                cosFancyInc = -99

            # all the lists to be used for associated lines
            if float(env) <= maxEnv and float(likelihood) >= minL and float(
                    lyaW) <= maxLyaW:
                raList.append(RA_gal)
                decList.append(Dec_gal)
                lyaVList.append(float(lyaV))
                lyaWList.append(float(lyaW))
                lyaErrList.append(float(lyaW_err))
                naList.append(na)
                bList.append(float(b))
                impactList.append(float(impact))
                azList.append(float(az))
                incList.append(float(inc))
                fancyIncList.append(fancyInc)
                cosIncList.append(cosInc)
                cosFancyIncList.append(cosFancyInc)
                paList.append(pa)
                vcorrList.append(vcorr)
                majList.append(maj)
                difList.append(float(vel_diff))
                envList.append(float(env))
                morphList.append(morph)
                m15List.append(m15)
                virList.append(virialRadius)
                likeList.append(likelihood)
                likem15List.append(likelihoodm15)
                nameList.append(galaxyName)

    # lists for the full galaxy dataset
    majorAxisL = gtDict['majorAxis']
    incL = gtDict['inc']
    adjustedIncL = gtDict['adjustedInc']
    paL = gtDict['PA']
    BmagL = gtDict['Bmag']
    Bmag_sdssL = gtDict['Bmag_sdss']
    RID_medianL = gtDict['RID_median']
    RID_meanL = gtDict['RID_mean']
    RID_stdL = gtDict['RID_std']
    VhelL = gtDict['Vhel']
    RAdegL = gtDict['RAdeg']
    DEdegL = gtDict['DEdeg']
    NameL = gtDict['Name']

    allPA = paL
    allInclinations = []
    allCosInclinations = []

    #     print 'type: ',type(incL)
    for i in incL:
        if i != -99:
            i = float(i)
            allInclinations.append(i)

            i2 = pi / 180. * i
            cosi2 = cos(i2)  # convert degrees to radians before taking the cosine
            allCosInclinations.append(cosi2)

    allFancyInclinations = []
    allCosFancyCosInclinations = []
    for i in adjustedIncL:
        if i != -99:
            i = float(i)

            allFancyInclinations.append(i)

            i2 = pi / 180. * i
            cosi2 = cos(i2)  # convert degrees to radians before taking the cosine
            allCosFancyCosInclinations.append(cosi2)
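    # A vectorized equivalent of the two loops above (a sketch, left commented
    # out; assumes numpy is available as np and that the inclination lists hold
    # floats with -99 as the bad-value sentinel):
    #   inc_arr = np.asarray(adjustedIncL, dtype=float)
    #   good = inc_arr != -99
    #   allFancyInclinations = inc_arr[good].tolist()
    #   allCosFancyCosInclinations = np.cos(np.radians(inc_arr[good])).tolist()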

    allDiameter = majorAxisL

    print 'finished reading in the data'
    print 'len(lyaV_2List): ', len(lyaV_2List)

    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0

    ##########################################################################################
    ##########################################################################################
    # plot doppler parameter as a function of impact / R_vir
    #

    plotB_vir = False
    save = False

    if plotB_vir:
        fig = figure()
        ax = fig.add_subplot(111)
        countb = 0
        countr = 0
        count = -1

        bSymbol = 'D'
        rSymbol = 'o'
        alpha = 0.7

        labelr = 'Red Shifted Absorber'
        labelb = "Blue Shifted Absorber"
        for d, v, b, i in zip(difList, virList, bList, impactList):
            if isNumber(d) and isNumber(v) and isNumber(b) and isNumber(i):
                xVal = float(i) / float(v)
                yVal = float(b)
                if v != -99:
                    count += 1
                    if d > 0:
                        # galaxy is behind absorber, so gas is blue shifted
                        color = 'Blue'
                        symbol = bSymbol
                        if countb == 0:
                            countb += 1
                            plotb = ax.scatter(xVal,
                                               yVal,
                                               c='Blue',
                                               s=50,
                                               label=labelb,
                                               marker=symbol,
                                               alpha=alpha)
                    if d < 0:
                        # gas is red shifted compared to galaxy
                        color = 'Red'
                        symbol = rSymbol
                        if countr == 0:
                            countr += 1
                            plotr = ax.scatter(xVal,
                                               yVal,
                                               c='Red',
                                               s=50,
                                               label=labelr,
                                               marker=symbol,
                                               alpha=alpha)

                    plot1 = scatter(xVal,
                                    yVal,
                                    c=color,
                                    s=50,
                                    marker=symbol,
                                    alpha=alpha)

        # x-axis
        majorLocator = MultipleLocator(0.5)
        majorFormatter = FormatStrFormatter(r'$\rm %0.1f$')
        minorLocator = MultipleLocator(0.25)
        ax.xaxis.set_major_locator(majorLocator)
        ax.xaxis.set_major_formatter(majorFormatter)
        ax.xaxis.set_minor_locator(minorLocator)

        # y-axis
        majorLocator = MultipleLocator(10)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(5)
        ax.yaxis.set_major_locator(majorLocator)
        ax.yaxis.set_major_formatter(majorFormatter)
        ax.yaxis.set_minor_locator(minorLocator)

        xlabel(r'$\rm \rho / R_{vir}$')
        ylabel(r'$\rm Doppler ~b ~Parameter ~[km/s]$')
        ax.grid(b=None, which='major', axis='both')
        ylim(min(bList) - 5, max(bList) + 5)
        xlim(0, 2.0)
        ax.legend(scatterpoints=1, prop={'size': 14}, loc=2)

        if save:
            savefig('{0}/B(vir).pdf'.format(saveDirectory), format='pdf')
        else:
            show()

##########################################################################################
##########################################################################################
# plot equivalent width as a function of virial radius for red vs blue
# shifted absorption, include average histograms
#

    plotW_vir_avg = False
    save = False

    if plotW_vir_avg:
        fig = figure(figsize=(7.7, 5.7))
        ax = fig.add_subplot(111)

        countb = 0
        countr = 0
        count = -1
        alpha = 0.7
        binSize = 50
        markerSize = 60

        bSymbol = 'D'
        rSymbol = 'o'
        labelr = 'Redshifted Absorber'
        labelb = "Blueshifted Absorber"

        # running EW sums and counts for 7 bins of width binSize (0-350 kpc);
        # a v of 7 * binSize or more would overflow these arrays
        placeArrayr = zeros(7)
        placeCountr = zeros(7)
        placeArrayb = zeros(7)
        placeCountb = zeros(7)
        redW = []
        blueW = []
        redVir = []
        blueVir = []

        virList = [int(round(float(v), 0)) for v in virList]

        for d, v, w, m in zip(difList, virList, lyaWList, majList):
            # check if all the values are good
            if isNumber(d) and isNumber(v) and isNumber(w) and isNumber(m):
                if d != -99 and v != -99 and w != -99 and m != -99:
                    w = float(w)
                    m = float(m)
                    d = float(d)
                    if d > 0:
                        # galaxy is behind absorber, so gas is blue shifted
                        color = 'Blue'
                        symbol = bSymbol

                        blueW.append(w)
                        blueVir.append(v)

                        # which bin does it belong to?
                        place = v / binSize  # integer division: v was rounded to an int above
                        print 'place: ', place
                        placeArrayb[place] += float(w)
                        print 'placeArrayb: ', placeArrayb
                        placeCountb[place] += 1.
                        print 'placecountb: ', placeCountb

                        if countb == 0:
                            countb += 1
                            plotb = ax.scatter(v,w,marker=symbol,alpha=alpha,c='Blue',\
                            s=markerSize)

                    if d < 0:
                        # gas is red shifted compared to galaxy
                        color = 'Red'
                        symbol = rSymbol

                        redW.append(w)
                        redVir.append(v)

                        # which bin does it belong to?
                        place = v / binSize
                        placeArrayr[place] += float(w)
                        placeCountr[place] += 1.

                        if countr == 0:
                            countr += 1
                            plotr = ax.scatter(v,w,marker=symbol,alpha=alpha,c='Red',\
                            s=markerSize)

                    plot1 = scatter(v,
                                    w,
                                    marker=symbol,
                                    alpha=alpha,
                                    c=color,
                                    s=markerSize)

        rHist = placeArrayr / placeCountr  # empty bins give nan here (handled below)
        print 'rHist: ', rHist
        bHist = placeArrayb / placeCountb
        print 'bHist: ', bHist

        totalrHist = []
        totalrVir = []
        totalbHist = []
        totalbVir = []

        for r, v in zip(rHist, arange(0, max(virList), binSize)):
            if not isNumber(r):
                r = 0

            totalrHist.append(r)
            totalrHist.append(r)

            totalrVir.append(v)
            totalrVir.append(v + binSize)

        for b, v in zip(bHist, arange(0, max(virList), binSize)):
            if not isNumber(b):
                b = 0
            totalbHist.append(b)
            totalbHist.append(b)

            totalbVir.append(v)
            totalbVir.append(v + binSize)

        print 'totalrVir: ', totalrVir
        print 'totalrHist: ', totalrHist
        print
        print 'totalbVir: ', totalbVir
        print 'totalbHist: ', totalbHist
        print

        # x-axis
        majorLocator = MultipleLocator(50)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(25)
        ax.xaxis.set_major_locator(majorLocator)
        ax.xaxis.set_major_formatter(majorFormatter)
        ax.xaxis.set_minor_locator(minorLocator)

        # y-axis
        majorLocator = MultipleLocator(200)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(100)
        ax.yaxis.set_major_locator(majorLocator)
        ax.yaxis.set_major_formatter(majorFormatter)
        ax.yaxis.set_minor_locator(minorLocator)

        # bins are in Rvir
        bins = arange(150, 400, binSize)

        #         bin_means,edges,binNumber = stats.binned_statistic(array(redVir), array(redW), statistic='mean', bins=bins)
        #         left,right = edges[:-1],edges[1:]
        #         X = array([left,right]).T.flatten()
        #         Y = array([bin_means,bin_means]).T.flatten()
        #         plot(X,Y, c='red',ls='dotted',lw=2.5,alpha=alpha,label=r'\rm $Mean ~Redshifted ~EW$')
        #
        #         bin_means,edges,binNumber = stats.binned_statistic(array(blueVir), array(blueW), statistic='mean', bins=bins)
        #         left,right = edges[:-1],edges[1:]
        #         X = array([left,right]).T.flatten()
        #         Y = array([bin_means,bin_means]).T.flatten()
        #         plot(X,Y, c='blue',ls='dashed',lw=1.5,alpha=alpha,label=r'\rm $Mean ~Blueshifted ~EW$')

        plot2 = ax.plot(totalrVir,
                        totalrHist,
                        c='Red',
                        lw=2.5,
                        ls='dotted',
                        label=r'$\rm Mean ~ Redshifted ~ EW$')
        plot3 = ax.plot(totalbVir,
                        totalbHist,
                        c='Blue',
                        lw=1.5,
                        ls='dashed',
                        label=r'$\rm Mean ~ Blueshifted ~ EW$')

        xlabel(r'$\rm R_{vir} ~ [kpc]$')
        ylabel(r'$\rm Equivalent ~ Width ~ [m\AA]$')
        ax.legend(scatterpoints=1, prop={'size': 15}, loc=2, fancybox=True)
        ax.grid(b=None, which='major', axis='both')
        ylim(0, 1200)
        xlim(150, 350)

        if save:
            savefig('{0}/W(vir)_avgHistograms2_maxEnv{1}.pdf'.format(
                saveDirectory, maxEnv),
                    format='pdf',
                    bbox_inches='tight')
        else:
            show()

##########################################################################################
##########################################################################################
# plot equivalent width as a function of virial radius, split between
# red and blue shifted absorption, overplot mean histograms for red and blue
#
# plot shaded error regions around histogram
#

    plotW_impact_vir_hist_errors = False
    save = False

    if plotW_impact_vir_hist_errors:
        fig = figure(figsize=(7.7, 5.7))
        ax = fig.add_subplot(111)

        countb = 0
        countr = 0
        count = -1

        alpha = 0.7
        alphaInside = 0.7
        markerSize = 60
        errorAlpha = 0.15

        plotErrors = False
        plotCombinedMedian = True

        binSize = 50
        bins = arange(150, 400, binSize)

        labelr = r'$\rm Redshifted ~Absorber$'
        labelb = r'$\rm Blueshifted ~Absorber$'
        bSymbol = 'D'
        rSymbol = 'o'

        xVals = []
        yVals = []
        redX = []
        redY = []
        blueX = []
        blueY = []

        for d, i, w, v in zip(difList, impactList, lyaWList, virList):
            # check if all the values are good
            if isNumber(d) and isNumber(i) and isNumber(w) and isNumber(v):
                if d != -99 and i != -99 and w != -99 and v != -99:
                    xVal = float(v)
                    yVal = float(w)

                    xVals.append(xVal)
                    yVals.append(yVal)

                    if d > 0:
                        # galaxy is behind absorber, so gas is blue shifted
                        color = 'Blue'
                        symbol = bSymbol

                        if float(i) > float(v):
                            # impact parameter > virial radius
                            a = alpha
                            fc = color
                            ec = 'black'

                        if float(i) <= float(v):
                            # impact parameter <= virial radius
                            a = alphaInside
                            fc = 'none'
                            ec = color

                        blueX.append(xVal)
                        blueY.append(yVal)

                        if countb == 0:
                            countb += 1
                            plotb = ax.scatter(xVal,yVal,marker=symbol,c='Blue',\
                            facecolor=fc,edgecolor=ec,s=markerSize,alpha=a)

                    if d < 0:
                        # gas is red shifted compared to galaxy
                        color = 'Red'
                        symbol = rSymbol

                        if float(i) > float(v):
                            # impact parameter > virial radius
                            a = alpha
                            fc = color
                            ec = 'black'

                        if float(i) <= float(v):
                            # impact parameter <= virial radius
                            a = alphaInside
                            fc = 'none'
                            ec = color

                        redX.append(xVal)
                        redY.append(yVal)

                        if countr == 0:
                            countr += 1
                            plotr = ax.scatter(xVal,yVal,marker=symbol,c='Red',\
                            s=markerSize,facecolor=fc,edgecolor=ec,alpha=a)

                    plot1 = scatter(xVal,yVal,marker=symbol,c=color,s=markerSize,\
                    facecolor=fc,edgecolor=ec,alpha=a)

        if plotCombinedMedian:
            bin_means,edges,binNumber = stats.binned_statistic(array(xVals), array(yVals), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(xVals), array(yVals), \
            statistic=lambda y: errors(y), bins=bins)  # errors() is a helper defined elsewhere, presumably the standard error of the mean

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(xVals), array(yVals), \
            statistic=lambda y: std(y), bins=bins)

            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='grey',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='grey',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='grey',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dotted',
                 color='black',
                 lw=2.1,
                 alpha=alpha + 0.2,
                 label=r'$\rm Mean ~EW$')

        else:
            # avg red
            bin_means,edges,binNumber = stats.binned_statistic(array(redX), array(redY), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(redX), array(redY), \
            statistic=lambda y: errors(y), bins=bins)

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(redX), array(redY), \
            statistic=lambda y: std(y), bins=bins)

            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='red',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='red',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='red',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dotted',
                 color='red',
                 lw=2.1,
                 alpha=alpha + 0.2,
                 label=r'$\rm Mean~ Redshifted ~EW$')

            # avg blue
            bin_means,edges,binNumber = stats.binned_statistic(array(blueX), array(blueY), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(blueX), array(blueY), \
            statistic=lambda y: errors(y), bins=bins)

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(blueX), array(blueY), \
            statistic=lambda y: std(y), bins=bins)

            print
            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='blue',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='blue',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='blue',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dashed',
                 color='blue',
                 lw=1.7,
                 alpha=alpha + 0.1,
                 label=r'$\rm Mean~ Blueshifted ~EW$')

        # x-axis
        majorLocator = MultipleLocator(50)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(25)
        ax.xaxis.set_major_locator(majorLocator)
        ax.xaxis.set_major_formatter(majorFormatter)
        ax.xaxis.set_minor_locator(minorLocator)

        # y-axis
        majorLocator = MultipleLocator(200)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(100)
        ax.yaxis.set_major_locator(majorLocator)
        ax.yaxis.set_major_formatter(majorFormatter)
        ax.yaxis.set_minor_locator(minorLocator)

        xlabel(r'$\rm R_{vir}$')
        ylabel(r'$\rm Equivalent ~ Width ~ [m\AA]$')
        leg = ax.legend(scatterpoints=1,
                        prop={'size': 14},
                        loc=1,
                        fancybox=True)
        #         leg.get_frame().set_alpha(0.5)

        ax.grid(b=None, which='major', axis='both')
        ylim(0, 1300)
        #         xlim(150,350)

        if save:
            savefig('{0}/W(vir)_median_{1}_virsep_maxEnv{2}.pdf'.format(
                saveDirectory, binSize, maxEnv),
                    format='pdf',
                    bbox_inches='tight')
        else:
            show()

##########################################################################################
##########################################################################################
# plot equivalent width as a function of galaxy diameter over impact parameter,
# split between red and blue shifted absorption, overplot mean histograms for red and blue
#
# plot shaded error regions around histogram
#

    plotW_impact_diam_hist_errors = True
    save = True

    if plotW_impact_diam_hist_errors:
        fig = figure(figsize=(7.7, 5.7))
        ax = fig.add_subplot(111)

        countb = 0
        countr = 0
        count = -1

        alpha = 0.7
        alphaInside = 0.7
        markerSize = 60
        errorAlpha = 0.15

        plotErrors = False
        plotCombinedMedian = True

        #         binSize = 50
        #         bins = arange(150,400,binSize)
        binSize = 0.1
        bins = arange(0, 1, binSize)

        labelr = r'$\rm Redshifted ~Absorber$'
        labelb = r'$\rm Blueshifted ~Absorber$'
        bSymbol = 'D'
        rSymbol = 'o'

        xVals = []
        yVals = []
        redX = []
        redY = []
        blueX = []
        blueY = []

        for d, i, w, v, m in zip(difList, impactList, lyaWList, virList,
                                 majList):
            # check if all the values are good
            if isNumber(d) and isNumber(i) and isNumber(w) and isNumber(
                    v) and isNumber(m):
                if d != -99 and i != -99 and w != -99 and v != -99 and m != -99:
                    xVal = float(m) / float(i)
                    yVal = float(w)

                    xVals.append(xVal)
                    yVals.append(yVal)

                    if d > 0:
                        # galaxy is behind absorber, so gas is blue shifted
                        color = 'Blue'
                        symbol = bSymbol

                        if float(i) > float(v):
                            # impact parameter > virial radius
                            a = alpha
                            fc = color
                            ec = 'black'

                        if float(i) <= float(v):
                            # impact parameter <= virial radius
                            a = alphaInside
                            fc = 'none'
                            ec = color

                        blueX.append(xVal)
                        blueY.append(yVal)

                        if countb == 0:
                            countb += 1
                            plotb = ax.scatter(xVal,yVal,marker=symbol,c='Blue',\
                            facecolor=fc,edgecolor=ec,s=markerSize,alpha=a)

                    if d < 0:
                        # gas is red shifted compared to galaxy
                        color = 'Red'
                        symbol = rSymbol

                        if float(i) > float(v):
                            # impact parameter > virial radius
                            a = alpha
                            fc = color
                            ec = 'black'

                        if float(i) <= float(v):
                            # impact parameter <= virial radius
                            a = alphaInside
                            fc = 'none'
                            ec = color

                        redX.append(xVal)
                        redY.append(yVal)

                        if countr == 0:
                            countr += 1
                            plotr = ax.scatter(xVal,yVal,marker=symbol,c='Red',\
                            s=markerSize,facecolor=fc,edgecolor=ec,alpha=a)

                    plot1 = scatter(xVal,yVal,marker=symbol,c=color,s=markerSize,\
                    facecolor=fc,edgecolor=ec,alpha=a)

        if plotCombinedMedian:
            bin_means,edges,binNumber = stats.binned_statistic(array(xVals), array(yVals), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(xVals), array(yVals), \
            statistic=lambda y: errors(y), bins=bins)

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(xVals), array(yVals), \
            statistic=lambda y: std(y), bins=bins)

            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='grey',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='grey',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='grey',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dotted',
                 color='black',
                 lw=2.1,
                 alpha=alpha + 0.2,
                 label=r'$\rm Mean ~EW$')

        else:
            # avg red
            bin_means,edges,binNumber = stats.binned_statistic(array(redX), array(redY), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(redX), array(redY), \
            statistic=lambda y: errors(y), bins=bins)

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(redX), array(redY), \
            statistic=lambda y: std(y), bins=bins)

            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='red',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='red',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='red',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dotted',
                 color='red',
                 lw=2.1,
                 alpha=alpha + 0.2,
                 label=r'$\rm Mean~ Redshifted ~EW$')

            # avg blue
            bin_means,edges,binNumber = stats.binned_statistic(array(blueX), array(blueY), \
            statistic='mean', bins=bins)

            bin_errors,edges_e,binNumber_e = stats.binned_statistic(array(blueX), array(blueY), \
            statistic=lambda y: errors(y), bins=bins)

            bin_std,edges_std,binNumber_std = stats.binned_statistic(array(blueX), array(blueY), \
            statistic=lambda y: std(y), bins=bins)

            print
            print 'bin_means,edges,binNumber : ', bin_means, edges, binNumber
            print
            print 'bin_errors, edges_e,binNumber_e : ', bin_errors, edges_e, binNumber_e
            print
            print 'bin_std,edges_std,binNumber_std : ', bin_std, edges_std, binNumber_std

            # the mean
            left, right = edges[:-1], edges[1:]
            X = array([left, right]).T.flatten()
            Y = array([nan_to_num(bin_means),
                       nan_to_num(bin_means)]).T.flatten()

            # the errors
            left_e, right_e = edges_e[:-1], edges_e[1:]
            X_e = array([left_e, right_e]).T.flatten()
            Y_e = array([nan_to_num(bin_errors),
                         nan_to_num(bin_errors)]).T.flatten()

            yErrorsTop = Y + Y_e
            yErrorsBot = Y - Y_e

            if plotErrors:
                plot(X_e,
                     yErrorsBot,
                     ls='solid',
                     color='blue',
                     lw=1,
                     alpha=errorAlpha)
                plot(X_e,
                     yErrorsTop,
                     ls='solid',
                     color='blue',
                     lw=1,
                     alpha=errorAlpha)
                fill_between(X_e,
                             yErrorsBot,
                             yErrorsTop,
                             facecolor='blue',
                             interpolate=True,
                             alpha=errorAlpha)

            plot(X,
                 Y,
                 ls='dashed',
                 color='blue',
                 lw=1.7,
                 alpha=alpha + 0.1,
                 label=r'$\rm Mean~ Blueshifted ~EW$')

        # x-axis
        majorLocator = MultipleLocator(0.2)
        majorFormatter = FormatStrFormatter(r'$\rm %0.1f$')
        minorLocator = MultipleLocator(0.1)
        ax.xaxis.set_major_locator(majorLocator)
        ax.xaxis.set_major_formatter(majorFormatter)
        ax.xaxis.set_minor_locator(minorLocator)

        # y-axis
        majorLocator = MultipleLocator(200)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(100)
        ax.yaxis.set_major_locator(majorLocator)
        ax.yaxis.set_major_formatter(majorFormatter)
        ax.yaxis.set_minor_locator(minorLocator)

        xlabel(r'$\rm Diameter / \rho$')  # dimensionless ratio (kpc/kpc)
        ylabel(r'$\rm Equivalent ~ Width ~ [m\AA]$')
        leg = ax.legend(scatterpoints=1,
                        prop={'size': 14},
                        loc=1,
                        fancybox=True)
        #         leg.get_frame().set_alpha(0.5)

        ax.grid(b=None, which='major', axis='both')
        ylim(0, 1300)
        xlim(0, 0.6)

        if save:
            savefig('{0}/W(maj_impact)_median_{1}_virsep_maxEnv{2}.pdf'.format(
                saveDirectory, binSize, maxEnv),
                    format='pdf',
                    bbox_inches='tight')
        else:
            show()
Example no. 41
def whole(eventfile,par_list,tbin_size,mode,ps_type,oversampling,xlims,vlines):
    """
    Plot the entire power spectrum without any cuts to the data.

    eventfile - path to the event file. Will extract ObsID from this for the NICER files.
    par_list - A list of parameters we'd like to extract from the FITS file
    (e.g., from eventcl: PI_FAST, TIME, PI)
    tbin_size - the size of the time bins (in seconds!)
    >> e.g., tbin_size = 2 means bin by 2s
    >> e.g., tbin_size = 0.05 means bin by 0.05s!
    mode - whether we want to show or save the plot.
    ps_type - obtain power spectrum through the periodogram method ('period') or
    the manual FFT way ('manual') or both ('both')
    oversampling - whether to perform oversampling. Array will consist of
    [True/False, oversampling factor]
    xlims - a list or array: first entry = True/False as to whether to impose an
    xlim; second and third entry correspond to the desired x-limits of the plot
    vlines - a list or array: first entry = True/False as to whether to draw
    a vertical line in the plot; second entry is the equation for the vertical line
    """
    if type(eventfile) != str:
        raise TypeError("eventfile should be a string!")
    if 'TIME' not in par_list:
        raise ValueError("You should have 'TIME' in the parameter list!")
    if type(par_list) != list and type(par_list) != np.ndarray:
        raise TypeError("par_list should either be a list or an array!")
    if mode != 'show' and mode != 'save':
        raise ValueError("Mode should either be 'show' or 'save'!")
    if ps_type != 'period' and ps_type != 'manual' and ps_type != 'both':
        raise ValueError("ps_type should either be 'period' or 'show' or 'save'!")
    if type(oversampling) != list and type(oversampling) != np.ndarray:
        raise TypeError("oversampling should either be a list or an array!")
    if type(xlims) != list and type(xlims) != np.ndarray:
        raise TypeError("xlims should either be a list or an array!")
    if type(vlines) != list and type(vlines) != np.ndarray:
        raise TypeError("vlines should either be a list or an array!")

    parent_folder = str(pathlib.Path(eventfile).parent)

    data_dict = Lv0_fits2dict.fits2dict(eventfile,1,par_list)
    times = data_dict['TIME']
    counts = np.ones(len(times))

    shifted_t = times-times[0]
    t_bins = np.linspace(0,np.ceil(shifted_t[-1]),int(np.ceil(shifted_t[-1])*1/tbin_size+1))
    summed_data, bin_edges, binnumber = stats.binned_statistic(shifted_t,counts,statistic='sum',bins=t_bins) #sum the unit counts in each time bin, i.e. build the binned light curve

    event_header = fits.open(eventfile)[1].header
    obj_name = event_header['OBJECT']
    obsid = event_header['OBS_ID']

    if ps_type == 'period':
        plt.figure()
        pdgm_f,pdgm_ps = Lv2_ps_method.pdgm(t_bins,summed_data,xlims,vlines,True,oversampling)
        plt.title('Power spectrum for ' + obj_name + ', ObsID: ' + str(obsid) + '\n Periodogram method' + '\n Includes whole time interval and energy range',fontsize=12)
        if mode == 'show':
            plt.show()
        elif mode == 'save':
            filename = 'ps_' + obsid + '_bin' + str(tbin_size) + 's_pdgm.pdf'
            plt.savefig(parent_folder+'/'+filename,dpi=900)
            plt.close()

        return pdgm_f, pdgm_ps

    if ps_type == 'manual':
        plt.figure()
        manual_f,manual_ps = Lv2_ps_method.manual(t_bins,summed_data,xlims,vlines,True,oversampling)
        plt.title('Power spectrum for ' + obj_name + ', ObsID ' + str(obsid) + '\n Manual FFT method' + '\n Includes whole time interval and energy range',fontsize=12)
        if mode == 'show':
            plt.show()
        elif mode == 'save':
            filename = 'ps_' + obsid + '_bin' + str(tbin_size) + 's_manual.pdf'
            plt.savefig(parent_folder+'/'+filename,dpi=900)
            plt.close()

        return manual_f, manual_ps

    if ps_type == 'both':
        pdgm_f,pdgm_ps = Lv2_ps_method.pdgm(t_bins,summed_data,xlims,vlines,False,oversampling)
        manual_f,manual_ps = Lv2_ps_method.manual(t_bins,summed_data,xlims,vlines,False,oversampling)
        fig, (ax1,ax2) = plt.subplots(2,1)
        fig.suptitle('Power spectra for ' + obj_name + ', ObsID ' + str(obsid) + '\n both periodogram and manual FFT method' + '\n Includes whole time interval and energy range' , fontsize=12)

        ax1.semilogy(pdgm_f,pdgm_ps,'b-')#/np.mean(pdgm_ps),'b-') #periodogram; arrays already truncated!
        ax1.set_xlabel('Hz',fontsize=12)
        ax1.set_ylabel('Normalized power spectrum',fontsize=10)

        ax2.semilogy(manual_f,manual_ps,'r-')#/np.mean(manual_ps),'r-') #manual FFT; arrays already truncated!
        ax2.set_xlabel('Hz',fontsize=12)
        ax2.set_ylabel('Normalized power spectrum',fontsize=10)

        if xlims[0] == True:
            ax1.set_xlim([xlims[1],xlims[2]])
            ax2.set_xlim([xlims[1],xlims[2]])
        if vlines[0] == True:
            ax1.axvline(x=vlines[1],color='k',alpha=0.5,lw=0.5)
            ax2.axvline(x=vlines[1],color='k',alpha=0.5,lw=0.5)
            ax2.axhline(y=2,color='k',alpha=0.3,lw=0.3)

        plt.subplots_adjust(hspace=0.2)

        if mode == 'show':
            plt.show()
        elif mode == 'save':
            filename = 'ps_' + obsid + '_bin' + str(tbin_size) + 's_both.pdf'
            plt.savefig(parent_folder+'/'+filename,dpi=900)
            plt.close()

        return pdgm_f, pdgm_ps, manual_f, manual_ps
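# A hypothetical invocation of the function above (the event-file path, limits,
# and vertical-line frequency below are illustrative stand-ins, not from the
# original source):
if __name__ == "__main__":
    f, ps = whole('/path/to/ni1234567890_0mpu7_cl.evt',
                  par_list=['TIME', 'PI', 'PI_FAST'],
                  tbin_size=0.05, mode='show', ps_type='period',
                  oversampling=[True, 5],
                  xlims=[True, 0.0, 5.0],
                  vlines=[True, 1.25])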
Example no. 42
try:
    diffusion = sim["/PartType0/Diffusion"][:]
    plot_diffusion = True
except KeyError:  # dataset absent from this snapshot
    plot_diffusion = False

try:
    viscosity = sim["/PartType0/Viscosity"][:]
    plot_viscosity = True
except KeyError:
    plot_viscosity = False

# Bin the data
r_bin_edge = np.arange(0., 0.5, 0.01)
r_bin = 0.5*(r_bin_edge[1:] + r_bin_edge[:-1])
rho_bin,_,_ = stats.binned_statistic(r, rho, statistic='mean', bins=r_bin_edge)
v_bin,_,_ = stats.binned_statistic(r, v_r, statistic='mean', bins=r_bin_edge)
P_bin,_,_ = stats.binned_statistic(r, P, statistic='mean', bins=r_bin_edge)
S_bin,_,_ = stats.binned_statistic(r, S, statistic='mean', bins=r_bin_edge)
u_bin,_,_ = stats.binned_statistic(r, u, statistic='mean', bins=r_bin_edge)
rho2_bin,_,_ = stats.binned_statistic(r, rho**2, statistic='mean', bins=r_bin_edge)
v2_bin,_,_ = stats.binned_statistic(r, v_r**2, statistic='mean', bins=r_bin_edge)
P2_bin,_,_ = stats.binned_statistic(r, P**2, statistic='mean', bins=r_bin_edge)
S2_bin,_,_ = stats.binned_statistic(r, S**2, statistic='mean', bins=r_bin_edge)
u2_bin,_,_ = stats.binned_statistic(r, u**2, statistic='mean', bins=r_bin_edge)
rho_sigma_bin = np.sqrt(rho2_bin - rho_bin**2)
v_sigma_bin = np.sqrt(v2_bin - v_bin**2)
P_sigma_bin = np.sqrt(P2_bin - P_bin**2)
S_sigma_bin = np.sqrt(S2_bin - S_bin**2)
u_sigma_bin = np.sqrt(u2_bin - u_bin**2)
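# Equivalently, scipy can return the per-bin scatter directly, avoiding the
# round-off-prone E[x^2] - E[x]^2 form used above (shown for rho only):
rho_sigma_alt, _, _ = stats.binned_statistic(r, rho, statistic='std',
                                             bins=r_bin_edge)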
Example no. 43
import numpy as np
import matplotlib.pyplot as plt
import numpy.fft as ft
import scipy.stats as st

# Generate random numbers between 0 and 1 and plot them
n = 1024
y = np.random.rand(n)
plt.subplot(2, 1, 1, title="Sample data")
plt.hist(y)
plt.title("Random numbers using numpy.random.rand")
plt.xlabel("$x_i$")
plt.ylabel("number in each bin")
x_p = np.linspace(0, 1, 100)
y_p = n / 10 * np.ones(len(x_p))  # expected count per bin (hist defaults to 10 bins)
plt.plot(x_p, y_p, 'r')

ky = ft.fftshift(ft.fft(y))  # Taking FFT
k = 2 * np.pi * ft.fftshift(ft.fftfreq(len(y), 1))  # Finding Frequency
print("max = ", np.max(k), "  min = ", np.min(k))
Pky = (np.abs(ky)**2) / (len(y))  # Finding Periodogram
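# Sanity check of the normalization: with Pky = |Y_k|^2 / n, Parseval's theorem
# under numpy's FFT convention gives sum_k Pky == sum_n y_n^2.
assert np.allclose(np.sum(Pky), np.sum(y**2))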
plt.subplot(2, 1, 2, title="Binned Periodogram")  # Binning Periodogram
ky_bin, k_be, binnumber = st.binned_statistic(k, Pky, bins=5)  # statistic defaults to 'mean'
k_bins = (k_be[:-1] + k_be[1:]) / 2  # bin centers
plt.bar(k_bins, ky_bin, width=k_be[1] - k_be[0])
plt.xlabel("k")
plt.ylabel("P(k)")
plt.subplots_adjust(wspace=0.4, hspace=0.5)
plt.show()
Example no. 44
def calibrate(thar1d_simple,
              thar_solution_temp,
              thar_list,
              poly_order=(5, 10),
              slit=0):
    wave_temp, thar_temp, order_temp = thar_solution_temp

    # 1.initial solution
    print("@SONG: [ThAr] 2D cross-correlation ...")
    shift, corr2d = calib.thar1d_corr2d(thar1d_simple,
                                        thar_temp,
                                        y_shiftmax=3,
                                        x_shiftmax=20)
    print("@SONG: [ThAr] the shift is ", shift)
    wave_init = calib.interpolate_wavelength(wave_temp, shift, thar_temp,
                                             thar1d_simple)
    order_init = calib.interpolate_order(order_temp, shift, thar1d_simple) + 80

    print("@SONG: [ThAr] refine wavelength ..." "")
    # 2.fit Gaussians to Thar lines
    lc_coord, lc_order, lc_thar, popt, pcov = calib.refine_thar_positions(
        wave_init,
        order_init,
        thar1d_simple,
        thar_list,
        fit_width=.3,
        lc_tol=.1,
        k=3,
        n_jobs=-1,
        verbose=10)

    # select using center deviation & line SNR
    ind_good0 = np.logical_and(
        np.abs(popt[:, 2] - lc_thar) < 5,  # (popt[:,3]*3),
        (popt[:, 1] / np.sqrt(2. * np.pi) / popt[:, 3] / np.abs(popt[:, 0])) >
        .1)
    print(np.sum(ind_good0))

    # 3.rejections of outliers
    ind_good1 = calib.clean_thar_polyfit1d_reject(lc_coord,
                                                  lc_order,
                                                  lc_thar,
                                                  popt,
                                                  ind_good0=ind_good0,
                                                  deg=1,
                                                  w=None,
                                                  epsilon=0.002,
                                                  n_reserve=8)
    print(np.sum(ind_good1))

    # 4. fit final solution
    print("@SONG: [ThAr] fit wavelength solution ")
    x_mini_lsq, ind_good_thar, scaler_coord, scaler_order, scaler_ml = calib.fit_grating_equation(
        lc_coord,
        lc_order,
        lc_thar,
        popt,
        pcov,
        ind_good_thar0=ind_good1,
        poly_order=poly_order,
        max_dev_threshold=.003,
        n_iter=1,
        lar=False,
        nl_eachorder=5)

    # construct grids for coordinates & order
    grid_coord, grid_order = np.meshgrid(
        np.arange(thar1d_simple.shape[1]),
        np.arange(thar1d_simple.shape[0]) + 80)

    # 4'.fit grating function
    sgrid_fitted_wave = calib.grating_equation_predict(grid_coord, grid_order,
                                                       x_mini_lsq, poly_order,
                                                       scaler_coord,
                                                       scaler_order, scaler_ml)

    # 5.get the fitted wavelength
    lc_thar_fitted = calib.grating_equation_predict(lc_coord, lc_order,
                                                    x_mini_lsq, poly_order,
                                                    scaler_coord, scaler_order,
                                                    scaler_ml)

    results = [sgrid_fitted_wave, lc_thar_fitted]

    # 6.figures for diagnostics
    bins = np.arange(4500, 7500, 500)
    bins_med, _, _ = binned_statistic(lc_thar[ind_good_thar],
                                      lc_thar_fitted[ind_good_thar] -
                                      lc_thar[ind_good_thar],
                                      statistic=np.median,
                                      bins=bins)
    bins_rms, _, _ = binned_statistic(lc_thar[ind_good_thar],
                                      lc_thar_fitted[ind_good_thar] -
                                      lc_thar[ind_good_thar],
                                      statistic=nanrms,
                                      bins=bins)
    """ [Figure]: calibration diagnostics """
    fig = plt.figure(figsize=(12, 8))
    fig.add_subplot(111)
    plt.plot(lc_thar, lc_thar_fitted - lc_thar, '.')
    plt.plot(lc_thar[ind_good_thar],
             lc_thar_fitted[ind_good_thar] - lc_thar[ind_good_thar], 'r.')
    plt.errorbar(bins[:-1] + np.diff(bins) * .5,
                 bins_med,
                 bins_rms,
                 color='k',
                 ecolor='k')
    plt.xlim(4300, 7200)
    plt.ylim(-.008, .008)
    plt.xlabel("Wavelength (A)")
    plt.ylabel("$\lambda(solution)-\lambda(true)$")
    plt.title("RMS = {:05f} A for SLIT {:d} [{} lines]".format(
        rms(lc_thar_fitted[ind_good_thar] - lc_thar[ind_good_thar]), slit,
        len(ind_good_thar)))
    plt.legend([
        "deviation of all lines", "deviation of used lines", "mean RMS in bins"
    ])
    fig.tight_layout()
    # fig.savefig(dir_work+"thar{}_{:s}".format(slit, thar_fn.replace(".fits", "_diagnostics.svg")))
    # plt.close(fig)
    results.append(fig)

    fig = plt.figure(figsize=(24, 8))
    plt.imshow(np.log10(thar1d_simple),
               aspect='auto',
               cmap=cm.viridis,
               vmin=np.nanpercentile(np.log10(thar1d_simple), 5),
               vmax=np.nanpercentile(np.log10(thar1d_simple), 95))
    plt.plot(lc_coord, lc_order - 80, ls='', marker='s', mfc='None', mec='b')
    plt.plot(lc_coord[ind_good_thar],
             lc_order[ind_good_thar] - 80,
             ls='',
             marker='s',
             mfc='None',
             mec='r')
    plt.xlabel("CCD Coordinate")
    plt.ylabel("Order")
    plt.colorbar()
    fig.tight_layout()
    # fig.savefig(dir_work+"thar{}_{:s}".format(slit, thar_fn.replace(".fits", "_used_lines.svg")))
    # plt.close(fig)
    results.append(fig)

    return results
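`nanrms` is not defined in this fragment; since `binned_statistic` accepts any callable that reduces the samples of a bin to a scalar, a plausible sketch (name and definition assumed) is:

import numpy as np
from scipy.stats import binned_statistic

def nanrms(x):
    # root mean square, ignoring NaNs (assumed definition of the helper above)
    return np.sqrt(np.nanmean(np.asarray(x) ** 2))

# mirrors the diagnostics above: per-bin RMS of wavelength residuals
resid_rms, edges, _ = binned_statistic(
    np.random.uniform(4500, 7500, 1000),   # toy wavelengths
    np.random.normal(0.0, 0.002, 1000),    # toy residuals
    statistic=nanrms,
    bins=np.arange(4500, 7500, 500))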
Example 45
	data = np.loadtxt(filename)
	t, m, dm = data[:,0], data[:,1], data[:,2]

	N = len(m)
	indices = np.array((list(combinations(range(N), 2))))
	i, j = indices.T

	delta_m_ij = m[j] - m[i]
	delta_t_ij = t[j] - t[i]
	sigma_m_i, sigma_m_j = dm[i], dm[j]  # errors of the i-th and j-th epochs

	bins = 2000

	timelag = np.linspace(delta_t_ij.min(), delta_t_ij.max(), bins)
	V = np.sqrt(np.pi/2.0) * np.abs(delta_m_ij) - np.sqrt(sigma_m_i**2 + sigma_m_j**2)
	SF = stats.binned_statistic(delta_t_ij, V, bins = bins, statistic = 'mean')[0]

	# equivalent manual computation of the same binned mean (overwrites SF)
	edges = np.linspace(np.min(delta_t_ij), np.max(delta_t_ij), bins + 1)
	histogram = fasthistogram(delta_t_ij, edges)
	binmapping = np.digitize(delta_t_ij, edges)
	SF = np.array([np.sum(V[binmapping == (i + 1)]) for i in range(len(histogram))]) / histogram

	try:
		popt, pcov = optimize.curve_fit(powerlaw, timelag[~np.isnan(SF)],
		                                SF[~np.isnan(SF)], p0=(0.1, 0.1))
		A, gamma = popt
		dA, dgamma = np.sqrt(np.diag(pcov))
		print(u'A: %.4f ± %.4f' % (A, dA))
		print(u'γ: %.4f ± %.4f' % (gamma, dgamma))
	except RuntimeError:
		# the fit failed to converge; nothing to report
		pass
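The `powerlaw` model handed to `curve_fit` is not shown in this fragment; a common two-parameter form for the first-order structure function, consistent with the fitted pair `(A, gamma)`, would be:

def powerlaw(dt, A, gamma):
    # SF(dt) = A * dt**gamma (assumed model; matches p0=(0.1, 0.1) above)
    return A * np.power(dt, gamma)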
Example 46
def bin_yield_climate_county(yield_data, fips, add_loss_ratio=False):

    # Add growing season precipitation
    P = pd.concat([
        prec_monthly[fips][prec_monthly[fips].index[4:-1:12]].to_period(
            'A').rename('Prec_5'), prec_monthly[fips]
        [prec_monthly[fips].index[5:-1:12]].to_period('A').rename('Prec_6'),
        prec_monthly[fips][prec_monthly[fips].index[6:-1:12]].to_period(
            'A').rename('Prec_7'), prec_monthly[fips]
        [prec_monthly[fips].index[7:-1:12]].to_period('A').rename('Prec_8')
    ],
                  axis=1).reset_index()

    # P = pd.concat([prec_monthly[fips][prec_monthly[fips].index[4:-1:12]].to_period('A').rename('Prec_5'),
    #        prec_monthly[fips][prec_monthly[fips].index[5:-1:12]].to_period('A').rename('Prec_6'),
    #        prec_monthly[fips][prec_monthly[fips].index[6:-1:12]].to_period('A').rename('Prec_7')],
    #        axis=1).reset_index()

    P['Year'] = P['Year'].apply(lambda x: x.year)
    P['Prec'] = P.iloc[:, 1::].sum(axis=1)
    P['Prec_percentile'] = (stats.rankdata(P['Prec'], method='average') * 2 -
                            1) / (P['Prec'].shape[0] * 2)

    # Add growing season tmax
    T = pd.concat([
        tmax_monthly[fips][tmax_monthly[fips].index[4:-1:12]].to_period(
            'A').rename('Tmax_5'), tmax_monthly[fips]
        [tmax_monthly[fips].index[5:-1:12]].to_period('A').rename('Tmax_6'),
        tmax_monthly[fips][tmax_monthly[fips].index[6:-1:12]].to_period(
            'A').rename('Tmax_7'), tmax_monthly[fips]
        [tmax_monthly[fips].index[7:-1:12]].to_period('A').rename('Tmax_8')
    ],
                  axis=1).reset_index()

    # T = pd.concat([tmax_monthly[fips][tmax_monthly[fips].index[4:-1:12]].to_period('A').rename('Tmax_5'),
    #        tmax_monthly[fips][tmax_monthly[fips].index[5:-1:12]].to_period('A').rename('Tmax_6'),
    #        tmax_monthly[fips][tmax_monthly[fips].index[6:-1:12]].to_period('A').rename('Tmax_7')],
    #        axis=1).reset_index()

    T['Year'] = T['Year'].apply(lambda x: x.year)
    T['Tmax'] = T.iloc[:, 1::].mean(axis=1)
    T['Tmax_percentile'] = (stats.rankdata(T['Tmax'], method='average') * 2 -
                            1) / (T['Tmax'].shape[0] * 2)

    temp = P.merge(T)

    # Baseline mean/std over all years up to 2020 (or restrict to 1981-2010?)
    c = temp['Year'] <= 2020
    v_mean = temp[c][['Prec', 'Tmax']].apply(np.mean, axis=0)
    v_std = temp[c][['Prec', 'Tmax']].apply(np.std, axis=0)

    # bin yield anomaly and loss

    prec_bin_sigma = [
        v_mean['Prec'] + i * v_std['Prec'] for i in np.arange(-3.5, 3.6, 0.5)
    ]

    prec_bin_rank = np.arange(0, 1.0001, 0.05)

    bin_means1, bin_edges1, binnumber1 = stats.binned_statistic(
        temp['Prec_percentile'],
        temp['Prec_percentile'],
        'mean',
        bins=prec_bin_rank)
    bin_means2, bin_edges2, binnumber2 = stats.binned_statistic(
        temp['Prec'], temp['Prec'], 'mean', bins=prec_bin_sigma)

    temp['Prec_rank_bin'] = binnumber1
    temp['Prec_sigma_bin'] = binnumber2
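    # Only binnumber (the third return value) is used here: it assigns each
    # year to a rank bin / sigma bin; the per-bin means are discarded.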
    temp['Prec_to_sd'] = (temp['Prec'] - v_mean['Prec']) / v_std['Prec']

    tmax_bin_sigma = [
        v_mean['Tmax'] + i * v_std['Tmax'] for i in np.arange(-3.5, 3.6, 0.5)
    ]
    tmax_bin_rank = np.arange(0, 1.00001, 0.05)
    bin_means1, bin_edges1, binnumber1 = stats.binned_statistic(
        temp['Tmax_percentile'],
        temp['Tmax_percentile'],
        'mean',
        bins=tmax_bin_rank)
    bin_means2, bin_edges2, binnumber2 = stats.binned_statistic(
        temp['Tmax'], temp['Tmax'], 'mean', bins=tmax_bin_sigma)

    temp['Tmax_rank_bin'] = binnumber1
    temp['Tmax_sigma_bin'] = binnumber2
    temp['Tmax_to_sd'] = (temp['Tmax'] - v_mean['Tmax']) / v_std['Tmax']

    temp = temp.merge(yield_data[yield_data['FIPS'] == fips])

    # Add loss ratio (for drought)
    if add_loss_ratio:
        for cause_txt in cause_names[0:4]:
            temp = temp.merge(data_rma_yield_top.loc[fips].xs(cause_txt,level=1)[['Loss ratio by cause', 'Cause percent']]. \
                       rename(columns={'Loss ratio by cause': 'loss_ratio_'+cause_txt,'Cause percent':'loss_percent_'+cause_txt}).reset_index(),
                             how='outer')

    return temp
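The percentile transform used for `Prec_percentile` and `Tmax_percentile` above is the Hazen plotting position (r - 0.5)/N, written as (2r - 1)/(2N); a quick standalone check with toy values:

import numpy as np
from scipy import stats

x = np.array([3.0, 1.0, 2.0])
pct = (stats.rankdata(x, method='average') * 2 - 1) / (x.shape[0] * 2)
# pct -> [0.8333..., 0.1666..., 0.5]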
Example 47
def my_add_feature(df_train, df_test, columns, fill_value, coef, num_k):
    edge_right = defaultdict()
    edge_left = defaultdict()

    for column in columns:

        df_col = df_train[df_train[column].notnull()][[column, 'y']]
        st = stats.binned_statistic(df_col[column],
                                    df_col['y'],
                                    statistic='mean',
                                    bins=20)

        # Find features (and a boundary for each) whose distribution looks like a bottom-right corner
        min_value = st.statistic[0] * (1 - coef)
        max_value = st.statistic[0] * (1 + coef)
        k = 0
        mask = ~np.isnan(st.statistic)
        for value, edge in zip(st.statistic[mask], st.bin_edges[:-1][mask]):
            if (value >= min_value) & (value <= max_value):
                k += 1
            else:
                if k > num_k:
                    #print(k, round(edge,2),'right', column)
                    edge_right[column] = edge
                break
        # Find features (and a boundary for each) whose distribution looks like a bottom-left corner
        min_value = st.statistic[-1] * (1 - coef)
        max_value = st.statistic[-1] * (1 + coef)
        k = 0
        mask = ~np.isnan(st.statistic)
        for value, edge in zip(st.statistic[mask][::-1],
                               st.bin_edges[1:][mask][::-1]):
            if (value >= min_value) & (value <= max_value):
                k += 1
            else:
                if k > num_k:
                    #print(k, np.round(edge,2), 'left', column)
                    edge_left[column] = edge
                break
    # Now create new features based on the selected ones
    for column in edge_left.keys():
        column1 = 'const__' + column
        column2 = 'varios__' + column
        column3 = 'nan__' + column
        df_train[column1] = df_train[column].apply(
            lambda x: 1 if x >= edge_left[column] else 0)
        df_train[column2] = df_train[column].apply(
            lambda x: x if x < edge_left[column] else fill_value)
        df_train[column3] = df_train[column].apply(lambda x: 1
                                                   if np.isnan(x) else 0)
        #df_train = df_train.drop(column, axis=1)
    for column in edge_right.keys():
        column1 = 'const__' + column
        column2 = 'varios__' + column
        column3 = 'nan__' + column
        df_train[column1] = df_train[column].apply(
            lambda x: 1 if x <= edge_right[column] else 0)
        df_train[column2] = df_train[column].apply(
            lambda x: x if x > edge_right[column] else fill_value)
        df_train[column3] = df_train[column].apply(lambda x: 1
                                                   if np.isnan(x) else 0)
        #df_train = df_train.drop(column, axis=1)

    # Do the same for the test set, using the boundaries already found
    for column in edge_left.keys():
        column1 = 'const__' + column
        column2 = 'varios__' + column
        column3 = 'nan__' + column
        df_test[column1] = df_test[column].apply(
            lambda x: 1 if x >= edge_left[column] else 0)
        df_test[column2] = df_test[column].apply(
            lambda x: x if x < edge_left[column] else fill_value)
        df_test[column3] = df_test[column].apply(lambda x: 1
                                                 if np.isnan(x) else 0)
        #df_train = df_train.drop(column, axis=1)
    for column in edge_right.keys():
        column1 = 'const__' + column
        column2 = 'varios__' + column
        column3 = 'nan__' + column
        df_test[column1] = df_test[column].apply(
            lambda x: 1 if x <= edge_right[column] else 0)
        df_test[column2] = df_test[column].apply(
            lambda x: x if x > edge_right[column] else fill_value)
        df_test[column3] = df_test[column].apply(lambda x: 1
                                                 if np.isnan(x) else 0)
        #df_train = df_train.drop(column, axis=1)
    return df_train, df_test, edge_right, edge_left
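A hypothetical call (column names, fill value, and thresholds assumed) showing how the function and its returned boundaries would be used:

# df_train, df_test, edge_right, edge_left = my_add_feature(
#     df_train, df_test, columns=['X0', 'X5'], fill_value=-999,
#     coef=0.05, num_k=5)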
Example 48
# Try to add on the viscosity and diffusion (not every run carries these fields).
try:
    data["visc"] = sim.gas.viscosity.value
except AttributeError:
    pass

try:
    data["diff"] = sim.gas.diffusion.value
except AttributeError:
    pass

# Bin the data
x_bin_edge = np.linspace(0.0, boxSize.to(kpc).value)
x_bin = 0.5 * (x_bin_edge[1:] + x_bin_edge[:-1])
binned = {
    k: stats.binned_statistic(data["x"], v, statistic="mean",
                              bins=x_bin_edge)[0]
    for k, v in data.items()
}
square_binned = {
    k: stats.binned_statistic(data["x"],
                              v**2,
                              statistic="mean",
                              bins=x_bin_edge)[0]
    for k, v in data.items()
}
sigma = {
    k: np.sqrt(square_binned[k] - binned[k] ** 2)
    for k in binned
}
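# Since <v**2> - <v>**2 can dip just below zero from floating-point rounding,
# a defensive variant (sketch) would clamp before taking the square root:
# sigma = {k: np.sqrt(np.clip(square_binned[k] - binned[k] ** 2, 0.0, None))
#          for k in binned}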

# Now we can do the plotting.
    def __graph_results(self):
        yes_votes = self.poll_data[:, NUM_YES]
        no_votes = self.poll_data[:, NUM_NO]

        # Std processing
        std_yes_votes = yes_votes[self.std_gamemode_mask]
        std_no_votes = no_votes[self.std_gamemode_mask]
        std_votes = std_yes_votes + std_no_votes

        std_sort_idx = np.argsort(std_votes)
        std_votes = std_votes[std_sort_idx]
        std_yes_votes = std_yes_votes[std_sort_idx]

        std_mean_yes_votes_percent, std_bin_votes, _ = stats.binned_statistic(
            std_votes, std_yes_votes / std_votes, statistic='mean', bins=20)

        #heatmap, _, _ = np.histogram2d(std_votes, std_yes_votes/std_votes, bins=[ 40, 20 ])
        #print(np.where(heatmap == np.max(heatmap)))
        #heatmap_img = pyqtgraph.ImageItem(heatmap, compositionMode=QtGui.QPainter.CompositionMode_Plus)
        #heatmap_img.setRect(QRectF(min(std_votes), min(std_yes_votes/std_votes), max(std_votes) - min(std_votes), max(std_yes_votes/std_votes) - min(std_yes_votes/std_votes)))
        #heatmap_img.setParentItem(self.graphs['std']['plot'])
        #self.graphs['std']['widget'].addItem(heatmap_img)

        # Taiko processing
        taiko_yes_votes = yes_votes[self.taiko_gamemode_mask]
        taiko_no_votes = no_votes[self.taiko_gamemode_mask]
        taiko_votes = taiko_yes_votes + taiko_no_votes

        taiko_sort_idx = np.argsort(taiko_votes)
        taiko_votes = taiko_votes[taiko_sort_idx]
        taiko_yes_votes = taiko_yes_votes[taiko_sort_idx]

        taiko_mean_yes_votes_percent, taiko_bin_votes, _ = stats.binned_statistic(
            taiko_votes,
            taiko_yes_votes / taiko_votes,
            statistic='mean',
            bins=20)

        # Catch processing
        catch_yes_votes = yes_votes[self.catch_gamemode_mask]
        catch_no_votes = no_votes[self.catch_gamemode_mask]
        catch_votes = catch_yes_votes + catch_no_votes

        catch_sort_idx = np.argsort(catch_votes)
        catch_votes = catch_votes[catch_sort_idx]
        catch_yes_votes = catch_yes_votes[catch_sort_idx]

        catch_mean_yes_votes_percent, catch_bin_votes, _ = stats.binned_statistic(
            catch_votes,
            catch_yes_votes / catch_votes,
            statistic='mean',
            bins=20)

        # Mania processing
        mania_yes_votes = yes_votes[self.mania_gamemode_mask]
        mania_no_votes = no_votes[self.mania_gamemode_mask]
        mania_votes = mania_yes_votes + mania_no_votes

        mania_sort_idx = np.argsort(mania_votes)
        mania_votes = mania_votes[mania_sort_idx]
        mania_yes_votes = mania_yes_votes[mania_sort_idx]

        mania_mean_yes_votes_percent, mania_bin_votes, _ = stats.binned_statistic(
            mania_votes,
            mania_yes_votes / mania_votes,
            statistic='mean',
            bins=20)

        # Graphing
        self.graphs['std']['plot'].setData(std_votes,
                                           std_yes_votes / std_votes,
                                           pen=None,
                                           symbol='o',
                                           symbolPen=None,
                                           symbolSize=2,
                                           symbolBrush=(100, 100, 255, 200))
        self.graphs['taiko']['plot'].setData(taiko_votes,
                                             taiko_yes_votes / taiko_votes,
                                             pen=None,
                                             symbol='o',
                                             symbolPen=None,
                                             symbolSize=2,
                                             symbolBrush=(100, 100, 255, 200))
        self.graphs['catch']['plot'].setData(catch_votes,
                                             catch_yes_votes / catch_votes,
                                             pen=None,
                                             symbol='o',
                                             symbolPen=None,
                                             symbolSize=2,
                                             symbolBrush=(100, 100, 255, 200))
        self.graphs['mania']['plot'].setData(mania_votes,
                                             mania_yes_votes / mania_votes,
                                             pen=None,
                                             symbol='o',
                                             symbolPen=None,
                                             symbolSize=2,
                                             symbolBrush=(100, 100, 255, 200))

        self.std_mean_plot.setData(std_bin_votes[:-1],
                                   std_mean_yes_votes_percent,
                                   pen=pyqtgraph.mkPen(color=(255, 255, 0,
                                                              100)))
        self.taiko_mean_plot.setData(taiko_bin_votes[:-1],
                                     taiko_mean_yes_votes_percent,
                                     pen=pyqtgraph.mkPen(color=(255, 255, 0,
                                                                100)))
        self.catch_mean_plot.setData(catch_bin_votes[:-1],
                                     catch_mean_yes_votes_percent,
                                     pen=pyqtgraph.mkPen(color=(255, 255, 0,
                                                                100)))
        self.mania_mean_plot.setData(mania_bin_votes[:-1],
                                     mania_mean_yes_votes_percent,
                                     pen=pyqtgraph.mkPen(color=(255, 255, 0,
                                                                100)))
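Note that the mean curves above are plotted against the left bin edges (`*_bin_votes[:-1]`); plotting against bin centers instead would be a one-line change per mode (sketch):

# std_bin_centers = std_bin_votes[:-1] + 0.5 * np.diff(std_bin_votes)
# self.std_mean_plot.setData(std_bin_centers, std_mean_yes_votes_percent, ...)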
Example 50
xv, yv = np.meshgrid(x, x)
r = np.sqrt((xv + 75)**2 + (yv)**2)

data = xr.DataArray(r, [('y', y), ('x', x)])

data.plot()

# tube selection
# Fitting a polynomial of degree 6 to a tube
tube_axis = data.y.values
tube_values = data[dict(x=-50)].values
coefficients = np.polyfit(tube_axis, tube_values, 6)
tube_fit = np.poly1d(coefficients)
# binning the tubes
bin_means, bin_edges, binnumber = stats.binned_statistic(tube_axis,
                                                         tube_fit(tube_axis),
                                                         statistic='mean',
                                                         bins=40)
bin_width = (bin_edges[1] - bin_edges[0])
bin_centers = bin_edges[1:] - bin_width / 2

# Plotting tube

plt.figure()
plt.plot(tube_axis, tube_values, 'bo', label="Raw")
plt.plot(tube_axis, tube_fit(tube_axis), 'g-', label="Fit")
plt.plot(bin_centers, bin_means, 'r-', label="binning")
plt.legend()

# Pixel selection
# pixel = data[dict(y=-60,x=-50)].values
bin_values_whole_run = []
for snapshot_number in snapshot_numbers:
    snap_df = df.loc[str(
        snapshot_number)]  # Load the data for each snapshot separately
    # Here the axis coordinate of a bubble is used to sort bubbles into bins;
    # e.g. the mean of all 'vx' values in a bin then shows how the bubbles in
    # one y/x slice segment move in the x direction. The slice width is set by
    # `binnum`: reduce it to get wider slices.
    # binned_statistic sorts its first argument into bins and reduces the
    # second argument within each bin with `statistics_function` (e.g. "mean"),
    # returning one number per bin: bin_values holds those per-bin results,
    # bin_edges the bin boundaries, and binnumber the bin index of each sample
    # (see the small sketch after the bin-center computation below).
    bin_values, bin_edges, binnumber = stats.binned_statistic(
        snap_df[axis],
        snap_df[parameter_to_apply_statistics],
        statistics_function,
        bins=binnum)
    bin_values_whole_run.append(bin_values)

# Each row contains a list of the bin values for a bin
bin_values_whole_run = np.asarray(bin_values_whole_run)

bin_width = (bin_edges[1] - bin_edges[0])  # width of one bin
bin_centers = bin_edges[1:] - bin_width / 2  # mid point of bins
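# Minimal illustration of the pattern described above (toy values, not part
# of this run): the second argument is reduced per bin of the first.
# from scipy import stats
# vals, edges, idx = stats.binned_statistic([0.1, 0.4, 0.6],
#                                           [1.0, 3.0, 5.0], 'mean', bins=2)
# vals -> array([1., 4.]); idx -> array([1, 2, 2])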

# Will create two separate graphs
fig = plt.figure(figsize=(24, 12))
origin_str = f"{snapshot_folder.parents[1].stem}/{snapshot_folder.parents[0].stem}/{snapshot_folder.stem}"
fig.suptitle(
    origin_str)  # Meaningful title for easier tracking of data source later
Example 52
def quantify_scatter(x,
                     y,
                     xbin_c,
                     weights=None,
                     inclusive=False,
                     method_avg='avg',
                     method_std='std',
                     cdf_lim=0.7,
                     pdf_bins=50):
    """
    Quantify the scatter in some relationship between two variables, x and y.

    Parameters
    ----------
    x : np.ndarray
        Independent variable
    y : np.array
        Dependent variable
    xbin_c : np.ndarray
        Bin centers for `x`
    weights : np.ndarray
        Can weight each samples by some factor.
    inclusive : bool
        Include samples above or below bounding bins in said bins.
    method_avg : str
        How to quantify the average y value in each x bin.
        Options: 'avg', 'median', 'mode'
    method_std : str, float
        How to quantify the spread in y in each x bin.
        Options: 'std', 'normal', 'lognormal', 'bounds', 'pdf', float
        If a float is provided, assume it's a percentile, e.g.,
        method_std=0.68 will return boundaries of region containing 68% of
        samples.
    cdf_lim : float
        If fitting a normal or log-normal function to the distribution in each
        bin, include the PDF up until this value of the CDF when fitting the
        distribution. Essentially a kludge to exclude long tails in fit, to
        better capture peak and width of (main part of) the distribution.

    """

    xbin_e = bin_c2e(xbin_c)

    if weights is None:
        have_weights = False
        # uniform weights keep the weighted (slow) path below functional
        weights = np.ones(len(y))
    elif np.all(np.diff(weights) == 0):
        # constant weights are equivalent to no weights
        have_weights = False
    else:
        have_weights = True

    if not have_weights:

        if method_std in ['std', 'sum'] and method_avg == 'avg':

            print(
                "Deferring to scipy.stats.binned_statistic (no effective weights)."
            )

            yavg, _b, binid = binned_statistic(x,
                                               y,
                                               statistic='mean',
                                               bins=xbin_e)
            ysca, _b, binid = binned_statistic(x,
                                               y,
                                               statistic=method_std,
                                               bins=xbin_e)
            N, _b, binid = binned_statistic(x,
                                            y,
                                            statistic='count',
                                            bins=xbin_e)

            return xbin_c, yavg, ysca, N

    ysca = []
    yavg = []
    N = []
    for i, lo in enumerate(xbin_e):
        if i == len(xbin_e) - 1:
            break

        # Upper edge of bin
        hi = xbin_e[i + 1]

        if inclusive and i == 0:
            ok = x < hi
            ok = np.logical_and(ok, np.isfinite(y))
        elif inclusive and i == len(xbin_e) - 2:
            # last bin: include everything above its lower edge
            ok = x >= lo
            ok = np.logical_and(ok, np.isfinite(y))
        else:
            ok = np.logical_and(x >= lo, x < hi)
            ok = np.logical_and(ok, np.isfinite(y))

        f = y[ok == 1]

        # What to do when there aren't any samples in a bin?
        # Move on, that's what. Add masked elements.
        if (f.size == 0) or (weights[ok == 1].sum() == 0):

            yavg.append(-np.inf)
            if method_std == 'bounds' or type(method_std) in [
                    int, float, np.float64
            ]:
                ysca.append((-np.inf, -np.inf))
            elif type(method_std) in [list, tuple]:
                ysca.append((-np.inf, -np.inf))
            else:
                ysca.append(-np.inf)

            N.append(0)
            continue

        # If we made it here, we've got some samples.
        # Record the number of samples in this bin, the average value,
        # and some measure of the scatter.
        N.append(sum(ok == 1))

        yavg.append(np.average(f, weights=weights[ok == 1]))

        if method_std == 'std':
            ysca.append(np.std(f))
        elif method_std == 'sum':
            ysca.append(np.sum(f * weights[ok == 1]))
        elif method_std in _dist_opts:

            if method_std.startswith('lognormal'):
                pdf, ye = np.histogram(np.log10(y[ok == 1]),
                                       density=1,
                                       weights=weights[ok == 1],
                                       bins=pdf_bins)
            else:
                pdf, ye = np.histogram(y[ok == 1],
                                       density=1,
                                       weights=weights[ok == 1],
                                       bins=pdf_bins)

            yc = bin_e2c(ye)

            if method_std == 'pdf':
                ysca.append((yc, pdf))
                continue

            cdf = np.cumsum(pdf) / np.sum(pdf)

            if method_std == 'cdf':
                ysca.append((yc, cdf))
                continue

            # Compute median to use as initial guess
            med = np.interp(0.5, cdf, yc)
            std = np.nanstd(y[ok==1]) if method_std.startswith('norm') \
                else np.nanstd(np.log10(y[ok==1]))

            # Make sure we go a little past the peak in the fit.
            yc_fit = yc[cdf <= cdf_lim]
            pdf_fit = pdf[cdf <= cdf_lim]

            # If CDF very sharp, just use all of it.
            if len(pdf_fit) < 3 + int('skewnormal' in method_std):
                yc_fit = yc
                pdf_fit = pdf

            if 'skew' in method_std:
                _model = _normal_skew
                p0 = [pdf.max(), med, std, 1.1]
            else:
                _model = _normal
                p0 = [pdf.max(), med, std]

            print("guesses: {}".format(p0))

            try:
                pval, pcov = curve_fit(_model,
                                       yc_fit,
                                       pdf_fit,
                                       p0=p0,
                                       maxfev=100000)
            except RuntimeError:
                print("Gaussian fit failed!")
                pval = [-np.inf] * (3 + int('skewnormal' in method_std))

            if '-pars' in method_std:
                ysca.append(pval)
            else:
                ysca.append(pval[2])

        elif method_std == 'bounds':
            ysca.append((np.min(f), np.max(f)))
        elif type(method_std) in [int, float, np.float64]:
            q1 = 0.5 * 100 * (1. - method_std)
            q2 = 100 * method_std + q1
            lo, hi = np.percentile(f, (q1, q2))
            ysca.append((lo, hi))
        elif type(method_std) in [list, tuple]:
            q1 = 100 * method_std[0]
            q2 = 100 * method_std[1]
            lo, hi = np.percentile(f, (q1, q2))
            ysca.append((lo, hi))
        else:
            raise NotImplementedError("unsupported method_std: {!r}".format(method_std))

    return np.array(xbin_c), np.array(yavg), np.array(ysca), np.array(N)
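The unweighted fast path above can be reproduced standalone; a minimal sketch with toy data (the edges are written out directly here, as `bin_c2e` would return them):

import numpy as np
from scipy.stats import binned_statistic

x = np.random.uniform(0.0, 10.0, 5000)
y = 2.0 * x + np.random.normal(0.0, 1.0, 5000)
xbin_e = np.linspace(0.0, 10.0, 11)   # bin edges

yavg, _, _ = binned_statistic(x, y, statistic='mean', bins=xbin_e)
ysca, _, _ = binned_statistic(x, y, statistic='std', bins=xbin_e)
N, _, _ = binned_statistic(x, y, statistic='count', bins=xbin_e)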
Example 53
def main():
    objects = OrderedDict([('AGC198511', 22.91), ('AGC198606', 24.72),
                           ('AGC215417', 22.69), ('HI1151+20', 24.76),
                           ('AGC249525', 26.78), ('AGC268069', 24.24)])
    smooths = OrderedDict([('AGC198511', 3.0), ('AGC198606', 2.0),
                           ('AGC215417', 3.0), ('HI1151+20', 2.0),
                           ('AGC249525', 3.0), ('AGC268069', 3.0)])
    filter_file = os.path.dirname(os.path.abspath(__file__)) + '/filter.txt'

    for file_ in os.listdir("./"):
        if file_.endswith("i.fits"):
            fits_file_i = file_

    for file_ in os.listdir("./"):
        if file_.endswith("g.fits"):
            fits_file_g = file_

    # downloadSDSSgal(fits_file_g, fits_file_i)

    fits_i = fits.open(fits_file_i)
    # fits_g = fits.open(fits_file_g)
    # print "Opened fits files:",fits_file_g,"&",fits_file_i

    # objid = fits_i[0].header['OBJECT']
    title_string = fits_file_i.split('_')[
        0]  # get the name part of the filename.

    # set up some filenames
    mag_file = 'calibrated_mags.dat'

    # read in magnitudes, colors, and positions(x,y)
    # gxr,gyr,g_magr,g_ierrr,ixr,iyr,i_magr,i_ierrr,gmir,fwhm_sr= np.loadtxt(mag_file,usecols=(0,1,2,3,4,5,6,7,8,11),unpack=True)
    gxr, gyr, g_magr, g_ierrr, ixr, iyr, i_magr, i_ierrr, gmir = np.loadtxt(
        mag_file, usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8), unpack=True)
    # print len(gxr), "total stars"
    fwhm_sr = np.ones_like(gxr)
    # filter out sources with poor color or magnitude errors
    color_error_cut = np.sqrt(2.0) * 0.2
    mag_error_cut = 0.2

    gmi_errr = np.sqrt(g_ierrr**2 + i_ierrr**2)
    # keep sources whose (non-negative) color error and i-band error pass the cuts
    keep = (gmi_errr < color_error_cut) & (i_ierrr < mag_error_cut)

    gx, gy = gxr[keep], gyr[keep]
    g_mag, g_ierr = g_magr[keep], g_ierrr[keep]
    ix, iy = ixr[keep], iyr[keep]
    i_mag, i_ierr = i_magr[keep], i_ierrr[keep]
    gmi = gmir[keep]
    fwhm_s = fwhm_sr[keep]
    gmi_err = gmi_errr[keep]
    cutleft = np.where(keep)[0]
    with open('cutleft.txt', 'w+') as spud:
        for i, item in enumerate(cutleft):
            print(item, file=spud)

    i_ierrAVG, bedges, binid = ss.binned_statistic(i_mag,
                                                   i_ierr,
                                                   statistic='median',
                                                   bins=10,
                                                   range=[15, 25])
    gmi_errAVG, bedges, binid = ss.binned_statistic(i_mag,
                                                    gmi_err,
                                                    statistic='median',
                                                    bins=10,
                                                    range=[15, 25])

    bcenters = (bedges[:-1] + bedges[1:]) / 2
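    # Note: with range=[15, 25], sources outside that magnitude window are
    # excluded from every bin, so the medians summarize only 15 < i_mag < 25.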
    bxvals = [3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75, 3.75]
    # print bcenters
    # print i_ierrAVG
    # print gmi_errAVG
    # print len(gx), "after color+mag error cut"
    # nid = np.loadtxt(mag_file,usecols=(0,),dtype=int,unpack=True)
    pixcrd = list(zip(ix, iy))

    # print "Reading WCS info from image header..."
    # Parse the WCS keywords in the primary HDU
    warnings.filterwarnings('ignore', category=UserWarning, append=True)
    w = wcs.WCS(fits_i[0].header)
    # print fits_i[0].header['naxis1'], fits_i[0].header['naxis2']
    footprint = w.calc_footprint()
    se_corner = footprint[0]
    ne_corner = footprint[1]
    nw_corner = footprint[2]
    sw_corner = footprint[3]
    # print se_corner, ne_corner, nw_corner, sw_corner
    width = (ne_corner[0] - nw_corner[0]) * 60.
    height = (ne_corner[1] - se_corner[1]) * 60.
    # print width, height

    # Print out the "name" of the WCS, as defined in the FITS header
    # print w.wcs.name

    # Print out all of the settings that were parsed from the header
    # w.wcs.print_contents()

    # Convert pixel coordinates to world coordinates
    # The second argument is "origin" -- in this case we're declaring we
    # have 1-based (Fortran-like) coordinates.
    world = w.all_pix2world(pixcrd, 1)
    ra_corner, dec_corner = w.all_pix2world(0, 0, 1)
    ra_c_d, dec_c_d = deg2HMS(ra=ra_corner, dec=dec_corner, round=True)
    # print 'Corner RA:',ra_c_d,':: Corner Dec:',dec_c_d

    fwhm_i = 12.0  #fits_i[0].header['FWHMPSF']
    fwhm_g = 9.0  # fits_g[0].header['FWHMPSF']

    # print 'Image FWHM :: g = {0:5.3f} : i = {1:5.3f}'.format(fwhm_g,fwhm_i)

    fits_i.close()
    # fits_g.close()

    # split the ra and dec out into individual arrays and transform to arcmin from the corner
    i_ra = [
        abs((world[i, 0] - ra_corner) * 60) for i in range(len(world[:, 0]))
    ]
    i_dec = [
        abs((world[i, 1] - dec_corner) * 60) for i in range(len(world[:, 1]))
    ]
    # also preserve the decimal degrees for reference
    i_rad = [world[i, 0] for i in range(len(world[:, 0]))]
    i_decd = [world[i, 1] for i in range(len(world[:, 1]))]

    i_magBright = [i_mag[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]
    g_magBright = [g_mag[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]
    ixBright = [ix[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]
    iyBright = [iy[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]
    i_radBright = [i_rad[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]
    i_decdBright = [i_decd[i] for i in range(len(i_mag)) if (i_mag[i] < 22.75)]

    if not os.path.isfile('brightStars2275.reg'):
        f1 = open('brightStars2275.reg', 'w+')
        for i in range(len(i_magBright)):
            print(
                '{0:12.4f} {1:12.4f} {2:10.5f} {3:9.5f} {4:8.2f} {5:8.2f} {6:8.2f}'
                .format(ixBright[i], iyBright[i], i_radBright[i],
                        i_decdBright[i], g_magBright[i], i_magBright[i],
                        g_magBright[i] - i_magBright[i]),
                file=f1)
        f1.close()

    i_magRed = [i_mag[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]
    g_magRed = [g_mag[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]
    ixRed = [ix[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]
    iyRed = [iy[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]
    i_radRed = [i_rad[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]
    i_decdRed = [i_decd[i] for i in range(len(i_mag)) if (gmi[i] > 1.75)]

    if not os.path.isfile('redStars175.reg'):
        f1 = open('redStars175.reg', 'w+')
        for i in range(len(i_magRed)):
            print(
                '{0:12.4f} {1:12.4f} {2:10.5f} {3:9.5f} {4:8.2f} {5:8.2f} {6:8.2f}'
                .format(ixRed[i], iyRed[i], i_radRed[i], i_decdRed[i],
                        g_magRed[i], i_magRed[i], g_magRed[i] - i_magRed[i]),
                file=f1)
        f1.close()

    dm = 22.0
    dm2 = 27.0
    fwhm = 2.0

    # if dm2 > 0.0 and filter_string != 'none':
    dms = np.arange(dm, dm2, 0.01)
    # search = open('search_{:3.1f}.txt'.format(fwhm),'w+')
    # else:
    #     dms = [dm]
    # search = open('spud.txt'.format(fwhm),'w+')

    # sig_bins = []
    # sig_cens = []
    # sig_max = []

    for dm in dms:
        mpc = pow(10, ((dm + 5.) / 5.)) / 1000000.
        dm_string = '{:5.2f}'.format(dm).replace('.', '_')

        out_file = 'anim/anim_' + dm_string + '_' + title_string + '.png'
        # mark_file = 'f_list_' + filter_string + '_' + fwhm_string + '_' + dm_string + '_' + title_string + '.reg'
        # filter_reg = 'f_reg_' + filter_string + '_' + fwhm_string + '_' + dm_string + '_' + title_string + '.reg'
        # circ_file = 'c_list_' + filter_string + '_' + fwhm_string + '_' + dm_string + '_' + title_string + '.reg'
        # fcirc_file = 'fc_list_' + filter_string + '_' + fwhm_string + '_' + dm_string + '_' + title_string + '.reg'
        # ds9_file = 'circles_' + filter_string + '_' + fwhm_string + '_' + dm_string + '_' + title_string + '.reg'
        circles_file = 'region_coords.dat'

        cm_filter, gi_iso, i_m_iso = make_filter(dm, filter_file)
        stars_f = filter_sources(i_mag,
                                 i_ierr,
                                 gmi,
                                 gmi_err,
                                 cm_filter,
                                 filter_sig=1)

        xy_points = list(zip(i_ra, i_dec))

        # make new vectors containing only the filtered points

        i_mag_f = [i_mag[i] for i in range(len(i_mag)) if (stars_f[i])]
        g_mag_f = [g_mag[i] for i in range(len(i_mag)) if (stars_f[i])]
        gmi_f = [gmi[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_ra_f = [i_ra[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_dec_f = [i_dec[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_rad_f = [i_rad[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_decd_f = [i_decd[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_x_f = [ix[i] for i in range(len(i_mag)) if (stars_f[i])]
        i_y_f = [iy[i] for i in range(len(i_mag)) if (stars_f[i])]
        fwhm_sf = [fwhm_s[i] for i in range(len(i_mag)) if (stars_f[i])]
        n_in_filter = len(i_mag_f)

        # xedgesg, x_centg, yedgesg, y_centg, Sg, x_cent_Sg, y_cent_Sg, pltsigg, tblg = galaxyMap(fits_file_i, fwhm, dm, filter_file)

        xedges, x_cent, yedges, y_cent, S, x_cent_S, y_cent_S, pltsig, tbl = grid_smooth(
            i_ra_f, i_dec_f, fwhm, width, height)
        # corr = signal.correlate2d(S, Sg, boundary='fill', mode='full')
        # print corr

        # pct, d_bins, d_cens = distfit(n_in_filter,S[x_cent_S][y_cent_S],title_string,width,height,fwhm,dm)
        # pct_hi = 0.0 #getHIcoincidence(x_cent_S, y_cent_S, title_string, ra_corner, dec_corner, width, height, dm)

        # sig_bins.append(d_bins)
        # sig_cens.append(d_cens)
        # sig_max.append(S[x_cent_S][y_cent_S])

        # if pct > 90 :
        #     pct, bj,cj = distfit(n_in_filter,S[x_cent_S][y_cent_S],title_string,width,height,fwhm,dm, samples=25000)

        # make a circle to highlight a certain region
        cosd = lambda x: np.cos(np.deg2rad(x))
        sind = lambda x: np.sin(np.deg2rad(x))
        x_circ = [yedges[y_cent] + 3.0 * cosd(t) for t in range(0, 359, 1)]
        y_circ = [xedges[x_cent] + 3.0 * sind(t) for t in range(0, 359, 1)]

        verts_circ = list(zip(x_circ, y_circ))
        circ_filter = Path(verts_circ)

        stars_circ = circ_filter.contains_points(xy_points)

        i_mag_c = [i_mag[i] for i in range(len(i_mag)) if (stars_circ[i])]
        gmi_c = [gmi[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_ra_c = [i_ra[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_dec_c = [i_dec[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_rad_c = [i_rad[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_decd_c = [i_decd[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_x_c = [ix[i] for i in range(len(i_mag)) if (stars_circ[i])]
        i_y_c = [iy[i] for i in range(len(i_mag)) if (stars_circ[i])]
        fwhm_sc = [fwhm_s[i] for i in range(len(i_mag)) if (stars_circ[i])]

        # make a random reference cmd to compare to
        if not os.path.isfile('refCircle.center'):
            rCentx = 16.0 * np.random.random() + 2.0
            rCenty = 16.0 * np.random.random() + 2.0
            with open('refCircle.center', 'w+') as rc:
                print('{:8.4f} {:8.4f}'.format(rCentx, rCenty), file=rc)
        else:
            rCentx, rCenty = np.loadtxt('refCircle.center',
                                        usecols=(0, 1),
                                        unpack=True)

        x_circr = [rCentx + 3.0 * cosd(t) for t in range(0, 359, 1)]
        y_circr = [rCenty + 3.0 * sind(t) for t in range(0, 359, 1)]

        verts_circr = list(zip(x_circr, y_circr))
        rcirc_filter = Path(verts_circr)

        stars_circr = rcirc_filter.contains_points(xy_points)

        i_mag_cr = [i_mag[i] for i in range(len(i_mag)) if (stars_circr[i])]
        gmi_cr = [gmi[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_ra_cr = [i_ra[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_dec_cr = [i_dec[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_rad_cr = [i_rad[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_decd_cr = [i_decd[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_x_cr = [ix[i] for i in range(len(i_mag)) if (stars_circr[i])]
        i_y_cr = [iy[i] for i in range(len(i_mag)) if (stars_circr[i])]
        fwhm_scr = [fwhm_s[i] for i in range(len(i_mag)) if (stars_circr[i])]

        i_mag_fc = [
            i_mag[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        i_ierr_fc = [
            i_ierr[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        g_ierr_fc = [
            g_ierr[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        g_mag_fc = [
            i_mag[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        gmi_fc = [
            gmi[i] for i in range(len(i_mag)) if (stars_circ[i] and stars_f[i])
        ]
        i_ra_fc = [
            i_ra[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        i_dec_fc = [
            i_dec[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        i_rad_fc = [
            i_rad[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        i_decd_fc = [
            i_decd[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        i_x_fc = [
            ix[i] for i in range(len(i_mag)) if (stars_circ[i] and stars_f[i])
        ]
        i_y_fc = [
            iy[i] for i in range(len(i_mag)) if (stars_circ[i] and stars_f[i])
        ]
        fwhm_sfc = [
            fwhm_s[i] for i in range(len(i_mag))
            if (stars_circ[i] and stars_f[i])
        ]
        index_fc = [
            i for i in range(len(i_mag)) if (stars_circ[i] and stars_f[i])
        ]
        # with open('index_fc.txt', 'w+') as spud1:
        #     for i,item in enumerate(index_fc):
        #         print >> spud1, item

        # print len(i_mag_fc), 'filter stars in circle'

        # print 'max i mag in circle = ', min(i_mag_fc)

        rs = np.array([51, 77, 90, 180])
        for r in rs:
            x_circ = [
                yedges[y_cent] + r / 60. * cosd(t) for t in range(0, 359, 1)
            ]
            y_circ = [
                xedges[x_cent] + r / 60. * sind(t) for t in range(0, 359, 1)
            ]

            verts_circ = list(zip(x_circ, y_circ))
            circ_filter = Path(verts_circ)

            stars_circ = circ_filter.contains_points(xy_points)
            i_x_fc = [
                ix[i] for i in range(len(i_mag))
                if (stars_circ[i] and stars_f[i])
            ]
            i_y_fc = [
                iy[i] for i in range(len(i_mag))
                if (stars_circ[i] and stars_f[i])
            ]

            # fcirc_file = 'circle'+repr(r)+'.txt'
            # with open(fcirc_file,'w+') as f3:
            #     for i,x in enumerate(i_x_fc):
            #         print >> f3, i_x_fc[i], i_y_fc[i]

        i_mag_fcr = [
            i_mag[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_ierr_fcr = [
            i_ierr[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        g_ierr_fcr = [
            g_ierr[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        g_mag_fcr = [
            i_mag[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        gmi_fcr = [
            gmi[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_ra_fcr = [
            i_ra[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_dec_fcr = [
            i_dec[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_rad_fcr = [
            i_rad[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_decd_fcr = [
            i_decd[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]
        i_x_fcr = [
            ix[i] for i in range(len(i_mag)) if (stars_circr[i] and stars_f[i])
        ]
        i_y_fcr = [
            iy[i] for i in range(len(i_mag)) if (stars_circr[i] and stars_f[i])
        ]
        fwhm_sfcr = [
            fwhm_s[i] for i in range(len(i_mag))
            if (stars_circr[i] and stars_f[i])
        ]

        rcirc_c_x = ra_corner - (rCentx / 60.)
        rcirc_c_y = (rCenty / 60.) + dec_corner
        rcirc_pix_x, rcirc_pix_y = w.wcs_world2pix(rcirc_c_x, rcirc_c_y, 1)
        ra_cr, dec_cr = w.all_pix2world(rcirc_pix_x, rcirc_pix_y, 1)
        ra_cr_d, dec_cr_d = deg2HMS(ra=ra_cr, dec=dec_cr, round=False)

        circ_c_x = ra_corner - (yedges[y_cent] / 60.)
        circ_c_y = (xedges[x_cent] / 60.) + dec_corner
        circ_pix_x, circ_pix_y = w.wcs_world2pix(circ_c_x, circ_c_y, 1)
        ra_c, dec_c = w.all_pix2world(circ_pix_x, circ_pix_y, 1)
        ra_c_d, dec_c_d = deg2HMS(ra=ra_c, dec=dec_c, round=False)
        # print 'Peak RA:',ra_c_d,':: Peak Dec:',dec_c_d

        hi_x_circ, hi_y_circ = getHIellipse(title_string, ra_corner,
                                            dec_corner)
        hi_c_ra, hi_c_dec = getHIellipse(title_string,
                                         ra_corner,
                                         dec_corner,
                                         centroid=True)
        hi_pix_x, hi_pix_y = w.wcs_world2pix(hi_c_ra, hi_c_dec, 1)

        sep = dist2HIcentroid(ra_c_d, dec_c_d, hi_c_ra, hi_c_dec)

        # print "m-M = {:5.2f} | d = {:4.2f} Mpc | α = {:s}, δ = {:s}, Δʜɪ = {:5.1f}' | N = {:4d} | σ = {:6.3f} | ξ = {:6.3f}% | η = {:6.3f}%".format(dm, mpc, ra_c_d, dec_c_d, sep/60., n_in_filter, S[x_cent_S][y_cent_S], pct, pct_hi*100.)

        fig = plt.figure(figsize=(8, 6))
        # plot
        # print "Plotting for m-M = ",dm
        ax0 = plt.subplot(2, 2, 1)
        plt.scatter(i_ra,
                    i_dec,
                    color='black',
                    marker='o',
                    s=1,
                    edgecolors='none')
        plt.plot(x_circ, y_circ, linestyle='-', color='magenta')
        plt.plot(x_circr, y_circr, linestyle='-', color='gold')
        plt.plot(hi_x_circ, hi_y_circ, linestyle='-', color='limegreen')
        # plt.scatter(i_ra_c, i_dec_c,  color='red', marker='o', s=3, edgecolors='none')
        plt.scatter(i_ra_f,
                    i_dec_f,
                    c='red',
                    marker='o',
                    s=10,
                    edgecolors='none')
        # plt.clim(0,2)
        # plt.colorbar()
        plt.ylabel('Dec (arcmin)')
        plt.xlim(0, max(i_ra))
        plt.ylim(0, max(i_dec))
        plt.title('sky positions')
        ax0.set_aspect('equal')

        ax1 = plt.subplot(2, 2, 2)

        if os.path.isfile('i_gmi_compl.gr.out'):
            gmiCompl, iCompl = np.loadtxt('i_gmi_compl.gr.out',
                                          usecols=(0, 1),
                                          unpack=True)
            plt.plot(gmiCompl, iCompl, linestyle='--', color='green')
        if os.path.isfile('i_gmi_compl.gr2.out'):
            gmiCompl, iCompl = np.loadtxt('i_gmi_compl.gr2.out',
                                          usecols=(0, 1),
                                          unpack=True)
            plt.plot(gmiCompl, iCompl, linestyle='--', color='red')
        if os.path.isfile('i_gmi_compl2.out'):
            iCompl, gmiCompl = np.loadtxt('i_gmi_compl2.out',
                                          usecols=(0, 1),
                                          unpack=True)
            plt.plot(gmiCompl, iCompl, linestyle='--', color='blue')

        plt.plot(gi_iso, i_m_iso, linestyle='-', color='blue')
        plt.scatter(gmi,
                    i_mag,
                    color='black',
                    marker='o',
                    s=1,
                    edgecolors='none')
        plt.scatter(gmi_f,
                    i_mag_f,
                    color='red',
                    marker='o',
                    s=15,
                    edgecolors='none')

        # plt.scatter(gmi_c, i_mag_c,  color='red', marker='o', s=3, edgecolors='none')
        plt.errorbar(bxvals,
                     bcenters,
                     xerr=i_ierrAVG,
                     yerr=gmi_errAVG,
                     linestyle='None',
                     color='black',
                     capsize=0,
                     ms=0)
        plt.tick_params(axis='y',
                        left='on',
                        right='off',
                        labelleft='on',
                        labelright='off')
        ax1.yaxis.set_label_position('left')
        plt.ylabel('$i_0$')
        plt.xlabel('$(g-i)_0$')
        plt.ylim(25, 15)
        plt.xlim(-1, 4)
        plt.title('m-M = ' + '{:5.2f}'.format(dm) + ' (' +
                  '{0:4.2f}'.format(mpc) + ' Mpc)')
        ax1.set_aspect(0.5)

        ax2 = plt.subplot(2, 2, 3)

        extent = [yedges[0], yedges[-1], xedges[-1], xedges[0]]
        plt.imshow(S, extent=extent, interpolation='nearest', cmap=cm.gray)
        # plt.imshow(segm, extent=extent, cmap=rand_cmap, alpha=0.5)
        cbar_S = plt.colorbar()
        cbar_S.set_label(r'$\sigma$ from local mean')
        # cbar_S.tick_params(labelsize=10)
        plt.plot(x_circ, y_circ, linestyle='-', color='magenta')
        plt.plot(x_circr, y_circr, linestyle='-', color='gold')
        plt.plot(hi_x_circ, hi_y_circ, linestyle='-', color='limegreen')
        # X, Y = np.meshgrid(xedges,yedges)
        # ax3.pcolormesh(X,Y,grid_gaus)
        plt.xlabel('RA (arcmin)')
        plt.ylabel('Dec (arcmin)')
        plt.title('smoothed stellar density')
        # plt.ylabel('Dec (arcmin)')
        plt.xlim(0, max(i_ra))
        plt.ylim(0, max(i_dec))
        ax2.set_aspect('equal')

        # ax3 = plt.subplot(2,2,4)
        ax3 = plt.subplot2grid((2, 4), (1, 2))
        plt.scatter(gmi_c,
                    i_mag_c,
                    color='black',
                    marker='o',
                    s=3,
                    edgecolors='none')
        plt.scatter(gmi_fc,
                    i_mag_fc,
                    color='red',
                    marker='o',
                    s=15,
                    edgecolors='none')
        plt.tick_params(axis='y',
                        left='on',
                        right='on',
                        labelleft='off',
                        labelright='off')
        ax0.yaxis.set_label_position('left')
        plt.title('detection')
        plt.xlabel('$(g-i)_0$')
        plt.ylabel('$i_0$')
        plt.ylim(25, 15)
        plt.xlim(-1, 4)
        # ax3.set_aspect(0.5)

        ax4 = plt.subplot2grid((2, 4), (1, 3), sharey=ax3)
        plt.scatter(gmi_cr,
                    i_mag_cr,
                    color='black',
                    marker='o',
                    s=3,
                    edgecolors='none')
        plt.scatter(gmi_fcr,
                    i_mag_fcr,
                    color='red',
                    marker='o',
                    s=15,
                    edgecolors='none')
        plt.tick_params(axis='y',
                        left='on',
                        right='on',
                        labelleft='off',
                        labelright='on')
        plt.title('reference')
        ax0.yaxis.set_label_position('left')
        plt.xlabel('$(g-i)_0$')
        plt.ylim(25, 15)
        plt.xlim(-1, 4)
        # ax3.set_aspect(0.5)
        plt.tight_layout()
        plt.savefig(out_file)
        fig.clf()

    pass
Example 54
    def distance_hdig(self, Xp, yp, X, y):
        """
        Distance based on Hellinger Distance and Information Gain.
        
        References
        ----------
        .. [1] Lichtenwalter, Ryan N., and Nitesh V. Chawla. "Adaptive methods for classification in arbitrarily imbalanced and drifting data streams." Pacific-Asia Conference on Knowledge Discovery and Data Mining. Springer, Berlin, Heidelberg, 2009.
        
        Parameters
        ----------
        Xp : previous chunk t - samples
        yp : previous chunk t - classes
        X : current chunk t+1 - samples
        y : current chunk t+1 - classes
        self.n_bins : number of bins for numerical feature value 
        
        Returns
        -------
          : distance/weight
        """
        
        self.n_features = X[0].size
        
        # Number of elements/samples in the array y 
        len_y = len(y)
        
        # Bin size for numerical feature with initial value = -1
        bin_size = [-1] * self.n_features
        
        # !!! Check which features are numerical (continuous) or categorical; assume numerical for now - TODO: distinguish the two types
        is_numerical = [True] * self.n_features
        
        # Number of labels/samples belonging to class0 and class1
        classes_value, n_classlabels = np.unique(y, return_counts=True)
        
        # Calculate parent entropy (Shannon entropy of the class labels)
        e_parent = entropy([n_classlabels[0]/len_y, n_classlabels[1]/len_y], base=2)
        
        # number of samples in every bin, for every feature, without division into classes
        bin_counts = []
        
        HDIG = []
        n_bins = self.n_bins
   
        # Calculate child entropy (entropy for every feature)
        for i in range(self.n_features):
            if is_numerical[i]:
                if bin_size[i] == -1:
                    # Split into bins for every feature; 3D array: axis0-features, axis1-bins, axis2-classes; this array contains number of samples, which belong to these categories
                    bin_classes = []
                    # Initial value for weighted average entropy
                    e_weighted_avg_f = 0
                    
                    # Feature column for c(current) and p(previous) chunk
                    feature_c = X[:,i]
                    feature_p = Xp[:,i]

                    minimum_c = np.amin(feature_c)
                    maximum_c = np.amax(feature_c)
                    minimum_p = np.amin(feature_p)
                    maximum_p = np.amax(feature_p)

                    minimum = min(minimum_c,minimum_p)
                    maximum = max(maximum_c,maximum_p)
                    
                    # !!! In case the bins are computed a different way:
                    # bin_size[i] = (maximum - minimum)/n_bins
                    
                    # binned_statistic splits [minimum, maximum] into n_bins equal-width bins;
                    # stat holds the per-bin sample counts and bin_n the bin index of every sample
                    stat, bin_e, bin_n = binned_statistic(feature_c, feature_c, bins=n_bins, statistic='count', range=(minimum,maximum))
                    bin_counts = stat.astype(int).tolist()
                         
                    for index, bin_i in enumerate(np.unique(bin_n)):
                        bin_index = np.where(bin_n==bin_i)
                        classes, class_count = np.unique(y[bin_index], return_counts=True)
                        
                        # If only one class appears in this bin, pad classes/class_count with the missing class (count 0)
                        if len(classes) == 1: 
                            if classes[0] != 0:
                                class_count1 = class_count[0]
                                class_count[0] = 0
                                class_count = np.append(class_count, class_count1)
                            else:
                                classes[0] = 0
                                classes = np.append(classes, 1)
                                class_count = np.append(class_count, 0)
                            
                        bin_classes.append(class_count.tolist())

                        # Child entropy for this bin of the feature
                        e_child_f = entropy([bin_classes[index][0]/bin_counts[bin_i-1], bin_classes[index][1]/bin_counts[bin_i-1]], base=2)
                        # Accumulate the bin's contribution to the weighted average entropy
                        e_weighted_avg_f += (bin_counts[bin_i-1]/len_y)*e_child_f

                    # Calculate Information Gain
                    inf_gain_f = e_parent-e_weighted_avg_f

                    # Count values in bins of previous chunk
                    stat, bin_e, bin_n = binned_statistic(feature_p, feature_p, bins=n_bins, statistic='count', range=(minimum,maximum))
                    bin_counts_p = stat.astype(int).tolist()

                    p = [a/len(Xp) for a in bin_counts_p]
                    q = [b/len(X) for b in bin_counts]
                    hellinger_dist_f = sqrt(sum((np.sqrt(p)-np.sqrt(q))**2))

                    # Calculate HDIG - Hellinger Distance with Information Gain
                    HDIG_f = hellinger_dist_f*(1+inf_gain_f)
                    HDIG.append(HDIG_f)
                    
                    
            # else:
                # !!! Handling for categorical features will go here
            
        print(HDIG)
        # Calculate final distance HDIG
        dist_HDIG = sum(HDIG)/self.n_features
        print(dist_HDIG)

        # Weight: the HDIG distance raised to the power of -n_estimators (the ensemble size)
        weights = dist_HDIG**(-self.n_estimators)
        return weights
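A minimal, self-contained sketch of the HDIG computation above for a single numerical feature, assuming binary class labels {0, 1}; the data and names below are illustrative stand-ins, not part of the original stream-learning class.

import numpy as np
from math import sqrt
from scipy.stats import binned_statistic, entropy

rng = np.random.default_rng(0)
n_bins = 10
feature_p = rng.normal(0.0, 1.0, 500)   # feature column from the previous chunk
feature_c = rng.normal(0.5, 1.0, 500)   # feature column from the current chunk (drifted)
y = (feature_c > 0).astype(int)         # binary labels for the current chunk

# shared binning range across both chunks, as in the method above
lo = min(feature_p.min(), feature_c.min())
hi = max(feature_p.max(), feature_c.max())
counts_c, _, bin_n = binned_statistic(feature_c, feature_c, statistic='count',
                                      bins=n_bins, range=(lo, hi))
counts_p, _, _ = binned_statistic(feature_p, feature_p, statistic='count',
                                  bins=n_bins, range=(lo, hi))

# Hellinger distance between the two per-bin sample distributions
p = counts_p / len(feature_p)
q = counts_c / len(feature_c)
hellinger = sqrt(np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))

# information gain of the binned feature on the current chunk
_, class_counts = np.unique(y, return_counts=True)
e_parent = entropy(class_counts / len(y), base=2)
e_weighted_avg = 0.0
for b in np.unique(bin_n):
    in_bin = y[bin_n == b]
    _, cc = np.unique(in_bin, return_counts=True)
    e_weighted_avg += (len(in_bin) / len(y)) * entropy(cc / len(in_bin), base=2)
inf_gain = e_parent - e_weighted_avg

hdig = hellinger * (1 + inf_gain)   # HDIG for this feature
print(hdig)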
Example 55
def main():

    # constants
    BIGFONT = 22
    MIDFONT = 18
    SMFONT = 16
    FIG_WIDTH = 12.0
    FIG_HEIGHT = 8.0

    varname = 'HFX'
    nbins = 20
    #cases=["ERA5_C2008_add", "ERA5_TY2001_add", "ERA5_WRFROMS_add", "ERA5_WRF_add"]
    #    cases=["ERA5_C2008_dynlim",  "ERA5_TY2001_nolimit",  "ERA5_WRFROMS_add", "ERA5_WRF_add"]
    #cases=["ERA5_C2008", "ERA5_TY2001", "ERA5_WRFROMS", "ERA5_WRF"]
    #cases=[ "ERA5_WRF","ERA5_WRFROMS",   "ERA5_TY2001", "ERA5_C2008_dynlim"]
    cases = ["WRFROMS", "C2008", "TY2001"]
    #line_libs=['ko','ro','bo','go']
    #line_libs=['k.','r.','b.','g.']
    shade_color_lib = ['salmon', 'cyan', 'lime']
    line_lib = ['r', 'b', 'g']
    dot_lib = ['*', '^', 's']
    #line_libs=['b.','g*','r^','k+']
    wrf_root = '/disk/v092.yhuangci/lzhenn/1911-COAWST/'

    i_dom = 2
    strt_time_str = '201809151800'
    end_time_str = '201809160000'
    box_R = 80

    epsilon = 0.333
    rho_air = 1.29

    strt_time_obj = datetime.datetime.strptime(strt_time_str, '%Y%m%d%H%M')
    end_time_obj = datetime.datetime.strptime(end_time_str, '%Y%m%d%H%M')

    fig, ax = plt.subplots()
    fig.subplots_adjust(left=0.08,
                        bottom=0.18,
                        right=0.99,
                        top=0.92,
                        wspace=None,
                        hspace=None)

    for (dot_case, shade_color, line_color,
         case) in zip(dot_lib, shade_color_lib, line_lib, cases):

        # read track data
        tc_info_fn = wrf_root + '/' + case + '/trck.' + case + '.d0' + str(
            i_dom)
        dateparse = lambda x: datetime.datetime.strptime(x, '%Y%m%d%H0000')
        df_tc_info = pd.read_csv(tc_info_fn,
                                 sep=r'\s+',
                                 parse_dates=True,
                                 index_col='timestamp',
                                 header=0,
                                 date_parser=dateparse)
        df_tc_info = df_tc_info[((df_tc_info.index >= strt_time_obj) &
                                 (df_tc_info.index <= end_time_obj))]

        print(df_tc_info)
        tc_lat = df_tc_info['lat']
        tc_lon = df_tc_info['lon']

        # read raw input
        ds = salem.open_wrf_dataset('/disk/v092.yhuangci/lzhenn/1911-COAWST/' +
                                    case + '/wrfout_d02')
        ds = ds.sel(time=slice(strt_time_obj, end_time_obj))

        var1 = ds[varname]  # heat exch
        var2 = ds['U10']  # momentum exch
        var3 = ds['U10']
        var4 = ds['V10']
        varmask = ds['LANDMASK']
        var1.values = np.where(varmask.values == 1, np.nan, var1.values)
        var2.values = np.where(varmask.values == 1, np.nan, var2.values)
        ws = np.sqrt(var3 * var3 + var4 * var4)
        idx = get_closest_idx(var1.lat, var1.lon, tc_lat.values, tc_lon.values)
        var1_box_comp = box_collect(var1.values, box_R, idx)  # nparray inout
        var2_box_comp = box_collect(var2.values, box_R, idx)  # nparray inout
        ratio = var1_box_comp / var2_box_comp
        ws_box_comp = box_collect(ws.values, box_R, idx)  # nparray inout

        # mask both arrays together so wind speed and flux stay aligned sample-by-sample
        valid = ~np.isnan(ws_box_comp) & ~np.isnan(var1_box_comp)
        ws_box_comp = ws_box_comp[valid]
        var1_box_comp = var1_box_comp[valid]
        # get bins
        bin_means, bin_edges, binnumber = stats.binned_statistic(
            ws_box_comp.flatten(),
            var1_box_comp.flatten(),
            statistic='mean',
            bins=nbins)
        bin_counts, bin_edges, binnumber = stats.binned_statistic(
            ws_box_comp.flatten(),
            var1_box_comp.flatten(),
            statistic='count',
            bins=nbins)

        lh_heat = bin_means * bin_counts * 3600 * 27000 * 27000 / 1e15  # in units of 1e15 J
        x_pos = (bin_edges[0:-1] + bin_edges[1:]) / 2

        tck = interpolate.splrep(x_pos, lh_heat, s=0)
        x_spline = np.linspace(x_pos.min(), x_pos.max(), 300)
        lh_heat_smooth = interpolate.splev(x_spline, tck, der=0)

        # scatter
        #ax.plot(x_pos, lh_heat, linewidth=0.0, color=line_color, marker=dot_case, markersize=8)
        ax.plot(x_spline,
                lh_heat_smooth,
                label=case,
                linewidth=2.,
                color=line_color)
        ax.fill_between(x_spline,
                        0,
                        lh_heat_smooth,
                        alpha=0.2,
                        color=shade_color)
    plt.legend(loc='best', fontsize=SMFONT)
    plt.xlabel('10m WindSpeed (m/s)', fontsize=SMFONT)
    plt.ylabel(varname + ' (10^15 J)', fontsize=SMFONT)
    plt.xticks(fontsize=SMFONT)
    plt.yticks(fontsize=SMFONT)

    plt.title('Accum. ' + varname + ' - 10m WindSpeed', fontsize=BIGFONT)
    fig.set_size_inches(FIG_WIDTH, FIG_HEIGHT)
    fig.savefig('../fig/acc_' + varname + '.png')
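A side note on the accumulation above: multiplying the per-bin mean by the per-bin count reproduces the per-bin sum, so the same totals can be had in one binned_statistic call with statistic='sum'. A small sketch with synthetic stand-ins for wind speed and flux:

import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
ws = rng.uniform(0, 30, 1000)        # stand-in for 10 m wind speed
flux = rng.normal(100, 20, 1000)     # stand-in for HFX samples

means, edges, _ = stats.binned_statistic(ws, flux, statistic='mean', bins=20)
counts, _, _ = stats.binned_statistic(ws, flux, statistic='count', bins=20)
sums, _, _ = stats.binned_statistic(ws, flux, statistic='sum', bins=20)

# mean * count equals sum bin by bin (empty bins give NaN means, 0 sums)
assert np.allclose(np.nan_to_num(means * counts), sums)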
Example 56
def scaleSegregation_fft(images,
                         binsize,
                         resol,
                         order=False,
                         fillHoles=False,
                         flip=False,
                         Npts=None):
    """ Computes (through Monte Carlo sim) the linear scale of segregation from a set of image files
	@images: list of image file strings
	@binsize: length of each discrete grid cell in pixels
	@samplesize: number of successful Monte Carlo trials
	@resol: image resolution (distance/pixel)

	@[maxDist]: maximum distance (in pixels) to sample
	@[order]: read images in a chronological order if set to True
	@[fillHoles]: fill holes in 3D image
	@[flip]: flip image indices when reading data (for reading matlab images)
	@[Npts]: number of data points to average fft correlation over (bin size)

	Returns the coefficient of correlation R(r) and separation distance (r)

	TODO: support multi-component systems, not just binary systems
	"""

    if not Npts:
        Npts = 50

    volFrac, volMean, volVar = coarseDiscretize(images, binsize, order,
                                                fillHoles, flip)

    if len(volFrac) > 1:
        a, b = volFrac[0], volFrac[1]
    else:
        a, b = volFrac[0], 0 * volFrac[0]

    dist = a * 0

    x = np.arange(a.shape[0])
    y = np.arange(a.shape[1])
    z = np.arange(a.shape[2])

    for i in range(a.shape[0]):
        for j in range(a.shape[1]):
            for k in range(a.shape[2]):

                if a[i, j, k] > 0 or b[i, j, k] > 0:
                    dist[i, j, k] = np.sqrt(x[i]**2 + y[j]**2 + z[k]**2)

    dist = dist.flatten()
    a = a.flatten()
    b = b.flatten()

    a, _, _ = binned_statistic(dist, a, 'mean', Npts)
    b, dist, _ = binned_statistic(dist, b, 'mean', Npts)

    dist = dist[:-1]

    a = a - a.mean()

    corrfunc = a * 0

    var = np.linalg.norm(a)**2

    N = 2 * len(a) - 1
    corrfunc = (
        np.real(np.fft.ifft(np.fft.fft(a, N) * np.conj(np.fft.fft(a, N)))) /
        var)[:len(a)]

    return corrfunc, dist * resol * binsize
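The FFT step above is the Wiener-Khinchin trick: zero-padding to N = 2*len(a) - 1 makes the circular correlation of the padded signal equal to the linear autocorrelation. A short sketch checking it against np.correlate on synthetic data:

import numpy as np

a = np.random.default_rng(2).normal(size=64)
a = a - a.mean()
var = np.linalg.norm(a) ** 2

N = 2 * len(a) - 1
fft_corr = (np.real(np.fft.ifft(np.fft.fft(a, N) * np.conj(np.fft.fft(a, N))))
            / var)[:len(a)]
direct_corr = np.correlate(a, a, mode='full')[len(a) - 1:] / var

assert np.allclose(fft_corr, direct_corr)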
Example 57
def diag_all(eventfile, par_list, tbin_size, mode, diag_vars):
    """
    Get the diagnostic plots for a desired time interval.
    [Likely too large a range in time (and energy) to be sufficiently useful for
    diagnosis.]

    eventfile - path to the event file. Will extract ObsID from this for the NICER files.
    par_list - A list of parameters we'd like to extract from the FITS file
    (e.g., from eventcl, PI_FAST, TIME, PI,)
    tbin_size - the size of the time bins (in seconds!)
    >> e.g., tbin_size = 2 means bin by 2s
    >> e.g., tbin_size = 0.05 means bin by 0.05s!
    mode - whether we want to show or save the plot.
    diag_vars - a dictionary where each key = 'att','mkf','hk', or 'cl', and
    diag_vars[key] provides the list of variables to loop over.
    """
    if not isinstance(tbin_size, (int, float)):
        raise TypeError("tbin_size should be a float or integer!")
    if 'PI' not in par_list or 'TIME' not in par_list:
        raise ValueError(
            "You should have BOTH 'PI' and 'TIME' in the parameter list!")
    if type(par_list) != list and type(par_list) != np.ndarray:
        raise TypeError("par_list should either be a list or an array!")
    if mode != 'show' and mode != 'save':
        raise ValueError("Mode should either be 'show' or 'save'!")

    parent_folder = str(pathlib.Path(eventfile).parent)
    event_header = fits.open(eventfile)[1].header
    obj_name = event_header['OBJECT']
    obsid = event_header['OBS_ID']

    #get the binned light curve
    data_dict = Lv0_fits2dict.fits2dict(eventfile, 1, par_list)

    times = data_dict['TIME']
    counts = np.ones(len(times))

    shifted_t = times - times[0]
    t_bins = np.linspace(0, int(shifted_t[-1]),
                         int(int(shifted_t[-1]) / tbin_size) + 1)
    summed_data, bin_edges, binnumber = stats.binned_statistic(
        shifted_t, counts, statistic='sum',
        bins=t_bins)  #binning the time values in the data

    binned_t = t_bins
    binned_counts = summed_data
    #define the variables that we'd like to compare their behavior with the light curve
    att_var = diag_vars['att']
    mkf_var = diag_vars['mkf']
    hk_var = diag_vars['hk']

    ### FOR ATTITUDE
    dict_att = Lv0_nicer_housekeeping.get_att(eventfile, att_var)
    times_att = dict_att['TIME']
    shifted_t = times_att - times_att[0]
    for i in range(1, len(att_var)):  #as in, don't compare time with time...
        filtered_att = dict_att[att_var[i]]
        if len(shifted_t) != len(filtered_att):
            raise ValueError(
                "The lengths of arrays filtered t and filtered att for variable "
                + str(att_var[i]) + ' are different, with ' +
                str(len(shifted_t)) + ' and ' + str(len(filtered_att)) +
                ' respectively.')

        if mode == 'show':
            Lv3_diagnostics_display.display_all(eventfile, att_var[i],
                                                binned_t, binned_counts,
                                                shifted_t, filtered_att,
                                                '.att')
            plt.show()

    if mode == 'save':
        filename = parent_folder + '/diag_att_' + obsid + '_bin' + str(
            tbin_size) + 's.pdf'
        with PdfPages(filename) as pdf:
            for i in range(1, len(att_var)):
                filtered_att = dict_att[att_var[i]]
                Lv3_diagnostics_display.display_all(eventfile, att_var[i],
                                                    binned_t, binned_counts,
                                                    shifted_t, filtered_att,
                                                    '.att')
                pdf.savefig()
                plt.close()

    ### FOR FILTER
    dict_mkf = Lv0_nicer_housekeeping.get_mkf(eventfile, mkf_var)
    times_mkf = dict_mkf['TIME']
    shifted_t = times_mkf - times_mkf[0]
    for i in range(1, len(mkf_var)):  #as in, don't compare time with time...
        filtered_mkf = dict_mkf[mkf_var[i]]
        if len(shifted_t) != len(filtered_mkf):
            raise ValueError(
                "The lengths of arrays shifted t and filtered mkf for variable "
                + str(mkf_var[i]) + ' are different, with ' +
                str(len(shifted_t)) + ' and ' + str(len(filtered_mkf)) +
                ' respectively.')

        if mode == 'show':
            Lv3_diagnostics_display.display_all(eventfile, mkf_var[i],
                                                binned_t, binned_counts,
                                                shifted_t, filtered_mkf,
                                                '.mkf')
            plt.show()

    if mode == 'save':
        filename = parent_folder + '/diag_mkf_' + obsid + '_bin' + str(
            tbin_size) + 's.pdf'
        with PdfPages(filename) as pdf:
            for i in range(1, len(mkf_var)):
                filtered_mkf = dict_mkf[mkf_var[i]]
                Lv3_diagnostics_display.display_all(eventfile, mkf_var[i],
                                                    binned_t, binned_counts,
                                                    shifted_t, filtered_mkf,
                                                    '.mkf')
                pdf.savefig()
                plt.close()

    ### FOR HK
    if mode == 'show':
        for i in range(7):
            dict_hk = Lv0_nicer_housekeeping.get_hk(eventfile, str(i), hk_var)
            times_hk = dict_hk['TIME']
            shifted_t = times_hk - times_hk[0]
            for j in range(
                    1, len(hk_var)):  #as in, don't compare time with time...
                filtered_hk = dict_hk[hk_var[j]]
                if len(shifted_t) != len(filtered_hk):
                    raise ValueError(
                        "The lengths of arrays shifted t and filtered att for variable "
                        + str(hk_var[j]) + ' are different, with ' +
                        str(len(shifted_t)) + ' and ' + str(len(filtered_hk)) +
                        ' respectively. This is for HK MPU=' + str(i))
                Lv3_diagnostics_display.display_all(eventfile, hk_var[j],
                                                    binned_t, binned_counts,
                                                    shifted_t, filtered_hk,
                                                    ['.hk', str(i)])
                plt.show()

    if mode == 'save':
        filename = parent_folder + '/diag_hk_' + obsid + '_bin' + str(
            tbin_size) + 's.pdf'
        with PdfPages(filename) as pdf:
            for i in range(7):
                dict_hk = Lv0_nicer_housekeeping.get_hk(
                    eventfile, str(i), hk_var)
                times_hk = dict_hk['TIME']
                shifted_t = times_hk - times_hk[0]
                for j in range(
                        1,
                        len(hk_var)):  #as in, don't compare time with time...
                    filtered_hk = dict_hk[hk_var[j]]
                    if len(shifted_t) != len(filtered_hk):
                        raise ValueError(
                            "The lengths of arrays shifted t and filtered att for variable "
                            + str(hk_var[j]) + ' are different, with ' +
                            str(len(shifted_t)) + ' and ' +
                            str(len(filtered_hk)) +
                            ' respectively. This is for HK MPU=' + str(i))
                    Lv3_diagnostics_display.display_all(
                        eventfile, hk_var[j], binned_t, binned_counts,
                        shifted_t, filtered_hk, ['.hk', str(i)])
                    pdf.savefig()
                    plt.close()

    ### FOR EVENT_CL (BARY)
    data_dict = Lv0_fits2dict.fits2dict(eventfile, 1, par_list)
    times_cl = data_dict['TIME']
    shifted_t = times_cl - times_cl[0]

    for i in range(1, len(par_list)):  #as in, don't compare time with time...
        filtered_cl = data_dict[par_list[i]]
        if len(shifted_t) != len(filtered_cl):
            raise ValueError(
                "The lengths of arrays shifted t and filtered cl for variable "
                + str(par_list[i]) + ' are different, with ' +
                str(len(shifted_t)) + ' and ' + str(len(filtered_cl)) +
                ' respectively.')

        if mode == 'show':
            Lv3_diagnostics_display.display_all(eventfile, par_list[i],
                                                binned_t, binned_counts,
                                                shifted_t, filtered_cl, '.cl')
            plt.show()

    if mode == 'save':
        filename = parent_folder + '/diag_cl_' + obsid + '_bin' + str(
            tbin_size) + 's.pdf'
        with PdfPages(filename) as pdf:
            for i in range(1, len(par_list)):
                filtered_cl = data_dict[par_list[i]]
                Lv3_diagnostics_display.display_all(eventfile, par_list[i],
                                                    binned_t, binned_counts,
                                                    shifted_t, filtered_cl,
                                                    '.cl')
                pdf.savefig()
                plt.close()
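The light-curve construction at the top of diag_all reduces to summing unit weights per time bin; a minimal sketch with synthetic arrival times (all values illustrative):

import numpy as np
from scipy import stats

tbin_size = 2.0
rng = np.random.default_rng(3)
times = np.sort(rng.uniform(0, 1000, 5000))   # stand-in event arrival times (s)
shifted_t = times - times[0]

t_bins = np.linspace(0, int(shifted_t[-1]),
                     int(int(shifted_t[-1]) / tbin_size) + 1)
counts_per_bin, edges, _ = stats.binned_statistic(
    shifted_t, np.ones(len(shifted_t)), statistic='sum', bins=t_bins)
# counts_per_bin[i] is the number of events in [edges[i], edges[i+1])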
Example 58
def detection_limits(results,
                     star_mass=1.0,
                     Np=None,
                     bins=200,
                     plot=True,
                     sorted_samples=True,
                     return_mask=False):
    """ 
    Calculate detection limits using samples with more than `Np` planets. By 
    default, this function uses the value of `Np` which passes the posterior
    probability threshold.

    Arguments
    ---------
    star_mass : float or tuple
        Stellar mass and optionally its uncertainty [in solar masses].
    Np : int
        Consider only posterior samples with more than `Np` planets.
    bins : int
        Number of bins at which to calculate the detection limits. The period 
        ranges from the minimum to the maximum orbital period in the posterior.
    plot : bool
        Whether to plot the detection limits
    sorted_samples : bool
        Whether to sort the planet samples (by period) before slicing them
        into periods, semi-amplitudes, and eccentricities.
    return_mask : bool
        Whether to also return the mask selecting posterior samples with
        more than `Np` planets.
    
    Returns
    -------
    P, K, E, M : ndarray
        Orbital periods, semi-amplitudes, eccentricities, and planet masses used
        in the calculation of the detection limits. These correspond to all the
        posterior samples with more than `Np` planets
    s : DLresult, namedtuple
        Detection limits result, with attributes `max` and `bins`. The `max` 
        array is in units of Earth masses, `bins` is in days.
    """
    res = results
    if Np is None:
        Np = passes_threshold_np(res)
    print(f'Using samples with Np > {Np}')

    mask = res.posterior_sample[:, res.index_component] > Np
    pars = res.posterior_sample[mask, res.indices['planets']]

    if sorted_samples:
        pars = sort_planet_samples(res, pars)

    mc = res.max_components
    periods = slice(0 * mc, 1 * mc)
    amplitudes = slice(1 * mc, 2 * mc)
    eccentricities = slice(3 * mc, 4 * mc)

    P = pars[:, periods]
    K = pars[:, amplitudes]
    E = pars[:, eccentricities]

    inds = np.nonzero(P)

    P = P[inds]
    K = K[inds]
    E = E[inds]
    M = get_planet_mass(P, K, E, star_mass=star_mass, full_output=True)[2]

    if P.max() / P.min() > 100:
        bins = 10**np.linspace(np.log10(P.min()), np.log10(P.max()), bins)
    else:
        bins = np.linspace(P.min(), P.max(), bins)

    # bins_start = bins[:-1]# - np.ediff1d(bins)/2
    # bins_end = bins[1:]# + np.ediff1d(bins)/2
    # bins_start = np.append(bins_start, bins_end[-1])
    # bins_end = np.append(bins_end, P.max())

    DLresult = namedtuple('DLresult', ['max', 'bins'])
    s = binned_statistic(P, M, statistic='max', bins=bins)
    s = DLresult(max=s.statistic * mjup2mearth,
                 bins=s.bin_edges[:-1] + np.ediff1d(s.bin_edges) / 2)

    # s99 = binned_statistic(P, M, statistic=lambda x: np.percentile(x, 99),
    #                        bins=bins)
    # s99 = DLresult(max=s99.statistic,
    #                bins=s99.bin_edges[:-1] + np.ediff1d(s99.bin_edges) / 2)

    if plot:
        import matplotlib.pyplot as plt
        _, ax = plt.subplots(1, 1, constrained_layout=True)
        if isinstance(star_mass, tuple):
            star_mass = star_mass[0]

        sP = np.sort(P)
        one_ms = 4.919e-3 * star_mass**(2. / 3) * sP**(1. / 3) * 1
        kw = dict(color='C0', alpha=1, zorder=3)
        ax.loglog(sP, 5 * one_ms * mjup2mearth, ls=':', **kw)
        ax.loglog(sP, 3 * one_ms * mjup2mearth, ls='--', **kw)
        ax.loglog(sP, one_ms * mjup2mearth, ls='-', **kw)

        ax.loglog(P, M * mjup2mearth, 'k.', ms=2, alpha=0.2, zorder=-1)
        ax.loglog(s.bins, s.max, color='C3')
        # ax.hlines(s.max, bins_start, bins_end, lw=2)
        # ax.loglog(s99.bins, s99.max * mjup2mearth)

        lege = [f'{i} m/s' for i in (5, 3, 1)]
        lege += ['posterior samples', 'binned maximum']

        ax.legend(lege, ncol=2, frameon=False)
        ax.set(ylim=(0.5, None))
        ax.set(xlabel='Orbital period [days]',
               ylabel=r'Planet mass [M$_\oplus$]')
        try:
            ax.set_title(res.star)
        except AttributeError:
            pass

    if return_mask:
        return P, K, E, M, s, mask
    else:
        return P, K, E, M, s
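The core of detection_limits is a binned 'max' statistic over (period, mass) posterior samples, switching to log-spaced bins when the period range spans more than two decades. A sketch on synthetic samples (names and values illustrative only):

import numpy as np
from scipy.stats import binned_statistic

rng = np.random.default_rng(4)
P = 10 ** rng.uniform(0, 3, 2000)                  # periods between 1 and 1000 days
M = rng.lognormal(mean=0.0, sigma=1.0, size=2000)  # masses, arbitrary units

nbins = 50
if P.max() / P.min() > 100:
    bins = 10 ** np.linspace(np.log10(P.min()), np.log10(P.max()), nbins)
else:
    bins = np.linspace(P.min(), P.max(), nbins)

s = binned_statistic(P, M, statistic='max', bins=bins)
limit = s.statistic                                  # per-bin maximum mass
centers = s.bin_edges[:-1] + np.ediff1d(s.bin_edges) / 2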
Example 59
def plot_angular_resolution(reconstructed_events,
                            reference,
                            plot_e_reco,
                            ylog=False,
                            ylim=None,
                            ax=None):

    df = reconstructed_events
    distance = calculate_distance_to_true_source_position(df)

    e_min, e_max = 0.01 * u.TeV, 180 * u.TeV
    bins, bin_center, _ = make_default_cta_binning(e_min=e_min, e_max=e_max)

    if plot_e_reco:
        x = df.gamma_energy_prediction_mean.values
    else:
        x = df.mc_energy.values

    y = distance

    b_68, bin_edges, _ = binned_statistic(
        x, y, statistic=lambda y: np.nanpercentile(y, 68), bins=bins)

    bin_centers = np.sqrt(bin_edges[1:] * bin_edges[:-1])
    # bins_y = np.logspace(np.log10(0.005), np.log10(50.8), 100)

    log_emin, log_emax = np.log10(0.007), np.log10(300)
    if not ylim:
        ylim = (0.01, 20) if ylog else (0, 1)
    ymin, ymax = np.log10([0.01, 20]) if ylog else ylim

    if not ax:
        fig, ax = plt.subplots(1, 1)
    else:
        fig = plt.gcf()
    im = ax.hexbin(x,
                   y,
                   xscale='log',
                   yscale='log' if ylog else None,
                   extent=(log_emin, log_emax, ymin, ymax + 0.1),
                   cmap=default_cmap)

    add_colorbar_to_figure(im, fig, ax, label='Counts')

    # hlines (below) avoids the step-plot artifact that ax.step produced at the last bin
    ax.hlines(b_68,
              bins[:-1],
              bins[1:],
              lw=2,
              color=main_color,
              label='68\\textsuperscript{th} Percentile')

    if reference:
        df = load_angular_resolution_requirement()
        ax.plot(df.energy,
                df.resolution,
                '--',
                color='#5b5b5b',
                label='Reference')

    ax.set_xscale('log')

    ax.set_ylabel(r'Distance to True Position / $\,^{\circ}$')
    if plot_e_reco:
        ax.set_xlabel('Estimated Energy / TeV')
    else:
        ax.set_xlabel('True Energy / TeV')
    ax.legend(framealpha=0)

    df = pd.DataFrame({
        'energy_prediction': bin_centers,
        'angular_resolution': b_68,
    })
    plt.tight_layout(pad=0, rect=(0, 0, 1.002, 1))
    return ax, df
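binned_statistic accepts any callable as the statistic, which is how the 68th-percentile containment above is computed per energy bin; a minimal sketch with synthetic energies and angular distances:

import numpy as np
from scipy.stats import binned_statistic

rng = np.random.default_rng(5)
energy = 10 ** rng.uniform(-2, 2, 5000)        # TeV, illustrative
distance = np.abs(rng.normal(0, 0.1, 5000))    # degrees, illustrative

bins = 10 ** np.linspace(-2, 2, 21)            # log-spaced energy bins
b_68, bin_edges, _ = binned_statistic(
    energy, distance, statistic=lambda v: np.nanpercentile(v, 68), bins=bins)
# geometric bin centers match the log-spaced binning
bin_centers = np.sqrt(bin_edges[1:] * bin_edges[:-1])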
Example 60
def main():
    # plot dopplar b parameter as a function of diameter w/o splitting, add other sets if you want
    plot_b_az_mean_plus = True
    plot_b_az_mean_plus_save = True

    # some colors
    color_blue = '#436bad'  # french blue
    color_red = '#ec2d01'  # tomato red

    if getpass.getuser() == 'frenchd':

        #         pickleFilename = '/Users/frenchd/Research/inclination/git_inclination/picklePilot_plusSALT_14.p'
        #         gtPickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pickleGT.p'
        #         saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/plotting_code/figs'

        #         gtPickleFilename = '/Users/frenchd/Research/inclination/git_inclination/pickleGT_filteredAll.p'
        gtPickleFilename = '/Users/frenchd/Research/GT_update2/pickleGT_filteredAll.p'

        saveDirectory = '/Users/frenchd/Research/inclination/git_inclination/plotting_code/figs/'

        isolated_filename = '/Users/frenchd/Research/inclination/git_inclination/isolated6.p'
        L_isolated_filename = '/Users/frenchd/Research/inclination/git_inclination/L_isolated6.p'
        L_associated_isolated_filename = '/Users/frenchd/Research/inclination/git_inclination/L_associated_isolated6.p'
        L_associated_filename = '/Users/frenchd/Research/inclination/git_inclination/L_associated6.p'
        L_nonassociated_filename = '/Users/frenchd/Research/inclination/git_inclination/L_nonassociated6.p'
        L_two_filename = '/Users/frenchd/Research/inclination/git_inclination/L_two6.p'
        L_two_plus_filename = '/Users/frenchd/Research/inclination/git_inclination/L_two_plus6.p'
        L_group_filename = '/Users/frenchd/Research/inclination/git_inclination/L_group6.p'
        L_summed_filename = '/Users/frenchd/Research/inclination/git_inclination/L_summed6.p'

    else:
        print('Could not determine username. Exiting.')
        sys.exit()

    # pickle file for the whole galaxy table:
    gtPickleFile = open(gtPickleFilename, 'rb')
    gtDict = pickle.load(gtPickleFile)
    gtPickleFile.close()

    # open all the pickle files
    isolated_file = open(isolated_filename, 'rb')
    L_isolated_file = open(L_isolated_filename, 'rb')
    L_associated_isolated_file = open(L_associated_isolated_filename, 'rb')
    L_associated_file = open(L_associated_filename, 'rb')
    L_nonassociated_file = open(L_nonassociated_filename, 'rb')
    L_two_file = open(L_two_filename, 'rb')
    L_two_plus_file = open(L_two_plus_filename, 'rb')
    L_group_file = open(L_group_filename, 'rb')
    L_summed_file = open(L_summed_filename, 'rb')

    # unload the data from them
    isolated = pickle.load(isolated_file)
    L_isolated = pickle.load(L_isolated_file)
    L_associated_isolated = pickle.load(L_associated_isolated_file)
    L_associated = pickle.load(L_associated_file)
    L_nonassociated = pickle.load(L_nonassociated_file)
    L_two = pickle.load(L_two_file)
    L_two_plus = pickle.load(L_two_plus_file)
    L_group = pickle.load(L_group_file)
    L_summed = pickle.load(L_summed_file)

    # close the files
    isolated_file.close()
    L_isolated_file.close()
    L_associated_isolated_file.close()
    L_associated_file.close()
    L_nonassociated_file.close()
    L_two_file.close()
    L_two_plus_file.close()
    L_group_file.close()
    L_summed_file.close()

    # which dataset to use for plotting?
    dataSet = L_associated_isolated

    Lya_vs = dataSet['Lya_vs']
    e_Lya_vs = dataSet['e_Lya_vs']
    Lya_Ws = dataSet['Lya_Ws']
    e_Lya_Ws = dataSet['e_Lya_Ws']
    Nas = dataSet['Nas']
    e_Nas = dataSet['e_Nas']
    bs = dataSet['bs']
    e_bs = dataSet['e_bs']
    Ws = dataSet['Ws']
    e_Ws = dataSet['e_Ws']
    targets = dataSet['targets']
    z_targets = dataSet['z_targets']
    RA_targets = dataSet['RA_targets']
    Dec_targets = dataSet['Dec_targets']
    Names = dataSet['Names']
    RA_galaxies = dataSet['RA_galaxies']
    Dec_galaxies = dataSet['Dec_galaxies']
    impacts = dataSet['impacts']
    azimuths = dataSet['azimuths']
    PAs = dataSet['PAs']
    incs = dataSet['incs']
    adjustedIncs = dataSet['adjustedIncs']
    ls = dataSet['ls']
    l_cuss = dataSet['l_cuss']
    R_virs = dataSet['R_virs']
    cuss = dataSet['cuss']
    MajDiams = dataSet['MajDiams']
    MTypes = dataSet['MTypes']
    Vhels = dataSet['Vhels']
    vcorrs = dataSet['vcorrs']
    bestDists = dataSet['bestDists']
    e_bestDists = dataSet['e_bestDists']
    group_nums = dataSet['group_nums']
    group_mems = dataSet['group_mems']
    group_dists = dataSet['group_dists']
    Lstar_meds = dataSet['Lstar_meds']
    e_Lstar_meds = dataSet['e_Lstar_meds']
    Bmags = dataSet['Bmags']

    majorAxisL = gtDict['majorAxis']
    incL = gtDict['inc']
    adjustedIncL = gtDict['adjustedInc']
    paL = gtDict['PA']
    BmagL = gtDict['Bmag']
    #     Bmag_sdssL = gtDict['Bmag_sdss']
    RID_medianL = gtDict['RID_median']
    RID_meanL = gtDict['RID_mean']
    RID_stdL = gtDict['RID_std']
    VhelL = gtDict['Vhel']
    RAdegL = gtDict['RAdeg']
    DEdegL = gtDict['DEdeg']
    NameL = gtDict['Name']

    allPA = paL
    allInclinations = []
    allAdjustedIncs = []
    allCosInclinations = []

    #     print 'type: ',type(incL)
    for i in incL:
        if i != -99:
            i = float(i)
            allInclinations.append(i)

            i2 = pi / 180. * i
            cosi2 = cos(i2)
            allCosInclinations.append(cosi2)

    allFancyInclinations = []
    allCosFancyCosInclinations = []
    for i in adjustedIncL:
        if i != -99:
            i = float(i)

            allAdjustedIncs.append(i)

            i2 = pi / 180. * i
            cosi2 = cos(i2)
            allCosFancyCosInclinations.append(cosi2)

    allDiameter = majorAxisL

    print('finished loading the galaxy table data')

    total = 0
    totalNo = 0
    totalYes = 0
    totalIsolated = 0
    totalGroup = 0

    #########################################################################################
    #########################################################################################

    ##########################################################################################
    ##########################################################################################

    if plot_b_az_mean_plus:
        fig = figure(figsize=(7.7, 5.7))
        ax = fig.add_subplot(111)

        countb = 0
        countr = 0
        count = -1

        color_purple = '#7570b3'
        color_purple2 = '#984ea3'

        color_green = '#1b9e77'
        color_orange = '#d95f02'
        color_purple3 = '#7570b3'
        color_pink = '#e7298a'
        color_lime = '#66a61e'
        color_yellow = '#e6ab02'
        color_brown = '#a6761d'
        color_coal = '#666666'

        alpha_isolated = 0.5
        alpha_assoc = 0.5
        alpha_two = 0.5
        alpha_three = 0.5
        alpha_group = 0.5
        alpha_bins = 0.99
        markerSize = 30

        #         binSize = 50
        #         bins = arange(0, 550, binSize)
        binSize = 15
        bins = arange(0, 100, binSize)

        label_isolated = r'$\rm Isolated$'
        label_assoc = r'$\rm Associated$'
        label_two = r'$\rm Two$'
        label_three = r'$\rm Three+$'
        label_group = r'$\rm Group$'

        symbol_isolated = 'D'
        symbol_assoc = 'o'
        symbol_two = 'o'
        symbol_three = 'o'
        symbol_group = 'o'

        #         color_isolated = color_green
        #         color_assoc = color_orange
        #         color_two = color_purple3
        #         color_group = color_yellow

        color_isolated = 'black'
        color_assoc = color_green
        color_two = color_purple2
        color_group = color_orange

        maxEW = 15000.

        # define the x and y data for the isolated set

        Lya_Ws2 = []
        R_virs2 = []
        impacts2 = []
        bs2 = []
        ls2 = []
        MajDiams2 = []
        azimuths2 = []
        for w, r, i, b, l, maj, az in zip(Lya_Ws, R_virs, impacts, bs, ls,
                                          MajDiams, azimuths):
            if float(w) <= maxEW:
                Lya_Ws2.append(w)
                R_virs2.append(r)
                impacts2.append(i)
                bs2.append(b)
                ls2.append(l)
                MajDiams2.append(maj)
                azimuths2.append(az)

        isolated_xs = np.array(azimuths2)
        isolated_ys = np.array(bs2)

        # grab the associated data and define the x and y data
        associated_Lya_Ws = L_associated['Lya_Ws']
        associated_R_virs = L_associated['R_virs']
        associated_impacts = L_associated['impacts']
        associated_bs = L_associated['bs']
        associated_ls = L_associated['ls']
        associated_MajDiams = L_associated['MajDiams']
        associated_azimuths = L_associated['azimuths']

        associated_Lya_Ws2 = []
        associated_R_virs2 = []
        associated_impacts2 = []
        associated_bs2 = []
        associated_ls2 = []
        associated_MajDiams2 = []
        associated_azimuths2 = []
        for w, r, i, b, l, maj, az in zip(associated_Lya_Ws, associated_R_virs,
                                          associated_impacts, associated_bs,
                                          associated_ls, associated_MajDiams,
                                          associated_azimuths):
            if float(w) <= maxEW:
                associated_Lya_Ws2.append(w)
                associated_R_virs2.append(r)
                associated_impacts2.append(i)
                associated_bs2.append(b)
                associated_ls2.append(l)
                associated_MajDiams2.append(maj)
                associated_azimuths2.append(az)

        associated_xs = np.array(associated_azimuths2)
        associated_ys = np.array(associated_bs2)

        # grab the two data and define the x and y data
        two_Lya_Ws = L_two['Lya_Ws']
        two_R_virs = L_two['R_virs']
        two_impacts = L_two['impacts']
        two_bs = L_two['bs']
        two_ls = L_two['ls']
        two_MajDiams = L_two['MajDiams']
        two_azimuths = L_two['azimuths']

        two_Lya_Ws2 = []
        two_R_virs2 = []
        two_impacts2 = []
        two_bs2 = []
        two_ls2 = []
        two_MajDiams2 = []
        two_azimuths2 = []
        for w, r, i, b, l, maj, az in zip(two_Lya_Ws, two_R_virs, two_impacts,
                                          two_bs, two_ls, two_MajDiams,
                                          two_azimuths):
            if float(w) <= maxEW:
                two_Lya_Ws2.append(w)
                two_R_virs2.append(r)
                two_impacts2.append(i)
                two_bs2.append(b)
                two_ls2.append(l)
                two_MajDiams2.append(maj)
                two_azimuths2.append(az)

        two_xs = np.array(two_azimuths2)
        two_ys = np.array(two_bs2)

        # grab the two_plus data and define the x and y data
        three_Lya_Ws = L_two_plus['Lya_Ws']
        three_R_virs = L_two_plus['R_virs']
        three_impacts = L_two_plus['impacts']
        three_bs = L_two_plus['bs']
        three_ls = L_two_plus['ls']
        three_MajDiams = L_two_plus['MajDiams']
        three_azimuths = L_two_plus['azimuths']

        three_Lya_Ws2 = []
        three_R_virs2 = []
        three_impacts2 = []
        three_bs2 = []
        three_ls2 = []
        three_MajDiams2 = []
        three_azimuths2 = []
        for w, r, i, b, l, maj, az in zip(three_Lya_Ws, three_R_virs,
                                          three_impacts, three_bs, three_ls,
                                          three_MajDiams, three_azimuths):
            if float(w) <= maxEW:
                three_Lya_Ws2.append(w)
                three_R_virs2.append(r)
                three_impacts2.append(i)
                three_bs2.append(b)
                three_ls2.append(l)
                three_MajDiams2.append(maj)
                three_azimuths2.append(az)

        three_xs = np.array(three_azimuths2)
        three_ys = np.array(three_bs2)

        # grab the group data and define the x and y data
        group_Lya_Ws = L_group['Lya_Ws']
        group_R_virs = L_group['R_virs']
        group_impacts = L_group['impacts']
        group_mems = L_group['group_mems']
        group_bs = L_group['bs']
        group_ls = L_group['ls']
        group_MajDiams = L_group['MajDiams']
        group_azimuths = L_group['azimuths']

        group_Lya_Ws2 = []
        group_R_virs2 = []
        group_impacts2 = []
        group_bs2 = []
        group_ls2 = []
        group_MajDiams2 = []
        group_azimuths2 = []
        for w, r, i, group, b, l, maj, az in zip(group_Lya_Ws, group_R_virs,
                                                 group_impacts, group_mems,
                                                 group_bs, group_ls,
                                                 group_MajDiams,
                                                 group_azimuths):
            if float(group) >= 2 and float(w) <= maxEW:
                group_Lya_Ws2.append(w)
                group_R_virs2.append(r)
                group_impacts2.append(i)
                group_bs2.append(b)
                group_ls2.append(l)
                group_MajDiams2.append(maj)
                group_azimuths2.append(az)

        group_xs = np.array(group_azimuths2)
        group_ys = np.array(group_bs2)

        ##########################################################################################
        # do the plotting

        # isolated
        plot1 = scatter(isolated_xs,
                        isolated_ys,
                        marker=symbol_isolated,
                        c=color_isolated,
                        s=markerSize,
                        edgecolor='black',
                        alpha=alpha_isolated,
                        label=label_isolated)

        # histogram isolated
        bin_means, edges, binNumber = stats.binned_statistic(isolated_xs,
                                                             isolated_ys,
                                                             statistic='mean',
                                                             bins=bins)
        left, right = edges[:-1], edges[1:]
        X = array([left, right]).T.flatten()
        Y = array([nan_to_num(bin_means), nan_to_num(bin_means)]).T.flatten()
        plot(X,
             Y,
             ls='solid',
             color=color_isolated,
             lw=2.0,
             alpha=alpha_bins,
             label=r'$\rm Isolated~ Mean ~b$')

        # associated
        plot1 = scatter(associated_xs,
                        associated_ys,
                        marker=symbol_assoc,
                        c=color_assoc,
                        s=markerSize,
                        edgecolor='black',
                        alpha=alpha_assoc,
                        label=label_assoc)

        # histogram associated
        bin_means, edges, binNumber = stats.binned_statistic(associated_xs,
                                                             associated_ys,
                                                             statistic='mean',
                                                             bins=bins)
        left, right = edges[:-1], edges[1:]
        X = array([left, right]).T.flatten()
        Y = array([nan_to_num(bin_means), nan_to_num(bin_means)]).T.flatten()
        plot(X,
             Y,
             ls='solid',
             color=color_assoc,
             lw=2.0,
             alpha=alpha_bins,
             label=r'$\rm Assoc. ~Mean ~b$')

        # two
        plot1 = scatter(two_xs,
                        two_ys,
                        marker=symbol_two,
                        c=color_two,
                        s=markerSize,
                        edgecolor='black',
                        alpha=alpha_two,
                        label=label_two)

        # histogram two
        bin_means, edges, binNumber = stats.binned_statistic(two_xs,
                                                             two_ys,
                                                             statistic='mean',
                                                             bins=bins)
        left, right = edges[:-1], edges[1:]
        X = array([left, right]).T.flatten()
        Y = array([nan_to_num(bin_means), nan_to_num(bin_means)]).T.flatten()
        plot(X,
             Y,
             ls='solid',
             color=color_two,
             lw=2.0,
             alpha=alpha_bins,
             label=r'$\rm Two ~Mean ~b$')

        # group
        plot1 = scatter(group_xs,
                        group_ys,
                        marker=symbol_group,
                        c=color_group,
                        s=markerSize,
                        edgecolor='black',
                        alpha=alpha_group,
                        label=label_group)

        # histogram group
        bin_means, edges, binNumber = stats.binned_statistic(group_xs,
                                                             group_ys,
                                                             statistic='mean',
                                                             bins=bins)
        left, right = edges[:-1], edges[1:]
        X = array([left, right]).T.flatten()
        Y = array([nan_to_num(bin_means), nan_to_num(bin_means)]).T.flatten()
        plot(X,
             Y,
             ls='solid',
             color=color_group,
             lw=2.0,
             alpha=alpha_bins,
             label=r'$\rm Group ~Mean ~b$')

        # x-axis
        majorLocator = MultipleLocator(10)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(5)
        ax.xaxis.set_major_locator(majorLocator)
        ax.xaxis.set_major_formatter(majorFormatter)
        ax.xaxis.set_minor_locator(minorLocator)

        # y-axis
        majorLocator = MultipleLocator(20)
        majorFormatter = FormatStrFormatter(r'$\rm %d$')
        minorLocator = MultipleLocator(10)
        ax.yaxis.set_major_locator(majorLocator)
        ax.yaxis.set_major_formatter(majorFormatter)
        ax.yaxis.set_minor_locator(minorLocator)

        xlabel(r'$\rm Azimuth~[Deg]$')
        ylabel(r'$\rm b~[km\,s^{-1}]$')
        leg = ax.legend(scatterpoints=1,
                        prop={'size': 12},
                        loc='upper left',
                        fancybox=True)
        #         leg.get_frame().set_alpha(0.5)

        ax.grid(which='major', axis='both')
        ylim(0, 200)
        #         xlim(0, 2.5)

        if plot_b_az_mean_plus_save:
            savefig('{0}/b(az)_mean_binSize{1}_plus4.pdf'.format(
                saveDirectory, binSize),
                    format='pdf',
                    bbox_inches='tight')
        else:
            show()
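The scatter-plus-binned-mean step line is repeated four times above; a small helper removes the duplication. This is a sketch with explicit imports, whereas the original script appears to rely on a star import of the pylab namespace:

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def plot_binned_mean(x, y, bins, color, label, alpha=0.99, lw=2.0):
    """Overlay a step line of the per-bin means of y against x."""
    bin_means, edges, _ = stats.binned_statistic(x, y, statistic='mean', bins=bins)
    left, right = edges[:-1], edges[1:]
    X = np.array([left, right]).T.flatten()
    Y = np.array([np.nan_to_num(bin_means)] * 2).T.flatten()
    plt.plot(X, Y, ls='solid', color=color, lw=lw, alpha=alpha, label=label)

# e.g. plot_binned_mean(isolated_xs, isolated_ys, bins, 'black',
#                       r'$\rm Isolated~ Mean ~b$')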