Example #1
def single_correlation_flags(tf_plane, threshold=5.0, max_iter=5, previous_sums=[], verbose=False):
    flags    = tf_plane.mask
    sum_flags=flags.sum()
    if verbose:
        print('sum(flags): %s' % (sum_flags,))
        print('%5.3f%s flagged\n' % ((sum_flags*100.0/product(tf_plane.shape)),'%'))
    if sum_flags == product(flags.shape):
        return flags
    if max_iter <= 0:
        return ndimage.binary_dilation(flags,iterations=2)
    med       = ma.median(tf_plane.real) +1j*ma.median(tf_plane.imag)
    sigma     = sqrt(ma.std(tf_plane.real)**2 + ma.std(tf_plane.imag)**2)
    bad_vis   = abs(tf_plane.data-med) > threshold*sigma
    new_flags = logical_or(flags, bad_vis)
    new_data  = ma.array(tf_plane.data, mask=new_flags)
    sum_flags = new_flags.sum()
    if verbose:
        print('sum_flags: %s' % (sum_flags,))
        print('%5.3f%s flagged\nstd: %6.4f' % ((sum_flags*100.0/product(tf_plane.shape)),'%', ma.std(new_data)))
        print(sum_flags)
        print(previous_sums)
        print('------------------------------------------------------------')
    if sum_flags == reduce(max, previous_sums, 0):
        return single_correlation_flags(new_data,
                                        threshold = threshold,
                                        max_iter  = 0,
                                        previous_sums = previous_sums+[sum_flags])
    else:
        return single_correlation_flags(new_data, threshold=threshold, max_iter=max_iter-1, previous_sums=previous_sums+[sum_flags])
Example #2
    def _calc_correlation(self, values_1, values_2, conf_level=0.95):
        """ Calculates Pearson's correlation coeffcient.
        Arguments:
            values_1 -- first data
            values_2 -- second data
            conf_level -- confidence level
        Returns:
            (corr_coeff, significance) -- correlation coefficient and significance arrays
        """
        n_samples = values_1.shape[0]  # Sample length
        # Calculate Pearson's correlation coefficient
        values_cov = ma.sum((values_1 - ma.mean(values_1, axis=0)) *
                            (values_2 - ma.mean(values_2, axis=0)),
                            axis=0)
        corr_coef = values_cov / (ma.std(values_1, axis=0) *
                                  ma.std(values_2, axis=0)) / n_samples

        # Calculate significance using t-distribution with n-2 degrees of freedom.
        deg_fr = n_samples - 2  # Degrees of freedom.
        t_distr = ma.abs(
            corr_coef *
            ma.sqrt(deg_fr / (1. - corr_coef**2)))  # Student's t-distribution.
        prob = 0.5 + conf_level / 2  # Probability for two tails.
        cr_value = student_t.ppf(prob, deg_fr)  # Student's Critical value.
        significance = ma.greater(t_distr, cr_value)

        return corr_coef, significance
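A minimal, self-contained sketch (not from the original source) of how the correlation/significance logic above can be exercised on masked data; student_t is assumed to be scipy.stats.t.

import numpy as np
import numpy.ma as ma
from scipy.stats import t as student_t

rng = np.random.default_rng(0)
values_1 = ma.masked_invalid(rng.normal(size=(50, 3)))
values_2 = 0.5 * values_1 + rng.normal(size=(50, 3))

n_samples = values_1.shape[0]
cov = ma.sum((values_1 - ma.mean(values_1, axis=0)) *
             (values_2 - ma.mean(values_2, axis=0)), axis=0)
corr = cov / (ma.std(values_1, axis=0) * ma.std(values_2, axis=0)) / n_samples
t_stat = ma.abs(corr * ma.sqrt((n_samples - 2) / (1. - corr**2)))
significant = ma.greater(t_stat, student_t.ppf(0.975, n_samples - 2))
print(corr, significant)
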
def destroy_with_variance_2pol(Data, sigma_thres=6, bad_freq_list=None, submean=True):
    '''Mask frequencies with high variance.
    This is the same as the last function, but for Parkes 2 pol data.

    '''
    if bad_freq_list is None:
        bad_freq_list = []
    # Get the normalized variance array for each polarization.
    #Data.data[Data.data>3] = ma.masked
    #Data.data[Data.data<3] = ma.masked
    Data.data[np.isnan(Data.data)] = ma.masked
    Data.data[Data.data <= 0.] = ma.masked
    if submean:
        a = ma.var(Data.data[:,0,0,:],0)/(ma.mean(Data.data[:,0,0,:],0)**2)#XX
        b = ma.var(Data.data[:,1,0,:],0)/(ma.mean(Data.data[:,1,0,:],0)**2)#YY
    else:
        a = ma.var(Data.data[:,0,0,:],0)
        b = ma.var(Data.data[:,1,0,:],0)
    # Get the mean and standard deviation [sigma].
    means = sp.array([ma.mean(a), ma.mean(b)]) 
    sig   = sp.array([ma.std(a), ma.std(b)])
    # Get the max accepted value [sigma_thres*sigma, sigma_thres=6 works really well].
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    min_accepted = means - max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or (b[freq] > max_accepted[1]) or
            (a[freq] < min_accepted[0]) or (b[freq] < min_accepted[1])):
            # mask
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:,:,:,freq].mask = True
    return amount_masked
def destroy_time_with_mean_arrays_2pol(Data, flag_size=40):
    '''Mask times with high means.
    This is the same as last function, but for Parkes 2 pol data.
    
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    # Get means and std for all arrays.
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig = sp.array([ma.std(a), ma.std(b)])
    # Get max accepted values.
    max_accepted = means + 3*sig
    # Get min accepted values.
    min_accepted = means - 3*sig
    # Find bad times.
    bad_times = []
    for time in range(0,len(a)):
        if ((a[time] > max_accepted[0]) or (b[time] > max_accepted[1]) or
            (a[time] < min_accepted[0]) or (b[time] < min_accepted[1])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        if time-flag_size < 0:
            Data.data[0:(time+flag_size),:,:,:].mask = True
        else:
            Data.data[(time-flag_size):(time+flag_size),:,:,:].mask = True
    return
Example #5
def get_depths(noise_maps, pix_size, mask=None, pixel_weights=None):
    """Compute depth_i and depth_p (sensitivities) from noise maps.

    :param noise_maps: the noise maps
    :param pix_size: the pixel size in arcmin
    :param mask: the mask to apply
    :param pixel_weights: weighting of pixels (coverage)

    :return: depth_i and depth_p of the map
    """
    # apply pixel weights
    weighted_maps = np.empty_like(noise_maps)
    weighted_maps[...] = noise_maps[...] * pixel_weights

    # apply mask
    noise_ma = ma.array(weighted_maps, mask=mask)

    # noise estimation (in I component) using the noise maps
    depth_i = ma.getdata(ma.std(noise_ma[:, 0, :], axis=1))
    depth_i *= pix_size

    # noise estimation (in Q & U components)
    depth_p = ma.getdata(ma.std(noise_ma[:, 1:, :], axis=(1, 2)))
    depth_p *= pix_size

    return depth_i, depth_p
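A rough usage sketch (hypothetical data, not from the source): the depths are just the masked standard deviation over pixels of the weighted noise maps, scaled by the pixel size; note that mask and pixel_weights must be supplied, since the None defaults are used directly.

import numpy as np
import numpy.ma as ma

nfreq, nstokes, npix = 4, 3, 1000
noise_maps = np.random.default_rng(1).normal(size=(nfreq, nstokes, npix))
pixel_weights = np.ones(npix)
mask = np.zeros_like(noise_maps, dtype=bool)

depth_i, depth_p = get_depths(noise_maps, pix_size=7.0, mask=mask,
                              pixel_weights=pixel_weights)
print(depth_i.shape, depth_p.shape)  # (4,) and (4,)
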
Example #6
def statistics(numpy_array):
    return {'mean'   : ma.mean(numpy_array),
            'median' : ma.median(numpy_array.real)+1j*ma.median(numpy_array.imag),
            'max'    : ma.max(abs(numpy_array)),
            'min'    : ma.min(abs(numpy_array)),
            'std'    : ma.std(numpy_array),
            'stdmean': ma.std(numpy_array)/sqrt(sum(logical_not(numpy_array.mask))-1)}
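A quick illustration of the statistics() helper on a masked complex array; sqrt and logical_not are assumed to come from numpy (e.g. via a star import in the defining module).

import numpy as np
import numpy.ma as ma
from numpy import sqrt, logical_not

vis = ma.masked_invalid(np.array([1 + 1j, 2 + 0.5j, np.nan, 3 - 1j]))
print(statistics(vis))
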
Example #7
def plot_all_correlations(data_col, plot_flags=True,amax_factor=1.0):
    flags = bad_data(data_col, threshold=4.0, max_iter=20)
    flagged_data = ma.array(data_col.data, mask=flags)
    xx,xy,yx,yy,num_pol = split_data_col(ma.array(flagged_data))
    
    scale=ma.max(abs(flagged_data))
    stddev = max(ma.std(flagged_data.real), ma.std(flagged_data.imag))
    if flags.sum() == product(flags.shape):
        amax=1.0
    else:
        amax=(scale-stddev)*amax_factor
    

    print('scale: %f\nsigma: %f' % (scale, stddev))
    good=logical_not(xx.mask)
    if not plot_flags:
        good = None
    clf()
    if num_pol == 2:
        subplot(121)
        plot_complex_image('XX',xx, good, amin=0.0, amax=amax)
        subplot(122)
        plot_complex_image('YY',yy, good, amin=0.0, amax=amax)
    elif num_pol == 4:
        subplot(141)
        plot_complex_image('XX',xx, good, amin=0.0, amax=amax)
        subplot(142)
        plot_complex_image('XY',xy, good, amin=0.0, amax=amax)
        subplot(143)
        plot_complex_image('YX',yx, good, amin=0.0, amax=amax)
        subplot(144)
        plot_complex_image('YY',yy, good, amin=0.0, amax=amax)
        pass
    pass
def Portrait_diagram_subregion(obs_subregion_mean,
                               obs_name,
                               model_subregion_mean,
                               model_names,
                               seasonal_cycle,
                               file_name,
                               normalize=True):

    nmodel, nt, nregion = model_subregion_mean.shape

    if seasonal_cycle:
        obs_data = ma.mean(obs_subregion_mean.reshape(
            [1, nt // 12, 12, nregion]),
                           axis=1)
        model_data = ma.mean(model_subregion_mean.reshape(
            [nmodel, nt // 12, 12, nregion]),
                             axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    subregion_metrics = ma.zeros([4, nregion, nmodel])

    for imodel in np.arange(nmodel):
        for iregion in np.arange(nregion):
            # First metric: bias
            subregion_metrics[0, iregion, imodel] = metrics.calc_bias(
                model_data[imodel, :, iregion],
                obs_data[0, :, iregion],
                average_over_time=True)
            # Second metric: standard deviation
            subregion_metrics[1, iregion, imodel] = metrics.calc_stddev_ratio(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Third metric: RMSE
            subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Fourth metric: correlation
            subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])

    if normalize:
        for iregion in np.arange(nregion):
            subregion_metrics[0, iregion, :] = subregion_metrics[
                0, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.
            subregion_metrics[
                1, iregion, :] = subregion_metrics[1, iregion, :] * 100.
            subregion_metrics[2, iregion, :] = subregion_metrics[
                2, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.

    region_names = ['R%02d' % i for i in np.arange(nregion) + 1]

    for imetric, metric in enumerate(['bias', 'std', 'RMSE', 'corr']):
        plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :],
                                      region_names,
                                      model_names,
                                      file_name + '_' + metric,
                                      xlabel='model',
                                      ylabel='region')
Example #9
def Portrait_diagram_subregion(obs_subregion_mean,
                               obs_name,
                               model_subregion_mean,
                               model_names,
                               seasonal_cycle,
                               file_name,
                               normalize=True):

    nmodel, nt, nregion = model_subregion_mean.shape

    if seasonal_cycle:
        obs_data = ma.mean(
            obs_subregion_mean.reshape([1, nt // 12, 12, nregion]), axis=1)
        model_data = ma.mean(
            model_subregion_mean.reshape([nmodel, nt // 12, 12, nregion]),
            axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    subregion_metrics = ma.zeros([4, nregion, nmodel])

    for imodel in np.arange(nmodel):
        for iregion in np.arange(nregion):
            # First metric: bias
            subregion_metrics[0, iregion, imodel] = metrics.calc_bias(
                model_data[imodel, :, iregion],
                obs_data[0, :, iregion],
                average_over_time=True)
            # Second metric: standard deviation
            subregion_metrics[1, iregion, imodel] = metrics.calc_stddev_ratio(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Third metric: RMSE
            subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Fourth metric: correlation
            subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])

    if normalize:
        for iregion in np.arange(nregion):
            subregion_metrics[0, iregion, :] = subregion_metrics[
                0, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.
            subregion_metrics[
                1, iregion, :] = subregion_metrics[1, iregion, :] * 100.
            subregion_metrics[2, iregion, :] = subregion_metrics[
                2, iregion, :] / ma.std(obs_data[0, :, iregion]) * 100.

    region_names = ['R%02d' % i for i in np.arange(nregion) + 1]

    for imetric, metric in enumerate(['bias', 'std', 'RMSE', 'corr']):
        plotter.draw_portrait_diagram(
            subregion_metrics[imetric, :, :],
            region_names,
            model_names,
            file_name + '_' + metric,
            xlabel='model',
            ylabel='region')
def snr_func(data):
	data /= np.max(np.abs(data), axis=0) 
	year_stack=ma.array(np.split(data, 10, axis=0))
	stdev_all_data = ma.std(data, axis=0)
	signal_array = ma.mean(year_stack, axis=0)
	stdev_seasonal = ma.std(signal_array, axis=0)
	stdev_non_seasonal = stdev_all_data - stdev_seasonal
	return stdev_seasonal/stdev_non_seasonal
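Hypothetical usage with synthetic data (not from the source): 120 monthly time steps, i.e. 10 "years" of 12 months, at 5 grid points, giving one seasonal-to-non-seasonal standard-deviation ratio per grid point.

import numpy as np
import numpy.ma as ma

data = ma.array(np.random.default_rng(2).normal(size=(120, 5)))
print(snr_func(data))
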
Example #11
 def scale(self):
     if self.gts.ndim == 2:
         self.gts = self.gts / ma.std(self.gts, axis=0)
     elif self.gts.ndim == 3:
         for i in range(0, self.gts.shape[1]):
             self.gts[:,
                      i, :] = self.gts[:, i, :] / ma.std(self.gts[:, i, :],
                                                         axis=0)
Example #12
 def scale(self):
     """
     This normalises the SNPs/PGS columns to have variance 1.
     """
     if self.gts.ndim == 2:
         self.gts = self.gts/ma.std(self.gts, axis=0)
     elif self.gts.ndim == 3:
         for i in range(0, self.gts.shape[1]):
             self.gts[:, i, :] = self.gts[:, i, :]/ma.std(self.gts[:, i, :], axis=0)
def plot_stdevs(data, name):
	data /= np.max(np.abs(data), axis=0) 
	year_stack=ma.array(np.split(data, 10, axis=0))
	
	vmin, vmax = 0, 0.5
	#~ plt.figure(figsize=(10, 10), dpi=50)
	curr_map = Basemap(projection='cyl', llcrnrlon=ll_lon, llcrnrlat=ll_lat, urcrnrlon=ur_lon, urcrnrlat=ur_lat, resolution='i', area_thresh=100.)
	x, y = curr_map(lon, lat)
	
	plt.subplot(411)
	stdev_all_data = ma.std(data, axis=0)
	im = curr_map.pcolormesh(x, y, stdev_all_data , vmin=vmin, vmax=vmax, cmap=cmap)
	plt.axis('tight')
	plt.colorbar()
	curr_map.drawcoastlines()
	curr_map.fillcontinents(color='grey',lake_color='aqua')	
	#~ plt.title('stdev_all_data'+ longname)
	
	plt.subplot(412)
	annual_means = ma.mean(year_stack, axis = 1)
	stdev_annual_means = ma.std(annual_means, axis=0)
	im = curr_map.pcolormesh(x, y, stdev_annual_means , vmin=vmin, vmax=vmax, cmap=cmap)
	plt.axis('tight')
	plt.colorbar()
	curr_map.drawcoastlines()
	curr_map.fillcontinents(color='grey',lake_color='aqua')	
	#~ plt.title('Standard Deviation of the Annual Averages'+ longname)
	
	plt.subplot(413)
	signal_array = ma.mean(year_stack, axis=0)
	stdev_seasonal = ma.std(signal_array, axis=0)
	im = curr_map.pcolormesh(x, y, stdev_seasonal , vmin=vmin, vmax=vmax, cmap=cmap)
	plt.axis('tight')
	plt.colorbar()
	curr_map.drawcoastlines()
	curr_map.fillcontinents(color='grey',lake_color='aqua')	
	#~ plt.title('stdev_seasonal'+ longname)
	
	plt.subplot(414)
	stdev_all_data = ma.std(data, axis=0)
	signal_array = ma.mean(year_stack, axis=0)
	stdev_seasonal = ma.std(signal_array, axis=0)
	stdev_non_seasonal = stdev_all_data - stdev_seasonal
	#~ stdev_non_seasonal = ma.stdev(noise_array, axis=0)
	im = curr_map.pcolormesh(x, y, stdev_non_seasonal, vmin=vmin, vmax=vmax, cmap=cmap)
	plt.axis('tight')
	plt.colorbar()
	curr_map.drawcoastlines()
	curr_map.fillcontinents(color='grey',lake_color='aqua')	
	#~ plt.title('stdev_non_seasonal' + longname)
		
	plt.savefig('/home/nicholas/masters/figures/newplots/standard_deviations_' + name+ '.png')
	plt.close('all')
Example #14
def flagging(data,freq,sigma_thres,linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with
    RFI over a certain threshold.
    Expects data to be linear for the spline fit (s=1e-10); want to try something else.
    Using dB data seems to give reasonable results for s = 1e4.
    
    Also flags out NaNs, infs.

    Output is flagging mask for input data array.
    """
#    data = 10.**(data/10.)
    mask = zeros(len(data))
    nanmask = array(where(isnan(data))[0])
    mask[nanmask] = 1.0
    infmask = array(where(isinf(data))[0])
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)//scale-1):
#       smooth = itp.UnivariateSpline(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale])
        (Fa,Fb) = polyfit(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale],1)
#       smooth = itp.interp1d(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale],'linear')
        flat_data = data[f*scale:(f+1)*scale]/polyval([Fa,Fb],freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0+flat_sigma*sigma_thres
        min_accept = 1.0-flat_sigma*sigma_thres
        maxmask = array(where(flat_data>max_accept)[0])
        minmask = array(where(flat_data<min_accept)[0])
        maxmask = maxmask+f*scale
        minmask = minmask+f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
        
#    smooth = itp.UnivariateSpline(freq[(f+1)*scale:-1],data[(f+1)*scale:-1])
#    smooth = itp.interp1d(freq[(f+1)*scale:-1],data[(f+1)*scale:-1],'linear')
    (Fa,Fb) = polyfit(freq[(f+1)*scale:-1],data[(f+1)*scale:-1],1)
    flat_data = data[(f+1)*scale:-1]/polyval([Fa,Fb],freq[(f+1)*scale:-1])
#    flat_data = data[(f+1)*scale:-1]/smooth(freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0+flat_sigma*sigma_thres
    min_accept = 1.0-flat_sigma*sigma_thres
    maxmask = array(where(flat_data>max_accept)[0])
    minmask = array(where(flat_data<min_accept)[0])
    maxmask = maxmask+(f+1)*scale
    minmask = minmask+(f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    
    return mask
Example #15
def flagging(data,freq,sigma_thres,linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with
    RFI over a certain threshold.
   
    Also flags out NaNs, infs.
    Inputs are:
    data - linear input
    freq - can be any units
    sigma_thres - cutoff for bad data
    linscale - size of flattened window

    Output is flagging mask for input data array.
    """

    mask = np.zeros(len(data))
    nanmask = np.where(np.isnan(data))[0]
    mask[nanmask] = 1.0
    infmask = np.where(np.isinf(data))[0]
    mask[infmask] = 1.0
    scale = linscale
    for f in range(0, len(data)//scale-1):
        (Fa,Fb) = np.polyfit(freq[f*scale:(f+1)*scale],data[f*scale:(f+1)*scale],1)
        flat_data = data[f*scale:(f+1)*scale]/np.polyval([Fa,Fb],freq[f*scale:(f+1)*scale])
        flat_sigma = ma.std(flat_data)
        flat_mean = ma.mean(flat_data)
        max_accept = 1.0+flat_sigma*sigma_thres
        min_accept = 1.0-flat_sigma*sigma_thres
        maxmask = ma.array(np.where(flat_data>max_accept)[0])
        minmask = ma.array(np.where(flat_data<min_accept)[0])
        maxmask = maxmask+f*scale
        minmask = minmask+f*scale
        mask[maxmask] = 1.0
        mask[minmask] = 1.0
        
    (Fa,Fb) = np.polyfit(freq[(f+1)*scale:-1],data[(f+1)*scale:-1],1)
    flat_data = data[(f+1)*scale:-1]/np.polyval([Fa,Fb],freq[(f+1)*scale:-1])
    flat_sigma = ma.std(flat_data)
    flat_mean = ma.mean(flat_data)
    max_accept = 1.0+flat_sigma*sigma_thres
    min_accept = 1.0-flat_sigma*sigma_thres
    maxmask = ma.array(np.where(flat_data>max_accept)[0])
    minmask = ma.array(np.where(flat_data<min_accept)[0])
    maxmask = maxmask+(f+1)*scale
    minmask = minmask+(f+1)*scale
    mask[maxmask] = 1.0
    mask[minmask] = 1.0
    
    return mask
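A rough usage sketch of the flagging() routine above on a synthetic spectrum with one injected RFI spike; the band, noise level and window size are hypothetical.

import numpy as np
import numpy.ma as ma

freq = np.linspace(1400.0, 1500.0, 1024)
data = 1.0 + 0.001 * np.random.default_rng(3).normal(size=freq.size)
data[500] += 10.0                      # injected RFI spike
mask = flagging(data, freq, sigma_thres=6, linscale=64)
print(int(mask.sum()), "channels flagged")
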
Example #16
def destroy_time_with_mean_arrays(Data, flag_size=40):
    '''Mask times with high means.
    
    If there is a problem in time, the mean over all frequencies
    will stand out greatly [>10 sigma has been seen]. Flag these bad
    times and +- `flag_size` times around it. Will only be called if `Data`
    has 'badness'.

    Parameters
    ----------
    Data : DataBlock
        Contains information in a usable format direct from GBT. Bad
        times will be flagged in all polarizations and cal states.
    flag_size : int
        How many time bins (as an absolute number) to flag on either side of
        a bad time.
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    c = ma.mean(Data.data[:, 2, 0, :], -1)
    d = ma.mean(Data.data[:, 3, 0, :], -1)
    e = ma.mean(Data.data[:, 0, 1, :], -1)
    f = ma.mean(Data.data[:, 1, 1, :], -1)
    g = ma.mean(Data.data[:, 2, 1, :], -1)
    h = ma.mean(Data.data[:, 3, 1, :], -1)
    # Get means and std for all arrays.
    means = sp.array([
        ma.mean(a),
        ma.mean(b),
        ma.mean(c),
        ma.mean(d),
        ma.mean(e),
        ma.mean(f),
        ma.mean(g),
        ma.mean(h)
    ])
    sig = sp.array([
        ma.std(a),
        ma.std(b),
        ma.std(c),
        ma.std(d),
        ma.std(e),
        ma.std(f),
        ma.std(g),
        ma.std(h)
    ])
    # Get max accepted values.
    max_accepted = means + 3 * sig
    # Find bad times.
    bad_times = []
    for time in range(0, len(a)):
        if ((a[time] > max_accepted[0]) or (b[time] > max_accepted[1])
                or (c[time] > max_accepted[2]) or (d[time] > max_accepted[3])
                or (e[time] > max_accepted[4]) or (f[time] > max_accepted[5])
                or (g[time] > max_accepted[6]) or (h[time] > max_accepted[7])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        Data.data[(time - flag_size):(time + flag_size), :, :, :].mask = True
    return
def get_noise_levels(ncfile):

    # ----------------
    # Open NetCDF file
    # ----------------
    print('Opening NetCDF file ' + ncfile)
    dataset = nc4.Dataset(ncfile,'r+',format='NETCDF3_CLASSIC')

    nray    = len(dataset.dimensions['time']);
    ngate   = len(dataset.dimensions['range']);

    elv = np.transpose(np.tile(dataset.variables['elevation'][:],(ngate,1)));
    rng = np.tile(dataset.variables['range'][:],(nray,1))

    height = rng*np.sin(elv*np.pi/180.)

    zh = dataset.variables['ZED_H'][:];
    zed = ma.masked_where(height<14000, zh);

    rngkm = ma.masked_where(rng<=0.0, rng/1000.);

    range2 = 20.*ma.log10(rngkm);

    zh[:] = zed - range2;
    zv = zh.copy();
    zv[:] = zh[:] - dataset.variables['ZDR'][:]

    zx = zh.copy();
    zx[:] = zh[:] + dataset.variables['LDR'][:]

    nezharr = ma.mean(zh,axis=1)
    nezherr = ma.std(zh,axis=1)
    nezvarr = ma.mean(zv,axis=1)
    nezverr = ma.std(zv,axis=1)
    nezxarr = ma.mean(zx,axis=1)
    nezxerr = ma.std(zx,axis=1)

    nezharr = ma.masked_where(nezherr>MAX_ERR,nezharr)
    nezvarr = ma.masked_where(nezverr>MAX_ERR,nezvarr)
    nezxarr = ma.masked_where(nezxerr>MAX_ERR,nezxarr)

    nezh = ma.median(nezharr)
    nezv = ma.median(nezvarr)
    nezx = ma.median(nezxarr)

    dataset.close()


    return np.round(nezh,2), np.round(nezv,2), np.round(nezx,2)
Example #18
    def average_combine(self):
        """Average combine together a set of arrays.   A CCDData object is
           returned with the data property set to the average of the arrays.
           If the data was masked or any data have been rejected, those pixels
           will not be included in the average.   A mask will be returned, and
           if a pixel has been rejected in all images, it will be masked.   The
           uncertainty of the combined image is set by the standard deviation
           of the input images.

           Returns
           -------
           combined_image: CCDData object
               CCDData object based on the combined input of CCDData objects.

        """
        #set up the data
        data, wei = ma.average(self.data_arr, axis=0, weights=self.weights,
                               returned=True)

        #set up the mask
        mask = self.data_arr.mask.sum(axis=0)
        mask = (mask == len(self.data_arr))

        #set up the variance
        uncertainty = ma.std(self.data_arr, axis=0)

        #create the combined image
        combined_image = CCDData(data.data, mask=mask, unit=self.unit,
                                 uncertainty=StdDevUncertainty(uncertainty))

        #update the meta data
        combined_image.meta['NCOMBINE'] = len(self.data_arr)

        #return the combined image
        return combined_image
def test_baseline_use_all_features_with_signified_random(data, conf):
    conf['feature_selection']['must_be_in_thesaurus'] = False
    conf['vectorizer']['decode_token_handler'] = \
        'eval.pipeline.feature_handlers.SignifiedOnlyFeatureHandler'
    conf['vectorizer']['k'] = 1

    x1, x2, voc = _vectorize_data(data, conf, dummy=True)

    assert full_vocab == strip(voc)

    assert isinstance(x1, sp.spmatrix)
    t.assert_array_equal(
        x1.toarray(),
        training_matrix
    )

    t.assert_array_almost_equal(
        x2.toarray(),
        np.array(
            [
                [0, 11.0, 0, 0, 0, 0],
            ]
        )
    )
    # the thesaurus will always say the neighbour for something is
    # b/N with a similarity of 1, and we look up 11 tokens overall in
    # the test document
    x1, x2, voc = _vectorize_data(data, conf, dummy=True)
    assert x2.sum() == 11.0
    assert std(x2.todense()) > 0
Example #20
def is_hit(history_data, test_data, year):
    """

    :param history_data: probability for default given rating for a past years
    :type history_data: np.array
    :param test_data: default for given rating in given year
    :type test_data: float
    :return:
    """
    m = mean(history_data)
    st_dev = std(history_data)
    floar = get_upper(history_data)
    min_defaults = max(0.0, m - 1.96 * st_dev)
    max_defaults = min(100.0, m + 1.96 * st_dev)

    if year in [2007, 2008, 2011, 2014]:
        in_interval = "\\in"
        color = "green"
        if not min_defaults <= test_data <= max_defaults:
            in_interval = "\\not " + in_interval
            color = "red"
        min_defaults = "%.1f" % min_defaults
        max_defaults = "%.1f" % max_defaults
        end = "& " if year != 2014 else "\\\\"
        print("$\\textcolor{{{5}}}{{ {3} {4} [{1}, {2}] }}$".format(year, min_defaults, max_defaults, test_data, in_interval, color), end=end)
    if abs(test_data - m) <= 1.96 * st_dev:
        return 0
    return 1
Example #21
def pca(data, nPCs=-1):
    domain = None

    suma = data.sum(axis=0) / float(len(data))
    data -= suma  # substract average value to get zero mean
    data /= MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / len(data)

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)

    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))

    pairs = [(val, i) for i, val in enumerate(eigVals)]
    pairs.sort()
    pairs.reverse()
    indices = [pair[1] for pair in pairs[:nPCs]
               ]  # take indices of the wanted number of principal components

    vectors = MA.take(eigVectors, indices, axis=1)
    values = [eigVals[i] for i in indices]
    projectedData = MA.dot(data, vectors)

    return projectedData, vectors, values
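A small hedged demo of pca() above on data with two correlated columns; MA is assumed to be numpy.ma and linalg numpy.linalg in the snippet's namespace.

import numpy as np
import numpy.ma as MA
from numpy import linalg

raw = np.random.default_rng(5).normal(size=(100, 4))
raw[:, 1] = 2.0 * raw[:, 0] + 0.1 * raw[:, 1]   # make two columns correlated
projected, vectors, values = pca(MA.masked_invalid(raw), nPCs=2)
print(projected.shape, values)
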
Example #22
def pca(data, nPCs = -1):
    domain = None
    
    suma = data.sum(axis=0)/float(len(data))
    data -= suma       # substract average value to get zero mean
    data /= MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / len(data)

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)
    
    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))
    
    pairs = [(val, i) for i, val in enumerate(eigVals)]
    pairs.sort()
    pairs.reverse()
    indices = [pair[1] for pair in pairs[:nPCs]]  # take indices of the wanted number of principal components

    vectors = MA.take(eigVectors, indices, axis = 1)
    values = [eigVals[i] for i in indices]
    projectedData = MA.dot(data, vectors)
    
    return projectedData, vectors, values
Example #23
def update_background(fn):
    with fits.open(fn, mode='update') as hdu:
        im = hdu[0].data.copy()
        mask = ~np.isfinite(im) + (im < DATA_FLOOR)
        if 'MASK' in hdu:
            mask += hdu['MASK'].data > 0
        im = ma.MaskedArray(im, mask=mask, copy=True)

        scim = sigma_clip(im)

        mean = ma.mean(scim)
        mean = mean if mean is not ma.masked else 0

        median = ma.median(scim)
        median = median if median is not ma.masked else 0

        stdev = ma.std(scim)
        stdev = stdev if stdev is not ma.masked else 0

        hdu['SCI'].header['bgmean'] = (mean, 'background sigma-clipped mean')
        hdu['SCI'].header['bgmedian'] = (median,
                                         'background sigma-clipped median')
        hdu['SCI'].header['bgstdev'] = (
            stdev, 'background sigma-clipped standard dev.')
        hdu['SCI'].header['nbg'] = (ma.sum(~scim.mask),
                                    'area considered in background stats.')
Example #24
def calc_subregion_area_mean_and_std(dataset_array, subregions):
    ''' Calculate area mean and standard deviation values for given subregions using datasets on common grid points
    :param dataset_array: An array of OCW Dataset Objects
    :type dataset_array: list
    :param subregions: list of subregions
    :type subregions: :class:`numpy.ma.array`
    :returns: area-averaged time series of shape (ndata, ntime, nsubregion), the spatial standard deviation, and the subregion array
    '''

    ndata = len(dataset_array)
    dataset0 = dataset_array[0]
    if dataset0.lons.ndim == 1:
       lons, lats = np.meshgrid(dataset0.lons, dataset0.lats)
    else:
       lons = dataset0.lons
       lats = dataset0.lats
    subregion_array = np.zeros(lons.shape)
    mask_array = dataset_array[0].values[0,:].mask
    # dataset0.values.shape[0]: length of the time dimension
    # spatial average
    t_series =ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])
    # spatial standard deviation
    spatial_std =ma.zeros([ndata, dataset0.values.shape[0], len(subregions)])

    for iregion, subregion in enumerate(subregions):
        lat_min, lat_max, lon_min, lon_max = subregion[1]
        y_index,x_index = np.where((lats >= lat_min) & (lats <= lat_max) & (lons >= lon_min) & (lons <= lon_max))
        subregion_array[y_index,x_index] = iregion+1
        for idata in np.arange(ndata):
            t_series[idata, :, iregion] = ma.mean(dataset_array[idata].values[:,y_index, x_index], axis=1)
            spatial_std[idata, :, iregion] = ma.std(dataset_array[idata].values[:,y_index, x_index], axis=1)
    subregion_array = ma.array(subregion_array, mask=mask_array) 
    return t_series, spatial_std, subregion_array
def makeFluxSigMask(flux=None, minThresh=2, maxThresh=5):
    """
      Compute the mean total integrated flux value
      and its standard deviation.

      Find all pixels with a flux in between min/max thresholds and mask them.

      Parameters
      ----------
      flux: array_like
        The 2D array of the total integrated fluxes.
      min/maxThresh: int
        Sigma limit thresholds for the min/max.

      Return
      ------
      Boolean mask.
    """

    sigma = ma.std(flux)
    ave = ma.mean(flux)

    if sigma > ave:
        intervalMin = ave
    else:
        intervalMin = ave - (minThresh * sigma)

    intervalMax = ave + (maxThresh * sigma)

    maskedOutside = ma.masked_outside(flux, intervalMin, intervalMax)
    maskedZeros = ma.masked_where(maskedOutside == 0,
                                  maskedOutside,
                                  copy=False)

    return ma.getmask(maskedZeros)
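Hedged usage sketch with a synthetic image: mask pixels whose integrated flux falls outside [mean - minThresh*sigma, mean + maxThresh*sigma] or equals zero.

import numpy as np
import numpy.ma as ma

flux = np.random.default_rng(6).normal(loc=5.0, scale=1.0, size=(64, 64))
flux[10:12, 10:12] = 0.0               # a few empty pixels
bad = makeFluxSigMask(flux=flux, minThresh=2, maxThresh=5)
print(np.sum(bad), "pixels masked")
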
Example #26
 def _check(self, data):
     array = biggus.NumpyArrayAdapter(data)
     result = std(array, axis=0, ddof=0).masked_array()
     expected = ma.std(data, axis=0, ddof=0)
     if expected.ndim == 0:
         expected = ma.asarray(expected)
     np.testing.assert_array_equal(result.filled(), expected.filled())
     np.testing.assert_array_equal(result.mask, expected.mask)
Example #27
def calc_stddev(array, axis=None):
    """ Calculate a sample standard deviation of an array along the array

    :param array: an array to calculate sample standard deviation
    :type array: :class:'numpy.ma.core.MaskedArray'
    
    :param axis: Axis along which the sample standard deviation is computed.
    :type axis: 'int'

    :returns: sample standard deviation of array
    :rtype: :class:'numpy.ma.core.MaskedArray'
    """

    if isinstance(axis, int):
        return ma.std(array, axis=axis, ddof=1)
    else:
        return ma.std(array, ddof=1)
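A quick check (not from the source): the sample (ddof=1) standard deviation of a masked array ignores the masked entries.

import numpy.ma as ma

x = ma.array([1.0, 2.0, 3.0, 100.0], mask=[False, False, False, True])
print(calc_stddev(x))          # same as ma.std(x, ddof=1) -> 1.0
print(calc_stddev(x, axis=0))
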
 def calc_chrom_fast(self, index, coords_vals):
     self.population[index]['fitness'] = \
     np.abs(self.array_mean - ma.mean(coords_vals[0])) + \
     np.abs(self.array_stdev - ma.std(coords_vals[0])) + \
     np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))/10  + \
     np.abs((self.chromosome_size-1) - coords_vals[2]) #locations
     #~ print "Chromosome size: ",self.chromosome_size
     print "Number of locations is: ", coords_vals[2]
Example #29
def calc_stddev(array, axis=None):
    ''' Calculate a sample standard deviation of an array along the given axis

    :param array: an array to calculate sample standard deviation
    :type array: :class:'numpy.ma.core.MaskedArray'

    :param axis: Axis along which the sample standard deviation is computed.
    :type axis: 'int'

    :returns: sample standard deviation of array
    :rtype: :class:'numpy.ma.core.MaskedArray'
    '''

    if isinstance(axis, int):
        return ma.std(array, axis=axis, ddof=1)
    else:
        return ma.std(array, ddof=1)
    def std_(self):
        """
        calculates the standard deviation of the image over the binarised
        segmentation

        :return:
        """
        return ma.std(self.masked_img, 0)
Example #31
 def _sky(data, ellipsefit, diameter=2.0):
     """Estimate the sky brightness in each band."""
     #area = diameter**2 # arcsec^2
     for filt in band:
         img = data['{}_masked'.format(filt)]
         #ellipsefit['{}_sky'.format(filt)] = 22.5 - 2.5 * np.log10( ma.std(img) )
         #ellipsefit['mu_{}_sky'.format(filt)] = ellipsefit['{}_sky'.format(filt)] # + 2.5 * np.log10(area)
         ellipsefit['mu_{}_sky'.format(filt)] = 22.5 - 2.5 * np.log10( ma.std(img) )
Example #32
def search_noise(data, low_deviation, high_deviation, max_diff):
    global logger
    high_info   = list()
    low_info    = list()
    jitter_info = list()
    
    spec_median  = ma.median(data, axis=2)
    spec_max     = spec_median.max(axis=1)
    spec_min     = spec_median.min(axis=1)
    ref_value    = ma.median(data)
    ref_diff     = ma.median(spec_max) - ma.median(spec_min)
    ref_std      = ma.std(spec_median)
    
    limit = ref_value + min(max((ref_std * 3.0),0.75), high_deviation)
    
    n_secs = data.shape[1]
    logger.debug("median-signal=%5.3fdB, median-fluctuation=%5.3fdB, std=%5.3f, high-limit=%5.3fdB" %(ref_value, ref_diff, ref_std, limit))
    for rcu in range(data.shape[0]):
        peaks = cSearchPeak(data[rcu,0,:])
        if not peaks.valid_data:
            return (low_info, high_info, jitter_info)
        peaks.search(delta=10.0)
        if peaks.nMaxPeaks() >= 30:
            logger.debug("RCU=%d: found %d peaks, skip noise test" %(rcu, peaks.nMaxPeaks()))
        else:
            n_bad_high_secs    = 0
            n_bad_low_secs     = 0
            n_bad_jitter_secs  = 0
            
            rcu_max_diff = spec_max[rcu] - spec_min[rcu]
            
            for val in spec_median[rcu,:]:
                #logger.debug("RCU=%d: high-noise value=%5.3fdB  max-ref-value=%5.3fdB" %(rcu, val, ref_val)) 
                if ((val > limit) and (rcu_max_diff > 1.0)) or (val > (ref_value + high_deviation)):
                    n_bad_high_secs += 1
                
                if ((val < (ref_value + low_deviation)) and (rcu_max_diff > 1.0))  or (val < (ref_value + low_deviation)):
                    n_bad_low_secs += 1
            
            if n_bad_high_secs > 0:    
                high_info.append((rcu, spec_max[rcu], n_bad_high_secs, limit, rcu_max_diff))
                logger.debug("RCU=%d: max-noise=%5.3f  %d of %d seconds bad" %(rcu, spec_max[rcu], n_bad_high_secs, n_secs)) 

            if n_bad_low_secs > 0:    
                low_info.append((rcu, spec_min[rcu], n_bad_low_secs , (ref_value+low_deviation), rcu_max_diff)) 
                logger.debug("RCU=%d: min-noise=%5.3f %d of %d seconds bad" %(rcu, spec_min[rcu], n_bad_low_secs, n_secs)) 
            
            if (n_bad_high_secs == 0) and (n_bad_low_secs == 0):
                if rcu_max_diff > (ref_diff + max_diff):
                    check_high_value = ref_value + (ref_diff / 2.0)
                    check_low_value  = ref_value - (ref_diff / 2.0)
                    for val in spec_median[rcu,:]:
                        if val > check_high_value or val < check_low_value:
                            n_bad_jitter_secs += 1
                    jitter_info.append((rcu, rcu_max_diff, ref_diff, n_bad_jitter_secs))
                    logger.debug("RCU=%d: max spectrum fluctuation %5.3f dB" %(rcu, rcu_max_diff)) 
                
    return (low_info, high_info, jitter_info)
Example #33
    def average_combine(self, scale_func=ma.average, scale_to=None):
        """ Average combine together a set of arrays.

           A `~ccdproc.CCDData` object is returned with the data property
           set to the average of the arrays.  If the data was masked or any
           data have been rejected, those pixels will not be included in the
           average.  A mask will be returned, and if a pixel has been
           rejected in all images, it will be masked.  The uncertainty of
           the combined image is set by the standard deviation of the input
           images.

           Parameters
           ----------
           scale_func : function, optional
               Function to calculate the average. Defaults to
               `~numpy.ma.average`.

           scale_to : float, optional
               Scaling factor used in the average combined image. If given,
               it overrides ``CCDData.scaling``. Defaults to None.

           Returns
           -------
           combined_image: `~ccdproc.CCDData`
               CCDData object based on the combined input of CCDData objects.

        """
        if scale_to is not None:
            scalings = scale_to
        elif self.scaling is not None:
            scalings = self.scaling
        else:
            scalings = 1.0

        # set up the data
        data, wei = scale_func(scalings * self.data_arr, axis=0, weights=self.weights, returned=True)

        # set up the mask
        mask = self.data_arr.mask.sum(axis=0)
        mask = mask == len(self.data_arr)

        # set up the deviation
        uncertainty = ma.std(self.data_arr, axis=0)

        # create the combined image with a dtype that matches the combiner
        combined_image = CCDData(
            np.asarray(data.data, dtype=self.dtype),
            mask=mask,
            unit=self.unit,
            uncertainty=StdDevUncertainty(uncertainty),
        )

        # update the meta data
        combined_image.meta["NCOMBINE"] = len(self.data_arr)

        # return the combined image
        return combined_image
Example #34
    def average_combine(self, scale_func=ma.average, scale_to=None):
        """ Average combine together a set of arrays.

           A `~ccdproc.CCDData` object is returned with the data property
           set to the average of the arrays.  If the data was masked or any
           data have been rejected, those pixels will not be included in the
           average.  A mask will be returned, and if a pixel has been
           rejected in all images, it will be masked.  The uncertainty of
           the combined image is set by the standard deviation of the input
           images.

           Parameters
           ----------
           scale_func : function, optional
               Function to calculate the average. Defaults to
               `~numpy.ma.average`.

           scale_to : float, optional
               Scaling factor used in the average combined image. If given,
               it overrides ``CCDData.scaling``. Defaults to None.

           Returns
           -------
           combined_image: `~ccdproc.CCDData`
               CCDData object based on the combined input of CCDData objects.

        """
        if scale_to is not None:
            scalings = scale_to
        elif self.scaling is not None:
            scalings = self.scaling
        else:
            scalings = 1.0

        # set up the data
        data, wei = scale_func(scalings * self.data_arr,
                               axis=0, weights=self.weights,
                               returned=True)

        # set up the mask
        mask = self.data_arr.mask.sum(axis=0)
        mask = (mask == len(self.data_arr))

        # set up the deviation
        uncertainty = ma.std(self.data_arr, axis=0)

        # create the combined image with a dtype that matches the combiner
        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                                 mask=mask, unit=self.unit,
                                 uncertainty=StdDevUncertainty(uncertainty))

        # update the meta data
        combined_image.meta['NCOMBINE'] = len(self.data_arr)

        # return the combined image
        return combined_image
def infer_ks_test_goodness(l1):
    # l = np.histogram(l1)
    # n = len(l)
    mean = average(l1)
    sigma = std(l1)
    res = kstest(l1, 'norm', [mean, sigma])
    if res[1] < 0.01:
        print('reject')
    else:
        print('accept')
    print(res)
 def get_mask(self):
     self.array_mean = ma.mean(self.array)
     self.array_stdev = ma.std(self.array)
     self.array_range = ma.max(self.array) - ma.min(self.array)
     print "The mean is %f, the stdev is %f, the range is %f." %(self.array_mean, self.array_stdev, self.array_range)
     from scipy.io.netcdf import netcdf_file as NetCDFFile
     ### get landmask
     nc = NetCDFFile(os.getcwd()+ '/../data/netcdf_files/ORCA2_landmask.nc','r')
     self.mask = ma.masked_values(nc.variables['MASK'][:, :self.time_len, :self.lat_len, :180], -9.99999979e+33)
     nc.close()
     self.xxx, self.yyy, self.zzz = np.lib.index_tricks.mgrid[0:self.time_len, 0:self.lat_len, 0:180]
Example #37
def calculate_moments(d,
                      minchan=False,
                      maxchan=False,
                      vel=False,
                      bestmask=False,
                      mask=False):
    """This function actually calculates moments"""
    nglat = d.shape[1]
    nglon = d.shape[2]
    nspec = d.shape[0]
    maps = np.zeros(
        (nglat, nglon),
        dtype={
            'names': [
                'mean', 'sd', 'errmn', 'errsd', 'skew', 'kurt', 'error',
                'intint', 'npix'
            ],
            'formats': ['f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4']
        })
    #These definitions for mask seem backward but are correct.
    noise_portion = ma.masked_where(mask == 1, d)
    good_d = d[minchan:maxchan, ...]
    mask2 = mask[minchan:maxchan, ...]
    #print(minchan)
    #print(maxchan)
    signal_portion = ma.masked_where(mask2 == 0, good_d)
    maps['error'] = ma.std(noise_portion, axis=0)
    maps['intint'] = ma.sum(signal_portion, axis=0)
    for x in range(nglat):
        for y in range(nglon):
            fullspec = d[..., x, y]  # Extract a single spectrum
            ind = np.arange(nspec)
            velmask = mask[minchan:maxchan, x, y]
            if np.sum(velmask) != 0:
                velmask = bestmask
                npix = max(np.sum(velmask), 1)
            ind = ind[velmask > 0]
            sigma = maps['error'][x, y]
            if ind.size > 2 and (sigma > 0):
                mom = idl_stats.wt_moment(vel[ind],
                                          fullspec[ind],
                                          errors=np.zeros(ind.size) + sigma)
                maps['mean'][x, y] = mom['mean']
                maps['sd'][x, y] = mom['stdev']
                maps['errmn'][x, y] = mom['errmn']
                maps['errsd'][x, y] = mom['errsd']
                maps['npix'][x, y] = npix
            else:
                maps['mean'][x, y] = np.nan
                maps['sd'][x, y] = np.nan
                maps['errmn'][x, y] = np.nan
                maps['errsd'][x, y] = np.nan
                maps['npix'][x, y] = np.nan
    return (maps)
Example #38
def infer_ks_test_goodness(l1):
    # l = np.histogram(l1)
    # n = len(l)
    mean = average(l1)
    sigma = std(l1)
    res = kstest(l1, 'norm', [mean, sigma])
    if res[1] < 0.01:
        print('reject')
    else:
        print('accept')
    print(res)
 def calc_chrom_fast(self, index, coords_vals):
     self.population[index]["fitness"] = np.abs(self.array_mean - ma.mean(coords_vals[0])) + np.abs(
         self.array_stdev - ma.std(coords_vals[0])
     )  # + \
     # np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))/10  + \
     # np.abs((self.chromosome_size-1) - coords_vals[2]) #locations
     # ~ print "Chromosome size: ",self.chromosome_size
     # print "Number of locations is: ", coords_vals[2]
     # ~ print "The sample range is: %g. The array range is: %g " % ((ma.max(coords_vals[0])-ma.min(coords_vals[0])), self.array_range)
     # ~ print np.abs(self.array_mean - ma.mean(coords_vals[0])), np.abs(self.array_stdev - ma.std(coords_vals[0])), np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))
     # ~ print ma.mean(coords_vals[0]), ma.std(coords_vals[0]), (ma.max(coords_vals[0])-ma.min(coords_vals[0]))
     "Fitness is: ", self.population[index]["fitness"]
Example #40
def generate_rb_hist_n(f_name, n, doy_start, doy_end):
    fh_in = Dataset(os.path.join("Data", "Sentinel", f_name + ".nc"), "r")
    out_path = get_out_path(
        os.path.join("Data", "Sentinel", "usa_rb_hist_" + str(n)))

    init_doy, final_doy = f_name.split("_")[1], f_name.split("_")[2]
    init_doy = date(*map(int, [init_doy[:4], init_doy[4:6], init_doy[6:]]))
    final_doy = date(*map(int, [final_doy[:4], final_doy[4:6], final_doy[6:]]))

    doy_s = date(*map(int, [doy_start[:4], doy_start[4:6], doy_start[6:]]))
    doy_e = date(*map(int, [doy_end[:4], doy_end[4:6], doy_end[6:]]))
    assert ((doy_s - init_doy).days >= n)
    assert ((final_doy - doy_e).days >= 0)

    i_doy = (doy_s - init_doy).days
    for doy in generate_doy(doy_start, doy_end, ""):
        fh_out = Dataset(os.path.join(out_path, doy + ".nc"), "w")

        for name, dim in fh_in.dimensions.items():
            if name != "time":
                fh_out.createDimension(name, len(dim))

        for v_name, varin in fh_in.variables.items():
            if v_name == 'lat' or v_name == 'lon':
                outVar = fh_out.createVariable(v_name, varin.datatype,
                                               varin.dimensions)
                outVar.setncatts(
                    {k: varin.getncattr(k)
                     for k in varin.ncattrs()})
                outVar[:] = varin[:]
            elif v_name != "time":
                outVar = fh_out.createVariable(v_name + "_hist_mean_" + str(n),
                                               varin.datatype, (
                                                   "lat",
                                                   "lon",
                                               ))
                outVar.setncatts(
                    {k: varin.getncattr(k)
                     for k in varin.ncattrs()})
                s_doy = i_doy - n
                print(s_doy, i_doy)
                outVar[:] = ma.mean(varin[s_doy:i_doy, :, :], axis=0)
                outVar = fh_out.createVariable(v_name + "_hist_std_" + str(n),
                                               varin.datatype, (
                                                   "lat",
                                                   "lon",
                                               ))
                outVar[:] = ma.std(varin[s_doy:i_doy, :, :], axis=0)
        i_doy += 1

        fh_out.close()

    fh_in.close()
def res_dist(x, y, e, n_runs=100, random_state=None):
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.4, random_state=random_state)

    test_res = []
    train_res = []
    start_time = time()

    for i in range(n_runs):
        e.fit(x_train, y_train)
        train_res.append(e.score(x_train, y_train))
        test_res.append(e.score(x_test, y_test))
        if (i % (n_runs / 10) == 0): print("%d" % i, end=' ')

    print("\nTime: %.3f secs" % (time() - start_time))
    print("Test Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(test_res), mean(test_res), max(test_res), std(test_res)))
    print("Train Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" %
          (min(train_res), mean(train_res), max(train_res), std(train_res)))

    return (train_res, test_res)
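A hypothetical call of res_dist() with a scikit-learn estimator, assuming train_test_split, time, mean and std are already imported in the defining module (e.g. from sklearn.model_selection, time and numpy):

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)
train_scores, test_scores = res_dist(X, y, Ridge(), n_runs=20, random_state=0)
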
def flag_data(dynamic_spectrum, channels_per_subband=16):
    data = dynamic_spectrum
    data_mean = data.mean()
    data_std = data.std()
    flags= numpy.logical_or(abs(data - data_mean) > 8*data_std,
                       data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 6*data_std,
                       data == 0)
    print(type(flags))
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std,
                       data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std,
                       data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)

    data_mean = ma.mean(flagged_data)
    data_std = ma.std(flagged_data)
    flags = numpy.logical_or(abs(data - data_mean) > 4*data_std,
                       data == 0)
    flags[:, 0::channels_per_subband] = True
    flagged_data = ma.array(data, mask=flags, copy=True)
    
    return flagged_data
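A hedged sketch of flag_data() on a synthetic dynamic spectrum: the function clips iteratively at 8, 6 and then repeatedly at 4 sigma, always re-flagging the first channel of every subband.

import numpy
import numpy.ma as ma

rng = numpy.random.default_rng(4)
dyn_spec = rng.normal(loc=10.0, scale=1.0, size=(200, 256))
dyn_spec[50, 100] = 1e3                  # one bright RFI sample
flagged = flag_data(dyn_spec, channels_per_subband=16)
print(flagged.mask.sum(), "samples flagged")
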
Example #43
def destroy_time_with_mean_arrays(Data, flag_size=40):
    '''Mask times with high means.
    
    If there is a problem in time, the mean over all frequencies
    will stand out greatly [>10 sigma has been seen]. Flag these bad
    times and +- `flag_size` times around it. Will only be called if `Data`
    has 'badness'.

    Parameters
    ----------
    Data : DataBlock
        Contains information in a usable format direct from GBT. Bad
        times will be flagged in all polarizations and cal states.
    flag_size : int
        How many time bins (as an absolute number) to flag on either side of
        a bad time.
    '''
    # Get the means over all frequencies. (for all pols. and cals.)
    a = ma.mean(Data.data[:, 0, 0, :], -1)
    b = ma.mean(Data.data[:, 1, 0, :], -1)
    c = ma.mean(Data.data[:, 2, 0, :], -1)
    d = ma.mean(Data.data[:, 3, 0, :], -1)
    e = ma.mean(Data.data[:, 0, 1, :], -1)
    f = ma.mean(Data.data[:, 1, 1, :], -1)
    g = ma.mean(Data.data[:, 2, 1, :], -1)
    h = ma.mean(Data.data[:, 3, 1, :], -1)
    # Get means and std for all arrays.
    means = sp.array([ma.mean(a), ma.mean(b), ma.mean(c), ma.mean(d),
                        ma.mean(e), ma.mean(f), ma.mean(g), ma.mean(h)])
    sig = sp.array([ma.std(a), ma.std(b), ma.std(c), ma.std(d),
                      ma.std(e), ma.std(f), ma.std(g), ma.std(h)])
    # Get max accepted values.
    max_accepted = means + 3*sig
    # Find bad times.
    bad_times = []
    for time in range(0,len(a)):
        if ((a[time] > max_accepted[0]) or
            (b[time] > max_accepted[1]) or
            (c[time] > max_accepted[2]) or
            (d[time] > max_accepted[3]) or
            (e[time] > max_accepted[4]) or
            (f[time] > max_accepted[5]) or
            (g[time] > max_accepted[6]) or
            (h[time] > max_accepted[7])):
            bad_times.append(time)
    # Mask bad times and those +- flag_size around.
    for time in bad_times:
        Data.data[(time-flag_size):(time+flag_size),:,:,:].mask = True
    return
Example #44
def calculate_moments(d,minchan=False,maxchan=False,vel=False,bestmask=False,mask=False):

    nglat = d.shape[1]
    nglon = d.shape[2]
    nspec = d.shape[0]


    maps = np.zeros((nglat,nglon),dtype={'names':['mean','sd','errmn',
            'errsd','skew','kurt','error','intint','npix'],
            'formats':['f4','f4','f4','f4','f4','f4','f4','f4','f4']})

    #These definitions for mask seem backward but are correct.
    noise_portion = ma.masked_where(mask == 1,d)
    good_d = d[minchan:maxchan,...]
    mask2 = mask[minchan:maxchan,...]
    #print(mask)
    #print(mask2)
    print(minchan)
    print(maxchan)
    signal_portion = ma.masked_where(mask2 == 0,good_d)
    maps['error']  = ma.std(noise_portion,axis=0)
    maps['intint'] = ma.sum(signal_portion,axis=0)
    #print(maps['error'])


    for x in range(nglat):
        for y in range(nglon):
            fullspec = d[...,x,y]  # Extract a single spectrum
            ind = np.arange(nspec)
            velmask = mask[minchan:maxchan,x,y]
            if np.sum(velmask) != 0:
                velmask = bestmask
                npix = max(np.sum(velmask),1)
            ind = ind[velmask > 0]
            sigma = maps['error'][x,y]
            if ind.size > 2 and (sigma > 0):
                mom = idl_stats.wt_moment(vel[ind],fullspec[ind],
                                errors = np.zeros(ind.size)+sigma)
                maps['mean'][x,y]  = mom['mean']
                maps['sd'][x,y]    = mom['stdev']
                maps['errmn'][x,y] = mom['errmn']
                maps['errsd'][x,y] = mom['errsd']
                maps['npix'][x,y]  = npix
            else:
                maps['mean'][x,y]  = np.nan
                maps['sd'][x,y]    = np.nan
                maps['errmn'][x,y] = np.nan
                maps['errsd'][x,y] = np.nan
                maps['npix'][x,y]  = np.nan
    return(maps)
Example #45
def nothing(noth):
    # If requested, remove the time gradient from all channels.
    if remove_slope:
        un_mask = sp.logical_not(ma.getmaskarray(NoiseData.data))
        NoiseData.calc_time()
        time = NoiseData.time
        n_time = len(time)
        # Test if the mask is the same for all slices.  If it is, that greatly
        # reduces the work as we only have to generate one set of polynomials.
        all_masks_same = True
        for jj in range(n_time):
            if sp.all(un_mask[jj, ...] == un_mask[jj, 0, 0, 0]):
                continue
            else:
                all_masks_same = False
                break
        if all_masks_same:
            polys = misc.ortho_poly(time, 2, un_mask[:, 0, 0, 0], 0)
            polys.shape = (2, len(time), 1, 1, 1)
        else:
            polys = misc.ortho_poly(time[:, None, None, None], 2, un_mask, 0)
        # Subtract the slope mode (1th mode) out of the NoiseData.
        slope_amps = sp.sum(polys[1, ...] * un_mask * NoiseData.data.filled(0),
                            0)
        NoiseData.data -= polys[1, ...] * slope_amps
    # Iteratively flag on sliding scale to get closer and closer to desired
    # threshold.
    n_time = Data.data.shape[0]
    max_thres = sp.sqrt(n_time) / 2.
    n_iter = 3
    thresholds = (max_thres**(n_iter - 1 - sp.arange(n_iter)) *
                  thres**sp.arange(n_iter))**(1. / (n_iter - 1))
    for threshold in thresholds:
        # Get the deviation from the mean.
        residuals = ma.anom(NoiseData.data, 0).filled(0)
        # Get indices above the threshold.
        mask = abs(residuals) > threshold * ma.std(NoiseData.data, 0)
        # Mask the data.
        Data.data[mask] = ma.masked
        NoiseData.data[mask] = ma.masked

    # Now flag for very noisy channels.
    if max_noise_factor > 0:
        vars = ma.var(NoiseData.data, 0)
        mean_vars = ma.mean(vars, -1).filled(0)
        bad_chans = vars.filled(0) > max_noise_factor * mean_vars[:, :, None]
        Data.data[:, bad_chans] = ma.masked
        NoiseData.data[:, bad_chans] = ma.masked
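The thresholds expression builds a geometric ladder that starts at a loose cut of sqrt(n_time)/2 and ends exactly at the requested thres after n_iter passes. A quick standalone check of that arithmetic (the numbers below are made up):

import numpy as np

n_time, thres, n_iter = 2048, 3.0, 3
max_thres = np.sqrt(n_time) / 2.
thresholds = (max_thres ** (n_iter - 1 - np.arange(n_iter)) *
              thres ** np.arange(n_iter)) ** (1. / (n_iter - 1))
print(thresholds)   # ~[22.6, 8.2, 3.0]: geometric steps from max_thres down to thres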
Beispiel #47
0
def ifas_masked_std(array, axis=None):
    """ This returns the true standard deviation of the data. It 
    only counts valid data.

    There are outstanding problems with how the masked arrays 
    handle stds. For some reason, there is no np.ma.nanstd 
    function. This adds that functionality.

    Parameters
    ----------
    array : ndarray
        The value or array of values from which the standard deviation 
        will be taken.
    axis : int 
        The axis along which the standard deviation will be taken.

    Returns
    -------
    true_std : float or ndarray
        The standard deviation of the array along whichever axis 
        was given. 
    """

    # Fix all invalid data before taking the standard deviation.
    valid_array = np_ma.fix_invalid(array)

    # Test to see if there is any invalid data left.
    if (np.any(np.isnan(valid_array))):
        raise core.error.DataError("The array still contains invalid nan "
                                   "data after the invalid data was fixed. "
                                   "The true standard deviation function "
                                   "will not work as expected.")
    if (np.any(np.isinf(valid_array))):
        raise core.error.DataError("The array still contains invalid inf "
                                   "data after the invalid data was fixed. "
                                   "The true standard deviation function "
                                   "will not work as expected.")

    # Calculate and return the standard deviation. The masked array
    # version of the functions seems to properly ignore masks as
    # intended.
    true_std = np_ma.std(valid_array, axis=axis)

    return true_std
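A quick usage sketch, assuming the module-level imports implied by the snippet (numpy as np, numpy.ma as np_ma, and the core.error module): fix_invalid masks the NaN before np_ma.std is taken, so the invalid value simply drops out of the statistic.

import numpy as np

data = np.array([1.0, 2.0, np.nan, 4.0])
print(np.std(data))            # nan -- the invalid value poisons the plain std
print(ifas_masked_std(data))   # ~1.2472 -- std of [1, 2, 4] with the nan masked out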
Beispiel #48
0
    def average_combine(self, scale_func=None, scale_to=1.0):
        """Average combine together a set of arrays.   A CCDData object is
           returned with the data property set to the average of the arrays.
           If the data was masked or any data have been rejected, those pixels
           will not be included in the average.   A mask will be returned, and
           if a pixel has been rejected in all images, it will be masked.   The
           uncertainty of the combined image is set by the standard deviation
           of the input images.

           Returns
           -------
           combined_image: `~ccdproc.CCDData`
               CCDData object based on the combined input of CCDData objects.

        """
        if self.scaling is not None:
            scalings = self.scaling
        else:
            scalings = 1.0
        #set up the data
        data, wei = ma.average(scalings * self.data_arr,
                               axis=0,
                               weights=self.weights,
                               returned=True)

        #set up the mask
        mask = self.data_arr.mask.sum(axis=0)
        mask = (mask == len(self.data_arr))

        #set up the deviation
        uncertainty = ma.std(self.data_arr, axis=0)

        #create the combined image
        combined_image = CCDData(data.data,
                                 mask=mask,
                                 unit=self.unit,
                                 uncertainty=StdDevUncertainty(uncertainty))

        #update the meta data
        combined_image.meta['NCOMBINE'] = len(self.data_arr)

        #return the combined image
        return combined_image
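Stripped of the CCDData bookkeeping, the combine step above is a weighted masked average along the stack axis, a "rejected everywhere" mask, and a per-pixel standard deviation as the uncertainty. A minimal standalone sketch of the same arithmetic with made-up data:

import numpy as np
import numpy.ma as ma

# Three 2x2 "images" stacked along axis 0, with one rejected (NaN) pixel.
stack = ma.masked_invalid(np.array([[[1., 2.], [3., np.nan]],
                                    [[2., 2.], [3., 4.]],
                                    [[3., 2.], [3., 5.]]]))
data, wei = ma.average(stack, axis=0, weights=None, returned=True)  # wei = summed weights
mask = stack.mask.sum(axis=0) == len(stack)    # masked only where every input was rejected
uncertainty = ma.std(stack, axis=0)
print(data)          # per-pixel average, ignoring the masked value
print(uncertainty)   # per-pixel scatter of the inputs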
Beispiel #49
0
def res_dist(x, y, e, n_runs=100, random_state=None):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=random_state)

    test_res = []
    train_res = []
    start_time = time()

    for i in range(n_runs):
        e.fit(x_train, y_train)
        train_res.append(e.score(x_train, y_train))
        test_res.append(e.score(x_test, y_test))
        if i % max(n_runs // 10, 1) == 0: print("%d" % i, end=' ')

    print("\nTime: %.3f secs" % (time() - start_time))
    print("Test Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" % (min(test_res), mean(test_res), max(test_res), std(test_res)))
    print("Train Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" % (
    min(train_res), mean(train_res), max(train_res), std(train_res)))

    return (train_res, test_res)
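A usage sketch, assuming the bare names in the snippet come from the usual imports (train_test_split from scikit-learn, mean/std from numpy, time from the standard library); the estimator and data below are placeholders:

import numpy as np
from time import time
from numpy import mean, std
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X @ rng.randn(5) + 0.1 * rng.randn(200)
# Each run refits the (randomized) forest, so the score distributions show its run-to-run spread.
train_res, test_res = res_dist(X, y, RandomForestRegressor(n_estimators=50), n_runs=20, random_state=0)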
Beispiel #50
0
def destroy_with_variance(Data, sigma_thres=6, bad_freq_list=[]):
    '''Mask spikes in Data using variance. Polarizations must be in
    XX,XY,YX,YY format.
    sigma_thres represents how sensitive the flagger is (smaller = more masking).
    The flagged frequencies are appended to bad_freq_list.'''
    XX_YY_0 = ma.mean(Data.data[:, 0, 0, :], 0) * ma.mean(Data.data[:, 3, 0, :], 0)
    XX_YY_1 = ma.mean(Data.data[:, 0, 1, :], 0) * ma.mean(Data.data[:, 3, 1, :], 0)
    # Get the normalized variance array for each polarization.
    a = ma.var(Data.data[:, 0, 0, :], 0) / (ma.mean(Data.data[:, 0, 0, :], 0)**2) # XX
    b = ma.var(Data.data[:, 1, 0, :], 0) / XX_YY_0                                # XY
    c = ma.var(Data.data[:, 2, 0, :], 0) / XX_YY_0                                # YX
    d = ma.var(Data.data[:, 3, 0, :], 0) / (ma.mean(Data.data[:, 3, 0, :], 0)**2) # YY
    # And for cal off.
    e = ma.var(Data.data[:, 0, 1, :], 0) / (ma.mean(Data.data[:, 0, 1, :], 0)**2) # XX
    f = ma.var(Data.data[:, 1, 1, :], 0) / XX_YY_1                                # XY
    g = ma.var(Data.data[:, 2, 1, :], 0) / XX_YY_1                                # YX
    h = ma.var(Data.data[:, 3, 1, :], 0) / (ma.mean(Data.data[:, 3, 1, :], 0)**2) # YY
    # Get the mean and standard deviation [sigma].
    means = sp.array([ma.mean(a), ma.mean(b), ma.mean(c), ma.mean(d),
                        ma.mean(e), ma.mean(f), ma.mean(g), ma.mean(h)]) 
    sig = sp.array([ma.std(a), ma.std(b), ma.std(c), ma.std(d),
                      ma.std(e), ma.std(f), ma.std(g), ma.std(h)])
    # Get the max accepted value [sigma_thres*sigma, sigma_thres=6 works really well].
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or
            (b[freq] > max_accepted[1]) or
            (c[freq] > max_accepted[2]) or
            (d[freq] > max_accepted[3]) or
            (e[freq] > max_accepted[4]) or
            (f[freq] > max_accepted[5]) or
            (g[freq] > max_accepted[6]) or
            (h[freq] > max_accepted[7])):
            # mask
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:,:,:,freq].mask = True
    return amount_masked
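The same cut (flag a channel whenever any of the eight normalized variances exceeds its own mean + sigma_thres*std) can be written without the explicit frequency loop. A hedged sketch, kept separate so the original function is untouched:

import numpy as np
import numpy.ma as ma

def flag_channels_sketch(norm_vars, sigma_thres=6):
    # norm_vars: list of 1-D masked arrays (one per polarization / cal state).
    stacked = ma.masked_invalid(ma.stack(norm_vars))               # shape (n_arrays, n_freq)
    limits = ma.mean(stacked, axis=1) + sigma_thres * ma.std(stacked, axis=1)
    bad = (stacked > limits[:, None]).any(axis=0)                  # True where any array exceeds its cut
    return np.nonzero(ma.filled(bad, False))[0]                    # indices of channels to mask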
Beispiel #51
0
    def average_combine(self, scale_func=None, scale_to=1.0):
        """Average combine together a set of arrays.   A CCDData object is
           returned with the data property set to the average of the arrays.
           If the data was masked or any data have been rejected, those pixels
           will not be included in the average.   A mask will be returned, and
           if a pixel has been rejected in all images, it will be masked.   The
           uncertainty of the combined image is set by the standard deviation
           of the input images.

           Returns
           -------
           combined_image: `~ccdproc.CCDData`
               CCDData object based on the combined input of CCDData objects.

        """
        if self.scaling is not None:
            scalings = self.scaling
        else:
            scalings = 1.0
        #set up the data
        data, wei = ma.average(scalings * self.data_arr,
                               axis=0, weights=self.weights,
                               returned=True)

        #set up the mask
        mask = self.data_arr.mask.sum(axis=0)
        mask = (mask == len(self.data_arr))

        #set up the deviation
        uncertainty = ma.std(self.data_arr, axis=0)

        # create the combined image with a dtype that matches the combiner
        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                                 mask=mask, unit=self.unit,
                                 uncertainty=StdDevUncertainty(uncertainty))

        #update the meta data
        combined_image.meta['NCOMBINE'] = len(self.data_arr)

        #return the combined image
        return combined_image
Beispiel #52
0
def measure(mode, x, y, x0, x1):
    """ return the mean and standard deviation of y in the window x0 to x1
    """
    xm = ma.masked_outside(x, x0, x1)
    ym = ma.array(y, mask=ma.getmask(xm))
    if mode == 'mean':
        r1 = ma.mean(ym)
        r2 = ma.std(ym)
    if mode == 'max':
        r1 = ma.max(ym)
        r2 = 0
    if mode == 'min':
        r1 = ma.min(ym)
        r2 = 0
    if mode == 'median':
        r1 = ma.median(ym)
        r2 = 0
    if mode == 'p2p':  # peak to peak
        r1 = ma.ptp(ym)
        r2 = 0
    return (r1, r2)
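A short usage sketch (assuming numpy.ma is imported as ma, as in the snippet): the window [x0, x1] is applied to x and the resulting mask is carried over to y before the statistic is taken.

import numpy as np
import numpy.ma as ma

x = np.linspace(0.0, 10.0, 101)
y = np.sin(x)
mean_val, std_val = measure('mean', x, y, 2.0, 4.0)   # mean/std of sin(x) for 2 <= x <= 4
peak, _ = measure('max', x, y, 2.0, 4.0)              # maximum of sin(x) inside the window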
Beispiel #53
0
    def dynamic_mask(self, image, sigrange):
        """
		Creates a numpy mask on the image, filtering out any
		pixel values that are more than sigrange*std from the median value

		Input: numpy array of the image, sigrange for multiplier on standard dev range
		Output: Masked numpy array covering any pixels above or below the standard dev range
		"""

        # Make a masked array using the static mask and input image
        pre_masked = ma.array(image, mask=self.static_mask)

        # Mask saturated or empty
        masked1 = ma.masked_greater(pre_masked, 254)
        masked1 = ma.masked_less(masked1, 0)

        median = ma.median(masked1)
        mean = ma.mean(masked1)
        std = ma.std(masked1)

        return masked1, median, mean, std
    def __fit__(self, rating, row=True):
        if isinstance(rating, ma.MaskedArray):
            self._rating = rating
        else:
            self._rating = ma.masked_equal(rating, 0)

        self._mean = ma.mean(self._rating, axis=1, keepdims=True)
        self._sigma = ma.std(self._rating, axis=1, keepdims=True)
        self._mean_center_rating = self._rating - self._mean
        self._z = self._mean_center_rating / self._sigma

        assert self.config.sim_config.name in ["person", "discounted_person", "amplify_person", "idf_person", "pca_person","cosine"]

        similaritor = SimilaritorFactory(self.config.sim_config)

        if row:
            self._sim = similaritor(rating=self._rating, mean_center_rating=self._mean_center_rating)
        else:
            self._rating = self._rating.T
            self._mean_center_rating = self._mean_center_rating.T
            self._z = self._z.T
            self._sim = similaritor(rating=self._rating, mean_center_rating=self._mean_center_rating)
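For reference, the normalisation in __fit__ amounts to per-row mean-centering and z-scoring of the masked rating matrix, with unrated entries (zeros) masked out; a small standalone illustration with made-up ratings:

import numpy as np
import numpy.ma as ma

ratings = ma.masked_equal(np.array([[5, 3, 0, 1],
                                    [4, 0, 0, 1],
                                    [1, 1, 0, 5]]), 0)
row_mean = ma.mean(ratings, axis=1, keepdims=True)
row_sigma = ma.std(ratings, axis=1, keepdims=True)
mean_centered = ratings - row_mean      # used by Pearson-style similarities
z = mean_centered / row_sigma           # z-scores; unrated cells stay masked
print(z)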