コード例 #1
0
ファイル: main.py プロジェクト: SterVeen/pyautoplot
def single_correlation_flags(tf_plane, threshold=5.0, max_iter=5, previous_sums=None, verbose=False):
    """Iteratively sigma-clip a single-correlation time/frequency plane.

    Flags visibilities further than ``threshold`` times the combined
    real/imag standard deviation away from the (complex) median, and
    recurses until the flag count stops growing or ``max_iter`` runs out.

    Parameters
    ----------
    tf_plane : numpy.ma.MaskedArray
        Complex masked array; its mask holds the current flags.
    threshold : float
        Clip level in units of sigma.
    max_iter : int
        Remaining clipping iterations; when exhausted the flags are
        dilated by two pixels to catch the edges of flagged regions.
    previous_sums : list of int, optional
        Flag counts from earlier iterations, used to detect convergence.
    verbose : bool
        Print per-iteration statistics when True.

    Returns
    -------
    numpy.ndarray
        Boolean flag array with the same shape as ``tf_plane``.
    """
    # Fix: the old `previous_sums=[]` default was a shared mutable default
    # argument; use None and create a fresh list per top-level call.
    if previous_sums is None:
        previous_sums = []
    flags = tf_plane.mask
    sum_flags = flags.sum()
    if verbose:
        print('sum(flags): %s' % (sum_flags,))
        print('%5.3f%s flagged\n' % ((sum_flags*100.0/product(tf_plane.shape)), '%'))
    # Everything already flagged: nothing left to clip.
    if sum_flags == product(flags.shape):
        return flags
    # Iteration budget exhausted: grow flags slightly to catch edges.
    if max_iter <= 0:
        return ndimage.binary_dilation(flags, iterations=2)
    med = ma.median(tf_plane.real) + 1j*ma.median(tf_plane.imag)
    sigma = sqrt(ma.std(tf_plane.real)**2 + ma.std(tf_plane.imag)**2)
    # Note: clipping tests the raw .data so already-flagged outliers keep
    # being recognized in later iterations.
    bad_vis = abs(tf_plane.data - med) > threshold*sigma
    new_flags = logical_or(flags, bad_vis)
    new_data = ma.array(tf_plane.data, mask=new_flags)
    sum_flags = new_flags.sum()
    if verbose:
        print('sum_flags: %s' % (sum_flags,))
        print('%5.3f%s flagged\nstd: %6.4f' % ((sum_flags*100.0/product(tf_plane.shape)), '%', ma.std(new_data)))
        print(sum_flags)
        print(previous_sums)
        print('------------------------------------------------------------')
    # Converged when the flag count equals the largest previous count
    # (max(..., default=0) replaces the old reduce(max, previous_sums, 0)).
    if sum_flags == max(previous_sums, default=0):
        return single_correlation_flags(new_data,
                                        threshold=threshold,
                                        max_iter=0,
                                        previous_sums=previous_sums + [sum_flags])
    return single_correlation_flags(new_data,
                                    threshold=threshold,
                                    max_iter=max_iter - 1,
                                    previous_sums=previous_sums + [sum_flags])
コード例 #2
0
ファイル: main.py プロジェクト: SterVeen/pyautoplot
def statistics(numpy_array):
    """Return basic statistics of a complex masked array.

    Parameters
    ----------
    numpy_array : numpy.ma.MaskedArray
        Complex-valued masked array.

    Returns
    -------
    dict
        Keys: 'mean'; 'median' (complex: real and imaginary medians
        combined); 'max'/'min' (of the magnitudes); 'std'; 'stdmean'
        (standard error of the mean over the unmasked samples).
    """
    # Bug fix: 'max', 'min' and 'std' previously referenced the name
    # `array` instead of the `numpy_array` argument.
    return {'mean'   : ma.mean(numpy_array),
            'median' : ma.median(numpy_array.real)+1j*ma.median(numpy_array.imag),
            'max'    : ma.max(abs(numpy_array)),
            'min'    : ma.min(abs(numpy_array)),
            'std'    : ma.std(numpy_array),
            'stdmean': ma.std(numpy_array)/sqrt(sum(logical_not(numpy_array.mask))-1)}
コード例 #3
0
ファイル: main.py プロジェクト: SterVeen/pyautoplot
def plot_all_correlations(data_col, plot_flags=True, amax_factor=1.0):
    """Flag *data_col* for bad data and plot images of its correlations.

    Plots XX/YY side by side for two polarizations, or XX/XY/YX/YY for
    four; other polarization counts produce no plots.

    Parameters
    ----------
    data_col : visibility data column (masked-array compatible).
    plot_flags : bool
        When True, pass the good-data mask to the per-image plotter.
    amax_factor : float
        Scale factor applied to the computed colour-scale maximum.
    """
    flags = bad_data(data_col, threshold=4.0, max_iter=20)
    flagged_data = ma.array(data_col.data, mask=flags)
    xx, xy, yx, yy, num_pol = split_data_col(ma.array(flagged_data))

    scale = ma.max(abs(flagged_data))
    stddev = max(ma.std(flagged_data.real), ma.std(flagged_data.imag))
    # If everything is flagged there is no meaningful scale; fall back to 1.
    if flags.sum() == product(flags.shape):
        amax = 1.0
    else:
        amax = (scale-stddev)*amax_factor

    print('scale: %f\nsigma: %f' % (scale, stddev))
    good = logical_not(xx.mask)
    if not plot_flags:
        good = None
    clf()
    # Bug fix: the original used 'num_pol is 2' / 'is 4'. Identity
    # comparison of ints relies on interpreter caching; compare with ==.
    if num_pol == 2:
        subplot(121)
        plot_complex_image('XX', xx, good, amin=0.0, amax=amax)
        subplot(122)
        plot_complex_image('YY', yy, good, amin=0.0, amax=amax)
    elif num_pol == 4:
        subplot(141)
        plot_complex_image('XX', xx, good, amin=0.0, amax=amax)
        subplot(142)
        plot_complex_image('XY', xy, good, amin=0.0, amax=amax)
        subplot(143)
        plot_complex_image('YX', yx, good, amin=0.0, amax=amax)
        subplot(144)
        plot_complex_image('YY', yy, good, amin=0.0, amax=amax)
コード例 #4
0
def destroy_time_with_mean_arrays_2pol(Data, flag_size=40):
    '''Mask times with high means (Parkes 2-polarization variant).

    Computes the frequency-averaged time series for each polarization,
    flags any time step whose mean falls outside mean +/- 3 sigma in
    either polarization, and masks it together with +/- ``flag_size``
    neighbouring samples across all axes.
    '''
    # Frequency-averaged time series, one per polarization (cal state 0).
    xx_mean = ma.mean(Data.data[:, 0, 0, :], -1)
    yy_mean = ma.mean(Data.data[:, 1, 0, :], -1)
    # Per-polarization centre and spread of those time series.
    centres = sp.array([ma.mean(xx_mean), ma.mean(yy_mean)])
    spreads = sp.array([ma.std(xx_mean), ma.std(yy_mean)])
    # Acceptance window: mean +/- 3 sigma.
    upper = centres + 3*spreads
    lower = centres - 3*spreads
    # Time steps where either polarization leaves the window.
    bad_times = [t for t in range(len(xx_mean))
                 if (xx_mean[t] > upper[0]) or (yy_mean[t] > upper[1])
                 or (xx_mean[t] < lower[0]) or (yy_mean[t] < lower[1])]
    # Mask each bad time plus +/- flag_size samples around it, clamping
    # the slice start at zero.
    for t in bad_times:
        start = 0 if t - flag_size < 0 else t - flag_size
        Data.data[start:(t+flag_size), :, :, :].mask = True
    return
コード例 #5
0
def destroy_with_variance_2pol(Data, sigma_thres=6, bad_freq_list=None, submean=True):
    '''Mask frequencies with high variance (Parkes 2-polarization variant).

    Parameters
    ----------
    Data : DataBlock-like
        Object whose ``data`` attribute is a masked array indexed as
        [time, polarization, cal, frequency].
    sigma_thres : float
        Cut threshold in units of sigma (6 works well in practice).
    bad_freq_list : list, optional
        If supplied, flagged frequency indices are appended to it in place.
    submean : bool
        Normalize each frequency's variance by its squared mean when True.

    Returns
    -------
    int
        Number of frequency channels masked by this call.
    '''
    # Fix: the old `bad_freq_list=[]` was a shared mutable default that
    # accumulated entries across calls; use None and build a fresh list.
    if bad_freq_list is None:
        bad_freq_list = []
    # Mask NaNs and non-positive samples before computing statistics.
    Data.data[np.isnan(Data.data)] = ma.masked
    Data.data[Data.data <= 0.] = ma.masked
    if submean:
        a = ma.var(Data.data[:,0,0,:],0)/(ma.mean(Data.data[:,0,0,:],0)**2)  # XX
        b = ma.var(Data.data[:,1,0,:],0)/(ma.mean(Data.data[:,1,0,:],0)**2)  # YY
    else:
        a = ma.var(Data.data[:,0,0,:],0)
        b = ma.var(Data.data[:,1,0,:],0)
    # Mean and standard deviation of the per-frequency variances.
    means = sp.array([ma.mean(a), ma.mean(b)])
    sig   = sp.array([ma.std(a), ma.std(b)])
    # Acceptance window: mean +/- sigma_thres*sigma.
    max_sig = sigma_thres*sig
    max_accepted = means + max_sig
    min_accepted = means - max_sig
    amount_masked = 0
    for freq in range(0, len(a)):
        if ((a[freq] > max_accepted[0]) or (b[freq] > max_accepted[1]) or
            (a[freq] < min_accepted[0]) or (b[freq] < min_accepted[1])):
            # Outlier in either polarization: mask the whole channel.
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:,:,:,freq].mask = True
    return amount_masked
コード例 #6
0
ファイル: metrics_and_plots.py プロジェクト: CWSL/climate
def Portrait_diagram_subregion(obs_subregion_mean,
                               obs_name,
                               model_subregion_mean,
                               model_names,
                               seasonal_cycle,
                               file_name,
                               normalize=True):
    """Draw portrait diagrams of four metrics (bias, stddev ratio, RMSE,
    correlation) for each model/subregion pair against the observation.

    :param obs_subregion_mean: observation series, shape (1, nt, nregion)
    :param obs_name: observation dataset name (kept for API compatibility)
    :param model_subregion_mean: model series, shape (nmodel, nt, nregion)
    :param model_names: model names for the diagram axis
    :param seasonal_cycle: average into a 12-month climatology first
    :param file_name: output prefix; '_<metric>' is appended per plot
    :param normalize: express bias and RMSE as a percentage of the
        observation's standard deviation (stddev ratio as a percentage)
    """
    nmodel, nt, nregion = model_subregion_mean.shape

    if seasonal_cycle:
        # Bug fix: nt // 12 — plain nt / 12 is a float in Python 3 and
        # numpy's reshape rejects non-integer dimensions.
        obs_data = ma.mean(
            obs_subregion_mean.reshape([1, nt // 12, 12, nregion]), axis=1)
        model_data = ma.mean(
            model_subregion_mean.reshape([nmodel, nt // 12, 12, nregion]),
            axis=1)
        nt = 12
    else:
        obs_data = obs_subregion_mean
        model_data = model_subregion_mean

    # Axis 0 holds the four metrics: bias, stddev ratio, RMSE, correlation.
    subregion_metrics = ma.zeros([4, nregion, nmodel])

    for imodel in np.arange(nmodel):
        for iregion in np.arange(nregion):
            # First metric: bias
            subregion_metrics[0, iregion, imodel] = metrics.calc_bias(
                model_data[imodel, :, iregion],
                obs_data[0, :, iregion],
                average_over_time=True)
            # Second metric: standard deviation
            subregion_metrics[1, iregion, imodel] = metrics.calc_stddev_ratio(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Third metric: RMSE
            subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])
            # Fourth metric: correlation
            subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(
                model_data[imodel, :, iregion], obs_data[0, :, iregion])

    if normalize:
        # Express bias and RMSE relative to the observed variability
        # (obs std hoisted out of the three uses per region).
        for iregion in np.arange(nregion):
            obs_std = ma.std(obs_data[0, :, iregion])
            subregion_metrics[0, iregion, :] = subregion_metrics[
                0, iregion, :] / obs_std * 100.
            subregion_metrics[
                1, iregion, :] = subregion_metrics[1, iregion, :] * 100.
            subregion_metrics[2, iregion, :] = subregion_metrics[
                2, iregion, :] / obs_std * 100.

    region_names = ['R%02d' % i for i in np.arange(nregion) + 1]

    for imetric, metric in enumerate(['bias', 'std', 'RMSE', 'corr']):
        plotter.draw_portrait_diagram(
            subregion_metrics[imetric, :, :],
            region_names,
            model_names,
            file_name + '_' + metric,
            xlabel='model',
            ylabel='region')
コード例 #7
0
def snr_func(data, n_segments=10):
	"""Return the seasonal signal-to-noise ratio of a time-series array.

	Normalizes each column to unit peak (in place, as the original did),
	splits the time axis into ``n_segments`` equal chunks, and compares
	the standard deviation of the mean seasonal cycle with the residual
	(non-seasonal) standard deviation.

	:param data: float array with time along axis 0; its length must be
	    divisible by ``n_segments``.
	:param n_segments: number of equal time chunks to stack and average
	    (default 10, the previously hard-coded value).
	:return: elementwise stdev_seasonal / stdev_non_seasonal.
	"""
	data /= np.max(np.abs(data), axis=0)
	# Stack the chunks so axis 0 indexes the segment ("year").
	year_stack = ma.array(np.split(data, n_segments, axis=0))
	stdev_all_data = ma.std(data, axis=0)
	# Mean over segments = the repeating (seasonal) signal.
	signal_array = ma.mean(year_stack, axis=0)
	stdev_seasonal = ma.std(signal_array, axis=0)
	stdev_non_seasonal = stdev_all_data - stdev_seasonal
	return stdev_seasonal/stdev_non_seasonal
コード例 #8
0
def plot_stdevs(data, name):
	"""Plot four standard-deviation maps for *data* and save them as a PNG.

	Panels: all-data stdev, stdev of annual means, stdev of the mean
	seasonal cycle, and the non-seasonal residual. Relies on module
	globals for the lon/lat grids, map corners and colour map.
	"""
	data /= np.max(np.abs(data), axis=0)
	year_stack = ma.array(np.split(data, 10, axis=0))

	vmin, vmax = 0, 0.5
	curr_map = Basemap(projection='cyl', llcrnrlon=ll_lon, llcrnrlat=ll_lat, urcrnrlon=ur_lon, urcrnrlat=ur_lat, resolution='i', area_thresh=100.)
	x, y = curr_map(lon, lat)

	def draw_panel(field):
		# Shared pcolormesh + decorations for every subplot.
		curr_map.pcolormesh(x, y, field, vmin=vmin, vmax=vmax, cmap=cmap)
		plt.axis('tight')
		plt.colorbar()
		curr_map.drawcoastlines()
		curr_map.fillcontinents(color='grey', lake_color='aqua')

	# Panel 1: standard deviation of the full record.
	plt.subplot(411)
	stdev_all_data = ma.std(data, axis=0)
	draw_panel(stdev_all_data)

	# Panel 2: standard deviation of the annual averages.
	plt.subplot(412)
	draw_panel(ma.std(ma.mean(year_stack, axis=1), axis=0))

	# Panel 3: standard deviation of the mean seasonal cycle.
	plt.subplot(413)
	stdev_seasonal = ma.std(ma.mean(year_stack, axis=0), axis=0)
	draw_panel(stdev_seasonal)

	# Panel 4: residual (non-seasonal) standard deviation.
	plt.subplot(414)
	draw_panel(stdev_all_data - stdev_seasonal)

	plt.savefig('/home/nicholas/masters/figures/newplots/standard_deviations_' + name + '.png')
	plt.close('all')
コード例 #9
0
ファイル: data_analysis_funcs.py プロジェクト: tcv/hibiscus
def flagging(data,freq,sigma_thres,linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Fits a line to each window of ``linscale`` channels, divides it out,
    and flags anything further than ``sigma_thres`` sigma from flat (1.0).

    Also flags out NaNs, infs.

    Output is flagging mask (1.0 = bad) for input data array.
    """
    mask = zeros(len(data))
    # Non-finite samples are always flagged.
    nanmask = array(where(isnan(data))[0])
    mask[nanmask] = 1.0
    infmask = array(where(isinf(data))[0])
    mask[infmask] = 1.0
    scale = linscale

    def _flag_window(start, stop):
        # Divide out a linear baseline over [start:stop), then flag
        # samples more than sigma_thres standard deviations from 1.0.
        (Fa, Fb) = polyfit(freq[start:stop], data[start:stop], 1)
        flat_data = data[start:stop]/polyval([Fa, Fb], freq[start:stop])
        flat_sigma = ma.std(flat_data)
        mask[start + array(where(flat_data > 1.0 + flat_sigma*sigma_thres)[0])] = 1.0
        mask[start + array(where(flat_data < 1.0 - flat_sigma*sigma_thres)[0])] = 1.0

    # Bug fix: integer division — len(data)/scale is a float in Python 3
    # and range() raised a TypeError. Also fixes the original's mixed
    # tab/space indentation.
    n_full = len(data)//scale - 1
    for f in range(0, n_full):
        _flag_window(f*scale, (f+1)*scale)
    # Remainder window (as in the original, the very last sample is
    # excluded by the -1 stop index and is never inspected).
    _flag_window(max(n_full, 0)*scale, len(data) - 1)

    return mask
コード例 #10
0
ファイル: file_funcs.py プロジェクト: tcv/hibiscus
def flagging(data, freq, sigma_thres, linscale):
    """
    Flags data for RFI.
    Designed for a single time step scan.
    Uses a sigma threshold to flag out anything with
    RFI over a certain threshold.

    Also flags out NaNs, infs.
    Inputs are:
    data - linear input
    freq - can be any units
    sigma_thres - cutoff for bad data
    linscale - size of flattened window

    Output is flagging mask (1.0 = bad) for input data array.
    """
    mask = np.zeros(len(data))
    # Non-finite samples are always flagged.
    mask[np.where(np.isnan(data))[0]] = 1.0
    mask[np.where(np.isinf(data))[0]] = 1.0
    scale = linscale

    def _flag_window(start, stop):
        # Divide out a linear baseline over [start:stop), then flag
        # anything further than sigma_thres sigma from flat (== 1.0).
        (Fa, Fb) = np.polyfit(freq[start:stop], data[start:stop], 1)
        flat_data = data[start:stop]/np.polyval([Fa, Fb], freq[start:stop])
        flat_sigma = ma.std(flat_data)
        mask[start + np.where(flat_data > 1.0 + flat_sigma*sigma_thres)[0]] = 1.0
        mask[start + np.where(flat_data < 1.0 - flat_sigma*sigma_thres)[0]] = 1.0

    # Bug fix: len(data)//scale — true integer division. In Python 3
    # len(data)/scale is a float, which made range() raise a TypeError.
    n_full = len(data)//scale - 1
    for f in range(0, n_full):
        _flag_window(f*scale, (f+1)*scale)
    # Trailing window; as in the original, the very last sample
    # (excluded by the -1 stop index) is never inspected.
    _flag_window(max(n_full, 0)*scale, len(data) - 1)

    return mask
コード例 #11
0
def test_baseline_use_all_features_with_signified_random(data, conf):
    """Baseline check: all features kept, signified-only handler, k=1."""
    conf['feature_selection']['must_be_in_thesaurus'] = False
    conf['vectorizer']['decode_token_handler'] = \
        'eval.pipeline.feature_handlers.SignifiedOnlyFeatureHandler'
    conf['vectorizer']['k'] = 1

    x1, x2, voc = _vectorize_data(data, conf, dummy=True)

    assert full_vocab == strip(voc)

    assert isinstance(x1, sp.spmatrix)
    t.assert_array_equal(
        x1.toarray(),
        training_matrix
    )

    t.assert_array_almost_equal(
        x2.toarray(),
        np.array(
            [
                [0, 11.0, 0, 0, 0, 0],
            ]
        )
    )
    # the thesaurus will always say the neighbour for something is
    # b/N with a similarity of 1, and we look up 11 tokens overall in
    # the test document
    x1, x2, voc = _vectorize_data(data, conf, dummy=True)
    # Bug fix: 'assert x2.sum(), 11.0' only asserted truthiness, with
    # 11.0 as an unused assertion message; compare explicitly instead.
    assert x2.sum() == 11.0
    assert std(x2.todense()) > 0
コード例 #12
0
ファイル: combiner.py プロジェクト: sargas/ccdproc
    def average_combine(self):
        """Average combine together a set of arrays.

           A CCDData object is returned with the data property set to the
           (weighted) average of the arrays. If the data was masked or any
           data have been rejected, those pixels are excluded. A pixel is
           masked in the result only when it was rejected in every input
           image. The uncertainty of the combined image is the standard
           deviation of the input images.

           Returns
           -------
           combined_image: CCDData object
               CCDData object based on the combined input of CCDData objects.

        """
        # Weighted mean over the image axis (weights may be None).
        avg, _weight_sum = ma.average(self.data_arr, axis=0,
                                      weights=self.weights, returned=True)

        # A pixel is masked only if it was rejected in every input image.
        rejected_count = self.data_arr.mask.sum(axis=0)
        fully_rejected = (rejected_count == len(self.data_arr))

        # Per-pixel scatter of the inputs serves as the uncertainty.
        scatter = ma.std(self.data_arr, axis=0)

        # Assemble the result image.
        combined_image = CCDData(avg.data, mask=fully_rejected, unit=self.unit,
                                 uncertainty=StdDevUncertainty(scatter))

        # Record how many images were combined.
        combined_image.meta['NCOMBINE'] = len(self.data_arr)

        return combined_image
コード例 #13
0
ファイル: utils.py プロジェクト: MBoustani/climate
def calc_subregion_area_mean_and_std(dataset_array, subregions):
    ''' Calculate area mean and standard deviation values for given subregions
    using datasets on common grid points.

    :param dataset_array: An array of OCW Dataset Objects
    :type list:
    :param subregions: list of subregions
    :type subregions: :class:`numpy.ma.array`
    :returns: area averaged time series for the dataset of shape
        (ntime, nsubregion), the spatial standard deviation with the same
        shape, and the labelled subregion grid
    '''
    ndata = len(dataset_array)
    reference = dataset_array[0]
    # Expand 1-D lon/lat axes into 2-D grids when necessary.
    if reference.lons.ndim == 1:
        lons, lats = np.meshgrid(reference.lons, reference.lats)
    else:
        lons = reference.lons
        lats = reference.lats
    subregion_array = np.zeros(lons.shape)
    mask_array = dataset_array[0].values[0, :].mask
    # reference.values.shape[0]: length of the time dimension.
    ntime = reference.values.shape[0]
    # Spatially averaged time series and spatial standard deviation,
    # shaped [dataset, time, subregion].
    t_series = ma.zeros([ndata, ntime, len(subregions)])
    spatial_std = ma.zeros([ndata, ntime, len(subregions)])

    for iregion, subregion in enumerate(subregions):
        lat_min, lat_max, lon_min, lon_max = subregion[1]
        inside = (lats >= lat_min) & (lats <= lat_max) & \
                 (lons >= lon_min) & (lons <= lon_max)
        y_index, x_index = np.where(inside)
        # Label this region's grid points with its 1-based index.
        subregion_array[y_index, x_index] = iregion + 1
        for idata in np.arange(ndata):
            values = dataset_array[idata].values[:, y_index, x_index]
            t_series[idata, :, iregion] = ma.mean(values, axis=1)
            spatial_std[idata, :, iregion] = ma.std(values, axis=1)
    subregion_array = ma.array(subregion_array, mask=mask_array)
    return t_series, spatial_std, subregion_array
コード例 #14
0
def pca(data, nPCs = -1):
    domain = None
    
    suma = data.sum(axis=0)/float(len(data))
    data -= suma       # substract average value to get zero mean
    data /= MA.std(data, axis=0)
    covMatrix = MA.dot(data.T, data) / len(data)

    eigVals, eigVectors = linalg.eigh(covMatrix)
    eigVals = list(eigVals)
    
    if nPCs == -1:
        nPCs = len(eigVals)
    nPCs = min(nPCs, len(eigVals))
    
    pairs = [(val, i) for i, val in enumerate(eigVals)]
    pairs.sort()
    pairs.reverse()
    indices = [pair[1] for pair in pairs[:nPCs]]  # take indices of the wanted number of principal components

    vectors = MA.take(eigVectors, indices, axis = 1)
    values = [eigVals[i] for i in indices]
    projectedData = MA.dot(data, vectors)
    
    return projectedData, vectors, values
コード例 #15
0
ファイル: backtesting.py プロジェクト: DaryaPopova/diplom
def is_hit(history_data, test_data, year):
    """
    Check whether *test_data* falls outside the 95% (1.96 sigma)
    confidence interval built from *history_data*; for selected years,
    also print a LaTeX-formatted interval cell.

    :param history_data: probability for default given rating for a past years
    :type history_data: np.array
    :param test_data: default for given rating in given year
    :type test_data: float
    :return: 0 when test_data lies within 1.96 sigma of the mean, else 1
    """
    center = mean(history_data)
    spread = std(history_data)
    upper = get_upper(history_data)
    # Interval clamped to the percentage range [0, 100] for display.
    lo_bound = max(0.0, center - 1.96 * spread)
    hi_bound = min(100.0, center + 1.96 * spread)

    if year in [2007, 2008, 2011, 2014]:
        in_interval = "\\in"
        color = "green"
        if not lo_bound <= test_data <= hi_bound:
            in_interval = "\\not " + in_interval
            color = "red"
        lo_bound = "%.1f" % lo_bound
        hi_bound = "%.1f" % hi_bound
        end = "& " if year != 2014 else "\\\\"
        print("$\\textcolor{{{5}}}{{ {3} {4} [{1}, {2}] }}$".format(year, lo_bound, hi_bound, test_data, in_interval, color), end=end)
    # The return value uses the unclamped 1.96-sigma band.
    if abs(test_data - center) <= 1.96 * spread:
        return 0
    return 1
コード例 #16
0
 def calc_chrom_fast(self, index, coords_vals):
     # Fitness for one chromosome: summed absolute differences between the
     # sampled values' statistics and the target array's mean, stdev and
     # range (range difference down-weighted by 10), plus a penalty on the
     # number of sampled locations.
     # coords_vals[0] holds the sampled values, coords_vals[2] the
     # location count. (Legacy Python 2 print statement below.)
     self.population[index]['fitness'] = \
     np.abs(self.array_mean - ma.mean(coords_vals[0])) + \
     np.abs(self.array_stdev - ma.std(coords_vals[0])) + \
     np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))/10  + \
     np.abs((self.chromosome_size-1) - coords_vals[2]) #locations
     #~ print "Chromosome size: ",self.chromosome_size
     print "Number of locations is: ", coords_vals[2]
コード例 #17
0
ファイル: region_properties.py プロジェクト: fepegar/NiftyNet
    def std_(self):
        """
        calculates the standard deviation of the image over the binarised
        segmentation

        :return: axis-0 standard deviation of ``self.masked_img``
        """
        return ma.std(self.masked_img, axis=0)
コード例 #18
0
ファイル: test_std_var.py プロジェクト: QuLogic/biggus
 def _check(self, data):
     """Compare biggus' lazy std (values and mask) against numpy.ma's."""
     lazy_array = biggus.NumpyArrayAdapter(data)
     actual = std(lazy_array, axis=0, ddof=0).masked_array()
     target = ma.std(data, axis=0, ddof=0)
     if target.ndim == 0:
         # Promote scalar results so .filled()/.mask work uniformly.
         target = ma.asarray(target)
     np.testing.assert_array_equal(actual.filled(), target.filled())
     np.testing.assert_array_equal(actual.mask, target.mask)
コード例 #19
0
ファイル: metrics.py プロジェクト: CWSL/climate
def calc_stddev(array, axis=None):
    """ Calculate a sample standard deviation of an array along the array

    :param array: an array to calculate sample standard deviation
    :type array: :class:'numpy.ma.core.MaskedArray'

    :param axis: Axis along which the sample standard deviation is computed.
    :type axis: 'int'

    :returns: sample standard deviation of array
    :rtype: :class:'numpy.ma.core.MaskedArray'
    """
    # ddof=1 gives the unbiased (sample) estimator in both branches; any
    # non-int axis (e.g. None) reduces over the flattened array.
    kwargs = {'ddof': 1}
    if isinstance(axis, int):
        kwargs['axis'] = axis
    return ma.std(array, **kwargs)
コード例 #20
0
ファイル: search_lib.py プロジェクト: saiyanprince/pyimager
def search_noise(data, low_deviation, high_deviation, max_diff):
    """Scan all RCUs for high-noise, low-noise and jittery spectra.

    :param data: 3-D array indexed as [rcu, second, subband].
    :param low_deviation: dB offset added to the reference level to form
        the low-noise bound (presumably negative — TODO confirm).
    :param high_deviation: dB offset above the reference level that marks
        a second as high-noise.
    :param max_diff: maximum allowed fluctuation (dB) of an RCU's median
        spectrum before it is reported as jittery.
    :returns: tuple (low_info, high_info, jitter_info), each a list of
        per-RCU tuples with the offending statistics.
    """
    global logger
    high_info   = list()
    low_info    = list()
    jitter_info = list()
    
    # Median over subbands: one spectrum level per (rcu, second).
    spec_median  = ma.median(data, axis=2)
    spec_max     = spec_median.max(axis=1)
    spec_min     = spec_median.min(axis=1)
    # Reference level and typical fluctuation over all RCUs.
    ref_value    = ma.median(data)
    ref_diff     = ma.median(spec_max) - ma.median(spec_min)
    ref_std      = ma.std(spec_median)
    
    # High-noise limit: 3 sigma above reference, clamped to at least
    # 0.75 dB and at most high_deviation.
    limit = ref_value + min(max((ref_std * 3.0),0.75), high_deviation)
    
    n_secs = data.shape[1]
    logger.debug("median-signal=%5.3fdB, median-fluctuation=%5.3fdB, std=%5.3f, high-limit=%5.3fdB" %(ref_value, ref_diff, ref_std, limit))
    for rcu in range(data.shape[0]):
        peaks = cSearchPeak(data[rcu,0,:])
        if not peaks.valid_data:
            # NOTE(review): returns immediately, skipping all remaining
            # RCUs — confirm this early-out is intended (vs. `continue`).
            return (low_info, high_info, jitter_info)
        peaks.search(delta=10.0)
        if peaks.nMaxPeaks() >= 30:
            # Spectrum dominated by peaks (RFI); skip the noise test.
            logger.debug("RCU=%d: found %d peaks, skip noise test" %(rcu, peaks.nMaxPeaks()))
        else:
            n_bad_high_secs    = 0
            n_bad_low_secs     = 0
            n_bad_jitter_secs  = 0
            
            rcu_max_diff = spec_max[rcu] - spec_min[rcu]
            
            for val in spec_median[rcu,:]:
                #logger.debug("RCU=%d: high-noise value=%5.3fdB  max-ref-value=%5.3fdB" %(rcu, val, ref_val)) 
                if ((val > limit) and (rcu_max_diff > 1.0)) or (val > (ref_value + high_deviation)):
                    n_bad_high_secs += 1
                
                # NOTE(review): both sides of this `or` test the same
                # condition (val < ref_value + low_deviation), which makes
                # the rcu_max_diff clause redundant — the first term likely
                # intended a different limit; confirm before changing.
                if ((val < (ref_value + low_deviation)) and (rcu_max_diff > 1.0))  or (val < (ref_value + low_deviation)):
                    n_bad_low_secs += 1
            
            if n_bad_high_secs > 0:    
                high_info.append((rcu, spec_max[rcu], n_bad_high_secs, limit, rcu_max_diff))
                logger.debug("RCU=%d: max-noise=%5.3f  %d of %d seconds bad" %(rcu, spec_max[rcu], n_bad_high_secs, n_secs)) 

            if n_bad_low_secs > 0:    
                low_info.append((rcu, spec_min[rcu], n_bad_low_secs , (ref_value+low_deviation), rcu_max_diff)) 
                logger.debug("RCU=%d: min-noise=%5.3f %d of %d seconds bad" %(rcu, spec_min[rcu], n_bad_low_secs, n_secs)) 
            
            if (n_bad_high_secs == 0) and (n_bad_low_secs == 0):
                # Quiet RCU: still check for excessive spectrum jitter.
                if rcu_max_diff > (ref_diff + max_diff):
                    check_high_value = ref_value + (ref_diff / 2.0)
                    check_low_value  = ref_value - (ref_diff / 2.0)
                    for val in spec_median[rcu,:]:
                        if val > check_high_value or val < check_low_value:
                            n_bad_jitter_secs += 1
                    jitter_info.append((rcu, rcu_max_diff, ref_diff, n_bad_jitter_secs))
                    logger.debug("RCU=%d: max spectrum fluctuation %5.3f dB" %(rcu, rcu_max_diff)) 
                
    return (low_info, high_info, jitter_info)
コード例 #21
0
ファイル: combiner.py プロジェクト: AnthonyHorton/ccdproc
    def average_combine(self, scale_func=ma.average, scale_to=None):
        """ Average combine together a set of arrays.

           A `~ccdproc.CCDData` object is returned with the data property
           set to the average of the arrays.  If the data was masked or any
           data have been rejected, those pixels will not be included in the
           average.  A mask will be returned, and if a pixel has been
           rejected in all images, it will be masked.  The uncertainty of
           the combined image is set by the standard deviation of the input
           images.

           Parameters
           ----------
           scale_func : function, optional
               Function to calculate the average. Defaults to
               `~numpy.ma.average`.

           scale_to : float, optional
               Scaling factor used in the average combined image. If given,
               it overrides ``CCDData.scaling``. Defaults to None.

           Returns
           -------
           combined_image: `~ccdproc.CCDData`
               CCDData object based on the combined input of CCDData objects.

        """
        # Scaling priority: explicit argument, then the combiner's own
        # scaling attribute, then no scaling at all.
        if scale_to is not None:
            scalings = scale_to
        elif self.scaling is not None:
            scalings = self.scaling
        else:
            scalings = 1.0

        # Weighted average of the (optionally scaled) image stack.
        avg, _weight_sum = scale_func(scalings * self.data_arr, axis=0,
                                      weights=self.weights, returned=True)

        # A pixel is masked only when rejected in every input image.
        rejected_count = self.data_arr.mask.sum(axis=0)
        fully_rejected = rejected_count == len(self.data_arr)

        # Per-pixel scatter of the inputs is the combined uncertainty.
        scatter = ma.std(self.data_arr, axis=0)

        # Build the result with a dtype matching the combiner's.
        combined_image = CCDData(
            np.asarray(avg.data, dtype=self.dtype),
            mask=fully_rejected,
            unit=self.unit,
            uncertainty=StdDevUncertainty(scatter),
        )

        # Record how many images were combined.
        combined_image.meta["NCOMBINE"] = len(self.data_arr)

        return combined_image
コード例 #22
0
def infer_ks_test_goodness(l1):
    """Kolmogorov-Smirnov normality check for the sample *l1*.

    Fits a normal distribution using the sample mean and standard
    deviation, runs scipy's kstest against it, prints 'reject' when the
    p-value is below 0.01 ('accept' otherwise), then prints the full
    test result. Returns None.
    """
    mu = average(l1)
    sigma = std(l1)
    outcome = kstest(l1, 'norm', [mu, sigma])
    # p-value under 1% -> reject the normality hypothesis.
    verdict = 'reject' if outcome[1] < 0.01 else 'accept'
    print(verdict)
    print(outcome)
コード例 #23
0
 def get_mask(self):
     # Compute the target statistics (mean, stdev, range) of self.array;
     # these are the values the genetic algorithm's fitness compares
     # candidate samples against. (Legacy Python 2 print statement below.)
     self.array_mean = ma.mean(self.array)
     self.array_stdev = ma.std(self.array)
     self.array_range = ma.max(self.array) - ma.min(self.array)
     print "The mean is %f, the stdev is %f, the range is %f." %(self.array_mean, self.array_stdev, self.array_range)
     # Local import keeps the netCDF dependency confined to this method.
     from scipy.io.netcdf import netcdf_file as NetCDFFile
     ### get landmask
     # Load the ORCA2 land mask clipped to this object's time/lat extent
     # and the first 180 longitudes; -9.99999979e+33 is the fill value.
     nc = NetCDFFile(os.getcwd()+ '/../data/netcdf_files/ORCA2_landmask.nc','r')
     self.mask = ma.masked_values(nc.variables['MASK'][:, :self.time_len, :self.lat_len, :180], -9.99999979e+33)
     nc.close()
     # Index grids spanning the (time, lat, lon) volume for later sampling.
     self.xxx, self.yyy, self.zzz = np.lib.index_tricks.mgrid[0:self.time_len, 0:self.lat_len, 0:180]
コード例 #24
0
 def calc_chrom_fast(self, index, coords_vals):
     """Score one chromosome: fitness is the summed absolute difference
     between the sampled values' mean/stdev (coords_vals[0]) and the
     target array's statistics. Lower is better.

     The range and location-count penalty terms were deliberately
     disabled; they are kept below as comments for reference.
     """
     self.population[index]["fitness"] = np.abs(self.array_mean - ma.mean(coords_vals[0])) + np.abs(
         self.array_stdev - ma.std(coords_vals[0])
     )
     # Disabled penalty terms, kept for reference:
     # np.abs(self.array_range - (ma.max(coords_vals[0])-ma.min(coords_vals[0])))/10  + \
     # np.abs((self.chromosome_size-1) - coords_vals[2]) #locations
     # Bug fix: removed the trailing no-op expression statement
     # '"Fitness is: ", self.population[index]["fitness"]' — a leftover
     # from a removed print that built and discarded a tuple.
コード例 #25
0
ファイル: flag_data.py プロジェクト: OMGitsHongyu/analysis_IM
def destroy_time_with_mean_arrays(Data, flag_size=40):
    '''Mask times with high means.
    
    If there is a problem in time, the mean over all frequencies
    will stand out greatly [>10 sigma has been seen]. Flag these bad
    times and +- `flag_size` times around it. Will only be called if `Data`
    has 'badness'.

    Parameters
    ----------
    Data : DataBlock
        Contains information in a usable format direct from GBT. Bad
        times will be flagged in all polarizations and cal states.
    flag_size : int
        Number of time samples to mask on each side of a bad time.
    '''
    # Frequency-averaged time series for every (pol, cal) combination,
    # ordered (0,0),(1,0),(2,0),(3,0),(0,1),(1,1),(2,1),(3,1).
    series = [ma.mean(Data.data[:, pol, cal, :], -1)
              for cal in (0, 1) for pol in (0, 1, 2, 3)]
    # Per-series mean and standard deviation.
    means = sp.array([ma.mean(s) for s in series])
    sig = sp.array([ma.std(s) for s in series])
    # Upper acceptance limit: mean + 3 sigma (no lower cut here).
    max_accepted = means + 3*sig
    # A time step is bad when any of the eight series exceeds its limit.
    bad_times = [t for t in range(len(series[0]))
                 if any(series[k][t] > max_accepted[k] for k in range(8))]
    # Mask each bad time together with +/- flag_size neighbours.
    for t in bad_times:
        Data.data[(t-flag_size):(t+flag_size), :, :, :].mask = True
    return
コード例 #26
0
ファイル: moment_utils.py プロジェクト: jfoster17/ramps
def calculate_moments(d,minchan=False,maxchan=False,vel=False,bestmask=False,mask=False):
    """Compute per-pixel spectral moment maps from a (spectral, lat, lon) cube.

    Parameters (NOTE(review): inferred from usage; the ``False`` defaults are
    sentinel placeholders -- callers appear to always supply real values):
        d        : 3-D array, axis 0 is the spectral axis (``nspec`` channels).
        minchan, maxchan : channel range considered "signal".
        vel      : per-channel velocity axis, indexed by channel number.
        bestmask : fallback per-channel mask used whenever a pixel's own
                   mask slice has any nonzero entry (see loop below).
        mask     : 3-D mask cube, same shape as ``d``; nonzero marks signal.

    Returns a structured (nglat, nglon) array with fields
    'mean','sd','errmn','errsd','skew','kurt','error','intint','npix'.
    NOTE(review): 'skew' and 'kurt' are declared but never written here.
    """

    nglat = d.shape[1]
    nglon = d.shape[2]
    nspec = d.shape[0]


    maps = np.zeros((nglat,nglon),dtype={'names':['mean','sd','errmn',
            'errsd','skew','kurt','error','intint','npix'],
            'formats':['f4','f4','f4','f4','f4','f4','f4','f4','f4']})

    #These definitions for mask seem backward but are correct.
    # noise_portion: everything NOT flagged as signal (mask == 1 is hidden);
    # its channel-wise std is the per-pixel noise estimate ('error').
    noise_portion = ma.masked_where(mask == 1,d)
    good_d = d[minchan:maxchan,...]
    mask2 = mask[minchan:maxchan,...]
    #print(mask)
    #print(mask2)
    print(minchan)
    print(maxchan)
    # signal_portion: inside the channel window, keep only flagged channels.
    signal_portion = ma.masked_where(mask2 == 0,good_d)
    maps['error']  = ma.std(noise_portion,axis=0)
    maps['intint'] = ma.sum(signal_portion,axis=0)
    #print(maps['error'])


    for x in range(nglat):
        for y in range(nglon):
            fullspec = d[...,x,y]#Exract a single spectrum
            ind = np.arange(nspec)
            velmask = mask[minchan:maxchan,x,y]
            # NOTE(review): if this pixel has ANY flagged channel, its own
            # mask is discarded in favor of `bestmask`.  The inverted-looking
            # condition (!= 0 rather than == 0) matches the warning comment
            # above; confirm against the original moment_utils.py.
            if np.sum(velmask) != 0:
                velmask = bestmask
                npix = max(np.sum(velmask),1)
            # NOTE(review): `ind` has length nspec but `velmask` has length
            # (maxchan - minchan); boolean indexing would require equal
            # lengths, so this presumably relies on bestmask/mask spanning
            # the full spectrum -- verify with callers.
            ind = ind[velmask > 0]
            sigma = maps['error'][x,y]
            # Need at least 3 usable channels and positive noise to fit moments.
            if ind.size > 2 and (sigma > 0):
                mom = idl_stats.wt_moment(vel[ind],fullspec[ind],
                                errors = np.zeros(ind.size)+sigma)
                maps['mean'][x,y]  = mom['mean']
                maps['sd'][x,y]    = mom['stdev']
                maps['errmn'][x,y] = mom['errmn']
                maps['errsd'][x,y] = mom['errsd']
                maps['npix'][x,y]  = npix
            else:
                # Not enough data for a weighted moment fit: blank the pixel.
                maps['mean'][x,y]  = np.nan
                maps['sd'][x,y]    = np.nan
                maps['errmn'][x,y] = np.nan
                maps['errsd'][x,y] = np.nan
                maps['npix'][x,y]  = np.nan
    return(maps)
コード例 #27
0
ファイル: reflag.py プロジェクト: OMGitsHongyu/analysis_IM
def nothing(noth):
    """Flag outlier samples and noisy channels in module-level data blocks.

    NOTE(review): despite the name and the unused parameter ``noth``, this
    function operates entirely on names from the enclosing module scope
    (``remove_slope``, ``NoiseData``, ``Data``, ``thres``,
    ``max_noise_factor``, ``misc``).  It reads like a fragment extracted
    from a larger flagging routine in reflag.py -- confirm against the
    original file before reuse.

    Side effects: mutates ``NoiseData.data`` (slope removal, masking) and
    masks matching samples/channels in ``Data.data``.
    """
    # If requested, remove the time gradient from all channels.
    if remove_slope:
        un_mask = sp.logical_not(ma.getmaskarray(NoiseData.data))
        NoiseData.calc_time()
        time = NoiseData.time
        n_time = len(time)
        # Test if the mask is the same for all slices.  If it is, that greatly
        # reduces the work as we only have to generate one set of polynomials.
        all_masks_same = True
        for jj in range(n_time):
            if sp.all(un_mask[jj,...] == un_mask[jj,0,0,0]):
                continue
            else:
                all_masks_same = False
                break
        if all_masks_same:
            # One shared polynomial basis, broadcast over the trailing axes.
            polys = misc.ortho_poly(time, 2, un_mask[:,0,0,0], 0)
            polys.shape = (2, len(time), 1, 1, 1)
        else:
            # Per-slice masks differ: build an orthogonal basis per slice.
            polys = misc.ortho_poly(time[:,None,None,None], 2, un_mask, 0)
        # Subtract the slope mode (1th mode) out of the NoiseData.
        slope_amps = sp.sum(polys[1,...] * un_mask * NoiseData.data.filled(0),
                            0)
        NoiseData.data -= polys[1,...] * slope_amps
    # Iteratively flag on sliding scale to get closer and closer to desired
    # threshold.
    n_time = Data.data.shape[0]
    max_thres = sp.sqrt(n_time)/2.
    n_iter = 3
    # Geometric interpolation from max_thres down to the target `thres`.
    thresholds = (max_thres ** (n_iter - 1 - sp.arange(n_iter))
                 * thres ** sp.arange(n_iter)) ** (1./(n_iter - 1))
    for threshold in thresholds:
        # Get the deviation from the mean.
        residuals = ma.anom(NoiseData.data, 0).filled(0)
        # Get indices above the threshold.
        mask = abs(residuals) > threshold * ma.std(NoiseData.data, 0)
        # Mask the data.
        Data.data[mask] = ma.masked
        NoiseData.data[mask] = ma.masked
    
    # Now flag for very noisey channels.
    if max_noise_factor > 0:
        # Channels whose time-variance exceeds `max_noise_factor` times the
        # mean variance (over frequency) are masked entirely.
        vars = ma.var(NoiseData.data, 0)
        mean_vars = ma.mean(vars, -1).filled(0)
        bad_chans = vars.filled(0) > max_noise_factor * mean_vars[:,:,None]
        Data.data[:,bad_chans] = ma.masked
        NoiseData.data[:,bad_chans] = ma.masked
コード例 #28
0
def res_dist(x, y, e, n_runs=100, random_state=None):
    """Estimate the score distribution of estimator ``e`` over repeated fits.

    Splits (x, y) once into a 60/40 train/test partition, then fits the
    estimator ``n_runs`` times on the same training data, recording the
    train and test scores of every run.  Useful for estimators whose fit
    is stochastic.

    Parameters
    ----------
    x, y : array-like
        Features and targets, forwarded to ``train_test_split``.
    e : estimator
        Object with sklearn-style ``fit(X, y)`` and ``score(X, y)`` methods.
    n_runs : int
        Number of fit/score repetitions.
    random_state : int or None
        Seed forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    (train_res, test_res) : tuple of lists of float
        The score of every run on the train and test partitions.
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=random_state)

    test_res = []
    train_res = []
    start_time = time()

    # Progress marker roughly every 10% of the runs.  The original used
    # ``i % (n_runs / 10)`` -- a float modulus under Python 3 division --
    # which misbehaves for n_runs < 10 (prints every iteration).  An
    # integer step, clamped to at least 1, is robust for any n_runs.
    progress_step = max(1, n_runs // 10)
    for i in range(n_runs):
        e.fit(x_train, y_train)
        train_res.append(e.score(x_train, y_train))
        test_res.append(e.score(x_test, y_test))
        if i % progress_step == 0:
            print("%d" % i, end=' ')

    print("\nTime: %.3f secs" % (time() - start_time))
    print("Test Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" % (min(test_res), mean(test_res), max(test_res), std(test_res)))
    print("Train Min: %.3f Mean: %.3f Max: %.3f SD: %.3f" % (
    min(train_res), mean(train_res), max(train_res), std(train_res)))

    return (train_res, test_res)
コード例 #29
0
ファイル: flag_data.py プロジェクト: adam-lewis/analysis_IM
def destroy_with_variance(Data, sigma_thres=6, bad_freq_list=None):
    '''Mask frequency channels whose normalized variance is anomalously high.

    Polarizations must be ordered XX, XY, YX, YY along axis 1 of
    ``Data.data`` (a masked array shaped time x pol x cal x freq).

    Parameters
    ----------
    Data : object with a ``data`` masked-array attribute
        Bad channels are masked in place across all times, pols and cals.
    sigma_thres : float
        Flagger sensitivity; smaller masks more.  The original authors
        found 6 to work well.
    bad_freq_list : list or None
        Flagged channel indices are appended here.  Defaults to a fresh
        list each call; the original used a mutable default argument
        (``[]``), which silently accumulated entries across calls.

    Returns
    -------
    int
        Number of channels masked by this call.
    '''
    if bad_freq_list is None:
        bad_freq_list = []
    # Normalized variance per polarization for cal-on (0) and cal-off (1),
    # in the same a..h order as the original hand-unrolled version:
    # auto pols (XX, YY) are normalized by their own squared mean, cross
    # pols (XY, YX) by the product of the XX and YY means.
    norm_vars = []
    for cal in (0, 1):
        xx_mean = ma.mean(Data.data[:, 0, cal, :], 0)
        yy_mean = ma.mean(Data.data[:, 3, cal, :], 0)
        cross_norm = xx_mean * yy_mean
        norm_vars.append(ma.var(Data.data[:, 0, cal, :], 0) / xx_mean**2)    # XX
        norm_vars.append(ma.var(Data.data[:, 1, cal, :], 0) / cross_norm)    # XY
        norm_vars.append(ma.var(Data.data[:, 2, cal, :], 0) / cross_norm)    # YX
        norm_vars.append(ma.var(Data.data[:, 3, cal, :], 0) / yy_mean**2)    # YY
    # Max accepted value per array: mean + sigma_thres * sigma.
    ceilings = [ma.mean(v) + sigma_thres * ma.std(v) for v in norm_vars]
    amount_masked = 0
    for freq in range(len(norm_vars[0])):
        # A channel is bad if ANY of the eight arrays exceeds its ceiling.
        if any(v[freq] > top for v, top in zip(norm_vars, ceilings)):
            amount_masked += 1
            bad_freq_list.append(freq)
            Data.data[:, :, :, freq].mask = True
    return amount_masked
コード例 #30
0
ファイル: combiner.py プロジェクト: JenniferKarr/ccdproc
    def average_combine(self, scale_func=None, scale_to=1.0):
        """Combine the stacked arrays by weighted averaging.

        The combiner's arrays (multiplied by ``self.scaling`` when one is
        configured) are averaged along the stacking axis using
        ``self.weights``.  An output pixel is masked only when it is
        masked/rejected in every input image.  The per-pixel standard
        deviation of the input stack becomes the combined uncertainty.

        NOTE(review): ``scale_func`` and ``scale_to`` are accepted but not
        used anywhere in this body.

        Returns
        -------
        combined_image: `~ccdproc.CCDData`
            CCDData object based on the combined input of CCDData objects.
        """
        # Per-image scale factors, or a no-op scalar when none configured.
        scalings = 1.0 if self.scaling is None else self.scaling

        # Weighted average over the stacking axis.  The summed weights are
        # returned alongside but only the average itself is used below.
        data, summed_weights = ma.average(scalings * self.data_arr,
                                          axis=0, weights=self.weights,
                                          returned=True)

        # Mask a pixel only if every input image had it masked.
        n_images = len(self.data_arr)
        mask = self.data_arr.mask.sum(axis=0) == n_images

        # Standard deviation of the input stack as the uncertainty.
        uncertainty = ma.std(self.data_arr, axis=0)

        # Build the combined image with the combiner's dtype and unit.
        combined_image = CCDData(np.asarray(data.data, dtype=self.dtype),
                                 mask=mask, unit=self.unit,
                                 uncertainty=StdDevUncertainty(uncertainty))

        # Record how many images went into the combination.
        combined_image.meta['NCOMBINE'] = n_images

        return combined_image