Code Example #1
File: useful.py  Project: hughbg/leda_analysis_2016
def statistics(data):	# I think 8 is better for flattening
    
    def num2str(x):
        return ( "%.3f" % x )
        
    # Define model function to be used to fit to the data
    def gauss(x, *p):
        A, mu, sigma = p
        return A*np.exp(-(x-mu)**2/(2.*sigma**2))
        
    def gauss_fit(in_data):
        # p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
        p0 = [np.max(in_data), np.argmax(in_data), np.std(in_data)]

        coeff, var_matrix = scipy.optimize.curve_fit(gauss, np.array(range(len(in_data))), in_data, p0=p0)

        return coeff, np.sqrt(np.mean(np.diag(var_matrix)))
        
    data = ensure_mask(data)

    # Now can use the demeaning
    flat = bn.move_nanmean(data, params.st_bp_window_t, axis=0)
    flat = np.roll(flat, -params.st_bp_window_t/2+1, axis=0)
    flat = bn.move_nanmean(flat, params.st_bp_window_f, axis=1)
    flat = np.roll(flat, -params.st_bp_window_f/2+1, axis=1)
    flat = data-flat
    flat = np.ma.ravel(flat)
    flat = flat[np.logical_not(flat.mask)]
    if len(flat) != np.ma.MaskedArray.count(data):
        print "ERROR: mask not preserved in statistics", len(flat), np.ma.MaskedArray.count(data)
        exit(1)
    flat = flat[np.logical_not(np.isnan(flat))]
    flat -= np.mean(flat)
    
    # Print stats. Some are from the data, others from the flattened data
    print "Gaussian statistics, from de-meaned data:"
    print "  Min", num2str(np.min(flat)), "Max", num2str(np.max(flat)), "Std", num2str(np.std(flat)),
    print "Skewness", num2str(skew(flat)), "Kurtosis", num2str(kurtosis(flat, fisher=True))
    print "Statistics from data, not de-meaned:"
    print "  Min", num2str(np.ma.min(data)), "Max", num2str(np.ma.max(data)), "Std", num2str(np.ma.std(data))
    total = data.shape[0]*data.shape[1]
    num_in = np.ma.MaskedArray.count(data)
    print "Flags:", ( "%.3f%%" % (100*(total-num_in)/total) ), "flagged (num:"+str(total-num_in)+")" 
    
    # Get histogram for Gauss fit
    histogram = np.zeros((params.histogram_length, 2))
    hist = np.histogram(flat, params.histogram_length)
    histogram[:, 0] = hist[1][:params.histogram_length]
    histogram[:, 1] = hist[0]
    np.savetxt("hist_data.dat", histogram)
    
    # See how Gaussian it is
    try:
        coeff, err = gauss_fit(histogram[:, 1])
        print "Gauss fit error", ( "%.3f" % err ), "(hoping for < 5)"
        histogram[:, 1] = np.array([gauss(i, coeff[0], coeff[1], coeff[2]) for i in range(len(histogram))])
        np.savetxt("hist_fit.dat", histogram)
        
    except Exception:   # most commonly a RuntimeError from curve_fit when the fit does not converge
        print "Gauss fit failed"
Code Example #2
def clip(data):

    bp_window_t = params.sc_bp_window_t
    bp_window_f = params.sc_bp_window_f

    # Get the standard deviation of the high (by frequency) third of the data, for clipping
    cut = data.shape[1] / 3
    chunk = data[:, data.shape[1] - cut:]
    chunk = bn.move_nanmean(chunk, bp_window_t, axis=0)
    chunk = np.roll(chunk, -bp_window_t / 2 + 1, axis=0)
    chunk = bn.move_nanmean(chunk, bp_window_f, axis=1)
    chunk = np.roll(chunk, -bp_window_f / 2 + 1, axis=1)
    chunk = data[:, data.shape[1] - cut:] - chunk
    chunk = chunk[bp_window_t:, bp_window_f:]  # Because these edge values are NaN now
    chunk = np.ravel(chunk)
    if np.ma.is_masked(chunk): chunk = chunk[chunk.mask == False]

    # Clipping values
    dmin = -params.sigma * np.std(chunk)
    dmax = params.sigma * np.std(chunk)

    # Mask the data. Have to flatten the data to find where to mask it
    flat = bn.move_nanmean(data, bp_window_t, axis=0)
    flat = np.roll(flat, -bp_window_t / 2 + 1, axis=0)
    flat = bn.move_nanmean(flat, bp_window_f, axis=1)
    flat = np.roll(flat, -bp_window_f / 2 + 1, axis=1)
    flat = data - flat
    m = np.ma.mean(flat[bp_window_t:, bp_window_f:])
    flat[:bp_window_t, :] = m  # Because these edge values are now NaN due to move_nanmean
    flat[:, :bp_window_f] = m
    flat -= m

    data.mask = np.logical_or(data.mask, flat > dmax)
    data.mask = np.logical_or(data.mask, flat < dmin)
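clip() de-trends the dynamic spectrum with centred moving means along both axes, estimates the noise from the de-trended upper third of the band, and masks everything further than params.sigma standard deviations from the trend. The 1D sketch below illustrates that flatten-then-clip idea; sigma_clip_1d is a hypothetical helper written for this page, not part of leda_analysis_2016, and it assumes the old bottleneck API used above.

# Illustration only: the flatten-then-clip pattern from clip(), reduced to one dimension.
def sigma_clip_1d(x, window=8, sigma=4.0):
    x = np.ma.masked_invalid(x, copy=True)
    trend = bn.move_nanmean(x.filled(np.nan), window)   # running mean; NaN where undefined
    trend = np.roll(trend, -window // 2 + 1)            # centre the window, as in clip()
    resid = np.ma.masked_invalid(x - trend)             # de-trended residuals, NaN edges masked
    std = resid.std()                                   # masked std ignores masked points
    x.mask = np.ma.getmaskarray(x) | np.ma.filled(np.abs(resid) > sigma * std, False)
    return x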
Code Example #3
def clip1(data):
    nstart = np.ma.count(data)
    for i in range(data.shape[1]):
        flat = bn.move_nanmean(data[:, i], params.sc_bp_window_t, axis=-1)
        flat = np.roll(flat, -params.sc_bp_window_t / 2 + 1, axis=-1)
        flat = data[:, i] - flat  # this will also insert the mask
        std = np.std(flat[np.logical_not(
            np.logical_or(np.isnan(flat), flat.mask))])
        if not np.isnan(float(std)):
            clip_mask = np.logical_or(flat < -params.sigma * std,
                                      params.sigma * std < flat)
            data[:, i].mask = np.logical_or(data[:, i].mask, clip_mask)

    if np.ma.count(data) > nstart:
        print "ERROR: number of points flagged went DOWN after clipping!"
        exit(1)
Code Example #4
File: useful.py  Project: hughbg/leda_analysis_2016
def add_uncertainties(data):

    data = ensure_mask(data)
    rms = np.zeros(data.shape[1])

    for i in range(data.shape[1]):
        flat = bn.move_nanmean(data[:, i], params.un_bp_window_t, axis=0)
        flat = np.roll(flat, -params.un_bp_window_t/2+1, axis=0)
        flat = data[:, i]-flat
        flat = np.ma.ravel(flat)
        flat = flat[np.logical_not(flat.mask)]
        if len(flat) != np.ma.MaskedArray.count(data[:, i]):
            print "ERROR: mask not preserved in statistics", len(flat), np.ma.MaskedArray.count(data[:, i])
            exit(1)
        flat = flat[np.logical_not(np.isnan(flat))]
        
        rms[i] = float(np.std(flat))		# Will be Nan if whole channel masked
        
    return rms
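add_uncertainties() therefore returns one RMS value per frequency channel, taken from the de-meaned time series of that channel, with NaN for fully masked channels. A hedged usage sketch (dyn_spec is a made-up masked dynamic spectrum, time by frequency, not data from the project):

# Hypothetical usage: per-channel noise estimates from a masked dynamic spectrum.
dyn_spec = np.ma.masked_invalid(np.random.normal(300.0, 5.0, (1000, 2048)))
rms = add_uncertainties(dyn_spec)                        # one value per frequency channel
print("median channel RMS: %.3f" % np.nanmedian(rms))    # nanmedian skips fully masked channels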
Code Example #5
def filter(data, size, axis=0):
    # If the input is a masked array, the mask will be lost after filtering

    if not params.median:
        if size % 2 == 0: size -= 1
        d = bn.move_nanmean(data, size, axis=axis)
        return np.roll(d, -size / 2 + 1, axis=axis)
    else:
        d = np.zeros((data.shape[0], data.shape[1]))
        if size % 2 == 0: size -= 1
        if axis == 0:
            for i in range(data.shape[0]):
                d[i] = scipy.signal.medfilt(data[i], size)
        elif axis == 1:
            for i in range(data.shape[1]):
                d[:, i] = scipy.signal.medfilt(data[:, i], size)
        else:
            print "Invalid axis", axis, "for filtering"
            exit(1)
        return d
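Two practical caveats with filter(): bn.move_nanmean only exists in older bottleneck releases (in bottleneck 1.0 and later the NaN-ignoring moving mean is, as far as I know, bn.move_mean(..., min_count=1), with slightly different edge handling), and, as the docstring warns, a masked input comes back as a plain ndarray. A hedged usage sketch that works around the second point by re-applying the mask:

# Hedged usage sketch: smooth along the time axis and restore the mask by hand.
# Assumes the moving-mean branch (params.median == False).
spec = np.ma.masked_invalid(np.random.normal(300.0, 5.0, (1000, 2048)))
smooth = filter(spec.filled(np.nan), 9, axis=0)   # masked points enter as NaN and are skipped
smooth = np.ma.array(smooth, mask=spec.mask)      # the mask is lost by filter(), so re-apply it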
Code Example #6
                                             len(indexT))

        d_scrunched = scrunch(d[ant][indexT])  # Averages over 1MHz freq bins.
        # Masked entries are ignored
        data_averaged = np.ma.average(d_scrunched,
                                      axis=0)  # averaging over time
        # seems like if channel is all masked it becomes 0
        num_in = np.ma.MaskedArray.count(data_averaged)
        if num_in != len(data_averaged):
            raise RuntimeError("There are masked values in averaged array")
        if len(data_averaged[np.isnan(data_averaged)]) != 0:
            raise RuntimeError("There are NaN values in averaged array")

        av_data_dict["Bins"][ind][ant] = data_averaged

        flat = bn.move_nanmean(d_scrunched, bp_window_t, axis=0)
        flat = np.roll(flat, -bp_window_t / 2 + 1, axis=0)
        for indnu in range(d_scrunched.shape[1]):
            #print "indnu", d[ant][indexT,indnu].shape
            flat_channel = d_scrunched[:, indnu] - flat[:, indnu]
            rms_av[indnu] = np.std(flat_channel[bp_window_t:])

    #       print "rms",rms_av[ind][indnu]
    #   plt.subplot(len(vecTbins)-1,2,flagsp-2*(len(vecTbins)-1)*indD+indD)
    #   plt.subplot(1,2,flagsp-2*(len(vecTbins)-1)*indD+indD)

        av_data_dict["Bins"][ind][ant + "_RMS"] = rms_av
        av_data_dict["Bins"][ind][ant + "_SI"] = spectral_index(
            freq_scrunched[19:], data_averaged[19:])

        #plt.subplot(2,2,flagsp)
Code Example #7
def sum_threshold(data, thr_f, thr_t=None, scales=None, rho=1.5,
                  plot_progress=False, verbose=False):
    """ Apply Sum-Threshold method 
    
    This function applies a set of moving averages to the data along both the
    time and frequency axes, then checks whether the outputs are above a threshold.
    This is the basic technique used in AOFlagger's algorithm.
    
    data (np.ma.array): data to flag, 2D array (time, freq)
    thr_f (int): threshold over which to flag on frequency axis
    thr_t (int): threshold over which to flag on time axis
    scales (list): list of window sizes (ints) to do moving average over.
            Defaults to None, in which case it uses [1,2,4,8,16,32,64] 
    rho (float): Threshold setting base. From eqn 12 in Offringa et al. 2010:
                               thr_1 
                 thr_i =  --------------
                          rho^(log_2(i))
                 A value of 1.5 is suggested as being "empirically good"
    """

    if scales is None:
        scales = [1, 2, 4, 8, 16, 32, 64]
    
    if thr_t is None:
        thr_t = thr_f
    
    mask = np.copy(data.mask)
    
    thr1_f = thr_f
    thr1_t = thr_t
    
    # do first stage of flagging:
    mask_f = np.greater_equal(np.abs(data-1), thr_f)
    mask_t = np.greater_equal(np.abs(data-1), thr_t)
    #mask_b = np.greater_equal(np.abs(summed_b-1), np.sqrt(thr_f * thr_t))
    mask_s = np.logical_or(mask_f, mask_t)
    #mask_s = np.logical_or(mask_s, mask_b)        
    mask   = np.logical_or(data.mask, mask_s)
    data[mask] = np.sqrt(thr_f * thr_t)
    
    for window in scales:
        
        thr_f = thr1_f / np.power(rho, np.log2(window))
        thr_t = thr1_t / np.power(rho, np.log2(window))
        
        if window > 1:
            summed_f = bn.move_nanmean(data, window, axis=1)
            summed_t = bn.move_nanmean(data, window, axis=0)
            #summed_b = bn.move_nanmean(summed_f, int(np.sqrt(window)), axis=0)
        
            mask_f = np.greater_equal(np.abs(summed_f-1), thr_f)
            mask_t = np.greater_equal(np.abs(summed_t-1), thr_t)
            #mask_b = np.greater_equal(np.abs(summed_b-1), np.sqrt(thr_f * thr_t))
            mask_s = np.logical_or(mask_f, mask_t)
            #mask_s = np.logical_or(mask_s, mask_b)        
            mask   = np.logical_or(data.mask, mask_s)
            data[mask] = 1 + np.sqrt(thr_f * thr_t)
            data.mask  = mask
        else:
            summed_f = data
            summed_t = data

        if verbose:
            print "M: %i, Xi_f: %2.2e, Xi_t: %2.2e" % (window, thr_f, thr_t)

        if plot_progress:
            plt.figure()
            plt.subplot(221)
            plt.title("summed f: %i" % window)
            plt.imshow(summed_f, aspect='auto', interpolation='none', rasterized=True)
            plt.colorbar()
            plt.subplot(222)
            plt.title("summed t: %i" % window)
            plt.imshow(summed_t, aspect='auto', interpolation='none', rasterized=True)
            plt.colorbar()
            plt.subplot(223)
            plt.title("flagged: %i" % window)
            plt.imshow(data, aspect='auto', interpolation='none', rasterized=True)
            plt.colorbar()
    if plot_progress:
        plt.show()
        
    return data.mask
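The per-scale thresholds follow the formula quoted in the docstring, thr_i = thr_1 / rho^(log_2(i)), which is exactly what the two lines at the top of the loop compute. A small worked example of how fast the threshold decays over the default scales (thr1 here is an arbitrary illustrative first-stage threshold):

# Worked example of the threshold schedule used in sum_threshold().
thr1, rho = 5.0, 1.5
for window in [1, 2, 4, 8, 16, 32, 64]:
    print("M = %2d  threshold = %.3f" % (window, thr1 / np.power(rho, np.log2(window))))
# With rho = 1.5 the threshold shrinks by a factor of 1.5 for every doubling of the window,
# so weaker but broader features can still trip the flagger at the larger scales.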
Code Example #8
File: 06_lmfit_tests.py  Project: hughbg/leda_cal2
def bin_to_1MHz(bottom_f, filt, variance, channel_indexes):
    def calc_rms(x):
        return np.sqrt(np.mean(x**2))

    if len(filt) != len(variance) or len(filt) != len(channel_indexes):
        raise RuntimeError("Arrays of different length in bin_to_1MHz " +
                           str(len(filt)) + " " + str(len(variance)) + " " +
                           str(len(channel_indexes)))

    # We want to bin 1MHz of channels. That means from channel N to N+41 (inclusive). However, there
    # may be gaps in the channels, so there may be different numbers of channels binned.
    # The averaged frequencies are calculated from averaging 4 frequencies without gaps.

    nbin = 42
    chan_width = .024

    ndata = []
    nvariance = []
    i = 0
    # Find blocks of channels and bin them. Blocks are defined by a channel sep of 42 in the indexes.
    while i < len(channel_indexes):
        j = i
        weighted_mean = 0.0
        D_2 = 0.0  #  https://en.wikipedia.org/wiki/Inverse-variance_weighting
        while j < len(channel_indexes) and channel_indexes[j] < channel_indexes[i] + nbin:
            weighted_mean += filt[j] / variance[j]
            D_2 += 1 / variance[j]
            j += 1

        print j - i, "channels binned"
        D_2 = 1 / D_2
        weighted_mean *= D_2

        ndata.append(weighted_mean)
        nvariance.append(D_2)

        i = j

    # Get frequencies for the bins, based on what was the starting frequency originally
    bottom_freq = (bottom_f + bottom_f + (nbin - 1) * chan_width) / 2
    print "Bottom f", bottom_f, "->", bottom_freq
    nf = [bottom_freq + i * nbin * chan_width for i in range(len(ndata))]

    print "Scrunch to length", len(nf)

    #np.savetxt("filt.dat", np.array(list(zip(filt_f, filt))))
    plt.figure(figsize=(8, 6))
    plt.plot(nf, ndata)
    plt.title("Binned to 1MHz")
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Temp [K]")
    plt.savefig("bin1MHz.png")

    plt.clf()
    plt.figure(figsize=(8, 6))
    plt.plot(nf, nvariance)
    plt.title("Variance binned")
    plt.xlabel("Frequency [MHz]")
    plt.ylabel("Temp [K$^2$]")
    plt.tight_layout()
    plt.savefig("bin1MHz_var.png")

    np.savetxt("binned_frequencies.dat", nf)
    np.savetxt("binned_data.dat", ndata)
    np.savetxt("binned_variance.dat", nvariance)

    mn = (ndata - bn.move_nanmean(ndata, 9))[4:-4]
    mn = mn[np.logical_not(np.isnan(mn))]  # "mn != np.nan" is always True; use isnan to drop NaNs
    print mn
    print calc_rms((ndata - scipy.signal.medfilt(ndata, 9))[4:-4]), calc_rms(
        mn[4:]), calc_rms(ndata - filter(ndata))

    return nf, ndata, nvariance
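The inner loop above is inverse-variance weighting (the Wikipedia article referenced in the comment): each 1 MHz bin is the weighted mean of its channels with weights 1/variance, and the variance of the bin is the reciprocal of the summed weights. A minimal standalone sketch of that combination rule, with made-up numbers:

# Minimal sketch of the inverse-variance weighting used in bin_to_1MHz():
#   weighted mean = sum(x_i / var_i) / sum(1 / var_i),  var of mean = 1 / sum(1 / var_i)
def ivw_combine(values, variances):
    weights = 1.0 / np.asarray(variances, dtype=float)
    mean = np.sum(np.asarray(values, dtype=float) * weights) / np.sum(weights)
    return mean, 1.0 / np.sum(weights)

mean, var = ivw_combine([300.0, 302.0, 298.0], [4.0, 1.0, 2.0])
print("binned value %.3f with variance %.3f" % (mean, var))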
Code Example #9
File: 06_lmfit_tests.py  Project: hughbg/leda_cal2
plt.show()
plt.savefig("variance.png")

# Plot the damped sinusoid
plt.clf()
plt.plot(f2, rD, label="Data")
#plt.plot(f2, rD_sin_model, label="Fit")
plt.xlabel("Frequency [MHz]")
plt.ylabel("Temperature [K]")
plt.legend()
plt.show()
plt.savefig("residual.png")
np.savetxt("residual.dat", np.array(list(zip(f2, rD))))

#filt = (rD-scipy.signal.medfilt(rD, 9))[9:-9]
filt = (rD - bn.move_nanmean(rD, 9))[9:-9]
filt = (rD - filter(rD))[9:-9]

#f2, filt = bin_to_1MHz(f2[9:-9], filt)

print "Noise again", np.std(filt[len(filt) / 2:])

plt.figure(figsize=(10, 10))

lw = 0.5
plt.clf()
plt.plot(f2, rD, linewidth=lw)
plt.title("Signal")
plt.xlabel("Frequency [MHz]")
plt.ylabel("Temperature [K]")
plt.savefig("signal.png")