Example #1
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

def degree_distribution(A, networkName, directed=True):
    binNum = 30

    if directed:
        (kin, kout) = get_degree(A)
        bins = np.linspace(0, np.log10(np.max(kin)), num=binNum)
        digitized = np.digitize(np.log10(kin), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0,len(bins))])
        bin_counts = ma.log10(bin_counts)
        #fit the line (slope a, intercept b) in log-log space
        a, b = ma.polyfit(bins, bin_counts, 1)
        print('best fit in degree line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        fig, axs = plt.subplots(2, 1)
        axs[0].scatter(bins, bin_counts)
        axs[0].plot(bins, yfit, color="orange")
        axs[0].set_title('in-degree distribution')
        axs[0].set_xlabel('Degree (d) log base 10', fontsize="small")
        axs[0].set_ylabel('Frequency log base 10', fontsize="small")
        axs[0].set_ylim(bottom=0)

        bins = np.linspace(0, np.log10(np.max(kout)), num=binNum)
        digitized = np.digitize(np.log10(kout), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0,len(bins))])
        bin_counts = ma.log10(bin_counts)
        #fit the line to the out-degree data
        a, b = ma.polyfit(bins, bin_counts, 1)
        print('best fit out degree line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        axs[1].scatter(bins, bin_counts)
        axs[1].plot(bins, yfit, color="orange")
        axs[1].set_title('out-degree distribution')
        axs[1].set_xlabel('Degree (d) log base 10', fontsize="small")
        axs[1].set_ylabel('Frequency log base 10', fontsize="small")
        plt.tight_layout()
        plt.savefig(networkName + 'degree.pdf')
        plt.close()

    if not directed:
        (kin,kout) = get_degree(A)
        print(kin.shape)
        #bin the statistics
        bins = np.linspace(0, np.log10(np.max(kin)), num=binNum)
        digitized = np.digitize(np.log10(kin), bins)
        bin_counts = np.asarray([digitized.tolist().count(i) for i in range(0,len(bins))])
        bin_counts = ma.log10(bin_counts)
        #fit the line
        a,b = ma.polyfit(bins, bin_counts, 1, full=False)
        print('best fit line:\ny = {:.2f} + {:.2f}x'.format(b, a))
        yfit = [b + a * xi for xi in bins]
        plt.scatter(bins, bin_counts)
        plt.plot(bins, yfit, color="orange")
        plt.title('degree distribution')
        plt.xlabel('Degree (d) log base 10', fontsize="small")
        plt.ylabel('Frequency log base 10', fontsize="small")
        plt.ylim(bottom=0)
        # plt.xscale('log')
        # plt.yscale('log')
        plt.tight_layout()
        plt.savefig(networkName + 'degree.pdf')
        plt.close()
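The helper get_degree is not shown here. A minimal stand-in for a dense adjacency matrix, assuming the convention that A[i, j] != 0 marks an edge from node i to node j:

import numpy as np

def get_degree(A):
    # hypothetical stand-in: under the assumed A[i, j] = edge i -> j convention,
    # column sums give in-degrees and row sums give out-degrees
    A = np.asarray(A)
    kin = (A != 0).sum(axis=0)
    kout = (A != 0).sum(axis=1)
    return (kin, kout)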
Example #2
def my_polyfit(x, y, deg, degatt=0):
    '''Robust fit: fit a trial polynomial of order degatt, mask points whose
    residual exceeds 3x the median residual, then refit at order deg.'''
    # trial fit, used only to flag outliers
    k = ma.polyfit(x, y, degatt)
    res = my_poly(k, x)
    resid = ma.abs(res - y)
    medresid = ma.median(resid, axis=0)
    if y.ndim != 1:
        mask = ma.logical_not(ma.sum((resid > 3 * medresid), axis=1).astype('bool'))
    else:
        mask = (resid < 3 * medresid)
    y = y[mask]
    x = x[mask]
    k = ma.polyfit(x, y, deg)
    return(k, mask)
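my_poly is an external helper. Assuming it evaluates polyfit-style coefficients (highest order first, like np.polyval), a stand-in plus a quick usage sketch:

import numpy as np
import numpy.ma as ma

def my_poly(k, x):
    # hypothetical stand-in: evaluate coefficients highest order first
    return np.polyval(k, x)

# linear data with one gross outlier
x = np.arange(10, dtype=float)
y = 2.0 * x + 1.0
y[5] += 50.0
k, mask = my_polyfit(x, y, deg=1)
print(k)    # ~[2., 1.]; mask is False only at the outlier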
Example #3
def rate_of_change_polyfit(data, period, dates, fit_length=1):
    '''Calculate a rate of change in the same way as the old code did,
    using successive lines of best fit over a sliding window.'''
    import aux_functions
    import numpy as np
    import numpy.ma as ma

    seconds_in_day = 8.64e4
    tolerance = 0.95
    numbers = np.array([aux_functions.datetime_to_float(ddt) for ddt in dates])
    dates_within = [
        ddt for ddt in dates if ddt >= period[0] and ddt <= period[1]
    ]
    output_series = ma.masked_array(np.zeros(len(dates_within)),
                                    mask=np.zeros(len(dates_within),
                                                  dtype='bool'))

    for (idate, ddt) in enumerate(dates_within):
        number = aux_functions.datetime_to_float(ddt)
        index_period = np.where(
            np.logical_and(numbers >= number - fit_length,
                           numbers <= number + fit_length))
        numbers_period = numbers[index_period]
        data_period = data[index_period]

        # skip windows that are almost entirely masked
        if np.sum(data_period.mask) / data_period.shape[0] > tolerance:
            output_series.mask[idate] = True
        else:
            rate_of_change_instance = ma.polyfit(numbers_period, data_period,
                                                 1)[0] / seconds_in_day
            output_series.data[idate] = rate_of_change_instance

    return output_series
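aux_functions.datetime_to_float is project-specific and not shown. Since the fitted slope is later divided by seconds_in_day, the float presumably counts days; a stand-in under that assumption:

import datetime

def datetime_to_float(ddt, epoch=datetime.datetime(1970, 1, 1)):
    # hypothetical stand-in: days since an arbitrary epoch, so the polyfit
    # slope (units per day) divided by 8.64e4 yields units per second
    return (ddt - epoch).total_seconds() / 8.64e4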
Example #4
def baseline_and_deglitch(orig_spec,
                          ww=300,
                          sigma_cut=4.,
                          poly_n=2.,
                          filt_width=7.):
    """
    (1) Calculate a rolling standard deviation (s) in a window
        of 2*ww pixels
    (2) Mask out portions of the spectrum where s is more than
        sigma_cut times the median value for s.
    (3) Fit and subtract-out a polynomial of order poly_n 
        (currently hard-coded to 2)
    (4) Median filter (with a filter width of filt_width)
        to remove the single-channel spikes seen.
    """
    
    ya = rolling_window(orig_spec,ww*2)
    #Calculate standard dev and pad the output
    stds = my_pad.pad(np.std(ya,-1),(ww-1,ww),mode='edge')
    #Figure out which bits of the spectrum have signal/glitches
    med_std = np.median(stds)
    std_std = np.std(stds)
    sigma_x_bar = med_std/np.sqrt(ww)
    sigma_s = (1./np.sqrt(2.))*sigma_x_bar
    #Mask out signal for baseline
    masked = ma.masked_where(stds > med_std+sigma_cut*sigma_s,orig_spec)
    xx = np.arange(masked.size)
    ya = ma.polyfit(xx, masked, 2)
    baseline = np.polyval(ya, xx)
    sub = orig_spec-baseline
    #Filter out glitches in baseline-subtracted version
    final = im.median_filter(sub,filt_width)[::filt_width]
    return(final)
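rolling_window and my_pad.pad are helpers that are not shown. The first is commonly implemented with the classic numpy stride trick below; np.pad can stand in for the second:

import numpy as np

def rolling_window(a, window):
    # read-only view of sliding windows along the last axis (standard recipe)
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

# np.std(rolling_window(spec, 2 * ww), -1) has spec.size - 2*ww + 1 entries;
# padding by (ww - 1, ww) in 'edge' mode restores the original length.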
Example #5
def fit_baseline(masked,xx,ndeg=2):
    """
    Fit a polynomial baseline of
    arbitrary (ndeg) order.
    """
    ya = ma.polyfit(xx,masked,ndeg)
    basepoly = np.poly1d(ya)
    return(basepoly)
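A hypothetical usage sketch: mask the signal out, fit the baseline through the rest, then subtract it from the full spectrum:

import numpy as np
import numpy.ma as ma

spec = np.random.normal(0.0, 0.1, 512)
spec[200:230] += 2.0                         # injected "line"
xx = np.arange(spec.size)
masked = ma.masked_where(spec > 0.5, spec)   # hide the line from the fit
basepoly = fit_baseline(masked, xx, ndeg=2)
residual = spec - basepoly(xx)               # baseline-subtracted spectrum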
Example #6
File: ptc.py  Project: nhaddad/pydtk
def ptc_irffpairs(imagelist, *coor, **kargs):
    """
    TODO: Need to be finished !!
    NHA

    Perform a PTC plot for pairs of flat fields (ff) at the same level.
    Each pair of ff should have the same light level.
    The first 2 images must be bias frames.
    To eliminate the FPN, the 'shotnoise' image is computed as the subtraction
    of two debiased flat field images.
    optional kargs arguments:
    FACTOR (default = 2.0)
    FIRST_FITTING (default = False)
    LIMIT (default = False)
    VERBOSE (default=False)
    TODO: add Rotation by 90deg as an option
    """
    x1, x2, y1, y2 = imagelist[0].get_windowcoor(*coor)

    # Define empty list to store values
    signal = []
    stddev = []
    variance = []

    oddimageindex = list(range(1, len(imagelist), 2))
    evenimageindex = list(range(0, len(imagelist), 2))

    # For all pairs, compute signal, std and variance
    for odd, even in zip(oddimageindex, evenimageindex):
        ff1 = imagelist[odd]
        ff2 = imagelist[even]
        ffmean = (ff1 + ff2) / 2.0
        shotnoise = ff1 - ff2
        signal.append(np.mean(ffmean[x1:x2, y1:y2]))
        variance.append(np.var(shotnoise[x1:x2, y1:y2]) / 2.0)
        stddev.append(np.std(shotnoise[x1:x2, y1:y2]) / np.sqrt(2.0))
        print(f"Signal: {signal[-1]}   Variance: {variance[-1]}")

    coefts = ma.polyfit(signal, variance, 1)
    # coefts = ma.polyfit(signal[:-3], variance[:-3], 1)
    polyts = coefts[0] * np.array(signal) + coefts[1]
    # PTC: slope = 1/gain (e-/ADU); intercept ~ read-noise variance
    print(1 / coefts[0], coefts[1])

    # plot variance v/s signal
    plt.figure()
    # plot(meansig,masked_variance,'b.')
    plt.plot(signal, variance, 'b.')
    plt.plot(signal, polyts, 'r-')
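A synthetic check of what the 1 / coefts[0] printout means (standard photon-transfer-curve reasoning, not code from this project): for Poisson light recorded with gain g in e-/ADU, var(ADU) = signal(ADU) / g, so the slope of variance versus signal is 1/g:

import numpy as np
import numpy.ma as ma

g = 2.5                                  # assumed gain, e-/ADU
signal_e = np.linspace(1e3, 5e4, 20)     # illumination in electrons
signal = signal_e / g                    # recorded signal in ADU
variance = signal_e / g**2               # shot-noise variance in ADU^2
coef = ma.polyfit(signal, variance, 1)
print(1 / coef[0])                       # recovers ~2.5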
Example #7
def get_poly_baseline(mspec, k_est, debug=True, **kwargs):
    """
    Fit for the best polynomial baseline according to an information criterion.

    Search polynomial fits from order 0 through order 6; both BIC and AIC
    are computed, and the AIC-minimizing order is selected. The chosen fit
    should also have an rms error within a factor of two of the estimated
    noise (k_est); if it does not, the baseline fit is most likely bad.
    """
    d = np.arange(0, 7)
    rms_err = np.zeros(d.shape)
    all_polys = []
    #k_est = 0.2/np.sqrt(7)
    xx = np.arange(mspec.size)
    yy = mspec

    for i in range(len(d)):
        p = ma.polyfit(xx, yy, d[i])
        # P is assumed to be numpy.polynomial.Polynomial, which takes
        # coefficients lowest order first, hence the [::-1] reversal
        basepoly = P(p[::-1])
        all_polys.append(basepoly)
        rms_err[i] = np.sqrt(np.sum((basepoly(xx) - yy)**2) / len(yy))
    BIC = np.sqrt(len(yy)) * rms_err / k_est + 1 * d * np.log(len(yy))
    AIC = np.sqrt(len(yy)) * rms_err / k_est + 2 * d + 2 * d * (d + 1) / (
        len(yy) - d - 1)

    if debug:
        fig = plt.figure(figsize=(12, 5))
        ax = fig.add_subplot(311)
        ax.plot(d, rms_err, '-k', label='rms-err')
        ax.legend(loc=2)
        ax.axhline(k_est * 2., ls=":", color='r')
        ax.axhline(k_est, color='red')
        ax.axhline(k_est / 2., ls=':', color='r')

        ax = fig.add_subplot(312)
        ax.plot(d, BIC, '-k', label='BIC')
        ax.legend(loc=2)
        ax = fig.add_subplot(313)
        ax.plot(d, AIC, '-r', label='AIC')
        ax.legend(loc=2)

        plt.savefig(kwargs["outdir"] + "/debugplot.png")

        plt.close(fig)
    best_poly_order = np.argmin(AIC)
    best_poly = all_polys[best_poly_order]
    return (best_poly(xx))
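The p[::-1] reversal suggests P is numpy.polynomial.Polynomial, which takes coefficients lowest order first, while ma.polyfit returns them highest order first. A quick check of that convention (the identity of P is an assumption):

import numpy as np
import numpy.ma as ma
from numpy.polynomial import Polynomial as P

xx = np.arange(10, dtype=float)
yy = 1.0 + 2.0 * xx + 3.0 * xx**2
p = ma.polyfit(xx, yy, 2)    # ~[3., 2., 1.], highest order first
poly = P(p[::-1])            # Polynomial expects [1., 2., 3.]
assert np.allclose(poly(xx), yy)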
Example #8
File: ptc.py  Project: nhaddad/pydtk
def ptcloglog(imagelist, *coor, **kargs):
    """
    TODO: Need to be finished!!

    Perform a PTC plot for flat fields at different light levels.

    The first image must be a bias.

    The FPN is not eliminated. This curve should illustrate the different
    regions of a typical detector.

    """
    signal = []
    variance = []
    stddev = []

    x1, x2, y1, y2 = imagelist[0].get_windowcoor(*coor)
    print("No error")

    # Read in bias
    bias = imagelist[0]

    # For all images, compute signal, std and variance
    for image in imagelist[1:]:
        ff = image
        ff = ff - bias

        signal.append(np.mean(ff[x1:x2, y1:y2]))
        variance.append(np.var(ff[x1:x2, y1:y2]))
        stddev.append(np.std(ff[x1:x2, y1:y2]))
        print(f"Signal: {signal[-1]}   Variance: {variance[-1]}")

    # drop the last points (presumably near saturation) from the linear fit
    coefts = ma.polyfit(signal[:-3], variance[:-3], 1)
    polyts = coefts[0] * np.array(signal) + coefts[1]
    print(1 / coefts[0], coefts[1])

    # plot variance v/s signal
    plt.figure()
    # plot(meansig,masked_variance,'b.')
    # plot(signal, variance, 'b.')
    # plot(signal[:-3],polyts[:-3],'r-')

    # Plot the curves
    plt.loglog(signal, stddev, 'r')
Example #9
def masked_polyfit(myTime, data, order, mask, firstNan, fit=False):
    '''
    using standard polyfit
    '''
    maskedY = ma.masked_array(data, mask=mask)
    full = np.where(firstNan > order)[0]  # columns with enough valid samples

    # initialize the coefficient array: masked (NaN) everywhere except the
    # columns that can actually be fit
    coef = ma.zeros((order + 1, data.shape[1])) + np.nan
    coef.mask = np.ones(coef.shape)
    coef.mask[:, full] = 0

    if full.size > 0:
        print('full shape', full.shape)
        print('time shape', myTime.shape)
        print('mask shape', mask.shape)
        coef[:, full] = ma.polyfit(myTime, maskedY[:, full], order)

    if fit:
        fitX = np.tile(myTime, (data.shape[1], 1))
        fit = np.polyval(coef, fitX.T)
        return coef, fit
    return coef
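ma.polyfit, like np.polyfit, fits every column of a 2-D y in one call, which is what the coef[:, full] = ... line relies on. A minimal illustration:

import numpy as np
import numpy.ma as ma

t = np.arange(10, dtype=float)
y = ma.masked_invalid(np.column_stack([2.0 * t + 1.0, -0.5 * t + 3.0]))
coef = ma.polyfit(t, y, 1)   # shape (2, 2): one coefficient column per series
print(coef)

A masked value in any column appears to drop that row from every fit, which would explain why the function restricts the call to sufficiently complete columns.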
Example #10
def two_layer_treatment(mvp_dict, x_sills, x_ranges):
    """Estimate g-prime and interface depth

    Inputs
    ------
    mvp_dict: dict
        Used throughout this file
    x_sills: 2-tuple of floats
        x position of the start and end of the sill in kilometres
    x_ranges: 2-tuple of floats
        x position of location where output is applicable

    For my transect:
        first sill: x_sills = (48, 58), x_ranges = (0, 80)
        second sill: x_sills = (75, 90), x_ranges = (65, 200)

    Returns
    -------
    rho_interface : float
        Density at the "two-layer" interface
    gprime : 1D array
        g * delta_rho/rho
    interface_depth : 1D array
        Depth in metres of "two-layer" interface
    """

    # Simplify names of commonly used variables
    prho = mvp_dict['prho'].copy()
    z_c = mvp_dict['z_c'].copy()
    x = mvp_dict['dist_flat']

    bottom = np.array(mvp_dict['bottom'])
    sill_inds = np.where(np.logical_and(x > x_sills[0], x < x_sills[1]))[0]

    # Find interface of mode-1 wave for each profile in sill_inds
    rho_interfaces = np.zeros_like(sill_inds, dtype='float')
    for i, ind in enumerate(sill_inds):
        # Interpolate horizontal mode structure against density
        f = interp1d(mvp_dict['hori_0'][ind, :], prho[ind, :])
        # Find density of zero crossing of horizontal structure
        rho_interfaces[i] = f(0)

    rho_interface = np.nanmean(rho_interfaces)

    # Find depth of rho_interface along transect
    x_in, y_in, z_in = x, z_c, prho
    interface_depth = get_contour(x_in, y_in, z_in, rho_interface)

    # Preallocate results to keep
    gprime = np.full_like(x_in, np.nan)

    # Find average density in each layer using linear fit
    for i, rho_i in enumerate(mvp_dict['prho']):
        top_layer_inds = z_c <= interface_depth[i]
        bot_layer_inds = z_c > interface_depth[i]
        top_z = z_c[top_layer_inds]
        bot_z = z_c[bot_layer_inds]
        top_rho = prho[i, :][top_layer_inds]
        bot_rho = prho[i, :][bot_layer_inds]

        if (~top_rho.mask).sum() < 3 or (~bot_rho.mask).sum() < 3:
            # Not enough values to do linear fit
            continue

        p_top = ma.polyfit(top_z, top_rho, 1)
        p_bot = ma.polyfit(bot_z, bot_rho, 1)

        top_rho_avg = np.polyval(p_top, interface_depth[i]/2)
        bot_rho_avg = np.polyval(p_bot, (bottom[i] + interface_depth[i])/2)

        gprime[i] = 9.81*(bot_rho_avg - top_rho_avg)/bot_rho_avg

    # Remove output where inappropriate
    inappropriate_inds = np.logical_or(x < x_ranges[0], x > x_ranges[1])
    gprime[inappropriate_inds] = np.nan
    interface_depth[inappropriate_inds] = np.nan

    return rho_interface, gprime, interface_depth
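The final step is the standard reduced gravity g' = g * delta_rho / rho. As a worked number with illustrative densities (not values from the data):

g = 9.81                # m/s^2
top_rho_avg = 1024.0    # kg/m^3, illustrative
bot_rho_avg = 1025.5    # kg/m^3, illustrative
gprime = g * (bot_rho_avg - top_rho_avg) / bot_rho_avg
print(gprime)           # ~0.014 m/s^2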
Example #11
def qqplot(data, distrib=ssd.norm, alpha=0.4, beta=0.4, fsp=None, plot_line=True, **kwargs):
    """
    Returns a quantile-quantile plot with theoretical quantiles in abscissae, and
    experimental quantiles in ordinates.
    The experimental quantiles are estimated through the equation:
    
    .. math::
       q_i = \\frac{i-\\alpha}{n-\\alpha-\\beta}

    where :math:`i` is the rank order statistic, :math:`n` the number of 
    unmasked data and :math:`\\alpha` and :math:`\\beta` two parameters between 0
    and 1. The default :math:`\\alpha=\\beta=0.4` gives approximately unbiased
    estimates of the quantiles.
    
    
    Parameters
    ----------
    data : array
        Input data
    distrib : {norm, function}, optional
        Theoretical distribution used to compute the expected quantiles.
        If None, use a normal distribution.
    alpha : {float}, optional
        Coefficient for the computation of plotting positions.
    beta : {float}, optional
        Coefficient for the computation of plotting positions.
    fsp : {None, Subplot}, optional
        Subplot where to plot. If None, use the current axes.
    plot_line : {True, False}
        Whether to compute a regression line.

    Returns
    -------
    plotted : :class:`matplotlib.lines.Line2D`
        Plotted data
    lines :  :class:`matplotlib.lines.Line2D`
        Plotted regression line
    (a,b) : tuple
        Slope and intercept of the regression line.

    Notes
    -----
    * The ``distrib`` parameter must be a function with a :meth:`.ppf` method.
    * The input data is ravelled beforehand.

    See Also
    --------
    scipy.stats.mstats.mquantiles
        Computes quantiles from a population.
    """
    data = np.ravel(data)
    qq = qqcalc(data, distrib=distrib, alpha=alpha, beta=beta)
    #
    if fsp is None:
        fsp = pyplot.gca()
    if not len(kwargs):
        kwargs.update(marker="o", c="k", ls="")
    plotted = fsp.plot(qq, data, **kwargs)
    #
    if plot_line:
        (a, b) = ma.polyfit(qq, data, 1)
        xlims = fsp.get_xlim()
        regline = np.polyval((a, b), xlims)
        lines = fsp.plot(xlims, regline, "k:")
        return (plotted, lines, (a, b))
    return plotted
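qqcalc is not shown. A minimal stand-in consistent with the plotting-position formula in the docstring (the rank-based alignment with the unsorted input is an assumption):

import numpy as np
import numpy.ma as ma
import scipy.stats.distributions as ssd

def qqcalc(data, distrib=ssd.norm, alpha=0.4, beta=0.4):
    # hypothetical stand-in: theoretical quantiles aligned with input order
    data = ma.masked_invalid(data)
    n = data.count()
    ranks = data.argsort().argsort() + 1            # rank order statistic i
    positions = (ranks - alpha) / (n - alpha - beta)
    return distrib.ppf(positions)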
Example #13
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

data = np.loadtxt('faulty_data.dat')
plt.plot(data[:, 0], data[:, 1], 'o')

x = data[:, 0]
y = data[:, 1]
# mask the obviously faulty points
mask = y > 35
y_m = ma.masked_array(y, mask)
x_m = ma.masked_array(x, mask)

# compare an ordinary fit against one that ignores the masked points
fit_orig = np.polyfit(x, y, 2)
fit_masked = ma.polyfit(x_m, y_m, 2)

x_i = np.linspace(-6, 6, 35)
plt.plot(x_i, np.polyval(fit_orig, x_i), label='original')
plt.plot(x_i, np.polyval(fit_masked, x_i), label='masked')
plt.legend()
plt.show()
Example #14
    def run(self,**keyval):
        """ Method to calculate the continuum from the given masked spectrum.
            If search=True is given as an argument then the algorithm will
            iterate through the different order splines to find the best fit,
            based on noise level.

            Parameters
            ----------
            keyval : dictionary
                Dictionary containing the keyword value pair arguments

            Returns
            -------
            numpy array containing the best fit continuum

            Notes
            -----
            Arguments for the run method:

            - search : bool, whether or not to search for the best fit. Default: False
            - deg : int, the degree of polynomial to use. Default: 1
        """
        # set up the data elements
        args = {"x": self.x,
                "y": ma.fix_invalid(self.y, fill_value=0.0),
                # the mask uses True for *bad* values, so invert it to get a
                # weight of 1.0 for good points and 0.0 for masked ones
                "w": (~self.y.mask).astype(float)}

        # get the given arguments
        search = False
        noise = None
        maxchisq = 3.0
        if "search" in keyval:
            search = keyval["search"]
        if "noise" in keyval:
            noise = keyval["noise"]
        if "chisq" in keyval:
            maxchisq = keyval["chisq"]
        for arg in ["deg"]:
            if arg in keyval:
                args[arg] = keyval[arg]
        # if searching for the best fit
        # limited to 3rd order as 4th and 5th order could fit weak wide lines 
        if search:
            chisq = {0:1000.,
                     1:1000.,
                     2:1000.,
                     3:1000.}
            # iterate over each possible order
            for order in chisq:
                args["deg"] = order
                
                pfit = np.polyfit(**args)
                if len(pfit) == 1:
                    numpar = 1
                else:
                    # find the number of free parameters
                    # note that if a coefficient is << the max coefficient
                    # it is not considered a free parameter as it has very little effect on the fit
                    numpar = 0
                    mxpar = max(abs(pfit))
                    for i in range(len(pfit)):
                        if abs(mxpar/pfit[i]) < 1000.:
                            numpar += 1
                fit = np.polyval(pfit,self.x)
                chisq[order] = (stats.reducedchisquared(self.y, fit, numpar, noise), numpar)
            # find the best fit, based on number of free parameters and chisq
            mv = 1000.
            order = 0
            for k in chisq:
                if chisq[k][0] < mv and (mv - chisq[k][0]) / mv > 0.2:
                    mv = chisq[k][0]
                    order = k
            if mv > maxchisq:
                logging.warning("No good fit for continuum found")
                return None
            args["deg"] = mv
            logging.info("Using polynomial fit of order %i with chi^2 of %f" % (order, mv))
            # do the final fit
            pfit = np.polyfit(**args)
            fit = np.polyval(pfit,self.x)
        else:
            # do the fit with the given parameters
            pfit = ma.polyfit(**args)
            fit = np.polyval(pfit,self.x)

        return fit
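A small demonstration of the weighting trick used in args above (hypothetical data): a zero weight removes a masked sample from np.polyfit without changing the array length:

import numpy as np
import numpy.ma as ma

x = np.arange(6, dtype=float)
y = ma.masked_array(2.0 * x + 1.0, mask=[0, 0, 1, 0, 0, 0])
w = (~y.mask).astype(float)               # 1.0 for good samples, 0.0 for masked
pfit = np.polyfit(x, ma.filled(y, 0.0), 1, w=w)
print(pfit)                               # ~[2., 1.] despite the filled zero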