예제 #1
0
def plotting(file, ext):
    """Plot the average daily "Sleep"/"Eat" profile for each month and save it.

    ``file`` is read as a CSV with a two-row column header and the first
    column as index. Index entries are parsed as ``%H:%M:%S`` times and the
    first column level as ``%Y-%m-%d`` dates; the second level is expected to
    contain the labels "Sleep" and "Eat". For every calendar month found in
    the column dates, both activities are averaged over that month's columns,
    LOWESS-smoothed and drawn on one figure, which is written to
    ``b2_TimeSeries/Activity_<month number>.<ext>``.

    Parameters
    ----------
    file : str or file-like
        Anything ``pd.read_csv`` accepts.
    ext : str
        Output file extension, without the leading dot.

    Notes
    -----
    The smoothed curves are plotted against a fixed one-minute grid
    (00:00-23:59), so the input is assumed to hold one row per minute of the
    day - TODO confirm against the data producer.
    """
    ts = pd.read_csv(file, header=[0, 1], index_col=[0])
    ts.index = [dt.datetime.strptime(x, "%H:%M:%S").time() for x in ts.index]
    # ``set_levels(..., inplace=True)`` and a positional ``level`` are not
    # accepted by pandas 2.x; assigning the returned index works on old and
    # new pandas alike.
    ts.columns = ts.columns.set_levels(
        [dt.datetime.strptime(x, "%Y-%m-%d").date() for x in ts.columns.levels[0].values],
        level=0,
    )

    # x values (fractional hours) used by the smoother
    xtime = [int(x.hour) + int(x.minute) / 60 for x in ts.index]
    # one-minute datetime grid used as the plotting x axis
    timeind = pd.date_range("00:00", "23:59", freq="min").to_pydatetime()

    column_dates = ts.columns.get_level_values(0)

    # for each month in the data, construct a df of just that month
    for monthnum in sorted({d.month for d in column_dates}):
        motxt = time.strftime("%B", time.strptime(str(monthnum), "%m"))
        # Build the mask per actual column instead of doubling a per-date
        # mask, so it stays aligned whatever the column order or the number
        # of activity columns per date.
        moflag = [d.month == monthnum for d in column_dates]
        onemo = ts.loc[:, moflag]

        sleep_cols = onemo.xs("Sleep", level=1, axis=1)
        eat_cols = onemo.xs("Eat", level=1, axis=1)
        sleepy = sleep_cols.sum(axis=1) / sleep_cols.shape[1]
        eaty = eat_cols.sum(axis=1) / eat_cols.shape[1]

        # begin plotting
        fig = plt.figure(figsize=(18, 6))
        ax = fig.add_subplot(111)

        # Plot Sleep
        filtereds = lowess(sleepy, xtime, is_sorted=True, frac=0.025, it=0)
        ax.plot(timeind, filtereds[:, 1], "b", linewidth=2, label="Sleeping")
        ax.fill_between(timeind, 0, filtereds[:, 1], alpha=0.3, facecolor="b")

        # Plot Eat
        filterede = lowess(eaty, xtime, is_sorted=True, frac=0.025, it=0)
        ax.plot(timeind, filterede[:, 1], "orange", linewidth=2, label="Eating")
        ax.fill_between(timeind, 0, filterede[:, 1], alpha=0.3, facecolor="orange")

        # Axis formatting
        xax = ax.get_xaxis()
        xax.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
        xax.set_major_formatter(mdates.DateFormatter("%H:%M"))
        ax.set_title("Activity Fraction at a Given Time of Day", fontsize="xx-large")
        ax.text("16:00", 0.9, motxt, fontsize="xx-large", color="k", fontweight="bold")
        ax.legend(fontsize="x-large")
        ax.set_ylim(0, 1.1)
        fig.autofmt_xdate()
        filename = "b2_TimeSeries/Activity_" + str(monthnum) + "." + ext
        fig.savefig(filename)
        # Release the figure; otherwise one open figure accumulates per month.
        plt.close(fig)
예제 #2
0
파일: extra.py 프로젝트: rgerkin/trillion
def loess(x,y,frac=0.2,it=None,scatter=True):
    """Sort the samples by ``x`` and return the statsmodels LOWESS fit.

    ``x`` and ``y`` are copied into arrays, ``y`` is reordered to follow the
    ascending order of ``x``, and the pair is handed to ``lowess``. ``it`` is
    only forwarded when it is not None (the original author notes that
    setting it helps when the output contains NaNs). ``scatter`` is accepted
    but not used here.

    Returns whatever ``lowess`` returns for the sorted data.
    """
    from statsmodels.nonparametric.smoothers_lowess import lowess
    ys = np.array(y)
    xs = np.array(x)
    ys = ys[xs.argsort()]  # reorder y to match ascending x
    xs.sort()  # in place; xs is a private copy
    if it is None:
        return lowess(ys, xs, frac=frac)
    return lowess(ys, xs, frac=frac, it=it)
예제 #3
0
    def test_iter(self):
        """Compare lowess with ``it=0`` and ``it=3`` against stored references.

        The inputs (``x``, ``y``) and expected outputs (``out_0``, ``out_3``)
        are columns of ``test_lowess_iter.csv`` under ``rpath``.
        """
        rfile = os.path.join(rpath, "test_lowess_iter.csv")
        # Use a context manager so the handle is closed instead of leaked.
        with open(rfile, "rb") as fh:
            test_data = np.genfromtxt(fh, delimiter=",", names=True)

        expected_lowess_no_iter = np.array([test_data["x"], test_data["out_0"]]).T
        expected_lowess_3_iter = np.array([test_data["x"], test_data["out_3"]]).T

        actual_lowess_no_iter = lowess(test_data["y"], test_data["x"], it=0)
        actual_lowess_3_iter = lowess(test_data["y"], test_data["x"], it=3)

        assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter, decimal=testdec)
        assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter, decimal=testdec)
예제 #4
0
    def test_frac(self):
        """Compare lowess with ``frac=2/3`` and ``frac=1/5`` against references.

        The inputs (``x``, ``y``) and expected outputs (``out_2_3``,
        ``out_1_5``) are columns of ``test_lowess_frac.csv`` under ``rpath``.
        The 2/3 case is checked to one decimal less than ``testdec``.
        """
        rfile = os.path.join(rpath, "test_lowess_frac.csv")
        # Use a context manager so the handle is closed instead of leaked.
        with open(rfile, "rb") as fh:
            test_data = np.genfromtxt(fh, delimiter=",", names=True)

        expected_lowess_23 = np.array([test_data["x"], test_data["out_2_3"]]).T
        expected_lowess_15 = np.array([test_data["x"], test_data["out_1_5"]]).T

        actual_lowess_23 = lowess(test_data["y"], test_data["x"], frac=2.0 / 3)
        actual_lowess_15 = lowess(test_data["y"], test_data["x"], frac=1.0 / 5)

        assert_almost_equal(expected_lowess_23, actual_lowess_23, decimal=testdec - 1)
        assert_almost_equal(expected_lowess_15, actual_lowess_15, decimal=testdec)
예제 #5
0
    def test_iter(self):
        """Compare lowess with ``it = 0`` and ``it = 3`` against stored references.

        The inputs ('x', 'y') and expected outputs ('out_0', 'out_3') are
        columns of 'test_lowess_iter.csv' under ``rpath``.
        """
        rfile = os.path.join(rpath, 'test_lowess_iter.csv')
        # Use a context manager so the handle is closed instead of leaked.
        with open(rfile, 'rb') as fh:
            test_data = np.genfromtxt(fh, delimiter = ',', names = True)

        expected_lowess_no_iter = np.array([test_data['x'], test_data['out_0']]).T
        expected_lowess_3_iter = np.array([test_data['x'], test_data['out_3']]).T

        actual_lowess_no_iter = lowess(test_data['y'], test_data['x'], it = 0)
        actual_lowess_3_iter = lowess(test_data['y'], test_data['x'], it = 3)

        assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter, decimal = testdec)
        assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter, decimal = testdec)
예제 #6
0
    def test_frac(self):
        """Compare lowess with ``frac = 2/3`` and ``frac = 1/5`` against references.

        The inputs ('x', 'y') and expected outputs ('out_2_3', 'out_1_5') are
        columns of 'test_lowess_frac.csv' under ``rpath``. The 2/3 case is
        checked to one decimal less than ``testdec``.
        """
        rfile = os.path.join(rpath, 'test_lowess_frac.csv')
        # Use a context manager so the handle is closed instead of leaked.
        with open(rfile, 'rb') as fh:
            test_data = np.genfromtxt(fh, delimiter = ',', names = True)

        expected_lowess_23 = np.array([test_data['x'], test_data['out_2_3']]).T
        expected_lowess_15 = np.array([test_data['x'], test_data['out_1_5']]).T

        actual_lowess_23 = lowess(test_data['y'], test_data['x'] ,frac = 2./3)
        actual_lowess_15 = lowess(test_data['y'], test_data['x'] ,frac = 1./5)

        assert_almost_equal(expected_lowess_23, actual_lowess_23, decimal = testdec-1)
        assert_almost_equal(expected_lowess_15, actual_lowess_15, decimal = testdec)
예제 #7
0
    def test_delta(self):
        """Compare lowess for three ``delta`` settings against stored references.

        With ``frac=0.1`` throughout, the cases are: default delta
        (``out_0``), delta of 1% of the x range (``out_Rdef``), and delta
        just above 1 (``out_1``). Data come from ``test_lowess_delta.csv``
        under ``rpath``. The last case is checked to 10 decimals rather than
        ``testdec``.
        """
        rfile = os.path.join(rpath, "test_lowess_delta.csv")
        # Use a context manager so the handle is closed instead of leaked.
        with open(rfile, "rb") as fh:
            test_data = np.genfromtxt(fh, delimiter=",", names=True)

        expected_lowess_del0 = np.array([test_data["x"], test_data["out_0"]]).T
        expected_lowess_delRdef = np.array([test_data["x"], test_data["out_Rdef"]]).T
        expected_lowess_del1 = np.array([test_data["x"], test_data["out_1"]]).T

        actual_lowess_del0 = lowess(test_data["y"], test_data["x"], frac=0.1)
        actual_lowess_delRdef = lowess(test_data["y"], test_data["x"], frac=0.1, delta=0.01 * np.ptp(test_data["x"]))
        actual_lowess_del1 = lowess(test_data["y"], test_data["x"], frac=0.1, delta=1.0 + 1e-10)

        assert_almost_equal(expected_lowess_del0, actual_lowess_del0, decimal=testdec)
        assert_almost_equal(expected_lowess_delRdef, actual_lowess_delRdef, decimal=testdec)
        assert_almost_equal(expected_lowess_del1, actual_lowess_del1, decimal=10)  # testdec)
def compute_normalization(G_auto_conf, gctrack, conf, bdy):
    """Fit a per-row LOWESS trend against ``gctrack`` and normalise by it.

    NOTE: this function uses Python 2 syntax (print statements, ``xrange``).

    For every row of ``G_auto_conf`` a LOWESS curve (frac=0.05) is fitted
    with the row as the response and ``gctrack[0:auto_bdy][conf[0:auto_bdy]]``
    as the explanatory variable, where ``auto_bdy = bdy[21][1]``.

    Returns
    -------
    gceffect : array shaped like ``G_auto_conf``
        The fitted LOWESS values, one row per input row.
    gcnormprofile : array
        ``G_auto_conf`` divided by ``gceffect`` scaled to row mean 1
        (NaN scale factors are replaced by 1).
    normprofile : array
        ``gcnormprofile`` divided by the across-row average of the
        row-mean-normalised profiles; NaN results fall back to the
        corresponding ``gcnormprofile`` values.
    """
    # presumably the end index of the autosomal bins (name suggests so) - TODO confirm
    auto_bdy = bdy[21][1]
    print G_auto_conf.shape
    print "this takes time...."
    sys.stdout.flush()
    t0 = time.time()
    # NOTE(review): gceffect inherits G_auto_conf's dtype via zeros_like -
    # confirm the input is floating point so the fitted values are not truncated.
    gceffect = np.zeros_like(G_auto_conf)
    ncells, nbins = G_auto_conf.shape
    for cell in xrange(ncells):
        # progress output every fifth row
        if cell % 5 == 0:
            print cell, "cells",
            sys.stdout.flush()
        # return_sorted=False: only the fitted values are returned, in input order
        gceffect[cell, :] = lowess(G_auto_conf[cell],
                                   gctrack[0:auto_bdy][conf[0:auto_bdy]],
                                   frac=0.05,
                                   return_sorted=False)
    print
    # Python 2 print statement: prints the elapsed minutes followed by "mins"
    print(time.time() - t0) / 60, "mins"
    # scale each row of the fitted trend to mean 1
    gcnorm = gceffect / gceffect.mean(axis=1)[:, np.newaxis]
    gcnan = np.isnan(gcnorm)
    # where the scale factor is undefined, divide by 1 (leave the data as is)
    gcnorm[gcnan] = 1
    gcnormprofile = G_auto_conf / gcnorm

    # average over rows of the row-mean-normalised profiles
    avgprofile = (gcnormprofile /
                  gcnormprofile.mean(axis=1)[:, np.newaxis]).mean(axis=0)

    normprofile = gcnormprofile / avgprofile
    nannorm = np.isnan(normprofile)
    # fall back to the un-averaged value where the division produced NaN
    normprofile[nannorm] = gcnormprofile[nannorm]

    return gceffect, gcnormprofile, normprofile
예제 #9
0
def plot_spectras(df, outfile=None):
    """
    Plot the per-row median of a spectra dataframe with a LOWESS fit.

    Parameters
    ----------
    df: pd.DataFrame
        dataframe containing spectras; its index supplies the x values
        (labelled as frequency in Hz) and is passed to the smoother with
        ``is_sorted=True``, i.e. it is assumed to be sorted already
    outfile (optional): string
        filepath for saving plot

    Returns:
    --------
    The pyplot figure (figure number 1) that was drawn on. The figure is
    additionally saved to ``outfile`` when one is given.
    """
    # the median across columns is both plotted and smoothed; compute it once
    median_spectrum = df.median(axis=1)

    # plot data
    spectra_fig = plt.figure(1)
    plt.plot(median_spectrum, 'k.', alpha=.05,
             label='median data with QC flag 0')
    # plot loess smoothed line (the first 40 smoothed points are not drawn)
    smoothed = lowess(median_spectrum.values, df.index, is_sorted=True,
                      frac=0.01, it=0)
    plt.plot(smoothed[40:, 0], smoothed[40:, 1], 'b', label='lowess fit')
    # tweak plot
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('f (Hz)')
    plt.ylabel('spectra (T)')
    plt.legend()
    plt.tight_layout()
    # save plot if desired
    if outfile:
        plt.savefig(outfile, dpi=300, bbox_inches='tight')
    # The docstring always promised the figure; previously nothing was returned.
    return spectra_fig
예제 #10
0
def apply_lowess_filter(x, y):
    """Plot a LOWESS-smoothed curve of ``y`` against ``x`` via pyplot.

    The data are passed with ``is_sorted=True`` (so ``x`` is taken to be
    sorted already), ``frac=0.025`` and no robustifying iterations. The
    curve is labelled "LOWESS"; nothing is returned.
    """
    # LOWESS vs. LOESS: the difference seems subtle, and possibly more a
    # matter of convention than anything else.
    from statsmodels.nonparametric.smoothers_lowess import lowess

    smoothed = lowess(y, x, is_sorted=True, frac=0.025, it=0)
    smoothed_x = smoothed[:, 0]
    smoothed_y = smoothed[:, 1]
    plt.plot(smoothed_x, smoothed_y, label="LOWESS")
예제 #11
0
파일: utilities.py 프로젝트: dbath/fishMAD
def smooth(y, x='notDefined', frac=0.05):
    """Return the LOWESS-smoothed values of ``y``.

    Parameters
    ----------
    y : sequence
        Values to smooth.
    x : sequence, optional
        Positions of the samples. When omitted (the ``'notDefined'``
        sentinel, or None) the positions ``0 .. len(y) - 1`` are used.
    frac : float
        Fraction of the data used for each local fit, forwarded to
        ``lowess``.

    Returns
    -------
    The second column of the ``lowess`` result (the fitted values).
    """
    # Import unconditionally. The previous guard, ``if lowess not in
    # sys.modules``, read the function-local name ``lowess`` before the import
    # bound it, so every call raised UnboundLocalError. Repeated imports are
    # cheap because Python caches modules.
    from statsmodels.nonparametric.smoothers_lowess import lowess

    # Test for the sentinel explicitly: ``x == 'notDefined'`` on an array
    # argument is an element-wise comparison, not a single boolean.
    if x is None or (isinstance(x, str) and x == 'notDefined'):
        x = range(len(y))
    filtered = lowess(y, x, frac=frac)
    return filtered[:, 1]
예제 #12
0
def csv_each_gridcell(obs_data, annual_obs, rcp85, rebased_26, rebased_45,
                      rebased_60, rebased_85):
    '''
    Produce a csv file for each gridcell containing observations, lowess-smoothed observations,
    and the four RCP scenarios for each 1x1 lat/lon gridcell on the globe. Add a header line
    that includes the header metadata.

    NOTE: this function uses Python 2 syntax (print statement).

    What the code reads:
      - obs_data['years'], obs_data['unc'][lat, lon], obs_data['lats'], obs_data['lons']
      - annual_obs[lat, lon] and rebased_XX[lat, lon] (each rounded to 2 decimals)
      - rcp85['years'] as the model year axis
      - gridcell_metadata['full_name'][n], where gridcell_metadata is NOT a
        parameter and must exist in an enclosing (presumably module) scope

    Side effects: changes the process working directory to a hard-coded
    absolute path on every iteration and writes one csv per gridcell there.
    Returns nothing.
    '''
    # running gridcell counter; also indexes gridcell_metadata['full_name']
    n = 0
    for lat in range(0, 180):
        for lon in range(0, 360):
            obs = pd.DataFrame({
                'year':
                obs_data['years'],
                'obs_anoms':
                annual_obs[lat, lon].round(2),
                'uncertainty':
                obs_data['unc'][lat, lon].round(2)
            })
            model = pd.DataFrame({
                'year': rcp85['years'],
                'rcp26': rebased_26[lat, lon].round(2),
                'rcp45': rebased_45[lat, lon].round(2),
                'rcp60': rebased_60[lat, lon].round(2),
                'rcp85': rebased_85[lat, lon].round(2)
            })

            # outer join keeps years that appear in only one of the two frames
            result = pd.merge(obs, model, how='outer', on=['year'])
            first_valid = result['obs_anoms'].first_valid_index()
            last_valid_obs = result['obs_anoms'].last_valid_index()
            # Smoothing fraction: 10 divided by the span between the first and
            # last index labels holding a valid observation.
            # NOTE(review): these are index labels of the merged frame, not
            # year values; the span is 0 with a single valid observation and
            # both are None when there is none - confirm inputs rule this out.
            bwidth = 10. / (last_valid_obs - first_valid)
            smoothed_data = lowess(result['obs_anoms'],
                                   result['year'],
                                   is_sorted=True,
                                   frac=bwidth)
            # column 0 of the lowess output is the x value (year), column 1 the fit
            smooth = pd.DataFrame({
                'year':
                smoothed_data[:, 0],
                'smoothed_anoms':
                smoothed_data[:, 1].round(2)
            })
            result = pd.merge(result, smooth, how='outer', on=['year'])
            # fix the output column order
            result = result[[
                'year', 'obs_anoms', 'smoothed_anoms', 'uncertainty', 'rcp26',
                'rcp45', 'rcp60', 'rcp85'
            ]]
            # the metadata string is written as the label of the index column
            header_lines = gridcell_metadata['full_name'][n]
            lat_label = obs_data['lats'][lat]
            lon_label = obs_data['lons'][lon]
            print 'Saving gridcell ' + str(n) + ' of 64800'

            # NOTE(review): hard-coded, user-specific output directory
            os.chdir(
                '/Users/hausfath/Desktop/Climate Science/Carbon Brief/Warming Map/csvs/'
            )
            result.to_csv('gridcell_' + str(lat_label) + '_' + str(lon_label) +
                          '.csv',
                          header=True,
                          index=True,
                          index_label=header_lines,
                          encoding='utf-8')
            n += 1
예제 #13
0
def select_rdark(data,
                 rdark_list_selection='intercept',
                 pixel_range=(0, 1000),
                 lowess_frac=0.5,
                 pixel_idx=100):
    """Pick a single dark value from the sorted finite entries of ``data``.

    Parameters
    ----------
    data : numpy array
        Input values; non-finite entries are ignored and the rest are
        flattened and sorted ascending.
    rdark_list_selection : str
        'intercept'      - intercept of ``lsqfity`` over the selected range;
        'smooth'         - first value of a LOWESS fit over the selected range;
        'absolute_index' - the sorted value at ``pixel_idx``;
        anything else    - the smallest finite value.
    pixel_range : sequence of two ints
        Start and stop positions in the sorted values. The stop is clipped
        to the number of finite values. The argument is never modified.
    lowess_frac : float
        ``frac`` forwarded to ``lowess`` in 'smooth' mode.
    pixel_idx : int
        Position used in 'absolute_index' mode.

    Returns
    -------
    The selected value.
    """
    from numpy import isfinite, sort, linspace

    dsorted = sort(data[isfinite(data)], axis=None)

    # Clip into locals. The previous code assigned into ``pixel_range``
    # itself, which permanently shrank the shared default list (and the
    # caller's own list) after the first call with a short input.
    start = pixel_range[0]
    stop = min(len(dsorted), pixel_range[1])
    rdark_list = dsorted[start:stop]

    if rdark_list_selection == 'intercept':
        # Imported only where needed so the other modes do not depend on it.
        import acolite as pp
        xi = linspace(start, stop, num=stop - start)
        _, intercept, _, _, _ = pp.shared.regression.lsqfity(xi, rdark_list)
        rdark_sel = intercept
    elif rdark_list_selection == 'smooth':
        from statsmodels.nonparametric.smoothers_lowess import lowess
        xi = linspace(start, stop, num=stop - start)
        rdark_smooth = lowess(rdark_list, xi, frac=lowess_frac)[:, 1]
        rdark_sel = rdark_smooth[0]
    elif rdark_list_selection == 'absolute_index':
        rdark_sel = dsorted[pixel_idx]
    else:
        rdark_sel = dsorted[0]

    return rdark_sel
예제 #14
0
 def plot(self, eid, xlab='sec'):
     """Plot the 'Value' series of one EID with a line fit and a LOWESS curve.

     Rows whose 'EID' equals ``eid`` are selected. Time is computed as
     ``iteration * TAU``; ``fit_line`` is run on (time, value) and ``lowess``
     on every 100th sample.

     Parameters
     ----------
     eid :
         Value compared against ``self['EID']``.
     xlab : str
         'sec' plots against time, 'turn' plots against the iteration
         number; any other value falls back to 'sec'.

     Returns
     -------
     (fig, ax) as created by ``plt.subplots``.
     """
     jj = self['EID'] == eid
     y = self['Value'][jj]
     it = self['iteration'][jj]
     t = it * TAU
     par, err = fit_line(t, y)
     fit = lowess(y[::100], t[::100])

     # The plotted model is par[0] + x * par[1]; the label reports par[1] as slope.
     intercept, slope, slope_err = par[0], par[1], err[1]
     fit_x = fit[:, 0]
     if xlab == 'turn':
         x = it
         # With t = it * TAU, y = a + b*t = a + (b*TAU)*it: only the slope
         # picks up a factor TAU. The intercept must stay as fitted, and the
         # LOWESS abscissa (in seconds) has to be converted to turns too.
         slope, slope_err = TAU * slope, TAU * slope_err
         fit_x = fit_x / TAU
     else:
         x = t
         xlab = 'sec'

     fig, ax = plt.subplots(1, 1)
     ax.plot(x, y, '.')
     ax.plot(fit_x, fit[:, 1], '-k')
     ax.plot(x,
             intercept + x * slope,
             '-r',
             label=r'$slp = {:4.2e} \pm {:4.2e}$ [u/{}]'.format(
                 slope, slope_err, xlab))
     ax.set_ylabel(r'$\sum_i(\vec s_i, \bar n_{})$'.format(eid))
     ax.set_xlabel(xlab)
     ax.ticklabel_format(axis='y',
                         style='sci',
                         scilimits=(0, 0),
                         useMathText=True)
     ax.legend()
     return fig, ax
예제 #15
0
파일: ps.py 프로젝트: chiechie/ADminer
    def predict(self, s_dimension_value, vol=False, window=3, frac=0.2):
        """Return a LOWESS-based prediction for a dimension value, with caching.

        The series is obtained from ``self.value(..., window=True)`` after
        reshaping ``s_dimension_value`` into integer rows of five. Its second
        column is smoothed with ``lowess`` (``it=2``) against the sample
        positions; the last smoothed value is the prediction.

        Parameters
        ----------
        s_dimension_value :
            Dimension value(s); must be convertible to an int array whose
            size is a multiple of five.
        vol : bool
            When true, return ``(pred, stand, real)`` instead of ``pred``
            alone, where ``stand`` is the standard deviation of the smoothed
            values (0 for a single sample) and ``real`` the last raw value.
        window : int
            Only used as part of the cache key here.
        frac : float
            ``frac`` forwarded to ``lowess``.

        Returns
        -------
        ``pred`` or ``(pred, stand, real)``; all zeros for an empty series.
        """
        # The result depends on ``frac`` and its shape on ``vol``, so both are
        # part of the cache key. Previously a scalar cached for vol=False could
        # be handed back to a vol=True caller (and vice versa).
        k = "predict/@%s/%s%d/%s/%s" % (self.ts, s_dimension_value, window,
                                         frac, vol)
        res_p = get_Cache(k)
        if res_p is not None:
            return res_p

        s_dimension_value = np.array(s_dimension_value,
                                     dtype=int).reshape(-1, 5)
        ser = self.value(s_dimension_value, window=True)
        if ser.size == 0:
            pred, stand, real = 0, 0, 0
        else:
            ser_len = ser.shape[0]
            x = np.arange(ser_len).tolist()
            y = ser[:, 1]

            filtered = lowess(y, x, is_sorted=True, frac=frac, it=2)
            smoothed = filtered[:, 1]
            pred = smoothed[-1]
            real = ser[-1, 1]
            # a single sample has no spread
            stand = 0 if ser_len <= 1 else np.std(smoothed)

        res_pred = (pred, stand, real) if vol else pred
        set_Cache(k, res_pred)
        return res_pred
예제 #16
0
    def _plot_smoothed_proportion(
        self,
        ax: plt.Axes,
        clusters: Sequence[Any],
        y_offset: Mapping[Any, float],
        alpha: float = 0.8,
    ) -> Tuple[Mapping[Any, np.ndarray], Mapping[Any, PolyCollection]]:
        """Draw one smoothed, symmetric band per cluster and return the data.

        For each cluster the row ``self._cmat.loc[clust]`` is linearly
        interpolated from the column values of ``self._cmat`` onto a dense
        grid (100 points per unit between the smallest and largest column),
        smoothed with ``lowess`` (``frac=0.3``) and filled between
        ``y_offset[clust] - value`` and ``y_offset[clust] + value``.

        Returns a pair of dicts keyed by cluster: the smoothed values and the
        artists returned by ``ax.fill_between``.
        """
        columns = self._cmat.columns
        start_t, end_t = columns.min(), columns.max()
        known_t = np.array(columns)
        # dense evaluation grid spanning the known time points
        n_grid = int(1 + (end_t - start_t) * 100)
        grid = np.linspace(start_t, end_t, n_grid)

        smoothed_proportion, handles = {}, {}
        for clust in clusters:
            dense = interp1d(known_t, self._cmat.loc[clust])(grid)
            half_width = lowess(dense, grid, frac=0.3, is_sorted=True, return_sorted=False)
            smoothed_proportion[clust] = half_width

            centre = y_offset[clust]
            handles[clust] = ax.fill_between(
                grid,
                centre + half_width,
                centre - half_width,
                color=self.cmap[clust],
                label=clust,
                alpha=alpha,
                edgecolor=None,
            )

        return smoothed_proportion, handles
    def plot(self, df, dataset_name, k, eps, l, estimator, yscale):
        """Plot LOWESS-smoothed curves of every column of ``df`` against the first.

        Parameters
        ----------
        df : DataFrame
            Column 0 holds the x values (passed to ``lowess`` with
            ``is_sorted=True``); every further column is one curve, labelled
            ``k=<column name>``.
        dataset_name : str
            Its last backslash-separated component is used in the title.
        k, l :
            Passed to the title format call but not shown by the template.
        eps :
            Shown in the title.
        estimator : str
            The part before the first '.' becomes the y label.
        yscale :
            ``'log'`` switches the y axis to a base-10 log scale.

        Shows the figure with ``plt.show()``; returns nothing.
        """
        sampling_freq = 100
        for i in range(1, len(df.columns)):
            x_val = df.iloc[:, 0]
            y_val = df.iloc[:, i]

            # lowess documents ``frac`` as a fraction between 0 and 1; cap it
            # so series shorter than ``sampling_freq`` rows stay in range.
            frac = min(1.0, float(sampling_freq) / len(x_val))
            filtered = lowess(y_val, x_val, is_sorted=True, frac=frac, it=3)
            plt.plot(filtered[:, 0],
                     filtered[:, 1],
                     linewidth=1,
                     label='k={0}'.format(df.columns[i]))
        if yscale == 'log':
            # Base 10 is the default log base. The ``basey`` keyword used
            # before was removed from Matplotlib, so it is no longer passed.
            plt.yscale('log')
        plt.ylabel(estimator.split('.')[0])
        plt.xlabel(df.columns[0])
        plt.title('{0}: (eps={2})'.format(
            dataset_name.split('\\')[-1], k, eps, l))
        plt.legend()
        plt.grid(True, lw=0.5, ls='--', c='.75')
        plt.margins(0.005)
        plt.show()
예제 #18
0
def ResidFitted(fitted_model, residuals = None, fits = None, ax = None):
  """
  Draw a residuals-vs-fitted diagnostic plot with a lowess trend line.

  Parameters
  ---------------------------------------------------------
  fitted_model: A fitted linear regression model from the statsmodels package.
                Class: <statsmodels.regression.linear_model.OLS>
                Only consulted for `resid` / `fittedvalues` when the
                corresponding argument below is not supplied.
  residuals: A pandas series of the OLS residuals
  fits: A pandas series of the fitted values from the OLS model
  ax: A specific matplotlib axis. Used if creating subplots;
      a new figure and axis are created when omitted.

  Returns
  ---------------------------------------------------------
  ax: A matplotlib axis object

  The three observations with the largest absolute residuals are annotated
  with their index labels.

  By: Jason Sadowski
  Date: 2019-11-19
  """
  if residuals is None:
    residuals = fitted_model.resid
  if fits is None:
    fits = fitted_model.fittedvalues

  largest = abs(residuals).sort_values(ascending = False)[:3]
  trend = lowess(residuals, fits)

  if ax is None:
    _, ax = plt.subplots()

  ax.scatter(fits, residuals, edgecolors = 'k', facecolors = 'none')
  ax.plot(trend[:, 0], trend[:, 1], color = 'r')
  ax.set_ylabel('Residuals')
  ax.set_xlabel('Fitted Values')
  ax.set_title('Residuals vs. Fitted')
  # dotted zero line across the range of fitted values
  ax.plot([min(fits), max(fits)], [0, 0], color = 'k', linestyle = ':')
  for label in largest.index:
    ax.annotate(label, xy = (fits[label], residuals[label]))
  return ax
예제 #19
0
def residualsVsFitted(results, axes=None):
    """Draw a residuals-vs-fitted plot with a lowess trend line.

    Parameters
    ----------
    results :
        Object exposing ``resid`` and ``fittedvalues`` (pandas series).
    axes : matplotlib axis, optional
        Axis to draw on. When omitted, the font size and figure size
        rcParams are updated, a new figure is created and shown with
        ``plt.show()``.

    The three observations with the largest absolute residuals are annotated
    with their index labels. Returns nothing.
    """
    residuals = results.resid
    fitted = results.fittedvalues
    smoothed = lowess(residuals, fitted)
    top3 = abs(residuals).sort_values(ascending=False)[:3]

    # Identity test instead of ``== None``: equality on array-like arguments
    # is evaluated element-wise and has no single truth value.
    standalone = axes is None
    if standalone:
        plt.rcParams.update({'font.size': 16})
        plt.rcParams["figure.figsize"] = (8, 7)
        _, axes = plt.subplots()

    # The drawing code is shared by both modes; it used to be duplicated.
    axes.scatter(fitted, residuals, edgecolors='k', facecolors='none')
    axes.plot(smoothed[:, 0], smoothed[:, 1], color='r')
    axes.set_ylabel('Residuals')
    axes.set_xlabel('Fitted Values')
    axes.set_title('Residuals vs. Fitted')
    axes.plot([min(fitted), max(fitted)], [0, 0], color='k', linestyle=':', alpha=.3)

    for i in top3.index:
        axes.annotate(i, xy=(fitted[i], residuals[i]))

    if standalone:
        plt.show()
예제 #20
0
def separating_threshold(bins, ys):
    """Locate the valley separating the main early peak from a later peak.

    ``ys`` is LOWESS-smoothed over ``bins`` and its peaks are detected. The
    highest peak located below x = 100 is paired with each later peak in
    turn; the first pair whose void parameter
    ``1 - min_between / sqrt(y_peak1 * y_peak2)`` exceeds 0.7 is returned.

    Returns
    -------
    (index of the early peak, index of the later peak, index of the minimum
    between them, void parameter, smoothed (x, y) array)

    Raises
    ------
    ValueError
        If fewer than two peaks exist, none lies below x = 100, or no pair
        reaches the void threshold.
    """
    smoothing_frac = 0.1  # Different to review paper, see line 120: https://github.com/ellesec/burstanalysis/blob/master/Burst_detection_methods/logisi_pasq_method.R
    s_xs_ys = lowess(ys, bins, smoothing_frac, it=0, delta=0.0, is_sorted=True)
    xs = s_xs_ys[:, 0]
    ys = s_xs_ys[:, 1]
    # Distance set according to supplementary information, Pasquale et al. 2010
    peaks = find_peaks(ys, distance=2)[0]
    x_peaks = xs[peaks]

    if peaks.size <= 1:
        raise ValueError("Didn't find a burst")

    # indices of peaks in first 100ms; a peak there is required
    early_peaks = peaks[np.where(x_peaks < 100)[0]]
    if early_peaks.size == 0:
        raise ValueError("Didn't find a burst")

    # position (within the peak list) and sample index of the tallest early peak
    main_pos = np.argmax(ys[early_peaks])
    main_peak = early_peaks[main_pos]

    void_thresh = 0.7
    for later_peak in peaks[main_pos + 1:]:
        # local minimum between the two peaks (both ends included)
        valley = main_peak + np.argmin(ys[main_peak:later_peak + 1])
        valley_height = ys[valley]
        void = 1 - valley_height / np.sqrt(ys[main_peak] * ys[later_peak])
        if void > void_thresh:
            return main_peak, later_peak, valley, void, s_xs_ys

    raise ValueError("Didn't find a burst")
예제 #21
0
def scaleLocationPlot(results, axes=None):
    """Draw a scale-location plot with a lowess trend line.

    Plots the square root of the absolute internally studentized residuals
    (from ``results.get_influence()``) against ``results.fittedvalues``.

    Parameters
    ----------
    results :
        Object exposing ``fittedvalues``, ``resid`` (used for its index) and
        ``get_influence()``.
    axes : matplotlib axis, optional
        Axis to draw on. When omitted a new figure is created and shown with
        ``plt.show()``.

    The three largest values are annotated with their index labels.
    Returns nothing.
    """
    fitted = results.fittedvalues
    student_residuals = results.get_influence().resid_studentized_internal
    sqrt_student_residuals = pd.Series(np.sqrt(np.abs(student_residuals)))
    # re-use the residuals' index so the annotations match the observations
    sqrt_student_residuals.index = results.resid.index
    smoothed = lowess(sqrt_student_residuals, fitted)
    top3 = abs(sqrt_student_residuals).sort_values(ascending=False)[:3]

    # Identity test instead of ``== None``: equality on array-like arguments
    # is evaluated element-wise and has no single truth value.
    standalone = axes is None
    if standalone:
        _, axes = plt.subplots()

    # The drawing code is shared by both modes; it used to be duplicated.
    axes.scatter(fitted, sqrt_student_residuals, edgecolors='k', facecolors='none')
    axes.plot(smoothed[:, 0], smoothed[:, 1], color='r')
    # Raw string: same text as before, without invalid-escape warnings.
    axes.set_ylabel(r'$\sqrt{|Studentized \ Residuals|}$')
    axes.set_xlabel('Fitted Values')
    axes.set_title('Scale-Location')
    axes.set_ylim(0, max(sqrt_student_residuals) + 0.1)
    for i in top3.index:
        axes.annotate(i, xy=(fitted[i], sqrt_student_residuals[i]))

    if standalone:
        plt.show()
예제 #22
0
def lowess(data, frac=0.15, it=0):
    """Smooth ``data`` against its own positions ``0 .. len(data) - 1``.

    Thin wrapper around ``smoothers_lowess.lowess`` that forwards ``frac``
    and ``it`` and returns only the second column of its result (the fitted
    values).
    """
    positions = list(range(len(data)))
    fitted = smoothers_lowess.lowess(
        endog=data,
        exog=positions,
        frac=frac,
        it=it
    )
    return fitted[:, 1]
예제 #23
0
    def bounds_peaks(self):
        """Finds max/min bounds by tracing along peak locations then smoothing

        Peaks of the trace (and of the negated trace) with height >= 0 are
        located, their values are enlarged by 30 %, linearly interpolated
        over every sample position and finally LOWESS-smoothed. The
        smoothing fraction is ``len(trace) * 30e-5``.

        Returns the pair ``(lim_pos, lim_neg)`` of smoothed bound arrays.
        """
        trace = self._resp_trace
        n_samples = len(trace)

        peaks_pos, _ = find_peaks(trace, height=0)
        peaks_neg, _ = find_peaks(-1 * trace, height=0)

        xx = np.linspace(0, n_samples - 1, n_samples)
        smooth = n_samples * 30e-5

        bounds = []
        for peak_idx in (peaks_pos, peaks_neg):
            # peak values pushed outwards by 30 %, connected across all samples
            connected = np.interp(xx, peak_idx, trace[peak_idx] + (0.3 * trace[peak_idx]))
            bounds.append(lowess(connected, xx, is_sorted=True, frac=smooth)[:, 1])

        lim_pos, lim_neg = bounds
        return lim_pos, lim_neg
예제 #24
0
    def plot_volume(self, lw=1, smoothing=False):
        """Plot excess volume plus summed bid and ask quantities per iteration.

        Parameters
        ----------
        lw :
            Line width used for all three curves.
        smoothing : bool
            When true the excess-volume curve is LOWESS-smoothed and
            labelled 'inventory (loess)'; otherwise the raw values are
            drawn, labelled 'inventory'.

        Shows the figure with ``plt.show()``; returns nothing.
        """
        plt.title('Market Volume')
        plt.xlabel('Iteration')
        plt.ylabel('Quantity')

        if smoothing:
            inventory = lowess(self.info.excess_volume(),
                               self.info.iterations,
                               return_sorted=False)
            inventory_label = 'inventory (loess)'
        else:
            inventory = self.info.excess_volume()
            inventory_label = 'inventory'
        plt.plot(self.info.iterations,
                 inventory,
                 color='black',
                 lw=lw,
                 label=inventory_label)

        for side, colour, label in (('bid', 'green', 'bids'),
                                    ('ask', 'red', 'asks')):
            plt.plot(self.info.iterations,
                     self.info.sum_quantity(side),
                     color=colour,
                     lw=lw,
                     label=label)
        plt.legend()
        plt.show()
예제 #25
0
def component_wise_fit_LLE(inframe, column, frac=0.2, method='lowess'):
    '''Fit a smooth curve of `column` against the 'Row' level, per component.

    NOTE: this function uses Python 2 syntax (print statement).

    'method' can be 'lowess' or 'LLE'.
      - 'lowess': statsmodels lowess with it=10 and the given `frac`
      - 'LLE':    KernelReg with bw='cv_ls' (KernelReg must be available in
                  the enclosing scope; it is not imported here)

    The 'Row' and 'Identifier' index levels of `inframe` are swapped, the
    'Identifier' column is converted from a letter to a number (ord - 97),
    and for every value of the first index level the fitted means are stored
    in `column + '_means'` and plotted.

    Returns the columns [column, column + '_means', column + '_LLEresids'],
    where the residual column is `column` minus the fitted means.
    '''
    from statsmodels.nonparametric.smoothers_lowess import lowess
    frame = inframe.swaplevel('Row', 'Identifier')
    # letters to 0-based numbers: ord('a') == 97
    frame['Identifier'] = frame['Identifier'].map(ord) - 97
    for comp in frame.index.levels[0]:
        cframe = frame.loc[[comp]]#.dropna(subset=[column])
        print 'Component: ', comp
        # colour slot for this component; the modulus keeps it below 12
        cnum = cframe['Identifier'][0] % 12
        rows = cframe.index.get_level_values('Row')
        if method == 'LLE':
            LLE = KernelReg(frame.loc[comp][column], rows, 'c', bw='cv_ls')
            means, mfx = LLE.fit()
        elif method == 'lowess':
            LLE = lowess(cframe[column], rows, it=10, missing='none',
                         frac=frac)
            # column 1 of the lowess output holds the fitted values
            means = LLE[:,1]
        # NOTE(review): for any other `method`, `means` is never assigned
        # before this line.
        frame.loc[[comp], column+'_means'] = means
        # data and fit are drawn with the value on x and the row on y
        plt.plot(frame.loc[comp][column], rows, 's',
                 color=Paired.hex_colors[cnum], zorder=1)
        plt.plot(means, rows, '-', lw=3, color=Paired.hex_colors[cnum],
                 zorder=2)
    frame[column+'_LLEresids'] = frame[column] - frame[column+'_means']
    return frame[[column, column+'_means', column+'_LLEresids']]
예제 #26
0
 def smooth1(y, x):
     """LOWESS-smooth ``y`` against ``x`` after adding tiny random jitter to ``x``.

     Gaussian noise scaled by 1e-12 is added to every ``x`` value before the
     fit (presumably to break ties between equal x values - TODO confirm).
     Uses ``frac=2/3``, no robustifying iterations and ``delta=1.0``.

     Returns the ``lowess`` result sorted by x.
     """
     jitter = 1e-12 * np.random.randn(len(x))
     jittered_x = x + jitter
     return lowess(y, jittered_x, frac=2.0 / 3, it=0, delta=1.0,
                   return_sorted=True)
예제 #27
0
def smoothChunks(mvt, chunkSize):
    """LOWESS-smooth `mvt` in consecutive chunks of `chunkSize` samples.

    NOTE: this function uses Python 2 syntax (print statement).

    Each chunk is smoothed independently against its own sample positions
    (it=2, frac=0.005), negative smoothed values are set to 0, and the
    result is written into an output array with the same shape as `mvt`.
    The last chunk is shortened to end at the final sample.

    Returns the smoothed array.
    """

    smoothed = np.zeros(mvt.shape)

    # float so the chunk count below is computed with true division
    nPts = float(mvt.shape[0])
    nChunks = int(math.ceil(nPts / chunkSize))

    for chunk in range(nChunks):
        print 'Smoothing chunk {} of {}.'.format(chunk, nChunks - 1)
        start_pos = (chunkSize * chunk)
        end_pos = chunkSize * (chunk + 1)
        # clamp the final chunk to the end of the data
        if end_pos > nPts:
            end_pos = int(nPts)

        print('start: {}; end: {}'.format(start_pos, end_pos))
        mvtChunk = mvt[start_pos:end_pos]

        # return_sorted=False: only the fitted values, in input order
        smoothChunk = smoo.lowess(mvtChunk,
                                  range(len(mvtChunk)),
                                  it=2,
                                  frac=0.005,
                                  return_sorted=False)
        # clip negative smoothed values to zero
        smoothChunk[smoothChunk < 0.] = 0.

        smoothed[start_pos:end_pos] = smoothChunk

    return smoothed
예제 #28
0
    def test_simple(self):
        """Check lowess with default settings against reference output from R.

        The input is ``x = 0..19`` plus a fixed standard-normal noise vector.
        """
        x = np.arange(20, dtype='float32')
        # standard normal noise
        noise = np.array([
            -0.76741118, -0.30754369, 0.39950921, -0.46352422,
            -1.67081778, 0.6595567, 0.66367639, -2.04388585,
            0.8123281, 1.45977518, 1.21428038, 1.29296866,
            0.78028477, -0.2402853, -0.21721302, 0.24549405,
            0.25987014, -0.90709034, -1.45688216, -0.31780505,
        ])
        y = x + noise

        # R output
        out = [
            -0.6260344553, 0.565071712, 1.759627189, 2.9579633258,
            4.1560636154, 5.3473396937, 6.522298218, 7.708159388,
            8.8759055519, 9.9409758603, 10.8981138458, 11.7851424728,
            12.6188717297, 13.4098497374, 14.1516996585, 14.9180658147,
            15.6956600199, 16.4783034134, 17.2617441531, 18.0459201716,
        ]

        # two columns: x values and expected fitted values
        expected_lowess = np.column_stack((x, out))
        actual_lowess = lowess(y, x)

        assert_almost_equal(expected_lowess, actual_lowess)
예제 #29
0
def Sf(time, flux, ferr, **kwargs):
    """Detrend ``flux`` with an iteratively outlier-clipped LOWESS fit.

    Keyword options (with defaults): ``sort_data`` (True, forwarded as
    ``is_sorted``), ``fraction_rate`` (0.03, the lowess ``frac``),
    ``iterations`` (3, the lowess ``it``) and ``points_window`` (13; three
    times this value is used as the ``SavGol`` window).

    Up to 40 passes are made. In each pass the working copy of the flux is
    smoothed; the loop stops once the standard deviation of the residuals
    drops below the scatter of the Savitzky-Golay-filtered, median-normalised
    flux. Otherwise points deviating by more than 3 standard deviations are
    set to NaN and filled by nearest-neighbour interpolation at the start of
    the next pass.

    Returns ``(flux / trend, ferr / trend)`` using the original flux and the
    trend from the last pass.
    """
    sortedd = kwargs.get('sort_data', True)
    frac = kwargs.get('fraction_rate', 0.03)
    it = kwargs.get('iterations', 3)
    rmswin = kwargs.get('points_window', 13)
    svgwin = int(rmswin * 3)

    flux_orig = np.copy(flux)
    working = np.copy(flux)
    flux_savgol = SavGol(flux, win = svgwin)
    sigma2 = Scatter(flux_savgol / np.nanmedian(flux_savgol), remove_outliers = True, win = rmswin)

    clip_sigma = 3.
    max_passes = 40
    for n_pass in range(max_passes):
        if n_pass > 0:
            # fill the points rejected in the previous pass
            valid = np.logical_not(np.isnan(working))
            positions = np.arange(len(working))
            fill = interp1d(positions[valid], working[valid],
                            kind = 'nearest',
                            bounds_error = False,
                            fill_value = 'extrapolate')
            working = fill(positions)

        filtered = lowess(working, time, is_sorted = sortedd, frac = frac, it = it)
        flux_filter = filtered[:, 1]

        residual = working - flux_filter
        std = np.std(residual)
        if std < sigma2:
            break

        outliers = np.where(abs(residual) > clip_sigma * std)[0]
        np.put(working, outliers, np.nan)

    return flux_orig / flux_filter, ferr / flux_filter
예제 #30
0
def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
    """
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : matplotlib Axes instance
        The Axes to which to add the plot
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passed to lowess.

    Returns
    -------
    fig : matplotlib Figure instance
        The figure that holds the instance.
    """
    line = ax.get_lines()[lines_idx]
    # Use the public accessors rather than the private ``_x``/``_y``
    # attributes: ``orig=False`` returns the same processed arrays but
    # refreshes them first if the line data changed since the last draw.
    y0 = line.get_ydata(orig=False)
    x0 = line.get_xdata(orig=False)
    lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
    ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
    return ax.figure
예제 #31
0
def evaluate_MAP(qty,
                 weights,
                 bins,
                 smooth='kde',
                 lowess_frac=0.3,
                 bw_method='scott',
                 vb=False):
    """Estimate the mode (MAP) of a weighted sample from its histogram.

    Parameters
    ----------
    qty : array-like
        Sample values.
    weights : array-like or None
        Per-sample weights, forwarded to ``np.histogram`` and, in 'kde'
        mode, to ``gaussian_kde``.
    bins :
        ``bins`` argument of ``np.histogram``.
    smooth : str
        'lowess' - LOWESS-smooth the histogram over the bin centres and
                   return the centre with the largest smoothed value;
        'kde'    - evaluate a Gaussian KDE of the sample on the bin edges
                   and return the edge with the largest density;
        other    - return the centre of the fullest histogram bin.
    lowess_frac : float
        ``frac`` for the LOWESS fit.
    bw_method :
        ``bw_method`` for ``gaussian_kde``.
    vb : bool
        When true, plot the normalised histogram, the smoothed curve (if
        any) and a vertical line at the MAP, then call ``plt.show()``.

    Returns
    -------
    The MAP estimate.
    """
    post, xaxis = np.histogram(qty, weights=weights, bins=bins)
    # True bin centres. The previous expression (left edge + mean bin width)
    # produced the right bin edges, biasing the estimate by half a bin.
    xaxis_centers = 0.5 * (xaxis[:-1] + xaxis[1:])

    if smooth == 'lowess':
        a = lowess(post, xaxis_centers, frac=lowess_frac)
        MAP = a[np.argmax(a[:, 1]), 0]
    elif smooth == 'kde':
        a = gaussian_kde(qty, bw_method=bw_method, weights=weights)
        MAP = xaxis[np.argmax(a.evaluate(xaxis))]
    else:
        # centre of the fullest bin (previously its right edge)
        MAP = xaxis_centers[np.argmax(post)]

    if vb:
        # ``np.trapezoid`` is the NumPy 2 name of ``np.trapz``; support both.
        trapezoid = getattr(np, "trapezoid", None) or np.trapz
        areapost = trapezoid(x=xaxis_centers, y=post)
        plt.plot(xaxis_centers, post / areapost)
        if smooth == 'lowess':
            plt.plot(a[:, 0], a[:, 1] / areapost)
        elif smooth == 'kde':
            plt.plot(xaxis, a.pdf(xaxis))
        plt.plot([MAP, MAP], plt.ylim())
        plt.show()

    return MAP
예제 #32
0
def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
    """
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : matplotlib Axes instance
        The Axes to which to add the plot
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passed to lowess.

    Returns
    -------
    fig : matplotlib Figure instance
        The figure that holds the instance.
    """
    line = ax.get_lines()[lines_idx]
    # Use the public accessors rather than the private ``_x``/``_y``
    # attributes; ``orig=False`` asks for the processed numeric data.
    x0 = line.get_xdata(orig=False)
    y0 = line.get_ydata(orig=False)
    lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
    # Overlay the smoothed curve in red on the same Axes.
    ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
    return ax.figure
예제 #33
0
    def plot_spectrum(self, ax=None, save=False, filtered=False, frac=0.025, fill=True):
        """Plot the spectrum stored in ``self.data_points``.

        Parameters
        ----------
        ax : matplotlib Axes, optional
            Axes to draw on, so this method can be used from another plotting
            function. A new figure is created when omitted.
        save : bool
            If true, save the figure holding ``ax`` to ``self.filename`` with
            its last four characters stripped.
        filtered : bool
            If true, plot a lowess-smoothed curve instead of the raw data
            (useful for the whole, noisy spectrum).
        frac : float
            ``frac`` argument for lowess when ``filtered`` is true.
        fill : bool
            If true, shade the two regions that start at ``self.peaks[0]`` and
            ``self.peaks[1]`` and extend by ``self.spacing``.
        """
        if ax is None:
            plt.figure()
            ax = plt.gca()
        ax.set_title("Spectrum")
        ax.set_xlabel("Wavelength (nm)")
        ax.set_ylabel("Normalized Intensity (AU)")
        xs = [x[0] for x in self.data_points]
        ys = [x[1] for x in self.data_points]
        if not filtered:
            ax.plot(xs, ys, 'b--', label=self.dataname)
        else:
            # Keep the smoothed result in its own name instead of rebinding
            # the boolean ``filtered`` parameter.
            smoothed = lowess(ys, xs, is_sorted=True, frac=frac, it=0)
            ax.plot(smoothed[:, 0], smoothed[:, 1], label='filtered data')

        # fill in the integral regions defined by peaks and spacing
        if fill:
            fill_point_1 = self.get_range(self.peaks[0], self.peaks[0] + self.spacing)
            fill_point_2 = self.get_range(self.peaks[1], self.peaks[1] + self.spacing)
            fill_xs_1 = [x[0] for x in fill_point_1]
            fill_ys_1 = [x[1] for x in fill_point_1]
            fill_xs_2 = [x[0] for x in fill_point_2]
            fill_ys_2 = [x[1] for x in fill_point_2]
            ax.fill_between(fill_xs_1, fill_ys_1, color='lightblue', label='Region I')
            ax.fill_between(fill_xs_2, fill_ys_2, color='orange', label='Region II')
        ax.set_ylim(ymin=0)
        ax.margins(0.05)
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        if save:
            # Axes objects have no ``savefig``; saving is done by the Figure.
            ax.figure.savefig(self.filename[:-4])
예제 #34
0
 def get_l_res(arr):
     """Return, row by row, the residuals of `arr` about a lowess fit.

     Each row is fitted against ``self.tpoints`` (taken from the enclosing
     scope) with one robustifying iteration; the fitted values are
     subtracted from the row.
     """
     return np.array(
         [row - lowess(row, self.tpoints, it=1)[:, 1] for row in arr]
     )
예제 #35
0
파일: plot.py 프로젝트: t-hdd/lmdiag
def resid_fit(lm):
    """Draw a residuals-vs-fitted-values plot for the model `lm`.

    Plots a dotted zero line, a red lowess smoothing line, the scatter of
    residuals against fitted values, and annotates the three observations
    with the largest absolute residual by their index. Returns the pyplot
    module.
    """
    values = select_model_type(lm)
    fitted = values.get_fitted_values()
    residuals = values.get_residuals()

    # Lowess curve through the residuals, used as the smoothing line.
    smooth_x, smooth_y = lowess(residuals, fitted).T

    # Indices of the three largest absolute residuals.
    largest = np.abs(residuals).argsort()[-3:]

    plt.plot([fitted.min(), fitted.max()], [0, 0], "k:")
    plt.plot(smooth_x, smooth_y, "r-")
    plt.plot(
        fitted,
        residuals,
        "o",
        mec=edge_col,
        markeredgewidth=1,
        fillstyle="none",
    )

    for idx in largest:
        plt.annotate(idx, xy=(fitted[idx], residuals[idx]), color="r")

    plt.title("Residual vs. Fitted", fontsize=title_size)
    plt.xlabel("Fitted values")
    plt.ylabel("Residuals")

    return plt
예제 #36
0
def plot_doppler(x, tx_carrier_freq, beacon_carrier_freq):
    """Plot the Doppler shift (in km/h) against `x` in a new figure.

    The shift is half the difference between the column-0/column-1 offsets
    of the two carrier-frequency arrays, converted from FFT bins to km/h.
    Samples flagged by ``is_outlier`` are dropped; the remaining raw values
    are drawn in red and a lowess-smoothed curve in blue.
    """
    plt.figure()

    tx_offset = tx_carrier_freq[:, 0] - tx_carrier_freq[:, 1]
    beacon_offset = beacon_carrier_freq[:, 0] - beacon_carrier_freq[:, 1]
    dop_bin = (tx_offset - beacon_offset) / 2.0
    # 50 km/h doppler = 433e6 * ((3e8 + 50/3.6) / 3e8 - 1)
    #                 = 50/3.6 / 3e8 * 433e6
    hz_per_bin = 2.4e6 / 16384
    dop = dop_bin * hz_per_bin * 3e8 / 433e6 * 3.6

    keep = ~is_outlier(dop)
    dopx = x[keep]
    dop_kept = dop[keep]

    dop_smooth = lowess(dop_kept, dopx, is_sorted=True, frac=0.025, it=0)

    plt.plot(dopx, dop_kept, 'r', linewidth=0.2, alpha=0.5)
    plt.plot(dop_smooth[:, 0], dop_smooth[:, 1], 'b')
    plt.ylabel('Doppler shift (km/h)')
    plt.xlabel('TX timestamp at RX0 (s)')
    plt.grid()
    plt.tight_layout()
def loweesfilter():
    """Lowess-smooth the ``"ax"`` series of the global ``acc_read_variable``.

    The x values are the sample indices ``0 .. N-1`` where ``N`` is the
    length of the ``"time"`` entry; 1% of the points are used per local fit.
    Returns whatever ``lowess`` returns.
    """
    signal = acc_read_variable["ax"]
    sample_index = np.arange(acc_read_variable["time"].shape[0])
    return lowess(signal, sample_index, frac=0.01)
예제 #38
0
    def test_simple(self):
        """Default lowess on a noisy straight line must match the R output."""
        x = np.arange(20, dtype='float32')
        # standard normal noise
        noise = np.array([
            -0.76741118, -0.30754369, 0.39950921, -0.46352422, -1.67081778,
            0.6595567, 0.66367639, -2.04388585, 0.8123281, 1.45977518,
            1.21428038, 1.29296866, 0.78028477, -0.2402853, -0.21721302,
            0.24549405, 0.25987014, -0.90709034, -1.45688216, -0.31780505
        ])

        # Fitted values produced by R for the same data.
        r_fitted = [
            -0.6260344553, 0.565071712, 1.759627189, 2.9579633258,
            4.1560636154, 5.3473396937, 6.522298218, 7.708159388,
            8.8759055519, 9.9409758603, 10.8981138458, 11.7851424728,
            12.6188717297, 13.4098497374, 14.1516996585, 14.9180658147,
            15.6956600199, 16.4783034134, 17.2617441531, 18.0459201716
        ]
        expected_lowess = np.column_stack((x, r_fitted))

        actual_lowess = lowess(x + noise, x)

        assert_almost_equal(expected_lowess, actual_lowess)
예제 #39
0
def local_fit(y):
    """
    Return the LOWESS-smoothed values of `y` against its sample index.

    A fraction of 1/7 of the points is used for each local fit; only the
    fitted-value column of the lowess result is returned.
    """
    positions = np.arange(len(y))
    return lowess(y, positions, frac=1 / 7.)[:, 1]
예제 #40
0
def regression_plot(Z,X,band_names=None,visible_only=True,figsize=(12,7)):
    """
    Build a figure with one panel per image band showing depth against
    log-transformed radiance, a lowess curve, and the regression line
    obtained from `regressions`.

    Notes
    -----
    This method doesn't come directly from Lyzenga 1978 but the author of this
    code found it helpful.

    Parameters
    ----------
    Z : np.ma.MaskedArray
        Array of depth values repeated for each band so that Z.shape==X.shape.
        The mask needs to be the same too so that Z.mask==X.mask for all the
        bands. A single-band Z is repeated along the last axis to match X.
    X : np.ma.MaskedArray
        The array of log transformed radiance values from equation B1 of
        Lyzenga 1978.
    band_names : list of str, optional
        Panel titles; defaults to 'Band1', 'Band2', ...
    visible_only : bool
        If true a 2x3 grid of panels is created, otherwise 2x4.
    figsize : tuple
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    figure
        A matplotlib figure.
    """
    nbands = X.shape[-1]
    if band_names is None:
        band_names = ['Band'+str(b+1) for b in range(nbands)]
    if np.atleast_3d(Z).shape[-1] == 1:
        Z = np.repeat(np.atleast_3d(Z), nbands, 2)

    ncols = 3 if visible_only else 4
    fig, axs = plt.subplots(2, ncols, figsize=figsize)

    regs = regressions(Z,X)
    for i, ax in enumerate(axs.flatten()):
        # Panels beyond the number of bands are left empty.
        if i >= nbands:
            continue
        slp, incpt, rval = regs[:,i]
        depth, radiance = equalize_array_masks(Z[...,i], X[...,i])
        # A band with fewer than two unmasked points cannot be plotted.
        if depth.count() < 2:
            continue
        depth = depth.compressed()
        radiance = radiance.compressed()

        ax.scatter(depth, radiance, alpha=0.1, edgecolor='none', c='gold')
        smth = lowess(radiance, depth, frac=0.2)
        ax.plot(smth[:, 0], smth[:, 1], c='black', alpha=0.5, linestyle='--')
        reglabel = "m=%.2f, r=%.2f" % (slp,rval)
        ax.plot(depth, incpt + slp * depth, c='brown', label=reglabel, alpha=1.0)
        ax.set_title(band_names[i])
        ax.set_xlabel(r'Depth (m)')
        ax.set_ylabel(r'$X_i$')
        ax.legend(fancybox=True, framealpha=0.5)
    plt.tight_layout()
    return fig
예제 #41
0
    def test_simple(self):
        """Default lowess must reproduce the reference values stored in CSV."""
        rfile = os.path.join(rpath, "test_lowess_simple.csv")
        # Open the reference file in a context manager so the handle is
        # closed as soon as the data has been parsed.
        with open(rfile, "rb") as fh:
            test_data = np.genfromtxt(fh, delimiter=",", names=True)
        expected_lowess = np.array([test_data["x"], test_data["out"]]).T

        actual_lowess = lowess(test_data["y"], test_data["x"])

        assert_almost_equal(expected_lowess, actual_lowess, decimal=testdec)
예제 #42
0
    def test_delta(self):
        """Check lowess against reference values for three `delta` settings.

        The CSV columns ``out_0``, ``out_Rdef`` and ``out_1`` hold the
        expected fits for the default delta, ``0.01 * ptp(x)`` and a delta
        just above 1 respectively, all with ``frac=0.1``.
        """
        rfile = os.path.join(rpath, 'test_lowess_delta.csv')
        # Open the reference file in a context manager so the handle is
        # closed as soon as the data has been parsed.
        with open(rfile, 'rb') as fh:
            test_data = np.genfromtxt(fh, delimiter=',', names=True)

        expected_lowess_del0 = np.array([test_data['x'], test_data['out_0']]).T
        expected_lowess_delRdef = np.array([test_data['x'], test_data['out_Rdef']]).T
        expected_lowess_del1 = np.array([test_data['x'], test_data['out_1']]).T

        actual_lowess_del0 = lowess(test_data['y'], test_data['x'], frac=0.1)
        actual_lowess_delRdef = lowess(test_data['y'], test_data['x'], frac=0.1,
                                       delta=0.01 * np.ptp(test_data['x']))
        actual_lowess_del1 = lowess(test_data['y'], test_data['x'], frac=0.1,
                                    delta=1.0 + 1e-10)

        assert_almost_equal(expected_lowess_del0, actual_lowess_del0, decimal=testdec)
        assert_almost_equal(expected_lowess_delRdef, actual_lowess_delRdef, decimal=testdec)
        assert_almost_equal(expected_lowess_del1, actual_lowess_del1, decimal=10)  # testdec)
예제 #43
0
 def generate(name, fname, x='x', y='y', out='out', kwargs=None, decimal=7):
     """Run one CSV-driven lowess comparison.

     Loads `fname` from ``rpath``, fits ``lowess`` to columns `y` and `x`
     with `kwargs` (a dict, or a callable that builds the dict from the
     loaded data), and asserts the result matches column `out` to `decimal`
     places. `name` is stored as ``assert_almost_equal.description``.
     """
     if kwargs is None:
         kwargs = {}
     data = np.genfromtxt(os.path.join(rpath, fname), delimiter=',', names=True)
     assert_almost_equal.description = name
     if callable(kwargs):
         # Keyword arguments may depend on the loaded data.
         kwargs = kwargs(data)
     fitted = lowess(data[y], data[x], **kwargs)
     reference = np.array([data[x], data[out]]).T
     assert_almost_equal(fitted, reference, decimal)
예제 #44
0
    def test_simple(self):
        """Default lowess must reproduce the reference values stored in CSV."""
        rfile = os.path.join(rpath, 'test_lowess_simple.csv')
        # Open the reference file in a context manager so the handle is
        # closed as soon as the data has been parsed.
        with open(rfile, 'rb') as fh:
            test_data = np.genfromtxt(fh, delimiter=',', names=True)
        expected_lowess = np.array([test_data['x'], test_data['out']]).T

        actual_lowess = lowess(test_data['y'], test_data['x'])

        assert_almost_equal(expected_lowess, actual_lowess, decimal=testdec)
def DeTrend(TheCandData,TheMDI):
    """Fit a lowess curve to the data and remove it.

    Default lowess smoothing is used. Works on grids as well as vectors.

    Values greater than `TheMDI` (the missing-data indicator) are treated as
    valid. For a vector, the lowess fit through the valid values (against
    their running index) is subtracted from them. For a 3-D array the same
    is done along the first axis for every ``[:, ltt, lnn]`` column; columns
    with 12 or fewer valid values are set entirely to `TheMDI`.

    `TheCandData` is modified in place and also returned.
    """
    sizee=np.shape(TheCandData)
    if (len(sizee) < 2):
        gots=np.where(TheCandData > TheMDI)[0]
        los=lowess(TheCandData[gots],range(len(gots)))[:,1]
        TheCandData[gots]=TheCandData[gots]-los
    else:
        for ltt in range(len(TheCandData[0,:,0])):
            for lnn in range(len(TheCandData[0,0,:])):
                gots=np.where(TheCandData[:,ltt,lnn] > TheMDI)[0]
                if (len(gots) > 12):
                    los=lowess(TheCandData[gots,ltt,lnn],range(len(gots)))[:,1]
                    TheCandData[gots,ltt,lnn]=TheCandData[gots,ltt,lnn]-los
                else:
                    # Too few valid points to fit: blank the whole column.
                    TheCandData[:,ltt,lnn]=TheMDI

    return TheCandData # DETREND
예제 #46
0
 def generate(name, fname,
              x='x', y='y', out='out', kwargs=None, decimal=7):
     """Build one CSV-driven lowess comparison.

     Loads `fname` from ``rpath`` and fits ``lowess`` to columns `y` and `x`
     with `kwargs` (a dict, a callable that builds the dict from the loaded
     data, or None for no extra arguments).

     Returns a tuple ``(check, result, expect)`` where ``check`` is
     ``assert_almost_equal`` bound to `decimal` and labelled with `name` via
     its ``description`` attribute, ``result`` is the lowess output and
     ``expect`` the reference built from columns `x` and `out`.
     """
     # Avoid a shared mutable default argument.
     kwargs = {} if kwargs is None else kwargs
     data = np.genfromtxt(
         os.path.join(rpath, fname), delimiter=',', names=True)
     assert_equal_at_testdec = partial(
         assert_almost_equal, decimal=decimal)
     assert_equal_at_testdec.description = name
     if callable(kwargs):
         kwargs = kwargs(data)
     result = lowess(data[y], data[x], **kwargs)
     expect = np.array([data[x], data[out]]).T
     return assert_equal_at_testdec, result, expect
예제 #47
0
def stl_loess(ts, loess_frac=0.2):
    """Return the remainder of `ts.OriginalReading` after removing the
    seasonal component and a lowess trend.

    Parameters
    ----------
    ts : object with an ``OriginalReading`` series and an ``index``
        Missing readings are interpolated in place before decomposing.
    loess_frac : float
        ``frac`` argument for the lowess trend fit.

    Returns
    -------
    remainder : array
        ``readings - seasonal - trend``, where the seasonal component comes
        from ``seasonal_decompose`` and the trend from lowess on the
        readings.
    """
    ts.OriginalReading.interpolate(inplace=True) # if there are NAs
    x_stl = sm.tsa.seasonal_decompose(ts.OriginalReading.values, freq=95)

    # Apply loess filter to get the trend
    trend_loess = lowess(ts.OriginalReading.values, ts.index, frac=loess_frac)[:,1]
    seasonal = x_stl.seasonal # the seasonality is fine
    # Build a new array rather than subtracting in place: ``.values`` may be
    # a view of the caller's data, so ``-=`` would silently strip the
    # seasonality from ``ts`` itself (or fail on a read-only array).
    x_ts = ts.OriginalReading.values - seasonal # remove seasonality
    remainder = x_ts - trend_loess # remove trend, similar to x_stl.resid but complete

    return remainder
예제 #48
0
    def test_iter(self):
        """Lowess with 0 and 3 robustifying iterations must match R output."""
        x = np.arange(20, dtype='float32')
        # cauchy noise
        noise = np.array([
            1.86299605, -0.10816866, 1.87761229, -3.63442237, 0.30249022,
            1.03560416, 0.21163349, 1.14167809, -0.00368175, -2.08808987,
            0.13065417, -1.8052207, 0.60404596, -2.30908204, 1.7081412,
            -0.54633243, -0.93107948, 1.79023999, 1.05822445, -1.04530564,
        ])
        y = x + noise

        # R output without robustifying iterations
        r_no_iter = [
            0.6264479483, 1.5008396363, 2.3861761926, 3.2716390242,
            4.1397266375, 4.9926614002, 5.9062225, 6.8541464784,
            7.8163358136, 8.6684661827, 9.5321215273, 10.4655376106,
            11.469691774, 12.612670578, 13.8080457514, 14.9355218409,
            16.0491183613, 17.1604998952, 18.2739171976, 19.3834268539,
        ]
        # R output with three robustifying iterations
        r_3_iter = [
            1.1091939965, 1.9662338415, 2.8223436958, 3.6741660675,
            4.5153163696, 5.3483205165, 6.2127611584, 7.0371035909,
            7.8823844068, 8.7036783127, 9.5698728732, 10.5011237563,
            11.4924301926, 12.6180333554, 13.8056705213, 14.9280791108,
            16.0363681325, 17.1426206341, 18.2516511313, 19.3581200948,
        ]

        expected_lowess_no_iter = np.column_stack((x, r_no_iter))
        expected_lowess_3_iter = np.column_stack((x, r_3_iter))

        actual_lowess_no_iter = lowess(y, x, it=0)
        actual_lowess_3_iter = lowess(y, x, it=3)

        assert_almost_equal(expected_lowess_no_iter, actual_lowess_no_iter)
        assert_almost_equal(expected_lowess_3_iter, actual_lowess_3_iter)
def smoothEstimates(points):
    """Lowess-smooth a date-indexed series.

    Parameters
    ----------
    points : pair ``[x, y]``
        ``x`` holds datetime-like objects (anything with ``timetuple()``),
        ``y`` the corresponding values. The dates are passed to lowess as
        already sorted.

    Returns
    -------
    list
        ``[x, y]`` with ``x`` as ``datetime.datetime`` objects and ``y`` the
        smoothed estimates. ``[[], []]`` when there are no points.
    """
    #First we take the points that are passed and turn them into x and y
    [x, y] = points
    #Since our x's are dates, we turn them into Epoch timestamps
    x = [time.mktime(p.timetuple()) for p in x]
    #Nothing to smooth (and the fraction below would divide by zero).
    if not x:
        return [[], []]
    #We want the lowess model to take into account the closest 10 points when
    #smoothing. With fewer than 10 points that ratio exceeds 1, which is not a
    #valid fraction, so it is capped at using every point.
    frac = min(1.0, 10.0/len(x))
    #We then pass the points to a lowess smoother, which considers frac% of points at a time.
    smoothed = smooth.lowess(y, x, frac=frac, is_sorted=True)
    #We change the epoch timestamps back into datetime objects and assign that to x
    x = [datetime.datetime.fromtimestamp(p[0]) for p in smoothed]
    #We then assign the smoothed estimates to y
    y = [p[1] for p in smoothed]
    #Now we recombine the x and y and pass it back as one list.
    return [x, y]
예제 #50
0
 def plot_residuals(self, Hz_fraction=15, fname=None, bbox_inches='tight',
                     **kwargs):
     """Plot the reduced residuals against frequency with a lowess curve.

     The lowess fraction is `Hz_fraction` divided by the span of ``self.f``,
     capped at 1. The smoothed values are stored in
     ``self.residuals_lowess``. When `fname` is given the figure is saved
     there, with `bbox_inches` and any extra `kwargs` forwarded to
     ``savefig``.
     """
     span = np.max(self.f) - np.min(self.f)
     self.residuals_lowess = lowess(
         self.reduced_residuals,
         self.f,
         frac=min(Hz_fraction / span, 1),
         return_sorted=False,
     )

     fig, ax = plt.subplots()
     ax.plot(self.f, self.reduced_residuals, 'b.', alpha=0.5)
     ax.plot(self.f, self.residuals_lowess, 'g-', linewidth=2)
     ax.set_xlabel("Frequency [Hz]")
     ax.set_ylabel("Reduced Residual")

     if fname is None:
         return
     fig.tight_layout()
     fig.savefig(fname, bbox_inches=bbox_inches, **kwargs)
예제 #51
0
 def test_lowess(self):
     """``smooth_lowess`` must match row-by-row statsmodels lowess."""
     if skip_lowess:
         raise SkipTest
     frac, it = 0.5, 1
     # Reference result: smooth each row of a copy of the data directly.
     expected = self.s.data.copy()
     for row_idx in range(expected.shape[0]):
         expected[row_idx, :] = lowess(
             endog=expected[row_idx, :],
             exog=self.s.axes_manager[-1].axis,
             frac=frac,
             it=it,
             is_sorted=True,
             return_sorted=False,
         )
     self.s.smooth_lowess(smoothing_parameter=frac, number_of_iterations=it)
     nose.tools.assert_true(np.allclose(expected, self.s.data))
예제 #52
0
def lowess_fit(spec, lams, frac=0.05, it=5):
    '''Fit a spectrum with Locally Weighted Scatterplot Smoothing (LOWESS).

    Thin wrapper around statsmodels.nonparametric.smoothers_lowess.lowess().

    :Args:
        spec: 1-D numpy array
              The input spectrum.
        lams: 1-D numpy array
              The corresponding wavelength array (passed as already sorted).
        frac: float [default:0.05]
              Between 0 and 1. The fraction of the data used when estimating
              each y-value. [From the statsmodel lowess function]
        it: int [default:5]
            The number of residual-based reweightings to perform.
            [From the statsmodel lowess function]

    :Returns:
        out: 1-D array
             The fitted array, with size equal to spec. All NaN when the
             input spectrum is all NaN.

    :Notes:
        The LOWESS technique is described in:
        Cleveland, W.S. (1979) Robust Locally Weighted Regression and Smoothing
        Scatterplots. Journal of the American Statistical Association 74 (368): 829-836.

        This is robust to outliers (hot pixels, cosmics), and is also efficient to ignore
        emission lines. frac=0.05 and it=5 seem to work very fine for spectra of any SNR,
        both lousy with no continuum, and good ones in the center of galaxies - modulo the
        stellar absorption features which are of course "ignored" by the LOWESS routine.
    '''
    # No signal at all: skip the fit (avoids an ugly warning in the prompt)
    # and hand back an all-NaN array of the same shape.
    if np.all(np.isnan(spec)):
        return np.zeros_like(spec) * np.nan

    return lowess(spec, lams, frac=frac, it=it, is_sorted=True,
                  missing='drop', return_sorted=False)
# ----------------------------------------------------------------------------------------      

	
def bike_scatter(df, cols):
    """Scatter-plot ``df['cnt']`` against each column in `cols`.

    One new 8x6 figure is created per column, with the raw points drawn
    faintly and a red lowess curve (frac=0.3) on top. Returns the string
    'Done'.
    """
    import matplotlib.pyplot as plt
    import statsmodels.nonparametric.smoothers_lowess as lw

    for col in cols:
        # Lowess fit of the count against this column.
        fit = lw.lowess(df['cnt'], df[col], frac=0.3)

        fig = plt.figure(figsize=(8, 6))
        fig.clf()
        ax = fig.gca()
        df.plot(kind='scatter', x=col, y='cnt', ax=ax, alpha=0.05)
        ax.plot(fit[:, 0], fit[:, 1], color='red')
        ax.set_xlabel(col)
        ax.set_ylabel('Number of bikes')
        ax.set_title('Number of bikes vs. ' + col)
    return 'Done'
def _noise_estimate_spectrum(spectrum, nb_split=20):
    """Private function to estimate the noise in a spectrum.

    The spectrum is cut into `nb_split` equal sections (trailing samples
    that do not fit are dropped), the per-section variance is regressed on
    the per-section mean with lowess, and the square root of the fitted
    variance at the mean closest to zero is returned.

    Parameters
    ----------
    spectrum : ndarray, shape (n_samples)
        Spectrum from which the noise has to be estimated.

    nb_split : int, option (default=20)
        The number of regions splitting each spectrum

    Returns
    -------
    sigma : float,
        The estimate of the noise standard deviation.

    """
    # Drop the trailing samples that would prevent an even split.
    leftover = spectrum.size % nb_split
    if leftover > 0:
        spectrum = spectrum[:-leftover]

    sections = np.array(np.split(spectrum, nb_split))

    # Mean and variance of every section.
    mean_sec = [np.mean(sec) for sec in sections]
    var_sec = [np.var(sec) for sec in sections]

    fit = lowess(np.array(var_sec), np.array(mean_sec), frac=.9, it=0)
    mean_reg, var_reg = fit[:, 0], fit[:, 1]

    # Find the value for a zero mean intensity or the nearest to zero
    idx_null_mean = _find_nearest(mean_reg, 0.)

    return np.sqrt(var_reg[idx_null_mean])
예제 #55
0
def correct(inputs, fasta, frac_n=0.1, frac_r=0.0001, lowess_iter=3, lowess_frac=0.1):
    """
    GC-correct input bed lines.
    GC correction takes place with a local regression (LOWESS) on GC perc vs number of reads.
    Bins rejected by ``filter_bin`` get a corrected value of 0; accepted bins
    get their value divided by the LOWESS fit at their GC percentage.
    :param inputs: list of BedLine namedtuples
    :param fasta: instance of pyfaidx.Fasta
    :param frac_n: maximal fraction on N-bases per bin
    :param frac_r: minimum fraction of reads per bin
    :param lowess_iter: amount of iterations of LOWESS function
    :param lowess_frac: fraction of input data used for LOWESS function
    :return: corrected BedLines
    """
    # Decide once per bin whether it passes the filter; the result is needed
    # both to collect the regression input and to apply the correction.
    keep = [filter_bin(line, fasta, frac_n, frac_r) for line in inputs]

    reads = []
    gcs = []
    for line, ok in zip(inputs, keep):
        if ok:
            gcs.append(get_gc_for_bin(fasta, line.chromosome, line))
            reads.append(line.value)

    # ``np.float`` was only an alias of the builtin and no longer exists in
    # current NumPy; the builtin ``float`` gives the same dtype.
    reads = np.array(reads, float)
    gcs = np.array(gcs, float)
    if lowess_frac*len(reads) < 4 and len(reads) > 0:  # need at least four data points
        warnings.warn("Too few data points for lowess. Raising lowess_frac")
        lowess_frac = 4.0/len(reads)
        delta = 0  # remove delta in this case
    else:
        delta = 0.01 * len(gcs)
    fitted = statlow.lowess(reads, gcs, return_sorted=False,
                            delta=delta, frac=lowess_frac,
                            it=lowess_iter).tolist()
    # Fitted values are in the same order as the accepted bins; consume them
    # with an iterator instead of repeatedly popping from the list front.
    fitted_iter = iter(fitted)

    corrected_lines = []
    for line, ok in zip(inputs, keep):
        if ok:
            corr_val = float(line.value) / next(fitted_iter)
        else:
            corr_val = 0
        corrected_lines.append(
            BedLine(line.chromosome, line.start, line.end, corr_val))

    return corrected_lines
예제 #56
0
 def test_lowess(self, parallel):
     """``smooth_lowess`` must match row-by-row statsmodels lowess."""
     pytest.importorskip("statsmodels")
     from statsmodels.nonparametric.smoothers_lowess import lowess
     frac, it = 0.5, 1
     # Reference result: smooth each row of the float data directly.
     expected = np.asanyarray(self.s.data, dtype='float')
     for row_idx in range(expected.shape[0]):
         expected[row_idx, :] = lowess(
             endog=expected[row_idx, :],
             exog=self.s.axes_manager[-1].axis,
             frac=frac,
             it=it,
             is_sorted=True,
             return_sorted=False,
         )
     self.s.smooth_lowess(
         smoothing_parameter=frac,
         number_of_iterations=it,
         show_progressbar=None,
         parallel=parallel,
     )
     np.testing.assert_allclose(
         self.s.data, expected, rtol=self.rtol, atol=self.atol)
예제 #57
0
    def test_options(self):
        """Exercise the ``is_sorted``, ``missing`` and ``return_sorted``
        options of lowess, integer input, and NaN handling."""
        rfile = os.path.join(rpath, 'test_lowess_simple.csv')
        # Open the reference file in a context manager so the handle is
        # closed as soon as the data has been parsed.
        with open(rfile, 'rb') as fh:
            test_data = np.genfromtxt(fh, delimiter=',', names=True)
        y, x = test_data['y'], test_data['x']
        expected_lowess = np.array([test_data['x'], test_data['out']]).T

        # check skip sorting
        actual_lowess1 = lowess(y, x, is_sorted=True)
        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)

        # check skip missing
        actual_lowess = lowess(y, x, is_sorted=True, missing='none')
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)

        # check order/index, returns yfitted only
        actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False)
        assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13)

        # check integer input
        actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True)
        actual_lowess1 = lowess(np.round(y), x, is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_(actual_lowess.dtype is np.dtype(float))
        # this will also have duplicate x
        actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True)
        actual_lowess1 = lowess(y, np.round(x), is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_(actual_lowess.dtype is np.dtype(float))

        # check with nans,  this changes the arrays
        y[[5, 6]] = np.nan
        x[3] = np.nan
        # Fitting the data with NaN rows must equal fitting only the finite
        # rows.  The two results are compared with each other; comparing a
        # result with itself would pass regardless of what lowess returns.
        mask_valid = np.isfinite(x) & np.isfinite(y)
        actual_lowess = lowess(y, x, is_sorted=True)
        actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True)
        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
        assert_raises(ValueError, lowess, y, x, missing='raise')
예제 #58
0
def run_lowess(X, Y,
               frac=0.75,
               missing="none"):
    """
    Fit Y ~ X with lowess and return the fitted Y values in input order.

    Entries flagged by ``utils.where_null`` are overwritten with NaN in both
    `X` and `Y` (the inputs are modified in place) before fitting.

    Parameters:
    -----------

    X: X values
    Y: Y values
    frac: fraction of data used to estimate each y-value.
    missing: how lowess should handle missing values (default "none").
    """
    X[utils.where_null(X)] = np.nan
    Y[utils.where_null(Y)] = np.nan
    # Note the argument order: lowess takes the Y values first.
    return lowess(Y, X, frac=frac, missing=missing, return_sorted=False)
예제 #59
0
    def test_simple(self):
        """Default lowess on a noisy straight line must match stored values."""
        x = np.arange(20, dtype='float32')
        # standard normal noise
        noise = np.array([
            -0.76741118, -0.30754369, 0.39950921, -0.46352422, -1.67081778,
            0.6595567, 0.66367639, -2.04388585, 0.8123281, 1.45977518,
            1.21428038, 1.29296866, 0.78028477, -0.2402853, -0.21721302,
            0.24549405, 0.25987014, -0.90709034, -1.45688216, -0.31780505,
        ])

        # Expected fitted values at x = 0, 1, ..., 19.
        fitted = [
            -0.58337912, 0.61951246, 1.82221628, 3.02536876, 4.22667951,
            5.42387723, 6.60834945, 7.7797691, 8.91824348, 9.94997506,
            10.89697569, 11.78746276, 12.62356492, 13.41538492, 14.15745254,
            14.92343948, 15.70019862, 16.48167846, 17.26380699, 18.0466769,
        ]
        expected_lowess = np.column_stack((np.arange(20, dtype=float), fitted))

        actual_lowess = lowess(x + noise, x)

        assert_almost_equal(expected_lowess, actual_lowess)
예제 #60
0
    def compile_lowess(self):
        """Write a lowess-smoothed companion file for every prediction file.

        For each ``*.txt`` file in PREDICTIONS_DIR whose name does not contain
        "lowess", the tab-separated rows are parsed as floats, rows whose
        fourth column is not positive are dropped, the rest is passed through
        ``self.sample_data_points``, and column 3 is smoothed against column 0.
        The result is written to ``<wordclass>_lowess.txt`` with each distinct
        formatted line written once.
        """
        filenames = [f for f in os.listdir(PREDICTIONS_DIR)
                     if f.endswith(".txt") and "lowess" not in f]
        for f in filenames:
            wordclass = f.split(".")[0]
            # The context manager closes the input even if a row fails to parse.
            with open(os.path.join(PREDICTIONS_DIR, f), "r") as fh:
                data_points = [[float(c) for c in l.strip().split("\t")] for l in fh]

            data_points = self.sample_data_points([d for d in data_points if d[3] > 0])
            x = numpy.array([l[0] for l in data_points])
            y = numpy.array([l[3] for l in data_points])
            results = lowess(y, x, frac=LOWESS_FRACTION, it=LOWESS_ITERATIONS)

            outfile = os.path.join(PREDICTIONS_DIR, "%s_lowess.txt" % wordclass)
            with open(outfile, "w") as fh:
                # Rounding to three significant digits makes many rows
                # identical; write each distinct line only once.
                seen = set()
                for r in results:
                    sig = "%0.3g\t%0.3g\n" % (r[0], r[1])
                    if sig not in seen:
                        fh.write(sig)
                        seen.add(sig)