예제 #1
0
def fitcurve(lc_data, period):
    """Phase-fold a light curve and smooth it with SuperSmoother.

    Parameters
    ----------
    lc_data : iterable of mapping
        Observations; each item is read through its ``"mag"`` and ``"time"``
        keys.
    period : float
        Folding period, expressed in the same unit as the ``"time"`` values.

    Returns
    -------
    list
        A one-element list ``[{"phase": [...], "mag": [...]}]``.  The curve is
        predicted on 50 evenly spaced phases in ``[0, 1]``, passed through the
        ``fillNaN`` helper, and any magnitude that is still NaN is left out
        together with its phase.
    """
    mag = np.array([obs["mag"] for obs in lc_data], dtype=np.float32)
    # Times stay in float64: float32 carries only ~7 significant digits, so
    # large timestamps (e.g. MJD values) would be rounded before folding.
    mjd = np.array([obs["time"] for obs in lc_data], dtype=np.float64)

    # Phase = fractional part of the number of cycles since the first epoch.
    cycles = (mjd - mjd.min()) / period
    phase = cycles - np.trunc(cycles)

    model = SuperSmoother()
    model.fit(phase, mag)

    x = np.linspace(0, 1, num=50).tolist()
    y = model.predict(x).tolist()
    x, y = fillNaN(x, y)

    curve = {"phase": [], "mag": []}
    for i, value in enumerate(y):
        # NaN is the only value that differs from itself.
        if value == value:
            curve["phase"].append(x[i])
            curve["mag"].append(value)

    return [curve]
예제 #2
0
def fitcurve(lc_data, period):
    """Phase-fold a light curve, smooth it, and report the fit residuals.

    Parameters
    ----------
    lc_data : iterable of mapping
        Observations; each item is read through its ``"mag"`` and ``"time"``
        keys.
    period : float
        Folding period, expressed in the same unit as the ``"time"`` values.

    Returns
    -------
    tuple
        ``(data, error)``.  ``data`` is a one-element list
        ``[{"phase": [...], "mag": [...]}]`` with the curve predicted on 50
        evenly spaced phases in ``[0, 1]``, passed through the ``fillNaN``
        helper and stripped of NaN magnitudes.  ``error`` lists the non-NaN
        residuals ``model - observed`` at the observed phases.
    """
    mag = np.array([obs["mag"] for obs in lc_data], dtype=np.float32)
    # Times stay in float64: float32 carries only ~7 significant digits, so
    # large timestamps (e.g. MJD values) would be rounded before folding.
    mjd = np.array([obs["time"] for obs in lc_data], dtype=np.float64)

    # Phase = fractional part of the number of cycles since the first epoch.
    cycles = (mjd - mjd.min()) / period
    phase = cycles - np.trunc(cycles)

    model = SuperSmoother()
    model.fit(phase, mag)

    x = np.linspace(0, 1, num=50).tolist()
    y = model.predict(x).tolist()
    x, y = fillNaN(x, y)

    curve = {"phase": [], "mag": []}
    for i, value in enumerate(y):
        # NaN is the only value that differs from itself.
        if value == value:
            curve["phase"].append(x[i])
            curve["mag"].append(value)

    residual = model.predict(phase) - mag
    error = [e for e in residual if e == e]

    return [curve], error
 def fit_supersmoother(self, periodic=True, scale=True):
     """Store the RMS residual of a SuperSmoother fit in ``self.ss_resid``.

     The model is fitted to ``self.times``, ``self.measurements`` and
     ``self.errors``.

     periodic: when true the smoother is built with ``period=self.p``,
         otherwise with ``period=None``.
     scale: when true the RMS residual is divided by the standard
         deviation of ``self.measurements``.
     """
     from supersmoother import SuperSmoother

     smoother_period = self.p if periodic else None
     smoother = SuperSmoother(period=smoother_period)
     smoother.fit(self.times, self.measurements, self.errors)

     deviations = smoother.predict(self.times) - self.measurements
     self.ss_resid = np.sqrt(np.mean(deviations**2))
     if scale:
         self.ss_resid /= np.std(self.measurements)
예제 #4
0
def smooth(df, columns, x_col=None):
    """Overwrite the given columns of ``df`` with their SuperSmoother fit.

    Parameters
    ----------
    df : DataFrame-like
        Table to smooth.  It is modified in place and also returned.
    columns : iterable
        Labels of the columns to smooth.
    x_col : label, optional
        Column providing the abscissa.  When ``None`` the row positions
        ``0 .. len(df) - 1`` are used instead.

    Returns
    -------
    DataFrame-like
        The same ``df`` object, with every listed column replaced by the
        smoother's prediction at the abscissa values.
    """
    # Compare against None explicitly: a valid but falsy label, such as the
    # integer column 0, must still select that column.
    x = df[x_col] if x_col is not None else np.arange(len(df))

    model = SuperSmoother()
    for col in columns:
        model.fit(x, df[col].values)
        df[col] = model.predict(x)
    return df
예제 #5
0
def smooth(df, columns, x_col=None):
    """Overwrite the given columns of ``df`` with their SuperSmoother fit.

    Parameters
    ----------
    df : DataFrame-like
        Table to smooth.  It is modified in place and also returned.
    columns : iterable
        Labels of the columns to smooth.
    x_col : label, optional
        Column providing the abscissa.  When ``None`` the row positions
        ``0 .. len(df) - 1`` are used instead.

    Returns
    -------
    DataFrame-like
        The same ``df`` object, with every listed column replaced by the
        smoother's prediction at the abscissa values.
    """
    # Compare against None explicitly: a valid but falsy label, such as the
    # integer column 0, must still select that column.
    x = df[x_col] if x_col is not None else np.arange(len(df))

    model = SuperSmoother()
    for col in columns:
        model.fit(x, df[col].values)
        df[col] = model.predict(x)
    return df
예제 #6
0
def fitcurve(lc_data_all_band, period):
    """Fit a SuperSmoother curve to the phase-folded light curve of each band.

    Parameters
    ----------
    lc_data_all_band : mapping
        ``lc_data_all_band['bands']`` lists the band names, and each band name
        maps to a sequence of observations read through their ``"mag"``,
        ``"time"`` and ``"error"`` keys.
    period : float or str
        Folding period; converted with ``float``.

    Returns
    -------
    tuple
        ``(fitresults, ksquare)``.  ``fitresults`` holds the ``'bands'`` list
        plus, for every band whose prediction is non-empty, a dict with the
        keys ``'mag'``, ``'shift'`` and ``'ksquare'``.  The second item is the
        ``ksquare`` of the last band processed, or ``0`` when there are no
        bands or that band produced no prediction.

    Notes
    -----
    The curve is predicted on the module-level ``GLOBAL_PHASE`` grid and then
    passed through the ``fillNaN`` and ``norm`` helpers.  ``ksquare`` is the
    mean of ``(residual / error) ** 2`` over the observations, where residuals
    are rounded to 6 decimals and non-finite residuals count as 0.  A zero
    ``"error"`` value is not guarded against.
    """
    bands = lc_data_all_band['bands']
    fitresults = {'bands': bands}
    period = float(period)
    # Bound before the loop so that an empty band list returns 0 instead of
    # failing on an unassigned name at the return statement.
    ksquare = 0

    for band in bands:
        lc_data = lc_data_all_band[band]
        mag = np.array([float(obs["mag"]) for obs in lc_data],
                       dtype=np.float32)
        # Times stay in float64: float32 carries only ~7 significant digits,
        # so large timestamps (e.g. MJD values) would be rounded before
        # folding.
        mjd = np.array([float(obs["time"]) for obs in lc_data],
                       dtype=np.float64)
        sigma = np.array([float(obs["error"]) for obs in lc_data],
                         dtype=np.float32)

        # Phase = fractional part of the cycles elapsed since the first epoch.
        cycles = (mjd - mjd.min()) / period
        phase = cycles - np.trunc(cycles)

        model = SuperSmoother()
        model.fit(phase, mag)
        y = model.predict(GLOBAL_PHASE).tolist()

        ksquare = 0
        if len(y) == 0:
            # Nothing predicted: this band gets no entry in the results.
            continue

        y, shift = norm(fillNaN(y))
        data = {"mag": [round(d, 6) for d in y], 'shift': shift}

        residual = model.predict(phase) - mag
        # NaN and infinite residuals are recorded as 0.
        error = [round(e, 6) if np.isfinite(e) else 0 for e in residual]

        ksquare = sum(e**2 / s**2 for e, s in zip(error, sigma))
        ksquare = ksquare / len(residual)
        data['ksquare'] = ksquare
        fitresults[band] = data

    return fitresults, ksquare
def clean(series):
    """Replace outliers in ``series`` by the median of their neighbours.

    Steps, in order:

    1. Decompose ``series`` with ``STL(period=7, robust=True)``, using the
       series length (reduced by one when even) as the ``seasonal`` argument.
    2. If ``1 - var(resid) / var(series - trend)`` is at least 0.6, continue
       with ``trend + resid`` instead of the input.
    3. Fit a ``SuperSmoother`` against the positions ``0 .. len(series) - 1``
       and take the residuals of that fit.
    4. Flag residuals lying more than 5 inter-quartile ranges below the first
       quartile or above the third quartile.
    5. Set flagged points to NaN, then fill each one with the NaN-ignoring
       median of the in-range points at offsets -2, -1, +1 and +2.

    NOTE(review): the use of ``.loc``, ``.iloc``, ``.to_numpy()`` and
    ``.copy()`` suggests a pandas Series (or single-column DataFrame) is
    expected - confirm against callers.

    Returns
    -------
    tuple
        ``(series_cleaned, outliers)``.  ``outliers`` is a list with one
        truth value per point, or ``None`` when the two limits are no more
        than 1e-14 apart and the outlier search is skipped.

    Raises
    ------
    AssertionError
        If an outlier has no in-range neighbour or all of its neighbours are
        NaN (only while assertions are enabled).
    """
    n_series = len(series)
    # Make the value passed as ``seasonal`` odd.
    if n_series % 2 == 0:
        n_series = n_series - 1
    stl = STL(series, period=7, robust=True, seasonal=n_series)
    res = stl.fit()

    detrend = series - res.trend
    # Share of the detrended variance that is not left in the STL residual.
    strength = 1 - np.var(res.resid) / np.var(detrend)
    if strength >= 0.6:
        series = res.trend + res.resid  # deseasonalized series

    # Smooth against the integer position of each point.
    tt = np.arange(len(series))
    model = SuperSmoother()
    model.fit(tt, series)
    yfit = model.predict(tt)
    resid = series - yfit

    resid_q = np.quantile(resid, [0.25, 0.75])
    iqr = np.diff(resid_q)
    #limits = resid.q + 3 * iqr * [-1, 1]
    # limits[0] = Q1 - 5 * IQR, limits[1] = Q3 + 5 * IQR
    limits = resid_q + 5 * iqr * [-1, 1]

    # Find residuals outside limits
    series_cleaned = series.copy()
    outliers = None
    # Skip the search when the two limits (nearly) coincide.
    if (limits[1] - limits[0]) > 1e-14:
        outliers = [
            a or b for a, b in zip((resid < limits[0]).to_numpy(), (
                resid > limits[1]).to_numpy())
        ]
        if any(outliers):
            # Blank every outlier first so it cannot feed a neighbour's median.
            series_cleaned.loc[outliers] = np.nan
            # Replace outliers
            id_outliers = [i for i, x in enumerate(outliers) if x]
            # Processed in ascending position, so a value filled earlier in
            # this loop can contribute to a later outlier's median.
            for ii in id_outliers:
                xx = [ii - 2, ii - 1, ii + 1, ii + 2]
                # Keep only neighbour positions that exist.
                xx = [x for x in xx if x < series_cleaned.shape[0] and x >= 0]
                assert (len(xx) > 0)
                assert (not np.isnan(series_cleaned.iloc[xx]).to_numpy().all())
                series_cleaned.iloc[ii] = np.nanmedian(
                    series_cleaned.iloc[xx].to_numpy().flatten())

    return series_cleaned, outliers
예제 #8
0
    def fit_supersmoother(self, m_period=None, periodic=True, scale=True):
        """Store the RMS residual of a SuperSmoother fit in ``self.ss_resid``.

        SuperSmoother (Friedman 1984), from the ``supersmoother`` package, is
        fitted to ``self.times``, ``self.measurements`` and ``self.errors``.
        If a ``ValueError`` is raised along the way, ``self.ss_resid`` is set
        to ``np.inf`` instead.

        @param m_period: float, period; ``self.period_catalog`` when None.
        @param periodic: boolean, pass the period to the smoother when true,
            ``None`` otherwise.
        @param scale: boolean, divide the RMS residual by the standard
            deviation of the measurements when true.
        """
        from supersmoother import SuperSmoother

        chosen_period = self.period_catalog if m_period is None else m_period
        smoother = SuperSmoother(period=chosen_period if periodic else None)

        try:
            smoother.fit(self.times, self.measurements, self.errors)
            deviations = smoother.predict(self.times) - self.measurements
            self.ss_resid = np.sqrt(np.mean(deviations**2))
            if scale:
                self.ss_resid /= np.std(self.measurements)
        except ValueError:
            self.ss_resid = np.inf
def clean(series, limit_range=5, seasonality_th=0.6):
    """Replace outliers in ``series`` by a centred 5-point rolling mean.

    The series is decomposed with ``STL(period=7, robust=True)``.  When
    ``1 - var(resid) / var(series - trend)`` reaches ``seasonality_th`` the
    work continues on ``trend + resid``.  A ``SuperSmoother`` is then fitted
    against the point positions, and every point whose residual lies more
    than ``limit_range`` inter-quartile ranges outside the quartiles is
    replaced by the rolling mean (window 5, centred, ``min_periods=1``)
    computed on the unmodified values.

    Returns the cleaned copy of the (possibly deseasonalized) series.
    """
    n_points = len(series)

    decomposition = STL(series, period=7, robust=True).fit()
    detrended = series - decomposition.trend
    seasonal_strength = 1 - np.var(decomposition.resid) / np.var(detrended)
    if seasonal_strength >= seasonality_th:
        # Deseasonalized series.
        series = decomposition.trend + decomposition.resid

    positions = np.arange(n_points)
    smoother = SuperSmoother()
    smoother.fit(positions, series)
    resid = series - smoother.predict(positions)

    # Fences: quartiles pushed outwards by limit_range * IQR.
    quartiles = np.quantile(resid, [0.25, 0.75])
    limits = quartiles + limit_range * np.diff(quartiles) * [-1, 1]
    lower, upper = limits[0], limits[1]

    outliers = (lower > resid) | (resid > upper)

    cleaned = series.copy()
    rolling_mean = cleaned.rolling(window=5, min_periods=1, center=True).mean()
    cleaned[outliers] = rolling_mean[outliers]
    return cleaned
예제 #10
0
def fitcurve(lc_data_all_band, period):
    """Fit a SuperSmoother curve to the phase-folded light curve of each band.

    Parameters
    ----------
    lc_data_all_band : mapping
        ``lc_data_all_band['bands']`` lists the band names, and each band name
        maps to a sequence of observations read through their ``"mag"``,
        ``"time"`` and ``"error"`` keys.
    period : float or str
        Folding period; converted with ``float``.

    Returns
    -------
    tuple
        ``(fitresults, ksquare)``.  ``fitresults`` holds the ``'bands'`` list
        plus, for every band whose prediction is non-empty, a dict with the
        keys ``'mag'``, ``'shift'`` and ``'ksquare'``.  The second item is the
        ``ksquare`` of the last band processed, or ``0`` when there are no
        bands or that band produced no prediction.

    Notes
    -----
    The curve is predicted on the module-level ``GLOBAL_PHASE`` grid and then
    passed through the ``fillNaN`` and ``norm`` helpers.  ``ksquare`` is the
    mean of ``(residual / error) ** 2`` over the observations, where residuals
    are rounded to 6 decimals and non-finite residuals count as 0.  A zero
    ``"error"`` value is not guarded against.
    """
    bands = lc_data_all_band['bands']
    fitresults = {'bands': bands}
    period = float(period)
    # Bound before the loop so that an empty band list returns 0 instead of
    # failing on an unassigned name at the return statement.
    ksquare = 0

    for band in bands:
        lc_data = lc_data_all_band[band]
        mag = np.array([float(obs["mag"]) for obs in lc_data],
                       dtype=np.float32)
        # Times stay in float64: float32 carries only ~7 significant digits,
        # so large timestamps (e.g. MJD values) would be rounded before
        # folding.
        mjd = np.array([float(obs["time"]) for obs in lc_data],
                       dtype=np.float64)
        sigma = np.array([float(obs["error"]) for obs in lc_data],
                         dtype=np.float32)

        # Phase = fractional part of the cycles elapsed since the first epoch.
        cycles = (mjd - mjd.min()) / period
        phase = cycles - np.trunc(cycles)

        model = SuperSmoother()
        model.fit(phase, mag)
        y = model.predict(GLOBAL_PHASE).tolist()

        ksquare = 0
        if len(y) == 0:
            # Nothing predicted: this band gets no entry in the results.
            continue

        y, shift = norm(fillNaN(y))
        data = {"mag": [round(d, 6) for d in y], 'shift': shift}

        residual = model.predict(phase) - mag
        # NaN and infinite residuals are recorded as 0.
        error = [round(e, 6) if np.isfinite(e) else 0 for e in residual]

        ksquare = sum(e**2 / s**2 for e, s in zip(error, sigma))
        ksquare = ksquare / len(residual)
        data['ksquare'] = ksquare
        fitresults[band] = data

    return fitresults, ksquare
예제 #11
0
    def base_surv(self, algo="bsl", X=None, label=None, smoothed=False):
        """Estimate the baseline survival function S0(t) from data (X, label).

        Parameters
        ----------
        algo : string
            Estimator to use: "wwe", "kp" or "bsl" (see Notes).
        X : np.array
            Input data of patients.  The stored training data is used when
            either ``X`` or ``label`` is None.
        label : dict
            Labels of patients, as accepted by ``utils.parse_data``.  The
            stored training labels are used when either ``X`` or ``label`` is
            None.
        smoothed : bool
            Whether to smooth the estimated curve with SuperSmoother.

        Returns
        -------
        tuple
            ``(T0, S0)``.  T0 holds the time points, starting with 0 and
            listing each distinct time once; S0 holds the survival rate at
            those time points.

        Raises
        ------
        NotImplementedError
            If ``algo`` is not one of the supported names.

        Examples
        --------
        >>> model.base_surv(algo='wwe')

        Notes
        -----
        Algorithms for estimating the baseline survival function:

        (1). wwe: WWE (with ties)

        (2). kp: Kalbfleisch & Prentice estimator (without ties)

        (3). bsl: Breslow (with ties, but negative values can occur)
        """
        # Get data for estimating S0(t)
        if X is None or label is None:
            X = self.train_data['X']
            label = {'t': self.train_data['T'], 'e': self.train_data['E']}
        X, E, T, failures, atrisk, ties = utils.parse_data(X, label)

        risk = self.predict(X)
        hz_ratio = np.exp(risk)

        if algo not in ('wwe', 'kp', 'bsl'):
            raise NotImplementedError('tie breaking method not recognized')

        s0 = [1]
        t0 = [0]
        # Set mirror of t0: constant-time duplicate check instead of scanning
        # the growing list for every time point.
        seen = {0}
        for t in T[::-1]:
            if t in seen:
                continue
            seen.add(t)
            t0.append(t)

            if t not in atrisk:
                s0.append(1)
                continue

            if algo == 'wwe':
                # R(t_i) - D_i
                trisk = [j for j in atrisk[t] if j not in failures[t]]
                dt = len(failures[t]) * 1.0
                s = np.sum(hz_ratio[trisk])
                cj = 1 - dt / (dt + s)
            elif algo == 'kp':
                # R(t_i)
                s = np.sum(hz_ratio[atrisk[t]])
                si = hz_ratio[failures[t][0]]
                cj = (1 - si / s)**(1 / si)
            else:
                # 'bsl', R(t_i)
                dt = len(failures[t]) * 1.0
                s = np.sum(hz_ratio[atrisk[t]])
                cj = 1 - dt / s
            s0.append(np.exp(cj - 1))

        # base survival function
        S0 = np.cumprod(s0, axis=0)
        T0 = np.array(t0)

        if smoothed:
            # smooth the baseline survival curve
            ss = SuperSmoother()
            ss.fit(T0, S0, dy=100)
            S0 = ss.predict(T0)

        return T0, S0
예제 #12
0
def stl_features(x: np.array, freq: int = 1) -> Dict[str, float]:
    """Calculates seasonal trend using loess decomposition.

    For ``freq > 1`` the series is decomposed with STL; otherwise the trend
    is estimated with SuperSmoother and the seasonal component is zero.  If
    that decomposition step raises, every computed feature is reported as
    NaN.

    Parameters
    ----------
    x: numpy array
        The time series.
    freq: int
        Frequency of the time series

    Returns
    -------
    dict
        'nperiods': 1 when freq > 1, else 0.
        'seasonal_period': Frequency of the time series.
        'trend': Strength of trend.
        'spike': Measures "spikiness" of x.
        'linearity': Linearity of x based on the coefficients of an
                     orthogonal quadratic regression.
        'curvature': Curvature of x based on the coefficients of an
                     orthogonal quadratic regression.
        'e_acf1': 'x_acf1' entry of acf_features applied to the remainder.
        'e_acf10': 'x_acf10' entry of acf_features applied to the remainder.

        Only for seasonal data (freq > 1).
        'seasonal_strength': Strength of seasonality.
        'peak': Strength of peaks.
        'trough': Strength of trough.
    """
    m = freq
    nperiods = int(m > 1)

    def _nan_features(with_seasonal: bool) -> Dict[str, float]:
        # Feature set reported when the decomposition step itself fails.
        failed = {
            'nperiods': nperiods,
            'seasonal_period': m,
            'trend': np.nan,
            'spike': np.nan,
            'linearity': np.nan,
            'curvature': np.nan,
            'e_acf1': np.nan,
            'e_acf10': np.nan
        }
        if with_seasonal:
            failed['seasonal_strength'] = np.nan
            failed['peak'] = np.nan
            failed['trough'] = np.nan
        return failed

    # STL fits
    if m > 1:
        try:
            stlfit = STL(x, m, 13).fit()
        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # propagate instead of being turned into a NaN result.
            return _nan_features(with_seasonal=True)

        trend0 = stlfit.trend
        remainder = stlfit.resid
        seasonal = stlfit.seasonal
    else:
        t = np.arange(len(x)) + 1
        try:
            trend0 = SuperSmoother().fit(t, x).predict(t)
        except Exception:
            return _nan_features(with_seasonal=False)

        remainder = x - trend0
        seasonal = np.zeros(len(x))
    # De-trended and de-seasonalized data
    detrend = x - trend0
    deseason = x - seasonal
    # Summary stats
    n = len(x)
    varx = np.nanvar(x, ddof=1)
    vare = np.nanvar(remainder, ddof=1)
    vardetrend = np.nanvar(detrend, ddof=1)
    vardeseason = np.nanvar(deseason, ddof=1)
    # Measure of trend strength
    if varx < np.finfo(float).eps:
        trend = 0
    elif (vardeseason / varx < 1e-10):
        trend = 0
    else:
        trend = max(0, min(1, 1 - vare / vardeseason))
    # Measure of seasonal strength
    if m > 1:
        if varx < np.finfo(float).eps:
            season = 0
        elif np.nanvar(remainder + seasonal, ddof=1) < np.finfo(float).eps:
            season = 0
        else:
            season = max(
                0, min(1, 1 - vare / np.nanvar(remainder + seasonal, ddof=1)))

        # 1-based position of the extreme within the period; a remainder of
        # 0 is reported as m.
        peak = (np.argmax(seasonal) + 1) % m
        peak = m if peak == 0 else peak

        trough = (np.argmin(seasonal) + 1) % m
        trough = m if trough == 0 else trough
    # Compute measure of spikiness: variance of the leave-one-out variances
    # of the remainder.
    d = (remainder - np.nanmean(remainder))**2
    varloo = (vare * (n - 1) - d) / (n - 2)
    spike = np.nanvar(varloo, ddof=1)
    # Compute measures of linearity and curvature
    time = np.arange(n) + 1
    poly_m = poly(time, 2)
    time_x = add_constant(poly_m)
    coefs = OLS(trend0, time_x).fit().params

    linearity = coefs[1]
    curvature = -coefs[2]
    # ACF features
    acfremainder = acf_features(remainder, m)
    # Assemble features
    output = {
        'nperiods': nperiods,
        'seasonal_period': m,
        'trend': trend,
        'spike': spike,
        'linearity': linearity,
        'curvature': curvature,
        'e_acf1': acfremainder['x_acf1'],
        'e_acf10': acfremainder['x_acf10']
    }

    if m > 1:
        output['seasonal_strength'] = season
        output['peak'] = peak
        output['trough'] = trough

    return output
    np.around(x, 2)
    dy = x
    print(x)
    print(y)
    print(dy)
    return x, y, dy


# Demo: load a data set, plot it, fit a SuperSmoother and plot the fit.

# Generate and visualize the data
# (t, y, dy are passed below as the x values, y values and error bars)
t, y, dy = dataset_from_broker()
plt.errorbar(t, y, dy, fmt='o', alpha=0.3)
plt.show()
plt.clf()

# Fit the supersmoother model, passing dy as the third argument
model = SuperSmoother()
model.fit(t, y, dy)

# Evaluate the smoothed fit on an evenly spaced grid spanning the range of
# t, with as many points as there are observations
tfit = np.linspace(np.amin(t), np.amax(t), len(t))
yfit = model.predict(tfit)
# Show the smoothed model (black line) on top of the data
plt.errorbar(t, y, dy, fmt='o', alpha=0.3)
plt.plot(tfit, yfit, '-k')
plt.show()
plt.clf()

plt.errorbar(t, y, dy, fmt='o', alpha=0.3)
for smooth in model.primary_smooths:
    plt.plot(tfit,
             smooth.predict(tfit),
예제 #14
0
# Build one fixed-length, peak-aligned SuperSmoother curve per light curve
# and store the stack in 'X_supersmoother.npy'.
lc_list = numpy.load('lc_list.npy')
periods_arr = numpy.load('periods_arr.npy')

grid_len = 200
# Grid slot where the maximum of every fitted curve is placed.
p_loc = grid_len // 4
x_fit = numpy.linspace(0, 1, grid_len)

n_objects = len(lc_list)
X_supersmoother = numpy.zeros((n_objects, grid_len))

for i in trange(n_objects):
    lc = lc_list[i]
    # Columns: 0 = magnitude, 1 = time, 2 = magnitude uncertainty.
    mag, t, dmag = lc[:, 0], lc[:, 1], lc[:, 2]
    period = periods_arr[i]
    # Fold on twice the period.
    phase = (t / period / 2) % 1

    model = SuperSmoother(alpha=5, period=1)
    model.fit(phase, mag, dmag)
    y_fit = model.predict(x_fit)

    # Rotate the grid so that the curve maximum lands on slot p_loc.
    peak_phase = x_fit[numpy.argmax(y_fit)]
    grid_order = (x_fit + x_fit[p_loc] - peak_phase) % 1
    y_fit = y_fit[numpy.argsort(grid_order)]

    X_supersmoother[i] = y_fit

numpy.save('X_supersmoother', X_supersmoother)
예제 #15
0
    def basesurv(self, algo="wwe", X=None, label=None, smoothed=False):
        """
        Estimate base survival function S0(t) based on data(X, label).

        Algorithm for estimating S0:
        (1). wwe: WWE(with ties)
        (2). kp: Kalbfleisch & Prentice Estimator(without ties)
        (3). bsl: breslow(with ties, but exists negative value)

        Parameters:
            algo: name of the estimator, one of the three above.
            X: input data; the stored training data is used when X or
                label is None.
            label: labels matching X; the stored training labels are used
                when X or label is None.
            smoothed: smooth the survival function with SuperSmoother or not.

        Returns:
            T0: 0 followed by the time points in reverse order of T.
            S0: cumulative product of the per-time-point factors, one entry
                per element of T0.

        Raises:
            NotImplementedError: if algo is not a supported name.
        """
        # Get data for estimating S0(t)
        if X is None or label is None:
            X = self.train_data['X']
            label = self.train_data['label']
        X, E, T, failures, atrisk, ties = utils.parse_data(X, label)

        risk = self.predict(X)
        hz_ratio = np.exp(risk)

        # An unknown name used to fall through silently and return an S0 of
        # length 1 next to a full-length T0; reject it explicitly instead.
        if algo not in ('wwe', 'kp', 'bsl'):
            raise NotImplementedError('tie breaking method not recognized')

        s0 = [1]
        for t in T[::-1]:
            if t not in atrisk:
                s0.append(1)
                continue

            if algo == 'wwe':
                # R(t_i) - D_i
                trisk = [j for j in atrisk[t] if j not in failures[t]]
                dt = len(failures[t]) * 1.0
                s = np.sum(hz_ratio[trisk])
                cj = 1 - dt / (dt + s)
            elif algo == 'kp':
                # R(t_i)
                s = np.sum(hz_ratio[atrisk[t]])
                si = hz_ratio[failures[t][0]]
                cj = (1 - si / s)**(1 / si)
            else:
                # 'bsl', R(t_i)
                dt = len(failures[t]) * 1.0
                s = np.sum(hz_ratio[atrisk[t]])
                cj = 1 - dt / s
            s0.append(cj)

        S0 = np.cumprod(s0, axis=0)
        T0 = np.insert(T[::-1], 0, 0, axis=0)

        if smoothed:
            # smooth the baseline survival curve
            ss = SuperSmoother()
            ss.fit(T0, S0, dy=100)
            S0 = ss.predict(T0)

        return T0, S0
예제 #16
0
    def basesurv(self, algo="wwe", X=None, label=None, smoothed=False):
        """
        Estimate the baseline survival function S0(t) from data (X, label).

        Parameters:
            algo: name of the estimator (see below).
            X: input data of patients; the stored training data is used
                when X or label is None.
            label: labels of patients; built from the stored training data
                when X or label is None.
            smoothed: whether to smooth the result with SuperSmoother.

        Returns:
            T0: 0 followed by the time points in reverse order of T.
            S0: survival rate at each entry of T0.

        Raises:
            NotImplementedError: if algo is not a supported name.

        Estimators:
            (1). wwe: WWE (with ties)
            (2). kp: Kalbfleisch & Prentice estimator (without ties)
            (3). bsl: Breslow (with ties, but negative values can occur)
        """
        # Fall back to the training set when no data is supplied.
        if X is None or label is None:
            X = self.train_data['X']
            label = {'t': self.train_data['T'], 'e': self.train_data['E']}
        X, E, T, failures, atrisk, ties = utils.parse_data(X, label)

        hz_ratio = np.exp(self.predict(X))

        def wwe_factor(t):
            # Risk set without the failures: R(t_i) - D_i
            survivors = [j for j in atrisk[t] if j not in failures[t]]
            n_failed = len(failures[t]) * 1.0
            return 1 - n_failed / (n_failed + np.sum(hz_ratio[survivors]))

        def kp_factor(t):
            # Full risk set R(t_i), first failure only
            total = np.sum(hz_ratio[atrisk[t]])
            own = hz_ratio[failures[t][0]]
            return (1 - own / total)**(1 / own)

        def bsl_factor(t):
            # Full risk set R(t_i)
            n_failed = len(failures[t]) * 1.0
            return 1 - n_failed / np.sum(hz_ratio[atrisk[t]])

        if algo == 'wwe':
            factor = wwe_factor
        elif algo == 'kp':
            factor = kp_factor
        elif algo == 'bsl':
            factor = bsl_factor
        else:
            raise NotImplementedError('tie breaking method not recognized')

        # One factor per time point, walking T backwards; time points with
        # no entry in atrisk contribute a factor of 1.
        s0 = [1]
        s0.extend(factor(t) if t in atrisk else 1 for t in T[::-1])

        # base survival function
        S0 = np.cumprod(s0, axis=0)
        T0 = np.insert(T[::-1], 0, 0, axis=0)

        if not smoothed:
            return T0, S0

        # Smooth the baseline curve.
        ss = SuperSmoother()
        ss.fit(T0, S0, dy=100)
        return T0, ss.predict(T0)