Example #1
def lm_cost(x):
    n = (x.shape[0] - 1) // 4  # number of interior poses in the state vector (unused here)
    tk, xk, yk, thetak = split_state(x)
    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    time_cost = np.sum(tk)

    # constant curvature path constraint
    path_cost = np.fabs(get_ceq(x))
    # vel, acc, radius limits
    ineq_cost = np.fabs(np.minimum(0, get_c(x)))

    # discourage sharp turns (a bit hacky but wanted paths to be less jagged)
    dist = (xk[1:] - xk[:-1])**2 + (yk[1:] - yk[:-1])**2

    # the TEB paper suggests making the path cost much larger than the rest,
    # but these weights/entire cost function could probably use some tuning
    # cost = .1*time_cost + np.sum(5*path_cost) + 5*np.sum(ineq_cost)  + .1*np.sum(dist)
    cost = np.concatenate(
        ([0.01 * time_cost], 10 * path_cost, 10 * ineq_cost, 0.1 * dist))

    # cost = 0.001*time_cost + np.sum( 5000*path_cost) + 10*np.sum(ineq_cost)  + 100*np.sum(dist)

    return cost
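
A note on the return type: lm_cost returns a residual vector rather than a scalar, which is the form Levenberg-Marquardt solvers expect (they square and sum the residuals internally). A minimal sketch of the calling pattern with scipy.optimize.least_squares, using a toy residual function since lm_cost itself depends on the module's split_state and start/goal globals:

import numpy as np
from scipy.optimize import least_squares

# Toy residual function in the same style as lm_cost: return the residual
# vector and let the solver square-and-sum it.
def toy_cost(p):
    model = p[0] * np.arange(5.0) + p[1]
    target = np.array([0.1, 1.9, 4.2, 5.8, 8.1])
    return model - target

res = least_squares(toy_cost, x0=np.zeros(2), method='lm')
print(res.x)  # close to [2.0, 0.0]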
Example #2
    def train(self, subsample_idcs=None, ridge=1e-9, max_storage=1e8):
        self.idcs = np.sort(subsample_idcs)
        self.X_ind = self.X[self.idcs, :]

        self.chunk_size = int(max_storage / float(self.idcs.shape[0]))
        if self.chunk_size == 0:
            self.chunk_size = 1
        csz = self.chunk_size

        #can handle len(idcs)**2 memory, len(idcs)**3 time cost
        Kxx = self.k(self.X_ind)
        Ysum = np.zeros((self.idcs.shape[0], self.Y.shape[1]))
        Ksum = np.zeros((self.idcs.shape[0], self.idcs.shape[0]))

        pbar = ProgressBar('Chunk sum', 0, self.X.shape[0])
        j = 0
        csz = self.idcs.shape[0]  #note: overrides the max_storage-based chunk size computed above
        while j * csz < self.X.shape[0]:
            pbar.update(j * csz)
            Kchunk = self.k(self.X[j * csz:(j + 1) * csz, :], self.X_ind)
            Ychunk = self.Y[j * csz:(j + 1) * csz, :]
            Ysum += Kchunk.T.dot(Ychunk)
            Ksum += Kchunk.T.dot(Kchunk)
            j += 1
        pbar.finish()

        self.V = Ksum + self.lvar * Kxx
        self.V += ridge * np.fabs(self.V).max() * np.eye(self.V.shape[0])
        self.alpha = np.linalg.solve(self.V, Ysum)
        self.V /= self.lvar
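
The algebra above is the standard subset-of-regressors construction: V = Kux Kxu + lvar * Kuu, alpha = V^{-1} Kux Y, and the predictive mean at new inputs is k(X*, X_ind) dot alpha. A self-contained sketch with an RBF kernel (all names below are illustrative, not part of the class):

import numpy as np

def rbf(A, B=None):
    # squared-exponential kernel
    B = A if B is None else B
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2)

rng = np.random.default_rng(0)
X = rng.uniform(-3, 3, size=(200, 1))
Y = np.sin(X) + 0.1 * rng.standard_normal(X.shape)
lvar = 0.01                                  # noise variance (assumed)
idcs = np.sort(rng.choice(200, size=20, replace=False))
X_ind = X[idcs]

Kux = rbf(X_ind, X)                          # plays the role of the Kchunk sums
V = Kux @ Kux.T + lvar * rbf(X_ind)          # Ksum + lvar * Kxx above
alpha = np.linalg.solve(V, Kux @ Y)          # same solve as in train()

X_star = np.linspace(-3, 3, 5)[:, None]
mean = rbf(X_star, X_ind) @ alpha            # SoR predictive mean
print(mean.ravel())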
Example #3
    def posttrain(self, ridge=1e-9):
        #compute constants for prediction
        Kxi = self.k(self.X_ind)
        self.C = Kxi.copy()
        j = 0
        KY = np.zeros((self.X_ind.shape[0], self.Y.shape[1]))
        KK = np.zeros((self.X_ind.shape[0], self.X_ind.shape[0]))
        csz = max(self.sridcs.shape[0], self.X_ind.shape[0])
        while j * csz < self.X.shape[0]:
            Kchunk = self.k(self.X[j * csz:(j + 1) * csz, :], self.X_ind)
            KY += np.dot(Kchunk.T, self.Y[j * csz:(j + 1) * csz, :])
            KK += np.dot(Kchunk.T, Kchunk)
            j += 1
        self.C += KK / self.lvar
        self.C += ridge * np.fabs(self.C).max() * np.eye(self.C.shape[0])
        self.alpha = np.linalg.solve(self.C, KY) / self.lvar
        Kxi += ridge * np.fabs(Kxi).max() * np.eye(Kxi.shape[0])
        self.C = np.linalg.inv(self.C) - np.linalg.inv(Kxi)
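
The ridge lines follow a common jitter pattern: scale a tiny ridge by the largest matrix entry before solving or inverting, so a near-singular system stays numerically stable without visibly changing the answer. The pattern in isolation:

import numpy as np

A = np.array([[1.0, 1.0],
              [1.0, 1.0 + 1e-12]])               # nearly singular
b = np.array([1.0, 1.0])
ridge = 1e-9
A_j = A + ridge * np.fabs(A).max() * np.eye(2)   # same pattern as above
print(np.linalg.solve(A_j, b))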
Example #4
def get_c(x):
    n = (x.shape[0] - 1) // 4
    tk, xk, yk, thetak = split_state(x)

    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    dx = xk[1:] - xk[0:-1]
    dy = yk[1:] - yk[0:-1]
    dtheta = thetak[1:] - thetak[0:-1]
    dk = np.array([dx, dy, np.zeros(n + 1)]).T

    # turning radius
    # guard against division by zero on straight segments (dtheta ~ 0)
    mask = np.fabs(dtheta) > 1e-5
    dtheta_stable = np.where(mask, dtheta, 1)
    turn_rad = np.where(
        mask,
        np.linalg.norm(dk, axis=1) / np.fabs(2 * np.sin(dtheta_stable / 2)),
        min_turning_radius)
    c_rad = turn_rad - min_turning_radius

    # linear velocity
    qk = np.array([np.cos(thetak[:-1]),
                   np.sin(thetak[:-1]),
                   np.zeros(n + 1)]).T
    proj_q_d = np.sum(qk * dk, axis=1)
    sign_v = sign(proj_q_d)
    vk = np.linalg.norm(dk, axis=1) * sign_v / tk
    c_vel = 0.9 * max_vel - np.fabs(vk)

    # angular velocity
    wk = dtheta / tk
    c_w = 0.8 * max_ang_vel - np.fabs(wk)

    vk = np.concatenate(([start_vel], vk))
    # acceleration (finite differences)
    ak = (vk[1:] - vk[0:-1]) / tk

    c_acc = np.where(ak < 0, ak + 0.5 * max_dec, 0.5 * max_acc - ak)
    c = np.concatenate((c_rad, c_vel, c_w, c_acc))
    return c
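
The turning-radius expression comes from circular-arc geometry: a chord of length c subtending a heading change dtheta on a circle of radius R satisfies c = 2 R sin(dtheta / 2). A quick numerical check of the relation used above:

import numpy as np

R, dtheta = 2.0, 0.3
chord = 2 * R * np.sin(dtheta / 2)               # chord length on the arc
R_recovered = chord / np.fabs(2 * np.sin(dtheta / 2))
print(np.isclose(R_recovered, R))                # True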
Example #5
    def pretrain(self, subsample_idcs, ridge=1e-9):
        self.sridcs = np.sort(subsample_idcs)

        #get matrices necessary to compute khat(x,x') and muhat(x)
        #using subset of regressors
        srgp = SubsetRegressorsGP(self.X, self.Y, self.k, self.lvar)
        srgp.train(self.sridcs, ridge)
        self.pre_alpha = srgp.alpha
        self.V = srgp.V
        self.V += ridge * np.fabs(self.V).max() * np.eye(self.sridcs.shape[0])
Example #6
def fitFlare(x, y, yerr, tstart, tstop, skew_fac=10):
    mask = (x > tstart) & (x < tstop)
    mu0 = (tstart + tstop) / 2
    sig0 = (tstop - tstart) / 2
    A0 = np.max(y) * 100
    skew = 0

    try:
        # Fit a gaussian to the segment
        popt1, pcov1 = curve_fit(fh.gaussian,
                                 x[mask],
                                 y[mask],
                                 p0=(mu0, sig0, A0),
                                 sigma=yerr[mask])
        y_model = fh.gaussian(x[mask], popt1[0], popt1[1], popt1[2])
        chi1 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # Fit the Davenport 2014 flare model to the segment
        popt2, pcov2 = curve_fit(fh.aflare1,
                                 x[mask],
                                 y[mask],
                                 p0=(mu0, sig0, A0),
                                 sigma=yerr[mask])
        y_model = fh.aflare1(x[mask], popt2[0], popt2[1], popt2[2])
        chi2 = fh.redChiSq(y_model, y[mask], yerr[mask], len(y[mask]) - 3)

        # If the flare model fit worked, calculate the skew by centering on the peak of the aflare model
        # Use a window scaled to the FWHM of the flare model for integration
        mu = popt2[0]  #np.trapz(x[mask]*A*y[mask], x[mask])
        f_hwhm = popt2[1] / 2
        t1_skew, t2_skew = mu - skew_fac * f_hwhm, mu + skew_fac * f_hwhm
        skew_mask = (x > t1_skew) & (x < t2_skew)

        # Measure the skew by treating time = x and flux = p(x). Calculate the
        # third moment of p(x)
        A = 1 / np.trapz(y[skew_mask], x[skew_mask])
        var = np.trapz((x[skew_mask] - mu)**2 * A * y[skew_mask], x[skew_mask])
        stddev = np.sqrt(np.fabs(var))
        skew = np.trapz((x[skew_mask] - mu)**3 * A * y[skew_mask],
                        x[skew_mask]) / stddev**3
    except Exception:
        traceback.print_exc()
        empty = np.zeros(3)
        return empty, empty, -1, empty, empty, -1, 0, 0

    n_pts = len(x[mask])
    # expected number of points at TESS 2-minute cadence
    n_pts_true = np.floor(((tstop - tstart) * u.d).to(u.min).value / 2)
    coverage = n_pts / n_pts_true

    return popt1, np.sqrt(pcov1.diagonal()), chi1, popt2, np.sqrt(
        pcov2.diagonal()), chi2, skew, coverage
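
The skew estimate treats the flux segment as an unnormalized density p(x) and computes its standardized third moment by trapezoidal integration. As an illustrative sanity check of that recipe, a symmetric Gaussian profile yields a skew of roughly zero:

import numpy as np

x = np.linspace(-5, 5, 2001)
y = np.exp(-0.5 * x**2)                          # symmetric profile
A = 1 / np.trapz(y, x)                           # normalize p(x)
mu = np.trapz(x * A * y, x)
var = np.trapz((x - mu)**2 * A * y, x)
skew = np.trapz((x - mu)**3 * A * y, x) / np.sqrt(var)**3
print(round(skew, 6))                            # ~0 for a symmetric profile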
Example #7
def test_correlated_fit():
    num_samples = 400
    N = 10

    x = norm.rvs(size=(N, num_samples))

    r = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            r[i, j] = np.exp(-0.8 * np.fabs(i - j))

    errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2])
    for i in range(N):
        for j in range(N):
            r[i, j] *= errl[i] * errl[j]

    c = cholesky(r, lower=True)
    y = np.dot(c, x)
    x = np.arange(N)
    for linear in [True, False]:
        data = []
        for i in range(N):
            if linear:
                data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
            else:
                data.append(
                    pe.Obs([[
                        np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]
                    ]], ['ens']))

        for o in data:
            o.gamma_method()

        if linear:

            def fitf(p, x):
                return p[1] + p[0] * x
        else:

            def fitf(p, x):
                return p[1] * np.exp(-p[0] * x)

        fitp = pe.least_squares(x, data, fitf, expected_chisquare=True)

        fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
        for i in range(2):
            diff = fitp[i] - fitpc[i]
            diff.gamma_method()
            assert (diff.is_zero_within_error(sigma=5))
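
The covariance construction at the top of the test can be stated more compactly: exponentially decaying correlations scaled by per-point errors, with correlated samples drawn through the Cholesky factor. A condensed restatement (N and the error values below are arbitrary):

import numpy as np
from scipy.stats import norm
from scipy.linalg import cholesky

N = 4
r = np.exp(-0.8 * np.fabs(np.subtract.outer(np.arange(N), np.arange(N))))
errl = np.array([1.0, 2.0, 1.5, 0.5])
cov = r * np.outer(errl, errl)                   # same matrix the loops build
y = cholesky(cov, lower=True) @ norm.rvs(size=(N, 100000))
print(np.corrcoef(y)[0, 1])                      # close to exp(-0.8) ~ 0.449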
Example #8
def get_ceq(x):
    n = (x.shape[0] - 1) // 4

    tk, xk, yk, thetak = split_state(x)
    xk = np.concatenate(([start_x], xk, [goal_x]))
    yk = np.concatenate(([start_y], yk, [goal_y]))
    thetak = np.concatenate(([start_theta], thetak, [goal_theta]))

    dx = xk[1:] - xk[0:-1]
    dy = yk[1:] - yk[0:-1]
    dk = np.array([dx, dy, np.zeros(n + 1)]).T

    # constant curvature (trapezoidal collocation; see the TEB paper)
    ceq = np.array([
        np.cos(thetak[0:-1]) + np.cos(thetak[1:]),
        np.sin(thetak[0:-1]) + np.sin(thetak[1:]),
        np.zeros(n + 1)
    ]).T
    ceq = np.fabs(np.cross(ceq, dk, axisa=1, axisb=1)[:, 2])
    return ceq
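
In words: each displacement d_k must be parallel to the average of the two endpoint heading vectors, and parallelism is enforced by driving the z-component of their cross product to zero. The idea in miniature:

import numpy as np

th0, th1 = 0.2, 0.6
avg = np.array([np.cos(th0) + np.cos(th1), np.sin(th0) + np.sin(th1), 0.0])
d = 0.5 * avg / np.linalg.norm(avg)       # displacement along the avg heading
print(np.fabs(np.cross(avg, d)[2]))       # ~0: zero constraint violation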
Example #9
def sign(v):
    # smooth approximation of np.sign: exact zero within a small tolerance,
    # steep tanh elsewhere, so the optimizer sees a differentiable function
    tol = 1e-5
    k = 5e3
    res = np.where(np.fabs(v) <= tol, 0, np.tanh(k * v))
    return res
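
A minimal usage sketch: values inside the tolerance map to exactly zero, and everything else saturates almost immediately to +/-1, so the result behaves like np.sign while staying smooth enough for gradient-based optimization.

import numpy as np

# calls the sign() defined above
v = np.array([-0.1, -1e-6, 0.0, 1e-6, 0.1])
print(sign(v))  # approximately [-1., 0., 0., 0., 1.]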
Example #10
    def _objective(self, X_i, itr, ridge, compute_constant=False):
        #chunk size
        csz = max(self.sridcs.shape[0], self.Zshape[0])
        #inducing pt size
        isz = self.Zshape[0]
        #subsample size
        srsz = self.sridcs.shape[0]

        #reshape inducing pt matrix (scipy.minimize uses a flattened version)
        X_i = X_i.reshape(self.Zshape)

        #compute useful constants
        Kxi = self.k(X_i)
        Kxi += ridge * np.fabs(Kxi).max() * np.eye(isz)
        Kxixsr = self.k(X_i, self.X[self.sridcs, :])
        #
        #print(np.linalg.cond(self.V))
        #print(np.linalg.cond(Kxi))
        #
        Khxi = np.dot(Kxixsr, np.linalg.solve(self.V, Kxixsr.T))

        Kxi_inv_muxi = np.linalg.solve(Kxi, np.dot(Kxixsr, self.pre_alpha))

        KxxiTKxxi = np.zeros((isz, isz))
        KxxsrTKxxi = np.zeros((srsz, isz))
        #need this because autograd doesn't support array assignment
        dYxi_chunks = []
        KdY = np.zeros((isz, self.Y.shape[1]))
        KdYi = np.zeros((isz, self.Y.shape[1]))
        j = 0
        while j * csz < self.X.shape[0]:
            Kchunk_sr = self.k(self.X[j * csz:(j + 1) * csz, :],
                               self.X[self.sridcs, :])
            Kchunk_i = self.k(self.X[j * csz:(j + 1) * csz, :], X_i)
            KxxiTKxxi += np.dot(Kchunk_i.T, Kchunk_i)
            KxxsrTKxxi += np.dot(Kchunk_sr.T, Kchunk_i)
            dYxi_chunk = np.dot(
                Kchunk_i, Kxi_inv_muxi) - self.Y[j * csz:(j + 1) * csz, :]
            dYxi_chunks.append(dYxi_chunk)
            KdY += np.dot(Kchunk_i.T, self.dYx[j * csz:(j + 1) * csz, :])
            KdYi += np.dot(Kchunk_i.T, dYxi_chunk)
            j += 1
        dYxi = np.vstack(dYxi_chunks)

        Kxi_inv_KxxiTKxxi = np.linalg.solve(Kxi, KxxiTKxxi)

        B = np.linalg.solve(Kxi.T, Kxi_inv_KxxiTKxxi.T).T / self.lvar
        B = 0.5 * (B + B.T)  #enforce symmetry (lin solver doesn't guarantee)

        #
        #print(np.linalg.cond(np.eye(isz) + np.dot(B, Kxi)))
        #

        Xi = np.linalg.solve(np.eye(isz) + np.dot(B, Kxi), B)
        Xi = 0.5 * (Xi + Xi.T)  #enforce symmetry (lin solver doesn't guarantee)

        ##################
        ##Compute ||L||^2
        ##################

        #compute C3 (used for S3 = Kx,xi*C3*Kxi,x )
        C3 = np.dot(Xi, np.dot(Kxi, Xi)) - 2. * Xi
        L = np.trace(
            np.dot(KxxsrTKxxi.T, np.linalg.solve(self.V,
                                                 np.dot(KxxsrTKxxi, C3))))
        L += np.trace(np.dot(KdY.T, np.dot(C3, KdY)))

        ##this code computes objective function constants that aren't needed for optimization
        if compute_constant:
            Kx = self.k(self.X)
            Kxxsr = self.k(self.X, self.X[self.sridcs, :])
            Khxx = np.dot(Kxxsr, np.linalg.solve(self.V, Kxxsr.T))
            L += np.trace(np.dot(Khxx, Kx))
            L += np.dot(self.dYx.T, np.dot(Kx, self.dYx))

        ##################
        ##Compute ||L_i||^2
        ##################

        #compute C1 (used for S1 = Kxxi*C1*Kxix)
        A1 = np.linalg.solve(Kxi, np.eye(isz) - np.dot(Kxi, Xi))
        C1 = np.dot(A1, np.dot(Kxi, A1.T))
        L_i = np.trace(
            np.dot(Khxi,
                   np.dot(Kxi_inv_KxxiTKxxi, np.dot(C1, Kxi_inv_KxxiTKxxi.T))))
        L_i += np.trace(np.dot(KdYi.T, np.dot(C1, KdYi)))

        ##################
        ##Compute <L, L_i>
        ##################
        #compute C2 (used for S2 = Kxxi*C2*Kxix)
        A2 = np.eye(isz) - np.dot(Xi, Kxi)
        C2 = np.dot(A2, np.linalg.solve(Kxi, A2.T).T)
        L_L_i = np.trace(
            np.dot(
                np.linalg.solve(self.V.T, Kxixsr.T).T,
                np.dot(KxxsrTKxxi, np.dot(C2, Kxi_inv_KxxiTKxxi.T))))
        L_L_i += np.trace(np.dot(KdY.T, np.dot(C2, KdYi)))

        #.sum() converts a 1x1 array to a scalar (a 1x1 array causes problems for scipy.minimize)
        return (L + L_i - 2 * L_L_i).sum()
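
On the 0.5 * (B + B.T) symmetrization used twice above: expressions of the form Kxi^{-1} M Kxi^{-1} are symmetric in exact arithmetic when M is symmetric, but chained linear solves leave tiny floating-point asymmetries that can compound in later solves. A small demonstration:

import numpy as np

rng = np.random.default_rng(1)
K = rng.random((50, 50)); K = K @ K.T + 50 * np.eye(50)    # SPD
M = rng.random((50, 50)); M = M @ M.T                      # symmetric
B = np.linalg.solve(K, np.linalg.solve(K, M).T)            # K^-1 M K^-1
print(np.fabs(B - B.T).max())   # tiny but generally nonzero
B = 0.5 * (B + B.T)
print(np.fabs(B - B.T).max())   # exactly zero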
Example #11
def procFlares(prefix,
               filenames,
               path,
               clobberGP=False,
               makePlots=False,
               writeLog=True):
    if makePlots:
        plots_path = path + 'plots/'
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)

    gp_path = path + 'gp/'

    #if not os.path.exists(gp_path):
    #    os.makedirs(gp_path)

    log_path = path + 'log/'

    #if not os.path.exists(log_path):
    #    os.makedirs(log_path)

    if writeLog:
        if os.path.exists(log_path + prefix + '.log'):
            os.remove(log_path + prefix + '.log')

    # Columns for flare table
    FL_files = np.array([])
    FL_TICs = np.array([])
    FL_id = np.array([])
    FL_t0 = np.array([])
    FL_t1 = np.array([])
    FL_f0 = np.array([])
    FL_f1 = np.array([])
    FL_ed = np.array([])
    FL_ed_err = np.array([])
    FL_skew = np.array([])
    FL_cover = np.array([])
    FL_mu = np.array([])
    FL_std = np.array([])
    FL_g_amp = np.array([])
    FL_mu_err = np.array([])
    FL_std_err = np.array([])
    FL_g_amp_err = np.array([])
    FL_tpeak = np.array([])
    FL_fwhm = np.array([])
    FL_f_amp = np.array([])
    FL_tpeak_err = np.array([])
    FL_fwhm_err = np.array([])
    FL_f_amp_err = np.array([])
    FL_g_chisq = np.array([])
    FL_f_chisq = np.array([])
    FL_g_fwhm_win = np.array([])
    FL_f_fwhm_win = np.array([])

    # Columns for param table
    P_median = np.array([])
    P_s_window = np.array([])
    P_acf_1dt = np.array([])
    P_acf_amp = np.array([])

    failed_files = []

    for k in range(len(filenames)):
        start_time = timing.time()
        filename = filenames[k]
        TIC = int(filename.split('-')[-3])
        file = path + filename

        if makePlots:
            fig, axes = plt.subplots(figsize=(16, 16), nrows=4, sharex=True)

        print('Processing ' + filename)
        gp_data_file = gp_path + filename + '.gp'
        gp_param_file = gp_path + filename + '.gp.par'
        median = -1
        s_window = -1
        acf_1dt = -1
        acf_amp = -1
        with fits.open(file, mode='readonly') as hdulist:
            try:
                tess_bjd = hdulist[1].data['TIME']
                quality = hdulist[1].data['QUALITY']
                pdcsap_flux = hdulist[1].data['PDCSAP_FLUX']
                pdcsap_flux_error = hdulist[1].data['PDCSAP_FLUX_ERR']
            except Exception:
                P_median = np.append(P_median, median)
                P_s_window = np.append(P_s_window, s_window)
                P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
                P_acf_amp = np.append(P_acf_amp, acf_amp)
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print('Reading file ' + filename + ' failed')
                continue

        if makePlots:
            axes[0].plot(tess_bjd, pdcsap_flux)

        # Cut out poor quality points
        ok_cut = (quality == 0) & (~np.isnan(tess_bjd)) & (~np.isnan(pdcsap_flux))\
                  & (~np.isnan(pdcsap_flux_error))

        tbl = Table([tess_bjd[ok_cut], pdcsap_flux[ok_cut], \
                  pdcsap_flux_error[ok_cut]],
                     names=('TIME', 'PDCSAP_FLUX', 'PDCSAP_FLUX_ERR'))
        df_tbl = tbl.to_pandas()

        median = np.nanmedian(df_tbl['PDCSAP_FLUX'])

        # Estimate the period of the LC with autocorrelation
        acf = fh.autocorr_estimator(tbl['TIME'], tbl['PDCSAP_FLUX']/median, \
                                    yerr=tbl['PDCSAP_FLUX_ERR']/median,
                                    min_period=0.1, max_period=27, max_peaks=2)
        if len(acf['peaks']) > 0:
            acf_1dt = acf['peaks'][0]['period']
            acf_amp = acf['autocorr'][1][np.where(
                acf['autocorr'][0] == acf_1dt)]
            mask = np.where(
                (acf['autocorr'][0] == acf['peaks'][0]['period']))[0]
            acf_1pk = acf['autocorr'][1][mask][0]
            s_window = int(acf_1dt /
                           np.fabs(np.nanmedian(np.diff(df_tbl['TIME']))) / 6)
        else:
            acf_1dt = (tbl['TIME'][-1] - tbl['TIME'][0]) / 2
            acf_amp = 0
            acf_1pk = 0  # no ACF peak found; avoids a NameError at the GP call below
            s_window = 128

        P_median = np.append(P_median, median)
        P_s_window = np.append(P_s_window, s_window)
        P_acf_1dt = np.append(P_acf_1dt, acf_1dt)
        P_acf_amp = np.append(P_acf_amp, acf_amp)

        # Run GP fit on the lightcurve if we haven't already
        if os.path.exists(gp_data_file) and not clobberGP:
            # Failed GP regression will produce an empty file
            if os.path.getsize(gp_data_file) == 0:
                print(file + ' failed (previously) during GP regression')
                failed_files.append(filename)
                continue

            print('GP file already exists, loading...')
            times, smo, var = np.loadtxt(gp_data_file)
        else:
            smo = np.zeros(len(df_tbl['TIME']))
            try:
                if makePlots:
                    ax = axes[1]
                else:
                    ax = None
                times, smo, var, params = iterGP_rotation(df_tbl['TIME'].values, df_tbl['PDCSAP_FLUX'].values/median, \
                                          df_tbl['PDCSAP_FLUX_ERR'].values/median, acf_1dt, acf_1pk, ax=ax)

                #np.savetxt(gp_param_file, params['logs2'], params['logamp'], params['logperiod'], \
                #           params['logq0'], params['logdeltaq'], params['mix'], params['period'])
                np.savetxt(gp_param_file, params)
                np.savetxt(gp_data_file, (times, smo, var))

            except Exception:
                traceback.print_exc()
                failed_files.append(filename)
                np.savetxt(gp_data_file, ([]))
                print(filename + ' failed during GP fitting')
                continue

        # The GP is produced from a downsampled lightcurve. Need to interpolate to
        # compare GP and full LC

        smo_int = np.interp(tbl['TIME'], times, smo)

        # Search for flares in the smoothed lightcurve
        x = np.array(tbl['TIME'])
        y = np.array(tbl['PDCSAP_FLUX'] / median - smo_int)
        yerr = np.array(tbl['PDCSAP_FLUX_ERR'] / median)

        FL = fh.FINDflare(y,
                          yerr,
                          avg_std=True,
                          std_window=s_window,
                          N1=3,
                          N2=1,
                          N3=3)

        if makePlots:
            axes[3].plot(x, y, zorder=1)
            for j in range(len(FL[0])):
                s1, s2 = FL[0][j], FL[1][j] + 1
                axes[3].scatter(x[s1:s2], y[s1:s2], zorder=2)

        # Measure properties of detected flares
        if makePlots:
            fig_fl, axes_fl = plt.subplots(figsize=(16, 16), nrows=4, ncols=4)

        for j in range(len(FL[0])):
            s1, s2 = FL[0][j], FL[1][j] + 1
            tstart, tstop = x[s1], x[s2]
            dx_fac = 10
            dx = tstop - tstart
            x1 = tstart - dx * dx_fac / 2
            x2 = tstop + dx * dx_fac / 2
            mask = (x > x1) & (x < x2)

            # Mask out other flare detections when fitting models
            other_mask = np.ones(len(x), dtype=bool)
            for i in range(len(FL[0])):
                s1other, s2other = FL[0][i], FL[1][i] + 1
                if i == j:
                    continue
                other_mask[s1other:s2other] = 0

            popt1, pstd1, g_chisq, popt2, pstd2, f_chisq, skew, cover = \
                fitFlare(x[other_mask], y[other_mask], yerr[other_mask], x1, x2)

            mu, std, g_amp = popt1[0], popt1[1], popt1[2]
            mu_err, std_err, g_amp_err = pstd1[0], pstd1[1], pstd1[2]

            tpeak, fwhm, f_amp = popt2[0], popt2[1], popt2[2]
            tpeak_err, fwhm_err, f_amp_err = pstd2[0], pstd2[1], pstd2[2]

            f_fwhm_win = fwhm / (x2 - x1)
            g_fwhm_win = std / (x2 - x1)

            ed, ed_err = measureED(x, y, yerr, tpeak, fwhm)

            FL_files = np.append(FL_files, filename)
            FL_TICs = np.append(FL_TICs, TIC)
            FL_t0 = np.append(FL_t0, x1)
            FL_t1 = np.append(FL_t1, x2)
            FL_f0 = np.append(FL_f0, np.nanmedian(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_f1 = np.append(FL_f1, np.nanmax(tbl['PDCSAP_FLUX'][s1:s2]))
            FL_ed = np.append(FL_ed, ed)
            FL_ed_err = np.append(FL_ed_err, ed_err)

            FL_skew = np.append(FL_skew, skew)
            FL_cover = np.append(FL_cover, cover)
            FL_mu = np.append(FL_mu, mu)
            FL_std = np.append(FL_std, std)
            FL_g_amp = np.append(FL_g_amp, g_amp)
            FL_mu_err = np.append(FL_mu_err, mu_err)
            FL_std_err = np.append(FL_std_err, std_err)
            FL_g_amp_err = np.append(FL_g_amp_err, g_amp_err)

            FL_tpeak = np.append(FL_tpeak, tpeak)
            FL_fwhm = np.append(FL_fwhm, fwhm)
            FL_f_amp = np.append(FL_f_amp, f_amp)
            FL_tpeak_err = np.append(FL_tpeak_err, tpeak_err)
            FL_fwhm_err = np.append(FL_fwhm_err, fwhm_err)
            FL_f_amp_err = np.append(FL_f_amp_err, f_amp_err)

            FL_g_chisq = np.append(FL_g_chisq, g_chisq)
            FL_f_chisq = np.append(FL_f_chisq, f_chisq)

            FL_g_fwhm_win = np.append(FL_g_fwhm_win, g_fwhm_win)
            FL_f_fwhm_win = np.append(FL_f_fwhm_win, f_fwhm_win)

            if makePlots and j < 15:
                row_idx = j // 4
                col_idx = j % 4
                axes_fl[row_idx][col_idx].errorbar(x[mask],
                                                   y[mask],
                                                   yerr=yerr[mask])
                axes_fl[row_idx][col_idx].scatter(x[s1:s2], y[s1:s2])

                if popt1[0] > 0:
                    xmodel = np.linspace(x1, x2)
                    ymodel = fh.aflare1(xmodel, tpeak, fwhm, f_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{f}$ = ' + '{:.3f}'.format(f_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(f_fwhm_win))
                    ymodel = fh.gaussian(xmodel, mu, std, g_amp)
                    axes_fl[row_idx][col_idx].plot(xmodel, ymodel, label=r'$\chi_{g}$ = ' + '{:.3f}'.format(g_chisq) \
                                                + '\n FWHM/window = ' + '{:.2f}'.format(g_fwhm_win))
                    axes_fl[row_idx][col_idx].axvline(tpeak - fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].axvline(tpeak + fwhm / 2,
                                                      linestyle='--')
                    axes_fl[row_idx][col_idx].legend()
                    axes_fl[row_idx][col_idx].set_title('Skew = ' +
                                                        '{:.3f}'.format(skew))

        if makePlots:
            fig.suptitle(filename)
            axes[0].set_xlabel('Time [BJD - 2457000, days]')
            axes[0].set_ylabel('Flux [e-/s]')
            axes[1].set_xlabel('Time [BJD - 2457000, days]')
            axes[1].set_ylabel('Normalized Flux')
            axes[2].set_xlabel('Time [BJD - 2457000, days]')
            axes[2].set_ylabel('Rolling STD of GP')
            axes[3].set_xlabel('Time [BJD - 2457000, days]')
            axes[3].set_ylabel('Normalized Flux - GP')
            fig.savefig(plots_path + filename + '.png', format='png')

            if len(FL[0]) > 0:
                fig_fl.suptitle(filename)
                fig_fl.savefig(plots_path + filename + '_flares.png',
                               format='png')

            plt.clf()

        if writeLog:
            with open(log_path + prefix + '.log', 'a') as f:
                time_elapsed = timing.time() - start_time
                num_flares = len(FL[0])

                f.write('{:^15}'.format(str(k+1) + '/' + str(len(filenames))) + \
                        '{:<60}'.format(filename) + '{:<20}'.format(time_elapsed) + \
                        '{:<10}'.format(num_flares) + '\n')

        # Periodically write to the flare table file and param table file
        l = k + 1
        ALL_TIC = pd.Series(filenames).str.split(
            '-', expand=True).iloc[:, -3].astype('int')
        ALL_FILES = pd.Series(filenames).str.split('/', expand=True).iloc[:, -1]

        flare_out = pd.DataFrame(data={'file':FL_files,'TIC':FL_TICs, 't0':FL_t0, 't1':FL_t1, \
                                    'med_flux':FL_f0, 'peak_flux':FL_f1, 'ed':FL_ed, \
                                    'ed_err':FL_ed_err, 'skew':FL_skew, 'cover':FL_cover, \
                                    'mu':FL_mu, 'std':FL_std, 'g_amp': FL_g_amp, 'mu_err':FL_mu_err, \
                                    'std_err':FL_std_err, 'g_amp_err':FL_g_amp_err,'tpeak':FL_tpeak, \
                                    'fwhm':FL_fwhm, 'f_amp':FL_f_amp, 'tpeak_err':FL_tpeak_err, \
                                    'fwhm_err':FL_fwhm_err, 'f_amp_err':FL_f_amp_err,'f_chisq':FL_f_chisq, \
                                    'g_chisq':FL_g_chisq, 'f_fwhm_win':FL_f_fwhm_win, 'g_fwhm_win':FL_g_fwhm_win})
        flare_out.to_csv(log_path + prefix + '_flare_out.csv', index=False)

        param_out = pd.DataFrame(data={'file':ALL_FILES[:l], 'TIC':ALL_TIC[:l], 'med':P_median[:l], \
                                    's_window':P_s_window[:l], 'acf_1dt':P_acf_1dt[:l], 'acf_amp':P_acf_amp[:l]})
        param_out.to_csv(log_path + prefix + '_param_out.csv', index=False)

    for k in range(len(failed_files)):
        print(failed_files[k])
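
The TIC extraction near the end of the loop relies on TESS light-curve filenames carrying the TIC as the third-from-last dash-separated field. The pattern in isolation (the filename below is a made-up example of that layout):

import pandas as pd

filenames = ['tess2019140104343-s0012-0000000261136679-0144-s_lc.fits']
tics = pd.Series(filenames).str.split('-', expand=True).iloc[:, -3].astype('int')
print(tics[0])  # 261136679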
Example #12
def coreset_single(N, D, dist, algn):
    # sys.stderr.write('n: ' + str(N) + ' d: ' +str(D) + ' dist: ' + str(dist) + ' salgn: ' + str(algn) + '\n')
    x, mu0, Sig0, Sig = gendata(N, D, dist)
    Sig0inv = np.linalg.inv(Sig0)
    Siginv = np.linalg.inv(Sig)
    mup, Sigp = weighted_post(mu0, np.linalg.inv(Sig0), np.linalg.inv(Sig), x,
                              np.ones(x.shape[0]))
    anm, alg = algn
    coreset = alg(x, mu0, Sig0, Sig)

    # incremental M tests
    prev_err = np.inf
    for m in range(1, N + 1):
        coreset.build(m)
        muw, Sigw = weighted_post(mu0, Sig0inv, Siginv, x, coreset.weights())
        w = coreset.weights()
        # check if coreset for 1 datapoint is immediately optimal
        if x.shape[0] == 1:
            assert np.fabs(
                w - np.array([1])
            ) < tol, anm + " failed: coreset not immediately optimal with N = 1. weights: " + str(
                coreset.weights()) + " mup = " + str(mup) + " Sigp = " + str(
                    Sigp) + " muw = " + str(muw) + " Sigw = " + str(Sigw)
            # check if coreset is valid
        assert (w > 0.).sum() <= m, anm + " failed: coreset size > m"
        assert (w > 0.).sum() == coreset.size(
        ), anm + " failed: sum of coreset.weights()>0  not equal to size(): sum = " + str(
            (coreset.weights() > 0).sum()) + " size(): " + str(coreset.size())
        assert np.all(w >= 0.), anm + " failed: coreset has negative weights"

        # check if actual output error is monotone
        err = weighted_post_KL(mu0,
                               Sig0inv,
                               Siginv,
                               x,
                               w,
                               reverse='Reverse' in anm)
        assert err - prev_err < tol, anm + " failed: error is not monotone decreasing, err = " + str(
            err) + " prev_err = " + str(prev_err)

        # check if coreset is computing error properly
        assert np.fabs(
            coreset.error() - err
        ) < tol, anm + " failed: error est is not close to true err: est err = " + str(
            coreset.error()) + ' true err = ' + str(err)

        prev_err = err
    # save incremental M result
    w_inc = coreset.weights()

    # check reset
    coreset.reset()
    err = weighted_post_KL(mu0,
                           Sig0inv,
                           Siginv,
                           x,
                           np.zeros(x.shape[0]),
                           reverse='Reverse' in anm)
    assert (coreset.M == 0 and np.all(np.fabs(coreset.weights()) == 0.)
            and np.fabs(coreset.error() - err) < tol
            and not coreset.reached_numeric_limit), anm + " failed: reset() did not properly reset"

    # check build up to N all at once vs incremental
    # do this test for all except bin, where symmetries can cause instabilities in the choice of vector / weights
    if dist != 'bin':
        coreset.build(N)
        w = coreset.weights()
        err = weighted_post_KL(mu0,
                               Sig0inv,
                               Siginv,
                               x,
                               w,
                               reverse='Reverse' in anm)
        err_inc = weighted_post_KL(mu0,
                                   Sig0inv,
                                   Siginv,
                                   x,
                                   w_inc,
                                   reverse='Reverse' in anm)
        assert np.sqrt(
            ((w - w_inc)**2).sum()
        ) < tol, anm + " failed: incremental build up to N doesn't produce the same result as a single run at N: \n error = " + str(
            err) + " error_inc = " + str(err_inc)
    # check if coreset with all_data_wts is optimal
    coreset._update_weights(coreset.all_data_wts)
    assert coreset.error(
    ) < tol, anm + " failed: coreset with all_data_wts does not have error 0"
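
For reference, weighted_post_KL presumably reduces to the closed-form KL divergence between multivariate Gaussians; the standard formula is sketched below as an assumption about that helper, not code from the test suite:

import numpy as np

def gaussian_kl(mu0, Sig0, mu1, Sig1):
    # KL( N(mu0, Sig0) || N(mu1, Sig1) ), standard closed form
    d = mu0.shape[0]
    Sig1inv = np.linalg.inv(Sig1)
    quad = (mu1 - mu0) @ Sig1inv @ (mu1 - mu0)
    logdet = np.log(np.linalg.det(Sig1) / np.linalg.det(Sig0))
    return 0.5 * (np.trace(Sig1inv @ Sig0) + quad - d + logdet)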