Example #1
0
 def bin(self, var):
     """Bin the per-station series of field *var* and store the result.

     Selects the columns of ``self.data`` whose 'field' level equals
     ``var``, bins each station's sub-frame separately with the project's
     ``binning.bin`` helper (after dropping rows that are all-NaN for that
     station), and saves the column-wise concatenation on ``self.binned``.
     """
     import binning
     field_cols = self.data.xs(var, axis=1, level='field', drop_level=False)
     stations = self.data.columns.get_level_values('station').unique()
     binned_parts = []
     for station in stations:
         station_cols = field_cols.xs(station, axis=1, level='station',
                                      drop_level=False)
         binned_parts.append(binning.bin(station_cols.dropna(axis=0, how='all')))
     self.binned = pd.concat(binned_parts, axis=1)
Example #2
0
def bin_pca(df_pca, bool_df, df_cont, b_pca):
    """Replace continuous columns of *df_pca* with binned categorical levels.

    For every column of ``df_cont``, pairs it with the 'ABANDONED' flag from
    ``bool_df``, asks the project-level ``bin`` helper for a binning (its
    length gives the number of bins), and overwrites the matching column of
    ``df_pca`` with ``pd.cut`` labels ``pcl_0 .. pcl_{n-1}``.

    Parameters:
        df_pca:  DataFrame mutated in place with the binned columns.
        bool_df: DataFrame containing an 'ABANDONED' indicator column.
        df_cont: DataFrame of continuous columns to bin.
        b_pca:   string flag; binning runs only when it equals 'True'.

    Returns None; when ``b_pca != 'True'`` the function is a no-op.
    """
    if b_pca == 'True':
        pca_leng = {}

        print(df_cont.dtypes)
        cont_cols = list(df_cont)
        print(cont_cols)

        for col in cont_cols:
            # Renamed from `bool`/`dict` — don't shadow the builtins.
            abandoned = pd.DataFrame(bool_df['ABANDONED'], columns=['ABANDONED'])

            df_bin = pd.concat([df_cont[col], abandoned['ABANDONED']], axis=1)

            # NOTE(review): `bin` here is a project helper expected to be in
            # scope (it shadows the builtin); it returns a mapping whose
            # length is the bin count — TODO confirm against its definition.
            bin_map = bin(df_bin)
            n_bins = len(bin_map)

            # str(r) replaces the removed Python 2 backtick-repr syntax.
            labels = ['pcl_' + str(r) for r in range(n_bins)]

            df_pca[col] = pd.cut(df_cont[col], bins=n_bins, labels=labels,
                                 include_lowest=True)
            print(df_cont[col])
            # Recorded per-column bin counts; presumably consumed by a caller
            # through df_pca only — the dict itself is never returned.
            pca_leng[col] = n_bins
Example #3
0
def main():
    """
    TKS prioritization code.

    Loads the TOI and ExoFOP catalogs, merges and cleans them, filters to
    planets observable by Keck, computes TSM values, bins the sample, and
    writes the TOI IDs of the requested priority bin to a text file.

    Example command line call:
        python prioritize.py toi+-2019-11-14.csv exofop_search_2019-11-14_combined.csv
    """
    # Handle the command line input
    args = handle_args()

    toi_fname = os.path.join("data/toi", args.toi_fname)
    exo_fname = os.path.join("data/exofop", args.exofop_fname)
    planet_df = load_and_merge(toi_fname, exo_fname)
    # Mapping from short keys (e.g. "rp_key") to the actual column names.
    toi_col_dict = load_toi_col_names(args.toi_col_dict)

    # Clean up the resulting df a little bit: drop follow-up bookkeeping columns.
    planet_df = planet_df.drop(columns=['TFOP SG1a','TFOP SG1b','TFOP SG2',
                                        'TFOP SG3','TFOP SG4','TFOP SG5','TFOP Master',
                                        'TOI Disposition'])
    planet_df = planet_df.drop_duplicates(subset="Full TOI ID").sort_values("Full TOI ID")
    # Add a column for stellar mass calculated from surface gravity
    # ("Surface Gravity Value" is presumably log10(g) — 10** undoes the log).
    # N.B. the Exofop data contains stellar mass, but for rows that are not matched
    # with Exofop data, give them a stellar mass.
    # NOTE(review): M = g*R^2/G requires the radius squared, but only one
    # factor of R appears below — confirm this is intentional.
    planet_df[toi_col_dict["ms_key"]] = (10**planet_df["Surface Gravity Value"] * (planet_df[toi_col_dict["rs_key"]] * const.R_sun)\
                                                                                / const.G) / const.M_sun

    # Remove rows from the df that don't meet these three criteria:
    # positive planet radius, positive orbital period, SPOC-pipeline source.
    planet_df = planet_df[np.logical_and.reduce((planet_df[toi_col_dict["rp_key"]] > 0,
                                                 planet_df[toi_col_dict["pp_key"]] > 0,
                                                 planet_df["Source Pipeline"] == "spoc"))]

    # Add a column for the ratio of the planet's semi-major orbital axis and the radius of the host star
    # This column is needed for calculating the TSM (but don't need it if calculating equilibrium temperature via insolation flux)
    planet_df["ar_ratio"] = ar_ratio(planet_df[toi_col_dict["pp_key"]], planet_df[toi_col_dict["ms_key"]], planet_df[toi_col_dict["rs_key"]])
    planet_df = planet_df.reset_index(drop = True) # Reset the indices because some rows might've been removed

    # Estimate planet masses given radii
    planet_df[toi_col_dict["mp_key"]] = chen_kipping_louie_mass(planet_df[toi_col_dict["rp_key"]])

    # Estimate K amplitude of RV observation
    planet_df["K_amp"] = k_amp(planet_df[toi_col_dict["pp_key"]], planet_df[toi_col_dict["mp_key"]], planet_df[toi_col_dict["ms_key"]])

    # Cull the sample for systems that are observable by Keck in both dec and RV resolution (above -20 degrees dec, > 2 m/s k_amp)
    desirable_inds = np.logical_and(planet_df[toi_col_dict["dec_key"]] > -20, planet_df['K_amp'] > 2)
    planet_df = planet_df[desirable_inds]
    planet_df = planet_df.reset_index(drop = True)

    # Calculate TSM values (two alternative formulations selected by CLI flag).
    if args.use_TSM_natalie:
        planet_df["TSM"] = calculate_TSM_natalie(planet_df[toi_col_dict["rp_key"]],
                                         planet_df[toi_col_dict["rs_key"]],
                                         planet_df[toi_col_dict["Ts_key"]],
                                         planet_df[toi_col_dict["Jmag_key"]],
                                         planet_df[toi_col_dict["mp_key"]],
                                         planet_df[toi_col_dict["Fp_key"]])
    else:
        planet_df["TSM"] = calculate_TSM(planet_df[toi_col_dict["rp_key"]],
                                         planet_df[toi_col_dict["rs_key"]],
                                         planet_df[toi_col_dict["Ts_key"]],
                                         planet_df[toi_col_dict["Jmag_key"]],
                                         planet_df[toi_col_dict["mp_key"]],
                                         planet_df[toi_col_dict["ar_key"]])


    ##### Copied from Nicholas' notebook #####
    ##########################################
    # Log-spaced radius and insolation-flux bins, plus effective-temperature
    # boundaries; `bin` here is a project helper, not the builtin.
    rad_bins = 10**(np.linspace(0,1,6))
    fpl_bins = 10**(np.linspace(-1,4,6))
    tef_bins = np.array([2500,3900,5200,6500])
    all_bins = [rad_bins, fpl_bins, tef_bins]
    id_key   = "Full TOI ID"
    binned   = bin(toi_col_dict, planet_df, all_bins, id_key, "TSM")
    priority = int(args.priority)
    my_tois  = binned[binned["priority"]==priority].reset_index(drop=True).sort_values(id_key)[id_key].values
    ##########################################
    ##########################################

    print("Priority {} targets: \n{}".format(priority, my_tois))
    output_fname = args.output_fname + "_priority_{}.txt".format(priority)
    np.savetxt(output_fname, my_tois, fmt='%.2f')
    # Fixed typo in the user-facing message ("sucessful" -> "successful").
    print("Binning successful, output stored in {}".format(output_fname))
Example #4
0
            r = pd.concat([self.regression(b, y)
                           for b in self.blocks]).sort_index()
        else:
            r = self.regression(pd.concat(self.blocks, 1).sort_index(), y)
        r = pd.DataFrame(r)
        if isinstance(y, pd.Series):
            r.columns = pd.MultiIndex.from_tuples([y.name],
                                                  names=x.columns.names)
        else:
            r.columns = y.columns
        return r


if __name__ == "__main__":
    import binning, data

    # Open the project's raw-data HDF5 store read-only.
    D = data.Data()
    D.open('r', 's_raw.h5')
    # Bin the raw records, keep only the 'avg' aggregation level
    # (columns selected on the 'aggr' level of the column MultiIndex),
    # then center every column to zero mean.
    X = binning.bin(D.r).xs('avg', 1, 'aggr')
    X = X - X.mean()

    # Target y: first column of station '3', restricted to rows with data.
    # Predictors x: every other column, aligned to y's index.
    y = X.xs('3', 1, 'station', False).iloc[:, 0].dropna()
    x = X.drop(y.name, 1).loc[y.index]

    # Alternative regression experiments, kept for reference:
    # b = tree_blocks(x)
    # a = block_predictors(x, b)
    # a0 = pd.concat(a, 1).fillna(0)
    # r1 = np.linalg.lstsq(a0, y)
    # r2 = pd.concat([regression(c, y) for c in a], 0).sort_index()
    # r3 = affinity_regression(x, y)
Example #5
0
# Fit a PCA on the full numeric matrix of `df`, then bin each principal
# component against the 'ABANDONED_FLAG' indicator; components that yield
# two or fewer bins are dropped, the rest become categorical dummies.
X = df.values
X_cont = X[:, :]
# print X_cont
pca = PCA()
pca.fit(X_cont)
x3 = pca.transform(X_cont)
string = "pca_"
# str(i) replaces the Python 2 backtick-repr syntax removed in Python 3.
pca_column_name = [string + str(i) for i in range(x3.shape[1])]

# print pca_column_name
pca_df = pd.DataFrame(x3, columns=pca_column_name)
pca_array = []
for i in pca_df.columns:
    y = 'ABANDONED_FLAG'
    df_bin = pd.concat((pca_df[i], df_bool[y]), axis=1)
    # NOTE(review): `bin` is a project helper (shadows the builtin) whose
    # result length gives the bin count. `dict` also shadows a builtin but
    # is kept as-is since code below this excerpt may reference it.
    dict = bin(df_bin, y)
    leng = len(dict)
    if leng > 2:
        pca_level = 'pcl_'
        labels = [pca_level + str(r) for r in range(leng)]
        pca_df[i] = pd.cut(pca_df[i],
                           bins=leng,
                           labels=labels,
                           include_lowest=True)
    elif leng <= 2:
        # Components with <= 2 bins carry no useful spread — drop them.
        pca_df.drop([i], axis=1, inplace=True)

pca_dummies = pd.get_dummies(pca_df)
df_char = pd.read_csv(
    "cs_loyal_sample.csv",
    sep='\t',