def process_tsg_data():
    """Combine every HOT thermosalinograph (TSG) file into one staged CSV.

    For each whitespace-delimited ``.dat`` file in the module-level
    ``hot_tsg_flist`` this:

    * splits the combined ``quality_flag`` column into a one-character
      temperature flag and the remaining salinity flag,
    * derives a cruise ID (``HOT``/``AC``/``HA`` prefix) from the file name,
    * builds a timestamp from ``year`` plus the 1-based decimal year-day,
    * cleans each frame with ``data.clean_data_df``,

    then concatenates all frames and writes
    ``<staging>/combined/HOT_TSG_data.csv``.

    Raises
    ------
    ValueError
        If a file name matches no known cruise prefix. (Previously an
        unmatched name silently reused the previous file's ``cruise_id``,
        or raised NameError on the first file.)
    """
    combined_df_list = []
    hot_cruise_list = []
    for fil in tqdm(hot_tsg_flist):
        df = pd.read_csv(
            fil,
            sep=r"\s+",  # documented equivalent of deprecated delim_whitespace=True
            names=[
                "year",
                "decimal_year_day",
                "lon",
                "lat",
                "temperature",
                "salinity",
                "quality_flag",
            ],
        )
        df.fillna("", inplace=True)
        # First flag character grades temperature; the remainder grades salinity.
        df["temperature_flag"] = df.quality_flag.astype(str).str[0]
        df["salinity_flag"] = df.quality_flag.astype(str).str[1:]
        # e.g. ".../tsg/hot123ts.dat" -> "hot123". Note split("ts") also strips
        # the "ts" of a trailing "bts", leaving only its "b" in the stem.
        hot_cruise = fil.split("tsg/")[1].split(".dat")[0].split("ts")[0]
        if "hot" in hot_cruise:
            # Bottle-TS ("...bts.dat") files land here too: the residual "b"
            # survives the split above and rides along into the cruise ID.
            # The old `elif "bts" in hot_cruise` branch was unreachable
            # (split("ts") removes any "ts"), so it has been dropped.
            cruise_id = "HOT" + hot_cruise.split("hot")[1].zfill(3)
        elif "ac" in hot_cruise:
            cruise_id = "AC" + hot_cruise.split("ac")[1].zfill(3)
        elif "ha" in hot_cruise:
            cruise_id = "HA" + hot_cruise.split("ha")[1].zfill(3)
        else:
            raise ValueError(f"unrecognized HOT TSG file name: {fil}")
        hot_cruise_list.append(cruise_id)
        df["cruise"] = cruise_id
        # decimal_year_day is 1-based (day 1.0 == Jan 1 00:00), hence the
        # one-day subtraction before adding the timedelta.
        df["time"] = (
            pd.to_datetime(df.year, format="%Y")
            - pd.Timedelta(days=1)
            + pd.to_timedelta(df["decimal_year_day"], unit="d")
        )
        df = df[
            [
                "time",
                "lat",
                "lon",
                "temperature",
                "salinity",
                "temperature_flag",
                "salinity_flag",
                "cruise",
            ]
        ]
        df = data.clean_data_df(df)
        combined_df_list.append(df)
    combined_df = pd.concat(combined_df_list, axis=0, ignore_index=True)
    combined_df.to_csv(vs.staging + "combined/HOT_TSG_data.csv", index=False)
from cmapingest import data

# Flatten the KNOX22RR picoplankton flow-cytometry netCDF into a tidy table
# and stage it as an Excel workbook.
xdf = xr.open_dataset(vs.collected_data + "KNOX22RR_flow_cytometry/picoplankton.nc")
df = xdf.to_dataframe().reset_index(drop=True)
# netCDF string variables come back as bytes; decode them to str.
df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
# date/time are stored as integers and so lose leading zeros: pad date to
# 8 digits (%Y%m%d) and time to 6 (%H%M%S). The old code padded only the
# date, so any time before 10:00:00 could not be parsed with this format.
df["time"] = pd.to_datetime(
    df["date"].astype(int).astype(str).str.zfill(8)
    + " "
    + df["time"].astype(int).astype(str).str.zfill(6),
    format="%Y%m%d %H%M%S",
)
df.rename(columns={"Depth": "depth"}, inplace=True)
# Keep only the coordinate and population-count columns, in CMAP order.
df = df[
    [
        "time",
        "lat",
        "lon",
        "depth",
        "station",
        "Syn",
        "Pro",
        "Pico_Euk",
        "Total_Cyano",
        "Total_picophytoplankton",
        "HB",
    ]
]
df = data.clean_data_df(df)
df.to_excel(vs.staging + "combined/" + "KNOX22RR_flow_cytometry.xlsx", index=False)
"speed_contour_longitude", "speed_contour_shape_error", "speed_radius", "track", "uavg_profile", "year", "month", "week", "dayofyear", ]] combined_eddy.rename(columns={ "latitude": "lat", "longitude": "lon" }, inplace=True) combined_eddy = data.clean_data_df(combined_eddy) combined_eddy = data.mapTo180180(combined_eddy) def test_eddy_age_calc(combined_eddy): randtrack = combined_eddy.track.sample(1).iloc[0] test_eddy = combined_eddy[combined_eddy["track"] == randtrack] test_eddy_age = test_eddy["eddy_age"].iloc[0] comp_eddy_age = (test_eddy["time"].max() - test_eddy["time"].min()).days assert comp_eddy_age == test_eddy_age, "eddy ages do not match" test_eddy_age_calc(combined_eddy) def write_metadata():