def process_tsg_data():
    """Combine every HOT thermosalinograph (TSG) file into one staged CSV.

    For each whitespace-delimited ``.dat`` file in the module-level
    ``hot_tsg_flist`` this:

    * splits the combined ``quality_flag`` column into a one-character
      temperature flag and the remaining salinity flag,
    * derives a cruise ID (``HOT``/``AC``/``HA`` prefix) from the file name,
    * builds a timestamp from ``year`` plus the 1-based decimal year-day,
    * cleans each frame with ``data.clean_data_df``,

    then concatenates all frames and writes
    ``<staging>/combined/HOT_TSG_data.csv``.

    Raises
    ------
    ValueError
        If a file name matches no known cruise prefix. (Previously an
        unmatched name silently reused the previous file's ``cruise_id``,
        or raised NameError on the first file.)
    """
    combined_df_list = []
    hot_cruise_list = []
    for fil in tqdm(hot_tsg_flist):
        df = pd.read_csv(
            fil,
            sep=r"\s+",  # documented equivalent of deprecated delim_whitespace=True
            names=[
                "year",
                "decimal_year_day",
                "lon",
                "lat",
                "temperature",
                "salinity",
                "quality_flag",
            ],
        )
        df.fillna("", inplace=True)
        # First flag character grades temperature; the remainder grades salinity.
        df["temperature_flag"] = df.quality_flag.astype(str).str[0]
        df["salinity_flag"] = df.quality_flag.astype(str).str[1:]
        # e.g. ".../tsg/hot123ts.dat" -> "hot123". Note split("ts") also strips
        # the "ts" of a trailing "bts", leaving only its "b" in the stem.
        hot_cruise = fil.split("tsg/")[1].split(".dat")[0].split("ts")[0]
        if "hot" in hot_cruise:
            # Bottle-TS ("...bts.dat") files land here too: the residual "b"
            # survives the split above and rides along into the cruise ID.
            # The old `elif "bts" in hot_cruise` branch was unreachable
            # (split("ts") removes any "ts"), so it has been dropped.
            cruise_id = "HOT" + hot_cruise.split("hot")[1].zfill(3)
        elif "ac" in hot_cruise:
            cruise_id = "AC" + hot_cruise.split("ac")[1].zfill(3)
        elif "ha" in hot_cruise:
            cruise_id = "HA" + hot_cruise.split("ha")[1].zfill(3)
        else:
            raise ValueError(f"unrecognized HOT TSG file name: {fil}")
        hot_cruise_list.append(cruise_id)
        df["cruise"] = cruise_id
        # decimal_year_day is 1-based (day 1.0 == Jan 1 00:00), hence the
        # one-day subtraction before adding the timedelta.
        df["time"] = (
            pd.to_datetime(df.year, format="%Y")
            - pd.Timedelta(days=1)
            + pd.to_timedelta(df["decimal_year_day"], unit="d")
        )
        df = df[
            [
                "time",
                "lat",
                "lon",
                "temperature",
                "salinity",
                "temperature_flag",
                "salinity_flag",
                "cruise",
            ]
        ]
        df = data.clean_data_df(df)
        combined_df_list.append(df)
    combined_df = pd.concat(combined_df_list, axis=0, ignore_index=True)
    combined_df.to_csv(vs.staging + "combined/HOT_TSG_data.csv", index=False)
from cmapingest import data

# Flatten the KNOX22RR picoplankton flow-cytometry netCDF into a tidy table
# and stage it as an Excel workbook.
xdf = xr.open_dataset(vs.collected_data + "KNOX22RR_flow_cytometry/picoplankton.nc")
df = xdf.to_dataframe().reset_index(drop=True)
# netCDF string variables come back as bytes; decode them to str.
df = df.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
# date/time are stored as integers and so lose leading zeros: pad date to
# 8 digits (%Y%m%d) and time to 6 (%H%M%S). The old code padded only the
# date, so any time before 10:00:00 could not be parsed with this format.
df["time"] = pd.to_datetime(
    df["date"].astype(int).astype(str).str.zfill(8)
    + " "
    + df["time"].astype(int).astype(str).str.zfill(6),
    format="%Y%m%d %H%M%S",
)
df.rename(columns={"Depth": "depth"}, inplace=True)
# Keep only the coordinate and population-count columns, in CMAP order.
df = df[
    [
        "time",
        "lat",
        "lon",
        "depth",
        "station",
        "Syn",
        "Pro",
        "Pico_Euk",
        "Total_Cyano",
        "Total_picophytoplankton",
        "HB",
    ]
]
df = data.clean_data_df(df)
df.to_excel(vs.staging + "combined/" + "KNOX22RR_flow_cytometry.xlsx", index=False)
"speed_contour_longitude", "speed_contour_shape_error", "speed_radius", "track", "uavg_profile", "year", "month", "week", "dayofyear", ]] combined_eddy.rename(columns={ "latitude": "lat", "longitude": "lon" }, inplace=True) combined_eddy = data.clean_data_df(combined_eddy) combined_eddy = data.mapTo180180(combined_eddy) def test_eddy_age_calc(combined_eddy): randtrack = combined_eddy.track.sample(1).iloc[0] test_eddy = combined_eddy[combined_eddy["track"] == randtrack] test_eddy_age = test_eddy["eddy_age"].iloc[0] comp_eddy_age = (test_eddy["time"].max() - test_eddy["time"].min()).days assert comp_eddy_age == test_eddy_age, "eddy ages do not match" test_eddy_age_calc(combined_eddy) def write_metadata():