def compiler():
    """
    Load, trim and regroup the raw datalogger files for Burn 18 or Burn 19.

    The function takes no arguments. It reads three module-level names:
    ``path`` (directory handed to ``file_to_df``), ``Burn`` (18 or 19,
    selects the hard-coded trimming window) and ``seperate_time``
    ("y" for split YYYY/MM/DD/Hr/Min/Sec columns, "n" for a single
    TIMESTAMP column; any other value adds no time column at all).

    Processing steps, applied to every logger in the same order:

    1. load the file with ``file_to_df``;
    2. trim it to the burn window with ``cutter``;
    3. move the start time to the latest "end of repeated timestamps"
       found across the loggers (via ``repeat``);
    4. trim again and pass the result through ``continuous_df``.

    Returns
    -------
    all_sonics : list of pandas.DataFrame
        Sixteen sonic dataframes, ordered A1-A4, B1-B4, C1-C4, D1-D4, with
        the columns U, V, W, T and DIAG.
    all_tc_group : list of pandas.DataFrame
        Fourteen thermocouple dataframes, ordered B1-B7 then C1-C7.
    df_WGNover : pandas.DataFrame
        Sonic channel 1 of logger 2878.

    Raises
    ------
    ValueError
        If ``Burn`` is neither 18 nor 19, because no trimming window is
        defined for any other burn.
    """
    # Hard-coded trimming window for each supported burn. Checked first so an
    # unsupported burn fails with a clear message instead of an unbound
    # t_s / t_e further down.
    if Burn == 19:
        t_s, t_e = "2018-09-22 15:26:15", "2018-09-22 18:25:21"
    elif Burn == 18:
        t_s, t_e = "2018-09-22 09:05:42", "2018-09-22 15:19:33"
    else:
        raise ValueError(
            "compiler() only supports Burn 18 or 19, got {!r}".format(Burn))

    # (logger id, file name) in processing order. Logger 2878 uses a
    # different file suffix from the others.
    logger_files = (
        ("2878", "TOA5_2878.WGcontrol10Hz.dat"),
        ("2879", "TOA5_2879.ts_data.dat"),
        ("3884", "TOA5_3884.ts_data.dat"),
        ("4390", "TOA5_4390.ts_data.dat"),
        ("4975", "TOA5_4975.ts_data.dat"),
        ("4976", "TOA5_4976.ts_data.dat"),
        ("10442", "TOA5_10442.ts_data.dat"),
        ("11584", "TOA5_11584.ts_data.dat"),
        ("11585", "TOA5_11585.ts_data.dat"),
    )
    loggers = [logger for logger, _ in logger_files]

    ### First loading the files into the script
    data = {}
    for logger, file_name in logger_files:
        data[logger] = file_to_df(path, file_name)

    ### Trimming every logger to the burn window
    for logger in loggers:
        data[logger] = cutter(data[logger], t_s, t_e)

    ### Finding where the repeated timestamps end in each logger
    # NOTE(review): assumes repeat() returns a non-empty sequence of index
    # groups; an empty result raises IndexError here -- confirm.
    end_repeat_times = []
    for logger in loggers:
        print(logger + ':')
        timestamps = data[logger]["TIMESTAMP"]
        last_repeated = repeat(data[logger])[-1][-1]
        end_repeat_times.append(timestamps[last_repeated + 1])
    # Start after the latest end-of-repeats so every logger is clean.
    t_s = max(end_repeat_times)
    print("Cut here:", t_s)

    ### Cutting at the new start time and making the data continuous
    for logger in loggers:
        data[logger] = continuous_df(cutter(data[logger], t_s, t_e), t_s, t_e)

    ### Choosing where the time column(s) come from
    if seperate_time == "y":
        time_cols = ["YYYY", "MM", "DD", "Hr", "Min", "Sec"]
        time_source = {logger: time_columns(data[logger])
                       for logger in loggers}
    elif seperate_time == "n":
        time_cols = ["TIMESTAMP"]
        time_source = data
    else:
        # Any other flag value leaves the outputs without a time column.
        time_cols = []
        time_source = data

    def _fill(target, logger, source_cols, out_cols):
        # Copy the time column(s), then the data columns, from one logger
        # into ``target`` (modified in place).
        for col in time_cols:
            target[col] = time_source[logger][col]
        for out_col, source_col in zip(out_cols, source_cols):
            target[out_col] = data[logger][source_col]

    ### Grabbing Sonic data from specific files
    sonic_columns = ["Ux_", "Uy_", "Uz_", "Ts_", "diag_rmy_"]
    sonc_headers = ["U", "V", "W", "T", "DIAG"]

    ### WG Nover 10hz
    df_WGNover = pd.DataFrame()
    _fill(df_WGNover, "2878",
          [prefix + "1" for prefix in sonic_columns], sonc_headers)

    ### Burns truss: one logger per row, four sonics per logger
    all_sonics = []
    for logger in ("2879", "4975", "4976", "11585"):  # rows A, B, C, D
        for position in range(1, 5):
            sonic = pd.DataFrame()
            _fill(sonic, logger,
                  [prefix + str(position) for prefix in sonic_columns],
                  sonc_headers)
            all_sonics.append(sonic)

    #### Thermal Couple data
    tc_names = ["B1", "B2", "B3", "B4", "B5", "B6", "B7",
                "C1", "C2", "C3", "C4", "C5", "C6", "C7"]
    tc = {name: pd.DataFrame() for name in tc_names}

    t_c_lst_1 = ["Temp_C({})".format(k) for k in range(1, 8)]
    t_c_lst_2 = ["Temp_C({})".format(k) for k in range(8, 15)]

    # Channels 1-7: (thermocouple, logger it is read from)
    first_tc_group = (("B2", "2879"), ("B6", "10442"), ("B1", "4975"),
                      ("B5", "3884"), ("C1", "4976"), ("C5", "11584"),
                      ("C2", "11585"), ("C6", "4390"))
    # Channels 8-14: (thermocouple, logger it is read from)
    secnd_tc_group = (("B4", "2879"), ("B3", "4975"), ("B7", "3884"),
                      ("C3", "4976"), ("C7", "11584"), ("C4", "11585"))

    for name, logger in first_tc_group:
        _fill(tc[name], logger, t_c_lst_1, t_c_lst_1)
    for name, logger in secnd_tc_group:
        _fill(tc[name], logger, t_c_lst_2, t_c_lst_2)

    all_tc_group = [tc[name] for name in tc_names]
    return all_sonics, all_tc_group, df_WGNover
def compiler20_35(path, t_s, t_e, fill_nan, mk_contins="y", sep_time_cols="y"):
    """
    Extract, trim and regroup the raw datalogger files for Burns 20-35.

    Every datalogger file is loaded, cut to the same starting and ending
    time, optionally turned into a continuous time series, and regrouped
    into one dataframe per sonic / thermocouple with NaN values replaced.
    Use this for 10x10m SERDP Burns 20-35.

    Parameters
    ----------
    path : str
        Location of the Burn directory containing the raw datalogger files
    t_s : str or pandas.Timestamp
        starting timestamp that all the loggers contain and after the
        repeated timestamps during datalogger's start-up
    t_e : str or pandas.Timestamp
        ending timestamp that all the data loggers contain
    fill_nan : int or str or float (np.nan also acceptable)
        the desired value to replace the NaN values
    mk_contins : "y" or "n", optional
        "y" (case-insensitive) trims each logger with ``cutter`` and then
        passes it through ``continuous_df``; any other value only trims.
        The default is "y" (to make the timestamps continuous).
    sep_time_cols : "y" or "n", optional
        "y" (case-insensitive) writes the separated time columns "YYYY",
        "MM", "DD", "Hr", "Min", "Sec"; any other value writes a single
        "TIMESTAMP" column. If the output files will be loaded into excel,
        it's recommended to have them separated ("y"). The default is "y".

    Returns
    -------
    all_sonics : list of pandas.DataFrame
        This is the list of the output sonics, A1 through D4, as returned
        by ``initializing_df``.
    all_tc_group : list of pandas.DataFrame
        This is the list of thermocouples, ordered B1-B7 then C1-C7.
    df_WGNover : pandas.DataFrame
        This is the observational tower located outside the 10x10m truss
        (sonic channel 1 of logger 10442).
    """
    # Datalogger serial numbers, in the order they are loaded and processed.
    loggers = ("4976", "4975", "11585", "2879", "4390", "2005", "2878",
               "11584", "10442")
    make_continuous = mk_contins.lower() == "y"
    split_time = sep_time_cols.lower() == "y"

    ### First loading the files into the script
    data = {}
    for logger in loggers:
        data[logger] = file_to_df(path, "TOA5_{}.ts_data.dat".format(logger))

    ### Cutting every logger (4976 included) to the same time window
    for logger in loggers:
        if make_continuous:
            print("Datalogger {}:".format(logger))
            data[logger] = continuous_df(
                cutter(data[logger], t_s, t_e), t_s, t_e)
        else:
            data[logger] = cutter(data[logger], t_s, t_e)

    ### Initialized the list and dataframes to append to
    # The time-column list and the third thermocouple list returned by the
    # helper are not used here.
    # NOTE(review): all_sonics is presumably made of the same dataframe
    # objects as the four row lists, which are filled in place -- confirm
    # against initializing_df().
    (sonic_columns, _unused_time_cols, a_row_lst, b_row_lst, c_row_lst,
     d_row_lst, sonc_headers, all_sonics, df_B1_tc, df_B2_tc, df_B3_tc,
     df_B4_tc, df_C1_tc, df_C2_tc, df_C3_tc, df_C4_tc, _unused_tc_out,
     t_c_lst_1, t_c_lst_2) = initializing_df()

    ### Choosing where the time column(s) come from
    if split_time:
        time_columns_lst = ["YYYY", "MM", "DD", "Hr", "Min", "Sec"]
        time_source = {logger: time_columns(data[logger])
                       for logger in loggers}
    else:
        time_columns_lst = ["TIMESTAMP"]
        time_source = data

    def _fill(target, logger, source_cols, out_cols):
        # Copy the time column(s), then the data columns, from one logger
        # into ``target`` (modified in place).
        for col in time_columns_lst:
            target[col] = time_source[logger][col]
        for out_col, source_col in zip(out_cols, source_cols):
            target[out_col] = data[logger][source_col]

    ### WG Nover 10hz
    df_WGNover = pd.DataFrame()
    _fill(df_WGNover, "10442",
          [prefix + "1" for prefix in sonic_columns], sonc_headers)
    df_WGNover.fillna(value=fill_nan, inplace=True)

    ### Sonics: one logger per row, sonic n reads the columns suffixed n
    sonic_rows = ((a_row_lst, "4976"), (b_row_lst, "4975"),
                  (c_row_lst, "11585"), (d_row_lst, "2879"))
    for row_lst, logger in sonic_rows:
        for position, sonic in enumerate(row_lst, start=1):
            _fill(sonic, logger,
                  [prefix + str(position) for prefix in sonic_columns],
                  sonc_headers)

    ####################### Thermalcouples ##################################
    df_B5_tc, df_B6_tc, df_B7_tc = (pd.DataFrame(), pd.DataFrame(),
                                    pd.DataFrame())
    df_C5_tc, df_C6_tc, df_C7_tc = (pd.DataFrame(), pd.DataFrame(),
                                    pd.DataFrame())

    ### Order that data fits the tc array: (thermocouple, logger)
    first_tc_group = ((df_B5_tc, "4976"), (df_B7_tc, "4390"),
                      (df_B1_tc, "4975"), (df_B3_tc, "2005"),
                      (df_C5_tc, "11585"), (df_C7_tc, "2878"),
                      (df_C1_tc, "2879"), (df_C3_tc, "11584"))
    secnd_tc_group = ((df_B6_tc, "4976"), (df_B2_tc, "4975"),
                      (df_B4_tc, "2005"), (df_C6_tc, "11585"),
                      (df_C2_tc, "2879"), (df_C4_tc, "11584"))

    ### Adding the time column(s) and the data to each thermocouple
    for tc_df, logger in first_tc_group:
        _fill(tc_df, logger, t_c_lst_1, t_c_lst_1)
    for tc_df, logger in secnd_tc_group:
        _fill(tc_df, logger, t_c_lst_2, t_c_lst_2)

    all_tc_group = [df_B1_tc, df_B2_tc, df_B3_tc, df_B4_tc, df_B5_tc,
                    df_B6_tc, df_B7_tc, df_C1_tc, df_C2_tc, df_C3_tc,
                    df_C4_tc, df_C5_tc, df_C6_tc, df_C7_tc]

    ### Replacing the NaN values in place so the returned lists see them
    for sonic in all_sonics:
        sonic.fillna(value=fill_nan, inplace=True)
    for tc_df in all_tc_group:
        tc_df.fillna(value=fill_nan, inplace=True)
    return all_sonics, all_tc_group, df_WGNover