def getdata(filename):
    """Read time and RM columns from a text file and Hampel-filter the RM values.

    Every line up to and including the first line whose first 13 characters
    equal ``'seq rel_time'`` is skipped.  Each following line is split on
    whitespace; a row is accepted when the field at index 2 parses as the
    integer 0, in which case the fields at index 3 (stored in ``rel_time``)
    and index 8 (stored in ``rm``) are read as floats.  Rows that are too
    short or contain non-numeric fields are ignored as a whole, so the two
    lists always stay the same length.

    The collected values are passed through ``hampel.hampel`` twice (with
    arguments ``5, 4`` and then ``10, 1``) and the difference between the
    filtered and the collected values is printed.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(rel_time, filtered_data)`` where ``rel_time`` is a list of
        floats and ``filtered_data`` is the result of the second
        ``hampel.hampel`` call.

    Raises:
        IndexError: if no header line is found in the file.
    """
    # Context manager so the file handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # skip over all stuff before actual data
    # NOTE(review): the literal below is 12 characters long while the slice
    # takes 13 -- verify the header spelling against a real input file.
    header = None
    for idx, line in enumerate(text):
        if line[0:13] == 'seq rel_time':
            header = idx
            break
    if header is None:
        # Same exception type as running off the end of the list, but with a
        # message that says what is actually wrong.
        raise IndexError("no 'seq rel_time' header line found in %s" % filename)

    rm = []
    rel_time = []
    # get actual data
    for line in text[header + 1:]:
        try:
            info = line.split()
            if int(info[2]) != 0:
                continue
            # Parse both columns before storing either one, so a malformed
            # row cannot leave rel_time one element longer than rm.
            time_val = float(info[3])
            rm_val = float(info[8])
        except (IndexError, ValueError):
            # Short or non-numeric row: not a data row, skip it.
            continue
        rel_time.append(time_val)
        rm.append(rm_val)

    rm_arr = numpy.array(rm)
    filtered_data = hampel.hampel(rm_arr, 5, 4)
    filtered_data = hampel.hampel(filtered_data, 10, 1)
    diff = filtered_data - rm_arr
    # Parenthesised form prints the same text under Python 2 and Python 3.
    print(diff)
    return rel_time, filtered_data
def getdata(filename):
    """Read time, STEC and STEC-error columns from a text file.

    Every line up to and including the first line whose first 13 characters
    equal ``'seq rel_time'`` is skipped.  Each following line is split on
    whitespace; a row is accepted when the field at index 2 parses as the
    integer 0.  For an accepted row the field at index 3 divided by 3600
    (presumably seconds converted to hours -- confirm) is stored as the time
    and the field at index 7 as the ``stec`` value.  Rows that are too short
    or contain non-numeric fields are ignored as a whole, so the time and
    value arrays always have the same length.

    The field at index 10 is read on a best-effort basis: when it is missing
    or non-numeric the row is still kept, but nothing is added to
    ``stec_err``, which can therefore be shorter than the other arrays.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(rel_time, stec_arr, stec_err, latest)``.  The first three
        are numpy arrays built from the accepted rows; ``latest`` is the time
        value of the last accepted row, or ``None`` when no row was accepted.

    Raises:
        IndexError: if no header line is found in the file.
    """
    # Context manager so the file handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # skip over all stuff before actual data
    # NOTE(review): the literal below is 12 characters long while the slice
    # takes 13 -- verify the header spelling against a real input file.
    header = None
    for idx, line in enumerate(text):
        if line[0:13] == 'seq rel_time':
            header = idx
            break
    if header is None:
        # Same exception type as running off the end of the list, but with a
        # message that says what is actually wrong.
        raise IndexError("no 'seq rel_time' header line found in %s" % filename)

    stec = []
    stec_err = []
    rel_time = []
    # Defined up front so a file without accepted rows returns None instead
    # of failing on an unbound local at the return statement.
    latest = None
    # get actual data
    for line in text[header + 1:]:
        try:
            info = line.split()
            if int(info[2]) != 0:
                continue
            # Parse both columns before storing either one, so a malformed
            # row cannot leave rel_time one element longer than stec.
            time_val = float(info[3]) / 3600
            stec_val = float(info[7])
        except (IndexError, ValueError):
            # Short or non-numeric row: not a data row, skip it.
            continue
        latest = time_val
        rel_time.append(time_val)
        stec.append(stec_val)
        try:
            stec_err.append(float(info[10]))
        except (IndexError, ValueError):
            # The error column is optional; keep the row without it.
            pass

    stec_arr = numpy.array(stec)
    stec_err = numpy.array(stec_err)
    rel_time = numpy.array(rel_time)

    # NOTE(review): the filtered series is not part of the return value.  The
    # calls are kept because it cannot be confirmed from here whether
    # hampel.hampel modifies its input in place; if it does not, they can go.
    filtered = hampel.hampel(stec_arr, 5, 4)
    hampel.hampel(filtered, 10, 1)
    return rel_time, stec_arr, stec_err, latest
def getdata(filename):
    """Read time, RM and derived RM-error columns from a text file.

    Every line up to and including the first line whose first 13 characters
    equal ``'seq rel_time'`` is skipped.  Each following line is split on
    whitespace; a row is accepted when the field at index 2 parses as the
    integer 0.  For an accepted row the field at index 3 divided by 3600.0
    (presumably seconds converted to hours -- confirm) is stored as the time
    and the field at index 8 as the ``rm`` value.  Rows that are too short or
    contain non-numeric fields are ignored as a whole, so ``rel_time`` and
    the RM array always have the same length.

    The error is computed on a best-effort basis as
    ``rm_val * (info[10] / info[7])``.  When either field is missing,
    non-numeric, or the divisor is zero, the row is still kept but nothing is
    added to ``rm_err``, which can therefore be shorter than the RM array.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(rel_time, rm_arr, rm_err, latest)``.  ``rel_time`` is a list
        of floats, ``rm_arr`` and ``rm_err`` are numpy arrays, and ``latest``
        is the time value of the last accepted row, or ``None`` when no row
        was accepted.

    Raises:
        IndexError: if no header line is found in the file.
    """
    # Context manager so the file handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # skip over all stuff before actual data
    # NOTE(review): the literal below is 12 characters long while the slice
    # takes 13 -- verify the header spelling against a real input file.
    header = None
    for idx, line in enumerate(text):
        if line[0:13] == 'seq rel_time':
            header = idx
            break
    if header is None:
        # Same exception type as running off the end of the list, but with a
        # message that says what is actually wrong.
        raise IndexError("no 'seq rel_time' header line found in %s" % filename)

    rm = []
    rm_err = []
    rel_time = []
    # Defined up front so a file without accepted rows returns None instead
    # of failing on an unbound local at the return statement.
    latest = None
    # get actual data
    for line in text[header + 1:]:
        try:
            info = line.split()
            if int(info[2]) != 0:
                continue
            # Parse both columns before storing either one, so a malformed
            # row cannot leave rel_time one element longer than rm.
            time_val = float(info[3]) / 3600.0
            rm_val = float(info[8])
        except (IndexError, ValueError):
            # Short or non-numeric row: not a data row, skip it.
            continue
        latest = time_val
        rel_time.append(time_val)
        rm.append(rm_val)
        try:
            rm_error = rm_val * (float(info[10]) / float(info[7]))
            rm_err.append(rm_error)
        except (IndexError, ValueError, ZeroDivisionError):
            # The error estimate is optional; keep the row without it.
            pass

    rm_arr = numpy.array(rm)
    rm_err = numpy.array(rm_err)

    # NOTE(review): the filtered series is not part of the return value.  The
    # calls are kept because it cannot be confirmed from here whether
    # hampel.hampel modifies its input in place; if it does not, they can go.
    filtered_data = hampel.hampel(rm_arr, 5, 4)
    hampel.hampel(filtered_data, 10, 1)
    return rel_time, rm_arr, rm_err, latest
def smooth_and_diff(series, ma_days=7):
    """Difference, Hampel-filter and moving-average the two columns of a series.

    Each of the two columns is first-differenced with ``np.diff``, passed
    through ``hampel`` with ``window_size=5`` and then through
    ``moving_average`` with ``ma_days``.

    Args:
        series (np.array): (T,2) series to smooth
        ma_days (int): number of days for moving avg of diff

    Returns:
        np.array: the two processed columns stacked along axis 1.
    """
    def _filtered_diff(column):
        # hampel() is handed a pandas Series of the first differences and its
        # ``.values`` attribute is what gets smoothed afterwards.
        differenced = pd.Series(np.diff(column))
        return hampel(differenced, window_size=5).values

    processed_columns = [
        moving_average(_filtered_diff(series[:, idx]), ma_days)
        for idx in (0, 1)
    ]
    return np.stack(processed_columns, axis=1)
'nivel_columna_506', 'nivel_columna_507', 'nivel_columna_508', 'nivel_columna_509', 'nivel_columna_510', 'nivel_columna_511', 'espumante_sag', 'colector_primario', '%cu_conc_final' ]] # resample on datetimes df.set_index(["fecha"], inplace=True) df_resample = df.resample('600S').mean() print(df_resample.isna().sum()) interpolated = df_resample.interpolate(method='linear') print(interpolated.isna().sum()) # outliers detection and imputation [using the mining shits # window_size = (60 minutes/10 minutes) * 12 hours * 2 shifs* 7 days] for col in interpolated.columns: df[col] = hampel(df[col], window_size=5, imputation=True) plot_time_series(interpolated, fecha_inicial="2018-01-01 00:00:00", fecha_final="2020-03-10 05:30:00", title="Evolution flotation variables", ylabel="None", sample=9) # reset_index and save interpolated.reset_index(drop=False, inplace=True) path_cleaned = "data/cleaned-data.csv" interpolated.to_csv(path_cleaned, index=False, date_format=date_format)
# Load the CSV whose path is given by args.src_file (args is defined outside
# this fragment) and drop every row that contains a missing value.
file_name = args.src_file
df = pd.read_csv(file_name).dropna()

# Plot the 'rssi' column on its own 16x4 figure.
df_rssi = df.loc[:, ['rssi']]
df_rssi.plot(y=['rssi'], figsize=(16, 4))
# plt.axis([0, len(df_rssi.index), -100, 0])

# Build a (rows, len // 2) complex matrix from the 'len' and 'data' columns.
df_csi = df.loc[:, ['len', 'data']]
size_x = len(df_csi.index)
# Two consecutive entries of 'data' form one complex sample, so the matrix
# width is half of 'len'.  Only the first row's 'len' is consulted.
# NOTE(review): assumes every row has the same 'len' -- confirm.
size_y = int(df_csi.iloc[0]['len'] // 2)
array_csi = np.zeros([size_x, size_y], dtype=np.complex64)

# Iterate the rows by position; x is the row number in the matrix.
for x, csi in enumerate(df_csi.iloc):
    # 'data' is JSON text; it is indexed below as a flat sequence of numbers.
    csi_raw_data = json.loads(csi['data'])
    for y in range(0, len(csi_raw_data), 2):
        # Entry y becomes the real part and entry y + 1 the imaginary part.
        # NOTE(review): whether that matches the capture format's ordering
        # cannot be checked from here -- confirm.
        array_csi[x][int(y // 2)] = complex(csi_raw_data[y], csi_raw_data[y + 1])

# Magnitude of each complex sample, one DataFrame column per matrix column.
array_csi_modulus = abs(array_csi)
columns = [f"subcarrier{i}" for i in range(0, size_y)]
df_csi_modulus = pd.DataFrame(array_csi_modulus, columns=columns)
# df_csi_modulus.plot(y = [f"subcarrier{i}" for i in select_list])
# plt.show()

# Run every column through hampel() and plot the result.
for ch in range(df_csi_modulus.shape[1]):
    ts = df_csi_modulus[f"subcarrier{ch}"]
    # With imputation=True the result is plotted directly, so it is presumably
    # the series with outliers replaced -- confirm against the hampel package
    # in use.
    ts_cleaned = hampel(ts, window_size=11, n=3, imputation=True)
    ts_cleaned.plot(figsize=(30, 6))

# NOTE(review): the source layout was flattened; plt.show() is assumed to run
# once after the loop rather than once per column -- confirm.
plt.show()