Esempio n. 1
0
def getdata(filename):
    """Read a whitespace-delimited data file and Hampel-filter one column.

    Every line up to and including the first one that starts with
    ``'seq  rel_time'`` is treated as preamble.  From the remaining lines
    only those whose field 2 parses to integer 0 are used: field 3 is
    collected as the relative time and field 8 as the value to filter
    (field indices are 0-based).

    Args:
        filename: path of the text file to read.

    Returns:
        tuple ``(rel_time, filtered_data)`` where ``rel_time`` is a list of
        floats and ``filtered_data`` is whatever the second
        ``hampel.hampel`` pass returns for the collected values.

    Raises:
        IndexError: if no line starts with ``'seq  rel_time'``.

    Side effects:
        Prints the difference between the filtered and the raw values.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # Locate the first line after the column header.
    for start, line in enumerate(text, 1):
        if line.startswith('seq  rel_time'):
            break
    else:
        # The original unbounded scan ran off the end of the list here;
        # keep the same exception type but say what actually went wrong.
        raise IndexError(
            "no line starting with 'seq  rel_time' in %r" % (filename,))

    rm = []
    rel_time = []
    for line in text[start:]:
        info = line.split()
        try:
            if int(info[2]) != 0:
                continue
            # Parse both fields before storing either one, so a row with a
            # bad value field cannot leave rel_time longer than rm.
            time_val = float(info[3])
            rm_val = float(info[8])
        except (IndexError, ValueError):
            # Blank, short or non-numeric lines are not data rows.
            continue
        rel_time.append(time_val)
        rm.append(rm_val)

    rm_arr = numpy.array(rm)
    # Two filtering passes; the numeric arguments are presumably window
    # size and threshold -- TODO confirm against the hampel module.
    filtered_data = hampel.hampel(rm_arr, 5, 4)
    filtered_data = hampel.hampel(filtered_data, 10, 1)
    diff = filtered_data - rm_arr
    # Parenthesised form prints the same under Python 2 and Python 3.
    print(diff)
    return rel_time, filtered_data
Esempio n. 2
0
def getdata(filename):
    """Read a whitespace-delimited data file and collect three columns.

    Every line up to and including the first one that starts with
    ``'seq  rel_time'`` is treated as preamble.  From the remaining lines
    only those whose field 2 parses to integer 0 are used (field indices
    are 0-based):

    * field 3 divided by 3600 is stored as the relative time
      (presumably seconds converted to hours -- TODO confirm),
    * field 7 is stored as the value,
    * field 10, when present and numeric, is stored as its error.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple ``(rel_time, stec_arr, stec_err, latest)``; the first three
        are numpy arrays and ``latest`` is the relative time of the last
        accepted row, or ``None`` when no row was accepted.  The values
        are returned unfiltered.

    Raises:
        IndexError: if no line starts with ``'seq  rel_time'``.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # Locate the first line after the column header.
    for start, line in enumerate(text, 1):
        if line.startswith('seq  rel_time'):
            break
    else:
        # The original unbounded scan ran off the end of the list here;
        # keep the same exception type but say what actually went wrong.
        raise IndexError(
            "no line starting with 'seq  rel_time' in %r" % (filename,))

    stec = []
    stec_err = []
    rel_time = []
    # Defined up front so the return cannot hit an unbound local when the
    # file holds no usable rows.
    latest = None
    for line in text[start:]:
        info = line.split()
        try:
            if int(info[2]) != 0:
                continue
            # Parse both fields before storing either one, so a row with a
            # bad value field cannot leave rel_time longer than stec.
            row_time = float(info[3]) / 3600
            stec_val = float(info[7])
        except (IndexError, ValueError):
            # Blank, short or non-numeric lines are not data rows.
            continue
        latest = row_time
        rel_time.append(row_time)
        stec.append(stec_val)
        try:
            stec_err.append(float(info[10]))
        except (IndexError, ValueError):
            # Best effort, as before: a row without a usable error field
            # is kept, so stec_err may end up shorter than stec.
            pass

    stec_arr = numpy.array(stec)
    stec_err = numpy.array(stec_err)
    rel_time = numpy.array(rel_time)
    # NOTE(review): the filtered result is not returned (the unfiltered
    # array is).  The calls are kept in case hampel.hampel has effects
    # beyond its return value -- confirm and drop them if it is pure.
    filtered = hampel.hampel(stec_arr, 5, 4)
    filtered_data = hampel.hampel(filtered, 10, 1)
    #       return rel_time, filtered_data, stec_err, latest
    return rel_time, stec_arr, stec_err, latest
def getdata(filename):
    """Read a whitespace-delimited data file and collect values with errors.

    Every line up to and including the first one that starts with
    ``'seq  rel_time'`` is treated as preamble.  From the remaining lines
    only those whose field 2 parses to integer 0 are used (field indices
    are 0-based):

    * field 3 divided by 3600.0 is stored as the relative time
      (presumably seconds converted to hours -- TODO confirm),
    * field 8 is stored as the value,
    * ``value * (field 10 / field 7)`` is stored as its error whenever
      that expression can be evaluated.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple ``(rel_time, rm_arr, rm_err, latest)``; ``rel_time`` is a
        list of floats, ``rm_arr`` and ``rm_err`` are numpy arrays and
        ``latest`` is the relative time of the last accepted row, or
        ``None`` when no row was accepted.  The values are returned
        unfiltered.

    Raises:
        IndexError: if no line starts with ``'seq  rel_time'``.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filename, 'r') as handle:
        text = handle.readlines()

    # Locate the first line after the column header.
    for start, line in enumerate(text, 1):
        if line.startswith('seq  rel_time'):
            break
    else:
        # The original unbounded scan ran off the end of the list here;
        # keep the same exception type but say what actually went wrong.
        raise IndexError(
            "no line starting with 'seq  rel_time' in %r" % (filename,))

    rm = []
    rm_err = []
    rel_time = []
    # Defined up front so the return cannot hit an unbound local when the
    # file holds no usable rows.
    latest = None
    for line in text[start:]:
        info = line.split()
        try:
            if int(info[2]) != 0:
                continue
            # Parse both fields before storing either one, so a row with a
            # bad value field cannot leave rel_time longer than rm.
            row_time = float(info[3]) / 3600.0
            rm_val = float(info[8])
        except (IndexError, ValueError):
            # Blank, short or non-numeric lines are not data rows.
            continue
        latest = row_time
        rel_time.append(row_time)
        rm.append(rm_val)
        try:
            rm_error = rm_val * (float(info[10]) / float(info[7]))
        except (IndexError, ValueError, ZeroDivisionError):
            # Best effort, as before: a row whose error cannot be computed
            # is kept, so rm_err may end up shorter than rm.
            continue
        rm_err.append(rm_error)

    rm_arr = numpy.array(rm)
    rm_err = numpy.array(rm_err)
    # NOTE(review): the filtered result is not returned (the unfiltered
    # array is).  The calls are kept in case hampel.hampel has effects
    # beyond its return value -- confirm and drop them if it is pure.
    filtered_data = hampel.hampel(rm_arr, 5, 4)
    filtered_data = hampel.hampel(filtered_data, 10, 1)
    #       return rel_time, filtered_data,rm_err,latest
    return rel_time, rm_arr, rm_err, latest
Esempio n. 4
0
def smooth_and_diff(series, ma_days=7):
    """
    Difference, Hampel-filter and average columns 0 and 1 of a series.

    For each of the two columns the first difference (``np.diff``) is
    wrapped in a ``pd.Series``, passed through ``hampel`` with
    ``window_size=5``, and the resulting values are handed to
    ``moving_average`` together with ``ma_days``.  The two outputs are
    stacked along axis 1.

    Args:
        series (np.array): (T,2) series to smooth
        ma_days (int): number of days for moving avg of diff

    Returns:
        The two processed columns stacked with ``np.stack(..., axis=1)``.
    """

    def _filtered_diff(column):
        # hampel is fed a pandas Series; only the raw values are kept.
        differenced = pd.Series(np.diff(column))
        return hampel(differenced, window_size=5).values

    processed_columns = [
        moving_average(_filtered_diff(series[:, col_idx]), ma_days)
        for col_idx in (0, 1)
    ]
    return np.stack(processed_columns, axis=1)
Esempio n. 5
0
    'nivel_columna_506', 'nivel_columna_507', 'nivel_columna_508',
    'nivel_columna_509', 'nivel_columna_510', 'nivel_columna_511',
    'espumante_sag', 'colector_primario', '%cu_conc_final'
]]

# Resample on datetimes: index by "fecha" and average into 600-second bins.
df.set_index(["fecha"], inplace=True)

df_resample = df.resample('600S').mean()
# Missing values per column after resampling.
print(df_resample.isna().sum())

# Fill the gaps left by resampling with linear interpolation.
interpolated = df_resample.interpolate(method='linear')
# Missing values per column that interpolation could not fill.
print(interpolated.isna().sum())

# Outlier detection and imputation.
# The original note proposed sizing the window from the mining shifts:
#   window_size = (60 minutes / 10 minutes) * 12 hours * 2 shifts * 7 days
# NOTE(review): the call below uses window_size=5, not that value -- confirm.
# The filter has to run on `interpolated`, because that is the frame that is
# plotted and saved below; filtering `df` never reached the output.
for col in interpolated.columns:
    interpolated[col] = hampel(interpolated[col], window_size=5,
                               imputation=True)

plot_time_series(interpolated,
                 fecha_inicial="2018-01-01 00:00:00",
                 fecha_final="2020-03-10 05:30:00",
                 title="Evolution flotation variables",
                 ylabel="None",
                 sample=9)

# Turn the datetime index back into a column and save the cleaned data.
interpolated.reset_index(drop=False, inplace=True)
path_cleaned = "data/cleaned-data.csv"
interpolated.to_csv(path_cleaned, index=False, date_format=date_format)
Esempio n. 6
0
    # CSV file to analyse, taken from the parsed arguments.
    file_name = args.src_file

    # Load the file and discard every row that has a missing value.
    df = pd.read_csv(file_name).dropna()

    # Plot the 'rssi' column on its own figure.
    df_rssi = df.loc[:, ['rssi']]
    df_rssi.plot(y=['rssi'], figsize=(16, 4))
    # plt.axis([0, len(df_rssi.index), -100, 0])

    # One complex value per pair of numbers in the JSON 'data' column;
    # the array width comes from the first row's 'len' value.
    df_csi = df.loc[:, ['len', 'data']]
    size_x = len(df_csi.index)
    size_y = int(df_csi.iloc[0]['len'] // 2)
    array_csi = np.zeros([size_x, size_y], dtype=np.complex64)
    for row_idx, row in enumerate(df_csi.iloc):
        samples = json.loads(row['data'])
        # `slot` counts the pairs, `first` is the index of each pair's
        # first element in the flat list.
        for slot, first in enumerate(range(0, len(samples), 2)):
            array_csi[row_idx, slot] = complex(samples[first],
                                               samples[first + 1])

    # Magnitude of every complex entry, one DataFrame column per slot.
    array_csi_modulus = np.abs(array_csi)
    columns = [f"subcarrier{i}" for i in range(0, size_y)]
    df_csi_modulus = pd.DataFrame(array_csi_modulus, columns=columns)

    # df_csi_modulus.plot(y = [f"subcarrier{i}" for i in select_list])
    # plt.show()

    # Hampel-filter each column (with imputation) and show it in turn.
    for sub_idx in range(len(df_csi_modulus.columns)):
        ts = df_csi_modulus[f"subcarrier{sub_idx}"]
        ts_cleaned = hampel(ts, window_size=11, n=3, imputation=True)
        ts_cleaned.plot(figsize=(30, 6))
        plt.show()