# Worker functions, assumed to live in CrashDiag.Detection.Causality. The
# imports below are reconstructed from what the code uses; plotter, adf_test
# and kpss_test are assumed to be helpers defined elsewhere in this module.
import os
import pickle
import time
import warnings

import numpy as np
import pandas as pd

import CrashDiag as cd
import CrashDiag.Detection.Granger as Granger
from CrashDiag.Preproc.Database import Database


def granger_df_producer_worker(lst):
    # Unpack the argument list assembled by the dispatching script.
    (file_name, file_path, save_path, stationary_path,
     dct, combinations, liste, causality_path) = lst

    start = time.time()

    if file_name in os.listdir(causality_path):
        print('Skipping file ' + file_name + ' since it is already calculated')
        return
    if file_name not in os.listdir(stationary_path):
        print('Skipping file ' + file_name + ' since it is not in the stationary dir')
        return

    # Per-subcomponent stationarity flags produced by the statistic workers.
    stationary_file = pickle.load(open(file_path, 'rb'))
    print(file_name)

    df = pd.DataFrame(index=liste, columns=liste)
    for X, Y in combinations:
        if X not in dct or Y not in dct:
            continue
        # Skip string-typed series; the Granger test needs numeric data.
        if (dct[X].values.dtype == np.dtype('<U10')
                or dct[Y].values.dtype == np.dtype('<U10')):
            continue

        # For now require both series to pass both stationarity tests; later
        # we may instead difference the data to make it stationary.
        ergebnisX = stationary_file[X][0] == 1 and stationary_file[X][1] == 1
        ergebnisY = stationary_file[Y][0] == 1 and stationary_file[Y][1] == 1
        if not (ergebnisX and ergebnisY):
            df.at[X, Y] = 'N/A'
            continue

        data = pd.concat([dct[X], dct[Y]], axis=1)
        data = Database.fillna(data)
        try:
            output = Granger.test(data, 60, 0.05, True, True)
            df.at[X, Y] = output
        except Exception:
            print('Granger test failed at combination: ' + X + '/' + Y)
            continue

    pickle.dump(df, open(save_path, 'wb'))
    stop = time.time() - start
    print(file_name + ' finished after ' + str(stop) + ' s')
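# --- Illustrative sketch (an assumption, not the project's Granger module):
# the call Granger.test(data, 60, 0.05, True, True) above suggests a wrapper
# around statsmodels' Granger causality test with maxlag=60 and a 5%
# significance level. A minimal version of such a wrapper could look like
# this; the function name and parameters are hypothetical.
from statsmodels.tsa.stattools import grangercausalitytests

def granger_test_sketch(data: pd.DataFrame, maxlag: int = 60,
                        alpha: float = 0.05) -> bool:
    """True if any lag's ssr F-test rejects 'no Granger causality'."""
    # Tests whether the second column Granger-causes the first one.
    results = grangercausalitytests(data.iloc[:, :2], maxlag=maxlag,
                                    verbose=False)
    return any(res[0]['ssr_ftest'][1] < alpha for res in results.values())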
def statistic_iterate_all_worker(liste, dct, visual=False,
                                 path=cd.paths.stationary_results):
    # `dct` arrives as a (file_name, data_dict) pair from the iterator.
    name = dct[0]
    dct = dct[1]
    file_path = os.path.join(path, name)

    if name in os.listdir(path):
        print(name + ' already calculated. skipping ...')
        return
    print(name)

    # One flag per subcomponent: 1 if the test deems the series stationary,
    # 0 if not, NaN if the subcomponent is absent or non-numeric.
    file_adf_counter = np.full(len(liste), np.nan)
    file_kpss_counter = np.full(len(liste), np.nan)

    for j, subcomponent in enumerate(liste):
        if subcomponent not in dct:
            continue
        data = pd.DataFrame(data=dct[subcomponent],
                            index=dct[subcomponent.split('_')[0] + '_line_number'],
                            columns=[subcomponent])
        data = Database.fillna(data, filler=False)
        if data[subcomponent].dtype == 'O':
            continue
        plotter(data, subcomponent, visual)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            adf = adf_test(data, subcomponent, visual)
            kpss = kpss_test(data, subcomponent, visual)
        file_adf_counter[j] = 1 if adf else 0
        file_kpss_counter[j] = 1 if kpss else 0

    out = pd.DataFrame(data=np.vstack((file_adf_counter, file_kpss_counter)),
                       columns=liste, index=['adf', 'kpss'])
    pickle.dump(out, open(file_path, 'wb'))
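# --- Illustrative sketch (an assumption): adf_test and kpss_test are not
# shown here; helpers with the observed True/False contract could be built
# on statsmodels as below. Note the opposite null hypotheses: ADF's null is
# a unit root (non-stationarity), KPSS's null is stationarity, so a series
# counts as stationary when ADF rejects and KPSS fails to reject.
from statsmodels.tsa.stattools import adfuller, kpss as kpss_statistic

def adf_test_sketch(series, alpha=0.05):
    """True if the ADF test rejects the unit-root null at level alpha."""
    return adfuller(series.dropna())[1] < alpha

def kpss_test_sketch(series, alpha=0.05):
    """True if the KPSS test fails to reject the stationarity null."""
    return kpss_statistic(series.dropna(), regression='c', nlags='auto')[1] >= alpha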
from CrashDiag.Preproc.Database import Database, iterDataframes, iterDict
import CrashDiag.Detection.Granger as Granger
import time
import pickle
import pandas as pd
import CrashDiag as cd
import warnings
import os

# db = Database('SampleLog')
db = Database()
stationary_path = cd.paths.stationary_results

most_subcompos = ['ATT_DesRoll', 'ATT_Roll']
# most_subcompos = pickle.load(open('most_subcomponents.p', 'rb')).index[:99]
# most_subcompos = list(most_subcompos)

start = time.time()
n = 0
cd.data.store(n, 'test')

# Build every unordered pair of subcomponents exactly once.
combinations = []
for X in most_subcompos:
    for Y in most_subcompos:
        if (X, Y) in combinations or (Y, X) in combinations or X == Y:
            continue
        combinations.append((X, Y))

# Assumed output directory for the causality results (not defined in the
# original script).
causality_path = 'causality_results'

output = []
for file_name, dct in iterDict(db, most_subcompos, querytype='or', linenr=True):
    # The original loop body is not preserved; this completion (an
    # assumption) assembles the 8-element argument list that
    # granger_df_producer_worker expects.
    output.append([file_name,
                   os.path.join(stationary_path, file_name),  # stationarity pickle
                   os.path.join(causality_path, file_name),   # result dump target
                   stationary_path, dct, combinations,
                   most_subcompos, causality_path])
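# --- Hedged usage sketch: dispatching the argument lists in `output` over a
# process pool, matching the worker's one-list-argument signature. The
# worker's module location is an assumption.
from multiprocessing import Pool
from CrashDiag.Detection.Causality import granger_df_producer_worker  # assumed location

os.makedirs(causality_path, exist_ok=True)  # make sure the output dir exists
with Pool() as pool:
    pool.map(granger_df_producer_worker, output)
print('finished after ' + str(time.time() - start) + ' s')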
from CrashDiag.Preproc.Database import Database
from CrashDiag.Detection.Correlation import correlation
import pickle

db = Database()

# Take the most frequent subcomponents, drop error/metadata channels and
# anything time-related, and keep the first 100 that remain.
most_subcomps = pickle.load(open('../Misc/most_subcomponents.p', 'rb'))
most_subcomps_list = list(most_subcomps.index)

remove_list = ['PM_I2CErr', 'PM_INSErr', 'ERR_ECode', 'MSG_Message',
               'PM_INAVErr', 'DU32_Value', 'DU32_Id', 'CMD_Cld', 'CMD_Copt',
               'CMD_Prm1']

top100trunc = []
for subcomp in most_subcomps_list:
    if subcomp in remove_list or 'Time' in subcomp:
        continue
    top100trunc.append(subcomp)
top100trunc = top100trunc[:100]

corr = correlation(db, top100trunc, n_files=10)
pickle.dump(corr, open('corr.p', 'wb'))
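# --- Illustrative sketch (an assumption, not the project's Correlation
# module): a pairwise Pearson correlation over the numeric subcomponents of
# one log file, using pandas. The real correlation(db, ..., n_files=10)
# presumably aggregates something like this over the first 10 files.
import pandas as pd

def pairwise_pearson_sketch(df: pd.DataFrame) -> pd.DataFrame:
    """Correlation matrix over the numeric columns only."""
    return df.select_dtypes(include='number').corr(method='pearson')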
from CrashDiag.Preproc.Database import Database, iterDataframes
from CrashDiag.Detection.Causality import statistic_iterate_all
import pickle
import time

if __name__ == "__main__":
    # Load the database.
    db = Database('SampleLog')

    # Alternatively, load the 100 most frequent subcomponents and filter
    # them as in the correlation script; for now a small fixed list is used.
    most_subcomps_list = ['RCIN_C1', 'RCIN_C2', 'RCIN_C3', 'RCIN_C4']

    start = time.time()
    statistic_iterate_all(db, most_subcomps_list)
    print('finished after ' + str(time.time() - start) + ' s')
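# --- Hedged usage sketch: inspecting one pickled stationarity result. Each
# worker dumps a DataFrame with rows ['adf', 'kpss'] and one column per
# subcomponent (1 = test says stationary, 0 = not, NaN = skipped); the
# result file name below is an assumption.
import os
import pickle
import CrashDiag as cd

result_file = os.path.join(cd.paths.stationary_results, 'sample.log')  # assumed name
flags = pickle.load(open(result_file, 'rb'))
# A series counts as stationary only when ADF and KPSS agree (cf. the check
# in granger_df_producer_worker).
stationary = (flags.loc['adf'] == 1) & (flags.loc['kpss'] == 1)
print(stationary)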
from CrashDiag.Preproc.Database import Database, iterDataframes
from CrashDiag.Detection.Causality import statistic_iterate_log_file
import matplotlib.pyplot as plt
import pandas as pd

# Load the database.
db = Database(dbname='SampleLog')

# Initialize the list of variables to test.
liste = ['CURR_CurrTot', 'ATT_DesRoll']

ergebnis = statistic_iterate_log_file(db, 'sample.log', liste, visual=True)