def merge_file(data_1, data_2, test_list_path, answer_list_path):
    """Randomly pair transient files from *data_1* with files from *data_2*
    and write out concatenated test lists and answer (start/end) lists.

    For up to 200 sampled files of data_1 whose first answer start index is
    not the `maxPerFile` sentinel (presumably marking "no transient" —
    confirm against the dataset class), 3 random partners from data_2
    (filtered the same way) are concatenated and saved via ut.list_to_txt
    under both *test_list_path* and *answer_list_path*.
    """
    numberFileData_1 = data_1.get_len_tran()
    numberFileData_2 = data_2.get_len_tran()
    # BUG FIX: these indices address data_1, so they must be sampled from
    # data_1's length.  The original sampled range(numberFileData_2), which
    # left numberFileData_1 unused and could raise IndexError (or silently
    # mis-sample) whenever the two datasets differ in size.  The min() guard
    # also keeps random.sample from raising ValueError on small datasets.
    i_list = random.sample(range(numberFileData_1), min(200, numberFileData_1))
    for i in i_list:
        filename_1 = data_1.get_file_test_name(index=i)[-18:]
        test_list_1 = data_1.get_dataset_test(index=i)
        answer_st_ed_list_1 = data_1.get_dataset_answer_st_ed(index=i)
        start = answer_st_ed_list_1[0][0]
        if start != maxPerFile:
            j_list = random.sample(range(numberFileData_2),
                                   min(3, numberFileData_2))
            for j in j_list:
                filename_2 = data_2.get_file_test_name(index=j)[-18:]
                test_list_2 = data_2.get_dataset_test(index=j)
                answer_st_ed_list_2 = data_2.get_dataset_answer_st_ed(index=j)
                start = answer_st_ed_list_2[0][0]
                if start != maxPerFile:
                    new_test_list = test_list_1 + test_list_2
                    new_answer_st_ed_list = (answer_st_ed_list_1
                                             + answer_st_ed_list_2)
                    # Backslashes are stripped so the joined name is a plain
                    # file name, not a nested path.
                    new_file_name = "{}_{}".format(
                        filename_1, filename_2).replace("\\", "")
                    ut.list_to_txt(
                        rows=new_test_list,
                        csv_name='{}\\{}'.format(test_list_path, new_file_name),
                        is_sort=False)
                    ut.list_to_txt(
                        rows=new_answer_st_ed_list,
                        csv_name='{}\\{}'.format(answer_list_path, new_file_name),
                        is_sort=False)
def cal_k_tran(data, input_k, input_bin):
    """Run the Bin-Chebyshev change-point detector over every test file in
    *data* and record the detected change points on the dataset object.

    As a side effect, each file's bin means are appended to
    'bin\\mean_list.txt'.  Returns the same *data* object, updated in place.
    """
    data.clear_change_points()
    cheb_detector = light_detector()
    # `win` is a module-level constant; min and max window sizes are pinned
    # to the same value here.
    cheb_detector.set_parameter(input_k=input_k,
                                input_min_winsize=win,
                                input_max_winsize=win,
                                input_bin=input_bin)
    for file_index, _file_name in enumerate(data.get_files_test()):
        samples = data.get_dataset_test(file_index)
        change_points, mean_list = cheb_detector.get_changepoint_BinCheb(samples)
        data.set_change_points(change_points)
        ut.list_to_txt(rows=mean_list,
                       csv_name='bin\\mean_list.txt',
                       is_append=True)
    return data
    # --- tail of the enclosing function; its `def` line is above this chunk ---
    return return_result


if __name__ == '__main__':
    # Threshold-sweep driver.  `heights`, `durations`, `Thresholds`, `dt`,
    # `ut`, `ut_lc` and `getTFfromTopK` are presumably defined earlier in the
    # file (not visible in this chunk).
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            total_rows = []
            # One TP/FP accumulator slot per threshold value.
            total_tp = [0] * len(Thresholds)
            total_fp = [0] * len(Thresholds)
            listFile = ut_lc.getListLight(height=height, duration=duration)
            for file_name in listFile:
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Thresholds=Thresholds)
                # Each returned row carries "TP"/"FP" counts for one threshold.
                for index, row in enumerate(return_result):
                    total_tp[index] = total_tp[index] + row["TP"]
                    total_fp[index] = total_fp[index] + row["FP"]
                print(file_name)
            # One summary row per threshold for this (height, duration) pair.
            for index, k in enumerate(Thresholds):
                total_rows.append(
                    [height, duration, k, total_tp[index], total_fp[index]])
            ut.list_to_txt(rows=total_rows,
                           csv_name="rrcf_starting_{}.txt".format(dt),
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
            # --- interior of a per-file loop: `file_name`, `transList`,
            # `FdataMDF_dy`, `L`, `I`, `rows`, `alphas` and `window_size`
            # are bound above this chunk ---
            gb_core = gb_test()
            for instance in FdataMDF_dy["window"]:
                gb_core.add_element(instance.get_representation())
            for alpha in alphas:
                # countFile = 0
                # foundList = gb_core.get_max_outliers(alpha=alpha)
                # if foundList:
                #     countFile = 1
                # rows.append([file_name,L,I,alpha,countFile,foundList])
                indexBin = gb_core.get_max_index(alpha=alpha)
                detect_tran = 0
                if indexBin:
                    # Ground-truth (inclusive) index range of the transient.
                    answerList = [
                        *range(int(transList[0][0]), int(transList[0][1]) + 1, 1)
                    ]
                    indexFromMaxBin = genIndex(
                        window=FdataMDF_dy["window"], indexBin=indexBin[0])
                    # Non-empty intersection => the max bin overlaps the answer.
                    resultIntersect = set(
                        indexFromMaxBin).intersection(answerList)
                    if resultIntersect:
                        detect_tran = 1
                rows.append([file_name, L, I, alpha, detect_tran])
        # NOTE(review): reconstructed indentation — this write presumably sits
        # after the per-file loop; confirm against the original file.
        ut.list_to_txt(rows,
                       csv_name="dy_win{}_TF.txt".format(window_size),
                       is_append=True)
        print("end L {}, I {} window_size {}".format(
            L, I, window_size))
            # --- chunk boundary: the line below is the trailing keyword
            # argument of a call whose opening lies above this chunk
            # (presumably the per-file data-load call binding `lc_data`) ---
            duration=duration)
        # corePlot = sketchDyBinService(windowSize=windowSize, initialBin=initialBin, isOnline=True)
        # sketchInstances = corePlot.sketchMode(instances=lc_data['instances'])
        # windowList = corePlot.getWindow()
        # binInstances = []
        # for bin in windowList:
        #     binInstances.append(bin.get_representation())
        avg_codisp = get_avgcodisp(lc_data['instances'])
        # codisp_normalization = []
        # for index in range(shingle_size-1):
        #     bin = windowList[index]
        #     n = bin.get_number_instance()
        #     for _ in range(n):
        #         codisp_normalization.append(0)
        # for index, (k, value) in enumerate(avg_codisp.items()):
        #     print(index)
        #     bin = windowList[index+shingle_size-1]
        #     n = bin.get_number_instance()
        #     for _ in range(n):
        #         codisp_normalization.append(value)
        # file_save = "{}\\{}.txt".format(folder_name,file_name)
        # NOTE(review): `file_save` is only assigned in the commented line
        # above — the live value must come from earlier in the file; verify.
        temp = []
        for key, value in avg_codisp.items():
            temp.append(value)
        ut.list_to_txt(temp, csv_name=file_save, is_sort=False)
        print(file_name)
    # --- tail of the enclosing function; its `def` line is above this chunk ---
    return return_result


if __name__ == '__main__':
    # Threshold-sweep driver (matrix-profile variant).  `heights`,
    # `durations`, `Thresholds`, `dt`, `ut`, `ut_lc` and `getTFfromTopK`
    # are presumably defined earlier in the file (not visible here).
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            total_rows = []
            # One TP/FP accumulator slot per threshold value.
            total_tp = [0] * len(Thresholds)
            total_fp = [0] * len(Thresholds)
            listFile = ut_lc.getListLight(height=height, duration=duration)
            for file_name in listFile:
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Thresholds=Thresholds)
                # Each returned row carries "TP"/"FP" counts for one threshold.
                for index, row in enumerate(return_result):
                    total_tp[index] = total_tp[index] + row["TP"]
                    total_fp[index] = total_fp[index] + row["FP"]
                print(file_name)
            # One summary row per threshold for this (height, duration) pair.
            for index, k in enumerate(Thresholds):
                total_rows.append(
                    [height, duration, k, total_tp[index], total_fp[index]])
            ut.list_to_txt(rows=total_rows,
                           csv_name="mp_{}.txt".format(dt),
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
    # --- interior of a per-file loop: `fileName`, `LCFile`, `main_save_path`,
    # `dataset_path` and `lc_timestamp_path` are bound above this chunk ---
    filesize = os.path.getsize(LCFile)
    if filesize != 0:  # skip empty light-curve files
        fileDate = fileName.split("_date")[1]
        # Save LC to Common LC
        # NOTE(review): this handle is never read or closed (the data is
        # re-read through ut.txt_to_list below) — it leaks a file descriptor
        # and shadows the name `file`; it looks safe to delete.
        file = open(LCFile, 'rb')
        FdataMDF = [float(item) for item in ut.txt_to_list(LCFile)]
        save_lc = "{}{}\\{}\\".format(main_save_path,
                                      "lc_flux_catalog_aperture_r7", fileDate)
        ut.checkFolderandCreate(save_lc)
        save_lc_fileName = "{}{}.txt".format(save_lc, fileName)
        # Only write the light curve if it is not already in the folder.
        if not (ut.isFileNameInFolder(save_lc, "{}.txt".format(fileName))):
            ut.list_to_txt(rows=FdataMDF,
                           csv_name=save_lc_fileName,
                           is_sort=False)
        # Save LC to Common MJD
        path_lc_timestamp_path = "{}{}{}.txt".format(
            dataset_path, lc_timestamp_path, fileName)
        mjd_list = [
            float(item) for item in ut.txt_to_list(path_lc_timestamp_path)
        ]
        save_mjd = "{}{}\\{}\\".format(main_save_path, "timestamp_MJD",
                                       fileDate)
        ut.checkFolderandCreate(save_mjd)
        save_mjd_fileName = "{}{}.txt".format(save_mjd, fileName)
        ut.list_to_txt(rows=mjd_list,
                       csv_name=save_mjd_fileName,
                       is_sort=False)
    # --- loop body: `files_list`, `L`, `I`, `binSize`, `alphas` and `rows`
    # are bound above this chunk ---
    for index, file_name in enumerate(files_list):
        rawInstances, transList = ut_service.getDataLC_test(file_name, L=L, I=I)
        FdataMDF_Fix = ut_gen.genFixBin(binSize=binSize,
                                        instances=rawInstances,
                                        isExtract=False)
        gb_core = gb_test()
        for instance in FdataMDF_Fix:
            gb_core.add_element(instance)
        for alpha in alphas:
            # countFile = 0
            # foundList = gb_core.get_max_outliers(alpha=alpha)
            # if foundList:
            #     countFile = 1
            # rows.append([file_name,L,I,alpha,countFile,foundList])
            indexBin = gb_core.get_max_index(alpha=alpha)
            detect_tran = 0
            if indexBin:
                start_point = indexBin[0] * binSize
                end_point = indexBin[0] * binSize + binSize - 1
                # NOTE(review): range() excludes end_point, so this covers
                # binSize - 1 indices and drops the bin's last index —
                # confirm whether that off-by-one is intentional.
                indexFromMaxBin = [*range(start_point, end_point, 1)]
                st_end = transList[0]
                # Ground-truth (inclusive) index range of the transient.
                answerList = [*range(int(transList[0][0]),
                                     int(transList[0][1]) + 1, 1)]
                # Non-empty intersection => the max bin overlaps the answer.
                resultIntersect = set(indexFromMaxBin).intersection(answerList)
                if resultIntersect:
                    detect_tran = 1
            rows.append([file_name, L, I, alpha, detect_tran])
    ut.list_to_txt(rows,
                   csv_name="bin_{}_TF.txt".format(binSize),
                   is_append=True)
    print("end L {}, I {}".format(L, I))
    # --- interior of a per-file loop: `data`, `index_list`, `window_size`,
    # `initial_Bin`, `folder_name` and `lc_file` are bound above this chunk ---
    dy_data = ut_gen.genListDyBin(instances=data['instances'],
                                  timestamp=data['timestamp'],
                                  windowSize=window_size,
                                  initialBin=initial_Bin,
                                  isOnline=False)
    results = []
    # Running start offset (in raw samples) of the prior bin.
    # NOTE(review): accumulating prior_n is only correct if `index_list`
    # walks consecutive bins from the start — confirm at the call site.
    prior_index = 0
    for index_bin in index_list:
        prior_bin = dy_data['window'][index_bin]
        cur_bin = dy_data['window'][index_bin + 1]
        prior_n = prior_bin.get_number_instance()
        cur_n = cur_bin.get_number_instance()
        priorMean = prior_bin.get_representation()
        curMean = cur_bin.get_representation()
        priorSDError = prior_bin.get_SDError()
        curSDError = cur_bin.get_SDError()
        # SK-method change statistic between two adjacent bins.
        K_star = ut_de.detection_SKmethod(
            curMean=curMean,
            priorMean=priorMean,
            priorSDError=priorSDError,
            curSDError=curSDError)
        results.append([
            K_star, prior_n, cur_n, prior_index, prior_index + prior_n
        ])
        prior_index = prior_index + prior_n
    ut.list_to_txt(rows=results,
                   csv_name="{}\\{}.txt".format(folder_name, lc_file))
    print("{}\\{}.txt".format(folder_name, lc_file))
    # --- loop body: `durations`, `height`, `ut`, `ut_lc`, `MatrixProfile`,
    # `numpy` and `normalization` are bound/imported above this chunk ---
    for duration in durations:
        listFiles = ut_lc.getListLight(height=height, duration=duration)
        folder_name = "I{}_L{}".format(height, duration)
        # folder_name = 'temp'
        ut.checkFolderandCreate(folder_name)
        for lc_file in listFiles:
            data = ut_lc.getDataFromFile(fileName=lc_file,
                                         height=height,
                                         duration=duration)
            # Column vector (n, 1) as expected by the MatrixProfile API.
            s_x = numpy.array(data["instances"]).reshape((-1, 1))
            mp = MatrixProfile(subsequence_length=duration, scale=False)
            mp_series = mp.fit_transform([s_x])[0]
            # Index of the largest matrix-profile value.
            t_star = numpy.argmax(mp_series.ravel())
            profile_list = list(mp_series.ravel())
            norm_profile = normalization(profile_list)
            ut.list_to_txt(rows=norm_profile,
                           csv_name="{}\\{}.txt".format(folder_name, lc_file))
            # print("a")
            ansList = data["ansList"]
            # NOTE(review): `t_star` and `ansList` are only consumed by the
            # commented-out plotting code below, which is cut off at the end
            # of this chunk.
            # plt.figure()
            # ax = plt.subplot(2, 1, 1)  # First, raw time series
            # trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
            # plt.plot(data["timestamp"],s_x.ravel(), "b-")
            # # plt.xlim([0, s_x.shape[0]])
            # plt.ylim(min(data["instances"])-100,max(data["instances"])+100)
            # plt.axvline(x=data["timestamp"][t_star], c="red", linewidth=2)
            # # plt.axvline(x=ansList[0], c="black", linewidth=2)
            # plt.axvline(x=ansList[-1], c="black", linewidth=2)
            # # if t_star+mp.subsequence_length >= len(data["timestamp"]):
            # plt.fill_between(x=[data["timestamp"][t_star],
        # --- interior of a try-block: the `try:` and the bindings for
        # `main_file`, `data`, `window_size`, `ini_bin` and `rows` are
        # above this chunk ---
        corePlot = sketchDyBinService(windowSize=window_size,
                                      initialBin=ini_bin,
                                      isOnline=False)
        corePlot.sketchMode(instances=data['instances'])
        window = corePlot.getWindow()
        # a = window[:-1]
        # Compare each adjacent pair of bins in the sketch window.
        for index_bin in range((window_size) - 1):
            prior_bin = window[index_bin]
            cur_bin = window[index_bin + 1]
            prior_SE = prior_bin.get_SDError()
            cur_SE = cur_bin.get_SDError()
            prior_mean = prior_bin.get_representation()
            cur_mean = cur_bin.get_representation()
            # SK-method change statistic between the two bins.
            K_star = ut_det.detection_SKmethod(curMean=cur_mean,
                                               priorMean=prior_mean,
                                               priorSDError=prior_SE,
                                               curSDError=cur_SE)
            rows.append(
                [main_file, window_size, index_bin, K_star, ini_bin])
        ut.list_to_txt(rows=rows,
                       csv_name='SKexport.txt',
                       is_append=True,
                       is_sort=False)
        print("END : {}".format(main_file))
    # NOTE(review): bare `except` swallows every error (including
    # KeyboardInterrupt); narrowing to `except Exception` would be safer.
    except:
        # On failure, log only the file name to the error export.
        rows = [main_file]
        ut.list_to_txt(rows=rows,
                       csv_name='error_SKexport.txt',
                       is_append=True,
                       is_sort=False)
    # --- tail of the enclosing function; its `def` line is above this chunk ---
    return return_result


if __name__ == '__main__':
    # Top-K sweep driver.  `heights`, `durations`, `Ks`, `ut`, `ut_lc` and
    # `getTFfromTopK` are presumably defined earlier in the file (not
    # visible in this chunk).
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            total_rows = []
            # One TP/FP accumulator slot per K value.
            total_tp = [0] * len(Ks)
            total_fp = [0] * len(Ks)
            listFile = ut_lc.getListLight(height=height, duration=duration)
            for file_name in listFile:
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Ks=Ks)
                # Each returned row carries "TP"/"FP" counts for one K.
                for index, row in enumerate(return_result):
                    total_tp[index] = total_tp[index] + row["TP"]
                    total_fp[index] = total_fp[index] + row["FP"]
                print(file_name)
            # One summary row per K for this (height, duration) pair.
            for index, k in enumerate(Ks):
                total_rows.append(
                    [height, duration, k, total_tp[index], total_fp[index]])
            ut.list_to_txt(rows=total_rows,
                           csv_name="mp_2022_06_02.txt",
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
        # --- interior of a try-block inside a per-file loop: the `try:` and
        # the bindings for `fileName`, `path_to_lc_file`, `dataset_path`,
        # `lc_timestamp_path`, `initialBin`, `theshold_compression` and
        # `savepath` are above this chunk ---
        pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
        filesize = os.path.getsize(pathFile)
        if filesize != 0:  # skip empty light-curve files
            # result_found is unused within this chunk — possibly consumed
            # further below in the original file; verify.
            result_found = 0
            # NOTE(review): this handle is never read or closed (the data is
            # re-read via ut.txt_to_list below) — file-descriptor leak.
            file = open(pathFile, 'rb')
            FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
            path_lc_timestamp_path = "{}{}{}.txt".format(
                dataset_path, lc_timestamp_path, fileName)
            x_axis = [
                float(item) for item in ut.txt_to_list(path_lc_timestamp_path)
            ]
            # Window size scales the bin count by the compression threshold
            # (the 'theshold' typo is in the original module-level name).
            window_size = int(
                (len(FdataMDF) / initialBin) * theshold_compression)
            # window_size = 20
            FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                              timestamp=x_axis,
                                              windowSize=window_size,
                                              isOnline=False)
            rows = []
            # `bin` shadows the builtin; left unchanged in this doc-only pass.
            for bin in FdataMDF_dy["window"]:
                rows.append(bin.get_representation())
            ut.list_to_txt(rows=rows,
                           csv_name="{}{}.txt".format(savepath, fileName))
            print("END : File .... {}".format(fileName))
    # NOTE(review): bare `except` hides the real error; only the file name
    # is logged to dy_export.txt.
    except:
        row = [fileName]
        ut.add_to_txt(row, csv_name="dy_export.txt", is_append=True)
        print("************* Error : File .... {}".format(fileName))