Esempio n. 1
0
def merge_file(data_1,data_2,test_list_path,answer_list_path):
    """Build merged test/answer files from random pairs of two datasets.

    Randomly picks 200 file indices from ``data_1``; for each picked file
    that contains a transient (its first answer start != ``maxPerFile``,
    which presumably marks "no transient" -- TODO confirm), pairs it with 3
    random files from ``data_2`` that also contain a transient, concatenates
    their test instance lists and answer start/end lists, and writes both
    under ``test_list_path`` and ``answer_list_path``.

    NOTE(review): relies on module-level ``random``, ``ut`` and
    ``maxPerFile`` defined elsewhere in this file.
    """
    numberFileData_1 = data_1.get_len_tran()
    numberFileData_2 = data_2.get_len_tran()
    # BUG FIX: the original sampled from range(numberFileData_2) but used
    # the indices on data_1 (and never used numberFileData_1 at all);
    # sample from data_1's own length.
    i_list = random.sample(range(numberFileData_1), 200)
    for i in i_list:
        # [-18:] keeps just the trailing file-name portion of the path.
        filename_1 = data_1.get_file_test_name(index=i)[-18:]
        test_list_1 = data_1.get_dataset_test(index=i)
        answer_st_ed_list_1 = data_1.get_dataset_answer_st_ed(index=i)
        start = answer_st_ed_list_1[0][0]
        if start != maxPerFile:
            j_list = random.sample(range(numberFileData_2), 3)
            for j in j_list:
                filename_2 = data_2.get_file_test_name(index=j)[-18:]
                test_list_2 = data_2.get_dataset_test(index=j)
                answer_st_ed_list_2 = data_2.get_dataset_answer_st_ed(index=j)
                start = answer_st_ed_list_2[0][0]
                if start != maxPerFile:
                    new_test_list = test_list_1 + test_list_2
                    new_answer_st_ed_list = answer_st_ed_list_1 + answer_st_ed_list_2
                    # Strip any backslashes so the combined name is a plain
                    # file name, not a nested path.
                    new_file_name = "{}_{}".format(filename_1,filename_2).replace("\\","")
                    ut.list_to_txt(rows=new_test_list,
                                   csv_name='{}\\{}'.format(test_list_path,new_file_name),
                                   is_sort=False)

                    ut.list_to_txt(rows=new_answer_st_ed_list,
                                   csv_name='{}\\{}'.format(answer_list_path, new_file_name),
                                   is_sort=False)
Esempio n. 2
0
def cal_k_tran(data,input_k,input_bin):
    """Run the Chebyshev-bin change-point detector over every test file in *data*.

    Configures a ``light_detector`` with threshold ``input_k`` and bin size
    ``input_bin`` (both window sizes come from the module-level ``win``),
    runs ``get_changepoint_BinCheb`` on each file's instances, stores the
    detected change points back on *data*, and appends each file's per-bin
    means to ``bin\\mean_list.txt``.

    Returns the mutated *data* object.
    """
    data.clear_change_points()
    detector = light_detector()
    detector.set_parameter(input_k=input_k, input_min_winsize=win,
                           input_max_winsize=win, input_bin=input_bin)
    # Only the index is needed; the file name is not used by the detector.
    for i, _file_name in enumerate(data.get_files_test()):
        instances = data.get_dataset_test(i)
        change_points, mean_list = detector.get_changepoint_BinCheb(instances)
        data.set_change_points(change_points)
        ut.list_to_txt(rows=mean_list, csv_name='bin\\mean_list.txt', is_append=True)
    # FIX: removed the original unreachable `return return_result` that
    # followed this return and referenced an undefined name.
    return data


if __name__ == '__main__':
    # Sweep every (height, duration) configuration, accumulate TP/FP counts
    # per detection threshold over all light-curve files, then append one
    # summary row per threshold to the rrcf output file.
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            total_tp = [0] * len(Thresholds)
            total_fp = [0] * len(Thresholds)
            for file_name in ut_lc.getListLight(height=height,
                                                duration=duration):
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Thresholds=Thresholds)
                # One result row per threshold; fold its counts in.
                for idx, row in enumerate(return_result):
                    total_tp[idx] += row["TP"]
                    total_fp[idx] += row["FP"]
                print(file_name)
            total_rows = [[height, duration, k, tp, fp]
                          for k, tp, fp in zip(Thresholds, total_tp, total_fp)]
            ut.list_to_txt(rows=total_rows,
                           csv_name="rrcf_starting_{}.txt".format(dt),
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
Esempio n. 4
0
                    # Feed every dynamic-bin representation into the
                    # outlier-test core (gb_test), then probe each alpha.
                    gb_core = gb_test()
                    for instance in FdataMDF_dy["window"]:
                        gb_core.add_element(instance.get_representation())
                    for alpha in alphas:
                        # countFile = 0
                        # foundList = gb_core.get_max_outliers(alpha=alpha)
                        # if foundList:
                        #     countFile = 1
                        # rows.append([file_name,L,I,alpha,countFile,foundList])

                        # Index of the most-outlying bin at this alpha; falsy
                        # when nothing is flagged -- TODO confirm gb_test's
                        # get_max_index return contract.
                        indexBin = gb_core.get_max_index(alpha=alpha)
                        detect_tran = 0
                        if indexBin:
                            # Ground-truth sample indices of the transient:
                            # inclusive [start, end] from the first answer.
                            answerList = [
                                *range(int(transList[0][0]),
                                       int(transList[0][1]) + 1, 1)
                            ]
                            indexFromMaxBin = genIndex(
                                window=FdataMDF_dy["window"],
                                indexBin=indexBin[0])
                            # A hit is any overlap between the flagged bin's
                            # samples and the ground-truth range.
                            resultIntersect = set(
                                indexFromMaxBin).intersection(answerList)
                            if resultIntersect:
                                detect_tran = 1
                        rows.append([file_name, L, I, alpha, detect_tran])

                # Append the per-file hit/miss rows for this window size.
                ut.list_to_txt(rows,
                               csv_name="dy_win{}_TF.txt".format(window_size),
                               is_append=True)
                print("end L {}, I {} window_size {}".format(
                    L, I, window_size))
Esempio n. 5
0
                                                duration=duration)

                # corePlot = sketchDyBinService(windowSize=windowSize, initialBin=initialBin, isOnline=True)
                # sketchInstances = corePlot.sketchMode(instances=lc_data['instances'])
                # windowList = corePlot.getWindow()
                # binInstances = []
                # for bin in windowList:
                #     binInstances.append(bin.get_representation())

                # Average collusive displacement score per shingle -- returned
                # as a dict keyed by position (TODO confirm key semantics).
                avg_codisp = get_avgcodisp(lc_data['instances'])

                # codisp_normalization = []
                # for index in range(shingle_size-1):
                #     bin = windowList[index]
                #     n = bin.get_number_instance()
                #     for _ in range(n):
                #         codisp_normalization.append(0)
                # for index, (k, value) in enumerate(avg_codisp.items()):
                #     print(index)
                #     bin = windowList[index+shingle_size-1]
                #     n = bin.get_number_instance()
                #     for _ in range(n):
                #         codisp_normalization.append(value)

                # file_save = "{}\\{}.txt".format(folder_name,file_name)
                # NOTE(review): the file_save assignment above is commented
                # out, so the write below needs file_save bound in an
                # enclosing scope or it raises NameError -- confirm.
                # Flatten the score dict's values (insertion order) to a list.
                temp = []
                for key, value in avg_codisp.items():
                    temp.append(value)
                ut.list_to_txt(temp, csv_name=file_save, is_sort=False)
                print(file_name)
Esempio n. 6
0
    return return_result


if __name__ == '__main__':
    # For each (height, duration) pair: run the top-K detector on every
    # file, total the TP/FP counts per threshold, and append one row per
    # threshold to the matrix-profile results file.
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            n_levels = len(Thresholds)
            total_tp = [0] * n_levels
            total_fp = [0] * n_levels
            listFile = ut_lc.getListLight(height=height, duration=duration)
            for file_name in listFile:
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Thresholds=Thresholds)
                for pos, counts in enumerate(return_result):
                    total_tp[pos] += counts["TP"]
                    total_fp[pos] += counts["FP"]
                print(file_name)
            total_rows = [[height, duration, threshold, tp, fp]
                          for threshold, tp, fp in zip(Thresholds,
                                                       total_tp,
                                                       total_fp)]
            ut.list_to_txt(rows=total_rows,
                           csv_name="mp_{}.txt".format(dt),
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
Esempio n. 7
0
        # Copy one light curve (and its MJD timestamps) into the common
        # catalog layout, keyed by the date parsed from the file name.
        filesize = os.path.getsize(LCFile)
        if filesize != 0:
            # Everything after "_date" in the name is the observation date.
            fileDate = fileName.split("_date")[1]

            # Save LC to Common LC
            # NOTE(review): this handle is never read or closed (the data is
            # re-read via ut.txt_to_list below) -- looks like leftover code;
            # confirm before removing.
            file = open(LCFile, 'rb')
            FdataMDF = [float(item) for item in ut.txt_to_list(LCFile)]
            save_lc = "{}{}\\{}\\".format(main_save_path,
                                          "lc_flux_catalog_aperture_r7",
                                          fileDate)
            ut.checkFolderandCreate(save_lc)
            save_lc_fileName = "{}{}.txt".format(save_lc, fileName)

            # Skip files already copied on a previous run.
            if not (ut.isFileNameInFolder(save_lc, "{}.txt".format(fileName))):
                ut.list_to_txt(rows=FdataMDF,
                               csv_name=save_lc_fileName,
                               is_sort=False)
                # Save LC to Common MJD
                path_lc_timestamp_path = "{}{}{}.txt".format(
                    dataset_path, lc_timestamp_path, fileName)
                mjd_list = [
                    float(item)
                    for item in ut.txt_to_list(path_lc_timestamp_path)
                ]
                save_mjd = "{}{}\\{}\\".format(main_save_path, "timestamp_MJD",
                                               fileDate)
                ut.checkFolderandCreate(save_mjd)
                save_mjd_fileName = "{}{}.txt".format(save_mjd, fileName)
                ut.list_to_txt(rows=mjd_list,
                               csv_name=save_mjd_fileName,
                               is_sort=False)
Esempio n. 8
0
                # For each light-curve file: fixed-size binning, then a
                # max-outlier-bin test at every alpha level.
                for index, file_name in enumerate(files_list):
                    rawInstances, transList = ut_service.getDataLC_test(file_name, L=L,
                                                                        I=I)

                    FdataMDF_Fix = ut_gen.genFixBin(binSize=binSize, instances=rawInstances,isExtract= False)
                    gb_core = gb_test()
                    for instance in FdataMDF_Fix:
                        gb_core.add_element(instance)
                    for alpha in alphas:
                        # countFile = 0
                        # foundList = gb_core.get_max_outliers(alpha=alpha)
                        # if foundList:
                        #     countFile = 1
                        # rows.append([file_name,L,I,alpha,countFile,foundList])

                        indexBin = gb_core.get_max_index(alpha=alpha)
                        detect_tran = 0
                        if indexBin:
                            # Translate the flagged bin index back to the raw
                            # sample indices that bin covers.
                            start_point = indexBin[0]*binSize
                            end_point = indexBin[0]*binSize+binSize -1
                            # NOTE(review): range() excludes end_point, so the
                            # bin's last sample is never tested -- confirm
                            # whether this off-by-one is intended.
                            indexFromMaxBin = [*range(start_point, end_point, 1)]
                            # NOTE(review): st_end is assigned but never used.
                            st_end = transList[0]
                            # Inclusive ground-truth range [start, end].
                            answerList = [*range(int(transList[0][0]), int(transList[0][1])+1, 1)]
                            resultIntersect = set(indexFromMaxBin).intersection(answerList)
                            if resultIntersect:
                                detect_tran = 1
                        rows.append([file_name, L, I, alpha, detect_tran])


                ut.list_to_txt(rows,csv_name="bin_{}_TF.txt".format(binSize), is_append=True)
                print("end L {}, I {}".format(L,I) )
Esempio n. 9
0
                # Re-bin the light curve with the dynamic-binning generator
                # (offline mode), then score each neighbouring bin pair with
                # the SK method.
                dy_data = ut_gen.genListDyBin(instances=data['instances'],
                                              timestamp=data['timestamp'],
                                              windowSize=window_size,
                                              initialBin=initial_Bin,
                                              isOnline=False)
                results = []
                prior_index = 0
                for index_bin in index_list:
                    prior_bin = dy_data['window'][index_bin]
                    cur_bin = dy_data['window'][index_bin + 1]
                    prior_n = prior_bin.get_number_instance()
                    cur_n = cur_bin.get_number_instance()
                    priorMean = prior_bin.get_representation()
                    curMean = cur_bin.get_representation()
                    priorSDError = prior_bin.get_SDError()
                    curSDError = cur_bin.get_SDError()
                    # Change statistic between the two adjacent bins.
                    K_star = ut_de.detection_SKmethod(
                        curMean=curMean,
                        priorMean=priorMean,
                        priorSDError=priorSDError,
                        curSDError=curSDError)
                    # Row: statistic, both bin sizes, and the running raw
                    # sample span of the prior bin.
                    results.append([
                        K_star, prior_n, cur_n, prior_index,
                        prior_index + prior_n
                    ])
                    # NOTE(review): only correct if index_list walks
                    # consecutive bins -- confirm in the enclosing loop.
                    prior_index = prior_index + prior_n
                ut.list_to_txt(rows=results,
                               csv_name="{}\\{}.txt".format(
                                   folder_name, lc_file))
                print("{}\\{}.txt".format(folder_name, lc_file))
Esempio n. 10
0
        # NOTE(review): the enclosing `for height in ...` loop is above this
        # view; height is bound there.
        for duration in durations:
            listFiles = ut_lc.getListLight(height=height, duration=duration)
            folder_name = "I{}_L{}".format(height, duration)
            # folder_name = 'temp'
            ut.checkFolderandCreate(folder_name)
            for lc_file in listFiles:

                data = ut_lc.getDataFromFile(fileName=lc_file,height=height,duration=duration)
                # Reshape to a single-feature column vector as required by
                # the MatrixProfile transformer -- TODO confirm library.
                s_x = numpy.array(data["instances"]).reshape((-1, 1))

                mp = MatrixProfile(subsequence_length=duration, scale=False)
                mp_series = mp.fit_transform([s_x])[0]
                # t_star (most anomalous subsequence start) is only used by
                # the commented-out plotting code below.
                t_star = numpy.argmax(mp_series.ravel())
                profile_list = list(mp_series.ravel())
                # Normalize the profile before export.
                norm_profile = normalization(profile_list)
                ut.list_to_txt(rows=norm_profile,
                               csv_name="{}\\{}.txt".format(folder_name,lc_file))
                # print("a")
                ansList = data["ansList"]
                # plt.figure()
                # ax = plt.subplot(2, 1, 1)  # First, raw time series
                # trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
                # plt.plot(data["timestamp"],s_x.ravel(), "b-")
                # # plt.xlim([0, s_x.shape[0]])
                # plt.ylim(min(data["instances"])-100,max(data["instances"])+100)
                # plt.axvline(x=data["timestamp"][t_star],  c="red", linewidth=2)
                #
                # plt.axvline(x=ansList[0], c="black", linewidth=2)
                # plt.axvline(x=ansList[-1], c="black", linewidth=2)
                #
                # if t_star+mp.subsequence_length >= len(data["timestamp"]):
                #     plt.fill_between(x=[data["timestamp"][t_star],
Esempio n. 11
0
            # Sketch the light curve into window_size dynamic bins (offline),
            # then compute the SK change statistic for each adjacent pair.
            corePlot = sketchDyBinService(windowSize=window_size,
                                          initialBin=ini_bin,
                                          isOnline=False)
            corePlot.sketchMode(instances=data['instances'])
            window = corePlot.getWindow()
            # a = window[:-1]
            for index_bin in range((window_size) - 1):
                prior_bin = window[index_bin]
                cur_bin = window[index_bin + 1]
                prior_SE = prior_bin.get_SDError()
                cur_SE = cur_bin.get_SDError()
                prior_mean = prior_bin.get_representation()
                cur_mean = cur_bin.get_representation()
                K_star = ut_det.detection_SKmethod(curMean=cur_mean,
                                                   priorMean=prior_mean,
                                                   priorSDError=prior_SE,
                                                   curSDError=cur_SE)
                rows.append(
                    [main_file, window_size, index_bin, K_star, ini_bin])
            ut.list_to_txt(rows=rows,
                           csv_name='SKexport.txt',
                           is_append=True,
                           is_sort=False)
            print("END : {}".format(main_file))
        # NOTE(review): bare except (its `try:` is above this view) swallows
        # every error, including KeyboardInterrupt; only the file name is
        # logged -- consider `except Exception as e` and logging e.
        except:
            rows = [main_file]
            ut.list_to_txt(rows=rows,
                           csv_name='error_SKexport.txt',
                           is_append=True,
                           is_sort=False)
Esempio n. 12
0
    return return_result


if __name__ == '__main__':
    # Evaluate the top-K detector for every (height, duration) pair and
    # write per-K TP/FP totals to the dated results file.
    for height in heights:
        for duration in durations:
            print("********* start I = {} Duration = {}*************".format(
                height, duration))
            total_tp = [0] * len(Ks)
            total_fp = [0] * len(Ks)
            for file_name in ut_lc.getListLight(height=height,
                                                duration=duration):
                return_result = getTFfromTopK(file_name=file_name,
                                              height=height,
                                              duration=duration,
                                              Ks=Ks)
                # One result dict per K value; accumulate its counts.
                for slot, counts in enumerate(return_result):
                    total_tp[slot] += counts["TP"]
                    total_fp[slot] += counts["FP"]
                print(file_name)
            total_rows = [[height, duration, k, tp, fp]
                          for k, tp, fp in zip(Ks, total_tp, total_fp)]
            ut.list_to_txt(rows=total_rows,
                           csv_name="mp_2022_06_02.txt",
                           is_append=True)
            print("********* END I = {} Duration = {}*************".format(
                height, duration))
Esempio n. 13
0
            # Export the dynamic-bin representations of one light-curve file.
            pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
            filesize = os.path.getsize(pathFile)
            if filesize != 0:
                # NOTE(review): result_found and the open() handle below are
                # never used afterwards; the handle is also never closed --
                # looks like leftover code, confirm before removing.
                result_found = 0
                file = open(pathFile, 'rb')
                FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
                path_lc_timestamp_path = "{}{}{}.txt".format(
                    dataset_path, lc_timestamp_path, fileName)
                x_axis = [
                    float(item)
                    for item in ut.txt_to_list(path_lc_timestamp_path)
                ]
                # Window size scales with curve length relative to the
                # initial bin count, compressed by theshold_compression.
                window_size = int(
                    (len(FdataMDF) / initialBin) * theshold_compression)
                # window_size = 20
                FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                                  timestamp=x_axis,
                                                  windowSize=window_size,
                                                  isOnline=False)

                # One representation value per dynamic bin.
                rows = []
                for bin in FdataMDF_dy["window"]:
                    rows.append(bin.get_representation())
                ut.list_to_txt(rows=rows,
                               csv_name="{}{}.txt".format(savepath, fileName))
            print("END : File .... {}".format(fileName))
        # NOTE(review): bare except (its `try:` is above this view) hides the
        # real error; only the failing file name is recorded.
        except:
            row = [fileName]
            ut.add_to_txt(row, csv_name="dy_export.txt", is_append=True)
            print("************* Error : File .... {}".format(fileName))