Ejemplo n.º 1
0
    # print(grubbs.max_test_outliers(data, alpha=0.05))
    # result = grubbs.max_test_outliers(data, alpha=0.05)
    # Sweep every (window_size, I, L) combination. For each combination, every
    # file returned by ut_lc.getListLight is binned with ut_gen.genListDyBin
    # and the resulting window representations are fed to a gb_test instance,
    # which is then queried once per alpha.
    for window_size in window_sizes:
        for I in IS:
            for L in LS:
                print("start L {}, I {} ,window_size {}".format(
                    L, I, window_size))
                # L is passed as the height and I as the duration.
                files_list = ut_lc.getListLight(height=L, duration=I)
                # Reset for every (window_size, I, L) combination.
                rows = []
                for index, file_name in enumerate(files_list):
                    print("file : {}".format(file_name))
                    rawInstances, transList = ut_service.getDataLC_test(
                        file_name, L=L, I=I)
                    # Synthetic timestamps: 0 .. len(rawInstances) - 1.
                    timestamp = [*range(0, len(rawInstances), 1)]
                    FdataMDF_dy = ut_gen.genListDyBin(instances=rawInstances,
                                                      timestamp=timestamp,
                                                      windowSize=window_size,
                                                      initialBin=3)
                    # FdataMDF_Fix = ut_gen.genFixBin(binSize=binSize, instances=rawInstances,isExtract= False)
                    # A fresh gb_test per file, filled with the representation
                    # of every entry in the "window" list.
                    gb_core = gb_test()
                    for instance in FdataMDF_dy["window"]:
                        gb_core.add_element(instance.get_representation())
                    for alpha in alphas:
                        # countFile = 0
                        # foundList = gb_core.get_max_outliers(alpha=alpha)
                        # if foundList:
                        #     countFile = 1
                        # rows.append([file_name,L,I,alpha,countFile,foundList])

                        # NOTE(review): the truthiness test on indexBin that
                        # follows treats 0 the same as "nothing found";
                        # confirm get_max_index never reports index 0 as a hit.
                        indexBin = gb_core.get_max_index(alpha=alpha)
                        detect_tran = 0
                        if indexBin:
Ejemplo n.º 2
0
windows_size = [10, 20]

# Script entry point: for every row returned by ut_db.getAnswerMDF_toDB, load
# the file's data and build one rows_data entry per (ini_bin, window_size)
# combination.
if __name__ == '__main__':

    list_files = ut_db.getAnswerMDF_toDB()
    # `index` is not used in the visible part of the loop body.
    for index, row in enumerate(list_files):
        file_name = row['file_name']
        FdataMDF, FtimestampMDF = ut_service.getMDFData(file_name)
        # Presumably converts MJD timestamps to UTC (per the helper's name);
        # verify against ut_astro.
        UTC = ut_astro.convertList_MJDtoUTC(FtimestampMDF)
        # Unbinned series, kept alongside the binned variants built below.
        raw = {"timestamp": FtimestampMDF, 'instances': FdataMDF, 'UTC': UTC}
        rows_data = []
        for ini_bin in inis_bin:
            for window_size in windows_size:
                FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                                  timestamp=FtimestampMDF,
                                                  windowSize=window_size,
                                                  isOnline=False,
                                                  initialBin=ini_bin)
                # Each entry pairs the 'sketchInstances' output with the
                # original timestamps/UTC and a title naming the parameters.
                rows_data.append({
                    "timestamp":
                    FtimestampMDF,
                    'instances':
                    FdataMDF_dy['sketchInstances'],
                    'UTC':
                    UTC,
                    'title_name':
                    'win {} , ini {}'.format(window_size, ini_bin)
                })

        ut_bokeh.export_HTML_mutilples(fileName="{}.html".format(file_name),
                                       raw=raw,
Ejemplo n.º 3
0
    plt.ylim((min_var, max_var))
    plt.legend(
        [a.collections[0], b1],
        [
            "learned frontier", "training observations"
            # ,
            # "new regular observations",
            # "new abnormal observations",
        ],
        loc="upper left",
        prop=matplotlib.font_manager.FontProperties(size=11),
    )
    # plt.xlabel(
    #     "errors novel regular: %d/40 ; errors novel abnormal: %d/40"
    #     % (n_error_test, n_error_outliers)
    # )
    plt.show()


if __name__ == '__main__':
    # Demo run on a single hard-coded file: load it through ut_light, bin it
    # with a window size of 20, and pass the result to
    # computeLocalOutlierFactor.
    fileName, height, duration = '41242369943869440_0', '3', '200'
    lightData = ut_light.getDataFromFile(
        fileName=fileName, height=height, duration=duration)
    dyResult = ut_data.genListDyBin(
        instances=lightData["instances"],
        timestamp=lightData["timestamp"],
        windowSize=20,
    )
    computeLocalOutlierFactor(dyResult)
Ejemplo n.º 4
0
ini_bin = 5
# window_size = 10

# sql_file = 'getFileForSim.sql'
sql_file = 'getFileForSim2.sql'
distance = 2000

# Script entry point: for every file listed by the SQL query, bin the main
# series, then bin each nearby file returned by ut_db.getNearStars (within
# `distance`) so the two can be compared.
if __name__ == '__main__':

    list_files = ut_db.getFileListFromSQL(sqlFileName=sql_file)
    # `index` is not used in the visible part of the loop body.
    for index, main_file in enumerate(list_files):
        FdataMain, FtimestampMain = ut_service.getMDFData(main_file)
        # Window size is 1/50 of the series length, truncated to an int.
        # NOTE(review): this is 0 for series shorter than 50 samples; confirm
        # genListDyBin accepts windowSize=0.
        windowMain = int(len(FdataMain) / 50)
        # NOTE(review): isOnline is not passed here but is passed as False for
        # the target series below; confirm the difference is intended.
        mainDy = ut_gen.genListDyBin(instances=FdataMain,
                                     windowSize=windowMain,
                                     timestamp=FtimestampMain,
                                     initialBin=ini_bin)
        near_files = ut_db.getNearStars(fileName=main_file,
                                        maxDistance=distance)
        rows_data = []
        for near_file in near_files:
            try:
                FdataMDF, FtimestampMDF = ut_service.getMDFData(
                    near_file["file_target"])
                # Same 1/50 rule as for the main series.
                windowTarget = int(len(FdataMDF) / 50)
                targetDy = ut_gen.genListDyBin(instances=FdataMDF,
                                               timestamp=FtimestampMDF,
                                               windowSize=windowTarget,
                                               isOnline=False,
                                               initialBin=ini_bin)
                f_test, p_value = ut_cal.Ftest_cal(
Ejemplo n.º 5
0
from numpy import histogram

# Script entry point: for every file listed in start_file.csv, compare its
# binned "variance" against that of each candidate file sharing the same
# pattern whose timestamps overlap.
if __name__ == '__main__':
    windowSize = 40
    # Acceptance interval applied to the variance ratio computed below.
    upper_bound = 1.87519737
    lower_bound = 0.53327720

    listFile = ut.txt_to_list(csv_name="start_file.csv")

    # for windowSize in listWindow:
    # `index` is not used in the visible part of the loop body.
    for index, fileName1 in enumerate(listFile):
        print("File Name {}".format(fileName1))
        # Fifth underscore-separated field of the file name; raises IndexError
        # when the name has fewer than five fields.
        pattern = fileName1.split("_")[4]
        lightData1 = ut_mdf.getDataFromFile(fileName=fileName1)
        dyResult1 = ut_data.genListDyBin(instances=lightData1["instances"],
                                         timestamp=lightData1["timestamp"],
                                         windowSize=windowSize)
        targetList = ut_mdf.getListMDF(pattern=pattern)
        for fileName2 in targetList:
            try:
                lightData2 = ut_mdf.getDataFromFile(fileName=fileName2)
                isOverlap = ut_mdf.isOverlapTimestamp(lightData1["timestamp"],
                                                      lightData2["timestamp"])
                # Only files whose timestamps overlap are binned and compared.
                if isOverlap:
                    dyResult2 = ut_data.genListDyBin(
                        instances=lightData2["instances"],
                        timestamp=lightData2["timestamp"],
                        windowSize=windowSize)
                    # computeLocalOutlierFactor(dyResult1,windowSize=windowSize)
                    # Ratio of the two "variance" entries.
                    # NOTE(review): presumably plain numbers, in which case a
                    # zero variance for file 2 raises ZeroDivisionError (caught
                    # by the enclosing try); confirm.
                    F_test = dyResult1["variance"] / dyResult2["variance"]
                    if lower_bound <= F_test <= upper_bound:
Ejemplo n.º 6
0
isOnline = False
index_list = [*range(0, window_size - 1)]

# Script entry point: for every (height, duration) pair, bin each listed file
# and walk consecutive pairs of window entries, collecting their size, mean
# representation and SD error for the detection step.
if __name__ == '__main__':
    for height in heights:
        for duration in durations:
            listFiles = ut_lc.getListLight(height=height, duration=duration)
            # NOTE(review): the label pairs "I" with height and "L" with
            # duration; confirm this is the intended naming.
            folder_name = "I{}_L{}".format(height, duration)
            ut.checkFolderandCreate(folder_name)
            for lc_file in listFiles:
                data = ut_lc.getDataFromFile(fileName=lc_file,
                                             height=height,
                                             duration=duration)
                dy_data = ut_gen.genListDyBin(instances=data['instances'],
                                              timestamp=data['timestamp'],
                                              windowSize=window_size,
                                              initialBin=initial_Bin,
                                              isOnline=False)
                results = []
                # Not used in the visible part of this loop.
                prior_index = 0
                # index_list is 0 .. window_size - 2 (module level), so
                # index_bin + 1 reaches at most window_size - 1.
                # NOTE(review): assumes dy_data['window'] holds at least
                # window_size entries; otherwise the indexing raises IndexError.
                for index_bin in index_list:
                    prior_bin = dy_data['window'][index_bin]
                    cur_bin = dy_data['window'][index_bin + 1]
                    prior_n = prior_bin.get_number_instance()
                    cur_n = cur_bin.get_number_instance()
                    priorMean = prior_bin.get_representation()
                    curMean = cur_bin.get_representation()
                    priorSDError = prior_bin.get_SDError()
                    curSDError = cur_bin.get_SDError()
                    K_star = ut_de.detection_SKmethod(
                        curMean=curMean,
Ejemplo n.º 7
0
            # Process one file: skip the analysis when the file is empty,
            # otherwise load values and timestamps, bin them, run gb_test on
            # the window representations, and record whether anything was found.
            filesize = os.path.getsize(pathFile)
            if filesize != 0:
                result_found = 0
                # NOTE(review): this handle is never read or closed in the
                # visible code; the values are loaded via ut.txt_to_list below.
                file = open(pathFile, 'rb')
                FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
                path_lc_timestamp_path = "{}{}{}.txt".format(
                    dataset_path, lc_timestamp_path, fileName)
                x_axis = [
                    float(item)
                    for item in ut.txt_to_list(path_lc_timestamp_path)
                ]
                # window_size = int((len(FdataMDF) / initialBin) * theshold_compression)
                window_size = 10
                FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                                  timestamp=x_axis,
                                                  windowSize=window_size,
                                                  initialBin=3,
                                                  isOnline=False)
                gb_core = gb_test()
                for instance in FdataMDF_dy["window"]:
                    gb_core.add_element(instance.get_representation())

                foundList = gb_core.get_max_outliers(alpha=alpha)
                # result_found becomes 1 when get_max_outliers returns
                # something truthy.
                if foundList:
                    result_found = 1
                row = [fileName, alpha, window_size, result_found]
            # NOTE(review): this write sits outside the `if`, so it also runs
            # for an empty file, where `row` and `window_size` were not
            # assigned in this pass: either stale values from an earlier pass
            # are written again or a NameError is raised into the enclosing
            # except. Confirm whether the write belongs inside the `if`.
            ut.add_to_txt(row,
                          csv_name="dy_MDF window{}.txt".format(window_size),
                          is_append=True)
            print("END : File .... {}".format(fileName))
        except: