Ejemplo n.º 1
0
def getDataFromFile(fileName, r=7):
    """Load one light curve and its timestamps.

    Args:
        fileName: base name of the light-curve file (no extension).
        r: aperture radius used to pick the data folder (default 7).

    Returns:
        dict with keys "fileName", "timestamp" and "instances".
    """
    lc_file = "{}{}.txt".format(get_lc_path(r), fileName)
    ts_file = "{}{}.txt".format(get_timestamp_path(r), fileName)

    # Read flux values first, then timestamps (both as non-string lists).
    flux_values = ut.txt_to_list(lc_file, isString=False)
    time_values = ut.txt_to_list(ts_file, isString=False)

    return {
        "fileName": fileName,
        "timestamp": time_values,
        "instances": flux_values
    }
Ejemplo n.º 2
0
def getMDFData(fileName):
    """Read an MDF light curve and its timestamp file as float lists.

    Args:
        fileName: base name of the file (no extension).

    Returns:
        (data, timestamps) tuple of float lists.
    """
    data_file = "{}{}{}.txt".format(MDFdataset_path, MDFdata_path, fileName)
    time_file = "{}{}{}.txt".format(MDFdataset_path, MDFtime_path, fileName)

    data = list(map(float, ut.txt_to_list(data_file)))
    timestamps = list(map(float, ut.txt_to_list(time_file)))

    return data, timestamps
Ejemplo n.º 3
0
def getMDFdata(fileName, r=7):
    """Load light-curve values, timestamps and name for one file.

    Args:
        fileName: base name of the light-curve file.
        r: aperture radius used to pick the data folder (default 7).

    Returns:
        dict with "data", "timestamp" and "fileName" keys, or an empty
        dict when the light-curve file exists but is empty.
    """
    lc_file = "{}{}.txt".format(get_lc_path(r), fileName)
    # Guard clause: an empty light-curve file yields an empty result.
    if os.path.getsize(lc_file) == 0:
        return {}
    ts_file = "{}{}{}.txt".format(dataset_path, lc_timestamp_path, fileName)
    return {
        "data": [float(value) for value in ut.txt_to_list(lc_file)],
        "timestamp": [float(value) for value in ut.txt_to_list(ts_file)],
        "fileName": fileName,
    }
Ejemplo n.º 4
0
def getDydata(fileName):
    """Load a dynamic-binning data file as floats.

    Returns a dict with "data" and "fileName", or an empty dict when the
    file exists but is empty.
    """
    data_file = "{}{}.txt".format(path_to_dy_file, fileName)
    # Guard clause: nothing to parse in an empty file.
    if os.path.getsize(data_file) == 0:
        return {}
    return {
        "data": [float(value) for value in ut.txt_to_list(data_file)],
        "fileName": fileName,
    }
Ejemplo n.º 5
0
def getListFile(start=None):
    """Return the file list, optionally resuming after *start*.

    When *start* is given and found, the returned list begins right after
    its LAST occurrence (the original scan did not break early, so the
    final match wins).  When *start* is falsy or absent, the full list is
    returned.
    """
    names = ut.txt_to_list(listFilePathCSV)
    cut = 0
    if start:
        matches = [i + 1 for i, name in enumerate(names) if name == start]
        if matches:
            cut = matches[-1]
    return names[cut:]
Ejemplo n.º 6
0
def getDataLC_test(fileName, L=3, I=500):
    """Load a synthetic test light curve and its transit intervals.

    Args:
        fileName: file name inside the sq_L{L}_I{I} dataset folders.
        L: transit height label of the dataset folder.
        I: transit duration in samples; every answer start expands to the
           closed interval [start, start + I - 1].

    Returns:
        (rawData, transList) where rawData is a list of floats and
        transList a list of [startTran, endTran] pairs.
    """
    lc_path_test = "sq_L{}_I{}\\test\\".format(L, I)
    lc_path_answer = "sq_L{}_I{}\\answer\\".format(L, I)

    path_to_lc_data_file = "{}{}{}".format(LCdataset_path, lc_path_test,
                                           fileName)
    path_to_lc_ans_file = "{}{}{}".format(LCdataset_path, lc_path_answer,
                                          fileName)

    rawData = [float(item) for item in ut.txt_to_list(path_to_lc_data_file)]
    startTranList = [
        float(item) for item in ut.txt_to_list(path_to_lc_ans_file)
    ]
    # Fixed: the original looped with an unused enumerate index; a plain
    # comprehension expresses the start -> [start, end] mapping directly.
    transList = [[start, start + I - 1] for start in startTranList]
    return rawData, transList
Ejemplo n.º 7
0
def getDataFromFile(fileName, height, duration):
    """Load a synthetic light curve plus the flat list of answer indices.

    Args:
        fileName: file name inside the sq_L{height}_I{duration} folders.
        height: transit height label of the dataset folder.
        duration: transit duration in samples; each answer start expands
            to the half-open index range [start, start + duration).

    Returns:
        dict with "fileName", "height", "duration", "timestamp",
        "instances" and "ansList" (all transit sample indices, flattened).
    """
    folderName = "sq_L{}_I{}\\".format(height, duration)
    path_to_lc_file = "{}{}".format(MDFdataset_path, folderName)
    instances = ut.txt_to_list("{}test\\{}".format(path_to_lc_file, fileName),
                               isString=False)
    # Synthetic data has no real timestamps; use sample indices instead.
    timestamp = list(range(len(instances)))
    ansIndexs = ut.txt_to_list("{}answer\\{}".format(path_to_lc_file,
                                                     fileName))
    ansList = []
    for ans in ansIndexs:
        # Fixed: `ansList = ansList + [...]` copied the whole list every
        # iteration (quadratic); extend() appends in place.
        startIndex = int(ans)
        ansList.extend(range(startIndex, startIndex + int(duration)))
    return {
        "fileName": fileName,
        "height": height,
        "duration": duration,
        "timestamp": timestamp,
        "instances": instances,
        "ansList": ansList
    }
Ejemplo n.º 8
0
def getResult(file_name, height, duration):
    """Parse one result file into a list of per-row dicts.

    Each line looks like "[kstar, prior_size, cur_size, prior_index,
    cur_index]"; spaces and brackets are stripped before splitting.
    """
    path_result = "I{}_L{}\\{}.txt".format(height, duration, file_name)
    parsed = []
    for raw_line in ut.txt_to_list(path_result):
        cleaned = raw_line.replace(" ", "").replace("[", "").replace("]", "")
        fields = cleaned.split(",")
        parsed.append({
            'kstar': float(fields[0]),
            'prior_size': int(fields[1]),
            'cur_size': int(fields[2]),
            'prior_index': int(fields[3]),
            'cur_index': int(fields[4])
        })
    return parsed
Ejemplo n.º 9
0
import os

from webService.backend.coreCompressionRatio import compressionRatioService as com_service
from utility import utility as ut

# Paths to the M-dwarf light-curve dataset (flux + timestamp folders).
dataset_path = 'D:\\mdwarf_data\\'
lc_path = "lc_flux_catalog_aperture_r7_txt\\"
lc_timestamp_path = "lc_timestamp_txt\\"
path_to_lc_file = "{}{}".format(dataset_path, lc_path)

# listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"]

listFile = ["light_curve_Gaia-DR2_3398180156118506240_date20191224"]

if __name__ == '__main__':
    for fileName in listFile:
        pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
        # Skip light-curve files that exist but are empty.
        if os.path.getsize(pathFile) != 0:
            # Fixed: the original also did `file = open(pathFile, 'rb')`
            # without ever reading from or closing the handle (resource
            # leak), and computed an unused timestamp path; both removed.
            FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]

            core = com_service(inputList=FdataMDF)
            # Sweep a narrow bin-size range around one day (1430-1439 min).
            dictResult = core.TWINcurveBinSize(maxBinSize=1439,
                                               minBinSize=1430)
            print("var : {}".format(dictResult["varList"]))
            print("com : {}".format(dictResult["comList"]))
            print("bin : {}".format(dictResult["binSizeList"]))
Ejemplo n.º 10
0
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.xmeans import xmeans
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import SIMPLE_SAMPLES
import random
import utility.utility as ut

# Read sample 'simple3' from file.
# sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
path = "D:\\mdwarf_data\\lc_flux_catalog_aperture_r7_txt\\"
fileName = "light_curve_Gaia-DR2_603299423116967424_date20200130.txt"
tempDate = ut.txt_to_list(csv_name="{}{}".format(path, fileName))
# Embed every flux value as a 2-D point (value, 0) so X-Means can cluster
# the one-dimensional light curve.
sample = [[float(value), 0] for value in tempDate]
# The number of initial centers defines how many clusters X-Means starts
# its analysis from.
amount_initial_centers = 5
initial_centers = kmeans_plusplus_initializer(
    sample, amount_initial_centers).initialize()
# Build the X-Means instance; at most 10 clusters may be allocated.
xmeans_instance = xmeans(sample, initial_centers, 10)
xmeans_instance.process()
# Extract clustering results: clusters and their centers.
clusters = xmeans_instance.get_clusters()
centers = xmeans_instance.get_centers()
# Visualize clustering results.
visualizer = cluster_visualizer()
Ejemplo n.º 11
0
# Default hyper-parameters for the binning/sketch run: sliding-window size,
# initial bin count, and t-test / f-test thresholds.
# NOTE(review): threshold semantics inferred from the key names only --
# confirm against the service that consumes this dict.
data = {
    'inputWindowSize': 5,
    'inputInitialBin': 3,
    'threshold_tTest': 0,
    'threshold_fTest': 0
}


def getListTop():
    """Return the file names from the dataset's top418.csv list."""
    top_csv = "{}top418.csv".format(dataset_path)
    return ut.txt_to_list(top_csv)


if __name__ == '__main__':
    listFile = ut.txt_to_list(csv_name="file_list_normal_full.csv")
    # listFile = ut_mdf.getListMDF()
    for indexFile, fileName in enumerate(listFile):
        pattern = fileName.split("_")[4]

        pathPngOutput = "{}{}{}\\".format(dataset_path, png_path, pattern)
        # listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"]
        ut.checkFolderandCreate(pathPngOutput)
        if ut.isEmpty(pathPngOutput):
            print(fileName)
            isFoundPng = ut.isFileNameInFolder(
                path=pathPngOutput, fileName="{}.png".format(fileName))
            if not (isFoundPng):
                pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
                filesize = os.path.getsize(pathFile)
                if filesize != 0:
Ejemplo n.º 12
0
# Default hyper-parameters for the binning/sketch run (PEP 8 layout).
data = {
    'inputWindowSize': 5,
    'inputInitialBin': 3,
    'threshold_tTest': 0,
    'threshold_fTest': 0,
}


def getListTop():
    """Load the curated top-418 file-name list from the dataset root."""
    return ut.txt_to_list("{}top418.csv".format(dataset_path))

if __name__ == '__main__':
    # listFile = getListTop()
    # temp = 'light_curve_Gaia-DR2_652961717644929664_date20200201'

    # listFile = ut_web.getListMDF(pattern = pattern)
    # Discover all per-pattern CSV index files under <dataset>/csv/.
    csvFile = ut.getListAllFileName(path="{}csv\\".format(dataset_path))
    for pattern in csvFile:
        # Light-curve names belonging to this pattern.
        listFile = ut.txt_to_list(csv_name="{}csv\\{}.csv".format(dataset_path,pattern))
        pathPngOutput = "{}{}\\{}\\".format(dataset_path, png_path, pattern)
        # listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"]
        ut.checkFolderandCreate(pathPngOutput)
        # Only process patterns whose output folder is still empty.
        if ut.isEmpty(pathPngOutput):
            for indexFile, fileName in enumerate(listFile):
                print(fileName)
                # NOTE(review): pathHtmlOutput is never defined in this
                # fragment -- this line would raise NameError unless it is
                # set elsewhere in the full file; confirm.
                isFoundHtml = ut.isFileNameInFolder(path=pathHtmlOutput, fileName="{}.html".format(fileName))
                isFoundPng = ut.isFileNameInFolder(path=pathPngOutput, fileName="{}.png".format(fileName))
                if not (isFoundHtml):
                    pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
                    filesize = os.path.getsize(pathFile)
                    if filesize != 0:
                        # NOTE(review): this handle is never read or closed
                        # (resource leak), and the snippet appears truncated
                        # right after the path below.
                        file = open(pathFile, 'rb')
                        FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
                        path_lc_timestamp_path = "{}{}{}.txt".format(dataset_path, lc_timestamp_path, fileName)
Ejemplo n.º 13
0
def getListTop():
    """Return the list of file names stored in <dataset_path>/top418.csv."""
    csv_path = dataset_path + "top418.csv"
    names = ut.txt_to_list(csv_path)
    return names
Ejemplo n.º 14
0
# from webService.backend.coreSketchDyBinService import sketchDyBinService
from webService.backend.coreSketchFixBinService import sketchFixService
from csv import DictWriter
import csv
from scipy.interpolate import interp1d
import numpy as np
import matplotlib.pyplot as plt

# RRCF anomaly-score thresholds evaluated: three sub-1 values plus 1..49.
thresholds = [0.2, 0.5, 0.7] + [*range(1, 50)]

if __name__ == '__main__':
    # Ground-truth totals across the synthetic dataset: 432 files, two
    # passes, with per-duration positive/negative sample counts.
    total_true = 432 * (60 + 100 + 200 + 500) * 2
    total_false = 432 * (4200 + 4160 + 4060 + 3760) * 2
    total_tp = [0] * len(thresholds)
    total_fp = [0] * len(thresholds)
    # NOTE(review): `ut` is not imported in this fragment; it is assumed to
    # be utility.utility as in the sibling snippets -- confirm.
    results = ut.txt_to_list(csv_name="result_rrcf_2022_06_02.txt")
    for result in results:
        # Each row is "[file, ?, threshold, tp, fp, ...]" as a string;
        # strip brackets and accumulate tp/fp per threshold.
        result = result.replace("[", "").replace("]", "")
        result_list = result.split(",")
        index = thresholds.index(float(result_list[2]))
        total_tp[index] = total_tp[index] + int(result_list[3])
        total_fp[index] = total_fp[index] + int(result_list[4])

    result_tp = []
    result_fp = []
    result_precision = []
    print(total_tp)
    for tp, fp in zip(total_tp, total_fp):
        print("tp: {}, total:{}".format(tp, total_true))
        # NOTE(review): the fragment ends here; tpr/fpr are computed but the
        # code that appends them to the result_* lists is cut off.
        tpr = float(tp) / float(total_true)
        fpr = float(fp) / float(total_false)
Ejemplo n.º 15
0
def getRRCFresult(fileName, duration, height):
    """Load the RRCF result list for one file / height / duration combo."""
    result_file = rrcf_resultPath + "I{}_L{}\\{}.txt".format(
        height, duration, fileName)
    result_rows = ut.txt_to_list(csv_name=result_file, isString=False)
    return result_rows
Ejemplo n.º 16
0
def getResult(file_name, height, duration):
    """Read the raw result rows for one file at a given height/duration."""
    relative_path = "I{}_L{}\\{}.txt".format(height, duration, file_name)
    return ut.txt_to_list(relative_path)
Ejemplo n.º 17
0
# save file
# Root folder where per-date "common" copies of the light curves are written.
# NOTE(review): dataset_path must be defined elsewhere in the full file.
main_save_path = '{}\\mdwarf_data_common\\'.format(dataset_path)
# LC

if __name__ == '__main__':
    listFile = ut_mdf.getListMDF()
    for indexFile, fileName in enumerate(listFile):
        print(fileName)
        LCFile = "{}{}.txt".format(path_to_lc_file, fileName)
        filesize = os.path.getsize(LCFile)
        if filesize != 0:
            fileDate = fileName.split("_date")[1]

            # Save LC to Common LC
            file = open(LCFile, 'rb')
            FdataMDF = [float(item) for item in ut.txt_to_list(LCFile)]
            save_lc = "{}{}\\{}\\".format(main_save_path,
                                          "lc_flux_catalog_aperture_r7",
                                          fileDate)
            ut.checkFolderandCreate(save_lc)
            save_lc_fileName = "{}{}.txt".format(save_lc, fileName)

            if not (ut.isFileNameInFolder(save_lc, "{}.txt".format(fileName))):
                ut.list_to_txt(rows=FdataMDF,
                               csv_name=save_lc_fileName,
                               is_sort=False)
                # Save LC to Common MJD
                path_lc_timestamp_path = "{}{}{}.txt".format(
                    dataset_path, lc_timestamp_path, fileName)
                mjd_list = [
                    float(item)
Ejemplo n.º 18
0
import utility.utility as ut
import utility.utility_bokeh as ut_bokeh

# Dataset root and output sub-folder for the rendered PNG plots.
dataset_path = 'D:\\mdwarf_data\\'
png_path = "final_filter\\"
# Browser window (and therefore screenshot) size in pixels.
width = 1000
height = 300
# NOTE(review): Service / ChromeDriverManager / webdriver are not imported
# in this fragment (selenium names, presumably imported elsewhere).
# Launching Chrome here is a module-level side effect that runs on import.
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s)
driver.set_window_size(width, height)
sizing_mode = "fixed"

if __name__ == '__main__':
    windowSize = 40

    listFile = ut.txt_to_list(csv_name="f_test.result.csv")
    for row in listFile:
        row_data = row.split(",")
        pattern = row_data[0].split("_")[4]
        pathPngOutput = "{}{}{}\\".format(dataset_path, png_path, pattern)
        ut.checkFolderandCreate(pathPngOutput)
        lightData1 = ut_mdf.getDataFromFile(fileName=row_data[0])
        lightData2 = ut_mdf.getDataFromFile(fileName=row_data[1])

        plots = ut_bokeh.exportPlot(x_axis=lightData1["timestamp"],
                                    y_axis=lightData1["instances"],
                                    fileName=lightData1["fileName"],
                                    addCircle=True,
                                    sizing_mode=sizing_mode)

        plots = ut_bokeh.exportSubplotPng(x_axis1=lightData1["timestamp"],
Ejemplo n.º 19
0
#             result = row[4].replace(" ","")
#             df_list.append([bin_size,lc_file,L,I,alpha,result])
#     df = pd.DataFrame(df_list,
#                       columns=['bin_size', 'lc_file', 'height', 'duration','alpha','result']
#                       )
#
#     # a = df.groupby(['bin_size', 'lc_file', 'height', 'duration','alpha']).sum()
#     df.to_csv ('bin.csv', index = False, header=True)

# Sliding-window sizes whose dynamic-binning result files are aggregated.
window_size_list = [10, 15, 20, 30, 50, 100]

if __name__ == '__main__':
    df_list = []
    for window_size in window_size_list:
        # One result file per window size, e.g. "dy_win10.txt".
        file_name = 'dy_win{}.txt'.format(window_size)
        # NOTE(review): `ut` is not imported in this fragment; assumed to be
        # utility.utility as in the sibling snippets.
        rows = ut.txt_to_list(file_name)
        for row_st in rows:
            # Each row is a stringified list "[lc, L, I, alpha, result]";
            # drop the surrounding brackets, split, then strip quoting and
            # spaces from the individual fields.
            row = row_st[1:-1].split(",")
            lc_file = row[0].replace("'", "")
            L = row[1].replace(" ", "")
            I = row[2].replace(" ", "")
            alpha = row[3].replace(" ", "")
            result = row[4].replace(" ", "")
            df_list.append([window_size, lc_file, L, I, alpha, result])
    # NOTE(review): `pd` is not imported in this fragment; assumed pandas.
    df = pd.DataFrame(df_list,
                      columns=[
                          'window_size', 'lc_file', 'height', 'duration',
                          'alpha', 'result'
                      ])

    # a = df.groupby(['bin_size', 'lc_file', 'height', 'duration','alpha']).sum()
Ejemplo n.º 20
0
                         window='20')
    plt.plot([0] + result["fp"], [0] + result["tp"],
             color="blue",
             lw=lw,
             label='Dynamic binning size with uninteresting region')

    ################ export MP
    Ks = [
        3, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 100, 300, 500, 800, 1000,
        1500, 2000, 2500, 3000, 4000
    ]
    total_true = 432 * (60 + 100 + 200 + 500) * 2
    total_false = 432 * (4200 + 4160 + 4060 + 3760) * 2
    total_tp = [0] * len(Ks)
    total_fp = [0] * len(Ks)
    results = ut.txt_to_list(csv_name="result_mp_2022_05_31.txt")
    for result in results:
        result = result.replace("[", "").replace("]", "")
        result_list = result.split(",")
        index = Ks.index(int(result_list[2]))
        total_tp[index] = total_tp[index] + int(result_list[3])
        total_fp[index] = total_fp[index] + int(result_list[4])

    result_tp = []
    result_fp = []
    result_precision = []
    print(total_tp)
    for tp, fp in zip(total_tp, total_fp):
        print("tp: {}, total:{}".format(tp, total_true))
        tpr = float(tp) / float(total_true)
        fpr = float(fp) / float(total_false)
Ejemplo n.º 21
0
                                             duration=duration,
                                             isFullPath=True)
            listAns = ut_light.getListLight(height=height,
                                            duration=duration,
                                            isFullPath=True,
                                            folderType="answer")
            output3D_htmlFile = "{}3D_html\\".format(
                ut_light.getFullPath(height=height, duration=duration))
            output3D_JPGFile = "{}3D_JPG\\".format(
                ut_light.getFullPath(height=height, duration=duration))
            outputdyFile = "{}dy_html\\".format(
                ut_light.getFullPath(height=height, duration=duration))

            for mainFile, ansFile in zip(listFile, listAns):
                file_name = os.path.basename(mainFile)
                mainRaw = [float(item) for item in ut.txt_to_list(mainFile)]
                ansRaw = [float(item) for item in ut.txt_to_list(ansFile)]
                ansList = []
                tranList = []
                for ans in ansRaw:
                    ansList = ansList + ut_light.genListAns(start=ans,
                                                            duration=duration)
                    tranList = {"startTran": ans, "endTran": ans + duration}
                corePlot = sketchDyBinService(windowSize=windowSize,
                                              initialBin=initialBin,
                                              isOnline=False)
                sketchInstances = corePlot.sketchMode(instances=mainRaw)
                window = corePlot.getWindow()
                cluster = ut_cluster.cluster_xMean_Dy(binsList=window,
                                                      kmax=windowSize,
                                                      hist_bin=20)
Ejemplo n.º 22
0
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.neighbors import LocalOutlierFactor
import utility.utility_mdf as ut_mdf
import utility.utility_genData as ut_data
import utility.utility as ut
from numpy import histogram

if __name__ == '__main__':
    windowSize = 40
    upper_bound = 1.87519737
    lower_bound = 0.53327720

    listFile = ut.txt_to_list(csv_name="start_file.csv")

    # for windowSize in listWindow:
    for index, fileName1 in enumerate(listFile):
        print("File Name {}".format(fileName1))
        pattern = fileName1.split("_")[4]
        lightData1 = ut_mdf.getDataFromFile(fileName=fileName1)
        dyResult1 = ut_data.genListDyBin(instances=lightData1["instances"],
                                         timestamp=lightData1["timestamp"],
                                         windowSize=windowSize)
        targetList = ut_mdf.getListMDF(pattern=pattern)
        for fileName2 in targetList:
            try:
                lightData2 = ut_mdf.getDataFromFile(fileName=fileName2)
                isOverlap = ut_mdf.isOverlapTimestamp(lightData1["timestamp"],
                                                      lightData2["timestamp"])
                if isOverlap: