def getDataFromFile(fileName, r=7):
    """Load one light curve's flux values and timestamps.

    Args:
        fileName: base name of the light-curve file (no extension).
        r: aperture radius selecting the data folder (default 7).

    Returns:
        dict with keys "fileName", "timestamp", "instances".
    """
    lc_file = "{}{}.txt".format(get_lc_path(r), fileName)
    ts_file = "{}{}.txt".format(get_timestamp_path(r), fileName)
    return {
        "fileName": fileName,
        "timestamp": ut.txt_to_list(ts_file, isString=False),
        "instances": ut.txt_to_list(lc_file, isString=False),
    }
def getMDFData(fileName):
    """Read MDF flux values and their timestamps as two parallel float lists.

    Args:
        fileName: base name of the light-curve file (no extension).

    Returns:
        (flux_values, timestamps) — both lists of floats.
    """
    data_file = "{}{}{}.txt".format(MDFdataset_path, MDFdata_path, fileName)
    time_file = "{}{}{}.txt".format(MDFdataset_path, MDFtime_path, fileName)
    FdataMDF = [float(value) for value in ut.txt_to_list(data_file)]
    FtimestampMDF = [float(value) for value in ut.txt_to_list(time_file)]
    return FdataMDF, FtimestampMDF
def getMDFdata(fileName, r=7):
    """Load flux data and timestamps for *fileName*, skipping empty files.

    Args:
        fileName: base name of the light-curve file (no extension).
        r: aperture radius selecting the data folder (default 7).

    Returns:
        dict always containing "fileName"; "data" and "timestamp"
        (lists of floats) are present only when the flux file is non-empty.
    """
    lc_file = "{}{}.txt".format(get_lc_path(r), fileName)
    result = {}
    # Empty downloads produce zero-byte files; leave data/timestamp unset.
    if os.path.getsize(lc_file) != 0:
        result["data"] = [float(value) for value in ut.txt_to_list(lc_file)]
        ts_file = "{}{}{}.txt".format(dataset_path, lc_timestamp_path, fileName)
        result["timestamp"] = [float(value) for value in ut.txt_to_list(ts_file)]
    result["fileName"] = fileName
    return result
def getDydata(fileName):
    """Load dynamic-binning data for *fileName*, skipping empty files.

    Args:
        fileName: base name of the data file (no extension).

    Returns:
        dict always containing "fileName"; "data" (list of floats) is
        present only when the file is non-empty.
    """
    data_file = "{}{}.txt".format(path_to_dy_file, fileName)
    result = {}
    # Zero-byte files are skipped; caller must check for the "data" key.
    if os.path.getsize(data_file) != 0:
        result["data"] = [float(value) for value in ut.txt_to_list(data_file)]
    result["fileName"] = fileName
    return result
def getListFile(start=None):
    """Return the catalog file list, optionally resuming after *start*.

    Args:
        start: file name to resume after; when given, the returned list
            begins just past the LAST occurrence of *start* (matching the
            original no-break scan). When None/empty, the whole list is
            returned.
    """
    names = ut.txt_to_list(listFilePathCSV)
    cut_index = 0
    if start:
        # No break on purpose: a repeated name keeps pushing the cut forward,
        # so the slice starts after the last occurrence.
        for position, name in enumerate(names):
            if name == start:
                cut_index = position + 1
    return names[cut_index:]
def getDataLC_test(fileName, L=3, I=500):
    """Load a synthetic test light curve and its transit answer windows.

    Args:
        fileName: name of the light-curve file (with extension).
        L: transit height parameter selecting the "sq_L{L}_I{I}" folder.
        I: transit duration (samples) for the same folder.

    Returns:
        (rawData, transList) where rawData is a list of floats and
        transList is [[startTran, startTran + I - 1], ...] — one inclusive
        window per answer entry.
    """
    lc_path_test = "sq_L{}_I{}\\test\\".format(L, I)
    lc_path_answer = "sq_L{}_I{}\\answer\\".format(L, I)
    path_to_lc_data_file = "{}{}{}".format(LCdataset_path, lc_path_test, fileName)
    path_to_lc_ans_file = "{}{}{}".format(LCdataset_path, lc_path_answer, fileName)
    rawData = [float(item) for item in ut.txt_to_list(path_to_lc_data_file)]
    startTranList = [
        float(item) for item in ut.txt_to_list(path_to_lc_ans_file)
    ]
    # Each answer marks the start of a transit lasting I samples; the end
    # index is inclusive. (Original looped with an unused enumerate index.)
    transList = [[startTran, startTran + I - 1] for startTran in startTranList]
    return rawData, transList
def getDataFromFile(fileName, height, duration):
    """Load a synthetic square-transit light curve plus its answer indices.

    Args:
        fileName: name of the light-curve file (with extension).
        height: transit height selecting the "sq_L{height}_I{duration}" folder.
        duration: transit duration in samples.

    Returns:
        dict with "fileName", "height", "duration", "timestamp" (sample
        indices), "instances" (raw values) and "ansList" (every index
        covered by a known transit).
    """
    folderName = "sq_L{}_I{}\\".format(height, duration)
    path_to_lc_file = "{}{}".format(MDFdataset_path, folderName)
    instances = ut.txt_to_list("{}test\\{}".format(path_to_lc_file, fileName),
                               isString=False)
    # Synthetic data carries no real MJD column; use sample indices as time.
    timestamp = list(range(len(instances)))
    ansIndexs = ut.txt_to_list("{}answer\\{}".format(path_to_lc_file, fileName))
    ansList = []
    for ans in ansIndexs:
        startIndex = int(ans)
        # extend() is O(duration) per answer; the original
        # `ansList = ansList + [...]` re-copied the whole accumulated list
        # each iteration (quadratic overall).
        ansList.extend(range(startIndex, startIndex + int(duration)))
    return {
        "fileName": fileName,
        "height": height,
        "duration": duration,
        "timestamp": timestamp,
        "instances": instances,
        "ansList": ansList
    }
def getResult(file_name, height, duration):
    """Parse a stored detection-result file into a list of row dicts.

    Each stored row looks like "[kstar, prior_size, cur_size,
    prior_index, cur_index]"; brackets and spaces are stripped before
    splitting on commas.
    """
    path_result = "I{}_L{}\\{}.txt".format(height, duration, file_name)
    rows = []
    for raw_line in ut.txt_to_list(path_result):
        fields = raw_line.replace(" ", "").replace("[", "").replace("]", "").split(",")
        parsed = {
            'kstar': float(fields[0]),
            'prior_size': int(fields[1]),
            'cur_size': int(fields[2]),
            'prior_index': int(fields[3]),
            'cur_index': int(fields[4])
        }
        rows.append(parsed)
    return rows
import os

from webService.backend.coreCompressionRatio import compressionRatioService as com_service
from utility import utility as ut

# Root of the local M-dwarf dataset and its sub-folders.
dataset_path = 'D:\\mdwarf_data\\'
lc_path = "lc_flux_catalog_aperture_r7_txt\\"
lc_timestamp_path = "lc_timestamp_txt\\"
path_to_lc_file = "{}{}".format(dataset_path, lc_path)

# listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"]
listFile = ["light_curve_Gaia-DR2_3398180156118506240_date20191224"]

if __name__ == '__main__':
    for indexFile, fileName in enumerate(listFile):
        pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
        # Skip zero-byte (empty) downloads.
        if os.path.getsize(pathFile) != 0:
            # NOTE(review): the original opened pathFile in 'rb' here without
            # ever reading or closing the handle (a file-handle leak), and
            # built an unused timestamp path; both removed.
            FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
            core = com_service(inputList=FdataMDF)
            dictResult = core.TWINcurveBinSize(maxBinSize=1439, minBinSize=1430)
            print("var : {}".format(dictResult["varList"]))
            print("com : {}".format(dictResult["comList"]))
            print("bin : {}".format(dictResult["binSizeList"]))
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.xmeans import xmeans
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import SIMPLE_SAMPLES
import random
import utility.utility as ut

# Read sample 'simple3' from file.
# sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
path = "D:\\mdwarf_data\\lc_flux_catalog_aperture_r7_txt\\"
fileName = "light_curve_Gaia-DR2_603299423116967424_date20200130.txt"
tempDate = ut.txt_to_list(csv_name="{}{}".format(path, fileName))
sample = []
for i in tempDate:
    # sample.append([random.randrange(1, 50, 1),0])
    # Embed each flux value as a 2-D point (x = flux, y = 0) because
    # pyclustering expects multi-dimensional samples.
    sample.append([float(i), 0])

# Prepare initial centers - amount of initial centers defines amount of
# clusters from which X-Means will start analysis.
amount_initial_centers = 5
initial_centers = kmeans_plusplus_initializer(
    sample, amount_initial_centers).initialize()

# Create instance of X-Means algorithm. Analysis starts from the 5 initial
# centers prepared above; the maximum number of clusters that can be
# allocated is 10 (third positional argument, kmax).
xmeans_instance = xmeans(sample, initial_centers, 10)
xmeans_instance.process()

# Extract clustering results: clusters and their centers
clusters = xmeans_instance.get_clusters()
centers = xmeans_instance.get_centers()

# Visualize clustering results
# NOTE(review): nothing is appended to or shown from the visualizer in this
# view — presumably the file continues with append_clusters()/show().
visualizer = cluster_visualizer()
data = { 'inputWindowSize': 5, 'inputInitialBin': 3, 'threshold_tTest': 0, 'threshold_fTest': 0 } def getListTop(): fileName = "{}top418.csv".format(dataset_path) listFile = ut.txt_to_list(fileName) return listFile if __name__ == '__main__': listFile = ut.txt_to_list(csv_name="file_list_normal_full.csv") # listFile = ut_mdf.getListMDF() for indexFile, fileName in enumerate(listFile): pattern = fileName.split("_")[4] pathPngOutput = "{}{}{}\\".format(dataset_path, png_path, pattern) # listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"] ut.checkFolderandCreate(pathPngOutput) if ut.isEmpty(pathPngOutput): print(fileName) isFoundPng = ut.isFileNameInFolder( path=pathPngOutput, fileName="{}.png".format(fileName)) if not (isFoundPng): pathFile = "{}{}.txt".format(path_to_lc_file, fileName) filesize = os.path.getsize(pathFile) if filesize != 0:
# Sketch/binning parameters shared by the plotting services below.
data = {'inputWindowSize': 5, 'inputInitialBin': 3,'threshold_tTest':0,'threshold_fTest':0}

def getListTop():
    # Returns the file names listed in the top-418 catalog CSV.
    fileName = "{}top418.csv".format(dataset_path)
    listFile = ut.txt_to_list(fileName)
    return listFile

if __name__ == '__main__':
    # listFile = getListTop()
    # temp = 'light_curve_Gaia-DR2_652961717644929664_date20200201'
    # listFile = ut_web.getListMDF(pattern = pattern)
    # One CSV per observation pattern; process each pattern's file list.
    csvFile = ut.getListAllFileName(path="{}csv\\".format(dataset_path))
    for pattern in csvFile:
        listFile = ut.txt_to_list(csv_name="{}csv\\{}.csv".format(dataset_path,pattern))
        pathPngOutput = "{}{}\\{}\\".format(dataset_path, png_path, pattern)
        # listFile = ["light_curve_Gaia-DR2_608215408323505280_date20200201"]
        ut.checkFolderandCreate(pathPngOutput)
        # Only (re)generate output for patterns whose folder is still empty.
        if ut.isEmpty(pathPngOutput):
            for indexFile, fileName in enumerate(listFile):
                print(fileName)
                # NOTE(review): pathHtmlOutput is not defined anywhere in this
                # view — presumably assigned earlier in the full file; verify.
                isFoundHtml = ut.isFileNameInFolder(path=pathHtmlOutput, fileName="{}.html".format(fileName))
                # NOTE(review): isFoundPng is computed but unused in this view.
                isFoundPng = ut.isFileNameInFolder(path=pathPngOutput, fileName="{}.png".format(fileName))
                if not (isFoundHtml):
                    pathFile = "{}{}.txt".format(path_to_lc_file, fileName)
                    filesize = os.path.getsize(pathFile)
                    if filesize != 0:
                        # NOTE(review): handle opened but never read or closed
                        # in this view — looks like a file-handle leak; confirm
                        # against the rest of the file.
                        file = open(pathFile, 'rb')
                        FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
                        path_lc_timestamp_path = "{}{}{}.txt".format(dataset_path, lc_timestamp_path, fileName)
def getListTop():
    """Return the file names listed in the top-418 catalog CSV."""
    return ut.txt_to_list("{}top418.csv".format(dataset_path))
# from webService.backend.coreSketchDyBinService import sketchDyBinService from webService.backend.coreSketchFixBinService import sketchFixService from csv import DictWriter import csv from scipy.interpolate import interp1d import numpy as np import matplotlib.pyplot as plt thresholds = [0.2, 0.5, 0.7] + [*range(1, 50)] if __name__ == '__main__': total_true = 432 * (60 + 100 + 200 + 500) * 2 total_false = 432 * (4200 + 4160 + 4060 + 3760) * 2 total_tp = [0] * len(thresholds) total_fp = [0] * len(thresholds) results = ut.txt_to_list(csv_name="result_rrcf_2022_06_02.txt") for result in results: result = result.replace("[", "").replace("]", "") result_list = result.split(",") index = thresholds.index(float(result_list[2])) total_tp[index] = total_tp[index] + int(result_list[3]) total_fp[index] = total_fp[index] + int(result_list[4]) result_tp = [] result_fp = [] result_precision = [] print(total_tp) for tp, fp in zip(total_tp, total_fp): print("tp: {}, total:{}".format(tp, total_true)) tpr = float(tp) / float(total_true) fpr = float(fp) / float(total_false)
def getRRCFresult(fileName, duration, height):
    """Load stored RRCF result rows for one light-curve file.

    Args:
        fileName: base name of the light-curve file (no extension).
        duration: transit duration selecting the result sub-folder.
        height: transit height selecting the result sub-folder.

    Returns:
        Parsed (non-string) rows from the result file.
    """
    result_file = rrcf_resultPath + "I{}_L{}\\{}.txt".format(
        height, duration, fileName)
    return ut.txt_to_list(csv_name=result_file, isString=False)
def getResult(file_name, height, duration):
    """Return raw result lines for *file_name* under I{height}_L{duration}."""
    return ut.txt_to_list("I{}_L{}\\{}.txt".format(height, duration, file_name))
# save file main_save_path = '{}\\mdwarf_data_common\\'.format(dataset_path) # LC if __name__ == '__main__': listFile = ut_mdf.getListMDF() for indexFile, fileName in enumerate(listFile): print(fileName) LCFile = "{}{}.txt".format(path_to_lc_file, fileName) filesize = os.path.getsize(LCFile) if filesize != 0: fileDate = fileName.split("_date")[1] # Save LC to Common LC file = open(LCFile, 'rb') FdataMDF = [float(item) for item in ut.txt_to_list(LCFile)] save_lc = "{}{}\\{}\\".format(main_save_path, "lc_flux_catalog_aperture_r7", fileDate) ut.checkFolderandCreate(save_lc) save_lc_fileName = "{}{}.txt".format(save_lc, fileName) if not (ut.isFileNameInFolder(save_lc, "{}.txt".format(fileName))): ut.list_to_txt(rows=FdataMDF, csv_name=save_lc_fileName, is_sort=False) # Save LC to Common MJD path_lc_timestamp_path = "{}{}{}.txt".format( dataset_path, lc_timestamp_path, fileName) mjd_list = [ float(item)
import utility.utility as ut import utility.utility_bokeh as ut_bokeh dataset_path = 'D:\\mdwarf_data\\' png_path = "final_filter\\" width = 1000 height = 300 s = Service(ChromeDriverManager().install()) driver = webdriver.Chrome(service=s) driver.set_window_size(width, height) sizing_mode = "fixed" if __name__ == '__main__': windowSize = 40 listFile = ut.txt_to_list(csv_name="f_test.result.csv") for row in listFile: row_data = row.split(",") pattern = row_data[0].split("_")[4] pathPngOutput = "{}{}{}\\".format(dataset_path, png_path, pattern) ut.checkFolderandCreate(pathPngOutput) lightData1 = ut_mdf.getDataFromFile(fileName=row_data[0]) lightData2 = ut_mdf.getDataFromFile(fileName=row_data[1]) plots = ut_bokeh.exportPlot(x_axis=lightData1["timestamp"], y_axis=lightData1["instances"], fileName=lightData1["fileName"], addCircle=True, sizing_mode=sizing_mode) plots = ut_bokeh.exportSubplotPng(x_axis1=lightData1["timestamp"],
# result = row[4].replace(" ","") # df_list.append([bin_size,lc_file,L,I,alpha,result]) # df = pd.DataFrame(df_list, # columns=['bin_size', 'lc_file', 'height', 'duration','alpha','result'] # ) # # # a = df.groupby(['bin_size', 'lc_file', 'height', 'duration','alpha']).sum() # df.to_csv ('bin.csv', index = False, header=True) window_size_list = [10, 15, 20, 30, 50, 100] if __name__ == '__main__': df_list = [] for window_size in window_size_list: file_name = 'dy_win{}.txt'.format(window_size) rows = ut.txt_to_list(file_name) for row_st in rows: row = row_st[1:-1].split(",") lc_file = row[0].replace("'", "") L = row[1].replace(" ", "") I = row[2].replace(" ", "") alpha = row[3].replace(" ", "") result = row[4].replace(" ", "") df_list.append([window_size, lc_file, L, I, alpha, result]) df = pd.DataFrame(df_list, columns=[ 'window_size', 'lc_file', 'height', 'duration', 'alpha', 'result' ]) # a = df.groupby(['bin_size', 'lc_file', 'height', 'duration','alpha']).sum()
window='20') plt.plot([0] + result["fp"], [0] + result["tp"], color="blue", lw=lw, label='Dynamic binning size with uninteresting region') ################ export MP Ks = [ 3, 5, 7, 10, 15, 20, 25, 30, 40, 50, 60, 100, 300, 500, 800, 1000, 1500, 2000, 2500, 3000, 4000 ] total_true = 432 * (60 + 100 + 200 + 500) * 2 total_false = 432 * (4200 + 4160 + 4060 + 3760) * 2 total_tp = [0] * len(Ks) total_fp = [0] * len(Ks) results = ut.txt_to_list(csv_name="result_mp_2022_05_31.txt") for result in results: result = result.replace("[", "").replace("]", "") result_list = result.split(",") index = Ks.index(int(result_list[2])) total_tp[index] = total_tp[index] + int(result_list[3]) total_fp[index] = total_fp[index] + int(result_list[4]) result_tp = [] result_fp = [] result_precision = [] print(total_tp) for tp, fp in zip(total_tp, total_fp): print("tp: {}, total:{}".format(tp, total_true)) tpr = float(tp) / float(total_true) fpr = float(fp) / float(total_false)
duration=duration, isFullPath=True) listAns = ut_light.getListLight(height=height, duration=duration, isFullPath=True, folderType="answer") output3D_htmlFile = "{}3D_html\\".format( ut_light.getFullPath(height=height, duration=duration)) output3D_JPGFile = "{}3D_JPG\\".format( ut_light.getFullPath(height=height, duration=duration)) outputdyFile = "{}dy_html\\".format( ut_light.getFullPath(height=height, duration=duration)) for mainFile, ansFile in zip(listFile, listAns): file_name = os.path.basename(mainFile) mainRaw = [float(item) for item in ut.txt_to_list(mainFile)] ansRaw = [float(item) for item in ut.txt_to_list(ansFile)] ansList = [] tranList = [] for ans in ansRaw: ansList = ansList + ut_light.genListAns(start=ans, duration=duration) tranList = {"startTran": ans, "endTran": ans + duration} corePlot = sketchDyBinService(windowSize=windowSize, initialBin=initialBin, isOnline=False) sketchInstances = corePlot.sketchMode(instances=mainRaw) window = corePlot.getWindow() cluster = ut_cluster.cluster_xMean_Dy(binsList=window, kmax=windowSize, hist_bin=20)
import numpy as np import matplotlib import matplotlib.pyplot as plt from sklearn.neighbors import LocalOutlierFactor import utility.utility_mdf as ut_mdf import utility.utility_genData as ut_data import utility.utility as ut from numpy import histogram if __name__ == '__main__': windowSize = 40 upper_bound = 1.87519737 lower_bound = 0.53327720 listFile = ut.txt_to_list(csv_name="start_file.csv") # for windowSize in listWindow: for index, fileName1 in enumerate(listFile): print("File Name {}".format(fileName1)) pattern = fileName1.split("_")[4] lightData1 = ut_mdf.getDataFromFile(fileName=fileName1) dyResult1 = ut_data.genListDyBin(instances=lightData1["instances"], timestamp=lightData1["timestamp"], windowSize=windowSize) targetList = ut_mdf.getListMDF(pattern=pattern) for fileName2 in targetList: try: lightData2 = ut_mdf.getDataFromFile(fileName=fileName2) isOverlap = ut_mdf.isOverlapTimestamp(lightData1["timestamp"], lightData2["timestamp"]) if isOverlap: