Python DataProcess 예제들, mypackages.processing.DataProcess Python 예제들

예제 #1

0

파일 보기

    def __init__(
            self,
            isExtract=True,
            root_dir="C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\newest",
            filename="4Band_Subtracted_20040514_20050427"):
        """Load an image, optionally keep only masked pixels, and min-max normalize.

        Args:
            isExtract (boolean): when True, a second (mask) image is opened and
                only the positions selected from it with value -1 are kept.
            root_dir (string): directory passed to ``oi.open_tiff``.
            filename (string): image name passed to ``oi.open_tiff``.
        """
        self.isExtract = isExtract
        self.root_dir = root_dir
        self.file_name = filename

        # The open_tiff result is indexed as [0]=pixel data, [1]=H, [2]=W,
        # [3]=number of bands.
        self.dataset = oi.open_tiff(root_dir, filename)
        self.H = self.dataset[1]
        self.W = self.dataset[2]
        self.n_bands = self.dataset[3]
        # flatten and transform the array
        self.npdataset = art.tif2vec(self.dataset[0])

        if self.isExtract:
            # extract out the changed area
            self.select_path = "C:\\Users\\DELL\\Projects\\VHR_CD\\image-v2-timeseries\\EXTRACT"
            self.select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
            self.simg = oi.open_tiff(self.select_path, self.select_img)
            self.select = self.simg[0]  #(2720000)

            # Positions with mask value -1 are treated as "changed" and value 0
            # as "non-changed" (per the attribute names); the stacked variant
            # indexes the multi-band data, the non-stacked ones are kept for
            # writing results back later.
            self.changePos = DataProcess.selectArea(
                self.select, self.n_bands, -1, isStack=True)
            self.ns_changePos = DataProcess.selectArea(
                self.select, self.n_bands, -1, isStack=False)
            self.ns_nonChangePos = DataProcess.selectArea(
                self.select, self.n_bands, 0, isStack=False)

            self.npdataset = self.npdataset[self.changePos].reshape(
                -1, self.n_bands)

        # normalization (per-column min-max)
        self.nmax = self.npdataset.max(axis=0)
        self.nmin = self.npdataset.min(axis=0)
        value_range = self.nmax - self.nmin
        # A constant column has zero range; divide by 1 there so it maps to 0
        # instead of producing NaN/inf.
        safe_range = np.where(value_range == 0, 1, value_range)
        self.norm_data = (self.npdataset - self.nmin) / safe_range

        # TODO:don't know what's for yet, add only to be compatible to TensorDataset
        self.target_data = np.zeros_like(self.norm_data)

        #clear the memory
        # self.simg=None
        self.dataset = None

예제 #2

0

파일 보기

def extract_compareClustering(clusterClass):
    """Fit each clustering model on the extracted pixels and save its results.

    For every model the fitted labels are written back into a full-size result
    array, a silhouette score is computed, the class map is visualized and the
    score plus the fitting time are appended to a text file.

    Args:
        clusterClass (dict): maps a display name to a clustering object that
            provides ``fit(X)`` and a ``labels_`` attribute.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"

    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  #NOTE: this step is really important

    select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
    select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
    simg = oi.open_tiff(select_path, select_img)
    select = simg[0]  #(2720000)

    changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
    ns_changePos = DataProcess.selectArea(select, n_bands, -1, isStack=False)
    ns_nonChangePos = DataProcess.selectArea(select, n_bands, 0, isStack=False)

    X_train = org_data[changePos].reshape(-1, n_bands)

    result = np.zeros_like(select.reshape(-1, 1))

    # Loop-invariant output directory.
    save_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\sklearn_clustering\\compare"

    for cls_name, cls_class in clusterClass.items():
        print("running", cls_name, "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        cls_class.fit(X_train)
        usingTime = time.perf_counter() - t0

        # combine the result: fitted labels for the extracted positions, one
        # extra label (max + 1) for everything that was not extracted
        result[ns_changePos] = cls_class.labels_
        result[ns_nonChangePos] = np.max(cls_class.labels_) + 1

        evaluation = silhouette_score(X=org_data,
                                      labels=result,
                                      metric='euclidean',
                                      sample_size=10000)

        DataProcess.visualize_class(
            result.reshape(H, W),
            save_path + '\\' + cls_name + "_change_area_class")

        # save using time
        print("save the information to txt file...")
        with open(
                save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt", 'a') as f:
            f.write("detetion algorithm: " + cls_name + "\nsilhouette_score:" +
                    str(evaluation) + "\ndetection using time: " +
                    str(usingTime))
            f.write("\n----------------------------------------------\n")

예제 #3

0

파일 보기

 def anomaly(self,scores):
     """Turn raw scores into an outlier result and export it via GeoProcess.getSHP.

     Args:
         scores: raw scores; passed to ``DataProcess.scaleNormalize`` together
             with ``(0, 500)`` (presumably the target range -- TODO confirm).
             When ``self.isExtract`` is set they are written only at
             ``self.ns_changePos``.
     """
     # Read the flag from the instance; a bare ``isExtract`` is not defined
     # inside this method and raises NameError.
     if self.isExtract:
         self.score_result=np.empty_like(self.select.reshape(-1,1))
         # scale the scores and write them at the extracted positions
         self.score_result[self.ns_changePos]=DataProcess.scaleNormalize(scores,(0,500)).reshape(-1,)
         # positions outside the extracted area get score 0
         self.score_result[self.ns_nonChangePos]=0
     else:
         self.score_result=DataProcess.scaleNormalize(scores,(0,500)).reshape(-1,)
     # give labels (99 is passed through to getOutliers; presumably a
     # percentile threshold -- TODO confirm)
     self.outlier_result=highRank.getOutliers(self.score_result,99)
     # generate picture
     GeoProcess.getSHP(
         img_path=self.root_dir,
         img_name=self.file_name,
         save_path="C:\\Users\\DELL\\Projects\\VHR_CD\\repository\\code-v2",
         extend_name="VAE_noEXT_",
         result_array=self.outlier_result)

예제 #4

0

파일 보기

def RunPyodOutlier(classifiers, outlier_save_path, isExtract=True):
    """Fit each outlier detector on the image data and save its outputs.

    For every detector the labels are saved as CSV and through
    ``GeoProcess.getSHP``, the scaled scores are saved as a heat map, and the
    fitting time is appended to a text file in ``outlier_save_path``.

    Args:
        classifiers (dict): maps a display name to a detector that provides
            ``fit(X)``, ``labels_`` and ``decision_scores_``.
        outlier_save_path (str): directory that receives all outputs.
        isExtract (bool): when True, only the positions selected from the mask
            image with value -1 are used for fitting; all other positions get
            label 0 and score 0 in the saved results.
    """
    # Get data, n_bands=4
    norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\newest"
    img = "4Band_Subtracted_20040514_20050427"

    dataset = oi.open_tiff(norm_img_path, img)
    H = dataset[1]
    W = dataset[2]
    n_bands = dataset[3]
    org_data = art.tif2vec(dataset[0])  #NOTE: this step is really important

    #NOTE: Normalize the scale of the orignialdata
    org_data = org_data / org_data.max(axis=0)

    #TODO: normalize the data?

    if isExtract:
        # extract out the changed area
        select_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\EXTRACT"
        select_img = "SOMOCLU_20_20_HDBSCAN_cl_2_2004_2005_min_cluster_size_4_alg_best_"
        simg = oi.open_tiff(select_path, select_img)
        select = simg[0]  #(2720000)

        changePos = DataProcess.selectArea(select, n_bands, -1, isStack=True)
        ns_changePos = DataProcess.selectArea(select,
                                              n_bands,
                                              -1,
                                              isStack=False)
        ns_nonChangePos = DataProcess.selectArea(select,
                                                 n_bands,
                                                 0,
                                                 isStack=False)

        X_train = org_data[changePos].reshape(-1, n_bands)
        print("shape of original data: ", org_data.shape)
        print("shape of extracted data: ", X_train.shape)
        # to save the final result
        outlier_result = np.zeros_like(select.reshape(-1, 1))
        score_result = np.empty_like(select.reshape(-1, 1))
    else:
        X_train = org_data.reshape(-1, n_bands)
        print("shape of training data: ", X_train.shape)

    for clf_name, clf in classifiers.items():
        if not isExtract:
            clf_name = "no_extract_" + clf_name

        print("running " + clf_name + "...")
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        t0 = time.perf_counter()
        clf.fit(X_train)
        usingTime = time.perf_counter() - t0
        # get the prediction labels and outlier scores of the training data
        y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
        y_train_scores = clf.decision_scores_  # raw outlier scores

        if isExtract:
            # combine the extraction non-changed label&&scores and the algorithm result
            outlier_result[ns_changePos] = y_train_pred
            outlier_result[ns_nonChangePos] = 0
            score_result[ns_changePos] = DataProcess.scaleNormalize(
                y_train_scores, (0, 500)).reshape(-1, )
            score_result[ns_nonChangePos] = 0
        else:
            # no mask: the detector output already covers every position
            outlier_result = y_train_pred
            score_result = DataProcess.scaleNormalize(y_train_scores,
                                                      (0, 500)).reshape(-1, )

        print("the scale of the y_train_score is:", y_train_scores.min(),
              y_train_scores.max())
        print("the scale of the score_result is:", score_result.min(),
              score_result.max())

        #save the outlier detection result as .csv and via GeoProcess.getSHP
        DataProcess.int_to_csv(outlier_save_path, img, outlier_result,
                               clf_name + "_outliers")
        GeoProcess.getSHP(norm_img_path, img, outlier_save_path,
                          clf_name + "_outliers", outlier_result)

        #save the outlier scores as heatmap
        DataProcess.saveHeatMap(score_result.reshape(H, W),
                                outlier_save_path + "\\" + clf_name)

        print("save the information to txt file...")
        with open(
                outlier_save_path + '/' +
                "Outlier Detection Algorithms Running Time.txt", 'a') as f:
            f.write("detetion algorithm: " + clf_name +
                    "\ndetection using time: " + str(usingTime))
            f.write("\n----------------------------------------------\n")

예제 #5

0

파일 보기

def runClusteringBased(img_path, img_name, data_path, data_name,
                       outlier_save_path, clusteringPara, outlierPara,
                       o_filter="highRank"):
    """Cluster the data, score outliers from the clusters, and save everything.

    Args:
        img_path: image directory, forwarded to the visualize/getSHP helpers.
        img_name: image name, used for the helpers and output file names.
        data_path: directory of the CSV data; "runningstatus.txt" is appended
            to in this directory as well.
        data_name: name of the CSV data, passed to ``DataProcess.csv_to_array``.
        outlier_save_path: directory that receives label and outlier outputs.
        clusteringPara: ``clusteringPara[0]`` is the algorithm name and
            ``clusteringPara[1]`` the positional arguments for its
            ``getCluster`` call.
        outlierPara: ``outlierPara[0]`` is the scoring algorithm name (only
            "LDCOF" is supported); items 1-3 are forwarded to it.
        o_filter: score-to-label filter; only "highRank" is supported.

    Returns:
        tuple: ``(clusteringScore, usingTime)`` -- the silhouette value returned
        by ``cl.Silhouette.getSilhouette`` and the seconds elapsed between the
        start of clustering and the end of saving the cluster outputs.

    Raises:
        SystemExit: if the clustering algorithm name is not supported.
        ValueError: if ``outlierPara[0]`` or ``o_filter`` is not supported.
    """
    #TODO:change score and filter para
    org_data = DataProcess.csv_to_array(data_path, data_name)
    AlgorithmName = clusteringPara[0]
    print("running " + AlgorithmName + " for clustering...")

    # Each supported name is also the attribute of ``cl`` that provides
    # ``getCluster``.
    supported_algorithms = ("kMeans", "Affinity", "MeanShift", "Spectral",
                            "Agglomerative", "DBSCAN", "BIRCH")
    if AlgorithmName not in supported_algorithms:
        print("algorithm name ilegal")
        # Same effect as the site-provided exit(), without depending on the
        # ``site`` module having injected that builtin.
        raise SystemExit

    # Fail before the expensive clustering instead of hitting a NameError on
    # d_score / outlier_label afterwards.
    if outlierPara[0] != "LDCOF":
        raise ValueError("unsupported outlier score algorithm: " +
                         str(outlierPara[0]))
    if o_filter != "highRank":
        raise ValueError("unsupported outlier filter: " + str(o_filter))

    t0 = time.time()
    d_label = getattr(cl, AlgorithmName).getCluster(org_data,
                                                    *(clusteringPara[1]))
    if AlgorithmName == "Agglomerative":
        # the 7th clustering argument is appended to the name for this algorithm
        AlgorithmName = AlgorithmName + '_' + clusteringPara[1][6]
    AlgorithmName += '_'

    #save the cluster information
    saveclass_extend_name = '_' + AlgorithmName + "cluster_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, d_label,
                           saveclass_extend_name)
    DataProcess.visualize_class(img_path, img_name, outlier_save_path,
                                img_name + saveclass_extend_name)
    t1 = time.time()

    print("running " + outlierPara[0] +
          " for calculating the outlier scores...")
    d_score = cb.calLDCOF.findLDCOF(org_data, d_label, outlierPara[1],
                                    outlierPara[2], outlierPara[3])
    outlier_label = sc2r.highRank.getOutliers(d_score, 98)

    #save the label information for further usage
    savelabel_extend_name = '_' + AlgorithmName + "outlier_label"
    DataProcess.int_to_csv(outlier_save_path, img_name, outlier_label,
                           savelabel_extend_name)
    GeoProcess.getSHP(
        img_path, img_name, outlier_save_path, AlgorithmName,
        outlier_label)  #FIXME: the .tif file could not be specified the path

    #calculate the Silhouette Coefficient as a reference of the performance of the outcome
    #NOTE:due to the limited memory, I adjust the sample_size to 10000,which may cause the score less reliable
    print("calculating Silhouette Coefficients...")
    clusteringScore = cl.Silhouette.getSilhouette(org_data,
                                                  d_label,
                                                  sample_size=10000)
    usingTime = t1 - t0
    print("save the information to txt file...")
    with open(data_path + '/' + "runningstatus.txt", 'a') as f:
        f.write("clustering algorithm: " + AlgorithmName +
                "\nsilhouette coefficient: " + str(clusteringScore) +
                "\nclstering using time: " + str(usingTime))
        f.write("\n----------------------------------------------\n")
    return clusteringScore, usingTime

예제 #6

0

파일 보기

from mypackages.processing import DataProcess
from mypackages.processing import GeoProcess
from mypackages import clustering as cl
from mypackages import clusteringBased as cb
from mypackages import scoresToResults as sc2r
from mypackages.processing import open_image as oi

import numpy as np

# Script: convert one image to CSV with DataProcess.img_to_csv.

# First argument to img_to_csv (presumably the directory holding the image
# -- TODO confirm).
norm_img_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\Encoded_dataset\\Encoded_models_2018-10-03_1337\\subtracted_norm_from_norm"
# Second argument to img_to_csv (presumably the directory that receives the
# CSV -- TODO confirm).
norm_data_path = "C:\\Users\\DELL\\Projects\\MLS_cluster\\image-v2-timeseries\\raw_data\\1337_sub"
# Name of the image to convert.
img = "Subtracted_20040514_20050427"
# NOTE(review): `raw` is not used anywhere in the visible part of this script.
raw = "Subtracted_20040514_20050427_raw_data"

#get the paths and names in the dir
# img_path_name,img_f_names = DataProcess.file_name(norm_subtracted_path,".TIF")
# data_path_name,data_f_names = DataProcess.file_name(norm_subtracted_save,".csv")

# Runs at import time: converts the single image named above.
DataProcess.img_to_csv(norm_img_path, norm_data_path, img)

#transform all the images in the path into csv (done)
# for name in img_f_names:
# DataProcess.img_to_csv(norm_subtracted_path,norm_subtracted_save,name)