def clusterYogaTest():
    """Run the distance and clustering tests on the Yoga data set.

    Builds a single product-quantiser configuration, loads a training set
    together with a held-out pool of ``testSize + valSize`` series, passes
    the pool through ``take2RandDataParts`` (seed 0) and hands the training
    data and the resulting test part to ``distanceAndClusterTests``.

    The validation part returned by ``take2RandDataParts`` is not used here,
    and no ground-truth clustering is passed on.
    """
    print('YOGA')

    # Data-set sizes and the seed forwarded to take2RandDataParts.
    trainSize = 300
    valSize = 1000
    testSize = 2000
    seed = 0

    # Distance settings: one dict for the main distance, one for the
    # quantisation step (previously assigned twice with the same value).
    distParams = {'window': 3, 'psi': 0}
    quantizationDistParams = {'window': 2, 'psi': 0}
    kmeansWindowSize = 2

    generalClusterParams = {
        'dists_merger': clustering.singleLinkageUpdater,
        'min_clusters': 2,
    }
    pqClusterParams = {
        'k': 199800,
        'quantizer_usage': clustering.QuantizerUsage.TOP_K_ONLY_AT_INITIALISATION,
    }

    # km_init is deliberately left at the quantiser's default.
    qParams = [
        q.ProductQuantiserParameters(
            107, 100,
            distParams=distParams,
            subsetType=q.SubsetSelectionType.NO_OVERLAP,
            kmeansWindowSize=kmeansWindowSize,
            distanceCalculation=q.DISTANCECALCULATION.SYMMETRIC,
            computeDistanceCorrection=False,
            quantizationDistParams=quantizationDistParams,
            max_iters=2,
        ),
    ]

    XTrain, _, XPool, YPool = loadYogaDataSetForClustering(
        trainSize, testSize + valSize)
    # Only the test part is evaluated below; the validation part is dropped.
    _, _, XTest, _ = take2RandDataParts(XPool, YPool, valSize, testSize, seed)

    distanceAndClusterTests(XTrain, XTest, qParams, distParams,
                            generalClusterParams, pqClusterParams)
def clusterECG5000Test():
    """Run the distance and clustering tests on the ECG5000 data set.

    Builds a single product-quantiser configuration, loads a training set
    together with a held-out pool of ``testSize + valSize`` series, passes
    the pool through ``take2RandDataParts`` (seed 0) and hands the training
    data and the resulting test part to ``distanceAndClusterTests``.

    Ground-truth evaluation is currently disabled: ``groundTruth=None`` is
    passed on.
    """
    print('ECG')

    # Data-set sizes and the seed forwarded to take2RandDataParts.
    trainSize = 500
    valSize = 1500
    testSize = 3000
    seed = 0

    # Distance settings for the main distance and the quantisation step.
    distParams = {'window': 2, 'psi': 0}
    quantizationDistParams = {'window': 1, 'psi': 0}
    kmeansWindowSize = 2

    generalClusterParams = {
        'dists_merger': clustering.singleLinkageUpdater,
        'min_clusters': 20,
    }
    pqClusterParams = {
        'k': 120000,
        'quantizer_usage': clustering.QuantizerUsage.TOP_K_ONLY_AT_INITIALISATION,
    }

    # km_init is deliberately left at the quantiser's default.
    qParams = [
        q.ProductQuantiserParameters(
            70, 200,
            distParams=distParams,
            subsetType=q.SubsetSelectionType.DOUBLE_OVERLAP,
            kmeansWindowSize=kmeansWindowSize,
            distanceCalculation=q.DISTANCECALCULATION.SYMMETRIC,
            computeDistanceCorrection=True,
            quantizationDistParams=quantizationDistParams,
            max_iters=2,
        ),
    ]

    XTrain, _, XPool, YPool = loadEcg500DataSetForClustering(
        trainSize, testSize + valSize)
    _, YVal, XTest, _ = take2RandDataParts(XPool, YPool, valSize, testSize, seed)

    # NOTE(review): the reference clustering is built from the validation
    # labels but its result is not used -- None is passed as ground truth,
    # exactly as before. The call is kept because its side effects (if any)
    # are not visible from this function; confirm whether it can be dropped.
    createRealClustering(YVal, 5)
    rclust = None

    distanceAndClusterTests(XTrain, XTest, qParams, distParams,
                            generalClusterParams, pqClusterParams,
                            groundTruth=rclust)
def clusterElectricalDevicesTest():
    """Run the distance and clustering tests on the ElectricDevices data set.

    Sets up the quantiser and clustering configuration, loads a training set
    plus a held-out pool of ``testSize + valSize`` series, passes the pool
    through ``take2RandDataParts`` (seed 0) and calls
    ``distanceAndClusterTests`` with the training data and the test part.
    ``groundTruth=None`` is passed on.
    """
    print('ELD')

    numTrain, numVal, numTest = 1000, 2000, 5000
    splitSeed = 0

    # Window/psi settings for the individual distance computations.
    mainDist = dict(window=15, psi=0)
    quantDist = dict(window=4, psi=0)
    nwDist = dict(window=2, psi=0)
    kmWindow = 2

    clusterSettings = dict(dists_merger=None, min_clusters=7)
    pqSettings = dict(
        k=199800,
        quantizer_usage=clustering.QuantizerUsage.ONLY_APPROXIMATES,
    )

    # The one quantiser configuration that is actually evaluated
    # (km_init stays at its default).
    evaluatedQuantisers = []
    evaluatedQuantisers.append(q.ProductQuantiserParameters(
        48,
        200,
        distParams=mainDist,
        subsetType=q.SubsetSelectionType.NO_OVERLAP,
        kmeansWindowSize=kmWindow,
        distanceCalculation=q.DISTANCECALCULATION.ASYMMETRIC,
        computeDistanceCorrection=True,
        quantizationDistParams=quantDist,
        max_iters=2,
    ))

    # Needleman-Wunsch configuration: constructed as in the original,
    # but not handed to the test driver.
    nwQuantisers = [
        q.ProductQuantiserParameters(
            20,
            40,
            quantizerType=q.QuantizerType.PQNeedlemanWunsch,
            nwDistParams=nwDist,
            distParams=mainDist,
            subsetType=q.SubsetSelectionType.DOUBLE_OVERLAP,
        ),
    ]

    loaded = loadElectricDevicesDataSetForClustering(numTrain, numTest + numVal)
    XTrain, YTrain, XPool, YPool = loaded
    parts = take2RandDataParts(XPool, YPool, numVal, numTest, splitSeed)
    XVal, YVal, XTest, YTest = parts

    # The reference clustering is computed, then discarded: the driver is
    # given None as ground truth.
    createRealClustering(YVal, 7)

    distanceAndClusterTests(
        XTrain,
        XTest,
        evaluatedQuantisers,
        mainDist,
        clusterSettings,
        pqSettings,
        groundTruth=None,
    )
def clusterStarLight30AsymTest():
    """Run the distance and clustering tests on the StarLight data set
    with a window of 30 and asymmetric distance calculation.

    Sets up the quantiser and clustering configuration, loads a training set
    plus a held-out pool of ``testSize + valSize`` series, passes the pool
    through ``take2RandDataParts`` (seed 0) and calls
    ``distanceAndClusterTests`` with the training data and the test part.
    No ground truth is passed on.
    """
    print('STARLIGHT30ASYMMETRIC')
    print(50)

    numTrain, numVal, numTest = 500, 2000, 5000
    splitSeed = 0

    # Window/psi settings for the individual distance computations.
    mainDist = dict(window=30, psi=0)
    quantDist = dict(window=4, psi=0)
    nwDist = dict(window=2, psi=0)
    kmWindow = 2

    clusterSettings = dict(
        dists_merger=clustering.singleLinkageUpdater,
        min_clusters=10,
    )
    pqSettings = dict(
        k=199800,
        quantizer_usage=clustering.QuantizerUsage.TOP_K_ONLY_AT_INITIALISATION,
    )

    # The one quantiser configuration that is actually evaluated:
    # random k-means initialisation with max_iters=0.
    evaluatedQuantisers = []
    evaluatedQuantisers.append(q.ProductQuantiserParameters(
        128,
        100,
        distParams=mainDist,
        subsetType=q.SubsetSelectionType.NO_OVERLAP,
        kmeansWindowSize=kmWindow,
        distanceCalculation=q.DISTANCECALCULATION.ASYMMETRIC,
        computeDistanceCorrection=True,
        quantizationDistParams=quantDist,
        km_init="random",
        max_iters=0,
    ))

    # The two Needleman-Wunsch configurations below are constructed as in
    # the original, but are not handed to the test driver.
    nwQuantisers = [
        q.ProductQuantiserParameters(
            20,
            40,
            quantizerType=q.QuantizerType.PQNeedlemanWunsch,
            nwDistParams=nwDist,
            distParams=mainDist,
            subsetType=q.SubsetSelectionType.DOUBLE_OVERLAP,
        ),
    ]
    nwPileQuantisers = [
        q.ProductQuantiserParameters(
            20,
            20,
            quantizerType=q.QuantizerType.VQNeedlemanWunsch,
            nwDistParams=nwDist,
            distParams=mainDist,
            subsetType=q.SubsetSelectionType.DOUBLE_OVERLAP,
        ),
    ]

    loaded = loadStarLightDataSetForClustering(numTrain, numTest + numVal)
    XTrain, YTrain, XPool, YPool = loaded
    parts = take2RandDataParts(XPool, YPool, numVal, numTest, splitSeed)
    XVal, YVal, XTest, YTest = parts

    distanceAndClusterTests(
        XTrain,
        XTest,
        evaluatedQuantisers,
        mainDist,
        clusterSettings,
        pqSettings,
    )
def clusterStarLight17Test():
    """Run the distance and clustering tests on the StarLight data set
    with a window of 17 and symmetric distance calculation.

    Builds a single product-quantiser configuration, loads a training set
    together with a held-out pool of ``testSize + valSize`` series, passes
    the pool through ``take2RandDataParts`` (seed 0) and hands the training
    data and the resulting test part to ``distanceAndClusterTests``.

    Ground-truth evaluation is disabled: ``groundTruth=None`` is passed on.
    """
    print('STARLIGHT17')

    # Data-set sizes and the seed forwarded to take2RandDataParts.
    trainSize = 500
    valSize = 2000
    testSize = 5000
    seed = 0

    # Distance settings for the main distance and the quantisation step.
    distParams = {'window': 17, 'psi': 0}
    quantizationDistParams = {'window': 4, 'psi': 0}
    kmeansWindowSize = 2

    generalClusterParams = {
        'dists_merger': clustering.singleLinkageUpdater,
        'min_clusters': 3,
    }
    pqClusterParams = {
        'k': 199800,
        'quantizer_usage': clustering.QuantizerUsage.TOP_K_ONLY_AT_INITIALISATION,
    }
    print()  # blank separator line in the output, kept from the original

    # km_init is deliberately left at the quantiser's default.
    qParams = [
        q.ProductQuantiserParameters(
            128, 100,
            distParams=distParams,
            subsetType=q.SubsetSelectionType.NO_OVERLAP,
            kmeansWindowSize=kmeansWindowSize,
            distanceCalculation=q.DISTANCECALCULATION.SYMMETRIC,
            computeDistanceCorrection=True,
            quantizationDistParams=quantizationDistParams,
            max_iters=2,
        ),
    ]

    XTrain, _, XPool, YPool = loadStarLightDataSetForClustering(
        trainSize, testSize + valSize)
    # Only the test part is evaluated below; the validation part is dropped.
    _, _, XTest, _ = take2RandDataParts(XPool, YPool, valSize, testSize, seed)

    # No reference clustering is built for this data set.
    rclust = None

    distanceAndClusterTests(XTrain, XTest, qParams, distParams,
                            generalClusterParams, pqClusterParams,
                            groundTruth=rclust)