Esempio n. 1
0
 def __init__(self, dataContext, name):
     """Initialize with a results path scoped to the toy time-series dataset.

     Stores *name* and derives the per-instance resource folders beneath
     the clustering-results path configured by the parent class.
     """
     resultsPath = getClusteringResultsPath() + getTimeSeriesToyDatasetName() + '/'
     self.name = name
     super().__init__(dataContext, resultsPath)
     # own folder is a per-name subdirectory; micro-clusters live in the same place
     ownFolder = self.clusteringResultsPath + self.name + '/'
     self.ownResourcesFolder = ownFolder
     self.microClustersFolder = ownFolder
# obtain the data sets from the csv files
# (each entry is presumably a dict with 'dataset', 'name' and 'k' keys — see usage below)
non_time_series_datasets = getDatasetsFromFolder(
    getNonTimeSeriesDatasetsPath())

# clustering algorithms: (display name, estimator) pairs; the same estimator
# instances are reused across data sets (KMeans is re-parameterized per set)
kmeans = KMeans()
dbscan = DBSCAN(eps=0.2, min_samples=10)
algorithms = [(getmKMeansName(), kmeans), (getDbscanName(), dbscan)]

# iterate over the data sets
for datIndx in range(len(non_time_series_datasets)):  # row index
    X = non_time_series_datasets[datIndx]['dataset']
    dName = non_time_series_datasets[datIndx]['name']
    k = non_time_series_datasets[datIndx]['k']
    # per-data-set output folder (used further down, beyond this excerpt)
    baseFolder = getClusteringResultsPath() + dName + '/'
    # normalize dataset for easier parameter selection
    X = StandardScaler().fit_transform(X)
    # iterate over the algorithms
    for algIndx in range(len(algorithms)):  # column index
        algo = algorithms[algIndx][1]
        algoName = algorithms[algIndx][0]
        # if the algorithm is 'kmeans', k must be set specifically for the data set
        if algoName.lower() == 'kmeans':
            algo.set_params(n_clusters=k)
        # assign labels
        labels = algo.fit_predict(X)
        print("  ", len(set(labels)), " clusters")
        clusteringInfo(labels)
        # result contains every scaled element with the corresponding label
        # (columns: scaled features, then the label) — consumed later in the loop
        result = np.c_[X, labels]
Esempio n. 3
0
# filter rows by timestamp; presumably keeps rows relative to 2018-10-02 10:30 —
# TODO confirm validate_timestamp semantics against its definition
dataset = dataset[dataset['to_timestamp'].apply(validate_timestamp, date = datetime.datetime(2018, 10, 2, 10, 30))]

# data context
dataContext = [ # hard-coded bounding-box limits for Sweden (lon, then lat)
                BoundingBox(minimun=11.0273686052 , maximun=23.9033785336),
                BoundingBox(minimun=55.3617373725, maximun=69.1062472602),
            ]
# dyclee: build the clusterer from the externally supplied params dict
dyclee = Dyclee(dataContext = dataContext, relativeSize = params["relativeSize"], speed = params["speed"], lambd = params["lambd"],
                periodicRemovalAt = params["periodicRemovalAt"], periodicUpdateAt = params["periodicUpdateAt"],
                timeWindow = params["timeWindow"], closenessThreshold = params["closenessThreshold"])

tGlobal = 6000 # following the notebook: a snapshot roughly every half hour
ac = 0 # represents amount of processed elements

folder = getClusteringResultsPath() + getRealDatasetName() + '/' + getDycleeName() + '/' # for saving the prepared results
if will_calculate_DBCV:
    resetStorage(folder) 

first = True # first clustering; the 'end' picture will be added at the beginning (to understand the loop)

for ind, p in dataset.iterrows():
    if end(p['to_timestamp']): # stop: only the insights between 11 and 16 on day 2 are of interest
        print(p, ' ... (the end)')
        break

    if not validate_p(p): # discard points outside Sweden or outside the day under analysis
        continue

    ac += 1
    point = [p.latitude, p.longitude] # the dataset is now complete; it has to be subset
Esempio n. 4
0
import json

import numpy as np
import os
from config import getClusteringResultsPath, getMbkName, getTimeSeriesDatasetsPath, getTimeSeriesToyDatasetName
import shutil

# target folder for mini-batch k-means results on the toy time-series dataset
folder = getClusteringResultsPath() + getTimeSeriesToyDatasetName() + '/' + getMbkName() + '/'


def resetStorage(target=None):
    """Delete the results folder and all of its contents, if present.

    Args:
        target: folder to wipe. Defaults to the module-level ``folder`` so
            existing no-argument callers keep their behavior; passing an
            explicit path lets other result directories be reset too
            (matching the parameterized ``resetStorage(folder)`` usage seen
            elsewhere in this project).
    """
    # resolve lazily so the module global is only touched when needed
    target = folder if target is None else target
    if os.path.exists(target):
        shutil.rmtree(target)


def createDirectoryIfNotExists(folder):
    """Create *folder* (including intermediate directories) when absent."""
    if os.path.exists(folder):
        return  # nothing to do — already present
    os.makedirs(folder)


def storeResult(snapshot):
    """Persist one clustering snapshot as ``<time>.csv`` inside ``folder``.

    *snapshot* is a mapping with a ``"time"`` key (used as the file name)
    and a ``"result"`` key (array-like, written via ``np.savetxt``).
    """
    createDirectoryIfNotExists(folder)
    destination = folder + str(snapshot.get("time")) + '.csv'
    np.savetxt(destination, snapshot.get("result"), delimiter=',',)


# Stores the algorithm configuration; continues beyond this excerpt.
# NOTE(review): the parameter name shadows the builtin `dict` — consider renaming.
def storeAlgoConfig(dict):
    # make sure the results folder exists before anything is written to it
    createDirectoryIfNotExists(folder)
Esempio n. 5
0
 def __init__(self, dataContext):
     """Initialize the parent with the configured clustering-results path."""
     resultsPath = getClusteringResultsPath()
     # trace the resolved path for debugging
     print('clusteringResultsPath', resultsPath)
     super().__init__(dataContext, resultsPath)