def makeModelCounts(splits, modelLocation, dataLocation,
                    neighborhoodLocation=None, minBehavior=0,
                    compress=2, splitLength=8):
    """Build one vector of model counts per time split.

    For every (start, end) split, every ``.dat`` model file in
    ``modelLocation`` is loaded, the matching sensor data is fetched,
    optionally remapped onto neighborhood clusters, segmented, and
    scored against the file's models; the per-model counts of all
    files are concatenated into one document vector for the split.

    Parameters:
        splits: sequence of (start, end) timestamp strings, both in
            "%Y-%m-%d %H:%M:%S" format.
        modelLocation: directory holding the ``.dat`` model files.
            Must end in a path separator — filenames are appended by
            plain string concatenation.
        dataLocation: directory handed to ``bbdata.getdata`` as
            ``readLocation``.
        neighborhoodLocation: optional path to a neighborhood-cluster
            file; when given, raw data is pre-clustered per model.
        minBehavior: minimum length a split's count vector must reach
            to be kept in the output.
        compress: compression factor forwarded to ``bbdata.getdata``.
        splitLength: window length forwarded to
            ``markov_anneal.splitLocalMax``.

    Returns:
        (dVector, times): ``dVector`` is the list of retained count
        vectors; ``times`` holds the start datetime of each retained
        split, in the same order.
    """
    files = os.listdir(modelLocation)
    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
        tmpDoc = []

        # Loop over all model files.
        for f in files:
            # Only '.dat' files carry serialized models.
            if f.split(".")[-1] == "dat":
                # Load the container and rebuild its model objects.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)

                cd2 = cd
                if neighborhood:
                    # Remap the raw readings onto this model's
                    # neighborhood clusters before segmentation.
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)
                    cd2 = numpy.array(cd2, ndmin=2)
                    cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # analysis.ratio can fail on degenerate splits; fall
                # back to zero counts so every document keeps a fixed
                # width.  FIX: was a bare `except:`, which also
                # swallowed KeyboardInterrupt/SystemExit.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)

                tmpDoc += counts

        # Keep only splits that produced enough behavior counts.
        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        # NOTE(review): oldSplit is reassigned at the top of each
        # iteration, so this carry-over is redundant; kept for
        # behavioral parity.
        oldSplit = newSplit

    return dVector, times
# --- Script configuration (module-level constants) ---
# Directory of per-sensor-block model files ('.dat'); trailing slash is
# required because filenames are appended by string concatenation.
modelLocation = "../../runs/real/models_min_3/"
# Destination for the LSA data produced by this run.
lsaLocation = "../../runs/real/data_min_3.lsa"
# Pre-computed neighborhood clusters used to remap raw sensor readings.
neighborhoodLocation = "../../data/generated/clean/neighborclusters.txt"
compress = 2        # compression factor for bbdata loading
dVector = []        # accumulated count vectors, one per split
times = []          # split start times, parallel to dVector
lsaVector = []      # rows destined for the LSA output file
splitLength = 8     # split window length, in minutes (see timedelta below)
skipLength = 1      # NOTE(review): unused in the visible code -- confirm
i = 0               # progress counter printed in the main loop

if __name__ == "__main__":
    files = os.listdir(modelLocation)
    neighborclusters = ncluster.parse(neighborhoodLocation)

    # Build 100 splits in each of two daily windows (06:00-07:00 and
    # 18:00-19:00) restricted to days with valid = [0, 2, 4]
    # (presumably weekday indices -- verify against bbdata.makeSplits).
    # NOTE(review): `st` and `et` are not defined in this chunk;
    # presumably the global start/end timestamps defined elsewhere.
    splits = bbdata.makeSplits(100, st, et, valid = [0, 2, 4], \
                splitLen = datetime.timedelta(minutes = splitLength), \
                sPeriod = "06:00:00", \
                ePeriod = "07:00:00")
    splits += bbdata.makeSplits(100, st, et, valid = [0, 2, 4], \
                splitLen = datetime.timedelta(minutes = splitLength), \
                sPeriod = "18:00:00", \
                ePeriod = "19:00:00")

    #Iterate over splits.
    for s in splits:
        # Progress indicator (Python 2 print statement).
        print i
        i += 1
def makeModelCounts(splits, modelLocation, dataLocation, \
                    neighborhoodLocation = None, minBehavior = 0, \
                    compress = 2, splitLength = 8):
    """Make a set of counts for a given dataset and models.

    Each (start, end) pair in ``splits`` is scored against every
    ``.dat`` model file found in ``modelLocation``: the sensor data
    for the window is loaded, optionally remapped onto neighborhood
    clusters, segmented with ``markov_anneal.splitLocalMax``, and
    counted with ``analysis.ratio``.  The per-file counts are
    concatenated into a single document vector per split.

    Parameters:
        splits: sequence of (start, end) "%Y-%m-%d %H:%M:%S" strings.
        modelLocation: directory of ``.dat`` model files (must end in
            a path separator; filenames are string-concatenated).
        dataLocation: read location for ``bbdata.getdata``.
        neighborhoodLocation: optional cluster file; when set, data is
            pre-clustered per model's sensor set.
        minBehavior: minimum count-vector length required to keep a
            split.
        compress: compression factor for ``bbdata.getdata``.
        splitLength: window length for ``splitLocalMax``.

    Returns:
        (dVector, times): the retained count vectors and the start
        datetime of each retained split.
    """
    files = os.listdir(modelLocation)
    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    #Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
        tmpDoc = []

        #Loop over all models
        for f in files:
            #It is a data file.
            if f.split('.')[-1] == 'dat':
                #Open it and grab the models and sensor list
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit, \
                                        comp = compress, \
                                        sens = fn.sensors,
                                        readLocation = dataLocation)

                cd2 = cd
                if neighborhood:
                    # Remap raw readings onto this model's clusters.
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)
                    cd2 = numpy.array(cd2, ndmin = 2)
                    cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # Degenerate splits make analysis.ratio fail; use zero
                # counts so the document width stays fixed.
                # FIX: narrowed from a bare `except:`, which also hid
                # KeyboardInterrupt and SystemExit.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)

                tmpDoc += counts

        # Keep only splits with enough observed behavior.
        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        # NOTE(review): redundant -- oldSplit is reassigned at the top
        # of each iteration; kept for behavioral parity.
        oldSplit = newSplit

    return dVector, times
# Parse the configured start/end timestamp strings into datetimes.
# NOTE(review): `st`, `et`, `sensors`, `periodStart`, `periodEnd`,
# `validDays`, `compress`, `dataDirectory`, `neighborhoodLocation` and
# `splitLen` are not defined in this chunk -- presumably module-level
# configuration defined elsewhere in the file.
st = datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S")
et = datetime.datetime.strptime(et, "%Y-%m-%d %H:%M:%S")

#Get the sensor blocks
for i in range(len(sensors)):
    # Progress indicator (Python 2 print statement).
    print "Sensors:" + str(sensors[i])

    # Load this sensor block's data for the configured window.
    cd, td = bbdata.getdata(st, et, \
                pStart = periodStart, \
                pEnd = periodEnd, \
                vDays = validDays, \
                comp = compress, \
                sens = sensors[i],
                readLocation = dataDirectory)

    # Remap the raw readings onto this block's neighborhood clusters.
    # NOTE(review): the cluster file is re-parsed on every iteration;
    # could be hoisted out of the loop.
    neighborclusters = ncluster.parse(neighborhoodLocation)
    local = neighborclusters[str(sensors[i])]
    cd2 = ncluster.convertNeighborhood(cd, local)
    cd2 = numpy.array(cd2, ndmin = 2)
    cd2 = cd2.T

    #obs = 2**len(sensors[i])
    #sData = markov_anneal.splitLocalMax(cd, td, splitLen)

    # Number of observation symbols; hard-coded rather than derived
    # from the sensor count (see commented-out 2**len(...) above).
    obs = 9

    #Use only for sensor block 90-102
    if i == len(sensors) - 1:
        obs = 16

    # Segment the clustered data into local-maximum windows.
    sData = markov_anneal.splitLocalMax(cd2, td, splitLen)
files = os.listdir(readLocation) for f in files: print f #It is a data file. if f.split('.')[-1] == 'dat': #Open files fn = dataio.loadData(readLocation + str(f)) fn.matrixToModel(fn.modelList) #visualizer.drawHMM(len(fn.models), fn.obs, \ # fn.assignedData, \ # writeLocation = "../../output/" + str(f.split('.')[0]) + ".png") #Grab clusters cc = ncluster.parse("../../data/generated/clean/neighborclusters.txt") """ m = None for temp in range(len(fn.assignedData)): visualizer.drawHMMCluster(fn.assignedData[temp], fn.models, \ len(fn.sensors), \ writeLocation = "../../output/cluster" \ + str(f.split('.')[0]) + "_" + \ str(temp) + ".png", \ spacing = 20, scaling = 5) visualizer.drawHMMCluster(fn.out, fn.models, \ len(fn.sensors), \ writeLocation = "../../output/outliers" \