import datetime
import os

import numpy

# Project-local modules used below.
import analysis
import bbdata
import dataio
import markov_anneal
import ncluster


def makeModelCounts(splits, modelLocation, dataLocation,
                    neighborhoodLocation=None, minBehavior=0,
                    compress=2, splitLength=8):
    """
    Makes a set of counts for a given dataset and models.

    neighborhoodLocation specifies whether the models and data need to be
    preclustered. Returns the data vector and the associated split times.
    """
    files = os.listdir(modelLocation)
    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []

        # Loop over all models.
        for f in files:
            # Only process model (.dat) files.
            if f.split(".")[-1] == "dat":
                # Open the file and grab the models and sensor list.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)

                # Precluster the raw data if a neighborhood is given.
                cd2 = cd
                if neighborhood:
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)

                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # Score each split window against the models; fall back to
                # zero counts if the ratio test fails.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)

                tmpDoc += counts

        # Only keep windows that contain enough observed behaviors.
        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        oldSplit = newSplit

    return dVector, times
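# Example usage (a minimal sketch, not from the original source): the split
# boundaries, directory paths, and neighborhood file below are hypothetical
# placeholders. Each split is a (start, end) pair of timestamp strings in
# the "%Y-%m-%d %H:%M:%S" format parsed above.
exampleSplits = [
    ("2010-01-04 00:00:00", "2010-01-05 00:00:00"),
    ("2010-01-05 00:00:00", "2010-01-06 00:00:00"),
]

dVector, times = makeModelCounts(exampleSplits,
                                 "models/",
                                 "data/",
                                 neighborhoodLocation="clusters.txt",
                                 minBehavior=1)

# One count vector per kept split, plus the split start times.
for t, doc in zip(times, dVector):
    print(t, doc)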
# Get the sensor blocks.
for i in range(len(sensors)):
    print("Sensors: " + str(sensors[i]))

    cd, td = bbdata.getdata(st, et,
                            pStart=periodStart,
                            pEnd=periodEnd,
                            vDays=validDays,
                            comp=compress,
                            sens=sensors[i],
                            readLocation=dataDirectory)

    # Precluster the raw data using this block's neighborhood clusters.
    neighborclusters = ncluster.parse(neighborhoodLocation)
    local = neighborclusters[str(sensors[i])]
    cd2 = ncluster.convertNeighborhood(cd, local)
    cd2 = numpy.array(cd2, ndmin=2)
    cd2 = cd2.T

    # Alternative: derive the observation count from the block size.
    # obs = 2**len(sensors[i])
    # sData = markov_anneal.splitLocalMax(cd, td, splitLen)

    obs = 9  # Use only for sensor block 90-102.
    if i == len(sensors) - 1:
        obs = 16

    sData = markov_anneal.splitLocalMax(cd2, td, splitLen)
    print(len(sData))
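    # Sketch, not in the original fragment: following makeModelCounts above,
    # each window in sData could be scored against this block's models with
    # analysis.ratio. "models" is a hypothetical per-block model list; the
    # original script does not define it here.
    try:
        val, counts = analysis.ratio(sData.values(), models)
    except Exception:
        counts = [0] * len(models)
    print("Model counts for block " + str(sensors[i]) + ": " + str(counts))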