Example #1
import datetime
import os

import numpy

# Project-local modules used below.
import analysis
import bbdata
import dataio
import markov_anneal
import ncluster


def makeModelCounts(
    splits, modelLocation, dataLocation, neighborhoodLocation=None, minBehavior=0, compress=2, splitLength=8
):
    """
    Makes a set of counts for a given dataset and models.  
    
    Neighborhood location specifies if the models and data need to be preclustered.
    
    Returns the datavector and the associated split times.
    """
    files = os.listdir(modelLocation)

    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []
        # Loop over all model files.
        for f in files:
            # Only process data files.
            if f.split(".")[-1] == "dat":
                # Open the file and rebuild its models and sensor list.
                fn = dataio.loadData(modelLocation + str(f))
                fn.matrixToModel(fn.modelList)

                cd, td = bbdata.getdata(oldSplit, newSplit, comp=compress, sens=fn.sensors, readLocation=dataLocation)

                cd2 = cd
                if neighborhood:
                    # Remap the raw data onto this sensor set's neighborhood clusters.
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)

                # Ensure a 2-D column vector for the annealing step.
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    # If the ratio computation fails, fall back to zero counts.
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)
                tmpDoc += counts

        # Keep this split only if enough behavior counts were collected.
        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

    return dVector, times
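A minimal usage sketch, assuming the directory layout from Example #2; the split timestamps and the data path are hypothetical:

splits = [("2008-03-01 06:00:00", "2008-03-01 06:08:00")]  # hypothetical split pair
dVector, times = makeModelCounts(
    splits,
    "../../runs/real/models_min_3/",  # modelLocation, as in Example #2
    "../../data/real/",               # dataLocation: hypothetical path
    neighborhoodLocation="../../data/generated/clean/neighborclusters.txt",
    splitLength=8,
)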
Example #2
import datetime
import os

import bbdata
import ncluster

modelLocation = "../../runs/real/models_min_3/"
lsaLocation = "../../runs/real/data_min_3.lsa"
neighborhoodLocation = "../../data/generated/clean/neighborclusters.txt"

compress = 2
dVector = []
times = []
lsaVector = []
splitLength = 8
skipLength = 1
i = 0

if __name__ == "__main__":

    files = os.listdir(modelLocation)
    neighborclusters = ncluster.parse(neighborhoodLocation)

    # st and et (the overall start and end timestamps) are defined elsewhere
    # in this script.
    splits = bbdata.makeSplits(100, st, et, valid=[0, 2, 4],
                               splitLen=datetime.timedelta(minutes=splitLength),
                               sPeriod="06:00:00",
                               ePeriod="07:00:00")

    splits += bbdata.makeSplits(100, st, et, valid=[0, 2, 4],
                                splitLen=datetime.timedelta(minutes=splitLength),
                                sPeriod="18:00:00",
                                ePeriod="19:00:00")

    # Iterate over splits.
    for s in splits:
        print(i)
        i += 1
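Each element of splits is a (start, end) pair of timestamp strings; makeModelCounts in Example #1 parses them with datetime.strptime. A minimal sketch of the expected shape, using hypothetical timestamps:

import datetime

s = ("2008-03-01 06:00:00", "2008-03-01 06:08:00")  # hypothetical split pair
start = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
end = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
print(end - start)  # 0:08:00 -- one splitLength-minute window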
Example #3
    st = datetime.datetime.strptime(st, "%Y-%m-%d %H:%M:%S")
    et = datetime.datetime.strptime(et, "%Y-%m-%d %H:%M:%S")

    # Get the sensor blocks.
    for i in range(len(sensors)):
        print("Sensors: " + str(sensors[i]))

        cd, td = bbdata.getdata(st, et,
                                pStart=periodStart,
                                pEnd=periodEnd,
                                vDays=validDays,
                                comp=compress,
                                sens=sensors[i],
                                readLocation=dataDirectory)
        
        # Remap the raw sensor data onto this block's neighborhood clusters.
        neighborclusters = ncluster.parse(neighborhoodLocation)
        local = neighborclusters[str(sensors[i])]
        cd2 = ncluster.convertNeighborhood(cd, local)

        # Ensure a 2-D column vector for the annealing step.
        cd2 = numpy.array(cd2, ndmin=2)
        cd2 = cd2.T
        
        #obs = 2**len(sensors[i])
        #sData = markov_anneal.splitLocalMax(cd, td, splitLen)

        obs = 9

        # Use 16 observation symbols only for the last sensor block (90-102).
        if i == len(sensors) - 1:
            obs = 16
        
        sData = markov_anneal.splitLocalMax(cd2, td, splitLen)
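The ndmin=2 plus transpose pattern above (also used in Example #1) reshapes a flat series into the single-column 2-D array handed to markov_anneal.splitLocalMax. A standalone sketch of just the reshape, with arbitrary input values:

import numpy

cd = [3, 1, 4, 1, 5]            # arbitrary 1-D series
cd2 = numpy.array(cd, ndmin=2)  # shape (1, 5): a single row
cd2 = cd2.T                     # shape (5, 1): a single column
print(cd2.shape)                # (5, 1)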
Example #4
    files = os.listdir(readLocation)
    for f in files:
        print(f)
        # Only process data files.
        if f.split('.')[-1] == 'dat':

            # Open the file and rebuild its models from the stored matrix.
            fn = dataio.loadData(readLocation + str(f))
            fn.matrixToModel(fn.modelList)

            #visualizer.drawHMM(len(fn.models), fn.obs, \
            #                    fn.assignedData, \
            #            writeLocation = "../../output/" + str(f.split('.')[0]) + ".png")
            
            # Grab the neighborhood clusters.
            cc = ncluster.parse("../../data/generated/clean/neighborclusters.txt")
            """
            m = None
                                    
            for temp in range(len(fn.assignedData)):
                visualizer.drawHMMCluster(fn.assignedData[temp], fn.models, \
                                        len(fn.sensors), \
                                        writeLocation = "../../output/cluster" \
                                                + str(f.split('.')[0]) + "_" + \
                                                str(temp) + ".png", \
                                        spacing = 20, 
                                        scaling = 5)
            
            visualizer.drawHMMCluster(fn.out, fn.models, \
                                        len(fn.sensors), \
                                        writeLocation = "../../output/outliers" \