Example #1
0
def makeModelCounts(
    splits, modelLocation, dataLocation, neighborhoodLocation=None, minBehavior=0, compress=2, splitLength=8
):
    """
    Build a behavior-count vector for each time split against the saved models.

    Parameters
    ----------
    splits : iterable of (str, str)
        Pairs of "%Y-%m-%d %H:%M:%S" timestamps delimiting each window.
    modelLocation : str
        Directory containing the ``.dat`` model files.
    dataLocation : str
        Location the raw sensor data is read from (passed to ``bbdata.getdata``).
    neighborhoodLocation : str, optional
        If given, the models and data are preclustered using the neighborhood
        clusters parsed from this location.
    minBehavior : int
        Minimum count-vector length required to keep a split's document.
    compress : int
        Compression factor forwarded to ``bbdata.getdata``.
    splitLength : int
        Window length used by ``markov_anneal.splitLocalMax``.

    Returns
    -------
    (list, list)
        The data vectors and the start time of each kept split.
    """
    files = os.listdir(modelLocation)

    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []
        # Loop over all models.
        for f in files:
            # Skip anything that is not a model data file.
            if f.split(".")[-1] != "dat":
                continue

            # Open it and grab the models and sensor list.
            # os.path.join is robust to modelLocation lacking a trailing
            # separator (the old "+" concatenation was not).
            fn = dataio.loadData(os.path.join(modelLocation, f))
            fn.matrixToModel(fn.modelList)

            cd, td = bbdata.getdata(
                oldSplit, newSplit, comp=compress, sens=fn.sensors, readLocation=dataLocation
            )

            cd2 = cd
            if neighborhood:
                local = neighborclusters[str(fn.sensors)]
                cd2 = ncluster.convertNeighborhood(cd, local)

            # Force a 2-D column layout for the annealer.
            cd2 = numpy.array(cd2, ndmin=2).T

            sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

            # analysis.ratio can fail on windows with no usable splits;
            # fall back to zero counts so the document keeps its shape.
            # (Narrowed from a bare "except:" which also swallowed
            # KeyboardInterrupt/SystemExit.)
            try:
                val, counts = analysis.ratio(sData.values(), fn.models)
            except Exception:
                counts = [0] * len(fn.models)

            tmpDoc += counts

        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        # NOTE: the original reassigned oldSplit = newSplit here; that value
        # was dead because oldSplit is rebuilt from s[0] each iteration.

    return dVector, times
Example #2
0
                # NOTE(review): fragment -- the enclosing function and loop
                # headers are outside this excerpt; names like oldSplit,
                # compress, fn, tmpDoc are presumably bound by them.
                # Fetch compressed sensor data for this time window.
                cd, td = bbdata.getdata(oldSplit, newSplit, \
                                    comp = compress, \
                                    sens = fn.sensors,
                                    readLocation = dataLocation)

                # Remap raw channels through the sensor's neighborhood clusters,
                # then force a 2-D column layout for the annealer.
                local = neighborclusters[str(fn.sensors)]
                cd2 = ncluster.convertNeighborhood(cd, local)
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                #for each split, make a document matrix and append it to the
                #ongoing tdmatrix
                # NOTE(review): bare except silently zeroes the counts on ANY
                # failure -- consider narrowing to Exception.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)

                tmpDoc += counts

        # Unlike the sibling example, every split's document is kept
        # unconditionally (the minBehaviour filter is commented out).
        #if len(tmpDoc) >= minBehaviour:
        dVector.append(tmpDoc)
        times.append(oldSplit)

        oldSplit = newSplit

    #Load plsa data
    lsaData = dataio.loadData(lsaLocation)
Example #3
0
def makeModelCounts(splits, modelLocation, dataLocation,
                    neighborhoodLocation=None, minBehavior=0,
                    compress=2, splitLength=8):
    """
    Makes a set of counts for a given dataset and models.

    For each (start, end) timestamp pair in ``splits``, every ``.dat`` model
    file under ``modelLocation`` is loaded, the matching sensor data is read,
    optionally preclustered via the neighborhoods at ``neighborhoodLocation``,
    split with ``markov_anneal.splitLocalMax`` and scored with
    ``analysis.ratio``.  Documents shorter than ``minBehavior`` are dropped.

    Returns the datavector and the associated split times.
    """
    files = os.listdir(modelLocation)

    neighborhood = False
    dVector = []
    times = []

    if neighborhoodLocation:
        neighborclusters = ncluster.parse(neighborhoodLocation)
        neighborhood = True

    # Iterate over splits.
    for s in splits:
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")

        tmpDoc = []
        # Loop over all models.
        for f in files:
            # It is a data file.
            if f.split('.')[-1] == 'dat':
                # Open it and grab the models and sensor list.
                # os.path.join tolerates a missing trailing separator on
                # modelLocation, unlike plain string concatenation.
                fn = dataio.loadData(os.path.join(modelLocation, f))
                fn.matrixToModel(fn.modelList)

                # Parenthesized call replaces the fragile "\" continuations.
                cd, td = bbdata.getdata(oldSplit, newSplit,
                                        comp=compress,
                                        sens=fn.sensors,
                                        readLocation=dataLocation)

                cd2 = cd
                if neighborhood:
                    local = neighborclusters[str(fn.sensors)]
                    cd2 = ncluster.convertNeighborhood(cd, local)

                # Force a 2-D column layout for the annealer.
                cd2 = numpy.array(cd2, ndmin=2)
                cd2 = cd2.T

                sData = markov_anneal.splitLocalMax(cd2, td, splitLength)

                # Zero the counts when scoring fails for this window;
                # narrowed from a bare "except:" so KeyboardInterrupt and
                # SystemExit still propagate.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except Exception:
                    counts = [0] * len(fn.models)
                tmpDoc += counts

        if len(tmpDoc) >= minBehavior:
            dVector.append(tmpDoc)
            times.append(oldSplit)

        # Dropped dead "oldSplit = newSplit": oldSplit is rebuilt from s[0]
        # at the top of every iteration.

    return dVector, times
Example #4
0
        # NOTE(review): fragment of a larger loop -- the enclosing function,
        # the loop header binding s, and the initialization of i/files are
        # outside this excerpt.  Python 2 syntax (print statement).
        print i
        i+=1
        # Window boundaries for this split.
        oldSplit = datetime.datetime.strptime(s[0], "%Y-%m-%d %H:%M:%S")
        newSplit = datetime.datetime.strptime(s[1], "%Y-%m-%d %H:%M:%S")
        tmpDoc = []

        # Presumably silences output/warnings on channel 2 while the models
        # run -- TODO confirm suppress semantics.
        suppress.suppress(2)
        #Get the sensor blocks
        for f in files:
            #It is a data file.
            if f.split('.')[-1] == 'dat':

                #Open it and grab the models and sensor list
                fn = dataio.loadData(modelDirectory + str(f))
                fn.matrixToModel(fn.modelList)
                # Note: this example calls bbdata.comp (not getdata) and does
                # no neighborhood preclustering.
                cd, td = bbdata.comp(oldSplit, newSplit, \
                                    comp = compress, \
                                    sens = fn.sensors,
                                    readLocation = dataDirectory)
                                    
                sData = markov_anneal.splitLocalMax(cd, td, splitLen)
                
                #for each split, make a document matrix and append it to the
                #ongoing tdmatrix
                # NOTE(review): bare except hides all errors; counts/val are
                # zeroed but never appended anywhere visible in this excerpt.
                try:
                    val, counts = analysis.ratio(sData.values(), fn.models)
                except:
                    counts = [0] * len(fn.models)
                    val = [0] * len(fn.models)
        suppress.restore(2)