Example 1
import random

import numpy

import hmmsup


def _initializeGoodModels(data, numModels, states):
    """Initialization technique for selecting a good pool of models with which
    to start the expectation-maximization algorithm for unsupervised learning.
    Based on the k-means++ paper.
    """
    models = []
    distances = numpy.zeros(len(data), float)

    # Select an initial random sequence and build the first model from it.
    seq = data[int(random.random() * len(data))]
    models.append(hmmsup.obsToModel(seq))

    for i in range(numModels - 1):
        # Distance from each sequence to its nearest existing model.
        for j in range(len(data)):
            tmpDistances = hmmsup.hmmDistAll(data[j], models)
            tmpDistances.sort()
            distances[j] = tmpDistances[0]

        distances /= sum(distances)

        # Select a sequence with probability distance / sum(all distances).
        val = random.random()
        distSum = 0

        for j in range(len(distances)):
            distSum += distances[j]

            if distSum >= val:
                # Build a new model from this data element.
                models.append(hmmsup.obsToModel(data[j]))
                break
    return models
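The distance-weighted seeding above can be demonstrated in isolation. Below is a minimal, self-contained sketch of the same sampling scheme, assuming plain Euclidean distance between points in place of the hmmsup.hmmDistAll model distance; seedPoints and the toy points array are illustrative, not part of the codebase. Note that the classic k-means++ paper weights by squared distance, while the code above (and this sketch) weights by the distance itself.

import numpy

def seedPoints(points, k):
    """Pick k seeds with the same distance-weighted sampling as above."""
    rng = numpy.random.default_rng()
    # First seed: a uniform random point, like the initial sequence above.
    seeds = [points[rng.integers(len(points))]]
    for _ in range(k - 1):
        # Distance from each point to its nearest existing seed.
        dists = numpy.min(
            [numpy.linalg.norm(points - s, axis=1) for s in seeds], axis=0)
        # Sample the next seed with probability distance / sum(distances).
        seeds.append(points[rng.choice(len(points), p=dists / dists.sum())])
    return numpy.array(seeds)

points = numpy.vstack([numpy.random.randn(20, 2),
                       numpy.random.randn(20, 2) + 5.0])
print(seedPoints(points, 3))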
Example 2
import numpy

import ghmm
import hmmsup


def _trainModels(tdata, models):
    """Train models using every data element designated by the _assign
    functions.

    Note: this function is independent of the type of data split used.
    """
    for i in range(len(models)):

        # Create a sequence set for training from the multiple observations.
        seqSet = ghmm.SequenceSet(ghmm.Float(), [])
        for tmpData in tdata[i]:
            seqSet.merge(ghmm.EmissionSequence(ghmm.Float(), tmpData))

        # Seed a fresh model from the average sequence, using the widest
        # per-timestep standard deviation as the emission spread.
        s = numpy.array(tdata[i])
        nm = hmmsup.obsToModel(s.mean(axis=0), max(s.std(axis=0)))
        nm.normalize()
        nm.baumWelch(seqSet)
        models[i] = nm
        # Alternative: retrain the existing model in place, e.g. with a
        # log-likelihood cutoff:
        # models[i].baumWelch(seqSet, loglikelihoodCutoff=0.000001)
        hmmsup.normalizeAMat(models[i])
        hmmsup.normalizePiMat(models[i])
    return models
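The "average sequence" seeding in the loop above is easy to check on its own: the per-timestep mean of the assigned sequences becomes the observation passed to hmmsup.obsToModel, and the largest per-timestep standard deviation becomes the emission spread. A self-contained sketch with hypothetical data, where the assigned array stands in for tdata[i]:

import numpy

# Hypothetical cluster of three observation sequences, 5 timesteps each.
assigned = numpy.array([
    [0.9, 1.1, 2.0, 3.1, 4.0],
    [1.0, 0.9, 2.2, 2.9, 4.2],
    [1.1, 1.0, 1.8, 3.0, 3.8],
])

mean_seq = assigned.mean(axis=0)     # per-timestep average sequence
spread = assigned.std(axis=0).max()  # widest per-timestep std dev

# In the code above these would seed the new model:
#   nm = hmmsup.obsToModel(mean_seq, spread)
print("mean sequence:", mean_seq)
print("emission spread:", spread)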