def _initializeGoodModels(data, numModels, states):
    """Select a good initial pool of models to seed EM-based unsupervised learning.

    Based on the k-means++ seeding idea: the first model is built from a
    uniformly random sequence; each subsequent model is built from a data
    element chosen with probability proportional to its distance from the
    nearest already-selected model.

    Args:
        data: indexable collection of observation sequences.
        numModels: number of models to place in the initial pool.
        states: unused in this function; kept for interface compatibility
            with callers -- TODO confirm whether it should size the models.

    Returns:
        List of ``numModels`` models built via ``hmmsup.obsToModel``.
    """
    models = []
    distances = numpy.zeros((len(data)), float)
    # Seed the pool with a model built from one uniformly random sequence.
    seq = data[int(random.random() * len(data))]
    models.append(hmmsup.obsToModel(seq))
    for _ in range(numModels - 1):
        # Distance of every data element to its nearest selected model.
        # (The original built an unused throwaway model here per element;
        # that wasted work is removed.)
        for j in range(len(data)):
            tmpDistances = hmmsup.hmmDistAll(data[j], models)
            tmpDistances.sort()
            distances[j] = tmpDistances[0]
        # Normalize so distances form a probability distribution.
        distances /= sum(distances)
        # Roulette-wheel selection: pick element j with probability
        # distances[j] (dist / sum of all distances).
        val = random.random()
        distSum = 0
        for j in range(len(distances)):
            distSum += distances[j]
            if distSum >= val:
                # Make a new model from this data element.
                models.append(hmmsup.obsToModel(data[j]))
                break
        else:
            # Floating-point rounding can leave the cumulative sum just
            # below val; fall back to the last element so the pool always
            # grows by one model per iteration.
            models.append(hmmsup.obsToModel(data[len(data) - 1]))
    return models
def _initializeGoodModels(data, numModels, states):
    """Select a good initial pool of models to seed EM-based unsupervised learning.

    NOTE(review): this definition duplicates an identical one earlier in the
    file (only whitespace differed); at import time this later definition
    wins. Consider deleting one copy.

    Based on the k-means++ seeding idea: the first model comes from a
    uniformly random sequence; each later model comes from a data element
    chosen with probability proportional to its distance from the nearest
    already-selected model.

    Args:
        data: indexable collection of observation sequences.
        numModels: number of models to place in the initial pool.
        states: unused in this function; kept for interface compatibility
            with callers -- TODO confirm whether it should size the models.

    Returns:
        List of ``numModels`` models built via ``hmmsup.obsToModel``.
    """
    models = []
    distances = numpy.zeros((len(data)), float)
    # Seed the pool with a model built from one uniformly random sequence.
    seq = data[int(random.random() * len(data))]
    models.append(hmmsup.obsToModel(seq))
    for _ in range(numModels - 1):
        # Distance of every data element to its nearest selected model.
        # (The original built an unused throwaway model here per element;
        # that wasted work is removed.)
        for j in range(len(data)):
            tmpDistances = hmmsup.hmmDistAll(data[j], models)
            tmpDistances.sort()
            distances[j] = tmpDistances[0]
        # Normalize so distances form a probability distribution.
        distances /= sum(distances)
        # Roulette-wheel selection: pick element j with probability
        # distances[j] (dist / sum of all distances).
        val = random.random()
        distSum = 0
        for j in range(len(distances)):
            distSum += distances[j]
            if distSum >= val:
                # Make a new model from this data element.
                models.append(hmmsup.obsToModel(data[j]))
                break
        else:
            # Floating-point rounding can leave the cumulative sum just
            # below val; fall back to the last element so the pool always
            # grows by one model per iteration.
            models.append(hmmsup.obsToModel(data[len(data) - 1]))
    return models
def _trainModels(tdata, models):
    """Train each model on the data elements assigned to it.

    This function is independent of the type of data split used by the
    ``_assign`` functions: ``tdata[i]`` is simply the list of observation
    sequences assigned to ``models[i]``.

    Args:
        tdata: per-model lists of observation sequences; same length as
            ``models``.
        models: list of models; each entry is replaced by a freshly
            re-seeded, Baum-Welch-trained model (mutated in place).

    Returns:
        The same ``models`` list, for convenience.
    """
    for i in range(len(models)):
        # Merge this model's observation sequences into one training set.
        seqSet = ghmm.SequenceSet(ghmm.Float(), [])
        for obs in tdata[i]:
            seqSet.merge(ghmm.EmissionSequence(ghmm.Float(), obs))
        # Re-seed the model from the element-wise mean sequence, using the
        # largest per-position std as the emission spread. NOTE(review):
        # numpy.array(tdata[i]) assumes all sequences assigned to this
        # model have equal length -- TODO confirm against the callers.
        s = numpy.array(tdata[i])
        nm = hmmsup.obsToModel(s.mean(axis=0), max(s.std(axis=0)))
        nm.normalize()
        nm.baumWelch(seqSet)
        models[i] = nm
        # Re-normalize transition and prior matrices after training.
        hmmsup.normalizeAMat(models[i])
        hmmsup.normalizePiMat(models[i])
    return models
def _trainModels(tdata, models):
    """Train each model on the data elements assigned to it.

    NOTE(review): this definition duplicates an identical one earlier in
    the file (only whitespace differed); at import time this later
    definition wins. Consider deleting one copy.

    This function is independent of the type of data split used by the
    ``_assign`` functions: ``tdata[i]`` is simply the list of observation
    sequences assigned to ``models[i]``.

    Args:
        tdata: per-model lists of observation sequences; same length as
            ``models``.
        models: list of models; each entry is replaced by a freshly
            re-seeded, Baum-Welch-trained model (mutated in place).

    Returns:
        The same ``models`` list, for convenience.
    """
    for i in range(len(models)):
        # Merge this model's observation sequences into one training set.
        seqSet = ghmm.SequenceSet(ghmm.Float(), [])
        for obs in tdata[i]:
            seqSet.merge(ghmm.EmissionSequence(ghmm.Float(), obs))
        # Re-seed the model from the element-wise mean sequence, using the
        # largest per-position std as the emission spread. NOTE(review):
        # numpy.array(tdata[i]) assumes all sequences assigned to this
        # model have equal length -- TODO confirm against the callers.
        s = numpy.array(tdata[i])
        nm = hmmsup.obsToModel(s.mean(axis=0), max(s.std(axis=0)))
        nm.normalize()
        nm.baumWelch(seqSet)
        models[i] = nm
        # Re-normalize transition and prior matrices after training.
        hmmsup.normalizeAMat(models[i])
        hmmsup.normalizePiMat(models[i])
    return models