def train(self, trainData, trainLabels, iteration):
    """Run ``iteration`` passes of mistake-driven weight updates.

    For every training sample the features are extracted with
    ``features.featuresExtract(sample, self.f)`` and a label is predicted
    by ``self.learnWeights``. Whenever the prediction differs from the true
    label, ``self.weights`` is adjusted: the feature vector is added to the
    true label's entry and subtracted from the predicted label's entry.

    Args:
        trainData: indexable collection of samples, aligned by position
            with ``trainLabels``.
        trainLabels: true label of each sample; used as index into
            ``self.weights``.
        iteration: number of full passes over the training set.

    NOTE(review): assumes the entries of ``self.weights`` support
    elementwise ``+`` / ``-`` with the extracted feature vector
    (e.g. numpy arrays) -- confirm against the class initialiser.
    """
    for _ in range(iteration):
        for idx, expected in enumerate(trainLabels):
            vec = features.featuresExtract(trainData[idx], self.f)
            guessed = self.learnWeights(vec)
            if guessed != expected:
                # Build both replacement vectors before storing either one,
                # then promote the true label and demote the wrong guess.
                promoted = self.weights[expected] + vec
                demoted = self.weights[guessed] - vec
                self.weights[expected] = promoted
                self.weights[guessed] = demoted
def calConditionalProbabilities(self, data, labels):
    """Estimate the smoothed conditional probabilities P(F_i | Y) per label.

    The feature vectors of all samples sharing a label are summed
    element-wise, then each sum is turned into an estimate with additive
    smoothing: ``(count + self.k) / (self.dist[label] + 2 * self.k)``.

    Args:
        data: indexable collection of samples, aligned by position with
            ``labels``.
        labels: label of each sample.

    Returns:
        The dict stored in ``self.conds``, mapping every label seen in
        ``labels`` to its array of smoothed estimates.

    NOTE(review): the denominator presumably relies on ``self.dist[label]``
    being the number of training samples with that label and on the
    features being binary (hence ``2 * self.k``) -- confirm with the caller.
    """
    # First pass: per-label element-wise sum of the feature vectors.
    occurrence = {}
    for i, label in enumerate(labels):
        feature = np.array(features.featuresExtract(data[i], self.f))
        if label in occurrence:
            occurrence[label] += feature
        else:
            occurrence[label] = feature

    # Second pass: one smoothed estimate per distinct label. Iterating the
    # accumulated counts (rather than every entry of `labels`) avoids
    # recomputing the same array once per training sample.
    self.conds = {}
    for label, counts in occurrence.items():
        self.conds[label] = np.divide(
            counts + self.k, float(self.dist[label] + self.k * 2)
        )
    return self.conds
def calLogJointProbabilities(self, datum):
    """Compute the log joint probability of ``datum`` with every label.

    For each label in ``self.dist`` the result is::

        sum(f * log(conds)) + sum((1 - f) * log(1 - conds)) + log(prior)

    where ``f`` is the feature vector extracted from ``datum`` and
    ``conds`` / ``prior`` are the label's entries in ``self.conds`` and
    ``self.prior``.

    Args:
        datum: a single sample, passed to ``features.featuresExtract``
            together with ``self.f``.

    Returns:
        dict mapping each label to its log joint probability.

    NOTE(review): if any entry of ``self.conds[label]`` is exactly 0 or 1,
    ``np.log`` yields ``-inf`` and a ``0 * -inf`` product becomes ``nan``;
    presumably the smoothing constant keeps the estimates strictly inside
    (0, 1) -- confirm.
    """
    # The feature vector does not depend on the label, so convert it (and
    # its complement) once instead of on every loop iteration.
    present = np.array(features.featuresExtract(datum, self.f))
    absent = 1 - present

    logJoint = {}
    for label in self.dist.keys():
        cond = self.conds[label]
        logConds = np.log(cond)
        logCondsC = np.log(1 - cond)
        score = np.sum(present * logConds, dtype=float)
        score += np.sum(absent * logCondsC, dtype=float)
        score += math.log(self.prior[label])
        logJoint[label] = score
    return logJoint
def calConditionalProbabilities(self, data, labels):
    """Estimate smoothed per-label conditional feature probabilities.

    Sums the extracted feature vectors of all samples that share a label,
    then applies additive smoothing to every sum:
    ``(count + self.k) / (self.dist[label] + 2 * self.k)``.

    Args:
        data: indexable collection of samples, aligned by position with
            ``labels``.
        labels: label of each sample.

    Returns:
        The dict stored in ``self.conds``, mapping every label seen in
        ``labels`` to its array of smoothed estimates.

    NOTE(review): ``self.dist[label]`` is presumably the per-label sample
    count and the features are presumably binary (two outcomes, hence
    ``2 * self.k``) -- confirm with the caller.
    """
    # Accumulate the element-wise feature totals for each label.
    occurrence = {}
    for i, label in enumerate(labels):
        feature = np.array(features.featuresExtract(data[i], self.f))
        if label not in occurrence:
            occurrence[label] = feature
        else:
            occurrence[label] += feature

    # Smooth once per distinct label; looping over `labels` itself would
    # redo the identical division for every training sample.
    self.conds = {}
    for label, counts in occurrence.items():
        denominator = float(self.dist[label] + self.k * 2)
        self.conds[label] = np.divide(counts + self.k, denominator)
    return self.conds
def calLogJointProbabilities(self, datum):
    """Calculate the log of the joint probability of ``datum`` and each label.

    The extracted feature vector is used as an indicator array of the
    features equal to 1, and ``1 - feature`` as the indicator of the
    features equal to 0. For every label in ``self.dist`` the products of
    these indicators with ``log(conds)`` and ``log(1 - conds)`` are summed
    and the log of the label's prior is added.

    Args:
        datum: a single sample, passed to ``features.featuresExtract``
            together with ``self.f``.

    Returns:
        dict mapping each label to its log joint probability.

    NOTE(review): an entry of ``self.conds[label]`` equal to exactly 0 or 1
    makes ``np.log`` return ``-inf`` and a ``0 * -inf`` product ``nan``;
    presumably smoothing prevents this -- confirm.
    """
    # Label-independent work is done once, outside the loop.
    is_one = np.array(features.featuresExtract(datum, self.f))
    is_zero = 1 - is_one

    logJoint = {}
    for label in self.dist.keys():
        # log of P(f_i = 1 | Y = y)
        logConds = np.log(self.conds[label])
        # log of P(f_i = 0 | Y = y)
        logCondsC = np.log(1 - self.conds[label])

        total = np.sum(is_one * logConds, dtype=float)
        total += np.sum(is_zero * logCondsC, dtype=float)
        # add the log of P(Y = y)
        total += math.log(self.prior[label])
        logJoint[label] = total
    return logJoint
def classify(self, testData):
    """Predict a label for every sample in ``testData``.

    Each sample is converted with ``features.featuresExtract(sample, self.f)``
    and the resulting feature vector is passed to ``self.learnWeights``,
    whose return value is taken as the prediction.

    Args:
        testData: iterable of samples to label.

    Returns:
        list of predictions, in the same order as ``testData``.
    """
    return [
        self.learnWeights(features.featuresExtract(sample, self.f))
        for sample in testData
    ]