def learn(self, Xtrain, ytrain): """ In the first code block, you should set self.numclasses and self.numfeatures correctly based on the inputs and the given parameters (use the column of ones or not). In the second code block, you should compute the parameters for each feature. In this case, they're mean and std for Gaussian distribution. """ ### YOUR CODE HERE # check the number of classes num_of_classes = [] for i in ytrain: if i not in num_of_classes: num_of_classes.append(i) # set numclasses and numfeatures self.numclasses = len(num_of_classes) self.numfeatures = (Xtrain.shape[1]) - 1 if (self.params['usecolumnones'] == True): self.numfeatures += 1 ### END YOUR CODE origin_shape = (self.numclasses, self.numfeatures) self.means = np.zeros(origin_shape) self.stds = np.zeros(origin_shape) ### YOUR CODE HERE # split data by class(y value is 0 or 1) class_0 = [] class_1 = [] for i in range(len(ytrain)): if ytrain[i] == 0: class_0.append(Xtrain[i]) if ytrain[i] == 1: class_1.append(Xtrain[i]) # mean and std for class_0 for i in range(self.numfeatures): feature = [] for j in range(len(class_0)): feature.append(class_0[j][i]) self.means[0][i] = (utils.mean(feature)) self.stds[0][i] = (utils.stdev(feature)) # mean and std for class_1 for i in range(self.numfeatures): feature = [] for j in range(len(class_1)): feature.append(class_1[j][i]) self.means[1][i] = (utils.mean(feature)) self.stds[1][i] = (utils.stdev(feature)) ### END YOUR CODE assert self.means.shape == origin_shape assert self.stds.shape == origin_shape
def learn(self, Xtrain, ytrain):
    # Drop the column of ones when it is not used as a feature
    if not self.usecolumnones:
        Xtrain = Xtrain[:, 0:-1]

    num_features = Xtrain.shape[1]
    indices_1 = ytrain == 1
    indices_0 = ytrain == 0

    # Class priors estimated from the label frequencies
    self.prior_1 = float(sum(indices_1)) / Xtrain.shape[0]
    self.prior_0 = 1.0 - self.prior_1

    # Per-feature [variance, mean] for each class
    for i in range(num_features):
        feature = Xtrain[:, i]
        numbers_0 = feature[indices_0]
        mean = utils.mean(numbers_0)
        stdev = utils.stdev(numbers_0)
        self.meanvar_0.append([stdev ** 2, mean])

        numbers_1 = feature[indices_1]
        mean = utils.mean(numbers_1)
        stdev = utils.stdev(numbers_1)
        self.meanvar_1.append([stdev ** 2, mean])

    print(self.meanvar_1, len(self.meanvar_1))
    print(self.meanvar_0, len(self.meanvar_0))
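# Every snippet here relies on utils.mean / utils.stdev (aka utilities.stdev),
# whose implementations are not shown. A minimal sketch of what they are
# assumed to compute; the ddof=1 comparison in the last snippet suggests the
# sample (n - 1) standard deviation.
import math

def mean(numbers):
    # Arithmetic mean of a sequence.
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    # Sample standard deviation (divides by n - 1).
    avg = mean(numbers)
    variance = sum((x - avg) ** 2 for x in numbers) / float(len(numbers) - 1)
    return math.sqrt(variance)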
def learn(self, Xtrain, ytrain): """ Learns using the traindata """ if not self.getparams()['usecolumnones']: Xtrain = Xtrain[:, :-1] # print("Xtrain shape when useColumns is", self.getparams()['usecolumnones'], Xtrain.shape[1]) noOfFeatures = Xtrain.shape[1] noOfSamples = len(ytrain) self.x_Class0 = [] self.x_Class1 = [] for i in range(noOfSamples): # print(ytrain[i]) if ytrain[i] == 0: # print(i,"y=0") self.x_Class0.append(Xtrain[i]) else: # print(i,"y=1") self.x_Class1.append(Xtrain[i]) self.x_Class0 = np.asarray(self.x_Class0).reshape(len(self.x_Class0), Xtrain.shape[1]) self.x_Class1 = np.asarray(self.x_Class1).reshape(len(self.x_Class1), Xtrain.shape[1]) # print ("X_Class0.shape",self.x_Class0.shape) # print ("X_Class1.shape",self.x_Class1.shape) self.mean_Class0 = utils.mean(self.x_Class0) self.std_Class0 = utils.stdev(self.x_Class0) self.mean_Class1 = utils.mean(self.x_Class1) self.std_Class1 = utils.stdev(self.x_Class1) # print("mean_Class0.shape", self.mean_Class0.shape) # print("std_Class0.shape", self.std_Class0.shape) # print("mean_Class1.shape", self.mean_Class1.shape) # print("std_Class1.shape", self.std_Class1.shape) self.ymean_Class1 = utils.mean(ytrain) self.ymean_Class0 = 1 - self.ymean_Class1
def learn(self, Xtrain, ytrain, obj):
    """
    Learns using the traindata: first the prior of each class label, then
    the per-feature Gaussian parameters for a naive Bayes classifier.
    """
    if self.usecolumnones:
        self.nof = Xtrain.shape[1]
    else:
        self.nof = Xtrain.shape[1] - 1

    # Class priors from the label frequencies
    postrain = Xtrain[ytrain == 0]
    negtrain = Xtrain[ytrain == 1]
    posprior = postrain.shape[0] / float(Xtrain.shape[0])
    negprior = negtrain.shape[0] / float(Xtrain.shape[0])
    self.prior_prob.extend((posprior, negprior))

    # Per-feature, per-class Gaussian parameters (mean and stdev)
    for i in range(0, self.nof):
        feature = "Feature" + str(i)
        a = {}
        for targDom in range(0, 2):
            parameters = {}
            parameters["mu"] = utils.mean(Xtrain[ytrain == targDom, i])
            parameters["sig"] = utils.stdev(Xtrain[ytrain == targDom, i])
            a[targDom] = parameters
        self.prob_table[feature] = a
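# The prob_table built above is a nested dict: feature name -> class label ->
# {"mu", "sig"}. A quick illustration of reading a parameter back (the values
# below are made up):
prob_table = {"Feature0": {0: {"mu": 0.2, "sig": 1.1},
                           1: {"mu": 0.9, "sig": 0.7}}}
mu = prob_table["Feature0"][1]["mu"]    # mean of feature 0 among y == 1 samples
sig = prob_table["Feature0"][1]["sig"]  # stdev of feature 0 among y == 1 samples
print(mu, sig)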
def calculate_mv(self, zero, one):
    # Per-feature [mean, stdev] for the class-0 samples
    for i in range(0, len(zero[0])):
        feature_array = []
        for j in range(0, len(zero)):
            feature_array.append(zero[j][i])
        self.mv0.append([utils.mean(feature_array), utils.stdev(feature_array)])

    # Per-feature [mean, stdev] for the class-1 samples
    for i in range(0, len(one[0])):
        feature_array = []
        for j in range(0, len(one)):
            feature_array.append(one[j][i])
        self.mv1.append([utils.mean(feature_array), utils.stdev(feature_array)])
def learn(self, Xtrain, ytrain):
    self.features = Xtrain.shape[1]
    if not self.usecolumnones:
        self.features -= 1
        Xtrain = Xtrain[:, 0:self.features]

    # Class priors from the label frequencies
    zeroindex = ytrain == 0
    self.priozero = float(sum(zeroindex)) / Xtrain.shape[0]
    self.prioone = 1 - self.priozero

    # ~ (not unary minus) inverts a boolean mask in NumPy
    classzero = Xtrain[zeroindex, :]
    classone = Xtrain[~zeroindex, :]

    # meanstdev[c, 0, f] holds the mean and meanstdev[c, 1, f] the stdev
    # of feature f for class c
    self.meanstdev = np.empty((2, 2, self.features))
    for f in range(self.features):
        data = classzero[:, f]
        self.meanstdev[0, 0, f] = utils.mean(data)
        self.meanstdev[0, 1, f] = utils.stdev(data)
        data = classone[:, f]
        self.meanstdev[1, 0, f] = utils.mean(data)
        self.meanstdev[1, 1, f] = utils.stdev(data)
def learn(self, Xtrain, ytrain):
    # Separate the training samples by class label
    separated = {}
    for tt in range(Xtrain.shape[0]):
        inputv = Xtrain[tt]
        outputy = ytrain[tt]
        if outputy not in separated:
            separated[outputy] = []
        separated[outputy].append(inputv)

    # For each class, summarize every feature by (mean, stdev);
    # zip(*instances) transposes rows into per-feature columns
    for classValue, instances in separated.items():
        summ = [(utils.mean(attribute), utils.stdev(attribute))
                for attribute in zip(*instances)]
        # Drop the last column's summary (it is not treated as a feature)
        del summ[-1]
        self.summaries[classValue] = summ
def divide(self, ds):
    # zip(*ds) transposes the dataset so each x is one attribute's values
    dividedDS = [(utils.mean(x), utils.stdev(x)) for x in zip(*ds)]
    # Drop the last column's statistics (it is not treated as a feature)
    del dividedDS[-1]
    return dividedDS
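# The zip(*ds) idiom transposes the dataset so each tuple holds one
# attribute's values across all rows, which is what makes the per-column
# mean/stdev comprehension above work. A standalone illustration:
rows = [(1.0, 4.0, 0), (2.0, 5.0, 1), (3.0, 6.0, 0)]
columns = list(zip(*rows))
print(columns)  # [(1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (0, 1, 0)]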
input_data.close()

# Set up default ranges
if (binrange[0] == 0 and binrange[1] == 0) or \
   (binrange[2] == 0 and binrange[3] == 0):
    xmaxminholder = utilities.minmax(data1)
    ymaxminholder = utilities.minmax(data2)
    binrange[0] = math.floor(xmaxminholder[0])
    binrange[1] = math.ceil(xmaxminholder[1])
    binrange[2] = math.floor(ymaxminholder[0])
    binrange[3] = math.ceil(ymaxminholder[1])

# Set up default number of bins according to "Scott's Choice"
# (float exponent: a bare 1/3 is integer division under Python 2)
if bins[0] == 0 or bins[1] == 0:
    xinttmp = 3.5 * utilities.stdev(data1, 'no') / float(len(data1)) ** (1 / 3.0)
    yinttmp = 3.5 * utilities.stdev(data2, 'no') / float(len(data2)) ** (1 / 3.0)
    bins[0] = int(math.ceil((binrange[1] - binrange[0]) / xinttmp))
    bins[1] = int(math.ceil((binrange[3] - binrange[2]) / yinttmp))

if opt.normalize:
    pointweight /= float(len(data1)) * \
        (binrange[1] - binrange[0]) * (binrange[3] - binrange[2]) / (bins[0] * bins[1])

xinterval = (binrange[1] - binrange[0]) / bins[0]
yinterval = (binrange[3] - binrange[2]) / bins[1]

# create a large 1-D array with every bin
# (x1y1 x1y2 ... x1yN x2y1 x2y2 ... ... xNyN)
for x in range(bins[0]):
    for y in range(bins[1]):
        phipsibins.append(0)
    try:
        # Skip any lines where the value is not a float
        # (this sits inside the elided line-reading loop)
        data.append(float(words[column - 1]))
    except ValueError:
        continue

input_data.close()

# Set up default ranges
if binrange[0] == 0 and binrange[1] == 0:
    xmaxminholder = utilities.minmax(data)
    binrange[0] = math.floor(xmaxminholder[0])
    binrange[1] = math.ceil(xmaxminholder[1])

# Set up default number of bins according to "Scott's Choice"
if bins == 0:
    inttmp = 3.5 * utilities.stdev(data, 'no') / float(len(data)) ** (1 / 3.0)
    bins = int(math.ceil((binrange[1] - binrange[0]) / inttmp))

if normalize:
    pointweight /= float(len(data)) * (binrange[1] - binrange[0]) / bins

interval = (binrange[1] - binrange[0]) / bins

# create a 1-D array with one entry per bin
for x in range(bins):
    phipsibins.append(0)

for x in range(len(data)):
    xval = data[x]
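# Scott's choice sets the bin width from the sample standard deviation and
# the sample size: h = 3.5 * sigma / n^(1/3). A standalone check of the
# arithmetic with made-up data:
import math

data = [0.5, 1.2, 1.9, 2.6, 3.3, 4.0, 4.7]
n = len(data)
avg = sum(data) / float(n)
sigma = math.sqrt(sum((x - avg) ** 2 for x in data) / (n - 1))
width = 3.5 * sigma / n ** (1 / 3.0)
nbins = int(math.ceil((max(data) - min(data)) / width))
print(width, nbins)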
def makegroups(dataset):
    # Summarize each attribute (column) by its (mean, stdev)
    groups = [(utils.mean(attribute), utils.stdev(attribute))
              for attribute in zip(*dataset)]
    return groups
for learnername, learner in classalgs.items():
    # Reset learner for new parameters
    learner.reset(params)
    print('Running learner = ' + learnername + ' on parameters ' + str(learner.getparams()))
    # Train model
    learner.learn(trainset[0], trainset[1])
    # Test model
    predictions = learner.predict(testset[0])
    error = geterror(testset[1], predictions)
    print('Error for ' + learnername + ': ' + str(error))
    errors[learnername][p, r] = error

for learnername, learner in classalgs.items():
    # Start from the first parameter setting, then keep the setting with
    # the lowest average error across runs
    besterror = np.mean(errors[learnername][0, :])
    best_standard_error = util.stdev(errors[learnername][0, :]) / math.sqrt(numruns)
    bestparams = 0
    for p in range(numparams):
        aveerror = np.mean(errors[learnername][p, :])
        standard_error = util.stdev(errors[learnername][p, :]) / math.sqrt(numruns)
        if aveerror < besterror:
            besterror = aveerror
            best_standard_error = standard_error
            bestparams = p

    # Extract best parameters
    learner.reset(parameters[bestparams])
    print('Best parameters for ' + learnername + ': ' + str(learner.getparams()))
    print('Average error for ' + learnername + ': ' + str(besterror) +
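# The standard error reported above is the usual stdev / sqrt(number of
# runs). A quick standalone check with made-up per-run errors:
import math

run_errors = [0.26, 0.24, 0.27, 0.25, 0.23]
avg = sum(run_errors) / float(len(run_errors))
sd = math.sqrt(sum((e - avg) ** 2 for e in run_errors) / (len(run_errors) - 1))
standard_error = sd / math.sqrt(len(run_errors))
print(avg, standard_error)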
plt.ylabel('MSE')

plt.subplot(212)
plt.plot(arrCounterSGD, EpochErrAMS[0][0], color='green', linewidth=0.5)
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.show()

for learnername in regressionalgs:
    besterror = np.mean(errors[learnername][0, :])
    # Standard deviation via numpy.std with ddof=1 ...
    std_err = np.std(errors[learnername][0, :], ddof=1)
    # ... and via utilities.py's stdev; the two give the same result
    SDUtil = util.stdev(np.array(errors[learnername][0, :]))
    bestparams = 0
    for p in range(numparams):
        aveerror = np.mean(errors[learnername][p, :])
        if aveerror < besterror:
            std_err = np.std(errors[learnername][p, :], ddof=1)
            # Use the current parameter setting p (the original indexed [0, :])
            SDUtil = util.stdev(np.array(errors[learnername][p, :]))
            besterror = aveerror
            bestparams = p

    # Using the standard deviation function in module utilities, we computed
    # the standard deviation of the errors over several runs for each regwgt,
    # keeping the minimum-error setting over the three regwgt parameters.
    # Here we convert each standard deviation (numpy and utilities) into a
    # standard error:
    std_err = std_err / math.sqrt(numruns)