Beispiel #1
0
    def learn(self, Xtrain, ytrain):
        """
        Estimate a Gaussian (mean, std) for every class/feature pair.

        Sets ``self.numclasses`` to the number of distinct labels in
        ``ytrain`` and ``self.numfeatures`` to ``Xtrain.shape[1]``, or one
        less when ``self.params['usecolumnones']`` is not True (the last
        column is then never read).

        ``self.means`` and ``self.stds`` are ``(numclasses, numfeatures)``
        arrays. Row ``k`` holds the statistics of the k-th smallest label, so
        for the usual 0/1 labelling the row index equals the label.

        Parameters:
            Xtrain: 2-D numpy array, one sample per row.
            ytrain: class label of each row of ``Xtrain``.
        """
        labels = np.asarray(ytrain).ravel()
        # np.unique returns the distinct labels in sorted order.
        classes = np.unique(labels)

        # set numclasses and numfeatures
        self.numclasses = len(classes)
        self.numfeatures = Xtrain.shape[1] - 1
        if self.params['usecolumnones'] == True:
            self.numfeatures += 1

        origin_shape = (self.numclasses, self.numfeatures)
        self.means = np.zeros(origin_shape)
        self.stds = np.zeros(origin_shape)

        # One pass per class instead of two hard-coded copies for 0 and 1.
        for row, label in enumerate(classes):
            members = Xtrain[labels == label]
            for col in range(self.numfeatures):
                # utils helpers are handed a plain list of the column values.
                column = list(members[:, col])
                self.means[row][col] = utils.mean(column)
                self.stds[row][col] = utils.stdev(column)

        assert self.means.shape == origin_shape
        assert self.stds.shape == origin_shape
Beispiel #2
0
    def learn(self, Xtrain, ytrain):
        
        #print self.usecolumnones
        # print(Xtrain)
        if not self.usecolumnones:     
            Xtrain = Xtrain[:,0:-1]
        # print Xtrain.shape[1]
        # print Xtrain
        num_features  = Xtrain.shape[1]     
        
        indices_1 = ytrain  == 1 
        indices_0 = ytrain  == 0
        self.prior_1 = float(sum(indices_1))/Xtrain.shape[0]
        self.prior_0 = 1.0 - self.prior_1
        for i in range(num_features):
            feature = Xtrain[:,i]
            numbers_0 = feature[indices_0]
            #print(numbers_0)
            mean = utils.mean(numbers_0)
            stdev = utils.stdev(numbers_0)
            self.meanvar_0.append([stdev**2, mean])
            numbers_1 = feature[indices_1]
            mean = utils.mean(numbers_1)
            stdev = utils.stdev(numbers_1)
            self.meanvar_1.append([stdev**2, mean])

        print self.meanvar_1, len(self.meanvar_1)
        print self.meanvar_0, len(self.meanvar_0)
    def learn(self, Xtrain, ytrain):
        """Split the training rows by label and summarise each group.

        The last column of ``Xtrain`` is dropped unless the
        ``'usecolumnones'`` parameter is truthy. Rows whose label equals 0 go
        to ``self.x_Class0``; every other row goes to ``self.x_Class1``.
        ``utils.mean`` / ``utils.stdev`` are then applied to each group as a
        whole, and ``self.ymean_Class1`` is ``utils.mean(ytrain)`` with
        ``self.ymean_Class0`` as its complement.
        """
        if not self.getparams()['usecolumnones']:
            Xtrain = Xtrain[:, :-1]
        width = Xtrain.shape[1]

        # buckets[0] collects label-0 rows, buckets[1] collects the rest.
        buckets = ([], [])
        for idx in range(len(ytrain)):
            target = 0 if ytrain[idx] == 0 else 1
            buckets[target].append(Xtrain[idx])

        zero_rows, other_rows = buckets
        # Reshape so each group is 2-D with `width` columns.
        self.x_Class0 = np.asarray(zero_rows).reshape(len(zero_rows), width)
        self.x_Class1 = np.asarray(other_rows).reshape(len(other_rows), width)

        self.mean_Class0 = utils.mean(self.x_Class0)
        self.std_Class0 = utils.stdev(self.x_Class0)

        self.mean_Class1 = utils.mean(self.x_Class1)
        self.std_Class1 = utils.stdev(self.x_Class1)

        self.ymean_Class1 = utils.mean(ytrain)
        self.ymean_Class0 = 1 - self.ymean_Class1
Beispiel #4
0
    def learn(self, Xtrain, ytrain, obj):
        """Learn class priors and per-feature Gaussian parameters.

        ``self.nof`` is the number of columns used: all of them when
        ``self.usecolumnones`` is True, otherwise all but the last. The
        fractions of rows labelled 0 and 1 are appended, in that order, to
        ``self.prior_prob``. For each used column ``i``,
        ``self.prob_table["Feature<i>"]`` maps each label (0, 1) to
        ``{"mu": mean, "sig": stdev}`` of that column over the rows with
        that label.

        Parameters:
            Xtrain: 2-D numpy array, one sample per row.
            ytrain: numpy array of labels aligned with the rows of Xtrain.
            obj: not used by this method.
        """
        if self.usecolumnones == True:
            self.nof = Xtrain.shape[1]
        else:
            self.nof = Xtrain.shape[1] - 1

        # Convert the denominator to float so the priors are true fractions
        # even where `/` on two ints floors (Python 2 without __future__).
        total = float(Xtrain.shape[0])
        postrain = Xtrain[ytrain == 0]
        negtrain = Xtrain[ytrain == 1]
        posprior = postrain.shape[0] / total
        negprior = negtrain.shape[0] / total
        self.prior_prob.extend((posprior, negprior))

        for i in range(self.nof):
            per_class = {}
            for targDom in range(2):
                values = Xtrain[ytrain == targDom, i]
                per_class[targDom] = {
                    "mu": utils.mean(values),
                    "sig": utils.stdev(values),
                }
            self.prob_table["Feature" + str(i)] = per_class
Beispiel #5
0
    def calculate_mv(self, zero, one):
        """Append a ``[mean, stdev]`` pair per column to the class tables.

        ``zero`` and ``one`` are indexable collections of rows. The number of
        columns is taken from the first row of each collection. Statistics
        for the columns of ``zero`` go to ``self.mv0`` and those for ``one``
        go to ``self.mv1``, in column order.
        """
        for col in range(len(zero[0])):
            column = [row[col] for row in zero]
            self.mv0.append([utils.mean(column), utils.stdev(column)])

        for col in range(len(one[0])):
            column = [row[col] for row in one]
            self.mv1.append([utils.mean(column), utils.stdev(column)])
 def learn(self, Xtrain, ytrain):
     """Learn class priors and per-feature mean/stdev for labels 0 and non-0.

     ``self.features`` is the number of columns used; the last column is
     dropped when ``self.usecolumnones`` is falsy. ``self.priozero`` is the
     fraction of rows labelled 0 and ``self.prioone`` its complement.
     ``self.meanstdev[c, 0, f]`` holds the mean and
     ``self.meanstdev[c, 1, f]`` the stdev of feature ``f``, where ``c`` is 0
     for rows labelled 0 and 1 for all remaining rows.

     Parameters:
         Xtrain: 2-D numpy array, one sample per row.
         ytrain: numpy array of labels aligned with the rows of Xtrain.
     """
     self.features = Xtrain.shape[1]
     if not self.usecolumnones:
         self.features -= 1
         Xtrain = Xtrain[:, 0:self.features]
     zeroindex = ytrain == 0
     # Convert to float BEFORE dividing; dividing two ints first floors the
     # prior to 0 under Python 2.
     self.priozero = float(sum(zeroindex)) / Xtrain.shape[0]
     self.prioone = 1 - self.priozero
     classzero = Xtrain[zeroindex, :]
     # `~` is the logical NOT of a boolean mask; unary minus on boolean
     # arrays is rejected by current NumPy.
     classone = Xtrain[~zeroindex, :]
     self.meanstdev = np.empty((2, 2, self.features))
     for cls, rows in enumerate((classzero, classone)):
         for f in range(self.features):
             data = rows[:, f]
             self.meanstdev[cls, 0, f] = utils.mean(data)
             self.meanstdev[cls, 1, f] = utils.stdev(data)
 def learn(self, Xtrain, ytrain):
     """Group training rows by label and store per-column (mean, stdev).

     For every distinct label in ``ytrain``, ``self.summaries[label]`` is set
     to a list of ``(mean, stdev)`` tuples, one per column of ``Xtrain``
     except the last, computed over the rows carrying that label.
     """
     # Separate by class
     separated = {}
     for tt in range(Xtrain.shape[0]):
         separated.setdefault(ytrain[tt], []).append(Xtrain[tt])
     # items() works on both Python 2 and 3; iteritems() is Python 2 only.
     for classValue, instances in separated.items():
         # zip(*instances) transposes the rows into per-column tuples.
         summ = [(utils.mean(attribute), utils.stdev(attribute))
                 for attribute in zip(*instances)]
         # Discard the statistics of the last column
         # (presumably the constant ones column -- confirm with the caller).
         del summ[-1]
         self.summaries[classValue] = summ
Beispiel #8
0
 def learn(self, Xtrain, ytrain):
     """Compute per-class, per-column (mean, stdev) summaries.

     Rows of ``Xtrain`` are bucketed by their label in ``ytrain``. For each
     label, ``self.summaries[label]`` becomes a list of ``(mean, stdev)``
     tuples covering every column but the last.
     """
     # Separate by class
     separated = {}
     for tt in range(Xtrain.shape[0]):
         inputv = Xtrain[tt]
         outputy = ytrain[tt]
         separated.setdefault(outputy, []).append(inputv)
     # dict.items() exists on Python 2 and 3, unlike iteritems().
     for classValue, instances in separated.items():
         # Transpose rows to columns, then summarise each column.
         summ = [(utils.mean(attribute), utils.stdev(attribute))
                 for attribute in zip(*instances)]
         # The final column's statistics are dropped
         # (presumably the bias/ones column -- TODO confirm).
         del summ[-1]
         self.summaries[classValue] = summ
 def divide(self, ds):
     """Return (mean, stdev) for every column of ``ds`` except the last.

     ``ds`` is an iterable of equally long rows; it is transposed with
     ``zip`` so that each column is summarised on its own.
     """
     column_stats = []
     for column in zip(*ds):
         column_stats.append((utils.mean(column), utils.stdev(column)))
     # Drop the entry belonging to the last column.
     column_stats.pop()
     return column_stats
Beispiel #10
0
input_data.close()

# Set up default ranges: if either axis has both bounds left at 0, derive
# both axes from the data, rounded outwards to whole numbers.
if (binrange[0] == 0 and binrange[1] == 0) or \
            (binrange[2] == 0 and binrange[3] == 0):
   xmaxminholder = utilities.minmax(data1)
   ymaxminholder = utilities.minmax(data2)
   binrange[0] = math.floor(xmaxminholder[0])
   binrange[1] = math.ceil(xmaxminholder[1])
   binrange[2] = math.floor(ymaxminholder[0])
   binrange[3] = math.ceil(ymaxminholder[1])

# Set up default number of bins according to "Scott's Choice":
# bin width = 3.5 * stdev / n**(1/3).
if bins[0] == 0 or bins[1] == 0:
   # The exponent must be written 1/3.0: with integer operands 1/3 is 0 under
   # Python 2, which would reduce the width to 3.5 * stdev.
   # NOTE(review): meaning of the 'no' argument to utilities.stdev is not
   # visible here -- confirm against utilities.
   xinttmp = 3.5 * utilities.stdev(data1,'no') / float(len(data1)) ** (1/3.0)
   yinttmp = 3.5 * utilities.stdev(data2,'no') / float(len(data2)) ** (1/3.0)
   bins[0] = int(math.ceil((binrange[1] - binrange[0])/xinttmp))
   bins[1] = int(math.ceil((binrange[3] - binrange[2])/yinttmp))
if opt.normalize: 
   # Divide the per-point weight by (sample count * area of one bin).
   pointweight /= float(len(data1)) * (
            binrange[1]-binrange[0])*(binrange[3]-binrange[2])/(bins[0]*bins[1])

xinterval = (binrange[1] - binrange[0])/bins[0]
yinterval = (binrange[3] - binrange[2])/bins[1]

# create a large 1-D array with every bin 
# (x1y1 x1y2 ... x1yN x2y1 x2y2 ... ... xNyN)
for x in range(bins[0]):
   for y in range(bins[1]):
      phipsibins.append(0)
Beispiel #11
0
   try: # skip any lines where value is not a float
      data.append(float(words[column-1]))
   except ValueError:
      continue

input_data.close()

# Set up default ranges: when both bounds are 0, take them from the data,
# rounding the minimum down and the maximum up to whole numbers.
if (binrange[0] == 0 and binrange[1] == 0):
   xmaxminholder = utilities.minmax(data)
   binrange[0] = math.floor(xmaxminholder[0])
   binrange[1] = math.ceil(xmaxminholder[1])

# Set up default number of bins according to "Scott's Choice":
# bin width = 3.5 * stdev / n**(1/3); bin count = ceil(range / width).
# NOTE(review): meaning of the 'no' argument to utilities.stdev is not
# visible here -- confirm against utilities.
if bins == 0:
   inttmp = 3.5 * utilities.stdev(data,'no') / float(len(data)) ** (1/3.0)
   bins = int(math.ceil((binrange[1] - binrange[0])/inttmp))

# Divide the per-point weight by (sample count * range / bins).
if normalize: 
   pointweight /= float(len(data)) * (binrange[1] - binrange[0])/bins

# Width of a single bin.
interval = (binrange[1] - binrange[0])/bins

# Append one zero-initialised entry per bin to phipsibins.
for x in range(bins):
   phipsibins.append(0)

for x in range(len(data)):

   xval = data[x]
Beispiel #12
0
 def makegroups(dataset):
     """Return a list with one (mean, stdev) tuple per column of ``dataset``.

     ``dataset`` is an iterable of equally long rows; ``zip`` transposes it
     so each column can be summarised separately.
     """
     groups = []
     for attribute in zip(*dataset):
         groups.append((utils.mean(attribute), utils.stdev(attribute)))
     return groups
    def divide(self, ds):
        """Return (mean, stdev) for every column of ``ds`` except the last.

        ``ds`` is an iterable of equally long rows. The rows are transposed
        with ``zip`` and each resulting column is summarised with
        ``utils.mean`` and ``utils.stdev``. The entry for the last column is
        removed before returning.
        """
        # Body indented with spaces: a tab-indented body under a
        # space-indented ``def`` raises TabError on Python 3.
        dividedDS = [(utils.mean(x), utils.stdev(x)) for x in zip(*ds)]
        del dividedDS[-1]
        return dividedDS
Beispiel #14
0
    try:  # skip any lines where value is not a float
        data.append(float(words[column - 1]))
    except ValueError:
        continue

input_data.close()

# Set up default ranges: when both bounds are 0, take them from the data,
# rounding the minimum down and the maximum up to whole numbers.
if (binrange[0] == 0 and binrange[1] == 0):
    xmaxminholder = utilities.minmax(data)
    binrange[0] = math.floor(xmaxminholder[0])
    binrange[1] = math.ceil(xmaxminholder[1])

# Set up default number of bins according to "Scott's Choice":
# bin width = 3.5 * stdev / n**(1/3); bin count = ceil(range / width).
# NOTE(review): meaning of the 'no' argument to utilities.stdev is not
# visible here -- confirm against utilities.
if bins == 0:
    inttmp = 3.5 * utilities.stdev(data, 'no') / float(len(data))**(1 / 3.0)
    bins = int(math.ceil((binrange[1] - binrange[0]) / inttmp))

# Divide the per-point weight by (sample count * range / bins).
if normalize:
    pointweight /= float(len(data)) * (binrange[1] - binrange[0]) / bins

# Width of a single bin.
interval = (binrange[1] - binrange[0]) / bins

# Append one zero-initialised entry per bin to phipsibins.
for x in range(bins):
    phipsibins.append(0)

for x in range(len(data)):

    xval = data[x]
Beispiel #15
0
            for learnername, learner in classalgs.items():
                # Reset learner for new parameters
                learner.reset(params)
                print('Running learner = ' + learnername + ' on parameters ' +
                      str(learner.getparams()))
                # Train model
                learner.learn(trainset[0], trainset[1])
                # Test model
                predictions = learner.predict(testset[0])
                error = geterror(testset[1], predictions)
                print('Error for ' + learnername + ': ' + str(error))
                errors[learnername][p, r] = error

    for learnername, learner in classalgs.items():
        besterror = np.mean(errors[learnername][0, :])
        best_standard_error = util.stdev(
            errors[learnername][0, :]) / math.sqrt(numruns)
        bestparams = 0
        for p in range(numparams):
            aveerror = np.mean(errors[learnername][p, :])
            standard_error = util.stdev(
                errors[learnername][p, :]) / math.sqrt(numruns)
            if aveerror < besterror:
                besterror = aveerror
                best_standard_error = standard_error
                bestparams = p

        # Extract best parameters
        learner.reset(parameters[bestparams])
        print('Best parameters for ' + learnername + ': ' +
              str(learner.getparams()))
        print('Average error for ' + learnername + ': ' + str(besterror) +
Beispiel #16
0
    plt.ylabel('MSE')

    plt.subplot(212)
    plt.plot(arrCounterSGD, EpochErrAMS[0][0], color='green', linewidth=0.5)
    plt.xlabel('Epochs')
    plt.ylabel('MSE')

    plt.show()

    for learnername in regressionalgs:
        # Start from parameter setting 0 as the provisional best.
        besterror = np.mean(errors[learnername][0, :])
        # Sample standard deviation (ddof=1) of the errors across runs,
        # computed with numpy.
        std_err = np.std(errors[learnername][0, :], ddof=1)
        # Same quantity via the project's own stdev helper; per the original
        # author it agrees with numpy.std above.
        SDUtil = util.stdev(np.array(errors[learnername][0, :]))

        bestparams = 0
        for p in range(numparams):
            aveerror = np.mean(errors[learnername][p, :])
            if aveerror < besterror:
                # Keep both deviations in step with the new best setting p.
                # The util-based one previously re-read row 0 here.
                std_err = np.std(errors[learnername][p, :], ddof=1)
                SDUtil = util.stdev(np.array(errors[learnername][p, :]))
                besterror = aveerror
                bestparams = p

        # Standard error of the best setting's mean error:
        # sample standard deviation divided by sqrt(number of runs).
        std_err = std_err / math.sqrt(numruns)