Beispiel #1
0
    def train(self, inputs):
        """Cluster ``inputs`` around ``self.k`` means by repeated reassignment.

        The means start as ``self.k`` items sampled from ``inputs``.  Each pass
        labels every input with ``self.classify`` and then replaces each mean
        with the ``vector_mean`` of the inputs carrying that label.  The loop
        ends on the first pass whose labels equal the previous pass's labels;
        those labels are stored in ``self.last_assignments``.

        Args:
            inputs: sequence of points; it must hold at least ``self.k``
                items, otherwise ``random.sample`` raises ``ValueError``.
        """
        self.means = random.sample(inputs, self.k)
        assignments = None

        # Diagnostic output kept from the original implementation.
        print(self.k)
        while True:
            # Label every input with self.classify.
            new_assignments = list(map(self.classify, inputs))

            # No label changed since the previous pass: we're done.
            if assignments == new_assignments:
                self.last_assignments = assignments
                return

            # Otherwise keep the new labels and recompute the means.
            assignments = new_assignments

            for i in range(self.k):
                i_points = [p for p, a in zip(inputs, assignments) if a == i]
                # An empty cluster keeps its previous mean; this avoids a
                # divide-by-zero when averaging no points.
                if i_points:
                    self.means[i] = vector_mean(i_points)
Beispiel #2
0
    def train(self, input):
        """Cluster ``input`` around ``self.k`` means by repeated reassignment.

        Starts from ``self.k`` points sampled from ``input``, then alternates
        between labelling every point with ``self.classify`` and replacing each
        mean with the ``vector_mean`` of its labelled points, until a pass
        produces the same labels as the previous one.  The most recently
        adopted labels are kept in ``self.assignments_cluster`` for plotting.

        Args:
            input: sequence of points to cluster.  The name shadows the
                builtin ``input`` but is kept so existing callers that pass
                it by keyword continue to work.
        """
        # The loop below refers to the data as `inputs`; bind that name to the
        # parameter so it is always defined.
        inputs = input

        # choose k random points as the initial means
        self.means = random.sample(inputs, self.k)
        assignments = None

        while True:
            # find new assignments
            new_assignments = list(map(self.classify, inputs))
            print(new_assignments)

            # if no assignments have changed, we're done
            if assignments == new_assignments:
                return

            # otherwise keep the new assignments
            assignments = new_assignments

            # stored only so the clusters can be plotted afterwards
            self.assignments_cluster = new_assignments

            # and compute new means based on the new assignments
            for i in range(self.k):
                # find all points assigned to cluster i
                i_points = [p for p, a in zip(inputs, assignments) if a == i]

                # skip empty clusters so we don't divide by 0
                if i_points:
                    self.means[i] = vector_mean(i_points)
Beispiel #3
0
def ols_fit(xs: List[Vector],
            ys: List[float],
            learning_rate: float = 0.001,
            num_steps: int = 1000,
            batch_size: int = 1) -> Vector:
    """Estimate coefficients by minibatch gradient descent on squared error.

    Begins with one random coefficient per entry of ``xs[0]`` and makes
    ``num_steps`` sweeps over the data.  Within a sweep the data is walked in
    consecutive slices of ``batch_size`` rows; for each slice the mean of
    ``squared_error_gradient`` over its rows is used to take one
    ``gradient_step`` scaled by ``-learning_rate``.

    Returns the coefficient vector reached after the last sweep.
    """
    # One random starting coefficient per feature of the first row.
    coefficients = [random.random() for _ in xs[0]]

    for _sweep in range(num_steps):
        for lo in range(0, len(xs), batch_size):
            hi = lo + batch_size
            rows = zip(xs[lo:hi], ys[lo:hi])

            # Average the per-row gradients of this slice.
            per_row = [
                squared_error_gradient(x_i, y_i, coefficients)
                for x_i, y_i in rows
            ]
            batch_gradient = vector_mean(per_row)

            # Negative step size: move against the gradient.
            coefficients = gradient_step(coefficients, batch_gradient,
                                         -learning_rate)

    return coefficients
Beispiel #4
0
    def train(self, inputs):
        """Run k-means style reassignment on ``inputs`` until labels settle.

        ``self.means`` is seeded with ``self.k`` sampled inputs.  Every pass
        labels each input via ``self.classify``; when the labels match those
        of the previous pass the method returns, otherwise each mean is
        replaced by the ``vector_mean`` of the inputs carrying its index.
        """
        self.means = random.sample(inputs, self.k)
        previous = None

        while True:
            current = [self.classify(point) for point in inputs]

            # Labels identical to last pass: nothing left to update.
            if previous == current:
                return
            previous = current

            for cluster in range(self.k):
                members = []
                for point, label in zip(inputs, previous):
                    if label == cluster:
                        members.append(point)

                # Leave the mean untouched for an empty cluster so we never
                # average zero points.
                if members:
                    self.means[cluster] = vector_mean(members)
    def train(self, inputs):
        """Run at most two k-means reassignment passes over ``inputs``.

        ``self.means`` is seeded with ``self.k`` sampled inputs.  Each pass
        prints progress markers, labels every input with ``self.classify``
        and, unless the labels equal those of the previous pass, replaces each
        mean with the ``vector_mean`` of the inputs carrying its index.

        The ``print`` calls use the single-argument parenthesised form and the
        labels are materialised with ``list(...)`` so the method behaves the
        same on Python 2 and Python 3.
        """
        self.means = random.sample(inputs, self.k)
        assignments = None

        progress = 0
        while progress < 2:
            progress = progress + 1
            print("iteration : {0}".format(progress))

            print("--a--")
            # Find new assignments.  A real list is required: the labels are
            # compared with the previous pass and zipped once per cluster.
            new_assignments = list(map(self.classify, inputs))
            print("--b--")

            # If no assignments have changed, we're done.
            if assignments == new_assignments:
                return

            # Otherwise keep the new assignments,
            assignments = new_assignments

            print("--c--")
            for i in range(self.k):
                i_points = [p for p, a in zip(inputs, assignments) if a == i]
                # avoid divide-by-zero if i_points is empty
                if i_points:
                    self.means[i] = vector_mean(i_points)
Beispiel #6
0
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return the column-wise mean and standard deviation of a dataset.

    The number of columns is taken from the first vector in ``data``.
    """
    num_columns = len(data[0])
    column_means = vector_mean(data)

    column_std_devs = []
    for col in range(num_columns):
        column = [row[col] for row in data]
        column_std_devs.append(standard_deviation(column))

    return column_means, column_std_devs
Beispiel #7
0
def fit_model(parameters):
    """Fit a line to ``(x, y)`` pairs by gradient descent and sanity-check it.

    Starts from a random ``[slope, intercept]`` and takes 5000 steps of size
    0.001 against the mean ``linear_gradient`` over ``parameters``, printing
    the epoch and current estimate after each step.  Finally asserts that the
    slope lies in (19.9, 20.1) and the intercept in (4.9, 5.1).

    ``parameters`` is iterated once per epoch.
    """
    step_size = 0.001
    theta = [random.uniform(-1, 1) for _ in range(2)]

    for epoch in range(5000):
        gradients = [linear_gradient(x, y, theta) for x, y in parameters]
        # Negative step size: move against the averaged gradient.
        theta = gradient_step(theta, vector_mean(gradients), -step_size)
        print(epoch, theta)

    slope, intercept = theta
    assert 19.9 < slope < 20.1  # slope should be around 20
    assert 4.9 < intercept < 5.1  # intercept should be around 5
Beispiel #8
0
    def train(self, vectors):
        """Iterate k-means reassignment over ``vectors`` and return the labels.

        ``self.means`` is seeded with ``self.k`` sampled vectors.  Each pass
        labels every vector with ``self.classify``; once a pass reproduces the
        previous labels, the previous labels are returned.  Otherwise each
        mean is replaced by the ``vector_mean`` of the vectors carrying its
        index (empty clusters keep their mean).
        """
        self.means = random.sample(vectors, self.k)
        labels = None

        while True:
            fresh_labels = list(map(self.classify, vectors))

            if labels == fresh_labels:
                return labels
            labels = fresh_labels

            for cluster in range(self.k):
                members = []
                for vec, label in zip(vectors, labels):
                    if cluster == label:
                        members.append(vec)

                if members:
                    self.means[cluster] = vector_mean(members)
Beispiel #9
0
    def train(self, inputs):
        """Fit ``self.means`` to ``inputs`` and return the settled labels.

        Seeds ``self.means`` with ``self.k`` sampled inputs, then repeatedly
        labels every input with ``self.classify``.  When a pass yields the
        same labels as the one before, those labels are returned; until then
        every non-empty cluster's mean is replaced by the ``vector_mean`` of
        its members.
        """
        self.means = random.sample(inputs, self.k)
        settled = None

        while True:
            candidate = list(map(self.classify, inputs))

            if settled == candidate:
                return settled
            settled = candidate

            for cluster in range(self.k):
                members = []
                for point, label in zip(inputs, settled):
                    if label == cluster:
                        members.append(point)

                # Skip empty clusters; they keep their current mean.
                if members:
                    self.means[cluster] = vector_mean(members)
Beispiel #10
0
    def train(self, inputs):
        """Fit ``self.means`` to ``inputs``, tracking labels on the instance.

        ``self.means`` is seeded with ``self.k`` sampled inputs and
        ``self.assignments`` is reset to ``None``.  Each pass labels every
        input with ``self.classify``; if the labels equal ``self.assignments``
        the method returns, otherwise they are stored in ``self.assignments``
        and each non-empty cluster's mean becomes the ``vector_mean`` of its
        members.
        """
        self.means = random.sample(inputs, self.k)
        self.assignments = None

        while True:
            labels = [self.classify(point) for point in inputs]

            if self.assignments == labels:
                return
            self.assignments = labels

            for cluster in range(self.k):
                members = []
                for point, label in zip(inputs, self.assignments):
                    if label == cluster:
                        members.append(point)

                if members:
                    self.means[cluster] = vector_mean(members)
Beispiel #11
0
    def train(self, inputs):
        """Cluster ``inputs`` around ``self.k`` means until labels stop changing.

        ``self.means`` is seeded with ``self.k`` sampled inputs.  Each pass
        labels every input with ``self.classify``; when the labels equal those
        of the previous pass the method returns, otherwise each non-empty
        cluster's mean is replaced by the ``vector_mean`` of its members.
        """
        self.means = random.sample(inputs, self.k)
        assignments = None
        while True:
            # Map each element to a label (e.g. [1, 1, 0, 2, 1, ...]).
            # Materialised as a list so it can be compared with the previous
            # pass and zipped once per cluster.
            new_assignments = list(map(self.classify, inputs))

            # Same mapping as before: no further processing needed.
            if assignments == new_assignments:
                return

            # Remember the current mapping.
            assignments = new_assignments

            # Recompute the centre of every cluster.
            for i in range(self.k):
                # All elements currently labelled i.
                i_points = [p for p, a in zip(inputs, assignments) if a == i]
                if i_points:
                    # Their mean becomes the new centre.
                    self.means[i] = vector_mean(i_points)
    def train(self, inputs):
        """Run a fixed number of k-means reassignment passes over ``inputs``.

        ``self.means`` is seeded with ``self.k`` sampled inputs.  While a
        counter starting at 0 is below ``self.iters``, every input is labelled
        with ``self.classify`` and each non-empty cluster's mean is replaced
        by the ``vector_mean`` of its members.  There is no early exit on
        convergence.
        """
        # choose k random points from inputs as the initial means
        self.means = random.sample(inputs, self.k)

        count = 0

        while count < self.iters:
            # Label all inputs.  This must be a real list: it is zipped once
            # per cluster below, and a bare `map` iterator would be exhausted
            # after the first cluster on Python 3.
            assignments = list(map(self.classify, inputs))

            # and compute new means based on the new assignments
            for i in range(self.k):
                # find all points assigned to cluster i
                i_points = [p for p, a in zip(inputs, assignments) if a == i]

                # make sure i_points is not empty so we don't divide by 0
                if i_points:
                    self.means[i] = vector_mean(i_points)

            count += 1
Beispiel #13
0
def run():
    """Fit a line to the module-level ``inputs`` and check the result.

    Announces each setup stage (pausing two seconds after each message),
    starts from a random ``[slope, intercept]``, and takes 5000 gradient
    steps of size 0.001 against the mean ``linear_gradient`` over ``inputs``,
    printing the estimate after every step.  Finally prints the fitted values
    and asserts that the slope lies in (9.99, 10.11) and the intercept in
    (6.9, 7.1).
    """
    print("computing random values for theta")
    time.sleep(2)
    theta = [random.uniform(-1, 1) for _ in range(2)]

    print("Generating learning rate at 0.001")
    time.sleep(2)
    learning_rate = 0.001

    epoch = 0
    while epoch < 5000:
        gradients = [linear_gradient(x, y, theta) for x, y in inputs]
        # Negative step size: move against the averaged gradient.
        theta = gradient_step(theta, vector_mean(gradients), -learning_rate)
        print(f"epoch: {epoch}, theta: {theta}")
        epoch += 1

    slope, intercept = theta
    print(
        f"Final slope at {slope} and intercept at {intercept}.\n Expected --> slope: 10 intercept: 7"
    )
    assert 9.99 < slope < 10.11, "slope should be about 10"
    assert 6.9 < intercept < 7.1, "intercept should be about 7"
Beispiel #14
0
 def train(self, inputs):
     """Cluster ``inputs`` around ``self.k`` means until labels stop changing.

     ``self.means`` is seeded with ``self.k`` sampled inputs.  Each pass
     labels every input with ``self.classify``; when the labels equal those
     of the previous pass the method returns, otherwise each non-empty
     cluster's mean is replaced by the ``vector_mean`` of its members.
     """
     # choose k random points as the initial means
     self.means = random.sample(inputs, self.k)
     assignments = None

     while True:
         # find new assignments; a real list is needed because the labels are
         # compared with the previous pass and zipped once per cluster
         new_assignments = list(map(self.classify, inputs))

         # if no assignments have changed, we're done
         if assignments == new_assignments:
             return

         # otherwise, keep the new assignments
         assignments = new_assignments

         # and compute new means based on the new assignments
         for i in range(self.k):
             # find all the points assigned to cluster i (compare with the
             # loop index i, not the constant 1)
             i_points = [p for p, a in zip(inputs, assignments) if a == i]

             # make sure i_points is not empty so we don't divide by zero
             if i_points:
                 self.means[i] = vector_mean(i_points)
Beispiel #15
0
    def train(self, inputs):
        """Cluster ``inputs`` around ``self.k`` means until labels stop changing.

        ``self.means`` is seeded with ``self.k`` sampled inputs.  Each pass
        labels every input with ``self.classify``; when the labels equal those
        of the previous pass the method returns, otherwise each non-empty
        cluster's mean is replaced by the ``vector_mean`` of its members.
        """
        self.means = random.sample(inputs, self.k)
        assignments = None

        while True:
            # Find new assignments.  Materialise them: on Python 3 a bare
            # `map` object never compares equal to the previous one and is
            # exhausted by the first cluster's zip below.
            new_assignments = list(map(self.classify, inputs))

            # If no assignments have changed, we're done.
            if assignments == new_assignments:
                return

            # Otherwise keep the new assignments,
            assignments = new_assignments

            for i in range(self.k):
                i_points = [p for p, a in zip(inputs, assignments) if a == i]
                # avoid divide-by-zero if i_points is empty
                if i_points:
                    self.means[i] = vector_mean(i_points)
def gradient_descent_linear_model(X: Vector,
                                  y: Vector,
                                  learning_rate: float = 0.001,
                                  iterations: int = 5000,
                                  show_case: bool = True) -> None:
    """Run full-batch gradient descent for a two-parameter linear model.

    Starts from a random two-element ``theta`` and, for ``iterations``
    epochs, averages ``linear_gradient`` over every ``(X[i], y[i])`` pair and
    takes one ``gradient_step`` scaled by ``-learning_rate``.  When
    ``show_case`` is true the parameters and the averaged gradient are
    printed after every epoch.  Nothing is returned.
    """
    # Random starting point.
    theta = [random.uniform(-1, 1) for _ in range(2)]

    for epoch in range(iterations):
        # One gradient per training example.  These are directions in which
        # the error grows, not error values themselves.
        per_example = []
        for data, actual in zip(X, y):
            per_example.append(linear_gradient(data, actual, theta))

        # Average of all per-example gradients (again a direction, not the
        # mean squared error itself).
        mse_gradient = vector_mean(per_example)

        # Step from theta against that direction, hence the negative rate.
        theta = gradient_step(theta, mse_gradient, -learning_rate)

        # Optional progress trace.
        if show_case:
            print(f"{str(epoch)} - parameters: {str(theta)} -- mse average minimization gradient: {str(mse_gradient)}")
Beispiel #17
0
# Show both operands, then the result of each `la` helper in turn.
print("vector A = ", A)
print("vector B = ", B)

# (label, computation) pairs; each computation is deferred so it runs
# immediately before its own print, in the listed order.
_demos = [
    ("A + B = ", lambda: la.vector_add(A, B)),
    ("A - B = ", lambda: la.vector_subtract(A, B)),
    ("A and B summary = ", lambda: la.vector_sum([A, B])),
    ("10 * A = ", lambda: la.scalar_multiply(10, A)),
    ("A and B mean = ", lambda: la.vector_mean([A, B])),
    ("A dot B = ", lambda: la.dot(A, B)),
    ("A^2's summary = ", lambda: la.sum_of_squares(A)),
    ("A's magnitude = ", lambda: la.magnitude(A)),
    ("A's distance = ", lambda: la.distance(A, B)),
]
for _label, _compute in _demos:
    C = _compute()
    print(_label, C)

print()
Beispiel #18
0
def de_mean(data: List[Vector]) -> List[Vector]:
    """Recenter data to 0 by subtracting ``vector_mean(data)`` from each vector."""
    center = vector_mean(data)
    recentered = []
    for row in data:
        recentered.append(subtract(row, center))
    return recentered
Beispiel #19
0
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return ``(means, stdevs)`` computed position by position across ``data``.

    The number of positions is taken from the first vector in ``data``.
    """

    def column(position):
        # Values found at `position` in every vector.
        return [vector[position] for vector in data]

    width = len(data[0])
    means = vector_mean(data)
    stdevs = [standard_deviation(column(position)) for position in range(width)]
    return means, stdevs
    but it deviates in that instead of calculating the gradient on the whole dataset,
    we instead split the dataset into pieces (batches) from which we calculate the
    gradient.

    The advantage of this approach is mainly visible when working with large datasets.
    While the computation seems to follow O(n^2), it moves towards the optimal
    training parameters faster, since steps towards them are taken more often than
    when the gradient has to be computed over every single point in the dataset.
    Therefore, with this approach we can approach the optimal parameters for our models
    in fewer epochs than with regular gradient descent.

    """
    theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
    for epoch in range(iterations):
            for batch in minibatch([(data, actual) for data, actual in zip(X, y)], batch_size):
                gradient_mu = vector_mean([linear_gradient(data, actual, theta) for data, actual in batch])
                theta = gradient_step(theta, gradient_mu, -learning_rate)
                if(show_case):
                    print(f"{str(epoch)} - parameters: {str(theta)} -- mse average minimization gradient: {str(gradient_mu)}")

def stochastic_gradient_descent(X: List[T], 
                                y: List[T], 
                                batch_size: int=20,
                                learning_rate: float=0.001, 
                                iterations: float=1_000,
                                show_case: bool=True) -> None:
    """ Stochastic gradient descent (SGD)
    This technique of training is very similar to that of minibatch gradient descent.
    Many of its benefits are shared, but again, the time it takes to approach the optimal
    parameters for a model are smaller due to taking steps for every training example.
    A large trade off is that while we can find the optimal parameters much faster, the
Beispiel #21
0
# inputs = [(x,x) for x in range(-50, 50)]
# inputs = [(x,10*x) for x in range(-50, 50)]
# inputs = [(x,x*x) for x in range(-50, 50)]
# inputs = [(x,x/10) for x in range(-50, 50)]
def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    """Return the gradient of the squared error of a linear model at one point.

    The model predicts ``slope * x + intercept`` where
    ``theta = [slope, intercept]``.  With ``error = predicted - y``, the
    squared error ``error ** 2`` has partial derivatives ``2 * error * x``
    with respect to the slope and ``2 * error`` with respect to the intercept.

    Args:
        x: input value.
        y: observed (actual) value.
        theta: two-element ``[slope, intercept]``.

    Returns:
        ``[d/d_slope, d/d_intercept]`` of the squared error.
    """
    slope, intercept = theta
    predicted = slope * x + intercept  # the model's prediction
    error = predicted - y              # error is (predicted - actual)
    return [2 * error * x, 2 * error]
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Return the point reached from `v` by adding `step_size` times `gradient`.

    Asserts that `v` and `gradient` have the same length.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))
def sum_of_squares_gradient(v: Vector) -> Vector:
    """Return a new list holding twice each component of `v`."""
    doubled = []
    for component in v:
        doubled.append(2 * component)
    return doubled
# Fit slope and intercept to the (x, y) pairs in `inputs` by full-batch
# gradient descent: 5000 steps, each against the mean per-point gradient.
learning_rate = 0.001
# pick a random starting point
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]
for epoch in range(5000):
    # Compute the mean of the gradients
    grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
    # Take a step in that direction (the negative step size moves against
    # the gradient)
    theta = gradient_step(theta, grad, -learning_rate)
# NOTE(review): this print sits outside the loop, so only the last epoch and
# the final theta are shown; confirm per-epoch output was not intended.
print(epoch, theta)
slope, intercept = theta
# Expected-value checks, currently disabled:
# assert 19.9 < slope < 20.1, "slope should be about 20"
# assert 4.9 < intercept < 5.1, "intercept should be about 5"
    def test_vector_mean(self):
        """vector_mean of three 3-vectors matches the per-position averages."""
        result = vector_mean([[2, 3, 1], [1, 1, 2], [3, 1, 1]])

        # Per-position sums of the three input vectors.
        position_sums = (6, 5, 4)
        for position, total in enumerate(position_sums):
            self.assertAlmostEqual(total / 3, result[position])