Example #1
    def compute(self, size):
        """
        self.points is a matrix with n rows and d cols.
        bi is a matrix with k*log(n) rows and d cols.
        dist[i] represents sens(p_i) as in the formula discussed.
        """
        e = w_kmeans.Kmeans(self.points, np.expand_dims(self.weights, axis=0), self.k, 10)
        bi = e.compute()

        dist = utils.get_dist_to_centers(self.points, bi)  # distance of each point to its nearest center
        if self.weights is not None:  # self.weights is never None at this point
            dist /= np.sum(dist)  # normalize
        dist *= 2
        c = utils.get_centers(self.points, bi)  # index of each point's center
        c = self.find_cluester_size_weighted(c, W=self.weights)  # weighted size of each point's cluster
        dist += 4.0 / c  # add the cluster-size term of the formula
        t = np.sum(dist * self.weights)  # total sensitivity
        weights = 1 / (dist * size)
        weights *= t  # new weight t / (sens * size) for sampled points
        dist *= self.weights
        dist /= np.sum(dist)
        prob = dist  # the sampling probability
        points, weights = utils.sample(self.points, prob, size, weights=weights)
        return points, weights
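The arithmetic above is the sensitivity-sampling scheme that Example #4 below spells out more explicitly: each point gets a sensitivity bound s_i = 2*d_i/sum(d) + 4/c_i, points are sampled with probability proportional to s_i*w_i, and a sampled point is reweighted by t/(s_i*size). A minimal self-contained sketch of just that arithmetic (toy numbers and plain numpy; the names d, c, w, m are stand-ins, not the project's API):

import numpy as np

# Toy stand-ins for the quantities the snippet gets from utils:
# d = distance of each point to its nearest center,
# c = weighted size of the cluster each point belongs to,
# w = input point weights, m = coreset size.
d = np.array([0.5, 1.0, 2.0, 0.5])
c = np.array([2.0, 2.0, 2.0, 2.0])
w = np.array([1.0, 1.0, 1.0, 1.0])
m = 3

s = 2 * d / d.sum() + 4.0 / c  # per-point sensitivity bound
t = np.sum(s * w)              # total sensitivity
prob = s * w / t               # sampling probability (sums to 1)
u = t / (s * m)                # weight a point receives if sampled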
Example #2
    def __init__(self, base_network, source_iter, num_classes):
        self.eps = 0.0001
        self.stop = False
        self.base_network = base_network
        self.source_ctr = utils.get_centers(base_network, source_iter,
                                            num_classes)
        self.target_features = []
        self.num_classes = num_classes
        self.clustered_targets = {}
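Here get_centers is a different helper than in the other examples: it runs the network over a labeled source iterator and produces one centroid per class. A minimal sketch of what such a helper might compute (the name class_centers and the averaging behavior are assumptions inferred from this snippet, not the project's actual code):

import numpy as np

def class_centers(features, labels, num_classes):
    # Hypothetical stand-in for a get_centers-style helper: average the
    # feature vectors of each class to get one centroid per class.
    centers = np.zeros((num_classes, features.shape[1]))
    for c in range(num_classes):
        mask = labels == c
        if mask.any():
            centers[c] = features[mask].mean(axis=0)
    return centers

# toy usage
feats = np.random.default_rng(0).normal(size=(8, 4))
labels = np.array([0, 0, 1, 1, 2, 2, 0, 1])
print(class_centers(feats, labels, 3).shape)  # (3, 4)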
Example #3
    def compute(self):
        self.centers, _ = utils.sample(self.p, None, n=self.k)  # pick k random starting centers
        self.centers = np.reshape(self.centers, (self.k, 2))  # fix the shape to (k, d)
        dist = utils.get_centers(self.p, self.centers)  # index of the nearest center for each point
        points = self.p
        weights = self.w.T
        for j in range(self.e):
            for i in range(self.k):
                x = dist == i  # boolean mask of the points assigned to center i
                w = weights[x]
                if np.sum(w) == 0:  # empty or zero-weight cluster: keep the old center
                    continue
                new_center = np.sum(points[x] * w, axis=0, keepdims=True)
                new_center /= np.sum(w)
                self.centers[i] = new_center
            dist = utils.get_centers(self.p, self.centers)

        return self.centers
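The inner loop above is a weighted Lloyd update: the new center of cluster i is the weighted mean sum_j(w_j * p_j) / sum_j(w_j) over the points currently assigned to i. A tiny self-contained check of that update with toy data (plain numpy, not the project's utils helpers):

import numpy as np

points = np.array([[0.0, 0.0], [2.0, 0.0], [10.0, 10.0]])
weights = np.array([1.0, 3.0, 5.0])
assign = np.array([0, 0, 1])  # nearest-center index per point

for i in range(2):
    mask = assign == i
    total = weights[mask].sum()
    if total == 0:  # empty cluster: keep the old center
        continue
    center = (points[mask] * weights[mask][:, None]).sum(axis=0) / total
    print(i, center)  # 0 -> [1.5 0.], 1 -> [10. 10.]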
Example #4
    def compute(self, size, grnds=10, ginit=1):
        q = w_KMeans.KMeans(self.p, np.expand_dims(self.w, axis=0), self.k, grnds, ginit).compute()  # k-means for the coreset
        sq_d = utils.get_sq_distances(self.p, q)  # squared distances from each point to each center
        dist = utils.get_dist_to_centers(d=sq_d)  # squared distance from each point to its nearest center
        dist /= np.sum(dist)  # normalize
        dist *= 2  # factor of 2, per the paper
        c = utils.get_centers(d=sq_d)  # index of each point's center
        c = self._find_cluster_size(c)  # size of the cluster each point belongs to
        s = dist + 4.0 / c  # sensitivity bound; the 4 is per the paper
        t = np.sum(s * self.w)  # the t from the paper: total sensitivity
        u = t / (s * size)  # new weights for the coreset
        prob = s * self.w / t  # the sampling probability
        p, w = utils.sample(self.p, size, prob=prob, weights=u)  # sample the coreset: points + weights
        return p, w
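A property worth noting about these weights: with prob = s*w/t and u = t/(s*size), the sampled coreset preserves the total weight in expectation, since size * sum_i(prob_i * u_i) = sum_i(w_i). A quick numeric confirmation with arbitrary positive values (not project data):

import numpy as np

rng = np.random.default_rng(0)
s = rng.uniform(0.1, 1.0, 50)  # arbitrary positive sensitivities
w = rng.uniform(0.5, 2.0, 50)  # arbitrary positive input weights
size = 10                      # coreset sample size

t = np.sum(s * w)
prob = s * w / t               # sampling distribution over the points
u = t / (s * size)             # weight of a point if it is sampled

# E[total coreset weight] = size * sum(prob * u) = sum(w)
assert np.isclose(size * np.sum(prob * u), np.sum(w))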
Example #5
File: em.py Project: darxsys/ML
def solve(list_k, examples, out_path, centers_=None,
        conf_num=None, out_minus=True, out_iterations=False):
    """Solves the clustering problem using the EM algorithm."""

    dimension = len(examples[0][0])
    N = len(examples)
    for k in list_k:
        h = np.zeros((N, k), dtype=np.float64)  # responsibilities
        pi_k = np.zeros(k, dtype=np.float64)
        pi_k.fill(1 / float(k))  # uniform initial mixture weights
        configuration = False
        if centers_ is None:
            centers = utils.get_centers(k, examples)
        else:
            centers = centers_
            configuration = True

        matrix = np.identity(dimension, dtype=np.float64)
        matrices = np.array([matrix] * k)  # one covariance matrix per component
        log = calc_log(examples, centers, matrices, pi_k)

        # output bookkeeping
        num_iter = 0
        groups = [[] for _ in range(k)]
        probabilities = dict()

        if out_iterations:
            # "#iteracije: log-izglednost" is Croatian for "#iterations: log-likelihood"
            iter_out = open(out_path + "em-kmeans.dat", "w")
            iter_out.write("#iteracije: log-izglednost\n")
            iter_out.write("--\n")
            iter_out.write("#0: %.2lf\n" % log)

        while True:
            num_iter += 1
            group_count = [0] * k  # number of members in each group
            group_dicts = [dict() for _ in range(k)]  # true-label counts per group
            # E step
            for i in range(N):
                x = examples[i][0]
                mi_j = centers[0][0]
                sigma_j = matrices[0]
                p = utils.multivariate_probability(x, mi_j, sigma_j, dimension) * pi_k[0]
                h[i][0] = p
                sum_h = h[i][0]
                probabilities[i] = p
                group = examples[i][1]
                if group not in group_dicts[0]:
                    group_dicts[0][group] = 1
                else:
                    group_dicts[0][group] += 1

                group_count[0] += 1
                examples[i][2] = 0  # tentatively assign to component 0

                for j in range(1, k):
                    mi_j = centers[j][0]
                    sigma_j = matrices[j]
                    p2 = utils.multivariate_probability(x, mi_j, sigma_j, dimension) * pi_k[j]
                    if p2 > p:
                        group_count[examples[i][2]] -= 1
                        group_dicts[examples[i][2]][group] -= 1
                        if group not in group_dicts[j]:
                            group_dicts[j][group] = 1
                        else:
                            group_dicts[j][group] += 1
                        examples[i][2] = j
                        group_count[j] += 1
                        p = p2
                        probabilities[i] = p

                    h[i][j] = p2
                    sum_h += h[i][j]

                h[i] /= sum_h
                probabilities[i] /= sum_h
            # M step
            for i in range(k):
                new_mi = np.zeros(dimension, dtype=np.float64)
                sum_h = 0.
                new_sigma = np.zeros((dimension, dimension), dtype=np.float64)

                for j in range(N):
                    x = examples[j][0]
                    sum_h += h[j][i]
                    new_mi += h[j][i] * x

                new_mi /= sum_h
                centers[i] = (new_mi, centers[i][1], centers[i][2])

                mi = new_mi
                for j in range(N):
                    x = examples[j][0]
                    diff = x - mi
                    new_sigma += h[j][i] * np.outer(diff, diff)

                new_sigma /= sum_h
                matrices[i] = new_sigma
                new_pi = sum_h / float(N)
                pi_k[i] = new_pi


            new_log = calc_log(examples, centers, matrices, pi_k)

            if abs(new_log - log) < EPS:  # converged
                break
            log = new_log
            if out_iterations:
                iter_out.write("#%d: %.2lf\n" % (num_iter, log))

        # record the group memberships for k == 4
        if not configuration and not out_iterations:
            if k == 4:
                for i in range(N):
                    groups[examples[i][2]].append((examples[i][1], probabilities[i]))

                with open(out_path + "/em-k4.dat", "a") as f:
                    for i in range(k):
                        groups[i] = sorted(groups[i], key=lambda tup: tup[1], reverse=True)
                        f.write("Grupa %d:\n" % (i+1))
                        for j in range(len(groups[i])):
                            f.write(groups[i][j][0] + " %.2lf\n" % groups[i][j][1])
                        if i < k-1:
                            f.write("--\n")

            with open(out_path + "/em-all.dat", "a") as f:
                f.write("K = %d\n" % k)
                for i in range(k):
                    f.write("c%d:" % (i+1))
                    for j in range(dimension):
                        f.write(" %.2lf" % centers[i][0][j])
                    f.write("\n")
                    f.write("grupa %d: %d primjera\n" % ((i+1), group_count[i]))
                f.write("#iter: %d\n" % num_iter)
                f.write("log-izglednost: %.2lf\n" % log)
                if k < 5:
                    f.write("--\n")
        elif not out_iterations:
            with open(out_path + "/em-konf.dat", "a") as f:
                f.write("Konfiguracija %d:\n" % conf_num)
                f.write("log-izglednost: %.2lf\n" % log)
                f.write("#iteracija: %d\n" % num_iter)
                if out_minus:
                    f.write("--\n")
        else:
            iter_out.write("--\n")
            for i in range(len(group_dicts)):
                iter_out.write("Grupa %d:" % (i+1))
                buff = ""
                for key in group_dicts[i]:
                    buff += " " + key + " %d" % group_dicts[i][key] + ","

                buff = buff[:-1]  # drop the trailing comma
                iter_out.write(buff + "\n")
            iter_out.close()
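Stripped of the reporting code, the loop above is standard EM for a Gaussian mixture: the E step computes responsibilities h[n][j] proportional to pi_j * N(x_n | mu_j, sigma_j), and the M step reestimates the weights, means, and covariances from them. A minimal self-contained sketch with plain numpy (toy data; gaussian_pdf is a stand-in for the project's utils.multivariate_probability):

import numpy as np

def gaussian_pdf(X, mu, sigma):
    # Multivariate normal density for each row of X.
    d = X.shape[1]
    diff = X - mu
    inv = np.linalg.inv(sigma)
    _, logdet = np.linalg.slogdet(sigma)
    maha = np.einsum('nd,de,ne->n', diff, inv, diff)
    return np.exp(-0.5 * (maha + d * np.log(2 * np.pi) + logdet))

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])
k = 2
N, d = X.shape
mu = X[rng.choice(N, k, replace=False)]  # means initialized from the data
sigma = np.array([np.eye(d)] * k)        # identity initial covariances
pi = np.full(k, 1.0 / k)                 # uniform initial mixture weights

for _ in range(20):
    # E step: responsibilities, normalized per example.
    h = np.stack([pi[j] * gaussian_pdf(X, mu[j], sigma[j]) for j in range(k)], axis=1)
    h /= h.sum(axis=1, keepdims=True)
    # M step: reestimate the mixture parameters.
    Nk = h.sum(axis=0)
    pi = Nk / N
    mu = (h.T @ X) / Nk[:, None]
    for j in range(k):
        diff = X - mu[j]
        sigma[j] = (h[:, j, None] * diff).T @ diff / Nk[j]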
Example #6
File: kmeans.py Project: darxsys/ML
def solve(list_k, examples, out_path):
    """Runs k-means for each k in list_k on a particular set of examples."""

    ret_centers = []
    size = len(examples[0][0])
    out_all = open(out_path + "/kmeans-all.dat", "w")
    out_k4 = open(out_path + "/kmeans-k4.dat", "w")
    buffer_ = ""
    buffer_2 = "#iteracije: J\n--\n"
    for k in list_k:
        centers = utils.get_centers(k, examples)
        num_iter = 0
        while True:
            num_iter += 1
            changed = 0
            classes = []
            for cent in centers:
                classes.append([])

            for count in range(len(examples)):
                ex = examples[count]
                bk = 0  # index of the nearest center
                dist = utils.euclid(ex[0], centers[0][0])

                for i in range(1, len(classes)):
                    dist2 = utils.euclid(ex[0], centers[i][0])
                    if dist2 < dist:
                        bk = i
                        dist = dist2

                classes[bk].append(ex)
                if bk != ex[2]:
                    changed = 1
                    examples[count][2] = bk
        
            if k == 4:
                buffer_2 += "#%d: " % (num_iter-1) + "%.2lf"\
                    % (utils.calc_error(k, classes, centers)) + "\n"

            if changed == 0:
                break

            for i in range(k):
                if not classes[i]:  # guard: skip empty clusters
                    continue
                a = np.zeros(size, dtype=np.float64)
                for j in range(len(classes[i])):
                    a = a + classes[i][j][0]
                a = a / len(classes[i])
                centers[i][0] = a

        sum_ = utils.calc_error(k, classes, centers)
        if k == 4:
            buffer_2 += "--\n"

        buffer_ += "K = " + str(k) + "\n"
        for i in range(k):
            buffer_ += ("c%d: " % (i+1))
            if k == 4:
                buffer_2 += "Grupa %d: " % (i+1)
                count = dict()
                for j in range(len(classes[i])):
                    if classes[i][j][1] not in count:
                        count[classes[i][j][1]] = 1
                    else:
                        count[classes[i][j][1]] += 1

                count = sorted(count.items(), key=lambda x:x[1], reverse=True)
                for (key, val) in count:
                    buffer_2 += str(key) + " " + str(val) + ", "

                buffer_2 = buffer_2[:-2] + "\n"

            for j in range(size):
                buffer_ += "%.2lf" % centers[i][0][j]
                buffer_ += " "
            buffer_ = buffer_[:-1] + "\n"
            buffer_ += "grupa %d: " % (i+1)

            buffer_ += str(len(classes[i])) + " primjera\n"
        buffer_ += ("#iter: " + str(num_iter) + "\n")
        buffer_ += ("J: %.2lf" % (sum_) + "\n")
        buffer_ += ("--\n")

        if k == 4:
            out_k4.write(buffer_2)
            ret_centers = centers[:]

    out_all.write(buffer_[:-3])  # drop the trailing "--\n" separator
    out_all.close()
    out_k4.close()
    return ret_centers
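For comparison, the nearest-center assignment loop above can be vectorized with numpy broadcasting. A minimal sketch with toy data (plain numpy, not the project's utils.euclid):

import numpy as np

points = np.random.default_rng(0).normal(size=(100, 2))
centers = points[:4].copy()  # k = 4 centers initialized from the data

# Pairwise squared distances, shape (n, k), then the nearest center per point.
d2 = ((points[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
assign = d2.argmin(axis=1)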
Example #7
if __name__ == '__main__':
    img_dir = "./images"
    img_fp = os.path.join(img_dir, random.choice(os.listdir(img_dir)))
    print(img_fp)
    filter_polygon = True
    kuzu_seg = KuzuSegment()
    kuzu_cls = KuzuClassify()
    img, origin_image, origin_h, origin_w = kuzu_seg.load_image(img_fp)
    pred_bbox, pred_center = kuzu_seg.predict(img)

    # get all polygon area in image
    polygon_contours = make_contours(pred_bbox)

    # get all center points by contour method
    center_coords = get_centers(pred_center.astype(np.uint8))
    no_center_points = len(center_coords)
    final_center = vis_pred_center(center_coords, rad=2)

    # filter polygon
    if filter_polygon:
        filtered_contours = filter_polygons_points_intersection(polygon_contours, center_coords)  # noqa
        pred_bbox = vis_pred_bbox_polygon(pred_bbox, filtered_contours)
    final_bbox = vis_pred_bbox(pred_bbox, center_coords, width=2)

    y_ratio = origin_h / 512
    x_ratio = origin_w / 512

    pil_img = Image.fromarray(origin_image).convert('RGBA')
    char_canvas = Image.new('RGBA', pil_img.size)
    char_draw = ImageDraw.Draw(char_canvas)