コード例 #1
0
 def test_same(self):
     # Transforming a path into itself must yield the empty string.
     path = '/home/user/test.txt'
     result = paths.find_transformation(path, '/home/user/test.txt')
     self.assertEqual(result, '')
コード例 #2
0
 def test_down_file(self):
     # The target is a file directly inside the starting directory, so
     # the expected transformation is that file's name.
     start_dir = '/home/user'
     target = '/home/user/doc.pdf'
     result = paths.find_transformation(start_dir, target)
     self.assertEqual(result, 'doc.pdf')
コード例 #3
0
 def test_down_folder(self):
     # The target lies two levels below the starting directory; only the
     # first component on the way down is expected back.
     start_dir = '/home/user'
     target = '/home/user/typography/doc.pdf'
     result = paths.find_transformation(start_dir, target)
     self.assertEqual(result, 'typography')
コード例 #4
0
 def test_up_different(self):
     # The two paths diverge below /home/user, so the expected
     # transformation from the first path is a step up ('..').
     start = '/home/user/typography/doc.pdf'
     target = '/home/user/test.txt'
     result = paths.find_transformation(start, target)
     self.assertEqual(result, '..')
コード例 #5
0
ファイル: som.py プロジェクト: Programator2/medusa-som
    def find_clusters(self, n):
        """Group the neurons around ``n`` cluster centers.

        The centers start as ``n`` neurons sampled at random. Each round
        assigns every neuron to its closest center (by ``self._distance``)
        and then recomputes two features of every center from the neurons
        assigned to it:

        * feature ``0`` (process): replaced by a randomly chosen entry of
          ``BetterCounter.most_common()`` over the assigned neurons;
        * feature ``1`` (path): the median path of the assigned neurons,
          refined by repeatedly applying the most frequent transformation
          for as long as the summed distance to those neurons decreases.

        Rounds repeat until no center changes or 50 rounds have run.

        Side effects: sets ``self.categories`` (center tuple -> neurons, as
        computed at the start of the last round), ``self.centers`` and
        ``self.neurons_by_cat_id`` (center index -> neurons, computed from
        the final centers).

        :param n: number of cluster centers to sample from ``self.neurons``
        :return: the final centers, each as a list of features
        :raises RuntimeError: if the transformation counter yields no entry
        """
        # short names for static methods
        find_median = self.find_median
        transform = self.transform

        # Hard cap on the number of rounds. The process feature is picked
        # with random.choice, so it may keep alternating between tied
        # values and never settle on its own.
        max_rounds = 50

        # Choose cluster centers randomly from neurons
        centers = random.sample(self.neurons, n)

        # `changed` belongs to the outer loop only. The per-center path
        # refinement below has its own control flow, so it cannot
        # overwrite the convergence state of the whole round.
        changed = True
        count = 0
        while changed and count < max_rounds:
            count += 1
            changed = False
            # Convert centers to tuples so they can be hashed
            centers = [tuple(x) for x in centers]

            # Determine the categories of neurons
            categories = defaultdict(list)  # dictionary center -> [neurons]
            for neuron in self.neurons:
                # On equal distances min() keeps the first center.
                closest = min(((self._distance(center, neuron), center)
                               for center in centers),
                              key=lambda x: x[0])
                categories[closest[1]].append(neuron)
            self.categories = categories

            # Compute new value for each category
            new_centers = [list(x) for x in centers]

            ### Process ###

            # Index of the process feature inside a neuron/center vector
            feature = 0

            for center_id, center in enumerate(centers):
                processes = [x[feature] for x in categories.get(center, [])]
                if not processes:
                    continue
                most_common_list = BetterCounter(processes).most_common()
                most_common_process = random.choice(most_common_list)[0]
                if new_centers[center_id][feature] != most_common_process:
                    changed = True
                new_centers[center_id][feature] = most_common_process

            ### Path ###

            for center_id, center in enumerate(centers):
                # Neurons currently assigned to this center
                neighborhood = categories.get(center, [])

                # Nothing assigned: keep the center's path as it is
                if not neighborhood:
                    continue

                # Compute average path from the neighborhood
                average, error = find_median(neighborhood)

                while True:
                    counter = BetterCounter()

                    # Search how the average path needs to be changed to
                    # be the same as each of its neighbours. Count the
                    # transformations. There are three transformations:
                    # 1) go up (..)
                    # 2) go down in the directory structure
                    # 3) do nothing (paths are equal)
                    # NOTE(review): a '.' result is counted like any other
                    # value; whether find_transformation can return it is
                    # not visible from here.
                    for neighbour in neighborhood:
                        counter.update(
                            [find_transformation(average, neighbour)])

                    # presumably most_common() returns only the entries
                    # tied for the highest count --- confirm against
                    # BetterCounter
                    most_frequent = counter.most_common()
                    if not most_frequent:
                        raise RuntimeError(
                            'Unexpected length of most_frequent')

                    candidates = [x[0] for x in most_frequent]
                    if len(candidates) == 1 and not candidates[0]:
                        # The only transformation is the empty string:
                        # the paths are equal, we are finished
                        break

                    # Apply the first candidate that lowers the error
                    for t in candidates:
                        new_average = transform(average, t)
                        new_error = SOM.summed_distance(
                            new_average, neighborhood)
                        if new_error < error:
                            average = new_average
                            error = new_error
                            break
                    else:
                        # No transformation was better --- we are finished
                        # with this center
                        break

                # This is a new path for the `center_id`-th cluster center
                if new_centers[center_id][1] != average:
                    changed = True
                new_centers[center_id][1] = average

            # Permissions (this feature is not updated here)

            centers = new_centers
        self.centers = centers

        # Determine the categories of neurons once again, because
        # `categories` still refers to the centers from the start of the
        # last round. This time the key is the index of the center.
        categories = defaultdict(list)  # dictionary index -> [neurons]
        for neuron in self.neurons:
            closest = min(((self._distance(center, neuron), index)
                           for index, center in enumerate(centers)),
                          key=lambda x: x[0])
            categories[closest[1]].append(neuron)
        self.neurons_by_cat_id = categories

        return centers
コード例 #6
0
ファイル: som.py プロジェクト: Programator2/medusa-som
    def _update_paths(self, iteration: int):
        """Compute new file paths for all neurons without storing them.

        For each neuron the input vectors of its topological neighborhood
        are collected. The new path starts as the median of those vectors
        (``self.find_median``) and is refined by repeatedly applying the
        most frequent transformation for as long as the summed distance to
        the neighborhood decreases. A neuron with an empty neighborhood
        keeps its current path.

        The neighborhood radius is interpolated linearly from
        ``self.col_sz / 3.5`` at iteration 0 to 1 at iteration
        ``self.max_iter - 1`` and truncated to an integer.

        :param iteration: iteration of the update function
        :return: list of new paths, in the same order as ``self.neurons``
        :raises RuntimeError: if the transformation counter yields no entry
        """

        find_median = self.find_median
        transform = self.transform

        # This is where we store new paths for the neurons, they will be
        # stored in original order
        new_paths = []

        # Compute neighborhood for each neuron in the *input space*
        neighborhood_for_neuron = self._neuron_neighborhood()

        steps = self.max_iter - 1
        if steps:
            distance = int((1 - self.col_sz / 3.5) /
                           steps * iteration + self.col_sz / 3.5)
        else:
            # A single iteration leaves nothing to interpolate; use the
            # starting radius instead of dividing by zero.
            distance = int(self.col_sz / 3.5)
        print('distance is', distance)

        for p_id, p in enumerate(self.neurons):
            # Get the neighborhood of the neuron (input vectors from the
            # topological neighborhood)
            neighborhood = list(
                self._neighborhood_set(p_id, distance,
                                       neighborhood_for_neuron))

            # When there are no neighbours the current path is kept
            if not neighborhood:
                new_paths.append(p[PATH_CATEGORY])
                continue

            # Compute average path from the neighborhood
            average, error = find_median(neighborhood)

            while True:
                counter = BetterCounter()

                # Search how the average path needs to be changed to be the
                # same as each of its neighbours. Count the transformations.
                # There are three transformations:
                # 1) go up (..)
                # 2) go down in the directory structure
                # 3) do nothing (paths are equal)
                # NOTE(review): a '.' result is counted like any other
                # value; whether find_transformation can return it is not
                # visible from here.
                for neighbour in neighborhood:
                    counter.update([find_transformation(average, neighbour)])

                # presumably most_common() returns only the entries tied
                # for the highest count --- confirm against BetterCounter
                most_frequent = counter.most_common()
                if not most_frequent:
                    raise RuntimeError('Unexpected length of most_frequent')

                candidates = [x[0] for x in most_frequent]
                if len(candidates) == 1 and not candidates[0]:
                    # The only transformation is the empty string: the
                    # paths are equal, we are finished
                    break

                # Apply the first candidate that lowers the error.
                # TODO Try all possible transformations and choose the best
                # one; disadvantage - slows down the algorithm
                for t in candidates:
                    new_average = transform(average, t)
                    new_error = SOM.summed_distance(new_average, neighborhood)
                    if new_error < error:
                        average = new_average
                        error = new_error
                        break
                else:
                    # No transformation was better --- we are finished with
                    # neuron `p`
                    break

            # This is a new path for the `p_id`-th neuron
            new_paths.append(average)

        return new_paths