Example 1
    def findingshapelets(self, data, target):
        """
        Searches for a shapelet classifier for each label.
        :param data: list of training examples
        :type data: np.array
        :param target: list of event labels for each training example
        :type target: np.array
        :return: dict with label as key and (classifier, binary target) as value
        :rtype: dict
        """
        self.data = data
        self.target = target
        self.windows = self.calc_windows(self.sl_max, self.N_max)
        self.estimate_sigma_min()
        self.unique_labels = self.get_unique_targets(target)
        bsf_classifier = defaultdict(lambda: None)
        self.shapelets = dict()
        self.dimensions_subsets = list(powerset(range(data[0].shape[1])))[1:]

        self.precompute_z_norm(data)

        c = Counter(len(self.dimensions_subsets) * len(self.windows),
                    prefix="generating shapelets")
        for i, dimension_subset in enumerate(self.dimensions_subsets):
            if dimension_subset == ():
                continue

            for j, window in enumerate(self.windows):
                shapelets = self.prune_shapelet_candidates(
                    window, dimension_subset)
                for label in shapelets.keys():
                    self.shapelets[label, dimension_subset,
                                   window] = shapelets[label]
                c.printProgress(j + (i * len(self.windows)) + 1)

        self.precompute_bmd(data)

        for label in self.unique_labels:
            binary_target = np.array([int(label in x) for x in target])
            c = Counter(len(self.dimensions_subsets) * len(self.windows),
                        prefix=label)
            c.printProgress(0)
            for ds_i, dimension_subset in enumerate(self.dimensions_subsets):
                for w_i, window in enumerate(self.windows):
                    key = (label, dimension_subset, window)
                    classifier_candidates = self.build_classifier(
                        self.shapelets[key], binary_target, label,
                        dimension_subset)
                    for c_i, classifier in enumerate(classifier_candidates):
                        try:
                            if self.cmp_classifier(bsf_classifier[label],
                                                   classifier) > 0:
                                bsf_classifier[label] = classifier
                        except AttributeError:
                            bsf_classifier[label] = classifier
                    c.printProgress(ds_i * len(self.windows) + w_i + 1)
            bsf_classifier[label] = bsf_classifier[label], binary_target
        return bsf_classifier
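The progress reporting above relies on a Counter helper constructed as
Counter(total, prefix=...) and driven via printProgress(step). The class
itself is not shown in the example; a minimal sketch of what such a helper
might look like (hypothetical, inferred only from those two calls):

import sys

class Counter:
    """Console progress printer, sketched from the calls in the example."""

    def __init__(self, total, prefix=""):
        self.total = total
        self.prefix = prefix

    def printProgress(self, step):
        # overwrite the same console line with the current progress
        percent = 100.0 * step / self.total
        sys.stdout.write("\r%s: %5.1f%% (%d/%d)" %
                         (self.prefix, percent, step, self.total))
        sys.stdout.flush()
        if step >= self.total:
            sys.stdout.write("\n")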
Example 2
    def precompute_bmd(self, data):
        """
        Calculates the BMD between all shapelet candidates and all training
        examples.
        :param data: list of training examples
        :type data: np.array
        """
        self.dist_shapelet_ts = dict()
        c = Counter(data.shape[0], prefix="calculating min dist")
        c.printProgress(0)
        for ts_id in range(data.shape[0]):
            for axis in self.dimensions_subsets:
                for shapelet_length in self.windows:
                    # stack all candidate shapelets for this axis subset
                    # and window length across labels
                    muh = np.concatenate([
                        self.shapelets[label, axis, shapelet_length]
                        for label in self.unique_labels
                    ])
                    # restrict the z-normalized windows to the chosen axes
                    ts = np.concatenate([
                        self.z_data[ts_id, shapelet_length][:, :, a][..., None]
                        for a in axis
                    ], axis=-1)
                    # best match distance: minimum over all window positions
                    d_m = distance_matrix3D(muh, ts).min(axis=1)
                    i = 0
                    for label in self.unique_labels:
                        key = (label, axis, shapelet_length)
                        for shapelet_id, shapelet in enumerate(
                                self.shapelets[key]):
                            self.dist_shapelet_ts[ts_id, shapelet_id, label,
                                                  shapelet_length,
                                                  axis] = d_m[i]
                            i += 1
            c.printProgress(ts_id + 1)
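The heart of this precomputation is distance_matrix3D(muh, ts).min(axis=1):
every candidate shapelet is compared against every window of the series, and
only the best (smallest) distance is kept per shapelet. A minimal NumPy
sketch of that best-match-distance step for a single dimension (the function
below is an illustrative stand-in, not the distance_matrix3D used above):

import numpy as np

def best_match_distances(shapelets, windows):
    # shapelets: (n_shapelets, length), windows: (n_windows, length)
    # pairwise Euclidean distances, shape (n_shapelets, n_windows)
    diff = shapelets[:, None, :] - windows[None, :, :]
    dists = np.sqrt((diff ** 2).sum(axis=-1))
    # keep only the closest window per shapelet
    return dists.min(axis=1)

cands = np.random.randn(3, 5)   # three candidates of length 5
wins = np.random.randn(10, 5)   # ten z-normalized windows
print(best_match_distances(cands, wins))  # one minimum per candidate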
Example 3
def run_driver(v, D, U, H, verbose=False):
    # Generate small examples to walk through
    test_int, mu, x = alg.initialize(D, U, H)
    eintervals = [test_int]

    evals, evects = [], []

    RQI_iters, bisec_iters, stable, inverse, solve_count = [], [], [], [], []
    n = len(D)
    for it in range(n):
        #print it, sum([interval.num_evals() for interval in eintervals])
        assert sanity_check(eintervals, n - it)
        #print("Finding the %dth eigen pair.\n" % (it+1))
        counter = Counter()
        lamb, y = driver_back.find_pair(eintervals,
                                        D,
                                        U,
                                        H,
                                        mu,
                                        x,
                                        counter=counter,
                                        version=v)
        evals.append(lamb)
        evects.append(y)

        # D_hat = core.ldl_fast(D-lamb, U, H)
        # inertia = utils.inertia_ldl(D_hat)
        # assert inertia[1] == 1
        #assert np.allclose(0, utils.comp_error(D, U, H, lamb, y) )

        RQI_iters.append(counter.rqi_count)
        bisec_iters.append(counter.bisec_count)
        stable.append(counter.stable_count)
        inverse.append(counter.inverse_count)
        solve_count.append(counter.solve_count)

        if it != n - 1:
            mu = (eintervals[0].low + eintervals[0].high) / 2
            # Can initialize smarter
            x = np.random.randn(n)
            x = x / np.linalg.norm(x)

    RQI_iters = np.array(RQI_iters)
    bisec_iters = np.array(bisec_iters)
    stable = np.array(stable)
    print("Max number of RQI iterations = %d, that of bisection = %d." %
          (max(RQI_iters), max(bisec_iters)))

    print("Tol number of RQI iterations = %d, that of bisection = %d." %
          (sum(RQI_iters), sum(bisec_iters)))
    print("Ave number of RQI iterations = %.2f, that of bisection = %.2f." %
          (np.mean(RQI_iters), np.mean(bisec_iters)))
    print("Tol number of inverse iterations = %d." % sum(inverse))
    print("The total number of stable QR method is %d." % (sum(stable)))
    print("The total number of bisection that need solve is %d." %
          sum(solve_count))

    A = form_A(D, U, H)
    for i in range(n):
        assert np.allclose(np.dot(A, evects[i]), evals[i] * evects[i])
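find_pair reports its work through a Counter object with one named tally per
strategy (RQI, bisection, stable QR, inverse iteration, solves). The class is
not part of the snippet; a plain sketch consistent with the attributes read
above (hypothetical, attribute names taken from run_driver):

class Counter:
    """Per-eigenpair iteration statistics, incremented inside find_pair."""

    def __init__(self):
        self.rqi_count = 0      # Rayleigh quotient iteration steps
        self.bisec_count = 0    # bisection steps
        self.stable_count = 0   # falls back to the stable QR method
        self.inverse_count = 0  # inverse iteration steps
        self.solve_count = 0    # bisection steps that needed a solve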
Example 4
    def run(self):
        # create general data structures and queues
        input_queue = Queue()
        output_queue = Queue()
        processes = []

        # create the should terminate event
        event = Event()

        finished_slaves_counter = Counter()

        # spawn the processes
        for i in range(self.context['number']):
            p = Process(target=process,
                        args=[
                            input_queue, output_queue, event,
                            finished_slaves_counter
                        ])
            p.start()
            processes.append(p)

        # get the queries and push them into the queue
        queue_thread = Process(target=fill_queue,
                               args=[input_queue, event, self.context])
        queue_thread.daemon = True
        queue_thread.start()

        # empty the output_queue
        result_count = 0
        num_result_count_unchanged = 0
        while True:
            try:
                result = output_queue.get_nowait()

                # do some dummy calculation
                hashlib.md5(result).hexdigest()

                result_count += 1
                num_result_count_unchanged = 0
            except Empty:
                num_result_count_unchanged += 1
                all_slaves_done = (finished_slaves_counter.value() ==
                                   self.context['number'])
                got_all_results = (
                    result_count == self.context['query_number']
                    or num_result_count_unchanged >= 1000)
                if all_slaves_done and got_all_results and event.is_set():
                    break
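Here finished_slaves_counter.value() is read in the parent while the worker
processes increment the counter, so the Counter must be process-safe. A
minimal sketch built on multiprocessing.Value (hypothetical; only the
value() accessor is implied by the snippet):

from multiprocessing import Value

class Counter:
    """Process-safe counter shared between parent and workers."""

    def __init__(self, initial=0):
        self._count = Value('i', initial)  # shared int with its own lock

    def increment(self):
        with self._count.get_lock():
            self._count.value += 1

    def value(self):
        return self._count.value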
Example 5
    def calculate_grid(self) -> None:
        """
        Calculates the values of the grid based on the currently loaded data
        """
        self._verboseprint("Reading data...")

        self._initialize_data()
        self._verboseprint("Initializing map grid generation...")
        # initial grid
        grid_width = ceil((self._lon_max - self._lon_min) / self.scale)
        grid_height = ceil((self._lat_max - self._lat_min) / self.scale)
        self._verboseprint(
            ("Map Parameters\n"
             "--------------\n"
             "Lat Min:         {}\n"
             "Lat Max:         {}\n"
             "Lat Grid Height: {}\n"
             "Lon Min:         {}\n"
             "Lon Max:         {}\n"
             "Lon Grid Width:  {}\n"
             "Grid Dimensions: ({}, {})\n").format(self._lat_min,
                                                   self._lat_max, grid_height,
                                                   self._lon_min,
                                                   self._lon_max, grid_width,
                                                   grid_width, grid_height))

        grid = np.full((grid_height, grid_width), 0.0)

        self._verboseprint("Determining grid coordinates of points...")
        x_coords, y_coords, remove = [], [], []
        item_count = len(self._names)
        for i in range(item_count):
            lat, lon = self._lats[i], self._lons[i]
            value, name = self._values[i], self._names[i]
            if (lon < self._lon_min - self.radius
                    or lon > self._lon_max + self.radius
                    or lat < self._lat_min - self.radius
                    or lat > self._lat_max + self.radius):
                remove.append(i)
                continue
            grid_x = ceil((lon - self._lon_min) / self.scale)
            grid_y = ceil((lat - self._lat_min) / self.scale)
            x_coords.append(grid_x)
            y_coords.append(grid_y)
            # The grid is indexed row-first (y, then x), which is why the
            # order is reversed when writing into it. The debug output below
            # uses (x, y) to match the conventional reading of map
            # coordinates.
            value_text = ("{} ({})".format(value, self._legend[value])
                          if self._mode == MODES[0] else value)
            self._verboseprint(("{} -> Map Coords: ({}, {}) || "
                                "Grid Coords: ({}, {}) || "
                                "Value: {}").format(name, lat, lon, grid_x,
                                                    grid_y, value_text))
        remove.sort(reverse=True)
        for i in remove:
            self._lats.pop(i)
            self._lons.pop(i)
            self._values.pop(i)
            self._names.pop(i)
        # the lists were shortened above, so refresh the count before it is
        # used to index x_coords/y_coords below
        item_count = len(self._names)
        self._verboseprint("Filling in the grid...")
        try:
            import progressbar  # displays progress nicely if installed
            prog_bar = progressbar.ProgressBar()
        except ImportError:
            prog_bar = lambda l: l
        # radius expressed in grid cells; constant across the loop
        radius = self.radius / self.scale
        for i in prog_bar(range(grid_height)):
            for j in range(grid_width):
                vicinity = [[
                    point_i,
                    sqrt((x_coords[point_i] - j)**2 +
                         (y_coords[point_i] - i)**2)
                ] for point_i in range(item_count)]
                if any(dist <= radius for _, dist in vicinity):
                    vicinity = [[point_i, 0.999 - point_dist / radius]
                                for point_i, point_dist in vicinity
                                if point_dist <= radius]
                    # influence mode
                    if self._mode == MODES[0]:
                        weights = Counter()
                        for point_i, weighted_dist in vicinity:
                            weights[self._values[point_i]] += weighted_dist
                        weights = list(weights.items())
                        weights.sort(key=lambda item: item[1], reverse=True)
                        dominant = weights[0]
                        d_value = dominant[0]
                        d_weight = dominant[1]
                        # sum of all the other weights
                        rest = sum(weight for value, weight in weights[1:])
                        if d_weight >= rest:
                            total_weight = d_weight - rest
                            grid[i][j] = (self._legend[d_value]
                                          if total_weight >= 0.999 else
                                          self._legend[d_value] -
                                          (0.999 - total_weight))
                    # weighted mode
                    elif self._mode == MODES[1]:
                        total_count = 0
                        for point_i, weighted_dist in vicinity:
                            total_count += weighted_dist * self._values[point_i]
                        grid[i][j] = total_count

        self.grid = grid
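In influence mode the cell value is decided with a collections.Counter used
as a weight accumulator: every nearby point adds its distance weight to its
value's bucket, and the heaviest bucket wins only if it outweighs all other
buckets combined. A self-contained toy run of that accumulation step:

from collections import Counter

# (value, distance weight) pairs for the points near one grid cell
nearby = [("forest", 0.8), ("water", 0.3), ("forest", 0.5), ("urban", 0.2)]

weights = Counter()
for value, weighted_dist in nearby:
    weights[value] += weighted_dist

(d_value, d_weight), *rest = weights.most_common()
rest_weight = sum(w for _, w in rest)
if d_weight >= rest_weight:
    print("dominant:", d_value, "margin:", round(d_weight - rest_weight, 3))
# -> dominant: forest margin: 0.8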