Esempio n. 1
0
    def optimize(self, net_string, device_graph):
        """
        Exhaustively searches all device placements for the given net.

        Every assignment of the colocation groups to the devices in
        ``device_graph`` is applied to the net and scored with
        ``self.evaluate_placement``; the lowest-scoring placement is kept.

        :param net_string: The network that should be optimized, given as a json string.
        :param device_graph: The device graph that the network should be optimized for.
        :return: The net with the best placement applied, exactly as returned by
            ``apply_placement`` (it is not serialized here), or ``None`` if no
            placement was enumerated.
        """
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))
        n_devices = len(device_graph.devices)

        best_score = -1
        best_net = None
        for comb in tqdm(product(range(n_devices), repeat=len(groups)),
                         total=n_devices**len(groups),
                         unit='placements'):

            net = apply_placement(net_string, comb, groups)
            score = self.evaluate_placement(net, device_graph)

            if best_net is None:
                # Keep the very first placement as a fallback so that a result
                # is returned even if nothing scores as valid.
                best_net = net
                best_score = score
            elif score != -1 and (best_score == -1 or score < best_score):
                # NOTE(review): -1 is treated as the "invalid placement"
                # sentinel, mirroring the `!= -1` checks in the other
                # optimizers of this file - confirm against evaluate_placement.
                # Without this guard a -1 score compares lower than every real
                # run time and would always win.
                best_net = net
                best_score = score

        return best_net
Esempio n. 2
0
    def optimize(self, net_string, device_graph):
        """
        Searches for a device placement by mutating one group per step.

        Starts from a random placement and, for ``self.steps`` iterations,
        reassigns one randomly chosen colocation group to a random device.
        A candidate scoring -1 is never accepted. Otherwise it replaces the
        current placement when it scores lower, when the current score is -1,
        or with probability ``expit((score - new_score) / self.temp(i))``.

        When ``self.score_save_period`` is truthy the current score is written
        to ``time_history.csv`` in the log directory every that many steps.

        :param net_string: The network to optimize, given as a json string.
        :param device_graph: The device graph whose devices the groups are placed on.
        :return: The net with the final placement applied, as an indented json
            string (also written to ``sa_solution.json`` in the log directory).
        """
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        def run_time_of(candidate):
            # Score one placement through the shared evaluation path.
            return self.evaluate_placement(
                apply_placement(net_string, candidate, groups), device_graph)

        def open_history(mode):
            # The log directory is looked up again on every open.
            return open(os.path.join(get_log_dir(), 'time_history.csv'), mode)

        placement = [randint(0, n_devices - 1) for _ in range(len(groups))]
        score = run_time_of(placement)

        if self.score_save_period:
            with open_history('w') as history:
                history.write('step, time\n')

        for i in tqdm(range(self.steps), disable=not self.verbose):
            # Draw the replacement device first, then the index it goes to.
            new_device = randint(0, n_devices - 1)
            mutated_index = randint(0, len(placement) - 1)
            new_placement = placement[:]
            new_placement[mutated_index] = new_device

            new_score = run_time_of(new_placement)

            # self.verbose doubles as the logging period (in steps).
            if self.verbose and (i + 1) % self.verbose == 0:
                log(f'[{i + 1}/{self.steps}] Best run time: {score:,.2f}ms')

            if self.score_save_period and i % self.score_save_period == 0:
                with open_history('a') as history:
                    history.write(f'{i + 1}, {score}\n')

            if new_score == -1:
                continue

            # The random draw and the temperature lookup only happen when the
            # candidate is not already an outright improvement.
            if (new_score < score or score == -1
                    or random() < expit((score - new_score) / self.temp(i))):
                score, placement = new_score, new_placement

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as out:
            out.write(solution)

        return solution
Esempio n. 3
0
    def optimize(self, net_string, device_graph):
        """
        First-improvement hill climbing over device placements.

        Starts from a random placement and repeatedly moves to the first
        neighbour (one group shifted to an adjacent device index) that scores
        lower than the current placement, or that is valid (not -1) while the
        current score is -1. Stops when no neighbour qualifies.

        :param net_string: The network to optimize, given as a json string.
        :param device_graph: The device graph whose devices the groups are placed on.
        :return: The final placement, one device index per colocation group.
            NOTE(review): this is the raw placement list, not a json string -
            confirm that callers expect this.
        """
        n_devices = len(device_graph.devices)

        def generate_neighbours(placement):
            """Yield copies of ``placement`` with one entry moved by +1, then -1."""
            if n_devices == 1:
                return

            for i, p in enumerate(placement):
                if p < n_devices - 1:
                    n = placement[:]
                    n[i] = p + 1
                    yield n
                if p > 0:
                    n = placement[:]
                    n[i] = p - 1
                    yield n

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        placement = generate_random_placement(len(groups), n_devices)
        score = self.evaluate_placement(
            apply_placement(net_string, placement, groups), device_graph)

        i = 0
        while True:
            i += 1
            if self.verbose:
                log(f'Iteration {i}. Best running time: {score:.2f}ms')

            for n in generate_neighbours(placement):
                new_score = self.evaluate_placement(
                    apply_placement(net_string, n, groups), device_graph)
                if (new_score < score or score == -1) and new_score != -1:
                    # Take the first improving neighbour and restart the scan
                    # from the new placement.
                    placement = n
                    score = new_score
                    break
            else:
                # No neighbour improved on the current placement: local optimum.
                break

        return placement
Esempio n. 4
0
            def eval_function(x):
                nonlocal i
                t.update(1)
                new_placement = [int(round(g)) for g in x]
                score = self.evaluate_placement(
                    apply_placement(net_string, new_placement, groups),
                    device_graph)

                i += 1
                return score
Esempio n. 5
0
    def optimize(self, net_string, device_graph):
        """
        Optimizes the placement with ``scipy.optimize.dual_annealing``.

        Each colocation group is one search dimension bounded by
        ``[0, n_devices - 1]``; the values proposed by the optimizer are rounded
        to integer device indices before evaluation. Local search is disabled
        and ``self.steps`` is passed as the function-evaluation budget.

        ``time_history.csv`` in the log directory is (re)created with a header
        and receives one line every time the optimizer invokes the callback.

        :param net_string: The network to optimize, given as a json string.
        :param device_graph: The device graph whose devices the groups are placed on.
        :return: The net with the resulting placement applied, as an indented json
            string (also written to ``sa_solution.json`` in the log directory).
        """
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        def to_placement(values):
            # Round each optimizer value to an integer device index.
            return [int(round(g)) for g in values]

        def history_path():
            # The log directory is looked up again on every use.
            return os.path.join(get_log_dir(), 'time_history.csv')

        with open(history_path(), 'w') as history:
            history.write('generation, time\n')

        # Number of completed evaluations, shared with the closures below.
        i = 0
        bounds = [(0, n_devices - 1)] * len(groups)

        with tqdm(total=self.steps, disable=not self.verbose) as t:

            def eval_function(x):
                nonlocal i
                t.update(1)
                score = self.evaluate_placement(
                    apply_placement(net_string, to_placement(x), groups),
                    device_graph)
                i += 1
                return score

            def callback(x, score, context):
                if self.verbose:
                    log(f'[{i + 1}/{self.steps}] Found new minimum: {score:.2f}ms'
                        )
                with open(history_path(), 'a') as history:
                    history.write(f'{i + 1}, {score}\n')

            result = scipy.optimize.dual_annealing(eval_function,
                                                   bounds,
                                                   no_local_search=True,
                                                   maxfun=self.steps,
                                                   callback=callback)

        placement = to_placement(result.x)

        if self.verbose:
            log(f'Best found placement: {placement}')

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as out:
            out.write(solution)

        return solution
    def optimize(self, net_string, device_graph):
        """
        Optimizes the placement with a particle swarm.

        ``self.swarm_size`` particles are created at random placements. For
        ``self.steps`` iterations every particle updates its velocity (with
        ``self.w``, ``self.l1``, ``self.l2`` and the current global best
        position), moves, and is re-evaluated; the global best is refreshed
        after each iteration.

        :param net_string: The network to optimize, given as a json string.
        :param device_graph: The device graph whose devices the groups are placed on.
        :return: The net with the global best placement applied, as a json string.
        """
        n_devices = len(device_graph.devices)
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        def position_to_placement(position):
            """Truncate each coordinate to int and clamp it to a valid device index."""
            return [min(max(int(g), 0), n_devices - 1) for g in position]

        def evaluate(position):
            """Score the placement that ``position`` maps to."""
            placement = position_to_placement(position)
            return self.evaluate_placement(
                apply_placement(net_string, placement, groups), device_graph)

        def initialize_swarm():
            """Create and evaluate ``self.swarm_size`` randomly placed particles."""
            swarm = []

            for _ in range(self.swarm_size):
                position = generate_random_placement(len(groups), n_devices)
                # One velocity component per colocation group, each drawn from
                # [-n_devices, n_devices), so that the velocity has the same
                # number of dimensions as the position.
                velocity = [
                    random.random() * n_devices * 2 - n_devices
                    for _ in range(len(groups))
                ]
                particle = Particle(position, velocity)
                particle.evaluate(evaluate)
                swarm.append(particle)

            return swarm

        def find_global_best(swarm):
            """Return the position of the particle with the largest ``best_score``."""
            # NOTE(review): this picks the *largest* best_score and returns the
            # particle's current position. Whether larger is better depends on
            # how Particle.evaluate stores scores - confirm against Particle.
            global_best = max(swarm, key=lambda x: x.best_score)

            return global_best.position

        swarm = initialize_swarm()
        global_best_position = find_global_best(swarm)

        for i in tqdm(range(self.steps)):
            for particle in swarm:
                particle.update_velocity(self.w, self.l1, self.l2,
                                         global_best_position)
                particle.update_position()
                particle.evaluate(evaluate)

            global_best_position = find_global_best(swarm)

        return json.dumps(
            apply_placement(net_string,
                            position_to_placement(global_best_position),
                            groups))
Esempio n. 7
0
        def benchmark(individual, benchmarking_function):
            """Benchmark ``individual`` and return ``(score, description, individual)``.

            The score is the reciprocal of the measured time after a penalty for
            memory overflow has been added.
            """
            placed_net = apply_placement(net_string, individual, groups)
            elapsed, memory_overflow = benchmarking_function(
                get_device_assignment(placed_net),
                return_memory_overflow=True)

            description = create_description(individual)

            # A negative time is clamped to zero; overflow is handled through
            # the memory_overflow value below instead.
            elapsed = max(elapsed, 0)

            # An overflow reported as -1 is counted as an overflow of 1.
            overflow = 1 if memory_overflow == -1 else memory_overflow

            if overflow > 0:
                elapsed = elapsed + overflow * 10**9

            return 1 / elapsed, description, individual
Esempio n. 8
0
        def create_description(individual):
            """Return the ``(device_mode, used_devices, num_jumps)`` descriptor of a placement.

            Each component is scaled by the matching entry of
            ``self.dimension_sizes`` and rounded.
            """
            most_common_device = Counter(individual).most_common(1)[0][0]
            n_devices = len(device_graph.devices)

            # Most frequently used device, scaled onto the first dimension.
            device_mode = round(
                most_common_device / n_devices * self.dimension_sizes[0])

            # Number of distinct devices (minus one), scaled onto the second.
            distinct_devices = len(set(individual))
            used_devices = round(
                (distinct_devices - 1) / n_devices * self.dimension_sizes[1])

            # Jump ratio of the placed graph, scaled onto the third.
            placed_graph = ComputationGraph()
            placed_graph.load_from_string(
                json.dumps(apply_placement(net_string, individual, groups)))
            jumps, max_jumps = placed_graph.get_number_of_jumps(
                return_max_jumps=True)
            num_jumps = round(
                jumps / max_jumps * (self.dimension_sizes[2] - 1))

            return (device_mode, used_devices, num_jumps)
Esempio n. 9
0
def _evaluate(individual,
              net_string,
              groups,
              device_graph,
              pipeline_batches=1,
              batches=1,
              simulator_comp_penalty=1,
              simulator_comm_penalty=1,
              device_memory_utilization=1):
    """Score one described individual with ``evaluate_placement``.

    :param individual: A ``(description, placement)`` pair.
    :param net_string: The network the placement is applied to.
    :param groups: The colocation groups the placement entries refer to.
    :param device_graph: The device graph passed on to ``evaluate_placement``.
    :param pipeline_batches: Forwarded to ``evaluate_placement``.
    :param batches: Forwarded to ``evaluate_placement``.
    :param simulator_comp_penalty: Forwarded as ``comp_penalty``.
    :param simulator_comm_penalty: Forwarded as ``comm_penalty``.
    :param device_memory_utilization: Forwarded to ``evaluate_placement``.
    :return: ``(score, description, placement)`` where ``score`` is the
        reciprocal of the value returned by ``evaluate_placement``.
    """
    description, placement = individual

    simulator_options = {
        'pipeline_batches': pipeline_batches,
        'batches': batches,
        'comp_penalty': simulator_comp_penalty,
        'comm_penalty': simulator_comm_penalty,
        'device_memory_utilization': device_memory_utilization,
    }
    placed_net = apply_placement(net_string, placement, groups)
    evaluated = evaluate_placement(placed_net, device_graph,
                                   **simulator_options)

    return 1 / evaluated, description, placement
Esempio n. 10
0
 def evaluate(position):
     """Score the placement that ``position`` is converted to."""
     placed_net = apply_placement(net_string,
                                  position_to_placement(position), groups)
     return self.evaluate_placement(placed_net, device_graph)
Esempio n. 11
0
    def optimize(self, net_string, device_graph, return_full_archive=False):
        """
        Optimizes device placement by filling an archive of niche elites.

        Candidates are described by three indices (most common device, number
        of devices used, number of jumps); the archive keeps, per index triple,
        the individual with the highest score seen so far. The score is the
        reciprocal of the evaluated time. After ``self.steps`` simulator steps
        the archive can optionally be re-evaluated and refined with
        ``self.benchmarking_function``, logged and plotted.

        Entries of ``self.dimension_sizes`` equal to -1 are replaced in place
        (by the device count for the first two, by the net's max jumps for the
        third).

        :param net_string: The network to optimize, given as a json string.
        :param device_graph: The device graph whose devices the groups are placed on.
        :param return_full_archive: If true, return the raw archive instead of
            the best solution.
        :return: ``(archive_scores, archive_individuals)`` when
            ``return_full_archive`` is true; otherwise the net with the best
            individual applied, as an indented json string (also written to
            ``me_solution.json`` in the log directory).
        """

        if self.n_threads > 1:
            self.worker_pool = Pool(self.n_threads)

        n_devices = len(device_graph.devices)
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        def compute_max_jumps():
            """Return the max-jumps value reported for the net as given."""
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(net_string)
            _, net_max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            return net_max_jumps

        if self.dimension_sizes[0] == -1:
            self.dimension_sizes[0] = n_devices

        if self.dimension_sizes[1] == -1:
            self.dimension_sizes[1] = n_devices

        if self.dimension_sizes[2] == -1:
            self.dimension_sizes[2] = compute_max_jumps()

        # NaN marks an empty niche. np.nan is used because the np.NaN alias
        # was removed in NumPy 2.0.
        archive_scores = np.empty(self.dimension_sizes)
        archive_scores[:] = np.nan
        archive_individuals = np.zeros(list(self.dimension_sizes) +
                                       [len(groups)],
                                       dtype=int)

        def evaluate(individual):
            """Score one individual with the simulator in this process."""
            # _evaluate expects a (description, individual) pair followed by
            # the simulator settings - the same shape the worker-pool call in
            # run_optimization uses.
            return _evaluate(
                (create_description(individual), individual),
                net_string,
                groups,
                device_graph,
                pipeline_batches=self.pipeline_batches,
                batches=self.batches,
                simulator_comp_penalty=self.simulator_comp_penalty,
                simulator_comm_penalty=self.simulator_comm_penalty,
                device_memory_utilization=self.device_memory_utilization)

        def mutate(individual):
            """Return a mutated copy of ``individual``.

            One of three strategies is used: replace every occurrence of one
            present device with another present device, overwrite a contiguous
            zone with a single device, or mutate gene by gene (copy the
            previous gene or draw a random device).
            """
            new_individual = []
            if random.random() < self.replace_mutation_rate:
                devices_present = list(set(individual))
                i1 = random.choice(devices_present)
                i2 = random.choice(devices_present)

                new_individual = [i2 if i == i1 else i for i in individual]
            elif random.random() < self.zone_mutation_rate:
                split1 = random.randint(0, len(individual) - 1)
                split2 = split1 + min(np.random.geometric(0.2),
                                      len(individual) - split1)
                dev = random.randint(0 if self.allow_cpu else 1, n_devices - 1)
                new_individual = individual[:split1] + [dev] * (
                    split2 - split1) + individual[split2:]
            else:
                for i, gene in enumerate(individual):
                    if random.random() < self.copy_mutation_rate and i > 0:
                        new_individual.append(individual[i - 1])
                    elif random.random() < self.mutation_rate:
                        if self.allow_cpu:
                            new_individual.append(
                                random.randint(0, n_devices - 1))
                        else:
                            new_individual.append(
                                random.randint(1, n_devices - 1))
                    else:
                        new_individual.append(gene)

            return new_individual

        def crossover(parent1, parent2):
            """Single-point crossover: head of ``parent1``, tail of ``parent2``."""
            crossover_point = random.randint(1, len(parent1) - 1)
            return parent1[:crossover_point] + parent2[crossover_point:]

        def create_candidates(n,
                              create_random=False,
                              create_trivial=False,
                              selectable_candidates=None):
            """Create candidate individuals.

            :param n: Target number of candidates; nothing is created if <= 0.
            :param create_random: Fill up with random placements instead of
                deriving candidates from parents.
            :param create_trivial: Start with the single-device placements
                (device 0 only when ``self.allow_cpu``).
            :param selectable_candidates: Optional parent pool; when empty or
                ``None``, parents are selected from the archive according to
                ``self.selection``.
            :return: A list of individuals.
            """
            if n <= 0:
                return []
            candidates = []
            if create_trivial:
                candidates.extend([[i] * len(groups)
                                   for i in range(1, n_devices)])
                n -= n_devices - 1

                if self.allow_cpu:
                    candidates.append([0] * len(groups))
                    n -= 1

            if create_random:
                while len(candidates) < n:
                    candidates.append(
                        generate_random_placement(
                            len(groups),
                            n_devices,
                            allow_device_0=self.allow_cpu))
            else:
                selectable_indices = np.argwhere(np.isfinite(archive_scores))
                while len(candidates) < n:
                    c = []
                    if selectable_candidates:
                        # One parent, or two when crossover is drawn.
                        for _ in range(1 + int(
                                random.random() < self.crossover_rate)):
                            c.append(random.choice(selectable_candidates))
                    else:
                        if self.selection == 'random':
                            for _ in range(1 + int(
                                    random.random() < self.crossover_rate)):
                                idx = random.choice(selectable_indices)
                                c.append(
                                    archive_individuals[idx[0], idx[1],
                                                        idx[2], :].tolist())
                        elif self.selection == 'tournament':
                            idx = []
                            t = min(self.tournament_size,
                                    len(selectable_indices))
                            while len(idx) < 1 + int(
                                    random.random() < self.crossover_rate):
                                competitors = random.sample(
                                    selectable_indices.tolist(), t)
                                winner = max(competitors,
                                             key=lambda x: archive_scores[x[
                                                 0], x[1], x[2]])
                                idx.append(winner)
                            for i in idx:
                                c.append(archive_individuals[i[0], i[1],
                                                             i[2], :].tolist())

                    if len(c) == 2:
                        candidate = crossover(*c)
                    else:
                        candidate = c[0]
                    candidate = mutate(candidate)
                    candidates.append(candidate)

            return candidates

        def create_description(individual):
            """Return the ``(device_mode, used_devices, num_jumps)`` niche of an individual."""
            c = Counter(individual)
            device_mode = c.most_common(1)[0][0]
            device_mode = round((device_mode / len(device_graph.devices)) *
                                self.dimension_sizes[0])

            used_devices = round(
                ((len(set(individual)) - 1) /
                 (len(device_graph.devices))) * self.dimension_sizes[1])

            comp_graph_dict = apply_placement(net_string, individual, groups)
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(json.dumps(comp_graph_dict))

            num_jumps, max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            num_jumps = round(
                (num_jumps / max_jumps) * (self.dimension_sizes[2] - 1))

            return (device_mode, used_devices, num_jumps)

        def benchmark(individual, benchmarking_function):
            """Benchmark an individual; return ``(score, description, individual)``."""
            device_assignment = get_device_assignment(
                apply_placement(net_string, individual, groups))
            time, memory_overflow = benchmarking_function(
                device_assignment, return_memory_overflow=True)

            description = create_description(individual)

            # Time is set to -1 if memory overflows - but we check with memory_overflow instead
            time = max(time, 0)

            if memory_overflow == -1:
                memory_overflow = 1

            if memory_overflow > 0:
                time += memory_overflow * 10**9 * 1

            return 1 / time, description, individual

        def reevaluate_archive(benchmarking_function=None,
                               n_keep=None,
                               time_threshold=None):
            """Re-score selected archive entries and drop all others.

            :param benchmarking_function: If given, entries are re-scored with
                ``benchmark``; otherwise with ``evaluate``.
            :param n_keep: If given, only the ``n_keep`` best entries are kept.
            :param time_threshold: If given, only entries whose score is at
                least ``1 / time_threshold`` are kept.
            """
            indices = list(np.argwhere(np.isfinite(archive_scores)))

            if time_threshold:
                indices = [
                    i for i in indices
                    if archive_scores[i[0], i[1], i[2]] >= 1 / time_threshold
                ]

            if n_keep:
                indices = sorted(
                    indices, key=lambda i: -archive_scores[i[0], i[1], i[2]])
                indices = indices[:n_keep]

            assert len(
                indices), 'No solutions fulfill the specified requirements'

            # Clear every score; only the selected entries are filled in again.
            archive_scores[:] = np.nan
            if self.verbose:
                if n_keep:
                    log(f'Reevaluating {n_keep} best individuals in archive (and throwing away the rest)'
                        )
                else:
                    log('Reevaluating all individuals in archive')
                if time_threshold:
                    log(f'Time threshold: {time_threshold}ms')
            for i in tqdm(indices, disable=not self.verbose):
                individual = archive_individuals[i[0], i[1], i[2], :].tolist()
                if benchmarking_function:
                    archive_scores[i[0], i[1],
                                   i[2]] = benchmark(individual,
                                                     benchmarking_function)[0]
                else:
                    archive_scores[i[0], i[1], i[2]] = evaluate(individual)[0]

        def log_archive(file_name):
            """Write all filled niches, best first, to ``archive_logs/<file_name>``."""
            indices = list(np.argwhere(np.isfinite(archive_scores)))
            indices = sorted(indices,
                             key=lambda i: -archive_scores[i[0], i[1], i[2]])

            with open(os.path.join(get_log_dir(), 'archive_logs', file_name),
                      'w') as f:
                f.write('niche; time; placement\n')
                for i in indices:
                    niche = tuple(i)
                    time = 1 / archive_scores[i[0], i[1], i[2]]
                    placement = archive_individuals[i[0], i[1], i[2]].tolist()

                    f.write(f'{niche}; {time}; {placement}\n')

        def run_optimization(steps, benchmarking_function=None, start_step=0):
            """Run the archive search for ``steps`` steps.

            :param steps: Number of steps to run.
            :param benchmarking_function: If given, candidates are scored with
                it (one per step) instead of the simulator.
            :param start_step: Offset added to the step number in log output.
            """
            nonlocal archive_individuals, archive_scores

            if self.verbose:
                if benchmarking_function:
                    log('Optimizing with benchmarking...')
                else:
                    log('Optimizing with simulator...')

            step_size = 1 if benchmarking_function else self.n_threads

            for i in tqdm(range(0, steps, step_size),
                          disable=not self.verbose):
                # Number of random candidates still owed to the initial
                # population in this step.
                init_number = min(max(0, self.initial_size - i),
                                  self.n_threads)

                if self.include_trivial_solutions and i == 0:
                    candidates = create_candidates(init_number,
                                                   create_trivial=True,
                                                   create_random=True)
                else:
                    candidates = create_candidates(init_number,
                                                   create_random=True)
                if init_number > 0:
                    candidates += create_candidates(
                        self.n_threads - init_number,
                        selectable_candidates=candidates[:])
                else:
                    candidates += create_candidates(self.n_threads -
                                                    init_number)

                if benchmarking_function:
                    eval_results = [
                        benchmark(candidates[0], benchmarking_function)
                    ]
                elif self.n_threads == 1:
                    eval_results = [evaluate(candidates[0])]
                else:
                    fn_args = zip(((create_description(c), c)
                                   for c in candidates), repeat(net_string),
                                  repeat(groups), repeat(device_graph),
                                  repeat(self.pipeline_batches),
                                  repeat(self.batches),
                                  repeat(self.simulator_comp_penalty),
                                  repeat(self.simulator_comm_penalty),
                                  repeat(self.device_memory_utilization))

                    eval_results = self.worker_pool.starmap(_evaluate, fn_args)

                for result in eval_results:
                    score, description, individual = result

                    previous_elite_score = archive_scores[description[0],
                                                          description[1],
                                                          description[2]]
                    # Fill an empty niche, or replace a weaker elite.
                    if np.isnan(previous_elite_score
                                ) or previous_elite_score < score:
                        archive_scores[description[0], description[1],
                                       description[2]] = score
                        archive_individuals[description[0], description[1],
                                            description[2], :] = individual

                # self.verbose doubles as the logging period (in steps).
                if self.verbose and (i + 1) % self.verbose < step_size:
                    best_time = 1 / np.nanmax(archive_scores)
                    log(f'[{i + 1}/{steps}] Best time: {best_time:.4f}ms')

                if self.score_save_period and (i % self.score_save_period == 0
                                               or steps - i < step_size):
                    best_time = 1 / np.nanmax(archive_scores)
                    with open(os.path.join(get_log_dir(), 'time_history.csv'),
                              'a') as f:
                        f.write(f'{i + start_step + 1}, {best_time}\n')

                if self.archive_log_period and (
                        i + 1) % self.archive_log_period < step_size:
                    log_archive(f'step_{i + start_step + 1:06}.csv')

        if self.score_save_period:
            with open(os.path.join(get_log_dir(), 'time_history.csv'),
                      'w') as f:
                f.write('step, time\n')

        run_optimization(self.steps)

        if self.worker_pool:
            self.worker_pool.close()

        if self.archive_log_period is not None:
            log_archive('1_simulation_finished.csv')

        if self.benchmarking_steps > 0 or self.benchmark_before_selection:
            reevaluate_archive(self.benchmarking_function,
                               n_keep=self.benchmarking_n_keep,
                               time_threshold=self.benchmarking_time_threshold)

            if self.archive_log_period is not None:
                log_archive('2_reevaluated.csv')

        if self.benchmarking_steps > 0:
            run_optimization(self.benchmarking_steps,
                             self.benchmarking_function, self.steps)
            # NOTE(review): unlike the two archive logs above, this one is not
            # guarded by `self.archive_log_period is not None` - confirm.
            log_archive('3_benchmarking_finished.csv')

        # Computed on demand so that the animation branch never depends on the
        # score-plot branch having run first.
        max_jumps = None

        if self.show_score_plot:
            if self.verbose:
                log('Plotting archive scores...', end='')
            max_jumps = compute_max_jumps()
            plot_map_elites_archive(archive_scores,
                                    n_devices,
                                    max_jumps,
                                    self.plot_axes,
                                    save_path=os.path.join(
                                        get_log_dir(), 'archive_plot.pdf'))
            if self.verbose:
                log('Done')

        if self.plot_animation:
            if not self.archive_log_period:
                # Without archive logs there is nothing to animate; skip
                # regardless of verbosity and only report it when verbose.
                if self.verbose:
                    log('self.plot_animation was set to True, but archive logging was not enabled. '
                        'Skipping animation plot.')
            else:
                if self.verbose:
                    log('Plotting archive animation...', end='')
                if max_jumps is None:
                    max_jumps = compute_max_jumps()
                paths = glob(
                    os.path.join(get_log_dir(), 'archive_logs', 'step_*.csv'))
                plot_archive_animation(
                    paths,
                    (os.path.join(get_log_dir(), 'archive_animation.mp4'),
                     os.path.join(get_log_dir(), 'archive_animation.gif')),
                    self.dimension_sizes,
                    n_devices=n_devices,
                    max_jumps=max_jumps,
                    axes=self.plot_axes,
                    fps=self.animation_fps)
                if self.verbose:
                    log('Done')

        if return_full_archive:
            return archive_scores, archive_individuals

        # nanargmax returns a flat index; flatten the niche axes of the
        # individuals the same way to look the winner up.
        best_index = np.nanargmax(archive_scores)
        best_individual = archive_individuals.reshape(
            (-1, len(groups)))[best_index]

        if self.verbose:
            log(f'Best individual: {best_individual.tolist()}')

        solution = json.dumps(apply_placement(net_string,
                                              best_individual.tolist(),
                                              groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'me_solution.json'), 'w') as f:
            f.write(solution)

        return solution