def is_local_optimum(parameters, template, sizes, context):
    """Tell whether `parameters` is a local optimum of the benchmarked time.

    The swept parameters are halved and doubled around the guess and every
    resulting combination is benchmarked.

    Returns False when the guess itself benchmarks to infinity or when a
    neighbour is more than 2% faster than the guess; returns True when the
    guess runs in less than 1e-5 (treated as latency bound) or when no
    neighbour beats it.

    Note: `sweep_over` is only defined for the template families listed
    below; any other template raises a NameError.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    genetic_infos = tools.genetic_infos_of(template)
    #product() yields tuples: normalize so that the guess can be recognized
    parameters = tuple(parameters)

    #Indices of the parameters that are swept around the guess
    if issubclass(template, sc.templates.elementwise_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.reduce_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.elementwise_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.reduce_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.gemm):
        sweep_over = [0, 1, 2, 3, 4]

    #Evaluate the provided parameters guess
    reference = tools.benchmark(template(*parameters), tree)
    if reference == float('inf'):
        return False

    #Latency bound -- ignore
    if reference < 1e-5:
        return True

    #Determine if local minimum
    #Floor division keeps the candidates integral on every Python version
    domain = [[v for v in [x // 2, x, x * 2]
               if 1 <= v <= 2**2**genetic_infos['nbits'][i]]
              if i in sweep_over else [x]
              for i, x in enumerate(parameters)]
    for x in product(*domain):
        #The guess was already timed as the reference: timing it again
        #could only reject it because of measurement noise
        if x == parameters:
            continue
        time = tools.benchmark(template(*x), tree)
        if time / reference < .98:
            return False
    return True
def is_local_optimum(parameters, template, sizes, context):
    """Tell whether `parameters` is a local optimum of the benchmarked time.

    Each swept parameter is tried at half, same and double its value, and
    all the resulting combinations are benchmarked against the guess.

    Returns:
        False if the guess benchmarks to infinity or if some neighbour is
        more than 2% faster; True if the guess takes less than 1e-5
        (treated as latency bound) or if no neighbour is that much faster.

    Note: templates outside of the families tested below leave `sweep_over`
    undefined, which raises a NameError.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    genetic_infos = tools.genetic_infos_of(template)
    nbits = genetic_infos['nbits']
    #Neighbours come out of product() as tuples; compare like with like
    parameters = tuple(parameters)

    #Which parameter positions are perturbed
    if issubclass(template, sc.templates.elementwise_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.reduce_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.elementwise_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.reduce_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.gemm):
        sweep_over = [0, 1, 2, 3, 4]

    #Evaluate the provided parameters guess
    reference = tools.benchmark(template(*parameters), tree)
    if reference == float('inf'):
        return False

    #Latency bound -- ignore
    if reference < 1e-5:
        return True

    #Determine if local minimum
    domain = []
    for i, x in enumerate(parameters):
        if i in sweep_over:
            #x // 2 (not x / 2) so that candidates stay integers
            domain.append([v for v in [x // 2, x, x * 2]
                           if 1 <= v <= 2**2**nbits[i]])
        else:
            domain.append([x])
    for x in product(*domain):
        #Skip the guess itself: it is the reference measurement
        if x == parameters:
            continue
        time = tools.benchmark(template(*x), tree)
        if time / reference < .98:
            return False
    return True
def exhaustive(template, sizes, context):
    """Benchmark every parameter combination and return the fastest one.

    Each parameter takes 2**nbits[i] values: categorical positions are
    looked up in `fetch_types`, the other ones are powers of two.
    Combinations raising `profile_execution_failure` are skipped. The best
    result so far is reported on stdout after each combination.

    Returns:
        The fastest parameters tuple, or None if no combination could be
        benchmarked.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    metric = tools.metric_of(template)
    genetic_infos = tools.genetic_infos_of(template)
    nbits = genetic_infos['nbits']
    categorical = genetic_infos['categorical']
    #Number of combinations, known without materializing the whole product
    total = 2**sum(nbits)
    best = None
    for idx, r in enumerate(product(*[range(2**nb) for nb in nbits])):
        parameters = tuple(fetch_types[x] if i in categorical else 2**x
                           for i, x in enumerate(r))
        try:
            time = tools.benchmark(template, parameters, tree)
        except profile_execution_failure:
            time = None
        if time is not None and (best is None or time < best[1]):
            best = parameters, time
        if best:
            stdout.write('%.2f %% | Best %.2f [ for %s ]\r'%(float(idx*100)/total, metric(sizes, best[1]), best[0]))
    #Every single profile may have failed
    if best is None:
        return None
    return best[0]
def is_local_optimum(parameters, template, sizes, context):
    """Tell whether `parameters` is a local optimum of the benchmarked time.

    The swept parameters are halved and doubled around the guess; every
    resulting combination is benchmarked, and the ones raising
    `profile_execution_failure` are ignored.

    Returns False when the guess itself cannot be executed or when a
    neighbour is more than 2% faster than the guess; returns True when the
    guess runs in less than 1e-5 (treated as latency bound) or when no
    neighbour beats it.

    Note: `sweep_over` is only defined for the template families listed
    below; any other template raises a NameError.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    genetic_infos = tools.genetic_infos_of(template)
    #product() yields tuples: normalize so that the guess can be recognized
    parameters = tuple(parameters)

    #Indices of the parameters that are swept around the guess
    if issubclass(template, sc.templates.elementwise_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.reduce_1d):
        sweep_over = [0, 1, 2]
    elif issubclass(template, sc.templates.elementwise_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.reduce_2d):
        sweep_over = [0, 1, 2, 3, 4]
    elif issubclass(template, sc.templates.matrix_product):
        sweep_over = [1, 2, 3, 4, 5, 6, 7]

    #Evaluate the provided parameters guess
    try:
        reference = tools.benchmark(template, parameters, tree)
    except profile_execution_failure:
        return False

    #Latency bound -- ignore
    if reference < 1e-5:
        return True

    #Floor division keeps the candidates integral on every Python version
    domain = [[v for v in [x // 2, x, x * 2]
               if 1 <= v <= 2**2**genetic_infos['nbits'][i]]
              if i in sweep_over else [x]
              for i, x in enumerate(parameters)]
    for x in product(*domain):
        #The guess was already timed as the reference: timing it again
        #could only reject it because of measurement noise
        if x == parameters:
            continue
        try:
            time = tools.benchmark(template, x, tree)
        except profile_execution_failure:
            continue
        if time / reference < .98:
            return False
    return True
def run(self, level='intermediate'):
    """Tune `self.operation` on `self.device` and export the result to JSON.

    For each training size, the best already-known profile is kept when
    `optimize.is_local_optimum` accepts it; otherwise a
    `optimize.GeneticOptimizer` run searches for a new profile. Progress is
    checkpointed as X.csv / Y.csv / profiles.csv under
    save/<dtype name>/<operation name> and restored on the next call.
    Once all the sizes are processed, external profiles are benchmarked,
    profiles that are never the best are pruned, and the profiles (plus a
    predictor when more than one profile remains) are written to the JSON
    file.

    level: one of 'simple', 'intermediate', 'full'. It is only validated
        here; the training sizes below do not depend on it.

    Note: `sizes` is only defined for the operations listed below; any
    other operation raises a NameError.
    """
    assert level in ['simple', 'intermediate', 'full']
    tools.dtype = self.dtype
    device = self.device
    operation = self.operation
    context = sc.driver.context(device)

    if self.logger:
        self.logger.info("----------------")
        self.logger.info(operation.__name__.replace('_', '-').upper())
        self.logger.info(tools.dtype.__name__.upper())
        self.logger.info("----------------")

    #BLAS1 training sizes
    if operation in [sc.templates.elementwise_1d, sc.templates.reduce_1d]:
        sizes = [(10**x, ) for x in range(3, 8)]

    #BLAS2 training sizes
    if operation in [
            sc.templates.elementwise_2d, sc.templates.reduce_2d_rows,
            sc.templates.reduce_2d_cols
    ]:
        sizes = []
        #Square
        for N in [896, 1280, 1760, 2560]:
            sizes += [(N, N)]
        #Short/Fat
        for M in [16, 32, 64, 128, 512, 1024]:
            for N in [1024, 4096, 16384, 65536]:
                sizes += [(M, N)]
        #Tall/Skinny
        for N in [16, 32, 64, 128, 512, 1024]:
            for M in [1024, 4096, 16384, 65536]:
                sizes += [(M, N)]

    #BLAS3 training sizes
    if operation in [
            sc.templates.gemm_nn, sc.templates.gemm_nt,
            sc.templates.gemm_tn, sc.templates.gemm_tt
    ]:
        sizes = []
        #Square
        for N in [896, 1760, 2048, 2560]:
            sizes += [(N, N, N)]
        #LaPack
        for N in [896, 1760, 2048, 2560]:
            for K in [16, 32, 64, 128]:
                sizes += [(N, N, K)]
        #Covariance
        for N in [16, 32, 64, 128, 256]:
            for K in [16000, 32000, 64000, 128000]:
                sizes += [(N, N, K)]
        #DeepSpeech
        for M in [1760, 2048, 2560, 4096]:
            for N in [16, 32, 64, 128, 7000]:
                sizes += [(M, N, M)]
        for K in [1760, 2048, 2560, 4096]:
            for M, N in [(5124, 9124), (35, 8457)]:
                sizes += [(M, N, K)]
        for M, K in [(7680, 2560), (3072, 1024)]:
            for N in [16, 32, 64, 128]:
                sizes += [(M, N, K)]

    #Training data
    performance = tools.metric_of(operation)
    profiles, X, Y = [], [], []

    #Restore progress
    savepath = os.path.join('save', tools.dtype.__name__, operation.__name__)
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    try:
        with open(os.path.join(savepath, 'X.csv')) as f:
            saved_X = [tuple(map(int, row))
                       for row in csv.reader(f, delimiter=',')]
        #Profiles are restored as tuples, like the ones returned by the
        #optimizer, so that `best not in profiles` detects duplicates
        with open(os.path.join(savepath, 'profiles.csv')) as f:
            saved_profiles = [tuple(map(int, row))
                              for row in csv.reader(f, delimiter=',')]
        #Rows of Y must be lists: they grow when a profile is added
        with open(os.path.join(savepath, 'Y.csv')) as f:
            saved_Y = [[float(v) for v in row]
                       for row in csv.reader(f, delimiter=',')]
    except (IOError, OSError, ValueError, csv.Error):
        #No usable checkpoint: start from scratch
        pass
    else:
        #All-or-nothing, so that X, Y and profiles always stay consistent
        X, Y, profiles = saved_X, saved_Y, saved_profiles

    #Save data
    def save():
        for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'],
                                 [X, Y, profiles]):
            with open(os.path.join(savepath, fname), 'wb') as f:
                csv.writer(f).writerows(data)

    #Tuning
    for idx, x in enumerate(sizes):
        #Create new line on log
        if idx > 0:
            self.progress_bar.set_finished()
        self.progress_bar.set_prefix(', '.join(map(str, x)))
        #Skip if already saved
        if x in X:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
            continue
        #Best existing profile for x
        tree, operands = tools.tree_of(operation, x, context)
        y = [
            performance(x, tools.benchmark(operation(*p), tree))
            for p in profiles
        ]
        best = profiles[np.argmax(y)] if y else None
        #Retune if necessary
        tune = not (best and optimize.is_local_optimum(
            best, operation, x, context))
        if tune:
            optimizer = optimize.GeneticOptimizer(
                self.logger,
                naccept=1000,
                niter=1000,
                cxpb=.4,
                mutpb=.4,
                popsize=20,
                progress_bar=self.progress_bar)
            best = optimizer.run(operation, x, context, prior=best)[0]
            if best not in profiles:
                profiles.append(best)
                #Benchmark the new profile on all the previous sizes
                for xx, yy in zip(X, Y):
                    tree, _ = tools.tree_of(operation, xx, context)
                    time = tools.benchmark(operation(*best), tree)
                    yy.append(performance(xx, time))
        #Update dataset
        X.append(x)
        tree, operands = tools.tree_of(operation, x, context)
        y = [
            performance(x, tools.benchmark(operation(*prf), tree))
            for prf in profiles
        ]
        Y.append(y)
        #Save data
        save()
        #print performance info in case no tuning was done
        if not tune:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
    self.progress_bar.set_finished()
    save()

    #Adding external profiles
    for prof in tools.external_profiles(operation):
        profiles.append(prof.__class__.__name__)
        for x, y in zip(X, Y):
            tree, operands = tools.tree_of(operation, x, context)
            perf = performance(x, tools.benchmark(prof, tree, operation))
            y.append(perf)

    #Pruning of useless profiles
    X = np.array(X)
    Y = np.array(Y)
    if len(Y) > 0 and len(Y[0]) > 1:
        #Profiles that are never the best one for any size
        idx = np.where(
            np.bincount(np.argmax(Y, 1), minlength=len(profiles)) == 0)[0]
        profiles = [p for ip, p in enumerate(profiles) if ip not in idx]
        Y = np.delete(Y, idx, axis=1)

    #Exporting to JSON
    json_path = tools.sanitize(
        device.name) + '.json' if not self.json_path else self.json_path
    if os.path.isfile(json_path):
        with open(json_path, 'r') as f:
            json_data = json.load(f)
    else:
        json_data = {}
        json_data["version"] = "1.0"
    operation_name = operation.__name__
    if operation_name not in json_data:
        json_data[operation_name] = {}
    json_data[operation_name][tools.dtype.__name__] = {}
    D = json_data[operation_name][tools.dtype.__name__]
    if len(profiles) > 1:
        clf, nrmse = model.train(X, Y, profiles)
        D['predictor'] = [{
            'children_left': e.tree_.children_left.tolist(),
            'children_right': e.tree_.children_right.tolist(),
            'threshold': e.tree_.threshold.astype('float64').tolist(),
            'feature': e.tree_.feature.astype('float64').tolist(),
            'value': e.tree_.value[:, :, 0].astype('float64').tolist()
        } for e in clf.estimators_]
    D['profiles'] = [tools.convert(x) for x in profiles]
    #The context manager guarantees that the file is flushed and closed
    with open(json_path, 'w') as f:
        json.dump(json_data, f)
def run(self, template, sizes, context, initializer=None, prior=None):
    """Search the fastest parameters of `template` with a genetic algorithm.

    A genome is the concatenation of the Gray-coded exponents of the
    parameters (nbits[i] bits for parameter i); the parameter values are
    the corresponding powers of two.

    template, sizes, context: forwarded to `tools.tree_of`.
    initializer: iterator yielding lists of exponents, one per parameter.
        Random exponents are drawn when it is None.
    prior: parameter values (powers of two, as returned by this method)
        used to seed the first individual.

    Returns:
        (best parameters as a tuple, x, y) where x and y are always empty
        lists.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    metric = tools.metric_of(template)
    genetic_infos = tools.genetic_infos_of(template)
    nbits = genetic_infos['nbits']
    offsets = cumsum([0] + nbits)

    def bin2gray(A):
        #Binary string -> list of Gray-code bits
        g = [int(A[0])]
        for i in range(1, len(A)):
            g += [int(A[i - 1] != A[i])]
        return g

    def gray2int(A):
        #List of Gray-code bits -> integer
        b = [A[0]]
        for i in range(1, len(A)):
            b += [int(b[i - 1] != A[i])]
        return int(''.join(map(str, b)), 2)

    def encode(genome):
        #List of exponents -> flat list of Gray-code bits
        encoded = [
            bin2gray(bin(x)[2:].zfill(nb)) for x, nb in zip(genome, nbits)
        ]
        return sum(encoded, [])

    def decode(genome):
        #Flat list of Gray-code bits -> parameter values (powers of two)
        result = []
        for off1, off2 in zip(offsets[:-1], offsets[1:]):
            result += [gray2int(genome[off1:off2])]
        return [2**x for x in result]

    def exponents_of(parameters):
        #Inverse of the 2**x applied by decode(), clamped to what nbits
        #can represent so that the genome keeps its expected length
        return [
            min(max(int(p).bit_length() - 1, 0), 2**nb - 1)
            for p, nb in zip(parameters, nbits)
        ]

    def evaluate(genome):
        idx = tuple(genome)
        if idx not in cache:
            time = tools.benchmark(template(*decode(genome)), tree)
            #Invalid profiles are not cached
            if time == float('inf'):
                return time,
            cache[idx] = time
            self.progress_bar.update(
                max(len(cache), it), self.niter,
                decode(min(cache, key=cache.get)),
                metric(sizes, min(cache.values())))
        return cache[idx],

    cache = {}
    hof = deap_tools.HallOfFame(1)

    creator.create("FitnessMin", base.Fitness, weights=(-1.0, ))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", deap_tools.cxTwoPoint)
    toolbox.register("mutate", deap_tools.mutFlipBit)
    toolbox.register("select", deap_tools.selNSGA2)

    x = []
    y = []
    it = 0
    population = []

    #Initialization
    if initializer is None:
        #randint is inclusive: an exponent on nb bits is at most 2**nb - 1
        initializer = ([random.randint(0, 2**nb - 1) for nb in nbits]
                       for i in iter(int, 1))
    #prior holds parameter values whereas encode() expects exponents
    genome = encode(exponents_of(prior) if prior else list(next(initializer)))
    while len(population) < self.popsize:
        individual = creator.Individual(genome)
        individual.fitness.values = toolbox.evaluate(genome)
        if max(individual.fitness.values) != float('inf'):
            population += [individual]
        genome = encode(list(next(initializer)))
    hof.update(population)

    #Main iteration
    while len(cache) < self.naccept and it < self.niter:
        #Generate offspring
        offspring = []
        while len(offspring) < self.popsize:
            op_choice = random.random()
            #Cross-over
            if op_choice < self.cxpb:
                ind1, ind2 = map(toolbox.clone, random.sample(population, 2))
                ind1, ind2 = toolbox.mate(ind1, ind2)
                ind = ind1
                #Store the fitness: the clone still carries its parent's
                ind.fitness.values = toolbox.evaluate(ind)
                if max(ind.fitness.values) != float('inf'):
                    offspring += [ind]
            #Mutation
            elif op_choice < self.cxpb + self.mutpb:
                ind = toolbox.clone(random.choice(population))
                ind, = toolbox.mutate(ind, 1.0 / offsets[-1])
                #Store the fitness: the clone still carries its parent's
                ind.fitness.values = toolbox.evaluate(ind)
                if max(ind.fitness.values) != float('inf'):
                    offspring += [ind]
            #Reproduction
            else:
                offspring += [random.choice(population)]

        #Update fitnesses
        fitnesses = toolbox.map(toolbox.evaluate, offspring)
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #Update population
        population[:] = toolbox.select(population + offspring, self.popsize)
        hof.update(population)

        it += 1
    return tuple(decode(hof[0])), x, y
def run(self, level='intermediate'):
    """Tune `self.operation` on `self.device` and export the result to JSON.

    For each training size, a random forest fitted on the data gathered so
    far predicts the most promising known profiles; the best of them is
    kept when `optimize.is_local_optimum` accepts it, otherwise a
    `optimize.GeneticOptimizer` run searches for a new profile. Progress is
    checkpointed as X.csv / Y.csv / profiles.csv under
    save/<operation name> and restored on the next call. Finally, profiles
    that are never the best are pruned and the remaining ones (plus a
    predictor when more than one profile remains) are written to the JSON
    file under the 'float32' key.

    level: 'simple', 'intermediate' or 'full'; selects how many training
        sizes are explored.

    Note: `sizes` is only defined for the operations listed below; any
    other operation raises a NameError.
    """
    assert level in ['simple', 'intermediate', 'full']
    device = self.device
    operation = self.operation
    context = sc.driver.context(device)

    if self.logger:
        self.logger.info("----------------")
        self.logger.info(operation.__name__.replace('_', '-').upper())
        self.logger.info("----------------")

    #BLAS1 training sizes
    if operation in [sc.templates.elementwise_1d, sc.templates.reduce_1d]:
        if level == 'simple':
            sizes = [(10000000, )]
        elif level == 'intermediate':
            sizes = [(x, ) for x in tools.expspace(1e3, 1e8, 10)]
        else:
            sizes = [(x, ) for x in tools.expspace(1e3, 1e8, 100)]

    #BLAS2 training sizes
    if operation in [
            sc.templates.elementwise_2d, sc.templates.reduce_2d_rows,
            sc.templates.reduce_2d_cols
    ]:
        if level == 'simple':
            sizes = [(1536, 1536)]
        elif level == 'intermediate':
            sizes = [(896, 896), (1536, 1536), (256, 256), (1024, 256),
                     (4096, 256), (16384, 256), (256, 1024), (256, 4096),
                     (256, 16384), (3025, 96)]
        else:
            sizes = product(pow2range(4, 17), pow2range(4, 17))

    #BLAS3 training sizes
    if operation in [
            sc.templates.matrix_product_nn, sc.templates.matrix_product_nt,
            sc.templates.matrix_product_tn, sc.templates.matrix_product_tt
    ]:
        if level == 'simple':
            sizes = [(2560, 2560, 2560)]
        elif level == 'intermediate':
            sizes = [
                #Square
                (896, 896, 896),
                (1536, 1536, 1536),
                (2176, 2176, 2176),
                #Rank-32 updates
                (896, 896, 32),
                (1536, 1536, 32),
                (2176, 2176, 32),
                #Covariance
                (32, 32, 16000),
                (64, 64, 64000),
                (256, 256, 32000),
                #Convolutions
                (3025, 64, 363),
                (729, 192, 1200),
                (169, 384, 1728),
                (169, 256, 3456),
                (169, 128, 2304),
                (169, 2304, 256),
                (169, 3456, 256),
                (169, 1728, 384),
                (729, 1600, 192),
                (3025, 363, 64),
                (2304, 256, 169),
                (3456, 256, 169),
                (1728, 384, 169),
                (1600, 192, 729),
                (363, 64, 3025)
            ]
        elif level == 'full':
            sizes = product(pow2range(5, 12), pow2range(5, 12),
                            pow2range(5, 17))

    #Discard the tuples whose memory footprint is too small/big
    sizes = [
        x for x in sizes
        if 1e-4 <= tools.memory_footprint(operation, x) <= 2e-1
    ]

    #Training data
    performance = tools.metric_of(operation)
    profiles, X, Y = [], [], []

    #Restore previous run
    savepath = os.path.join('save', operation.__name__)
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    def mmap(x):
        #Fetch types are saved by name, the other parameters as integers
        if x == 'FETCH_FROM_LOCAL':
            return sc.templates.fetch_type.FETCH_FROM_LOCAL
        if x == 'FETCH_FROM_GLOBAL_CONTIGUOUS':
            return sc.templates.fetch_type.FETCH_FROM_GLOBAL_CONTIGUOUS
        if x == 'FETCH_FROM_GLOBAL_STRIDED':
            return sc.templates.fetch_type.FETCH_FROM_GLOBAL_STRIDED
        return int(x)

    try:
        with open(os.path.join(savepath, 'X.csv')) as f:
            saved_X = [tuple(map(int, row))
                       for row in csv.reader(f, delimiter=',')]
        #Rows of Y must be lists: they grow when a profile is added
        with open(os.path.join(savepath, 'Y.csv')) as f:
            saved_Y = [[float(v) for v in row]
                       for row in csv.reader(f, delimiter=',')]
        #Profiles are restored as tuples, like the ones returned by the
        #optimizer, so that `new not in profiles` detects duplicates
        with open(os.path.join(savepath, 'profiles.csv')) as f:
            saved_profiles = [tuple(mmap(v) for v in row)
                              for row in csv.reader(f, delimiter=',')]
    except (IOError, OSError, ValueError, csv.Error):
        #No usable checkpoint: start from scratch
        pass
    else:
        #All-or-nothing, so that X, Y and profiles always stay consistent
        X, Y, profiles = saved_X, saved_Y, saved_profiles

    ##### Exploration #####
    for idx, x in enumerate(sizes):
        if idx > 0:
            self.progress_bar.set_finished()
        self.progress_bar.set_prefix(', '.join(map(str, x)))
        #Skip if saved
        if x in X:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
            continue

        #Check if the current best prediction is not a local optimum
        nsamples = len(X)
        nparams = len(profiles)
        tree, operands = tools.tree_of(operation, x, context)
        if nsamples == 0:
            retune = True
            predicted = None
        else:
            if nparams == 1:
                predicted = profiles[0]
            else:
                clf = RandomForestRegressor(
                    min(10, nsamples + 1),
                    max_depth=min(10, nsamples + 1)).fit(X, Y)
                #clf, nrmse = model.train(X, Y, profiles)
                predperf = clf.predict(x)[0]
                best = (-predperf).argsort()[:5]
                #Each measurement stays paired with its profile index, so
                #that a failed candidate cannot shift the selection
                candidates = []
                for b in best:
                    try:
                        candidates += [(performance(
                            x, tools.benchmark(operation, profiles[b],
                                               tree)), b)]
                    except profile_execution_failure:
                        pass
                if candidates:
                    predicted = profiles[max(candidates,
                                             key=lambda c: c[0])[1]]
                else:
                    predicted = None
            #Nothing to validate if every candidate failed to execute
            retune = predicted is None or not optimize.is_local_optimum(
                predicted, operation, x, context)

        #Retune if necessary
        if retune:
            optimizer = optimize.GeneticOptimizer(
                self.logger,
                naccept=1000,
                niter=1000,
                cxpb=.4,
                mutpb=.4,
                popsize=20,
                progress_bar=self.progress_bar)
            new = optimizer.run(operation, x, context, prior=predicted)[0]
            if new not in profiles:
                profiles.append(new)
                if nsamples > 0:
                    #Benchmark the new profile on all the previous sizes
                    for xx, yy in zip(X, Y):
                        _tree, _operands = tools.tree_of(
                            operation, xx, context)
                        try:
                            time = tools.benchmark(operation, new, _tree)
                            perf = performance(xx, time)
                        except profile_execution_failure:
                            perf = 0
                        yy.append(0 if isinf(perf) else perf)

        ##### Training #####
        y = []
        fastest = max(predperf) if nparams > 1 else None
        for ip, p in enumerate(profiles):
            try:
                #Profiles predicted to be 10x slower than the fastest one
                #are not benchmarked
                perf = 0 if fastest and ip < nparams and predperf[
                    ip] / fastest < .1 else performance(
                        x, tools.benchmark(operation, p, tree))
            except profile_execution_failure:
                perf = 0
            y.append(0 if isinf(perf) else perf)
        X.append(x)
        Y.append(y)

        #Save data
        for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'],
                                 [X, Y, profiles]):
            with open(os.path.join(savepath, fname), 'wb') as f:
                csv.writer(f).writerows(data)

        #print performance info in case no tuning was done
        if not retune:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
    self.progress_bar.set_finished()

    #Remove unused profiles
    if Y and len(Y[0]) > 1:
        #minlength makes sure that trailing profiles are counted as well
        unused = np.where(
            np.bincount(np.argmax(Y, 1), minlength=len(profiles)) == 0)[0]
        profiles = [p for ip, p in enumerate(profiles) if ip not in unused]
        Y = np.delete(Y, unused, axis=1).tolist()

    ##### Exportation #####
    json_path = tools.sanitize(
        device.name) + '.json' if not self.json_path else self.json_path
    if os.path.isfile(json_path):
        with open(json_path, 'r') as f:
            json_data = json.load(f)
    else:
        json_data = {}
        json_data["version"] = "1.0"
    operation_name = operation.__name__
    if operation_name not in json_data:
        json_data[operation_name] = {}
    json_data[operation_name]['float32'] = {}
    D = json_data[operation_name]['float32']
    if len(profiles) > 1:
        clf, nrmse = model.train(X, Y, profiles)
        D['predictor'] = [{
            'children_left': e.tree_.children_left.tolist(),
            'children_right': e.tree_.children_right.tolist(),
            'threshold': e.tree_.threshold.astype('float64').tolist(),
            'feature': e.tree_.feature.astype('float64').tolist(),
            'value': e.tree_.value[:, :, 0].astype('float64').tolist()
        } for e in clf.estimators_]
    D['profiles'] = [[int(v) for v in x] for x in profiles]
    #The context manager guarantees that the file is flushed and closed
    with open(json_path, 'w') as f:
        json.dump(json_data, f)
def run(self, level = 'intermediate'):
    """Tune `self.operation` on `self.device` and export the result to JSON.

    For each training size, the best already-known profile is kept when
    `optimize.is_local_optimum` accepts it; otherwise a
    `optimize.GeneticOptimizer` run searches for a new profile. Progress is
    checkpointed as X.csv / Y.csv / profiles.csv under
    save/<dtype name>/<operation name> and restored on the next call.
    Once all the sizes are processed, external profiles are benchmarked
    (a line is printed whenever one of them beats every tuned profile),
    profiles that are never the best are pruned, and the profiles (plus a
    predictor when more than one profile remains) are written to the JSON
    file.

    level: one of 'simple', 'intermediate', 'full'. It is only validated
        here; the training sizes below do not depend on it.

    Note: `sizes` is only defined for the operations listed below; any
    other operation raises a NameError.
    """
    assert level in ['simple', 'intermediate', 'full']
    tools.dtype = self.dtype
    device = self.device
    operation = self.operation
    context = sc.driver.context(device)

    if self.logger:
        self.logger.info("----------------")
        self.logger.info(operation.__name__.replace('_','-').upper())
        self.logger.info(tools.dtype.__name__.upper())
        self.logger.info("----------------")

    #BLAS1 training sizes
    if operation in [sc.templates.elementwise_1d, sc.templates.reduce_1d]:
        sizes = [(10**x,) for x in range(3,8)]

    #BLAS2 training sizes
    if operation in [sc.templates.elementwise_2d, sc.templates.reduce_2d_rows, sc.templates.reduce_2d_cols]:
        sizes = []
        #Square
        for N in [896, 1280, 1760, 2560]:
            sizes += [(N, N)]
        #Short/Fat
        for M in [16, 32, 64, 128, 512, 1024]:
            for N in [1024, 4096, 16384, 65536]:
                sizes += [(M, N)]
        #Tall/Skinny
        for N in [16, 32, 64, 128, 512, 1024]:
            for M in [1024, 4096, 16384, 65536]:
                sizes += [(M, N)]

    #BLAS3 training sizes
    if operation in [sc.templates.gemm_nn, sc.templates.gemm_nt, sc.templates.gemm_tn, sc.templates.gemm_tt]:
        sizes = []
        #Square
        for N in [896, 1760, 2048, 2560]:
            sizes += [(N, N, N)]
        #LaPack
        for N in [896, 1760, 2048, 2560]:
            for K in [16, 32, 64, 128]:
                sizes += [(N, N, K)]
        #Covariance
        for N in [16, 32, 64, 128, 256]:
            for K in [16000,32000,64000,128000]:
                sizes += [(N, N, K)]
        #DeepSpeech
        for M in [1760, 2048, 2560, 4096]:
            for N in [16, 32, 64, 128, 7000]:
                sizes += [(M, N, M)]
        for K in [1760, 2048, 2560, 4096]:
            for M, N in [(5124,9124),(35,8457)]:
                sizes += [(M, N, K)]
        for M, K in [(7680,2560),(3072,1024)]:
            for N in [16, 32, 64, 128]:
                sizes += [(M, N, K)]

    #Training data
    performance = tools.metric_of(operation)
    profiles, X, Y = [], [], []

    #Restore progress
    savepath = os.path.join('save', tools.dtype.__name__, operation.__name__)
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    try:
        with open(os.path.join(savepath, 'X.csv')) as f:
            saved_X = [tuple(map(int, row)) for row in csv.reader(f, delimiter=',')]
        #Profiles are restored as tuples, like the ones returned by the
        #optimizer, so that `best not in profiles` detects duplicates
        with open(os.path.join(savepath, 'profiles.csv')) as f:
            saved_profiles = [tuple(map(int, row)) for row in csv.reader(f, delimiter=',')]
        #Rows of Y must be lists: they grow when a profile is added
        with open(os.path.join(savepath, 'Y.csv')) as f:
            saved_Y = [[float(v) for v in row] for row in csv.reader(f, delimiter=',')]
    except (IOError, OSError, ValueError, csv.Error):
        #No usable checkpoint: start from scratch
        pass
    else:
        #All-or-nothing, so that X, Y and profiles always stay consistent
        X, Y, profiles = saved_X, saved_Y, saved_profiles

    #Save data
    def save():
        for (fname, data) in zip(['X.csv', 'Y.csv', 'profiles.csv'], [X, Y, profiles]):
            with open(os.path.join(savepath, fname), 'wb') as f:
                csv.writer(f).writerows(data)

    #Tuning
    for idx, x in enumerate(sizes):
        #Create new line on log
        if idx>0:
            self.progress_bar.set_finished()
        self.progress_bar.set_prefix(', '.join(map(str, x)))
        #Skip if already saved
        if x in X:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
            continue
        #Best existing profile for x
        tree, operands = tools.tree_of(operation, x, context)
        y = [performance(x, tools.benchmark(operation(*p), tree)) for p in profiles]
        best = profiles[np.argmax(y)] if y else None
        #Retune if necessary
        tune = not (best and optimize.is_local_optimum(best, operation, x, context))
        if tune:
            optimizer = optimize.GeneticOptimizer(self.logger, naccept=1000, niter=1000, cxpb=.4, mutpb=.4, popsize=20, progress_bar = self.progress_bar)
            best = optimizer.run(operation, x, context, prior=best)[0]
            if best not in profiles:
                profiles.append(best)
                #Benchmark the new profile on all the previous sizes
                for xx,yy in zip(X, Y):
                    tree, _ = tools.tree_of(operation, xx, context)
                    time = tools.benchmark(operation(*best), tree)
                    yy.append(performance(xx, time))
        #Update dataset
        X.append(x)
        tree, operands = tools.tree_of(operation, x, context)
        y = [performance(x,tools.benchmark(operation(*prf), tree)) for prf in profiles]
        Y.append(y)
        #Save data
        save()
        #print performance info in case no tuning was done
        if not tune:
            row = Y[X.index(x)]
            self.progress_bar.update(1, 1, profiles[argmax(row)], max(row))
    self.progress_bar.set_finished()
    save()

    #Adding external profiles
    for prof in tools.external_profiles(operation):
        profiles.append(prof.__class__.__name__)
        for x, y in zip(X, Y):
            tree, operands = tools.tree_of(operation, x, context)
            perf = performance(x,tools.benchmark(prof, tree, operation))
            if max(y) < perf:
                #Single formatted string: same output as the former print
                #statement, and valid under both Python 2 and Python 3
                print('%s \t%s \toutperform: \t%d %s' % (x, prof.__class__.__name__, int(perf), tools.metric_name_of(operation)))
            y.append(perf)

    #Pruning of useless profiles
    X = np.array(X)
    Y = np.array(Y)
    if len(Y) > 0 and len(Y[0]) > 1:
        #Profiles that are never the best one for any size
        idx = np.where(np.bincount(np.argmax(Y, 1), minlength=len(profiles))==0)[0]
        profiles = [p for ip,p in enumerate(profiles) if ip not in idx]
        Y = np.delete(Y, idx, axis=1)

    #Exporting to JSON
    json_path = tools.sanitize(device.name) + '.json' if not self.json_path else self.json_path
    if os.path.isfile(json_path):
        with open(json_path, 'r') as f:
            json_data = json.load(f)
    else:
        json_data = {}
        json_data["version"] = "1.0"
    operation_name = operation.__name__
    if operation_name not in json_data:
        json_data[operation_name] = {}
    json_data[operation_name][tools.dtype.__name__] = {}
    D = json_data[operation_name][tools.dtype.__name__]
    if len(profiles) > 1:
        clf, nrmse = model.train(X, Y, profiles)
        D['predictor'] = [{'children_left': e.tree_.children_left.tolist(),
                           'children_right': e.tree_.children_right.tolist(),
                           'threshold': e.tree_.threshold.astype('float64').tolist(),
                           'feature': e.tree_.feature.astype('float64').tolist(),
                           'value': e.tree_.value[:,:,0].astype('float64').tolist()} for e in clf.estimators_]
    D['profiles'] = [tools.convert(x) for x in profiles]
    #The context manager guarantees that the file is flushed and closed
    with open(json_path, 'w') as f:
        json.dump(json_data, f)
def run(self, template, sizes, context, initializer = None, prior = None):
    """Search the fastest parameters of `template` with a genetic algorithm.

    A genome is the concatenation of the Gray-coded exponents of the
    parameters (nbits[i] bits for parameter i); the parameter values are
    the corresponding powers of two.

    template, sizes, context: forwarded to `tools.tree_of`.
    initializer: iterator yielding lists of exponents, one per parameter.
        Random exponents are drawn when it is None.
    prior: parameter values (powers of two, as returned by this method)
        used to seed the first individual.

    Returns:
        (best parameters as a tuple, x, y) where x and y are always empty
        lists.
    """
    tree, _ = tools.tree_of(template, sizes, context)
    metric = tools.metric_of(template)
    genetic_infos = tools.genetic_infos_of(template)
    nbits = genetic_infos['nbits']
    offsets = cumsum([0] + nbits)

    def bin2gray(A):
        #Binary string -> list of Gray-code bits
        g = [int(A[0])]
        for i in range(1, len(A)):
            g += [int(A[i-1] != A[i])]
        return g

    def gray2int(A):
        #List of Gray-code bits -> integer
        b = [A[0]]
        for i in range(1, len(A)):
            b += [int(b[i-1] != A[i])]
        return int(''.join(map(str,b)), 2)

    def encode(genome):
        #List of exponents -> flat list of Gray-code bits
        encoded = [bin2gray(bin(x)[2:].zfill(nb)) for x, nb in zip(genome, nbits)]
        return sum(encoded, [])

    def decode(genome):
        #Flat list of Gray-code bits -> parameter values (powers of two)
        result = []
        for off1,off2 in zip(offsets[:-1],offsets[1:]):
            result += [gray2int(genome[off1:off2])]
        return [2**x for x in result]

    def exponents_of(parameters):
        #Inverse of the 2**x applied by decode(), clamped to what nbits
        #can represent so that the genome keeps its expected length
        return [min(max(int(p).bit_length() - 1, 0), 2**nb - 1) for p, nb in zip(parameters, nbits)]

    def evaluate(genome):
        idx = tuple(genome)
        if idx not in cache:
            time = tools.benchmark(template(*decode(genome)), tree)
            #Invalid profiles are not cached
            if time == float('inf'):
                return time,
            cache[idx] = time
            self.progress_bar.update(max(len(cache), it), self.niter, decode(min(cache, key=cache.get)), metric(sizes, min(cache.values())))
        return cache[idx],

    cache = {}
    hof = deap_tools.HallOfFame(1)

    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", deap_tools.cxTwoPoint)
    toolbox.register("mutate", deap_tools.mutFlipBit)
    toolbox.register("select", deap_tools.selNSGA2)

    x = []
    y = []
    it = 0
    population = []

    #Initialization
    if initializer is None:
        #randint is inclusive: an exponent on nb bits is at most 2**nb - 1
        initializer = ([random.randint(0, 2**nb - 1) for nb in nbits] for i in iter(int,1))
    #prior holds parameter values whereas encode() expects exponents
    genome = encode(exponents_of(prior) if prior else list(next(initializer)))
    while len(population) < self.popsize:
        individual = creator.Individual(genome)
        individual.fitness.values = toolbox.evaluate(genome)
        if max(individual.fitness.values) != float('inf'):
            population += [individual]
        genome = encode(list(next(initializer)))
    hof.update(population)

    #Main iteration
    while len(cache) < self.naccept and it<self.niter:
        #Generate offspring
        offspring = []
        while len(offspring) < self.popsize:
            op_choice = random.random()
            #Cross-over
            if op_choice < self.cxpb:
                ind1, ind2 = map(toolbox.clone, random.sample(population, 2))
                ind1, ind2 = toolbox.mate(ind1, ind2)
                ind = ind1
                #Store the fitness: the clone still carries its parent's
                ind.fitness.values = toolbox.evaluate(ind)
                if max(ind.fitness.values) != float('inf'):
                    offspring += [ind]
            #Mutation
            elif op_choice < self.cxpb + self.mutpb:
                ind = toolbox.clone(random.choice(population))
                ind, = toolbox.mutate(ind, 1.0/offsets[-1])
                #Store the fitness: the clone still carries its parent's
                ind.fitness.values = toolbox.evaluate(ind)
                if max(ind.fitness.values) != float('inf'):
                    offspring += [ind]
            #Reproduction
            else:
                offspring += [random.choice(population)]

        #Update fitnesses
        fitnesses = toolbox.map(toolbox.evaluate, offspring)
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        #Update population
        population[:] = toolbox.select(population + offspring, self.popsize)
        hof.update(population)

        it += 1
    return tuple(decode(hof[0])), x, y