def random_schedule(root_func, min_depth=0, max_depth=DEFAULT_MAX_DEPTH, vars=None):
    """
    Build a random Schedule for root_func and every function it calls.

    halide.visit_funcs() walks the call graph; for each visited function one
    random schedule is drawn from schedules_func(). Accepts the same
    arguments as schedules_func().
    """
    if vars is None:
        # Computed as in the original code; the value is not used below (the
        # call that forwarded vars=vars is no longer active).
        vars = halide.func_varlist(root_func)

    new_vars_by_name = {}   # func name => new_vars() of the schedule picked for it
    picked_by_name = {}     # func name => schedule picked for it

    def visit(f, parent):
        parent_name = None if parent is None else parent.name()
        # Variables introduced by the caller's schedule are offered to the callee.
        caller_vars = new_vars_by_name.get(parent_name, [])
        # The function literally named 'f' is capped at a maximum depth of 0.
        depth_limit = 0 if f.name() == 'f' else max_depth
        candidates = schedules_func(root_func, f, min_depth, depth_limit,
                                    random=True, extra_caller_vars=caller_vars)
        picked = next(candidates)
        new_vars_by_name[f.name()] = picked.new_vars()
        picked_by_name[f.name()] = picked

    halide.visit_funcs(root_func, visit)
    return Schedule(root_func, picked_by_name)
def schedules_func(root_func, func, min_depth=0, max_depth=DEFAULT_MAX_DEPTH, random=False, extra_caller_vars=None, vars=None):
    """
    Generator of valid schedules for a function, each of which is a list of schedule fragments (FragmentList).

    If random is True then instead generate exactly one schedule randomly chosen.

    Arguments:
      root_func         -- root function, forwarded to schedules_depth().
      func              -- function whose schedules are generated.
      min_depth         -- smallest depth tried (inclusive).
      max_depth         -- largest depth tried (inclusive).
      random            -- if True, draw the depth at random and stop after
                           the first candidate that passes check().
      extra_caller_vars -- forwarded to schedules_depth(); defaults to a
                           fresh empty list on every call.
      vars              -- variable list for func; defaults to
                           halide.func_varlist(func).
    """
    # A literal [] default would be one list object shared by every call and
    # handed on to schedules_depth(); create a new one per call instead.
    if extra_caller_vars is None:
        extra_caller_vars = []
    if vars is None:
        vars = halide.func_varlist(func)

    for depth in range(min_depth, max_depth + 1):
        if random:
            # In random mode the loop only bounds the number of attempts;
            # each attempt uses a freshly drawn depth.
            depth = random_module.randrange(min_depth, max_depth + 1)
        for L in schedules_depth(root_func, func, vars, depth, random, extra_caller_vars):
            if L.check():
                yield L.randomized()
                if random:
                    return
def test_schedules(): f = halide.Func('f') x = halide.Var('x') y = halide.Var('y') g = halide.Func('g') v = halide.Var('v') f[x,y] = 1 g[v] = f[v,v] print halide.func_varlist(f) print 'caller_vars(f) =', caller_vars(g, f) print 'caller_vars(g) =', caller_vars(g, g) validL = list(valid_schedules(g, f, 3)) for L in validL: print repr(repr(L)) print 'number valid: ', len(validL)
def valid_schedules(root_func, func, max_depth=4):
    """
    Yield the valid schedules for a function, for depths 0..max_depth.

    Candidates come from schedules_depth(); a candidate is yielded only if
    every fragment x in it satisfies x.check(candidate).
    """
    func_vars = halide.func_varlist(func)
    for depth in range(max_depth + 1):
        for candidate in schedules_depth(root_func, func, func_vars, depth):
            # all() stops at the first failing fragment, like an explicit break.
            if all(fragment.check(candidate) for fragment in candidate):
                yield candidate
def autotune(func, test, scope):
    """
    Autotune func, using test(func) as a testing function that returns a
    time in seconds.

    Overwrites sys.argv, builds a randomized high-level config from the var
    lists of every function reachable from func, runs test_permutation(),
    and then hands control to sgatuner.main().
    """
    # NOTE(review): presumably sgatuner.main() parses sys.argv -- confirm.
    sys.argv = [sys.argv[0], 'dirname']

    info = {}
    cfg = HalideConfigAccessor()
    func_d = halide.all_funcs(func)
    name_and_vars = [(name, halide.func_varlist(f)) for (name, f) in func_d.items()]
    hl = HalideHighLevelConfig(info, name_and_vars)
    hl.randomize(cfg, 1)

    test_permutation()

    # Factories passed to the tuner; each closes over the objects built above.
    def make_tester(*a):
        return HalideCandidateTester(hl, *a, test_func=test, func_d=func_d, func=func, scope=scope)

    def make_population(*a):
        return HalidePopulation(hl, *a)

    def get_hlconfig():
        return hl

    def get_config():
        return cfg

    sgatuner.main(tester_lambda=make_tester,
                  pop_lambda=make_population,
                  hlconfig_lambda=get_hlconfig,
                  config_lambda=get_config)
def added_or_edited(self, root_func, extra_caller_vars, vars=None, delta=0):
    """
    Return a copy of this fragment list with one random fragment spliced in.

    A position is chosen at random and `delta` existing fragments starting
    there are replaced by a single new random fragment: with delta=0 the new
    fragment is inserted, with delta=1 it replaces one existing fragment.
    Up to MUTATE_TRIES attempts are made; the first result that passes
    check() is returned.

    Raises MutateFailed if no attempt produces a list that passes check().
    """
    if vars is None:
        vars = halide.func_varlist(self.func)

    for _ in range(MUTATE_TRIES):
        fragments = list(self)
        pos = random.randrange(len(fragments) + 1 - delta)

        # The new fragment may use the base vars plus any vars introduced by
        # the fragments that precede the chosen position.
        visible_vars = list(vars)
        for earlier in fragments[:pos]:
            visible_vars.extend(earlier.new_vars())

        replacement = random_fragment(root_func, self.func, visible_vars, extra_caller_vars)
        fragments[pos:pos + delta] = [replacement]

        candidate = FragmentList(self.func, fragments)
        if candidate.check():
            return candidate

    raise MutateFailed
def schedules_func(root_func, func, min_depth=0, max_depth=DEFAULT_MAX_DEPTH, random=False, extra_caller_vars=None, vars=None):
    """
    Generator of valid schedules for a Func, each of which is a FragmentList (e.g. f.root().vectorize(x).parallel(y)).

    If random is True then instead generate exactly one schedule randomly chosen.

    Arguments:
      root_func         -- root function, forwarded to schedules_depth().
      func              -- function whose schedules are generated.
      min_depth         -- smallest depth tried (inclusive).
      max_depth         -- largest depth tried (inclusive).
      random            -- if True, draw the depth at random and stop after
                           the first candidate that passes check().
      extra_caller_vars -- forwarded to schedules_depth(); defaults to a
                           fresh empty list on every call.
      vars              -- variable list for func; defaults to
                           halide.func_varlist(func).
    """
    # A literal [] default would be one list object shared by every call and
    # handed on to schedules_depth(); create a new one per call instead.
    if extra_caller_vars is None:
        extra_caller_vars = []
    if vars is None:
        vars = halide.func_varlist(func)

    for depth in range(min_depth, max_depth + 1):
        if random:
            # In random mode the loop only bounds the number of attempts;
            # each attempt uses a freshly drawn depth.
            depth = random_module.randrange(min_depth, max_depth + 1)
        for L in schedules_depth(root_func, func, vars, depth, random, extra_caller_vars):
            if L.check():
                yield L.randomized_const()
                if random:
                    return
def random_schedule(root_func, min_depth=0, max_depth=DEFAULT_MAX_DEPTH, vars=None):
    """
    Build a random Schedule for root_func and every function it calls.

    halide.visit_funcs() walks the call graph; for each visited function one
    random schedule is drawn from schedules_func(). Accepts the same
    arguments as schedules_func().
    """
    if vars is None:
        # Computed as in the original code; the value is not used below (the
        # call that forwarded vars=vars is no longer active).
        vars = halide.func_varlist(root_func)

    new_vars_by_name = {}   # func name => new_vars() of the schedule picked for it
    picked_by_name = {}     # func name => schedule picked for it

    def visit(f, parent):
        parent_name = None if parent is None else parent.name()
        # Variables introduced by the caller's schedule are offered to the callee.
        caller_vars = new_vars_by_name.get(parent_name, [])
        # The function literally named 'f' is capped at a maximum depth of 0.
        depth_limit = 0 if f.name() == 'f' else max_depth
        candidates = schedules_func(root_func, f, min_depth, depth_limit,
                                    random=True, extra_caller_vars=caller_vars)
        picked = next(candidates)
        new_vars_by_name[f.name()] = picked.new_vars()
        picked_by_name[f.name()] = picked

    halide.visit_funcs(root_func, visit)
    return Schedule(root_func, picked_by_name)
def get_xy(f, bounds):
    """
    Pick a pair of adjacent vars of f usable as (x, y), given the map
    returned by get_bounds().

    A dimension is excluded when its bound lies in [0, 10). Among all
    adjacent pairs of non-excluded dimensions one is chosen at random; if
    there is no such pair the full var list of f is returned instead.
    """
    varlist = halide.func_varlist(f)
    dim_bounds = bounds[f.name()]
    assert len(varlist) == len(dim_bounds), (len(varlist), len(dim_bounds))

    # Negative bounds (e.g. -1) and bounds of 10 or more stay eligible.
    usable = [not (b < 10 and b >= 0) for b in dim_bounds]

    pairs = [(varlist[i], varlist[i + 1])
             for i in range(len(dim_bounds) - 1)
             if usable[i] and usable[i + 1]]

    if pairs:
        return random.choice(pairs)
    return varlist
def added_or_edited(self, root_func, extra_caller_vars, vars=None, delta=0):
    """
    Return a copy of this fragment list with one random fragment spliced in.

    A position is chosen at random and `delta` existing fragments starting
    there are replaced by a single new random fragment: with delta=0 the new
    fragment is inserted, with delta=1 it replaces one existing fragment.
    Up to MUTATE_TRIES attempts are made; the first result that passes
    check() is returned.

    Raises MutateFailed if no attempt produces a list that passes check().
    """
    if vars is None:
        vars = halide.func_varlist(self.func)

    for _ in range(MUTATE_TRIES):
        fragments = list(self)
        pos = random.randrange(len(fragments) + 1 - delta)

        # The new fragment may use the base vars plus any vars introduced by
        # the fragments that precede the chosen position.
        visible_vars = list(vars)
        for earlier in fragments[:pos]:
            visible_vars.extend(earlier.new_vars())

        replacement = random_fragment(root_func, self.func, visible_vars, extra_caller_vars)
        fragments[pos:pos + delta] = [replacement]

        candidate = FragmentList(self.func, fragments)
        if candidate.check():
            return candidate

    raise MutateFailed
def get_bounds(root_func, scope): """ Returns map of func name => list of bounds for each dim, with -1 indicating unbounded. Does not do bound inference. """ bounds = {} varlist = {} for (name, f) in halide.all_funcs(root_func).items(): varlist[name] = halide.func_varlist(f) bounds[name] = [-1] * len(varlist[name]) if 'tune_constraints' in scope: constraints = autotune.Schedule.fromstring(root_func, scope['tune_constraints']) else: return bounds for (name, L) in constraints.d.items(): for x in L: if isinstance(x, autotune.FragmentBound): try: i = varlist[name].index(x.var) except IndexError: raise ValueError('could not find var %s to bound in func %s (varlist is %r)' % (x.var, name, varlist[name])) bounds[name][i] = x.size print 'bounds is:', bounds return bounds
def mutate(a, p, constraints):
    """
    Mutate existing schedule using AutotuneParams p.

    Works on copy.copy(a). Repeatedly picks a random function name from the
    schedule and a mutation mode sampled from p.dict_prob_mutate() (modes
    that would violate p.min_depth / p.max_depth, or that cannot apply to an
    empty fragment list, are excluded), applies the mutation, and then calls
    apply(constraints) on the result. If the mutation raises MutateFailed,
    or apply() raises NameError or halide.ScheduleError, another attempt is
    made. Returns the mutated copy once an attempt succeeds; its genomelog
    records the mode of that attempt and the identity of the input.

    Raises ValueError if an unknown mutation mode is sampled.
    """
    a0 = a
    a = copy.copy(a0)
    extra_caller_vars = []      # FIXME: Implement extra_caller_vars, important for chunk().
    dmutate0 = p.dict_prob_mutate()

    while True:
        # list(...) keeps random.choice() working when keys() is a view.
        name = random.choice(list(a.d.keys()))

        # Drop modes that are not applicable to this function's fragment
        # list. pop() tolerates a probability table that omits a mode.
        dmutate = dict(dmutate0)
        if len(a.d[name]) <= p.min_depth:
            dmutate.pop('remove', None)
        if len(a.d[name]) >= p.max_depth:
            dmutate.pop('add', None)
        if len(a.d[name]) == 0:
            dmutate.pop('edit', None)

        mode = sample_prob(dmutate)
        try:
            if mode == 'consts':
                a.d[name] = a.d[name].randomized_const()
                a.genomelog = 'mutate_consts(%s)'%a0.identity()
            elif mode == 'replace':
                # Pass every other function's fragments as constraints. Work
                # on a copy: deleting from a.d itself would permanently drop
                # `name` from the schedule if random_schedule() raised.
                constraints_d = copy.copy(a.d)
                del constraints_d[name]
                all_d = random_schedule(a.root_func, p.min_depth, p.max_depth, None, constraints_d)
                a.d[name] = all_d.d[name]
                a.genomelog = 'mutate_replace(%s)'%a0.identity()
            elif mode == 'add':
                assert len(a.d[name]) < p.max_depth
                a.d[name] = a.d[name].added(a.root_func, extra_caller_vars)
                a.genomelog = 'mutate_add(%s)'%a0.identity()
            elif mode == 'remove':
                assert len(a.d[name]) > p.min_depth
                a.d[name] = a.d[name].removed()
                a.genomelog = 'mutate_remove(%s)'%a0.identity()
            elif mode == 'edit':
                a.d[name] = a.d[name].edited(a.root_func, extra_caller_vars)
                a.genomelog = 'mutate_edit(%s)'%a0.identity()
            elif mode == 'template':
                s = autotune_template.sample(halide.func_varlist(a.d[name].func))   # TODO: Use parent variables if chunk...
                a.d[name] = FragmentList.fromstring(a.d[name].func, s)
                a.genomelog = 'replace_template(%s)'%a0.identity()
            else:
                raise ValueError('Unknown mutation mode %s'%mode)
        except MutateFailed:
            continue

        try:
            # Apply schedule to determine if random_schedule() invalidated
            # new variables that were referenced.
            a.apply(constraints)
        except (NameError, halide.ScheduleError):
            continue
        return a
def test_schedules(verbose=False, test_random=False): #random_module.seed(int(sys.argv[1]) if len(sys.argv)>1 else 0) halide.exit_on_signal() f = halide.Func('f') x = halide.Var('x') y = halide.Var('y') c = halide.Var('c') g = halide.Func('g') v = halide.Var('v') input = halide.UniformImage(halide.UInt(16), 3) int_t = halide.Int(32) f[x, y, c] = input[ halide.clamp(x, halide.cast(int_t, 0 ), halide.cast(int_t, input.width() - 1)), halide.clamp(y, halide.cast(int_t, 0 ), halide.cast(int_t, input.height() - 1)), halide.clamp(c, halide.cast(int_t, 0), halide.cast(int_t, 2))] #g[v] = f[v,v] g[x, y, c] = f[x, y, c] + 1 assert sorted(halide.all_vars(g).keys()) == sorted(['x', 'y', 'c']) #, 'v']) if verbose: print halide.func_varlist(f) print 'caller_vars(f) =', caller_vars(g, f) print 'caller_vars(g) =', caller_vars(g, g) # validL = list(valid_schedules(g, f, 4)) # validL = [repr(_x) for _x in validL] # # for L in sorted(validL): # print repr(L) T0 = time.time() if not test_random: random = True #False nvalid_determ = 0 for L in schedules_func(g, f, 0, 3): nvalid_determ += 1 if verbose: print L nvalid_random = 0 for i in range(100): for L in schedules_func( g, f, 0, DEFAULT_MAX_DEPTH, random=True ): #sorted([repr(_x) for _x in valid_schedules(g, f, 3)]): if verbose and 0: print L #repr(L) nvalid_random += 1 s = [] for i in range(400): d = random_schedule(g, 0, DEFAULT_MAX_DEPTH) si = str(d) s.append(si) if verbose: print 'Schedule:', si d.apply() evaluate = d.test((36, 36, 3), input) print 'evaluate' evaluate() if test_random: print 'Success' sys.exit() T1 = time.time() s = '\n'.join(s) assert 'f.chunk(_c0)' in s assert 'f.root().vectorize' in s assert 'f.root().unroll' in s assert 'f.root().split' in s assert 'f.root().tile' in s assert 'f.root().parallel' in s assert 'f.root().transpose' in s assert nvalid_random == 100 if verbose: print 'generated in %.3f secs' % (T1 - T0) print 'random_schedule: OK'
def mutate(a, p, constraints):
    """
    Mutate existing schedule using AutotuneParams p.

    Works on copy.copy(a). Repeatedly picks a random function name from the
    schedule and a mutation mode sampled from p.dict_prob_mutate() (modes
    that would violate p.min_depth / p.max_depth, or that cannot apply to an
    empty fragment list, are excluded), applies the mutation, and then calls
    apply(constraints) on the result. If the mutation raises MutateFailed,
    or apply() raises NameError or halide.ScheduleError, another attempt is
    made. Returns the mutated copy once an attempt succeeds; its genomelog
    records the mode of that attempt and the identity of the input.

    Raises ValueError if an unknown mutation mode is sampled.
    """
    a0 = a
    a = copy.copy(a0)
    extra_caller_vars = []  # FIXME: Implement extra_caller_vars, important for chunk().
    dmutate0 = p.dict_prob_mutate()

    while True:
        # list(...) keeps random.choice() working when keys() is a view.
        name = random.choice(list(a.d.keys()))

        # Drop modes that are not applicable to this function's fragment
        # list. pop() tolerates a probability table that omits a mode.
        dmutate = dict(dmutate0)
        if len(a.d[name]) <= p.min_depth:
            dmutate.pop('remove', None)
        if len(a.d[name]) >= p.max_depth:
            dmutate.pop('add', None)
        if len(a.d[name]) == 0:
            dmutate.pop('edit', None)

        mode = sample_prob(dmutate)
        try:
            if mode == 'consts':
                a.d[name] = a.d[name].randomized_const()
                a.genomelog = 'mutate_consts(%s)' % a0.identity()
            elif mode == 'replace':
                # Pass every other function's fragments as constraints. Work
                # on a copy: deleting from a.d itself would permanently drop
                # `name` from the schedule if random_schedule() raised.
                constraints_d = copy.copy(a.d)
                del constraints_d[name]
                all_d = random_schedule(a.root_func, p.min_depth, p.max_depth, None, constraints_d)
                a.d[name] = all_d.d[name]
                a.genomelog = 'mutate_replace(%s)' % a0.identity()
            elif mode == 'add':
                assert len(a.d[name]) < p.max_depth
                a.d[name] = a.d[name].added(a.root_func, extra_caller_vars)
                a.genomelog = 'mutate_add(%s)' % a0.identity()
            elif mode == 'remove':
                assert len(a.d[name]) > p.min_depth
                a.d[name] = a.d[name].removed()
                a.genomelog = 'mutate_remove(%s)' % a0.identity()
            elif mode == 'edit':
                a.d[name] = a.d[name].edited(a.root_func, extra_caller_vars)
                a.genomelog = 'mutate_edit(%s)' % a0.identity()
            elif mode == 'template':
                s = autotune_template.sample(halide.func_varlist(a.d[name].func))  # TODO: Use parent variables if chunk...
                a.d[name] = FragmentList.fromstring(a.d[name].func, s)
                a.genomelog = 'replace_template(%s)' % a0.identity()
            else:
                raise ValueError('Unknown mutation mode %s' % mode)
        except MutateFailed:
            continue

        try:
            # Apply schedule to determine if random_schedule() invalidated
            # new variables that were referenced.
            a.apply(constraints)
        except (NameError, halide.ScheduleError):
            continue
        return a
def test_schedules(verbose=False, test_random=False): #random_module.seed(int(sys.argv[1]) if len(sys.argv)>1 else 0) halide.exit_on_signal() f = halide.Func('f') x = halide.Var('x') y = halide.Var('y') c = halide.Var('c') g = halide.Func('g') v = halide.Var('v') input = halide.UniformImage(halide.UInt(16), 3) int_t = halide.Int(32) f[x,y,c] = input[halide.clamp(x,halide.cast(int_t,0),halide.cast(int_t,input.width()-1)), halide.clamp(y,halide.cast(int_t,0),halide.cast(int_t,input.height()-1)), halide.clamp(c,halide.cast(int_t,0),halide.cast(int_t,2))] #g[v] = f[v,v] g[x,y,c] = f[x,y,c]+1 assert sorted(halide.all_vars(g).keys()) == sorted(['x', 'y', 'c']) #, 'v']) if verbose: print halide.func_varlist(f) print 'caller_vars(f) =', caller_vars(g, f) print 'caller_vars(g) =', caller_vars(g, g) # validL = list(valid_schedules(g, f, 4)) # validL = [repr(_x) for _x in validL] # # for L in sorted(validL): # print repr(L) T0 = time.time() if not test_random: random = True #False nvalid_determ = 0 for L in schedules_func(g, f, 0, 3): nvalid_determ += 1 if verbose: print L nvalid_random = 0 for i in range(100): for L in schedules_func(g, f, 0, DEFAULT_MAX_DEPTH, random=True): #sorted([repr(_x) for _x in valid_schedules(g, f, 3)]): if verbose and 0: print L#repr(L) nvalid_random += 1 s = [] for i in range(400): d = random_schedule(g, 0, DEFAULT_MAX_DEPTH) si = str(d) s.append(si) if verbose: print 'Schedule:', si d.apply() evaluate = d.test((36, 36, 3), input) print 'evaluate' evaluate() if test_random: print 'Success' sys.exit() T1 = time.time() s = '\n'.join(s) assert 'f.chunk(_c0)' in s assert 'f.root().vectorize' in s assert 'f.root().unroll' in s assert 'f.root().split' in s assert 'f.root().tile' in s assert 'f.root().parallel' in s assert 'f.root().transpose' in s assert nvalid_random == 100 if verbose: print 'generated in %.3f secs' % (T1-T0) print 'random_schedule: OK'