def __init__(self, start, rules, max_program_depth=4):
    """Store the grammar, normalise its rule weights and build samplers.

    Args:
        start: Start symbol; stored unchanged on ``self.start``.
        rules: Mapping ``S -> {P: (args_P, w)}`` stored on ``self.rules``.
            After the pruning passes, the weights ``w`` of ``self.rules``
            are divided by their per-``S`` total so they sum to 1.
        max_program_depth: Depth bound kept on the instance and forwarded
            to the two pruning passes.
    """
    self.start = start
    self.rules = rules
    self.max_program_depth = max_program_depth

    # The hash is taken from the textual form of the rules exactly as they
    # were passed in, i.e. before any pruning or normalisation below.
    self.hash = hash(format(rules))

    # Pruning passes implemented elsewhere on the class; going by their
    # names they presumably drop non-productive and unreachable symbols.
    self.remove_non_productive(max_program_depth)
    self.remove_non_reachable(max_program_depth)

    # Rescale each weight by the total weight of its left-hand side S.
    # Only values of existing keys are rewritten, so updating the dict
    # while iterating over it is safe.
    for derivations in self.rules.values():
        total = sum([entry[1] for entry in derivations.values()])
        for P, (args_P, w) in derivations.items():
            derivations[P] = (args_P, w / total)

    self.hash_table_programs = {}
    self.max_probability = {}
    self.compute_max_probability()

    self.list_derivations = {}
    self.vose_samplers = {}
    for S, derivations in self.rules.items():
        # Derivations ordered by increasing probability; the sampler is fed
        # the weights in that same order, so index i of the sampler lines
        # up with list_derivations[S][i].
        ordered = sorted(derivations, key=lambda P: derivations[P][1])
        self.list_derivations[S] = ordered
        probabilities = [derivations[P][1] for P in ordered]
        self.vose_samplers[S] = vose.Sampler(np.array(probabilities))
def __setstate__(self, d):
    """Restore instance state from ``d`` and rebuild the Vose samplers.

    ``d`` becomes the instance ``__dict__`` directly (no copy). The
    samplers are then recreated from ``self.rules`` and
    ``self.list_derivations`` -- presumably because sampler objects are
    not part of the pickled state; confirm against ``__getstate__``.
    """
    self.__dict__ = d

    # Build the full mapping first and assign it once at the end, so a
    # failure while constructing a sampler leaves the restored value alone.
    rebuilt = {}
    for S in self.rules:
        derivations = self.rules[S]
        probabilities = [derivations[P][1] for P in self.list_derivations[S]]
        rebuilt[S] = vose.Sampler(np.array(probabilities))
    self.vose_samplers = rebuilt
def sample(self):
    """Draw one index label of ``self.series`` at random.

    Calling ``Series.sample`` repeatedly is slow because every call costs
    O(n). A Cython implementation of Vose's alias method is used instead:
    the sampler takes O(n) to build and each draw is then O(1).

    The sampler is created lazily on the first call, using the series
    values (converted to float) as weights and a seed drawn from NumPy's
    global random state. It is cached on ``self.sampler`` for later calls.

    Returns:
        The entry of ``self.series.index`` at the sampled position.
    """
    if self.sampler is None:
        weights = self.series.to_numpy(dtype=float)
        seed = np.random.randint(2**16)
        self.sampler = vose.Sampler(weights=weights, seed=seed)

    position = self.sampler.sample()
    return self.series.index[position]