def merge(i_left, i_right, i_end):
    """Merge two adjacent runs of ``A`` into ``B``.

    The runs are ``A[i_left:i_right]`` and ``A[i_right:i_end]``; the merged
    output is written to ``B[i_left:i_end]``.  ``A`` and ``B`` are not
    parameters: they are resolved from the enclosing scope.

    Each element comparison ``A[x] <= A[y]`` is passed through ``reveal``,
    so the outcome of every comparison is opened during the merge.
    """
    left_pos = MemValue(i_left)
    right_pos = MemValue(i_right)

    @for_range(i_left, i_end)
    def loop(j):
        # Take the head of the left run when it still has elements and
        # either the right run is used up or the (revealed) comparison
        # says the left head is not larger than the right head.
        take_left = and_(lambda: left_pos < i_right,
                         or_(lambda: right_pos >= i_end,
                             lambda: regint(reveal(A[left_pos] <= A[right_pos]))))
        if_then(take_left)
        B[j] = A[left_pos]
        left_pos.iadd(1)
        else_then()
        B[j] = A[right_pos]
        right_pos.iadd(1)
        end_if()
def twos_complement(x):
    """Return the complement of the ``k`` decomposed bits of ``x``, plus one.

    ``k`` is taken from the enclosing scope.  The bit list returned by
    ``x.bit_decompose(k)`` is reversed before use, so the first element of
    the reversed list ends up in the most significant position of the
    result (presumably ``bit_decompose`` is least-significant-bit first --
    TODO confirm).
    """
    reversed_bits = x.bit_decompose(k)[::-1]
    bit_store = Array(k, cint)
    bit_store.assign(reversed_bits)
    accumulator = MemValue(cint(0))

    @for_range(k)
    def block(i):
        # Shift the running value left and append the flipped bit.
        shifted = accumulator.read()
        shifted <<= 1
        shifted += 1 - bit_store[i]
        accumulator.write(shifted)

    return accumulator.read() + 1
def mergesort(A):
    """Bottom-up merge sort of ``A`` in place, using a scratch array.

    Runs of doubling width (1, 2, 4, ...) are merged from ``A`` into a
    scratch ``Array`` of ``sint`` and copied back after every pass.  Each
    element comparison is passed through ``reveal``, so comparison results
    are opened while sorting.

    NOTE(review): the run boundaries ``i + width`` and ``i + 2 * width`` are
    never clamped to ``len(A)`` -- this looks like it assumes the length is
    a power of two; confirm before using other lengths.
    """
    scratch = Array(len(A), sint)

    def merge(i_left, i_right, i_end):
        # Merge A[i_left:i_right] and A[i_right:i_end] into the scratch array.
        left_pos = MemValue(i_left)
        right_pos = MemValue(i_right)

        @for_range(i_left, i_end)
        def loop(j):
            # Left head wins when the left run is not exhausted and either
            # the right run is exhausted or the revealed comparison holds.
            take_left = and_(lambda: left_pos < i_right,
                             or_(lambda: right_pos >= i_end,
                                 lambda: regint(reveal(A[left_pos] <= A[right_pos]))))
            if_then(take_left)
            scratch[j] = A[left_pos]
            left_pos.iadd(1)
            else_then()
            scratch[j] = A[right_pos]
            right_pos.iadd(1)
            end_if()

    run_width = MemValue(1)

    @do_while
    def width_loop():
        @for_range(0, len(A), 2 * run_width)
        def merge_loop(i):
            merge(i, i + run_width, i + 2 * run_width)
        # Copy the merged pass back before doubling the run width.
        A.assign(scratch)
        run_width.imul(2)
        return run_width < len(A)
def approximate_reciprocal(divisor, k, f, theta):
    """Return an approximation of 1/divisor, where type(divisor) = cint.

    :param divisor: value to invert; decomposed into ``k`` bits.
    :param k: number of bits used for the decomposition and for the
        per-iteration right shift.
    :param f: enters only the final right shift
        ``2 * k - 2 * f - leading_zeros`` (presumably the number of
        fixed-point fractional bits -- TODO confirm).
    :param theta: number of refinement iterations.
    :returns: ``q`` shifted right by ``2 * k - 2 * f - leading_zeros``.
    """
    def twos_complement(x):
        # Complement the k decomposed bits of x (reversed order first) and
        # add one.
        bits = x.bit_decompose(k)[::-1]
        bit_array = Array(k, cint)
        bit_array.assign(bits)

        twos_result = MemValue(cint(0))

        @for_range(k)
        def block(i):
            val = twos_result.read()
            val <<= 1
            val += 1 - bit_array[i]
            twos_result.write(val)

        return twos_result.read() + 1

    bit_array = Array(k, cint)
    bits = divisor.bit_decompose(k)[::-1]
    bit_array.assign(bits)

    # The counter used to be allocated twice in a row; the first cell was
    # never read, so a single allocation is enough.
    flag = MemValue(regint(0))
    cnt_leading_zeros = MemValue(regint(0))
    normalized_divisor = MemValue(divisor)

    @for_range(k)
    def block(i):
        # `|` binds tighter than `==`, so the original unparenthesised
        # expression already parsed as (flag | bit) == 1; the parentheses
        # only make that explicit.
        flag.write((flag.read() | bit_array[i]) == 1)

        # Until the first set bit has been seen, count a leading zero and
        # shift the divisor left by one position.
        @if_(flag.read() == 0)
        def block():
            cnt_leading_zeros.write(cnt_leading_zeros.read() + 1)
            normalized_divisor.write(normalized_divisor << 1)

    q = MemValue(two_power(k))
    e = MemValue(twos_complement(normalized_divisor.read()))

    @for_range(theta)
    def block(i):
        # q <- q + (q * e >> k);  e <- (e * e >> k)
        qread = q.read()
        eread = e.read()
        qread += (qread * eread) >> k
        eread = (eread * eread) >> k
        q.write(qread)
        e.write(eread)

    res = q >> (2 * k - 2 * f - cnt_leading_zeros)
    return res
def map_reduce_single(n_parallel, n_loops, initializer, reducer, mem_state=None):
    """Build a decorator that runs a loop body ``n_loops`` times and folds
    the results with ``reducer``, unrolling ``n_parallel`` iterations per
    round.

    :param n_parallel: number of body invocations unrolled per loop round;
        must be an ``int`` (a falsy value is treated as 1).
    :param n_loops: total number of iterations; either an ``int`` or a
        run-time value accepted by ``for_range``.
    :param initializer: zero-argument callable returning the initial state.
    :param reducer: callable combining two state tuples into one.
    :param mem_state: optional pre-allocated state container.  When given it
        is updated through ``assign`` / item assignment; when omitted a list
        of ``MemValue`` objects is created from ``initializer()``.
    :returns: a decorator; applying it to ``loop_body`` emits the loop and
        returns a zero-argument function yielding the final state.
    :raises CompilerException: if ``n_parallel`` is not an ``int``.
    """
    if not isinstance(n_parallel, int):
        # The two adjacent literals used to concatenate without a space
        # ("executionsmust").
        raise CompilerException('Number of parallel executions '
                                'must be constant')
    n_parallel = n_parallel or 1
    if mem_state is None:
        # default to list of MemValues to allow varying types
        mem_state = [MemValue(x) for x in initializer()]
        use_array = False
    else:
        # use Arrays for multithread version
        use_array = True

    def decorator(loop_body):
        if isinstance(n_loops, int):
            # Both operands are Python ints and the result feeds range(),
            # so use explicit floor division.
            loop_rounds = n_loops // n_parallel \
                if n_parallel < n_loops else 0
        else:
            # Run-time value: keep the type's own division operator.
            loop_rounds = n_loops / n_parallel

        def write_state_to_memory(r):
            if use_array:
                mem_state.assign(r)
            else:
                # cannot do mem_state = [...] due to scope issue
                for j, x in enumerate(r):
                    mem_state[j].write(x)

        # will be optimized out if n_loops <= n_parallel
        @for_range(loop_rounds)
        def f(i):
            state = tuplify(initializer())
            for k in range(n_parallel):
                j = i * n_parallel + k
                state = reducer(tuplify(loop_body(j)), state)
            r = reducer(mem_state, state)
            write_state_to_memory(r)

        # Handle the iterations that do not fill a complete round.
        if isinstance(n_loops, int):
            state = mem_state
            for j in range(loop_rounds * n_parallel, n_loops):
                state = reducer(tuplify(loop_body(j)), state)
        else:
            @for_range(loop_rounds * n_parallel, n_loops)
            def f(j):
                r = reducer(tuplify(loop_body(j)), mem_state)
                write_state_to_memory(r)
            state = mem_state

        # Store the final state back into mem_state.
        for i, x in enumerate(state):
            if use_array:
                mem_state[i] = x
            else:
                mem_state[i].write(x)

        def returner():
            return untuplify(tuple(state))
        return returner
    return decorator
def approximate_reciprocal(divisor, k, f, theta):
    """Return an approximation of 1/divisor, where type(divisor) = cint.

    This variant unrolls the ``theta`` refinement steps at compile time and
    uses ``shift_two`` for the right shifts.

    :param divisor: value to invert; decomposed into ``k`` bits.
    :param k: number of bits used for the decomposition and for the
        per-iteration shift.
    :param f: enters only the final shift amount
        ``2*k - 2*f - leading_zeros`` (presumably the number of fixed-point
        fractional bits -- TODO confirm).
    :param theta: number of refinement iterations (a Python integer, since
        it is passed to ``range``).
    :returns: the refined value shifted by ``2*k - 2*f - leading_zeros``.
    """
    def twos_complement(x):
        # Complement the k decomposed bits of x (reversed order first) and
        # add one.
        bits = x.bit_decompose(k)[::-1]
        bit_array = Array(k, cint)
        bit_array.assign(bits)

        twos_result = MemValue(cint(0))

        @for_range(k)
        def block(i):
            val = twos_result.read()
            val <<= 1
            val += 1 - bit_array[i]
            twos_result.write(val)

        return twos_result.read() + 1

    bit_array = Array(k, cint)
    bits = divisor.bit_decompose(k)[::-1]
    bit_array.assign(bits)

    # The counter used to be allocated twice in a row; the first cell was
    # never read, so a single allocation is enough.
    flag = MemValue(regint(0))
    cnt_leading_zeros = MemValue(regint(0))
    normalized_divisor = MemValue(divisor)

    @for_range(k)
    def block(i):
        # `|` binds tighter than `==`, so the original unparenthesised
        # expression already parsed as (flag | bit) == 1; the parentheses
        # only make that explicit.
        flag.write((flag.read() | bit_array[i]) == 1)

        # Until the first set bit has been seen, count a leading zero and
        # shift the divisor left by one position.
        @if_(flag.read() == 0)
        def block():
            cnt_leading_zeros.write(cnt_leading_zeros.read() + 1)
            normalized_divisor.write(normalized_divisor << 1)

    q = MemValue(two_power(k))
    e = MemValue(twos_complement(normalized_divisor.read()))

    qr = q.read()
    er = e.read()

    # q <- q + shift(q * e, k);  e <- shift(e * e, k)
    for _ in range(theta):
        qr = qr + shift_two(qr * er, k)
        er = shift_two(er * er, k)

    q = qr
    res = shift_two(q, (2 * k - 2 * f - cnt_leading_zeros))
    return res
def test_while():
    """Exercise ``do_while`` with a counter stored in a ``MemValue``.

    The loop walks an index from 4 down to 0, writing ``source[i] + 1`` to
    ``target[i]`` at each step, and continues while the revealed counter is
    still >= 0.  The target is then expected to equal [1, 2, 3, 4, 5].
    """
    n_items = 5
    countdown = MemValue(sint(n_items - 1))

    src = Array(n_items, sint)
    for idx in range(n_items):
        src[idx] = sint(idx)
    dst = Array(n_items, sint)

    @do_while
    def body():
        current = countdown.read()
        position = current.reveal()
        dst[position] = src[position] + 1
        countdown.write(current - 1)
        # Keep looping while the decremented counter is non-negative.
        return countdown.reveal() >= 0

    runtime_assert_arr_equals([1, 2, 3, 4, 5], dst, default_test_name())
class bits(Tape.Register):
    """Base register type for fixed-length bit vectors.

    Attributes such as ``bit_type``, ``types``, ``max_length``, ``reg_type``,
    ``load_inst``, ``store_inst``, ``bitcom``, ``bitdec``, ``mov``,
    ``load_int``, ``conv_regint``, ``new`` and ``load_dynamic_mem`` are used
    here but not defined in this class; they are presumably supplied by
    subclasses or elsewhere in the module.
    """
    # Default bit length of an instance.
    n = 40
    # Only size 1 is accepted by __init__.
    size = 1
    PreOp = staticmethod(floatingpoint.PreOpN)
    MemValue = staticmethod(lambda value: MemValue(value))
    # Cached result of bit_decompose / input of bit_compose.
    decomposed = None

    @staticmethod
    def PreOR(l):
        """Prefix OR of ``l``, computed as the complement of the prefix
        product of the complemented inputs."""
        return [1 - x for x in \
                floatingpoint.PreOpN(operator.mul, \
                                     [1 - x for x in l])]

    @classmethod
    def get_type(cls, length):
        """Return the type for ``length`` bits.

        ``None`` yields ``cls`` itself and 1 yields ``cls.bit_type``; any
        other length yields a subclass with ``n = length`` that is created
        on first use and cached in ``cls.types``.
        """
        if length is None:
            return cls
        elif length == 1:
            return cls.bit_type
        if length not in cls.types:
            class bitsn(cls):
                n = length
            cls.types[length] = bitsn
            bitsn.__name__ = cls.__name__ + str(length)
        return cls.types[length]

    @classmethod
    def conv(cls, other):
        """Convert ``other`` to ``cls``.

        Instances of ``cls`` are returned unchanged, a ``MemValue`` is read
        first, and anything else is loaded into a fresh instance.
        """
        if isinstance(other, cls):
            return other
        elif isinstance(other, MemValue):
            return cls.conv(other.read())
        else:
            res = cls()
            res.load_other(other)
            return res
    hard_conv = conv

    @classmethod
    def compose(cls, items, bit_length=1):
        """Decompose every item into ``bit_length`` bits and compose the
        concatenated bit list into one value."""
        return cls.bit_compose(
            sum([util.bit_decompose(item, bit_length) for item in items], []))

    @classmethod
    def bit_compose(cls, bits):
        """Compose a sequence of bits into one value.

        A single-element sequence is returned as that element.  Otherwise
        the bits are remembered on the result so a later decomposition can
        reuse them.
        """
        if len(bits) == 1:
            return bits[0]
        bits = list(bits)
        res = cls.new(n=len(bits))
        cls.bitcom(res, *(sbit.conv(bit) for bit in bits))
        res.decomposed = bits
        return res

    def bit_decompose(self, bit_length=None):
        """Return a list of ``bit_length`` bits (default: ``self.n``).

        Positions beyond ``self.n`` are filled with the constant 0.  The
        decomposition is cached in ``self.decomposed`` and reused when it is
        long enough.
        """
        n = bit_length or self.n
        suffix = [0] * (n - self.n)
        if n == 1 and self.n == 1:
            return [self]
        n = min(n, self.n)
        if self.decomposed is None or len(self.decomposed) < n:
            res = [self.bit_type() for i in range(n)]
            self.bitdec(self, *res)
            self.decomposed = res
            return res + suffix
        else:
            return self.decomposed[:n] + suffix

    @classmethod
    def load_mem(cls, address, mem_type=None):
        """Load a value from memory at ``address``.

        ``mem_type == 'sd'`` delegates to ``cls.load_dynamic_mem``; otherwise
        the load instruction is selected by ``util.is_constant(address)``.
        """
        if mem_type == 'sd':
            # No destination register is needed on this path, so none is
            # allocated any more.
            return cls.load_dynamic_mem(address)
        res = cls()
        cls.load_inst[util.is_constant(address)](res, address)
        return res

    def store_in_mem(self, address):
        """Store this value at ``address``; the store instruction is
        selected by whether ``address`` is a Python integer."""
        self.store_inst[isinstance(address, (int, long))](self, address)

    def __init__(self, value=None, n=None, size=None):
        """Create a register of ``n`` bits (default: the class-level ``n``)
        and optionally load ``value`` into it.

        Raises ``Exception`` if ``size`` is neither ``None`` nor 1.
        """
        if size != 1 and size is not None:
            raise Exception('invalid size for bit type: %s' % size)
        Tape.Register.__init__(self, self.reg_type, Program.prog.curr_tape)
        self.set_length(n or self.n)
        if value is not None:
            self.load_other(value)

    def set_length(self, n):
        """Set the bit length; raises ``Exception`` if ``n`` exceeds
        ``self.max_length``."""
        if n > self.max_length:
            # The limit used to be printed to stdout as a debugging aid;
            # it is now part of the error message instead.
            raise Exception('too long: %d (maximum %s)' %
                            (n, self.max_length))
        self.n = n

    def load_other(self, other):
        """Load ``other`` into this register.

        Handles Python integers, ``regint``, values of a related bit type,
        and finally anything offering ``bit_decompose()``.  Raises
        ``CompilerError`` if the last fallback fails.
        """
        if isinstance(other, (int, long)):
            self.set_length(self.n or util.int_len(other))
            self.load_int(other)
        elif isinstance(other, regint):
            self.conv_regint(self.n, self, other)
        elif isinstance(self, type(other)) or isinstance(other, type(self)):
            self.mov(self, other)
        else:
            try:
                other = self.bit_compose(other.bit_decompose())
                self.mov(self, other)
            except Exception:
                # Was a bare `except:`, which also intercepted
                # KeyboardInterrupt and SystemExit.
                raise CompilerError('cannot convert from %s to %s' % \
                                    (type(other), type(self)))

    def long_one(self):
        """Return the integer with the lowest ``self.n`` bits set."""
        return 2**self.n - 1

    def __repr__(self):
        return '%s(%d/%d)' % \
            (super(bits, self).__repr__(), self.n, type(self).n)
def memorize(x):
    """Wrap ``x`` in ``MemValue``, recursing into tuples and lists.

    A tuple or list is mapped element-wise and always returned as a tuple;
    any other value is wrapped directly.
    """
    if not isinstance(x, (tuple, list)):
        return MemValue(x)
    return tuple(map(memorize, x))
def int2FL_plain(a, gamma, l, kappa):
    """Convert the integer ``a`` into floating-point components.

    :param a: value to convert; must support ``<``, ``*`` and, after taking
        the absolute value, ``bit_decompose()``.
    :param gamma: bit length of the input; ``lam = gamma - 1`` is used as
        the magnitude length.
    :param l: target significand length.
    :param kappa: unused in this function (kept for interface
        compatibility).
    :returns: tuple ``(v, p, z, s)`` of ``cint`` values -- significand,
        exponent, zero flag and sign flag.
    """
    lam = gamma - 1
    z = cint(0)

    # extracts the sign and calculates the abs
    s = cint(a < 0)
    a_abs = a * (1 - 2 * s)

    # isolates the most significant bit, scanning from the top bit down:
    # `b` becomes the scaling factor and `blen` the (1-based) position of
    # the first set bit; both stay fixed once they are non-zero.
    a_bits = a_abs.bit_decompose()
    b = 0
    b_c = 1
    blen = 0
    for a_i in range(len(a_bits) - 1, -1, -1):
        # b_c is a plain Python int, so floor division is stated explicitly.
        b = (a_bits[a_i]) * (b == 0) * (b_c // 2) + b
        blen = (a_bits[a_i]) * (blen == 0) * (a_i + 1) + blen
        b_c = b_c * 2

    # scales a and obtains p
    v = a_abs * b
    p = - (lam - blen)

    # reduces v to l bits
    v_l = MemValue(v)
    z_l = MemValue(z)
    if_then(a_abs > 0)
    if lam > l:
        v_l.write(v_l.read() / (2 ** (gamma - l - 1)))
    else:
        # Was `2 ** l - lam`, which parses as (2 ** l) - lam.  Mirroring
        # the branch above (division by 2 ** (lam - l)), the intended
        # factor is 2 ** (l - lam).
        v_l.write(v_l.read() * (2 ** (l - lam)))
    else_then()
    z_l.write(cint(1))
    end_if()

    # corrects output
    # s is coming from the abs extraction
    v = cint(v_l.read())
    z = cint(z_l.read())
    p = cint((p + lam - l) * (1 - z))

    return v, p, z, s
def decorator(loop_body):
    """Emit the map-reduce loop for ``loop_body`` and return an accessor for
    the final state.

    This is a closure: ``n_parallel``, ``n_loops``, ``initializer``,
    ``reducer``, ``mem_state`` and ``use_array`` are read from the enclosing
    scope, which is not visible here.

    Two strategies are used for the main loop:

    * ``n_parallel is not None``: each ``for_range`` round unrolls
      ``n_parallel`` calls of ``loop_body``.
    * ``n_parallel is None``: calls of ``loop_body`` are unrolled into one
      round until the current block reaches ``get_program().budget`` (or the
      active block changes); the count reached is then used as the
      parallelism.

    Iterations that do not fill a complete round are handled afterwards.

    :param loop_body: callable taking the iteration index and returning the
        value(s) to be reduced.
    :returns: zero-argument function returning the final state via
        ``untuplify``.
    """
    my_n_parallel = n_parallel
    # NOTE(review): if n_parallel is neither an int nor None, loop_rounds is
    # never assigned before the for_range below -- confirm callers rule
    # this out.
    if isinstance(n_parallel, int):
        if isinstance(n_loops, int):
            loop_rounds = n_loops / n_parallel \
                if n_parallel < n_loops else 0
        else:
            loop_rounds = n_loops / n_parallel
    def write_state_to_memory(r):
        # Store the reduced state r back into mem_state.
        if use_array:
            mem_state.assign(r)
        else:
            # cannot do mem_state = [...] due to scope issue
            for j, x in enumerate(r):
                mem_state[j].write(x)
    if n_parallel is not None:
        # will be optimized out if n_loops <= n_parallel
        @for_range(loop_rounds)
        def f(i):
            state = tuplify(initializer())
            for k in range(n_parallel):
                j = i * n_parallel + k
                state = reducer(tuplify(loop_body(j)), state)
            r = reducer(mem_state, state)
            write_state_to_memory(r)
    else:
        # Holds the number of unrolled iterations; it is written inside the
        # loop body below and read by the loop condition.
        n_parallel_reg = MemValue(regint(0))
        parent_block = get_block()
        @while_do(lambda x: x + n_parallel_reg <= n_loops, regint(0))
        def _(i):
            state = tuplify(initializer())
            k = 0
            block = get_block()
            # Keep unrolling while the block is below the budget (at least
            # one iteration is always emitted) and loop_body has not opened
            # a new basic block.
            while k < n_loops and (len(get_block()) < get_program().budget \
                or k == 0) \
                and block is get_block():
                j = i + k
                state = reducer(tuplify(loop_body(j)), state)
                k += 1
            r = reducer(mem_state, state)
            write_state_to_memory(r)
            # The unroll count is passed out through a module-level global.
            global n_opt_loops
            n_opt_loops = k
            n_parallel_reg.write(k)
            return i + k
        my_n_parallel = n_opt_loops
        loop_rounds = n_loops / my_n_parallel
        blocks = get_tape().basicblocks
        # NOTE(review): 5 presumably equals the number of basic blocks that
        # while_do creates -- confirm against its implementation.
        n_to_merge = 5
        if loop_rounds == 1 and parent_block is blocks[-n_to_merge]:
            # merge blocks started by if and do_while
            def exit_elimination(block):
                # Mark the registers used by the block's exit condition as
                # eliminable.
                if block.exit_condition is not None:
                    for reg in block.exit_condition.get_used():
                        reg.can_eliminate = True
            exit_elimination(parent_block)
            merged = parent_block
            merged.exit_condition = blocks[-1].exit_condition
            merged.exit_block = blocks[-1].exit_block
            assert parent_block is blocks[-n_to_merge]
            assert blocks[-n_to_merge + 1] is \
                get_tape().req_node.children[-1].nodes[0].blocks[0]
            # Append the instructions of the following blocks to the parent
            # block, then drop those blocks and the matching requirement
            # node.
            for block in blocks[-n_to_merge + 1:]:
                merged.instructions += block.instructions
                exit_elimination(block)
            del blocks[-n_to_merge + 1:]
            del get_tape().req_node.children[-1]
            merged.children = []
            get_tape().active_basicblock = merged
        else:
            # Scale the child requirement node by the number of rounds.
            req_node = get_tape().req_node.children[-1].nodes[0]
            req_node.children[0].aggregator = lambda x: loop_rounds * x[0]
    # Handle the iterations that do not fill a complete round.
    if isinstance(n_loops, int):
        state = mem_state
        for j in range(loop_rounds * my_n_parallel, n_loops):
            state = reducer(tuplify(loop_body(j)), state)
    else:
        @for_range(loop_rounds * my_n_parallel, n_loops)
        def f(j):
            r = reducer(tuplify(loop_body(j)), mem_state)
            write_state_to_memory(r)
        state = mem_state
    # Store the final state back into mem_state.
    for i, x in enumerate(state):
        if use_array:
            mem_state[i] = x
        else:
            mem_state[i].write(x)
    def returner():
        return untuplify(tuple(state))
    return returner