class AdditionEnv:
    """Environment of Addition.

    The environment is a grid (``self.screen``) plus one pointer per row.
    Cells hold ``digit + 1`` so that ``0`` can stand for an empty cell.
    ``setup_problem`` places the operands in rows 0 and 1 and ``get_output``
    reads the result from row 3.

    NOTE(review): ``Screen`` is defined elsewhere; this class assumes it
    exposes ``width``, ``height``, ``fill`` and 2-D indexing -- confirm.
    """

    def __init__(self, height, width, num_chars, terminal):
        """Create a ``height`` x ``width`` screen and reset the environment.

        Args:
            height: number of rows (and therefore of pointers).
            width: number of columns.
            num_chars: number of distinct cell values; also the one-hot size.
            terminal: object stored on the instance; it is not used by any
                method of this class.
        """
        self.screen = Screen(height, width)
        self.num_chars = num_chars
        # Assigned before reset() so the instance is fully initialised by the
        # time any other method runs.
        self.terminal = terminal
        self.pointers = [0] * height
        self.reset()

    def reset(self):
        """Blank the screen and move every pointer to the rightmost column."""
        self.screen.fill(0)
        self.pointers = [self.screen.width - 1] * self.screen.height

    def get_observation(self) -> np.ndarray:
        """Return the one-hot encoding of the cell under each pointer.

        Only the cells currently pointed at are visible, one per row.
        NOTE(review): the original author flagged that this can be
        troublesome because a digit 0 is only distinguishable from an empty
        cell through the ``digit + 1`` encoding -- confirm callers rely on it.

        Returns:
            Array with one row per pointer and ``num_chars`` columns.
        """
        return np.array([
            self.to_one_hot(self.screen[row, col])
            for row, col in enumerate(self.pointers)
        ])

    def to_one_hot(self, ch):
        """Return an ``int8`` vector of length ``num_chars`` with ``ch`` set.

        Raises:
            IndexError: if ``ch`` is outside ``0 <= ch < num_chars``.
        """
        if not 0 <= ch < self.num_chars:
            raise IndexError("ch must be 0 <= ch < %s, but %s" %
                             (self.num_chars, ch))
        ret = np.zeros((self.num_chars,), dtype=np.int8)
        ret[ch] = 1
        return ret

    def _write_number(self, row, number):
        """Write the decimal digits of ``number`` right-aligned into ``row``."""
        for offset, digit in enumerate(reversed(str(number)), start=1):
            # Digits are stored shifted by one; 0 is reserved for "empty".
            self.screen[row, -offset] = int(digit) + 1

    def setup_problem(self, num1, num2):
        """Place ``num1`` in row 0 and ``num2`` in row 1, right-aligned."""
        self._write_number(0, num1)
        self._write_number(1, num2)

    def move_pointer(self, row, left_or_right):
        """Move the pointer of ``row`` one column, wrapping at the edges.

        Args:
            row: pointer index; out-of-range values are ignored.
            left_or_right: 1 moves right, anything else (LEFT is 0) moves left.
        """
        if not 0 <= row < len(self.pointers):
            return
        step = 1 if left_or_right == 1 else -1
        self.pointers[row] = (self.pointers[row] + step) % self.screen.width

    def write(self, row, ch):
        """Write ``ch`` under the pointer of ``row``.

        The call is silently ignored when ``row`` or ``ch`` is out of range.
        """
        if 0 <= row < self.screen.height and 0 <= ch < self.num_chars:
            self.screen[row, self.pointers[row]] = ch

    def get_output(self):
        """Decode row 3 into an integer, skipping empty cells.

        Returns:
            The number spelled by the non-empty cells of row 3, or 0 when the
            row is empty.
        """
        digits = "".join(str(ch - 1) for ch in self.screen[3] if ch > 0)
        return int(digits or "0")
class BubblesortEnv:
    """Environment of Bubblesort.

    The array to sort lives in row 0 of ``self.screen``; values are stored as
    ``value + 1`` and cells that are not greater than 0 are skipped when
    reading the output. Three pointers index columns of row 0; ``swap_point``
    exchanges the cells under pointers 0 and 1.

    NOTE(review): ``Screen`` and ``NULL`` are defined elsewhere in the
    project; ``NULL`` is used both as the blank fill value and as the initial
    pointer position -- confirm its value.
    """

    def __init__(self, height, width, num_chars):
        """Create a ``height`` x ``width`` screen and reset the environment.

        Args:
            height: number of screen rows.
            width: number of screen columns.
            num_chars: number of distinct cell values; also the one-hot size.
        """
        self.screen = Screen(height, width)
        self.num_chars = num_chars
        self.pointers = [NULL] * 3
        self.reset()

    def reset(self):
        """Fill the screen with ``NULL`` and reset the three pointers."""
        self.screen.fill(NULL)
        self.pointers = [NULL] * 3

    def get_observation(self) -> np.ndarray:
        """Return one-hot encodings of the pointed cells and of the pointers.

        The first ``len(pointers)`` rows encode the row-0 cell under each
        pointer; the following rows encode the pointer positions themselves.

        Raises:
            IndexError: (via ``to_one_hot``) if a cell value or a pointer
                position is not below ``num_chars``.
        """
        cells = [self.to_one_hot(self.screen[0, ptr]) for ptr in self.pointers]
        positions = [self.to_one_hot(ptr) for ptr in self.pointers]
        return np.array(cells + positions)

    def to_one_hot(self, ch):
        """Return an ``int8`` vector of length ``num_chars`` with ``ch`` set.

        Raises:
            IndexError: if ``ch`` is outside ``0 <= ch < num_chars``.
        """
        if not 0 <= ch < self.num_chars:
            raise IndexError("ch must be 0 <= ch < %s, but %s" %
                             (self.num_chars, ch))
        ret = np.zeros((self.num_chars, ), dtype=np.int8)
        ret[ch] = 1
        return ret

    def setup_problem(self, num_array):
        """Write ``num_array`` into row 0 starting at column 0."""
        for col, item in enumerate(num_array):
            # Values are stored shifted by one.
            self.screen[0, col] = int(item) + 1

    def move_pointer(self, ptr_kind, left_or_right):
        """Move pointer ``ptr_kind`` one column, wrapping at the edges.

        Args:
            ptr_kind: pointer index; out-of-range values are ignored.
            left_or_right: 1 moves right, anything else moves left.
        """
        if not 0 <= ptr_kind < len(self.pointers):
            return
        step = 1 if left_or_right == 1 else -1
        self.pointers[ptr_kind] = (
            (self.pointers[ptr_kind] + step) % self.screen.width)

    def get_output(self):
        """Return the values stored in row 0 as a list of ints.

        Cells that are not greater than 0 are skipped.
        """
        return [int(ch - 1) for ch in self.screen[0] if ch > 0]

    def swap_point(self):
        """Swap the row-0 cells under pointer 0 and pointer 1."""
        first, second = self.pointers[0], self.pointers[1]
        self.screen[0, first], self.screen[0, second] = (
            self.screen[0, second], self.screen[0, first])
class AdditionEnv:
    """Grid environment for the addition task.

    Cells of ``self.screen`` hold ``digit + 1`` (digits 0-9 become 1-10) so
    that ``0`` can stand for an empty cell. There is one pointer per row;
    operands are written to rows 0 and 1 and the result is read from row 3.

    NOTE(review): ``Screen`` is defined elsewhere; this class assumes it
    exposes ``width``, ``height``, ``fill`` and 2-D indexing -- confirm.
    """

    def __init__(self, height, width, num_chars):
        """Create a ``height`` x ``width`` screen and reset the environment.

        Args:
            height: number of rows (and therefore of pointers).
            width: number of columns.
            num_chars: number of distinct character classes; also the length
                of each one-hot vector.
        """
        self.screen = Screen(height, width)
        self.num_chars = num_chars
        # One pointer position (column index) per row.
        self.pointers = [0] * height
        self.reset()

    def reset(self):
        """Blank the screen and move every pointer to the rightmost column."""
        self.screen.fill(0)
        self.pointers = [self.screen.width - 1] * self.screen.height

    def get_observation(self) -> np.ndarray:
        """Return the one-hot encoded content currently under each pointer.

        Returns:
            Array with one row per pointer and ``num_chars`` columns.
        """
        return np.array([
            self.to_one_hot(self.screen[row, col])
            for row, col in enumerate(self.pointers)
        ])

    def to_one_hot(self, ch):
        """Return an ``int8`` vector of length ``num_chars`` with ``ch`` set.

        Raises:
            IndexError: if ``ch`` is outside ``0 <= ch < num_chars``.
        """
        if not 0 <= ch < self.num_chars:
            raise IndexError("ch must be 0 <= ch < %s, but %s" %
                             (self.num_chars, ch))
        ret = np.zeros((self.num_chars, ), dtype=np.int8)
        ret[ch] = 1
        return ret

    def _write_number(self, row, number):
        """Write the decimal digits of ``number`` right-aligned into ``row``."""
        for offset, digit in enumerate(reversed(str(number)), start=1):
            # Map digits 0-9 to cell values 1-10.
            self.screen[row, -offset] = int(digit) + 1

    def setup_problem(self, num1, num2):
        """Place ``num1`` in row 0 and ``num2`` in row 1, right-aligned."""
        self._write_number(0, num1)
        self._write_number(1, num2)

    def move_pointer(self, row, left_or_right):
        """Move the pointer of ``row`` one column, wrapping at the edges.

        Args:
            row: pointer index; out-of-range values are ignored.
            left_or_right: 1 moves right, anything else (LEFT is 0) moves left.
        """
        if not 0 <= row < len(self.pointers):
            return
        step = 1 if left_or_right == 1 else -1
        self.pointers[row] = (self.pointers[row] + step) % self.screen.width

    def write(self, row, ch):
        """Write ``ch`` under the pointer of ``row``.

        The call is silently ignored when ``row`` or ``ch`` is out of range.
        """
        if 0 <= row < self.screen.height and 0 <= ch < self.num_chars:
            self.screen[row, self.pointers[row]] = ch

    def get_output(self):
        """Decode row 3 into an integer, skipping empty cells.

        Returns:
            The number spelled by the non-empty cells of row 3, or 0 when the
            row is empty.
        """
        digits = "".join(str(ch - 1) for ch in self.screen[3] if ch > 0)
        return int(digits or "0")
class AdditionEnv:
    """Environment of Addition.

    ``self.screen`` is a grid whose cells hold ``digit + 1``; ``0`` marks an
    empty cell. Each row has its own pointer (a column index). The two
    operands are written to rows 0 and 1 by ``setup_problem`` and the answer
    is decoded from row 3 by ``get_output``.

    NOTE(review): ``Screen`` is defined elsewhere; this class assumes it
    exposes ``width``, ``height``, ``fill`` and 2-D indexing -- confirm.
    """

    def __init__(self, height, width, num_chars):
        """Create a ``height`` x ``width`` screen and reset the environment.

        Args:
            height: number of rows (and therefore of pointers).
            width: number of columns.
            num_chars: number of distinct cell values; also the one-hot size.
        """
        self.screen = Screen(height, width)
        self.num_chars = num_chars
        self.pointers = [0] * height
        self.reset()

    def reset(self):
        """Blank the screen and move every pointer to the rightmost column."""
        self.screen.fill(0)
        self.pointers = [self.screen.width - 1] * self.screen.height

    def get_observation(self) -> np.ndarray:
        """Return the one-hot encoding of the cell under each pointer.

        Returns:
            Array with one row per pointer and ``num_chars`` columns.
        """
        return np.array([
            self.to_one_hot(self.screen[row, col])
            for row, col in enumerate(self.pointers)
        ])

    def to_one_hot(self, ch):
        """Return an ``int8`` vector of length ``num_chars`` with ``ch`` set.

        Raises:
            IndexError: if ``ch`` is outside ``0 <= ch < num_chars``.
        """
        if not 0 <= ch < self.num_chars:
            raise IndexError("ch must be 0 <= ch < %s, but %s" %
                             (self.num_chars, ch))
        ret = np.zeros((self.num_chars,), dtype=np.int8)
        ret[ch] = 1
        return ret

    def _write_number(self, row, number):
        """Write the decimal digits of ``number`` right-aligned into ``row``."""
        for offset, digit in enumerate(reversed(str(number)), start=1):
            # Digits are stored shifted by one; 0 is reserved for "empty".
            self.screen[row, -offset] = int(digit) + 1

    def setup_problem(self, num1, num2):
        """Place ``num1`` in row 0 and ``num2`` in row 1, right-aligned."""
        self._write_number(0, num1)
        self._write_number(1, num2)

    def move_pointer(self, row, left_or_right):
        """Move the pointer of ``row`` one column, wrapping at the edges.

        Args:
            row: pointer index; out-of-range values are ignored.
            left_or_right: 1 moves right, anything else (LEFT is 0) moves left.
        """
        if not 0 <= row < len(self.pointers):
            return
        step = 1 if left_or_right == 1 else -1
        self.pointers[row] = (self.pointers[row] + step) % self.screen.width

    def write(self, row, ch):
        """Write ``ch`` under the pointer of ``row``.

        The call is silently ignored when ``row`` or ``ch`` is out of range.
        """
        if 0 <= row < self.screen.height and 0 <= ch < self.num_chars:
            self.screen[row, self.pointers[row]] = ch

    def get_output(self):
        """Decode row 3 into an integer, skipping empty cells.

        Returns:
            The number spelled by the non-empty cells of row 3, or 0 when the
            row is empty.
        """
        digits = "".join(str(ch - 1) for ch in self.screen[3] if ch > 0)
        return int(digits or "0")
class MultiplicationEnv:
    """Environment of Multiplication.

    The idea is to add ``mul1`` to 0 for ``mul2`` times.

    Row layout of ``self.screen``:
        row0: in1  (initially 0, then set to the previous output; serves as
              the accumulator)
        row1: in2  (always ``mul1``)
        row2: carry
        row3: output
        row4: mul1
        row5: mul2 (decreased by 1 after each addition; serves as the counter
              of the addition program)

    Cells hold ``digit + 1`` so that ``0`` can stand for an empty cell.

    NOTE(review): ``Screen`` is defined elsewhere; this class assumes it
    exposes ``width``, ``height``, ``fill`` and 2-D indexing, and that
    ``terminal`` exposes ``add_log(str)`` -- confirm.
    """

    def __init__(self, height, width, num_chars, terminal):
        """Create a ``height`` x ``width`` screen and reset the environment.

        Args:
            height: number of rows (and therefore of pointers).
            width: number of columns.
            num_chars: number of distinct cell values; also the one-hot size.
            terminal: logger-like object; its ``add_log`` method is called by
                several methods of this class.
        """
        self.screen = Screen(height, width)
        self.num_chars = num_chars
        # Assigned before reset() so the instance is fully initialised by the
        # time any other method runs.
        self.terminal = terminal
        self.pointers = [0] * height
        self.reset()

    def reset(self):
        """Blank the whole screen and move every pointer to the rightmost column."""
        self.screen.fill(0)
        self.pointers = [self.screen.width - 1] * self.screen.height

    def reset_pointers(self):
        """Rewind the pointers of rows 0-3 and blank the carry row (row 2).

        Meant to be called after each addition; rows 4 and 5 keep their
        pointers.
        """
        for row in range(0, 4):
            self.pointers[row] = self.screen.width - 1
        self.fill_row_with_zero(2)

    def fill_row_with_zero(self, row):
        """Set every cell of ``row`` to 0 (empty)."""
        self.screen[row, :].fill(0)

    def get_observation(self) -> np.ndarray:
        """Return the one-hot encoding of the cell under each pointer.

        Returns:
            Array with one row per pointer and ``num_chars`` columns.
        """
        return np.array([
            self.to_one_hot(self.screen[row, col])
            for row, col in enumerate(self.pointers)
        ])

    def to_one_hot(self, ch):
        """Return an ``int8`` vector of length ``num_chars`` with ``ch`` set.

        Raises:
            IndexError: if ``ch`` is outside ``0 <= ch < num_chars``.
        """
        if not 0 <= ch < self.num_chars:
            raise IndexError("ch must be 0 <= ch < %s, but %s" %
                             (self.num_chars, ch))
        ret = np.zeros((self.num_chars, ), dtype=np.int8)
        ret[ch] = 1
        return ret

    def _write_digits(self, row, number):
        """Write ``number`` right-aligned into ``row``; return its leftmost column."""
        digits = str(number)
        for offset, digit in enumerate(reversed(digits), start=1):
            # Digits are stored shifted by one; 0 is reserved for "empty".
            self.screen[row, -offset] = int(digit) + 1
        return self.screen.width - len(digits)

    def _row_digits(self, row):
        """Return the digits stored in ``row`` as a string, skipping empty cells."""
        return "".join(str(ch - 1) for ch in self.screen[row] if ch > 0)

    def setup_problem(self, mul1, mul2):
        """Lay out a multiplication problem on the screen.

        Row 0 (in1) is set to 0, row 1 (in2) and row 4 to ``mul1`` and row 5
        to ``mul2``. The pointers of rows 4 and 5 are placed on the most
        significant digit so that the operands are not interpreted as zero
        when the parameters are decoded.
        """
        self._write_digits(0, 0)
        self._write_digits(1, mul1)
        self.set_pointer(4, self._write_digits(4, mul1))
        self.set_pointer(5, self._write_digits(5, mul2))

    def move_pointer(self, row, left_or_right):
        """Move the pointer of ``row`` one column, wrapping at the edges.

        Args:
            row: pointer index; out-of-range values are ignored.
            left_or_right: 1 moves right, anything else (LEFT is 0) moves left.
        """
        if not 0 <= row < len(self.pointers):
            return
        step = 1 if left_or_right == 1 else -1
        self.pointers[row] = (self.pointers[row] + step) % self.screen.width

    def set_pointer(self, row, index):
        """Place the pointer of ``row`` at column ``index`` and log the move."""
        self.terminal.add_log("@@@@set pointer at index %d" % (index))
        self.pointers[row] = index

    def write(self, row, ch):
        """Write a single cell value ``ch`` under the pointer of ``row``.

        The call is silently ignored when ``row`` or ``ch`` is out of range.
        """
        if 0 <= row < self.screen.height and 0 <= ch < self.num_chars:
            self.screen[row, self.pointers[row]] = ch

    def write_row(self, row, number):
        """Overwrite ``row`` with ``number`` (only used for SUB).

        The digits are written right-aligned and every cell to their left is
        cleared, so no digit of a previous, longer value survives. The
        pointer of ``row`` ends on the most significant digit.
        """
        leftmost = self._write_digits(row, number)
        # Clear all leading cells rather than just the neighbouring one: this
        # drops stale digits and cannot index past the left edge when the
        # number fills the whole row.
        for col in range(leftmost):
            self.screen[row, col] = 0
        self.set_pointer(row, leftmost)

    def copy_output_row_to_in1_row(self):
        """Copy the output row (3) into the in1 row (0).

        Only the columns from the row-3 pointer to the right edge are copied,
        walking right to left, and only cell values in ``0 <= ch < num_chars``
        are written.

        NOTE(review): the original author marked this method as having
        issues; cells of row 0 left of the row-3 pointer are not touched --
        confirm whether that is intended.
        """
        self.terminal.add_log("#####copy cat")
        for index in reversed(range(self.pointers[3], self.screen.width)):
            self.terminal.add_log("#index %d" % (index))
            ch = self.screen[3, index]
            if 0 <= ch < self.num_chars:
                self.screen[0, index] = ch

    def get_output(self):
        """Decode the output row (3) into an integer; 0 when the row is empty."""
        return int(self._row_digits(3) or "0")

    def get_mul2(self):
        """Decode the mul2 row (5) into an integer, logging the digits read."""
        return self.get_row(5)

    def get_row(self, row):
        """Decode ``row`` into an integer, logging the digits read.

        Returns:
            The number spelled by the non-empty cells of ``row``, or 0 when
            the row is empty.
        """
        s = self._row_digits(row)
        self.terminal.add_log("get %s" % s)
        return int(s or "0")