def restart_or_quit(): assert pc() & 0xFF == 0, "restart_or_quit must be placed at the start of a page" label("forth.restart-or-quit") bra([W_lo]) # 6 ble(pc() + 1) # 7 # 8 happens in start of thread again label(".quit") ld(hi("forth.exit"), Y) # 9 C("jmp forth.exit.from-failed-test") jmp(Y, lo("forth.exit.from-failed-test")) # 10
def shift(vtmp): """Place all of the code required for 2*, LSHIFT, RSHIFT and 2/ Needs a page to itself. """ # Customized restart or quit trampoline which loads Y and X so that # they point at the data stack. This saves space at no runtime cost. assert pc( ) & 0xFF == 0, "restart_or_quit must be placed at the start of a page" label("forth.restart-or-quit") ble(pc() + 3) # 6 ld(data_stack_page, Y) # 7 bra([W_lo]) ld([data_stack_pointer], X) # 8; nop for purposes of .quit label(".quit") ld(hi("forth.exit"), Y) # 9 C("jmp forth.exit.from-failed-test") jmp(Y, lo("forth.exit.from-failed-test")) # 10 # 11, overlap with whatever comes next - hopefully not a branch or jump! _two_times() offset_start = pc() _lshift() offset_of_shift_by_8 = pc() - offset_start _lshift__amount_eq_8() offset_of_shift_by_gt_8 = pc() - offset_start _lshift__amount_gt_8() offset_of_shift_by_lt_8 = pc() - offset_start _lshift__amount_lt_8() rshift_offset_start = pc() _rshift() assert pc() - rshift_offset_start <= offset_of_shift_by_8 fillers(until=(rshift_offset_start + offset_of_shift_by_8) & 255) _rshift__amount_eq_8() assert pc() - rshift_offset_start <= offset_of_shift_by_gt_8 fillers(until=(rshift_offset_start + offset_of_shift_by_gt_8) & 255) _rshift__amount_gt_8() assert pc() - rshift_offset_start <= offset_of_shift_by_lt_8 fillers(until=(rshift_offset_start + offset_of_shift_by_lt_8) & 255) _rshift__amount_lt_8() _shift_entry( offset_to_amount_eq_8=offset_of_shift_by_8, offset_to_amount_gt_8=offset_of_shift_by_gt_8, offset_to_amount_lt_8=offset_of_shift_by_lt_8, ) _left_shift_by_n() _right_shift_by_n(vtmp) _two_div(vtmp)
def question_branch_rom_mode(): """Conditional Branch (flag -- ) Branches when the flag at the top of the stack is zero Naming is per the Forth '83 standard. """ label("forth.internal.rom-mode.?BRANCH") adda(-cost_of_question_branch_rom_mode // 2) # 1 ld([data_stack_pointer], X) ld([data_stack_pointer]) adda(2) st([data_stack_pointer]) # 5 ld([X]) bne(".?BRANCH.not-zero1") ld(-(cost_of_question_branch_rom_mode__first_byte_nonzero // 2)) # 8 ld(data_stack_page, Y) # 9 st([Y, Xpp]) ld([Y, X]) bne(".?BRANCH.not-zero2") ld(-(cost_of_question_branch_rom_mode__second_byte_nonzero // 2)) # 13 ld(-(cost_of_question_branch_rom_mode__both_bytes_zero // 2)) # 14 C("Store cost") st([tmp0]) # 15 label(".enter-thread") ld(W, X) # 16, 20 C("X <- W") ld([IP_hi], Y) C("Jump to the code in the thread") jmp(Y, [IP_lo]) ld(0x00, Y) # 19, 23 label(".?BRANCH.not-zero1") nop() label(".?BRANCH.not-zero2") st([tmp0]) # 10, 14 C("Store cost") ld(2) # 11, 15 ("IP <- IP + 2") adda([IP_lo]) st([IP_lo]) bra(".enter-thread") ld(3) # 15, 19 C("IP will move a further 3")
def next1(vTicks): """Routine to make continue or abort decisions, and dispatch to the next word""" # Invariant - on entry the vTicks variable and the accumulator both hold # an accurate number of cycles until we must be back in the display loop, # starting from the first instruction of this routine. # This value will always be greater than the cost of failing continue/abort test. This is true # whenever we return here from another word, and true when we first enter from the # display loop. label("forth.next1") C( "Timing point: [vTicks] == AC == accurate number of ticks until we need to be back" ) suba((cost_of_successful_test + cost_of_failfast) / 2) # 1 ld([W_hi], Y) # 2 jmp(Y, [W_lo]) # 3 bra("forth.restart-or-quit") # 4
def _rshift__amount_gt_8(): # offset to n > 8 case label("forth.core.RSHIFT.n>8") adda(-add_cost_of_next(cost_of_rshift__amount_gt_8) / 2) # 1 # ld(data_stack_page, Y) # Happen in head of page, but still counted # ld([data_stack_pointer], X) ld(lo("forth.core.RSHIFT.n>8.continuation")) st([continuation]) # 5 bra("right-shift-by-n") ld([amount]) label("forth.core.RSHIFT.n>8.continuation") st([Y, Xpp]) st([Y, Xpp]) ld([data_stack_pointer], X) # 10 ld(0) st([Y, X]) # 12 NEXT(cost_of_rshift__amount_gt_8)
def _right_shift_by_n(vtmp): """ Fixed cost routine to do a right-shift by 1-7 places Shift amount is passed in NEGATED in ac, value is loaded from [y, x]. Control is returned to address in continuation. In the case of a right-shift by between 1 and 7 places, this needs to be called twice. in which case we can jump to right-shift-by-n.second-time with mask in ac. """ label("right-shift-by-n") st([tmp0]) # 1 adda(".end-of-set_bits_table") bra(AC) # 3 bra(lo(".end-of-set_bits_table")) # 4 ld(0b0011_1111) # Shift by 7 ld(0b0001_1111) # Shift by 6 ld(0b0000_1111) # Shift by 5 ld(0b0000_0111) # Shift by 4 ld(0b0000_0011) # Shift by 3 ld(0b0000_0001) # Shift by 2 ld(0b0000_0000) # Shift by 1 label(".end-of-set_bits_table") st([set_bits]) # 6 # Take the opportunity to set vTmp ld(lo("forth.right-shift-return-point")) st([vtmp]) ld([tmp0]) adda(".end-of-mask-table") bra(AC) # 11 bra(lo(".end-of-mask-table")) # 12 ld(0b1000_0000) # Shift by 7 ld(0b1100_0000) # Shift by 6 ld(0b1110_0000) # Shift by 5 ld(0b1111_0000) # Shift by 4 ld(0b1111_1000) # Shift by 3 ld(0b1111_1100) # Shift by 2 ld(0b1111_1110) # Shift by 1 label(".end-of-mask-table") st([mask]) # 14 label("right-shift-by-n.second-time") anda([Y, X]) # 15, 1 ora([set_bits]) ld(hi("shiftTable"), Y) jmp(Y, AC) # 18, 4 bra(0xFF) # 19, 5
def next1_reenter(vTicks): label("forth.next1.reenter") label( "forth.next1.reenter.even" ) # When a word took an even number of cycles, enter here nop() # 1 label( "forth.next1.reenter.odd" ) # Inbound code should round down ticks, because counting is from .even suba((cost_of_successful_test + cost_of_next1_reenter_success) / 2) # 2 adda([vTicks]) # 3 st([vTicks]) # 4; If we exit successfully we'll be ready for next1 suba(cost_of_failed_test / 2) # 5 blt(lo("forth.exit.from-next1-reenter")) # 6 vticks_error = cost_of_next1_reenter_success - cost_of_next1_reenter_failure ld((vticks_error / 2)) # 7 ; load vTicks wrongness into A bra(lo("forth.next1")) # 8 ld([vTicks]) # 9
def _two_div(vtmp): """Implementation of 2/ (arithmetic shift right) 2/ ( x1 -- x2 ) """ label("forth.core.2/") adda(-add_cost_of_next(cost_of_two_div) / 2) # 1 # ld(data_stack_page, Y) # Happen in head of page, but still counted # ld([data_stack_pointer], X) ld(lo("forth.core.2/.continuation1")) st([continuation]) # 5 ld(lo("forth.right-shift-return-point")) st([vtmp]) label("forth.core.2/.do-shift") ld([Y, X]) # 8, 30 anda(0b1111_1110) ld(hi("shiftTable"), Y) # 10 jmp(Y, AC) # 11, 33 bra(255) # 12, 34 # ld # 13, 35 # bra [vTmp] # 14, 36 # nop # 15, 37 # ld $thisPage, Y # 16, 38 # jmp Y, [sysArgs + 4] # 17, 39 # ld $0, y # 18, 40 label("forth.core.2/.continuation1") st([Y, Xpp]) # 19; Store shifted low-byte ld([Y, X]) # 20; Calculate bit to copy from high to low anda(0b0000_0001) adda(127) anda(0b1000_0000) ld([data_stack_pointer], X) ora([Y, X]) # 25 st([Y, Xpp]) ld(lo("forth.core.2/.continuation2")) # Continue to high-byte bra("forth.core.2/.do-shift") # 28 st([continuation]) # 29 label("forth.core.2/.continuation2") st([Y, X]) # 41 adda(AC) # Shift back to get sign-bit anda(0b1000_0000) ora([Y, X]) st([Y, X]) # 45 NEXT(cost_of_two_div)
def _lshift__amount_gt_8(): """LSHIFT (x1 u -- x2) Special case where u > 8 """ label("forth.core.LSHIFT.n>8") adda(-add_cost_of_next(cost_of_lshift__amount_gt_8) / 2) # 1 # ld(data_stack_page, Y) # ld([data_stack_pointer], X) ld(lo("forth.core.LSHIFT.n>8.continuation")) st([continuation]) # 5 bra("left-shift-by-n") # 6 ld([amount]) # 7 label("forth.core.LSHIFT.n>8.continuation") st([Y, Xpp]) # 1 st([Y, Xpp]) # 2 ld([data_stack_pointer], X) # 3 ld(0) # 4 st([Y, Xpp]) # 5 NEXT(cost_of_lshift__amount_gt_8)
def zero_equal(): """Logical not ( x -- flag ) flag is true if and only if x is equal to zero. """ label("forth.core.0=") adda(-add_cost_of_next(cost_of_zero_equal) / 2) # 1 ld(data_stack_page, Y) ld([data_stack_pointer], X) ld([Y, X]) st([Y, Xpp]) # 5 ora([Y, X]) beq(pc() + 3) # 7 bra(pc() + 3) # 8 ld(0x00) # 9 - If any bits were non-zero ld(0xFF) # 9 - Otherwise ld([data_stack_pointer], X) # 10 st([Y, Xpp]) st([Y, X]) # 12 NEXT(cost_of_zero_equal)
def next2(vTicks): label("forth.next2") label("forth.next2.odd") nop() label("forth.next2.even") # On entry AC holds the negative of the number of ticks taken by the just executed instruction # To have entered the instruction we must have also had a successful test, suba((cost_of_successful_test + cost_of_next2_success) / 2) # 1 adda([vTicks]) # 2 st([vTicks]) # 3; If we exit successfully we'll be ready for next1 ld([mode]) # 4 st([W_lo]) # 5 ld(hi("forth.next3")) # 6 # TODO st([W_hi]) # 7 ld([vTicks]) # 8 suba((cost_of_failed_test) / 2) # 9 blt(lo("forth.exit.from-next2")) # 10 tick_correction = cost_of_next2_success - cost_of_next2_failure ld(tick_correction / 2) # 11; Restore bra(lo("forth.next1")) # 12 ld([vTicks]) # 13
def _lshift__amount_lt_8(): """LSHIFT (x1 u -- x2) Special case where u < 8 """ label("forth.core.LSHIFT.n<8") adda(-add_cost_of_next(cost_of_lshift__amount_lt_8) / 2) # 1 # ld(data_stack_page, Y) # ld([data_stack_pointer], X) ld(lo("forth.core.LSHIFT.n<8.continuation1")) st([continuation]) # 5 bra("right-shift-by-n") ld([transfer_amount]) label("forth.core.LSHIFT.n<8.continuation1") st([high_byte_temp]) ld(lo("forth.core.LSHIFT.n<8.continuation2")) st([continuation]) # 10 bra("left-shift-by-n") ld([amount]) label("forth.core.LSHIFT.n<8.continuation2") st([Y, Xpp]) ld(lo("forth.core.LSHIFT.n<8.continuation3")) st([continuation]) # 15 bra("left-shift-by-n") ld([amount]) label("forth.core.LSHIFT.n<8.continuation3") ora([high_byte_temp]) st([Y, X]) # 19 NEXT(cost_of_lshift__amount_lt_8)
def _left_shift_by_n(): """Fixed cost routine to do a left-shift by 1-7 places Shift amount is passed in NEGATED in ac, value is loaded from [Y, X] Control is returned to address in continuation """ label("left-shift-by-n") # Because we do n shift operations, with 0 < n < 8 # we need to balance it with 7 - n nops - so that we always do # 7 ops in total adda(lo(".end-of-left-shifts")) # 1 st([tmp0]) # Where we jump in the left-shifts suba(lo(".end-of-left-shifts") - 7) xora(0xFF) # ac = -(shift-amount) + 7; Negate it. adda(lo(".end-of-nops") + 1) # 5; +1 is to finish two's complement bra(AC) # 6 ld([tmp0]) # 7 ; Shift by 1 nop() # Shift by 2 nop() # Shift by 3 nop() # Shift by 4 nop() # Shift by 5 nop() # Shift by 6 label(".end-of-nops") bra(AC) # 8; ld([Y, X]) # 9 adda(AC) # Shift by 7 adda(AC) # Shift by 6 adda(AC) # Shift by 5 adda(AC) # Shift by 4 adda(AC) # Shift by 3 adda(AC) # Shift by 2 bra([continuation]) # 10 # Shift by 1 label(".end-of-left-shifts") adda(AC) # (counted as one of the 7)
def _rshift__amount_lt_8(): label("forth.core.RSHIFT.n<8") adda(-add_cost_of_next(cost_of_rshift__amount_lt_8) / 2) # 1 # ld(data_stack_page, Y) # Happen in head of page, but still counted # ld([data_stack_pointer], X) ld(lo("forth.core.RSHIFT.n<8.continuation1")) st([continuation]) # 5 bra("right-shift-by-n") ld([amount]) label("forth.core.RSHIFT.n<8.continuation1") st([Y, Xpp]) ld(lo("forth.core.RSHIFT.n<8.continuation2")) st([continuation]) # 10 bra("left-shift-by-n") ld([transfer_amount]) label("forth.core.RSHIFT.n<8.continuation2") ld([data_stack_pointer], X) ora([Y, X]) st([Y, Xpp]) # 15 ld(lo("forth.core.RSHIFT.n<8.continuation3")) st([continuation]) bra("right-shift-by-n.second-time") ld([mask]) label("forth.core.RSHIFT.n<8.continuation3") st([Y, X]) # 20 NEXT(cost_of_rshift__amount_lt_8)
def bitwise(): """Common implementation for all of the bitwise operators""" for name, target in [("AND", ".and"), ("OR", ".or"), ("XOR", ".xor")]: label(f"forth.core.{name}") adda(-add_cost_of_next(cost_of_binary_bitwise) / 2) # 1 bra(".copy-first-value") # 2 ld(lo(target)) # 3 label(".copy-first-value") st([tmp0]) # 4 adda(1) # 5 st([tmp1]) ld(data_stack_page, Y) ld([data_stack_pointer], X) ld(2) adda([data_stack_pointer]) # 10 st([data_stack_pointer]) # 11 for tmp in [tmp2, tmp3]: ld([Y, X]) st([tmp]) st([Y, Xpp]) # 17 = 11 + 2 * 3 ld([Y, X]) bra([tmp0]) bra(pc() + 1) # 20 # 21 st([Y, Xpp]) # 22 ld([Y, X]) bra([tmp1]) # 24 bra(".bitwise-done") # 25 # 26 for label_, op in [(".and", anda), (".or", ora), (".xor", xora)]: label(label_) op([tmp2]) op([tmp3]) label(".bitwise-done") st([Y, X]) # 27 NEXT(cost_of_binary_bitwise)
for i in range(32, 256): val = math.floor(i**2 / 4) ld(hi(val)) C(f"${val:04x} = {val} = floor({i} ** 2 / 4); ${val:04x} >> 8 = ${val >> 8:02x}" ) # We jump back here after looking up the low-byte of the result. label("low-byte return point") ld(hi("multiply 7x7"), Y) jmp(Y, [continuation]) ld(hi(pc()), Y) # Make it easy to get back here! cost_of_low_byte_return = 3 label("table entry.possibly-negative") # AC is negative, if b > a. Find absolute value blt(pc() + 3) # 1 bra(pc() + 3) # 2 suba(1) # 3; if >= 0 xora(0xFF) # 3; if < 0 adda(1) # 4 cost_of_absolute = 4 label("table entry") # Calculate an index into the high-byte table. # This is basically a matter of subtracting 32, and jumping in if the result >= 0. # But values greater than 160 have the sign-bit set after subtraction, # despite being >32. # We test for the sign bit and jump after subtraction even if 'negative' in these cases. st([tmp]) # 1 blt(pc() + 5) # 2 suba(32) # 3 bge(AC) # 4 bra([high_byte_action]) # 5
def add(): # This is exactly the same algorithm as in the vCPU implementation, but with my own comments to explain it to myself. label("forth.core.+") label("forth.core.CHAR+") adda(-add_cost_of_next(cost_of_add) / 2) # 1 low, high = tmp0, tmp1 ld(data_stack_page, Y) C("Load and move data stack pointer") ld([data_stack_pointer], X) ld([data_stack_pointer]) adda(2) # 5 st([data_stack_pointer]) # 6 # Copy TOS to low, high c = "Copy TOS to zero-page" for address in [low, high]: ld([Y, X]) c = C(c) st([address]) st([Y, Xpp]) # 12 = 6 + 2 * 3 # Add low bytes ld([Y, X]) C("Add low bytes") adda([low]) st([Y, Xpp]) # 15 bmi(".add.result-has-1-in-bit-7") suba([low]) # 17 Restore to low-byte of TOS # We previously had a result with a 0 in bit seven 0xxxxxxx # We can now use the operands to work out if there was # a carry out of bit seven. # The truth table is as follows # | A[7] | B[7] | Carry-in || Result[7] | Carry-out # ---|------------------------------------------------ # 0 | 0 | 0 | 0 || 0 | 0 # 1 | 0 | 0 | 1 || 1 | 0 # 2 | 0 | 1 | 0 || 1 | 0 # 3 | 0 | 1 | 1 || 0 | 1 # 4 | 1 | 0 | 0 || 1 | 0 # 5 | 1 | 0 | 1 || 0 | 1 # 6 | 1 | 1 | 0 || 0 | 1 # 7 | 1 | 1 | 1 || 1 | 1 # Given that there is zero in bit seven (cases 0, 3, 5 and 6) # There is not a carry (case 0) when both A[7] and B[7] are 0 # There is if either or both are 1. # Bitwise OR of the two operands will place the carry in bit seven bra(".add.carry-bit-in-msb") # 18 ora([low]) # 19 label(".add.result-has-1-in-bit-7") # Given that there is one in bit seven (cases 1, 2, 4 and 7) # There is not a carry (case 1, 2, 4) when either A[7] or B[7] are 0 # There is only a carry (case 7) when both are one. # Bitwise AND of the two operands will place the carry in bit seven bra(".add.carry-bit-in-msb") # 18 anda([low]) # 19 label(".add.carry-bit-in-msb") # vCPU moves uses anda $80, x to load 0x00 or 0x80 to X, and loads [X], # using constant values at 0x80 and 0x00, but we still need X for now, # So branching on the sign-bit works out just as cheap. bmi(".add.carry") ld([Y, X]) # 21 bra(".add.finish") adda([high]) # 23 label(".add.carry") adda(1) # 22 adda([high]) # 23 label(".add.finish") st([Y, X]) # 24 NEXT(cost_of_add)
def _shift_entry(*, offset_to_amount_eq_8, offset_to_amount_gt_8, offset_to_amount_lt_8): # Structurally left and right shift are very similar, # and we can share a lot of code. # There are five major cases for each (n is the shift amount): # n == 0 : We don't do anything but adjust stack height. # 0 < n < 8 : The most complicated case - we need to shift both # bytes and also transfer bits from one to the other # n == 8 : Quite simple, one byte takes its value from the other # which becomes zero # 8 < n < 16 : Shift one byte, and store into the other. # Store zero in first byte. # 16 <= n : Result is zero (technically we could ignore this). # # These have very different costs! # The entry point for both LSHIFT and RSHIFT call a single routine. # It loads the shift amount, and works out which of the cases we're # in. n == 0, and n > 16 are both handled immediately, followed by # NEXT. # For the other three cases, we dispatch to different routines by # adjusting W and calling REENTER. # The code is structured so that the we need to apply to W is the # same whether we're doing a left or right shift. # LSHIFT and RSHIFT both begin with the following sequence # adda(-add_cost_of_next(cost_of_shift_entry) / 2) # 1 # ld(data_stack_page, Y) # 2 # ld([data_stack_pointer], X) # 3 # bra("forth.core.shift.entry") # 4 # ld([data_stack_pointer]) # 5 # (The loads of X and Y technically happen elsewhere, but we count # them here) label("forth.core.shift.entry") adda(2) # 6 st([data_stack_pointer]) # Load amount: ld([Y, X]) # Load low-byte of amount st([Y, Xpp]) st([amount]) # 10 ora([Y, X]) beq("forth.core.shift.entry.amount-zero") # 12 # Numbers greater than 16 must have bit 4 or higher set. # AND with 0xf0 will reveal high bits set. ld(0xF0) # 13; Test for 16s place or higher being set in low byte anda([amount]) ora([Y, X]) # 15; Or any bit in high byte bne("forth.core.shift.entry.amount-gte16") # 16 # We want different values depending on which path we're going to follow # the n < 8 case wants -(n) and -(8 - n) = n - 8. # The n > 8 case wants -(n - 8) = 8 - n # The n = 8 case needs nothing. # Because the < 8 case has two variables, give it the "default" path # TODO: I feel very deeply that there must be a nicer way of doing this # TODO: Probably something todo with XOR. ld([amount]) # 17 suba(8) bgt("forth.core.shift.entry.amount-gt8") # 19 beq("forth.core.shift.entry.amount-eq8") # 20 st([transfer_amount]) # 21 # For the n < 8 case ld(0) suba([amount]) st([amount]) ld(offset_to_amount_lt_8) # 25 label(".adjust_W") adda([W_lo]) # 26 st([W_lo]) # 27 REENTER(27) label("forth.core.shift.entry.amount-eq8") nop() # 22 nop() bra(lo(".adjust_W")) # 24 ld(offset_to_amount_eq_8) # 25 label("forth.core.shift.entry.amount-gt8") ld(8) # 21 suba([amount]) st([amount]) bra(".adjust_W") # 24 ld(offset_to_amount_gt_8) # 25 label("forth.core.shift.entry.amount-zero") NEXT(13) label("forth.core.shift.entry.amount-gte16") st([Y, Xpp]) # 18 ld(0) st([Y, Xpp]) # 20 st([Y, Xpp]) # 21 NEXT(21)
def _rshift(): """RSHIFT (x1 u -- x2)""" label("forth.core.RSHIFT") adda(-add_cost_of_next(cost_of_shift_entry) / 2) # 1 bra("forth.core.shift.entry") ld([data_stack_pointer]) # 3