def _copy_W_to_IP(*, increment_by):
    """Emit code that copies the two-byte value at W into IP, plus an offset.

    [W_hi] is copied unchanged into [IP_hi]; ``increment_by`` is added only
    to the low byte on its way from [W_lo] to [IP_lo].

    ``increment_by`` is keyword-only; it is used both as the operand of the
    add and inside the string handed to ``C``.
    """
    ld([W_hi])
    C(f"Copy [W] to [IP], incrementing it by {increment_by}")
    st([IP_hi])
    ld([W_lo])
    # NOTE(review): no carry is propagated into the high byte, so this
    # presumably relies on [W_lo] + increment_by never wrapping - confirm.
    adda(increment_by)
    st([IP_lo])
def _two_times():
    """Implement a left-shift by one, shared by two words.

    2* ( x1 -- x2 )
    CELLS ( n1 -- n2 )

    Both labels are defined at the same address, so the two words run the
    same code. The loads of Y and X are not emitted here (see the
    commented-out instructions below) but are still counted in the cycle
    numbering.
    """
    label("forth.core.2*")
    label("forth.core.CELLS")
    adda(-add_cost_of_next(cost_of_two_times) / 2)  # 1
    # ld(data_stack_page, Y)  # Implemented in page header, but still counted here.
    # ld([data_stack_pointer], X)
    C("Load low-byte")
    ld([X])
    # X <- AC & 0x80, i.e. X becomes 0x00 or 0x80 depending on the top bit.
    anda(0b1000_0000, X)  # 5
    C("Calculate bit to shift in to the high-byte")
    # NOTE(review): presumably relies on [0x00] and [0x80] holding 0 and 1
    # respectively, turning the top bit into a carry value - confirm against
    # the memory map.
    ld([X])
    st([tmp0])
    ld([data_stack_pointer], X)
    C("Reload and left-shift")
    ld([X])
    adda(AC)  # 10
    st([Y, Xpp])
    ld([Y, X])
    C("Load high byte and left-shift")
    adda(AC)
    adda([tmp0])  # Add in the carry saved in tmp0
    st([Y, X])  # 15
    NEXT(cost_of_two_times)
def docol():
    """Code that should be inlined at the start of each core word.

    Emits the cost adjustment, a far jump to "forth.DO-DOCOL-RAM" (Y is
    loaded with the return stack page in the instruction emitted right after
    the jump), and then whatever ``docol_rom_only()`` emits.
    """
    adda(-add_cost_of_next(cost_of_docol_ram) / 2)
    ld(hi("forth.DO-DOCOL-RAM"), Y)
    jmp(Y, "forth.DO-DOCOL-RAM")
    ld(return_stack_page, Y)  # 4
    docol_rom_only()  # 4 + 4
def drop():
    """Emit a word that adds ``size`` to the data stack pointer.

    The word is labelled "forth.core." + ``name``.

    NOTE(review): ``name`` and ``size`` are not parameters or locals of this
    function; presumably they come from an enclosing or module scope that is
    not visible here - confirm.
    """
    label("forth.core." + name)
    adda(-add_cost_of_next(cost_of_drop) / 2)
    ld([data_stack_pointer])
    adda(size)
    st([data_stack_pointer])
    NEXT(cost_of_drop)
def _left_shift_by_n():
    """Fixed cost routine to do a left-shift by 1-7 places.

    The shift amount is passed in NEGATED in AC; the value to shift is loaded
    from [Y, X]. Control is returned to the address held in ``continuation``.

    Two computed branches are used: the first jumps into a run of nops, the
    second jumps into a run of ``adda(AC)`` doublings. tmp0 holds the entry
    address for the second branch.
    """
    label("left-shift-by-n")
    # Because we do n shift operations, with 0 < n < 8
    # we need to balance it with 7 - n nops - so that we always do
    # 7 ops in total
    adda(lo(".end-of-left-shifts"))  # 1
    st([tmp0])  # Where we jump in the left-shifts
    suba(lo(".end-of-left-shifts") - 7)
    xora(0xFF)  # ac = -(shift-amount) + 7; Negate it.
    adda(lo(".end-of-nops") + 1)  # 5; +1 is to finish two's complement
    bra(AC)  # 6
    ld([tmp0])  # 7 ; Shift by 1
    nop()  # Shift by 2
    nop()  # Shift by 3
    nop()  # Shift by 4
    nop()  # Shift by 5
    nop()  # Shift by 6
    label(".end-of-nops")
    bra(AC)  # 8; AC was reloaded from tmp0 above
    ld([Y, X])  # 9
    adda(AC)  # Shift by 7
    adda(AC)  # Shift by 6
    adda(AC)  # Shift by 5
    adda(AC)  # Shift by 4
    adda(AC)  # Shift by 3
    adda(AC)  # Shift by 2
    bra([continuation])  # 10  # Shift by 1
    label(".end-of-left-shifts")
    adda(AC)  # (counted as one of the 7)
def restart_or_quit():
    """Emit the "forth.restart-or-quit" trampoline at the start of a page.

    The code branches to the address in [W_lo], followed by a ``ble`` whose
    target is ``pc() + 1``. The ``.quit`` path jumps to
    "forth.exit.from-failed-test".

    Raises AssertionError if the current ``pc()`` is not at offset 0 of a
    page.
    """
    assert pc() & 0xFF == 0, "restart_or_quit must be placed at the start of a page"
    label("forth.restart-or-quit")
    bra([W_lo])  # 6
    # NOTE(review): pc() + 1 is presumably the address of .quit, the
    # instruction right after this ble - confirm pc() semantics.
    ble(pc() + 1)  # 7
    # 8 happens in start of thread again
    label(".quit")
    ld(hi("forth.exit"), Y)  # 9
    C("jmp forth.exit.from-failed-test")
    jmp(Y, lo("forth.exit.from-failed-test"))  # 10
def exit(vTicks, vReturn): label("forth.exit") # Counting down label("forth.exit.from-failed-test") ld(-(cost_of_failed_next1 + 1) / 2) # 7 label("forth.exit.from-next1-reenter") label("forth.exit.from-next2") adda([vTicks]) # 6 ld(hi("vBlankStart"), Y) # 5 bgt(pc() & 0xFF) # 4 suba(1) # 3 jmp(Y, [vReturn]) # 2 nop() # 1
def reenter(cycles_so_far):
    """Dispatch to the word in W.

    Emits a far jump to one of the two "forth.next1.reenter" entry points,
    chosen by the parity of ``cycles_so_far + cost_of_reenter``, followed by
    a load of the negated half-cost into AC.
    """
    total = cycles_so_far + cost_of_reenter
    is_odd = total % 2 != 0
    if is_odd:
        total -= 1  # The .odd entry point skips a nop
    entry_point = (
        "forth.next1.reenter.odd" if is_odd else "forth.next1.reenter.even"
    )
    ld(hi("forth.next1.reenter"), Y)  # 1
    C("REENTER")
    jmp(Y, lo(entry_point))  # 2
    ld(-total / 2)  # 3
def next(cycles_so_far):
    """Jump to the next instruction.

    Emits a far jump to one of the two "forth.next2" entry points, chosen by
    the parity of ``cycles_so_far + cost_of_next``, followed by a load of the
    negated half-cost into AC.
    """
    total = cycles_so_far + cost_of_next
    is_odd = total % 2 != 0
    if is_odd:
        total += 1  # The .odd entry point gains a nop
    entry_point = "forth.next2.odd" if is_odd else "forth.next2.even"
    ld(hi("forth.next2"), Y)  # 1
    C("NEXT")
    jmp(Y, lo(entry_point))  # 2
    ld(-(total / 2))  # 3
def next3_rom_head():
    """Start the process of next3.

    Defined under both "forth.next3" and "forth.next3.rom-mode". Stores the
    negated half-cost in tmp0, loads X with W and AC with 3, then jumps to
    the address held in [IP_hi]:[IP_lo]. Y is reset to 0x00 by the
    instruction emitted after the jump.
    """
    label("forth.next3")
    label("forth.next3.rom-mode")
    adda(-(cost_of_next3_rom // 2))  # 1
    ld(-(cost_of_next3_rom // 2))  # 2
    st([tmp0])  # 3
    ld(W, X)  # 4
    ld(3)  # We're going to shift the IP by 3
    ld([IP_hi], Y)  # 6
    nop()  # 7
    jmp(Y, [IP_lo])  # 8
    ld(0x00, Y)  # 9
def exit():
    """Word to exit from a thread.

    Reads two bytes from the return stack into [IP_lo] and [IP_hi] and
    advances [return_stack_pointer] by 2.

    NOTE(review): another ``exit(vTicks, vReturn)`` appears in this source
    and the name also matches the builtin; presumably the two definitions
    live in different modules - confirm.
    """
    label("forth.core.EXIT")
    adda(-add_cost_of_next(cost_of_exit) / 2)
    ld(return_stack_page, Y)
    ld([return_stack_pointer], X)
    ld([Y, X])
    st([IP_lo])
    ld([return_stack_pointer])
    adda(1, X)  # X <- pointer + 1, to address the second byte
    adda(2)  # AC <- pointer + 2, the new stack pointer
    st([return_stack_pointer])
    ld([Y, X])
    st([IP_hi])  # 11
    NEXT(cost_of_exit)
def next1(vTicks): """Routine to make continue or abort decisions, and dispatch to the next word""" # Invariant - on entry the vTicks variable and the accumulator both hold # an accurate number of cycles until we must be back in the display loop, # starting from the first instruction of this routine. # This value will always be greater than the cost of failing continue/abort test. This is true # whenever we return here from another word, and true when we first enter from the # display loop. label("forth.next1") C( "Timing point: [vTicks] == AC == accurate number of ticks until we need to be back" ) suba((cost_of_successful_test + cost_of_failfast) / 2) # 1 ld([W_hi], Y) # 2 jmp(Y, [W_lo]) # 3 bra("forth.restart-or-quit") # 4
def move_ip():
    """Page-Zero code to move the IP by the amount contained in AC.

    This routine is used by the ROM mode next3, and also by literal, branch
    and zero_branch. As these routines all have different lengths, it uses a
    variable (tmp0) to tell it what length to return.

    It always jumps to forth.next1.reenter.odd, and it has an odd length
    itself, so code calling it must have an even length.

    Only [IP_lo] is updated here. Raises AssertionError if the current
    ``pc()`` is not in page zero.
    """
    assert pc() >> 8 == 0
    label("forth.move-ip")
    adda([IP_lo])  # 1
    st([IP_lo])  # 2
    ld(hi("forth.next1.reenter"), Y)  # 3
    C("REENTER")
    jmp(Y, lo("forth.next1.reenter.odd"))  # 4
    ld([tmp0])  # 5
def two_dup():
    """Emit the word "forth.core.2DUP".

    Four bytes starting at the data stack pointer are copied into
    tmp0..tmp3, the stack pointer is lowered by 4, and the four saved bytes
    are written out again starting at the new pointer.
    """
    label("forth.core.2DUP")
    adda(-add_cost_of_next(cost_of_2dup) / 2)  # 1
    ld(data_stack_page, Y)
    ld([data_stack_pointer], X)  # 3

    scratch_cells = (tmp0, tmp1, tmp2, tmp3)

    # Save pass: three instructions per byte, so 15 = 3 + 4 * 3 afterwards.
    for cell in scratch_cells:
        ld([Y, X])
        st([cell])
        st([Y, Xpp])

    ld([data_stack_pointer])
    suba(4)
    st([data_stack_pointer], X)  # 18

    # Restore pass: two instructions per byte, so 26 = 18 + 4 * 2 afterwards.
    for cell in scratch_cells:
        ld([cell])
        st([Y, Xpp])

    NEXT(cost_of_2dup)
def next1_reenter(vTicks):
    """Emit the re-entry point into next1 used after a word has run.

    Two entry labels are provided, differing by one nop. The code updates
    [vTicks], branches to "forth.exit.from-next1-reenter" if too little time
    remains, and otherwise continues at "forth.next1" with AC reloaded from
    [vTicks].
    """
    label("forth.next1.reenter")
    # When a word took an even number of cycles, enter here
    label("forth.next1.reenter.even")
    nop()  # 1
    # Inbound code should round down ticks, because counting is from .even
    label("forth.next1.reenter.odd")
    suba((cost_of_successful_test + cost_of_next1_reenter_success) / 2)  # 2
    adda([vTicks])  # 3
    st([vTicks])  # 4; If we exit successfully we'll be ready for next1
    suba(cost_of_failed_test / 2)  # 5
    blt(lo("forth.exit.from-next1-reenter"))  # 6
    # Difference between the success and failure path costs.
    vticks_error = cost_of_next1_reenter_success - cost_of_next1_reenter_failure
    ld((vticks_error / 2))  # 7 ; load vTicks wrongness into A
    bra(lo("forth.next1"))  # 8
    ld([vTicks])  # 9
def invert():
    """Emit the word "forth.core.INVERT".

    XORs both bytes of the cell at the data stack pointer with 0xFF, in
    place.
    """
    label("forth.core.INVERT")
    adda(-add_cost_of_next(cost_of_invert) / 2)  # 1
    ld(data_stack_page, Y)
    ld([data_stack_pointer], X)
    ld([Y, X])
    xora(0xFF)  # 5
    st([Y, Xpp])  # Write back the first byte and step X to the second
    ld([Y, X])
    xora(0xFF)
    st([Y, X])  # 9
    NEXT(cost_of_invert)
def do_docol_ram(): label("forth.DO-DOCOL-RAM") # Upon exit from this thread, we need to restore the mode # So the return stack needs to look like: # TOP-> [restore_mode, mode, IP] ld([return_stack_pointer]) # 1 suba(5) st([return_stack_pointer], X) st(lo("forth.RESTORE-MODE"), [Y, Xpp]) st(hi("forth.RESTORE-MODE"), [Y, Xpp]) # 5 ld([mode]) st([Y, Xpp]) ld([IP_lo]) st([Y, Xpp]) ld([IP_hi]) # 10 st([Y, X]) ld(lo("forth.next3.rom-mode")) st([mode]) # 13 _copy_W_to_IP(increment_by=8) NEXT(cost_of_docol_ram)
def next2(vTicks):
    """Emit "forth.next2", entered after a word finishes via ``next``.

    Two entry labels are provided, differing by one nop. The code updates
    [vTicks], points W at next3 ([W_lo] from [mode], [W_hi] from the page of
    "forth.next3"), branches to "forth.exit.from-next2" if too little time
    remains, and otherwise continues at "forth.next1" with AC reloaded from
    [vTicks].
    """
    label("forth.next2")
    label("forth.next2.odd")
    nop()
    label("forth.next2.even")
    # On entry AC holds the negative of the number of ticks taken by the
    # just executed instruction.
    # To have entered the instruction we must have also had a successful test,
    suba((cost_of_successful_test + cost_of_next2_success) / 2)  # 1
    adda([vTicks])  # 2
    st([vTicks])  # 3; If we exit successfully we'll be ready for next1
    ld([mode])  # 4
    st([W_lo])  # 5
    ld(hi("forth.next3"))  # 6 # TODO
    st([W_hi])  # 7
    ld([vTicks])  # 8
    suba((cost_of_failed_test) / 2)  # 9
    blt(lo("forth.exit.from-next2"))  # 10
    # Difference between the success and failure path costs.
    tick_correction = cost_of_next2_success - cost_of_next2_failure
    ld(tick_correction / 2)  # 11; Restore
    bra(lo("forth.next1"))  # 12
    ld([vTicks])  # 13
def branch_rom_mode():
    """Unconditional Branch ( -- )

    Stores the negated half-cost in tmp0, loads X with W, and jumps to the
    address held in [IP_hi]:[IP_lo]. Y is reset to 0x00 by the instruction
    emitted after the jump.
    """
    label("forth.internal.rom-mode.BRANCH")
    # NOTE(review): ``-cost // 2`` floors towards negative infinity, so for
    # an odd cost it is one less than the ``-(cost // 2)`` loaded on the next
    # line. Confirm whether the difference is intended or the cost is always
    # even.
    adda(-cost_of_branch_rom_mode // 2)  # 1
    ld(-(cost_of_branch_rom_mode // 2))
    C("Store cost")
    st([tmp0])
    ld(W, X)
    C("X <- W")
    ld([IP_hi], Y)  # 5
    C("Jump to the code in the thread")
    jmp(Y, [IP_lo])
    ld(0x00, Y)  # 7
def _rshift__amount_gt_8():
    """Emit the RSHIFT special case labelled "forth.core.RSHIFT.n>8".

    Sets ``continuation`` to the label below, branches to "right-shift-by-n"
    with AC loaded from [amount], and on return stores the result and a zero
    byte to the stack cell.

    The loads of Y and X are not emitted here (see the commented-out
    instructions) but are still counted in the cycle numbering.
    """
    # offset to n > 8 case
    label("forth.core.RSHIFT.n>8")
    adda(-add_cost_of_next(cost_of_rshift__amount_gt_8) / 2)  # 1
    # ld(data_stack_page, Y)  # Happen in head of page, but still counted
    # ld([data_stack_pointer], X)
    ld(lo("forth.core.RSHIFT.n>8.continuation"))
    st([continuation])  # 5
    bra("right-shift-by-n")
    ld([amount])
    label("forth.core.RSHIFT.n>8.continuation")
    st([Y, Xpp])
    st([Y, Xpp])
    ld([data_stack_pointer], X)  # 10
    ld(0)
    st([Y, X])  # 12
    NEXT(cost_of_rshift__amount_gt_8)
def _push_ip_to_return_stack():
    """Emit code that pushes the two bytes of IP onto the return stack.

    [return_stack_pointer] is lowered by 2, then [IP_lo] and [IP_hi] are
    stored at the new pointer and the byte after it. Y is not loaded here;
    the caller must already have set it to the return stack page.
    """
    ld([return_stack_pointer])
    C("Y holds the page of the return stack")
    C("Push [IP] to Return stack")
    suba(2)
    st([return_stack_pointer], X)
    ld([IP_lo])
    st([Y, Xpp])
    ld([IP_hi])
    st([Y, X])
def decrement():
    """Subtract one from the top of the stack (n -- n).

    There are two exits with different costs: when the first byte is
    non-zero only that byte is rewritten; when it is zero the second byte is
    decremented as well.
    """
    label("forth.core.1-")
    adda(-add_cost_of_next(cost_of_decrement) / 2)  # 1
    ld(data_stack_page, Y)
    ld([data_stack_pointer], X)
    ld([Y, X])
    beq(lo(".low-byte-was-zero"))  # 5
    suba(1)  # 6
    st([Y, X])  # 7
    NEXT(cost_of_decrement__one_word_written)
    label(".low-byte-was-zero")
    st([Y, Xpp])  # 7
    ld([Y, X])
    suba(1)
    st([Y, X])  # 10
    NEXT(cost_of_decrement__two_words_written)
def increment():
    """Add one to the top of the stack (n -- n).

    There are two exits with different costs: ``.done`` is taken when the
    incremented first byte is non-zero; otherwise the second byte is
    incremented as well.
    """
    label("forth.core.1+")
    adda(-add_cost_of_next(cost_of_increment) / 2)  # 1
    ld(data_stack_page, Y)
    ld([data_stack_pointer], X)
    ld([Y, X])
    adda(1)  # 5
    bne(lo(".done"))  # 6
    st([Y, Xpp])  # 7
    ld([Y, X])  # 8
    adda(1)
    st([Y, X])
    NEXT(cost_of_increment__two_words_written)
    label(".done")
    NEXT(cost_of_increment__one_word_written)
def _rshift__amount_eq_8():
    """Emit the RSHIFT special case labelled "forth.core.RSHIFT.n==8".

    The second byte of the cell is copied over the first, and the second
    byte is then set to zero.

    The loads of Y and X are not emitted here (see the commented-out
    instructions) but are still counted in the cycle numbering.
    """
    # Offset to n==8 case = 3
    label("forth.core.RSHIFT.n==8")
    adda(-add_cost_of_next(cost_of_rshift__amount_eq_8) / 2)  # 1
    # ld(data_stack_page, Y)  # Happen in head of page, but still counted
    # ld([data_stack_pointer], X)
    st([Y, Xpp])  # Blat low byte
    ld([Y, X])  # 5
    ld([data_stack_pointer], X)
    st([Y, Xpp])
    ld(0)
    st([Y, X])  # 9
    NEXT(cost_of_rshift__amount_eq_8)
def _lshift__amount_gt_8():
    """LSHIFT (x1 u -- x2)

    Special case where u > 8.

    Sets ``continuation`` to the label below, branches to "left-shift-by-n"
    with AC loaded from [amount], and on return stores the result and then a
    zero byte at the data stack pointer.

    The loads of Y and X are not emitted here (see the commented-out
    instructions below).
    """
    label("forth.core.LSHIFT.n>8")
    adda(-add_cost_of_next(cost_of_lshift__amount_gt_8) / 2)  # 1
    # ld(data_stack_page, Y)
    # ld([data_stack_pointer], X)
    ld(lo("forth.core.LSHIFT.n>8.continuation"))
    st([continuation])  # 5
    bra("left-shift-by-n")  # 6
    ld([amount])  # 7
    # The cycle numbering restarts from 1 after the shift routine returns.
    label("forth.core.LSHIFT.n>8.continuation")
    st([Y, Xpp])  # 1
    st([Y, Xpp])  # 2
    ld([data_stack_pointer], X)  # 3
    ld(0)  # 4
    st([Y, Xpp])  # 5
    NEXT(cost_of_lshift__amount_gt_8)
def shift(vtmp): """Place all of the code required for 2*, LSHIFT, RSHIFT and 2/ Needs a page to itself. """ # Customized restart or quit trampoline which loads Y and X so that # they point at the data stack. This saves space at no runtime cost. assert pc( ) & 0xFF == 0, "restart_or_quit must be placed at the start of a page" label("forth.restart-or-quit") ble(pc() + 3) # 6 ld(data_stack_page, Y) # 7 bra([W_lo]) ld([data_stack_pointer], X) # 8; nop for purposes of .quit label(".quit") ld(hi("forth.exit"), Y) # 9 C("jmp forth.exit.from-failed-test") jmp(Y, lo("forth.exit.from-failed-test")) # 10 # 11, overlap with whatever comes next - hopefully not a branch or jump! _two_times() offset_start = pc() _lshift() offset_of_shift_by_8 = pc() - offset_start _lshift__amount_eq_8() offset_of_shift_by_gt_8 = pc() - offset_start _lshift__amount_gt_8() offset_of_shift_by_lt_8 = pc() - offset_start _lshift__amount_lt_8() rshift_offset_start = pc() _rshift() assert pc() - rshift_offset_start <= offset_of_shift_by_8 fillers(until=(rshift_offset_start + offset_of_shift_by_8) & 255) _rshift__amount_eq_8() assert pc() - rshift_offset_start <= offset_of_shift_by_gt_8 fillers(until=(rshift_offset_start + offset_of_shift_by_gt_8) & 255) _rshift__amount_gt_8() assert pc() - rshift_offset_start <= offset_of_shift_by_lt_8 fillers(until=(rshift_offset_start + offset_of_shift_by_lt_8) & 255) _rshift__amount_lt_8() _shift_entry( offset_to_amount_eq_8=offset_of_shift_by_8, offset_to_amount_gt_8=offset_of_shift_by_gt_8, offset_to_amount_lt_8=offset_of_shift_by_lt_8, ) _left_shift_by_n() _right_shift_by_n(vtmp) _two_div(vtmp)
def _lshift__amount_eq_8():
    """LSHIFT (x1 u -- x2)

    Special case where u = 8.

    The first byte of the cell is saved in tmp0 and replaced by zero; the
    saved byte is then written to the second byte.

    The loads of Y and X are not emitted here (see the commented-out
    instructions below).
    """
    label("forth.core.LSHIFT.n==8")
    adda(-add_cost_of_next(cost_of_lshift__amount_eq_8) / 2)  # 1
    # ld(data_stack_page, Y)
    # ld([data_stack_pointer], X)
    ld([Y, X])
    st([tmp0])  # 5
    ld(0)
    st([Y, Xpp])
    ld([tmp0])
    st([Y, X])  # 9
    NEXT(cost_of_lshift__amount_eq_8)
def _right_shift_by_n(vtmp):
    """Fixed cost routine to do a right-shift by 1-7 places.

    The shift amount is passed in NEGATED in AC; the value is loaded from
    [Y, X]. Control is returned to the address in ``continuation``.

    In the case of a right-shift by between 1 and 7 places, this needs to be
    called twice, in which case we can jump to right-shift-by-n.second-time
    with the mask in AC.

    Two computed branches index backwards from the end of two tables of
    ``ld`` instructions: the first selects the value stored in [set_bits],
    the second the value stored in [mask]. ``vtmp`` is the address that
    receives the low byte of "forth.right-shift-return-point".
    """
    label("right-shift-by-n")
    st([tmp0])  # 1
    adda(".end-of-set_bits_table")
    bra(AC)  # 3
    bra(lo(".end-of-set_bits_table"))  # 4
    ld(0b0011_1111)  # Shift by 7
    ld(0b0001_1111)  # Shift by 6
    ld(0b0000_1111)  # Shift by 5
    ld(0b0000_0111)  # Shift by 4
    ld(0b0000_0011)  # Shift by 3
    ld(0b0000_0001)  # Shift by 2
    ld(0b0000_0000)  # Shift by 1
    label(".end-of-set_bits_table")
    st([set_bits])  # 6
    # Take the opportunity to set vTmp
    ld(lo("forth.right-shift-return-point"))
    st([vtmp])
    ld([tmp0])  # Reload the negated shift amount saved on entry
    adda(".end-of-mask-table")
    bra(AC)  # 11
    bra(lo(".end-of-mask-table"))  # 12
    ld(0b1000_0000)  # Shift by 7
    ld(0b1100_0000)  # Shift by 6
    ld(0b1110_0000)  # Shift by 5
    ld(0b1111_0000)  # Shift by 4
    ld(0b1111_1000)  # Shift by 3
    ld(0b1111_1100)  # Shift by 2
    ld(0b1111_1110)  # Shift by 1
    label(".end-of-mask-table")
    st([mask])  # 14
    label("right-shift-by-n.second-time")
    anda([Y, X])  # 15, 1
    ora([set_bits])
    # The masked value in AC becomes the jump offset into the shiftTable page.
    ld(hi("shiftTable"), Y)
    jmp(Y, AC)  # 18, 4
    bra(0xFF)  # 19, 5
def docol_ram_ram():
    """Word that implements DOCOL for a thread in RAM.

    Pushes IP onto the return stack, then sets IP to the thread address held
    in tmp0/tmp1 plus two. There are two exits with different costs,
    depending on whether the incremented low byte is zero.
    """
    label("forth.DOCOL")
    adda(-add_cost_of_next(cost_of_docol_ram_ram) / 2)  # 1
    ld(return_stack_page, Y)
    _push_ip_to_return_stack()
    # The Next3 for ram-ram mode leaves the thread address in tmp0, tmp1
    # We need to increment it by two, and store in IP
    ld([tmp0])
    adda(2)
    st([IP_lo])  # 5
    # NOTE(review): only a low byte of exactly zero is treated as a page
    # crossing; presumably thread addresses are even so 0xFF + 2 cannot
    # occur - confirm.
    beq(lo(".page-boundary#docol"))  # Page boundary test
    ld([tmp1])  # 7
    st([IP_hi])  # 8
    NEXT(cost_of_docol_ram_ram__page_not_crossed)
    label(".page-boundary#docol")
    adda(1)  # 8
    st([IP_hi])  # 9
    NEXT(cost_of_docol_ram_ram__page_crossed)
def dup():
    """Emit the word "forth.core.DUP".

    The two bytes at the data stack pointer are saved in tmp0 and tmp1, the
    pointer is lowered by 2, and the saved bytes are written at the new
    pointer.
    """
    label("forth.core.DUP")
    adda(-add_cost_of_next(cost_of_dup) / 2)
    ld([data_stack_pointer], X)
    ld([X])
    st([tmp0])
    ld([data_stack_pointer])  # 5
    adda(1, X)  # X <- pointer + 1, to address the second byte
    ld([X])
    st([tmp1])
    ld(data_stack_page, Y)
    ld([data_stack_pointer])  # 10
    suba(2)
    st([data_stack_pointer], X)
    ld([tmp0])
    st([Y, Xpp])
    ld([tmp1])  # 15
    st([Y, X])  # 16
    NEXT(cost_of_dup)