Esempio n. 1
0
    def sim(self, dut, testbench=None, traces=(), filename=None):
        """Simulate ``dut`` and write the waveform below ``.sim_results/``.

        Parameters
        ----------
        dut
            Design that is passed through ``self.prepare`` before simulation.
        testbench
            ``None``, a generator function (registered for the ``"sync"``
            domain) or a ``(generator, domain)`` tuple.
        traces
            Forwarded to ``write_vcd`` as ``traces``.
        filename
            Base name of the ``.vcd`` / ``.gtkw`` files. When falsy, the name
            of the calling function is used instead.

        Raises
        ------
        TypeError
            If ``testbench`` is none of the accepted forms.
        """
        prepared = self.prepare(dut)
        simulator = Simulator(prepared)
        for domain_name, frequency in self.clocks.items():
            simulator.add_clock(1 / frequency, domain=domain_name)

        if not filename:
            # Frame 1 is the caller of sim(); its function name labels the output.
            filename = inspect.stack()[1].function
        assert isinstance(filename, str)

        if testbench is not None:
            if isinstance(testbench, tuple):
                bench, bench_domain = testbench
            elif inspect.isgeneratorfunction(testbench):
                bench, bench_domain = testbench, "sync"
            else:
                raise TypeError("unknown type for testbench")
            self.add_process(bench, bench_domain)

        # Everything collected in self.processes runs as a sync process.
        for process, process_domain in self.processes:
            simulator.add_sync_process(process, domain=process_domain)

        Path(".sim_results/").mkdir(exist_ok=True)
        vcd_file = f".sim_results/{filename}.vcd"
        gtkw_file = f".sim_results/{filename}.gtkw"
        with simulator.write_vcd(vcd_file, gtkw_file, traces=traces):
            simulator.run()
Esempio n. 2
0
    def fxfilter(self, stimulus):
        """
        Simulate the fixpoint filter for one frame of float stimulus data and
        return its response as floats.

        Parameters
        ----------
        stimulus : ndarray of float
            One frame of stimuli data (float) scaled as WI.WF

        Returns
        -------
        output : ndarray of float
            One frame of response data (float) scaled as WI.WF
        """
        def drive_and_capture():
            # quantize the stimulus: multiply by 2 ^ WF and round to integers
            scale_in = 1 << self.fx_filt.p['QI']['WF']
            samples = np.round(stimulus * scale_in).astype(int)
            self.output = []
            for sample in samples:
                yield self.fx_filt.i.eq(int(sample))
                yield Tick()
                response = yield self.fx_filt.o
                self.output.append(response)

        sim = Simulator(self.fx_filt)
        sim.add_clock(1 / 48000)
        sim.add_process(drive_and_capture)
        sim.run()

        # back to float: divide the integer response by 2 ^ WF of the output format
        scale_out = 1 << self.fx_filt.p['QO']['WF']
        return np.array(self.output, dtype='f') / scale_out
def run_sim(dut, data, n):
    """Simulate ``dut`` with its own send/receive drivers, writing ``bla.vcd``.

    The simulator engine is taken from the ``NMIGEN_SIM`` environment variable
    and defaults to ``'pysim'``. ``data`` is handed to
    ``dut.input.send_driver`` and ``n`` to ``dut.output.recv_driver``.
    """
    engine_name = os.getenv('NMIGEN_SIM', 'pysim')
    simulator = Simulator(dut, engine=engine_name)
    simulator.add_clock(10e-9, domain='sync')

    sender = dut.input.send_driver(data)
    simulator.add_sync_process(sender)
    receiver = dut.output.recv_driver(n)
    simulator.add_sync_process(receiver)

    with simulator.write_vcd('bla.vcd'):
        simulator.run()
Esempio n. 4
0
class TestBase(unittest.TestCase):
    """Common scaffolding for unit tests of a single nMigen module.

    The device under test may use sync logic, comb logic or both.
    """

    def setUp(self):
        # self.m is assigned first so that it already exists when create_dut() runs.
        self.m = Module()
        self.dut = self.create_dut()
        submodules = self.m.submodules
        submodules['dut'] = self.dut
        submodules['dummy'] = _DummySyncModule()
        self.sim = Simulator(self.m)

    def create_dut(self):
        """Build and return the device under test; subclasses must override."""
        raise NotImplementedError

    def add_process(self, process):
        """Register ``process`` as a sync process of the simulator."""
        self.sim.add_sync_process(process)

    def add_sim_clocks(self):
        """Register the clocks the simulation needs (a 'sync' clock of period 1)."""
        self.sim.add_clock(1, domain='sync')

    def run_sim(self, process, write_trace=False):
        """Run ``process`` to completion.

        When ``write_trace`` is true the run is recorded to ``zz.vcd`` /
        ``zz.gtkw``.
        """
        self.add_process(process)
        self.add_sim_clocks()
        if not write_trace:
            self.sim.run()
            return
        with self.sim.write_vcd("zz.vcd", "zz.gtkw"):
            self.sim.run()
Esempio n. 5
0
    def sim(self, dut, testbench=None, traces=(), engine="pysim"):
        """Simulate ``dut`` and write a VCD/GTKW pair named after
        ``self.output_filename_base``.

        ``self.clocks`` maps each domain name to a ``(frequency, phase)`` pair.
        ``testbench`` may be ``None``, a generator function (registered for
        the ``"sync"`` domain) or a ``(generator, domain)`` tuple; anything
        else raises ``TypeError``. ``traces`` is forwarded to ``write_vcd``
        and ``engine`` to the ``Simulator`` constructor.
        """
        prepared = self.prepare(dut)
        self.fragment = prepared
        simulator = Simulator(prepared, engine=engine)
        for domain_name, (frequency, phase) in self.clocks.items():
            simulator.add_clock(1 / frequency, domain=domain_name, phase=phase)

        if testbench is not None:
            if isinstance(testbench, tuple):
                bench, bench_domain = testbench
            elif inspect.isgeneratorfunction(testbench):
                bench, bench_domain = testbench, "sync"
            else:
                raise TypeError("unknown type for testbench")
            self.add_process(bench, bench_domain)

        # Everything collected in self.processes runs as a sync process.
        for process, process_domain in self.processes:
            simulator.add_sync_process(process, domain=process_domain)

        base = self.output_filename_base
        print(f"\nwriting vcd to '{base}.vcd'")
        with simulator.write_vcd(f"{base}.vcd", f"{base}.gtkw", traces=traces):
            simulator.run()
Esempio n. 6
0
def test_h100_sync():
	"""Drive a HighSpeedTransmit/H100Sync pair and dump the bus activity.

	Four data lanes are first loaded with fixed bytes and then, for 2500
	clock cycles, with the byte of a per-lane test frame selected by the
	current ``sync.slot_t1`` value. The run is recorded to
	``test_h100_sync.vcd`` / ``test_h100_sync.gtkw``.
	"""
	from nmigen.sim import Simulator, Delay

	bus = HighSpeedTransmitBus()

	m = Module()
	m.submodules.sync = sync = H100Sync()
	m.submodules.dut = dut = HighSpeedTransmit(bus=bus, sync=sync)

	# One test frame per data lane, stored as a list of character codes.
	frames = [
		[ord(c) for c in text]
		for text in (
			'0_TESTING_T1_DATA_STUFF\xff',
			'1_TESTING_T1_DATA_STUFF\x00',
			'2_TESTING_T1_DATA_STUFF\xff',
			'3_TESTING_T1_DATA_STUFF\x00',
		)
	]

	def process_test():
		yield Delay(100e-9)
		yield sync.enable.eq(1)
		yield dut.enable.eq(1)
		for lane, initial in enumerate((0xaa, 0x55, 0xff, 0x00)):
			yield dut.data[lane].eq(initial)

		for _ in range(2500):
			# Sample the slot before the clock edge, load the lanes after it.
			slot = yield sync.slot_t1
			yield
			for lane, frame in enumerate(frames):
				yield dut.data[lane].eq(frame[slot])

	sim = Simulator(m)
	sim.add_clock(1.0 / 16.384e6)

	# sim.add_sync_process(process_inclk)
	sim.add_sync_process(process_test)

	traces = [
		# sync.inclk,
		# sync.outclk,

		dut.enable,

		bus.ser.o,
		bus.ser.oe,
		bus.msync.o,
		bus.msync.oe,
		bus.sync.o,
		bus.sync.oe,
	]

	with sim.write_vcd("test_h100_sync.vcd", "test_h100_sync.gtkw", traces=traces):
		sim.run()
Esempio n. 7
0
def resolve(expr):
    """Evaluate a constant nMigen expression and return the resulting integer.

    The expression is read once from a process running in a simulator built
    around an empty ``Module``.
    """
    captured = []

    def testbench():
        value = yield expr
        captured.append(value)

    simulator = Simulator(Module())
    simulator.add_process(testbench)
    simulator.run()
    return captured[0]
Esempio n. 8
0
    def test_ram_banks(self):
        """Writes to 0x4000 must select the RAM bank given by the low nibble."""
        sim = Simulator(self.mbc)

        def stimulus():
            yield from self.reset()

            # (value written to 0x4000, RAM bank expected afterwards)
            for written, expected_bank in ((0x1F, 0xF), (0x7A, 0xA)):
                yield from self.write(0x4000, written)
                bank = yield self.mbc.ram_bank
                assert bank == expected_bank

        sim.add_process(stimulus)
        sim.reset()
        sim.run()
Esempio n. 9
0
    def test_ram_en(self):
        """RAM enable starts low, goes high after writing 0x0A to 0x0000 and
        low again after writing 0x1A."""
        sim = Simulator(self.mbc)

        def stimulus():
            yield from self.reset()

            enabled = yield self.mbc.ram_en
            assert enabled == 0
            # (value written to 0x0000, ram_en expected afterwards)
            for written, expected_en in ((0x0A, 1), (0x1A, 0)):
                yield from self.write(0x0000, written)
                enabled = yield self.mbc.ram_en
                assert enabled == expected_en

        sim.add_process(stimulus)
        sim.reset()
        sim.run()
Esempio n. 10
0
    def test_rom_banks(self):
        """Check the ROM bank selection after a sequence of register writes."""
        sim = Simulator(self.mbc)

        # (address written, value written, second argument of assert_rom_banks)
        steps = (
            (0x2000, 0x42, 0x042),
            (0x3000, 0x01, 0x142),
            (0x2000, 0x00, 0x100),
            (0x3000, 0x00, 0x000),
        )

        def stimulus():
            yield from self.reset()

            yield from self.assert_rom_banks(0x000, 0x001)
            for address, value, expected_bank in steps:
                yield from self.write(address, value)
                yield from self.assert_rom_banks(0x000, expected_bank)

        sim.add_process(stimulus)
        sim.reset()
        sim.run()
Esempio n. 11
0
def test_pattern_matching():
    """Feed a bit sequence into a ``Matcher`` for the pattern 0, 1, 1, 0 and
    require ``match`` to sum to exactly one over the run.

    The waveform is written to ``matching.vcd``.
    """
    m = Matcher(pattern=[0, 1, 1, 0], interval=1)
    sim = Simulator(m)
    sim.add_clock(1e-6, domain="sync")

    haystack = [0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]

    def process():
        matches = 0
        for bit in haystack:
            yield
            yield m.input.eq(bit)
            hit = yield m.match
            matches += hit
        assert (matches == 1)

    sim.add_sync_process(process)

    with sim.write_vcd("matching.vcd"):
        sim.run()
Esempio n. 12
0
    def test_sim_async_fifo(self):
        """Offer 20 writes to a depth-8 AsyncFIFO and expect a read level of 8."""
        m = Module()
        fifo = m.submodules.fifo = AsyncFIFO(width=32, depth=8, r_domain="sync", w_domain="sync")

        def testbench():
            for word in range(20):
                yield fifo.w_data.eq(word)
                yield fifo.w_en.eq(1)
                yield
            yield fifo.w_en.eq(0)
            # wait four more cycles before sampling the level
            for _ in range(4):
                yield

            level = yield fifo.r_level
            assert level == 8

        simulator = Simulator(m)
        simulator.add_clock(1 / 100e6, domain="sync")
        simulator.add_sync_process(testbench, domain="sync")
        simulator.run()
Esempio n. 13
0
def test_crc():
    """Shift a fixed bit sequence into ``GaloisCRC`` and compare the result
    with the ``py_crc`` reference. The waveform is written to ``crc.vcd``.
    """
    m = GaloisCRC()
    sim = Simulator(m)
    sim.add_clock(1e-6, domain="sync")

    data = [0, 1, 0, 0, 1, 0, 1, 1, 1, 0]

    def process():
        for bit in data:
            yield m.input.eq(bit)
            yield m.en.eq(1)
            yield
        # one extra cycle before the CRC register is read
        yield
        hardware_crc = yield m.crc
        reference_crc = py_crc(np.array(data))
        assert (hardware_crc == reference_crc)

    sim.add_sync_process(process)

    with sim.write_vcd("crc.vcd"):
        sim.run()
Esempio n. 14
0
    def test_sim_asnyc_stream_fifo(self):
        """Write ten words into an AsyncStreamFifo, check its read level and
        read the same ten words back in order."""
        m = Module()
        source = StreamEndpoint(32, is_sink=False, has_last=False)
        fifo = m.submodules.fifo = AsyncStreamFifo(source, 1024, r_domain="sync", w_domain="sync", buffered=False)

        def testbench():
            for word in range(10):
                yield from write_to_stream(source, word)

            # async fifos need some time
            for _ in range(2):
                yield

            level = yield fifo.r_level
            assert level == 10

            for word in range(10):
                received = yield from read_from_stream(fifo.output)
                assert received == word

        simulator = Simulator(m)
        simulator.add_clock(1 / 100e6, domain="sync")
        simulator.add_sync_process(testbench, domain="sync")
        simulator.run()
Esempio n. 15
0
    def check(self, instruction: int, expected: Dict[str, int]):
        """Decode ``instruction`` and compare decoder outputs with ``expected``.

        ``expected`` maps attribute names of the decoder to the value each one
        should hold 1 us after the instruction is applied. Enum expectations
        are compared as enum members, everything else as hex strings. Each
        attribute is checked in its own sub-test, and the run is recorded to
        ``test_decode_<hex instruction>.vcd``.
        """
        decoder = CPUDecoder()
        sim = Simulator(decoder)

        def input_process():
            yield decoder.instruction_in.eq(instruction)

        def check_process():
            yield Delay(1e-6)
            for field, want in expected.items():
                got = yield getattr(decoder, field)
                if isinstance(want, enum.Enum):
                    # lift the raw integer into the enum type of the expectation
                    got = type(want)(got)
                else:
                    got, want = hex(got), hex(want)
                with self.subTest(f"decoder.{field}"):
                    self.assertEqual(got, want)

        with sim.write_vcd(f"test_decode_{hex(instruction)}.vcd"):
            for proc in (input_process, check_process):
                sim.add_process(proc)
            sim.run()
Esempio n. 16
0
    def test_serializer(self):
        """A 32-bit word written to the Serializer must come out as its four
        bytes, least significant first, repeated over ten rounds."""
        clk = 60e6
        m = Serializer(w_width=32, r_width=8)

        sim = Simulator(m)
        sim.add_clock(1 / clk)

        def process():
            word = 0xDDCCBBAA
            yield m.w_data.eq(word)
            yield m.w_en.eq(1)
            yield m.r_en.eq(1)
            yield
            for _ in range(10):
                for byte_index in range(4):
                    yield
                    expected_byte = (word >> (byte_index * 8)) & 0xff
                    self.assertEqual((yield m.r_data), expected_byte)

        sim.add_sync_process(process)
        with sim.write_vcd("serializer.vcd", "serializer.gtkw", traces=[]):
            sim.run()
Esempio n. 17
0
def test_frequency_generation():
    """Build an oscillator whose realized frequency lies within a given error
    of the target and check every output word against a software reference.

    Below a certain error level, assorted floating point differences make this
    test fail even against the "realized frequency" reference.
    """
    freq = 2.4*1e9
    sample_rate = 5*1e9
    error = 0.0001*1e6  # allowable frequency error: 100 Hz
    word_width = 20  # used for the oscillator width and the reference packing

    m = OneBitFixedOscillator(sample_rate=sample_rate, frequency=freq, max_error=error, width=word_width, domain='sync')
    sim = Simulator(m)
    sim.add_clock(1e-6, domain="sync")

    assert np.abs(m.realized_frequency - freq) < error
    samples = m.pattern_words*10

    carrier = make_carrier(sample_rate=sample_rate, freq=m.realized_frequency, samples=samples*word_width)
    ref = pack_mem(binarize(carrier), word_width)

    output = np.zeros((samples,), dtype=np.uint32)

    def process():
        for index in range(samples):
            yield
            word = yield m.output
            counter = yield m.counter  # read, but not checked
            output[index] = word
            if bin(word) != bin(ref[index]):
                raise Exception("At {} got {} but expected {}".format(index, bin(word), bin(ref[index])))
        print(json.dumps([int(value) for value in output]))

    sim.add_sync_process(process)

    with sim.write_vcd("nco.vcd"):
        sim.run()
Esempio n. 18
0
    def assertStatement(self, stmt, inputs, output, reset=0):
        """Simulate the statement built by ``stmt`` and check its result.

        ``stmt`` is called as ``stmt(osig, *isigs)`` with freshly created
        signals: one per input (named "a" to "d") and an output "y" whose
        reset value is ``reset``. The inputs are driven with ``inputs`` and,
        after settling, the output must equal the value of ``output``. The
        run is recorded to ``test.vcd`` / ``test.gtkw``.
        """
        cast_inputs = [Value.cast(raw) for raw in inputs]
        expected = Value.cast(output)

        isigs = [
            Signal(value.shape(), name=letter)
            for value, letter in zip(cast_inputs, "abcd")
        ]
        osig = Signal(expected.shape(), name="y", reset=reset)

        statements = stmt(osig, *isigs)
        frag = Fragment()
        frag.add_statements(statements)
        # every signal assigned by the statements becomes a driver of the fragment
        driven = flatten(s._lhs_signals() for s in Statement.cast(statements))
        for signal in driven:
            frag.add_driver(signal)

        sim = Simulator(frag)

        def process():
            for isig, value in zip(isigs, cast_inputs):
                yield isig.eq(value)
            yield Settle()
            result = yield osig
            self.assertEqual(result, expected.value)

        sim.add_process(process)
        with sim.write_vcd("test.vcd", "test.gtkw", traces=[*isigs, osig]):
            sim.run()
Esempio n. 19
0
def test_base_rate_sync():
	"""Simulate ``BaseRateSync`` against a 1.544 MHz framer model.

	One plain process generates ``sclk`` / ``serclk`` / ``ser`` with fixed
	skews, one sync process pulses ``dut.strobe_in`` on each falling edge of
	``serclk`` as seen from the 16.384 MHz clock, and one sync process idles
	for 4700 cycles. The three framer signals are traced to
	``test_base_rate_sync.vcd`` / ``test_base_rate_sync.gtkw``.
	"""
	from nmigen.sim import Simulator, Delay

	clock_sclk = 1.544e6
	clock_sync = 16.384e6

	m = Module()
	m.submodules.dut = dut = BaseRateSync()

	sclk = Signal()
	serclk = Signal()
	ser = Signal()

	SERCLK_SKEW = 10e-9
	SER_SKEW = 10e-9

	def process_framer():
		period = 1.0 / clock_sclk
		# remaining time in each half period once the skews are spent
		high_rest = period * 0.5 - SERCLK_SKEW - SER_SKEW
		low_rest = period * 0.5 - SERCLK_SKEW

		text = 'THIS_IS_A_TEST_' * 40
		bits = ''.join('{:08b}'.format(ord(char)) for char in text)

		for bit in bits:
			yield sclk.eq(1)
			yield Delay(SERCLK_SKEW)
			yield serclk.eq(1)
			yield Delay(SER_SKEW)
			yield ser.eq(int(bit))
			yield Delay(high_rest)
			yield sclk.eq(0)
			yield Delay(SERCLK_SKEW)
			yield serclk.eq(0)
			yield Delay(low_rest)

	def process_strobe():
		previous = 0
		cycles = int(round(4700 * clock_sync / clock_sclk))
		for _ in range(cycles):
			current = yield serclk
			falling_edge = current == 0 and previous == 1
			yield dut.strobe_in.eq(1 if falling_edge else 0)
			previous = current
			yield

	def process_test():
		yield Delay(100e-9)

		for _ in range(4700):
			yield

	sim = Simulator(m)
	sim.add_clock(1.0 / clock_sync)

	sim.add_process(process_framer)
	sim.add_sync_process(process_strobe)
	sim.add_sync_process(process_test)

	traces = [sclk, serclk, ser]

	with sim.write_vcd("test_base_rate_sync.vcd", "test_base_rate_sync.gtkw", traces=traces):
		sim.run()
Esempio n. 20
0
        # with m.If(self.enable):
        m.d.sync += phase_acc.eq(phase_acc + self.phase_step)

        return m

    @staticmethod
    def calculate_phase_step(clk_frequency: float, frequency: float):
        return int(round((2 ** 32) * frequency / clk_frequency))


if __name__ == "__main__":
    # Hardware build, currently disabled:
    # from nmigen_boards.tinyfpga_bx import TinyFPGABXPlatform
    # platform = TinyFPGABXPlatform()
    # products = platform.build(Top(), do_program=True)

    from nmigen.sim import Simulator, Tick

    # Simulate the NCO for 3000 clock ticks with a 440 Hz phase step at a
    # 1 MHz clock and record the run to dds.vcd.
    clock_hz = 1e6
    dut = NCO(width=8, samples=1024)
    sim = Simulator(dut)
    sim.add_clock(1 / clock_hz)

    def proc():
        step = NCO.calculate_phase_step(clk_frequency=clock_hz, frequency=440)
        yield dut.phase_step.eq(step)
        for _ in range(3000):
            yield Tick()

    sim.add_process(proc)
    with sim.write_vcd("dds.vcd"):
        sim.run()
Esempio n. 21
0
def main():
    """Simulate an FT600 block wired between two buffered FIFOs.

    A write FIFO feeds the FT600 input and the FT600 output feeds a read
    FIFO. The stimulus pushes the words 1..7 into the write FIFO, toggles
    ``ft_txe``, then asserts ``ft_rxf`` while stepping ``ft_override`` through
    1, 2 and 3. The run is recorded to ``test.vcd`` / ``test.gtkw``.
    """
    # parser = main_parser()
    # args = parser.parse_args()

    m = Module()

    ft = FT600()
    m.submodules.ft = ft
    wfifo = AsyncFIFOBuffered(
        width=16, depth=1024, r_domain="sync", w_domain="sync")
    m.submodules.wfifo = wfifo
    rfifo = AsyncFIFOBuffered(
        width=16, depth=1024, r_domain="sync", w_domain="sync")
    m.submodules.rfifo = rfifo

    ft_oe = Signal()
    ft_be = Signal()
    ft_txe = Signal()

    m.d.comb += [
        # FT control
        ft_oe.eq(ft.ft_oe),
        ft_be.eq(ft.ft_be),
        ft_txe.eq(ft.ft_txe),
        # FT to Write FIFO
        ft.input_payload.eq(wfifo.r_data),
        wfifo.r_en.eq(ft.input_ready),
        ft.input_valid.eq(wfifo.r_rdy),
        # FT to Read FIFO
        rfifo.w_data.eq(ft.output_payload),
        rfifo.w_en.eq(ft.output_valid),
        ft.output_ready.eq(rfifo.w_rdy),
    ]

    sim = Simulator(m)
    sim.add_clock(1e-7, domain="sync")      # 10 MHz FPGA clock

    def process():
        def idle(cycles):
            # advance the "sync" domain by the given number of ticks
            for _ in range(cycles):
                yield Tick(domain="sync")

        # push the words 1..7 into the write FIFO, one per tick
        yield wfifo.w_en.eq(1)
        for word in range(1, 8):
            yield wfifo.w_data.eq(word)
            yield from idle(1)
        yield wfifo.w_en.eq(0)
        yield from idle(2)

        # toggle ft_txe: high for 4 ticks, low for 2, high for 7
        for level, hold in ((1, 4), (0, 2), (1, 7)):
            yield ft.ft_txe.eq(level)
            yield from idle(hold)

        # assert ft_rxf while stepping ft_override through 1, 2, 3
        yield ft.ft_rxf.eq(1)
        yield from idle(1)
        for override in (1, 2, 3):
            yield ft.ft_override.eq(override)
            yield from idle(1)
        yield ft.ft_rxf.eq(0)
        yield from idle(4)

    sim.add_sync_process(process)
    with sim.write_vcd("test.vcd", "test.gtkw", traces=[]):
        sim.run()
Esempio n. 22
0
class LunaGatewareTestCase(unittest.TestCase):
    """ Base class for simulation-driven gateware unit tests.

    ``setUp`` instantiates the device under test, wraps it in a ``Simulator``
    and adds a clock for every domain whose ``*_CLOCK_FREQUENCY`` is set.
    """

    # Clock domain used by the helpers that count in clock cycles (see wait()).
    domain = 'sync'

    # Convenience property: if set, instantiate_dut will automatically create
    # the relevant fragment with FRAGMENT_ARGUMENTS.
    FRAGMENT_UNDER_TEST = None
    FRAGMENT_ARGUMENTS = {}

    # Convenience properties: if not None, a clock with the relevant frequency
    # will automatically be added.
    FAST_CLOCK_FREQUENCY = None
    SYNC_CLOCK_FREQUENCY = 120e6
    USB_CLOCK_FREQUENCY = None
    SS_CLOCK_FREQUENCY = None

    def instantiate_dut(self):
        """ Basic-most function to instantiate a device-under-test.

        By default, instantiates FRAGMENT_UNDER_TEST.
        """
        return self.FRAGMENT_UNDER_TEST(**self.FRAGMENT_ARGUMENTS)

    def get_vcd_name(self):
        """ Return the name to use for any VCDs generated by this class. """
        return "test_{}".format(self.__class__.__name__)

    def setUp(self):
        self.dut = self.instantiate_dut()
        self.sim = Simulator(self.dut)

        if self.USB_CLOCK_FREQUENCY:
            self.sim.add_clock(1 / self.USB_CLOCK_FREQUENCY, domain="usb")
        if self.SYNC_CLOCK_FREQUENCY:
            self.sim.add_clock(1 / self.SYNC_CLOCK_FREQUENCY, domain="sync")
        if self.FAST_CLOCK_FREQUENCY:
            self.sim.add_clock(1 / self.FAST_CLOCK_FREQUENCY, domain="fast")
        if self.SS_CLOCK_FREQUENCY:
            self.sim.add_clock(1 / self.SS_CLOCK_FREQUENCY, domain="ss")

    def initialize_signals(self):
        """ Provide an opportunity for the test apparatus to initialize signals. """
        yield Signal()

    def traces_of_interest(self):
        """ Returns an iterable of traces to include in any generated output. """
        return ()

    def simulate(self, *, vcd_suffix=None):
        """ Runs our core simulation.

        When the GENERATE_VCDS environment variable is set to a non-empty
        value, the run is recorded to ``<vcd name>[_<vcd_suffix>].vcd`` and
        the matching ``.gtkw`` file.
        """

        # If we're generating VCDs, run the test under a VCD writer.
        if os.getenv('GENERATE_VCDS', default=False):

            # Figure out the name of our VCD files...
            vcd_name = self.get_vcd_name()
            if vcd_suffix:
                vcd_name = "{}_{}".format(vcd_name, vcd_suffix)

            # ... and run the simulation while writing them.
            traces = self.traces_of_interest()
            with self.sim.write_vcd(vcd_name + ".vcd",
                                    vcd_name + ".gtkw",
                                    traces=traces):
                self.sim.run()

        else:
            self.sim.run()

    @staticmethod
    def pulse(signal, *, step_after=True):
        """ Helper method that asserts a signal for a cycle. """
        yield signal.eq(1)
        yield
        yield signal.eq(0)

        if step_after:
            yield

    @staticmethod
    def advance_cycles(cycles):
        """ Helper method that waits for a given number of cycles. """

        for _ in range(cycles):
            yield

    @staticmethod
    def wait_until(strobe, *, timeout=None):
        """ Helper method that advances time until a strobe signal becomes true.

        Raises RuntimeError once more than ``timeout`` cycles have passed; a
        falsy ``timeout`` disables the limit.
        """

        cycles_passed = 0

        while not (yield strobe):
            yield

            cycles_passed += 1
            if timeout and cycles_passed > timeout:
                raise RuntimeError(
                    f"Timeout waiting for '{strobe.name}' to go high!")

    def _clock_frequencies(self):
        """ Return a mapping from clock-domain name to its configured frequency. """
        return {
            'sync': self.SYNC_CLOCK_FREQUENCY,
            'usb': self.USB_CLOCK_FREQUENCY,
            'fast': self.FAST_CLOCK_FREQUENCY,
            'ss': self.SS_CLOCK_FREQUENCY
        }

    def _ensure_clocks_present(self):
        """ Function that validates that a clock is present for our simulation domain. """
        frequencies = self._clock_frequencies()
        self.assertIsNotNone(
            frequencies[self.domain],
            f"no frequency provied for `{self.domain}`-domain clock!")

    def wait(self, time):
        """ Helper method that waits for a given number of seconds in a *_test_case.

        The duration is rounded up to whole cycles of the clock belonging to
        ``self.domain``. Fails the test if that domain has no frequency set;
        raises KeyError for a domain this class does not know.
        """

        # Figure out the period of the clock we want to work with. Going
        # through the shared table covers every supported domain, 'ss' included.
        self._ensure_clocks_present()
        period = 1 / self._clock_frequencies()[self.domain]

        # ... and, accordingly, how many cycles we want to delay.
        cycles = math.ceil(time / period)

        # Finally, wait that many cycles.
        yield from self.advance_cycles(cycles)
Esempio n. 23
0
    def generic_chacha20(self, implementation):
        """Compare the keystream of a ``ChaCha20Cipher`` simulation against a
        known good software implementation.

        A fixed key and nonce encrypt 64 bytes of plaintext in software. The
        same key and nonce are wired into the simulated cipher built for
        ``implementation``, and its 16 output words must equal the software
        keystream. The run is recorded to ``test.vcd`` / ``test.gtkw``.
        """
        print("")

        # Encrypt a test message with a known good implementation
        import json
        from base64 import b64encode
        from Crypto.Cipher import ChaCha20
        from Crypto.Random import get_random_bytes
        from struct import pack, unpack
        from binascii import hexlify

        def byte_xor(ba1, ba2):
            """ XOR two byte strings """
            return bytes(left ^ right for left, right in zip(ba1, ba2))

        plaintext = b'A' * 64
        # key = get_random_bytes(32)
        key = bytes(range(32))

        # nonce = get_random_bytes(12)
        nonce = bytes(i * 16 + i for i in range(12))
        cipher = ChaCha20.new(key=key, nonce=nonce)
        ciphertext = cipher.encrypt(plaintext)

        # XOR of plaintext and ciphertext is the reference keystream
        keystream = byte_xor(plaintext, ciphertext)
        result = json.dumps({
            'nonce': b64encode(cipher.nonce).decode('utf-8'),
            'ciphertext': b64encode(ciphertext).decode('utf-8'),
            'keystream': hexlify(keystream).decode('utf8')
        })
        # print(result)

        # cipher = ChaCha20.new(key=key, nonce=nonce)
        # cipher.seek(0)
        # print(cipher.decrypt(ciphertext))

        m = Module()

        m.submodules.chacha20 = chacha20 = ChaCha20Cipher(implementation)

        # key and nonce enter the cipher as little-endian 32-bit words
        key_words = unpack("<8I", key)
        m.d.comb += [
            chacha20.i_key[index].eq(word)
            for index, word in enumerate(key_words)
        ]

        nonce_words = unpack("<3I", nonce)
        m.d.comb += [
            chacha20.i_nonce[index].eq(word)
            for index, word in enumerate(nonce_words)
        ]

        sim = Simulator(m)
        sim.add_clock(1e-6, domain="sync")

        def process():
            yield chacha20.i_en.eq(1)
            yield
            for iterations in range(1, 30 * 4 + 1):
                # Simulate until it'd finished
                ready = yield chacha20.o_ready
                if ready != 0:
                    # three more cycles before the output is read
                    for _ in range(3):
                        yield
                    break
                yield

            stream_words = []
            for index in range(16):
                word = yield chacha20.o_stream[index]
                stream_words.append(word)
            keystream_hdl = pack("<16I", *stream_words)
            print(f"Took {iterations} iterations")
            print("Keystream generated by simulation: ",
                  hexlify(keystream_hdl))
            print("Decryption using simulation: ",
                  byte_xor(keystream_hdl, ciphertext))

            self.assertEqual(keystream_hdl, keystream)
            self.assertEqual(plaintext, byte_xor(keystream_hdl, ciphertext))

        sim.add_sync_process(process)
        with sim.write_vcd("test.vcd", "test.gtkw"):
            sim.run()