def replace_rtl(conf, buildconfig):
    """ Generate the verilog for an FPGA build via the replace-rtl make target.

    Runs `make ... replace-rtl` from the sim/ directory next to the deploy
    directory, copies the generated cl_firesim_generated.sv into
    <deploy dir>/results-build/<build dir>/, and finally builds the FPGA
    driver for the same chisel triplet.

    Original note kept from the previous docstring: THIS ALWAYS RUNS LOCALLY

    conf -- not used by this function.
    buildconfig -- provides get_build_dir_name(), get_chisel_triplet() and
        the DESIGN / TARGET_CONFIG / PLATFORM_CONFIG attributes.
    """
    results_subdir = buildconfig.get_build_dir_name()
    cl_subdir = "hdk/cl/developer_designs/cl_" + buildconfig.get_chisel_triplet()
    deploy_dir = get_deploy_dir()

    rootLogger.info("Running replace-rtl to generate verilog for "
                    + str(buildconfig.get_chisel_triplet()))

    # CL_DIR is exported so the cp command below can refer to it
    export_cl_dir = 'export CL_DIR={}/../platforms/f1/aws-fpga/{}'.format(
        deploy_dir, cl_subdir)

    with prefix('cd ' + deploy_dir + '/../'), \
            prefix('source sourceme-f1-manager.sh'), \
            prefix(export_cl_dir), \
            prefix('cd sim/'), \
            StreamLogger('stdout'), \
            StreamLogger('stderr'):
        make_cmd = """make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} replace-rtl""".format(
            buildconfig.DESIGN, buildconfig.TARGET_CONFIG,
            buildconfig.PLATFORM_CONFIG)
        run(make_cmd)

        mkdir_cmd = """mkdir -p {}/results-build/{}/""".format(
            deploy_dir, results_subdir)
        run(mkdir_cmd)

        copy_cmd = """cp $CL_DIR/design/cl_firesim_generated.sv {}/results-build/{}/cl_firesim_generated.sv""".format(
            deploy_dir, results_subdir)
        run(copy_cmd)

    # build the fpga driver that corresponds with this version of the RTL
    build_fpga_driver(buildconfig.get_chisel_triplet())
def replace_rtl(conf, buildconfig):
    """ Generate the verilog for an FPGA build, then build the f1 driver.

    Every step is issued through local() using the shell returned by
    get_local_shell(). The first group of commands runs the "replace-rtl"
    make recipe and copies cl_firesim_generated.sv into
    <deploy dir>/results-build/<build dir>/; the second group runs the "f1"
    make recipe to build the matching FPGA driver.

    Original note kept from the previous docstring: THIS ALWAYS RUNS LOCALLY

    conf -- not used by this function.
    buildconfig -- provides get_build_dir_name(), get_chisel_triplet() and
        make_recipe().
    """
    results_subdir = buildconfig.get_build_dir_name()
    cl_subdir = "hdk/cl/developer_designs/cl_" + buildconfig.get_chisel_triplet()
    deploy_dir = get_deploy_dir()

    rootLogger.info("Running replace-rtl to generate verilog for "
                    + str(buildconfig.get_chisel_triplet()))

    # CL_DIR is exported so the cp command below can refer to it
    export_cl_dir = 'export CL_DIR={}/../platforms/f1/aws-fpga/{}'.format(
        deploy_dir, cl_subdir)

    with prefix('cd ' + deploy_dir + '/../'), \
            prefix('source sourceme-f1-manager.sh'), \
            prefix(export_cl_dir), \
            prefix('cd sim/'), \
            StreamLogger('stdout'), \
            StreamLogger('stderr'):
        local("""pwd""", shell=get_local_shell())

        local(buildconfig.make_recipe("replace-rtl"), shell=get_local_shell())

        mkdir_cmd = """mkdir -p {}/results-build/{}/""".format(
            deploy_dir, results_subdir)
        local(mkdir_cmd, shell=get_local_shell())

        copy_cmd = """cp $CL_DIR/design/cl_firesim_generated.sv {}/results-build/{}/cl_firesim_generated.sv""".format(
            deploy_dir, results_subdir)
        local(copy_cmd, shell=get_local_shell())

    # build the fpga driver that corresponds with this version of the RTL
    with prefix('cd ' + deploy_dir + '/../'), \
            prefix('source sourceme-f1-manager.sh'), \
            prefix('cd sim/'), \
            StreamLogger('stdout'), \
            StreamLogger('stderr'):
        local(buildconfig.make_recipe("f1"), shell=get_local_shell())
def kill_switches_instance(self):
    """ Kill every switch slot on this instance, then wipe /dev/shm.

    Does nothing when instance_assigned_switches() is falsy.
    """
    if not self.instance_assigned_switches():
        return

    slot_count = self.parentnode.get_num_switch_slots()
    for switch_slot in range(slot_count):
        self.kill_switch_slot(switch_slot)

    # clear out /dev/shm once all the switches have been killed
    with StreamLogger('stdout'), StreamLogger('stderr'):
        run("sudo rm -rf /dev/shm/*")
def build_fpga_driver(self):
    """ Build FPGA driver for running simulation.

    The deploy triplet from get_deploytriplet_for_config() is split on "-"
    into DESIGN, TARGET_CONFIG and PLATFORM_CONFIG, which are passed to the
    `f1` make target in the sim/ directory. The build is attempted at most
    once per object: self.driver_built is set after a successful build and
    short-circuits later calls.

    Raises SystemExit (exit status 1) when the make command fails.
    """
    if self.driver_built:
        # we already built the driver at some point
        return

    # TODO there is a duplicate of this in runtools
    deploytriplet = self.get_deploytriplet_for_config()
    triplet_pieces = deploytriplet.split("-")
    design = triplet_pieces[0]
    target_config = triplet_pieces[1]
    platform_config = triplet_pieces[2]

    rootLogger.info("Building FPGA software driver for " + str(deploytriplet))

    driverbuildcommand = """make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} f1""".format(
        design, target_config, platform_config)

    with prefix('cd ../'), \
            prefix('source sourceme-f1-manager.sh'), \
            prefix('cd sim/'), \
            StreamLogger('stdout'), \
            StreamLogger('stderr'):
        # warn_only so that a failed build comes back as a result object we
        # can log and report, instead of aborting inside local()
        with settings(warn_only=True):
            localcap = local(driverbuildcommand, capture=True)

        rootLogger.debug("[localhost] " + str(localcap))
        rootLogger.debug("[localhost] " + str(localcap.stderr))

        if localcap.failed:
            rootLogger.info(
                "FPGA software driver build failed. Exiting. See log for details.")
            rootLogger.info(
                """You can also re-run '{}' in the 'firesim/sim' directory to debug this error."""
                .format(driverbuildcommand))
            # sys.exit rather than the bare exit() builtin: the latter is
            # installed by the site module for interactive use only
            import sys
            sys.exit(1)

    self.driver_built = True
def load_edma(self):
    """ Insert the EDMA kernel module (edma-drv.ko) with `sudo insmod`. """
    self.instance_logger("Loading EDMA Driver Kernel Module.")

    # TODO: can make these values automatically be chosen based on link lat
    insmod_cmd = "sudo insmod /home/centos/edma/linux_kernel_drivers/edma/edma-drv.ko single_transaction_size=65536 transient_buffer_size=67108864 edma_queue_depth=1024 poll_mode=1"

    with StreamLogger('stdout'), StreamLogger('stderr'):
        run(insmod_cmd)
def local_logged(startdir, command):
    """ Run `command` through local() after a `cd startdir` prefix.

    The captured result and its stderr are both written to the debug log.
    Nothing is returned.
    """
    enter_startdir = prefix('cd ' + startdir)
    with enter_startdir, StreamLogger('stdout'), StreamLogger('stderr'):
        captured = local(command, shell=get_local_shell(), capture=True)
        for text in (captured, captured.stderr):
            rootLogger.debug(text)
def flash_fpgas(self):
    """ Flash each consumed FPGA slot with the agfi of the node assigned to it.

    Slots whose entry in parentnode.fpga_slots is None are skipped.
    """
    fpga_slots = self.parentnode.fpga_slots
    consumed = self.parentnode.get_num_fpga_slots_consumed()

    for slotno, servernode in zip(range(consumed), fpga_slots):
        if servernode is None:
            continue

        agfi = servernode.get_agfi()
        self.instance_logger(
            """Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))

        flash_cmd = """sudo fpga-load-local-image -S {} -I {}""".format(
            slotno, agfi)
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(flash_cmd)
def start_sim_slot(self, slotno):
    """ Start the FPGA simulation assigned to slot `slotno`.

    The start command comes from the node stored in
    parentnode.fpga_slots[slotno] and is run from /home/centos/sim_slot_<slotno>/.
    """
    self.instance_logger(
        """Starting FPGA simulation for slot: {}.""".format(slotno))

    slot_dir = """/home/centos/sim_slot_{}/""".format(slotno)
    sim_node = self.parentnode.fpga_slots[slotno]

    with cd(slot_dir), StreamLogger('stdout'), StreamLogger('stderr'):
        start_cmd = sim_node.get_sim_start_command(slotno)
        run(start_cmd)
def kill_switch_slot(self, switchslot):
    """ Kill the switch assigned to slot `switchslot`.

    The kill command is run under warn_only().
    """
    self.instance_logger(
        """Killing switch simulation for switchslot: {}.""".format(switchslot))

    target_switch = self.parentnode.switch_slots[switchslot]

    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        kill_cmd = target_switch.get_switch_kill_command()
        run(kill_cmd)
def unload_nbd_module(self):
    """ Remove the nbd kernel module with `sudo rmmod nbd`.

    All /dev/nbdX devices are disconnected first; the rmmod itself runs
    under warn_only().
    """
    self.instance_logger("Unloading NBD Kernel Module.")

    # every nbd device has to be disconnected before the module is removed
    self.disconnect_all_nbds_instance()

    rmmod_cmd = 'sudo rmmod nbd'
    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        run(rmmod_cmd)
def start_ila_server(self):
    """ Start the vivado hw_server and virtual jtag on the simulation instance.

    Each one is launched in its own detached screen session (hw_server first,
    then virtual_jtag), and each command ends with a one second sleep.
    """
    # (log message, command) pairs, launched in this order
    launch_steps = (
        ("Starting Vivado hw_server.",
         """screen -S hw_server -d -m bash -c "script -f -c 'hw_server'"; sleep 1"""),
        ("Starting Vivado virtual JTAG.",
         """screen -S virtual_jtag -d -m bash -c "script -f -c 'sudo fpga-start-virtual-jtag -P 10201 -S 0'"; sleep 1"""),
    )

    for announcement, screen_cmd in launch_steps:
        self.instance_logger(announcement)
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(screen_cmd)
def load_nbd_module(self):
    """ Insert the nbd kernel module from /home/centos/nbd.ko.

    The module is always unloaded first so that it starts from a clean
    state. nbds_max is taken from parentnode.nbd_tracker.NBDS_MAX.
    """
    self.unload_nbd_module()

    # now load nbd
    self.instance_logger("Loading NBD Kernel Module.")
    with StreamLogger('stdout'), StreamLogger('stderr'):
        nbds_max = self.parentnode.nbd_tracker.NBDS_MAX
        insmod_cmd = """sudo insmod /home/centos/nbd.ko nbds_max={}""".format(nbds_max)
        run(insmod_cmd)
def start_switches_instance(self):
    """ Start every switch slot on this instance.

    Does nothing when instance_assigned_switches() is falsy. Otherwise
    /dev/shm is wiped before the first switch is started.
    """
    if not self.instance_assigned_switches():
        return

    # remove shared mem pages used by switches
    with StreamLogger('stdout'), StreamLogger('stderr'):
        run("sudo rm -rf /dev/shm/*")

    slot_count = self.parentnode.get_num_switch_slots()
    for switch_slot in range(slot_count):
        self.start_switch_slot(switch_slot)
def unload_xdma(self):
    """ Remove the xdma kernel module with `sudo rmmod xdma`.

    The removal runs under warn_only() and is bracketed by remote_kmsg()
    start/end markers.
    """
    self.instance_logger("Unloading XDMA Driver Kernel Module.")

    rmmod_cmd = 'sudo rmmod xdma'
    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        # fpga mgmt tools seem to force load xocl after a flash now...
        # so we just remove everything for good measure:
        remote_kmsg("removing_xdma_start")
        run(rmmod_cmd)
        remote_kmsg("removing_xdma_end")
def fpga_node_edma(self):
    """ Copy the AWS FPGA EDMA driver directory to the remote node.

    Assumes the driver was already built and that a binary exists in
    ../platforms/f1/aws-fpga/sdk/linux_kernel_drivers on this machine.
    """
    self.instance_logger("""Copying AWS FPGA EDMA driver to remote node.""")

    local_drivers = '../platforms/f1/aws-fpga/sdk/linux_kernel_drivers'
    remote_edma_dir = '/home/centos/edma/'

    with StreamLogger('stdout'), StreamLogger('stderr'):
        # make sure the destination exists before uploading into it
        run('mkdir -p /home/centos/edma/')
        put(local_drivers, remote_edma_dir, mirror_local_mode=True)
def unload_xrt_and_xocl(self):
    """ Stop the mpd service and remove the xrt / xrt-aws packages with yum.

    Both commands run under warn_only() and are bracketed by remote_kmsg()
    start/end markers.
    """
    self.instance_logger("Unloading XRT-related Kernel Modules.")

    removal_cmds = (
        'sudo systemctl stop mpd',
        'sudo yum remove -y xrt xrt-aws',
    )

    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        # fpga mgmt tools seem to force load xocl after a flash now...
        # so we just remove everything for good measure:
        remote_kmsg("removing_xrt_start")
        for removal_cmd in removal_cmds:
            run(removal_cmd)
        remote_kmsg("removing_xrt_end")
def start_switch_slot(self, switchslot):
    """ Start the switch assigned to slot `switchslot`.

    The start command comes from parentnode.switch_slots[switchslot] and is
    run from /home/centos/switch_slot_<switchslot>/.
    """
    self.instance_logger(
        """Starting switch simulation for switch slot: {}.""".format(switchslot))

    slot_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
    target_switch = self.parentnode.switch_slots[switchslot]

    with cd(slot_dir), StreamLogger('stdout'), StreamLogger('stderr'):
        start_cmd = target_switch.get_switch_start_command()
        run(start_cmd)
def load_xdma(self):
    """ Insert the xdma kernel module (xdma.ko) with poll_mode=1.

    unload_xdma() is always called first.
    """
    # fpga mgmt tools seem to force load xocl after a flash now...
    # xocl conflicts with the xdma driver, which we actually want to use
    # so we just remove everything for good measure before loading xdma:
    self.unload_xdma()

    # now load xdma
    self.instance_logger("Loading XDMA Driver Kernel Module.")

    # TODO: can make these values automatically be chosen based on link lat
    insmod_cmd = "sudo insmod /home/centos/xdma/linux_kernel_drivers/xdma/xdma.ko poll_mode=1"
    with StreamLogger('stdout'), StreamLogger('stderr'):
        run(insmod_cmd)
def screens():
    """ Poll `screen -ls` until it reports that no screens are left.

    The listing is re-checked once per second; the loop ends as soon as the
    output contains "No Sockets found".
    """
    with warn_only():
        rootLogger.info("Confirming exit...")

        all_screens_gone = False
        while not all_screens_gone:
            with StreamLogger('stdout'), StreamLogger('stderr'):
                listing = run("screen -ls")
                all_screens_gone = "No Sockets found" in listing
                if not all_screens_gone:
                    # something is still running; wait before asking again
                    time.sleep(1)
def fpga_node_qcow(self):
    """ Install qemu-img on the remote node and copy the nbd kernel module over.

    Assumes the kernel module was already built and exists at
    ../build/nbd.ko on this machine.
    """
    self.instance_logger("""Setting up remote node for qcow2 disk images.""")

    local_module = '../build/nbd.ko'
    remote_module = '/home/centos/nbd.ko'

    with StreamLogger('stdout'), StreamLogger('stderr'):
        # get qemu-nbd
        run('sudo yum -y install qemu-img')
        # copy over kernel module
        put(local_module, remote_module, mirror_local_mode=True)
def disconnect_all_nbds_instance(self):
    """ Disconnect /dev/nbd0 .. /dev/nbd<NBDS_MAX - 1> on the instance.

    All the `qemu-nbd -d` invocations are joined with "; " and sent as one
    command.
    """
    self.instance_logger("Disconnecting all NBDs.")

    # warn_only, so we can call this even if there are no nbds
    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        nbd_count = self.parentnode.nbd_tracker.NBDS_MAX
        disconnects = [
            """sudo qemu-nbd -d /dev/nbd{nbdno}""".format(nbdno=device_index)
            for device_index in range(nbd_count)
        ]
        run("; ".join(disconnects))
def clear_fpgas(self):
    """ Clear every FPGA slot, then wait for each slot to report "cleared".

    Two passes are made over all slots: the first issues the clear for each
    slot, the second blocks on each slot until fpga-describe-local-image
    output contains "cleared".
    """
    # we always clear ALL fpga slots
    # (log message template, command template), one entry per pass
    passes = (
        ("""Clearing FPGA Slot {}.""",
         """sudo fpga-clear-local-image -S {} -A"""),
        ("""Checking for Cleared FPGA Slot {}.""",
         """until sudo fpga-describe-local-image -S {} -R -H | grep -q "cleared"; do sleep 1; done"""),
    )

    for message_template, command_template in passes:
        for slotno in range(self.parentnode.get_num_fpga_slots_max()):
            self.instance_logger(message_template.format(slotno))
            with StreamLogger('stdout'), StreamLogger('stderr'):
                run(command_template.format(slotno))
def copy_switch_slot_infrastructure(self, switchslot):
    """ Copy the files required by the switch in slot `switchslot` to the node.

    The destination /home/centos/switch_slot_<switchslot>/ is created first;
    the file list comes from the switch's get_required_files_local_paths().
    """
    self.instance_logger(
        """Copying switch simulation infrastructure for switch slot: {}.""".format(switchslot))

    slot_dir = """/home/centos/switch_slot_{}/""".format(switchslot)
    with StreamLogger('stdout'), StreamLogger('stderr'):
        run("""mkdir -p {}""".format(slot_dir))

    target_switch = self.parentnode.switch_slots[switchslot]
    for local_path in target_switch.get_required_files_local_paths():
        with StreamLogger('stdout'), StreamLogger('stderr'):
            put(local_path, slot_dir, mirror_local_mode=True)
def get_and_install_aws_fpga_sdk(self):
    """ Install the aws-fpga SDK, which provides the tools used to flash the fpga.

    Clones https://github.com/aws/aws-fpga, checks out a pinned commit, and
    sources sdk_setup.sh from /home/centos/aws-fpga. The clone and checkout
    run under warn_only(); the setup script does not.
    """
    # TODO: we checkout a specific version of aws-fpga here, in case upstream
    # master is bumped. But now we have to remember to change AWS_FPGA_FIRESIM_UPSTREAM_VERSION
    # when we bump our stuff. Need a better way to do this.
    AWS_FPGA_FIRESIM_UPSTREAM_VERSION = "2fdf23ffad944cb94f98d09eed0f34c220c522fe"

    self.instance_logger(
        """Installing AWS FPGA SDK on remote nodes. Upstream hash: {}""".format(
            AWS_FPGA_FIRESIM_UPSTREAM_VERSION))

    checkout_cmd = 'cd aws-fpga && git checkout ' + AWS_FPGA_FIRESIM_UPSTREAM_VERSION
    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        run('git clone https://github.com/aws/aws-fpga')
        run(checkout_cmd)

    with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
        run('source sdk_setup.sh')
def build_fpga_driver(triplet):
    """ Build the FPGA driver for the given triplet via the `f1` make target.

    triplet -- "-"-separated string; its first three pieces are used as
        DESIGN, TARGET_CONFIG and PLATFORM_CONFIG respectively.
    """
    # TODO there is a duplicate of this in runtools
    deploy_dir = get_deploy_dir()

    pieces = triplet.split("-")
    design = pieces[0]
    target_config = pieces[1]
    platform_config = pieces[2]

    with prefix('cd ' + deploy_dir + '/../'), \
            prefix('source sourceme-f1-manager.sh'), \
            prefix('cd sim/'), \
            StreamLogger('stdout'), \
            StreamLogger('stderr'):
        make_cmd = """make DESIGN={} TARGET_CONFIG={} PLATFORM_CONFIG={} f1""".format(
            design, target_config, platform_config)
        run(make_cmd)
def get_and_install_aws_fpga_sdk(self):
    """ Install the aws-fpga SDK, which provides the tools used to flash the fpga.

    Clones https://github.com/aws/aws-fpga, checks out a pinned commit, and
    sources sdk_setup.sh from /home/centos/aws-fpga. The clone and checkout
    run under warn_only(); the setup script does not.
    """
    # TODO: we checkout a specific version of aws-fpga here, in case upstream
    # master is bumped. But now we have to remember to change AWS_FPGA_FIRESIM_UPSTREAM_VERSION
    # when we bump our stuff. Need a better way to do this.
    AWS_FPGA_FIRESIM_UPSTREAM_VERSION = "b1ed5e951de3442ffb1fc8c7097e7064489e83f1"

    self.instance_logger("""Installing AWS FPGA SDK on remote nodes.""")

    checkout_cmd = 'cd aws-fpga && git checkout ' + AWS_FPGA_FIRESIM_UPSTREAM_VERSION
    with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
        run('git clone https://github.com/aws/aws-fpga')
        run(checkout_cmd)

    with cd('/home/centos/aws-fpga'), StreamLogger('stdout'), StreamLogger('stderr'):
        run('source sdk_setup.sh')
def flash_fpgas(self):
    """ Flash every FPGA slot on the instance and wait until each is loaded.

    Consumed slots (the first get_num_fpga_slots_consumed() entries of
    parentnode.fpga_slots that are not None) are flashed with the agfi of
    the node assigned to them. The remaining slots, up to
    get_num_fpga_slots_max(), are flashed with a "dummy" agfi: the agfi of
    the last consumed slot that was flashed. Afterwards each flashed slot is
    polled until fpga-describe-local-image reports "loaded".
    """
    parentnode = self.parentnode
    num_consumed = parentnode.get_num_fpga_slots_consumed()
    num_max = parentnode.get_num_fpga_slots_max()

    flash_template = """sudo fpga-load-local-image -S {} -I {} -A"""
    wait_template = """until sudo fpga-describe-local-image -S {} -R -H | grep -q "loaded"; do sleep 1; done"""

    # (slotno, agfi) for every slot flashed with a real image. Remembered so
    # that the "Checking" messages below report the agfi actually flashed to
    # each slot, not whichever agfi happened to be flashed last.
    flashed = []
    for firesimservernode, slotno in zip(parentnode.fpga_slots,
                                         range(num_consumed)):
        if firesimservernode is None:
            continue
        agfi = firesimservernode.get_agfi()
        flashed.append((slotno, agfi))
        self.instance_logger(
            """Flashing FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(flash_template.format(slotno, agfi))

    # NOTE(review): dummyagfi stays None when no consumed slot was flashed;
    # the commands below would then be issued with "-I None", exactly as
    # before this change. Confirm whether that case can occur.
    dummyagfi = flashed[-1][1] if flashed else None

    # We only do this because XDMA hangs if some of the FPGAs on the instance
    # are left in the cleared state. So, if you're only using some of the
    # FPGAs on an instance, we flash the rest with one of your images
    # anyway. Since the only interaction we have with an FPGA right now
    # is over PCIe where the software component is mastering, this can't
    # break anything.
    unused_slots = range(num_consumed, num_max)
    for slotno in unused_slots:
        self.instance_logger(
            """Flashing FPGA Slot: {} with dummy agfi: {}.""".format(slotno, dummyagfi))
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(flash_template.format(slotno, dummyagfi))

    for slotno, agfi in flashed:
        self.instance_logger(
            """Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, agfi))
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(wait_template.format(slotno))

    for slotno in unused_slots:
        self.instance_logger(
            """Checking for Flashed FPGA Slot: {} with agfi: {}.""".format(slotno, dummyagfi))
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run(wait_template.format(slotno))
def screens():
    """ Poll `screen -ls` until the simulation screens have exited.

    The listing is re-checked once per second. Polling stops when either
      - exactly the hw_server and virtual_jtag screens remain (output
        contains "2 Sockets in", "hw_server" and "virtual_jtag"), which is
        the expected end state when AutoILA is enabled, or
      - the output contains "No Sockets found" (AutoILA disabled).
    """
    # all three markers must be present for the AutoILA-enabled end state
    ila_markers = ("2 Sockets in", "hw_server", "virtual_jtag")

    with warn_only():
        rootLogger.info("Confirming exit...")

        finished = False
        while not finished:
            with StreamLogger('stdout'), StreamLogger('stderr'):
                listing = run("screen -ls")
                only_ila_left = all(marker in listing for marker in ila_markers)
                finished = only_ila_left or "No Sockets found" in listing
                if not finished:
                    # something else is still running; wait before asking again
                    time.sleep(1)
def copy_back_job_results_from_run(self, slotno):
    """ Copy the results of the job in simulation slot `slotno` back locally.

    1) Make the local directory <job_results_dir>/<jobname>/ for the output
    2) If the job has a rootfs: mount it on the remote node, copy back the
       files listed in the job's `outputs`, and unmount it again
    3) Copy back the files listed in the job's `simoutputs` from the slot
       directory on the host (e.g. uartlog, memory_stats.csv, etc)

    TODO: move this somewhere else, it's kinda in a weird place...
    """
    job = self.get_job()
    results_root = self.get_job().parent_workload.job_results_dir
    local_job_dir = """{}/{}/""".format(results_root, job.jobname)

    with StreamLogger('stdout'), StreamLogger('stderr'):
        mkdir_result = local("""mkdir -p {}""".format(local_job_dir), capture=True)
        rootLogger.debug("[localhost] " + str(mkdir_result))
        rootLogger.debug("[localhost] " + str(mkdir_result.stderr))

    # mount rootfs, copy files from it back to local system
    if self.get_rootfs_name() is not None:
        mountpoint = """/home/centos/sim_slot_{}/mountpoint""".format(slotno)

        with StreamLogger('stdout'), StreamLogger('stderr'):
            run("""sudo mkdir -p {}""".format(mountpoint))
            mount_cmd = """sudo mount /home/centos/sim_slot_{}/{} {}""".format(
                slotno, self.get_rootfs_name(), mountpoint)
            run(mount_cmd)
            run("""sudo chmod -Rf 777 {}""".format(mountpoint))

        ## copy back files from inside the rootfs
        with warn_only(), StreamLogger('stdout'), StreamLogger('stderr'):
            for rootfs_file in job.outputs:
                get(remote_path=mountpoint + rootfs_file, local_path=local_job_dir)

        ## unmount
        with StreamLogger('stdout'), StreamLogger('stderr'):
            run("""sudo umount {}""".format(mountpoint))

    ## copy output files generated by the simulator that live on the host:
    ## e.g. uartlog, memory_stats.csv, etc
    slot_dir = """/home/centos/sim_slot_{}/""".format(slotno)
    for host_file in job.simoutputs:
        with StreamLogger('stdout'), StreamLogger('stderr'):
            get(remote_path=slot_dir + host_file, local_path=local_job_dir)
def copy_back_switchlog_from_run(self, job_results_dir, switch_slot_no):
    """ Copy back the switch log for this switch.

    The local directory <job_results_dir>/switch<switch_id_internal>/ is
    created, then `switchlog` is fetched into it from
    /home/centos/switch_slot_<switch_slot_no>/.

    TODO: move this somewhere else, it's kinda in a weird place...
    """
    local_switch_dir = """{}/switch{}/""".format(job_results_dir, self.switch_id_internal)

    with StreamLogger('stdout'), StreamLogger('stderr'):
        mkdir_result = local("""mkdir -p {}""".format(local_switch_dir), capture=True)
        rootLogger.debug("[localhost] " + str(mkdir_result))
        rootLogger.debug("[localhost] " + str(mkdir_result.stderr))

    ## the switch log is the only output file that lives in the slot directory
    slot_dir = """/home/centos/switch_slot_{}/""".format(switch_slot_no)
    with StreamLogger('stdout'), StreamLogger('stderr'):
        get(remote_path=slot_dir + "switchlog", local_path=local_switch_dir)