def __init__(self, host, cond_api):
    """Remember the host and conductor API and create an FPGA driver.

    :param host: stored as ``self.host``.
    :param cond_api: stored as ``self.conductor_api``.
    """
    self.host = host
    self.conductor_api = cond_api
    self.fpga_driver = FPGADriver()
    # FIXME (Shaohe) local cache for Accelerator.
    # Will fix it in next release.
    self.fpgas = None
def __init__(self, topic, host=None):
    """Initialise the agent manager and its helper APIs.

    :param topic: stored as ``self.topic``.
    :param host: host name of this agent; ``CONF.host`` is used when it
                 is not given.
    """
    super(AgentManager, self).__init__(CONF)
    self.topic = topic
    self.host = host or CONF.host
    self.fpga_driver = FPGADriver()
    self.cond_api = cond_api.ConductorAPI()
    self.agent_api = AgentAPI()
    self.image_api = ImageAPI()
    # Hand over the resolved host: the raw ``host`` argument defaults to
    # None, which would leave the tracker without a host name.
    self._rt = ResourceTracker(self.host, self.cond_api)
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class."""

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Initialise the agent manager and its helper APIs.

        :param topic: stored as ``self.topic``.
        :param host: host name of this agent; ``CONF.host`` is used when
                     it is not given.
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Hand over the resolved host: the raw ``host`` argument defaults
        # to None, which would leave the tracker without a host name.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the periodic tasks through ``run_periodic_tasks``."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def fpga_program_v2(self, context, controlpath_id,
                        bitstream_uuid, driver_name):
        """Download a bitstream and program it through the named driver.

        :param context: passed to the image API download call.
        :param controlpath_id: handed unchanged to the driver's
                               ``program_v2``.
        :param bitstream_uuid: identifies the image to download; it is
                               also used to build the download file name.
        :param driver_name: name used to create the FPGA driver.
        """
        # TODO() Use tempfile module?
        download_path = "/tmp/" + bitstream_uuid + ".gbs"
        try:
            self.image_api.download(context,
                                    bitstream_uuid,
                                    dest_path=download_path)
            driver = self.fpga_driver.create(driver_name)
            ret = driver.program_v2(controlpath_id, download_path)
            LOG.info('Driver program() API returned code %s', ret)
        finally:
            # Remove the downloaded file even when the download or the
            # programming step raised; it may not exist if the download
            # failed early.
            if os.path.exists(download_path):
                os.remove(download_path)

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """Update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
def __init__(self, topic, host=None):
    """Initialise the agent manager.

    :param topic: stored as ``self.topic``.
    :param host: host name of this agent; ``CONF.host`` is used when it
                 is not given.
    """
    super(AgentManager, self).__init__(CONF)
    # can only use in the same node, change it to RPC to conductor
    self.conductor_api = conductor_api.ConductorAPI()
    self.topic = topic
    self.host = host or CONF.host
    self.fpga_driver = FPGADriver()
    # Hand over the resolved host: the raw ``host`` argument defaults to
    # None, which would leave the tracker without a host name.
    self._rt = ResourceTracker(self.host, self.conductor_api)
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class."""

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Initialise the agent manager and its helper APIs.

        :param topic: stored as ``self.topic``.
        :param host: host name of this agent; ``CONF.host`` is used when
                     it is not given.
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Hand over the resolved host: the raw ``host`` argument defaults
        # to None, which would leave the tracker without a host name.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the periodic tasks through ``run_periodic_tasks``."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def hardware_list(self, context, values):
        """List installed hardware."""
        pass

    def fpga_program(self, context, deployable_uuid, image_uuid):
        """Program an FPGA region with a downloaded bitstream.

        :param context: the request context
        :param deployable_uuid: uuid of the deployable to program; its
                                ``vendor`` selects the driver and its
                                ``address`` is passed to the driver.
        :param image_uuid: uuid of the bitstream image to download
        """
        # TODO (Shaohe Feng) Get image from glance.
        # And add claim and rollback logic.
        path = self._download_bitstream(context, image_uuid)
        dep = self.cond_api.deployable_get(context, deployable_uuid)
        driver = self.fpga_driver.create(dep.vendor)
        driver.program(dep.address, path)

    def _download_bitstream(self, context, bitstream_uuid):
        """Download the bitstream.

        :param context: the context
        :param bitstream_uuid: v4 uuid of the bitstream to reprogram
        :returns: the path the bitstream was downloaded to
        """
        # NOTE(review): bitstream_uuid is concatenated into the path
        # without validation here -- confirm callers only pass real uuids.
        download_path = "/tmp/" + bitstream_uuid + ".bin"
        self.image_api.download(context,
                                bitstream_uuid,
                                dest_path=download_path)
        return download_path

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
def test_intel_discover(self):
    """The Intel driver discovers the two expected PFs (one with a VF)."""
    expect = [{
        'function': 'pf',
        'assignable': False,
        'pr_num': '1',
        'vendor_id': '0x8086',
        'devices': '0000:5e:00.0',
        'regions': [{
            'function': 'vf',
            'assignable': True,
            'product_id': '0xbcc1',
            'name': 'intel-fpga-dev.2',
            'parent_devices': '0000:5e:00.0',
            'path': '%s/intel-fpga-dev.2' % sysinfo.SYS_FPGA,
            'vendor_id': '0x8086',
            'devices': '0000:5e:00.1'
        }],
        'name': 'intel-fpga-dev.0',
        'parent_devices': '',
        'path': '%s/intel-fpga-dev.0' % sysinfo.SYS_FPGA,
        'product_id': '0xbcc0'
    }, {
        'function': 'pf',
        'assignable': True,
        'pr_num': '0',
        'vendor_id': '0x8086',
        'devices': '0000:be:00.0',
        'name': 'intel-fpga-dev.1',
        'parent_devices': '',
        'path': '%s/intel-fpga-dev.1' % sysinfo.SYS_FPGA,
        'product_id': '0xbcc0'
    }]

    def by_address(dev):
        # Dicts have no ordering on Python 3, so a bare ``sort()`` raises
        # TypeError; order both lists by their device address instead.
        return dev['devices']

    expect.sort(key=by_address)

    intel = FPGADriver.create("intel")
    fpgas = intel.discover()
    fpgas.sort(key=by_address)
    self.assertEqual(2, len(fpgas))
    self.assertEqual(fpgas, expect)
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class.

    API version history:

    | 1.0 - Initial version.

    """

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Initialise the agent manager and its helper APIs.

        :param topic: stored as ``self.topic``.
        :param host: host name of this agent; ``CONF.host`` is used when
                     it is not given.
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Hand over the resolved host: the raw ``host`` argument defaults
        # to None, which would leave the tracker without a host name.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the periodic tasks through ``run_periodic_tasks``."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def fpga_program(self, context, controlpath_id,
                     bitstream_uuid, driver_name):
        """Download a bitstream and program it through the named driver.

        :param context: passed to the image API download call.
        :param controlpath_id: handed unchanged to the driver's
                               ``program``.
        :param bitstream_uuid: uuid of the bitstream image; converted to
                               ``str`` and validated before use.
        :param driver_name: name used to create the FPGA driver.
        :returns: whatever the driver's ``program`` call returns.
        :raises: exception.InvalidUUID if ``bitstream_uuid`` is not
                 uuid-like.
        """
        bitstream_uuid = str(bitstream_uuid)
        if not uuidutils.is_uuid_like(bitstream_uuid):
            raise exception.InvalidUUID(uuid=bitstream_uuid)
        download_path = tempfile.NamedTemporaryFile(suffix=".gbs",
                                                    prefix=bitstream_uuid)
        try:
            # The download is inside the ``try`` so the temporary file is
            # removed even when fetching the image fails.
            self.image_api.download(context,
                                    bitstream_uuid,
                                    dest_path=download_path.name)
            driver = self.fpga_driver.create(driver_name)
            ret = driver.program(controlpath_id, download_path.name)
            LOG.info('Driver program() API returned %s', ret)
        finally:
            LOG.debug('Remove tmp bitstream file: %s', download_path.name)
            os.remove(download_path.name)
        return ret

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """Update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
def test_intel_program(self, mock_popen):
    """Programming a VF or a PF address runs the same fpgaconf command."""

    class FakeProcess(object):
        # Stand-in for the object returned by the mocked popen.
        returncode = 0

        def wait(self):
            pass

    mock_popen.return_value = FakeProcess()

    bus, device, function = "0x5e", "0x00", "0x0"
    expect_cmd = ['sudo', 'fpgaconf']
    expect_cmd += ['-b', bus, '-d', device, '-f', function]
    expect_cmd.append('/path/image')

    intel = FPGADriver.create("intel")
    # The VF address is programmed first, then the PF address.
    for address in ("0000:5e:00.1", "0000:5e:00.0"):
        intel.program(address, "/path/image")
        mock_popen.assert_called_with(expect_cmd, stdout=subprocess.PIPE)
class ResourceTracker(object):
    """Agent helper class for keeping track of resource usage when hardware
    Accelerator resources updated.

    Update the Deployable DB through conductor.
    """

    def __init__(self, host, cond_api):
        """Remember the host and conductor API and create an FPGA driver.

        :param host: stored as ``self.host``.
        :param cond_api: stored as ``self.conductor_api``.
        """
        # FIXME (Shaohe) local cache for Accelerator.
        # Will fix it in next release.
        self.fpgas = None
        self.host = host
        self.conductor_api = cond_api
        self.fpga_driver = FPGADriver()

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def claim(self, context):
        pass

    def _fpga_compare_and_update(self, host_dev, acclerator):
        """Copy changed mapped fields from ``host_dev`` into ``acclerator``.

        :returns: True if at least one mapped field differed.
        """
        need_updated = False
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            if acclerator[k] != host_dev[v]:
                need_updated = True
                acclerator[k] = host_dev[v]
        return need_updated

    def _gen_deployable_from_host_dev(self, host_dev):
        """Build the field dict of a new deployable from a host device."""
        dep = {}
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            dep[k] = host_dev[v]
        dep["host"] = self.host
        dep["version"] = DEPLOYABLE_VERSION
        dep["availability"] = "free"
        dep["uuid"] = uuidutils.generate_uuid()
        return dep

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def update_usage(self, context):
        """Reconcile the discovered FPGA devices with the deployables that
        the conductor reports for this host.

        Deployables present on both sides are updated when their fields
        differ, newly discovered devices are created and deployables whose
        device is gone are deleted.
        """
        def create_deployable(fpgas, bdf, parent_uuid=None):
            fpga = fpgas[bdf]
            dep = self._gen_deployable_from_host_dev(fpga)
            # Always set, also when parent_uuid is None.
            dep["parent_uuid"] = parent_uuid
            obj_dep = objects.Deployable(context, **dep)
            new_dep = self.conductor_api.deployable_create(context, obj_dep)
            return new_dep

        # NOTE(Shaohe Feng) need more agreement on how to keep consistency.
        fpgas = self._get_fpga_devices()
        bdfs = set(fpgas.keys())
        deployables = self.conductor_api.deployable_get_by_host(
            context, self.host)

        # NOTE(Shaohe Feng) when no "pcie_address" in deployable?
        accls = {v["pcie_address"]: v for v in deployables}
        accl_bdfs = set(accls.keys())

        # Firstly update
        for mutual in accl_bdfs & bdfs:
            accl = accls[mutual]
            if self._fpga_compare_and_update(fpgas[mutual], accl):
                try:
                    self.conductor_api.deployable_update(context, accl)
                except RemoteError as e:
                    LOG.error(e)

        # Add
        new = bdfs - accl_bdfs
        new_pf = set([n for n in new if fpgas[n]["function"] == "pf"])
        for n in new_pf:
            pf_dep = create_deployable(fpgas, n)
            accls[n] = pf_dep
            sub_vf = set()
            # ``n`` is the address string; the regions live in the device
            # record, so look the key up there.
            if "regions" in fpgas[n]:
                sub_vf = set([sub["devices"] for sub in fpgas[n]["regions"]])
            for vf in sub_vf & new:
                # Every VF hangs off the PF, so keep the PF deployable in
                # its own name instead of overwriting it per VF.
                vf_dep = create_deployable(fpgas, vf, pf_dep["uuid"])
                accls[vf] = vf_dep
                new.remove(vf)
        # Remaining new devices are not PFs; their parent is looked up
        # among the known deployables.
        for n in new - new_pf:
            p_bdf = fpgas[n]["parent_devices"]
            p_accl = accls[p_bdf]
            p_uuid = p_accl["uuid"]
            create_deployable(fpgas, n, p_uuid)

        # Delete
        for obsolete in accl_bdfs - bdfs:
            try:
                self.conductor_api.deployable_delete(context, accls[obsolete])
            except RemoteError as e:
                LOG.error(e)
            del accls[obsolete]

    def _get_fpga_devices(self):
        """Return all discovered devices keyed by their ``devices`` value.

        Devices nested under ``regions`` are flattened into the same dict.
        """
        def form_dict(devices, fpgas):
            for v in devices:
                fpgas[v["devices"]] = v
                if "regions" in v:
                    form_dict(v["regions"], fpgas)

        fpgas = {}
        vendors = self.fpga_driver.discover_vendors()
        for v in vendors:
            driver = self.fpga_driver.create(v)
            form_dict(driver.discover(), fpgas)
        return fpgas
def test_program(self):
    """The base driver's ``program`` raises NotImplementedError."""
    base_driver = FPGADriver()
    with self.assertRaises(NotImplementedError):
        base_driver.program("path", "image")
def test_discover(self):
    """The base driver's ``discover`` raises NotImplementedError."""
    base_driver = FPGADriver()
    with self.assertRaises(NotImplementedError):
        base_driver.discover()
def test_create(self):
    """``create`` accepts "intel" and raises LookupError for "xilinx"."""
    FPGADriver.create("intel")
    with self.assertRaises(LookupError):
        FPGADriver.create("xilinx")
class ResourceTracker(object):
    """Agent helper class for keeping track of resource usage as instances
    are built and destroyed.
    """

    def __init__(self, host, cond_api):
        """Remember the host and conductor API and create an FPGA driver.

        :param host: stored as ``self.host``.
        :param cond_api: stored as ``self.conductor_api``.
        """
        # FIXME (Shaohe) local cache for Accelerator.
        # Will fix it in next release.
        self.fpgas = None
        self.host = host
        self.conductor_api = cond_api
        self.fpga_driver = FPGADriver()

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def claim(self, context):
        pass

    def _fpga_compare_and_update(self, host_dev, acclerator):
        """Copy changed mapped fields from ``host_dev`` into ``acclerator``.

        :returns: True if at least one mapped field differed.
        """
        need_updated = False
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            if acclerator[k] != host_dev[v]:
                need_updated = True
                acclerator[k] = host_dev[v]
        return need_updated

    def _gen_accelerator_for_deployable(self, context, name, vendor,
                                        productor, desc="", dev_type="pf",
                                        acc_type="FPGA", acc_cap="",
                                        remotable=0):
        """Create an accelerator through the conductor and return it.

        The type of the accelerator device, e.g GPU, FPGA, ...
        acc_type defines the usage of the accelerator, e.g Crypto
        acc_cap defines the specific capability, e.g AES
        """
        # NOTE(review): project_id/user_id are read from the pecan request
        # context rather than from ``context`` -- confirm a pecan request
        # is available wherever this runs.
        db_acc = {
            'deleted': False,
            'uuid': uuidutils.generate_uuid(),
            'name': name,
            'description': desc,
            'project_id': pecan.request.context.project_id,
            'user_id': pecan.request.context.user_id,
            'device_type': dev_type,
            'acc_type': acc_type,
            'acc_capability': acc_cap,
            'vendor_id': vendor,
            'product_id': productor,
            'remotable': remotable
        }

        acc = objects.Accelerator(context, **db_acc)
        acc = self.conductor_api.accelerator_create(context, acc)
        return acc

    def _gen_deployable_from_host_dev(self, host_dev, acc_id,
                                      parent_uuid=None, root_uuid=None):
        """Build the field dict of a new deployable from a host device."""
        dep = {}
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            dep[k] = host_dev[v]
        dep["host"] = self.host
        dep["version"] = DEPLOYABLE_VERSION
        dep["availability"] = "free"
        dep["uuid"] = uuidutils.generate_uuid()
        dep["parent_uuid"] = parent_uuid
        dep["root_uuid"] = root_uuid
        dep["accelerator_id"] = acc_id
        return dep

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def update_usage(self, context):
        """Reconcile the discovered FPGA devices with the deployables that
        the conductor reports for this host.

        Deployables present on both sides are updated when their fields
        differ, newly discovered devices get an accelerator and a
        deployable, and deployables whose device is gone are deleted.
        """
        def create_deployable(fpgas, bdf, acc_id, parent_uuid=None):
            fpga = fpgas[bdf]
            dep = self._gen_deployable_from_host_dev(fpga, acc_id)
            # Always set, also when parent_uuid is None.
            dep["parent_uuid"] = parent_uuid
            obj_dep = objects.Deployable(context, **dep)
            new_dep = self.conductor_api.deployable_create(context, obj_dep)
            return new_dep

        # NOTE(Shaohe Feng) need more agreement on how to keep consistency.
        fpgas = self._get_fpga_devices()
        bdfs = set(fpgas.keys())
        deployables = self.conductor_api.deployable_get_by_host(
            context, self.host)

        # NOTE(Shaohe Feng) when no "address" in deployable?
        accls = {v["address"]: v for v in deployables}
        accl_bdfs = set(accls.keys())

        # Firstly update
        for mutual in accl_bdfs & bdfs:
            accl = accls[mutual]
            if self._fpga_compare_and_update(fpgas[mutual], accl):
                try:
                    self.conductor_api.deployable_update(context, accl)
                except RemoteError as e:
                    LOG.error(e)

        # Add
        new = bdfs - accl_bdfs
        new_pf = set([n for n in new if fpgas[n]["function"] == "pf"])
        for n in new_pf:
            fpga = fpgas[n]
            acc = self._gen_accelerator_for_deployable(
                context, fpga["name"], fpga["vendor_id"], fpga["product_id"],
                "FPGA device on %s" % self.host, "pf", "FPGA")
            pf_dep = create_deployable(fpgas, n, acc.id)
            accls[n] = pf_dep
            sub_vf = set()
            # ``n`` is the address string; the regions live in the device
            # record, so look the key up there.
            if "regions" in fpgas[n]:
                sub_vf = set([sub["devices"] for sub in fpgas[n]["regions"]])
            for vf in sub_vf & new:
                # Describe the accelerator with the VF's own record, not
                # the PF's.
                fpga = fpgas[vf]
                acc = self._gen_accelerator_for_deployable(
                    context, fpga["name"], fpga["vendor_id"],
                    fpga["product_id"], "FPGA device on %s" % self.host,
                    "vf", "FPGA")
                # Every VF hangs off the PF, so keep the PF deployable in
                # its own name instead of overwriting it per VF.
                vf_dep = create_deployable(fpgas, vf, acc.id, pf_dep["uuid"])
                accls[vf] = vf_dep
                new.remove(vf)
        # Remaining new devices are not PFs; their parent is looked up
        # among the known deployables.
        for n in new - new_pf:
            p_bdf = fpgas[n]["parent_devices"]
            p_accl = accls[p_bdf]
            p_uuid = p_accl["uuid"]
            fpga = fpgas[n]
            # NOTE(review): these devices are not PFs, yet "pf" is passed
            # as the device type -- confirm whether "vf" was intended.
            acc = self._gen_accelerator_for_deployable(
                context, fpga["name"], fpga["vendor_id"], fpga["product_id"],
                "FPGA device on %s" % self.host, "pf", "FPGA")
            create_deployable(fpgas, n, acc.id, p_uuid)

        # Delete
        for obsolete in accl_bdfs - bdfs:
            try:
                self.conductor_api.deployable_delete(context, accls[obsolete])
            except RemoteError as e:
                LOG.error(e)
            del accls[obsolete]

    def _get_fpga_devices(self):
        """Return all discovered devices keyed by their ``devices`` value.

        Devices nested under ``regions`` are flattened into the same dict.
        """
        def form_dict(devices, fpgas):
            for v in devices:
                fpgas[v["devices"]] = v
                if "regions" in v:
                    form_dict(v["regions"], fpgas)

        fpgas = {}
        vendors = self.fpga_driver.discover_vendors()
        for v in vendors:
            driver = self.fpga_driver.create(v)
            form_dict(driver.discover(), fpgas)
        return fpgas