Beispiel #1
0
 def __init__(self, host, cond_api):
     """Store the host and conductor client and build an FPGA driver.

     :param host: host identifier kept on the instance as ``host``
     :param cond_api: conductor API client kept as ``conductor_api``
     """
     self.host = host
     self.conductor_api = cond_api
     # FIXME (Shaohe) local cache for Accelerator.
     # Will fix it in next release. Nothing in this method fills it.
     self.fpgas = None
     self.fpga_driver = FPGADriver()
Beispiel #2
0
 def __init__(self, topic, host=None):
     """Create the agent manager and the API clients it relies on.

     :param topic: messaging topic, stored on the instance
     :param host: host name; ``CONF.host`` is used when this is falsy
     """
     super(AgentManager, self).__init__(CONF)
     self.topic = topic
     self.host = host or CONF.host
     self.fpga_driver = FPGADriver()
     self.cond_api = cond_api.ConductorAPI()
     self.agent_api = AgentAPI()
     self.image_api = ImageAPI()
     # Give the tracker the resolved host name. The raw ``host`` argument
     # defaults to None, which would leave the tracker without a host.
     self._rt = ResourceTracker(self.host, self.cond_api)
Beispiel #3
0
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class."""

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Create the agent manager and the API clients it relies on.

        :param topic: messaging topic, stored on the instance
        :param host: host name; ``CONF.host`` is used when this is falsy
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Give the tracker the resolved host name. The raw ``host`` argument
        # defaults to None, which would leave the tracker without a host.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the registered periodic tasks and return their result."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def fpga_program_v2(self, context, controlpath_id, bitstream_uuid,
                        driver_name):
        """Download a bitstream and program it through the named driver.

        :param context: request context passed to the image API
        :param controlpath_id: identifier handed to the driver's
                               ``program_v2`` call
        :param bitstream_uuid: id of the bitstream; also used to build the
                               temporary file name
        :param driver_name: name used to create the FPGA driver
        """
        # TODO() Use tempfile module?
        # NOTE(review): the path is built from ``bitstream_uuid`` without
        # validation; confirm callers only pass UUID-like values.
        download_path = "/tmp/" + bitstream_uuid + ".gbs"
        try:
            self.image_api.download(context,
                                    bitstream_uuid,
                                    dest_path=download_path)
            driver = self.fpga_driver.create(driver_name)
            ret = driver.program_v2(controlpath_id, download_path)
            LOG.info('Driver program() API returned code %s', ret)
        finally:
            # Remove the bitstream even when download or programming fails,
            # so failed attempts do not pile up files in /tmp. The existence
            # check keeps a missing file from masking the original error.
            if os.path.exists(download_path):
                os.remove(download_path)

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """Update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
Beispiel #4
0
 def __init__(self, topic, host=None):
     """Create the agent manager, its conductor client and its tracker.

     :param topic: messaging topic, stored on the instance
     :param host: host name; ``CONF.host`` is used when this is falsy
     """
     super(AgentManager, self).__init__(CONF)
     # Can only be used on the same node; change it to RPC to conductor.
     self.conductor_api = conductor_api.ConductorAPI()
     self.topic = topic
     self.host = host or CONF.host
     self.fpga_driver = FPGADriver()
     # Give the tracker the resolved host name. The raw ``host`` argument
     # defaults to None, which would leave the tracker without a host.
     self._rt = ResourceTracker(self.host, self.conductor_api)
Beispiel #5
0
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class."""

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Create the agent manager and the API clients it relies on.

        :param topic: messaging topic, stored on the instance
        :param host: host name; ``CONF.host`` is used when this is falsy
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Give the tracker the resolved host name. The raw ``host`` argument
        # defaults to None, which would leave the tracker without a host.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the registered periodic tasks and return their result."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def hardware_list(self, context, values):
        """List installed hardware (not implemented; returns None)."""
        pass

    def fpga_program(self, context, deployable_uuid, image_uuid):
        """Program an FPGA region with a downloaded bitstream.

        :param context: request context
        :param deployable_uuid: uuid of the deployable to program; its
                                ``vendor`` selects the driver and its
                                ``address`` is passed to the driver
        :param image_uuid: id of the bitstream image to download
        """
        # TODO (Shaohe Feng) Get image from glance.
        # And add claim and rollback logic.
        # NOTE(review): the downloaded file is not removed here; confirm
        # whether cleanup is expected elsewhere.
        path = self._download_bitstream(context, image_uuid)
        dep = self.cond_api.deployable_get(context, deployable_uuid)
        driver = self.fpga_driver.create(dep.vendor)
        driver.program(dep.address, path)

    def _download_bitstream(self, context, bitstream_uuid):
        """Download the bitstream into /tmp.

        Errors raised by the image API are not caught here and propagate
        to the caller.

        :param context: the context
        :param bitstream_uuid: id of the bitstream to download; also used
                               to build the destination file name
        :returns: the path the bitstream was downloaded to
        """
        download_path = "/tmp/" + bitstream_uuid + ".bin"
        self.image_api.download(context,
                                bitstream_uuid,
                                dest_path=download_path)
        return download_path

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """Update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
Beispiel #6
0
    def test_intel_discover(self):
        """discover() on the intel driver returns the two expected PFs."""
        expect = [{
            'function': 'pf',
            'assignable': False,
            'pr_num': '1',
            'vendor_id': '0x8086',
            'devices': '0000:5e:00.0',
            'regions': [{
                'function': 'vf',
                'assignable': True,
                'product_id': '0xbcc1',
                'name': 'intel-fpga-dev.2',
                'parent_devices': '0000:5e:00.0',
                'path': '%s/intel-fpga-dev.2' % sysinfo.SYS_FPGA,
                'vendor_id': '0x8086',
                'devices': '0000:5e:00.1'
            }],
            'name': 'intel-fpga-dev.0',
            'parent_devices': '',
            'path': '%s/intel-fpga-dev.0' % sysinfo.SYS_FPGA,
            'product_id': '0xbcc0'
        }, {
            'function': 'pf',
            'assignable': True,
            'pr_num': '0',
            'vendor_id': '0x8086',
            'devices': '0000:be:00.0',
            'name': 'intel-fpga-dev.1',
            'parent_devices': '',
            'path': '%s/intel-fpga-dev.1' % sysinfo.SYS_FPGA,
            'product_id': '0xbcc0'
        }]

        # Dicts have no ordering on Python 3, so a bare sort() raises
        # TypeError. Order both lists by the device address instead.
        def by_device(dev):
            return dev['devices']

        expect.sort(key=by_device)

        intel = FPGADriver.create("intel")
        fpgas = intel.discover()
        fpgas.sort(key=by_device)
        self.assertEqual(2, len(fpgas))
        self.assertEqual(fpgas, expect)
Beispiel #7
0
class AgentManager(periodic_task.PeriodicTasks):
    """Cyborg Agent manager main class.

    API version history:

    | 1.0 - Initial version.

    """

    RPC_API_VERSION = '1.0'
    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self, topic, host=None):
        """Create the agent manager and the API clients it relies on.

        :param topic: messaging topic, stored on the instance
        :param host: host name; ``CONF.host`` is used when this is falsy
        """
        super(AgentManager, self).__init__(CONF)
        self.topic = topic
        self.host = host or CONF.host
        self.fpga_driver = FPGADriver()
        self.cond_api = cond_api.ConductorAPI()
        self.agent_api = AgentAPI()
        self.image_api = ImageAPI()
        # Give the tracker the resolved host name. The raw ``host`` argument
        # defaults to None, which would leave the tracker without a host.
        self._rt = ResourceTracker(self.host, self.cond_api)

    def periodic_tasks(self, context, raise_on_error=False):
        """Run the registered periodic tasks and return their result."""
        return self.run_periodic_tasks(context, raise_on_error=raise_on_error)

    def fpga_program(self, context, controlpath_id, bitstream_uuid,
                     driver_name):
        """Download a bitstream to a temp file and program it.

        :param context: request context passed to the image API
        :param controlpath_id: identifier handed to the driver's
                               ``program`` call
        :param bitstream_uuid: id of the bitstream; must be UUID-like
        :param driver_name: name used to create the FPGA driver
        :returns: whatever the driver's ``program`` call returns
        :raises exception.InvalidUUID: if ``bitstream_uuid`` is not
                                       UUID-like
        """
        bitstream_uuid = str(bitstream_uuid)
        if not uuidutils.is_uuid_like(bitstream_uuid):
            raise exception.InvalidUUID(uuid=bitstream_uuid)
        # delete=False plus an explicit close(): only the reserved name is
        # needed, and the file is removed exactly once in the finally
        # block. The default delete-on-close would try to unlink it a
        # second time when the handle is finalized.
        tmp_file = tempfile.NamedTemporaryFile(suffix=".gbs",
                                               prefix=bitstream_uuid,
                                               delete=False)
        tmp_file.close()
        download_path = tmp_file.name
        try:
            # Download inside the try so a failed download is cleaned up.
            self.image_api.download(context,
                                    bitstream_uuid,
                                    dest_path=download_path)
            driver = self.fpga_driver.create(driver_name)
            ret = driver.program(controlpath_id, download_path)
            LOG.info('Driver program() API returned %s', ret)
        finally:
            LOG.debug('Remove tmp bitstream file: %s', download_path)
            os.remove(download_path)
        return ret

    @periodic_task.periodic_task(run_immediately=True)
    def update_available_resource(self, context, startup=True):
        """Update all kinds of accelerator resources from their drivers."""
        self._rt.update_usage(context)
Beispiel #8
0
    def test_intel_program(self, mock_popen):
        """program() issues the same fpgaconf command for the VF and PF."""

        class FakeProcess(object):
            # Minimal stand-in for the object the patched Popen returns.
            returncode = 0

            def wait(self):
                pass

        bus, device, function = "0x5e", "0x00", "0x0"
        expect_cmd = ['sudo', 'fpgaconf']
        expect_cmd += ['-b', bus, '-d', device, '-f', function]
        expect_cmd.append('/path/image')

        mock_popen.return_value = FakeProcess()
        intel = FPGADriver.create("intel")

        # First the VF address, then the PF address; the same command is
        # expected after each call.
        for bdf in ("0000:5e:00.1", "0000:5e:00.0"):
            intel.program(bdf, "/path/image")
            mock_popen.assert_called_with(expect_cmd, stdout=subprocess.PIPE)
Beispiel #9
0
class ResourceTracker(object):
    """Agent helper class for keeping track of resource usage when hardware
    Accelerator resources updated. Update the Deployable DB through conductor.
    """

    def __init__(self, host, cond_api):
        """Store the host and conductor client and build an FPGA driver.

        :param host: host identifier used when querying and creating
                     deployables
        :param cond_api: conductor API client kept as ``conductor_api``
        """
        # FIXME (Shaohe) local cache for Accelerator.
        # Will fix it in next release.
        self.fpgas = None
        self.host = host
        self.conductor_api = cond_api
        self.fpga_driver = FPGADriver()

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def claim(self, context):
        """Placeholder; no claim logic is implemented."""
        pass

    def _fpga_compare_and_update(self, host_dev, acclerator):
        """Copy changed mapped fields from ``host_dev`` into ``acclerator``.

        :param host_dev: discovered device, read with the values of
                         DEPLOYABLE_HOST_MAPS as keys
        :param acclerator: stored deployable, read and written with the
                           keys of DEPLOYABLE_HOST_MAPS; mutated in place
        :returns: True if at least one field differed and was overwritten
        """
        need_updated = False
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            if acclerator[k] != host_dev[v]:
                need_updated = True
                acclerator[k] = host_dev[v]
        return need_updated

    def _gen_deployable_from_host_dev(self, host_dev):
        """Build the field dict for a new deployable from a host device.

        :param host_dev: discovered device, read with the values of
                         DEPLOYABLE_HOST_MAPS as keys
        :returns: dict with the mapped fields plus host, version,
                  availability ("free") and a freshly generated uuid
        """
        dep = {k: host_dev[v] for k, v in DEPLOYABLE_HOST_MAPS.items()}
        dep["host"] = self.host
        dep["version"] = DEPLOYABLE_VERSION
        dep["availability"] = "free"
        dep["uuid"] = uuidutils.generate_uuid()
        return dep

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def update_usage(self, context):
        """Sync the discovered FPGA devices with this host's deployables.

        Deployables whose address is still discovered are updated when a
        mapped field changed, newly discovered devices are created (PFs
        first, so their regions can reference the PF's uuid), and
        deployables whose address is no longer discovered are deleted.

        :param context: request context passed to the conductor API
        """
        def create_deployable(fpgas, bdf, parent_uuid=None):
            # Create the deployable for device ``bdf`` through the
            # conductor and return what the conductor hands back.
            dep = self._gen_deployable_from_host_dev(fpgas[bdf])
            dep["parent_uuid"] = parent_uuid
            obj_dep = objects.Deployable(context, **dep)
            return self.conductor_api.deployable_create(context, obj_dep)

        # NOTE(Shaohe Feng) need more agreement on how to keep consistency.
        fpgas = self._get_fpga_devices()
        bdfs = set(fpgas)
        deployables = self.conductor_api.deployable_get_by_host(
            context, self.host)

        # NOTE(Shaohe Feng) when no "pcie_address" in deployable?
        accls = {v["pcie_address"]: v for v in deployables}
        accl_bdfs = set(accls)

        # Firstly update
        for mutual in accl_bdfs & bdfs:
            accl = accls[mutual]
            if self._fpga_compare_and_update(fpgas[mutual], accl):
                try:
                    self.conductor_api.deployable_update(context, accl)
                except RemoteError as e:
                    LOG.error(e)
        # Add
        new = bdfs - accl_bdfs
        new_pf = {n for n in new if fpgas[n]["function"] == "pf"}
        for n in new_pf:
            # Keep the PF's deployable in its own name so every region
            # below gets the PF's uuid as parent, not a sibling's.
            pf_dep = create_deployable(fpgas, n)
            accls[n] = pf_dep
            sub_vf = set()
            # Look for regions on the device entry, not on the address
            # string ``n``.
            if "regions" in fpgas[n]:
                sub_vf = {sub["devices"] for sub in fpgas[n]["regions"]}
            for vf in sub_vf & new:
                accls[vf] = create_deployable(fpgas, vf, pf_dep["uuid"])
                new.remove(vf)
        # Remaining new devices are not PFs; attach them to their parent.
        for n in new - new_pf:
            p_bdf = fpgas[n]["parent_devices"]
            p_uuid = accls[p_bdf]["uuid"]
            accls[n] = create_deployable(fpgas, n, p_uuid)

        # Delete
        for obsolete in accl_bdfs - bdfs:
            try:
                self.conductor_api.deployable_delete(context, accls[obsolete])
            except RemoteError as e:
                LOG.error(e)
            del accls[obsolete]

    def _get_fpga_devices(self):
        """Discover FPGA devices from every vendor driver.

        :returns: dict mapping each device's "devices" value to its
                  description, with nested "regions" flattened in
        """
        def form_dict(devices, fpgas):
            # Index every device and, recursively, each of its regions.
            for v in devices:
                fpgas[v["devices"]] = v
                if "regions" in v:
                    form_dict(v["regions"], fpgas)

        fpgas = {}
        vendors = self.fpga_driver.discover_vendors()
        for v in vendors:
            driver = self.fpga_driver.create(v)
            form_dict(driver.discover(), fpgas)
        return fpgas
Beispiel #10
0
 def test_program(self):
     """The base driver's program() raises NotImplementedError."""
     base_driver = FPGADriver()
     with self.assertRaises(NotImplementedError):
         base_driver.program("path", "image")
Beispiel #11
0
 def test_discover(self):
     """The base driver's discover() raises NotImplementedError."""
     base_driver = FPGADriver()
     with self.assertRaises(NotImplementedError):
         base_driver.discover()
Beispiel #12
0
 def test_create(self):
     """create() accepts "intel" and raises LookupError for "xilinx"."""
     FPGADriver.create("intel")
     with self.assertRaises(LookupError):
         FPGADriver.create("xilinx")
Beispiel #13
0
class ResourceTracker(object):
    """Agent helper class that keeps the conductor's accelerator and
    deployable records in sync with the FPGA devices found on this host.
    """
    def __init__(self, host, cond_api):
        """Store the host and conductor client and build an FPGA driver.

        :param host: host identifier used when querying and creating
                     deployables
        :param cond_api: conductor API client kept as ``conductor_api``
        """
        # FIXME (Shaohe) local cache for Accelerator.
        # Will fix it in next release.
        self.fpgas = None
        self.host = host
        self.conductor_api = cond_api
        self.fpga_driver = FPGADriver()

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def claim(self, context):
        """Placeholder; no claim logic is implemented."""
        pass

    def _fpga_compare_and_update(self, host_dev, acclerator):
        """Copy changed mapped fields from ``host_dev`` into ``acclerator``.

        :param host_dev: discovered device, read with the values of
                         DEPLOYABLE_HOST_MAPS as keys
        :param acclerator: stored deployable, read and written with the
                           keys of DEPLOYABLE_HOST_MAPS; mutated in place
        :returns: True if at least one field differed and was overwritten
        """
        need_updated = False
        for k, v in DEPLOYABLE_HOST_MAPS.items():
            if acclerator[k] != host_dev[v]:
                need_updated = True
                acclerator[k] = host_dev[v]
        return need_updated

    def _gen_accelerator_for_deployable(self,
                                        context,
                                        name,
                                        vendor,
                                        productor,
                                        desc="",
                                        dev_type="pf",
                                        acc_type="FPGA",
                                        acc_cap="",
                                        remotable=0):
        """Create an accelerator record through the conductor.

        :param context: request context used to build and create the object
        :param name: accelerator name
        :param vendor: stored as ``vendor_id``
        :param productor: stored as ``product_id``
        :param desc: free-text description
        :param dev_type: stored as ``device_type`` (e.g. "pf" or "vf")
        :param acc_type: stored as ``acc_type``, e.g. FPGA
        :param acc_cap: stored as ``acc_capability``
        :param remotable: stored as ``remotable``
        :returns: the accelerator returned by the conductor
        """
        # NOTE(review): project_id/user_id are read from
        # pecan.request.context rather than from ``context``; confirm a
        # pecan request is available wherever the agent calls this.
        db_acc = {
            'deleted': False,
            'uuid': uuidutils.generate_uuid(),
            'name': name,
            'description': desc,
            'project_id': pecan.request.context.project_id,
            'user_id': pecan.request.context.user_id,
            'device_type': dev_type,
            'acc_type': acc_type,
            'acc_capability': acc_cap,
            'vendor_id': vendor,
            'product_id': productor,
            'remotable': remotable
        }

        acc = objects.Accelerator(context, **db_acc)
        acc = self.conductor_api.accelerator_create(context, acc)
        return acc

    def _gen_deployable_from_host_dev(self,
                                      host_dev,
                                      acc_id,
                                      parent_uuid=None,
                                      root_uuid=None):
        """Build the field dict for a new deployable from a host device.

        :param host_dev: discovered device, read with the values of
                         DEPLOYABLE_HOST_MAPS as keys
        :param acc_id: stored as ``accelerator_id``
        :param parent_uuid: stored as ``parent_uuid``
        :param root_uuid: stored as ``root_uuid``
        :returns: dict with the mapped fields plus host, version,
                  availability ("free"), a freshly generated uuid and the
                  three ids above
        """
        dep = {k: host_dev[v] for k, v in DEPLOYABLE_HOST_MAPS.items()}
        dep["host"] = self.host
        dep["version"] = DEPLOYABLE_VERSION
        dep["availability"] = "free"
        dep["uuid"] = uuidutils.generate_uuid()
        dep["parent_uuid"] = parent_uuid
        dep["root_uuid"] = root_uuid
        dep["accelerator_id"] = acc_id
        return dep

    @utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
    def update_usage(self, context):
        """Sync the discovered FPGA devices with this host's deployables.

        Deployables whose address is still discovered are updated when a
        mapped field changed, newly discovered devices get an accelerator
        and a deployable (PFs first, so their regions can reference the
        PF's uuid), and deployables whose address is no longer discovered
        are deleted.

        :param context: request context passed to the conductor API
        """
        def create_accelerator(fpga, dev_type):
            # One accelerator record per newly discovered device.
            return self._gen_accelerator_for_deployable(
                context, fpga["name"], fpga["vendor_id"], fpga["product_id"],
                "FPGA device on %s" % self.host, dev_type, "FPGA")

        def create_deployable(fpgas, bdf, acc_id, parent_uuid=None):
            # Create the deployable for device ``bdf`` through the
            # conductor and return what the conductor hands back.
            dep = self._gen_deployable_from_host_dev(
                fpgas[bdf], acc_id, parent_uuid)
            obj_dep = objects.Deployable(context, **dep)
            return self.conductor_api.deployable_create(context, obj_dep)

        # NOTE(Shaohe Feng) need more agreement on how to keep consistency.
        fpgas = self._get_fpga_devices()
        bdfs = set(fpgas)
        deployables = self.conductor_api.deployable_get_by_host(
            context, self.host)

        # NOTE(Shaohe Feng) when no "address" in deployable?
        accls = {v["address"]: v for v in deployables}
        accl_bdfs = set(accls)

        # Firstly update
        for mutual in accl_bdfs & bdfs:
            accl = accls[mutual]
            if self._fpga_compare_and_update(fpgas[mutual], accl):
                try:
                    self.conductor_api.deployable_update(context, accl)
                except RemoteError as e:
                    LOG.error(e)
        # Add
        new = bdfs - accl_bdfs
        new_pf = {n for n in new if fpgas[n]["function"] == "pf"}
        for n in new_pf:
            acc = create_accelerator(fpgas[n], "pf")
            # Keep the PF's deployable in its own name so every region
            # below gets the PF's uuid as parent, not a sibling's.
            pf_dep = create_deployable(fpgas, n, acc.id)
            accls[n] = pf_dep
            sub_vf = set()
            # Look for regions on the device entry, not on the address
            # string ``n``.
            if "regions" in fpgas[n]:
                sub_vf = {sub["devices"] for sub in fpgas[n]["regions"]}
            for vf in sub_vf & new:
                # Describe the accelerator from the VF's own entry.
                acc = create_accelerator(fpgas[vf], "vf")
                accls[vf] = create_deployable(
                    fpgas, vf, acc.id, pf_dep["uuid"])
                new.remove(vf)
        # Remaining new devices are not PFs; attach them to their parent
        # and record their actual function instead of a hard-coded "pf".
        for n in new - new_pf:
            fpga = fpgas[n]
            p_uuid = accls[fpga["parent_devices"]]["uuid"]
            acc = create_accelerator(fpga, fpga["function"])
            accls[n] = create_deployable(fpgas, n, acc.id, p_uuid)

        # Delete
        for obsolete in accl_bdfs - bdfs:
            try:
                self.conductor_api.deployable_delete(context, accls[obsolete])
            except RemoteError as e:
                LOG.error(e)
            del accls[obsolete]

    def _get_fpga_devices(self):
        """Discover FPGA devices from every vendor driver.

        :returns: dict mapping each device's "devices" value to its
                  description, with nested "regions" flattened in
        """
        def form_dict(devices, fpgas):
            # Index every device and, recursively, each of its regions.
            for v in devices:
                fpgas[v["devices"]] = v
                if "regions" in v:
                    form_dict(v["regions"], fpgas)

        fpgas = {}
        vendors = self.fpga_driver.discover_vendors()
        for v in vendors:
            driver = self.fpga_driver.create(v)
            form_dict(driver.discover(), fpgas)
        return fpgas