Example #1
    def cluster_def(self):
        cluster = {
            TaskType.MASTER: 1,
        }
        is_distributed = False
        environment = self.environment

        if not environment:
            return cluster, is_distributed

        if environment.tensorflow:
            return TensorflowSpecification.get_cluster_def(
                cluster=cluster, tensorflow_config=environment.tensorflow)
        if environment.horovod:
            return HorovodSpecification.get_cluster_def(
                cluster=cluster, horovod_config=environment.horovod)
        if environment.mxnet:
            return MXNetSpecification.get_cluster_def(
                cluster=cluster, mxnet_config=environment.mxnet)
        if environment.pytorch:
            return PytorchSpecification.get_cluster_def(
                cluster=cluster, pytorch_config=environment.pytorch)

        # No framework specified; return the default standalone cluster definition
        return cluster, is_distributed
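
For a distributed PyTorch environment, cluster_def fills in the worker count from the framework config and flags the run as distributed. A minimal sketch of the expected return value, assuming a spec whose environment sets pytorch.n_workers == 5 (the fixture asserted in Example #6 below); the spec variable here is hypothetical:

    # Hypothetical illustration, not part of the original example
    cluster, is_distributed = spec.cluster_def
    assert cluster == {TaskType.MASTER: 1, TaskType.WORKER: 5}
    assert is_distributed is True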
Example #2
def handle_pytorch_experiment(experiment, spawner, response):
    # Create a job record for the master task of this experiment
    master = response[TaskType.MASTER]
    job_uuid = master['pod']['metadata']['labels']['job_uuid']
    job_uuid = uuid.UUID(job_uuid)

    create_job(job_uuid=job_uuid,
               experiment=experiment,
               definition=get_job_definition(master),
               resources=spawner.spec.master_resources)

    cluster, is_distributed = spawner.spec.cluster_def
    worker_resources = PytorchSpecification.get_worker_resources(
        environment=spawner.spec.environment,
        cluster=cluster,
        is_distributed=is_distributed)

    for i, worker in enumerate(response[TaskType.WORKER]):
        job_uuid = worker['pod']['metadata']['labels']['job_uuid']
        job_uuid = uuid.UUID(job_uuid)
        create_job(job_uuid=job_uuid,
                   experiment=experiment,
                   definition=get_job_definition(worker),
                   role=TaskType.WORKER,
                   resources=worker_resources.get(i))
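
PytorchSpecification.get_worker_resources returns a mapping keyed by worker index, which is why the loop above looks each worker up with worker_resources.get(i). A minimal sketch of what the mapping holds, assuming spec is the parsed specification (spawner.spec above) for a run with 5 workers where a single worker carries an indexed override, as in the test in Example #6:

    # Hypothetical illustration: worker index -> PodResourcesConfig
    assert len(worker_resources) == spec.environment.pytorch.n_workers
    assert set(worker_resources.values()) == {
        spec.environment.pytorch.default_worker_resources,
        spec.environment.pytorch.worker_resources[0],  # the indexed override
    }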
Example #3
    def node_selectors(self):
        cluster, is_distributed = self.spec.cluster_def
        worker_node_selectors = PytorchSpecification.get_worker_node_selectors(
            environment=self.spec.environment,
            cluster=cluster,
            is_distributed=is_distributed)
        return {
            TaskType.MASTER: {0: self.spec.master_node_selectors},
            TaskType.WORKER: worker_node_selectors,
        }
Example #4
    def affinities(self):
        cluster, is_distributed = self.spec.cluster_def
        worker_affinities = PytorchSpecification.get_worker_affinities(
            environment=self.spec.environment,
            cluster=cluster,
            is_distributed=is_distributed)
        return {
            TaskType.MASTER: {0: self.spec.master_affinity},
            TaskType.WORKER: worker_affinities,
        }
Example #5
    def total_resources(self):
        environment = self.environment

        if not environment:
            return None

        cluster, is_distributed = self.cluster_def

        # Check if any framework is defined
        if environment.tensorflow:
            return TensorflowSpecification.get_total_resources(
                master_resources=self.master_resources,
                environment=environment,
                cluster=cluster,
                is_distributed=is_distributed)

        if environment.horovod:
            return HorovodSpecification.get_total_resources(
                master_resources=self.master_resources,
                environment=environment,
                cluster=cluster,
                is_distributed=is_distributed)

        if environment.mxnet:
            return MXNetSpecification.get_total_resources(
                master_resources=self.master_resources,
                environment=environment,
                cluster=cluster,
                is_distributed=is_distributed)

        if environment.pytorch:
            return PytorchSpecification.get_total_resources(
                master_resources=self.master_resources,
                environment=environment,
                cluster=cluster,
                is_distributed=is_distributed)

        # By default, return only the master resources
        return self.master_resources
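
The aggregated value is a plain dict of request/limit pairs per resource type. A sketch of its shape, using the totals asserted in the test in Example #6 (1 master, 4 workers on the default resources, 1 worker with an indexed memory override); the numbers below are only illustrative for that fixture:

    # Hypothetical illustration of the aggregated result for that fixture:
    # {
    #     'cpu': {'requests': 13, 'limits': 14},         # master 1/2 + 4 workers at 3
    #     'memory': {'requests': 1324, 'limits': 1324},  # 300 + 4 * 256
    #     'gpu': None,
    # }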
Example #6
    def test_distributed_pytorch_passes(self):
        plxfile = PolyaxonFile(
            os.path.abspath('tests/fixtures/distributed_pytorch_file.yml'))
        spec = plxfile.specification
        assert spec.version == 1
        assert spec.project.name == 'project1'
        assert isinstance(spec.settings, SettingsConfig)
        assert isinstance(spec.settings.logging, LoggingConfig)
        assert spec.settings.matrix is None
        assert isinstance(spec.environment, EnvironmentConfig)
        assert spec.is_runnable
        assert spec.framework == Frameworks.PYTORCH
        assert spec.environment.pytorch.n_workers == 5

        assert isinstance(spec.environment.resources, PodResourcesConfig)
        assert isinstance(spec.environment.resources.cpu, K8SResourcesConfig)
        assert spec.environment.resources.cpu.requests == 1
        assert spec.environment.resources.cpu.limits == 2

        assert isinstance(spec.environment.pytorch.default_worker_resources,
                          PodResourcesConfig)
        assert isinstance(
            spec.environment.pytorch.default_worker_resources.cpu,
            K8SResourcesConfig)
        assert spec.environment.pytorch.default_worker_resources.cpu.requests == 3
        assert spec.environment.pytorch.default_worker_resources.cpu.limits == 3
        assert isinstance(
            spec.environment.pytorch.default_worker_resources.memory,
            K8SResourcesConfig)
        assert spec.environment.pytorch.default_worker_resources.memory.requests == 256
        assert spec.environment.pytorch.default_worker_resources.memory.limits == 256

        assert isinstance(spec.environment.pytorch.worker_resources[0],
                          PodResourcesConfig)
        assert isinstance(spec.environment.pytorch.worker_resources[0].memory,
                          K8SResourcesConfig)
        assert spec.environment.pytorch.worker_resources[0].index == 3
        assert spec.environment.pytorch.worker_resources[0].memory.requests == 300
        assert spec.environment.pytorch.worker_resources[0].memory.limits == 300

        # Check that the properties returning the per-worker configs and resources work
        cluster, is_distributed = spec.cluster_def
        worker_resources = PytorchSpecification.get_worker_resources(
            environment=spec.environment,
            cluster=cluster,
            is_distributed=is_distributed)
        assert len(worker_resources) == spec.environment.pytorch.n_workers
        assert set(worker_resources.values()) == {
            spec.environment.pytorch.default_worker_resources,
            spec.environment.pytorch.worker_resources[0]
        }

        # Check total resources: master (cpu 1/2) + 4 workers on the default
        # resources (cpu 3, memory 256) + 1 worker with the indexed override (memory 300)
        assert spec.total_resources == {
            'cpu': {
                'requests': 1 + 3 * 4,
                'limits': 2 + 3 * 4
            },
            'memory': {
                'requests': 300 + 256 * 4,
                'limits': 300 + 256 * 4
            },
            'gpu': None
        }

        assert spec.cluster_def == ({
            TaskType.MASTER: 1,
            TaskType.WORKER: 5
        }, True)