Esempio n. 1
0
    cluster,
    constants,
    metrics,
    evaluator_metrics,
    mlflow,
    tensorboard,
    event,
    experiment,
    topologies
)

# NOTE(review): presumably the maximum number of attempts made when fetching
# YARN logs -- the code using this constant is not visible here; confirm.
YARN_LOG_TRIES = 15

# Type alias: a zero-argument callable returning an ``experiment.Experiment``.
ExperimentFn = Callable[[], experiment.Experiment]

# Result of ``topologies.single_server_topology()`` called with its default
# arguments, evaluated once when this module is imported.
TASK_SPEC_NONE = topologies.single_server_topology()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Directory part of this module's file path.
here = os.path.dirname(__file__)


class SkeinCluster(NamedTuple):
    """Immutable record bundling the objects tied to one skein application.

    The field meanings below are inferred from the annotations only; the code
    that builds and consumes this tuple is not visible here.
    """
    # Skein client handle.
    client: skein.Client
    # Client for the skein application.
    app: skein.ApplicationClient
    # List of (str, int) pairs -- presumably (task type, instance count or
    # index); TODO confirm against the code that builds this tuple.
    tasks: List[Tuple[str, int]]
    # Thread object -- presumably the one collecting events; TODO confirm.
    event_listener: Thread
    # Two-level string mapping -- presumably task -> {event name: value};
    # TODO confirm against the code that fills it.
    events: Dict[str, Dict[str, str]]


class ContainerLogStatus(NamedTuple):
Esempio n. 2
0
from typing import Union, Dict, Callable, Optional

from tf_yarn import client
from tf_yarn import topologies
from tf_yarn.metrics import Metrics
from tf_yarn.tensorflow.experiment import Experiment
from tf_yarn.tensorflow.keras_experiment import KerasExperiment
from tf_yarn.tensorflow.metrics import _add_monitor_to_experiment

# Type aliases: zero-argument factories returning an ``Experiment`` or a
# ``KerasExperiment`` respectively.
ExperimentFn = Callable[[], Experiment]
KerasExperimentFn = Callable[[], KerasExperiment]

# Result of ``topologies.single_server_topology()`` called with its default
# arguments. It is evaluated once at import time and used as the default value
# of ``run_on_yarn``'s ``task_specs`` parameter, so every call relying on the
# default receives this same object.
DEFAULT_TASK_SPEC = topologies.single_server_topology()


def run_on_yarn(experiment_fn: Union[ExperimentFn, KerasExperimentFn],
                task_specs: Dict[str, topologies.TaskSpec] = DEFAULT_TASK_SPEC,
                *args,
                **kwargs) -> Optional[Metrics]:
    """Delegate to ``client.run_on_yarn`` with a monitor-adding experiment factory.

    ``experiment_fn`` is not called here. It is wrapped in a new zero-argument
    factory which, when invoked, calls ``experiment_fn`` and passes the result
    through ``_add_monitor_to_experiment``.

    Args:
        experiment_fn: Zero-argument callable building the experiment.
        task_specs: Mapping of names to ``topologies.TaskSpec``; defaults to
            the module-level ``DEFAULT_TASK_SPEC``.
        *args: Extra positional arguments forwarded unchanged to
            ``client.run_on_yarn``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``client.run_on_yarn``.

    Returns:
        Whatever ``client.run_on_yarn`` returns.
    """
    def _with_monitor():
        # Build the experiment only when the factory is actually invoked,
        # then hand it to the monitoring helper.
        built_experiment = experiment_fn()
        return _add_monitor_to_experiment(built_experiment)

    return client.run_on_yarn(_with_monitor, task_specs, *args, **kwargs)
Esempio n. 3
0
def test_single_server_topology():
    """Check that ``single_server_topology`` rejects over-limit resources.

    Asking for one unit more than ``MAX_MEMORY_CONTAINER`` of memory, or one
    more than ``MAX_VCORES_CONTAINER`` vcores, must raise ``ValueError``.
    """
    over_limit_requests = (
        {"memory": MAX_MEMORY_CONTAINER + 1},
        {"vcores": MAX_VCORES_CONTAINER + 1},
    )
    for request in over_limit_requests:
        with pytest.raises(ValueError):
            topologies.single_server_topology(**request)