Exemple #1
0
    def test_remote_worker(self):
        """Start a dockerized remote worker and run a stub experiment on it,
        checking the expected script output."""
        experiment_name = 'test_remote_worker_' + str(uuid.uuid4())
        queue_name = experiment_name
        logger = logs.get_logger('test_remote_worker')
        logger.setLevel(10)

        worker_cmd = [
            'studio-start-remote-worker', '--queue=' + queue_name,
            '--single-run', '--no-cache', '--timeout=30',
            '--image=peterzhokhoff/studioml'
        ]
        # Launch the worker asynchronously; its output is collected after
        # the experiment has been verified.
        worker_proc = subprocess.Popen(worker_cmd,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)

        stubtest_worker(self,
                        experiment_name=experiment_name,
                        runner_args=['--queue=' + queue_name, '--force-git'],
                        config_name='test_config_http_client.yaml',
                        test_script='tf_hello_world.py',
                        script_args=['arg0'],
                        expected_output='[ 2.0 6.0 ]',
                        queue=PubsubQueue(queue_name))

        worker_output, _ = worker_proc.communicate()
        if worker_output:
            logger.debug("studio-start-remote-worker output: \n" +
                         str(worker_output))
Exemple #2
0
    def test_remote_worker_co(self):
        """Run a remote-worker experiment with a once-captured artifact
        (--capture-once): the experiment must echo the seeded file
        contents in its output."""
        logger = logs.get_logger('test_remote_worker_co')
        logger.setLevel(10)

        tmpfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

        # Seed the captured artifact with a random string the experiment
        # is expected to print.
        random_str = str(uuid.uuid4())
        with open(tmpfile, 'w') as f:
            f.write(random_str)

        experiment_name = 'test_remote_worker_co_' + str(uuid.uuid4())
        queue_name = experiment_name
        pw = subprocess.Popen([
            'studio-start-remote-worker', '--queue=' + queue_name,
            '--single-run', '--no-cache', '--image=peterzhokhoff/studioml'
        ],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)

        try:
            stubtest_worker(self,
                            experiment_name=experiment_name,
                            runner_args=[
                                '--capture-once=' + tmpfile + ':f',
                                '--queue=' + queue_name, '--force-git'
                            ],
                            config_name='test_config_http_client.yaml',
                            test_script='art_hello_world.py',
                            script_args=[],
                            expected_output=random_str,
                            queue=PubsubQueue(queue_name))

            workerout, _ = pw.communicate()
            # Guard like the sibling tests do: without it a silent worker
            # logs the literal string 'None'.
            if workerout:
                logger.debug('studio-start-remote-worker output: \n' +
                             str(workerout))
        finally:
            # Remove the temp file even when the experiment fails, so
            # repeated runs do not leak files into the temp directory.
            os.remove(tmpfile)
Exemple #3
0
    def test_remote_worker_c(self):
        """Run a remote worker with a two-way captured artifact (--capture):
        seed a file, let the experiment overwrite it, then download the
        artifact and verify the new contents."""
        tmpfile = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

        logger = logs.get_logger('test_remote_worker_c')
        logger.setLevel(10)
        experiment_name = "test_remote_worker_c_" + str(uuid.uuid4())

        # Seed the artifact with a known random string.
        random_str1 = str(uuid.uuid4())
        with open(tmpfile, 'w') as f:
            f.write(random_str1)

        # The experiment script receives this value and is expected to
        # write it back into the captured artifact.
        random_str2 = str(uuid.uuid4())

        queue_name = experiment_name
        pw = subprocess.Popen([
            'studio-start-remote-worker', '--queue=' + queue_name,
            '--single-run', '--no-cache', '--image=peterzhokhoff/studioml'
        ],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)

        db = stubtest_worker(self,
                             experiment_name=experiment_name,
                             runner_args=[
                                 '--capture=' + tmpfile + ':f',
                                 '--queue=' + queue_name, '--force-git'
                             ],
                             config_name='test_config_http_client.yaml',
                             test_script='art_hello_world.py',
                             script_args=[random_str2],
                             expected_output=random_str1,
                             queue=PubsubQueue(queue_name),
                             delete_when_done=False)

        workerout, _ = pw.communicate()
        if workerout:
            logger.debug("studio-start-remote-worker output: \n" +
                         str(workerout))
        os.remove(tmpfile)

        tmppath = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        if os.path.exists(tmppath):
            os.remove(tmppath)

        # Pull the artifact back from storage and check the worker wrote
        # the updated value.
        db.get_artifact(db.get_experiment(experiment_name).artifacts['f'],
                        tmppath,
                        only_newer=False)

        with open(tmppath, 'r') as f:
            # assertEquals is a deprecated alias; use assertEqual.
            self.assertEqual(f.read(), random_str2)
        os.remove(tmppath)
        db.delete_experiment(experiment_name)
Exemple #4
0
    def test_unfold_tuples(self):
        """Check that HyperparameterParser.convert_to_tuples yields the full
        cartesian product of the hyperparameter value lists."""
        # Logger name previously said 'test_stop_experiment' (copy-paste
        # from another test); use this test's actual name.
        logger = logs.get_logger('test_unfold_tuples')
        h = HyperparameterParser(RunnerArgs(), logger)

        hyperparams = [Hyperparameter(name='a', values=[1, 2, 3]),
                       Hyperparameter(name='b', values=[4, 5])]

        # 3 values of 'a' x 2 values of 'b' -> 6 combinations.
        expected_tuples = [
            {'a': 1, 'b': 4}, {'a': 2, 'b': 4}, {'a': 3, 'b': 4},
            {'a': 1, 'b': 5}, {'a': 2, 'b': 5}, {'a': 3, 'b': 5}]

        # Sort by string form so the comparison is order-independent.
        self.assertEqual(
            sorted(h.convert_to_tuples(hyperparams), key=lambda x: str(x)),
            sorted(expected_tuples, key=lambda x: str(x)))
Exemple #5
0
    def test_parse_range(self):
        """Check HyperparameterParser._parse_grid against comma lists,
        start:end, start:step:end, start:count:end and logarithmic
        (suffix 'l') range specifications."""
        # Logger name previously said 'test_stop_experiment' (copy-paste
        # from another test); use this test's actual name.
        logger = logs.get_logger('test_parse_range')
        h = HyperparameterParser(RunnerArgs(), logger)
        range_strs = ['1,2,3', ':5', '2:5', '0.1:0.05:0.3', '0.1:3:0.3',
                      '0.01:4l:10']
        # Expected expansions, one list per range string above
        # (reformatted from an auto-formatter-mangled nested literal).
        gd_truths = [
            [1.0, 2.0, 3.0],
            [0.0, 1.0, 2.0, 3.0, 4.0, 5.0],
            [2.0, 3.0, 4.0, 5.0],
            [0.1, 0.15, 0.2, 0.25, 0.3],
            [0.1, 0.2, 0.3],
            [0.01, 0.1, 1, 10],
        ]

        for range_str, gd_truth in zip(range_strs, gd_truths):
            hyperparameter = h._parse_grid("test", range_str)
            # Float comparison with tolerance, element-wise.
            self.assertTrue(np.isclose(hyperparameter.values, gd_truth).all())
Exemple #6
0
    def __init__(
        self,
        # Name of experiment
        experimentId,
        # Completion service configuration
        cs_config=None,
        # used to pass a studioML configuration block read by client software
        studio_config=None,
        # Studio config yaml file
        studio_config_file=None,
        shutdown_del_queue=False
    ):
        """Configure the completion service: load StudioML config, resolve
        resource requirements, and pick/create the task queue and cloud
        worker manager."""
        # StudioML configuration
        self.config = model.get_config(studio_config_file)

        # cs_config defaults to None; normalize so the .get() calls below
        # do not raise AttributeError.
        cs_config = cs_config or {}

        self.logger = logs.get_logger(self.__class__.__name__)
        self.verbose_level = parse_verbosity(self.config['verbose'])
        self.logger.setLevel(self.verbose_level)

        # Setup Completion Service instance properties
        # based on configuration
        self.experimentId = experimentId
        self.project_name = "completion_service_" + experimentId

        self.resumable = RESUMABLE
        self.clean_queue = CLEAN_QUEUE
        self.queue_upscaling = QUEUE_UPSCALING
        self.num_workers = int(cs_config.get('num_workers', 1))
        self.cloud_timeout = cs_config.get('timeout')
        self.bid = cs_config.get('bid')
        self.ssh_keypair = cs_config.get('ssh_keypair')
        self.sleep_time = cs_config.get('sleep_time')
        self.shutdown_del_queue = shutdown_del_queue

        # Figure out request for resources:
        # Copy the defaults instead of aliasing them -- update() would
        # otherwise mutate the shared module-level DEFAULT_RESOURCES_NEEDED
        # dict and leak state between instances. (Shallow copy; assumed
        # sufficient for a flat resources dict -- TODO confirm.)
        resources_needed = cs_config.get('resources_needed')
        self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
        if resources_needed:
            # Only update when present; update(None) raises TypeError.
            self.resources_needed.update(resources_needed)
        studio_resources = self.config.get('resources_needed')
        if studio_resources:
            self.resources_needed.update(studio_resources)

        # Figure out task queue and cloud we are going to use:
        queue_name = cs_config.get('queue')
        cloud_name = cs_config.get('cloud')
        if cs_config.get('local'):
            queue_name = None
            cloud_name = None
        elif queue_name is not None:
            # Caller supplied an existing queue, so it owns the queue's
            # lifetime: never delete it on shutdown.
            self.shutdown_del_queue = False
            if cloud_name in ['ec2spot', 'ec2']:
                assert queue_name.startswith("sqs_")
        else:
            # Was self.experiment_id, an attribute that is never set (the
            # assignment above uses experimentId) and raised AttributeError.
            queue_name = self.experimentId
            if cloud_name in ['ec2spot', 'ec2']:
                queue_name = "sqs_" + queue_name
        self.cloud = cloud_name
        if queue_name is not None and queue_name.startswith("rmq_"):
            # RabbitMQ queues are only supported without a cloud backend.
            assert self.cloud is None

        self.wm = model.get_worker_manager(
            self.config, self.cloud)

        if queue_name is not None:
            self.logger.info(
                "CompletionService configured with queue {0}"
                    .format(queue_name))

        self.queue = model.get_queue(queue_name=queue_name, cloud=self.cloud,
                                      config=self.config,
                                      logger=self.logger,
                                      verbose=self.verbose_level)

        # The queue implementation may rewrite the name (e.g. add prefixes);
        # record the final name it reports.
        self.queue_name = self.queue.get_name()

        # Map of submitted experiment ids -> bookkeeping (filled later).
        self.submitted = {}
        self.use_spot = cloud_name in ['ec2spot', 'gcspot']

        self.logger.info("Project name: {0}".format(self.project_name))
        self.logger.info("Initial/final queue name: {0}, {1}"
                         .format(queue_name, self.queue_name))
        self.logger.info("Cloud name: {0}".format(self.cloud))
Exemple #7
0
def main(args=sys.argv):
    """Entry point for the studio-remote-worker CLI.

    Parses command-line options, selects a queue implementation from the
    queue-name prefix, and hands control to worker_loop.
    """
    logger = logs.get_logger('studio-remote-worker')

    parser = argparse.ArgumentParser(description='Studio remote worker. \
                     Usage: studio-remote-worker \
                     ')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--guest',
                        help='Guest mode (does not require db credentials)',
                        action='store_true')
    parser.add_argument(
        '--single-run',
        help='quit after a single run (regardless of the state of the queue)',
        action='store_true')
    parser.add_argument('--queue', help='queue name', required=True)
    parser.add_argument('--verbose',
                        '-v',
                        help='Verbosity level. Allowed vaules: ' +
                        'debug, info, warn, error, crit ' +
                        'or numerical value of logger levels.',
                        default=None)
    parser.add_argument(
        '--timeout',
        '-t',
        help='Timeout after which remote worker stops listening (in seconds)',
        type=int,
        default=100)

    opts, _extra_args = parser.parse_known_args(args)

    verbosity = parse_verbosity(opts.verbose)
    logger.setLevel(verbosity)

    config = model.get_config(opts.config) if opts.config is not None else None

    # The queue-name prefix determines the backing queue implementation.
    queue_name = opts.queue
    if queue_name.startswith(('ec2_', 'sqs_')):
        queue = SQSQueue(queue_name, verbose=verbosity)
    elif queue_name.startswith('rmq_'):
        queue = get_cached_queue(name=queue_name,
                                 route='StudioML.' + queue_name,
                                 config=config,
                                 logger=logger,
                                 verbose=verbosity)
    else:
        queue = PubsubQueue(queue_name, verbose=verbosity)

    logger.info('Waiting for work')

    # Clamp negative timeouts to zero (wait indefinitely semantics are
    # handled inside worker_loop).
    timeout = max(opts.timeout, 0)

    logger.info('Starting working')
    worker_loop(queue,
                opts,
                single_experiment=opts.single_run,
                timeout=timeout,
                verbose=verbosity)
Exemple #8
0
def get_logger():
    """Return the shared 'studio-serve' logger, creating it on first use."""
    global logger
    if logger:
        return logger
    logger = logs.get_logger('studio-serve')
    logger.setLevel(logs.DEBUG)
    return logger
import importlib
import shutil
import pickle
import os
import sys
import six
import signal
import pdb

from studio import fs_tracker, model, logs, util

logger = logs.get_logger('completion_service_client')
# Verbosity may be passed as the first CLI argument; fall back to level 10
# (DEBUG) when it is missing or unparseable. Catch Exception rather than
# BaseException so KeyboardInterrupt/SystemExit are not swallowed here.
try:
    logger.setLevel(model.parse_verbosity(sys.argv[1]))
except Exception:
    logger.setLevel(10)


def main():
    logger.debug('copying and importing client module')
    logger.debug('getting file mappings')

    # Register signal handler for signal.SIGUSR1
    # which will invoke built-in Python debugger:
    signal.signal(signal.SIGUSR1, lambda sig, stack: pdb.set_trace())

    artifacts = fs_tracker.get_artifacts()
    files = {}
    logger.debug("Artifacts = {}".format(artifacts))

    for tag, path in six.iteritems(artifacts):
Exemple #10
0
import time
from studio import logs

logger = logs.get_logger('helloworld')
logger.setLevel(10)

# Heartbeat script: log an incrementing elapsed-seconds message once per
# second, forever (terminated externally).
seconds_elapsed = 0
while True:
    logger.info('{} seconds passed '.format(seconds_elapsed))
    time.sleep(1)
    seconds_elapsed += 1
Exemple #11
0
    def test_baked_image(self):
        """Build a docker image with baked-in credentials via
        studio-add-credentials and run the remote-worker stub test with it.
        Skips (returns early) when docker is not installed."""
        logger = logs.get_logger('test_baked_image')
        logger.setLevel(logs.DEBUG)

        # check if docker is installed
        dockertestp = subprocess.Popen(['docker'],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)

        dockertestout, _ = dockertestp.communicate()
        if dockertestout:
            logger.info("docker test output: \n" + str(dockertestout))

        if dockertestp.returncode != 0:
            # Treat a missing docker install as a skip, not a failure.
            logger.error("docker is not installed (correctly)")
            return

        image = 'test_image' + str(uuid.uuid4())

        addcredsp = subprocess.Popen(
            [
                'studio-add-credentials', '--tag=' + image,
                '--base-image=peterzhokhoff/studioml'
            ],
            # stdout/stderr left attached so build progress is visible.
        )

        addcredsout, _ = addcredsp.communicate()
        if addcredsout:
            logger.info('studio-add-credentials output: \n' + str(addcredsout))
        if addcredsp.returncode != 0:
            logger.error("studio-add-credentials failed.")
            # self.fail is clearer than assertTrue(False).
            self.fail("studio-add-credentials failed.")

        experiment_name = 'test_remote_worker_baked' + str(uuid.uuid4())
        queue_name = experiment_name
        # (A second logs.get_logger('test_baked_image') / setLevel(10) pair
        # previously re-initialized the same logger here; removed as
        # redundant.)

        pw = subprocess.Popen(
            [
                'studio-start-remote-worker', '--queue=' + queue_name,
                '--no-cache', '--single-run', '--timeout=30',
                '--image=' + image
            ],
            # stdout/stderr left attached so worker progress is visible.
        )

        stubtest_worker(self,
                        experiment_name=experiment_name,
                        runner_args=['--queue=' + queue_name, '--force-git'],
                        config_name='test_config_http_client.yaml',
                        test_script='tf_hello_world.py',
                        script_args=['arg0'],
                        expected_output='[ 2.0 6.0 ]',
                        queue=PubsubQueue(queue_name))

        workerout, _ = pw.communicate()
        if workerout:
            logger.debug("studio-start-remote-worker output: \n" +
                         str(workerout))

        # Clean up the credentialed image so it does not accumulate locally.
        rmip = subprocess.Popen(['docker', 'rmi', image],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)

        rmiout, _ = rmip.communicate()

        if rmiout:
            logger.info('docker rmi output: \n' + str(rmiout))