def test_experiment_lifetime(self):
        my_path = os.path.dirname(os.path.realpath(__file__))

        logger = logs.getLogger('test_experiment_lifetime')
        logger.setLevel(10)

        config_name = os.path.join(my_path, 'test_config_http_client.yaml')
        key = 'test_experiment_lifetime' + str(uuid.uuid4())

        with model.get_db_provider(model.get_config(config_name)) as db:
            try:
                db.delete_experiment(key)
            except Exception:
                pass

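            # A negative lifetime (-10m) means the experiment has already
            # outlived its relevance, so the runner is expected not to start it.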
            p = subprocess.Popen(['studio', 'run',
                                  '--config=' + config_name,
                                  '--experiment=' + key,
                                  '--force-git',
                                  '--verbose=debug',
                                  '--lifetime=-10m',
                                  'stop_experiment.py'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 cwd=my_path)

            pout, _ = p.communicate()

            if pout:
                logger.debug("studio run output: \n" + pout.decode())

            db.delete_experiment(key)
Example #2
    def _test_serving(self, data_in, expected_data_out, wrapper=None):

        self.port = randint(5000, 9000)
        server_experimentid = 'test_serving_' + str(uuid.uuid4())

        with get_local_queue_lock():
            args = [
                'studio', 'run', '--force-git', '--verbose=debug',
                '--experiment=' + server_experimentid,
                '--config=' + self.get_config_path(), 'studio::serve_main',
                '--port=' + str(self.port), '--host=localhost'
            ]

            if wrapper:
                args.append('--wrapper=' + wrapper)

            subprocess.Popen(args, cwd=os.path.dirname(__file__))
            time.sleep(60)

        try:
            retval = requests.post(url='http://localhost:' + str(self.port),
                                   json=data_in)
            data_out = retval.json()
            assert data_out == expected_data_out

        finally:
            with model.get_db_provider(model.get_config(
                    self.get_config_path())) as db:

                db.stop_experiment(server_experimentid)
                time.sleep(20)
                db.delete_experiment(server_experimentid)
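A hypothetical call to this helper, assuming a served model that simply echoes its JSON input back (the payload shape and echo behavior are assumptions, not part of the excerpt):

    def test_serving_identity(self):
        # Assumes the experiment served by studio::serve_main echoes
        # the posted JSON payload back unchanged.
        self._test_serving({'data': [1, 2, 3]},
                           {'data': [1, 2, 3]})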
Example #3
    def test_get_config_env(self):
        value1 = str(uuid.uuid4())
        os.environ['TEST_VAR1'] = value1
        value2 = str(uuid.uuid4())
        os.environ['TEST_VAR2'] = value2

        config = model.get_config(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         'test_config_env.yaml'))
        self.assertEqual(config['test_key'], value1)
        self.assertEqual(config['test_section']['test_key'], value2)
Example #4
def main(args=sys.argv[1:]):
    parser = argparse.ArgumentParser(
        description='Studio WebUI server. \
                     Usage: studio \
                     <arguments>')

    parser.add_argument('--config', help='configuration file', default=None)
#    parser.add_argument('--guest',
#                        help='Guest mode (does not require db credentials)',
#                        action='store_true')

    parser.add_argument('--port',
                        help='port to run Flask server on',
                        type=int,
                        default=5000)

    parser.add_argument('--host',
                        help='host name.',
                        default='localhost')

    parser.add_argument(
        '--verbose', '-v',
        help='Verbosity level. Allowed values: ' +
             'debug, info, warn, error, crit ' +
             'or numerical value of logger levels.',
        default=None)

    args = parser.parse_args(args)
    config = model.get_config()
    if args.config:
        with open(args.config) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)

    if args.verbose:
        config['verbose'] = args.verbose

#    if args.guest:
#        config['database']['guest'] = True
    global _config
    global _db_provider
    _config = config
    _db_provider = model.get_db_provider(_config)

    getlogger().setLevel(parse_verbosity(config.get('verbose')))

    global _save_auth_cookie
    _save_auth_cookie = True

    print('Starting Studio UI on port {0}'.format(args.port))
    app.run(host=args.host, port=args.port)
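A minimal way to launch the UI from Python, equivalent to invoking the `studio` WebUI console command (the port and verbosity values here are illustrative):

    main(['--port', '8080', '--verbose', 'info'])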
Example #5
    def test_stop_experiment(self):
        my_path = os.path.dirname(os.path.realpath(__file__))

        logger = logs.getLogger('test_stop_experiment')
        logger.setLevel(10)

        config_name = os.path.join(my_path, 'test_config_http_client.yaml')
        key = 'test_stop_experiment' + str(uuid.uuid4())

        with model.get_db_provider(model.get_config(config_name)) as db:
            try:
                db.delete_experiment(key)
            except Exception:
                pass

            p = subprocess.Popen(['studio', 'run',
                                  '--config=' + config_name,
                                  '--experiment=' + key,
                                  '--force-git',
                                  '--verbose=debug',
                                  'stop_experiment.py'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 cwd=my_path)

            # wait till experiment spins up
            experiment = None
            while experiment is None or experiment.status == 'waiting':
                time.sleep(1)
                try:
                    experiment = db.get_experiment(key)
                except BaseException:
                    pass

            logger.info('Stopping experiment')
            db.stop_experiment(key)
            pout, _ = p.communicate()

            if pout:
                logger.debug("studio run output: \n" + pout.decode())

            db.delete_experiment(key)
Example #6
    def __init__(
        self,
        # Name of experiment
        experimentId,
        # Config yaml file
        config=None,
        # Number of remote workers to spin up
        num_workers=1,
        # Compute requirements: amount of RAM, GPU, etc.
        # (None rather than a mutable {} default, a classic Python pitfall)
        resources_needed=None,
        # Name of the queue for submission to a server.
        queue=None,
        # What computer resource to use, either AWS, Google, or local
        cloud=None,
        # Timeout for cloud instances
        cloud_timeout=100,
        # Bid price for EC2 spot instances
        bid='100%',
        # Keypair to use for EC2 workers
        ssh_keypair=None,
        # If true, get results that are submitted by other instances of CS
        resumable=False,
        # Whether to clean the submission queue on initialization
        clean_queue=True,
        # Whether to enable autoscaling for EC2 instances
        queue_upscaling=True,
        # Whether to delete the queue on shutdown
        shutdown_del_queue=False,
        # delay between queries for results
        sleep_time=1
    ):

        self.config = model.get_config(config)
        self.cloud = cloud
        self.experimentId = experimentId
        self.project_name = "completion_service_" + experimentId

        # Copy the defaults so the module-level DEFAULT_RESOURCES_NEEDED
        # dict is not mutated by the updates below.
        self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
        if self.config.get('resources_needed'):
            self.resources_needed.update(self.config.get('resources_needed'))

        if resources_needed:
            self.resources_needed.update(resources_needed)

        self.wm = runner.get_worker_manager(
            self.config, self.cloud)

        self.logger = logs.getLogger(self.__class__.__name__)
        self.verbose_level = model.parse_verbosity(self.config['verbose'])
        self.logger.setLevel(self.verbose_level)

        self.queue = runner.get_queue(queue, self.cloud,
                                      self.verbose_level)

        self.queue_name = self.queue.get_name()

        self.clean_queue = clean_queue
        if self.clean_queue:
            self.queue.clean()

        self.cloud_timeout = cloud_timeout
        self.bid = bid
        self.ssh_keypair = ssh_keypair

        self.submitted = set([])
        self.num_workers = num_workers
        self.resumable = resumable
        self.queue_upscaling = queue_upscaling
        self.shutdown_del_queue = shutdown_del_queue
        self.use_spot = cloud in ['ec2spot', 'gcspot']
        self.sleep_time = sleep_time
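A sketch of constructing this service for local execution, based only on the signature above (the experiment id and config filename are placeholders, and uuid is assumed to be imported; the submission and result-retrieval methods are not shown in this excerpt):

    cs = CompletionService(
        'my_experiment_' + str(uuid.uuid4()),
        config='studio_config.yaml',  # placeholder config file
        num_workers=2,
        cloud=None)                   # run locally rather than on EC2/GCE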
Example #7
def get_config():
    global _config
    if _config is None:
        _config = model.get_config()
    return _config
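This lazy-initialization pattern is not thread-safe: two threads can both observe `_config is None` and build the config twice. A minimal sketch of a lock-guarded variant (the lock name is illustrative, not part of the project):

    import threading

    _config_lock = threading.Lock()

    def get_config():
        global _config
        with _config_lock:
            # Only the first caller pays the cost of building the config.
            if _config is None:
                _config = model.get_config()
        return _config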
Example #8
import requests
from requests.exceptions import ChunkedEncodingError

# Flask and model are used below but were missing from this excerpt:
from flask import Flask

from . import model
from .experiment import experiment_from_dict
from .auth import get_and_verify_user, get_auth
from .util import parse_verbosity
from studio.util import logs

app = Flask(__name__)


DB_PROVIDER_EXPIRATION = 1800

_db_provider_timestamp = None
_db_provider = None
_config = model.get_config()

_tensorboard_dirs = {}
_save_auth_cookie = False

logger = None


@app.route('/')
def dashboard():
    return _render('dashboard.html')


@app.route('/projects')
def projects():
    return _render('projects.html')
Example #9
def stubtest_worker(
        testclass,
        experiment_name,
        runner_args,
        config_name,
        test_script,
        expected_output,
        script_args=[],
        queue=LocalQueue(),
        wait_for_experiment=True,
        delete_when_done=True,
        test_output=True,
        test_workspace=True):

    my_path = os.path.dirname(os.path.realpath(__file__))
    config_name = os.path.join(my_path, config_name)
    logger = logs.getLogger('stubtest_worker')
    logger.setLevel(10)

    queue.clean()

    with model.get_db_provider(model.get_config(config_name)) as db:
        try:
            db.delete_experiment(experiment_name)
        except Exception:
            pass

    os.environ['PYTHONUNBUFFERED'] = 'True'
    p = subprocess.Popen(['studio', 'run'] + runner_args +
                         ['--config=' + config_name,
                          '--verbose=debug',
                          '--force-git',
                          '--experiment=' + experiment_name,
                          test_script] + script_args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True,
                         cwd=my_path)

    pout, _ = p.communicate()

    if pout:
        logger.debug("studio run output: \n" + sixdecode(pout))
        splitpout = sixdecode(pout).split('\n')
        experiments = [line.split(' ')[-1] for line in splitpout
                       if line.startswith('studio run: submitted experiment')]
        logger.debug("added experiments: {}".format(experiments))

    db = model.get_db_provider(model.get_config(config_name))
    experiment_name = experiments[0]

    try:
        experiment = db.get_experiment(experiment_name)
        if wait_for_experiment:
            while not experiment or not experiment.status == 'finished':
                time.sleep(1)  # avoid hammering the db provider in a busy loop
                experiment = db.get_experiment(experiment_name)

        if test_output:
            with open(db.get_artifact(experiment.artifacts['output']),
                      'r') as f:
                data = f.read()
                split_data = data.strip().split('\n')
                print(data)
                testclass.assertEqual(split_data[-1], expected_output)

        if test_workspace:
            check_workspace(testclass, db, experiment_name)

        if delete_when_done:
            retry(lambda: db.delete_experiment(experiment_name), sleep_time=10)

        return db

    except Exception as e:
        print("Exception {} raised during test".format(e))
        print("worker output: \n {}".format(pout))
        print("Exception trace:")
        print(traceback.format_exc())
        raise e
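A hypothetical invocation from a test case, based on the signature above (the config file, script name, and expected output are all placeholders):

    db = stubtest_worker(
        self,
        experiment_name='test_hello_' + str(uuid.uuid4()),
        runner_args=[],
        config_name='test_config.yaml',
        test_script='hello_world.py',
        expected_output='Hello world!')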
Example #10
def main(args=sys.argv[1:]):
    logger = logs.get_logger('studio-runner')
    parser = argparse.ArgumentParser(description='Studio runner. \
                     Usage: studio run <runner_arguments> \
                     script <script_arguments>')
    parser.add_argument('--config', help='configuration file', default=None)
    parser.add_argument('--project', help='name of the project', default=None)
    parser.add_argument('--experiment',
                        '-e',
                        help='Name of the experiment. If none is provided, ' +
                        'a random uuid will be generated',
                        default=None)

    parser.add_argument('--guest',
                        help='Guest mode (does not require db credentials)',
                        action='store_true')

    parser.add_argument(
        '--force-git',
        help='If run in a git directory, force running the experiment ' +
        'even if changes are not committed',
        action='store_true')

    parser.add_argument('--gpus',
                        help='Number of gpus needed to run the experiment',
                        type=int,
                        default=None)

    parser.add_argument('--cpus',
                        help='Number of cpus needed to run the experiment' +
                        ' (used to configure cloud instance)',
                        type=int,
                        default=None)

    parser.add_argument('--ram',
                        help='Amount of RAM needed to run the experiment' +
                        ' (used to configure cloud instance), ex: 10G, 10GB',
                        default=None)

    parser.add_argument('--gpuMem',
                        help='Amount of GPU RAM needed to run the experiment',
                        default=None)

    parser.add_argument(
        '--hdd',
        help='Amount of hard drive space needed to run the experiment' +
        ' (used to configure cloud instance), ex: 10G, 10GB',
        default=None)

    parser.add_argument('--queue',
                        '-q',
                        help='Name of the remote execution queue',
                        default=None)

    parser.add_argument(
        '--cloud',
        help='Cloud execution mode. Could be gcloud, gcspot, ec2 or ec2spot',
        default=None)

    parser.add_argument(
        '--bid',
        help='Spot instance price bid, specified in USD or in percentage ' +
        'of on-demand instance price. Default is %(default)s',
        default='100%')

    parser.add_argument(
        '--capture-once',
        '-co',
        help='Name of the immutable artifact to be captured. ' +
        'It will be captured once before the experiment is run',
        default=[],
        action='append')

    parser.add_argument(
        '--capture',
        '-c',
        help='Name of the mutable artifact to be captured continuously',
        default=[],
        action='append')

    parser.add_argument(
        '--reuse',
        '-r',
        help='Name of the artifact from another experiment to use',
        default=[],
        action='append')

    parser.add_argument('--verbose',
                        '-v',
                        help='Verbosity level. Allowed values: ' +
                        'debug, info, warn, error, crit ' +
                        'or numerical value of logger levels.',
                        default=None)

    parser.add_argument(
        '--metric',
        help='Metric to show in the summary of the experiment, ' +
        'and to base hyperparameter search on. ' +
        'Refers to a scalar value in the tensorboard log, ' +
        'for example: --metric=val_loss[:final | :min | :max] to report ' +
        'validation loss at the end of a keras experiment ' +
        '(or the smallest or largest value throughout the experiment ' +
        'for :min and :max respectively)',
        default=None)

    parser.add_argument(
        '--hyperparam',
        '-hp',
        help='Try out multiple values of a certain parameter. ' +
        'For example, --hyperparam=learning_rate:0.01:0.1:l10 ' +
        'will instantiate 10 versions of the script, replacing ' +
        'learning_rate with one of 10 values that lie on a ' +
        'log grid from 0.01 to 0.1, then create the '
        'experiments and place them in the queue.',
        default=[],
        action='append')

    parser.add_argument('--num-workers',
                        help='Number of local or cloud workers to spin up',
                        type=int,
                        default=None)

    parser.add_argument(
        '--python-pkg',
        help='Python package not present in the current environment ' +
        'that is needed for experiment. Only compatible with ' +
        'remote and cloud workers for now',
        default=[],
        action='append')

    parser.add_argument(
        '--ssh-keypair',
        help='Name of the SSH keypair used to access the EC2 ' +
        'instances directly',
        default=None)

    parser.add_argument(
        '--optimizer',
        '-opt',
        help='Name of optimizer to use; the default is grid search. ' +
        'The name of the optimizer must either be in ' +
        'studio/optimizer_plugins ' +
        'directory or the path to the optimizer source file ' +
        'must be supplied. ',
        default='grid')

    parser.add_argument(
        '--cloud-timeout',
        help="Time (in seconds) that cloud workers wait for messages. " +
        "If negative, " +
        "wait for the first message in the queue indefinitely " +
        "and shut down " + "as soon as no new messages are available. " +
        "If zero, don't wait at all." + "Default value is %(default)d",
        type=int,
        default=300)

    parser.add_argument('--user-startup-script',
                        help='Path of script to run immediately ' +
                        'before running the remote worker',
                        default=None)

    parser.add_argument(
        '--branch',
        help='Branch of studioml to use when running remote worker, useful ' +
        'for debugging pull requests. Default is current',
        default=None)

    parser.add_argument(
        '--max-duration',
        help='Max experiment runtime (i.e. time after which the experiment ' +
        'should be killed no matter what). Examples of values ' +
        'include 5h, 48h2m10s',
        default=None)

    parser.add_argument(
        '--lifetime',
        help='Max experiment lifetime (i.e. wait time after which the ' +
        'experiment loses relevance and should not be started). ' +
        'Examples include 240h30m10s',
        default=None)

    parser.add_argument(
        '--container',
        help='Singularity container in which experiment should be run. ' +
        'Assumes that container has all dependencies installed',
        default=None)

    parser.add_argument('--port',
                        help='Ports to open on a cloud instance',
                        default=[],
                        action='append')

    # detect which argument is the script filename
    # and attribute all arguments past that index as related to the script
    (runner_args, other_args) = parser.parse_known_args(args)
    py_suffix_args = [
        i for i, arg in enumerate(args) if arg.endswith('.py') or '::' in arg
    ]

    rerun = False
    if len(py_suffix_args) < 1:
        print('None of the arguments ends with .py')
        if len(other_args) == 0:
            print("Trying to run a container job")
            assert runner_args.container is not None
            exec_filename = None
        elif len(other_args) == 1:
            print("Treating last argument as experiment key to rerun")
            rerun = True
            experiment_key = args[-1]
        else:
            print("Too many extra arguments - should be either none " +
                  "for container job or one for experiment re-run")
            sys.exit(1)
    else:
        script_index = py_suffix_args[0]
        exec_filename, other_args = args[script_index], args[script_index + 1:]
        runner_args = parser.parse_args(args[:script_index])

    # TODO: Queue the job based on arguments and only then execute.

    config = model.get_config(runner_args.config)

    if runner_args.verbose:
        config['verbose'] = runner_args.verbose

    if runner_args.guest:
        config['database']['guest'] = True

    if runner_args.container:
        runner_args.capture_once.append(runner_args.container +
                                        ':_singularity')

    verbose = model.parse_verbosity(config['verbose'])
    logger.setLevel(verbose)

    if git_util.is_git() and not git_util.is_clean() and not rerun:
        logger.warn('Running from dirty git repo')
        if not runner_args.force_git:
            logger.error(
                'Specify --force-git to run experiment from dirty git repo')
            sys.exit(1)

    resources_needed = _parse_hardware(runner_args, config['resources_needed'])
    logger.debug('resources requested: ')
    logger.debug(str(resources_needed))

    # Set up default artifacts:
    # note that their "local" paths will be updated
    # on Experiment creation,
    # but they must have "local" field defined
    # to have storage credentials set up properly.
    artifacts = {
        'workspace': {
            'mutable': False,
            'local': os.getcwd(),
            'unpack': True
        },
        'modeldir': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'retval': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'output': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        'tb': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        '_metrics': {
            'mutable': True,
            'local': '',
            'unpack': True
        },
        '_metadata': {
            'mutable': True,
            'local': '',
            'unpack': True
        }
    }

    artifacts.update(_parse_artifacts(runner_args.capture, mutable=True))
    artifacts.update(_parse_artifacts(runner_args.capture_once, mutable=False))
    with model.get_db_provider(config) as db:
        artifacts.update(_parse_external_artifacts(runner_args.reuse, db))

    logger.debug("Task artifacts: %s", repr(artifacts))
    storage_creds = config.get('storage', {}).get(KEY_CREDENTIALS, None)
    _setup_artifacts_creds(artifacts, storage_creds)

    if runner_args.branch:
        config['cloud']['branch'] = runner_args.branch

    if runner_args.user_startup_script:
        config['cloud']['user_startup_script'] = \
            runner_args.user_startup_script

    if runner_args.lifetime:
        config['experimentLifetime'] = runner_args.lifetime

    queueLifetime = None

    if any(runner_args.hyperparam):
        if runner_args.optimizer == "grid":
            experiments = _add_hyperparam_experiments(exec_filename,
                                                      other_args, runner_args,
                                                      artifacts,
                                                      resources_needed, logger)

            queue = model.get_queue(queue_name=runner_args.queue,
                                    cloud=runner_args.cloud,
                                    config=config,
                                    close_after=queueLifetime,
                                    logger=logger,
                                    verbose=verbose)

            queue_name = submit_experiments(experiments,
                                            config=config,
                                            logger=logger,
                                            queue=queue)

            spin_up_workers(runner_args,
                            config,
                            resources_needed,
                            queue_name=queue_name,
                            verbose=verbose)
        else:
            opt_modulepath = os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "optimizer_plugins", runner_args.optimizer + ".py")
            if not os.path.exists(opt_modulepath):
                opt_modulepath = os.path.abspath(
                    os.path.expanduser(runner_args.optimizer))
            logger.info('optimizer path: %s' % opt_modulepath)

            assert os.path.exists(opt_modulepath)
            sys.path.append(os.path.dirname(opt_modulepath))
            opt_module = importlib.import_module(
                os.path.basename(opt_modulepath.replace(".py", '')))

            h = HyperparameterParser(runner_args, logger)
            hyperparams = h.parse()
            optimizer = getattr(opt_module,
                                "Optimizer")(hyperparams, config['optimizer'],
                                             logger)

            workers_started = False
            queue_name = runner_args.queue
            while not optimizer.stop():
                hyperparam_pop = optimizer.ask()
                hyperparam_tuples = h.convert_to_tuples(hyperparam_pop)

                experiments = _add_hyperparam_experiments(
                    exec_filename,
                    other_args,
                    runner_args,
                    artifacts,
                    resources_needed,
                    logger,
                    optimizer=optimizer,
                    hyperparam_tuples=hyperparam_tuples)

                queue = model.get_queue(queue_name=queue_name,
                                        cloud=runner_args.cloud,
                                        config=config,
                                        close_after=queueLifetime,
                                        logger=logger,
                                        verbose=verbose)

                queue_name = submit_experiments(experiments,
                                                config=config,
                                                logger=logger,
                                                queue=queue)

                if not workers_started:
                    spin_up_workers(runner_args,
                                    config,
                                    resources_needed,
                                    queue_name=queue_name,
                                    verbose=verbose)
                    workers_started = True

                fitnesses, behaviors = get_experiment_fitnesses(
                    experiments, optimizer, config, logger)

                try:
                    optimizer.tell(hyperparam_pop, fitnesses, behaviors)
                except BaseException:
                    util.check_for_kb_interrupt()
                    optimizer.tell(hyperparam_pop, fitnesses)

                try:
                    optimizer.disp()
                except BaseException:
                    util.check_for_kb_interrupt()
                    logger.warn('Optimizer has no disp() method')
    else:
        if rerun:
            with model.get_db_provider(config) as db:
                experiment = db.get_experiment(experiment_key)
                new_key = runner_args.experiment if runner_args.experiment \
                    else experiment_key + '_rerun' + str(uuid.uuid4())
                experiment.key = new_key
                for _, art in six.iteritems(experiment.artifacts):
                    art['mutable'] = False

                experiments = [experiment]

        else:
            experiments = [
                create_experiment(filename=exec_filename,
                                  args=other_args,
                                  experiment_name=runner_args.experiment,
                                  project=runner_args.project,
                                  artifacts=artifacts,
                                  resources_needed=resources_needed,
                                  metric=runner_args.metric,
                                  max_duration=runner_args.max_duration,
                                  dependency_policy=StudioDependencyPolicy())
            ]

        queue = model.get_queue(queue_name=runner_args.queue,
                                cloud=runner_args.cloud,
                                config=config,
                                close_after=queueLifetime,
                                logger=logger,
                                verbose=verbose)

        queue_name = submit_experiments(experiments,
                                        config=config,
                                        logger=logger,
                                        queue=queue)

        spin_up_workers(runner_args,
                        config,
                        resources_needed,
                        queue_name=queue_name,
                        verbose=verbose)

    return
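For reference, a minimal invocation of this entry point, mirroring the subprocess calls in the tests above (train.py, its arguments, and the experiment name are placeholders):

    # Equivalent to: studio run --force-git --verbose=debug train.py --lr 0.01
    main(['--force-git', '--verbose=debug',
          '--experiment=my_experiment', 'train.py', '--lr', '0.01'])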
Example #11
 def get_db_provider(self):
     config = model.get_config(self.client_config_file)
     config['database']['serverUrl'] = 'http://localhost:' + str(self.port)
     return model.get_db_provider(config)
Example #12
 def get_db_provider(self, config_name):
     config_file = os.path.join(
         os.path.dirname(
             os.path.realpath(__file__)),
         config_name)
     return model.get_db_provider(model.get_config(config_file))
Example #13
def stubtest_worker(testclass,
                    experiment_name,
                    runner_args,
                    config_name,
                    test_script,
                    expected_output,
                    script_args=[],
                    queue=LocalQueue(),
                    wait_for_experiment=True,
                    delete_when_done=True,
                    test_output=True):

    my_path = os.path.dirname(os.path.realpath(__file__))
    config_name = os.path.join(my_path, config_name)
    logger = logging.getLogger('stubtest_worker')
    logger.setLevel(10)

    queue.clean()

    with model.get_db_provider(model.get_config(config_name)) as db:
        try:
            db.delete_experiment(experiment_name)
        except Exception:
            pass

    p = subprocess.Popen(['studio', 'run'] + runner_args + [
        '--config=' + config_name, '--verbose=debug', '--force-git',
        '--experiment=' + experiment_name, test_script
    ] + script_args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         close_fds=True,
                         cwd=my_path)

    pout, _ = p.communicate()

    if pout:
        logger.debug("studio run output: \n" + pout)

    db = model.get_db_provider(model.get_config(config_name))
    experiments = [
        e for e in db.get_user_experiments() if e.startswith(experiment_name)
    ]

    assert len(experiments) == 1

    experiment_name = experiments[0]

    try:
        # test saved arguments
        keybase = "/experiments/" + experiment_name
        saved_args = db._get(keybase + '/args')
        if saved_args is not None:
            testclass.assertTrue(len(saved_args) == len(script_args))
            for i in range(len(saved_args)):
                testclass.assertTrue(saved_args[i] == script_args[i])
            testclass.assertTrue(db._get(keybase + '/filename') == test_script)
        else:
            testclass.assertTrue(script_args is None or len(script_args) == 0)

        experiment = db.get_experiment(experiment_name)
        if wait_for_experiment:
            while not experiment.status == 'finished':
                time.sleep(1)
                experiment = db.get_experiment(experiment_name)

        if test_output:
            with open(db.store.get_artifact(experiment.artifacts['output']),
                      'r') as f:
                data = f.read()
                split_data = data.strip().split('\n')
                testclass.assertEqual(split_data[-1], expected_output)

        check_workspace(testclass, db, experiment_name)

        if delete_when_done:
            db.delete_experiment(experiment_name)

        return db

    except Exception as e:
        print("Exception {} raised during test".format(e))
        print("worker output: \n {}".format(pout))
        print("Exception trace:")
        print(traceback.format_exc())
        raise e
Example #14
File: magics.py  Project: zuma89/studio
    def studio_run(self, line, cell=None):
        script_text = []
        pickleable_ns = {}

        for varname, var in six.iteritems(self.shell.user_ns):
            if not varname.startswith('__'):
                if isinstance(var, ModuleType) and \
                   var.__name__ != 'studio.magics':
                    script_text.append(
                        'import {} as {}'.format(var.__name__, varname)
                    )

                else:
                    try:
                        pickle.dumps(var)
                        pickleable_ns[varname] = var
                    except BaseException:
                        pass

        script_text.append(cell)
        script_text = '\n'.join(script_text)
        stub_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'run_magic.py.stub')

        with open(stub_path) as f:
            script_stub = f.read()

        script = script_stub.format(script=script_text)

        experiment_key = str(int(time.time())) + \
            "_jupyter_" + str(uuid.uuid4())

        print('Running studio with experiment key ' + experiment_key)
        config = model.get_config()
        if config['database']['type'] == 'http':
            print("Experiment progress can be viewed/shared at:")
            print("{}/experiment/{}".format(
                config['database']['serverUrl'],
                experiment_key))

        workspace_new = fs_tracker.get_artifact_cache(
            'workspace', experiment_key)

        rsync_cp('.', workspace_new)
        with open(os.path.join(workspace_new, '_script.py'), 'w') as f:
            f.write(script)

        ns_path = fs_tracker.get_artifact_cache('_ns', experiment_key)

        with gzip.open(ns_path, 'wb') as f:
            f.write(pickle.dumps(pickleable_ns))

        if any(line):
            runner_args = line.strip().split(' ')
        else:
            runner_args = []

        runner_args.append('--capture={}:_ns'.format(ns_path))
        runner_args.append('--capture-once=.:workspace')
        runner_args.append('--force-git')
        runner_args.append('--experiment=' + experiment_key)

        notebook_cwd = os.getcwd()
        os.chdir(workspace_new)
        print(runner_args + ['_script.py'])
        runner_main(runner_args + ['_script.py'])
        os.chdir(notebook_cwd)

        with model.get_db_provider() as db:
            while True:
                experiment = db.get_experiment(experiment_key)
                if experiment and experiment.status == 'finished':
                    break

                time.sleep(10)

            new_ns_path = db.get_artifact(experiment.artifacts['_ns'])

        # pickle data must be read in binary mode
        with open(new_ns_path, 'rb') as f:
            new_ns = pickle.loads(f.read())

        self.shell.user_ns.update(new_ns)
Example #15
def main(args=sys.argv):
    logger = logs.get_logger('studio-remote-worker')
    parser = argparse.ArgumentParser(description='Studio remote worker. \
                     Usage: studio-remote-worker \
                     ')
    parser.add_argument('--config', help='configuration file', default=None)

    parser.add_argument('--guest',
                        help='Guest mode (does not require db credentials)',
                        action='store_true')

    parser.add_argument(
        '--single-run',
        help='quit after a single run (regardless of the state of the queue)',
        action='store_true')

    parser.add_argument('--queue', help='queue name', required=True)
    parser.add_argument('--verbose',
                        '-v',
                        help='Verbosity level. Allowed values: ' +
                        'debug, info, warn, error, crit ' +
                        'or numerical value of logger levels.',
                        default=None)

    parser.add_argument(
        '--timeout',
        '-t',
        help='Timeout after which remote worker stops listening (in seconds)',
        type=int,
        default=100)

    parsed_args, script_args = parser.parse_known_args(args)
    verbose = parse_verbosity(parsed_args.verbose)
    logger.setLevel(verbose)

    config = None
    if parsed_args.config is not None:
        config = model.get_config(parsed_args.config)

    if parsed_args.queue.startswith('ec2_') or \
       parsed_args.queue.startswith('sqs_'):
        queue = SQSQueue(parsed_args.queue, verbose=verbose)
    elif parsed_args.queue.startswith('rmq_'):
        queue = get_cached_queue(name=parsed_args.queue,
                                 route='StudioML.' + parsed_args.queue,
                                 config=config,
                                 logger=logger,
                                 verbose=verbose)
    else:
        queue = PubsubQueue(parsed_args.queue, verbose=verbose)

    logger.info('Waiting for work')

    timeout_before = parsed_args.timeout
    timeout_after = timeout_before if timeout_before > 0 else 0
    # wait_for_messages(queue, timeout_before, logger)

    logger.info('Starting working')
    worker_loop(queue,
                parsed_args,
                single_experiment=parsed_args.single_run,
                timeout=timeout_after,
                verbose=verbose)
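A minimal invocation, assuming an existing RabbitMQ-backed queue (the queue name is a placeholder; the rmq_ prefix selects get_cached_queue in the dispatch above):

    main(['--queue=rmq_my_work', '--timeout=300', '--verbose=info'])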
Example #16
    def __init__(
        self,
        # Name of experiment
        experimentId,
        # Completion service configuration
        cs_config=None,
        # used to pass a studioML configuration block read by client software
        studio_config=None,
        # Studio config yaml file
        studio_config_file=None,
        shutdown_del_queue=False
    ):
        # StudioML configuration
        self.config = model.get_config(studio_config_file)

        self.logger = logs.get_logger(self.__class__.__name__)
        self.verbose_level = parse_verbosity(self.config['verbose'])
        self.logger.setLevel(self.verbose_level)

        # Setup Completion Service instance properties
        # based on configuration
        self.experimentId = experimentId
        self.project_name = "completion_service_" + experimentId

        self.resumable = RESUMABLE
        self.clean_queue = CLEAN_QUEUE
        self.queue_upscaling = QUEUE_UPSCALING
        self.num_workers = int(cs_config.get('num_workers', 1))
        self.cloud_timeout = cs_config.get('timeout')
        self.bid = cs_config.get('bid')
        self.ssh_keypair = cs_config.get('ssh_keypair')
        self.sleep_time = cs_config.get('sleep_time')
        self.shutdown_del_queue = shutdown_del_queue

        # Figure out the request for resources; copy the defaults so the
        # module-level DEFAULT_RESOURCES_NEEDED dict is not mutated:
        resources_needed = cs_config.get('resources_needed')
        self.resources_needed = dict(DEFAULT_RESOURCES_NEEDED)
        if resources_needed:
            self.resources_needed.update(resources_needed)
        studio_resources = self.config.get('resources_needed')
        if studio_resources:
            self.resources_needed.update(studio_resources)

        # Figure out task queue and cloud we are going to use:
        queue_name = cs_config.get('queue')
        cloud_name = cs_config.get('cloud')
        if cs_config.get('local'):
            queue_name = None
            cloud_name = None
        elif queue_name is not None:
            self.shutdown_del_queue = False
            if cloud_name in ['ec2spot', 'ec2']:
                assert queue_name.startswith("sqs_")
        else:
            queue_name = self.experimentId
            if cloud_name in ['ec2spot', 'ec2']:
                queue_name = "sqs_" + queue_name
        self.cloud = cloud_name
        if queue_name is not None and queue_name.startswith("rmq_"):
            assert self.cloud is None

        self.wm = model.get_worker_manager(
            self.config, self.cloud)

        if queue_name is not None:
            self.logger.info(
                "CompletionService configured with queue {0}"
                    .format(queue_name))

        self.queue = model.get_queue(queue_name=queue_name, cloud=self.cloud,
                                     config=self.config,
                                     logger=self.logger,
                                     verbose=self.verbose_level)

        self.queue_name = self.queue.get_name()

        self.submitted = {}
        self.use_spot = cloud_name in ['ec2spot', 'gcspot']

        self.logger.info("Project name: {0}".format(self.project_name))
        self.logger.info("Initial/final queue name: {0}, {1}"
                         .format(queue_name, self.queue_name))
        self.logger.info("Cloud name: {0}".format(self.cloud))
Example #17
 def get_db_provider(self):
     config = model.get_config('test_config_http_client.yaml')
     config['database']['serverUrl'] = 'http://localhost:' + str(self.port)
     return model.get_db_provider(config)