Example 1
    def __init__(self, art_name, art_dict, logger=None):
        self.name = art_name
        self.key: str = None
        self.local_path: str = None
        self.remote_path: str = None
        self.credentials = None
        self.hash = None

        self.logger = logger
        if self.logger is None:
            self.logger = logs.get_logger(self.__class__.__name__)
            self.logger.setLevel(storage_setup.get_storage_verbose_level())

        self.storage_handler: StorageHandler = None

        self.unpack: bool = art_dict.get('unpack')
        self.is_mutable: bool = art_dict.get('mutable')
        if 'key' in art_dict.keys():
            self.key = art_dict['key']
        if 'local' in art_dict.keys():
            self.local_path = art_dict['local']
        if 'qualified' in art_dict.keys():
            self.remote_path = art_dict['qualified']
        if 'url' in art_dict.keys():
            self.remote_path = art_dict['url']
        if 'hash' in art_dict.keys():
            self.hash = art_dict['hash']
        self.credentials = credentials.Credentials.get_credentials(art_dict)

        self._setup_storage_handler(art_dict)
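For reference, a minimal sketch of an artifact dictionary this constructor can parse, using the key names looked up above (all values are made up for illustration; a real dictionary may also carry a credentials block consumed by Credentials.get_credentials):

    art_dict = {
        'key': 'experiments/my_exp/workspace.tar.gz',  # storage key
        'local': '/tmp/workspace',                     # local path
        'url': 'https://example.com/workspace.tgz',    # remote path ('qualified' is also accepted)
        'hash': None,
        'unpack': True,
        'mutable': False,
    }
    artifact = Artifact('workspace', art_dict)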
Example 2
    def test_experiment_lifetime(self):
        my_path = os.path.dirname(os.path.realpath(__file__))

        logger = logs.get_logger('test_experiment_lifetime')
        logger.setLevel(10)

        config_name = os.path.join(my_path, 'test_config.yaml')
        key = 'test_experiment_lifetime' + str(uuid.uuid4())

        with model.get_db_provider(model.get_config(config_name)) as db:
            try:
                db.delete_experiment(key)
            except Exception:
                pass

            p = subprocess.Popen(['studio', 'run',
                                  '--config=' + config_name,
                                  '--experiment=' + key,
                                  '--force-git',
                                  '--verbose='+EXPERIMENT_VERBOSE_LEVEL,
                                  '--lifetime=-10m',
                                  'stop_experiment.py'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 cwd=my_path)

            pout, _ = p.communicate()

            if pout:
                logger.debug("studio run output: \n" + pout.decode())

            db.delete_experiment(key)
Example 3
    def __init__(self,
                 db_config,
                 measure_timestamp_diff=False,
                 blocking_auth=True,
                 compression=None):

        verbose = get_storage_verbose_level()
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(verbose)

        guest = db_config.get('guest')

        self.app = pyrebase.initialize_app(db_config)

        if compression is None:
            compression = db_config.get('compression')

        self.auth = None
        if not guest and 'serviceAccount' not in db_config.keys():
            self.auth = get_auth(db_config['type'],
                                 blocking_auth,
                                 verbose=verbose)

        super().__init__(StorageType.storageFirebase,
                         self.logger,
                         measure_timestamp_diff=measure_timestamp_diff,
                         compression=compression)
Example 4
    def __init__(self,
                 auth_cookie=None,
                 verbose=10,
                 branch=None,
                 user_startup_script=None):
        self.startup_script_file = os.path.join(
            os.path.dirname(__file__), 'scripts/ec2_worker_startup.sh')

        self.install_studio_script = os.path.join(os.path.dirname(__file__),
                                                  'scripts/install_studio.sh')

        self.client = boto3.client('ec2')
        self.asclient = boto3.client('autoscaling')
        self.cwclient = boto3.client('cloudwatch')

        self.region = self.client._client_config.region_name

        self.logger = logs.get_logger('EC2WorkerManager')
        self.logger.setLevel(verbose)
        self.auth_cookie = auth_cookie

        self.prices = self._get_ondemand_prices(_instance_specs.keys())

        self.repo_url = git_util.get_my_repo_url()
        self.branch = branch if branch else git_util.get_my_checkout_target()
        self.user_startup_script = user_startup_script

        if user_startup_script:
            self.logger.warning('User startup script argument is deprecated')
Example 5
    def __init__(self,
                 func=lambda x: x,
                 parent=None,
                 q_in=None,
                 q_out=None,
                 num_workers=0,
                 q_size=None,
                 batch_size=1,
                 filterf=lambda x: x is not None,
                 batcher=lambda x: x,
                 timeout=1):

        min_q_size = 10

        self.func = func
        self.parent = parent
        self.num_workers = num_workers
        self.filterf = filterf
        self.batch_size = batch_size
        self.batcher = batcher

        self.q_out = q_out
        self.q_in = q_in
        # Honor an explicitly requested queue size; otherwise scale with the
        # number of workers, never dropping below min_q_size.
        self.q_size = q_size if q_size else max(min_q_size, 2 * num_workers)

        self.logger = logs.get_logger('BufferedPipe')
        self.logger.setLevel(10)
        self.timeout = timeout
        self.worker_frame = Thread
Example 6
    def __init__(self,
                 config,
                 verbose=10,
                 blocking_auth=True,
                 compression=None):
        # TODO: implement connection
        self.url = config.get('serverUrl', None)
        self.verbose = get_storage_verbose_level()
        self.logger = logs.get_logger('HTTPProvider')
        self.logger.setLevel(self.verbose)

        self.credentials: Credentials = \
            Credentials.get_credentials(config)

        self.storage_handler = HTTPStorageHandler(
            self.url,
            self.credentials.to_dict() if self.credentials else None,
            compression=compression)

        self.auth = None
        guest = config.get('guest', None)
        if not guest and 'serviceAccount' not in config.keys():
            self.auth = get_auth(config.get('authentication', None),
                                 blocking_auth)

        self.compression = compression
        if self.compression is None:
            self.compression = config.get('compression', None)
Example 7
    def __init__(self,
                 zone='us-east1-c',
                 auth_cookie=None,
                 verbose=10,
                 branch=None,
                 user_startup_script=None):
        assert 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ.keys()
        with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS'], 'r') as f:
            credentials_dict = json.loads(f.read())

        self.compute = googleapiclient.discovery.build('compute', 'v1')

        self.startup_script_file = os.path.join(
            os.path.dirname(__file__), 'scripts/gcloud_worker_startup.sh')

        self.install_studio_script = os.path.join(os.path.dirname(__file__),
                                                  'scripts/install_studio.sh')

        self.zone = zone
        self.projectid = credentials_dict['project_id']
        self.logger = logs.get_logger("GCloudWorkerManager")
        self.logger.setLevel(verbose)
        self.auth_cookie = auth_cookie
        self.user_startup_script = user_startup_script
        self.repo_url = git_util.get_my_repo_url()
        self.branch = branch if branch else git_util.get_my_checkout_target()
        self.log_bucket = "studioml-logs"

        if user_startup_script:
            self.logger.warning('User startup script argument is deprecated')
Example 8
def get_worker_manager(config, cloud=None, verbose=10):
    if cloud is None:
        return None

    assert cloud in ['gcloud', 'gcspot', 'ec2', 'ec2spot']
    logger = logs.get_logger('runner.get_worker_manager')
    logger.setLevel(verbose)

    auth = get_auth(config['database']['authentication'])
    auth_cookie = auth.get_token_file() if auth else None

    branch = config['cloud'].get('branch')

    logger.info('using branch {}'.format(branch))

    if cloud in ['gcloud', 'gcspot']:

        cloudconfig = config['cloud']['gcloud']
        worker_manager = GCloudWorkerManager(
            auth_cookie=auth_cookie,
            zone=cloudconfig['zone'],
            branch=branch,
            user_startup_script=config['cloud'].get('user_startup_script'))

    elif cloud in ['ec2', 'ec2spot']:
        worker_manager = EC2WorkerManager(
            auth_cookie=auth_cookie,
            branch=branch,
            user_startup_script=config['cloud'].get('user_startup_script'))
    return worker_manager
Example 9
def getlogger():
    global logger
    if logger is None:
        logger = logs.get_logger('studio_server')
        logger.setLevel(10)

    return logger
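This lazy getter relies on a module-level logger variable that starts out as None; a minimal sketch of the assumed setup and an illustrative call site:

    logger = None  # module-level cache read and written by getlogger()

    # elsewhere in the module:
    getlogger().info('studio server starting')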
Example 10
    def __init__(self, queue, route, amqp_url='', config=None, logger=None):
        """Setup the example publisher object, passing in the URL we will use
        to connect to RabbitMQ.
        """
        self._rmq_lock = threading.RLock()
        self._connection = None
        self._channel = None
        self._consumer = None
        self._consume_ready = False

        self._msg_tracking_lock = threading.RLock()
        self._deliveries = []
        self._acked = 0
        self._nacked = 0
        self._message_number = 0

        self._rmq_msg = None
        self._rmq_id = None

        self._stopping = False
        self._exchange = 'StudioML.topic'
        self._exchange_type = 'topic'
        self._routing_key = route

        self._url = amqp_url
        self._is_persistent: bool = False

        if logger is not None:
            self._logger = logger
        else:
            self._logger = logs.get_logger('RabbitMQ')
            self._logger.setLevel(get_storage_verbose_level())

        if config is not None:
            # extract from the config data structure any settings related to
            # queue messaging for rabbit MQ
            if 'cloud' in config:
                if 'queue' in config['cloud']:
                    if 'rmq' in config['cloud']['queue']:
                        self._url = config['cloud']['queue']['rmq']
                        self._logger.warning('use queue url %s', self._url)
                        flag_persistent = config['cloud']['queue']\
                            .get('persistent', False)
                        if isinstance(flag_persistent, str):
                            flag_persistent = flag_persistent.lower() == 'true'
                        self._is_persistent = flag_persistent

        self._queue = queue
        self._queue_deleted = True
        self._connection_failed = False
        self._connection_failure_reason = None

        # The pika library for RabbitMQ has an asynchronous run method
        # that needs to run forever and will do reconnections etc
        # automatically for us
        thr = threading.Thread(target=self._run, args=(), kwargs={})
        thr.daemon = True
        thr.start()
        self._wait_queue_created(600)
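The config lookup above only inspects config['cloud']['queue']; a dictionary shaped roughly as follows would set the queue URL and the persistence flag (the URL is a placeholder):

    config = {
        'cloud': {
            'queue': {
                'rmq': 'amqp://guest:guest@localhost:5672/%2F',
                # strings are lower-cased and compared to 'true'; booleans also work
                'persistent': 'true',
            }
        }
    }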
Example 11
    def __init__(self, name: str,
                 receiver_keypath: str,
                 sender_keypath: str = None):
        """
        param: name - payload builder name
        param: receiver_keypath - file path to .pem file
                                  with recipient public key
        param: sender_keypath - file path to .pem file
                                  with sender private key
        """
        super(EncryptedPayloadBuilder, self).__init__(name)

        # XXX Set logger verbosity level here
        self.logger = logs.get_logger(self.__class__.__name__)

        self.recipient_key_path = receiver_keypath
        self.recipient_key = None
        try:
            self.recipient_key =\
                RSA.import_key(open(self.recipient_key_path).read())
        except BaseException:
            check_for_kb_interrupt()
            msg = "FAILED to import recipient public key from: {0}"\
                .format(self.recipient_key_path)
            self.logger.error(msg)
            raise ValueError(msg)

        self.sender_key_path = sender_keypath

        self.sender_key: SigningKey = None
        self.verify_key: VerifyKey = None
        self.sender_fingerprint = None

        if self.sender_key_path is None:
            msg = "Signing key path must be specified " \
                  "for encrypted payloads. ABORTING."
            self.logger.error(msg)
            raise ValueError(msg)

        # We expect ed25519 signing key in "openssh private key" format
        try:
            public_key_data, private_key_data =\
                Ed25519KeyUtil.parse_private_key_file(
                    self.sender_key_path, self.logger)
            if public_key_data is None or private_key_data is None:
                self._raise_error(
                    "Failed to import private signing key from {0}. ABORTING."
                        .format(self.sender_key_path))

            self.sender_key = SigningKey(private_key_data)
            self.verify_key = VerifyKey(public_key_data)
        except Exception:
            self._raise_error("FAILED to open/read private signing key file: {0}"\
                .format(self.sender_key_path))

        self.sender_fingerprint = \
            self._get_fingerprint(public_key_data)

        self.simple_builder =\
            UnencryptedPayloadBuilder("simple-builder-for-encryptor")
Example 12
    def __init__(self, queue, args):
        self.config = args.config

        if args.guest:
            self.config['database']['guest'] = True

        self.task_queue = queue
        self.logger = logs.get_logger('LocalExecutor')
        self.logger.setLevel(model.parse_verbosity(self.config.get('verbose')))
        self.logger.debug("Config: ")
        self.logger.debug(self.config)
Example 13
    def __init__(self, db_config, handler: StorageHandler, compression=None):
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(get_storage_verbose_level())

        self.compression = compression
        if self.compression is None:
            self.compression = db_config.get('compression', None)

        self.auth = None

        self.storage_handler = handler

        self.max_keys = db_config.get('max_keys', 100)
Example 14
    def __init__(self, path=None, verbose=10):
        if path is None:
            self.path = self._get_queue_directory()
        else:
            self.path = path
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(verbose)
        self.status_marker = os.path.join(self.path, 'is_active.queue')
        try:
            # Touch the marker file to make sure the queue directory is writable.
            with open(self.status_marker, "w") as smark:
                _ = smark
        except IOError:
            self.logger.error('FAILED to create %s for LocalQueue. ABORTING.',
                              self.status_marker)
            sys.exit(-1)
Example 15
def allocate_resources(experiment, config=None, verbose=10):
    logger = logs.get_logger('allocate_resources')
    logger.setLevel(verbose)
    logger.info('Allocating resources {} for experiment {}'.format(
        experiment.resources_needed, experiment.key))

    ret_val = True
    gpus_needed = int(experiment.resources_needed.get('gpus', 0)) \
        if experiment.resources_needed else 0

    if gpus_needed > 0:
        ret_val = ret_val and allocate_gpus(
            gpus_needed, experiment.resources_needed, config)
    else:
        allocate_gpus(0)

    return ret_val
Example 16
def get_db_provider(config=None, blocking_auth=True):

    db_provider = get_storage_db_provider()
    if db_provider is not None:
        return db_provider

    if config is None:
        config = get_config()
    verbose = parse_verbosity(config.get('verbose', None))

    # Save this verbosity level as global for the whole experiment job:
    set_storage_verbose_level(verbose)

    logger = logs.get_logger("get_db_provider")
    logger.setLevel(verbose)
    logger.debug('Choosing db provider with config:')
    logger.debug(config)

    if 'storage' in config.keys():
        artifact_store = get_artifact_store(config['storage'])
    else:
        artifact_store = None

    assert 'database' in config.keys()
    db_config = config['database']
    if db_config['type'].lower() == 's3':
        db_provider = S3Provider(db_config,
                          blocking_auth=blocking_auth)
        if artifact_store is None:
            artifact_store = db_provider.get_storage_handler()

    elif db_config['type'].lower() == 'gs':
        raise NotImplementedError("GS is not supported.")

    elif db_config['type'].lower() == 'local':
        db_provider = LocalDbProvider(db_config,
                          blocking_auth=blocking_auth)
        if artifact_store is None:
            artifact_store = db_provider.get_storage_handler()

    else:
        raise ValueError('Unknown type of the database ' + db_config['type'])

    setup_storage(db_provider, artifact_store)
    return db_provider
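A sketch of the smallest config this factory accepts: a 'database' section whose 'type' is 's3' or 'local' ('gs' raises NotImplementedError), plus optional 'storage' and 'verbose' entries. The endpoint and bucket values below are placeholders that mirror the local storage handler shown in Example 27:

    config = {
        'verbose': 'debug',
        'database': {
            'type': 'local',
            'endpoint': '~/.studioml/storage',
            'bucket': 'storage',
        },
    }
    db_provider = get_db_provider(config)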
Example 17
    def __init__(self, name: str, path: str = None, logger=None):
        if logger is not None:
            self._logger = logger
        else:
            self._logger = logs.get_logger('LocalQueue')
            self._logger.setLevel(get_storage_verbose_level())

        self.name = name
        if path is None:
            self.path = self._get_queue_directory()
        else:
            self.path = path
        self.path = os.path.join(self.path, name)
        os.makedirs(self.path, exist_ok=True)

        # A local queue is considered active iff its directory exists.
        self._lock_path = os.path.join(self.path, LOCK_FILE_NAME)
        self._lock = filelock.SoftFileLock(self._lock_path)
Example 18
    def __init__(self, config, blocking=True, verbose=logs.DEBUG):
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(verbose)

        if isinstance(config, dict):
            self.config = config
        else:
            self.config = {'type': config}

        self.tokendir = os.path.abspath(
            os.path.expanduser(self.config.get('token_directory', TOKEN_DIR)))

        if not os.path.exists(self.tokendir):
            os.makedirs(self.tokendir)

        self.token = self._load_token()[0]
        if self.token is None and blocking:
            self._sign_in()
Example 19
    def __init__(self, cred_dict):
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(storage_setup.get_storage_verbose_level())

        self.type = None
        self.key = None
        self.secret_key = None
        self.session_token = None
        self.region = None
        self.profile = None
        if cred_dict is None:
            return

        if isinstance(cred_dict, str) and cred_dict == 'none':
            return

        if not isinstance(cred_dict, dict):
            msg: str =\
                "NOT SUPPORTED credentials format {0}".format(repr(cred_dict))
            util.report_fatal(msg, self.logger)

        if len(cred_dict) == 0:
            # Empty credentials dictionary is like None:
            return

        if len(cred_dict) == 1 and AWS_TYPE in cred_dict.keys():
            aws_creds = cred_dict[AWS_TYPE]
            self.type = AWS_TYPE
            self.key = aws_creds.get(AWS_KEY, None)
            self.secret_key = aws_creds.get(AWS_SECRET_KEY, None)
            self.session_token = aws_creds.get(AWS_SESSION_TOKEN, None)
            self.region = self._get_named(AWS_REGION, aws_creds)
            self.profile = self._get_named(AWS_PROFILE, aws_creds)

            if self.key is None or self.secret_key is None:
                msg: str = \
                    "INVALID aws credentials format {0}".format(repr(cred_dict))
                util.report_fatal(msg, self.logger)
        else:
            msg: str =\
                "NOT SUPPORTED credentials format {0}".format(repr(cred_dict))
            util.report_fatal(msg, self.logger)
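Per the checks above, the only non-empty format accepted is a single-entry dictionary keyed by AWS_TYPE, with the access key and secret key read directly from the nested dictionary (region and profile go through _get_named and may be shaped differently). An illustrative value, using the same constants and fake credentials:

    cred_dict = {
        AWS_TYPE: {
            AWS_KEY: 'AKIA...fake...',
            AWS_SECRET_KEY: 'fake-secret-key',
        }
    }
    creds = Credentials(cred_dict)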
Example 20
    def test_stop_experiment(self):
        my_path = os.path.dirname(os.path.realpath(__file__))

        logger = logs.get_logger('test_stop_experiment')
        logger.setLevel(10)

        config_name = os.path.join(my_path, 'test_config.yaml')
        key = 'test_stop_experiment' + str(uuid.uuid4())

        with model.get_db_provider(model.get_config(config_name)) as db:
            try:
                db.delete_experiment(key)
            except Exception:
                pass

            p = subprocess.Popen(['studio', 'run',
                                  '--config=' + config_name,
                                  '--experiment=' + key,
                                  '--force-git',
                                  '--verbose='+EXPERIMENT_VERBOSE_LEVEL,
                                  'stop_experiment.py'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 cwd=my_path)

            # wait till experiment spins up
            experiment = None
            while experiment is None or experiment.status == 'waiting':
                time.sleep(1)
                try:
                    experiment = db.get_experiment(key)
                except BaseException:
                    pass

            logger.info('Stopping experiment')
            db.stop_experiment(key)
            pout, _ = p.communicate()

            if pout:
                logger.debug("studio run output: \n" + pout.decode())

            db.delete_experiment(key)
Example 21
    def __init__(self, config, blocking=True, verbose=logs.DEBUG):
        if not os.path.exists(TOKEN_DIR):
            try:
                os.makedirs(TOKEN_DIR)
            except OSError:
                pass

        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(verbose)

        self.firebase = pyrebase.initialize_app(config)
        self.user = {}
        self.use_email_auth = config.get('use_email_auth', False)
        if self.use_email_auth:
            self.email = config.get('email')
            self.password = config.get('password')
            if not self.password or not self.email:
                self.email = input('Firebase token is not found or expired! ' +
                                   'You need to re-login. (Or re-run with ' +
                                   'studio/studio-runner ' +
                                   'with --guest option ) '
                                   '\nemail:')
                self.password = getpass.getpass('password:')
        else:
            print('Authentication required! Either specify ' +
                  'use_email_auth in config file, or run '
                  'studio and go to webui ' + '(localhost:5000 by default) '
                  'to authenticate using google credentials')
            while self.expired:
                time.sleep(1)
                self._update_user()

        self.sched = BackgroundScheduler()
        self.sched.start()
        self.sched.add_job(self._update_user, 'interval', minutes=31)
        atexit.register(self.sched.shutdown)
Example 22
    def __init__(self, queue_name, sub_name=None, verbose=10):
        from google.cloud import pubsub

        assert 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ.keys()
        with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as f:
            credentials = json.loads(f.read())

        project_name = credentials['project_id']
        self.logger = logs.get_logger(self.__class__.__name__)
        if verbose is not None:
            self.logger.setLevel(parse_verbosity(verbose))

        self.pubclient = pubsub.PublisherClient()
        self.subclient = pubsub.SubscriberClient()

        self.project = project_name
        self.topic_name = self.pubclient.topic_path(project_name, queue_name)
        self.logger.info("Topic name = {}".format(self.topic_name))
        try:
            self.pubtopic = self.pubclient.get_topic(self.topic_name)
        except BaseException as e:
            check_for_kb_interrupt()
            self.pubtopic = self.pubclient.create_topic(self.topic_name)
            self.logger.info('topic {} created'.format(self.topic_name))

        sub_name = sub_name if sub_name else queue_name + "_sub"
        self.logger.info("Topic name = {}".format(queue_name))
        self.logger.info("Subscription name = {}".format(sub_name))

        self.sub_name = self.subclient.subscription_path(
            project_name, sub_name)
        try:
            self.subclient.get_subscription(self.sub_name)
        except BaseException as e:
            check_for_kb_interrupt()
            self.logger.warning(e)
            self.subclient.create_subscription(self.sub_name, self.topic_name,
                                               ack_deadline_seconds=20)

        self.logger.info('subscription {} created'.format(sub_name))
Example 23
    def __init__(self,
                 remote_path,
                 credentials_dict,
                 timestamp=None,
                 compression=None):

        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(get_storage_verbose_level())

        self.url = remote_path
        self.timestamp = timestamp

        parsed_url = urlparse(self.url)
        self.scheme = parsed_url.scheme
        self.endpoint = parsed_url.netloc
        self.path = parsed_url.path
        self.credentials = Credentials(credentials_dict)

        super().__init__(StorageType.storageHTTP,
                         self.logger,
                         False,
                         compression=compression)
Example 24
def get_db_provider(config=None, blocking_auth=True):

    db_provider = get_storage_db_provider()
    if db_provider is not None:
        return db_provider

    if config is None:
        config = get_config()
    verbose = parse_verbosity(config.get('verbose'))

    # Save this verbosity level as global for the whole experiment job:
    set_storage_verbose_level(verbose)

    logger = logs.get_logger("get_db_provider")
    logger.setLevel(verbose)
    logger.debug('Choosing db provider with config:')
    logger.debug(config)

    if 'storage' in config.keys():
        artifact_store = db_provider_setup.get_artifact_store(
            config['storage'])
    else:
        artifact_store = None

    assert 'database' in config.keys()
    db_config = config['database']
    if db_config['type'].lower() == 'firebase':
        db_provider = FirebaseProvider(db_config, blocking_auth=blocking_auth)

    elif db_config['type'].lower() == 'http':
        db_provider = HTTPProvider(db_config,
                                   verbose=verbose,
                                   blocking_auth=blocking_auth)
    else:
        db_provider = db_provider_setup.get_db_provider(
            config=config, blocking_auth=blocking_auth)

    setup_storage(db_provider, artifact_store)
    return db_provider
Example 25
    def __init__(self, name,
                 config=None, logger=None):

        if logger is not None:
            self.logger = logger
        else:
            self.logger = logs.get_logger('SQSQueue')
            self.logger.setLevel(get_storage_verbose_level())

        self.name = name
        self.is_persistent = False

        self.credentials = self._setup_from_config(config)

        aws_access_key_id = self.credentials.get_key()
        aws_secret_access_key = self.credentials.get_secret_key()

        if self.credentials.get_profile() is not None:
            # If profile name is specified, for whatever reason
            # boto3 API will barf if (key, secret key) pair
            # is also defined.
            aws_access_key_id = None
            aws_secret_access_key = None

        self._session = boto3.session.Session(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=None,
            region_name=self.credentials.get_region(),
            profile_name=self.credentials.get_profile()
        )
        self._client = self._session.client('sqs')

        create_q_response = self._client.create_queue(
            QueueName=name)

        self.queue_url = create_q_response['QueueUrl']
        self.logger.info('Creating SQS queue with name %s', name)
        self.logger.info('Queue url = %s', self.queue_url)
Example 26
    def test_two_receivers(self):
        logger = logs.get_logger('test_two_receivers')
        logger.setLevel(10)
        q1 = self.get_queue()
        q1.clean()

        q2 = self.get_queue(q1.get_name())

        data1 = str(uuid.uuid4())
        data2 = str(uuid.uuid4())

        logger.debug('data1 = ' + data1)
        logger.debug('data2 = ' + data2)

        q1.enqueue(data1)
        recv_data1 = self.get_queue_data(
            q2.dequeue(timeout=self.get_timeout()))

        self.assertEqual(data1, recv_data1)

        q1.enqueue(data1)
        q1.enqueue(data2)

        recv_data1 = q1.dequeue(timeout=self.get_timeout())
        recv_data2 = q2.dequeue(timeout=self.get_timeout())

        recv1 = self.get_queue_data(recv_data1)
        recv2 = self.get_queue_data(recv_data2)

        logger.debug('recv1 = ' + recv1)
        logger.debug('recv2 = ' + recv2)

        self.assertTrue(data1 == recv1 or data2 == recv1)
        self.assertTrue(data1 == recv2 or data2 == recv2)
        self.assertFalse(recv1 == recv2)

        self.assertTrue(q1.dequeue() is None)
        self.assertTrue(q2.dequeue() is None)
Example 27
    def __init__(self, config, measure_timestamp_diff=False, compression=None):

        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(get_storage_verbose_level())

        if compression is None:
            compression = config.get('compression', None)

        self.endpoint = config.get('endpoint', '~')
        self.endpoint = os.path.realpath(os.path.expanduser(self.endpoint))
        if not os.path.exists(self.endpoint) \
            or not os.path.isdir(self.endpoint):
            msg: str = "Store root {0} doesn't exist or not a directory. Aborting."\
                .format(self.endpoint)
            self._report_fatal(msg)

        self.bucket = config.get('bucket', 'storage')
        self.store_root = os.path.join(self.endpoint, self.bucket)
        self._ensure_path_dirs_exist(self.store_root)

        super().__init__(StorageType.storageLocal,
                         self.logger,
                         measure_timestamp_diff,
                         compression=compression)
Example 28
def worker_loop(queue,
                parsed_args,
                single_experiment=False,
                timeout=0,
                verbose=None):

    fetch_artifacts = True

    logger = logs.get_logger('worker_loop')

    hold_period = 4
    retval = 0
    while True:
        msg = queue.dequeue(acknowledge=False, timeout=timeout)
        if not msg:
            break

        first_exp, ack_key = msg

        data_dict = json.loads(sixdecode(first_exp))
        experiment_key = data_dict['experiment']['key']
        config = data_dict['config']

        parsed_args.config = config
        if verbose:
            config['verbose'] = verbose
        else:
            verbose = model.parse_verbosity(config.get('verbose', None))

        logger.setLevel(verbose)

        logger.debug('Received message: \n{}'.format(data_dict))

        executor = LocalExecutor(queue, parsed_args)

        with model.get_db_provider(config) as db:
            # experiment = experiment_from_dict(data_dict['experiment'])
            def try_get_experiment():
                experiment = db.get_experiment(experiment_key)
                if experiment is None:
                    raise ValueError(
                        'experiment is not found - indicates storage failure')
                return experiment

            experiment = retry(try_get_experiment,
                               sleep_time=10,
                               logger=logger)

            if config.get('experimentLifetime', None) and \
                int(str2duration(config['experimentLifetime'])
                    .total_seconds()) + experiment.time_added < time.time():
                logger.info(
                    'Experiment expired (max lifetime of {0} was exceeded)'.
                    format(config.get('experimentLifetime', None)))
                queue.acknowledge(ack_key)
                continue

            if allocate_resources(experiment, config, verbose=verbose):

                def hold_job():
                    queue.hold(ack_key, hold_period)

                hold_job()
                sched = BackgroundScheduler()
                sched.add_job(hold_job, 'interval', minutes=hold_period / 2)
                sched.start()

                try:
                    python = 'python'
                    if experiment.pythonver[0] == '3':
                        python = 'python3'
                    if '_singularity' not in experiment.artifacts.keys():
                        pip_diff = pip_needed_packages(experiment.pythonenv,
                                                       python)
                        if any(pip_diff):
                            logger.info(
                                'Setting up python packages for experiment')
                            if pip_install_packages(pip_diff, python,
                                                    logger) != 0:

                                logger.info(
                                    "Installation of all packages together "
                                    "failed, "
                                    "trying one package at a time")

                                for pkg in pip_diff:
                                    pip_install_packages([pkg], python, logger)

                    for tag, item in experiment.artifacts.items():
                        art: Artifact = item
                        if fetch_artifacts or art.local_path is None:
                            get_only_newer: bool = True
                            if tag == 'workspace':
                                get_only_newer = False

                            if not art.is_mutable:
                                logger.info('Fetching artifact ' + tag)
                                art.local_path = retry(lambda: db.get_artifact(
                                    art, only_newer=get_only_newer),
                                                       sleep_time=10,
                                                       logger=logger)
                            else:
                                logger.info('Skipping mutable artifact ' + tag)

                    returncode = executor.run(experiment)
                    if returncode != 0:
                        retval = returncode
                finally:
                    sched.shutdown()
                    queue.acknowledge(ack_key)

                if single_experiment:
                    logger.info('single_experiment is True, quitting')
                    return retval
            else:
                logger.info('Cannot run experiment ' + experiment.key +
                            ' due to lack of resources. Will retry')
                # Debounce failed requests we cannot service yet
                time.sleep(config.get('sleep_time', 5))

    logger.info("Queue in {0} is empty, quitting".format(
        fs_tracker.get_queue_directory()))

    return retval
Example 29
import six
import signal
import pdb

from apscheduler.schedulers.background import BackgroundScheduler

from studio import fs_tracker, model
from studio.util import logs
from studio.queues.local_queue import LocalQueue
from studio.util.gpu_util import get_available_gpus, get_gpu_mapping, get_gpus_summary
from studio.artifacts.artifact import Artifact
from studio.experiments.experiment import Experiment
from studio.util.util import sixdecode, str2duration, retry,\
    parse_verbosity, check_for_kb_interrupt

logs.get_logger('apscheduler.scheduler').setLevel(logs.ERROR)


class LocalExecutor(object):
    """Runs job while capturing environment and logs results.
    """
    def __init__(self, queue, args):
        self.config = args.config

        if args.guest:
            self.config['database']['guest'] = True

        self.task_queue = queue
        self.logger = logs.get_logger('LocalExecutor')
        self.logger.setLevel(
            model.parse_verbosity(self.config.get('verbose', None)))
Example 30
    def __init__(self, config, measure_timestamp_diff=False, compression=None):
        self.logger = logs.get_logger(self.__class__.__name__)
        self.logger.setLevel(get_storage_verbose_level())
        self.credentials: Credentials =\
            Credentials.get_credentials(config)

        self.endpoint = config.get('endpoint', None)

        if self.credentials is None:
            msg: str = "NO CREDENTIALS provided for {0}."\
                .format(self.endpoint)
            self._report_fatal(msg)

        if self.credentials.get_type() != AWS_TYPE:
            msg: str = "EXPECTED aws credentials for {0}: {1}"\
                .format(self.endpoint, repr(self.credentials.to_dict()))
            self._report_fatal(msg)

        aws_key: str = self.credentials.get_key()
        aws_secret_key = self.credentials.get_secret_key()
        region_name = self.credentials.get_region()
        profile_name = self.credentials.get_profile()

        if profile_name is not None:
            # it seems that explicitly specified profile name
            # should not be used with explicitly specified credentials:
            aws_key = None
            aws_secret_key = None

        session = Session(aws_access_key_id=aws_key,
                          aws_secret_access_key=aws_secret_key,
                          region_name=region_name,
                          profile_name=profile_name)

        session.events.unregister('before-parameter-build.s3.ListObjects',
                                  set_list_objects_encoding_type_url)

        self.client = session.client('s3',
                                     endpoint_url=self.endpoint,
                                     config=Config(signature_version='s3v4'))

        if compression is None:
            compression = config.get('compression', None)

        self.cleanup_bucket = config.get('cleanup_bucket', False)
        if isinstance(self.cleanup_bucket, str):
            self.cleanup_bucket = self.cleanup_bucket.lower() == 'true'
        self.bucket_cleaned_up: bool = False

        self.endpoint = self.client._endpoint.host

        self.bucket = config['bucket']
        try:
            buckets = self.client.list_buckets()
        except Exception as exc:
            msg: str = "FAILED to list buckets for {0}: {1}"\
                .format(self.endpoint, exc)
            self._report_fatal(msg)

        if self.bucket not in [b['Name'] for b in buckets['Buckets']]:
            try:
                if region_name is not None:
                    self.client.create_bucket(Bucket=self.bucket,
                                              CreateBucketConfiguration={
                                                  'LocationConstraint':
                                                  region_name
                                              })
                else:
                    self.client.create_bucket(Bucket=self.bucket)

            except Exception as exc:
                msg: str = "FAILED to create bucket {0} for {1}: {2}"\
                    .format(self.bucket, self.endpoint, exc)
                self._report_fatal(msg)

        super().__init__(StorageType.storageS3,
                         self.logger,
                         measure_timestamp_diff,
                         compression=compression)
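A sketch of the configuration keys this S3 handler actually reads: 'endpoint', a mandatory 'bucket', optional 'compression' and 'cleanup_bucket', plus an AWS credentials block in the format of Example 19. All values below are placeholders:

    config = {
        'endpoint': 'https://s3.example.com',
        'bucket': 'studioml-artifacts',
        'cleanup_bucket': 'false',
        # 'compression': ...       optional, defaults to the constructor argument
        # plus an AWS credentials entry consumed by Credentials.get_credentials()
    }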