    def _get_task_status_from_s3(self):
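        '''
        Fetch the job's task status file from S3 and parse it into a dict.

        Each line of the file is expected to be a 'task-name: status' pair.
        The parsed result is cached on self._task_status and returned.
        '''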
        task_list = {}
        
        # Check S3 for the status key.
        credentials = {}   # If this is empty, boto will look in environment variables.
        if hasattr(self.environment, 'aws_access_key') and hasattr(self.environment, 'aws_secret_key'):
            credentials['aws_access_key_id'] = self.environment.aws_access_key
            credentials['aws_secret_access_key'] = self.environment.aws_secret_key
        s3_conn = _connect_s3(**credentials)
        bucket = s3_conn.get_bucket(self.s3_bucket, validate=False)
        k = bucket.get_key(self.s3_status_key)

        try:
            if k:
                jobstatus = k.read()
                jobstatus_blob = jobstatus.split('\n')
                for line in jobstatus_blob:
                    # Split each line into a key/value pair on the first ':'.
                    line_split = line.split(":")
                    if len(line_split) > 1:
                        task_list[line_split[0]] = ":".join(line_split[1:]).rstrip('\n')
        except IOError:
            __LOGGER__.warning("Unable to get job status file from s3://%s/%s" % (self.s3_bucket, self.s3_status_key))

        self._task_status = task_list
        return task_list
    def get_status(self):
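        '''
        Return the current status of the job.

        If a final status is already known it is returned directly. Otherwise
        the state key on S3 is consulted: a missing key means the job
        completed, a value of 'Failed' means it failed, and any other value is
        returned as the in-progress status.
        '''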
        # First check whether we already know we've reached a final status.
        if self._final_status:
            return self._final_status

        # Check S3 for the state key.
        credentials = {}   # If this is empty, boto will look in environment variables.
        if hasattr(self.environment, 'aws_access_key') and hasattr(self.environment, 'aws_secret_key'):
            credentials['aws_access_key_id'] = self.environment.aws_access_key
            credentials['aws_secret_access_key'] = self.environment.aws_secret_key
        s3_conn = _connect_s3(**credentials)
        bucket = s3_conn.get_bucket(self.s3_bucket, validate=False)
        k = bucket.get_key(self.s3_state_key)

        # The key only gets deleted on success.
        if not k:
            self._get_task_status_from_s3()
            self._final_status = 'Completed'
            self.save()
            return self._final_status

        status_from_s3 = k.read()
        if status_from_s3 == 'Failed':
            self._get_task_status_from_s3()
            self._final_status = 'Failed'
            self.save()
            return self._final_status

        return status_from_s3
    def run_job(self, job, session=None):
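        '''
        Submit a job to EC2.

        Writes a 'Pending' state key to S3, serializes the job to a temporary
        directory, starts the commander host, and POSTs the serialized job to
        it, retrying while the host finishes booting.
        '''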
        # Determine which AWS credentials to use.
        aws_access_key, aws_secret_key = None, None
        if hasattr(job.environment, 'aws_access_key') and hasattr(job.environment, 'aws_secret_key'):
            aws_access_key = job.environment.aws_access_key
            aws_secret_key = job.environment.aws_secret_key
        else:
            aws_access_key, aws_secret_key = _get_credentials()
        credentials = {'aws_access_key_id': aws_access_key, 'aws_secret_access_key': aws_secret_key}

        s3_log_folder = job.environment.s3_log_folder_path + job.name + '-' + str(_time.time()) + '/'
        s3_state_key = s3_log_folder + 'status'
        s3_log_key = s3_log_folder + 'log.txt'
        s3_status_key = s3_log_folder + 'jobstatus.txt'

        # Make sure we can write to S3
        try:
            conn = _connect_s3(**credentials)
            bucket = conn.get_bucket(job.environment.s3_bucket, validate=False)
            k = _s3_key(bucket)
            k.key = s3_state_key
            k.set_contents_from_string('Pending')
        except Exception as e:
            raise RuntimeError("Unable to start job. Could not write to S3 bucket: '%s'. Error: %s"
                               % (job.environment.s3_bucket, e))

        serialized_job_folder = _tempfile.mkdtemp(prefix='graphlab-ec2-job')
        serialized_job_file_path = _os.path.join(serialized_job_folder, 'job')
        job._serialize(serialized_job_file_path)

        commander = Ec2ExecutionEnvironment._start_commander_host(job.environment, credentials)
        post_url = "http://%s:9004/submit" % commander.public_dns_name
        __LOGGER__.debug("Sending %s to %s" % (serialized_job_file_path, post_url))

        # Swallow all logging from the 'requests' module.
        logging.getLogger('requests').setLevel(logging.CRITICAL)

        # The host should be up by now, but it will likely need a bit more time
        # before it is ready to accept requests, so keep retrying the submit.
        MAX_TRIES, current_tries, response = 60, 0, None
        post_params = {'s3_bucket': job.environment.s3_bucket,
                       's3_state_key': s3_state_key,
                       's3_log_key': s3_log_key,
                       's3_status_key': s3_status_key,
                       'required_packages': job.get_required_packages()}
        while current_tries < MAX_TRIES:
            with open(serialized_job_file_path, 'rb') as job_file:
                files = {'file': job_file}
                try:
                    # Try to submit the work.
                    current_tries += 1
                    response = _requests.post(post_url, files=files, params=post_params)
                    break   # Success
                except _requests.exceptions.ConnectionError as e:
                    if current_tries >= MAX_TRIES:
                        commander.stop()
                        raise RuntimeError("Unable to submit job to EC2 instance: '%s'. Please check AWS Console to make sure any EC2 instances launched have been terminated." % commander.instance_id)
            _time.sleep(2)
    def _save_state_to_s3(self):
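        '''
        Write the Predictive Service state file (state.ini) to S3.

        The state file records the service info, the environment, and the
        version and docstring of every deployed predictive object. Any locally
        changed predictive objects are uploaded first, and the revision number
        is incremented.
        '''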
        # Dump immutable state data to a config
        state = _ConfigParser(allow_no_value=True)
        state.optionxform = str
        state.add_section(PredictiveService._SERVICE_INFO_SECTION_NAME)
        state.set(PredictiveService._SERVICE_INFO_SECTION_NAME, 'Name', self.name)
        state.set(PredictiveService._SERVICE_INFO_SECTION_NAME, 'Description', self._description)
        state.set(PredictiveService._SERVICE_INFO_SECTION_NAME, 'API Key', self._api_key)

        # Save environment, if we have one
        if self._environment:
            state.add_section(PredictiveService._ENVIRONMENT_SECTION_NAME)
            for (key, value) in self._environment._get_state().iteritems():
                state.set(PredictiveService._ENVIRONMENT_SECTION_NAME, key, value)

        # Save deployment version data to config
        state.add_section(PredictiveService._DEPLOYMENT_SECTION_NAME)
        current_predictive_objects = _copy(self._all_predictive_objects)
        for (model_name, info) in current_predictive_objects.iteritems():
            state.set(PredictiveService._DEPLOYMENT_SECTION_NAME, model_name, info['version'])

        state.add_section(PredictiveService._PREDICTIVE_OBJECT_DOCSTRING)
        for (model_name, info) in current_predictive_objects.iteritems():
            state.set(PredictiveService._PREDICTIVE_OBJECT_DOCSTRING, model_name, info['docstring'].encode('string_escape'))

        if self._has_state_changed_on_s3():
            raise IOError("Can not save changes. The Predictive Service has changed on S3. Please "
                          "reload from S3.")

        # Save any new predictive objects to S3.
        for predictive_object_name in self._local_changes:
            (predictive_object, po_info) = self._local_changes[predictive_object_name]
            if predictive_object:         # if this is not a model deletion:
                save_path = self._get_predictive_object_save_path(predictive_object_name, po_info['version'])
                dependency_path = self._get_dependency_save_path(predictive_object_name, po_info['version'])
                predictive_object.save(save_path, dependency_path, self.aws_credentials)

        # Update the revision number after we have successfully written all predictive objects
        self._revision_number += 1
        state.add_section(self._META_SECTION_NAME)
        state.set(self._META_SECTION_NAME, 'Revision Number', self._revision_number)
        state.set(self._META_SECTION_NAME, 'Schema Version', self._schema_version)

        # Write state file to S3
        with _NamedTemporaryFile() as temp_file:
            state.write(temp_file)
            temp_file.flush()
            conn = _connect_s3(**self.aws_credentials)
            bucket = conn.get_bucket(self._s3_bucket_name, validate=False)
            key = _s3_key(bucket)
            key.key = self._s3_state_key
            key.set_contents_from_filename(temp_file.name)
            temp_file.close()  # deletes temp file

        # Update our state
        self._local_changes = {}
        self._predictive_objects = {name: {'version': info['version'], 'docstring': info['docstring']}
                                    for name, info in current_predictive_objects.iteritems()}
    def __init__(self, name, s3_state_path, description, api_key, aws_credentials,
                 _new_service = True):
        '''
        Initialize a new Predictive Service object

        Notes
        -----
        Do not call this method directly.

        To create a new Predictive Service, use:
             graphlab.deploy.predictive_service.create(...)

        To load an existing Predictive Service, use
            graphlab.deploy.predictive_service.load(<ps-s3-path>)
        '''
        if not isinstance(name, str):
            raise TypeError("Name of Predictive Service needs to be a string")

        self.name = name
        self._s3_bucket_name, self._s3_key_name = _parse_s3_path(s3_state_path)
        self._s3_state_key = self._s3_key_name + '/state.ini'
        self._description = description
        self._api_key = api_key

        self._local_changes = {}
        self._predictive_objects = {}
        self._s3_state_path = s3_state_path
        self.aws_credentials = aws_credentials
        self._session = _gl.deploy._default_session

        if _new_service:
            # Verify we're not overwriting another predictive service.
            bucket = _connect_s3(**self.aws_credentials).get_bucket(self._s3_bucket_name,
                                                                    validate=False)
            key = bucket.get_key(self._s3_state_key)
            if key:
                raise IOError("There is already a Predictive Service at the specified location. Use"
                              " a different S3 path. If you want to load an existing Predictive"
                              " Service, call 'load(...)'.")

            # Init version data
            self._revision_number = 0
            self._schema_version = PREDICTIVE_SERVICE_SCHEMA_VERSION

            # No environment yet. A launched one must be attached later.
            self._environment = None

            # Write init data to S3
            self._save_state_to_s3()
        else:
            # Read version data
            self._update_from_s3()
def _check_aws_credentials(src_credentials, tgt_credentials, source_path):
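    '''
    Verify that tgt_credentials can be used to access source_path on S3.

    If the source and target credentials are identical no check is needed.
    '''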
    # check if credentials are the same
    if src_credentials['aws_access_key_id'] == tgt_credentials['aws_access_key_id'] and \
            src_credentials['aws_secret_access_key'] == tgt_credentials['aws_secret_access_key']:
        return

    # make sure tgt_credentials can be used to access source path
    try:
        conn = _connect_s3(**tgt_credentials)
        (bucket_name, s3_directory) = _file_util.parse_s3_path(source_path)
        bucket = conn.get_bucket(bucket_name)
        key = bucket.get_key(s3_directory)
        if not key:
            raise RuntimeError("Unable to find the key within the S3 bucket. Please check your \
                            aws credentials.")
    except Exception as e:
        raise RuntimeError("Unable to access the correct S3 bucket. Please check your aws credentials.")
    def _get_s3_state_config(s3_bucket_name, s3_key_name, credentials):
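        '''
        Download the Predictive Service state file from S3 and parse it.

        Returns a ConfigParser loaded from s3://<s3_bucket_name>/<s3_key_name>.
        '''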
        conn = _connect_s3(**credentials)
        bucket = conn.get_bucket(s3_bucket_name, validate=False)
        key = bucket.get_key(s3_key_name)

        if not key:
            raise IOError("No Predictive Service at the specified location.")

        with _NamedTemporaryFile() as temp_file:
            key.get_contents_to_file(temp_file)
            temp_file.flush()
            config = _ConfigParser(allow_no_value=True)
            config.optionxform = str
            config.read(temp_file.name)
            temp_file.close()  # deletes temp file

        return config
    def _get_log_file_path(self):
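        '''
        Return a pre-signed URL for the job's log file on S3, plus its size.

        A previously generated URL is reused if it is less than five minutes
        old; otherwise a new URL valid for 300 seconds is generated. If the
        log file is not yet available on S3, a message string is returned
        instead.
        '''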
        # Return the previously generated URL if it has not expired yet.
        if self.__s3_log_url_expire and self.__s3_log_url:
            delta = _datetime.datetime.now() - self.__s3_log_url_expire
            if delta.total_seconds() < 300:
                return (self.__s3_log_url, self.__s3_log_size)

        # Check S3 for the log key.
        credentials = {}   # If this is empty, boto will look in environment variables.
        if hasattr(self.environment, 'aws_access_key') and hasattr(self.environment, 'aws_secret_key'):
            credentials['aws_access_key_id'] = self.environment.aws_access_key
            credentials['aws_secret_access_key'] = self.environment.aws_secret_key
        s3_conn = _connect_s3(**credentials)
        bucket = s3_conn.get_bucket(self.s3_bucket, validate=False)
        k = bucket.get_key(self.s3_log_key)

        if not k:
            return "Log file not available on S3."
        else:
            url = k.generate_url(expires_in=300)
            self.__s3_log_url = url
            self.__s3_log_size = k.size
            self.__s3_log_url_expire = _datetime.datetime.now()
            return (url, k.size)