Exemple #1
0
    def wait_for_object_status(self,
        get_status, progress, object_readable_name,
        post_check_status=None, log_status=None):
        """Poll ``get_status`` until it returns a status outside ``progress``.

        While the status is in ``progress`` it is passed to ``log_status``
        every time it differs from the previously reported one, and
        polling sleeps ``STATE_POLL_INTERVAL`` between ``get_status`` calls.

        Args:
            get_status: zero-argument callable returning the current status.
            progress: container of statuses that mean "still in progress".
            object_readable_name: name used in the
                'processed_with_error' message.
            post_check_status: optional callable invoked with the final
                status when no error was raised.
            log_status: optional callable invoked with each newly seen
                in-progress status.

        Returns:
            The first status that is not in ``progress``.

        Raises:
            AugerException: the final status is 'processed_with_error',
                'error' or 'failure'.
        """
        report = log_status or (lambda obj_status: None)

        current = get_status()
        reported = ''

        while current in progress:
            if current != reported:
                reported = current
                report(current)

            # Keep polling for as long as the status has not moved on.
            while current == reported:
                time.sleep(STATE_POLL_INTERVAL)
                current = get_status()

        if current == 'processed_with_error':
            raise AugerException(
                '%s processed with error' % object_readable_name)
        if current in ('error', 'failure'):
            raise AugerException('Auger Cloud return error...')

        if post_check_status:
            post_check_status(current)

        return current
Exemple #2
0
    def get_experiment_settings(self):
        """Assemble the evaluation options for an experiment run.

        Reads 'model_type', 'target' and 'exclude' from the 'config'
        configuration and the 'experiment/...' tuning values from the
        'auger' configuration, then passes the options together with the
        statistics of the experiment's data set to ``_fill_data_options``.

        Returns:
            A tuple ``({'evaluation_options': options}, model_type)``.

        Raises:
            AugerException: the model type is not one of ``MODEL_TYPES``,
                no target feature ended up in the options, or a
                'timeseries' model does not have exactly one time series
                feature.
        """
        config = self.ctx.get_config('config')
        auger_config = self.ctx.get_config('auger')

        model_type = config.get('model_type', '')
        if model_type not in MODEL_TYPES:
            raise AugerException('Model type should be %s' %
                '|'.join(MODEL_TYPES))
        target = config.get('target', '')
        exclude = config.get('exclude', [])
        is_classification = model_type == 'classification'

        options = {
            'targetFeature': None,
            'featureColumns': [],
            'categoricalFeatures': [],
            'timeSeriesFeatures': [],
            'binaryClassification': False,
            'labelEncodingFeatures':
                auger_config.get('experiment/label_encoded', []),
            'crossValidationFolds':
                auger_config.get('experiment/cross_validation_folds', 5),
            'max_total_time_mins':
                auger_config.get('experiment/max_total_time', 60),
            'max_eval_time_mins':
                auger_config.get('experiment/max_eval_time', 1),
            'max_n_trials':
                auger_config.get('experiment/max_n_trials', 1000),
            'use_ensemble':
                auger_config.get('experiment/use_ensemble', True),
            'classification': is_classification,
            'scoring':
                auger_config.get('experiment/metric',
                    'f1_macro' if is_classification else 'r2'),
        }

        data_set_id = self.properties()['project_file_id']
        data_set_api = AugerDataSetApi(
            self.ctx, self.parent_api, None, data_set_id)
        stats = data_set_api.properties()['statistics']

        # NOTE(review): presumably fills `options` in place (targetFeature
        # is checked right after) -- confirm against _fill_data_options.
        self._fill_data_options(options, stats, target, exclude)

        if options['targetFeature'] is None:
            raise AugerException('Please set target to build model.')

        # Compare by value, not identity: a model type read from a config
        # file is not guaranteed to be the same object as the literal, so
        # `is not 'timeseries'` could wrongly skip the time series branch.
        if model_type != 'timeseries':
            options['timeSeriesFeatures'] = []
        else:
            time_series = auger_config.get('experiment/time_series', None)
            if time_series:
                options['timeSeriesFeatures'] = [time_series]
            if len(options['timeSeriesFeatures']) != 1:
                raise AugerException('Please select time series feature'
                    ' to build time series model'
                    ' (experiment/time_series option).')

        return {'evaluation_options': options}, model_type
Exemple #3
0
    def predict(self, records, features, threshold=None):
        """Create a prediction on this pipeline and return its result.

        Args:
            records: forwarded to ``AugerPredictionApi.create``.
            features: forwarded to ``AugerPredictionApi.create``.
            threshold: optional, forwarded to ``AugerPredictionApi.create``.

        Returns:
            The 'result' entry of the created prediction's properties.

        Raises:
            AugerException: no pipeline id is set, or the pipeline status
                is not 'ready'.
        """
        if self.object_id is None:
            raise AugerException('Please provide Auger Pipeline id')

        pipeline_status = self.properties().get('status')
        if pipeline_status != 'ready':
            raise AugerException("Pipeline %s is not ready or has issues..." %
                                 self.object_id)

        created = AugerPredictionApi(self.ctx, self).create(
            records, features, threshold)
        return created.get('result')
Exemple #4
0
    def evaluate(self):
        """Print the leaderboard and status of the last experiment session.

        The session id is read from the 'experiment/experiment_session_id'
        option of the 'auger' config.

        Raises:
            AugerException: no experiment session id is configured.
        """
        # Make sure Auger credentials are available before any API call.
        self.credentials.verify()

        session_id = self.ctx.config['auger'].get(
            'experiment/experiment_session_id')
        if session_id is None:
            raise AugerException(
                'Can\'t find previously run experiments'
                ' (auger.yaml/experiment/experiment_session_id option).')

        session_api = AugerExperimentSessionApi(
            self.ctx, None, None, session_id)
        print_table(self.ctx.log, session_api.get_leaderboard())

        status = session_api.properties().get('status')
        known_messages = {
            'preprocess': 'Search is preprocessing data for traing...',
            'started': 'Search is in progress...',
            'completed': 'Search is completed.'
        }
        # Fall back to a generic line for statuses without a canned message.
        message = known_messages.get(status, None)
        if not message:
            message = 'Search status is %s' % status
        self.ctx.log(message)
Exemple #5
0
    def _docker_run_predict(self, filename, threshold, model_path):
        """Run the exported model's client inside the Auger worker image.

        The directory of ``filename`` is mounted as the container's
        model_data directory and ``model_path`` as its exported_model
        directory. The image tag is the 'kubernetes_stack' cluster setting.

        Args:
            filename: path of the file to predict on.
            threshold: optional threshold; passed as ``--threshold`` only
                when truthy.
            model_path: path of the unpacked exported model.

        Returns:
            Path of ``<filename without extension>_predicted.csv`` in the
            same directory as ``filename``.

        Raises:
            AugerException: docker could not be started or exited with a
                non-zero status.
        """
        cluster_settings = AugerClusterApi.get_cluster_settings(self.ctx)
        docker_tag = cluster_settings.get('kubernetes_stack')
        result_file = os.path.basename(filename)
        data_path = os.path.dirname(filename)
        image = 'deeplearninc/auger-ml-worker:%s' % docker_tag

        # Pass an argument list instead of a shell string: paths that
        # contain spaces or shell metacharacters are then handed to docker
        # verbatim rather than being re-parsed by a shell.
        command = [
            'docker', 'run',
            '-v', '%s:/var/src/auger-ml-worker/exported_model' % model_path,
            '-v', '%s:/var/src/auger-ml-worker/model_data' % data_path,
            image,
            'python', './exported_model/client.py',
            '--path_to_predict=./model_data/%s' % result_file,
        ]
        if threshold:
            command.append('--threshold=%s' % threshold)

        self.ctx.log('Running model in %s' % image)
        try:
            subprocess.check_call(command, stderr=subprocess.STDOUT)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing docker executable, which the shell
            # used to report as a non-zero exit status.
            raise AugerException('Error running Docker container...') from e

        return os.path.join(data_path,
            os.path.splitext(result_file)[0] + "_predicted.csv")
Exemple #6
0
    def call(self, method, params=None):
        """Call an Auger API method and return the 'data' part of the result.

        Args:
            method: API method name, forwarded to ``call_ex``.
            params: optional dict of parameters; an empty dict is sent
                when omitted.

        Returns:
            The value stored under 'data' in the ``call_ex`` result.

        Raises:
            AugerException: the result has no 'data' entry.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        if params is None:
            params = {}
        result = self.call_ex(method, params)

        if 'data' in result:
            return result['data']

        # Report the method that failed (the original referenced an
        # undefined name here and died with NameError instead).
        raise AugerException(
            "Call of Auger API method %s failed." % method)
Exemple #7
0
 def _ensure_object_id(self):
     """Return the object id, resolving it from ``properties()`` if unset.

     Raises:
         AugerException: the id is unset and ``properties()`` returned None.
     """
     if self.object_id is not None:
         return self.object_id

     remote = self.properties()
     if remote is None:
         raise AugerException('Can\'t find remote %s: %s...' % \
             (self._get_readable_name(), self.object_name))

     # Remember the resolved id on the instance.
     self.object_id = remote.get('id')
     return self.object_id
Exemple #8
0
 def name(self):
     """Return the object name, resolving it from ``properties()`` if unset.

     Raises:
         AugerException: the name is unset and ``properties()`` returned None.
     """
     if self.object_name is not None:
         return self.object_name

     remote = self.properties()
     if remote is None:
         raise AugerException(
             'Can\'t find name for remote %s: %s...' % \
             (self._get_readable_name(), self.object_id))

     # Remember the resolved name on the instance.
     self.object_name = remote.get('name')
     return self.object_name
Exemple #9
0
    def verify(data_source_file):
        """Validate a data source and tell whether it is a local file.

        Args:
            data_source_file: an http(s) URL or a path to a local file.

        Returns:
            ``(url, False)`` for http/https URLs (returned unchanged), or
            ``(absolute_path, True)`` for an existing local file.

        Raises:
            AugerException: the local file's extension is not in
                ``SUPPORTED_FORMATS`` or the file does not exist.
        """
        scheme = urllib.parse.urlparse(data_source_file).scheme
        if scheme in ['http', 'https']:
            return data_source_file, False

        # Anything else is treated as a path relative to the working dir.
        local_path = os.path.abspath(
            os.path.join(os.getcwd(), data_source_file))

        extension = os.path.splitext(local_path)[1]
        if extension not in SUPPORTED_FORMATS:
            raise AugerException(
                'Source file has to be one of the supported fomats: %s' %
                ', '.join(SUPPORTED_FORMATS))

        if not os.path.isfile(local_path):
            raise AugerException(
                'Can\'t find file to import: %s' % local_path)

        return local_path, True
Exemple #10
0
    def _upload_file(self, file_name, url):
        """POST a local file to ``url`` and return its 'files/...' path.

        The response body is parsed as a query string; the part of its
        'path' value after the last 'files/' is returned with a 'files/'
        prefix.

        Raises:
            AugerException: the response status code is not 200.
        """
        with open(file_name, 'rb') as payload:
            response = requests.post(url, data=payload)

        if response.status_code != 200:
            raise AugerException(
                'HTTP error [%s] while uploading file to Auger Cloud...' % response.status_code)

        fields = urllib.parse.parse_qs(response.text)
        uploaded_path = fields.get('path')[0]
        return 'files/%s' % uploaded_path.split('files/')[-1]
Exemple #11
0
    def download(self, url, path_to_download, trial_id):
        """Download ``url`` into ``path_to_download`` and return the file path.

        The local file name is the last component of the URL path with
        'export_' replaced by 'model-'.

        Args:
            url: location of the file to download.
            path_to_download: target directory; created if missing.
            trial_id: not used by this method.

        Returns:
            Path of the downloaded file.

        Raises:
            AugerException: no object id is set.
        """
        if self.object_id is None:
            raise AugerException('Please provide Auger Pipeline File id')

        # exist_ok closes the window between "does it exist?" and "create
        # it" in which another process could create the directory.
        os.makedirs(path_to_download, exist_ok=True)

        basename = os.path.basename(
            urllib.parse.urlparse(url).path).replace('export_', 'model-')
        file_name = os.path.join(path_to_download, basename)
        urllib.request.urlretrieve(url, file_name)

        return file_name
Exemple #12
0
 def login(self, username, password, organisation, url):
     """Obtain an API token and check that the organization exists.

     Calls 'create_token' with the given credentials, installs a
     token-authenticated ``RestApi`` on ``self.ctx.rest_api`` and then
     looks up the organization.

     Args:
         username: sent as 'email' to 'create_token'.
         password: sent as 'password' to 'create_token'.
         organisation: organization name to look up.
         url: base URL passed to ``RestApi``.

     Returns:
         The token from the 'create_token' response.

     Raises:
         AugerException: the organization's properties are None.
     """
     rest_api = RestApi(url, None)
     res = rest_api.call_ex('create_token', {
         'email': username,
         'password': password
     })
     token = res['data']['token']
     self.ctx.rest_api = RestApi(url, token)

     org_api = AugerOrganizationApi(self.ctx, organisation)
     # Identity check: `== None` could be answered by a custom __eq__.
     if org_api.properties() is None:
         raise AugerException('Auger Organization %s doesn\'t exist' %
                              organisation)
     return token
Exemple #13
0
    def _docker_pull_image(self):
        """Pull the Auger worker image and return its tag.

        The tag is the 'kubernetes_stack' value of the cluster settings.

        Returns:
            The docker tag that was pulled.

        Raises:
            AugerException: docker could not be started or exited with a
                non-zero status.
        """
        cluster_settings = AugerClusterApi.get_cluster_settings(self.ctx)
        docker_tag = cluster_settings.get('kubernetes_stack')

        # Argument list, no shell: the tag comes from cluster settings and
        # must not be interpreted as shell syntax.
        command = [
            'docker', 'pull',
            'deeplearninc/auger-ml-worker:%s' % docker_tag,
        ]
        try:
            subprocess.check_call(command)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing docker executable, which the shell
            # used to report as a non-zero exit status.
            raise AugerException('Can\'t pull Docker container...') from e

        return docker_tag
Exemple #14
0
    def _upload_to_multi_tenant(self, file_to_upload):
        """Upload a file through a URL obtained from the API.

        Requests 'create_project_file_url' for a generated file path, then
        POSTs the file together with the returned 'fields' to the
        returned 'url'.

        Returns:
            ``'s3://<bucket>/<file_path>'`` where the bucket is the first
            label of the upload URL's host.

        Raises:
            AugerException: the API returned no upload URL, or the upload
                responded with a status other than 200 or 201.
        """
        project = self.parent_api
        file_path = 'workspace/projects/%s/files/%s-%s' % (
            project.object_name,
            shortuuid.uuid(),
            os.path.basename(file_to_upload))

        upload_info = self.rest_api.call('create_project_file_url', {
            'project_id': project.object_id,
            'file_path': file_path})
        if upload_info is None:
            raise AugerException(
                'Error while uploading file to Auger Cloud...')

        url = upload_info['url']
        with open(file_to_upload, 'rb') as stream:
            response = requests.post(
                url,
                data=upload_info['fields'],
                files={'file': (file_path, stream)})

        if response.status_code not in (201, 200):
            raise AugerException(
                'HTTP error [%s] while uploading file'
                    ' to Auger Cloud...' % response.status_code)

        bucket = urllib.parse.urlparse(url).netloc.split('.')[0]
        return 's3://%s/%s' % (bucket, file_path)
Exemple #15
0
    def properties(self):
        """Return the remote properties of this object, or None.

        With an object id set, the object is fetched via
        ``'get_<api_request_path>'``. Otherwise ``self.list()`` is searched
        for an item whose name equals the object name or the object name
        with underscores replaced by dashes; the first match is returned
        and its id is stored in ``self.object_id``.

        Raises:
            AugerException: neither an id nor a name is set.
        """
        if self.object_id is not None:
            return self.rest_api.call('get_%s' % self.api_request_path,
                                      {'id': self.object_id})

        if self.object_name is None:
            raise AugerException(
                'No name or id was specified for %s' % \
                self._get_readable_name())

        accepted_names = [
            self.object_name, self.object_name.replace('_', '-')]
        found = next(
            (entry for entry in self.list()
             if entry['name'] in accepted_names),
            None)
        if found is not None:
            self.object_id = found.get('id')
        return found
Exemple #16
0
    def request_list(self, record_type, params):
        """Yield records of ``record_type`` page by page.

        Starts at ``params['offset']`` (default 0) and yields at most
        ``params['limit']`` (default ``REQUEST_LIMIT``) records, calling
        ``'get_' + record_type`` repeatedly with an updated offset and
        remaining limit. ``params`` itself is not modified.

        Args:
            record_type: suffix of the API method to call.
            params: dict of request parameters.

        Yields:
            Each item of every response's 'data' list.

        Raises:
            AugerException: a response lacks 'data' or 'meta'.
        """
        offset = params.get('offset', 0)
        limit = params.get('limit', REQUEST_LIMIT)
        p = params.copy()
        while limit > 0:
            p['offset'] = offset
            p['limit'] = limit
            response = self.call_ex('get_' + record_type, p)
            if 'data' not in response or 'meta' not in response:
                raise AugerException("Read list of %s failed." % record_type)

            page = response['data']
            yield from page

            received = len(page)
            if received == 0:
                # An empty page cannot advance offset or limit; without
                # this the same request would be repeated forever.
                break

            offset += received
            limit -= received
            if offset >= response['meta']['pagination']['total']:
                break
Exemple #17
0
    def _predict_locally(self, filename, model_id, threshold):
        """Run a prediction with a locally available model via docker.

        Args:
            filename: file to predict on.
            model_id: id checked with ``AugerDeploy.verify_local_model``.
            threshold: forwarded to ``_docker_run_predict``.

        Returns:
            Whatever ``_docker_run_predict`` returns.

        Raises:
            AugerException: the model is not loaded locally.
        """
        loaded, _, model_name = AugerDeploy.verify_local_model(model_id)
        if not loaded:
            raise AugerException('Model isn\'t loaded locally. '
                'Please use a2ml depoly command to download model.')

        model_path, model_existed = self._exstract_model(model_name)

        try:
            return self._docker_run_predict(filename, threshold, model_path)
        finally:
            # Remove the unpacked model only if it was not already
            # unpacked before this call.
            if not model_existed:
                shutil.rmtree(model_path, ignore_errors=True)
Exemple #18
0
    def train(self):
        """Create an experiment for the configured data set and run it.

        The experiment name and the id of the started session are stored
        through ``AugerConfig.set_experiment``.

        Raises:
            AugerException: no 'dataset' option is set in the 'auger'
                config.
        """
        # Make sure Auger credentials are available before any API call.
        self.credentials.verify()

        self.start_project()

        dataset = self.ctx.config['auger'].get('dataset')
        if dataset is None:
            raise AugerException(
                'Plese specify DataSet name in auger.yaml/dataset')

        experiment = AugerExperimentApi(self.ctx, self.project_api)
        experiment.create(dataset)
        self.ctx.log('Created Experiment %s ' % experiment.object_name)

        session_id = experiment.run()
        self.ctx.log(
            'Started Experiment %s training.' % experiment.object_name)

        AugerConfig(self.ctx).set_experiment(
            experiment.object_name, session_id)
Exemple #19
0
    def create(self, data_source_file, data_set_name=None):
        """Create a data set from a local file or an http(s) URL.

        Local files are uploaded first; the object name is then
        ``data_set_name`` when given, otherwise a name derived from the
        file name. For URLs the object name is the last component of the
        URL path and ``data_set_name`` is not consulted.

        Returns:
            The result of the parent class's ``create``.

        Raises:
            AugerException: the parent ``create`` failed with an error
                mentioning 'en.errors.project_file.url_not_uniq'. Any
                other exception is re-raised unchanged.
        """
        source, is_local = AugerDataSetApi.verify(data_source_file)

        if not is_local:
            file_url = source
            file_name = os.path.basename(
                urllib.parse.urlparse(file_url).path)
            self.object_name = file_name
        else:
            file_url = self._upload_to_cloud(source)
            file_name = os.path.basename(source)
            self.object_name = data_set_name if data_set_name \
                else self._get_data_set_name(file_name)

        try:
            return super().create(file_url, file_name)
        except Exception as exc:
            if 'en.errors.project_file.url_not_uniq' not in str(exc):
                raise exc
            raise AugerException(
                'DataSet already exists for %s' % file_url)
Exemple #20
0
 def delete(self):
     """Always raise: deletion is not available through this API.

     Raises:
         AugerException: on every call.
     """
     message = (
         'You could\'t delete organization using Auger Cloud API.'
         ' Please use Auger UI to do that...')
     raise AugerException(message)