def wait_for_object_status(self, get_status, progress, object_readable_name,
                           post_check_status=None, log_status=None):
    """Poll get_status() until the status leaves the `progress` set.

    Logs each new status via `log_status` (no-op by default), raises on
    terminal error statuses, and optionally runs `post_check_status` on
    the final status before returning it.
    """
    if log_status is None:
        # default: swallow status updates silently
        def log_status(obj_status):
            pass

    current = get_status()
    previous = ''
    while current in progress:
        if current != previous:
            previous = current
            log_status(current)
        # wait for the status to actually change before re-checking
        while current == previous:
            time.sleep(STATE_POLL_INTERVAL)
            current = get_status()

    if current == 'processed_with_error':
        raise AugerException(
            '%s processed with error' % object_readable_name)
    if current in ('error', 'failure'):
        raise AugerException('Auger Cloud return error...')

    if post_check_status:
        post_check_status(current)
    return current
def get_experiment_settings(self):
    """Build evaluation options for an experiment from config + data set stats.

    Returns:
        tuple: ({'evaluation_options': options}, model_type)
    Raises:
        AugerException: on unsupported model type, missing target feature,
            or a timeseries model without exactly one time series feature.
    """
    config = self.ctx.get_config('config')
    auger_config = self.ctx.get_config('auger')

    model_type = config.get('model_type', '')
    if model_type not in MODEL_TYPES:
        raise AugerException('Model type should be %s' % \
            '|'.join(MODEL_TYPES))
    target = config.get('target', '')
    exclude = config.get('exclude', [])

    options = {
        'targetFeature': None,
        'featureColumns': [],
        'categoricalFeatures': [],
        'timeSeriesFeatures': [],
        'binaryClassification': False,
        'labelEncodingFeatures':
            auger_config.get('experiment/label_encoded', []),
        'crossValidationFolds':
            auger_config.get('experiment/cross_validation_folds', 5),
        'max_total_time_mins':
            auger_config.get('experiment/max_total_time', 60),
        'max_eval_time_mins':
            auger_config.get('experiment/max_eval_time', 1),
        'max_n_trials':
            auger_config.get('experiment/max_n_trials', 1000),
        'use_ensemble':
            auger_config.get('experiment/use_ensemble', True),
        'classification': model_type == 'classification',
        'scoring':
            auger_config.get('experiment/metric',
                'f1_macro' if model_type == 'classification' else 'r2')
    }

    # feature/statistics options come from the data set's stored statistics
    data_set_id = self.properties()['project_file_id']
    data_set_api = AugerDataSetApi(
        self.ctx, self.parent_api, None, data_set_id)
    data_set_properties = data_set_api.properties()
    stats = data_set_properties['statistics']
    self._fill_data_options(options, stats, target, exclude)

    if options['targetFeature'] is None:
        raise AugerException('Please set target to build model.')

    # bug fix: `is not 'timeseries'` compared object identity of string
    # literals (SyntaxWarning in 3.8+); use != for value comparison
    if model_type != 'timeseries':
        options['timeSeriesFeatures'] = []
    else:
        time_series = auger_config.get('experiment/time_series', None)
        if time_series:
            options['timeSeriesFeatures'] = [time_series]
        if len(options['timeSeriesFeatures']) != 1:
            raise AugerException('Please select time series feature'
                ' to build time series model'
                ' (experiment/time_series option).')

    return {'evaluation_options': options}, model_type
def predict(self, records, features, threshold=None):
    """Run a prediction on the pipeline and return its 'result' payload.

    Raises:
        AugerException: when no pipeline id is set or the pipeline
            isn't in 'ready' state.
    """
    if self.object_id is None:
        raise AugerException('Please provide Auger Pipeline id')

    pipeline_status = self.properties().get('status')
    if pipeline_status != 'ready':
        raise AugerException(
            "Pipeline %s is not ready or has issues..." % self.object_id)

    prediction_api = AugerPredictionApi(self.ctx, self)
    prediction_properties = prediction_api.create(
        records, features, threshold)
    return prediction_properties.get('result')
def evaluate(self):
    """Print the leaderboard and current status of the configured experiment session.

    Raises:
        AugerException: when no experiment_session_id is configured.
    """
    # verify availability of auger credentials
    self.credentials.verify()

    experiment_session_id = self.ctx.config['auger'].get(
        'experiment/experiment_session_id')
    if experiment_session_id is None:
        raise AugerException(
            'Can\'t find previously run experiments'
            ' (auger.yaml/experiment/experiment_session_id option).')

    experiment_session_api = AugerExperimentSessionApi(
        self.ctx, None, None, experiment_session_id)
    print_table(self.ctx.log, experiment_session_api.get_leaderboard())

    status = experiment_session_api.properties().get('status')
    messages = {
        # typo fix: 'traing' -> 'training'
        'preprocess': 'Search is preprocessing data for training...',
        'started': 'Search is in progress...',
        'completed': 'Search is completed.'
    }
    message = messages.get(status)
    if message:
        self.ctx.log(message)
    else:
        self.ctx.log('Search status is %s' % status)
def _docker_run_predict(self, filename, threshold, model_path):
    """Run the exported model's client.py inside the auger-ml-worker image.

    Args:
        filename: path to the CSV with records to predict.
        threshold: optional probability threshold forwarded to the client.
        model_path: directory with the unpacked exported model.
    Returns:
        Path to the '<input>_predicted.csv' written next to the input file.
    Raises:
        AugerException: when the container exits with a non-zero status.
    """
    cluster_settings = AugerClusterApi.get_cluster_settings(self.ctx)
    docker_tag = cluster_settings.get('kubernetes_stack')
    result_file = os.path.basename(filename)
    data_path = os.path.dirname(filename)

    call_args = "--path_to_predict=./model_data/%s %s" % \
        (result_file,
         "--threshold=%s" % str(threshold) if threshold else '')

    # the model dir and data dir are bind-mounted into the worker container
    command = (r"docker run "
        "-v {model_path}:/var/src/auger-ml-worker/exported_model "
        "-v {data_path}:/var/src/auger-ml-worker/model_data "
        "deeplearninc/auger-ml-worker:{docker_tag} "
        "python ./exported_model/client.py {call_args}").format(
        model_path=model_path, data_path=data_path,
        docker_tag=docker_tag, call_args=call_args)

    try:
        self.ctx.log(
            'Running model in deeplearninc/'
            'auger-ml-worker:%s' % docker_tag)
        subprocess.check_call(
            command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        # fix: chain the original error so the exit status isn't lost
        raise AugerException('Error running Docker container...') from e

    return os.path.join(data_path,
        os.path.splitext(result_file)[0] + "_predicted.csv")
def call(self, method, params=None):
    """Call an Auger API method and return its 'data' payload.

    Args:
        method: API method name, e.g. 'create_token'.
        params: optional dict of request parameters.
    Returns:
        The 'data' field of the API response.
    Raises:
        AugerException: when the response carries no 'data' field.
    """
    # fix: avoid the mutable-default-argument pitfall (was `params={}`)
    result = self.call_ex(method, params if params is not None else {})
    if 'data' in result:
        return result['data']
    # bug fix: message formatted with undefined name `keys` (NameError);
    # report the failed method instead
    raise AugerException("Call of Auger API method %s failed." % method)
def _ensure_object_id(self): if self.object_id is None: properties = self.properties() if properties is not None: self.object_id = properties.get('id') else: raise AugerException('Can\'t find remote %s: %s...' % \ (self._get_readable_name(), self.object_name)) return self.object_id
def name(self):
    """Return the remote object's name, fetching and caching it on first access."""
    if self.object_name is not None:
        return self.object_name

    remote = self.properties()
    if remote is None:
        raise AugerException(
            'Can\'t find name for remote %s: %s...' % \
            (self._get_readable_name(), self.object_id))

    self.object_name = remote.get('name')
    return self.object_name
def verify(data_source_file):
    """Validate a data source reference (local file path or http(s) URL).

    Args:
        data_source_file: URL or path (relative paths resolve against cwd).
    Returns:
        tuple: (resolved path or original URL, is_local_file flag).
    Raises:
        AugerException: on unsupported file extension or missing local file.
    """
    if urllib.parse.urlparse(data_source_file).scheme in ['http', 'https']:
        # remote sources are passed through untouched
        return data_source_file, False

    data_source_file = os.path.abspath(
        os.path.join(os.getcwd(), data_source_file))
    filename, file_extension = os.path.splitext(data_source_file)
    if file_extension not in SUPPORTED_FORMATS:
        # typo fix: 'fomats' -> 'formats'
        raise AugerException(
            'Source file has to be one of the supported formats: %s' %
            ', '.join(SUPPORTED_FORMATS))
    if not os.path.isfile(data_source_file):
        raise AugerException(
            'Can\'t find file to import: %s' % data_source_file)
    return data_source_file, True
def _upload_file(self, file_name, url):
    """POST the file's raw bytes to `url`; return the Auger 'files/...' path.

    Raises:
        AugerException: on any non-200 HTTP response.
    """
    with open(file_name, 'rb') as f:
        r = requests.post(url, data=f)

    if r.status_code != 200:
        raise AugerException(
            'HTTP error [%s] while uploading file to Auger Cloud...' %
            r.status_code)

    # response body is a querystring; extract the 'files/...' suffix
    rp = urllib.parse.parse_qs(r.text)
    return 'files/%s' % rp.get('path')[0].split('files/')[-1]
def download(self, url, path_to_download, trial_id):
    """Fetch the export at `url` into `path_to_download`; return the local path.

    Raises:
        AugerException: when no pipeline file id is set.
    """
    if self.object_id is None:
        raise AugerException('Please provide Auger Pipeline File id')

    if not os.path.exists(path_to_download):
        os.makedirs(path_to_download)

    # local name: 'export_*' archives are saved as 'model-*'
    remote_name = os.path.basename(urllib.parse.urlparse(url).path)
    file_name = os.path.join(
        path_to_download, remote_name.replace('export_', 'model-'))
    urllib.request.urlretrieve(url, file_name)
    return file_name
def login(self, username, password, organisation, url):
    """Authenticate against the Auger API and return the session token.

    Side effect: installs an authenticated RestApi on self.ctx.
    Raises:
        AugerException: when the organization doesn't exist.
    """
    rest_api = RestApi(url, None)
    res = rest_api.call_ex('create_token', {
        'email': username, 'password': password
    })

    self.ctx.rest_api = RestApi(url, res['data']['token'])

    org_api = AugerOrganizationApi(self.ctx, organisation)
    # idiom fix: compare to None with `is`, not `==`
    if org_api.properties() is None:
        raise AugerException('Auger Organization %s doesn\'t exist' %
            organisation)

    return res['data']['token']
def _docker_pull_image(self):
    """Pull the auger-ml-worker image matching the cluster stack; return its tag."""
    cluster_settings = AugerClusterApi.get_cluster_settings(self.ctx)
    docker_tag = cluster_settings.get('kubernetes_stack')

    command = 'docker pull deeplearninc/auger-ml-worker:%s' % docker_tag
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError:
        raise AugerException('Can\'t pull Docker container...')

    return docker_tag
def _upload_to_multi_tenant(self, file_to_upload):
    """Upload a local file to Auger's storage via a presigned POST.

    Returns:
        The uploaded file's 's3://bucket/path' URL.
    Raises:
        AugerException: when the presigned URL can't be obtained or
            the upload fails.
    """
    file_path = 'workspace/projects/%s/files/%s-%s' % \
        (self.parent_api.object_name, shortuuid.uuid(),
         os.path.basename(file_to_upload))

    res = self.rest_api.call('create_project_file_url', {
        'project_id': self.parent_api.object_id,
        'file_path': file_path})
    if res is None:
        raise AugerException(
            'Error while uploading file to Auger Cloud...')

    url = res['url']
    with open(file_to_upload, 'rb') as f:
        res = requests.post(
            url, data=res['fields'], files={'file': (file_path, f)})

    if res.status_code not in (200, 201):
        raise AugerException(
            'HTTP error [%s] while uploading file'
            ' to Auger Cloud...' % res.status_code)

    bucket = urllib.parse.urlparse(url).netloc.split('.')[0]
    return 's3://%s/%s' % (bucket, file_path)
def properties(self):
    """Fetch the remote object's properties by id, or search by name.

    Name lookup also matches the dashed variant of the name and caches
    the found id on self. Returns None when nothing matches.
    Raises:
        AugerException: when neither id nor name is set.
    """
    if self.object_id is not None:
        return self.rest_api.call(
            'get_%s' % self.api_request_path, {'id': self.object_id})

    if self.object_name is None:
        raise AugerException(
            'No name or id was specified for %s' % \
            self._get_readable_name())

    # remote names may use dashes where the local name has underscores
    candidates = {self.object_name, self.object_name.replace('_', '-')}
    for item in self.list():
        if item['name'] in candidates:
            self.object_id = item.get('id')
            return item
    return None
def request_list(self, record_type, params): offset = params.get('offset', 0) limit = params.get('limit', REQUEST_LIMIT) p = params.copy() while limit > 0: p['offset'] = offset p['limit'] = limit response = self.call_ex('get_' + record_type, p) if not 'data' in response or not 'meta' in response: raise AugerException("Read list of %s failed." % record_type) for item in response['data']: yield item received = len(response['data']) offset += received limit -= received if offset >= response['meta']['pagination']['total']: break
def _predict_locally(self, filename, model_id, threshold):
    """Predict `filename` against a locally downloaded model via Docker.

    Returns:
        Path to the CSV file with predictions.
    Raises:
        AugerException: when the model was never downloaded locally.
    """
    is_model_loaded, model_path, model_name = \
        AugerDeploy.verify_local_model(model_id)

    if not is_model_loaded:
        # typo fix: 'depoly' -> 'deploy'
        raise AugerException('Model isn\'t loaded locally. '
            'Please use a2ml deploy command to download model.')

    model_path, model_existed = self._exstract_model(model_name)
    try:
        predicted = \
            self._docker_run_predict(filename, threshold, model_path)
    finally:
        # clean up unzipped model
        # if it wasn't unzipped before
        if not model_existed:
            shutil.rmtree(model_path, ignore_errors=True)

    return predicted
def train(self):
    """Create and run a new experiment on the configured DataSet.

    Raises:
        AugerException: when auger.yaml has no dataset configured.
    """
    # verify availability of auger credentials
    self.credentials.verify()
    self.start_project()

    data_set_name = self.ctx.config['auger'].get('dataset')
    if data_set_name is None:
        # typo fix: 'Plese' -> 'Please'
        raise AugerException(
            'Please specify DataSet name in auger.yaml/dataset')

    experiment_api = AugerExperimentApi(self.ctx, self.project_api)
    experiment_api.create(data_set_name)
    self.ctx.log('Created Experiment %s ' % experiment_api.object_name)

    experiment_session_id = experiment_api.run()
    self.ctx.log('Started Experiment %s training.' %
        experiment_api.object_name)
    AugerConfig(self.ctx).set_experiment(
        experiment_api.object_name, experiment_session_id)
def create(self, data_source_file, data_set_name=None):
    """Register a data source (local file or http(s) URL) as an Auger DataSet.

    Local files are uploaded to the cloud first; remote URLs are used as-is.
    Raises:
        AugerException: when a DataSet already exists for the same URL.
    """
    data_source_file, local_data_source = \
        AugerDataSetApi.verify(data_source_file)

    if local_data_source:
        file_url = self._upload_to_cloud(data_source_file)
        file_name = os.path.basename(data_source_file)
        self.object_name = (data_set_name if data_set_name
                            else self._get_data_set_name(file_name))
    else:
        file_url = data_source_file
        file_name = os.path.basename(
            urllib.parse.urlparse(file_url).path)
        self.object_name = file_name

    try:
        return super().create(file_url, file_name)
    except Exception as exc:
        # the backend reports duplicate URLs via this error token
        if 'en.errors.project_file.url_not_uniq' in str(exc):
            raise AugerException(
                'DataSet already exists for %s' % file_url)
        raise exc
def delete(self):
    """Organizations can't be removed through the API; direct users to the UI.

    Raises:
        AugerException: always.
    """
    # message fix: "could't" was a typo; "can't" is the intended wording
    raise AugerException(
        'You can\'t delete organization using Auger Cloud API.'
        ' Please use Auger UI to do that...')