def _get_confusion_matrix_action(self, **kwargs):
    """
    Schedule a confusion matrix recalculation using per-label weights.

    The `weights` request argument must hold a JSON document shaped like
    {"weights_list": [{"label": ..., "value": ...}, ...]}. Every entry is
    converted to a (label, float(value)) tuple and the resulting list is
    passed to the `calculate_confusion_matrix` task.

    Raises NotFound when the test or the model cannot be found. Any other
    failure (undecodable JSON, malformed weights, task scheduling error)
    is not raised; it is reported under the `error` key of the response.
    """
    import json
    from tasks import calculate_confusion_matrix

    parser = reqparse.RequestParser()
    parser.add_argument('weights', type=str)
    args = parser.parse_args()

    test = self._get_details_query(None, **kwargs)
    if not test:
        raise NotFound('Test not found')

    model = Model.query.get(kwargs.get('model_id'))
    if not model:
        raise NotFound('Model not found')

    try:
        json_weights = json.loads(args.get("weights"))
        if "weights_list" not in json_weights or \
                not json_weights["weights_list"]:
            raise ValueError("Weights list is empty")

        weights = []
        for w in json_weights["weights_list"]:
            if not ("label" in w and "value" in w):
                raise ValueError("Weights list is incorrect")
            weights.append((w["label"], float(w["value"])))

        calculate_confusion_matrix.delay(test.id, weights)
    except Exception as e:
        # `e.message` is deprecated: it is empty for exceptions built with
        # several arguments and does not exist at all on Python 3, so fall
        # back to str(e) to always report something meaningful.
        error = getattr(e, 'message', None) or str(e)
        return self._render({self.OBJECT_NAME: test.id, 'error': error})

    return self._render({self.OBJECT_NAME: test.id})
def _get_server(self, kwargs):
    """
    Return the Server identified by kwargs['server_id'].

    Raises NotFound when `server_id` is missing or no such server exists.
    """
    ident = kwargs.get('server_id')
    if ident is None:
        raise NotFound('Need to specify server_id')

    found = Server.query.get(ident)
    if found is not None:
        return found
    raise NotFound(self.MESSAGE404 % kwargs)
def _get_pig_fields_action(self, **kwargs):
    """
    Describe the fields of the datasource's sample pig row.

    Each value of `pig_row` is classified as 'integer', 'float' or
    'string' (via isint / isfloat) and the response carries the field
    list, an XML snippet rendered from XML_FIELD_TEMPLATE and the row
    itself.

    Raises ValueError when no `id` is given and NotFound when the
    datasource does not exist.
    """
    if 'id' not in kwargs:
        raise ValueError("Specify id of the datasource")

    datasource = self._get_details_query({}, **kwargs)
    if datasource is None:
        raise NotFound('DataSet not found')

    def guess_type(value):
        # Integer check goes first, then float, else plain string.
        if isint(value):
            return 'integer'
        if isfloat(value):
            return 'float'
        return 'string'

    fields_data = [
        {'column_name': column, 'data_type': guess_type(sample)}
        for column, sample in datasource.pig_row.iteritems()
    ]

    from ..utils import XML_FIELD_TEMPLATE
    xml = "\r\n".join(XML_FIELD_TEMPLATE % field for field in fields_data)

    payload = {
        'sample_xml': xml,
        'fields': fields_data,
        'pig_result_line': datasource.pig_row,
    }
    return self._render(payload)
def _get_handler(self, handler_id):
    """
    Return the XmlImportHandler with the given id.

    Raises ValidationError when no id is given and NotFound when the
    handler does not exist.
    """
    if handler_id is not None:
        found = XmlImportHandler.query.get(handler_id)
        if found is not None:
            return found
        raise NotFound()
    raise ValidationError('Please specify import handler')
def _get_exports_action(self, **kwargs): test = self._get_details_query(None, **kwargs) if not test: raise NotFound('Test not found') return self._render({self.OBJECT_NAME: test.id, 'exports': test.exports, 'db_exports': test.db_exports})
def _get_pig_fields_action(self, **kwargs): sqoop = self._get_details_query({}, **kwargs) if sqoop is None: raise NotFound(self.MESSAGE404 % kwargs) return self._render({ self.OBJECT_NAME: sqoop.id, 'pig_fields': sqoop.pig_fields })
def _get_sample_data_action(self, **kwargs): ds = self._get_details_query({}, **kwargs) if ds is None: raise NotFound('DataSet not found') if not os.path.exists(ds.filename): raise NotFound('DataSet file cannot be found') _, ext = os.path.splitext(ds.filename) open_fn = gzip.open if ext == '.gz' else open if ext == '' else None if not open_fn: raise ValidationError('DataSet has unknown file type') lines = [] params = self._parse_parameters([('size', int)]) sample_size = params.get('size') or 10 with open_fn(ds.filename, 'rb') as f: line = f.readline() while line and len(lines) < sample_size: lines.append(json.loads(line)) line = f.readline() return self._render(lines)
def _get_script_string_action(self, **kwargs): script = self._get_details_query({}, **kwargs) if script is None: raise NotFound(self.MESSAGE404 % kwargs) try: return self._render({ self.OBJECT_NAME: script.id, 'script_string': script.script_string }) except Exception as e: return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)
def _get_details_query(self, params, **kwargs):
    """
    Fetch the verification example through the parent implementation.

    When the attached example has no calculated weights yet,
    `calc_weighted_data()` is invoked on it before returning.

    Raises NotFound when the parent lookup returns None.
    """
    parent = super(VerificationExampleResource, self)
    ver_example = parent._get_details_query(params, **kwargs)
    if ver_example is None:
        raise NotFound()

    example = ver_example.example
    if not example.is_weights_calculated:
        example.calc_weighted_data()
    return ver_example
def _get_download_action(self, **kwargs):
    """
    Return the model serialized with `to_dict()` as a JSON attachment.

    The response is served as text/plain and named "<model name>.json".

    Raises NotFound when the model cannot be found.
    """
    model = self._get_details_query(None, None, **kwargs)
    if model is None:
        raise NotFound(self.MESSAGE404 % kwargs)

    resp = Response(json.dumps(model.to_dict()))
    resp.headers['Content-Type'] = 'text/plain'
    # The filename is quoted so that names containing spaces or separator
    # characters still form a valid header value.
    resp.headers['Content-Disposition'] = \
        'attachment; filename="%s.json"' % model.name
    return resp
def _get_features_download_action(self, **kwargs):
    """
    Return the model's features JSON as a downloadable attachment.

    Raises NotFound when the model cannot be found.
    """
    found = self._get_details_query(None, **kwargs)
    if found is None:
        raise NotFound(self.MESSAGE404 % kwargs)

    response = Response(found.get_features_json())
    response.headers['Content-Type'] = 'application/json'
    disposition = 'attachment; filename="%s-features.json"' % found.name
    response.headers['Content-Disposition'] = disposition
    return response
def _get_xml_download_action(self, **kwargs):
    """
    Return the import handler's `data` as a downloadable XML attachment.

    Raises NotFound when the handler cannot be found.
    """
    found = self._get_details_query(None, **kwargs)
    if found is None:
        raise NotFound(self.MESSAGE404 % kwargs)

    response = Response(found.data)
    response.headers['Content-Type'] = 'text/xml'
    disposition = 'attachment; filename="%s-importhandler.xml"' % found.name
    response.headers['Content-Disposition'] = disposition
    return response
def post(self, action=None, **kwargs):
    """
    Handle the authentication actions.

    * `get_auth_url`: asks User.get_auth_url() for an authorization URL
      and a token pair, saves the pair as an AuthToken and renders the URL.
    * `authenticate`: looks up the saved token by `oauth_token`, calls
      User.authenticate() with it and the `oauth_verifier`, deletes the
      saved token and renders the resulting auth token and user. An
      unknown token yields a 500 error response.
    * `get_user`: renders `request.user`, or a 401 error response when no
      user is attached to the request.

    Any other action raises NotFound.
    """
    if action == 'get_auth_url':
        auth_url, oauth_token, oauth_token_secret = User.get_auth_url()

        # TODO: Use redis?
        # app.db['auth_tokens'].insert({
        #     'oauth_token': oauth_token,
        #     'oauth_token_secret': oauth_token_secret,
        # })
        AuthToken(oauth_token, oauth_token_secret).save()
        logging.debug("User Auth: oauth token %s added", oauth_token)
        return self._render({'auth_url': auth_url})

    elif action == 'authenticate':
        parser = reqparse.RequestParser()
        for arg_name in ('oauth_token', 'oauth_verifier'):
            parser.add_argument(arg_name, type=str)
        params = parser.parse_args()

        oauth_token = params.get('oauth_token')
        oauth_verifier = params.get('oauth_verifier')
        logging.debug("User Auth: trying to authenticate with token %s",
                      oauth_token)

        # TODO: Use redis?
        stored = AuthToken.get_auth(oauth_token)
        if not stored:
            logging.error('User Auth: token %s not found', oauth_token)
            message = 'Wrong token: {0!s}'.format(oauth_token)
            return odesk_error_response(500, 500, message)

        secret = stored.get('oauth_token_secret')
        auth_token, user = User.authenticate(
            oauth_token, secret, oauth_verifier)

        logging.debug('User Auth: Removing token %s', oauth_token)
        AuthToken.delete(stored.get('oauth_token'))
        return self._render({'auth_token': auth_token, 'user': user})

    elif action == 'get_user':
        current = getattr(request, 'user', None)
        if not current:
            return odesk_error_response(401, 401, 'Unauthorized')
        return self._render({'user': current})

    logging.error('User Auth: invalid action %s', action)
    raise NotFound('Action not found')
def _get_details_query(self, params, **kwargs):
    """
    Fetch a test example and, on request, the ids of its neighbours.

    The example is loaded through the parent implementation. When the
    fields returned by `_get_show_fields(params)` contain 'next' or
    'previous', a query using the lag/lead window functions over
    TestExample.id is executed and its result is stored in
    `example.previous` and `example.next`. Finally, when the example has
    no calculated weights, `calc_weighted_data()` is called and the
    example is fetched again.

    Raises NotFound when the parent lookup returns None.
    """
    example = super(TestExampleResource, self)._get_details_query(
        params, **kwargs)
    if example is None:
        raise NotFound()

    fields = self._get_show_fields(params)
    if 'next' in fields or 'previous' in fields:
        # NOTE(review): `text` and `bindparam` are imported but not used
        # in this block.
        from sqlalchemy.sql import select, func, text, bindparam
        from models import db

        # Filter on everything the request filters on, except the id of
        # the example itself.
        filter_params = kwargs.copy()
        filter_params.update(self._prepare_filter_params(params))
        filter_params.pop('id')

        sort_by = params.get('sort_by', None) or 'id'
        # NOTE(review): `is_desc` is computed but never used below, so the
        # requested order direction does not influence the window ordering.
        is_desc = params.get('order', None) == 'desc'
        fields_to_select = [TestExample.id]
        # TODO: simplify query with specifying WINDOW w
        if 'previous' in fields:
            fields_to_select.append(
                func.lag(TestExample.id).over(
                    order_by=[sort_by, 'id']).label('prev'))
        if 'next' in fields:
            fields_to_select.append(
                func.lead(TestExample.id).over(
                    order_by=[sort_by, 'id']).label('next'))
        tbl = select(fields_to_select)
        for name, val in filter_params.iteritems():
            if '->>' in name:  # TODO: refactor this
                # Rewrites "column->>key" into "column->>'key'".
                try:
                    splitted = name.split('->>')
                    name = "%s->>'%s'" % (splitted[0], splitted[1])
                except:
                    logging.warning('Invalid GET param %s', name)
            # NOTE(review): name and value are interpolated straight into
            # the SQL text. If they can come from request parameters this
            # is open to SQL injection - confirm and switch to bound
            # parameters.
            tbl.append_whereclause("%s='%s'" % (name, val))
        tbl = tbl.cte('tbl')
        # NOTE(review): both 'prev' and 'next' are always selected and
        # unpacked, although each label is only added to the CTE when the
        # matching field was requested - looks like this fails when only
        # one of them is asked for; confirm.
        select1 = select(['id', 'prev', 'next']).where(
            tbl.c.id == kwargs['id'])
        res = db.engine.execute(select1, id_1=kwargs['id'])
        id_, example.previous, example.next = res.fetchone()

    if not example.is_weights_calculated:
        example.calc_weighted_data()
        # Load the example again after the weights were calculated.
        example = super(TestExampleResource, self)._get_details_query(
            params, **kwargs)

    return example
def _put_transformers_download_action(self, **kwargs):
    """
    Schedule an upload of a segment's feature transformers.

    The segment name and the data format are read from
    TransformersDownloadForm; the work itself is delegated to the
    `upload_segment_features_transformers` task.

    Returns an error response when the model is not trained (405) or the
    form is invalid (400). Raises NotFound when the model or the segment
    cannot be found.
    """
    model = self._get_details_query(None, **kwargs)
    if model is None:
        raise NotFound('Model not found')
    if model.status != Model.STATUS_TRAINED:
        return odesk_error_response(405, ERR_INVALID_METHOD,
                                    'Model is not trained')

    form = TransformersDownloadForm(obj=model)
    if not form.is_valid():
        # A bare `return` would hand None back to the framework instead
        # of a response; report the invalid input explicitly.
        return odesk_error_response(400, 400, 'Parameters are invalid')

    segment_name = form.cleaned_data['segment']
    segment = Segment.query.filter(Segment.model_id == model.id)\
        .filter(Segment.name == segment_name).first()
    if segment is None:
        raise NotFound('Segment not found in trained model')

    data_format = form.cleaned_data['data_format']
    from api.ml_models.tasks import upload_segment_features_transformers
    upload_segment_features_transformers.delay(
        model.id, segment.id, data_format)
    return self._render({})
def _put_pig_fields_action(self, **kwargs):
    """
    Schedule the `load_pig_fields` task for the sqoop object.

    The task receives the object's id and the `params` value of
    LoadPigFieldsForm. An invalid form yields a 400 error response.

    Raises NotFound when the object cannot be found.
    """
    found = self._get_details_query({}, **kwargs)
    if found is None:
        raise NotFound(self.MESSAGE404 % kwargs)

    from ..forms import LoadPigFieldsForm
    form = LoadPigFieldsForm(obj={})
    if not form.is_valid():
        return odesk_error_response(400, 400, 'Parameters are invalid')

    from api.import_handlers.tasks import load_pig_fields
    load_pig_fields.delay(found.id, form.cleaned_data.get('params'))
    message = ("Generating pig fields delayed "
               "(link will appear in sqoop section)")
    return self._render({'result': message})
def _put_dataset_download_action(self, **kwargs):
    """
    Schedule a transformation of a dataset for download.

    The dataset is read from TransformDataSetForm and the work is
    delegated to the `transform_dataset_for_download` task.

    Returns a 400 error response when the model is not trained or the
    form is invalid. Raises NotFound when the model cannot be found.
    """
    model = self._get_details_query(None, **kwargs)
    if model is None:
        raise NotFound('Model not found')
    if model.status != Model.STATUS_TRAINED:
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Model is not trained')

    form = TransformDataSetForm(obj=model)
    if not form.is_valid():
        # A bare `return` would hand None back to the framework instead
        # of a response; report the invalid input explicitly.
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Parameters are invalid')

    dataset = form.cleaned_data['dataset']
    from api.ml_models.tasks import transform_dataset_for_download
    transform_dataset_for_download.delay(model.id, dataset.id)
    return self._render({})
def _get_dataset_download_action(self, **kwargs):
    """
    Render the current dataset transformation tasks of the model.

    Every entry pairs a task with the DataSet looked up by the task's
    second argument.

    Raises NotFound when the model cannot be found.
    """
    found = self._get_details_query(None, **kwargs)
    if found is None:
        raise NotFound('Model not found')

    from api.tasks import TRANSFORM_DATASET_TASK
    current = AsyncTask.get_current_by_object(found, TRANSFORM_DATASET_TASK)
    downloads = [
        {'dataset': DataSet.query.get(item.args[1]), 'task': item}
        for item in current
    ]
    return self._render({self.OBJECT_NAME: found.id, 'downloads': downloads})
def _put_csv_task_action(self, model_id, test_result_id):
    """
    Schedules a task to generate examples in CSV format.

    The fields to export come from SelectFieldsForCSVForm; without a
    non-empty list of fields a 400 error response is returned.

    Raises NotFound when the test result does not exist.
    """
    result = TestResult.query.get(test_result_id)
    if not result:
        raise NotFound('Test not found')

    form = SelectFieldsForCSVForm(obj=result)
    if form.is_valid():
        chosen = form.cleaned_data['fields']
        if isinstance(chosen, list) and chosen:
            from tasks import get_csv_results
            logging.info('Download examples in csv')
            get_csv_results.delay(result.model_id, result.id, chosen)
            return self._render({})

    return odesk_error_response(400, ERR_INVALID_DATA,
                                'Fields of the CSV export is required')
def _put_run_sql_action(self, **kwargs):
    """
    Run sql query for testing.

    The query, row limit, parameters and datasource name come from
    QueryTestForm. `#{name}` placeholders in the query are replaced with
    the matching values of `params`, the query is checked with
    `check_sql`, rebuilt with the requested limit and executed.

    Renders the rows, their column names and the executed query. An
    invalid form is rendered under the `error` key; bad parameters, a
    rejected query or a database error yield a 400 error response.

    Raises NotFound when the object cannot be found.
    """
    from api.import_handlers.forms import QueryTestForm

    model = self._get_details_query({}, **kwargs)
    if model is None:
        raise NotFound(self.MESSAGE404 % kwargs)

    form = QueryTestForm(obj={})
    if not form.is_valid():
        return self._render({'error': form.error_messages})

    sql = form.cleaned_data['sql']
    limit = form.cleaned_data['limit']
    # `or {}` also covers a `params` key that is present but empty.
    params = form.cleaned_data.get('params') or {}
    datasource_name = form.cleaned_data['datasource']

    try:
        # Turn "#{name}" into "%(name)s" and let %-formatting fill it in.
        sql = re.sub(r'#{(\w+)}', r'%(\1)s', sql)
        sql = sql % params
    except (KeyError, ValueError, TypeError) as e:
        # TypeError is raised e.g. for "%d" applied to a mapping; it is a
        # user input problem just like a missing key.
        return odesk_error_response(400, ERR_INVALID_DATA,
                                    'Wrong query parameters', e)

    try:
        model.check_sql(sql)
    except Exception as e:
        return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

    # Change query LIMIT
    sql = model.build_query(sql, limit=limit)

    try:
        data = list(model.execute_sql_iter(sql, datasource_name))
    except DatabaseError as e:
        return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

    columns = data[0].keys() if data else []
    return self._render({'data': data, 'columns': columns, 'sql': sql})
def _get_weights_download_action(self, **kwargs):
    """
    Return the weights of every model segment as a JSON attachment.

    The weights are read from the model's trainer (loaded with
    force_load=True) and keyed by segment name.

    Raises NotFound when the model cannot be found and ValidationError
    when it is not trained or its weights are not synchronized.
    """
    model = self._get_details_query(None, **kwargs)
    if model is None:
        raise NotFound(self.MESSAGE404 % kwargs)
    if model.status != model.STATUS_TRAINED:
        raise ValidationError('Model should be trained')
    if not model.weights_synchronized:
        raise ValidationError('Model weights should be synchronized')

    trainer = model.get_trainer(force_load=True)
    result = {}
    for segment in model.segments:
        result[segment.name] = trainer.get_weights(segment.name)

    resp = Response(json.dumps(result))
    resp.headers['Content-Type'] = 'application/json'
    # The filename is quoted so that names containing spaces or separator
    # characters still form a valid header value.
    resp.headers['Content-Disposition'] = \
        'attachment; filename="%s-weights.json"' % model.name
    return resp
def _put_db_task_action(self, model_id, test_result_id):
    """
    Schedules a task to export examples to the specified DB.

    Fields, datasource and table name come from ExportToDbForm; without
    a non-empty list of fields a 400 error response is returned.

    Raises NotFound when the test result does not exist.
    """
    result = TestResult.query.get(test_result_id)
    if not result:
        raise NotFound('Test not found')

    form = ExportToDbForm(obj=result)
    if form.is_valid():
        chosen = form.cleaned_data['fields']
        datasource = form.cleaned_data['datasource']
        tablename = form.cleaned_data['tablename']
        if isinstance(chosen, list) and chosen:
            from tasks import export_results_to_db
            logging.info('Export examples to db')
            export_results_to_db.delay(
                result.model_id, result.id,
                datasource.id, tablename, chosen)
            return self._render({})

    return odesk_error_response(400, ERR_INVALID_DATA,
                                'Fields of the DB export is required')
def _get_transformers_download_action(self, **kwargs):
    """
    Render the current transformer upload tasks of the model's segments.

    Tasks of all segments are gathered, ordered by `created_on` (newest
    first) and each is paired with the Segment looked up by the task's
    second argument.

    Raises NotFound when the model cannot be found.
    """
    found = self._get_details_query(None, **kwargs)
    if found is None:
        raise NotFound('Model not found')

    from api.tasks import TRANSFORMERS_UPLOAD_TASK
    gathered = []
    for segment in found.segments:
        gathered += AsyncTask.get_current_by_object(
            segment, TRANSFORMERS_UPLOAD_TASK)

    newest_first = sorted(
        gathered, key=lambda item: item.created_on, reverse=True)
    downloads = [
        {'segment': Segment.query.get(item.args[1]), 'task': item}
        for item in newest_first
    ]
    return self._render({self.OBJECT_NAME: found.id, 'downloads': downloads})
def _get_folder(self, kwargs): folder = kwargs.get('folder') if folder not in self.ALLOWED_FOLDERS: raise NotFound(self.MESSAGE404 % kwargs) return folder
def _get_generate_url_action(self, **kwargs): ds = self._get_details_query({}, **kwargs) if ds is None: raise NotFound('DataSet not found') url = ds.get_s3_download_url() return self._render({self.OBJECT_NAME: ds.id, 'url': url})
def _get_trainer_download_s3url_action(self, **kwargs): entity = self._get_details_query(None, **kwargs) if entity is None: raise NotFound(self.MESSAGE404 % kwargs) url = entity.get_trainer_s3url() return self._render({'trainer_file_for': entity.id, 'url': url})
def _get_uid(self, kwargs): uid = kwargs.get('id') if uid is None: raise NotFound(self.MESSAGE404 % kwargs) return uid