Beispiel #1
0
    def _get_confusion_matrix_action(self, **kwargs):
        """
        Schedule a confusion matrix calculation with custom class weights.

        The ``weights`` request argument must be a JSON object holding a
        non-empty ``weights_list`` whose items carry ``label`` and ``value``
        keys. They are converted to ``(label, float(value))`` pairs and
        handed to the ``calculate_confusion_matrix`` task together with the
        test id.

        Returns the rendered test id. When the weights cannot be parsed or
        the task cannot be queued, the rendered response also carries an
        ``error`` message instead of raising.

        Raises:
            NotFound: if the test or the model cannot be found.
        """
        import json
        from tasks import calculate_confusion_matrix

        parser = reqparse.RequestParser()
        parser.add_argument('weights', type=str)
        args = parser.parse_args()

        test = self._get_details_query(None, **kwargs)
        if not test:
            raise NotFound('Test not found')

        # The model is not used below; the lookup only checks it exists.
        if not Model.query.get(kwargs.get('model_id')):
            raise NotFound('Model not found')

        try:
            json_weights = json.loads(args.get("weights"))
            if "weights_list" not in json_weights or \
                    not json_weights["weights_list"]:
                raise ValueError("Weights list is empty")

            weights = []
            for w in json_weights["weights_list"]:
                if not ("label" in w and "value" in w):
                    raise ValueError("Weights list is incorrect")
                weights.append((w["label"], float(w["value"])))
            calculate_confusion_matrix.delay(test.id, weights)

        except Exception as e:
            # ``e.message`` is deprecated and is empty for exceptions created
            # with several arguments, so fall back to ``str(e)``.
            error = getattr(e, 'message', None) or str(e)
            return self._render({self.OBJECT_NAME: test.id,
                                 'error': error})

        return self._render({self.OBJECT_NAME: test.id})
Beispiel #2
0
    def _get_server(self, kwargs):
        """
        Look up the server referenced by ``kwargs['server_id']``.

        Returns the object found by ``Server.query.get``.

        Raises:
            NotFound: if ``server_id`` is missing or no server matches it.
        """
        requested_id = kwargs.get('server_id')
        if requested_id is None:
            raise NotFound('Need to specify server_id')

        found = Server.query.get(requested_id)
        if found is not None:
            return found
        raise NotFound(self.MESSAGE404 % kwargs)
Beispiel #3
0
    def _get_pig_fields_action(self, **kwargs):
        """
        Describe the columns found in the dataset's ``pig_row``.

        Every value of ``ds.pig_row`` is classified as ``integer``,
        ``float`` or ``string``. The rendered response contains the list of
        fields, an XML sample built from ``XML_FIELD_TEMPLATE`` and the pig
        row itself.

        Raises:
            ValueError: if ``id`` is not among the keyword arguments.
            NotFound: if the dataset cannot be found.
        """
        if 'id' not in kwargs:
            raise ValueError("Specify id of the datasource")

        ds = self._get_details_query({}, **kwargs)
        if ds is None:
            raise NotFound('DataSet not found')

        def guess_type(value):
            # The integer check goes first; anything that is neither an
            # integer nor a float is reported as a string.
            if isint(value):
                return 'integer'
            if isfloat(value):
                return 'float'
            return 'string'

        fields_data = [
            {'column_name': name, 'data_type': guess_type(value)}
            for name, value in ds.pig_row.iteritems()]

        from ..utils import XML_FIELD_TEMPLATE
        xml = "\r\n".join(XML_FIELD_TEMPLATE % item for item in fields_data)
        response = {
            'sample_xml': xml,
            'fields': fields_data,
            'pig_result_line': ds.pig_row,
        }
        return self._render(response)
Beispiel #4
0
    def _get_handler(self, handler_id):
        """
        Fetch the XML import handler with the given id.

        Raises:
            ValidationError: if ``handler_id`` is ``None``.
            NotFound: if ``XmlImportHandler.query.get`` finds nothing.
        """
        if handler_id is None:
            raise ValidationError('Please specify import handler')

        found = XmlImportHandler.query.get(handler_id)
        if found is not None:
            return found
        raise NotFound()
Beispiel #5
0
    def _get_exports_action(self, **kwargs):
        """
        Render the ``exports`` and ``db_exports`` of a test.

        Raises:
            NotFound: if the test cannot be found.
        """
        test = self._get_details_query(None, **kwargs)
        if test:
            response = {
                self.OBJECT_NAME: test.id,
                'exports': test.exports,
                'db_exports': test.db_exports,
            }
            return self._render(response)
        raise NotFound('Test not found')
Beispiel #6
0
    def _get_pig_fields_action(self, **kwargs):
        """
        Render the ``pig_fields`` stored on the requested object.

        Raises:
            NotFound: if the object cannot be found.
        """
        sqoop = self._get_details_query({}, **kwargs)
        if sqoop is not None:
            response = {
                self.OBJECT_NAME: sqoop.id,
                'pig_fields': sqoop.pig_fields,
            }
            return self._render(response)
        raise NotFound(self.MESSAGE404 % kwargs)
Beispiel #7
0
 def _get_sample_data_action(self, **kwargs):
     """
     Render the first records of a dataset file.

     The file is read line by line and each line is decoded with
     ``json.loads``. The number of records comes from the ``size``
     request parameter and falls back to 10 when it is missing or zero.
     Files with a ``.gz`` extension are opened with ``gzip.open``, files
     without an extension with the builtin ``open``.

     Raises:
         NotFound: if the dataset or its file cannot be found.
         ValidationError: if the file has any other extension.
     """
     ds = self._get_details_query({}, **kwargs)
     if ds is None:
         raise NotFound('DataSet not found')
     if not os.path.exists(ds.filename):
         raise NotFound('DataSet file cannot be found')

     extension = os.path.splitext(ds.filename)[1]
     if extension == '.gz':
         open_fn = gzip.open
     elif extension == '':
         open_fn = open
     else:
         raise ValidationError('DataSet has unknown file type')

     params = self._parse_parameters([('size', int)])
     sample_size = params.get('size') or 10
     records = []
     with open_fn(ds.filename, 'rb') as f:
         for raw_line in f:
             if len(records) >= sample_size:
                 break
             records.append(json.loads(raw_line))
     return self._render(records)
Beispiel #8
0
 def _get_script_string_action(self, **kwargs):
     """
     Render the ``script_string`` of the requested script.

     Any exception raised while building or rendering the response is
     turned into an ``odesk_error_response`` with code 400.

     Raises:
         NotFound: if the script cannot be found.
     """
     script = self._get_details_query({}, **kwargs)
     if script is None:
         raise NotFound(self.MESSAGE404 % kwargs)
     try:
         response = {
             self.OBJECT_NAME: script.id,
             'script_string': script.script_string,
         }
         return self._render(response)
     except Exception as err:
         return odesk_error_response(400, ERR_INVALID_DATA, str(err), err)
Beispiel #9
0
    def _get_details_query(self, params, **kwargs):
        """
        Fetch a verification example via the parent implementation.

        If the weights of the linked ``example`` have not been calculated
        yet, ``calc_weighted_data`` is called on it before returning.

        Raises:
            NotFound: if the parent lookup returns ``None``.
        """
        parent = super(VerificationExampleResource, self)
        ver_example = parent._get_details_query(params, **kwargs)
        if ver_example is None:
            raise NotFound()

        linked_example = ver_example.example
        if not linked_example.is_weights_calculated:
            linked_example.calc_weighted_data()

        return ver_example
Beispiel #10
0
    def _get_download_action(self, **kwargs):
        """
        Return the model, serialised with ``to_dict``, as a JSON attachment.

        The response is sent as ``text/plain`` with a ``Content-Disposition``
        header naming the file ``<model.name>.json``.

        Raises:
            NotFound: if the model cannot be found.
        """
        # NOTE(review): two positional ``None`` arguments are passed here;
        # confirm this matches the ``_get_details_query`` signature of this
        # resource.
        model = self._get_details_query(None, None, **kwargs)
        if model is None:
            raise NotFound(self.MESSAGE404 % kwargs)

        data = json.dumps(model.to_dict())
        resp = Response(data)
        resp.headers['Content-Type'] = 'text/plain'
        # The filename is quoted so that names containing spaces or
        # separators still form a valid header value.
        resp.headers['Content-Disposition'] = \
            'attachment; filename="%s.json"' % model.name
        return resp
Beispiel #11
0
    def _get_features_download_action(self, **kwargs):
        """
        Return the result of ``model.get_features_json()`` as an attachment.

        The response is sent as ``application/json`` and the file is named
        ``<model.name>-features.json``.

        Raises:
            NotFound: if the model cannot be found.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound(self.MESSAGE404 % kwargs)

        resp = Response(model.get_features_json())
        disposition = 'attachment; filename="%s-features.json"' % model.name
        resp.headers['Content-Type'] = 'application/json'
        resp.headers['Content-Disposition'] = disposition
        return resp
Beispiel #12
0
    def _get_xml_download_action(self, **kwargs):
        """
        Return ``handler.data`` as an XML attachment.

        The response is sent as ``text/xml`` and the file is named
        ``<handler.name>-importhandler.xml``.

        Raises:
            NotFound: if the handler cannot be found.
        """
        handler = self._get_details_query(None, **kwargs)
        if handler is None:
            raise NotFound(self.MESSAGE404 % kwargs)

        resp = Response(handler.data)
        disposition = \
            'attachment; filename="%s-importhandler.xml"' % handler.name
        resp.headers['Content-Type'] = 'text/xml'
        resp.headers['Content-Disposition'] = disposition
        return resp
Beispiel #13
0
    def post(self, action=None, **kwargs):
        """
        Handle the authentication actions.

        Supported values of ``action``:

        * ``get_auth_url`` - obtains an auth URL and a temporary OAuth token
          pair from ``User.get_auth_url``, saves the pair as an
          ``AuthToken`` and renders the URL.
        * ``authenticate`` - reads ``oauth_token`` and ``oauth_verifier``
          from the request, loads the saved token pair, calls
          ``User.authenticate``, deletes the saved pair and renders the
          resulting auth token and user. An error response is returned when
          the token pair is not found.
        * ``get_user`` - renders ``request.user`` when it is set, otherwise
          returns an ``Unauthorized`` error response.

        Raises:
            NotFound: for any other action.
        """
        if action == 'get_auth_url':
            auth_url, oauth_token, oauth_token_secret = User.get_auth_url()

            # TODO: Use redis?
            AuthToken(oauth_token, oauth_token_secret).save()

            logging.debug("User Auth: oauth token %s added", oauth_token)
            return self._render({'auth_url': auth_url})

        if action == 'authenticate':
            parser = reqparse.RequestParser()
            for arg_name in ('oauth_token', 'oauth_verifier'):
                parser.add_argument(arg_name, type=str)
            params = parser.parse_args()

            oauth_token = params.get('oauth_token')
            oauth_verifier = params.get('oauth_verifier')

            logging.debug("User Auth: trying to authenticate with token %s",
                          oauth_token)
            # TODO: Use redis?
            stored = AuthToken.get_auth(oauth_token)
            if not stored:
                logging.error('User Auth: token %s not found', oauth_token)
                message = 'Wrong token: {0!s}'.format(oauth_token)
                return odesk_error_response(500, 500, message)

            auth_token, user = User.authenticate(
                oauth_token, stored.get('oauth_token_secret'), oauth_verifier)

            logging.debug('User Auth: Removing token %s', oauth_token)
            AuthToken.delete(stored.get('oauth_token'))

            return self._render({'auth_token': auth_token, 'user': user})

        if action == 'get_user':
            current_user = getattr(request, 'user', None)
            if not current_user:
                return odesk_error_response(401, 401, 'Unauthorized')
            return self._render({'user': current_user})

        logging.error('User Auth: invalid action %s', action)
        raise NotFound('Action not found')
Beispiel #14
0
    def _get_details_query(self, params, **kwargs):
        """
        Fetch a test example and optionally the ids of its neighbours.

        The example is loaded through the parent implementation. When the
        fields returned by ``_get_show_fields`` include ``next`` or
        ``previous``, a query using the ``lag``/``lead`` window functions
        (ordered by ``sort_by`` and then ``id``) is executed and the result
        is stored on ``example.previous`` and ``example.next``.

        If the example's weights are not calculated yet,
        ``calc_weighted_data`` is called and the example is loaded again
        through the parent implementation.

        Raises:
            NotFound: if the parent lookup returns ``None``.
        """
        example = super(TestExampleResource, self)._get_details_query(
            params, **kwargs)

        if example is None:
            raise NotFound()

        fields = self._get_show_fields(params)
        if 'next' in fields or 'previous' in fields:
            from sqlalchemy.sql import select, func, text, bindparam
            from models import db

            # Filter on everything the request narrowed the list by, except
            # the id of the example itself.
            filter_params = kwargs.copy()
            filter_params.update(self._prepare_filter_params(params))
            filter_params.pop('id')

            sort_by = params.get('sort_by', None) or 'id'
            # NOTE(review): is_desc is computed but not used below, so the
            # window ordering ignores the requested direction - confirm
            # whether that is intended.
            is_desc = params.get('order', None) == 'desc'
            fields_to_select = [TestExample.id]
            # TODO: simplify query with specifying WINDOW w
            if 'previous' in fields:
                fields_to_select.append(
                    func.lag(TestExample.id).over(
                        order_by=[sort_by, 'id']).label('prev'))
            if 'next' in fields:
                fields_to_select.append(
                    func.lead(TestExample.id).over(
                        order_by=[sort_by, 'id']).label('next'))
            tbl = select(fields_to_select)
            for name, val in filter_params.iteritems():
                if '->>' in name:  # TODO: refactor this
                    try:
                        # Quote the key on the right-hand side of ``->>``.
                        splitted = name.split('->>')
                        name = "%s->>'%s'" % (splitted[0], splitted[1])
                    except:
                        logging.warning('Invalid GET param %s', name)
                # NOTE(review): name and value are interpolated straight
                # into the SQL text; presumably they originate from request
                # parameters - verify they are sanitised upstream or switch
                # to bound parameters.
                tbl.append_whereclause("%s='%s'" % (name, val))
            tbl = tbl.cte('tbl')
            # NOTE(review): both 'prev' and 'next' are selected here although
            # each column is only added above when requested - confirm this
            # works when just one of them is asked for.
            select1 = select(['id', 'prev', 'next']).where(
                tbl.c.id == kwargs['id'])
            res = db.engine.execute(select1, id_1=kwargs['id'])
            id_, example.previous, example.next = res.fetchone()

        if not example.is_weights_calculated:
            example.calc_weighted_data()
            # Load the example again after the weights were calculated.
            example = super(TestExampleResource, self)._get_details_query(
                params, **kwargs)

        return example
Beispiel #15
0
    def _put_transformers_download_action(self, **kwargs):
        """
        Queue the upload of a segment's feature transformers.

        The segment name and data format are taken from
        ``TransformersDownloadForm``; the first segment of the model with
        that name is passed to ``upload_segment_features_transformers``.

        Returns an empty rendered response once the task is queued, an
        error response when the model is not trained, or ``None`` when
        ``form.is_valid()`` returns a falsy value.

        Raises:
            NotFound: if the model or the segment cannot be found.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')
        if model.status != Model.STATUS_TRAINED:
            return odesk_error_response(405, ERR_INVALID_METHOD,
                                        'Model is not trained')

        form = TransformersDownloadForm(obj=model)
        if not form.is_valid():
            return None

        segment_name = form.cleaned_data['segment']
        by_model = Segment.query.filter(Segment.model_id == model.id)
        matches = by_model.filter(Segment.name == segment_name).all()
        if len(matches) == 0:
            raise NotFound('Segment not found in trained model')

        data_format = form.cleaned_data['data_format']

        from api.ml_models.tasks import upload_segment_features_transformers
        upload_segment_features_transformers.delay(
            model.id, matches[0].id, data_format)
        return self._render({})
Beispiel #16
0
 def _put_pig_fields_action(self, **kwargs):
     """
     Queue the ``load_pig_fields`` task for the requested object.

     The task parameters come from ``LoadPigFieldsForm``. An error
     response is returned when the form is not valid.

     Raises:
         NotFound: if the object cannot be found.
     """
     sqoop = self._get_details_query({}, **kwargs)
     if sqoop is None:
         raise NotFound(self.MESSAGE404 % kwargs)

     from ..forms import LoadPigFieldsForm
     form = LoadPigFieldsForm(obj={})
     if not form.is_valid():
         return odesk_error_response(400, 400, 'Parameters are invalid')

     from api.import_handlers.tasks import load_pig_fields
     task_params = form.cleaned_data.get('params')
     load_pig_fields.delay(sqoop.id, task_params)
     message = ("Generating pig fields delayed "
                "(link will appear in sqoop section)")
     return self._render({'result': message})
Beispiel #17
0
    def _put_dataset_download_action(self, **kwargs):
        """
        Queue the ``transform_dataset_for_download`` task for a model.

        The dataset is taken from ``TransformDataSetForm``.

        Returns an empty rendered response once the task is queued, an
        error response when the model is not trained, or ``None`` when
        ``form.is_valid()`` returns a falsy value.

        Raises:
            NotFound: if the model cannot be found.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')

        if model.status != Model.STATUS_TRAINED:
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Model is not trained')

        form = TransformDataSetForm(obj=model)
        if not form.is_valid():
            return None

        chosen_dataset = form.cleaned_data['dataset']

        from api.ml_models.tasks import transform_dataset_for_download
        transform_dataset_for_download.delay(model.id, chosen_dataset.id)
        return self._render({})
Beispiel #18
0
    def _get_dataset_download_action(self, **kwargs):
        """
        List the current dataset transformation tasks of a model.

        Each entry pairs a task returned by
        ``AsyncTask.get_current_by_object`` with the dataset looked up from
        the task's second argument.

        Raises:
            NotFound: if the model cannot be found.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')

        from api.tasks import TRANSFORM_DATASET_TASK
        current_tasks = AsyncTask.get_current_by_object(
            model, TRANSFORM_DATASET_TASK)

        downloads = [
            {'dataset': DataSet.query.get(item.args[1]), 'task': item}
            for item in current_tasks]

        response = {self.OBJECT_NAME: model.id, 'downloads': downloads}
        return self._render(response)
Beispiel #19
0
    def _put_csv_task_action(self, model_id, test_result_id):
        """
        Queue the ``get_csv_results`` task for the examples of a test.

        The fields to export come from ``SelectFieldsForCSVForm``. An error
        response is returned when the form is not valid or the fields are
        not a non-empty list.

        Raises:
            NotFound: if the test result cannot be found.
        """
        test = TestResult.query.get(test_result_id)
        if not test:
            raise NotFound('Test not found')

        form = SelectFieldsForCSVForm(obj=test)
        fields = form.cleaned_data['fields'] if form.is_valid() else None
        if not isinstance(fields, list) or len(fields) == 0:
            return odesk_error_response(
                400, ERR_INVALID_DATA,
                'Fields of the CSV export is required')

        from tasks import get_csv_results
        logging.info('Download examples in csv')
        get_csv_results.delay(test.model_id, test.id, fields)
        return self._render({})
Beispiel #20
0
    def _put_run_sql_action(self, **kwargs):
        """
        Run a SQL query against a datasource for testing purposes.

        The query, row limit, parameters and datasource name come from
        ``QueryTestForm``. ``#{name}`` placeholders in the query are
        replaced with the matching values from ``params``; the query is
        then checked with ``model.check_sql``, rebuilt with the limit via
        ``model.build_query`` and executed.

        Returns the rendered rows, their column names and the final SQL.
        Form errors are rendered under the ``error`` key; substitution,
        validation and database errors are returned as error responses.

        Raises:
            NotFound: if the requested object cannot be found.
        """
        from api.import_handlers.forms import QueryTestForm
        model = self._get_details_query({}, **kwargs)
        if model is None:
            raise NotFound(self.MESSAGE404 % kwargs)

        form = QueryTestForm(obj={})
        if not form.is_valid():
            return self._render({'error': form.error_messages})

        sql = form.cleaned_data['sql']
        limit = form.cleaned_data['limit']
        params = form.cleaned_data.get('params', {})
        datasource_name = form.cleaned_data['datasource']
        try:
            # Turn ``#{name}`` into ``%(name)s`` so that the parameters can
            # be substituted with the ``%`` operator.
            sql = re.sub(r'#{(\w+)}', r'%(\1)s', sql)
            sql = sql % params
        except (KeyError, ValueError, TypeError) as e:
            # TypeError is raised when the query holds a stray format
            # specifier (for example ``%d``) that the parameters cannot
            # satisfy.
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Wrong query parameters', e)

        try:
            model.check_sql(sql)
        except Exception as e:
            return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

        # Change query LIMIT
        sql = model.build_query(sql, limit=limit)

        try:
            data = list(model.execute_sql_iter(sql, datasource_name))
        except DatabaseError as e:
            return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

        # Column names are taken from the first row, if there is one.
        columns = []
        if len(data) > 0:
            columns = data[0].keys()

        return self._render({'data': data, 'columns': columns, 'sql': sql})
Beispiel #21
0
    def _get_weights_download_action(self, **kwargs):
        """
        Return the weights of every model segment as a JSON attachment.

        The payload maps each segment name to the result of
        ``trainer.get_weights(segment.name)``. The response is sent as
        ``application/json`` and the file is named
        ``<model.name>-weights.json``.

        Raises:
            NotFound: if the model cannot be found.
            ValidationError: if the model is not trained or its weights are
                not synchronized.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound(self.MESSAGE404 % kwargs)
        if model.status != model.STATUS_TRAINED:
            raise ValidationError('Model should be trained')
        if not model.weights_synchronized:
            raise ValidationError('Model weights should be synchronized')

        trainer = model.get_trainer(force_load=True)
        result = {}
        for segment in model.segments:
            result[segment.name] = trainer.get_weights(segment.name)
        content = json.dumps(result)

        resp = Response(content)
        resp.headers['Content-Type'] = 'application/json'
        # The filename is quoted so that names containing spaces or
        # separators still form a valid header value.
        resp.headers['Content-Disposition'] = \
            'attachment; filename="%s-weights.json"' % model.name
        return resp
Beispiel #22
0
    def _put_db_task_action(self, model_id, test_result_id):
        """
        Queue the ``export_results_to_db`` task for the examples of a test.

        The fields, datasource and table name come from ``ExportToDbForm``.
        An error response is returned when the form is not valid or the
        fields are not a non-empty list.

        Raises:
            NotFound: if the test result cannot be found.
        """
        test = TestResult.query.get(test_result_id)
        if not test:
            raise NotFound('Test not found')

        def fields_required():
            return odesk_error_response(
                400, ERR_INVALID_DATA,
                'Fields of the DB export is required')

        form = ExportToDbForm(obj=test)
        if not form.is_valid():
            return fields_required()

        fields = form.cleaned_data['fields']
        datasource = form.cleaned_data['datasource']
        tablename = form.cleaned_data['tablename']
        if not isinstance(fields, list) or len(fields) == 0:
            return fields_required()

        from tasks import export_results_to_db
        logging.info('Export examples to db')
        export_results_to_db.delay(
            test.model_id, test.id, datasource.id, tablename, fields)
        return self._render({})
Beispiel #23
0
    def _get_transformers_download_action(self, **kwargs):
        """
        List the current transformer upload tasks of a model's segments.

        Tasks are gathered for every segment with
        ``AsyncTask.get_current_by_object`` and ordered by ``created_on``,
        newest first. Each entry pairs a task with the segment looked up
        from the task's second argument.

        Raises:
            NotFound: if the model cannot be found.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')

        from api.tasks import TRANSFORMERS_UPLOAD_TASK
        collected = [
            item
            for segment in model.segments
            for item in AsyncTask.get_current_by_object(
                segment, TRANSFORMERS_UPLOAD_TASK)]
        collected.sort(key=lambda item: item.created_on, reverse=True)

        downloads = [
            {'segment': Segment.query.get(item.args[1]), 'task': item}
            for item in collected]

        response = {self.OBJECT_NAME: model.id, 'downloads': downloads}
        return self._render(response)
Beispiel #24
0
 def _get_folder(self, kwargs):
     folder = kwargs.get('folder')
     if folder not in self.ALLOWED_FOLDERS:
         raise NotFound(self.MESSAGE404 % kwargs)
     return folder
Beispiel #25
0
 def _get_generate_url_action(self, **kwargs):
     """
     Render the URL returned by the dataset's ``get_s3_download_url``.

     Raises:
         NotFound: if the dataset cannot be found.
     """
     ds = self._get_details_query({}, **kwargs)
     if ds is None:
         raise NotFound('DataSet not found')
     download_url = ds.get_s3_download_url()
     response = {self.OBJECT_NAME: ds.id, 'url': download_url}
     return self._render(response)
Beispiel #26
0
 def _get_trainer_download_s3url_action(self, **kwargs):
     """
     Render the URL returned by the entity's ``get_trainer_s3url``.

     Raises:
         NotFound: if the entity cannot be found.
     """
     entity = self._get_details_query(None, **kwargs)
     if entity is None:
         raise NotFound(self.MESSAGE404 % kwargs)
     trainer_url = entity.get_trainer_s3url()
     response = {'trainer_file_for': entity.id, 'url': trainer_url}
     return self._render(response)
Beispiel #27
0
 def _get_uid(self, kwargs):
     uid = kwargs.get('id')
     if uid is None:
         raise NotFound(self.MESSAGE404 % kwargs)
     return uid