Beispiel #1
0
    def _put_update_xml_action(self, **kwargs):
        """
        Replace the XML definition of an import handler.

        Looks the handler up via ``_get_details_query``, validates the
        submitted form, deletes the handler's existing entities, data
        sources, input parameters and scripts, and assigns the new XML from
        the form to ``handler.data``.

        Returns a 405 error response when ``handler._can_modify()`` is false,
        the rendered form errors when the form is invalid, and a 400 error
        response when anything raises while the data is being replaced.
        """
        handler = self._get_details_query(None, **kwargs)

        if not handler._can_modify():
            return odesk_error_response(405, ERR_INVALID_METHOD,
                                        handler.reason_msg)

        form = XmlImportHandlerUpdateXmlForm(obj={})
        if not form.is_valid():
            return self._render({'error': form.error_messages})

        try:
            # Child rows are removed model by model, in this fixed order,
            # before the new XML is assigned.
            child_models = (XmlEntity, XmlDataSource, XmlInputParameter,
                            XmlScript)
            for child_model in child_models:
                children = child_model.query.filter_by(
                    import_handler=handler).all()
                for child in children:
                    child.delete()
            handler.data = form.cleaned_data['data']
        except Exception as exc:
            return odesk_error_response(400, ERR_INVALID_DATA, str(exc), exc)
        # NOTE(review): the visible code returns nothing on success - confirm
        # whether a success response is expected to follow here.
Beispiel #2
0
    def post(self, action=None, **kwargs):
        """
        Dispatch the authentication-related POST actions.

        Supported values of ``action``:

        * ``get_auth_url`` - takes the authorization URL and the token pair
          returned by ``User.get_auth_url()``, saves the pair as an
          ``AuthToken`` and renders the URL.
        * ``authenticate`` - reads ``oauth_token`` and ``oauth_verifier``
          from the request, looks the saved token up, calls
          ``User.authenticate``, deletes the saved token and renders the
          resulting auth token and user.  An unknown token produces an error
          response.
        * ``get_user`` - renders ``request.user`` or responds with 401.

        Any other action is logged and raises ``NotFound``.
        """
        if action == 'get_auth_url':
            url, token, token_secret = User.get_auth_url()

            # TODO: Use redis?
            stored = AuthToken(token, token_secret)
            stored.save()

            logging.debug("User Auth: oauth token %s added", token)
            return self._render({'auth_url': url})

        if action == 'authenticate':
            arg_parser = reqparse.RequestParser()
            for arg_name in ('oauth_token', 'oauth_verifier'):
                arg_parser.add_argument(arg_name, type=str)
            args = arg_parser.parse_args()

            token = args.get('oauth_token')
            verifier = args.get('oauth_verifier')

            logging.debug("User Auth: trying to authenticate with token %s",
                          token)
            # TODO: Use redis?
            stored = AuthToken.get_auth(token)
            if not stored:
                logging.error('User Auth: token %s not found', token)
                message = 'Wrong token: {0!s}'.format(token)
                return odesk_error_response(500, 500, message)

            token_secret = stored.get('oauth_token_secret')
            auth_token, user = User.authenticate(token, token_secret,
                                                 verifier)

            # The saved token is dropped once authentication has succeeded.
            logging.debug('User Auth: Removing token %s', token)
            AuthToken.delete(stored.get('oauth_token'))

            return self._render({'auth_token': auth_token, 'user': user})

        if action == 'get_user':
            current_user = getattr(request, 'user', None)
            if not current_user:
                return odesk_error_response(401, 401, 'Unauthorized')
            return self._render({'user': current_user})

        logging.error('User Auth: invalid action %s', action)
        raise NotFound('Action not found')
Beispiel #3
0
    def delete(self, action=None, **kwargs):
        """
        Mark a stored object as hidden and schedule a server update.

        Sets the ``hide`` metadata key of the object to ``'True'`` and queues
        ``update_at_server`` for ``<folder>/<uid>``.  Responds with an empty
        body and status 204 on success, with a 404 error response when the
        object is not found and with a 500 error response on ``ClientError``.
        """
        server = self._get_server(kwargs)
        uid = self._get_uid(kwargs)
        folder = self._get_folder(kwargs)

        try:
            server.set_key_metadata(uid, folder, 'hide', 'True')
            from .tasks import update_at_server
            object_path = '{0}/{1}'.format(folder, uid)
            update_at_server.delay(object_path, server.id)
        except AmazonS3ObjectNotFound as not_found:
            response = odesk_error_response(404, 1001, str(not_found),
                                            not_found)
        except ClientError as client_error:
            response = odesk_error_response(500, 1006, str(client_error),
                                            client_error)
        else:
            response = ('', 204)
        return response
Beispiel #4
0
 def _post_clone_action(self, **kwargs):
     """
     Prepare a clone of an XML import handler.

     A new ``XmlImportHandler`` is created whose name is the source
     handler's name followed by `` clone: `` and the current timestamp.
     When ``handler._can_modify()`` is true the source XML is copied as is.
     Otherwise the XML is parsed and, for every ``pig`` element below
     ``datasources``, ``amazon_access_token`` and ``amazon_token_secret``
     are blanked, and for every ``db`` element ``password`` is blanked,
     before the XML is assigned to the clone.

     Returns a 400 error response when anything raises while the data is
     being prepared.
     """
     from datetime import datetime
     handler = self._get_details_query(None, **kwargs)
     name = "{0} clone: {1}".format(
         handler.name,
         datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
     new_handler = XmlImportHandler(name=name)
     try:
         import xml.etree.ElementTree as ET
         if handler._can_modify():
             new_handler.data = handler.data
         else:
             data = handler.data
             root = ET.fromstring(data)
             datasources = root.find('datasources')
             # An Element's truth value reflects its number of children and
             # testing it directly is discouraged, so spell the check out.
             if datasources is not None and len(datasources) > 0:
                 for ds in datasources.iter('*'):
                     if ds.tag == 'pig':
                         ds.set('amazon_access_token', '')
                         ds.set('amazon_token_secret', '')
                     elif ds.tag == 'db':
                         ds.set('password', '')
                 data = ET.tostring(root)
             new_handler.data = data
     except Exception as exc:
         return odesk_error_response(400, ERR_INVALID_DATA, str(exc), exc)
     # NOTE(review): the visible code neither saves nor returns the clone -
     # confirm whether the rest of the method follows here.
Beispiel #5
0
 def _modify(self, mtd, msg, action=None, **kwargs):
     """
     Call the parent resource's ``mtd`` method if the handler is editable.

     The handler is loaded by the ``import_handler_id`` keyword argument.
     When it cannot be edited, a 405 error response is returned whose
     message is ``msg`` followed by the handler's ``reason_msg``; otherwise
     the result of the parent method called with ``action`` and ``kwargs``
     is returned.
     """
     handler = XmlImportHandler.query.filter_by(
         id=kwargs.get('import_handler_id')).one()
     if handler and not handler.can_edit:
         reason = '{0} {1}'.format(msg, handler.reason_msg)
         return odesk_error_response(405, ERR_INVALID_METHOD, reason)

     parent = super(XmlImportHandlerPartResource, self)
     return getattr(parent, mtd)(action, **kwargs)
Beispiel #6
0
    def _put_run_sql_action(self, **kwargs):
        """
        Run a SQL query against a datasource for testing.

        The submitted form supplies the query (``sql``), a row ``limit``,
        optional ``params`` and the ``datasource`` name.  ``#{name}``
        placeholders in the query are rewritten to ``%(name)s`` and filled
        in from ``params``; the result is passed to ``model.check_sql``,
        rebuilt with the requested limit and executed.

        Returns the rendered rows, their column names and the final SQL.
        Returns the rendered form errors when the form is invalid and a 400
        error response when the parameters do not fit the query, when
        ``check_sql`` raises or when the database reports an error.

        Raises ``NotFound`` when the object cannot be found.
        """
        from api.import_handlers.forms import QueryTestForm
        model = self._get_details_query({}, **kwargs)
        if model is None:
            raise NotFound(self.MESSAGE404 % kwargs)

        form = QueryTestForm(obj={})
        if not form.is_valid():
            return self._render({'error': form.error_messages})

        sql = form.cleaned_data['sql']
        limit = form.cleaned_data['limit']
        params = form.cleaned_data.get('params', {})
        datasource_name = form.cleaned_data['datasource']
        try:
            # Raw strings keep ``\w`` and ``\1`` from being read as string
            # escapes.
            sql = re.sub(r'#{(\w+)}', r'%(\1)s', sql)
            # NOTE(review): parameter values are interpolated into the SQL
            # text as plain strings; this relies on check_sql() below to
            # reject unwanted statements - confirm that is sufficient.
            sql = sql % params
        except (KeyError, ValueError, TypeError) as e:
            # TypeError covers params that are not a mapping or do not match
            # the conversion type used in the query.
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Wrong query parameters', e)

        try:
            model.check_sql(sql)
        except Exception as e:
            return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

        # Change query LIMIT
        sql = model.build_query(sql, limit=limit)

        try:
            data = list(model.execute_sql_iter(sql, datasource_name))
        except DatabaseError as e:
            return odesk_error_response(400, ERR_INVALID_DATA, str(e), e)

        # Column names are taken from the first row; no rows, no columns.
        columns = list(data[0].keys()) if data else []

        return self._render({'data': data, 'columns': columns, 'sql': sql})
Beispiel #7
0
 def _get_script_string_action(self, **kwargs):
     """
     Render the id of a script together with its ``script_string``.

     Raises ``NotFound`` when the script cannot be found.  Any exception
     raised while reading the script or rendering the response is turned
     into a 400 error response.
     """
     script = self._get_details_query({}, **kwargs)
     if script is None:
         raise NotFound(self.MESSAGE404 % kwargs)
     try:
         payload = {
             self.OBJECT_NAME: script.id,
             'script_string': script.script_string,
         }
         return self._render(payload)
     except Exception as err:
         return odesk_error_response(400, ERR_INVALID_DATA, str(err), err)
Beispiel #8
0
    def _put_generate_visualization_action(self, **kwargs):
        """
        Process the visualization options form for a model.

        Responds with a 405 error when the model is locked and the
        ``MODIFY_DEPLOYED_MODEL`` setting is not enabled.  When the form is
        valid it is processed and the model is rendered; otherwise nothing
        is returned.
        """
        model = self._get_details_query(None, **kwargs)
        may_modify_deployed = app.config['MODIFY_DEPLOYED_MODEL']
        if not may_modify_deployed and model.locked:
            message = ('Forbidden to change visualization data. '
                       'Model is deployed and blocked for modifications.')
            return odesk_error_response(405, ERR_INVALID_METHOD, message)

        form = VisualizationOptionsForm(obj=model)
        if not form.is_valid():
            return None

        form.process()
        return self._render({self.OBJECT_NAME: model})
Beispiel #9
0
    def put(self, action=None, **kwargs):
        """
        Update the metadata of a stored object, or run a named PUT action.

        With ``action`` given, the call is handed to ``_apply_action``.
        Otherwise every submitted form field whose name is listed in
        ``ALLOWED_METADATA_KEY_NAMES`` is written as object metadata,
        ``update_at_server`` is queued for ``<folder>/<uid>`` and the object
        id is rendered.  ``ValueError`` yields a 400 error response and a
        missing object a 404 error response.
        """
        if action:
            return self._apply_action(action, method='PUT', **kwargs)

        server = self._get_server(kwargs)
        uid = self._get_uid(kwargs)
        folder = self._get_folder(kwargs)

        try:
            for meta_key, meta_value in request.form.iteritems():
                # Fields outside the allowed list are silently skipped.
                if meta_key not in self.ALLOWED_METADATA_KEY_NAMES:
                    continue
                server.set_key_metadata(uid, folder, meta_key, meta_value)
            from .tasks import update_at_server
            object_path = '{0}/{1}'.format(folder, uid)
            update_at_server.delay(object_path, server.id)
        except ValueError as bad_value:
            return odesk_error_response(400, 1006, str(bad_value), bad_value)
        except AmazonS3ObjectNotFound as not_found:
            return odesk_error_response(404, 1006, str(not_found), not_found)

        return self._render({self.OBJECT_NAME: {'id': uid}})
Beispiel #10
0
    def _put_reimport_action(self, **kwargs):
        """
        Queue a fresh import for a data set.

        Responds with a 405 error when the data set is locked.  Unless the
        data set is already in the importing or uploading status, its status
        is set to importing, it is saved and ``import_data`` is queued for
        it.  The data set is rendered in either case.
        """
        from api.import_handlers.tasks import import_data
        dataset = self._get_details_query({}, **kwargs)
        if dataset.locked:
            message = ('Data set is locked for modifications. '
                       'Some trained/tested models use it.')
            return odesk_error_response(405, 1006, message)

        already_running = dataset.status in (DataSet.STATUS_IMPORTING,
                                             DataSet.STATUS_UPLOADING)
        if not already_running:
            dataset.status = DataSet.STATUS_IMPORTING
            dataset.save()
            import_data.delay(dataset_id=dataset.id)

        return self._render({self.OBJECT_NAME: dataset})
Beispiel #11
0
 def get(self, action=None):
     """
     Render the application version together with its release notes.

     The release notes are read from ``changelog.rst`` located two levels
     above this module's directory; the ``.. _changelog:`` marker is
     removed and surrounding whitespace is stripped.  Any exception is
     reported as a 500 error response.
     """
     try:
         basedir = os.path.abspath(
             os.path.join(os.path.dirname(__file__), '../../'))
         # The ``with`` block closes the file; no explicit close() needed.
         with open(os.path.join(basedir, 'changelog.rst')) as fh:
             res = fh.read()
         return self._render({
             'about': {
                 'version': __version__,
                 'releasenotes': res.replace('.. _changelog:', '').strip(),
             }
         })
     except Exception as e:
         # str(e) instead of e.message: ``message`` is deprecated, is empty
         # for errors such as IOError and does not exist on Python 3.
         return odesk_error_response(500, ERR_INVALID_DATA, str(e), e)
Beispiel #12
0
 def _put_pig_fields_action(self, **kwargs):
     """
     Queue the ``load_pig_fields`` task for a sqoop object.

     Raises ``NotFound`` when the object cannot be found.  Responds with a
     400 error when the submitted form is invalid; otherwise the task is
     queued with the object's id and the form's ``params`` and a short
     status message is rendered.
     """
     sqoop = self._get_details_query({}, **kwargs)
     if sqoop is None:
         raise NotFound(self.MESSAGE404 % kwargs)

     from ..forms import LoadPigFieldsForm
     form = LoadPigFieldsForm(obj={})
     if not form.is_valid():
         return odesk_error_response(400, 400, 'Parameters are invalid')

     from api.import_handlers.tasks import load_pig_fields
     task_params = form.cleaned_data.get('params')
     load_pig_fields.delay(sqoop.id, task_params)
     message = ("Generating pig fields delayed "
                "(link will appear in sqoop section)")
     return self._render({'result': message})
Beispiel #13
0
    def _put_dataset_download_action(self, **kwargs):
        """
        Queue ``transform_dataset_for_download`` for a trained model.

        Raises ``NotFound`` when the model cannot be found and responds with
        a 400 error when the model is not in the trained status.  With a
        valid form the task is queued for the model and the selected data
        set and an empty context is rendered; with an invalid form nothing
        is returned.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')
        if model.status != Model.STATUS_TRAINED:
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Model is not trained')

        form = TransformDataSetForm(obj=model)
        if form.is_valid():
            selected_dataset = form.cleaned_data['dataset']

            from api.ml_models.tasks import transform_dataset_for_download
            transform_dataset_for_download.delay(model.id,
                                                 selected_dataset.id)
            return self._render({})
        return None
Beispiel #14
0
    def _put_csv_task_action(self, model_id, test_result_id):
        """
        Queue the task that generates test examples in CSV format.

        Raises ``NotFound`` when the test result does not exist.  The task
        is queued only when the form is valid and its ``fields`` value is a
        non-empty list; otherwise a 400 error response is returned.
        """
        test = TestResult.query.get(test_result_id)
        if not test:
            raise NotFound('Test not found')

        form = SelectFieldsForCSVForm(obj=test)
        selected = form.cleaned_data['fields'] if form.is_valid() else None
        if not (isinstance(selected, list) and selected):
            return odesk_error_response(
                400, ERR_INVALID_DATA,
                'Fields of the CSV export is required')

        from tasks import get_csv_results
        logging.info('Download examples in csv')
        get_csv_results.delay(test.model_id, test.id, selected)
        return self._render({})
Beispiel #15
0
    def _put_upload_to_server_action(self, **kwargs):
        """
        Queue the upload of a trained model to the chosen server.

        Responds with a 400 error when the model is not in the trained
        status.  With a valid form, ``upload_model_to_server`` chained with
        ``update_at_server`` is sent for asynchronous execution and the
        model is rendered together with a status message; with an invalid
        form nothing is returned.
        """
        from api.servers.tasks import upload_model_to_server, update_at_server
        from api.servers.forms import ChooseServerForm

        model = self._get_details_query(None, **kwargs)
        if model.status != Model.STATUS_TRAINED:
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Model is not yet trained')

        form = ChooseServerForm(obj=model)
        if not form.is_valid():
            return None

        server = form.cleaned_data['server']
        upload_step = upload_model_to_server.s(server.id, model.id,
                                               request.user.id)
        refresh_step = update_at_server.s(server.id)
        (upload_step | refresh_step).apply_async()

        status = 'Model "{0}" will be uploaded to server'.format(model.name)
        return self._render({self.OBJECT_NAME: model, 'status': status})
Beispiel #16
0
    def _put_db_task_action(self, model_id, test_result_id):
        """
        Queue the task that exports test examples to a database table.

        Raises ``NotFound`` when the test result does not exist.  The task
        is queued only when the form is valid and its ``fields`` value is a
        non-empty list; otherwise a 400 error response is returned.
        """
        test = TestResult.query.get(test_result_id)
        if not test:
            raise NotFound('Test not found')

        form = ExportToDbForm(obj=test)
        if form.is_valid():
            cleaned = form.cleaned_data
            export_fields = cleaned['fields']
            datasource = cleaned['datasource']
            tablename = cleaned['tablename']
            if isinstance(export_fields, list) and export_fields:
                from tasks import export_results_to_db
                logging.info('Export examples to db')
                export_results_to_db.delay(test.model_id, test.id,
                                           datasource.id, tablename,
                                           export_fields)
                return self._render({})

        return odesk_error_response(
            400, ERR_INVALID_DATA, 'Fields of the DB export is required')
Beispiel #17
0
    def _put_transformers_download_action(self, **kwargs):
        """
        Queue ``upload_segment_features_transformers`` for a model segment.

        Raises ``NotFound`` when the model cannot be found or when no
        segment of the model has the name given in the form.  Responds with
        a 405 error when the model is not in the trained status.  With an
        invalid form nothing is returned; otherwise the task is queued for
        the first matching segment and an empty context is rendered.
        """
        model = self._get_details_query(None, **kwargs)
        if model is None:
            raise NotFound('Model not found')
        if model.status != Model.STATUS_TRAINED:
            return odesk_error_response(405, ERR_INVALID_METHOD,
                                        'Model is not trained')

        form = TransformersDownloadForm(obj=model)
        if not form.is_valid():
            return None

        segment_name = form.cleaned_data['segment']
        segment_query = Segment.query.filter(Segment.model_id == model.id)
        segment_query = segment_query.filter(Segment.name == segment_name)
        matches = segment_query.all()
        if not matches:
            raise NotFound('Segment not found in trained model')

        data_format = form.cleaned_data['data_format']

        from api.ml_models.tasks import upload_segment_features_transformers
        upload_segment_features_transformers.delay(
            model.id, matches[0].id, data_format)
        return self._render({})
Beispiel #18
0
    def _put_import_features_from_xml_ih_action(self, **kwargs):
        """
        Create one feature per field of the model's train import handler.

        The request is rejected with a 405 error response unless the model
        has the new status, its train import handler type is ``xml``
        (compared case-insensitively) and its feature set holds no features
        yet.  The created features are committed in a single transaction
        and rendered, as dictionaries, together with the model id.
        """
        model = self._get_details_query(None, **kwargs)
        # Every rejection below answers with this same response.
        rejection = odesk_error_response(
            405, ERR_INVALID_METHOD,
            'Only new models with 0 features and Xml import handler as '
            'trainer is allowed for this feature')

        is_new = model.status == Model.STATUS_NEW
        if not is_new or model.train_import_handler_type.lower() != 'xml':
            return rejection

        count_query = Feature.query.join(
            FeatureSet, FeatureSet.id == Feature.feature_set_id)
        count_query = count_query.join(
            Model, Model.features_set_id == FeatureSet.id)
        count_query = count_query.filter(Model.id == model.id)
        features_count = count_query.with_entities(
            func.count(Feature.id)).scalar()
        if features_count > 0:
            return rejection

        created = []
        for field in model.train_import_handler.list_fields():
            feature = Feature()
            feature.name = field.name
            feature.type = Feature.field_type_to_feature_type(field.type)
            feature.feature_set_id = model.features_set_id
            # Saved without committing; one commit follows the loop.
            feature.save(commit=False)
            created.append(feature)
        app.sql_db.session.commit()

        return self._render({
            self.OBJECT_NAME: model.id,
            'features': [feature.to_dict() for feature in created],
        })
Beispiel #19
0
    def _get_groupped_action(self, **kwargs):
        """
        Groups test examples by `group_by_field` and calculates the mean
        average precision over the groups.
        Note: `group_by_field` should be specified in request parameters.

        For each group an average precision is computed: the area under the
        precision/recall curve (cut to the first `count` points) when the
        group has more than one example, falling back to `apk` when that
        fails or when the group has a single example.  The rendered context
        holds up to 100 groups ordered by descending size, the field name
        and the mean of the group values ("N/A" when any of them is NaN).

        Returns a 400 error response when the field parameter is missing,
        when there are no groups, when the first example has no list of
        probabilities or when its label is not one of "True", "False", "0"
        or "1".
        """
        from ml_metrics import apk
        import numpy as np
        from operator import itemgetter
        logging.info('Start request for calculating MAP')

        group_by_field, count = self._parse_map_params()
        if not group_by_field:
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'field parameter is required')

        res = []
        avps = []

        groups = TestExample.get_grouped(
            field=group_by_field,
            model_id=kwargs.get('model_id'),
            test_result_id=kwargs.get('test_result_id')
        )

        import sklearn.metrics as sk_metrics
        if len(groups) < 1:
            logging.error('Can not group')
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Can not group')

        # The first example of the first group is used as a sample of the
        # data layout for the checks below.
        first_example = groups[0]['list'][0]
        if 'prob' not in first_example or \
                not isinstance(first_example['prob'], list):
            logging.error('Examples do not contain probabilities')
            return odesk_error_response(
                400, ERR_INVALID_DATA,
                'Examples do not contain probabilities')

        first_label = first_example['label']
        if first_label in ("True", "False"):
            def transform(x):
                # bool("False") is True, so compare with the literal
                # instead of relying on the truthiness of the string.
                return int(str(x) == "True")
        elif first_label in ("0", "1"):
            def transform(x):
                return int(x)
        else:
            logging.error('Type of labels do not support')
            return odesk_error_response(400, ERR_INVALID_DATA,
                                        'Type of labels do not support')

        logging.info('Calculating avps for groups')
        calc_average = True
        for group in groups:
            group_list = group['list']

            labels = [transform(item['label']) for item in group_list]
            pred_labels = [transform(item['pred']) for item in group_list]
            # Index 1 of each probability list is used as the score.
            probs = [item['prob'][1] for item in group_list]
            if len(labels) > 1:
                labels = np.array(labels)
                probs = np.array(probs)
                try:
                    precision, recall, thresholds = \
                        sk_metrics.precision_recall_curve(labels, probs)
                    avp = sk_metrics.auc(recall[:count], precision[:count])
                except Exception:
                    # Fall back to apk when the curve cannot be computed;
                    # KeyboardInterrupt/SystemExit are left to propagate.
                    avp = apk(labels, pred_labels, count)
            else:
                avp = apk(labels, pred_labels, count)
            if math.isnan(avp):
                # One NaN makes the overall mean meaningless.
                calc_average = False
                avp = "Can't be calculated"
            avps.append(avp)
            res.append({'group_by_field': group[group_by_field],
                        'count': len(group_list),
                        'avp': avp})

        res = sorted(res, key=itemgetter("count"), reverse=True)[:100]
        logging.info('Calculating map')
        mavp = np.mean(avps) if calc_average else "N/A"
        context = {self.list_key: {'items': res},
                   'field_name': group_by_field,
                   'mavp': mavp}
        logging.info('End request for calculating MAP')
        return self._render(context)
Beispiel #20
0
    def _put_train_action(self, **kwargs):
        """
        Validate a training request and queue the tasks that carry it out.

        The request is rejected with a 405 error response when the entity is
        a locked model and the ``MODIFY_DEPLOYED_MODEL`` setting is not
        enabled, or when tests of the model are counted as in progress.

        With a valid form a task chain is built: an ``import_data`` step
        when a new data set is selected, followed either by a spot instance
        request (``request_spot_instance`` and ``get_request_instance``) or
        by ``train_entity_task`` routed to the queue named after the chosen
        instance.  The chain is sent for asynchronous execution and the
        entity's id, status and ``training_in_progress`` are rendered,
        together with the new data set's id when one was created.

        Raises ``NotImplementedError`` when a spot instance is requested
        for an entity type other than ``model``.
        """
        from api.import_handlers.tasks import import_data
        from api.instances.tasks import request_spot_instance, \
            get_request_instance
        from celery import chain
        obj = self._get_details_query(None, **kwargs)
        # check if model is deployed
        if not app.config['MODIFY_DEPLOYED_MODEL'] and \
           self.ENTITY_TYPE == 'model' and obj.locked:
            return odesk_error_response(
                405, ERR_INVALID_METHOD, 'Re-train is forbidden. Model is '
                'deployed and blocked for '
                'modifications.')
        # check if some model tests are in progress
        from api.model_tests.models import TestResult
        tests_in_progress = TestResult.query.\
            filter(TestResult.model_id == obj.id)\
            .filter(TestResult.status.in_(TestResult.TEST_STATUSES)).count()
        if tests_in_progress:
            return odesk_error_response(
                405, ERR_INVALID_METHOD, 'There are some tests of this model '
                'in progress. Please, wait for a '
                'moment before re-training model.')
        # start train model
        # Read before form.save(), which changes the status.
        delete_metadata = obj.status != obj.STATUS_NEW
        form = self.train_form(obj=obj, **kwargs)
        if form.is_valid():
            entity = form.save()  # set status to queued
            entity_key = '{0}_id'.format(self.ENTITY_TYPE)
            new_dataset_selected = form.cleaned_data.get(
                'new_dataset_selected')
            existing_instance_selected = form.cleaned_data.get(
                'existing_instance_selected')
            instance = form.cleaned_data.get('aws_instance', None)
            spot_instance_type = form.cleaned_data.get('spot_instance_type',
                                                       None)

            tasks_list = []
            if new_dataset_selected:
                import_handler = entity.train_import_handler
                params = form.cleaned_data.get('parameters', None)
                dataset = import_handler.create_dataset(
                    params,
                    data_format=form.cleaned_data.get('format',
                                                      DataSet.FORMAT_JSON))
                opts = {'dataset_id': dataset.id, entity_key: entity.id}
                tasks_list.append(import_data.s(**opts))
                # Wrapped in a list so both branches yield an iterable.
                dataset = [dataset]
            else:
                dataset = form.cleaned_data.get('dataset', None)
            dataset_ids = [ds.id for ds in dataset]

            if not existing_instance_selected:  # request spot instance
                if self.ENTITY_TYPE != 'model':
                    # NotImplemented is a constant, not an exception class;
                    # calling it would raise an unrelated TypeError.
                    raise NotImplementedError()

                tasks_list.append(
                    request_spot_instance.s(instance_type=spot_instance_type,
                                            model_id=entity.id))
                tasks_list.append(
                    get_request_instance.subtask(
                        (), {
                            'callback': 'train',
                            'dataset_ids': dataset_ids,
                            'model_id': entity.id,
                            'user_id': request.user.id,
                        },
                        retry=True,
                        countdown=10,
                        retry_policy={
                            'max_retries': 3,
                            'interval_start': 5,
                            'interval_step': 5,
                            'interval_max': 10
                        }))
            else:
                opts = {
                    entity_key: entity.id,
                    'user_id': request.user.id,
                    'delete_metadata': delete_metadata
                }
                # With a new data set, 'dataset_ids' is not passed here.
                if not new_dataset_selected:
                    opts['dataset_ids'] = dataset_ids
                tasks_list.append(
                    self.train_entity_task.subtask(None,
                                                   opts,
                                                   queue=instance.name))

            chain(tasks_list).apply_async()
            ret_obj = {
                'id': entity.id,
                'status': entity.status,
                'training_in_progress': entity.training_in_progress
            }
            if new_dataset_selected:
                ret_obj['new_dataset'] = dataset[0].id

            return self._render({self.OBJECT_NAME: ret_obj})
        # NOTE(review): nothing is returned for an invalid form in the
        # visible code - confirm how validation failures are reported.