Beispiel #1
0
class FeatureSetForm(BaseForm):
    """
    Feature set edit form.

    Resolves ``group_by`` into Feature rows and, on save, flags the feature
    named by ``target_variable`` as the required target variable.
    """
    schema_name = CharField()
    group_by = JsonField()
    target_variable = CharField()

    # Feature found by clean_target_variable(); stays None when no target
    # variable name was submitted.
    target_feature = None

    def clean_group_by(self, value, field):
        """
        Turn the submitted list of ``{'id': ...}`` items into the matching
        Feature objects.

        Returns None when no value was submitted.
        """
        if value is None:
            return None
        ids = [feature['id'] for feature in value]
        return Feature.query.filter(Feature.id.in_(ids)).all()

    def clean_target_variable(self, value, field):
        """
        Look up the feature named ``value`` in this feature set and remember
        it in ``self.target_feature``.

        Raises ValidationError when no such feature exists.
        """
        if value:
            # first() returns None when nothing matches; one() would raise
            # NoResultFound, which made the check below unreachable.
            self.target_feature = Feature.query.filter_by(
                name=value,
                feature_set_id=self.id).first()
            if self.target_feature is None:
                raise ValidationError('Feature not found')
        return value

    def save(self):
        """
        Mark the feature set as modified, flag the target feature (if any)
        as required target variable, then delegate to the base save.
        """
        self.cleaned_data['modified'] = True
        if self.target_feature:
            self.target_feature.is_target_variable = True
            self.target_feature.required = True
            # The feature is saved with commit=False and the base save()
            # runs right after; presumably that call commits - TODO confirm
            self.target_feature.save(commit=False)
        return super(FeatureSetForm, self).save()
Beispiel #2
0
class DataSetAddForm(BaseForm):
    """
    Creates a new dataset for an import handler and schedules its import.
    """
    required_fields = ('format', )
    format = ChoiceField(choices=DataSet.FORMATS)
    import_params = JsonField()

    def before_clean(self):
        """Load the import handler referenced by ``self.import_handler_id``."""
        handler_id = self.import_handler_id
        self.importhandler = ImportHandler.query.get(handler_id)

    def clean_import_params(self, value, field):
        """
        Check that ``value`` is a dict holding every parameter the import
        handler declares; the first missing one is reported.
        """
        if not isinstance(value, dict):
            raise ValidationError('Should be a dict')

        missing = [name for name in self.importhandler.import_params
                   if name not in value]
        if missing:
            raise ValidationError(
                '{0!s} parameter is required'.format(missing[0]))

        return value

    def save(self, commit=True):
        """
        Create and save the dataset, queue the ``import_data`` task for it
        and return the dataset. ``commit`` is accepted but not used.
        """
        from api.import_handlers.tasks import import_data
        data = self.cleaned_data
        new_dataset = self.importhandler.create_dataset(
            params=data['import_params'],
            data_format=data['format'],
            compress=True)
        new_dataset.save()
        import_data.delay(new_dataset.id)
        return new_dataset
Beispiel #3
0
class SelectFieldsForCSVForm(BaseForm):
    """
    Form with a single required JSON entry named ``fields``.

    ``fields`` is expected to be an array of field names used to generate
    the test examples CSV in ``_put_csv_task_action``.
    """
    # 'fields' must always be supplied
    required_fields = ('fields', )
    fields = JsonField()
Beispiel #4
0
class BaseChooseInstanceAndDataset(BaseForm):
    """
    Base form for choosing a dataset (existing or newly imported) and an
    instance (existing or a spot instance type).

    Subclasses may override ``HANDLER_TYPE`` to select which
    ``<type>_import_handler`` attribute of ``self.model`` is used.
    """
    HANDLER_TYPE = 'train'
    TYPE_CHOICES = ('m3.xlarge', 'm3.2xlarge', 'cc2.8xlarge', 'cr1.8xlarge',
                    'hi1.4xlarge', 'hs1.8xlarge')

    new_dataset_selected = BooleanField()
    existing_instance_selected = BooleanField()
    aws_instance = ModelField(model=Instance, return_model=True)
    dataset = ModelField(model=DataSet, return_model=True)
    parameters = JsonField()
    spot_instance_type = ChoiceField(choices=TYPE_CHOICES)
    format = ChoiceField(choices=DataSet.FORMATS)

    @property
    def import_handler(self):
        """
        Import handler of ``self.model`` selected by ``HANDLER_TYPE``.

        Returns None when the form has no model or no handler type.
        """
        if self.model is not None and self.HANDLER_TYPE is not None:
            return getattr(self.model, '%s_import_handler' % self.HANDLER_TYPE)
        return None

    def clean_parameters(self, params, field):
        """
        Normalise missing parameters to an empty dict and, when a model is
        set, require them to be a dict.
        """
        params = params or {}
        if self.model is None:
            return params

        if not isinstance(params, dict):
            raise ValidationError('Invalid parameters format')
        return params

    def validate_data(self):
        """Cross-field validation of the "DataSet" and "Instance" tabs."""
        data = self.cleaned_data

        # DataSet tab
        if data.get('new_dataset_selected'):
            if not data.get('format'):
                self.add_error("format",
                               "Please select format of the Data Set")

            # Without a model / handler type there is no handler and hence
            # no parameter requirements to check.
            handler = self.import_handler
            parameter_names = \
                handler.import_params if handler is not None else None
            if parameter_names:
                # 'parameters' is absent when its own cleaning failed
                parameters = data.get('parameters') or {}
                missed_params = set(parameter_names) - set(parameters)
                if missed_params:
                    # sorted: keep the message stable across runs
                    self.add_error(
                        "parameters", "Some parameters are missing: %s" %
                        ', '.join(sorted(missed_params)))
        elif not data.get('dataset'):
            self.add_error("dataset", "Please select Data Set")

        # Instance tab
        if data.get('existing_instance_selected'):
            if not data.get('aws_instance'):
                self.add_error("aws_instance",
                               "Please select instance with a worker")
        elif not data.get('spot_instance_type'):
            self.add_error("spot_instance_type",
                           "Please select Spot instance type")
Beispiel #5
0
class ServerModelVerificationForm(BaseForm):
    """
    Creates a server model verification and queues the ``verify_model`` task.
    """
    required_fields = ('server_id', 'model_id', 'test_result_id')

    server_id = ModelField(model=Server)
    model_id = ModelField(model=Model)
    import_handler_id = ModelField(model=XmlImportHandler)
    test_result_id = ModelField(model=TestResult)
    description = JsonField()
    params_map = JsonField()
    count = IntegerField()
    clazz = CharField()

    def save(self, *args, **kwargs):
        """
        Save through the base form, then schedule ``verify_model`` with the
        saved object's id and the cleaned ``count``; return the saved object.
        """
        verification = super(ServerModelVerificationForm, self).save(
            *args, **kwargs)
        # imported inside the method, after the base save has run
        from tasks import verify_model
        examples_count = self.cleaned_data['count']
        verify_model.delay(verification.id, examples_count)
        return verification
Beispiel #6
0
class FeatureTransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Form for adding or editing the transformer of a feature.

    The transformer is either described inline (``type`` + ``params``) or
    taken from a pretrained Transformer (``predefined_selected``).
    """
    group_chooser = 'predefined_selected'
    REQUIRED_FORM = ['type']
    REQUIRED_PRETRAINED = ['transformer']
    required_fields_groups = {
        'true': REQUIRED_PRETRAINED,
        'false': REQUIRED_FORM,
        None: REQUIRED_FORM
    }

    predefined_selected = BooleanField()
    feature_id = ModelField(model=Feature, return_model=True)

    type_field = CharField(name='type')
    params = JsonField()

    transformer = ModelField(model=Transformer, return_model=True)

    def validate_data(self):
        """
        Reject an unknown inline transformer type; otherwise convert the
        parameters according to the TRANSFORMERS configuration.
        """
        data = self.cleaned_data
        type_ = data.get('type')
        uses_pretrained = data.get('predefined_selected')

        inline_type_given = bool(type_) and not uses_pretrained
        if inline_type_given and type_ not in Transformer.TYPES_LIST:
            self.add_error('type', 'type is invalid')
            return

        self.convert_params(
            type_, data.get('params'), configuration=TRANSFORMERS)

    def save(self, commit=True, save=True):
        """
        Build the transformer dict, attach it to the feature (when one was
        given) and return the dict. ``commit`` and ``save`` are not used.
        """
        data = self.cleaned_data
        feature = data.get('feature_id', None)

        if data.get('predefined_selected', False):
            pretrained = data.get('transformer')
            transformer = {'type': pretrained.name, 'id': pretrained.id}
        else:
            # inline transformers are stored with the placeholder id -1
            transformer = {
                'id': -1,
                "type": data.get('type'),
                "params": data.get('params'),
            }

        if feature is None:
            return transformer

        feature.transformer = transformer
        feature.save()
        return transformer
Beispiel #7
0
class ModelEditForm(BaseForm):
    """
    Model edit form: updates plain fields, tags and (optionally) replaces
    the model's features and classifier.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', )
    name = CharField()
    train_import_handler = ImportHandlerField()
    test_import_handler = ImportHandlerField()
    example_id = CharField()
    example_label = CharField()
    tags = JsonField()
    features = FeaturesField()

    def save(self, commit=True):
        """
        Save the model, then synchronise its tags and features.

        Tags: when a non-empty list is submitted, the model ends up with
        exactly the submitted tags (existing Tag rows are reused, missing
        ones are created) and every touched tag has its counter updated.

        Features: when submitted, the feature set's features are deleted
        and re-created from the submitted dict.

        Raises DBException when updating features fails.
        """
        # Snapshot taken before the base save() runs.
        known_tags = list(self.obj.tags)
        known_texts = [tag.text for tag in known_tags]
        model = super(ModelEditForm, self).save()

        tags = self.cleaned_data.get('tags', None)
        if tags:
            for tag_text in tags:
                if tag_text in known_texts:
                    continue
                tag = Tag.query.filter_by(text=tag_text).first()
                if tag is None:
                    tag = Tag()
                    tag.text = tag_text
                    tag.save()
                known_tags.append(tag)
                # Remember the text so that a value repeated in the
                # submitted list does not attach the same tag twice.
                known_texts.append(tag_text)

            model.tags = [tag for tag in known_tags if tag.text in tags]
            model.save()
            # Tags dropped from the model are refreshed too.
            for tag in known_tags:
                tag.update_counter()

        features = self.cleaned_data.get('features', None)
        if features:
            try:
                Feature.query.filter_by(
                    feature_set_id=model.features_set_id).delete()
                model.classifier = features['classifier'] or {}
                model.features_set.from_dict(features, commit=False)
            except Exception as e:
                db.session.rollback()
                raise DBException(
                    "Error occurred while updating features: "
                    "{0}".format(e), e)
            else:
                db.session.commit()

        return model
Beispiel #8
0
class GridSearchForm(BaseForm):
    """
    Grid search parameters form for a model's classifier.

    The form is constructed with a ``model`` keyword argument; its
    classifier type selects the parameter configuration in CLASSIFIERS.
    """
    parameters = JsonField()
    scoring = CharField()
    train_dataset = ModelField(model=DataSet, return_model=True)
    test_dataset = ModelField(model=DataSet, return_model=True)

    def __init__(self, *args, **kwargs):
        self.model = kwargs.get('model', None)
        super(GridSearchForm, self).__init__(*args, **kwargs)

    def clean_parameters(self, grid_params, field):
        """
        Convert comma separated parameter values into typed lists.

        Only parameters declared in the classifier configuration are kept;
        empty values are skipped. Values are cast according to the
        configured type (integer / float / boolean, string by default) and
        checked against ``choices`` when those are configured.

        Raises ValidationError for values that cannot be parsed or are not
        among the allowed choices.
        """
        params = {}
        # A form submitted without parameters yields no grid at all.
        if grid_params is None:
            return params

        config = CLASSIFIERS[self.model.classifier['type']]
        for pconfig in config['parameters']:
            name = pconfig['name']
            if name not in grid_params:
                continue
            value = grid_params[name]
            if not value:
                continue

            value = value.split(',')
            type_ = pconfig.get('type', 'string')
            try:
                if type_ == 'integer':
                    value = [int(item) for item in value]
                elif type_ == 'float':
                    value = [float(item) for item in value]
                elif type_ == 'boolean':
                    value = [item == 'true' for item in value]
            except ValueError as exc:
                # Malformed user input is a validation problem, not a
                # server error.
                raise ValidationError(
                    'Invalid {0}: {1}'.format(name, exc))

            choices = pconfig.get('choices')
            if choices:
                for item in value:
                    if item not in choices:
                        raise ValidationError(
                            'Invalid {0}: should be one of {1}'.format(
                                name, ','.join(choices)))

            params[name] = value
        return params

    def save(self, *args, **kwargs):
        """Save the object with the form's model attached and return it."""
        obj = super(GridSearchForm, self).save(commit=False)
        obj.model = self.model
        obj.save()
        return obj
Beispiel #9
0
class ScalerForm(BasePredefinedForm):
    """
    Scaler form built on BasePredefinedForm.

    Required fields depend on ``predefined_selected``: ``scaler`` when it is
    'true', ``type`` otherwise (see ``required_fields_groups``).
    """
    OBJECT_NAME = 'scaler'
    DOC = PredefinedScaler

    # Name of the field whose value selects the entry of
    # required_fields_groups (presumably evaluated by the base form -
    # TODO confirm in BasePredefinedForm).
    group_chooser = 'predefined_selected'
    required_fields_groups = {'true': ('scaler', ),
                              'false': ('type', ),
                              None: ('type', )}

    name = CharField()
    type_field = ChoiceField(choices=PredefinedScaler.TYPES_LIST, name='type')
    params = JsonField()
    # whether the feature scaler fields should be copied from a predefined one
    predefined_selected = BooleanField()
    # whether we need to create a predefined item (not feature related)
    scaler = DocumentField(doc=PredefinedScaler, by_name=True, return_doc=True)
    feature_id = DocumentField(doc=Feature, by_name=False,
                               return_doc=False)
Beispiel #10
0
class ModelAddForm(BaseForm):
    """
    Adds new model.

    Note: If import handler and import handler file would be specified,
    new model will use import handler from file.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', ('import_handler', 'import_handler_file'))

    name = UniqueNameField(Model=Model)
    import_handler = ImportHandlerField()
    import_handler_file = ImportHandlerFileField()
    test_import_handler = ImportHandlerField()
    test_import_handler_file = ImportHandlerFileField()
    features = JsonField()
    trainer = CharField()

    def clean_import_handler(self, value, field):
        """Expose the handler under the ``train_import_handler`` key too."""
        self.cleaned_data['train_import_handler'] = value
        return value

    def clean_import_handler_file(self, value, field):
        """Store the field's ``import_params`` as ``train_import_params``."""
        self.cleaned_data['train_import_params'] = field.import_params
        return value

    def clean_test_import_handler_file(self, value, field):
        """Store the field's ``import_params`` as ``test_import_params``."""
        self.cleaned_data['test_import_params'] = field.import_params
        return value

    def clean_features(self, value, field):
        """
        Validate the features JSON by building a FeatureModel from it and
        store a Trainer created from that model in
        ``cleaned_data['trainer']``.

        Raises ValidationError when the features schema is invalid.
        """
        if value:
            from cloudml.trainer.trainer import Trainer
            from cloudml.trainer.config import FeatureModel, SchemaException
            try:
                # TODO: add support of json dict to FeatureModel
                feature_model = FeatureModel(json.dumps(value), is_file=False)
                self.cleaned_data['trainer'] = Trainer(feature_model)
            # "as" form: valid on Python 2.6+ and 3, and consistent with
            # the other except clauses of this file
            except SchemaException as exc:
                raise ValidationError(
                    'Features JSON file is invalid: %s' % exc, exc)
        return value
Beispiel #11
0
class VisualizationOptionsForm(BaseForm):
    """
    Form used for updating Trained model visualization.

    Note:
        Currently only the `tree_deep` type is supported (Decision Tree
        and Random Forest classifiers).
    """
    UPDATE_TREE_DEEP = 'tree_deep'
    TYPES = [
        UPDATE_TREE_DEEP,
    ]
    # Parameters expected for every update type
    PARAMS_BY_TYPE = {UPDATE_TREE_DEEP: [{'name': 'deep', 'type': 'int'}]}

    parameters = JsonField()
    type_ = CharField(name="type")

    def clean_type(self, value, field):
        """Raise ValidationError when a type outside ``TYPES`` is given."""
        if value and value not in self.TYPES:
            raise ValidationError('invalid type')
        return value

    def validate_data(self):
        """
        Check that every parameter configured for the chosen type is
        present and cast 'int' parameters in place.
        """
        type_ = self.cleaned_data.get('type')
        config = self.PARAMS_BY_TYPE.get(type_)
        if config is None:
            # Missing or rejected type: there is no parameter
            # configuration to validate against.
            return

        parameters = self.cleaned_data.get('parameters')
        if not isinstance(parameters, dict):
            # Treat absent / malformed parameters as "nothing supplied" so
            # every expected parameter is reported as required.
            parameters = {}

        for item in config:
            name = item['name']
            val = parameters.get(name)
            if not val:
                self.add_error('parameters', 'Parameter %s is required' % name)
                # Do not also report a parse error for the missing value.
                continue
            if item['type'] == 'int':
                try:
                    parameters[name] = int(val)
                except (TypeError, ValueError, OverflowError) as exc:
                    self.add_error(
                        'parameters',
                        "Can't parse parameter %s: %s" % (name, exc), exc)
Beispiel #12
0
class PredefinedDataSourceForm(BaseForm):
    """
    Form for adding and editing predefined data sources.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'type')

    name = CharField()
    type_field = ChoiceField(choices=PredefinedDataSource.TYPES_LIST,
                             name='type')
    db = JsonField()

    def clean_name(self, value, field):
        """
        Ensure no other data source already uses ``value`` as its name.

        When editing, the edited object itself is left out of the check.
        """
        same_name = PredefinedDataSource.query.filter_by(name=value)
        if self.obj.id:
            same_name = same_name.filter(
                PredefinedDataSource.id != self.obj.id)

        if same_name.count():
            message = ('DataSource with name "%s" already exist. '
                       'Please choose another one.')
            raise ValidationError(message % value)
        return value
Beispiel #13
0
class NamedFeatureTypeForm(BaseForm, FeatureParamsMixin):
    """
    Named feature type add/edit form.

    Validates the definition by trying to instantiate the type.
    """
    required_fields = ('name', 'type')

    name = UniqueNameField(Model=NamedFeatureType)
    type_field = ChoiceField(choices=NamedFeatureType.TYPES_LIST, name='type')
    input_format = CharField()
    params = JsonField()

    def validate_data(self):
        """
        Try to create an instance of the feature type from the submitted
        ``params`` and ``input_format`` (defaulting to 'plain').

        Skipped when field-level errors were already recorded.
        """
        if self.errors:
            return

        # Trying to make instance of the type
        type_ = self.cleaned_data.get('type')
        type_factory = FEATURE_TYPE_FACTORIES.get(type_)
        params = self.cleaned_data.get('params') or {}
        # Read the 'input_format' field; the previous code read 'params'
        # here, passing the params dict as the input format.
        input_format = self.cleaned_data.get('input_format') or 'plain'
        try:
            type_factory.get_instance(params, input_format)
        except InvalidFeatureTypeException as exc:
            self.add_error("type", 'Cannot create instance of '
                           'feature type: {0}'.format(exc), exc)
Beispiel #14
0
class XmlDataSourceForm(ParametersConvertorMixin, BaseForm):
    """
    Form for adding and editing a data source of an XML import handler.
    """
    XML_PARAMETERS = True
    PARAMETERS_CONFIGURATION = ExtractionPlan.get_datasources_config()

    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type_field = ChoiceField(choices=_get_ds_types(), name='type')
    params = JsonField()
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=False)

    def clean_name(self, value, field):
        """
        Require a name (unless editing with NO_REQUIRED_FOR_EDIT) and make
        sure it is unique within the import handler.
        """
        name_optional = self.NO_REQUIRED_FOR_EDIT and self.obj.id
        if not name_optional and not value:
            raise ValidationError('name is required field')

        # An existing object keeps its handler; a new one takes it from the
        # submitted data.
        if self.obj.id:
            import_handler_id = self.obj.import_handler_id
        else:
            import_handler_id = self.data['import_handler_id']

        same_name = XmlDataSource.query.filter_by(
            name=value, import_handler_id=import_handler_id)
        if self.obj.id:
            same_name = same_name.filter(XmlDataSource.id != self.obj.id)

        if same_name.count():
            message = ('Data Source with name "%s" already '
                       'exist. Please choose another one.')
            raise ValidationError(message % value)
        return value

    def validate_data(self):
        """Convert ``params`` using the data source parameters config."""
        data = self.cleaned_data
        self.convert_params(
            data.get('type'),
            data.get('params'),
            configuration=self.PARAMETERS_CONFIGURATION)
Beispiel #15
0
class ClassifierForm(BasePredefinedForm, ParametersConvertorMixin):
    """
    Form for one of these cases (depending on the parameters):
        1. adding/editing a predefined classifier
        2. editing the classifier of a specific model
        3. copying the classifier config from a predefined one
           to the model's classifier.
    """
    OBJECT_NAME = 'classifier'
    DOC = PredefinedClassifier

    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('classifier', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedClassifier.TYPES_LIST, name='type')
    params = JsonField()
    # whether the model classifier fields should be copied from a
    # predefined one
    predefined_selected = BooleanField()
    # whether we need to create a predefined item (not model-related)
    classifier = DocumentField(
        doc=PredefinedClassifier, by_name=False, return_doc=True)
    model_id = DocumentField(doc=Model, by_name=False, return_doc=False)

    def validate_data(self):
        """
        Run the base validation, then convert ``params`` (when given)
        according to the CLASSIFIERS configuration.
        """
        super(ClassifierForm, self).validate_data()

        params = self.cleaned_data.get('params')
        if not params:
            return

        from config import CLASSIFIERS
        self.convert_params(
            self.cleaned_data['type'], params, configuration=CLASSIFIERS)
Beispiel #16
0
class QueryTestForm(BaseForm):
    """
    Form with required ``sql``, ``limit`` and ``datasource`` fields and
    optional JSON ``params``.

    NOTE(review): presumably used to test-run a SQL query against a
    datasource - confirm against the view that uses it.
    """
    required_fields = ('sql', 'limit', 'datasource')
    sql = CharField()
    params = JsonField()
    limit = IntegerField()
    datasource = CharField()
Beispiel #17
0
class LoadPigFieldsForm(BaseForm):
    """
    Form with a single optional JSON ``params`` field.

    NOTE(review): judging by the name it feeds the loading of Pig fields -
    confirm against the caller.
    """
    params = JsonField()
Beispiel #18
0
class TransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Adds/Edits Pretrained transformer form.

    The transformer is described either by the individual form fields or,
    when ``json_selected`` is set, by a JSON document.
    """
    NO_REQUIRED_FOR_EDIT = True
    REQUIRED_FIELDS = ['train_import_handler']
    FORM_REQUIRED_FIELDS = REQUIRED_FIELDS + \
        ['name', 'type', 'feature_type', 'field_name']
    group_chooser = 'json_selected'
    required_fields_groups = {
        'true': REQUIRED_FIELDS + ['json'],
        'false': FORM_REQUIRED_FIELDS,
        None: FORM_REQUIRED_FIELDS
    }

    name = CharField()
    feature_type = CharField()
    field_name = CharField()
    type_field = ChoiceField(choices=Transformer.TYPES_LIST, name='type')
    params = JsonField()
    json = JsonField()
    json_selected = BooleanField()
    train_import_handler = ImportHandlerField()

    def validate_data(self):
        """
        Check that the transformer name is free and convert the parameters
        according to the TRANSFORMERS configuration.

        In JSON mode the name, type and params are read from the
        ``transformer-name`` and ``transformer`` items of the document.
        """
        if self.cleaned_data.get('json_selected'):
            document = self.cleaned_data.get('json')
            if document is None:
                # Nothing to inspect. 'json' is listed as required in this
                # mode, so the absence is presumably reported by the
                # required-fields check - TODO confirm in BaseForm.
                return

            section = document.get('transformer') \
                if isinstance(document, dict) else None
            if not isinstance(section, dict) \
                    or 'transformer-name' not in document:
                # Report a malformed document instead of failing with
                # TypeError / KeyError.
                self.add_error(
                    'json',
                    'json should contain "transformer-name" and '
                    '"transformer" items')
                return

            name = document['transformer-name']
            params = section.get('params')
            type_ = section.get('type')
            self.is_name_available(name, field_name='json')
        else:
            name = self.cleaned_data.get('name')
            self.is_name_available(name)
            params = self.cleaned_data.get('params')
            type_ = self.cleaned_data.get('type')

        self.convert_params(type_, params, configuration=TRANSFORMERS)

    def save(self, commit=True):
        """
        In JSON mode build a new Transformer from the document and save it;
        otherwise delegate to the base form. Returns the saved object.
        """
        if not self.cleaned_data.get('json_selected'):
            return super(TransformerForm, self).save(commit)

        transformer = Transformer()
        transformer.load_from_json(self.cleaned_data['json'])
        transformer.train_import_handler = \
            self.cleaned_data['train_import_handler']
        transformer.save(commit=commit)
        return transformer

    def is_name_available(self, name, field_name='name'):
        """
        Return True when ``name`` can be used. The check is skipped when
        editing an existing object.

        Otherwise, when a transformer with this name exists, an error is
        added to ``field_name`` and False is returned.
        """
        if self.obj and self.obj.id:
            return True  # edit

        if Transformer.query.filter_by(name=name).count():
            self.add_error(
                field_name,
                'Transformer with name {0} already exist'.format(name))
            return False
        return True
Beispiel #19
0
class FeatureForm(BaseForm, FeatureParamsMixin):
    """
    Feature add/edit form.
    """
    # we could edit only one feature field.
    # no need to fill all of them for edit
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'type', 'feature_set_id')

    name = CharField()
    type_field = CharField(name='type')
    input_format = CharField()
    params = JsonField()
    required = BooleanField()
    default = CharField()
    is_target_variable = BooleanField()
    feature_set_id = DocumentField(doc=FeatureSet, by_name=False,
                                   return_doc=False)
    disabled = BooleanField()

    transformer = FeatureTransformerForm(
        Model=Transformer,
        prefix='transformer-', data_from_request=False)
    remove_transformer = BooleanField()
    scaler = ScalerForm(Model=PredefinedScaler, prefix='scaler-',
                        data_from_request=False)
    remove_scaler = BooleanField()

    def validate_data(self):
        from numpy import nan
        feature_set_id = self.cleaned_data.get('feature_set_id')
        name = self.cleaned_data.get('name')
        query = Feature.query.filter_by(
            name=name,
            feature_set_id=feature_set_id)
        if self.obj.id:
            query = query.filter(Feature.id != self.obj.id)
        count = query.count()
        if count:
            self.add_error('name', 'Feature with name "%s" already \
exist. Please choose another one.' % name)
            return

        # Validating feature type and parameters
        def get_field_value(name):
            value = self.cleaned_data.get(name)
            if value is None and self.is_edit:
                return getattr(self.obj, name)
            return value

        feature_type = get_field_value('type')
        type_factory = FEATURE_TYPE_FACTORIES.get(feature_type)
        if type_factory:  # inline type
            try:
                params = get_field_value('params')
                input_format = get_field_value('input_format') or 'plain'
                type_ = type_factory.get_instance(params, input_format)
                default = self.cleaned_data.get('default', None)
                if default:
                    self.cleaned_data['default'] = type_.transform(default)
                    if self.cleaned_data['default'] is nan:
                        self.add_error(
                            "default",
                            "Incorrect default value {0} for type {1}. "
                            .format(default, feature_type))
            except InvalidFeatureTypeException, exc:
                self.add_error("type", 'Cannot create instance of '
                               'feature type: {0}'.format(exc), exc)
        else: