class PredictModelForm(BaseForm):
    """
    Adds/edits a predict model that belongs to an XML import handler.

    The owning handler is resolved from the mandatory
    ``import_handler_id`` keyword argument of the constructor.
    """
    # NOTE(review): a nested tuple presumably means "at least one of these
    # fields" - confirm against BaseForm.
    required_fields = ('name', ('value', 'script'))
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    value = CharField()
    script = CharField()

    def __init__(self, *args, **kwargs):
        """
        Resolves the import handler and initialises the form.

        Keyword arguments:
        import_handler_id -- id of the XmlImportHandler; it is removed
            from ``kwargs`` before they are passed to the base class.

        Raises ValidationError when the id is missing, None, or does not
        match an existing import handler.
        """
        # A default is needed here: a bare pop() raises KeyError when the
        # argument is omitted, so the "is required" check below could never
        # fire for a missing argument.
        handler_id = kwargs.pop('import_handler_id', None)
        if handler_id is None:
            raise ValidationError('Import Handler is required')

        self.import_handler = XmlImportHandler.query.get(handler_id)
        if self.import_handler is None:
            raise ValidationError('Invalid import handler id specified')

        super(PredictModelForm, self).__init__(*args, **kwargs)

    def save(self, *args, **kwargs):
        """
        Saves the model and, for a new one, attaches it to the predict
        section of the import handler. Everything is committed at once.

        Returns the saved model.
        """
        # Must be read before the base save() runs.
        is_new_model = not self.is_edit
        model = super(PredictModelForm, self).save(commit=False)
        if is_new_model:
            predict = self.import_handler.predict
            predict.models.append(model)
            predict.save(commit=False)
        db.session.commit()
        return model
class FeatureSetForm(BaseForm):
    """
    Edits a feature set: schema name, group-by features and the target
    variable.
    """
    schema_name = CharField()
    group_by = JsonField()
    target_variable = CharField()

    # Feature matched by clean_target_variable(); stays None when no target
    # variable was submitted.
    target_feature = None

    def clean_group_by(self, value, field):
        """
        Converts a list of ``{'id': ...}`` items to Feature objects.

        Returns None when ``value`` is None.
        """
        if value is not None:
            ids = [feature['id'] for feature in value]
            return Feature.query.filter(Feature.id.in_(ids)).all()

    def clean_target_variable(self, value, field):
        """
        Looks up the feature named ``value`` in this feature set and
        remembers it in ``self.target_feature``.

        Raises ValidationError when no such feature exists.
        """
        if value:
            # first() returns None when nothing matches. The previous one()
            # raised NoResultFound instead, which made the "Feature not
            # found" validation error below unreachable.
            self.target_feature = Feature.query.filter_by(
                name=value, feature_set_id=self.id).first()
            if self.target_feature is None:
                raise ValidationError('Feature not found')
        return value

    def save(self):
        """
        Marks the feature set as modified, flags the target feature (if
        any) as a required target variable and saves the form.
        """
        self.cleaned_data['modified'] = True
        if self.target_feature:
            self.target_feature.is_target_variable = True
            self.target_feature.required = True
            # Not committed here; the base save() below is called with its
            # default arguments.
            self.target_feature.save(commit=False)
        return super(FeatureSetForm, self).save()
class XmlInputParameterForm(BaseForm):
    """
    Adds/edits an input parameter of an XML import handler.
    """
    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    # The attribute is called ``type_field`` but the field is given the
    # name 'type'.
    type_field = ChoiceField(choices=XmlInputParameter.TYPES, name='type')
    format = CharField()
    regex = CharField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)

    def clean_name(self, value, field):
        """
        Checks that the name is given (unless an existing object is being
        edited) and is not used by another input parameter of the same
        import handler.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        editing = self.NO_REQUIRED_FOR_EDIT and self.obj.id
        if not editing and not value:
            raise ValidationError('name is required field')

        # An existing object keeps its handler; a new one takes it from the
        # submitted data.
        if self.obj.id:
            handler_id = self.obj.import_handler_id
        else:
            handler_id = self.data['import_handler_id']

        duplicates = XmlInputParameter.query.filter_by(
            name=value, import_handler_id=handler_id)
        if self.obj.id:
            duplicates = duplicates.filter(
                XmlInputParameter.id != self.obj.id)

        if duplicates.count():
            raise ValidationError(
                'Input parameter with name "%s" already '
                'exist. Please choose another one.' % value)
        return value
class PredictModelWeightForm(BaseForm):
    """
    Adds/edits a weight of a predict model.
    """
    # NOTE(review): the nested tuple presumably means "either value or
    # script" - confirm against BaseForm.
    required_fields = (
        'label',
        ('value', 'script'),
        'predict_model_id',
    )
    NO_REQUIRED_FOR_EDIT = True

    label = CharField()
    value = CharField()
    script = CharField()
    predict_model_id = ModelField(model=PredictModel)
class PredictResultProbabilityForm(BaseForm):
    """
    Adds/edits the probability section of a predict result.
    """
    # NOTE(review): the nested tuple presumably means "either
    # predict_model_id or script" - confirm against BaseForm.
    required_fields = (
        'label',
        ('predict_model_id', 'script'),
        'import_handler_id',
    )
    NO_REQUIRED_FOR_EDIT = True

    predict_model_id = ModelField(model=PredictModel, return_model=False)
    label = CharField()
    script = CharField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=True)
class InstanceForm(BaseForm):
    """
    Form for adding a new instance or editing an existing one.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'ip', 'type')

    name = UniqueNameField(Model=Instance)
    description = CharField()
    ip = CharField()
    # The attribute is called ``type_field`` but the field is given the
    # name 'type', which is the name listed in required_fields.
    type_field = ChoiceField(choices=Instance.TYPES_LIST, name='type')
    is_default = BooleanField()
class XmlQueryForm(BaseForm):
    """
    Adds/edits the query of an XML import handler entity.
    """
    required_fields = ('text', 'entity_id', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    text = CharField()
    target = CharField()
    sqoop_dataset_name = CharField()
    autoload_sqoop_dataset = BooleanField()
    entity_id = DocumentField(
        doc=XmlEntity, by_name=False, return_doc=False)
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
class ModelEditForm(BaseForm):
    """
    Edits a model: its basic fields, tags and (optionally) its features.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', )

    name = CharField()
    train_import_handler = ImportHandlerField()
    test_import_handler = ImportHandlerField()
    example_id = CharField()
    example_label = CharField()
    tags = JsonField()
    features = FeaturesField()

    def save(self, commit=True):
        """
        Saves the model, then synchronises its tags and replaces its
        features when they were submitted.

        ``commit`` is accepted but not used by this method.
        Raises DBException when updating the features fails (the session
        is rolled back first). Returns the saved model.
        """
        # Snapshot of the tags taken before the base save() runs.
        known_tags = list(self.obj.tags)
        known_texts = [existing.text for existing in self.obj.tags]
        model = super(ModelEditForm, self).save()

        tags = self.cleaned_data.get('tags', None)
        if tags:
            for text in tags:
                if text in known_texts:
                    continue
                # Reuse a tag with this text if there is one, otherwise
                # create and save a new one.
                found = Tag.query.filter_by(text=text).all()
                if found:
                    tag = found[0]
                else:
                    tag = Tag()
                    tag.text = text
                    tag.save()
                known_tags.append(tag)

            model.tags = [tag for tag in known_tags if tag.text in tags]
            model.save()
            # Counters are refreshed for removed tags as well, since
            # known_tags still contains them.
            for tag in known_tags:
                tag.update_counter()

        features = self.cleaned_data.get('features', None)
        if features:
            try:
                # Existing features are deleted before the new definition
                # is loaded.
                Feature.query.filter_by(
                    feature_set_id=model.features_set_id).delete()
                model.classifier = features['classifier'] or {}
                model.features_set.from_dict(features, commit=False)
            except Exception as e:
                db.session.rollback()
                message = "Error occurred while updating features: {0}"
                raise DBException(message.format(e), e)
            else:
                db.session.commit()

        return model
class XmlScriptForm(BaseForm):
    """
    Adds/edits a script of an XML import handler. The script can be given
    as inline code, an uploaded file or a URL.
    """
    required_fields = (
        ('data', 'data_file', 'data_url'),
        'import_handler_id',
    )
    NO_REQUIRED_FOR_EDIT = True

    data = CharField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
    data_file = ScriptFileField()
    data_url = ScriptUrlField()
    type_field = ChoiceField(choices=XmlScript.TYPES, name='type')

    def _store_file_or_url(self, data_file, data_url):
        """
        Puts the script location into ``cleaned_data['data']``: the key
        returned by XmlScript.to_s3() for an uploaded file, otherwise the
        URL. Returns False when neither was given.
        """
        if data_file:
            self.cleaned_data['data'] = XmlScript.to_s3(
                data_file, self.cleaned_data.get('import_handler_id'))
            return True
        if data_url:
            self.cleaned_data['data'] = data_url
            return True
        return False

    def save(self, *args, **kwargs):
        """
        Resolves the script source and type, then saves the script.

        Any exception raised on the way (including the validation errors
        raised here) is re-raised as ValidationError.
        """
        try:
            cleaned = self.cleaned_data
            script_type = cleaned.get('type', None)
            data_file = cleaned.get('data_file', None)
            data_url = cleaned.get('data_url', None)
            data = cleaned.get('data', None)

            if script_type == XmlScript.TYPE_PYTHON_FILE:
                if not self._store_file_or_url(data_file, data_url):
                    raise ValidationError(
                        "File upload or URL required for type "
                        "'{0}'".format(script_type))
            elif script_type == XmlScript.TYPE_PYTHON_CODE:
                if not data:
                    raise ValidationError(
                        "Code is required for type '{0}'".format(
                            script_type))
            else:
                # type is not passed: derive it from what was submitted
                if self._store_file_or_url(data_file, data_url):
                    cleaned['type'] = XmlScript.TYPE_PYTHON_FILE
                else:
                    cleaned['type'] = XmlScript.TYPE_PYTHON_CODE

            script = super(XmlScriptForm, self).save()
        except Exception as e:
            raise ValidationError(e.message, e)
        return script

    def clean_data(self, value, field):
        """
        Validates the inline code by loading it into a ScriptManager.

        Raises ValidationError when the manager reports an
        ImportHandlerException; returns ``value`` unchanged otherwise.
        """
        manager = ScriptManager()
        try:
            # add_python() raises for an incorrect script
            manager.add_python(value)
        except ImportHandlerException as ex:
            raise ValidationError(ex.message, ex)
        return value
class XmlSqoopForm(BaseForm):
    """
    Adds/edits a sqoop import of an XML import handler entity.
    """
    required_fields = ('entity', 'target', 'table', 'datasource')
    NO_REQUIRED_FOR_EDIT = True
    # Upper bound on sqoop imports per entity, enforced in clean_entity().
    MAX_ITEMS_BY_ENTITY = 3

    entity = DocumentField(
        doc=XmlEntity, by_name=False, return_doc=True)
    datasource = DocumentField(
        doc=XmlDataSource, by_name=False, return_doc=True)
    target = CharField()
    table = CharField()
    where = CharField()
    direct = CharField()
    mappers = CharField()
    options = CharField()
    text = CharField()

    def clean_entity(self, value, field):
        """
        Accepts only entities whose datasource has type 'pig' and, when
        adding, only while the entity has fewer than MAX_ITEMS_BY_ENTITY
        sqoop imports.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        if value:
            source = value.datasource
            if not source or source.type != 'pig':
                raise ValidationError('Only "pig" entity is allowed')

            if not self.is_edit:
                existing = XmlSqoop.query.filter_by(entity=value)
                if existing.count() >= self.MAX_ITEMS_BY_ENTITY:
                    raise ValidationError(
                        'There can be no more than {0} elements'.format(
                            self.MAX_ITEMS_BY_ENTITY))
        return value

    def clean_datasource(self, value, field):
        """
        Accepts only datasources of type 'db'.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        if value and value.type != 'db':
            raise ValidationError('Only "db" datasources are allowed')
        return value
class ServerForm(BaseForm):
    """
    Adds/edits a server.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'ip', 'folder', 'type')

    name = CharField()
    description = CharField()
    ip = CharField()
    folder = CharField()
    is_default = BooleanField()
    type = ChoiceField(choices=Server.TYPES)

    def clean_name(self, value, field):
        """
        Checks that no other server uses this name.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        same_name = Server.query.filter_by(name=value)
        if self.obj.id:
            # The server being edited must not clash with itself.
            same_name = same_name.filter(Server.id != self.obj.id)
        if same_name.count():
            raise ValidationError('name should be unique')
        return value
class XmlImportHandlerEditForm(BaseForm):
    """
    Edits the name of an XML import handler.
    """
    required_fields = ('name', )

    name = CharField()

    def clean_name(self, value, field):
        """
        Checks that no other import handler uses this name.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        query = XmlImportHandler.query.filter_by(name=value)
        # Exclude the handler being edited, the same way the other forms in
        # this file do. Without this, saving a handler with its name
        # unchanged was rejected as a duplicate of itself.
        obj = getattr(self, 'obj', None)
        if obj is not None and obj.id:
            query = query.filter(XmlImportHandler.id != obj.id)
        if query.count():
            raise ValidationError(
                'Import Handler with name "%s" already '
                'exist. Please choose another one.' % value)
        return value
class FeatureTransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Form for adding a transformer to a feature or editing it. The
    transformer is either described inline (type + params) or taken from a
    pretrained one.
    """
    # Field whose value selects the entry of required_fields_groups.
    group_chooser = 'predefined_selected'
    REQUIRED_FORM = ['type']
    REQUIRED_PRETRAINED = ['transformer']
    required_fields_groups = {
        'true': REQUIRED_PRETRAINED,
        'false': REQUIRED_FORM,
        None: REQUIRED_FORM,
    }

    predefined_selected = BooleanField()
    feature_id = ModelField(model=Feature, return_model=True)
    type_field = CharField(name='type')
    params = JsonField()
    transformer = ModelField(model=Transformer, return_model=True)

    def validate_data(self):
        """
        Rejects an unknown inline transformer type; otherwise converts the
        submitted params using the TRANSFORMERS configuration.
        """
        cleaned = self.cleaned_data
        type_ = cleaned.get('type')
        pretrained_selected = cleaned.get('predefined_selected')

        is_inline = not pretrained_selected and type_
        if is_inline and type_ not in Transformer.TYPES_LIST:
            self.add_error('type', 'type is invalid')
            return

        self.convert_params(
            type_, cleaned.get('params'), configuration=TRANSFORMERS)

    def save(self, commit=True, save=True):
        """
        Builds the transformer description and, when a feature was given,
        stores it on the feature and saves the feature.

        ``commit`` and ``save`` are accepted but not used by this method.
        Returns the transformer description as a dict.
        """
        cleaned = self.cleaned_data
        feature = cleaned.get('feature_id', None)

        if cleaned.get('predefined_selected', False):
            pretrained = cleaned.get('transformer')
            transformer = {
                'type': pretrained.name,
                'id': pretrained.id,
            }
        else:
            # Inline transformers get the id -1.
            transformer = {
                'id': -1,
                'type': cleaned.get('type'),
                'params': cleaned.get('params'),
            }

        if feature is not None:
            feature.transformer = transformer
            feature.save()
        return transformer
class XmlFieldForm(BaseForm):
    """
    Adds/edits a field of an XML import handler entity.
    """
    required_fields = ('name', )
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type = ChoiceField(choices=XmlField.TYPES)
    column = CharField()
    jsonpath = CharField()
    delimiter = CharField()
    regex = CharField()
    split = CharField()
    dateFormat = CharField()
    template = CharField()
    transform = ChoiceField(choices=XmlField.TRANSFORM_TYPES)
    headers = CharField()
    script = CharField()
    required = BooleanField()
    multipart = BooleanField()
    entity_id = DocumentField(
        doc=XmlEntity, by_name=False, return_doc=False)
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
class XmlImportHandlerUpdateXmlForm(BaseForm):
    """
    Replaces the XML definition of an import handler.
    """
    required_fields = ('data', )

    data = CharField()

    def clean_data(self, value, field):
        """
        Encodes the XML as UTF-8 and checks that an ExtractionPlan can be
        built from it.

        Returns the encoded value, or None when ``value`` is None.
        Raises ValidationError when building the plan fails.
        """
        if value is None:
            return None

        encoded = value.encode('utf-8')
        try:
            # Built only for validation; the plan itself is discarded.
            ExtractionPlan(encoded, is_file=False)
        except Exception as exc:
            raise ValidationError(exc.message, exc)
        return encoded
class GridSearchForm(BaseForm):
    """
    Starts a grid search over classifier parameters of a model.

    The model is taken from the ``model`` keyword argument of the
    constructor.
    """
    parameters = JsonField()
    scoring = CharField()
    train_dataset = ModelField(model=DataSet, return_model=True)
    test_dataset = ModelField(model=DataSet, return_model=True)

    def __init__(self, *args, **kwargs):
        """
        Remembers the ``model`` keyword argument (None when absent); the
        argument is still passed on to the base class.
        """
        self.model = kwargs.get('model', None)
        super(GridSearchForm, self).__init__(*args, **kwargs)

    def clean_parameters(self, grid_params, field):
        """
        Converts comma separated parameter values to typed lists.

        Only parameters declared in the CLASSIFIERS configuration of the
        model's classifier type are kept; empty values are skipped.

        Returns a dict ``{parameter name: list of values}``.
        Raises ValidationError when a value cannot be parsed as the
        declared type or is not one of the declared choices.
        """
        params = {}
        if grid_params is None:
            # Nothing was submitted. Previously this fell through to the
            # ``name in grid_params`` test and failed with TypeError.
            return params

        config = CLASSIFIERS[self.model.classifier['type']]
        for pconfig in config['parameters']:
            name = pconfig['name']
            if name not in grid_params:
                continue
            value = grid_params[name]
            if not value:
                continue

            value = value.split(',')
            type_ = pconfig.get('type', 'string')
            try:
                if type_ == 'integer':
                    value = [int(item) for item in value]
                elif type_ == 'float':
                    value = [float(item) for item in value]
                elif type_ == 'boolean':
                    value = [item == 'true' for item in value]
            except ValueError as exc:
                # Report bad user input as a validation problem rather than
                # letting a bare ValueError escape from the form.
                raise ValidationError(
                    'Invalid {0}: can not parse as {1}: {2}'.format(
                        name, type_, exc), exc)

            choices = pconfig.get('choices')
            if choices:
                for item in value:
                    if item not in choices:
                        raise ValidationError(
                            'Invalid {0}: should be one of {1}'.format(
                                name, ','.join(choices)))
            params[name] = value
        return params

    def save(self, *args, **kwargs):
        """
        Saves the object with the model attached and returns it.
        """
        obj = super(GridSearchForm, self).save(commit=False)
        obj.model = self.model
        obj.save()
        return obj
class ScalerForm(BasePredefinedForm):
    """
    Form for a scaler: either a predefined scaler or the scaler of a
    feature.
    """
    OBJECT_NAME = 'scaler'
    DOC = PredefinedScaler

    # Field whose value selects the entry of required_fields_groups.
    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('scaler', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedScaler.TYPES_LIST, name='type')
    params = JsonField()
    # whether the feature scaler fields should be copied from a predefined
    # scaler
    predefined_selected = BooleanField()
    # predefined scaler; required when predefined_selected is 'true'
    # (see required_fields_groups)
    scaler = DocumentField(
        doc=PredefinedScaler, by_name=True, return_doc=True)
    feature_id = DocumentField(
        doc=Feature, by_name=False, return_doc=False)
class ServerModelVerificationForm(BaseForm):
    """
    Creates a verification of a model deployed to a server and schedules
    the verification task.
    """
    required_fields = ('server_id', 'model_id', 'test_result_id')

    server_id = ModelField(model=Server)
    model_id = ModelField(model=Model)
    import_handler_id = ModelField(model=XmlImportHandler)
    test_result_id = ModelField(model=TestResult)
    description = JsonField()
    params_map = JsonField()
    count = IntegerField()
    clazz = CharField()

    def save(self, *args, **kwargs):
        """
        Saves the verification and queues the ``verify_model`` task with
        its id and the submitted ``count``.

        Returns the saved object.
        """
        verification = super(
            ServerModelVerificationForm, self).save(*args, **kwargs)

        # Imported at call time, as in the original code.
        from tasks import verify_model
        count = self.cleaned_data['count']
        verify_model.delay(verification.id, count)
        return verification
class ModelAddForm(BaseForm):
    """
    Form for adding a new model.

    Note: when both an import handler and an import handler file are
    specified, the new model uses the import handler from the file.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = (
        'name',
        ('import_handler', 'import_handler_file'),
    )

    name = UniqueNameField(Model=Model)
    import_handler = ImportHandlerField()
    import_handler_file = ImportHandlerFileField()
    test_import_handler = ImportHandlerField()
    test_import_handler_file = ImportHandlerFileField()
    features = JsonField()
    trainer = CharField()

    def clean_import_handler(self, value, field):
        """
        Also exposes the handler as ``cleaned_data['train_import_handler']``.
        """
        self.cleaned_data['train_import_handler'] = value
        return value

    def clean_import_handler_file(self, value, field):
        """
        Stores the field's import params as
        ``cleaned_data['train_import_params']``.
        """
        self.cleaned_data['train_import_params'] = field.import_params
        return value

    def clean_test_import_handler_file(self, value, field):
        """
        Stores the field's import params as
        ``cleaned_data['test_import_params']``.
        """
        self.cleaned_data['test_import_params'] = field.import_params
        return value

    def clean_features(self, value, field):
        """
        Builds a Trainer from the features JSON and stores it as
        ``cleaned_data['trainer']``. Nothing is built for an empty value.

        Raises ValidationError when the features do not match the schema.
        Returns ``value`` unchanged.
        """
        if not value:
            return value

        from cloudml.trainer.trainer import Trainer
        from cloudml.trainer.config import FeatureModel, SchemaException
        try:
            # TODO: add support of json dict to FeatureModel
            serialized = json.dumps(value)
            feature_model = FeatureModel(serialized, is_file=False)
            self.cleaned_data['trainer'] = Trainer(feature_model)
        except SchemaException as exc:
            raise ValidationError(
                'Features JSON file is invalid: %s' % exc, exc)
        return value
class VisualizationOptionsForm(BaseForm):
    """
    Form used for updating Trained model visualization.

    Note: Now it supports only the `tree_deep` type for Decision Tree and
    Random Forest classifiers.
    """
    UPDATE_TREE_DEEP = 'tree_deep'
    TYPES = [UPDATE_TREE_DEEP, ]
    # Parameters expected for each type: name and the type to convert to.
    PARAMS_BY_TYPE = {UPDATE_TREE_DEEP: [{'name': 'deep', 'type': 'int'}]}

    parameters = JsonField()
    type_ = CharField(name="type")

    def __init__(self, *args, **kwargs):
        super(VisualizationOptionsForm, self).__init__(*args, **kwargs)

    def clean_type(self, value, field):
        """
        Raises ValidationError for a non-empty type that is not in TYPES;
        returns ``value`` unchanged otherwise.
        """
        if value and value not in self.TYPES:
            raise ValidationError('invalid type')
        return value

    def validate_data(self):
        """
        Checks that every parameter configured for the selected type is
        present and converts 'int' parameters in place.

        Problems are reported through add_error(); nothing is returned.
        """
        type_ = self.cleaned_data.get('type')
        config = self.PARAMS_BY_TYPE.get(type_)
        if config is None:
            # Missing or unknown type. A direct PARAMS_BY_TYPE[type_]
            # lookup failed here with KeyError. Only report it when
            # nothing else has been reported yet, to avoid duplicating an
            # error already raised by clean_type().
            if not self.errors:
                self.add_error('type', 'invalid type')
            return

        parameters = self.cleaned_data.get('parameters')
        if parameters is None:
            # Treat absent parameters as empty, so that each required one
            # is reported below instead of failing on None.get().
            parameters = {}

        for item in config:
            name = item['name']
            val = parameters.get(name)
            if not val:
                self.add_error(
                    'parameters', 'Parameter %s is required' % name)
                # Do not try to parse a missing value: that used to add a
                # second, misleading "Can't parse" error.
                continue
            if item['type'] == 'int':
                try:
                    parameters[name] = int(val)
                except (TypeError, ValueError) as exc:
                    self.add_error(
                        'parameters',
                        "Can't parse parameter %s: %s" % (name, exc), exc)
class PredefinedDataSourceForm(BaseForm):
    """
    Form for adding a predefined datasource or editing an existing one.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'type')

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedDataSource.TYPES_LIST, name='type')
    db = JsonField()

    def clean_name(self, value, field):
        """
        Checks that no other predefined datasource uses this name.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        same_name = PredefinedDataSource.query.filter_by(name=value)
        if self.obj.id:
            # The datasource being edited must not clash with itself.
            same_name = same_name.filter(
                PredefinedDataSource.id != self.obj.id)
        if same_name.count():
            raise ValidationError(
                'DataSource with name "%s" already exist. '
                'Please choose another one.' % value)
        return value
class NamedFeatureTypeForm(BaseForm, FeatureParamsMixin):
    """
    Adds/edits a named feature type.
    """
    required_fields = ('name', 'type')

    name = UniqueNameField(Model=NamedFeatureType)
    type_field = ChoiceField(
        choices=NamedFeatureType.TYPES_LIST, name='type')
    input_format = CharField()
    params = JsonField()

    def validate_data(self):
        """
        Checks that an instance of the feature type can be created from
        the submitted params and input format.

        Skipped when field-level errors were already reported. A failure
        is reported through add_error() on the 'type' field.
        """
        if self.errors:
            return

        # Trying to make instance of the type
        type_ = self.cleaned_data.get('type')
        type_factory = FEATURE_TYPE_FACTORIES.get(type_)
        try:
            params = self.cleaned_data.get('params') or {}
            # Read the input format from its own field. It used to be read
            # from 'params', so the submitted format was ignored and the
            # params value was passed as the format instead.
            input_format = \
                self.cleaned_data.get('input_format') or 'plain'
            type_factory.get_instance(params, input_format)
        except InvalidFeatureTypeException as exc:
            self.add_error(
                "type",
                'Cannot create instance of '
                'feature type: {0}'.format(exc), exc)
class XmlImportHandlerAddForm(BaseForm):
    """
    Adds a new XML import handler, optionally from an XML definition.
    """
    required_fields = ('name', )

    name = UniqueNameField(
        Model=XmlImportHandler, verbose_name='Import Handler')
    data = CharField()

    def clean_data(self, value, field):
        """
        Encodes the XML as UTF-8 and checks that an ExtractionPlan can be
        built from it.

        Returns the encoded value, or None when ``value`` is None.
        Raises ValidationError when building the plan fails.
        """
        if value is None:
            return None

        encoded = value.encode('utf-8')
        from cloudml.importhandler.importhandler import ExtractionPlan
        try:
            # Built only for validation; the plan itself is discarded.
            ExtractionPlan(encoded, is_file=False)
        except Exception as exc:
            raise ValidationError(exc.message, exc)
        return encoded

    def save(self):
        """
        Creates the import handler, assigns the XML data to it and commits.

        On any failure the session is rolled back and DBException is
        raised. Returns the new import handler.
        """
        try:
            handler = XmlImportHandler(
                name=self.cleaned_data['name'], import_params=[])
            handler._set_user()
            db.session.add(handler)
            try:
                handler.data = self.cleaned_data.get('data')
            except Exception as exc:
                self.add_error('fields', str(exc), exc)
                # NOTE(review): this error is raised inside the outer try,
                # so the broad handler below also sees it - confirm that
                # is intended.
                raise ValidationError(
                    self.error_messages, exc, errors=self.errors)
        except Exception as e:
            db.session.rollback()
            raise DBException(e.message, e)
        else:
            db.session.commit()
        return handler
class XmlDataSourceForm(ParametersConvertorMixin, BaseForm):
    """
    Adds/edits a datasource of an XML import handler.
    """
    XML_PARAMETERS = True
    # Evaluated once, when the class is defined.
    PARAMETERS_CONFIGURATION = ExtractionPlan.get_datasources_config()

    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type_field = ChoiceField(choices=_get_ds_types(), name='type')
    params = JsonField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)

    def clean_name(self, value, field):
        """
        Checks that the name is given (unless an existing object is being
        edited) and is not used by another datasource of the same import
        handler.

        Raises ValidationError otherwise; returns ``value`` unchanged.
        """
        editing = self.NO_REQUIRED_FOR_EDIT and self.obj.id
        if not editing and not value:
            raise ValidationError('name is required field')

        # An existing object keeps its handler; a new one takes it from the
        # submitted data.
        if self.obj.id:
            handler_id = self.obj.import_handler_id
        else:
            handler_id = self.data['import_handler_id']

        duplicates = XmlDataSource.query.filter_by(
            name=value, import_handler_id=handler_id)
        if self.obj.id:
            duplicates = duplicates.filter(
                XmlDataSource.id != self.obj.id)

        if duplicates.count():
            raise ValidationError(
                'Data Source with name "%s" already '
                'exist. Please choose another one.' % value)
        return value

    def validate_data(self):
        """
        Converts the submitted params using the datasources configuration
        for the selected type.
        """
        cleaned = self.cleaned_data
        self.convert_params(
            cleaned.get('type'),
            cleaned.get('params'),
            configuration=self.PARAMETERS_CONFIGURATION)
class ClassifierForm(BasePredefinedForm, ParametersConvertorMixin):
    """
    Form for one of these cases (depending on the parameters):
        1. adding/editing a predefined classifier
        2. editing the classifier of a specific model
        3. copying the classifier config from a predefined one
           to the model's classifier.
    """
    OBJECT_NAME = 'classifier'
    DOC = PredefinedClassifier

    # Field whose value selects the entry of required_fields_groups.
    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('classifier', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(
        choices=PredefinedClassifier.TYPES_LIST, name='type')
    params = JsonField()
    # whether the model classifier fields should be copied from a
    # predefined classifier
    predefined_selected = BooleanField()
    # predefined classifier; required when predefined_selected is 'true'
    # (see required_fields_groups)
    classifier = DocumentField(
        doc=PredefinedClassifier, by_name=False, return_doc=True)
    model_id = DocumentField(doc=Model, by_name=False, return_doc=False)

    def validate_data(self):
        """
        Runs the base validation, then converts non-empty params using the
        CLASSIFIERS configuration for the selected type.
        """
        super(ClassifierForm, self).validate_data()
        params = self.cleaned_data.get('params')
        if not params:
            return

        from config import CLASSIFIERS
        classifier_type = self.cleaned_data['type']
        self.convert_params(
            classifier_type, params, configuration=CLASSIFIERS)
class DataSetEditForm(BaseForm):
    """
    Edits a dataset; only the name is editable.
    """
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', )

    name = CharField()
class TransformersDownloadForm(BaseForm):
    """
    Parameters of a transformers download request: the segment and the
    output format ('csv' or 'json').
    """
    required_fields = ('segment', 'data_format')

    segment = CharField()
    data_format = ChoiceField(choices=['csv', 'json'])
class QueryTestForm(BaseForm):
    """
    Parameters for test-running an SQL query: the query text, its
    parameters, a row limit and the datasource.
    """
    required_fields = ('sql', 'limit', 'datasource')

    sql = CharField()
    params = JsonField()
    limit = IntegerField()
    datasource = CharField()
class TransformerForm(BaseForm, ParametersConvertorMixin):
    """
    Form for adding a pretrained transformer or editing an existing one.
    The transformer is described either by form fields or by a JSON
    document.
    """
    NO_REQUIRED_FOR_EDIT = True
    REQUIRED_FIELDS = ['train_import_handler']
    FORM_REQUIRED_FIELDS = REQUIRED_FIELDS + \
        ['name', 'type', 'feature_type', 'field_name']

    # Field whose value selects the entry of required_fields_groups.
    group_chooser = 'json_selected'
    required_fields_groups = {
        'true': REQUIRED_FIELDS + ['json'],
        'false': FORM_REQUIRED_FIELDS,
        None: FORM_REQUIRED_FIELDS,
    }

    name = CharField()
    feature_type = CharField()
    field_name = CharField()
    type_field = ChoiceField(choices=Transformer.TYPES_LIST, name='type')
    params = JsonField()
    json = JsonField()
    json_selected = BooleanField()
    train_import_handler = ImportHandlerField()

    def validate_data(self):
        """
        Checks that the transformer name is free and converts the params
        using the TRANSFORMERS configuration. Name, type and params come
        from the JSON document when ``json_selected`` is set, otherwise
        from the form fields.
        """
        cleaned = self.cleaned_data
        if cleaned.get('json_selected'):
            document = cleaned.get('json')
            name = document['transformer-name']
            params = document['transformer'].get('params')
            type_ = document['transformer'].get('type')
            # A name clash is reported on the 'json' field in this mode.
            self.is_name_available(name, field_name='json')
        else:
            self.is_name_available(cleaned.get('name'))
            params = cleaned.get('params')
            type_ = cleaned.get('type')

        self.convert_params(type_, params, configuration=TRANSFORMERS)

    def save(self, commit=True):
        """
        Saves the transformer. With ``json_selected`` a new Transformer is
        loaded from the JSON document; otherwise the base save() is used.

        Returns the saved transformer.
        """
        if not self.cleaned_data.get('json_selected'):
            return super(TransformerForm, self).save(commit)

        transformer = Transformer()
        transformer.load_from_json(self.cleaned_data['json'])
        transformer.train_import_handler = \
            self.cleaned_data['train_import_handler']
        transformer.save(commit=commit)
        return transformer

    def is_name_available(self, name, field_name='name'):
        """
        Tells whether ``name`` can be used for the transformer.

        Always True when an existing object is edited. Otherwise a clash
        with an existing transformer adds an error to ``field_name`` and
        returns False.
        """
        if self.obj and self.obj.id:
            return True  # edit

        if not Transformer.query.filter_by(name=name).count():
            return True

        self.add_error(
            field_name,
            'Transformer with name {0} '
            'already exist'.format(name))
        return False
class XmlEntityForm(BaseForm):
    """
    Adds/edits an entity of an XML import handler. An entity is based
    either on a datasource or on a transformed field, not on both.
    """
    required_fields = (
        'name',
        'import_handler_id',
        'entity_id',
        ('datasource', 'transformed_field'),
    )
    NO_REQUIRED_FOR_EDIT = True
    DATASOURCE_MESSAGE = 'Can be only one of either datasource or' \
                         ' transformed_field'

    name = CharField()
    autoload_fields = BooleanField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
    entity_id = DocumentField(
        doc=XmlEntity, by_name=False, return_doc=False)
    datasource = DocumentField(
        doc=XmlDataSource, by_name=False, return_doc=True)
    transformed_field = DocumentField(
        doc=XmlField, by_name=False, return_doc=True)

    def clean_datasource(self, value, field):
        """
        Rejects a datasource when a transformed field was submitted too.
        """
        if value and self.data.get('transformed_field'):
            raise ValidationError(self.DATASOURCE_MESSAGE)
        return value

    def clean_transformed_field(self, value, field):
        """
        Rejects a transformed field when a datasource was submitted too.
        """
        if value and self.data.get('datasource'):
            raise ValidationError(self.DATASOURCE_MESSAGE)
        return value

    def save(self, *args, **kwargs):
        """
        Saves the entity and keeps its related objects consistent with the
        chosen source:

        * the source that was not submitted is cleared;
        * an entity with a transformed field loses its query, an entity
          with a datasource gets a query when it has none;
        * sqoop imports are deleted unless the datasource type is 'pig'.

        On any failure the session is rolled back and DBException is
        raised. Returns the saved entity.
        """
        try:
            entity = super(XmlEntityForm, self).save()
            cleaned = self.cleaned_data

            if cleaned.get('transformed_field') and entity.datasource:
                entity.datasource = None
            if cleaned.get('datasource') and entity.transformed_field:
                entity.transformed_field = None
            db.session.add(entity)

            if entity.transformed_field and entity.query_obj:
                db.session.delete(entity.query_obj)
            elif entity.datasource and not entity.query_obj:
                new_query = XmlQuery()
                db.session.add(new_query)
                entity.query_obj = new_query
                db.session.add(entity)

            source = entity.datasource
            if not (source and source.type == 'pig'):
                for sqoop in entity.sqoop_imports:
                    db.session.delete(sqoop)
        except Exception as e:
            db.session.rollback()
            raise DBException(e.message, e)
        else:
            db.session.commit()
        return entity