Exemple #1
0
class XmlSqoopForm(BaseForm):
    """
    Add/edit form for a sqoop import bound to an entity and a datasource.

    Validation rules implemented here:
        * the entity must have a datasource of type "pig";
        * outside of edit mode, the entity may not already have
          MAX_ITEMS_BY_ENTITY sqoop items;
        * the datasource must be of type "db".
    """
    required_fields = ('entity', 'target', 'table', 'datasource')
    NO_REQUIRED_FOR_EDIT = True

    # maximum number of XmlSqoop items that may point at a single entity
    MAX_ITEMS_BY_ENTITY = 3

    entity = DocumentField(
        doc=XmlEntity, by_name=False, return_doc=True)
    datasource = DocumentField(
        doc=XmlDataSource, by_name=False, return_doc=True)
    target = CharField()
    table = CharField()
    where = CharField()
    direct = CharField()
    mappers = CharField()
    options = CharField()
    text = CharField()

    def clean_entity(self, value, field):
        """
        Validate the entity the sqoop import is attached to.

        An empty value is returned untouched. Otherwise the entity has to
        use a "pig" datasource and, when the form is not in edit mode,
        must have fewer than MAX_ITEMS_BY_ENTITY sqoop items already.

        Raises ValidationError when one of these rules is broken.
        """
        if not value:
            return value

        entity_ds = value.datasource
        if not entity_ds or entity_ds.type != 'pig':
            raise ValidationError('Only "pig" entity is allowed')

        if not self.is_edit:
            existing = XmlSqoop.query.filter_by(entity=value).count()
            if existing >= self.MAX_ITEMS_BY_ENTITY:
                message = 'There can be no more than {0} elements'.format(
                    self.MAX_ITEMS_BY_ENTITY)
                raise ValidationError(message)
        return value

    def clean_datasource(self, value, field):
        """
        Accept only datasources of type "db" (empty values pass through).

        Raises ValidationError for any other datasource type.
        """
        if value and value.type != 'db':
            raise ValidationError('Only "db" datasources are allowed')
        return value
Exemple #2
0
class XmlQueryForm(BaseForm):
    """
    Declarative form with the editable fields of a query: its text, target,
    sqoop dataset settings and the entity / import handler it belongs to.

    The class only declares fields; all processing is inherited from
    BaseForm.
    """
    # NOTE(review): presumably checked by BaseForm during validation - confirm
    required_fields = ('text', 'entity_id', 'import_handler_id')
    # NOTE(review): looks like this relaxes the required-fields check when an
    # existing object is edited, so only changed fields have to be sent
    NO_REQUIRED_FOR_EDIT = True

    text = CharField()
    target = CharField()
    sqoop_dataset_name = CharField()
    autoload_sqoop_dataset = BooleanField()
    # return_doc=False: presumably the cleaned value is the id rather than
    # the loaded document - verify against DocumentField
    entity_id = DocumentField(doc=XmlEntity, by_name=False, return_doc=False)
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=False)
Exemple #3
0
class XmlInputParameterForm(BaseForm):
    """
    Add/edit form for an input parameter of an import handler.

    Besides the declared fields, the form checks that the parameter name
    is unique among the parameters of the same import handler.
    """
    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type_field = ChoiceField(choices=XmlInputParameter.TYPES, name='type')
    format = CharField()
    regex = CharField()
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=False)

    def clean_name(self, value, field):
        """
        Validate the parameter name.

        The name is mandatory unless an existing object is being edited
        (NO_REQUIRED_FOR_EDIT), and no other input parameter of the same
        import handler may already use it.

        Returns the value unchanged; raises ValidationError when the name
        is missing or already taken.
        """
        if not ((self.NO_REQUIRED_FOR_EDIT and self.obj.id) or value):
            raise ValidationError('name is required field')

        if self.obj.id:
            import_handler_id = self.obj.import_handler_id
        else:
            # ``.get`` instead of indexing: a request without
            # import_handler_id must not abort validation with a KeyError.
            import_handler_id = self.data.get('import_handler_id')

        query = XmlInputParameter.query.filter_by(
            name=value, import_handler_id=import_handler_id)
        if self.obj.id:
            # the edited object itself is not a duplicate
            query = query.filter(XmlInputParameter.id != self.obj.id)
        if query.count():
            raise ValidationError(
                ('Input parameter with name "%s" already '
                 'exist. Please choose another one.') % value)
        return value
Exemple #4
0
class XmlScriptForm(BaseForm):
    """
    Add/edit form for an import handler script.

    The script can be supplied as inline code (``data``), as an uploaded
    file (``data_file``) or as a URL (``data_url``).
    """
    required_fields = (('data', 'data_file', 'data_url'), 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    data = CharField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
    data_file = ScriptFileField()
    data_url = ScriptUrlField()
    type_field = ChoiceField(choices=XmlScript.TYPES, name='type')

    def _use_external_source(self, data_file, data_url):
        """
        Point ``cleaned_data['data']`` at the uploaded file or the URL.

        An uploaded file wins over a URL; the file is handed to
        XmlScript.to_s3 and the returned key is stored. Returns True when
        one of the two sources was available, False otherwise.
        """
        if data_file:
            self.cleaned_data['data'] = XmlScript.to_s3(
                data_file, self.cleaned_data.get('import_handler_id'))
            return True
        if data_url:
            self.cleaned_data['data'] = data_url
            return True
        return False

    def save(self, *args, **kwargs):
        """
        Resolve the script source and type, then save through BaseForm.

        With an explicit type the matching source must be present; when no
        type is passed it is derived from whichever source was supplied.
        Any exception raised on the way is re-raised as ValidationError.
        """
        try:
            get = self.cleaned_data.get
            script_type = get('type', None)
            data_file = get('data_file', None)
            data_url = get('data_url', None)
            data = get('data', None)

            if script_type == XmlScript.TYPE_PYTHON_FILE:
                if not self._use_external_source(data_file, data_url):
                    message = "File upload or URL required for type '{0}'"
                    raise ValidationError(message.format(script_type))
            elif script_type == XmlScript.TYPE_PYTHON_CODE:
                if not data:
                    message = "Code is required for type '{0}'"
                    raise ValidationError(message.format(script_type))
            elif self._use_external_source(data_file, data_url):
                # type is not passed, but a file or URL is
                self.cleaned_data['type'] = XmlScript.TYPE_PYTHON_FILE
            else:
                # type is not passed and there is no file or URL
                self.cleaned_data['type'] = XmlScript.TYPE_PYTHON_CODE

            script = super(XmlScriptForm, self).save()
        except Exception as e:
            raise ValidationError(e.message, e)
        return script

    def clean_data(self, value, field):
        """
        Check the inline script by adding it to a fresh ScriptManager.

        Returns the value unchanged; an ImportHandlerException raised by
        the manager is converted to ValidationError.
        """
        try:
            # adding the script raises in case it is incorrect
            ScriptManager().add_python(value)
        except ImportHandlerException as ex:
            raise ValidationError(ex.message, ex)
        return value
Exemple #5
0
class PredictResultLabelForm(BaseForm):
    """
    Declarative form for a predict result label: a reference to a predict
    model and/or a script, plus the owning import handler.

    The class only declares fields; all processing is inherited from
    BaseForm.
    """
    # NOTE(review): the nested tuple presumably means "at least one of
    # predict_model_id / script" - confirm in BaseForm
    required_fields = (('predict_model_id', 'script'), 'import_handler_id')
    # NOTE(review): looks like this relaxes the required-fields check when an
    # existing object is edited - confirm in BaseForm
    NO_REQUIRED_FOR_EDIT = True

    predict_model_id = ModelField(model=PredictModel, return_model=False)
    script = CharField()
    # unlike the other *_id fields of sibling forms, this one is declared
    # with return_doc=True
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=True)
Exemple #6
0
class ScalerForm(BasePredefinedForm):
    """
    Form for a scaler, built on BasePredefinedForm with PredefinedScaler
    as the document class.

    Which fields are required depends on the ``predefined_selected`` field,
    see ``group_chooser`` / ``required_fields_groups`` below.
    """
    OBJECT_NAME = 'scaler'
    DOC = PredefinedScaler

    # NOTE(review): presumably the value of the field named by group_chooser
    # selects one entry of required_fields_groups - confirm in the base form
    group_chooser = 'predefined_selected'
    required_fields_groups = {'true': ('scaler', ),
                              'false': ('type', ),
                              None: ('type', )}

    name = CharField()
    type_field = ChoiceField(choices=PredefinedScaler.TYPES_LIST, name='type')
    params = JsonField()
    # whether need to copy feature scaler fields from predefined one
    predefined_selected = BooleanField()
    # whether we need to create predefined item (not feature related)
    # (looked up by name: by_name=True)
    scaler = DocumentField(doc=PredefinedScaler, by_name=True, return_doc=True)
    feature_id = DocumentField(doc=Feature, by_name=False,
                               return_doc=False)
Exemple #7
0
class XmlFieldForm(BaseForm):
    """
    Declarative form with the editable attributes of an entity field.

    Only ``name`` is required. The class only declares fields; all
    processing is inherited from BaseForm.
    """
    required_fields = ('name', )
    # NOTE(review): looks like this relaxes the required-fields check when an
    # existing object is edited - confirm in BaseForm
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    # NOTE(review): sibling forms declare this as
    # ``type_field = ChoiceField(..., name='type')``; here the attribute is
    # called ``type`` directly
    type = ChoiceField(choices=XmlField.TYPES)
    column = CharField()
    jsonpath = CharField()
    delimiter = CharField()
    regex = CharField()
    split = CharField()
    dateFormat = CharField()
    template = CharField()
    transform = ChoiceField(choices=XmlField.TRANSFORM_TYPES)
    headers = CharField()
    script = CharField()
    required = BooleanField()
    multipart = BooleanField()
    # references to the owning entity and import handler
    entity_id = DocumentField(doc=XmlEntity, by_name=False, return_doc=False)
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=False)
Exemple #8
0
class ClassifierForm(BasePredefinedForm, ParametersConvertorMixin):
    """
    Form used for one of these cases (depending on the parameters):
        1. adding/editing a predefined classifier
        2. editing the classifier of a specific model
        3. copying the classifier config from a predefined one
           to the model's classifier.
    """
    OBJECT_NAME = 'classifier'
    DOC = PredefinedClassifier

    group_chooser = 'predefined_selected'
    required_fields_groups = {
        'true': ('classifier', ),
        'false': ('type', ),
        None: ('type', ),
    }

    name = CharField()
    type_field = ChoiceField(choices=PredefinedClassifier.TYPES_LIST,
                             name='type')
    params = JsonField()
    # whether need to copy model classifier fields from predefined one
    predefined_selected = BooleanField()
    # whether we need to create predefined item (not model-related)
    classifier = DocumentField(doc=PredefinedClassifier,
                               by_name=False,
                               return_doc=True)
    model_id = DocumentField(doc=Model, by_name=False, return_doc=False)

    def validate_data(self):
        """
        Run the base validation, then convert the classifier parameters.

        When ``params`` were submitted they are passed, together with the
        cleaned ``type``, to convert_params using the CLASSIFIERS
        configuration. Nothing else happens when there are no params.
        """
        super(ClassifierForm, self).validate_data()

        params = self.cleaned_data.get('params')
        if not params:
            return

        # kept function-local as in the original code
        # (presumably to avoid an import cycle - TODO confirm)
        from config import CLASSIFIERS
        classifier_type = self.cleaned_data['type']
        self.convert_params(
            classifier_type, params, configuration=CLASSIFIERS)
Exemple #9
0
class XmlDataSourceForm(ParametersConvertorMixin, BaseForm):
    """
    Add/edit form for a datasource of an import handler.

    The form checks that the datasource name is unique within its import
    handler and converts the submitted ``params`` according to the
    datasource configuration returned by ExtractionPlan.
    """
    XML_PARAMETERS = True
    # evaluated once, when the class is defined
    PARAMETERS_CONFIGURATION = ExtractionPlan.get_datasources_config()

    required_fields = ('name', 'type', 'import_handler_id')
    NO_REQUIRED_FOR_EDIT = True

    name = CharField()
    type_field = ChoiceField(choices=_get_ds_types(), name='type')
    params = JsonField()
    import_handler_id = DocumentField(doc=XmlImportHandler,
                                      by_name=False,
                                      return_doc=False)

    def clean_name(self, value, field):
        """
        Validate the datasource name.

        The name is mandatory unless an existing object is being edited
        (NO_REQUIRED_FOR_EDIT), and no other datasource of the same import
        handler may already use it.

        Returns the value unchanged; raises ValidationError when the name
        is missing or already taken.
        """
        if not ((self.NO_REQUIRED_FOR_EDIT and self.obj.id) or value):
            raise ValidationError('name is required field')

        if self.obj.id:
            import_handler_id = self.obj.import_handler_id
        else:
            # ``.get`` instead of indexing: a request without
            # import_handler_id must not abort validation with a KeyError.
            import_handler_id = self.data.get('import_handler_id')

        query = XmlDataSource.query.filter_by(
            name=value, import_handler_id=import_handler_id)
        if self.obj.id:
            # the edited object itself is not a duplicate
            query = query.filter(XmlDataSource.id != self.obj.id)
        if query.count():
            raise ValidationError(
                ('Data Source with name "%s" already '
                 'exist. Please choose another one.') % value)
        return value

    def validate_data(self):
        """
        Convert the submitted ``params`` for the cleaned datasource type
        using PARAMETERS_CONFIGURATION.
        """
        type_ = self.cleaned_data.get('type')
        self.convert_params(type_,
                            self.cleaned_data.get('params'),
                            configuration=self.PARAMETERS_CONFIGURATION)
Exemple #10
0
class XmlEntityForm(BaseForm):
    """
    Add/edit form for an import handler entity.

    An entity reads either from a datasource or from a transformed field,
    never from both; the form validates that and keeps the related query
    and sqoop objects in line with the chosen source when saving.
    """
    required_fields = ('name', 'import_handler_id', 'entity_id',
                       ('datasource', 'transformed_field'))
    NO_REQUIRED_FOR_EDIT = True
    DATASOURCE_MESSAGE = ('Can be only one of either datasource or'
                          ' transformed_field')

    name = CharField()
    autoload_fields = BooleanField()
    import_handler_id = DocumentField(
        doc=XmlImportHandler, by_name=False, return_doc=False)
    entity_id = DocumentField(doc=XmlEntity, by_name=False, return_doc=False)
    datasource = DocumentField(
        doc=XmlDataSource, by_name=False, return_doc=True)
    transformed_field = DocumentField(
        doc=XmlField, by_name=False, return_doc=True)

    def clean_datasource(self, value, field):
        """
        Reject a datasource submitted together with a transformed field.
        """
        both_given = value and self.data.get('transformed_field')
        if both_given:
            raise ValidationError(self.DATASOURCE_MESSAGE)
        return value

    def clean_transformed_field(self, value, field):
        """
        Reject a transformed field submitted together with a datasource.
        """
        both_given = value and self.data.get('datasource')
        if both_given:
            raise ValidationError(self.DATASOURCE_MESSAGE)
        return value

    def save(self, *args, **kwargs):
        """
        Save the entity and synchronise the objects that depend on its
        source.

        Steps, all inside one try block:
            * save through BaseForm;
            * a newly submitted source clears the opposite one;
            * an entity with a transformed field loses its query, an
              entity with a datasource gets an XmlQuery if it has none;
            * sqoop imports are deleted unless the datasource type is
              "pig".

        On any exception the session is rolled back and DBException is
        raised; otherwise the session is committed and the entity returned.
        """
        try:
            entity = super(XmlEntityForm, self).save()

            # the source that was just submitted replaces the other one
            submitted_field = self.cleaned_data.get('transformed_field')
            if submitted_field and entity.datasource:
                entity.datasource = None
            submitted_ds = self.cleaned_data.get('datasource')
            if submitted_ds and entity.transformed_field:
                entity.transformed_field = None
            db.session.add(entity)

            if entity.transformed_field and entity.query_obj:
                db.session.delete(entity.query_obj)
            elif entity.datasource and not entity.query_obj:
                new_query = XmlQuery()
                db.session.add(new_query)
                entity.query_obj = new_query
                db.session.add(entity)

            # sqoop imports are kept only for a "pig" datasource
            ds = entity.datasource
            if not ds or ds.type != 'pig':
                for sqoop_import in entity.sqoop_imports:
                    db.session.delete(sqoop_import)

        except Exception as e:
            db.session.rollback()
            raise DBException(e.message, e)

        # reached only when nothing was raised above
        db.session.commit()
        return entity
Exemple #11
0
class FeatureForm(BaseForm, FeatureParamsMixin):
    """
    Feature add/edit form.
    """
    # we could edit only one feature field.
    # no need to fill all of them for edit
    NO_REQUIRED_FOR_EDIT = True
    required_fields = ('name', 'type', 'feature_set_id')

    name = CharField()
    type_field = CharField(name='type')
    input_format = CharField()
    params = JsonField()
    required = BooleanField()
    default = CharField()
    is_target_variable = BooleanField()
    feature_set_id = DocumentField(doc=FeatureSet, by_name=False,
                                   return_doc=False)
    disabled = BooleanField()

    transformer = FeatureTransformerForm(
        Model=Transformer,
        prefix='transformer-', data_from_request=False)
    remove_transformer = BooleanField()
    scaler = ScalerForm(Model=PredefinedScaler, prefix='scaler-',
                        data_from_request=False)
    remove_scaler = BooleanField()

    def validate_data(self):
        from numpy import nan
        feature_set_id = self.cleaned_data.get('feature_set_id')
        name = self.cleaned_data.get('name')
        query = Feature.query.filter_by(
            name=name,
            feature_set_id=feature_set_id)
        if self.obj.id:
            query = query.filter(Feature.id != self.obj.id)
        count = query.count()
        if count:
            self.add_error('name', 'Feature with name "%s" already \
exist. Please choose another one.' % name)
            return

        # Validating feature type and parameters
        def get_field_value(name):
            value = self.cleaned_data.get(name)
            if value is None and self.is_edit:
                return getattr(self.obj, name)
            return value

        feature_type = get_field_value('type')
        type_factory = FEATURE_TYPE_FACTORIES.get(feature_type)
        if type_factory:  # inline type
            try:
                params = get_field_value('params')
                input_format = get_field_value('input_format') or 'plain'
                type_ = type_factory.get_instance(params, input_format)
                default = self.cleaned_data.get('default', None)
                if default:
                    self.cleaned_data['default'] = type_.transform(default)
                    if self.cleaned_data['default'] is nan:
                        self.add_error(
                            "default",
                            "Incorrect default value {0} for type {1}. "
                            .format(default, feature_type))
            except InvalidFeatureTypeException, exc:
                self.add_error("type", 'Cannot create instance of '
                               'feature type: {0}'.format(exc), exc)
        else: