Ejemplo n.º 1
0
# API model 'PredictorMetadata', registered on ns_conf: a predictor's
# identity and training state plus the metadata produced by its
# data-preparation and analysis stages. Every field is optional.
predictor_metadata = ns_conf.model(
    'PredictorMetadata',
    {
        # Training state
        'status':
        fields.String(required=False,
                      description='The current model status',
                      enum=['training', 'complete', 'error']),
        'current_phase':
        fields.String(required=False, description='Current training phase'),
        # Identifying fields (name and version)
        'name':
        fields.String(required=False, description='The predictor name'),
        'version':
        fields.String(
            required=False,
            description=
            'The predictor version to publish under, this is so that we can train multiple predictors for the same problem but expose them via the same name'
        ),
        # other attributes
        'data_preparation':
        fields.Nested(
            data_preparation_metadata,
            required=False,
            description=
            'The metadata used in the preparation stage, in which we break the data into train, test, validation'
        ),
        # Overall accuracy, followed by one accuracy value per data subset.
        # Each description names its subset so the generated API docs can
        # tell the four fields apart.
        'accuracy':
        fields.Float(description='The current accuracy of the model'),
        'train_data_accuracy':
        fields.Float(
            description='The current accuracy of the model on the train data',
            required=False),
        'test_data_accuracy':
        fields.Float(
            description='The current accuracy of the model on the test data',
            required=False),
        'valid_data_accuracy':
        fields.Float(
            description=
            'The current accuracy of the model on the validation data',
            required=False),
        'data_analysis':
        fields.Nested(
            data_analysis_metadata,
            required=False,
            description=
            'The metadata used in the analysis stage, in which we extract statistical information from the input data'
        ),
        'model_analysis':
        fields.List(
            fields.Nested(target_column_metadata),
            required=False,
            description=
            'The model analysis stage, in which we extract statistical information from the input data for each target variable, thus, this is a list; one item per target column'
        ),
        # Untyped payload; an empty dict is the declared default.
        'data_analysis_v2':
        fields.Raw(default={}),
        'is_custom':
        fields.Boolean(default=False)
    })
Ejemplo n.º 2
0
from flask_restx import fields

# API model 'TargetColumnMetadata', registered on ns_conf: per-target-column
# analysis results (importance and accuracy histograms plus a confusion
# matrix). Every field is optional.
target_column_metadata = ns_conf.model(
    'TargetColumnMetadata', {
        'column_name':
        fields.String(required=False, description='The column name'),
        'overall_input_importance':
        fields.Nested(histogram_data,
                      required=False,
                      description='The overall predictor feature importance'),
        'train_accuracy_over_time':
        fields.Nested(histogram_data,
                      required=False,
                      description='The predictor train accuracy over time'),
        'test_accuracy_over_time':
        fields.Nested(histogram_data,
                      required=False,
                      description='The predictor test accuracy over time'),
        # Uses the nested histogram model, unlike the three fields above.
        'accuracy_histogram':
        fields.Nested(nested_histogram_data,
                      required=False,
                      description='The predictor accuracy across values'),
        'confusion_matrix':
        fields.Nested(
            confusion_matrix_data,
            required=False,
            description=
            'The predictor\'s confusion matrix for this column on the validation data'
        ),
    })
Ejemplo n.º 3
0
# API model 'PredictorStatus', registered on ns_conf: identity, training
# state, timestamps and failure details of a predictor.
predictor_status = ns_conf.model(
    'PredictorStatus',
    {
        # --- Primary key: name + version ---
        'name': fields.String(
            description=(
                'The predictor name, NOTE: That primary key is made of '
                'name:version'
            ),
            required=False,
        ),
        'version': fields.String(
            description=(
                'The predictor version to publish under, this is so that we can '
                'train multiple predictors for the same problem but expose them '
                'via the same name'
            ),
            required=False,
        ),
        # --- Other attributes ---
        'is_active': fields.Boolean(
            description='Only one predictor by public_name can be active',
            required=False,
        ),
        'data_source': fields.String(
            description='The data source it\'s learning from',
            required=False,
        ),
        'predict': fields.List(
            fields.String,
            description='The list of columns/fields to be predicted',
            required=False,
        ),
        'accuracy': fields.Float(
            description='The current accuracy of the model',
        ),
        'status': fields.String(
            description='The current model status',
            enum=['training', 'complete', 'error'],
            required=False,
        ),
        'current_phase': fields.String(
            description='Current training phase',
            required=False,
        ),
        # --- Timestamps ---
        'train_end_at': fields.DateTime(
            description='The time the predictor finished training',
            required=False,
        ),
        'updated_at': fields.DateTime(
            description='The time the predictor was last updated at',
            required=False,
        ),
        'created_at': fields.DateTime(
            description='The time the predictor was created at',
            required=False,
        ),
        'is_custom': fields.Boolean(default=False),
        # --- Failure details: full trace and short explanation ---
        'stack_trace_on_error': fields.String(
            description='Why it failed, if it did',
            required=False,
        ),
        'error_explanation': fields.String(
            description='Why it failed, if it did, short version',
            required=False,
        ),
    },
)
Ejemplo n.º 4
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf

from flask_restx import fields

# API model 'HistogramData', registered on ns_conf: a histogram expressed as
# a list of labels ('x') and a list of values ('y').
histogram_data = ns_conf.model(
    'HistogramData',
    {
        'type': fields.String(
            description='The type of histogram',
            enum=['categorical', 'numeric'],
            required=False,
        ),
        'x': fields.List(
            fields.String,
            description='Ordered labels',
            required=False,
        ),
        # Items are declared as Raw; a Float-typed declaration of this field
        # was previously left commented out at this spot.
        'y': fields.List(
            fields.Raw,
            description='Count for each label',
            required=False,
        ),
    },
)

# Sample histogram payload of type 'numeric': 15 string labels running from
# '1000' to '2400' in steps of 100, with one value in 'y' per label.
NUMERIC_EXAMPLE = {
    'type': 'numeric',
    'x': [str(label) for label in range(1000, 2500, 100)],
    'y': [10, 20, 30, 20, 20, 50, 60, 70, 100, 10, 100, 120, 130, 150, 90],
}
Ejemplo n.º 5
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from mindsdb.api.http.namespaces.entitites.column_metadata import column_metadata

from flask_restx import fields

# API model 'NestedHistogramData', registered on ns_conf: a histogram
# ('x' labels, 'y' values) plus, per entry, a list of column metadata.
nested_histogram_data = ns_conf.model(
    'NestedHistogramData',
    {
        'x': fields.List(
            fields.String,
            description='Ordered labels',
            required=False,
        ),
        # Items are declared as Raw; a Float-typed declaration of this field
        # was previously left commented out at this spot.
        'y': fields.List(
            fields.Raw,
            description='Count for each label',
            required=False,
        ),
        # List of lists of ColumnMetadata entries.
        'x_explained': fields.List(
            fields.List(fields.Nested(column_metadata)),
            description=(
                'Ordered list of lists where each element in the histogram has '
                'a list of column metadata only relevant to each  subset of data '
                'defined by the histogram bucket '
            ),
            required=False,
        ),
    },
)
Ejemplo n.º 6
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from flask_restx import fields


# API model 'QualityMetric', registered on ns_conf: a single quality metric
# with its severity type, score and descriptive texts.
quality_metric = ns_conf.model(
    'QualityMetric',
    {
        'type': fields.String(
            description='The quality type',
            enum=['error', 'warning', 'info'],
            required=False,
        ),
        'score': fields.Float(
            description='The score on the specific metric value 0-1',
            required=False,
        ),
        'description': fields.String(
            description='The quality metric description',
            required=False,
        ),
        # The next two fields are published with empty descriptions.
        'warning': fields.String(description='', required=False),
        'name': fields.String(description='', required=False),
    },
)
Ejemplo n.º 7
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf

from flask_restx import fields


# API model 'DataPreparationMetadata', registered on ns_conf: the accepted
# margin of error and the row counts for the full data set, the sampled
# rows and the test/train/validation subsets. Every field is optional.
data_preparation_metadata = ns_conf.model('DataPreparationMetadata', {
    'accepted_margin_of_error': fields.Float(required=False, description='This is the margin of error that the user accepted when training the predictor, based on this we estimate how much data to actually sample from the provided data set'),
    'total_row_count': fields.Integer(required=False, description='The total number of rows found on the data set'),
    'used_row_count': fields.Integer(required=False, description='The number of rows sampled from the entire dataset, this is calculated accordingly from the margin of error argument'),
    'test_row_count': fields.Integer(required=False, description='The number of rows used on the test subset'),
    'train_row_count': fields.Integer(required=False, description='The number of rows used on the train subset'),
    'validation_row_count': fields.Integer(required=False, description='The number of rows used on the validation subset')
})


# Sample values keyed by the DataPreparationMetadata field names. The test,
# validation and train counts (1000 + 1000 + 8000) add up to used_row_count.
EXAMPLE = dict(
    accepted_margin_of_error=0.2,
    total_row_count=18000,
    used_row_count=10000,
    test_row_count=1000,
    validation_row_count=1000,
    train_row_count=8000,
)
Ejemplo n.º 8
0
    'required': True
})])

# API model 'PUTPredictorMetadata', registered on ns_conf: the payload
# describing where training data comes from and which field to predict.
# Only 'to_predict' is marked as required.
put_predictor_metadata = ns_conf.model(
    'PUTPredictorMetadata',
    {
        'data_source_name': fields.String(
            description='Datasource name. Outdated, will be removed soon.',
            required=False,
        ),
        # Inline sub-model registered under its own name.
        'from': fields.Nested(
            ns_conf.model(
                'PUTPredictorMetadata_from',
                {
                    'datasource': fields.String(
                        description='Name of datasource',
                        required=False,
                    ),
                    'query': fields.String(
                        description='Query to datasource',
                        required=False,
                    ),
                },
            ),
            description='Source of data for predictor training',
            required=False,
        ),
        'to_predict': fields.String(
            description='Predicted field name',
            required=True,
        ),
        # Untyped payload; an empty dict is the declared default.
        'kwargs': fields.Raw(default={}),
    },
)

put_predictor_params = OrderedDict([
    ('name', {
Ejemplo n.º 9
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from mindsdb.api.http.namespaces.entitites.column_metadata import column_metadata

from flask_restx import fields

# API model 'ConfusionMatrixData', registered on ns_conf: a matrix of
# integers together with the lists of predicted and real values.
confusion_matrix_data = ns_conf.model(
    'ConfusionMatrixData',
    {
        # List of integer lists; required=True is set on the inner list.
        'matrix': fields.List(
            fields.List(fields.Integer, required=True),
        ),
        'predicted': fields.List(
            fields.String,
            description='Predicted values',
            required=False,
        ),
        'real': fields.List(
            fields.String,
            description='Real values',
            required=False,
        ),
    },
)
Ejemplo n.º 10
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from mindsdb.api.http.namespaces.entitites.quality_dimension import quality_dimension
from mindsdb.api.http.namespaces.entitites.histogram_data import histogram_data, NUMERIC_EXAMPLE
from mindsdb.api.http.namespaces.entitites.data_distribution_metadata import data_distribution_metadata

from flask_restx import fields

# API model 'ColumnMetadata', registered on ns_conf: per-column statistics,
# detected data type, distributions and three quality dimensions.
column_metadata = ns_conf.model(
    'ColumnMetadata',
    {
        'column_name': fields.String(
            description='The column name',
            required=False,
        ),
        'importance_score': fields.Float(
            description=(
                'This value is given once we have determined the importance '
                'score for a given column given the trained model'
            ),
            required=False,
        ),
        'data_type': fields.String(
            description='The most prevalent data type that we detect',
            enum=['categorical', 'numeric', 'text', 'image'],
            required=False,
        ),
        'data_type_distribution': fields.Nested(
            histogram_data,
            description='The count of cells per data type',
            required=False,
        ),
        'data_distribution': fields.Nested(
            data_distribution_metadata,
            description='The distribution of the data in this column',
            required=False,
        ),
        # Quality dimensions, all sharing the QualityDimension model.
        'consistency': fields.Nested(
            quality_dimension,
            description='The consistency quality score',
            required=False,
        ),
        'redundancy': fields.Nested(
            quality_dimension,
            description='The redundancy quality score',
            required=False,
        ),
        'variability': fields.Nested(
            quality_dimension,
            description='The variability quality score',
            required=False,
        ),
    },
)
Ejemplo n.º 11
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf

from mindsdb.api.http.namespaces.entitites.histogram_data import histogram_data
from mindsdb.api.http.namespaces.entitites.label_group import label_group

from flask_restx import fields

# API model 'DataDistributionMetadata', registered on ns_conf: a column's
# histogram, its label clusters and its mean (carried as a string).
data_distribution_metadata = ns_conf.model(
    'DataDistributionMetadata',
    {
        'data_histogram': fields.Nested(
            histogram_data,
            description=(
                'The histogram representing the data in this column '
                'if possible'
            ),
            required=False,
        ),
        'clusters': fields.List(
            fields.Nested(label_group),
            description='The labels per cluster',
            required=False,
        ),
        'mean': fields.String(
            description='The mean value if possible, encoded as string',
            required=False,
        ),
    },
)
Ejemplo n.º 12
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from flask_restx import fields


# API model 'LabelGroup', registered on ns_conf: a group label and the list
# of members that belong to it.
label_group = ns_conf.model(
    'LabelGroup',
    {
        'group': fields.String(
            description='label name',
            required=False,
        ),
        'members': fields.List(
            fields.String,
            description='members belonging to this group',
            required=False,
        ),
    },
)
Ejemplo n.º 13
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from mindsdb.api.http.namespaces.entitites.column_metadata import column_metadata

from flask_restx import fields

# API model 'PredictorDataAnalysisMetadata', registered on ns_conf: the
# column metadata gathered for a predictor, split into target columns and
# input columns. Both fields are optional lists of ColumnMetadata entries.
data_analysis_metadata = ns_conf.model('PredictorDataAnalysisMetadata', {
    # The descriptions state what each list holds; both previously carried a
    # row-count description that did not match these fields.
    'target_columns_metadata': fields.List(fields.Nested(column_metadata), required=False, description='The metadata of each target column'),
    'input_columns_metadata': fields.List(fields.Nested(column_metadata), required=False, description='The metadata of each input column')
})


Ejemplo n.º 14
0
from mindsdb.api.http.namespaces.configs.predictors import ns_conf
from mindsdb.api.http.namespaces.entitites.quality_metric import quality_metric
from flask_restx import fields

# API model 'QualityDimension', registered on ns_conf: an overall score,
# the metrics it is derived from, and a description of the score.
quality_dimension = ns_conf.model(
    'QualityDimension',
    {
        # Declared as a String field even though it carries a score.
        'score': fields.String(
            description=(
                'The data quality score (0 to 10) derived from the metrics.'
            ),
            required=False,
        ),
        'metrics': fields.List(
            fields.Nested(quality_metric),
            description='List of quality metrics evaluated',
            required=False,
        ),
        'description': fields.String(
            description='The score description',
            required=False,
        ),
    },
)