Example #1
0
def confidence_suffle(columns, df_train, df_test, acc_score, to_predict):
    """Measure how confidence and accuracy react to missing columns.

    Trains a baseline predictor, then retrains with each column removed
    (one at a time, in random order) and once with *all* of them removed,
    printing a warning whenever a single-column-removed run beats the
    baseline or falls below the everything-removed run.

    Args:
        columns: feature column names eligible for removal.
        df_train / df_test: pandas DataFrames with train/test data.
        acc_score: accuracy function (real_values, predicted_values) -> float.
        to_predict: name of the target column.
    """
    random.seed(2)

    def _fit_explain(train_df, test_df):
        # Train a fresh predictor and return per-row explanation dicts.
        predictor = mindsdb.Predictor(name='confidence_test')
        predictor.learn(from_data=train_df, to_predict=to_predict,
                        stop_training_in_x_seconds=10)
        return [x.explanation for x in predictor.predict(when_data=test_df)]

    def _mean_confidence(explanations):
        return np.mean(np.array([x[to_predict]['confidence'] for x in explanations]))

    def _accuracy(test_df, explanations):
        return acc_score(list(test_df[to_predict]),
                         [x[to_predict]['predicted_value'] for x in explanations])

    # Baseline: all columns present
    explanations = _fit_explain(df_train, df_test)
    normal_confidence = _mean_confidence(explanations)
    normal_accuracy = _accuracy(df_test, explanations)

    # Bug fix: the original picked columns via random.choice *with*
    # replacement (and looped max(len(columns), 2) times), so the same
    # column could be tested twice while others were never tested. Shuffle
    # once and remove each column exactly once instead.
    shuffled_columns = list(columns)
    random.shuffle(shuffled_columns)

    confidences_missing_1 = []
    accuracy_missing_1 = []
    columns_missing_1 = []
    previously_removed = []
    for column in shuffled_columns:
        previously_removed.append(column)
        columns_missing_1.append([column])

        test = df_test.drop(columns=[column])
        explanations = _fit_explain(df_train.drop(columns=[column]), test)

        confidences_missing_1.append(_mean_confidence(explanations))
        accuracy_missing_1.append(_accuracy(test, explanations))

    # Finally: train/test with every candidate column removed
    test = df_test.drop(columns=previously_removed)
    explanations = _fit_explain(df_train.drop(columns=previously_removed), test)

    multiple_removed_confidence = _mean_confidence(explanations)
    multiple_removed_accuracy = _accuracy(test, explanations)

    for conf, acc, missing in zip(confidences_missing_1, accuracy_missing_1, columns_missing_1):
        if conf >= normal_confidence:
            print(f'Got confidence of {conf} with missing columns {missing} which is bigger than the normal confidence {normal_confidence}')

        if conf <= multiple_removed_confidence:
            print(f'Got confidence of {conf} with missing columns {missing} which is smaller than the {multiple_removed_confidence} confidence with {previously_removed} columns removed')

        if acc >= normal_accuracy:
            print(f'Got accuracy of {acc} with missing columns {missing} which is bigger than the normal accuracy {normal_accuracy}')

        if acc <= multiple_removed_accuracy:
            print(f'Got accuracy of {acc} with missing columns {missing} which is smaller than the {multiple_removed_accuracy} accuracy with {previously_removed} columns removed')
Example #2
0
def basic_test(backend='lightwood',use_gpu=True, run_extra=False, IS_CI_TEST=False):
    """End-to-end smoke test of the Predictor workflow on the home-rentals
    dataset: optionally run the extra functional-test scripts, then learn,
    (optionally) rename, reload, predict from a URL and from a dict, and
    exercise every result-access interface.

    Args:
        backend: model backend forwarded to learn().
        use_gpu: forwarded to learn()/predict()/test().
        run_extra: also execute the scripts in ../functional_testing and
            test model renaming.
        IS_CI_TEST: propagated to mindsdb.CONFIG.
    """
    mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST
    if run_extra:
        for py_file in [x for x in os.listdir('../functional_testing') if '.py' in x]:
            # Skip data source tests since installing dependencies is annoying
            # @TODO: Figure out a way to make travis install required dependencies on osx

            if any(x in py_file for x in ['all_data_sources', 'custom_model']):
                continue

            # Each extra script must exit 0, otherwise the whole test fails
            code = os.system(f'python3 ../functional_testing/{py_file}')
            if code != 0:
                raise Exception(f'Test failed with status code: {code} !')

    # Create & Learn
    to_predict = 'rental_price'
    mdb = mindsdb.Predictor(name='home_rentals_price')
    mdb.learn(to_predict=to_predict,from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",backend=backend, stop_training_in_x_seconds=120,use_gpu=use_gpu)

    # Reload & Predict
    model_name = 'home_rentals_price'
    if run_extra:
        mdb.rename_model('home_rentals_price', 'home_rentals_price_renamed')
        model_name = 'home_rentals_price_renamed'

    mdb = mindsdb.Predictor(name=model_name)
    # Try predicting from a file and from a dictionary
    prediction = mdb.predict(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", use_gpu=use_gpu)

    mdb.test(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",accuracy_score_functions=r2_score,predict_args={'use_gpu': use_gpu})

    prediction = mdb.predict(when={'sqft':300}, use_gpu=use_gpu)

    # Test all different forms of output
    # No need to print them in order to run these checks, we're just doing so for quick-debugging purposes, we just want to see if the interfaces will crash when we call them
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)


    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())  # NOTE(review): explain() is marked deprecated elsewhere in this file
    print(prediction[0].explanation)
    print(prediction[0].raw_predictions())
    print('\n\n')

    # See if we can get the adapted model data
    amd = mdb.get_model_data(model_name)
    test_adapted_model_data(amd, to_predict)
Example #3
0
def basic_test(backend='lightwood', use_gpu=True, IS_CI_TEST=False):
    """Train a rental-price predictor, reload it by name, predict from a URL
    and from a dict, and exercise every result-access interface."""
    mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST

    data_url = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"
    target = 'rental_price'

    # Create & Learn
    predictor = mindsdb.Predictor(name='home_rentals_price')
    predictor.learn(to_predict=target,
                    from_data=data_url,
                    backend=backend,
                    stop_training_in_x_seconds=120,
                    use_gpu=use_gpu)

    # Reload the stored model by name, then predict from a file and a dict
    model_name = 'home_rentals_price'
    predictor = mindsdb.Predictor(name=model_name)
    result = predictor.predict(when_data=data_url, use_gpu=use_gpu)

    predictor.test(when_data=data_url,
                   accuracy_score_functions=r2_score,
                   predict_args={'use_gpu': use_gpu})

    result = predictor.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Exercise every output interface; printing is just for quick debugging,
    # the point is that none of these calls crash.
    print(result)
    print(result[0])

    for row in result:
        print(row)

    for row in result:
        print(row)
    print(result[0].as_dict())
    print(result[0].as_list())
    print(result[0]['rental_price_confidence'])
    print(type(result[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(result[0].explain())  # Warning: Deprecated explainer
    print(result[0].explanation)
    print(result[0].raw_predictions())
    print('\n\n')

    # Fetch the adapted model data and validate it
    amd = predictor.get_model_data(model_name)
    test_adapted_model_data(amd, target)
Example #4
0
def basic_test(backend='lightwood',use_gpu=True,ignore_columns=None, run_extra=False, IS_CI_TEST=False):
    """Smoke-test the Predictor workflow on the home-rentals dataset.

    Args:
        backend: model backend forwarded to learn().
        use_gpu: forwarded to learn()/predict().
        ignore_columns: optional list of columns to ignore. Defaults to None
            instead of a shared mutable [] (bug fix for the mutable-default
            pitfall); currently unused by the body, kept for interface
            compatibility.
        run_extra: also execute the scripts in ../functional_testing and
            test model renaming.
        IS_CI_TEST: propagated to mindsdb.CONFIG.
    """
    # Normalize the sentinel back to the old [] value
    ignore_columns = [] if ignore_columns is None else ignore_columns
    mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST
    if run_extra:
        for py_file in [x for x in os.listdir('../functional_testing') if '.py' in x]:
            os.system(f'python3 ../functional_testing/{py_file}')

    # Create & Learn
    to_predict = 'rental_price'
    mdb = mindsdb.Predictor(name='home_rentals_price')
    mdb.learn(to_predict=to_predict,from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",backend=backend, stop_training_in_x_seconds=20,use_gpu=use_gpu)

    # Reload & Predict
    model_name = 'home_rentals_price'
    if run_extra:
        mdb.rename_model('home_rentals_price', 'home_rentals_price_renamed')
        model_name = 'home_rentals_price_renamed'

    mdb = mindsdb.Predictor(name=model_name)
    # Try predicting from a file and from a dictionary
    prediction = mdb.predict(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", use_gpu=use_gpu)
    prediction = mdb.predict(when={'sqft':300}, use_gpu=use_gpu)

    # Test all different forms of output
    # No need to print them in order to run these checks, we're just doing so for quick-debugging purposes, we just want to see if the interfaces will crash when we call them
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)


    print(type(list(prediction.evaluations.values())[0][0]))
    assert('ProbabilityEvaluation' in str(type(list(prediction.evaluations.values())[0][0])))

    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted model data
    amd = mdb.get_model_data(model_name)
    test_adapted_model_data(amd, to_predict)
Example #5
0
def prediction(uuid):
    """Flask endpoint: run the predictor named `uuid` over the features POSTed
    in the request body and return predicted values + confidences as JSON.

    Expects a JSON body with 'selectedFeature': {'data': [...]} (records for a
    DataFrame) and 'selectedTarget': {'value': <column name>}.
    Returns 200 with the payload, 404 for non-POST methods, 500 on error.
    """
    try:
        model_uuid = uuid
        req = request.get_json(force=True)
        if request.method == 'POST':
            dt = req.copy()
            selectedFeature = dt.get('selectedFeature')
            selectedFeature = json.dumps(selectedFeature['data'])
            selectedTarget = dt.get('selectedTarget')
            targetname = selectedTarget['value']
            df = pd.read_json(path_or_buf=selectedFeature, orient='records')
            mdb = mindsdb.Predictor(name=model_uuid)
            result = mdb.predict(when_data=df)
            # Bug fix: initialise so a missing result column yields nulls in
            # the payload rather than an unbound-local NameError (which the
            # old bare except turned into a blanket 500).
            predictedValues = None
            confidance = None
            for x, y in result.data.items():
                if 'model_' + targetname == x:
                    predictedValues = y
                elif targetname + '_model_confidence' == x:
                    confidance = y
            data = {"values": predictedValues, "confidance": confidance}
            response = make_response(jsonify({
                "success": True,
                "data": data
            }), 200)
            response.headers["Content-Type"] = 'application/json'
            return response
        else:
            response = make_response(jsonify({"success": False}), 404)
            response.headers["Content-Type"] = 'application/json'
            return response
    except Exception:
        # Narrowed from a bare except (which would also swallow SystemExit /
        # KeyboardInterrupt)
        response = make_response(jsonify({"success": False}), 500)
        response.headers["Content-Type"] = 'application/json'
        return response
def run():
    """Train an admission-chance regressor and report its r2 score."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='Admission_prediction_model')
    predictor.learn(from_data='dataset/train.csv',
                    to_predict='Chance of Admit ',
                    backend=backend,
                    disable_optional_analysis=True)

    preds = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})

    predicted = [p['Chance of Admit '] for p in preds]
    # Note the trailing space in the column name -- it is present in the CSV
    actual = list(pd.read_csv('dataset/test.csv',
                              usecols=['Chance of Admit '])['Chance of Admit '])

    accuracy = r2_score(actual, predicted)
    print(f'Got an r2 score of: {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run():
    """Train a credit-card-fraud classifier and score it on the test split."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='cc_fraud')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='Class',
                    backend=backend,
                    window_size=5)

    preds = predictor.predict(when_data='processed_data/test.csv')

    predicted = [int(p['Class']) for p in preds]
    actual = [int(v) for v in pd.read_csv('processed_data/test.csv')['Class']]

    accuracy = balanced_accuracy_score(actual, predicted)

    # Per-row explanation objects, returned as extra diagnostic output
    explanations = [p.explanation for p in preds]

    return {
        'accuracy': accuracy,
        'backend': backend,
        'additional info': explanations
    }
def transformation():
    """SageMaker-style inference entrypoint: parse the request body into
    when_data, run the 'mdbp' predictor on it, and return the first
    prediction plus a class confidence as JSON.
    """

    # Avoid mindsdb storage path write access
    mindsdb.CONFIG.SAGEMAKER = 'True'
    # `model_path` is expected to be defined elsewhere in this module
    mindsdb.CONFIG.MINDSDB_STORAGE_PATH = model_path

    try:
        # parse_data raises ValueError for unsupported content types
        when_data = parse_data(flask.request.content_type, flask.request)
    except ValueError:
        return flask.Response(
            response='This predictor supports JSON,CSV and Excel data',
            status=415,
            mimetype='text/plain')

    print('Invoked with {} records'.format(when_data))
    result = mindsdb.Predictor(name='mdbp').predict(when_data=when_data)

    # NOTE(review): only the first row's prediction and the *last* row's
    # confidence are returned -- presumably single-row input is expected;
    # confirm against the caller.
    cconfidence = [x['Class_confidence'] for x in result]
    response = {
        'prediction': str(result[0]),
        'class_confidence': cconfidence[-1]
    }

    print('Response prediction: {}'.format(response['prediction']))
    return flask.Response(response=json.dumps(response),
                          status=200,
                          mimetype='application/json')
Example #9
0
def run():
    """Train a Deaths classifier on the corona data and score it.

    Returns a dict with the accuracy, the metric name, and the backend.
    """
    backend = 'lightwood'
    target = 'Deaths'

    mdb = mindsdb.Predictor(name='corona-data')

    # We tell the Predictor what column or key we want to learn and from what data
    mdb.learn(from_data="processed_data/train.csv",
              to_predict=target,
              backend=backend,
              use_gpu=False)

    test_df = pd.read_csv('processed_data/test.csv')
    predictions = mdb.predict(when_data='processed_data/test.csv')

    results = [str(x[target]) for x in predictions]
    real = list(map(str, list(test_df[target])))
    accuracy = accuracy_score(real, results)
    print(accuracy)

    return {
        'accuracy': accuracy,
        # Bug fix: the metric computed above is accuracy_score, not r2_score
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
Example #10
0
def run():
    """Train a grouped time-series model for robotic failure and report r2.

    Fixes relative to the original:
    - predictions were made on 'test.csv' while the ground truth was read
      from 'dataset/test.csv'; both now use 'dataset/test.csv'.
    - the reported 'accuracy_function' said balanced_accuracy_score although
      r2_score is what is computed.
    """
    mdb = mindsdb.Predictor(name='robotic_failure')

    backend = 'lightwood'

    mdb.learn(from_data='dataset/train.csv',
              to_predict=['target'],
              order_by=['time'],
              window_size=14,
              group_by='id',
              disable_optional_analysis=True,
              backend=backend)

    predictions = mdb.predict(when='dataset/test.csv')

    pred_val = [x['target'] for x in predictions]
    # Close the file handle instead of leaking the open() result
    with open('dataset/test.csv', 'r') as fp:
        real_val = list(pd.read_csv(fp)['target'])

    accuracy = r2_score(real_val, pred_val)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
Example #11
0
def run():
    """Train a booking-cancellation classifier; report balanced accuracy and
    per-row explanations."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='hotel_booking')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='is_canceled',
                    backend=backend,
                    disable_optional_analysis=True)

    test_df = pd.read_csv('processed_data/test.csv')
    preds = predictor.predict(
        when_data='processed_data/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})

    predicted = [str(p['is_canceled']) for p in preds]
    actual = [str(v) for v in test_df['is_canceled']]

    accuracy = balanced_accuracy_score(actual, predicted)

    # Keep each row's explanation for inspection by the caller
    explanations = [p.explanation for p in preds]

    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend,
        'additional_info': explanations
    }
Example #12
0
def run(sample):
    """Train a diagnosis classifier and report its balanced accuracy.

    `sample` is accepted for interface parity with the other benchmark
    runners but is not used here.
    """
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='cancer_model')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='diagnosis',
                    use_gpu=True,
                    backend=backend,
                    stop_training_in_x_seconds=10,
                    equal_accuracy_for_all_output_categories=True)

    test_df = pd.read_csv('processed_data/test.csv')
    preds = predictor.predict(
        when_data='processed_data/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})

    predicted = [str(p['diagnosis']) for p in preds]
    actual = [str(v) for v in test_df['diagnosis']]

    return {
        'accuracy': balanced_accuracy_score(actual, predicted),
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
Example #13
0
def run(sample):
    """Train a CIFAR class predictor and report its accuracy (as a percent).

    With `sample`, both splits are restricted to the 'fish' superclass to
    keep the run small.
    """
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')

    if sample:
        train_df = train_df.loc[train_df['superclass'] == 'fish'].reset_index()
        test_df = test_df.loc[test_df['superclass'] == 'fish'].reset_index()

    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='CIFRAR_Model')
    predictor.learn(from_data=train_df,
                    to_predict=['class'],
                    ignore_columns='index',
                    advanced_args={'use_selfaware_model': False, 'force_disable_cache': False})

    preds = predictor.predict(when_data=test_df)

    predicted = [p['class'] for p in preds]
    actual = list(test_df['class'])

    acc = accuracy_score(actual, predicted) * 100
    print(f'Log loss accuracy of {acc}% for classes !')

    return {
        'accuracy': acc,
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
def run():
    """Train a demand ('cnt') regressor and report log-space r2 plus
    percentage-error accuracy at two margins."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='lbs')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='cnt',
                    backend=backend,
                    window_size=5)

    preds = predictor.predict(when_data='processed_data/test.csv')

    predicted = [int(p['cnt']) for p in preds]
    with open('processed_data/test.csv', 'r') as fp:
        actual = [int(v) for v in pd.read_csv(fp)['cnt']]

    def _safe_log(v):
        # Guard log() against zero/negative counts by mapping them to 0
        return math.log(v) if v > 0 else 0

    accuracy = r2_score([_safe_log(v) for v in actual],
                        [_safe_log(v) for v in predicted])
    print(f'Got an r2_score for the log predictions of: {accuracy}')

    accuracy = pct_error(actual, predicted, 0.05)
    print(
        f'Got a percentage accuracy score with error-margin 5% of: {accuracy}')

    accuracy = pct_error(actual, predicted)
    print(f'Got a percentage accuracy score of: {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'pct_error_0',
        'backend': backend
    }
Example #15
0
    def post(self, name):
        '''Queries predictor `name` with the `when` conditions from the
        request body and returns the parsed results.

        Body keys: 'when' (dict of conditions), 'format_flag' (defaults to
        'explain'), 'kwargs' (dict forwarded to predict()).
        '''
        global model_swapping_map
        data = request.json

        when = data.get('when') or {}
        # Bug fix: dict.get never raises, so the original try/except
        # fallbacks were dead code and the intended 'explain' default was
        # never applied when the key was absent.
        format_flag = data.get('format_flag') or 'explain'

        kwargs = data.get('kwargs')
        if not isinstance(kwargs, dict):
            kwargs = {}

        # Not the fanciest semaphor, but should work since restplus is multi-threaded and this condition should rarely be reached
        while name in model_swapping_map and model_swapping_map[name] is True:
            time.sleep(1)

        mdb = mindsdb.Predictor(name=name)
        results = mdb.predict(when=when,
                              run_confidence_variation_analysis=True,
                              **kwargs)
        return preparse_results(results, format_flag)
Example #16
0
def run(sample):
    """Train a sentiment predictor on IMDB reviews and report accuracy.

    With `sample`, the small *_sample.tsv train/test files are used.
    """
    backend = 'lightwood'

    train_file = 'train_sample.tsv' if sample else 'train.tsv'
    test_file = 'test_sample.tsv' if sample else 'test.tsv'

    predictor = mindsdb.Predictor(name='imdb_predictor_x')
    predictor.learn(from_data=train_file, to_predict=['sentiment'])

    preds = predictor.predict(when_data=test_file)
    predicted = [p['sentiment'] for p in preds]

    # Ground truth: second column of the TSV, skipping the header row
    real_sentiment = []
    with open(test_file, 'r') as raw_csv_fp:
        rows = csv.reader(raw_csv_fp, delimiter='\t')
        next(rows, None)
        for row in rows:
            real_sentiment.append(row[1])

    acc = accuracy_score(real_sentiment, predicted) * 100
    print(f'Accuracy of {acc}% !')
    return {
        'accuracy': acc,
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
def run():
    """Train an attrition classifier with 'Yes' weighted above 'No' and
    report balanced accuracy."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='employee_retention_model')
    predictor.learn(from_data='dataset/train.csv',
                    to_predict='Attrition',
                    backend=backend,
                    output_categories_importance_dictionary={'Yes': 1, 'No': 0.7},
                    disable_optional_analysis=True)

    test_df = pd.read_csv('dataset/test.csv')
    preds = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})

    predicted = [str(p['Attrition']) for p in preds]
    actual = [str(v) for v in test_df['Attrition']]

    return {
        'accuracy': balanced_accuracy_score(actual, predicted),
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
Example #18
0
def run():
    """Train a churn classifier with class weighting; report accuracy and
    per-row explanations."""
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='employee_retention_model')
    predictor.learn(from_data='dataset/train.csv',
                    to_predict='Churn',
                    backend=backend,
                    output_categories_importance_dictionary={'Yes': 1, 'No': 0.5})

    test_df = pd.read_csv('dataset/test.csv')
    preds = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})

    # Predicted labels are taken from the explanation payload
    predicted = [p.explanation['Churn']['predicted_value'] for p in preds]
    actual = [str(v) for v in test_df['Churn']]

    accuracy = accuracy_score(actual, predicted)

    # Keep each row's explanation for inspection by the caller
    explanations = [p.explanation for p in preds]

    return {
        'accuracy': accuracy,
        'accuracy_function': 'accuracy_score',
        'backend': backend,
        'single_row_predictions': explanations
    }
Example #19
0
def threaded_task(df, model_uuid, targetname):
    """Background training job: learn `targetname` from `df` and export the
    trained model under `model_uuid`.

    Runs on a worker thread, so failures are printed instead of being
    allowed to kill the thread silently.
    """
    try:
        mdb = mindsdb.Predictor(name=model_uuid)
        mdb.learn(from_data=df, to_predict=targetname)
        mdb.export_model()
    except Exception as e:
        # Bug fix: the original did `except ValueError: print(ValueError)`,
        # which printed the exception *class* (not the caught instance) and
        # let any non-ValueError exception vanish with the thread.
        print(e)
Example #20
0
        def learn(name, from_data, to_predict, kwargs):
            '''
            Train predictor `name` on `from_data` for `to_predict`.

            running at subprocess due to
            ValueError: signal only works in main thread

            this is work for celery worker here?
            '''
            mdb = mindsdb.Predictor(name=name)
            # HELPER_MIXERS is only enabled on non-Windows platforms --
            # presumably unsupported there; TODO confirm
            if sys.platform not in ['win32', 'cygwin', 'windows']:
                lightwood.config.config.CONFIG.HELPER_MIXERS = True

            # `kwargs` is a plain dict parameter, expanded into learn()
            mdb.learn(from_data=from_data, to_predict=to_predict, **kwargs)
Example #21
0
def run():
    """Train an SO2 predictor on the air-pollution data and return the
    per-row explanations."""
    predictor = mindsdb.Predictor(name='air_pl')
    predictor.learn(from_data='processed_data/train.csv', to_predict='SO2')

    preds = predictor.predict(when_data='processed_data/test.csv')

    # Per-row explanation objects for each test transaction
    explanations = [p.explanation for p in preds]

    return {'additional info': explanations}
Example #22
0
def run(sample=False):
    """Train a price regressor, check how often each predicted confidence
    interval contains the true value, and report r2.

    `sample` is accepted for interface parity with the other benchmark
    runners but is not used here.
    """
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='wineq')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='price',
                    backend=backend,
                    ignore_columns=['no'],
                    use_gpu=True)

    preds = predictor.predict(when_data=pd.read_csv('processed_data/test.csv'))

    print(preds[0].explanation)

    intervals = [p.explanation['price']['explanation']['confidence_interval']
                 for p in preds]
    predicted = [p.explanation['price']['predicted_value'] for p in preds]

    with open('processed_data/test.csv', 'r') as fp:
        actual = list(pd.read_csv(fp)['price'])
    actual = [v if str(v) != 'nan' else 0 for v in actual]

    # Count how many true values fall strictly inside their interval
    correct = sum(1 for i, bounds in enumerate(intervals)
                  if bounds[0] < actual[i] < bounds[1])
    incorrect = len(intervals) - correct

    print(
        f'Out of all predictions {correct} of the intervals contained the actual value and {incorrect} did not !'
    )

    accuracy = r2_score(actual, predicted)
    print(f'Got an r2 score of: {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
Example #23
0
def run(sample=False):
    """Train a rental-price model, then predict twice -- once with several
    feature columns dropped and once on the full test set -- printing
    accuracy metrics each time. Returns the metrics of the final (full) run.

    `sample` is accepted for interface parity with the other benchmark
    runners but is not used here.
    """
    backend = 'lightwood'
    lightwood.config.config.CONFIG.HELPER_MIXERS = False

    predictor = mindsdb.Predictor(name='home_rentals')

    train_df = pd.read_csv('dataset/train.csv')
    predictor.learn(from_data=train_df,
                    to_predict='rental_price',
                    stop_training_in_x_seconds=120,
                    use_gpu=False)

    test_df = pd.read_csv('dataset/test.csv')

    # First pass drops a handful of features, second pass drops nothing
    drop_sets = [['number_of_rooms', 'number_of_bathrooms', 'location',
                  'days_on_market', 'neighborhood'], []]
    for drop_cols in drop_sets:
        print(f'Predicting without columns: {drop_cols}')
        preds = predictor.predict(when_data=test_df.drop(columns=drop_cols))

        predicted = [p.explain()['rental_price']['predicted_value'] for p in preds]
        with open('dataset/test.csv', 'r') as fp:
            actual = list(pd.read_csv(fp)['rental_price'])
        actual = [v if str(v) != 'nan' else 0 for v in actual]

        accuracy = r2_score([math.log(v) if v > 0 else 0 for v in actual],
                            [math.log(v) if v > 0 else 0 for v in predicted])
        print(f'Got an r2_score for the log predictions of: {accuracy}')

        accuracy = pct_error(actual, predicted, 0.05)
        print(f'Got a percentage accuracy score with error-margin 5% of: {accuracy}')

        accuracy = pct_error(actual, predicted)
        print(f'Got a percentage accuracy score of: {accuracy}')
        print('\n\n------------------------\n\n')

    # `accuracy` holds the last metric computed (pct_error, default margin)
    return {
        'accuracy': accuracy,
        'accuracy_function': 'pct_error_0',
        'backend': backend
    }
Example #24
0
def run():
    """Train a plant-pathology class predictor and report balanced accuracy.

    Bug fix: the `unstable_parameters_dict` key was the singular
    'always_use_model_prediction', while every other runner in this codebase
    uses 'always_use_model_predictions' -- the plural form is used here.
    """
    train_file = 'data_set/train.csv'
    test_file = 'data_set/test.csv'

    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='plant_pathology')

    train_df = pd.read_csv(train_file)
    predictor.learn(from_data=train_df,
                    to_predict=['class'],
                    disable_optional_analysis=True,
                    use_gpu=False,
                    backend=backend,
                    stop_training_in_x_seconds=3600 * 2)

    test_df = pd.read_csv(test_file)
    predictions = predictor.predict(
        when_data=test_df,
        unstable_parameters_dict={'always_use_model_predictions': True},
        use_gpu=False)

    predicted_class = list(map(lambda x: x['class'], predictions))

    # Ground truth: second column of the CSV, skipping the header row
    real_class = []
    with open(test_file) as raw_csv_fp:
        reader = csv.reader(raw_csv_fp)
        next(reader, None)
        for row in reader:
            real_class.append(row[1])

    acc = balanced_accuracy_score(real_class, predicted_class) * 100
    print(f'Balanced accuracy of {acc}% for classes !')

    return {
        'accuracy': acc,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
Example #25
0
def run():
    """Train a grouped time-series sales forecaster and report r2.

    Bug fix: the reported 'accuracy_function' said balanced_accuracy_score
    although r2_score is what is computed.
    """
    mdb = mindsdb.Predictor(name='demand_forecast')

    backend = 'lightwood'

    mdb.learn(from_data='dataset/train_data.csv', to_predict='sales',
              order_by=['date'], group_by=['store', 'item'],
              backend=backend, window_size=7)

    # NOTE(review): other runners pass file paths via `when_data=`; `when=`
    # with a path is kept here as-is -- confirm against the mindsdb version
    predictions = mdb.predict(when='dataset/test_data.csv')

    pred_val = [x['sales'] for x in predictions]
    with open('dataset/test_data.csv', 'r') as fp:
        real_val = list(pd.read_csv(fp)['sales'])

    accuracy = r2_score(real_val, pred_val)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run(sample):
    """Train a target_class classifier and score it with `acc_fun`.

    `sample` is accepted for interface parity with the other benchmark
    runners but is not used here.

    Bug fix: the original scored `acc_fun(real, result)` while the
    predictions variable is named `results` -- a guaranteed NameError.
    """
    backend = 'lightwood'

    mdb = mindsdb.Predictor(name='model_name')

    mdb.learn(from_data='processed_data/train.csv', to_predict='target_class',
              use_gpu=True, backend=backend,
              equal_accuracy_for_all_output_categories=True)

    test_df = pd.read_csv('processed_data/test.csv')
    predictions = mdb.predict(when_data='processed_data/test.csv')

    results = [x['target_class'] for x in predictions]
    real = list(test_df['target_class'])

    accuracy = acc_fun(real, results)
    print(f'Balacned accuracy score of {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'acc_fun',
        'backend': backend
    }
def run(sample):
    """Train a bank-churn ('Exited') classifier and report balanced accuracy.

    `sample` is accepted for interface parity with the other benchmark
    runners but is not used here.
    """
    backend = 'lightwood'

    predictor = mindsdb.Predictor(name='churn_model')
    predictor.learn(from_data=pd.read_csv('dataset/train.csv'),
                    to_predict='Exited',
                    backend=backend,
                    ignore_columns=['RowNumber', 'CustomerId', 'Surname'])

    test_df = pd.read_csv('dataset/test.csv')
    preds = predictor.predict(when_data='dataset/test.csv')

    predicted = [int(p['Exited']) for p in preds]
    actual = [int(v) for v in test_df['Exited']]

    accuracy = balanced_accuracy_score(actual, predicted)
    print(f'Balacned accuracy score of {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
Example #28
0
def run():
    """Train a Deaths predictor on the corona data and report plain accuracy."""
    predictor = mindsdb.Predictor(name='corona-data')

    target = 'Deaths'
    # Teach the predictor the target column from the training CSV
    predictor.learn(from_data="processed_data/train.csv", to_predict=target)

    test_df = pd.read_csv('processed_data/test.csv')
    preds = predictor.predict(when_data='processed_data/test.csv')

    predicted = [str(p[target]) for p in preds]
    actual = [str(v) for v in test_df[target]]
    print(preds[0])

    return {'accuracy': accuracy_score(actual, predicted)}
Example #29
0
def delete(uuid):
    """Flask endpoint: delete the predictor named `uuid` and remove its
    on-disk lightwood data file.

    Returns 200 on success, 404 when the data file is missing or the method
    is not GET, 500 on any unexpected error.
    """
    try:
        model_uuid = uuid
        if request.method == 'GET':
            predictor = mindsdb.Predictor(name=model_uuid)
            predictor.delete_model(model_name=model_uuid)
            # Bug fix: the path was built as '\modelsinfo\\...', where '\m'
            # is an invalid escape sequence and the hard-coded backslashes
            # were Windows-only; os.path.join is portable.
            path = os.path.join(os.getcwd(), 'modelsinfo',
                                model_uuid + '_lightwood_data')
            print("path ==> ", path)
            if os.path.exists(path):
                os.remove(path)
                response = make_response(jsonify({"success": True}), 200)
            else:
                response = make_response(jsonify({"success": False}), 404)
            response.headers["Content-Type"] = 'application/json'
            return response
        # Bug fix: non-GET requests previously fell through returning None;
        # answer 404 explicitly, matching the other endpoints.
        response = make_response(jsonify({"success": False}), 404)
        response.headers["Content-Type"] = 'application/json'
        return response
    except Exception:
        # Narrowed from a bare except
        response = make_response(jsonify({"success": False}), 500)
        response.headers["Content-Type"] = 'application/json'
        return response
Example #30
0
    def post(self, name):
        """Predict with the model `name` using a datasource resolved from the
        request body.

        The datasource is resolved, in order, from 'data_source_name' (via
        get_datasource_path), 'from_data', then 'when_data'; aborts with 400
        if none is given.
        """
        global model_swapping_map

        data = request.json

        from_data = get_datasource_path(data.get('data_source_name'))
        try:
            # NOTE(review): dict.get does not raise, so this fallback only
            # fires if `data` itself is None
            format_flag = data.get('format_flag')
        except:
            format_flag = 'explain'

        try:
            kwargs = data.get('kwargs')
        except:
            kwargs = {}

        # Coerce anything that is not a dict (including None) to {}
        if type(kwargs) != type({}):
            kwargs = {}

        if from_data is None:
            from_data = data.get('from_data')
        if from_data is None:
            from_data = data.get('when_data')
        if from_data is None:
            abort(400, 'No valid datasource given')

        # Not the fanciest semaphor, but should work since restplus is multi-threaded and this condition should rarely be reached
        while name in model_swapping_map and model_swapping_map[name] is True:
            time.sleep(1)

        mdb = mindsdb.Predictor(name=name)
        try:
            results = mdb.predict(when_data=from_data, **kwargs)
        except:
            # Presumably an API-compat fallback for predictor versions that
            # take `when` instead of `when_data` -- TODO confirm
            results = mdb.predict(when=from_data, **kwargs)

        return preparse_results(results, format_flag)