Beispiel #1
0
def run(sample=False):
    """Train the 'german_data' predictor and score it on the test CSV.

    The ``class`` column is learned from ``processed_data/train.csv`` with the
    lightwood backend, every row of ``processed_data/test.csv`` is predicted,
    and the confusion matrix is printed.

    Args:
        sample: Not read by this function.

    Returns:
        dict with the keys ``accuracy`` (balanced accuracy score),
        ``accuracy_function``, ``backend`` and ``single_row_predictions``
        (the ``explanation`` of every prediction row).
    """
    backend = 'lightwood'
    target = 'class'
    test_csv = 'processed_data/test.csv'

    predictor = Predictor(name='german_data')
    predictor.learn(
        to_predict=target,
        from_data='processed_data/train.csv',
        backend=backend,
    )

    rows = predictor.predict(when_data=test_csv)

    # Predicted label of every row, in prediction order.
    predicted_labels = []
    for row in rows:
        predicted_labels.append(row.explanation[target]['predicted_value'])

    # Ground-truth labels come from the same file that was predicted on.
    test_frame = pd.read_csv(test_csv)
    actual_labels = list(test_frame[target])

    score = balanced_accuracy_score(actual_labels, predicted_labels)

    matrix = confusion_matrix(actual_labels, predicted_labels)
    print(matrix)

    # Keep the full explanation of each row so the caller can inspect it.
    explanations = []
    for row in rows:
        explanations.append(row.explanation)

    return {
        'accuracy': score,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend,
        'single_row_predictions': explanations,
    }
Beispiel #2
0
class Robotics:
    """Train and evaluate the 'human_activity' predictor.

    Training reads ``train.csv``; evaluation reads ``test.csv`` and writes
    ``test_pred.csv`` and ``test_final_cm.csv`` to the working directory.
    """

    def __init__(self):
        self.mindsDb = Predictor(name='human_activity')

    def train(self):
        """Fit the predictor on ``train.csv`` to predict ``target``."""
        print("model training started")
        self.mindsDb.learn(from_data="train.csv",
                           to_predict=['target'],
                           order_by=['time'],
                           window_size=128,
                           group_by='id',
                           disable_optional_analysis=True)
        print("model training completed")

    def predict_test(self):
        """Predict ``test.csv``, save the predictions and report accuracy.

        Prints the predicted-vs-real cross tabulation and the accuracy as a
        rounded percentage; the cross tabulation is also saved to
        ``test_final_cm.csv`` and the raw predictions to ``test_pred.csv``.
        """
        print("test prediction started")
        y_real = list(pd.read_csv("test.csv")["target"])
        results = self.mindsDb.predict(when_data="test.csv")
        y_pred = [row['target'] for row in results]

        pd.DataFrame(y_pred).to_csv(path_or_buf="test_pred.csv",
                                    index=False,
                                    header=True)

        acc_score = accuracy_score(y_real, y_pred, normalize=True)
        acc_pct = round(acc_score * 100)

        # Build the cross tabulation once and reuse it for printing and
        # saving (it used to be computed twice from the same inputs).
        test_cm = pd.crosstab(pd.Series(y_pred), pd.Series(y_real))
        print(test_cm)
        test_cm.to_csv('test_final_cm.csv', header=True, index=True)

        print(f'Accuracy of : {acc_pct}%')
        print("test prediction completed")
Beispiel #3
0
def basic_test(backend='ludwig',
               use_gpu=True,
               ignore_columns=None,
               run_extra=False):
    """Smoke-test the Predictor learn/predict interface on home rentals data.

    Trains a 'home_rentals_price' predictor for a short time, reloads it by
    name, predicts a single row and exercises the different ways a prediction
    can be read. The prints exist for debugging only; the goal is to see
    whether the interface crashes.

    Args:
        backend: Backend passed to ``Predictor.learn``.
        use_gpu: Forwarded to both ``learn`` and ``predict``.
        ignore_columns: Not read by this function. Defaults to ``None``
            rather than a shared mutable list.
        run_extra: When true, first runs every ``.py`` file found in
            ``../functional_testing`` with ``python3``.

    Raises:
        AssertionError: If the first evaluation is not a
            ``ProbabilityEvaluation`` or the model data has 5 or fewer keys.
    """
    if run_extra:
        for py_file in os.listdir('../functional_testing'):
            # Match on the suffix: a substring test for '.py' would also
            # pick up names such as 'foo.pyc' or 'foo.py.bak'.
            if py_file.endswith('.py'):
                os.system(f'python3 ../functional_testing/{py_file}')

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(
        to_predict='rental_price',
        from_data=
        "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
        backend=backend,
        stop_training_in_x_seconds=20,
        use_gpu=use_gpu)

    # Reload & Predict
    mdb = Predictor(name='home_rentals_price')
    prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Test all different forms of output
    # No need to print them, we're just doing so for debugging purposes, we just want to see if the interface will crash or not

    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    # Look the evaluation type up once and reuse it for the print and the check.
    first_eval_type = type(list(prediction.evaluations.values())[0][0])
    print(first_eval_type)
    assert 'ProbabilityEvaluation' in str(first_eval_type)

    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data('home_rentals_price')
    # Make some simple assertions about it
    assert (5 < len(list(amd.keys())))
class Electricity:
    """Train and evaluate the 'demand_30' predictor on power-consumption data."""

    def __init__(self):
        self.mindsDb = Predictor(name='demand_30')

    def train(self):
        """Fit the predictor on ``dataset/mdb_train.csv`` to predict ``power_consumed``."""
        self.mindsDb.learn(to_predict='power_consumed',
                           from_data='dataset/mdb_train.csv',
                           window_size=84,
                           order_by=['TimeStamp'],
                           group_by=['customer'],
                           disable_optional_analysis=True)

    def test_predict(self):
        """Predict ``dataset/mdb_test.csv`` and print the R^2 score."""
        # Read the ground truth from the same file that is predicted on.
        # The labels used to come from "mdb_test.csv" in the working
        # directory while the predictions used "dataset/mdb_test.csv".
        test_path = "dataset/mdb_test.csv"
        y_real = list(pd.read_csv(test_path)["power_consumed"])
        results = self.mindsDb.predict(when_data=test_path)
        y_pred = [row['power_consumed'] for row in results]
        print(r2_score(y_real, y_pred))
Beispiel #5
0
class Temperature:
    """Train and evaluate the 'temperature' predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='temperature')

    def temp_train(self):
        """Fit the predictor on ``train.csv`` to predict ``temperature``."""
        self.mindsDb.learn(
            to_predict='temperature',
            from_data='train.csv',
            window_size=20,
            order_by='index',
        )

    def temp_predict(self):
        """Predict ``test.csv``, save the predictions and print the R^2 score."""
        actual_frame = pd.read_csv("test.csv")
        forecast_rows = self.mindsDb.predict(when_data="test.csv")
        forecast_values = [row['temperature'] for row in forecast_rows]

        # Persist the raw predictions next to the input file.
        pd.DataFrame(forecast_values).to_csv(
            path_or_buf="test_pred.csv",
            index=False,
            header=True,
        )

        expected = actual_frame['temperature'].tolist()
        produced = pd.Series(forecast_values).tolist()
        print(r2_score(expected, produced))
Beispiel #6
0
class Insurance:
    """Train and evaluate the 'insurance1' predictor on ``PolicyStatus``."""

    def __init__(self):
        self.mindsDb = Predictor(name='insurance1')

    def insurance_train(self):
        """Fit the predictor on ``insu_train_indep_dep.csv``."""
        self.mindsDb.learn(
            to_predict='PolicyStatus',
            from_data='insu_train_indep_dep.csv',
        )

    def insurance_predict(self):
        """Predict ``insu_test_indep_dep.csv`` and print the accuracy percentage."""
        test_frame = pd.read_csv('insu_test_indep_dep.csv')
        actual = list(test_frame['PolicyStatus'])

        rows = self.mindsDb.predict(when_data="insu_test_indep_dep.csv")
        predicted = [row['PolicyStatus'] for row in rows]

        fraction_correct = accuracy_score(actual, predicted, normalize=True)
        print(f'Accuracy of : {round(fraction_correct * 100)}%')
Beispiel #7
0
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time


# Create the predictor and train it to predict the 'neighborhood' column.
mdb = Predictor(name='test_predictor')
#'rental_price',
mdb.learn(
    to_predict=['neighborhood'],
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",
    use_gpu=False,
    stop_training_in_x_seconds=3000,
    backend='lightwood',
    unstable_parameters_dict={'use_selfaware_model': True},
)

# Single-row prediction: print the explanation of the first result.
p = mdb.predict(
    when={
        'number_of_rooms': 3,
        'number_of_bathrooms': 2,
        'neighborhood': 'south_side',
        'sqft': 2411,
    },
    run_confidence_variation_analysis=True,
    use_gpu=True,
)
e = p[0].explanation
print(e)

# Bulk prediction over the whole training file.
p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv',
    use_gpu=True,
)

# Touch the explanation of every row; the value itself is not used.
for p in p_arr:
    e = p.explanation

# Repeat the single-row prediction; the result is overwritten by the loop
# variable below, so only the call itself is exercised.
p = mdb.predict(
    when={
        'number_of_rooms': 3,
        'number_of_bathrooms': 2,
        'neighborhood': 'south_side',
        'sqft': 2411,
    },
    run_confidence_variation_analysis=True,
    use_gpu=True,
)

# Exercise every output form of each bulk prediction row.
for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explanation
    print(exp_s)

    print(p.as_dict())
    print(p.as_list())
    print(p.raw_predictions())
Beispiel #8
0
from mindsdb import Predictor

# Name the predictor, then tell it which column to learn and from what data.
mdb = Predictor(name='home_rentals_price')

# to_predict: the column we want to learn to predict given all the data in the file
# from_data: the path to the file where we can learn from (note: can be a url)
mdb.learn(
    to_predict='rental_price',
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
)

# Predict a single row described only by its square footage.
prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

# Fetch and show the stored model data for this predictor.
amd = mdb.get_model_data('home_rentals_price')
print(amd)
Beispiel #9
0
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time


# Create the predictor that the rest of this script trains and queries.
mdb = Predictor(name='test_predictor')

# Train on the home rentals CSV to predict two columns at once, then predict
# every row of that same file.
mdb.learn(to_predict=['rental_price', 'location'],from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",use_gpu=True,stop_training_in_x_seconds=30, backend='lightwood')
p_arr = mdb.predict(when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv')

# Call epitomize() on every prediction row; the result is not printed.
for p in p_arr:
    exp_s = p.epitomize()
    #exp = p.explain()
    #print(exp)
    #print(exp_s)
# The text below is a bare string literal, so the single-row prediction inside
# it is not executed.
'''
print(mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True)[0].explain())
'''
#print(json.dumps(mdb.get_model_data('test_predictor')))
Beispiel #10
0
from mindsdb import Predictor
import sys

# Train a predictor for the 'output' column of the sensor data set.
mdb = Predictor(name='sensor123')

mdb.learn(
    to_predict='output',
    from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv",
    use_gpu=False,
    stop_training_in_x_seconds=40,
)

# Bulk prediction over the same file that was used for training.
p_arr = mdb.predict(
    when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv',
)

# Single-row prediction.
# NOTE(review): 'sensor4' has no space, unlike the other three keys; confirm
# against the column names of the data set.
pdct = mdb.predict(
    when={
        'sensor 1': 0.5,
        'sensor 2': 2,
        'sensor 3': 0,
        'sensor4': 5,
    },
)
print(pdct)

# Print the explanations of every row whose 'output' explanation is non-empty.
for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explain()

    if len(exp['output']) == 0:
        continue

    print(exp)
    print(exp_s)

from mindsdb import Predictor, ClickhouseDS

# Get data: a ClickHouse data source defined by a SELECT query.
# NOTE(review): the table name ends in a double 's' ("home_rentalss");
# confirm it matches the table in ClickHouse.
pg_ds = ClickhouseDS(
    query="SELECT number_of_rooms,number_of_bathrooms,sqft,location,days_on_market,initial_price,neighborhood,rental_price FROM default.home_rentalss",
    password="******",
    port=8123,
)

# Train model on the query result to predict the rental price.
mdb = Predictor(name="home-rentals")
mdb.learn(
    from_data=pg_ds,
    to_predict="rental_price",
)

# Get prediction for a single row and show the explanation of the first result.
prediction = mdb.predict(
    when={
        "number_of_rooms": 3,
        'initial_price': 2000,
    },
)
print(prediction[0].explanation)
Beispiel #12
0
from mindsdb import Predictor

# Train a predictor for the 'Score' column of the image test set.
mdb = Predictor(name='photo_score_model12')

mdb.learn(
    from_data="integration_testing/image_testing/train.csv",
    to_predict=['Score'],
)
print('------------------------------------------------------------Done training------------------------------------------------------------')

# Predict every row of the prediction file and print each result.
predicted = mdb.predict(when_data="integration_testing/image_testing/predict.csv")
print('------------------------------------------------------------Preidiction output------------------------------------------------------------')
for val in predicted:
    print(val)
Beispiel #13
0
def run(sample):
    """Train the credit-default predictor and score it on the test CSV.

    The ``default.payment.next.month`` column is learned from
    ``dataset/train.csv`` with the lightwood backend. Every row of
    ``dataset/test.csv`` is then predicted; the mean confidence per predicted
    class and the balanced accuracy are printed.

    Args:
        sample: Not read by this function.

    Returns:
        dict with the keys ``accuracy`` (balanced accuracy score),
        ``accuracy_function`` and ``backend``.
    """
    train_file = 'dataset/train.csv'
    test_file = 'dataset/test.csv'
    target = 'default.payment.next.month'

    backend = 'lightwood'

    def get_real_test_data():
        """Return the last column of every data row of the test CSV as int."""
        # The context manager closes the file; the handle used to be leaked.
        with open(test_file, 'r', newline='') as handle:
            test_reader = csv.reader(handle)
            next(test_reader, None)  # skip the header row
            # Blank lines come back as empty rows and carry no label.
            return [int(row[-1]) for row in test_reader if row]

    target_val_real = get_real_test_data()

    #lightwood.config.config.CONFIG.HELPER_MIXERS = False
    mdb = Predictor(name='default_on_credit_dp4')

    mdb.learn(to_predict=target,
              from_data=train_file,
              backend=backend)

    predictions = mdb.predict(when_data=test_file)

    # Confidence sums and row counts per predicted class. The counters start
    # slightly above zero so the averages below never divide by zero.
    cfz = 0
    cfo = 0
    lcfz = 0.00001
    lcfo = 0.00001
    target_val_predictions = []
    for p in predictions:
        predicted = p[target]
        confidence = p[f'{target}_confidence']
        if str(predicted) == '0':
            cfz += confidence
            lcfz += 1
        else:
            cfo += confidence
            lcfo += 1

        try:
            target_val_predictions.append(int(str(predicted)))
        except ValueError:
            # Anything that is not an integer literal is mapped to the extra
            # class 2 so it always counts as a miss.
            target_val_predictions.append(2)

    print('Confidence for 0: ')
    print(cfz / lcfz)

    print('Confidence for 1: ')
    print(cfo / lcfo)

    accuracy = balanced_accuracy_score(target_val_real, target_val_predictions)
    print(f'Balacned accuracy score of {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
Beispiel #14
0
from sklearn.metrics import mean_squared_error
from mindsdb import Predictor
from helper import plotter
import pandas as pd

if __name__ == '__main__':
    # Column to forecast, and the column of the forecast data that is compared
    # against it below.
    target = 'people_count'
    observed_col = f'__observed_{target}'

    train_data = pd.read_csv("mall_traffic_train.csv", index_col=False)
    test_data = pd.read_csv("mall_traffic_predict.csv", index_col=False)

    timeseries_settings = {
        'order_by': ['TimeStamp'],
        'window': 6,  # consider last hour worth of measurements
        'use_previous_target': True,
    }

    p = Predictor(name='mall_traffic')
    p.learn(
        from_data=train_data,
        to_predict=target,
        timeseries_settings=timeseries_settings,
    )

    forecast = p.predict(when_data=test_data)

    observed = forecast._data[observed_col]
    predicted = forecast._data[target]

    # Report the root of the mean squared error, rounded to one decimal.
    mse = mean_squared_error(observed, predicted)
    rmse = round(mse ** 0.5, 1)
    print(f"\n\n[ Mall traffic ]\n\tRMSE: {rmse}\n\n")

    plotter(test_data['TimeStamp'], observed, predicted)
Beispiel #15
0
from mindsdb import Predictor, MySqlDS

# Get data: a MySQL data source defined by a SELECT query.
pg_ds = MySqlDS(
    query="SELECT age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, target FROM sys.heartdisease",
    user="******",
    password="******",
    port=3306,
    host="localhost",
    table="heartdisease",
    database="sys",
)

# Train model on the query result to predict the 'target' column.
mdb = Predictor(name="heart-disease")
mdb.learn(
    from_data=pg_ds,
    to_predict="target",
)

# Get prediction for a single row and show the explanation of the first result.
prediction = mdb.predict(
    when={
        "age": "40",
        "sex": 0,
        "chol": 180,
        "fbs": 0,
        "thal": 3,
        "exang": 0,
    },
)
print(prediction[0].explanation)
Beispiel #16
0
            df = self.transaction.input_data.validation_df
        elif mode == 'test':
            df = self.transaction.input_data.test_df

        X = []
        for col in self.input_columns:
            X.append(self.le_arr[col].transform(df[col]))

        X = np.swapaxes(X, 1, 0)

        predictions = self.clf.predict(X)

        formated_predictions = {self.output_columns[0]: predictions}

        return formated_predictions


# Train and predict with a custom backend object instead of a backend name.
predictor = Predictor(name='custom_model_test_predictor')

dt_model = CustomDTModel()

# The same CSV is used for training and for prediction.
data_url = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

predictor.learn(
    to_predict='rental_price',
    from_data=data_url,
    backend=dt_model,
)
predictions = predictor.predict(
    when_data=data_url,
    backend=dt_model,
)
Beispiel #17
0
from mindsdb import Predictor
import sys
import pandas as pd
import json

# Open the predictor by name; nothing in the executed part of this script
# trains it.
mdb = Predictor(name='test_predictor')
# The text below is a bare string literal, so the training and bulk-prediction
# code inside it is not executed.
'''
mdb.learn(to_predict=['rental_price', 'location'],from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",use_gpu=True,stop_training_in_x_seconds=30, backend='ludwig')
p_arr = mdb.predict(when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv')

for p in p_arr:
    exp_s = p.epitomize()
    #exp = p.explain()
    #print(exp)
    print(exp_s)
'''
# Predict a single row with confidence variation analysis enabled and print
# the explain() output of the first result.
print(
    mdb.predict(when={
        'number_of_rooms': 3,
        'number_of_bathrooms': 2,
        'neighborhood': 'south_side',
        'sqft': 2411
    },
                run_confidence_variation_analysis=True)[0].explain())

#print(json.dumps(mdb.get_model_data('test_predictor')))