class Robotics:
    """Train and evaluate the ``human_activity`` predictor on CSV files."""

    def __init__(self):
        self.mindsDb = Predictor(name='human_activity')

    def train(self):
        """Learn ``target`` from ``train.csv``.

        Rows are ordered by ``time`` and grouped by ``id``, with a window
        size of 128; optional analysis is disabled.
        """
        print("model training started")
        learn_options = dict(
            from_data="train.csv",
            to_predict=['target'],
            order_by=['time'],
            window_size=128,
            group_by='id',
            disable_optional_analysis=True,
        )
        self.mindsDb.learn(**learn_options)
        print("model training completed")

    def predict_test(self):
        """Predict ``target`` for ``test.csv`` and report accuracy.

        Writes the predictions to ``test_pred.csv`` and the
        predicted-vs-real crosstab to ``test_final_cm.csv``; prints the
        crosstab and the accuracy rounded to a whole percentage.
        """
        print("test prediction started")
        y_real = list(pd.read_csv("test.csv")["target"])

        results = self.mindsDb.predict(when_data="test.csv")
        y_pred = [row['target'] for row in results]

        pd.DataFrame(y_pred).to_csv(
            path_or_buf="test_pred.csv", index=False, header=True)

        acc_score = accuracy_score(y_real, y_pred, normalize=True)
        acc_pct = round(acc_score * 100)

        # Rows are predicted labels, columns are the real labels.
        test_cm = pd.crosstab(pd.Series(y_pred), pd.Series(y_real))
        print(test_cm)
        test_cm.to_csv('test_final_cm.csv', header=True, index=True)

        print(f'Accuracy of : {acc_pct}%')
        print("test prediction completed")
def run(sample=False):
    """Train the ``german_data`` predictor and score it on the test file.

    ``sample`` is accepted but not used by this function.

    Returns:
        dict with the balanced accuracy, the name of the accuracy function,
        the backend name and the explanation of every predicted row.
    """
    backend = 'lightwood'
    test_path = 'processed_data/test.csv'

    mdb = Predictor(name='german_data')
    mdb.learn(to_predict='class',
              from_data='processed_data/train.csv',
              backend=backend)

    predictions = mdb.predict(when_data=test_path)
    predicted_val = [
        row.explanation['class']['predicted_value'] for row in predictions
    ]
    real_val = list(pd.read_csv(test_path)['class'])

    accuracy = balanced_accuracy_score(real_val, predicted_val)
    print(confusion_matrix(real_val, predicted_val))

    # Additional info for each transaction row.
    additional_info = [row.explanation for row in predictions]

    report = {}
    report['accuracy'] = accuracy
    report['accuracy_function'] = 'balanced_accuracy_score'
    report['backend'] = backend
    report['single_row_predictions'] = additional_info
    return report
def basic_test(backend='ludwig', use_gpu=True, ignore_columns=None, run_extra=False):
    """Smoke-test learn, reload, predict and model-data retrieval.

    Args:
        backend: forwarded to ``Predictor.learn`` as ``backend``.
        use_gpu: forwarded to both ``learn`` and ``predict``.
        ignore_columns: accepted for interface compatibility; this function
            does not use it. Defaults to ``None`` rather than a shared
            mutable list.
        run_extra: when true, first run every ``*.py`` file found in
            ``../functional_testing`` with ``python3``.

    Raises:
        AssertionError: if the first evaluation object is not a
            ``ProbabilityEvaluation`` or the model data has 5 keys or fewer.
    """
    if run_extra:
        for py_file in os.listdir('../functional_testing'):
            # Match on the extension only; a substring test would also pick
            # up names such as ``x.pyc`` or ``x.py.bak``.
            if py_file.endswith('.py'):
                os.system(f'python3 ../functional_testing/{py_file}')

    # Create & Learn
    mdb = Predictor(name='home_rentals_price')
    mdb.learn(
        to_predict='rental_price',
        from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
        backend=backend,
        stop_training_in_x_seconds=20,
        use_gpu=use_gpu)

    # Reload & Predict
    mdb = Predictor(name='home_rentals_price')
    prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu)

    # Test all different forms of output.
    # The prints are only for debugging; the point is to see whether the
    # interface crashes or not.
    print(prediction)
    print(prediction[0])

    for item in prediction:
        print(item)

    first_evaluation_type = type(list(prediction.evaluations.values())[0][0])
    print(first_evaluation_type)
    assert 'ProbabilityEvaluation' in str(first_evaluation_type)

    for p in prediction:
        print(p)

    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))

    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted metadata
    amd = mdb.get_model_data('home_rentals_price')
    # Make some simple assertions about it
    assert len(list(amd.keys())) > 5
class Insurance:
    """Wrapper around the ``insurance`` predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='insurance')

    def insurance_train(self):
        """Learn ``PolicyStatus`` from ``insu_replicate.csv``.

        The date columns below are passed as ``order_by`` together with
        ``window_size_samples=4``.
        """
        date_columns = [
            'DateRequested',
            'DateRqmtLastFollowed1',
            'DateRqmtLastFollowed2',
            'DateRqmtLastFollowedF',
            'DateRqmtLastFollowed3',
            'DateSignedOff',
        ]
        self.mindsDb.learn(
            to_predict='PolicyStatus',
            from_data="insu_replicate.csv",
            order_by=date_columns,
            window_size_samples=4,
        )
class Electricity:
    """Train and score the ``demand_30`` predictor for ``power_consumed``."""

    def __init__(self):
        self.mindsDb = Predictor(name='demand_30')

    def train(self):
        """Learn ``power_consumed`` from ``dataset/mdb_train.csv``.

        Rows are ordered by ``TimeStamp`` and grouped by ``customer`` with a
        window size of 84; optional analysis is disabled.
        """
        self.mindsDb.learn(
            to_predict='power_consumed',
            from_data='dataset/mdb_train.csv',
            window_size=84,
            order_by=['TimeStamp'],
            group_by=['customer'],
            disable_optional_analysis=True,
        )

    def test_predict(self):
        """Predict on the test file and print the R^2 score.

        The real values and the predictions are taken from the same file.
        Previously the real values were read from ``mdb_test.csv`` in the
        working directory while predictions used ``dataset/mdb_test.csv``,
        so the score could compare unrelated rows or fail on a missing file.
        """
        test_path = "dataset/mdb_test.csv"

        y_real = list(pd.read_csv(test_path)["power_consumed"])
        results = self.mindsDb.predict(when_data=test_path)
        y_pred = [row['power_consumed'] for row in results]

        print(r2_score(y_real, y_pred))
class Temperature:
    """Train and score the ``temperature`` predictor."""

    def __init__(self):
        self.mindsDb = Predictor(name='temperature')

    def temp_train(self):
        """Learn ``temperature`` from ``train.csv``, ordered by ``index``, window size 20."""
        self.mindsDb.learn(
            to_predict='temperature',
            from_data='train.csv',
            window_size=20,
            order_by='index',
        )

    def temp_predict(self):
        """Predict on ``test.csv``, save to ``test_pred.csv`` and print R^2."""
        y_real = pd.read_csv("test.csv")

        results = self.mindsDb.predict(when_data="test.csv")
        y_pred = [row['temperature'] for row in results]

        pd.DataFrame(y_pred).to_csv(
            path_or_buf="test_pred.csv", index=False, header=True)

        expected_values = y_real['temperature'].tolist()
        predicted_values = pd.Series(y_pred).tolist()
        print(r2_score(expected_values, predicted_values))
class Insurance:
    """Train and score the ``insurance1`` predictor for ``PolicyStatus``."""

    def __init__(self):
        self.mindsDb = Predictor(name='insurance1')

    def insurance_train(self):
        """Learn ``PolicyStatus`` from ``insu_train_indep_dep.csv``."""
        self.mindsDb.learn(
            to_predict='PolicyStatus',
            from_data='insu_train_indep_dep.csv',
        )

    def insurance_predict(self):
        """Predict on ``insu_test_indep_dep.csv`` and print the accuracy as a whole percentage."""
        df = pd.read_csv('insu_test_indep_dep.csv')
        y_real = list(df['PolicyStatus'])

        results = self.mindsDb.predict(when_data="insu_test_indep_dep.csv")
        y_pred = [row['PolicyStatus'] for row in results]

        acc_score = accuracy_score(y_real, y_pred, normalize=True)
        acc_pct = round(acc_score * 100)
        print(f'Accuracy of : {acc_pct}%')
# Trains a two-target predictor ('rental_price' and 'location') on the home
# rentals CSV with the lightwood backend, then predicts over the same file
# and calls epitomize() on every row without printing anything.
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time

HOME_RENTALS_URL = "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv"

mdb = Predictor(name='test_predictor')
mdb.learn(
    to_predict=['rental_price', 'location'],
    from_data=HOME_RENTALS_URL,
    use_gpu=True,
    stop_training_in_x_seconds=30,
    backend='lightwood',
)

p_arr = mdb.predict(when_data=HOME_RENTALS_URL)

for p in p_arr:
    exp_s = p.epitomize()
    #exp = p.explain()
    #print(exp)
    #print(exp_s)

# Disabled single-row prediction kept for reference:
# print(mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True)[0].explain())

#print(json.dumps(mdb.get_model_data('test_predictor')))
# Trains the 'sensor123' predictor on the sensor data CSV, predicts over the
# whole file and for one hand-written row, and prints the non-empty
# explanations.
from mindsdb import Predictor
import sys

SENSOR_DATA_URL = "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/sensor_data.csv"

mdb = Predictor(name='sensor123')
mdb.learn(
    to_predict='output',
    from_data=SENSOR_DATA_URL,
    use_gpu=False,
    stop_training_in_x_seconds=40,
)

p_arr = mdb.predict(when_data=SENSOR_DATA_URL)

# NOTE(review): 'sensor4' has no space while the other keys do; confirm
# against the column names of the dataset.
single_reading = {'sensor 1': 0.5, 'sensor 2': 2, 'sensor 3': 0, 'sensor4': 5}
pdct = mdb.predict(when=single_reading)
print(pdct)

for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explain()

    # Only rows that have an explanation for 'output' are printed.
    if len(exp['output']) > 0:
        print(exp)
        print(exp_s)
# Trains a predictor (stored under the name 'sensor123') on the home rentals
# CSV, prints the epitomized form of every prediction over the same file,
# then prints the stored model data.
from mindsdb import Predictor
import sys
import pandas as pd

HOME_RENTALS_URL = "https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv"

mdb = Predictor(name='sensor123')
mdb.learn(
    to_predict='rental_price',
    from_data=HOME_RENTALS_URL,
    use_gpu=True,
    stop_training_in_x_seconds=15,
)

p_arr = mdb.predict(when_data=HOME_RENTALS_URL)

for p in p_arr:
    exp_s = p.epitomize()
    #exp = p.explain()
    #print(exp)
    print(exp_s)

print(mdb.get_model_data('sensor123'))
# Trains the "heart-disease" predictor from a MySQL query result and prints
# the explanation of one single-row prediction.
from mindsdb import Predictor, MySqlDS

HEART_DISEASE_QUERY = "SELECT age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, target FROM sys.heartdisease"

# Get data
pg_ds = MySqlDS(
    query=HEART_DISEASE_QUERY,
    user="******",
    password="******",
    port=3306,
    host="localhost",
    table="heartdisease",
    database="sys",
)

# Train model
mdb = Predictor(name="heart-disease")
mdb.learn(from_data=pg_ds, to_predict="target")

# Get prediction
patient = {
    "age": "40",
    "sex": 0,
    "chol": 180,
    "fbs": 0,
    "thal": 3,
    "exang": 0,
}
prediction = mdb.predict(when=patient)
print(prediction[0].explanation)
# Trains 'home_rentals_price' with the backend named by the first command
# line argument (default 'ludwig') and prints one prediction.
from mindsdb import Predictor
import sys

backend = sys.argv[1] if len(sys.argv) > 1 else 'ludwig'

mdb = Predictor(name='home_rentals_price')
mdb.learn(
    to_predict='rental_price',
    from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",
    disable_optional_analysis=False,
    backend=backend,
)

prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

# The model data is fetched but only printed when the line below is enabled.
amd = mdb.get_model_data('home_rentals_price')
#print(amd)
# Trains the "home-rentals" predictor from a ClickHouse query result and
# prints the explanation of one single-row prediction.
from mindsdb import Predictor, ClickhouseDS

# NOTE(review): the table name ends in "rentalss"; confirm that this matches
# the table in the target database.
HOME_RENTALS_QUERY = "SELECT number_of_rooms,number_of_bathrooms,sqft,location,days_on_market,initial_price,neighborhood,rental_price FROM default.home_rentalss"

# Get data
pg_ds = ClickhouseDS(
    query=HOME_RENTALS_QUERY,
    password="******",
    port=8123,
)

# Train model
mdb = Predictor(name="home-rentals")
mdb.learn(from_data=pg_ds, to_predict="rental_price")

# Get prediction
listing = {"number_of_rooms": 3, 'initial_price': 2000}
prediction = mdb.predict(when=listing)
print(prediction[0].explanation)
# Forecasts 'people_count' for the mall traffic data, prints the RMSE of the
# forecast against the observed values and plots both series.
from sklearn.metrics import mean_squared_error
from mindsdb import Predictor
from helper import plotter
import pandas as pd

if __name__ == '__main__':
    train_data = pd.read_csv("mall_traffic_train.csv", index_col=False)
    test_data = pd.read_csv("mall_traffic_predict.csv", index_col=False)
    target = 'people_count'

    timeseries_settings = {
        'order_by': ['TimeStamp'],
        'window': 6,  # consider last hour worth of measurements
        'use_previous_target': True,
    }

    p = Predictor(name='mall_traffic')
    p.learn(
        from_data=train_data,
        to_predict=target,
        timeseries_settings=timeseries_settings,
    )

    forecast = p.predict(when_data=test_data)

    # The observed values are stored next to the predictions under the
    # '__observed_<target>' key of the result data.
    observed = forecast._data[f'__observed_{target}']
    predicted = forecast._data[f'{target}']

    mse = mean_squared_error(observed, predicted)
    print(f"\n\n[ Mall traffic ]\n\tRMSE: {round(mse**(1/2), 1)}\n\n")

    plotter(test_data['TimeStamp'], observed, predicted)
# Minimal example: learn 'rental_price' from the home rentals CSV, predict
# for a 300 sqft listing and print the stored model data.
from mindsdb import Predictor

# The path to the file to learn from (note: can be a url).
HOME_RENTALS_URL = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

# We tell mindsDB what we want to learn and from what data
mdb = Predictor(name='home_rentals_price')
mdb.learn(
    # the column we want to learn to predict given all the data in the file
    to_predict='rental_price',
    from_data=HOME_RENTALS_URL,
)

prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

amd = mdb.get_model_data('home_rentals_price')
print(amd)
# Trains 'photo_score_model12' to predict 'Score' from the image testing CSV
# and prints every prediction for the predict CSV.
from mindsdb import Predictor

mdb = Predictor(name='photo_score_model12')
mdb.learn(
    from_data="integration_testing/image_testing/train.csv",
    to_predict=['Score'],
)

training_banner = '------------------------------------------------------------Done training------------------------------------------------------------'
print(training_banner)

predicted = mdb.predict(
    when_data="integration_testing/image_testing/predict.csv")

output_banner = '------------------------------------------------------------Preidiction output------------------------------------------------------------'
print(output_banner)

for val in predicted:
    print(val)
# Trains 'suicide_model' to predict 'suicides_no' and prints one prediction.
from mindsdb import Predictor

mdb = Predictor(name='suicide_model')
mdb.learn(from_data="integration_testing/suicide.csv", to_predict='suicides_no')

# Use the model that was just trained to make predictions. The query used to
# go through Predictor(name='suicide_rates'), a name this script never
# trains, so it did not address the model learned above.
query = {
    'country': 'Greece',
    'year': 1981,
    'sex': 'male',
    'age': '35-54',
    'population': 300000,
}
result = mdb.predict(when=query)

# you can now print the results
print(result)
# Trains 'home_rentals_price' from the local example CSV with the backend
# named by the first command line argument (default 'ludwig'), then prints
# one prediction and the stored model data.
from mindsdb import Predictor
import sys

backend = sys.argv[1] if len(sys.argv) > 1 else 'ludwig'

mdb = Predictor(name='home_rentals_price')

#mdb.learn(to_predict='rental_price',from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",backend=backend)
mdb.learn(
    to_predict='rental_price',
    from_data="docs/examples/basic/home_rentals.csv",
    backend=backend,
)

prediction = mdb.predict(when={'sqft': 300})
print(prediction[0])

amd = mdb.get_model_data('home_rentals_price')
print(amd)
def run(sample):
    """Train the credit-default predictor and score it on the test file.

    Prints the mean prediction confidence for rows predicted as ``0`` and
    for all other rows, then the balanced accuracy.

    Args:
        sample: accepted but not used by this function.

    Returns:
        dict with the balanced accuracy, the name of the accuracy function
        and the backend name.
    """
    train_file = 'dataset/train.csv'
    test_file = 'dataset/test.csv'
    backend = 'lightwood'
    target = 'default.payment.next.month'

    def get_real_test_data():
        """Return the last column of every data row of the test file as ints."""
        # The context manager closes the file; the original left the handle
        # open. newline='' is the form the csv module documents for readers.
        with open(test_file, 'r', newline='') as handle:
            test_reader = csv.reader(handle)
            next(test_reader, None)  # skip the header row
            # Blank lines come back as empty lists; skip them instead of
            # failing on row[-1].
            return [int(row[-1]) for row in test_reader if row]

    def to_label(value):
        """Convert a predicted value to an int label, or 2 when it is not an integer."""
        try:
            return int(str(value))
        except ValueError:
            return 2

    target_val_real = get_real_test_data()

    #lightwood.config.config.CONFIG.HELPER_MIXERS = False
    mdb = Predictor(name='default_on_credit_dp4')
    mdb.learn(to_predict=target, from_data=train_file, backend=backend)

    predictions = mdb.predict(when_data=test_file)

    # Confidence sums per predicted class. The counters start slightly above
    # zero so the averages below never divide by zero.
    cfz = 0
    cfo = 0
    lcfz = 0.00001
    lcfo = 0.00001

    for p in predictions:
        if str(p[target]) == '0':
            cfz += p[f'{target}_confidence']
            lcfz += 1
        else:
            cfo += p[f'{target}_confidence']
            lcfo += 1

    print('Confidence for 0: ')
    print(cfz / lcfz)

    print('Confidence for 1: ')
    print(cfo / lcfo)

    target_val_predictions = [to_label(p[target]) for p in predictions]

    accuracy = balanced_accuracy_score(target_val_real, target_val_predictions)
    print(f'Balacned accuracy score of {accuracy}')

    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend,
    }
# Trains 'test_predictor' to predict 'neighborhood' with the lightwood
# backend and the 'use_selfaware_model' flag, then exercises single-row and
# bulk prediction and prints several output formats.
# NOTE(review): the statement layout below (which statements sit inside the
# loops) is reconstructed from a collapsed source; confirm it.
from mindsdb import Predictor
import sys
import pandas as pd
import json
import time

mdb = Predictor(name='test_predictor')
#'rental_price',
mdb.learn(to_predict=['neighborhood'],from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",use_gpu=False,stop_training_in_x_seconds=3000, backend='lightwood', unstable_parameters_dict={'use_selfaware_model':True})

# Single-row prediction; only the explanation of the first result is printed.
p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True, use_gpu=True)
e = p[0].explanation
print(e)

# Bulk prediction over the file that was used for training.
p_arr = mdb.predict(when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv', use_gpu=True)

# Reads the explanation of every row; the value is not used afterwards.
for p in p_arr:
    e = p.explanation

# This result is overwritten by the loop variable below before it is read.
p = mdb.predict(when={'number_of_rooms': 3, 'number_of_bathrooms': 2, 'neighborhood': 'south_side', 'sqft':2411}, run_confidence_variation_analysis=True, use_gpu=True)

# Print each bulk prediction in its different output forms.
for p in p_arr:
    exp_s = p.epitomize()
    exp = p.explanation
    print(exp_s)

    print(p.as_dict())
    print(p.as_list())
    print(p.raw_predictions())
# Trains the 'marvel' predictor to predict 'FIRST_APPEARANCE' from the
# spreadsheet. The prediction step is disabled.
from mindsdb import Predictor

mdb = Predictor(name='marvel')
mdb.learn(from_data="marvel-wikia.xlsx", to_predict='FIRST_APPEARANCE')

training_banner = '------------------------------------------------------------Done training------------------------------------------------------------'
print(training_banner)

# Disabled prediction example kept for reference:
# predicted = mdb.predict(when={ 'Date':'11/03/2020', 'Time':'18.00.00', 'NMHC_GT': 1360.0, 'AH': 0.655 })
# print('------------------------------------------------------------Preidiction output------------------------------------------------------------')
# for val in predicted:
#     print(val['CO_GT'])
#     print(val['CO_GT_confidence'])
df = self.transaction.input_data.validation_df elif mode == 'test': df = self.transaction.input_data.test_df X = [] for col in self.input_columns: X.append(self.le_arr[col].transform(df[col])) X = np.swapaxes(X, 1, 0) predictions = self.clf.predict(X) formated_predictions = {self.output_columns[0]: predictions} return formated_predictions predictor = Predictor(name='custom_model_test_predictor') dt_model = CustomDTModel() predictor.learn( to_predict='rental_price', from_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model) predictions = predictor.predict( when_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=dt_model)
# Loads 'test_predictor' by name and prints the explanation of one
# single-row prediction. The training step is disabled.
from mindsdb import Predictor
import sys
import pandas as pd
import json

mdb = Predictor(name='test_predictor')

# Disabled training and bulk prediction kept for reference:
# mdb.learn(to_predict=['rental_price', 'location'],from_data="https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv",use_gpu=True,stop_training_in_x_seconds=30, backend='ludwig')
# p_arr = mdb.predict(when_data='https://mindsdb-example-data.s3.eu-west-2.amazonaws.com/home_rentals.csv')
# for p in p_arr:
#     exp_s = p.epitomize()
#     #exp = p.explain()
#     #print(exp)
#     print(exp_s)

listing = {
    'number_of_rooms': 3,
    'number_of_bathrooms': 2,
    'neighborhood': 'south_side',
    'sqft': 2411,
}
rows = mdb.predict(when=listing, run_confidence_variation_analysis=True)
print(rows[0].explain())

#print(json.dumps(mdb.get_model_data('test_predictor')))