def confidence_suffle(columns, df_train, df_test, acc_score, to_predict):
    """Ablation sanity-check for prediction confidence.

    Trains a baseline predictor on the full data, then retrains once per
    randomly chosen column with that single column removed, and finally once
    more with every previously removed column dropped at the same time.
    Prints a warning for every single-column run whose confidence/accuracy
    ordering violates the expectation baseline >= single-drop >= multi-drop.

    Args:
        columns: candidate column names to ablate.
        df_train: training DataFrame.
        df_test: test DataFrame (must contain `to_predict`).
        acc_score: accuracy function of the form f(y_true, y_pred).
        to_predict: name of the target column.
    """
    # Fixed seed so the randomly chosen ablation columns are reproducible.
    random.seed(2)
    confidences_missing_1 = []
    accurcy_missing_1 = []
    columns_missing_1 = []
    # Baseline: train and predict with all columns present.
    predictor = mindsdb.Predictor(name='confidence_test')
    predictor.learn(from_data=df_train, to_predict=to_predict, stop_training_in_x_seconds=10)
    predictions = predictor.predict(when_data=df_test)
    explainations = [x.explanation for x in predictions]
    normal_confidence = np.mean(np.array( [x[to_predict]['confidence'] for x in explainations] ))
    norma_accuracy = acc_score(list(df_test[to_predict]), [x[to_predict]['predicted_value'] for x in explainations])
    previously_removed = []
    # NOTE(review): max(len(columns), 2) forces at least two iterations even
    # for a single-column list, and random.choice samples *with* replacement,
    # so the same column can land in previously_removed more than once —
    # presumably min() / sampling without replacement was intended; TODO confirm.
    for i in range(max(len(columns),2)):
        # Retrain from scratch with one randomly chosen column removed.
        remove_columns = [random.choice(columns)]
        previously_removed.append(remove_columns[0])
        columns_missing_1.append(remove_columns)
        train = df_train.drop(columns=remove_columns)
        test = df_test.drop(columns=remove_columns)
        predictor = mindsdb.Predictor(name='confidence_test')
        predictor.learn(from_data=train, to_predict=to_predict, stop_training_in_x_seconds=10)
        predictions = predictor.predict(when_data=test)
        explainations = [x.explanation for x in predictions]
        confidences_missing_1.append(np.mean(np.array([x[to_predict]['confidence'] for x in explainations])))
        accurcy_missing_1.append(acc_score(list(test[to_predict]), [x[to_predict]['predicted_value'] for x in explainations]))
    # Final run: drop every column removed in any iteration above at once.
    # NOTE(review): if previously_removed contains duplicates, DataFrame.drop
    # may raise — verify against the sampling note above.
    train = df_train.drop(columns=previously_removed)
    test = df_test.drop(columns=previously_removed)
    predictor = mindsdb.Predictor(name='confidence_test')
    predictor.learn(from_data=train, to_predict=to_predict, stop_training_in_x_seconds=10)
    predictions = predictor.predict(when_data=test)
    explainations = [x.explanation for x in predictions]
    multiple_removed_confidence = np.mean(np.array([x[to_predict]['confidence'] for x in explainations]))
    multiple_removed_accuracy = acc_score(list(test[to_predict]), [x[to_predict]['predicted_value'] for x in explainations])
    # Report inconsistencies between the baseline, single-drop and multi-drop runs.
    for i in range(len(confidences_missing_1)):
        conf = confidences_missing_1[i]
        acc = accurcy_missing_1[i]
        missing = columns_missing_1[i]
        if conf >= normal_confidence:
            print(f'Got confidence of {conf} with missing columns {missing} which is bigger than the normal confidence {normal_confidence}')
        if conf <= multiple_removed_confidence:
            print(f'Got confidence of {conf} with missing columns {missing} which is smaller than the {multiple_removed_confidence} confidence with {previously_removed} columns removed')
        if acc >= norma_accuracy:
            print(f'Got accuracy of {acc} with missing columns {missing} which is bigger than the normal accuracy {norma_accuracy}')
        if acc <= multiple_removed_accuracy:
            print(f'Got accuracy of {acc} with missing columns {missing} which is smaller than the {multiple_removed_accuracy} accuracy with {previously_removed} columns removed')
def basic_test(backend='lightwood',use_gpu=True, run_extra=False, IS_CI_TEST=False):
    """End-to-end smoke test of the native Predictor API on the home-rentals
    dataset: learn, reload by name, predict from a URL and from a dict, and
    exercise every prediction-output accessor.

    With `run_extra`, first executes every functional-testing script (failing
    fast on a non-zero exit status) and also exercises model renaming.
    """
    mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST
    if run_extra:
        for py_file in [x for x in os.listdir('../functional_testing') if '.py' in x]:
            # Skip data source tests since installing dependencies is annoying
            # @TODO: Figure out a way to make travis install required dependencies on osx
            if any(x in py_file for x in ['all_data_sources', 'custom_model']):
                continue
            code = os.system(f'python3 ../functional_testing/{py_file}')
            if code != 0:
                raise Exception(f'Test failed with status code: {code} !')

    # Create & Learn
    to_predict = 'rental_price'
    mdb = mindsdb.Predictor(name='home_rentals_price')
    mdb.learn(to_predict=to_predict,from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",backend=backend, stop_training_in_x_seconds=120,use_gpu=use_gpu)

    # Reload & Predict
    model_name = 'home_rentals_price'
    if run_extra:
        # Renaming is only exercised in the extended run.
        mdb.rename_model('home_rentals_price', 'home_rentals_price_renamed')
        model_name = 'home_rentals_price_renamed'
    mdb = mindsdb.Predictor(name=model_name)

    # Try predicting from a file and from a dictionary
    prediction = mdb.predict(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", use_gpu=use_gpu)
    mdb.test(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",accuracy_score_functions=r2_score,predict_args={'use_gpu': use_gpu})
    prediction = mdb.predict(when={'sqft':300}, use_gpu=use_gpu)

    # Test all different forms of output
    # No need to print them in order to run these checks, we're just doing so for quick-debugging purposes, we just want to see if the interfaces will crash when we call them
    print(prediction)
    print(prediction[0])
    for item in prediction:
        print(item)
    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))
    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print(prediction[0].explanation)
    print(prediction[0].raw_predictions())
    print('\n\n')

    # See if we can get the adapted model data
    amd = mdb.get_model_data(model_name)
    test_adapted_model_data(amd, to_predict)
def basic_test(backend='lightwood', use_gpu=True, IS_CI_TEST=False): mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST # Create & Learn to_predict = 'rental_price' mdb = mindsdb.Predictor(name='home_rentals_price') mdb.learn( to_predict=to_predict, from_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", backend=backend, stop_training_in_x_seconds=120, use_gpu=use_gpu) # Reload & Predict model_name = 'home_rentals_price' mdb = mindsdb.Predictor(name=model_name) # Try predicting from a file and from a dictionary prediction = mdb.predict( when_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", use_gpu=use_gpu) mdb.test( when_data= "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", accuracy_score_functions=r2_score, predict_args={'use_gpu': use_gpu}) prediction = mdb.predict(when={'sqft': 300}, use_gpu=use_gpu) # Test all different forms of output # No need to print them in order to run these checks, we're just doing so for quick-debugging purposes, we just want to see if the interfaces will crash when we call them print(prediction) print(prediction[0]) for item in prediction: print(item) for p in prediction: print(p) print(prediction[0].as_dict()) print(prediction[0].as_list()) print(prediction[0]['rental_price_confidence']) print(type(prediction[0]['rental_price_confidence'])) print('\n\n========================\n\n') print(prediction[0].explain()) # Warning: Deprecated explainer print(prediction[0].explanation) print(prediction[0].raw_predictions()) print('\n\n') # See if we can get the adapted model data amd = mdb.get_model_data(model_name) test_adapted_model_data(amd, to_predict)
def basic_test(backend='lightwood',use_gpu=True,ignore_columns=[], run_extra=False, IS_CI_TEST=False):
    """End-to-end smoke test of the native Predictor API on the home-rentals
    dataset (variant that also checks the type of the returned probability
    evaluations).

    NOTE(review): `ignore_columns` is accepted but never used in this body.
    """
    mindsdb.CONFIG.IS_CI_TEST = IS_CI_TEST
    if run_extra:
        for py_file in [x for x in os.listdir('../functional_testing') if '.py' in x]:
            # Fixed: failures of the extra functional-testing scripts were
            # silently ignored; fail fast on a non-zero exit status, matching
            # the sibling basic_test implementation.
            exit_code = os.system(f'python3 ../functional_testing/{py_file}')
            if exit_code != 0:
                raise Exception(f'Test failed with status code: {exit_code} !')

    # Create & Learn
    to_predict = 'rental_price'
    mdb = mindsdb.Predictor(name='home_rentals_price')
    mdb.learn(to_predict=to_predict,from_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv",backend=backend, stop_training_in_x_seconds=20,use_gpu=use_gpu)

    # Reload & Predict
    model_name = 'home_rentals_price'
    if run_extra:
        # Renaming is only exercised in the extended run.
        mdb.rename_model('home_rentals_price', 'home_rentals_price_renamed')
        model_name = 'home_rentals_price_renamed'
    mdb = mindsdb.Predictor(name=model_name)

    # Try predicting from a file and from a dictionary
    prediction = mdb.predict(when_data="https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv", use_gpu=use_gpu)
    prediction = mdb.predict(when={'sqft':300}, use_gpu=use_gpu)

    # Test all different forms of output
    # No need to print them in order to run these checks, we're just doing so for quick-debugging purposes, we just want to see if the interfaces will crash when we call them
    print(prediction)
    print(prediction[0])
    for item in prediction:
        print(item)
    # The per-column evaluations are expected to be ProbabilityEvaluation objects.
    print(type(list(prediction.evaluations.values())[0][0]))
    assert('ProbabilityEvaluation' in str(type(list(prediction.evaluations.values())[0][0])))
    for p in prediction:
        print(p)
    print(prediction[0].as_dict())
    print(prediction[0].as_list())
    print(prediction[0]['rental_price_confidence'])
    print(type(prediction[0]['rental_price_confidence']))
    print('\n\n========================\n\n')
    print(prediction[0].explain())
    print('\n\n')

    # See if we can get the adapted model data
    amd = mdb.get_model_data(model_name)
    test_adapted_model_data(amd, to_predict)
def prediction(uuid):
    """Flask endpoint: run predictions with the model named `uuid`.

    Expects a JSON POST body with 'selectedFeature' (rows to predict on,
    nested under a 'data' key) and 'selectedTarget' (target column name under
    'value').  Responds with the predicted values and their confidences; any
    failure is collapsed into a generic 500 by the bare except below.
    """
    try:
        model_uuid = uuid
        req = request.get_json(force=True)
        if request.method == 'POST':
            dt = req.copy()
            # The rows arrive nested under selectedFeature['data']; they are
            # round-tripped through json so pandas can parse them as records.
            selectedFeature = dt.get('selectedFeature')
            selectedFeature = json.dumps(selectedFeature['data'])
            selectedTarget = dt.get('selectedTarget')
            targetname = selectedTarget['value']
            df = pd.read_json(path_or_buf=selectedFeature, orient='records')
            mdb = mindsdb.Predictor(name=model_uuid)
            result = mdb.predict(when_data=df)
            # NOTE(review): if neither key below is found, predictedValues /
            # confidance remain unbound and the resulting NameError is
            # swallowed by the bare except as a 500 — TODO confirm intended.
            for x, y in result.data.items():
                if 'model_' + targetname == x:
                    predictedValues = y
                elif targetname + '_model_confidence' == x:
                    confidance = y
            data = {"values": predictedValues, "confidance": confidance}
            response = make_response(jsonify({
                "success": True,
                "data": data
            }), 200)
            response.headers["Content-Type"] = 'application/json'
            return response
        else:
            # Any non-POST request is reported as not found.
            response = make_response(jsonify({"success": False}), 404)
            response.headers["Content-Type"] = 'application/json'
            return response
    except:
        # NOTE(review): bare except hides the real failure; consider logging it.
        response = make_response(jsonify({"success": False}), 500)
        response.headers["Content-Type"] = 'application/json'
        return response
def run():
    """Train a graduate-admission predictor and report its R^2 score."""
    backend = 'lightwood'
    target = 'Chance of Admit '  # note the trailing space in the column name
    predictor = mindsdb.Predictor(name='Admission_prediction_model')
    predictor.learn(from_data='dataset/train.csv',
                    to_predict=target,
                    backend=backend,
                    disable_optional_analysis=True)
    rows = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})
    predicted = [row[target] for row in rows]
    actual = list(pd.read_csv('dataset/test.csv', usecols=[target])[target])
    accuracy = r2_score(actual, predicted)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run():
    """Train a credit-card fraud classifier and report balanced accuracy
    together with the per-row explanations."""
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='cc_fraud')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='Class',
                    backend=backend,
                    window_size=5)
    rows = predictor.predict(when_data='processed_data/test.csv')
    predicted = [int(row['Class']) for row in rows]
    actual = [int(v) for v in pd.read_csv('processed_data/test.csv')['Class']]
    accuracy = balanced_accuracy_score(actual, predicted)
    # show additional info for each transaction row
    explanations = [row.explanation for row in rows]
    return {
        'accuracy': accuracy,
        'backend': backend,
        'additional info': explanations
    }
def transformation():
    """SageMaker inference entrypoint: parse the incoming request body,
    predict with the 'mdbp' model and return prediction + confidence as JSON.
    Unsupported content types are rejected with HTTP 415.
    """
    # Avoid mindsdb storage path write access
    mindsdb.CONFIG.SAGEMAKER = 'True'
    mindsdb.CONFIG.MINDSDB_STORAGE_PATH = model_path
    try:
        when_data = parse_data(flask.request.content_type, flask.request)
    except ValueError:
        return flask.Response(
            response='This predictor supports JSON,CSV and Excel data',
            status=415,
            mimetype='text/plain')
    print('Invoked with {} records'.format(when_data))
    result = mindsdb.Predictor(name='mdbp').predict(when_data=when_data)
    cconfidence = [x['Class_confidence'] for x in result]
    # NOTE(review): the prediction is taken from result[0] but the confidence
    # from the *last* row (cconfidence[-1]) — confirm this pairing is
    # intentional for multi-row requests.
    response = {
        'prediction': str(result[0]),
        'class_confidence': cconfidence[-1]
    }
    print('Response prediction: {}'.format(response['prediction']))
    return flask.Response(response=json.dumps(response),
                          status=200,
                          mimetype='application/json')
def run():
    """Train a 'Deaths' predictor on the corona data and report accuracy."""
    backend = 'lightwood'
    target = 'Deaths'
    mdb = mindsdb.Predictor(name='corona-data')
    # We tell the Predictor what column or key we want to learn and from what data
    mdb.learn(from_data="processed_data/train.csv",
              to_predict=target,
              backend=backend,
              use_gpu=False)
    test_df = pd.read_csv('processed_data/test.csv')
    predictions = mdb.predict(when_data='processed_data/test.csv')
    # Compare stringified predictions against stringified ground truth.
    results = [str(x[target]) for x in predictions]
    real = list(map(str, list(test_df[target])))
    accuracy = accuracy_score(real, results)
    print(accuracy)
    return {
        'accuracy': accuracy,
        # Fixed: this was mislabelled 'r2_score' although the value above is
        # computed with accuracy_score.
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
def run():
    """Train a grouped time-series model on robot telemetry and report R^2."""
    mdb = mindsdb.Predictor(name='robotic_failure')
    backend = 'lightwood'
    mdb.learn(from_data='dataset/train.csv',
              to_predict=['target'],
              order_by=['time'],
              window_size=14,
              group_by='id',
              disable_optional_analysis=True,
              backend=backend)
    # Fixed: predictions were requested via `when='test.csv'`; `when` takes a
    # dict of fixed values while a file of rows goes through `when_data`.
    # Also point at the same file the ground truth is read from below.
    predictions = mdb.predict(when_data='dataset/test.csv')
    pred_val = [x['target'] for x in predictions]
    real_val = list(pd.read_csv(open('dataset/test.csv', 'r'))['target'])
    accuracy = r2_score(real_val, pred_val)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        # Fixed: label previously claimed 'balanced_accuracy_score' although
        # r2_score is what is computed above.
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run():
    """Train a hotel-booking cancellation classifier and report balanced
    accuracy plus per-row explanations."""
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='hotel_booking')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='is_canceled',
                    backend=backend,
                    disable_optional_analysis=True)
    test_df = pd.read_csv('processed_data/test.csv')
    rows = predictor.predict(
        when_data='processed_data/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})
    predicted = [str(row['is_canceled']) for row in rows]
    actual = [str(v) for v in test_df['is_canceled']]
    accuracy = balanced_accuracy_score(actual, predicted)
    # show additional info for each transaction row
    explanations = [row.explanation for row in rows]
    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend,
        'additional_info': explanations
    }
def run(sample):
    """Train a 'diagnosis' classifier and report balanced accuracy.

    NOTE: `sample` is part of the runner interface but unused here.
    """
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='cancer_model')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='diagnosis',
                    use_gpu=True,
                    backend=backend,
                    stop_training_in_x_seconds=10,
                    equal_accuracy_for_all_output_categories=True)
    test_df = pd.read_csv('processed_data/test.csv')
    rows = predictor.predict(
        when_data='processed_data/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})
    predicted = [str(row['diagnosis']) for row in rows]
    actual = [str(v) for v in test_df['diagnosis']]
    accuracy = balanced_accuracy_score(actual, predicted)
    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
def run(sample):
    """Train a CIFAR image classifier and report plain accuracy.

    When `sample` is truthy, only the 'fish' superclass subset is used so the
    run stays fast.
    """
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    if sample:
        train_df = train_df.loc[train_df['superclass'] == 'fish'].reset_index()
        test_df = test_df.loc[test_df['superclass'] == 'fish'].reset_index()
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='CIFRAR_Model')
    predictor.learn(from_data=train_df,
                    to_predict=['class'],
                    ignore_columns='index',
                    advanced_args={'use_selfaware_model': False, 'force_disable_cache': False})
    # Fixed: the predict call had been commented out, leaving `predictions`
    # undefined and crashing with NameError on the next line.
    predictions = predictor.predict(when_data=test_df)
    predicted_class = list(map(lambda x: x['class'], predictions))
    real_class = list(test_df['class'])
    acc = accuracy_score(real_class, predicted_class) * 100
    print(f'Log loss accuracy of {acc}% for classes !')
    return {
        'accuracy': acc,
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
def run():
    """Train a 'cnt' demand model and report log-R^2 and pct_error scores."""
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='lbs')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='cnt',
                    backend=backend,
                    window_size=5)
    rows = predictor.predict(when_data='processed_data/test.csv')
    predicted = [int(row['cnt']) for row in rows]
    actual = [int(v) for v in pd.read_csv(open('processed_data/test.csv', 'r'))['cnt']]

    def _safe_log(values):
        # log of non-positive counts is undefined; map those entries to 0
        return [math.log(v) if v > 0 else 0 for v in values]

    accuracy = r2_score(_safe_log(actual), _safe_log(predicted))
    print(f'Got an r2_score for the log predictions of: {accuracy}')
    accuracy = pct_error(actual, predicted, 0.05)
    print(
        f'Got a percentage accuracy score with error-margin 5% of: {accuracy}')
    accuracy = pct_error(actual, predicted)
    print(f'Got a percentage accuracy score of: {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'pct_error_0',
        'backend': backend
    }
def post(self, name):
    '''Queries predictor

    Reads `when`, `format_flag` and `kwargs` from the JSON body, waits for
    any in-flight model swap on this name, then predicts with confidence
    variation analysis enabled.
    '''
    global model_swapping_map
    # Fixed: dict.get never raises on a missing key, so the old
    # try/except-based 'explain' default was dead code and format_flag came
    # through as None.  request.json can itself be None, hence `or {}`.
    data = request.json or {}
    when = data.get('when') or {}
    format_flag = data.get('format_flag') or 'explain'
    kwargs = data.get('kwargs')
    if not isinstance(kwargs, dict):
        kwargs = {}
    # Not the fanciest semaphor, but should work since restplus is multi-threaded and this condition should rarely be reached
    while name in model_swapping_map and model_swapping_map[name] is True:
        time.sleep(1)
    mdb = mindsdb.Predictor(name=name)
    results = mdb.predict(when=when, run_confidence_variation_analysis=True, **kwargs)
    return preparse_results(results, format_flag)
def run(sample):
    """Train an IMDB sentiment classifier and report its accuracy.

    `sample` switches to the smaller *_sample.tsv fixtures.
    """
    if sample:
        train_file, test_file = 'train_sample.tsv', 'test_sample.tsv'
    else:
        train_file, test_file = 'train.tsv', 'test.tsv'
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='imdb_predictor_x')
    predictor.learn(from_data=train_file, to_predict=['sentiment'])
    rows = predictor.predict(when_data=test_file)
    predicted = [row['sentiment'] for row in rows]
    # Ground truth is the second column of the tab-separated test file,
    # skipping the header row.
    actual = []
    with open(test_file, 'r') as fp:
        tsv = csv.reader(fp, delimiter='\t')
        next(tsv, None)
        for record in tsv:
            actual.append(record[1])
    acc = accuracy_score(actual, predicted) * 100
    print(f'Accuracy of {acc}% !')
    return {
        'accuracy': acc,
        'accuracy_function': 'accuracy_score',
        'backend': backend
    }
def run():
    """Train an 'Attrition' classifier with per-category output weighting and
    report balanced accuracy."""
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='employee_retention_model')
    # 'Yes' outcomes are weighted higher than 'No' (1 vs 0.7).
    predictor.learn(from_data='dataset/train.csv',
                    to_predict='Attrition',
                    backend=backend,
                    output_categories_importance_dictionary={'Yes': 1, 'No': 0.7},
                    disable_optional_analysis=True)
    test_df = pd.read_csv('dataset/test.csv')
    rows = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})
    predicted = [str(row['Attrition']) for row in rows]
    actual = [str(v) for v in test_df['Attrition']]
    accuracy = balanced_accuracy_score(actual, predicted)
    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
def run():
    """Train a 'Churn' classifier, report accuracy and per-row explanations."""
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='employee_retention_model')
    # 'Yes' outcomes are weighted higher than 'No' (1 vs 0.5).
    predictor.learn(from_data='dataset/train.csv',
                    to_predict='Churn',
                    backend=backend,
                    output_categories_importance_dictionary={'Yes': 1, 'No': 0.5})
    test_df = pd.read_csv('dataset/test.csv')
    rows = predictor.predict(
        when_data='dataset/test.csv',
        unstable_parameters_dict={'always_use_model_predictions': True})
    # Predicted values are read out of each row's explanation payload.
    predicted = [row.explanation['Churn']['predicted_value'] for row in rows]
    actual = [str(v) for v in test_df['Churn']]
    accuracy = accuracy_score(actual, predicted)
    # show additional info for each transaction row
    explanations = [row.explanation for row in rows]
    return {
        'accuracy': accuracy,
        'accuracy_function': 'accuracy_score',
        'backend': backend,
        'single_row_predictions': explanations
    }
def threaded_task(df, model_uuid, targetname):
    """Background job: train a predictor named `model_uuid` on `df` for the
    `targetname` column, then export the trained model to disk."""
    try:
        mdb = mindsdb.Predictor(name=model_uuid)
        mdb.learn(from_data=df, to_predict=targetname)
        mdb.export_model()
    except ValueError as err:
        # Fixed: previously printed the ValueError *class* object instead of
        # the caught exception instance.
        print(err)
def learn(name, from_data, to_predict, kwargs):
    '''
    Run a learn job in a subprocess: signal handling only works in the main
    thread (raises "ValueError: signal only works in main thread" otherwise).
    Possibly better suited to a celery worker.
    '''
    predictor = mindsdb.Predictor(name=name)
    # Helper mixers are enabled on every platform except Windows.
    if sys.platform not in ('win32', 'cygwin', 'windows'):
        lightwood.config.config.CONFIG.HELPER_MIXERS = True
    predictor.learn(from_data=from_data, to_predict=to_predict, **kwargs)
def run():
    """Train an SO2 air-quality predictor and return per-row explanations."""
    predictor = mindsdb.Predictor(name='air_pl')
    predictor.learn(from_data='processed_data/train.csv', to_predict='SO2')
    rows = predictor.predict(when_data='processed_data/test.csv')
    # show additional info for each transaction row
    return {'additional info': [row.explanation for row in rows]}
def run(sample=False):
    """Train a 'price' regressor, check how often the predicted confidence
    intervals contain the truth, and report R^2.

    NOTE: `sample` is part of the runner interface but unused here.
    """
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='wineq')
    predictor.learn(from_data='processed_data/train.csv',
                    to_predict='price',
                    backend=backend,
                    ignore_columns=['no'],
                    use_gpu=True)
    rows = predictor.predict(when_data=pd.read_csv('processed_data/test.csv'))
    print(rows[0].explanation)
    intervals = [
        row.explanation['price']['explanation']['confidence_interval']
        for row in rows
    ]
    predicted = [row.explanation['price']['predicted_value'] for row in rows]
    actual = list(pd.read_csv(open('processed_data/test.csv', 'r'))['price'])
    # Missing ground-truth prices are treated as 0.
    actual = [v if str(v) != 'nan' else 0 for v in actual]
    # Count how many intervals actually contain the ground-truth value.
    correct = 0
    incorrect = 0
    for idx, (low, high) in enumerate(intervals):
        if low < actual[idx] < high:
            correct += 1
        else:
            incorrect += 1
    print(
        f'Out of all predictions {correct} of the intervals contained the actual value and {incorrect} did not !'
    )
    accuracy = r2_score(actual, predicted)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run(sample=False):
    """Train a rental-price regressor, then check how predictions degrade
    when several informative columns are withheld at predict time.

    NOTE(review): `sample` is accepted but unused in this body.
    """
    backend='lightwood'
    lightwood.config.config.CONFIG.HELPER_MIXERS = False
    # Instantiate a mindsdb Predictor
    mdb = mindsdb.Predictor(name='home_rentals')
    # We tell the Predictor what column or key we want to learn and from what data
    #mdb.breakpoint = 'DataAnalyzer'
    train_df = pd.read_csv('dataset/train.csv')
    #train_df = train_df.drop(columns=['initial_price'])
    #'''
    mdb.learn(
        from_data=train_df, # the path to the file where we can learn from, (note: can be url)
        to_predict='rental_price' # the column we want to learn to predict given all the data in the file
        ,stop_training_in_x_seconds=120
        ,use_gpu=False
    )
    #'''
    test_df = pd.read_csv('dataset/test.csv')
    # Predict twice: once with several informative columns dropped, once with
    # the full test set, to compare scores under missing data.
    for drop_cols in [['number_of_rooms','number_of_bathrooms','location','days_on_market', 'neighborhood'], []]:
        print(f'Predicting without columns: {drop_cols}')
        predictions = mdb.predict(when_data=test_df.drop(columns=drop_cols))
        pred_val = [x.explain()['rental_price']['predicted_value'] for x in predictions]
        real_val = list(pd.read_csv(open('dataset/test.csv', 'r'))['rental_price'])
        # Missing ground-truth prices are treated as 0.
        real_val = [x if str(x) != 'nan' else 0 for x in real_val]
        # R^2 on log-values; non-positive prices are mapped to 0.
        accuracy = r2_score([math.log(x) if x > 0 else 0 for x in real_val], [math.log(x) if x > 0 else 0 for x in pred_val])
        print(f'Got an r2_score for the log predictions of: {accuracy}')
        accuracy = pct_error(real_val, pred_val, 0.05)
        print(f'Got a percentage accuracy score with error-margin 5% of: {accuracy}')
        accuracy = pct_error(real_val, pred_val)
        print(f'Got a percentage accuracy score of: {accuracy}')
        print('\n\n------------------------\n\n')
    # Returns the metrics of the *last* loop iteration (no columns dropped).
    return {
        'accuracy': accuracy
        ,'accuracy_function': 'pct_error_0'
        ,'backend': backend
    }
def run():
    """Train a plant-pathology image classifier and report balanced accuracy."""
    train_file = 'data_set/train.csv'
    test_file = 'data_set/test.csv'
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='plant_pathology')
    train_df = pd.read_csv(train_file)
    predictor.learn(from_data=train_df,
                    to_predict=['class'],
                    disable_optional_analysis=True,
                    use_gpu=False,
                    backend=backend,
                    stop_training_in_x_seconds=round((3600 * 2)))
    test_df = pd.read_csv(test_file)
    # Fixed: the flag key was spelled 'always_use_model_prediction' (missing
    # final 's'), which does not match the 'always_use_model_predictions'
    # option used by every other script in this file — presumably it was
    # silently ignored.
    predictions = predictor.predict(
        when_data=test_df,
        unstable_parameters_dict={'always_use_model_predictions': True},
        use_gpu=False)
    predicted_class = list(map(lambda x: x['class'], predictions))
    # Ground truth: second column of the test csv, skipping the header row.
    real_class = []
    first = True
    with open(test_file) as raw_csv_fp:
        reader = csv.reader(raw_csv_fp)
        for row in reader:
            if first:
                first = False
            else:
                real_class.append(row[1])
    acc = balanced_accuracy_score(real_class, predicted_class) * 100
    print(f'Balanced accuracy of {acc}% for classes !')
    return {
        'accuracy': acc,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
def run():
    """Train a grouped store/item demand forecaster and report R^2."""
    mdb = mindsdb.Predictor(name='demand_forecast')
    backend = 'lightwood'
    mdb.learn(from_data='dataset/train_data.csv',
              to_predict='sales',
              order_by=['date'],
              group_by=['store', 'item'],
              backend=backend,
              window_size=7)
    # Fixed: the test file was passed via `when=` (which expects a dict of
    # fixed values); file/dataframe input goes through `when_data=`.
    predictions = mdb.predict(when_data='dataset/test_data.csv')
    pred_val = [x['sales'] for x in predictions]
    real_val = list(pd.read_csv(open('dataset/test_data.csv', 'r'))['sales'])
    accuracy = r2_score(real_val, pred_val)
    print(f'Got an r2 score of: {accuracy}')
    return {
        'accuracy': accuracy,
        # Fixed: mislabelled 'balanced_accuracy_score'; r2_score is computed.
        'accuracy_function': 'r2_score',
        'backend': backend
    }
def run(sample):
    """Train a 'target_class' classifier and report its accuracy.

    NOTE: `sample` is part of the runner interface but unused here.
    """
    backend = 'lightwood'
    mdb = mindsdb.Predictor(name='model_name')
    mdb.learn(from_data='processed_data/train.csv',
              to_predict='target_class',
              use_gpu=True,
              backend=backend,
              equal_accuracy_for_all_output_categories=True)
    test_df = pd.read_csv('processed_data/test.csv')
    predictions = mdb.predict(when_data='processed_data/test.csv')
    results = [x['target_class'] for x in predictions]
    real = list(test_df['target_class'])
    # Fixed: this previously referenced the undefined name `result` and
    # raised NameError; the predictions list is called `results`.
    accuracy = acc_fun(real, results)
    print(f'Balacned accuracy score of {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'acc_fun',
        'backend': backend
    }
def run(sample):
    """Train an 'Exited' churn classifier and report balanced accuracy.

    NOTE: `sample` is part of the runner interface but unused here.
    """
    backend = 'lightwood'
    predictor = mindsdb.Predictor(name='churn_model')
    # Row-identifier columns are excluded from training.
    predictor.learn(from_data=pd.read_csv('dataset/train.csv'),
                    to_predict='Exited',
                    backend=backend,
                    ignore_columns=['RowNumber', 'CustomerId', 'Surname'])
    test_df = pd.read_csv('dataset/test.csv')
    rows = predictor.predict(when_data='dataset/test.csv')
    predicted = [int(row['Exited']) for row in rows]
    actual = [int(v) for v in test_df['Exited']]
    accuracy = balanced_accuracy_score(actual, predicted)
    print(f'Balacned accuracy score of {accuracy}')
    return {
        'accuracy': accuracy,
        'accuracy_function': 'balanced_accuracy_score',
        'backend': backend
    }
def run():
    """Train a 'Deaths' predictor with default settings and report accuracy."""
    target = 'Deaths'
    predictor = mindsdb.Predictor(name='corona-data')
    # We tell the Predictor what column or key we want to learn and from what data
    predictor.learn(from_data="processed_data/train.csv", to_predict=target)
    test_df = pd.read_csv('processed_data/test.csv')
    rows = predictor.predict(when_data='processed_data/test.csv')
    predicted = [str(row[target]) for row in rows]
    actual = [str(v) for v in test_df[target]]
    print(rows[0])
    accuracy = accuracy_score(actual, predicted)
    return {'accuracy': accuracy}
def delete(uuid):
    """Flask endpoint: delete the predictor named `uuid` and remove its
    cached lightwood data file, responding with a JSON success flag."""
    try:
        model_uuid = uuid
        if request.method == 'GET':
            predictor = mindsdb.Predictor(name=model_uuid)
            res = predictor.delete_model(model_name=model_uuid)
            # NOTE(review): Windows-style path built with backslash literals —
            # this will not resolve on POSIX systems; consider os.path.join.
            path = os.getcwd(
            ) + '\modelsinfo\\' + model_uuid + '_lightwood_data'
            print("path ==> ", path)
            if os.path.exists(path):
                os.remove(path)
            response = make_response(jsonify({"success": True}), 200)
            response.headers["Content-Type"] = 'application/json'
            return response
        else:
            # Any non-GET request is reported as not found.
            response = make_response(jsonify({"success": False}), 404)
            response.headers["Content-Type"] = 'application/json'
            return response
    except:
        # NOTE(review): bare except hides the real failure; consider logging it.
        response = make_response(jsonify({"success": False}), 500)
        response.headers["Content-Type"] = 'application/json'
        return response
def post(self, name):
    """Predict endpoint: resolve a datasource from the JSON body (registered
    data source name, `from_data`, or `when_data`), wait for any in-flight
    model swap on this name, then predict and return formatted results."""
    global model_swapping_map
    # Fixed: dict.get never raises on a missing key, so the old
    # try/except-based 'explain' default was dead code and format_flag came
    # through as None.  request.json can itself be None, hence `or {}`.
    data = request.json or {}
    from_data = get_datasource_path(data.get('data_source_name'))
    format_flag = data.get('format_flag') or 'explain'
    kwargs = data.get('kwargs')
    if not isinstance(kwargs, dict):
        kwargs = {}
    # Fall back through the accepted datasource keys in priority order.
    if from_data is None:
        from_data = data.get('from_data')
    if from_data is None:
        from_data = data.get('when_data')
    if from_data is None:
        abort(400, 'No valid datasource given')
    # Not the fanciest semaphor, but should work since restplus is multi-threaded and this condition should rarely be reached
    while name in model_swapping_map and model_swapping_map[name] is True:
        time.sleep(1)
    mdb = mindsdb.Predictor(name=name)
    try:
        results = mdb.predict(when_data=from_data, **kwargs)
    except:
        # Legacy fallback: older predictors only accept the `when` kwarg.
        results = mdb.predict(when=from_data, **kwargs)
    return preparse_results(results, format_flag)