def test_ignore_nan_values():
    """nwrmsle is 0.0 when both inputs match and share a NaN in the same slot."""
    predicted = np.array([1, 2, 3, np.nan])
    observed = np.array([1, 2, 3, np.nan])
    unit_weights = np.array([1, 1, 1, 1])

    # Identical inputs: the NaN entry must not turn the score into NaN.
    assert evaluation.nwrmsle(predicted, observed, unit_weights) == 0.0
def main(model=Model.DECISION_TREE, seed=None):
    """Train a model, score it on the validation set and record the run.

    Args:
        model: Model selector forwarded to ``track.set_model`` and
            ``train_model``; defaults to ``Model.DECISION_TREE``.
        seed: Value forwarded unchanged to ``train_model``.

    The 'nwrmsle' and 'r2_score' metrics are logged through the tracking
    context, handed to ``write_predictions_and_score`` and printed as JSON.
    """
    raw_train, raw_validate = load_data()
    train, validate = encode(raw_train, raw_validate)

    with tracking.track() as track:
        track.set_model(model)
        # train_model returns the fitted estimator plus the params to log.
        fitted_model, params = train_model(train, model, seed)
        track.log_params(params)

        validation_predictions = make_predictions(fitted_model, validate)

        print("Calculating metrics")
        evaluation_metrics = {}
        evaluation_metrics['nwrmsle'] = evaluation.nwrmsle(
            validation_predictions,
            validate['unit_sales'].values,
            validate['perishable'].values,
        )
        evaluation_metrics['r2_score'] = metrics.r2_score(
            y_true=validate['unit_sales'].values,
            y_pred=validation_predictions,
        )
        track.log_metrics(evaluation_metrics)

        write_predictions_and_score(
            evaluation_metrics, fitted_model, raw_train.columns)

        metrics_as_json = json.dumps(evaluation_metrics)
        print("Evaluation done with metrics {}.".format(metrics_as_json))
def test_calculates_nwrmsle_for_perfect_match():
    """nwrmsle is exactly 0.0 when the estimate equals the actual values."""
    predicted = np.array([1, 2, 3])
    observed = np.array([1, 2, 3])
    unit_weights = np.array([1, 1, 1])

    score = evaluation.nwrmsle(predicted, observed, unit_weights)

    assert score == 0.0
def test_eliminate_negative_values():
    """nwrmsle is 0.0 for matching inputs that both contain a negative entry."""
    predicted = np.array([1, 2, 3, -1])
    observed = np.array([1, 2, 3, -1])
    unit_weights = np.array([1, 1, 1, 1])

    # The trailing -1 appears in both arrays; the score must still be 0.0.
    assert evaluation.nwrmsle(predicted, observed, unit_weights) == 0.0
def main():
    """Run the time series analysis and report its validation score.

    Loads the train/validate frames, sets negative 'unit_sales' to 0 in
    both, obtains predictions, scores them with ``evaluation.nwrmsle``
    against the validation rows sharing an 'id' with a prediction, then
    writes and prints the score.
    """
    train, validate = load_data()

    print("Not predicting returns...")
    # Negative unit_sales are zeroed in place in both frames.
    for frame in (train, validate):
        frame.loc[frame['unit_sales'] < 0, 'unit_sales'] = 0

    # The second element returned by get_predictions is not used here.
    validation_predictions, _ = get_predictions(validate, train)

    # Sort both sides by 'id' so predictions and actuals line up row by row.
    preds_sorted = validation_predictions.sort_values(by=['id'])
    has_prediction = validate.id.isin(validation_predictions['id'])
    subset_for_validation = validate[has_prediction].sort_values(by=['id'])

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        preds_sorted['unit_sales'].values,
        subset_for_validation['unit_sales'].values,
        subset_for_validation['perishable'].values,
    )

    write_predictions_and_score(validation_score, 0, train.columns)

    print(
        "Times series analysis done with a validation score (error rate) of {}."
        .format(validation_score))
def test_calculates_nwrmsle_for_perfect_match():
    """An estimate identical to the actual values must score exactly 0.0."""
    matching_values = [1, 2, 3]
    estimate = np.array(matching_values)
    actual = np.array(matching_values)
    weights = np.array([1, 1, 1])

    assert evaluation.nwrmsle(estimate, actual, weights) == 0.0
def test_calculates_nwrmsle_for_imperfect_match():
    """nwrmsle of an all-zero estimate against all-one actuals matches the by-hand value.

    With unit weights the expected score is 0.69314718 (numerically ln 2).
    """
    estimate = np.array([0, 0, 0])
    actual = np.array([1, 1, 1])
    weights = np.array([1, 1, 1])

    calculated_nwrmsle = evaluation.nwrmsle(estimate, actual, weights)

    # Assert by-hand calculation of nwrmsle is reasonably close to python calculation.
    # `approx` only compares when it sits on one side of `==`; the previous
    # `assert approx(value, expected)` passed `expected` as the tolerance and
    # never checked the result (assuming `approx` is pytest's approx).
    assert calculated_nwrmsle == approx(0.69314718)
def test_calculates_nwrmsle_for_imperfect_match():
    """Check nwrmsle against a hand-computed value for a uniformly wrong estimate.

    The estimate is all zeros, the actual values are all ones and every
    weight is 1; the by-hand result is 0.69314718 (numerically ln 2).
    """
    estimate = np.array([0, 0, 0])
    actual = np.array([1, 1, 1])
    weights = np.array([1, 1, 1])

    calculated_nwrmsle = evaluation.nwrmsle(estimate, actual, weights)

    # Assert by-hand calculation of nwrmsle is reasonably close to python calculation.
    # The comparison has to be written `value == approx(expected)`: asserting a
    # bare `approx(value, expected)` object compares nothing, because the second
    # positional argument is the tolerance (assuming `approx` is pytest's approx).
    assert calculated_nwrmsle == approx(0.69314718)
def main(model=Model.RANDOM_FOREST):
    """Build a model, score its validation predictions and write the results.

    Args:
        model: Model selector forwarded to ``make_model``; defaults to
            ``Model.RANDOM_FOREST``.
    """
    raw_train, raw_validate = load_data()
    train, validate = encode(raw_train, raw_validate)

    fitted_model = make_model(train, model)
    validation_predictions = make_predictions(fitted_model, validate)

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        validation_predictions,
        validate['unit_sales'].values,
        validate['perishable'].values,
    )

    # Column names come from the un-encoded training frame.
    write_predictions_and_score(
        validation_score, fitted_model, raw_train.columns)

    report = "Decision tree analysis done with a validation score (error rate) of {}.".format(validation_score)
    print(report)
def main():
    """Score time series predictions on the validation data and report the error.

    Negative 'unit_sales' values are replaced by 0 in both frames before
    predicting. Only validation rows whose 'id' appears in the predictions
    are scored; the score is written out and printed.
    """
    train, validate = load_data()

    print("Not predicting returns...")
    negative_train_sales = train['unit_sales'] < 0
    train.loc[negative_train_sales, 'unit_sales'] = 0
    negative_validate_sales = validate['unit_sales'] < 0
    validate.loc[negative_validate_sales, 'unit_sales'] = 0

    # Only the predictions are needed; the second returned value is ignored.
    validation_predictions, _ = get_predictions(validate, train)

    # Both frames are ordered by 'id' so the value arrays are aligned.
    preds_sorted = validation_predictions.sort_values(by=['id'])
    predicted_ids = validation_predictions['id']
    subset_for_validation = validate[validate.id.isin(predicted_ids)]
    subset_for_validation = subset_for_validation.sort_values(by=['id'])

    print("Calculating estimated error")
    validation_score = evaluation.nwrmsle(
        preds_sorted['unit_sales'].values,
        subset_for_validation['unit_sales'].values,
        subset_for_validation['perishable'].values,
    )

    write_predictions_and_score(validation_score, 0, train.columns)

    message = "Times series analysis done with a validation score (error rate) of {}."
    print(message.format(validation_score))