def predict_dataset(data_frame, city, columns):
    """
    Predicts prices of listings from a dataset.

    The listings are split by their current price at an outlier boundary
    (the smallest value returned by ``do.detect_outlier``). Rows priced below
    the boundary are predicted with the ``c.MODEL_2_SUFFIX`` model, rows at or
    above it with the ``c.MODEL_1_SUFFIX`` model. Model outputs are passed
    through ``np.expm1`` before being returned.

    :params data_frame dataframe: listings, converted with ``cm.to_matrix``
    :params city str: prefix of the saved model file names under DATA_DIR
    :params columns list: columns handed to ``cm.to_matrix``
    :returns predictions array: one prediction per entry of the price vector
    """
    x_var, y_var = cm.to_matrix(data_frame, columns)
    x_var = boxcox1p(x_var, 0.15) + 1
    price_length = len(y_var)

    # imports models
    regressor_a = joblib.load(DATA_DIR + '/' + city + c.MODEL_1_SUFFIX)
    regressor_b = joblib.load(DATA_DIR + '/' + city + c.MODEL_2_SUFFIX)

    # min() on an empty result would raise ValueError, so fall back to None.
    # NOTE(review): assumes an empty result from do.detect_outlier means
    # "no outliers found" - confirm against that helper.
    outlier_boundary = min(do.detect_outlier(y_var), default=None)
    if outlier_boundary is None:
        # Without a boundary every listing is treated as a regular price.
        inbound = np.ones(price_length, dtype=bool)
        outbound = np.zeros(price_length, dtype=bool)
    else:
        inbound = (y_var < outlier_boundary)
        outbound = (y_var >= outlier_boundary)

    # populates predictions depending on current price; a model is only
    # called when its partition has rows, because predicting on a zero-row
    # matrix is rejected by scikit-learn style estimators.
    predictions = np.zeros(price_length)
    if np.any(inbound):
        predictions[inbound] = np.expm1(regressor_b.predict(x_var[inbound]))
    if np.any(outbound):
        predictions[outbound] = np.expm1(regressor_a.predict(x_var[outbound]))
    return predictions
def test_to_matrix_check_dtype(self):
    """
    Tests that to_matrix will not run if datatype is not dataframe.

    Each non-dataframe input is checked in its own ``assertRaises`` block:
    a single shared block exits at the first exception, so any calls placed
    after it would never be executed.

    :params self:
    :return None:
    """
    for bad_input in ('check', 1, [1, 2, 3]):
        # subTest reports which input failed and keeps checking the rest.
        with self.subTest(bad_input=bad_input):
            with self.assertRaises(ValueError):
                ctm.to_matrix(bad_input, con.LISTING_COLUMNS)
import unittest
# NOTE(review): imported only for its side effect; presumably it makes the
# project modules below importable, so it stays ahead of them - confirm.
import zillowbnb.test.submodule_path
import constants as co
import convert_to_matrix as cm
import get_data as gd
import get_cleaned_listings as gcl
import train_model as tm

# Shared fixtures, built once when this module is imported.
DATA = gd.download_dataset(co.DATASET_PROPERTIES, co.LISTINGS_DATA)
DATAFRAME = gcl.get_listings_dataframe(DATA, co.LISTING_COLUMNS)
X_VAR, Y_VAR = cm.to_matrix(DATAFRAME, co.LISTING_COLUMNS)


class TrainModelTest(unittest.TestCase):
    """
    Unit tests covering the train_model module.
    """

    def test_input_size(self):
        """
        Checks that train_model refuses x_var and y_var of different sizes.

        Only the first 500 rows of X_VAR are passed together with the full
        Y_VAR, and an IndexError is expected.
        NOTE(review): this assumes Y_VAR holds more than 500 entries.

        :params self:
        :returns None:
        """
        self.assertRaises(
            IndexError, tm.train_model, X_VAR[:500], Y_VAR, 'Hawaii')