Esempio n. 1
0
def predict_dataset(data_frame, city, columns):
    """
    Predicts prices of listings from a dataset.

    The feature matrix from cm.to_matrix is transformed with
    boxcox1p (lambda 0.15) and shifted by 1. Two saved models are loaded
    for the city; listings whose current price is below the outlier
    boundary (the smallest value returned by do.detect_outlier) are
    predicted with the second model, all others with the first. Model
    output is passed through np.expm1 before being returned.

    If do.detect_outlier returns no values, every listing is treated as
    in-bound instead of failing on min() of an empty sequence.
    :params data_frame dataframe:
    :params city str:
    :params columns list:
    :returns predictions array:
    """
    x_var, y_var = cm.to_matrix(data_frame, columns)
    x_var = boxcox1p(x_var, 0.15) + 1
    price_length = len(y_var)

    #imports models
    regressor_a = joblib.load(DATA_DIR + '/' + city + c.MODEL_1_SUFFIX)
    regressor_b = joblib.load(DATA_DIR + '/' + city + c.MODEL_2_SUFFIX)

    # default=np.inf: with no outliers, every price compares as in-bound.
    outlier_boundary = min(do.detect_outlier(y_var), default=np.inf)
    inbound = (y_var < outlier_boundary)
    outbound = (y_var >= outlier_boundary)

    #populates predictions depending on current price
    predictions = np.zeros(price_length)

    for mask, regressor in ((inbound, regressor_b), (outbound, regressor_a)):
        # Skip empty selections: some estimators (e.g. scikit-learn's)
        # reject a feature matrix with zero samples.
        if np.any(mask):
            predictions[mask] = np.expm1(regressor.predict(x_var[mask]))

    return predictions
Esempio n. 2
0
 def test_to_matrix_check_dtype(self):
     """
     Tests that to_matrix will not run if datatype is not dataframe.

     Each invalid input is checked in its own assertRaises context: a
     single shared context exits on the first exception, so any call
     placed after the first one would never be executed.
     :params self:
     :returns None:
     """
     invalid_inputs = ('check', 1, [1, 2, 3])
     for invalid in invalid_inputs:
         # subTest reports which input failed and keeps checking the rest.
         with self.subTest(invalid_input=invalid):
             with self.assertRaises(ValueError):
                 ctm.to_matrix(invalid, con.LISTING_COLUMNS)
Esempio n. 3
0
import unittest

import zillowbnb.test.submodule_path

import constants as co
import convert_to_matrix as cm
import get_data as gd
import get_cleaned_listings as gcl
import train_model as tm

# Module-level fixtures: built once when this test module is imported and
# shared by the test cases below.
# NOTE(review): gd.download_dataset presumably fetches data from a remote
# source, so importing this module may be slow or fail offline -- confirm.
DATA = gd.download_dataset(co.DATASET_PROPERTIES, co.LISTINGS_DATA)

# Listings dataframe produced from DATA and co.LISTING_COLUMNS.
DATAFRAME = gcl.get_listings_dataframe(DATA, co.LISTING_COLUMNS)

# Pair returned by cm.to_matrix for that dataframe
# (presumably feature matrix and target values -- verify against cm).
X_VAR, Y_VAR = cm.to_matrix(DATAFRAME, co.LISTING_COLUMNS)


class TrainModelTest(unittest.TestCase):
    """
    Unit tests covering the train_model module.
    """
    def test_input_size(self):
        """
        Checks that train_model raises IndexError when it is given an
        x_var and a y_var of different sizes.
        :params self:
        :returns None:
        """
        expected_error = IndexError
        with self.assertRaises(expected_error):
            # Only x_var is cut down (to at most its first 500 entries);
            # y_var is passed whole so the two sizes disagree.
            shortened_x = X_VAR[:500]
            tm.train_model(shortened_x, Y_VAR, 'Hawaii')