# Outlier removal: discard, in place, every row whose price exceeds 3,000,000.
too_expensive = df['price'] > 3000000
df.drop(index=df.loc[too_expensive].index, inplace=True)

# Optional visual diagnostics, left disabled:
# plt.hist(df['price'])
# plt.show()
# plt.figure()
# plt.scatter(df['area'], df['price'])
# plt.show()

# Target scaling: divide both price columns by the largest *training* price,
# so the test targets are scaled with the training maximum as well.
max_price = train['price'].max()
train_y = train['price'] / max_price
test_y = test['price'] / max_price

# Feature processing is delegated to the project's `datasets` helper.
train_x, test_x = datasets.process_house_attributes(df, train, test)

# Training hyper-parameters; the optimizer decay is the initial learning rate
# divided by the number of epochs.
initial_lr = 0.001
n_epochs = 200

# Build the regression MLP (its first argument is the second dimension of
# train_x) and compile it with mean absolute percentage error as the loss.
model = models.create_mlp(train_x.shape[1], regress=True)
opt = Adam(lr=initial_lr, decay=initial_lr / n_epochs)
model.compile(loss='mean_absolute_percentage_error', optimizer=opt)

# Fit on the training split, validating against the test split.
model.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=n_epochs,
    batch_size=8,
)

# Predict on the test split and express the error as a percentage of the
# (scaled) true price.
preds = model.predict(test_x)
diff = preds.flatten() - test_y
percent_diff = (diff / test_y) * 100
# for training and the remaining 25% for evaluation print("[INFO] constructing training/testing split...") (train, test) = train_test_split(df, test_size=0.25, random_state=42) # find the largest house price in the training set and use it to # scale our house prices to the range [0, 1] (this will lead to # better training and convergence) maxPrice = train["price"].max() trainY = train["price"] / maxPrice testY = test["price"] / maxPrice # process the house attributes data by performing min-max scaling # on continuous features, one-hot encoding on categorical features, # and then finally concatenating them together print("[INFO] processing data...") (trainX, testX) = datasets.process_house_attributes(df, train, test) # create our MLP and then compile the model using mean absolute # percentage error as our loss, implying that we seek to minimize # the absolute percentage difference between our price *predictions* # and the *actual prices* model = models.create_mlp(trainX.shape[1], regress=True) opt = Adam(lr=1e-3, decay=1e-3 / 200) model.compile(loss="mean_absolute_percentage_error", optimizer=opt) # train the model print("[INFO] training model...") model.fit(trainX, trainY, validation_data=(testX, testY), epochs=200,
# the data for training and the remaining 25% for testing
print("[INFO] processing data...")
split = train_test_split(df, images, test_size=0.25, random_state=42)
trainAttrX, testAttrX, trainImagesX, testImagesX = split

# Scale the targets: divide both price columns by the largest price found
# in the training attributes, so the test targets use the training maximum.
maxPrice = trainAttrX["price"].max()
trainY = trainAttrX["price"] / maxPrice
testY = testAttrX["price"] / maxPrice

# Replace the raw attribute frames with the processed feature sets produced
# by the project's `datasets` helper.
trainAttrX, testAttrX = datasets.process_house_attributes(
    df, trainAttrX, testAttrX
)

# Build the two branches; both are created with regress=False because the
# regression output is added below, on top of the merged branches.
image_shape = (64, 64, 3)
mlp = models.create_mlp(trainAttrX.shape[1], regress=False)
cnn = models.create_cnn(*image_shape, regress=False)

# Merge the outputs of the MLP and the CNN into a single tensor.
branch_outputs = [mlp.output, cnn.output]
combinedInput = concatenate(branch_outputs)

# Fully-connected head: a 4-unit ReLU layer followed by a single linear
# unit that produces the regression output.
hidden = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="linear")(hidden)
# our final model will accept categorical/numerical data on the MLP
testIDs = test.index.values

# Collect one image per target value, selected by the row ID of the split.
# The previous loops looked up the ID but then appended images[i], so both
# splits received the first N images regardless of which rows they held.
# NOTE(review): this assumes images[k] is the image of the row whose index
# label is k -- confirm against the code that loads `images`.
trainXConv = np.array([images[row_id] for row_id in trainIDs[:len(trainY)]])
testXConv = np.array([images[row_id] for row_id in testIDs[:len(testY)]])

# Numerical/categorical features for the MLP branch, produced by the
# project's `datasets` helper.
(trainXMlp, testXMlp) = datasets.process_house_attributes(houseData, train, test)

# Build the combined CNN + MLP model from the shape of a single image and
# of a single attribute row.
model = models.createCombined_cnn_mlp(trainXConv[0].shape, trainXMlp[0].shape)

# lr --> learning rate; decay --> lowering of the learning rate over the
# course of training (original author's note: "after each epoch").
# NOTE(review): decay is 1e-3 / 200 while training runs for 500 epochs --
# confirm whether the divisor was meant to track the epoch count.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

# Train on both inputs at once, validating against the test split and
# reporting progress through the PlotLosses callback.
model.fit(
    [trainXConv, trainXMlp],
    trainY,
    validation_data=([testXConv, testXMlp], testY),
    epochs=500,
    batch_size=20,
    callbacks=[PlotLosses()],
)

# The score is computed on the training inputs, not on the test split.
score = model.evaluate([trainXConv, trainXMlp], trainY, verbose=0)