def getProperty(prop_id):
    """Fetch a single property by id and return its JSON representation.

    Args:
        prop_id: primary key of the property row to look up.

    Returns:
        The property's JSON (via Property.getJson()), or the string
        'No record found!' when the lookup or construction fails.
    """
    try:
        # getRecord returns a list of matching rows; take the first.
        record = PropertyDAO().getRecord(prop_id)[0]
        # Renamed from `property` to avoid shadowing the builtin.
        prop = Property(record[0], record[1], record[2], record[3])
        return prop.getJson()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any lookup failure (no rows, bad data)
        # still maps to the same sentinel string callers already expect.
        return 'No record found!'
"""Load property descriptions from the database into one text blob.

Downloads the NLTK corpora used by the text-mining pipeline, fetches up
to 500 property rows, wraps them in Property models, concatenates every
description into `text`, and previews the first 5 records.
"""
import nltk

# One-time NLTK corpora required by the downstream text-mining services.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property
from service.TextMiningService import TextMiningService

propertyDao = PropertyDAO()
textMiningService = TextMiningService()

rows = propertyDao.getRecords(500)
print("Records: ", len(rows))

# row[0] = id, row[1] = description (utf-8 encoded before storing).
records = [Property(row[0], row[1].encode("utf-8")) for row in rows]

print("\nShow me the first 5 records: ", "\n")
# Collect pieces and join once instead of repeated `text +=`, which is
# quadratic in the number of records. `text` and `x` end up bound to the
# same values as before.
parts = []
for x in range(0, len(rows)):
    parts.append(str(records[x].description) + "\n")
    if x < 5:
        print("Id: ", records[x].id, " Desc: ", records[x].description)
text = "".join(parts)
def test():
    """Smoke-test lookup: fetch one known property and return its JSON.

    Uses a hard-coded listing id. Failures (missing record, bad columns)
    are deliberately unhandled so they surface immediately.

    Returns:
        The JSON representation of the fetched property.
    """
    record = PropertyDAO().getRecord(6842453594)[0]
    # Renamed from `property` to avoid shadowing the builtin; also
    # dropped the stray trailing semicolon.
    prop = Property(record[0], record[1], record[2], record[3])
    return prop.getJson()
import nltk # Load NLTK import time from service.TextMiningService import TextMiningService from service.sklearnService import SkLearnService from service.propertyService import PropertyService # nltk.download() nltk.download('punkt') nltk.download('stopwords') nltk.download('wordnet') from db.postgresl import PropertyDAO from model.Property import Property propertyDao = PropertyDAO() propertyService = PropertyService() sklearnService = SkLearnService() textMiningService = TextMiningService() rows = propertyDao.getRecords(20000) print("Records: ", len(rows)) records = [] for row in rows: records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3])) size = len(rows) count = 0 for property in records: property = propertyService.populateRoomSize(property)
"""Train/test split preparation for the rent-price gradient-boosting model.

Loads a filtered property DataFrame from the database, builds the
feature matrix X and target vector y, and holds out 20% for testing.
"""
import xgboost as xgb
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pickle
import statistics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from db.postgresl import PropertyDAO

# Destination for the pickled trained model.
filename = '../data/grad_model.sav'

# Filter out implausible listings: 1-4 bedrooms, under 5000 sqft,
# rent under $6000, fewer than 5 baths.
dataset = PropertyDAO().getDataFrameRecords(' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5')
print('Dataset Acquired: (', dataset.id.count(), ')')

# Feature set: unit attributes, listing flags, and one-hot location
# columns (the older, location-free feature list has been retired).
X = dataset[['bedrooms', 'bath', 'size_sqft', 'professionally_managed',
             'no_pet_allowed', 'suit_laundry', 'park_stall',
             'available_now', 'amenities', 'brand_new', 'loc_vancouver',
             'loc_burnaby', 'loc_richmond', 'loc_surrey', 'loc_newwest',
             'loc_abbotsford', 'no_basement']]
y = dataset['price'].values
y = y.reshape(-1, 1)  # column vector, as the regressor expects

# 80/20 split; fixed seed keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
import nltk # Load NLTK import time from service.TextMiningService import TextMiningService from service.sklearnService import SkLearnService from service.propertyService import PropertyService # nltk.download() nltk.download('punkt') nltk.download('stopwords') nltk.download('wordnet') from db.postgresl import PropertyDAO from model.Property import Property propertyDao = PropertyDAO() propertyService = PropertyService() sklearnService = SkLearnService() textMiningService = TextMiningService() rows = propertyDao.getNoSuitLaundryRecords(40000) print("Records: ", len(rows)) records = [] for row in rows: records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3])) size = len(rows) count = 0 for property in records: sentences = propertyService.getSentences(str(property.description))
import nltk # Load NLTK import time from service.TextMiningService import TextMiningService from service.sklearnService import SkLearnService from service.propertyService import PropertyService # nltk.download() nltk.download('punkt') nltk.download('stopwords') nltk.download('wordnet') from db.postgresl import PropertyDAO from model.Property import Property propertyDao = PropertyDAO() propertyService = PropertyService() sklearnService = SkLearnService() textMiningService = TextMiningService() rows = propertyDao.getNoRoomSizeRecords(20000) print("Records: ", len(rows)) records = [] for row in rows: records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3])) size = len(rows) count = 0 for property in records: property = propertyService.tryGetBedroomFromDescription(property)
"""Bootstrap: download NLTK corpora and load property rows.

Instantiates the DAO and text-mining services, then fetches the first
1000 property records from the database.
"""
import nltk

# NLTK corpora required by TextMiningService.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from db.postgresl import PropertyDAO
from model.Property import Property

# Data-access object for properties stored in the database.
propertyDao = PropertyDAO()
# sklearn-backed services for our use case.
sklearnService = SkLearnService()
# NLTK-backed services for our use case.
textMiningService = TextMiningService()

records = []
descriptions = []

# Fetch up to 1000 rows from the database.
rows = propertyDao.getRecords(1000)
print("Records: ", len(rows))  # number of rows returned
from builtins import list
from db.postgresl import PropertyDAO
import matplotlib.pyplot as plt
import pandas as pd  # pip install tables
import seaborn as sns

# Pull the cleaned listing set: plausible bed/bath counts with size
# and price caps, matching the filter used by the model scripts.
property_dao = PropertyDAO()
where = ' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5'
properties = property_dao.getDataFrameRecords(where)
print(properties.head())
print(list(properties))

# Histograms — one figure per numeric feature.
for col in ('bedrooms', 'bath', 'size_sqft', 'price'):
    sns.distplot(properties[col], bins=10, kde=False)
    plt.figure()

# Box plots for the discrete count features.
sns.boxplot(y=properties['bedrooms'])
plt.figure()
sns.boxplot(y=properties['bath'])
"""Load properties that have no location set, for location backfill."""
import nltk  # Fix: nltk was called below but never imported (NameError).
import time
from collections import defaultdict

from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService

# NLTK corpora required by the text-mining services.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Rows whose location fields are unset; row[0]=id, row[1]=description
# (utf-8 encoded here). Meaning of row[2..4] is not visible in this
# chunk — confirm against PropertyDAO.getRecordsWithNoLocation.
rows = propertyDao.getRecordsWithNoLocation()
print("Records: ", len(rows))

records = []
for row in rows:
    records.append(
        Property(row[0], row[1].encode("utf-8"), row[2], row[3], row[4]))

size = len(rows)
count = 0
# Linear-regression exploration of listing price vs. unit features.
import pickle
import statistics
from sklearn import datasets  # datasets module from scikit-learn
from sklearn import metrics
from db.postgresl import PropertyDAO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

print('Regression model Init')
# Same outlier filter used by the other model scripts: 1-4 bedrooms,
# under 5000 sqft, rent under $6000, fewer than 5 baths.
dataset = PropertyDAO().getDataFrameRecords(
    ' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5'
)
print('Dataset Acquired: (', dataset.id.count(), ')')
print(dataset.head())
print(list(dataset))
# old dataset
# define the data/predictors as the pre-set feature names
#df = dataset[['bedrooms', 'bath', 'size_sqft', 'professionally_managed', 'no_pet_allowed', 'suit_laundry', 'park_stall', 'available_now', 'amenities', 'brand_new']]
# new dataset
# define the data/predictors as the pre-set feature names
# NOTE(review): this statement is truncated in the visible chunk — the
# list literal continues (and closes) beyond this point.
df = dataset[[
    'bedrooms', 'bath', 'size_sqft', 'professionally_managed',
    'no_pet_allowed', 'suit_laundry', 'park_stall', 'available_now',
    'amenities', 'brand_new', 'loc_vancouver', 'loc_burnaby',
    'loc_richmond', 'loc_surrey', 'loc_newwest', 'loc_abbotsford',
    'no_basement'
import nltk # Load NLTK import time from service.TextMiningService import TextMiningService from service.sklearnService import SkLearnService from service.propertyService import PropertyService # nltk.download() nltk.download('punkt') nltk.download('stopwords') nltk.download('wordnet') from db.postgresl import PropertyDAO from model.Property import Property propertyDao = PropertyDAO() propertyService = PropertyService() sklearnService = SkLearnService() textMiningService = TextMiningService() rows = propertyDao.getNoBathRecords(20000); #rows = propertyDao.getRecord('6829979395'); print("Records: ", len(rows)) records = [] for row in rows: records.append(Property(row[0], row[1].encode("utf-8"), row[3], row[2])) size = len(rows) count = 0 for property in records: