Esempio n. 1
0
def getProperty(prop_id):
    """Fetch the property with id *prop_id* and return it serialized as JSON.

    Returns the string 'No record found!' on any lookup failure, preserving
    the original best-effort contract (callers never see an exception).
    """
    try:
        record = PropertyDAO().getRecord(prop_id)[0]
        # First four columns map onto the Property model.
        prop = Property(record[0], record[1], record[2], record[3])
        return prop.getJson()
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; any data/DB error still yields the
        # original fallback string. Local renamed from `property` to
        # avoid shadowing the builtin.
        return 'No record found!'
Esempio n. 2
0
import nltk  # Load NLTK
# nltk.download()

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property
from service.TextMiningService import TextMiningService

# Data-access and text-mining service singletons for this script.
propertyDao = PropertyDAO()
textMiningService = TextMiningService()

# Pull up to 500 raw listing rows from the database.
rows = propertyDao.getRecords(500)

print("Records: ", len(rows))

# Wrap each raw row in the Property model: (id, utf-8 encoded description).
records = [Property(row[0], row[1].encode("utf-8")) for row in rows]

# Build the full corpus with str.join instead of repeated `text += ...`
# (which is quadratic in the worst case) — same resulting string.
text = "".join(str(record.description) + "\n" for record in records)

print("\nShow me the first 5 records: ", "\n")
for record in records[:5]:
    print("Id: ", record.id, " Desc: ", record.description)
Esempio n. 3
0
def test(prop_id=6842453594):
    """Smoke-test helper: fetch one listing and return its JSON.

    *prop_id* defaults to the previously hard-coded sample id, so existing
    zero-argument callers are unaffected.
    """
    record = PropertyDAO().getRecord(prop_id)[0]
    # Local renamed from `property` to avoid shadowing the builtin.
    prop = Property(record[0], record[1], record[2], record[3])
    return prop.getJson()
import nltk  # Load NLTK
import time
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService
# nltk.download()

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

# Service singletons shared by the rest of this script.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Pull up to 20000 raw listing rows from the database.
rows = propertyDao.getRecords(20000)

print("Records: ", len(rows))
records = []

# Wrap each raw row in the Property model: id, utf-8 description, plus two
# further columns (their meaning is not visible from this excerpt).
for row in rows:
    records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3]))

size = len(rows)
count = 0
# Derive a room size for each property from its data.
# NOTE(review): rebinding the loop variable discards the returned object
# unless populateRoomSize mutates its argument in place — confirm; the
# loop body may also continue beyond this excerpt.
for property in records:
    property = propertyService.populateRoomSize(property)
Esempio n. 5
0
import xgboost as xgb
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pickle
import statistics

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from db.postgresl import PropertyDAO

# Destination path for the pickled model (used further down in the script).
filename = '../data/grad_model.sav'

# Filter out implausible listings at the SQL level before training:
# 1-4 bedrooms, < 5000 sqft, < $6000/month, < 5 baths.
dataset = PropertyDAO().getDataFrameRecords(' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5')
print('Dataset Acquired: (', dataset.id.count(), ')')

# Create training and test datasets.
# Predictors: listing attributes plus one-hot location columns.
X = dataset[['bedrooms', 'bath', 'size_sqft', 'professionally_managed', 'no_pet_allowed', 'suit_laundry', 'park_stall', 'available_now', 'amenities', 'brand_new', 'loc_vancouver', 'loc_burnaby', 'loc_richmond', 'loc_surrey', 'loc_newwest', 'loc_abbotsford', 'no_basement']]
# Target: monthly price, reshaped to a column vector as sklearn expects.
y = dataset['price'].values
y = y.reshape(-1, 1)


# 80/20 split; fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
import nltk  # Load NLTK
import time
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService
# nltk.download()

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

# Service singletons shared by the rest of this script.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Listings with no suite-laundry information yet (up to 40000 rows).
rows = propertyDao.getNoSuitLaundryRecords(40000)

print("Records: ", len(rows))
records = []

# Wrap each raw row in the Property model: id, utf-8 description, plus two
# further columns (their meaning is not visible from this excerpt).
for row in rows:
    records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3]))

size = len(rows)
count = 0
# Split each description into sentences for text mining.
# NOTE(review): `sentences` is overwritten every iteration — presumably the
# loop body continues beyond this excerpt; confirm in the full script.
for property in records:
    sentences = propertyService.getSentences(str(property.description))
import nltk  # Load NLTK
import time
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService
# nltk.download()

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

# Service singletons shared by the rest of this script.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Listings whose room-size column has not been populated (up to 20000 rows).
rows = propertyDao.getNoRoomSizeRecords(20000)

print("Records: ", len(rows))
records = []

# Wrap each raw row in the Property model: id, utf-8 description, plus two
# further columns (their meaning is not visible from this excerpt).
for row in rows:
    records.append(Property(row[0], row[1].encode("utf-8"), row[2], row[3]))

size = len(rows)
count = 0
# Try to extract a bedroom count from each free-text description.
# NOTE(review): rebinding the loop variable discards the returned object
# unless tryGetBedroomFromDescription mutates its argument in place —
# confirm; the loop body may also continue beyond this excerpt.
for property in records:
    property = propertyService.tryGetBedroomFromDescription(property)
Esempio n. 8
0
import nltk  # Load NLTK

#nltk modules
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


#imports
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from db.postgresl import PropertyDAO
from model.Property import Property

# Data-access object for properties stored in the database.
propertyDao = PropertyDAO()

# sklearn-backed services for our use case.
sklearnService = SkLearnService()

# NLTK-backed text-mining services for our use case.
textMiningService = TextMiningService()

# Accumulators filled further down in the script.
records = []
descriptions = []


rows = propertyDao.getRecords(1000)  # fetch rows from the database

print("Records: ", len(rows))  # amount of rows returned
Esempio n. 9
0
from builtins import list

from db.postgresl import PropertyDAO
import matplotlib.pyplot as plt
import pandas as pd  # pip install tables
import seaborn as sns


# DAO for the property listings table.
property_dao = PropertyDAO()

# Keep only plausible listings: 1-4 bedrooms, < 5000 sqft, < $6000, < 5 baths.
where = ' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5'
properties = property_dao.getDataFrameRecords(where)


print(properties.head())
print(list(properties))

# Bar charts: one histogram per numeric feature, each followed by a fresh
# figure for the next plot.
# NOTE(review): sns.distplot is deprecated in modern seaborn (use
# histplot/displot); left as-is to preserve behavior on the pinned version.
for column in ('bedrooms', 'bath', 'size_sqft', 'price'):
    sns.distplot(properties[column], bins=10, kde=False)
    plt.figure()

# Box plots
sns.boxplot(y=properties['bedrooms'])
plt.figure()
sns.boxplot(y=properties['bath'])
Esempio n. 10
0
import nltk  # was missing: nltk.download() below raised NameError without it
import time
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService
from collections import defaultdict

# Fetch the NLTK corpora the text-mining services rely on.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

# Service singletons shared by the rest of this script.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Listings whose location column has not been populated yet.
rows = propertyDao.getRecordsWithNoLocation()

print("Records: ", len(rows))

# Wrap each raw row in the Property model. This query yields five columns
# (one more than the other scripts); their meaning beyond id/description is
# not visible from this excerpt.
records = [
    Property(row[0], row[1].encode("utf-8"), row[2], row[3], row[4])
    for row in rows
]

size = len(rows)
count = 0
Esempio n. 11
0
import pickle
import statistics

from sklearn import datasets  # # imports datasets from scikit-learn
from sklearn import metrics

from db.postgresl import PropertyDAO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm

print('Regression model Init')

# Pull only plausible listings straight from SQL:
# 1-4 bedrooms, < 5000 sqft, < $6000/month, < 5 baths.
where = ' where bedrooms > 0 and bedrooms < 5 and size_sqft < 5000 and price < 6000 and bath < 5'
dataset = PropertyDAO().getDataFrameRecords(where)

print('Dataset Acquired: (', dataset.id.count(), ')')
print(dataset.head())
print(list(dataset))

# old dataset
# define the data/predictors as the pre-set feature names
#df = dataset[['bedrooms', 'bath', 'size_sqft', 'professionally_managed', 'no_pet_allowed', 'suit_laundry', 'park_stall', 'available_now', 'amenities', 'brand_new']]

# new dataset
# define the data/predictors as the pre-set feature names
df = dataset[[
    'bedrooms', 'bath', 'size_sqft', 'professionally_managed',
    'no_pet_allowed', 'suit_laundry', 'park_stall', 'available_now',
    'amenities', 'brand_new', 'loc_vancouver', 'loc_burnaby', 'loc_richmond',
    'loc_surrey', 'loc_newwest', 'loc_abbotsford', 'no_basement'
Esempio n. 12
0
import nltk  # Load NLTK
import time
from service.TextMiningService import TextMiningService
from service.sklearnService import SkLearnService
from service.propertyService import PropertyService
# nltk.download()

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

from db.postgresl import PropertyDAO
from model.Property import Property

# Service singletons shared by the rest of this script.
propertyDao = PropertyDAO()
propertyService = PropertyService()
sklearnService = SkLearnService()
textMiningService = TextMiningService()

# Listings that are missing a bathroom count (up to 20000 rows).
rows = propertyDao.getNoBathRecords(20000)

print("Records: ", len(rows))
records = []

for row in rows:
    # NOTE(review): arguments are (id, description, row[3], row[2]) — the
    # last two are swapped relative to the sibling scripts, presumably
    # because getNoBathRecords returns columns in a different order.
    # Confirm against the query before "fixing".
    records.append(Property(row[0], row[1].encode("utf-8"), row[3], row[2]))

size = len(rows)
count = 0
for property in records: