def main():
    """Collect every listing URL, scrape its features and write one CSV.

    Steps:
      1. Ask ``get_url_list.get_aparts`` for four URL lists (huur, verhuurd,
         koop, verkocht) and tag each URL with its listing type.
      2. Split the tagged URLs with ``get_chunks(url_list, 1000)`` and start
         one thread per chunk; every thread runs ``get_inventory`` and
         appends to the shared ``inventory`` list.
      3. Build the CSV header from the keys of a sample listing
         (``sample_url``) and write every gathered listing beneath it to
         ``FundaInventoryLatestExpanded.csv`` (pipe-delimited, UTF-8).
    """
    inventory = []
    huur, verhuurd, koop, verkocht = get_url_list.get_aparts(60, 230, 390, 760)

    # Compile the four lists into one list of [url, listing type] pairs.
    # The "verhuurd" label was previously misspelled as "verhurrd".
    labelled_groups = (
        (huur, "huur"),
        (verhuurd, "verhuurd"),
        (koop, "koop"),
        (verkocht, "verkocht"),
    )
    url_list = [
        [item, label]
        for items, label in labelled_groups
        for item in items
    ]
    print("Done getting all the urls!")

    # One worker thread per chunk; all workers append to the same list.
    threads = []
    for chunk in get_chunks(url_list, 1000):
        t = threading.Thread(target=get_inventory, args=(inventory, chunk))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    print("Done getting all the features!")

    # The sample listing only supplies the column names; it is not written
    # as a data row.
    sample = get_features.get_features(sample_url)
    sample["type"] = "sample"
    expand_features.expand_features(sample)
    header = list(sample.keys())

    # "wb" because unicodecsv does the encoding itself; the with-block makes
    # sure the file is flushed and closed even if a row fails to serialise.
    with open("FundaInventoryLatestExpanded.csv", "wb") as output_file:
        threading_output = unicodecsv.writer(
            output_file, encoding='utf-8', delimiter='|')
        threading_output.writerow(header)
        for row in inventory:
            # Look values up by header key so every cell lands in its own
            # column regardless of the row dict's internal ordering; a key
            # missing from a row yields an empty cell.
            threading_output.writerow([row.get(key, "") for key in header])
    print("Done writing output file!")
def get_inventory(inventory, url_list):
    """Scrape every listing in *url_list* and append the results to *inventory*.

    Args:
        inventory: list that is appended to in place, one feature mapping
            per listing that was processed without error.
        url_list: iterable of ``[url, listing_type]`` pairs.

    Returns:
        None. A listing whose processing raises is reported on stdout and
        skipped, so one bad URL does not abort the rest of the batch.
    """
    # NOTE(review): the statement nesting below was reconstructed from a
    # whitespace-flattened source; confirm that the counter is meant to
    # advance for every URL and that the 2 s pause belongs to the
    # every-100-URLs branch.
    # failed_urls = unicodecsv.writer(open("failed_urls_all_threading_20150516.csv", "wb"), encoding='utf-8', delimiter='|')
    count = 0
    # Loop through all urls handed to this worker.
    for row in url_list:
        try:
            apart = get_features.get_features(row[0])
            apart["type"] = row[1]
            expand_features.expand_features(apart)
            inventory.append(apart)
        except Exception:
            # `except Exception` instead of a bare `except` so that
            # KeyboardInterrupt / SystemExit still stop the program.
            print("error url:" + row[0])
            print(sys.exc_info())
            # failed_urls.writerow([row[0], sys.exc_info()])
        count += 1
        if count % 100 == 0:
            print("We have gathered properties: " + str(count))
            time.sleep(2)
from load_data import load_data
from write_submission import write_submission
import numpy as np
#from matplotlib import pyplot as plt
from expand_features import expand_features

# NOTE(review): `linear_model` is used below but is not imported in the
# visible part of this file -- presumably `from sklearn import linear_model`
# is intended; confirm and add it if it is not imported elsewhere.


def rmse(predictions, targets):
    """Return the root-mean-square error between *predictions* and *targets*.

    Both arguments may be NumPy arrays or any array-like (lists, tuples);
    they are converted with ``np.asarray`` so that plain Python sequences
    work as well. The difference is computed element-wise with the usual
    NumPy broadcasting rules.
    """
    errors = np.asarray(predictions) - np.asarray(targets)
    return np.sqrt((errors ** 2).mean())


# load data
[Xtr, Ytr, Xte, testID] = load_data()

# expand features
Xtr_expanded = expand_features(Xtr)
Xte_expanded = expand_features(Xte)

# NOTE(review): the last value printed is the shape of the raw Xte, not of
# Xte_expanded -- confirm that this is intended.
print('Xtr shape', Xtr_expanded.shape, 'Ytr shape', Ytr.shape, 'Xte shape', Xte.shape)

# Ridge regression; the regularisation strength is chosen from `alphas`
# by RidgeCV's built-in cross-validation.
clf = linear_model.RidgeCV(alphas=[1e-3, 1e-2, 1e-1], normalize=True, store_cv_values=True).fit(Xtr_expanded, Ytr)
ridge_preds = clf.predict(Xtr_expanded)
# This is the error on the training data itself, not a held-out score.
print('RMSE Ridge:', rmse(ridge_preds, Ytr))
print(clf.alpha_)

# clf = kernel_ridge.KernelRidge(alpha=0.01)
# clf.fit(Xtr_expanded, Ytr)
# kridge_preds = clf.predict(Xtr_expanded)
import csv
import get_features
import expand_features
import unicodecsv

# Listing whose expanded feature keys define the CSV column names.
sample_url = "http://www.funda.nl/koop/amsterdam/appartement-49453167-van-boetzelaerstraat-34-2/"

# Fetch the sample listing, tag it and expand its features; only the
# resulting keys are written out, as a single header row.
sample = get_features.get_features(sample_url)
sample["type"] = "sample"
expand_features.expand_features(sample)

# "wb" because unicodecsv performs the UTF-8 encoding itself; the with-block
# guarantees the header row is flushed and the file handle is closed.
with open("expanded_headers.csv", "wb") as header_file:
    header_output = unicodecsv.writer(header_file, encoding='utf-8', delimiter='|')
    header_output.writerow(sample.keys())