def main():
    """Evaluate the class-average predictor on the test flats and print the score.

    The training flats are grouped with ``classify_flats``, each group is
    reduced with ``get_avg_class``, and the resulting per-class values are
    passed together with the test flats to ``get_errors``. The quality
    measure returned by ``get_quality`` is printed to stdout.
    """
    # Load both splits first, then convert them, in the same order as before.
    raw_train = files.export_flats(config.TRAIN_FILE)
    raw_test = files.export_flats(config.TEST_FILE)
    train_data = files.get_flats_data(raw_train)
    test_data = files.get_flats_data(raw_test)

    # One aggregated value per class key produced by classify_flats.
    by_class = classify_flats(train_data)
    class_averages = {
        label: get_avg_class(members) for label, members in by_class.items()
    }

    labels, predictions = get_errors(test_data, class_averages)
    print(get_quality(labels, predictions))
# restore(): fill in zero-valued attributes of the "not full" flats and save
# the combined dataset.
#
# Steps visible in the statement sequence below:
#   * split_dataset() returns (full_flats, not_full_flats); both lists are
#     converted with files.get_flats_data.
#   * `params` maps every key except 'rating' to the list of values that key
#     takes across the full flats.
#   * For each not-full flat, `fl = files.get_flat_data(flat)` and a progress
#     line 'Flat {it} out of {n}' is printed.
#   * A 'floor' / 'condition' / 'house' value equal to 0 (presumably the
#     marker for a missing value -- confirm) is predicted with
#     eucl_rest(full_flats_data, fl, k). A None prediction falls back to
#     np.median(params[k]); a prediction above max(params[k]) is clamped to
#     that maximum. The value is stored in fl[k] and written back to `flat`:
#     'floor' as str(pred), 'condition' / 'house' as the key of
#     config.CONDITIONS / config.HOUSES whose value equals pred.
#   * fl['floors'] == 0 is predicted with cc_rest(fl, 'floors', params),
#     clamped to max(params['floors']) and written back as str(pred).
#   * fl['balcony'] == 0 is set to 5 and flat['balcony'] becomes the key of
#     config.BALCONIES whose value is 5.
#   * full_flats + not_full_flats is saved with files.save_flats to
#     config.FLATS_FILE.
#
# NOTE(review): the line breaks and indentation of this function have been
# lost, so the nesting of the `else:` branch (and therefore when the 'floors'
# and 'balcony' fixes run relative to the per-key loop) cannot be determined
# from this text. Restore the layout from the upstream file before changing
# any logic.
# NOTE(review): when the layout is restored, the bare `except:` should become
# `except KeyError:` and `pred == None` should become `pred is None`.
def restore(): full_flats, not_full_flats = split_dataset() full_flats_data = files.get_flats_data(full_flats) not_full_flats_data = files.get_flats_data(not_full_flats) params = {} for flat in full_flats_data: for k, v in flat.items(): if k != 'rating': try: params[k].append(v) except: params[k] = [] params[k].append(v) it = 0 n = len(not_full_flats_data) for flat in not_full_flats: fl = files.get_flat_data(flat) it += 1 print(f'Flat {it} out of {n}') for k in fl: if k == 'floor' or k == 'condition' or k == 'house': if fl[k] == 0: pred = eucl_rest(full_flats_data, fl, k) if pred == None: pred = np.median(params[k]) elif pred > max(params[k]): pred = max(params[k]) fl[k] = pred if k == 'floor': flat[k] = str(pred) if k == 'condition': for k1, v1 in config.CONDITIONS.items(): if v1 == pred: flat[k] = k1 elif k == 'house': for k1, v1 in config.HOUSES.items(): if v1 == pred: flat[k] = k1 else: if fl['floors'] == 0: pred = cc_rest(fl, 'floors', params) if pred > max(params['floors']): pred = max(params['floors']) fl['floors'] = pred flat['floors'] = str(pred) if fl['balcony'] == 0: fl['balcony'] = 5 for k1, v1 in config.BALCONIES.items(): if v1 == 5: flat['balcony'] = k1 res = full_flats + not_full_flats files.save_flats(res, config.FLATS_FILE)
def create_db(num, max_std_dev=400):
    """Generate ``num`` synthetic users and save their ratings for every flat.

    Candidate users come from ``generate_user()``. A candidate is kept only
    when the standard deviation of its per-grade rating counts (grades 1-5,
    counted over all flats) is below ``max_std_dev``. Every flat is then
    rated by every kept user and the result is written with ``save_users``
    to ``USERS_FILE`` as ``{flat['link']: {'user_<i>': '<rating>'}}``.

    Args:
        num: Number of users to generate. A value of zero or less produces
            no users (previously this could loop forever).
        max_std_dev: Exclusive upper bound on the standard deviation of the
            per-grade counts for a candidate to be accepted. Defaults to
            400, the value that used to be hard-coded.
    """
    flats = files.export_flats(FLATS_FILE)
    flats_data = files.get_flats_data(flats)
    print('Creating DB...')

    users = []
    # Looping on the number of accepted users (rather than `while True` plus
    # an `== num` check) guarantees termination for num <= 0.
    while len(users) < num:
        user = generate_user()
        grade_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        for flat_data in flats_data:
            grade_counts[rate(flat_data, user)] += 1
        if np.std(tuple(grade_counts.values())) < max_std_dev:
            users.append(user)
            print(f'Generating user {len(users)}/{num}...')

    flats_users = {}
    flats_cnt = len(flats)
    for flats_it, flat in enumerate(flats, start=1):
        flat_data = files.get_flat_data(flat)
        print(f'Rating {flats_it}/{flats_cnt}...')
        # Ratings are stored as strings, keyed by a 1-based synthetic name.
        flats_users[flat['link']] = {
            'user_{}'.format(user_it): str(rate(flat_data, user))
            for user_it, user in enumerate(users, start=1)
        }

    save_users(flats_users, USERS_FILE)
# Import order is kept as in the original: `from config import *` may rebind
# names, so moving it relative to the other imports could change behaviour.
import files
from config import *
import math


def get_quality_rmse(labels, predictions):
    """Return the root-mean-square error between ``labels`` and ``predictions``.

    Args:
        labels: Indexable sequence of reference values; its length fixes how
            many pairs are compared.
        predictions: Indexable sequence of predicted values, read at the
            same indices as ``labels``.

    Returns:
        ``sqrt(mean((predictions[i] - labels[i]) ** 2))`` as a float; ``0.0``
        when ``labels`` is empty.

    Raises:
        IndexError: If ``predictions`` is shorter than ``labels``.
    """
    n = len(labels)
    # Named `mean_squared_error` instead of `sum` so the builtin is not
    # shadowed. Each term is divided by n before accumulation, as before, so
    # the floating-point result is unchanged.
    mean_squared_error = 0
    for i in range(n):
        mean_squared_error += (predictions[i] - labels[i]) ** 2 / n
    return math.sqrt(mean_squared_error)


# The train and test sets are loaded once, at import time. TRAIN_FILE and
# TEST_FILE are presumably supplied by the star import from config.
train_flats = files.export_flats(TRAIN_FILE)
test_flats = files.export_flats(TEST_FILE)
train_flats_data = files.get_flats_data(train_flats)
test_flats_data = files.get_flats_data(test_flats)


def get_xy(flats_data, n=None):
    """Collect, per flat, the list of all values except the 'rating' entry.

    NOTE(review): this definition looks cut off in the available text. `n`,
    `cnt`, `Y` and `y` are prepared but never used, and nothing is returned.
    The visible statements are reproduced as-is and the missing tail is
    deliberately not guessed.
    """
    if n is None:
        n = len(flats_data)
    X = []
    Y = []
    cnt = 0
    for flat_data in flats_data:
        x = []
        y = flat_data['rating']
        for k, v in flat_data.items():
            if k != 'rating':
                x.append(v)
        X.append(x)
# Import order is kept as in the original: `from config import *` may rebind
# names, so the later `math` / `numpy` imports must still come after it.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from config import *
from files import export_flats, get_flats_data
from math import radians, cos, sin, acos
import numpy as np

model = RandomForestClassifier(n_estimators=21, oob_score=True, random_state=1)

# The train and test sets are loaded once, at import time. TRAIN_FILE and
# TEST_FILE are presumably supplied by the star import from config.
train_flats = export_flats(TRAIN_FILE)
test_flats = export_flats(TEST_FILE)
train_flats_data = get_flats_data(train_flats)
test_flats_data = get_flats_data(test_flats)


def calculate_distance(location1, location2):
    """Return the great-circle distance between two (lat, lng) points.

    Uses the spherical law of cosines on a sphere of radius 6371
    (presumably the Earth's mean radius in kilometres, which would make the
    result kilometres -- confirm).

    Args:
        location1: Indexable pair ``(latitude, longitude)`` in degrees.
        location2: Indexable pair ``(latitude, longitude)`` in degrees.

    Returns:
        The distance as a float, in the same unit as the radius.
    """
    radius = 6371
    lat1 = radians(location1[0])
    lng1 = radians(location1[1])
    lat2 = radians(location2[0])
    lng2 = radians(location2[1])

    cos_angle = (
        sin(lat1) * sin(lat2)
        + cos(lat1) * cos(lat2) * cos(lng1 - lng2)
    )
    # Rounding can push the value a hair outside [-1, 1] for identical or
    # antipodal points, and acos() would then raise "math domain error".
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return radius * acos(cos_angle)


def get_center_distance(coordinates):
    """Return the distance from ``coordinates`` to the fixed town centre.

    Args:
        coordinates: Indexable pair ``(latitude, longitude)`` in degrees.

    Returns:
        ``calculate_distance(coordinates, town_center)`` for the hard-coded
        centre point (43.240544, 76.917604).
    """
    town_center = (43.240544, 76.917604)
    return calculate_distance(coordinates, town_center)