def main():
    """Evaluate the per-class-average baseline and print its quality score.

    Loads the train and test flats, groups the training data with
    ``classify_flats``, reduces every group with ``get_avg_class`` and then
    scores the test data against those per-class values.
    """
    # Both files are exported before either is converted, as in the
    # original call order.
    raw_train = files.export_flats(config.TRAIN_FILE)
    raw_test = files.export_flats(config.TEST_FILE)
    train_data = files.get_flats_data(raw_train)
    test_data = files.get_flats_data(raw_test)

    groups = classify_flats(train_data)
    class_averages = {
        label: get_avg_class(members) for label, members in groups.items()
    }

    labels, predictions = get_errors(test_data, class_averages)
    print(get_quality(labels, predictions))
def main():
    """Reset the rating of every stored flat to ``'0'`` and save the file."""
    # restore()
    records = files.export_flats(config.FLATS_FILE)
    for record in records:
        if record['rating'] == '0':
            # Already reset; nothing to rewrite.
            continue
        record['rating'] = '0'
    files.save_flats(records, config.FLATS_FILE)
def split_dataset():
    """Partition the stored flats by whether ``is_full_info`` accepts them.

    Returns:
        A ``(full_flats, not_full_flats)`` pair of lists; each keeps the
        order in which the flats were exported.
    """
    complete, incomplete = [], []
    for entry in files.export_flats(config.FLATS_FILE):
        # Pick the destination list first so the predicate runs exactly once.
        bucket = complete if is_full_info(entry) else incomplete
        bucket.append(entry)
    return complete, incomplete
def create_db(num):
    """Generate ``num`` synthetic users and store their ratings of every flat.

    Candidate users come from ``generate_user()``. For each candidate the
    results of ``rate()`` over all flats are tallied in buckets 1-5, and the
    candidate is accepted only when the standard deviation of those five
    counts is below 400. Every accepted user then rates every flat, and the
    resulting ``{flat link: {'user_<i>': rating string}}`` mapping is passed
    to ``save_users``.

    Args:
        num: Number of users to accept. A value <= 0 now yields an empty
            user set; previously the generation loop could never reach its
            exit condition and ran forever.
    """
    # Acceptance threshold on the spread of a candidate's rating counts.
    max_std_dev = 400

    flats = files.export_flats(FLATS_FILE)
    flats_data = files.get_flats_data(flats)
    print('Creating DB...')

    users = []
    # Bounded by the number of accepted users so the loop terminates for any
    # ``num``, including zero and negative values.
    while len(users) < num:
        user = generate_user()
        rating_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        for flat_data in flats_data:
            rating_counts[rate(flat_data, user)] += 1
        if np.std(tuple(rating_counts.values())) < max_std_dev:
            users.append(user)
            print(f'Generating user {len(users)}/{num}...')

    flats_users = {}
    flats_cnt = len(flats)
    for flat_no, flat in enumerate(flats, start=1):
        link = flat['link']
        flat_data = files.get_flat_data(flat)
        print(f'Rating {flat_no}/{flats_cnt}...')
        # Users are named by their 1-based position in the accepted list.
        flats_users[link] = {
            f'user_{user_no}': str(rate(flat_data, user))
            for user_no, user in enumerate(users, start=1)
        }

    save_users(flats_users, USERS_FILE)
from lightgbm import LGBMClassifier
import files
from config import *
import math


def get_quality_rmse(labels, predictions):
    """Return the root-mean-square error of ``predictions`` against ``labels``.

    Only the first ``len(labels)`` predictions are read. Each squared
    difference is divided by the label count before being accumulated.
    """
    count = len(labels)
    mean_squared = 0
    for idx in range(count):
        residual = predictions[idx] - labels[idx]
        mean_squared += residual ** 2 / count
    return math.sqrt(mean_squared)


# Module-level data loading: both files are exported before conversion.
train_flats = files.export_flats(TRAIN_FILE)
test_flats = files.export_flats(TEST_FILE)
train_flats_data = files.get_flats_data(train_flats)
test_flats_data = files.get_flats_data(test_flats)


def get_xy(flats_data, n = None):
    # NOTE(review): this definition appears to be cut off in the reviewed
    # excerpt; the visible statements are kept unchanged.
    if n == None:
        n = len(flats_data)
    X = []
    Y = []
    cnt = 0
    for flat_data in flats_data:
        x = []
        y = flat_data['rating']
        # Every value except the 'rating' entry goes into the feature row.
        for k, v in flat_data.items():
            if k != 'rating':
                x.append(v)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from config import *
from files import export_flats, get_flats_data
from math import radians, cos, sin, acos
import numpy as np

model = RandomForestClassifier(n_estimators=21, oob_score=True, random_state=1)

# Module-level data loading: both files are exported before conversion.
train_flats = export_flats(TRAIN_FILE)
test_flats = export_flats(TEST_FILE)
train_flats_data = get_flats_data(train_flats)
test_flats_data = get_flats_data(test_flats)


def calculate_distance(location1, location2):
    """Return the great-circle distance between two points.

    Uses the spherical law of cosines on a sphere of radius 6371 (Earth's
    mean radius in kilometres, so the result is in kilometres).

    Args:
        location1: ``(latitude, longitude)`` in degrees.
        location2: ``(latitude, longitude)`` in degrees.

    Returns:
        The distance as a float; 0.0 for identical points.
    """
    radius = 6371
    lat1 = radians(location1[0])
    lng1 = radians(location1[1])
    lat2 = radians(location2[0])
    lng2 = radians(location2[1])
    cos_angle = (sin(lat1) * sin(lat2)
                 + cos(lat1) * cos(lat2) * cos(lng1 - lng2))
    # Float rounding can push the cosine marginally outside [-1, 1] for
    # identical or antipodal points, which makes acos raise ValueError.
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return radius * acos(cos_angle)


def get_center_distance(coordinates):
    """Return the distance from ``coordinates`` to the hard-coded town centre.

    Args:
        coordinates: ``(latitude, longitude)`` in degrees.
    """
    town_center = (43.240544, 76.917604)
    return calculate_distance(coordinates, town_center)
def main():
    """Split the flats file into a train file and a test file.

    The first 3000 exported flats go to ``config.TRAIN_FILE`` and all
    remaining ones to ``config.TEST_FILE``.
    """
    split_at = 3000
    all_flats = files.export_flats(config.FLATS_FILE)
    # Take both slices before writing anything, as the original did.
    train_part, test_part = all_flats[:split_at], all_flats[split_at:]
    files.save_flats(train_part, config.TRAIN_FILE)
    files.save_flats(test_part, config.TEST_FILE)