Esempio n. 1
0
def main():
    """Score the per-class-average model on the test flats and print it.

    Reads the flats named by ``config.TRAIN_FILE`` and ``config.TEST_FILE``,
    converts both with ``files.get_flats_data``, groups the training data
    with ``classify_flats``, reduces every group with ``get_avg_class`` and
    prints ``get_quality`` of the labels/predictions that ``get_errors``
    returns for the test data.
    """
    raw_train = files.export_flats(config.TRAIN_FILE)
    raw_test = files.export_flats(config.TEST_FILE)
    train_data = files.get_flats_data(raw_train)
    test_data = files.get_flats_data(raw_test)

    # One reduced value per group produced by classify_flats.
    class_averages = {
        label: get_avg_class(members)
        for label, members in classify_flats(train_data).items()
    }

    labels, predictions = get_errors(test_data, class_averages)
    quality = get_quality(labels, predictions)
    print(quality)
Esempio n. 2
0
def main():
    """Set the rating of every flat in ``config.FLATS_FILE`` to ``'0'``.

    The flats are loaded, any flat whose ``'rating'`` is not already the
    string ``'0'`` is overwritten in place, and the list is saved back to
    the same file.
    """
    # restore()  (call left commented out)
    flats = files.export_flats(config.FLATS_FILE)
    needs_reset = (entry for entry in flats if entry['rating'] != '0')
    for entry in needs_reset:
        entry['rating'] = '0'
    files.save_flats(flats, config.FLATS_FILE)
Esempio n. 3
0
def split_dataset():
    """Partition the flats from ``config.FLATS_FILE`` by ``is_full_info``.

    Returns:
        A ``(full_flats, not_full_flats)`` tuple of lists: flats for which
        ``is_full_info`` is truthy, and all the others, each in file order.
    """
    complete, incomplete = [], []
    for flat in files.export_flats(config.FLATS_FILE):
        bucket = complete if is_full_info(flat) else incomplete
        bucket.append(flat)
    return complete, incomplete
Esempio n. 4
0
def create_db(num):
    """Generate ``num`` users and save their rating of every flat.

    Candidate users come from ``generate_user``. A candidate is accepted
    only when the standard deviation of its rating histogram (how many
    flats received each rating 1..5 from ``rate``) is below 400. Every
    accepted user then rates every flat, and the resulting mapping
    ``{flat['link']: {'user_<i>': '<rating>'}}`` is passed to
    ``save_users`` together with ``USERS_FILE``.

    Args:
        num: Number of users to accept. For ``num <= 0`` no users are
            generated and every flat gets an empty rating mapping.

    Raises:
        KeyError: If ``rate`` returns a value outside 1..5 while the
            histogram is being built.
    """
    flats = files.export_flats(FLATS_FILE)
    flats_data = files.get_flats_data(flats)
    print('Creating DB...')

    users = []
    # Loop on the number of accepted users instead of breaking on an exact
    # ``it == num`` match: the equality test could never fire for
    # ``num <= 0`` once a candidate had been accepted, hanging the loop.
    while len(users) < num:
        user = generate_user()
        rating_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        for flat_data in flats_data:
            rating_counts[rate(flat_data, user)] += 1
        # Accept only candidates whose ratings are spread evenly enough
        # over the five grades.
        if np.std(tuple(rating_counts.values())) < 400:
            users.append(user)
            print(f'Generating user {len(users)}/{num}...')

    flats_users = {}
    flats_cnt = len(flats)
    for flats_it, flat in enumerate(flats, start=1):
        link = flat['link']
        flat_data = files.get_flat_data(flat)
        print(f'Rating {flats_it}/{flats_cnt}...')
        # Users are named by their 1-based acceptance order; ratings are
        # stored as strings.
        flats_users[link] = {
            f'user_{user_it}': str(rate(flat_data, user))
            for user_it, user in enumerate(users, start=1)
        }

    save_users(flats_users, USERS_FILE)
Esempio n. 5
0
from lightgbm import LGBMClassifier
import files
from config import *

import math
def get_quality_rmse(labels, predictions):
    """Return the root-mean-square error of ``predictions`` against ``labels``.

    Exactly ``len(labels)`` positions are compared, so ``predictions`` must
    be indexable at each of them. With no labels the result is ``0.0``.
    """
    count = len(labels)
    mean_squared_error = 0
    for idx, label in enumerate(labels):
        # Divide each term by the count so the running total is the mean.
        mean_squared_error += (predictions[idx] - label) ** 2 / count
    return math.sqrt(mean_squared_error)

# Load the train and test flats, then pass each set through
# files.get_flats_data. Both steps handle train before test.
train_flats, test_flats = [
    files.export_flats(path) for path in (TRAIN_FILE, TEST_FILE)
]
train_flats_data, test_flats_data = [
    files.get_flats_data(flats) for flats in (train_flats, test_flats)
]

def get_xy(flats_data, n = None):
    """Collect per-flat feature values and ratings from ``flats_data``.

    NOTE(review): the visible definition stops inside the loop. Nothing is
    appended to ``X``/``Y``, ``cnt`` and ``n`` are never read, and there is
    no ``return``. The remainder of the function is presumably missing
    from this excerpt; confirm against the full source.

    Args:
        flats_data: Iterable of dicts, each holding a ``'rating'`` key.
        n: Optional limit; defaults to ``len(flats_data)`` when ``None``.
    """
    if n == None:
        n = len(flats_data)
    X = []
    Y = []
    cnt = 0
    for flat_data in flats_data:
        # x gathers every value except the rating; y is the rating itself.
        x = []
        y = flat_data['rating']
        for k, v in flat_data.items():
            if k != 'rating':
                x.append(v)
Esempio n. 6
0
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from config import *
from files import export_flats, get_flats_data
from math import radians, cos, sin, acos
import numpy as np

# Classifier with a fixed seed; out-of-bag scoring is switched on.
model = RandomForestClassifier(
    random_state=1,
    n_estimators=21,
    oob_score=True,
)

# Load the train and test flats, then pass each set through get_flats_data.
# Both steps handle train before test.
train_flats, test_flats = [
    export_flats(path) for path in (TRAIN_FILE, TEST_FILE)
]
train_flats_data, test_flats_data = [
    get_flats_data(flats) for flats in (train_flats, test_flats)
]


def calculate_distance(location1, location2):
    """Return the great-circle distance in kilometres between two points.

    Uses the spherical law of cosines on a sphere of radius 6371 km.

    Args:
        location1: ``(latitude, longitude)`` of the first point, in degrees.
        location2: ``(latitude, longitude)`` of the second point, in degrees.

    Returns:
        The distance as a float; approximately ``0.0`` for identical points.
    """
    radius = 6371
    lat1, lng1 = radians(location1[0]), radians(location1[1])
    lat2, lng2 = radians(location2[0]), radians(location2[1])
    cos_angle = (
        sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) * cos(lng1 - lng2)
    )
    # Floating-point rounding can leave the cosine a hair outside [-1, 1]
    # (typically for identical or antipodal points), which would make
    # acos() raise "ValueError: math domain error". Clamp it first.
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return radius * acos(cos_angle)


def get_center_distance(coordinates):
    """Return ``calculate_distance`` from ``coordinates`` to the town centre.

    The town centre is the fixed point ``(43.240544, 76.917604)``.
    """
    center_lat, center_lng = 43.240544, 76.917604
    return calculate_distance(coordinates, (center_lat, center_lng))
Esempio n. 7
0
def main():
    """Split the flats file into a train file and a test file.

    The first 3000 flats from ``config.FLATS_FILE`` are saved to
    ``config.TRAIN_FILE``; everything after them goes to
    ``config.TEST_FILE``.
    """
    split_at = 3000
    flats = files.export_flats(config.FLATS_FILE)
    # Take both slices before saving anything.
    head, tail = flats[:split_at], flats[split_at:]
    files.save_flats(head, config.TRAIN_FILE)
    files.save_flats(tail, config.TEST_FILE)