Esempio n. 1
0
def main():
    """Fit the per-class average model on the train set and print its quality.

    Loads the train and test flats, groups the training records with
    ``classify_flats``, reduces every group with ``get_avg_class``, feeds
    the resulting table to ``get_errors`` together with the test records and
    prints whatever ``get_quality`` reports for the labels/predictions pair.
    """
    raw_train = files.export_flats(config.TRAIN_FILE)
    raw_test = files.export_flats(config.TEST_FILE)
    train_records = files.get_flats_data(raw_train)
    test_records = files.get_flats_data(raw_test)

    # One aggregated value per class produced by classify_flats.
    class_averages = {
        label: get_avg_class(members)
        for label, members in classify_flats(train_records).items()
    }

    labels, predictions = get_errors(test_records, class_averages)
    quality = get_quality(labels, predictions)
    print(quality)
Esempio n. 2
0
def _assign_label(flat, key, mapping, value):
    """Write into ``flat[key]`` the name from *mapping* whose value equals *value*.

    ``flat[key]`` is left untouched when no entry matches; if several names
    map to the same value, the last one in iteration order wins.
    """
    for name, code in mapping.items():
        if code == value:
            flat[key] = name


def restore():
    """Fill in missing attributes of incomplete flats and save the merged set.

    The dataset is split with ``split_dataset`` into complete and incomplete
    flats. For every incomplete flat a numeric value of ``0`` is treated as
    "missing" and replaced:

    * ``floor`` / ``condition`` / ``house`` -- predicted with ``eucl_rest``
      from the complete flats; falls back to the median of the known values
      when no prediction is returned, and is capped at the largest known
      value.
    * ``floors`` -- predicted with ``cc_rest`` and capped the same way.
    * ``balcony`` -- set to the fixed code ``5``.

    Restored values are written both to the numeric record and, converted
    back to their raw representation, to the original flat. The complete and
    restored flats are then saved to ``config.FLATS_FILE``.
    """
    full_flats, not_full_flats = split_dataset()
    full_flats_data = files.get_flats_data(full_flats)
    not_full_flats_data = files.get_flats_data(not_full_flats)

    # Known values of every attribute (except the target) among complete flats.
    params = {}
    for flat_data in full_flats_data:
        for k, v in flat_data.items():
            if k != 'rating':
                params.setdefault(k, []).append(v)

    n = len(not_full_flats_data)
    for it, flat in enumerate(not_full_flats, start=1):
        fl = files.get_flat_data(flat)
        print(f'Flat {it} out of {n}')
        # NOTE: the loop is deliberately kept per-key. The 'floors'/'balcony'
        # restoration below runs when the first "other" key is reached, so the
        # order of keys in ``fl`` decides which values eucl_rest already sees
        # as restored.
        for k in fl:
            if k in ('floor', 'condition', 'house'):
                if fl[k] != 0:
                    continue
                pred = eucl_rest(full_flats_data, fl, k)
                if pred is None:
                    pred = np.median(params[k])
                else:
                    upper = max(params[k])
                    if pred > upper:
                        pred = upper
                fl[k] = pred
                if k == 'floor':
                    flat[k] = str(pred)
                elif k == 'condition':
                    _assign_label(flat, k, config.CONDITIONS, pred)
                else:  # k == 'house'
                    _assign_label(flat, k, config.HOUSES, pred)
            else:
                if fl['floors'] == 0:
                    pred = cc_rest(fl, 'floors', params)
                    upper = max(params['floors'])
                    if pred > upper:
                        pred = upper
                    fl['floors'] = pred
                    flat['floors'] = str(pred)
                if fl['balcony'] == 0:
                    fl['balcony'] = 5
                    _assign_label(flat, 'balcony', config.BALCONIES, 5)

    res = full_flats + not_full_flats
    files.save_flats(res, config.FLATS_FILE)
Esempio n. 3
0
def create_db(num, *, max_std_dev=400):
    """Generate *num* synthetic users, rate every flat for them and save it.

    Candidate users come from ``generate_user``. A candidate is accepted only
    when the counts of the ratings 1..5 it gives across all flats have a
    standard deviation below *max_std_dev*, i.e. its ratings are spread over
    the scale rather than concentrated on a few values.

    The saved structure maps each flat's ``'link'`` to a dict of
    ``'user_<i>' -> rating`` (rating stored as a string) and is written with
    ``save_users`` to ``USERS_FILE``.

    Args:
        num: Number of users to generate. Values ``<= 0`` produce no users
            (previously this could loop forever).
        max_std_dev: Acceptance threshold for the standard deviation of the
            per-rating counts. Defaults to the original hard-coded 400.
    """
    flats = files.export_flats(FLATS_FILE)
    flats_data = files.get_flats_data(flats)
    print('Creating DB...')

    users = []
    # Bounded by the number of accepted users, so num <= 0 terminates at once.
    while len(users) < num:
        user = generate_user()
        rating_counts = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
        for flat_data in flats_data:
            rating_counts[rate(flat_data, user)] += 1
        if np.std(tuple(rating_counts.values())) < max_std_dev:
            users.append(user)
            print(f'Generating user {len(users)}/{num}...')

    flats_users = {}
    flats_cnt = len(flats)
    for flats_it, flat in enumerate(flats, start=1):
        link = flat['link']
        flat_data = files.get_flat_data(flat)
        print(f'Rating {flats_it}/{flats_cnt}...')
        flats_users[link] = {
            'user_{}'.format(user_it): str(rate(flat_data, user))
            for user_it, user in enumerate(users, start=1)
        }

    save_users(flats_users, USERS_FILE)
Esempio n. 4
0
import files
from config import *

import math
def get_quality_rmse(labels, predictions):
    """Return the root-mean-square error between *predictions* and *labels*.

    The number of compared pairs is ``len(labels)``; both arguments are read
    by position. An empty *labels* yields ``0.0``.
    """
    count = len(labels)
    mean_sq_error = 0
    for idx in range(count):
        diff = predictions[idx] - labels[idx]
        # Divide each term by the count so the running total is the mean.
        mean_sq_error += diff ** 2 / count
    return math.sqrt(mean_sq_error)

# Load the raw train/test flats, then derive their data records.
# The calls run in this order: export train, export test, data train, data test.
train_flats, test_flats = (
    files.export_flats(path) for path in (TRAIN_FILE, TEST_FILE)
)
train_flats_data, test_flats_data = (
    files.get_flats_data(flats) for flats in (train_flats, test_flats)
)

def get_xy(flats_data, n = None):
    """Build feature rows (and targets) from flat records.

    Each record is read as a mapping with a ``'rating'`` entry; every other
    value, in the mapping's iteration order, becomes one element of a feature
    row appended to ``X``.

    NOTE(review): in the visible code ``Y`` is never filled, ``n`` and ``cnt``
    are not used after being set, and nothing is returned -- the definition
    looks truncated; confirm against the complete file before relying on it.
    """
    # Default the row limit to the full dataset size.
    if n == None:
        n = len(flats_data)
    X = []
    Y = []
    cnt = 0
    for flat_data in flats_data:
        x = []
        # Target value for this record.
        y = flat_data['rating']
        # Everything except the target is treated as a feature.
        for k, v in flat_data.items():
            if k != 'rating':
                x.append(v)
        X.append(x)
Esempio n. 5
0
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from config import *
from files import export_flats, get_flats_data
from math import radians, cos, sin, acos
import numpy as np

# Classifier configuration: 21 trees, out-of-bag scoring enabled, fixed seed.
model = RandomForestClassifier(
    n_estimators=21,
    oob_score=True,
    random_state=1,
)

# Load the raw train/test flats, then derive their data records.
# The calls run in this order: export train, export test, data train, data test.
train_flats, test_flats = (
    export_flats(path) for path in (TRAIN_FILE, TEST_FILE)
)
train_flats_data, test_flats_data = (
    get_flats_data(flats) for flats in (train_flats, test_flats)
)


def calculate_distance(location1, location2):
    """Return the great-circle distance between two points, in kilometres.

    Uses the spherical law of cosines on a sphere of radius 6371 km (Earth's
    mean radius).

    Args:
        location1: ``(latitude, longitude)`` in degrees.
        location2: ``(latitude, longitude)`` in degrees.

    Returns:
        The distance along the sphere's surface as a float.
    """
    radius = 6371
    lat1 = radians(location1[0])
    lng1 = radians(location1[1])
    lat2 = radians(location2[0])
    lng2 = radians(location2[1])
    cos_angle = (
        sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) * cos(lng1 - lng2)
    )
    # Rounding can push the value a hair outside [-1, 1] for identical or
    # antipodal points, which would make acos raise "math domain error".
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return radius * acos(cos_angle)


def get_center_distance(coordinates):
    """Return the distance from *coordinates* to the hard-coded town center.

    *coordinates* is passed to ``calculate_distance`` together with the
    fixed center point ``(43.240544, 76.917604)``; the result is returned
    unchanged.
    """
    center_lat, center_lng = 43.240544, 76.917604
    return calculate_distance(coordinates, (center_lat, center_lng))