예제 #1
0
def user_attributes(user_reviews):
    """Rank the attributes of the businesses listed in ``user_reviews``.

    Args:
        user_reviews: Collection whose iteration yields business ids (for a
            dict, its keys). Its length is the denominator of the share.

    Returns:
        dict mapping attribute name -> ``(count, count / len(user_reviews))``
        where ``count`` is the number of listed businesses whose attribute
        value is the string ``'True'``. Entries are ordered by descending
        share; ties keep the order in which attributes were first seen.
    """
    business_list = json_load.convert(rootdir, 'business.json')
    # A set gives O(1) membership tests; the original scanned a list for
    # every business in the file.
    reviewed_ids = set(user_reviews)
    attribute_dict = {}

    for business in business_list:
        if business['business_id'] not in reviewed_ids:
            continue
        attributes = business['attributes']
        if attributes is None:
            continue
        for attribute, value in attributes.items():
            # Only the literal string 'True' counts as "has the attribute".
            if value == 'True':
                attribute_dict[attribute] = attribute_dict.get(attribute, 0) + 1

    # Safe for empty input: with no reviews there are no counts to divide.
    review_count = len(user_reviews)
    ranked = sorted(
        ((attribute, (count, count / review_count))
         for attribute, count in attribute_dict.items()),
        reverse=True,
        key=lambda item: item[1][1])
    return dict(ranked)
예제 #2
0
def attribute_matrix_nw(relevant_companies, user_attributes):
    """Build a business x attribute 0/1 matrix.

    Args:
        relevant_companies: Iterable of business ids; duplicates collapse
            into a single row.
        user_attributes: Iterable of attribute names (for a dict, its keys)
            used as the columns.

    Returns:
        pandas.DataFrame indexed by business id. A cell is 1 when the
        business's attribute value is the string ``'True'`` and 0 otherwise.
        Rows for ids that never occur in business.json keep their initial
        NaN values.
    """
    matrix = pd.DataFrame(index=set(relevant_companies),
                          columns=list(user_attributes))
    business_list = json_load.convert(rootdir, 'business.json')

    for business in business_list:
        business_id = business['business_id']
        if business_id not in matrix.index:
            continue
        attributes = business['attributes']
        for attribute in matrix.columns:
            has_attribute = (attributes is not None
                             and attributes.get(attribute) == 'True')
            # Single .loc write instead of chained indexing
            # (matrix[col][row] = v), which may assign into a temporary
            # copy and is a no-op under pandas Copy-on-Write.
            matrix.loc[business_id, attribute] = 1 if has_attribute else 0
    return matrix
예제 #3
0
def relevant_companies(user_reviews):
    """List the businesses that share a category with the user's reviews.

    Args:
        user_reviews: Mapping whose values are sequences with the list of
            category names at index 1.

    Returns:
        list of business ids. A business appears once for every category it
        shares with the user's categories, so ids can repeat.
    """
    business_list = json_load.convert(rootdir, 'business.json')

    category_set = set()
    for review in user_reviews:
        category_set.update(user_reviews[review][1])

    company_list = []
    for business in business_list:
        categories = business["categories"]
        if categories is None:
            continue
        # Compare whole category names. The original used `in` on the raw
        # comma-separated string, which is a substring test and therefore
        # matched e.g. "Food" against "Fast Food".
        business_categories = {name.strip() for name in categories.split(',')}
        shared = len(category_set & business_categories)
        # Keep the original multiplicity: one entry per matching category.
        company_list.extend([business["business_id"]] * shared)

    return company_list
예제 #4
0
def user_reviews(user, data):
    """Collect the businesses ``user`` rated with at least 3 stars.

    Args:
        user: The user id to filter on.
        data: Iterable of review records with 'user_id', 'business_id' and
            'stars' keys.

    Returns:
        dict mapping business id -> ``(stars, [category, ...])``. When the
        same business is reviewed more than once the last review wins.

    Raises:
        KeyError: If a selected review references a business id that is not
            present in business.json.
    """
    business_list = json_load.convert(rootdir, 'business.json')
    business_dict = {
        business["business_id"]: business["categories"]
        for business in business_list
    }

    liked = {}
    for review in data:
        if review['user_id'] == user and review['stars'] >= 3:
            categories = business_dict[review['business_id']]
            # Categories can be None (other functions in this file guard for
            # it); treat missing/empty categories as "no categories" instead
            # of crashing on None.split.
            if categories:
                names = [name.strip() for name in categories.split(',')]
            else:
                names = []
            liked[review['business_id']] = (review['stars'], names)
    return liked
예제 #5
0
def random_recomendations(company_dict, user_reviews):
    """Re-weight the scores in ``company_dict`` and return a random subset.

    Args:
        company_dict: dict mapping business id -> score. It is updated in
            place with the re-weighted scores.
        user_reviews: Sized collection of the user's reviews; only its
            length is used, to pick the weights.

    Returns:
        dict with up to 20 randomly drawn entries of ``company_dict``. Draws
        are made with replacement, so repeated draws collapse and the result
        can hold fewer than 20 entries. Empty when ``company_dict`` is empty.
    """
    business_list = json_load.convert(rootdir, 'business.json')

    # The tier depends only on the number of reviews, so decide it once.
    review_count = len(user_reviews)
    if review_count > 9:
        weights = (0.7, 0.3)
    elif review_count > 4:
        weights = (0.5, 0.5)
    else:
        weights = None  # too few reviews: scores stay untouched

    if weights is not None:
        own_weight, star_weight = weights
        for company in business_list:
            business_id = company['business_id']
            if business_id in company_dict:
                # NOTE(review): the two weighted terms are multiplied, not
                # added, so the weights only scale the product - confirm
                # this is intended.
                company_dict[business_id] = (
                    company_dict[business_id] *
                    own_weight) * (company['stars'] * star_weight)

    # random.choice raises IndexError on an empty sequence.
    if not company_dict:
        return {}

    candidates = list(company_dict)  # built once instead of on every draw
    return_dict = {}
    for _ in range(20):
        company = random.choice(candidates)
        return_dict[company] = company_dict[company]

    return return_dict
예제 #6
0
def user_recomendations(company_dict, user_reviews):
    """Re-weight the scores in ``company_dict`` and return the top 20.

    Args:
        company_dict: dict mapping business id -> score. It is updated in
            place with the re-weighted scores.
        user_reviews: Sized collection of the user's reviews; only its
            length is used, to pick the weights.

    Returns:
        dict with the (at most) 20 highest-scoring entries of
        ``company_dict``, ordered by descending score. Ties keep their
        order from ``company_dict``.
    """
    business_list = json_load.convert(rootdir, 'business.json')

    # The tier depends only on the number of reviews, so decide it once.
    review_count = len(user_reviews)
    if review_count > 9:
        weights = (0.7, 0.3)
    elif review_count > 4:
        weights = (0.5, 0.5)
    else:
        weights = None  # too few reviews: scores stay untouched

    if weights is not None:
        own_weight, star_weight = weights
        for company in business_list:
            business_id = company['business_id']
            if business_id in company_dict:
                # NOTE(review): the two weighted terms are multiplied, not
                # added, so the weights only scale the product - confirm
                # this is intended.
                company_dict[business_id] = (
                    company_dict[business_id] *
                    own_weight) * (company['stars'] * star_weight)

    ranked = sorted(
        company_dict.items(), reverse=True, key=lambda item: item[1])
    # Slicing the sorted pairs replaces the intermediate dict + islice.
    return dict(ranked[:20])
예제 #7
0
def user_list():
    """Return the 'user_id' of every record loaded from user.json."""
    records = json_load.convert(rootdir, 'user.json')
    ids = []
    for record in records:
        ids.append(record['user_id'])
    return ids
예제 #8
0
import numpy as np
import json
import os
import json_load
import collections
import pandas as pd
import itertools
import random
import statistics
import helperfunctions as hf

# Directory holding the JSON files that are read through json_load.
rootdir = './data'

# Load all reviews and split them into a training and a test DataFrame.
# The path reuses rootdir instead of repeating the './data' literal.
df_training, df_test = hf.split_data(
    pd.DataFrame(json_load.convert(rootdir, 'review.json')))
# Serialize each split to a JSON string (one object per row) ...
training_json = df_training.to_json(orient='records')
test_json = df_test.to_json(orient='records')
# ... and parse it back into plain Python lists of dicts.
trainingdata = json.loads(training_json)
testdata = json.loads(test_json)

#print(training_json)


# Returns a list with user id's from the region
def user_list():
    """Return the 'user_id' field of each entry in user.json, in file order."""
    entries = json_load.convert(rootdir, 'user.json')
    return [entry['user_id'] for entry in entries]


# Returns a dict with review scores from the user
예제 #9
0
import numpy as np
import matplotlib.pyplot as plt
import json
import os
import json_load

rootdir = './data'
file = 'user.json'

json_data = json_load.convert(rootdir, file)
review = {}

# Create feature plot: 'useful' value per average-star rating (rounded to
# one decimal).
# NOTE(review): users with the same rounded average overwrite each other, so
# only the last user's 'useful' value is plotted per x value - confirm
# whether a sum or mean was intended.
for data in json_data:
    key = round(data["average_stars"], 1)
    review[key] = data['useful']

lists = sorted(review.items())  # sorted by key, return a list of tuples

x, y = zip(*lists)  # unpack a list of pairs into two tuples
plt.plot(x, y)
plt.title('Ten cities\nUsefulness based on average stars\ntested on')
plt.xlabel('Average stars')
plt.ylabel('Noted as useful')
# savefig does not create missing directories; make sure the target exists.
os.makedirs('./plots', exist_ok=True)
plt.savefig('./plots/user_stars_usefull.png')
plt.show()
예제 #10
0
# Hard-coded user id.
userId = "t-nB38eHbeFuabXBdJMwvg"

# skip is True only when pickles were found AND the user chose to reuse them.
skip = False
picklesFound = hf.checkForPickle('./pickles')
if picklesFound:
    answer = input('Already found matrices. Do you want to use these? Type yes/no\n')
    skip = answer.lower() == 'yes'
else:
    # This message belongs to the "nothing found" case; the original attached
    # it to the user answering "no", so it was printed while pickles existed
    # and never when they were actually missing.
    print('No pickle files found that contain matrices. Creating new matrices now..............')

df_training = None
df_test = None
if (skip == False):
    data = pd.DataFrame(json_load.convert('./data', 'review.json'))
    data.drop_duplicates(subset =["business_id","user_id"], keep = False, inplace = True)

    df_training, df_test = hf.split_data(data)
    df_training.to_pickle('./pickles/df_training.pkl')
    df_test.to_pickle('./pickles/df_test.pkl')

    print('trainingset' + str(len(df_training)))
    print('testset' + str(len(df_test)))

    #utility matrix:
    utility_review = hf.pivot_ratings(df_training)
    utility_review.to_pickle('./pickles/utility.pkl')

    #mean center utility matrix
    centered = hf.mean_center_columns(utility_review)