#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lib.ErogameScape import SqlExecuter
import sklearn.decomposition
import multiprocessing as mp
import pandas as pd
import sqlite3
import json


def square_error(x, y):
    """Return the sum of squared element-wise differences between x and y."""
    return ((x - y) ** 2).sum()


def create_distance_ranking(x):
    """Return ``{x: labels}`` listing the five rows nearest to row ``x``.

    Distances are measured with ``square_error`` between row ``x`` of the
    module-level ``utility_matrix_pca`` frame and every other row. The
    result is ordered from nearest to farthest; ties are broken by the
    index label because ``(distance, label)`` tuples are sorted.

    The function takes a single argument and returns a one-entry dict so it
    can be passed directly to ``Pool.map`` and merged afterwards.
    """
    # Look the target row up once instead of once per candidate.
    target = utility_matrix_pca.loc[x]
    distances = sorted(
        (square_error(target, utility_matrix_pca.loc[i]), i)
        for i in utility_matrix_pca.index
        if i != x
    )
    return {x: [label for _, label in distances[:5]]}


review = SqlExecuter.execute("select uid,game,tokuten from userreview")
# Blank scores are treated as missing. A single masked .loc assignment
# replaces the per-row chained assignment, which pandas does not guarantee
# to write through to the original frame.
review.loc[review['tokuten'] == '', 'tokuten'] = None

# Rows are games, columns are users, cells are scores.
utility_matrix = review.pivot(index='game', columns='uid', values='tokuten')
# Keep only the users (columns) that have at least 20 non-null scores.
utility_matrix = utility_matrix[utility_matrix.columns[utility_matrix.notnull().sum() >= 20]]
utility_matrix = utility_matrix.fillna(0)
utility_matrix = utility_matrix.astype('int64')

# Reduce every game row to 100 principal components.
pca = sklearn.decomposition.PCA(100)
pca.fit(utility_matrix.values)
utility_matrix_pca = pca.transform(utility_matrix.values)
utility_matrix_pca = pd.DataFrame(index=utility_matrix.index, data=utility_matrix_pca)

# NOTE(review): the workers read the module-level utility_matrix_pca, and
# this script has no __main__ guard, so this presumably relies on the
# "fork" start method -- confirm before running on a "spawn" platform.
p = mp.Pool()
try:
    result = p.map(create_distance_ranking, utility_matrix_pca.index)
finally:
    # Release the worker processes even if the map raises.
    p.close()
    p.join()
# Merge the per-game one-entry dicts into a single mapping.
recommendation = {k: v for dic in result for k, v in dic.items()}
def create_dmm_image_url(dmm, dmm_genre, dmm_genre_2): if dmm == "": return None if dmm_genre == "mono": genre = "mono" genre2 = "game" else: genre = "digital" genre2 = "game" if dmm_genre_2 == "doujin" else "pcgame" return "http://pics.dmm.co.jp/{0}/{1}/{2}/{2}ps.jpg".format(genre, genre2, dmm) game = SqlExecuter.execute( """ select game.id, game.gamename, game.dmm, game.dmm_genre, game.dmm_genre_2, amazon.asin, amazon.mediumimage as amazon_image_url from gamelist as game left join amazon_game on game.id = amazon_game.game left join amazonlist as amazon on amazon_game.asin = amazon.asin """ ) games = [] for key, row in game.iterrows(): games.append( ( row["id"], row["gamename"], row["asin"], row["amazon_image_url"], create_dmm_image_url(row["dmm"], row["dmm_genre"], row["dmm_genre_2"]), None, )