Ejemplo n.º 1
0
import pandas as pd
import numpy as np
import datetime
import time
import math
import util

# Measure how long util.create_sparse_matrix takes to process "new.csv".
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock and can jump (NTP sync, manual
# clock change) while the call is running, which would corrupt the reading.
start = time.perf_counter()
sparse_dict = util.create_sparse_matrix("new.csv", replace=False)
# Elapsed time in seconds (fractional).
print("time", time.perf_counter() - start)



Ejemplo n.º 2
0
import pandas as pd
import util

DATAFRAME_NAME = './new.csv'

# Keep only the first 2000 rows. The explicit copy() turns the sample into an
# independent frame, so the column assignments below do not write into a slice
# of the frame returned by util (avoids pandas' SettingWithCopyWarning).
df = (
    util.create_sparse_matrix(DATAFRAME_NAME, replace=False)
    .iloc[:2000, :]
    .copy()
)

# Distinct ids in order of first appearance. Iterating a set() (as the script
# did before) has no guaranteed order and, for string ids, changes between
# interpreter runs, so the id -> index mapping was not reproducible.
# tolist() keeps the keys as plain Python scalars.
user_ids = df['user_id'].unique().tolist()
hotel_ids = df['hotel_id'].unique().tolist()

# NOTE: n_movies is the number of distinct hotel_id values; the historical
# name is kept so any code relying on it keeps working.
n_users, n_movies = len(user_ids), len(hotel_ids)
print(n_movies)

# Re-index users to contiguous integers 0 .. n_users - 1.
user_index_map = {user_id: idx for idx, user_id in enumerate(user_ids)}
df['user_id'] = df['user_id'].map(user_index_map)

# Re-index hotels to contiguous integers 0 .. n_movies - 1.
hotel_index_map = {hotel_id: idx for idx, hotel_id in enumerate(hotel_ids)}
df['hotel_id'] = df['hotel_id'].map(hotel_index_map)

# Persist the re-indexed sample (the DataFrame index is written as well).
df.to_csv("data.csv")
Ejemplo n.º 3
0
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import keras
import util
import pickle

import os

DATAFRAME_NAME = './new.csv'

df = util.create_sparse_matrix(DATAFRAME_NAME, replace=True)

# Distinct ids in order of first appearance. Iterating a set() (as the script
# did before) has no guaranteed order and, for string ids, changes between
# interpreter runs, so the id -> index mapping was not reproducible. That
# matters for any mapping that is saved and reloaded in a later process.
# tolist() keeps the keys as plain Python scalars.
user_ids = df['user_id'].unique().tolist()
hotel_ids = df['hotel_id'].unique().tolist()

# NOTE: n_movies is the number of distinct hotel_id values; the historical
# name is kept so any code relying on it keeps working.
n_users, n_movies = len(user_ids), len(hotel_ids)

# Re-index users to contiguous integers 0 .. n_users - 1.
user_index_map = {user_id: idx for idx, user_id in enumerate(user_ids)}
df['user_id'] = df['user_id'].map(user_index_map)

# Re-index hotels to contiguous integers 0 .. n_movies - 1.
hotel_index_map = {hotel_id: idx for idx, hotel_id in enumerate(hotel_ids)}
df['hotel_id'] = df['hotel_id'].map(hotel_index_map)
print(df.head())

# Hold out 20% of the rows for testing. No random_state is passed, so the
# split differs from run to run.
train, test = train_test_split(df, test_size=0.2)

with open('user_id_map.pkl', "wb") as handle: