def get_data():
    """Load the ml-20m ratings CSV, shuffle it, and split 90% train / 10% test.

    Returns:
        (df_train, df_test): shuffled DataFrames; df_test has a fresh index.
    """
    ratings = dataio.read_process("datasets/ml-20m/ratings.csv", sep=",")
    n_rows = len(ratings)
    # Shuffle by reindexing with a random permutation of the row positions,
    # then discard the old index so positions are contiguous again.
    shuffled = ratings.iloc[np.random.permutation(n_rows)].reset_index(drop=True)
    cut = int(n_rows * 0.9)
    train_part = shuffled[:cut]
    test_part = shuffled[cut:].reset_index(drop=True)
    return train_part, test_part
def get_data():
    """Load the ml-1m ratings, shuffle, and split 90/10 into train/test.

    Returns:
        (df_train, df_test, rows): the two splits plus the total row count.
    """
    ratings = dataio.read_process("./tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(ratings)
    # Random row order via a permutation index, with the index reset afterwards.
    ratings = ratings.iloc[np.random.permutation(rows)].reset_index(drop=True)
    boundary = int(rows * 0.9)
    train_df = ratings[:boundary]
    test_df = ratings[boundary:].reset_index(drop=True)
    return train_df, test_df, rows
def get_data():
    """Load ratings from a fixed local Windows path, shuffle, and split 90/10.

    NOTE(review): the absolute path is machine-specific; consider making it a
    parameter or a config value.
    """
    data = dataio.read_process(r"D:\Users\fuzzhang\software\tensorflow\TF_Recommend_Basic\TF_Recommend_Basic\TF-recomm\ratings.dat", sep="::")
    total = len(data)
    # Reorder rows randomly and rebuild a contiguous index.
    data = data.iloc[np.random.permutation(total)].reset_index(drop=True)
    pivot = int(total * 0.9)
    return data[:pivot], data[pivot:].reset_index(drop=True)
def get_data():
    """Read the ml-1m ratings file, shuffle rows, and split 90% / 10%.

    Returns:
        (df_train, df_test): shuffled train and test DataFrames.
    """
    frame = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    count = len(frame)
    # Apply a random permutation of row positions; reset the index so the
    # shuffled frame has positions 0..count-1 again.
    frame = frame.iloc[np.random.permutation(count)].reset_index(drop=True)
    edge = int(count * 0.9)
    train_split = frame[:edge]
    test_split = frame[edge:].reset_index(drop=True)
    return train_split, test_split
def get_data():
    """Load the movielens-like records CSV, shuffle, and split 90/10.

    NOTE(review): the path is user-specific; a relative/configured path would
    make this portable.
    """
    records = dataio.read_process(
        "/Users/chengyao/Projects/netease_spider/music_data/records_movielens_like.csv", sep=",")
    size = len(records)
    # Shuffle via random permutation, dropping the original index.
    records = records.iloc[np.random.permutation(size)].reset_index(drop=True)
    mark = int(size * 0.9)
    return records[:mark], records[mark:].reset_index(drop=True)
def get_data():
    """Load ml-1m ratings plus an auxiliary test table, shuffle, split 90/10.

    Returns:
        (df_train, df_test, wt): the two splits and the side table loaded from
        'totalfortest2.csv'.
    """
    ratings = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    # Auxiliary table used elsewhere alongside the splits.
    wt = pd.read_csv("totalfortest2.csv")
    total_rows = len(ratings)
    # Random row order, contiguous index.
    ratings = ratings.iloc[np.random.permutation(total_rows)].reset_index(drop=True)
    cutoff = int(total_rows * 0.9)
    train_set = ratings[:cutoff]
    test_set = ratings[cutoff:].reset_index(drop=True)
    return train_set, test_set, wt
def get_data():
    """Load ml-1m ratings, shuffle, split 90/10, and compute time statistics.

    Returns:
        (df_train, df_test, maxtime, ut_mean) where maxtime is the maximum of
        the 'st' column over ALL rows and ut_mean is the per-user mean of 'st'.
    """
    full = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    n = len(full)
    # Shuffle all rows; the statistics below are computed on the full
    # (shuffled) frame, not on a single split.
    full = full.iloc[np.random.permutation(n)].reset_index(drop=True)
    split_at = int(n * 0.9)
    train_frame = full[:split_at]
    test_frame = full[split_at:].reset_index(drop=True)
    # Builtin max over the Series, exactly as the original computed it.
    maxtime = max(full['st'])
    ut_mean = full.groupby(['user'])['st'].mean()
    return train_frame, test_frame, maxtime, ut_mean
def get_data():
    """Load basket-size data and split it by group boundaries.

    The last row of each run of equal values in the first column (i.e. where
    the value differs from the next row) goes to the test set; every other row
    goes to the training set. The helper 'group_indicator' column is dropped
    from both splits before returning.

    Returns:
        (df_train, df_test)
    """
    df = dataio.read_process("data/user_basket_size.csv", sep=",")
    # BUGFIX: DataFrame.ix was removed in pandas 1.0; use positional .iloc
    # for column 0, which is what .ix[:, 0] resolved to here.
    first_col = df.iloc[:, 0]
    # 1 on the last row of each group (value changes on the next row; the
    # final row compares against NaN from shift(-1) and is always marked 1).
    df['group_indicator'] = (first_col != first_col.shift(-1)).astype(int)
    df_train = df.loc[df.group_indicator == 0].drop('group_indicator', axis=1)
    df_test = df.loc[df.group_indicator == 1].drop('group_indicator', axis=1)
    # (The original also dropped the column from the local df; that frame is
    # never returned, so the cleanup has no external effect and is omitted.)
    return df_train, df_test
def get_data():
    """Load the test ratings file, shuffle its rows, and split 90/10.

    np.random.permutation(rows) produces a random ordering of 0..rows-1; see
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.permutation.html
    The index is reset after shuffling so positions are contiguous again.

    Returns:
        (df_train, df_test): 90% training split and 10% test split.
    """
    frame = dataio.read_process("/Users/xinghailong/Documents/workspace/my/DMInAction/src/tesnsorflow/recommend/ml-1m/test.dat", sep="::")
    total = len(frame)
    # Shuffle row order and rebuild the index.
    frame = frame.iloc[np.random.permutation(total)].reset_index(drop=True)
    # 90% of the data trains the model; the remaining 10% evaluates it.
    boundary = int(total * 0.9)
    train_data = frame[:boundary]
    test_data = frame[boundary:].reset_index(drop=True)
    return train_data, test_data
def get_data():
    """Grab data, compute the implicit feedback matrix, and do a train-test split.

    Returns:
        (df_train, df_test, implicit_mat) where implicit_mat is a float
        user-by-item ndarray with 1.0 where a rating exists and 0.0 elsewhere.

    NOTE: as in the original, df_train is the FULL frame (the shuffle and the
    train slice are deliberately disabled), so df_test overlaps df_train.
    """
    # Grab data using dataio functions.
    df = dataio.read_process(
        "../data_cleaning/data_for_CF/user_item_rating_fac.csv", sep=",")
    # Compute the implicit matrix: 1.0 where a (user, item) rating is present.
    # BUGFIX: .as_matrix() was removed in pandas 1.0; .to_numpy() is the
    # supported replacement and returns the same ndarray.
    implicit_mat = df.pivot(index='user', columns='item',
                            values='rate').notnull().to_numpy().astype(float)
    rows = len(df)
    # Kept (even though the shuffle below stays disabled) so the global NumPy
    # random stream advances exactly as before for any downstream consumers.
    sample_index = np.random.permutation(rows)
    # df = df.iloc[sample_index].reset_index(drop=True)  # shuffle disabled upstream
    # Train-test split: training uses everything, test is the last 10%.
    split_index = int(rows * 0.9)
    df_train = df
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test, implicit_mat