Example #1
def get_data():
    df = dataio.read_process("datasets/ml-20m/ratings.csv", sep=",")
    rows = len(df)
    # Shuffle the rows and reset the index
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    # First 90% of the shuffled rows for training, remaining 10% for testing
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test
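Every snippet in this collection calls a project-local dataio.read_process helper and uses numpy as np (some also use pandas as pd); those imports are assumed at module level and are not shown. The source does not include dataio itself, but in a TF-recomm-style project it simply loads the ratings file into a DataFrame with user, item, rate, and st (timestamp) columns. A minimal sketch under that assumption:

import numpy as np
import pandas as pd

def read_process(filename, sep="\t"):
    # Assumed behaviour: parse the ratings file into user/item/rate/st columns
    # and convert the 1-based MovieLens ids to 0-based integer indices.
    col_names = ["user", "item", "rate", "st"]
    df = pd.read_csv(filename, sep=sep, header=None, names=col_names, engine="python")
    df["user"] -= 1
    df["item"] -= 1
    for col in ("user", "item"):
        df[col] = df[col].astype(np.int32)
    df["rate"] = df["rate"].astype(np.float32)
    return df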
Example #2
def get_data():
    df = dataio.read_process("./tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test, rows
Example #3
def get_data():
    df = dataio.read_process(r"D:\Users\fuzzhang\software\tensorflow\TF_Recommend_Basic\TF_Recommend_Basic\TF-recomm\ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test
Example #4
def get_data():
    df = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test
Example #5
def get_data():
    df = dataio.read_process(
        "/Users/chengyao/Projects/netease_spider/music_data/records_movielens_like.csv", sep=",")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test
Example #6
def get_data():
    df = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    wt = pd.read_csv("totalfortest2.csv")
    rows = len(df)
    #print(wt.shape)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test, wt
Example #7
def get_data():
    df = dataio.read_process("/tmp/movielens/ml-1m/ratings.dat", sep="::")
    rows = len(df)
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    maxtime = max(df['st'])
    ut_mean = df.groupby(['user'])['st'].mean()
    return df_train, df_test, maxtime, ut_mean
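This variant additionally returns time statistics: assuming st holds the rating timestamp, maxtime is the newest timestamp in the data and ut_mean is each user's mean rating time, as a time-aware model would need. A hypothetical sketch of how a caller might consume them, scaling each rating's offset from its user's mean time:

df_train, df_test, maxtime, ut_mean = get_data()

# Hypothetical feature: each rating's deviation from its user's mean timestamp,
# scaled by the latest timestamp so the value stays within [-1, 1].
df_train = df_train.assign(
    t_dev=(df_train["st"] - df_train["user"].map(ut_mean)) / float(maxtime)
)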
Example #8
def get_data():
    df = dataio.read_process("data/user_basket_size.csv", sep=",")
    # Flag the last row of each run in the first column (.ix was removed in
    # modern pandas; use .iloc for positional indexing)
    df['group_indicator'] = (df.iloc[:, 0] != df.iloc[:, 0].shift(-1)).astype(int)

    df_train = df.loc[df.group_indicator == 0]
    df_train = df_train.drop('group_indicator', axis=1)

    df_test = df.loc[df.group_indicator == 1]
    df_test = df_test.drop('group_indicator', axis=1)
    df = df.drop('group_indicator', axis=1)

    return df_train, df_test
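Unlike the random 90/10 splits above, this split is deterministic: group_indicator is 1 on the last row of each run of identical values in the first column (presumably the user id), so the test set holds each group's final record and the training set everything before it. A toy illustration, assuming the first column is the user/group key:

import pandas as pd

toy = pd.DataFrame({"user": [1, 1, 1, 2, 2], "basket_size": [3, 5, 2, 4, 6]})
last_row = (toy.iloc[:, 0] != toy.iloc[:, 0].shift(-1)).astype(int)
print(toy[last_row == 0])  # training rows: all but each user's final record
print(toy[last_row == 1])  # test rows: the last record of user 1 and of user 2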
Example #9
def get_data():
    df = dataio.read_process("/Users/xinghailong/Documents/workspace/my/DMInAction/src/tesnsorflow/recommend/ml-1m/test.dat", sep="::")
    rows = len(df)
    # Shuffle the rows:
    # np.random.permutation(rows) generates a random permutation of the indices 0..rows-1:
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.random.permutation.html
    # and reset the index
    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
    # Split the dataset: 90% for training, 10% for testing
    split_index = int(rows * 0.9)
    df_train = df[0:split_index]
    df_test = df[split_index:].reset_index(drop=True)
    return df_train, df_test
Example #10
def get_data():
    'Grab data, compute implicit matrix, and do train-test split'

    # Grab data using dataio functions
    df = dataio.read_process(
        "../data_cleaning/data_for_CF/user_item_rating_fac.csv", sep=",")

    # Compute implicit matrix
    # .as_matrix() was removed in pandas 1.0; .to_numpy() is the replacement
    implicit_mat = df.pivot(index='user', columns='item',
                            values='rate').notnull().to_numpy().astype(float)

    # Data shuffle (left commented out; the full DataFrame is used for training below)
    rows = len(df)
    sample_index = np.random.permutation(rows)
    #df = df.iloc[sample_index].reset_index(drop=True)

    # Train-test split
    split_index = int(rows * 0.9)
    #df_train = df[0:split_index]
    df_train = df
    df_test = df[split_index:].reset_index(drop=True)

    return df_train, df_test, implicit_mat
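The pivot builds a dense user-by-item indicator matrix: 1.0 where a (user, item) rating exists and 0.0 elsewhere, which presumably serves as implicit-feedback input to the model. A toy illustration, assuming user and item ids are already dense 0-based indices:

import pandas as pd

toy = pd.DataFrame({"user": [0, 0, 1], "item": [0, 2, 1], "rate": [4.0, 3.0, 5.0]})
implicit = toy.pivot(index="user", columns="item",
                     values="rate").notnull().to_numpy().astype(float)
print(implicit)
# [[1. 0. 1.]
#  [0. 1. 0.]]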