Example 1
0
    u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \
        val_labels, val_u_indices, val_v_indices, test_labels, \
        test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING,
                                                                                 datasplit_path, SPLITFROMFILE,
                                                                                 VERBOSE)

# Rating matrix dimensions: rows are users, columns are items.
num_users, num_items = adj_train.shape

num_side_features = 0

# feature loading
if not FEATURES:
    # No side information: use one-hot node ids (CSR identity matrices)
    # as the node input features.
    u_features = sp.identity(num_users, format='csr')
    v_features = sp.identity(num_items, format='csr')

    u_features, v_features = preprocess_user_item_features(
        u_features, v_features)

elif FEATURES and u_features is not None and v_features is not None:
    # use features as side information and node_id's as node input features

    print("Normalizing feature vectors...")
    # NOTE(review, translated): what exactly is u_features? A user itself has
    # three attributes: age, zipcode and occupation.
    # u_features has 23 columns — presumably zipcode and occupation were
    # one-hot encoded. TODO: confirm against the dataset loader.

    # NOTE(review, translated): this normalizes each row vector as a whole —
    # if one-hot encoding turns several entries of a row into 1, each entry's
    # weight shrinks accordingly. It is NOT per-attribute normalization
    # (e.g. of age); it looks like a dense-vs-sparse row balancing step —
    # reason unclear, verify.
    u_features_side = normalize_features(u_features)
    v_features_side = normalize_features(v_features)

    # NOTE(review, translated): why the hstack inside this call? Verify
    # against preprocess_user_item_features.
    u_features_side, v_features_side = preprocess_user_item_features(
Example 2
0
# NOTE(review): the unpacking targets look truncated here — example 1 unpacks
# 13 values from the same call; confirm the leading targets were not lost.
val_labels, val_u_indices, val_v_indices, test_labels, \
test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING,
                                                                         datasplit_path, SPLITFROMFILE, VERBOSE)

# num_mini_batch = np.int(np.ceil(train_labels.shape[0]/float(BATCHSIZE)))
# NOTE(review): floor division drops the final partial batch, whereas the
# commented-out version used ceil and kept it — confirm which is intended.
num_mini_batch = train_labels.shape[0] // BATCHSIZE
print('num mini batch = ', num_mini_batch)

# Rating matrix dimensions: rows are users, columns are items.
num_users, num_items = adj_train.shape

# feature loading: side features are not implemented in this variant, so the
# node inputs are plain one-hot ids (CSR identity matrices) passed through
# the joint user/item feature preprocessing step.
if FEATURES:
    raise ValueError('Features are not supported in this implementation.')

u_features = sp.identity(num_users, format='csr')
v_features = sp.identity(num_items, format='csr')
u_features, v_features = preprocess_user_item_features(u_features, v_features)

# global normalization: build one binary rating matrix ("support") per rating
# level r in {1, ..., NUMCLASSES}, together with its transpose.
adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32)
support = []
support_t = []
for rating in range(1, NUMCLASSES + 1):
    # 1.0 where the training adjacency holds exactly this rating, else 0.
    binary_rating = sp.csr_matrix(adj_train_int == rating, dtype=np.float32)
    support.append(binary_rating)
    support_t.append(binary_rating.T)