# Load the train/val/test split for DATASET (optionally from a precomputed
# split file) and unpack rating labels, (user, item) index pairs, and the
# set of rating class values.
u_features, v_features, adj_train, train_labels, train_u_indices, train_v_indices, \
    val_labels, val_u_indices, val_v_indices, test_labels, \
    test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING, datasplit_path, SPLITFROMFILE, VERBOSE)

num_users, num_items = adj_train.shape

# Count of side-information features; stays 0 unless side features are built below.
num_side_features = 0

# feature loading
if not FEATURES:
    # Featureless mode: sparse identity matrices act as one-hot node-id inputs.
    u_features = sp.identity(num_users, format='csr')
    v_features = sp.identity(num_items, format='csr')

    u_features, v_features = preprocess_user_item_features(
        u_features, v_features)

elif FEATURES and u_features is not None and v_features is not None:
    # use features as side information and node_id's as node input features
    print("Normalizing feature vectors...")
    # NOTE(liy): what exactly is u_features? A user has three raw attributes:
    # age, zipcode, occupation.
    # NOTE(liy): u_features has 23 attributes — presumably zipcode and
    # occupation were one-hot encoded. TODO confirm against the data loader.
    # NOTE(liy): this is a row-wise (per-tuple) normalization — if one-hot
    # encoding turns several entries of a vector into 1, each of those entries
    # ends up with a smaller weight. It is NOT per-attribute (per-column)
    # normalization (e.g. of age alone); unclear why this is done — it appears
    # to balance rows with dense vs. sparse attributes. TODO confirm.
    u_features_side = normalize_features(u_features)
    v_features_side = normalize_features(v_features)
    # NOTE(liy): why the hstack here? — verify against
    # preprocess_user_item_features.
    # (this call continues past the end of the visible chunk)
    u_features_side, v_features_side = preprocess_user_item_features(
val_labels, val_u_indices, val_v_indices, test_labels, \
    test_u_indices, test_v_indices, class_values = create_trainvaltest_split(DATASET, DATASEED, TESTING, datasplit_path, SPLITFROMFILE, VERBOSE)
# NOTE(review): only 7 unpack targets are visible here, while the sibling call
# site unpacks 13 values from create_trainvaltest_split — presumably the
# leading targets sit on an unseen preceding continuation line. Verify
# against the full file.

# Number of mini-batches per epoch. Use ceiling division so the trailing
# partial batch is not silently dropped: the previous floor division
# `train_labels.shape[0] // BATCHSIZE` lost up to BATCHSIZE-1 examples per
# epoch and produced 0 when the training set is smaller than one batch.
# `-(-a // b)` is integer ceil division, restoring the semantics of the
# original `np.ceil(...)` computation without the removed `np.int` alias.
num_mini_batch = -(-train_labels.shape[0] // BATCHSIZE)
print('num mini batch = ', num_mini_batch)

num_users, num_items = adj_train.shape

# feature loading
if not FEATURES:
    # Featureless mode: sparse identity matrices act as one-hot node-id inputs.
    u_features = sp.identity(num_users, format='csr')
    v_features = sp.identity(num_items, format='csr')

    u_features, v_features = preprocess_user_item_features(
        u_features, v_features)

else:
    raise ValueError('Features are not supported in this implementation.')

# global normalization: one binary support matrix per rating class, plus its
# transpose for message passing in the item->user direction.
support = []
support_t = []
adj_train_int = sp.csr_matrix(adj_train, dtype=np.int32)

for i in range(NUMCLASSES):
    # build individual binary rating matrices (supports) for each rating
    # (ratings are stored 1-based, hence i + 1)
    support_unnormalized = sp.csr_matrix(adj_train_int == i + 1, dtype=np.float32)
    support_unnormalized_transpose = support_unnormalized.T
    support.append(support_unnormalized)
    support_t.append(support_unnormalized_transpose)