dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0].to(device)

# Give the user nodes an identity-matrix feature (one row per user) so they
# have an input for message passing, then drop the explicit node count.
num_users = data['user'].num_nodes
data['user'].x = torch.eye(num_users, device=device)
del data['user'].num_nodes

# Make the graph undirected, which adds the reverse relation used for message
# passing; the label copied onto that reverse relation is removed again.
rating_edge = ('user', 'rates', 'movie')
rev_rating_edge = ('movie', 'rev_rates', 'user')
data = T.ToUndirected()(data)
del data[rev_rating_edge].edge_label

# Link-level split into training, validation, and test edges
# (10% validation, 10% test, no negative sampling).
link_split = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[rating_edge],
    rev_edge_types=[rev_rating_edge],
)
train_data, val_data, test_data = link_split(data)

# The ratings are unbalanced (many 3s and 4s, very few 0s and 1s), so an
# optional per-rating weight is derived for a weighted MSE loss: the count of
# the most frequent training label divided by each label's own count.
weight = None
if args.use_weighted_loss:
    label_counts = torch.bincount(train_data['user', 'movie'].edge_label)
    weight = label_counts.max() / label_counts


# NOTE(review): only the first statement of this function is visible in this
# excerpt; it is reproduced unchanged.
def weighted_mse_loss(pred, target, weight=None):
    weight = 1. if weight is None else weight[target].to(pred.dtype)
parser.add_argument('--variational', action='store_true') parser.add_argument('--linear', action='store_true') parser.add_argument('--dataset', type=str, default='Cora', choices=['Cora', 'CiteSeer', 'PubMed']) parser.add_argument('--epochs', type=int, default=400) args = parser.parse_args() device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') transform = T.Compose([ T.NormalizeFeatures(), T.ToDevice(device), T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True, split_labels=True, add_negative_train_samples=False), ]) path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid') dataset = Planetoid(path, args.dataset, transform=transform) train_data, val_data, test_data = dataset[0] class GCNEncoder(torch.nn.Module): def __init__(self, in_channels, out_channels): super().__init__() self.conv1 = GCNConv(in_channels, 2 * out_channels) self.conv2 = GCNConv(2 * out_channels, out_channels) def forward(self, x, edge_index):
def _exit_with_transpose_hint(exc):
    """Print *exc* in red with the ``--transpose_input`` hint, then exit with status 1."""
    print()
    print(colored('Exception: ' + str(exc), 'red'))
    print('Might need to transpose input with the --transpose_input argument.')
    sys.exit(1)


def _num_heads_by_layer(head_counts):
    """Map a flat sequence of attention-head counts onto named layers.

    Four entries are named first/second/mean/std and five entries are named
    first/second/third/mean/std. Any other length yields an empty dict.
    """
    layer_names = {
        4: ('first', 'second', 'mean', 'std'),
        5: ('first', 'second', 'third', 'mean', 'std'),
    }.get(len(head_counts), ())
    return dict(zip(layer_names, head_counts))


def _setup(args, device):
    """Build the model, optimizer and link-split data described by *args*.

    Args:
        args: Mapping of options. Keys read here: ``hvg_file_path``,
            ``khvg_file_path``, ``transpose_input``, ``graph_file_path``,
            ``load_model_path``, ``test_split``, ``val_split``,
            ``graph_convolution``, ``num_heads``, ``num_hidden_layers``,
            ``hidden_dims``, ``dropout``, ``latent_dim``,
            ``decoder_nn_dim1`` and ``lr``.
        device: Device the model is moved to.

    Returns:
        Tuple ``(model, optimizer, train_data, val_data, test_data)``.

    Raises:
        AssertionError: If ``hvg_file_path`` is given without
            ``khvg_file_path``.
        SystemExit: With status 1 when graph preparation raises
            ``ValueError`` or the link split raises ``IndexError``.
    """
    # --- Expression data: prepared in-process or loaded from files. ---
    if not args['hvg_file_path']:
        adata_hvg, adata_khvg, X_hvg, X_khvg = _prepare_training_data(args)
    else:
        # Raised explicitly so the check also runs under ``python -O``.
        if args['khvg_file_path'] is None:
            raise AssertionError(
                "'khvg_file_path' must be set when 'hvg_file_path' is given.")
        adata_hvg = _load_separate_hvg(hvg_path=args['hvg_file_path'])
        adata_khvg = _load_separate_hvg(hvg_path=args['khvg_file_path'])
        if args['transpose_input']:
            print(f'Transposing input HVG file to {adata_hvg.shape[::-1]}...')
            adata_hvg = adata_hvg.copy().transpose()
            print(f'Transposing input KHVG file to {adata_khvg.shape[::-1]}...')
            adata_khvg = adata_khvg.copy().transpose()
        X_hvg = adata_hvg.X
        X_khvg = adata_khvg.X

    # --- Graph edges: computed from the KHVG data or loaded from a file. ---
    if not args['graph_file_path']:
        try:
            edgelist = _prepare_graphs(adata_khvg, X_khvg, args)
        except ValueError as ve:
            _exit_with_transpose_hint(ve)
    else:
        edgelist = _load_separate_graph_edgelist(args['graph_file_path'])

    num_nodes = X_hvg.shape[0]
    print(f'Number of nodes in graph: {num_nodes}.')

    edge_index = np.array(edgelist).astype(int).T
    edge_index = to_undirected(
        torch.from_numpy(edge_index).to(torch.long), num_nodes)

    # Node features are the min-max scaled HVG matrix.
    scaler = MinMaxScaler()
    scaled_x = torch.from_numpy(scaler.fit_transform(X_hvg))

    data_obj = Data(edge_index=edge_index, x=scaled_x)
    data_obj.num_nodes = num_nodes
    data_obj.train_mask = data_obj.val_mask = data_obj.test_mask = data_obj.y = None

    # --- Link split. ---
    testing_only = args['load_model_path'] is not None
    if testing_only:
        print('Assuming loaded model is used for testing.')
        # PyTorch Geometric does not allow 0 training samples (all test), so
        # all test data is stored as 'training'.
        test_split = 0.0
        val_split = 0.0
    else:
        test_split = args['test_split']
        val_split = args['val_split']

    try:
        transform = T.RandomLinkSplit(
            num_val=val_split,
            num_test=test_split,
            is_undirected=True,
            add_negative_train_samples=testing_only,
            split_labels=True,
        )
        train_data, val_data, test_data = transform(data_obj)
    except IndexError as ie:
        _exit_with_transpose_hint(ie)

    # --- Encoder and model. ---
    num_features = data_obj.num_features
    if args['graph_convolution'] in ['GAT', 'GATv2']:
        encoder = CellVGAE_Encoder(
            in_channels=num_features,
            num_hidden_layers=args['num_hidden_layers'],
            num_heads=_num_heads_by_layer(args['num_heads']),
            hidden_dims=args['hidden_dims'],
            dropout=args['dropout'],
            latent_dim=args['latent_dim'],
            v2=args['graph_convolution'] == 'GATv2',
            concat={'first': True, 'second': True})
    else:
        encoder = CellVGAE_GCNEncoder(
            in_channels=num_features,
            num_hidden_layers=args['num_hidden_layers'],
            hidden_dims=args['hidden_dims'],
            latent_dim=args['latent_dim'])

    model = CellVGAE(
        encoder=encoder,
        decoder_nn_dim1=args['decoder_nn_dim1'],
        gcn_or_gat=args['graph_convolution'])

    # Move the model to its device before creating the optimizer, so the
    # optimizer is built over parameters that already live where they will
    # be used.
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])

    return model, optimizer, train_data, val_data, test_data