def create_HinSAGE_model(graph, link_prediction=False):
    """
    Assemble a small two-layer HinSAGE model wrapped in a Keras ``Model``.

    The node and edge IDs used to build the training flow are hard-coded
    (nodes 1, 2 and 3), so ``graph`` has to contain them.

    Args:
        graph: graph object handed to the HinSAGE data generator.
        link_prediction: when true, build a link generator and put a
            ``link_regression`` head (edge embedding method ``"ip"``) on top
            of HinSAGE; otherwise build a node generator and put a 2-unit
            softmax ``Dense`` head on the flattened HinSAGE output.

    Returns:
        A tuple ``(base_model, keras_model, generator, train_gen)``: the
        HinSAGE object, the Keras model, the data generator and the training
        flow created from that generator.
    """
    # Settings shared by both variants of the HinSAGE stack.
    hinsage_options = dict(layer_sizes=[8, 8], bias=True, dropout=0.5)

    if link_prediction:
        data_gen = HinSAGELinkGenerator(graph, batch_size=2, num_samples=[2, 1])
        train_edges = np.array([[1, 2], [2, 3], [1, 3]])
        flow = data_gen.flow(train_edges, np.array([1, 1, 0]))

        hinsage = HinSAGE(generator=flow, **hinsage_options)
        # Input and output sockets of HinSAGE.
        inputs, embeddings = hinsage.default_model()
        # Final estimator layer.
        outputs = link_regression(edge_embedding_method="ip")(embeddings)
    else:
        data_gen = HinSAGENodeGenerator(graph, batch_size=2, num_samples=[2, 2])
        flow = data_gen.flow([1, 2], np.array([[1, 0], [0, 1]]))

        hinsage = HinSAGE(generator=flow, **hinsage_options)
        inputs, embeddings = hinsage.default_model(flatten_output=True)
        outputs = layers.Dense(units=2, activation="softmax")(embeddings)

    wrapped = Model(inputs=inputs, outputs=outputs)
    return hinsage, wrapped, data_gen, flow
def train(
    self,
    layer_size,
    num_samples,
    train_size=0.7,
    batch_size: int = 200,
    num_epochs: int = 20,
    learning_rate=5e-3,
    dropout=0.0,
    use_bias=True,
):
    """
    Build and train the HinSAGE model for link attribute prediction on the
    graph stored in ``self.g``, then print evaluation metrics for the
    held-out edges.

    Side effect: ``self.g`` is replaced by a ``StellarGraph`` built from the
    original graph (node features are read from the ``"feature"`` attribute).

    Args:
        layer_size: a list of number of hidden nodes in each layer
        num_samples: number of neighbours to sample at each layer; must have
            the same length as ``layer_size``
        train_size: forwarded as ``train_size`` to
            ``model_selection.train_test_split`` when splitting the edges
            into training and test sets
        batch_size: size of mini batch
        num_epochs: number of epochs to train the model (epoch = all training
            batches are streamed through the model once)
        learning_rate: initial learning rate
        dropout: dropout probability in the range [0, 1)
        use_bias: tells whether to use a bias terms in HinSAGE model

    Returns:
        None. Training progress and test metrics are printed to stdout.

    Raises:
        ValueError: if ``layer_size`` and ``num_samples`` differ in length.
    """
    # Validate before doing any work, so bad arguments neither trigger the
    # edge split nor leave self.g rebound to a StellarGraph. An explicit
    # raise (rather than assert) keeps the check alive under `python -O`.
    if len(layer_size) != len(num_samples):
        raise ValueError(
            "layer_size and num_samples must be of the same length! Stopping."
        )

    # Training and test edges
    edges = list(self.g.edges(data=True))
    edges_train, edges_test = model_selection.train_test_split(
        edges, train_size=train_size
    )

    # Edgelists (source, target) and their regression targets; every edge is
    # expected to carry a "score" attribute.
    edgelist_train = [(e[0], e[1]) for e in edges_train]
    edgelist_test = [(e[0], e[1]) for e in edges_test]
    labels_train = [e[2]["score"] for e in edges_train]
    labels_test = [e[2]["score"] for e in edges_test]

    # Our machine learning task of learning user-movie ratings can be framed
    # as a supervised Link Attribute Inference: given a graph of user-movie
    # ratings, we train a model for rating prediction using the ratings
    # edges_train, and evaluate it using the test ratings edges_test.
    # The model also requires the user-movie graph structure, so we create a
    # StellarGraph object from the ingested graph.
    # When sampling the GraphSAGE subgraphs, we want to treat user-movie
    # links as undirected.
    self.g = sg.StellarGraph(self.g, node_features="feature")

    # Next, we create the link generators for preparing and streaming
    # training and testing data to the model. The mappers essentially sample
    # k-hop subgraphs of G with randomly selected head nodes, as required by
    # the HinSAGE algorithm, and generate minibatches of those samples to be
    # fed to the input layer of the HinSAGE model.
    generator = HinSAGELinkGenerator(
        self.g,
        batch_size,
        num_samples,
    )
    train_gen = generator.flow(edgelist_train, labels_train)
    test_gen = generator.flow(edgelist_test, labels_test)

    # Build the model by stacking a HinSAGE model (one layer per entry of
    # layer_size) and a link regression layer on top.
    hinsage = HinSAGE(
        layer_sizes=layer_size,
        generator=train_gen,
        bias=use_bias,
        dropout=dropout,
    )

    # Define input and output sockets of hinsage:
    x_inp, x_out = hinsage.default_model()

    # Final estimator layer.
    # NOTE(review): `args` is not a parameter or attribute here; presumably a
    # module-level argparse namespace -- confirm it exists wherever this
    # method is called.
    score_prediction = link_regression(
        edge_embedding_method=args.edge_embedding_method
    )(x_out)

    # Create Keras model for training
    model = Model(inputs=x_inp, outputs=score_prediction)
    model.compile(
        optimizer=optimizers.Adam(lr=learning_rate),
        loss=losses.mean_squared_error,
        metrics=[root_mean_square_error, metrics.mae],
    )

    # Train model
    print(
        "Training the model for {} epochs with initial learning rate {}".format(
            num_epochs, learning_rate
        )
    )
    model.fit_generator(
        train_gen,
        validation_data=test_gen,
        epochs=num_epochs,
        verbose=2,
        shuffle=True,
        use_multiprocessing=True,
        workers=multiprocessing.cpu_count() // 2,
    )

    # Evaluate and print metrics
    test_metrics = model.evaluate_generator(test_gen)

    print("Test Evaluation:")
    for name, val in zip(model.metrics_names, test_metrics):
        print("\t{}: {:0.4f}".format(name, val))