# Shared imports assumed by the snippets below (exact module paths depend on
# the StellarGraph / TensorFlow versions these snippets were written against):
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import model_selection
from sklearn import metrics as sk_metrics
from sklearn.manifold import TSNE
from sklearn.metrics import f1_score
from sklearn.tree import DecisionTreeClassifier
from stellargraph.layer import DeepGraphInfomax, HinSAGE, link_regression
from stellargraph.mapper import (
    CorruptedGenerator,
    HinSAGELinkGenerator,
    HinSAGENodeGenerator,
)
from tensorflow import keras
from tensorflow.keras import Model, layers, metrics, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam


def inductive_step_hinsage(self, S, trained_model, inductive_node_identifiers, batch_size):
    """
    This function generates embeddings for unseen nodes using a trained HinSAGE model.
    It returns the embeddings for these unseen nodes.

    Parameters
    ----------
    S : StellarGraph Object
        The graph on which HinSAGE is deployed.
    trained_model : Neural Network
        The trained HinSAGE model, containing the trained and optimized
        aggregation functions per depth.
    inductive_node_identifiers : list
        Defines the nodes for which HinSAGE needs to generate embeddings.
    batch_size : int
        Batch size for the neural network in which HinSAGE is implemented.
    """

    # The mapper feeds data from the sampled subgraph to the HinSAGE model
    generator = HinSAGENodeGenerator(
        S, batch_size, self.num_samples, head_node_type=self.embedding_for_node_type
    )
    test_gen_not_shuffled = generator.flow(inductive_node_identifiers, shuffle=False)

    inductive_emb = trained_model.predict(test_gen_not_shuffled, verbose=1)
    inductive_emb = pd.DataFrame(inductive_emb, index=inductive_node_identifiers)

    return inductive_emb
def train_hinsage(self, S, node_identifiers, label, batch_size, epochs):
    """
    This function trains a HinSAGE model, implemented in TensorFlow.
    It returns the trained HinSAGE model and a pandas dataframe containing
    the embeddings generated for the train nodes.

    Parameters
    ----------
    S : StellarGraph Object
        The graph on which HinSAGE trains its aggregator functions.
    node_identifiers : list
        Defines the nodes that HinSAGE uses to train its aggregation functions.
    label : Pandas dataframe
        Defines the label of the nodes used for training, with the index
        representing the nodes.
    batch_size : int
        Batch size to train the neural network in which HinSAGE is implemented.
    epochs : int
        Number of epochs for the neural network.
    """

    # Train/validation split: first 80% of the nodes for training, last 20% for validation
    train_node_identifiers = node_identifiers[:round(0.8 * len(node_identifiers))]
    train_labels = label.loc[train_node_identifiers]
    validation_node_identifiers = node_identifiers[round(0.8 * len(node_identifiers)):]
    validation_labels = label.loc[validation_node_identifiers]

    # The mapper feeds data from the sampled subgraph to the HinSAGE model
    generator = HinSAGENodeGenerator(
        S, batch_size, self.num_samples, head_node_type=self.embedding_for_node_type
    )
    train_gen = generator.flow(train_node_identifiers, train_labels, shuffle=True)
    test_gen = generator.flow(validation_node_identifiers, validation_labels)

    # HinSAGE model
    model = HinSAGE(
        layer_sizes=[self.embedding_size] * len(self.num_samples),
        generator=generator,
        dropout=0,
    )
    x_inp, x_out = model.build()

    # Final estimator layer
    prediction = layers.Dense(units=1, activation="sigmoid", dtype="float32")(x_out)

    # Create Keras model for training
    model = Model(inputs=x_inp, outputs=prediction)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=1e-3),
        loss=binary_crossentropy,
    )

    # Train the model
    model.fit(
        train_gen, epochs=epochs, verbose=1, validation_data=test_gen, shuffle=False
    )

    # Strip off the prediction head to obtain the embedding model, then compute
    # embeddings for all training nodes in their original order
    trained_model = Model(inputs=x_inp, outputs=x_out)
    train_gen_not_shuffled = generator.flow(node_identifiers, label, shuffle=False)
    embeddings_train = trained_model.predict(train_gen_not_shuffled)
    train_emb = pd.DataFrame(embeddings_train, index=node_identifiers)

    return trained_model, train_emb
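# Usage sketch for the two methods above (illustrative only): they are written
# as methods of a class carrying `num_samples`, `embedding_for_node_type`, and
# `embedding_size`, so a minimal hypothetical holder is defined here. The names
# `S` (a prepared StellarGraph), `train_ids`, `labels`, and `new_ids` must come
# from your own data pipeline and are not defined in this snippet.
class HinSAGEEmbedder:  # hypothetical wrapper, named here for illustration only
    def __init__(self, embedding_size, num_samples, embedding_for_node_type):
        self.embedding_size = embedding_size
        self.num_samples = num_samples
        self.embedding_for_node_type = embedding_for_node_type

    # bind the module-level functions above as methods
    train_hinsage = train_hinsage
    inductive_step_hinsage = inductive_step_hinsage

# embedder = HinSAGEEmbedder(64, [8, 4], "transaction")  # placeholder values
# trained_model, train_emb = embedder.train_hinsage(S, train_ids, labels, batch_size=128, epochs=10)
# new_emb = embedder.inductive_step_hinsage(S, trained_model, new_ids, batch_size=128)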
def create_HinSAGE_model(graph, link_prediction=False):
    # Build the appropriate data generator and training flow for the task
    if link_prediction:
        generator = HinSAGELinkGenerator(graph, batch_size=2, num_samples=[2, 1])
        edge_ids_train = np.array([[1, 2], [2, 3], [1, 3]])
        train_gen = generator.flow(edge_ids_train, np.array([1, 1, 0]))
    else:
        generator = HinSAGENodeGenerator(graph, batch_size=2, num_samples=[2, 2])
        train_gen = generator.flow([1, 2], np.array([[1, 0], [0, 1]]))

    base_model = HinSAGE(layer_sizes=[8, 8], generator=train_gen, bias=True, dropout=0.5)

    # Define input and output sockets of HinSAGE:
    x_inp, x_out = base_model.build()

    # Final estimator layer
    if link_prediction:
        prediction = link_regression(edge_embedding_method="ip")(x_out)
    else:
        prediction = layers.Dense(units=2, activation="softmax")(x_out)

    keras_model = Model(inputs=x_inp, outputs=prediction)

    return base_model, keras_model, generator, train_gen
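# Usage sketch (illustrative): `create_HinSAGE_model` hard-codes small node/edge
# id sets (1, 2, 3) and two-layer sampling, so `graph` must be a StellarGraph
# containing at least those nodes with feature vectors. The compile/fit lines
# assume the same (older) StellarGraph API as the function itself, which passes
# the training flow as `generator` and calls `build()`.
#
# base_model, keras_model, generator, train_gen = create_HinSAGE_model(graph)
# keras_model.compile(optimizer="adam", loss="categorical_crossentropy")
# keras_model.fit(train_gen, epochs=5)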
def train(
    G,
    user_targets,
    layer_size,
    num_samples,
    batch_size,
    num_epochs,
    learning_rate,
    dropout,
):
    """
    Train a HinSAGE model on the specified graph G with given parameters.

    Args:
        G: A StellarGraph object ready for machine learning
        user_targets: A pandas DataFrame of target labels for the "user" nodes
        layer_size: A list of number of hidden nodes in each layer
        num_samples: Number of neighbours to sample at each layer
        batch_size: Size of batch for inference
        num_epochs: Number of epochs to train the model
        learning_rate: Initial learning rate
        dropout: The dropout rate (0 to 1)
    """
    print(G.info())

    # Split "user" nodes into train/test sets
    train_targets, test_targets = model_selection.train_test_split(
        user_targets, train_size=0.25, test_size=None
    )
    print("Train targets:\n", train_targets.iloc[:, 0].value_counts())
    print("Test targets:\n", test_targets.iloc[:, 0].value_counts())

    # The mapper feeds data from the sampled subgraph to the HinSAGE model
    generator = HinSAGENodeGenerator(G, batch_size, num_samples, head_node_type="user")
    train_gen = generator.flow_from_dataframe(train_targets, shuffle=True)
    test_gen = generator.flow_from_dataframe(test_targets)

    # HinSAGE model
    model = HinSAGE(layer_sizes=layer_size, generator=generator, dropout=dropout)
    x_inp, x_out = model.build()

    # Final estimator layer
    prediction = layers.Dense(units=train_targets.shape[1], activation="softmax")(x_out)

    # The elite label is only true for a small fraction of the total users,
    # so weight the training loss to ensure that the model learns to predict
    # the positive class.
    # class_count = train_targets.values.sum(axis=0)
    # weights = class_count.sum()/class_count
    weights = [0.01, 1.0]
    print("Weighting loss by: {}".format(weights))

    # Create Keras model for training
    model = keras.Model(inputs=x_inp, outputs=prediction)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss=weighted_binary_crossentropy(weights),
        metrics=[metrics.binary_accuracy],
    )

    # Train the model
    history = model.fit(train_gen, epochs=num_epochs, verbose=2, shuffle=False)

    # Evaluate on the test set and print metrics
    predictions = model.predict(test_gen)
    binary_predictions = predictions[:, 1] > 0.5

    print("\nTest Set Metrics (on {} nodes)".format(len(predictions)))

    # Calculate metrics using scikit-learn
    cm = sk_metrics.confusion_matrix(test_targets.iloc[:, 1], binary_predictions)
    print("Confusion matrix:")
    print(cm)

    accuracy = sk_metrics.accuracy_score(test_targets.iloc[:, 1], binary_predictions)
    precision = sk_metrics.precision_score(test_targets.iloc[:, 1], binary_predictions)
    recall = sk_metrics.recall_score(test_targets.iloc[:, 1], binary_predictions)
    f1 = sk_metrics.f1_score(test_targets.iloc[:, 1], binary_predictions)
    roc_auc = sk_metrics.roc_auc_score(test_targets.iloc[:, 1], binary_predictions)
    print(
        "accuracy = {:0.3}, precision = {:0.3}, recall = {:0.3}, f1 = {:0.3}".format(
            accuracy, precision, recall, f1
        )
    )
    print("ROC AUC = {:0.3}".format(roc_auc))

    # Save the model, encoding the hyperparameters in the filename
    save_str = "_n{}_l{}_d{}_r{}".format(
        "_".join([str(x) for x in num_samples]),
        "_".join([str(x) for x in layer_size]),
        dropout,
        learning_rate,
    )
    model.save("yelp_model" + save_str + ".h5")
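# `weighted_binary_crossentropy` above is defined elsewhere in the original
# script. A minimal sketch of such a per-class weighted loss, assuming
# `weights` has one entry per output column as in the two-column targets
# above (an illustration, not necessarily the original implementation):
from tensorflow.keras import backend as K

def weighted_binary_crossentropy(weights):
    weights = K.constant(weights)

    def loss(y_true, y_pred):
        # per-column binary cross-entropy, scaled by the class weights
        bce = K.binary_crossentropy(y_true, y_pred)
        return K.mean(bce * weights, axis=-1)

    return loss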
def create_embeddings(node_type, num_samples, hinsage_layer_sizes, epochs,
                      patience, batch_size, dropout, activations):
    # Check that num_samples and hinsage_layer_sizes are compatible
    assert len(hinsage_layer_sizes) == len(num_samples)

    generator = HinSAGENodeGenerator(
        G, batch_size, num_samples=num_samples, head_node_type=node_type
    )

    # HinSAGE layers
    hinsage = HinSAGE(
        layer_sizes=hinsage_layer_sizes,
        activations=activations,
        generator=generator,
        bias=True,
        normalize="l2",
        dropout=dropout,
    )

    def run_deep_graph_infomax(base_model, generator, epochs, node_type):
        corrupted_generator = CorruptedGenerator(generator)
        gen = corrupted_generator.flow(G.nodes(node_type=node_type))
        infomax = DeepGraphInfomax(base_model, corrupted_generator)
        x_in, x_out = infomax.in_out_tensors()

        print("Starting Training")
        ttrain = time.time()

        # Train the Deep Graph Infomax model
        model = Model(inputs=x_in, outputs=x_out)
        model.compile(
            loss=tf.nn.sigmoid_cross_entropy_with_logits,
            optimizer=Adam(learning_rate=1e-3),
        )
        es = EarlyStopping(monitor="loss", min_delta=0, patience=patience)
        history = model.fit(gen, epochs=epochs, verbose=verbose, callbacks=[es])
        # sg.utils.plot_history(history)

        ttrain1 = time.time()
        print(
            f"Training complete in {(ttrain1-ttrain):.2f} s ({(ttrain1-ttrain)/60:.2f} min)"
        )

        x_emb_in, x_emb_out = base_model.in_out_tensors()
        # for full-batch models, squeeze out the batch dim (which is 1)
        if generator.num_batch_dims() == 2:
            x_emb_out = tf.squeeze(x_emb_out, axis=0)

        return x_emb_in, x_emb_out

    # Run Deep Graph Infomax
    x_emb_in, x_emb_out = run_deep_graph_infomax(
        hinsage, generator, epochs=epochs, node_type=node_type
    )
    emb_model = Model(inputs=x_emb_in, outputs=x_emb_out)
    all_embeddings = emb_model.predict(generator.flow(G.nodes(node_type=node_type)))

    # TSNE visualization of embeddings
    ttsne = time.time()
    print("Creating TSNE")
    embeddings_2d = pd.DataFrame(
        TSNE(n_components=2).fit_transform(all_embeddings),
        index=G.nodes(node_type=node_type),
    )

    # draw the points (colors based on ExtendedCaseGraphID)
    node_ids = G.nodes(node_type=node_type).tolist()
    ext_targets = v_sample.loc[[int(node_id) for node_id in node_ids]].ExtendedCaseGraphID
    label_map = {
        l: i * 10
        for i, l in enumerate(np.unique(ext_targets), start=10)
        if pd.notna(l)
    }
    node_colours = [
        label_map[target] if pd.notna(target) else 0 for target in ext_targets
    ]

    ttsne1 = time.time()
    print(f"TSNE completed in {(ttsne1-ttsne):.2f} s ({(ttsne1-ttsne)/60:.2f} min)")

    alpha = 0.7
    fig, ax = plt.subplots(figsize=(15, 15))
    ax.scatter(
        embeddings_2d[0],
        embeddings_2d[1],
        c=node_colours,
        cmap="jet",
        alpha=alpha,
    )
    ax.set(aspect="equal")
    plt.title(
        f'TSNE visualization of HinSAGE "{node_type}" embeddings with Deep Graph Infomax'
    )
    plt.savefig(f"./embeddings/HinSAGE_DGI_embeddings_{node_type}.pdf")

    return all_embeddings, embeddings_2d
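# Example invocation (hyperparameter values here are placeholders, and the
# function relies on the globals `G`, `v_sample`, and `verbose` being defined):
#
# all_embeddings, embeddings_2d = create_embeddings(
#     node_type="account",
#     num_samples=[8, 4],
#     hinsage_layer_sizes=[32, 32],
#     epochs=100,
#     patience=20,
#     batch_size=128,
#     dropout=0.4,
#     activations=["relu", "relu"],
# )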
def run_for_node_type(v_type, hinsage_layer_sizes, num_samples, activations, epochs):
    # Recover the split sizes for this node type: number of unlabelled (NaN),
    # training, and test nodes
    nan_tflag = data_splits[v_type].iloc[0].values[0]
    train_tflag = data_splits[v_type].iloc[1].values[0]
    test_tflag = data_splits[v_type].iloc[2].values[0]

    train_cv_set = v_sets[v_type][nan_tflag:nan_tflag + train_tflag]
    train_cv_ids = train_cv_set.index.values.tolist()
    train_cv_labels = v_data.loc[[
        int(node_id) for node_id in train_cv_ids
    ]].ExtendedCaseGraphID

    test_set = v_sets[v_type][-test_tflag:]
    test_ids = test_set.index.values.tolist()

    generator = HinSAGENodeGenerator(G, batch_size, num_samples, head_node_type=v_type)
    hinsage = HinSAGE(
        layer_sizes=hinsage_layer_sizes,
        activations=activations,
        generator=generator,
        bias=True,
        normalize="l2",
        dropout=dropout,
    )

    def run_deep_graph_infomax(base_model, generator, epochs):
        print(f"Starting training for {v_type} type: ")
        t0 = time.time()

        corrupted_generator = CorruptedGenerator(generator)
        gen = corrupted_generator.flow(G.nodes(node_type=v_type))
        infomax = DeepGraphInfomax(base_model, corrupted_generator)
        x_in, x_out = infomax.in_out_tensors()

        # Train with DGI
        model = Model(inputs=x_in, outputs=x_out)
        model.compile(
            loss=tf.nn.sigmoid_cross_entropy_with_logits,
            optimizer=Adam(learning_rate=1e-3),
        )
        es = EarlyStopping(monitor="loss", min_delta=0, patience=10)
        history = model.fit(gen, epochs=epochs, verbose=verbose, callbacks=[es])
        # sg.utils.plot_history(history)

        x_emb_in, x_emb_out = base_model.in_out_tensors()
        # for full-batch models, squeeze out the batch dim (which is 1)
        if generator.num_batch_dims() == 2:
            x_emb_out = tf.squeeze(x_emb_out, axis=0)

        t1 = time.time()
        print(f"Time required: {t1-t0:.2f} s ({(t1-t0)/60:.1f} min)")

        return x_emb_in, x_emb_out, model

    #? Train HinSAGE model:
    x_emb_in, x_emb_out, _model = run_deep_graph_infomax(hinsage, generator, epochs=epochs)
    emb_model = Model(inputs=x_emb_in, outputs=x_emb_out)
    train_cv_embs = emb_model.predict(generator.flow(train_cv_set.index.values))

    #? Optional: plot embeddings of the training and CV set of the current node
    #? type, then return early without classifying
    if visualize:
        train_cv_embs_2d = pd.DataFrame(
            TSNE(n_components=2).fit_transform(train_cv_embs),
            index=train_cv_set.index.values,
        )
        label_map = {
            l: i * 10
            for i, l in enumerate(np.unique(train_cv_labels), start=10)
            if pd.notna(l)
        }
        node_colours = [
            label_map[target] if pd.notna(target) else 0
            for target in train_cv_labels
        ]

        alpha = 0.7
        fig, ax = plt.subplots(figsize=(15, 15))
        ax.scatter(
            train_cv_embs_2d[0],
            train_cv_embs_2d[1],
            c=node_colours,
            cmap="jet",
            alpha=alpha,
        )
        ax.set(aspect="equal")
        plt.title(
            f"TSNE of HinSAGE {v_type} embeddings with DGI - coloring on ExtendedCaseGraphID"
        )
        plt.show()
        return 1

    #? Split training and cross-validation sets using a simple ordered 80/20 split
    n_embs = train_cv_embs.shape[0]
    train_size = int(n_embs * 0.80)
    cv_size = int(n_embs * 0.20)

    train_set = train_cv_embs[:train_size]
    train_labels = np.ravel(pd.DataFrame(train_cv_labels.values[:train_size]).fillna(0))
    cv_set = train_cv_embs[-cv_size:]
    cv_labels = np.ravel(pd.DataFrame(train_cv_labels.values[-cv_size:]).fillna(0))

    #? Classify
    print(f"Running Classifier for {v_type} type")
    classifier = DecisionTreeClassifier()
    classifier.fit(X=train_set, y=train_labels)
    cv_pred = classifier.predict(cv_set)

    f1_avg = f1_score(cv_labels, cv_pred, average="weighted")
    acc = (cv_pred == cv_labels).mean()
    print(f"{v_type} CV Metrics: f1: {f1_avg:.6f} - acc: {acc:.6f}")

    #? Now run on the test set
    test_embs = emb_model.predict(generator.flow(test_set.index.values))
    test_pred = classifier.predict(test_embs)
    #? Save predictions
    outdir = "./output"
    outname = f"{v_type}_predictions.csv"
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    fullname = os.path.join(outdir, outname)

    output = pd.DataFrame(test_ids)
    output = output.rename(columns={0: "node_id"})
    output["ExtendedCaseGraphID"] = test_pred
    output = output.set_index("node_id")
    output.to_csv(fullname)

    return output
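# Example driver (illustrative): run the pipeline once per node type. It
# assumes the globals used above (G, v_sets, v_data, data_splits, batch_size,
# dropout, verbose, visualize) are defined and that the node types are the
# keys of `v_sets`; the layer sizes and sample counts are placeholders.
#
# predictions = {
#     v_type: run_for_node_type(
#         v_type,
#         hinsage_layer_sizes=[32, 32],
#         num_samples=[8, 4],
#         activations=["relu", "relu"],
#         epochs=100,
#     )
#     for v_type in v_sets
# }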