Beispiel #1
0
    def initialize(self, **hyper_params):
        """Build the unsupervised GraphSAGE link model and its embedding model.

        Reads ``self.nodes_df`` and ``self.edges_df`` to build the graph, then
        deletes both attributes. Sets ``self.graph``, ``self.nodes``,
        ``self.train_flow``, ``self.model``, ``self.embedding_model`` and
        ``self.node_gen``.

        Keyword Args:
            batch_size: Batch size of the link and node generators (default 16).
            num_samples: Neighbours sampled per GraphSAGE hop (default [25, 10]).
            layer_sizes: Output size of each GraphSAGE layer (default [256, 256]).
            bias: Whether the GraphSAGE layers use a bias term (default True).
            dropout: Dropout rate of the GraphSAGE layers (default 0.0).
            lr: Learning rate passed to Adam (default 1e-3).
            num_walks: Random walks started per node by the sampler (default 1).
            length: Length of each random walk (default 5).

        Returns:
            The weights of the freshly compiled link model
            (``self.model.get_weights()``).
        """
        # Use the caller's value when given, otherwise the default. Each name
        # is looked up under its own key, so supplied values are honoured
        # instead of leaving the local unbound.
        batch_size = hyper_params.get("batch_size", 16)
        num_samples = hyper_params.get("num_samples", [25, 10])
        layer_sizes = hyper_params.get("layer_sizes", [256, 256])
        bias = hyper_params.get("bias", True)
        dropout = hyper_params.get("dropout", 0.0)
        lr = hyper_params.get("lr", 1e-3)
        num_walks = hyper_params.get("num_walks", 1)
        length = hyper_params.get("length", 5)

        self.graph = sg.StellarGraph(nodes=self.nodes_df, edges=self.edges_df)
        self.nodes = list(self.graph.nodes())

        # The data frames are no longer needed once the graph exists.
        del self.nodes_df
        del self.edges_df

        unsupervised_samples = UnsupervisedSampler(
            self.graph, nodes=self.nodes, length=length, number_of_walks=num_walks
        )

        # Train iterators
        train_gen = GraphSAGELinkGenerator(self.graph, batch_size, num_samples)
        self.train_flow = train_gen.flow(unsupervised_samples)

        # Model defining - Keras functional API + Stellargraph layers
        graphsage = GraphSAGE(
            layer_sizes=layer_sizes, generator=train_gen, bias=bias, dropout=dropout, normalize="l2"
        )

        x_inp, x_out = graphsage.in_out_tensors()

        prediction = link_classification(
            output_dim=1, output_act="sigmoid", edge_embedding_method="ip"
        )(x_out)

        self.model = keras.Model(inputs=x_inp, outputs=prediction)

        self.model.compile(
            optimizer=keras.optimizers.Adam(lr=lr),
            loss=keras.losses.binary_crossentropy,
            metrics=[keras.metrics.binary_accuracy],
        )

        # Embedding model: every other input tensor and the first output
        # tensor of the link model.
        # NOTE(review): presumably these are the source-node inputs/outputs of
        # each node pair, as in the StellarGraph unsupervised demo - confirm.
        x_inp_src = x_inp[0::2]
        x_out_src = x_out[0]
        self.embedding_model = keras.Model(inputs=x_inp_src, outputs=x_out_src)

        self.node_gen = GraphSAGENodeGenerator(self.graph, batch_size, num_samples).flow(self.nodes)

        return self.model.get_weights()
    def initialize(self, **hyper_params):
        """Split the graph edges and build the GraphSAGE link-prediction model.

        Reads ``self.nodes`` and ``self.edges`` to build the graph. Sets
        ``self.graph_test``, ``self.graph_train``, ``self.train_flow``,
        ``self.test_flow`` and ``self.model``.

        Keyword Args:
            batch_size: Batch size of the link generators (default 20).
            num_samples: Neighbours sampled per GraphSAGE hop (default [20, 10]).
            layer_sizes: Output size of each GraphSAGE layer (default [10, 10]).
            bias: Whether the GraphSAGE layers use a bias term (default True).
            dropout: Dropout rate of the GraphSAGE layers (default 0.1).
            lr: Learning rate passed to Adam (default 1e-2).

        Returns:
            A tuple ``(n_train, n_test)`` with the number of training and
            testing edge examples.
        """
        # Use the caller's value when given, otherwise the default. Each name
        # is looked up under its own key, so supplied values are honoured
        # instead of leaving the local unbound.
        batch_size = hyper_params.get("batch_size", 20)
        num_samples = hyper_params.get("num_samples", [20, 10])
        layer_sizes = hyper_params.get("layer_sizes", [10, 10])
        bias = hyper_params.get("bias", True)
        dropout = hyper_params.get("dropout", 0.1)
        lr = hyper_params.get("lr", 1e-2)

        graph = sg.StellarGraph(nodes=self.nodes, edges=self.edges)

        # Test split
        edge_splitter_test = EdgeSplitter(graph)
        self.graph_test, edge_ids_test, edge_labels_test = edge_splitter_test.train_test_split(
            p=0.1, method="global", keep_connected=True, seed=42
        )

        # Train split, taken from the graph that remains after the test split
        edge_splitter_train = EdgeSplitter(self.graph_test)
        self.graph_train, edge_ids_train, edge_labels_train = edge_splitter_train.train_test_split(
            p=0.1, method="global", keep_connected=True, seed=42
        )

        # Train iterators
        train_gen = GraphSAGELinkGenerator(self.graph_train, batch_size, num_samples, seed=42)
        self.train_flow = train_gen.flow(edge_ids_train, edge_labels_train, shuffle=True)

        # Test iterators
        # NOTE(review): the test generator samples from graph_train rather than
        # graph_test; kept as in the original - confirm this is intended.
        test_gen = GraphSAGELinkGenerator(self.graph_train, batch_size, num_samples, seed=42)
        self.test_flow = test_gen.flow(edge_ids_test, edge_labels_test, shuffle=True)

        # Model defining - Keras functional API + Stellargraph layers
        graphsage = GraphSAGE(
            layer_sizes=layer_sizes, generator=train_gen, bias=bias, dropout=dropout
        )

        x_inp, x_out = graphsage.in_out_tensors()

        prediction = link_classification(
            output_dim=1, output_act="sigmoid", edge_embedding_method="ip"
        )(x_out)

        self.model = keras.Model(inputs=x_inp, outputs=prediction)

        self.model.compile(
            optimizer=keras.optimizers.Adam(lr=lr),
            loss=keras.losses.binary_crossentropy,
            metrics=[keras.metrics.BinaryAccuracy(),keras.metrics.Recall(),keras.metrics.AUC(),keras.metrics.Precision()],
        )

        # return number of training and testing examples
        return edge_ids_train.shape[0], edge_ids_test.shape[0]
Beispiel #3
0
    def initialize(self, **hyper_params):
        """Build the training flow and the GraphSAGE link-prediction model.

        Reads ``self.nodes`` and ``self.edges`` to build the graph. Sets
        ``self.graph``, ``self.train_flow`` and ``self.model``.

        Keyword Args:
            batch_size: Batch size of the link generator (default 20).
            num_samples: Neighbours sampled per GraphSAGE hop (default [20, 10]).
            layer_sizes: Output size of each GraphSAGE layer (default [20, 20]).
            bias: Whether the GraphSAGE layers use a bias term (default True).
            dropout: Dropout rate of the GraphSAGE layers (default 0.3).
            lr: Learning rate passed to Adam (default 1e-3).
            train_split: Value passed as ``p`` to the edge splitter
                (default 0.2).

        Returns:
            The weights of the freshly compiled model
            (``self.model.get_weights()``).
        """
        # Use the caller's value when given, otherwise the default. Each name
        # is looked up under its own key, so supplied values are honoured
        # instead of leaving the local unbound.
        batch_size = hyper_params.get("batch_size", 20)
        num_samples = hyper_params.get("num_samples", [20, 10])
        layer_sizes = hyper_params.get("layer_sizes", [20, 20])
        bias = hyper_params.get("bias", True)
        dropout = hyper_params.get("dropout", 0.3)
        lr = hyper_params.get("lr", 1e-3)
        train_split = hyper_params.get("train_split", 0.2)

        self.graph = sg.StellarGraph(nodes=self.nodes, edges=self.edges)

        # Train split
        edge_splitter_train = EdgeSplitter(self.graph)
        graph_train, edge_ids_train, edge_labels_train = edge_splitter_train.train_test_split(
            p=train_split, method="global", keep_connected=True)

        # Train iterators
        train_gen = GraphSAGELinkGenerator(graph_train, batch_size,
                                           num_samples)
        self.train_flow = train_gen.flow(edge_ids_train,
                                         edge_labels_train,
                                         shuffle=True)

        # Model defining - Keras functional API + Stellargraph layers
        graphsage = GraphSAGE(layer_sizes=layer_sizes,
                              generator=train_gen,
                              bias=bias,
                              dropout=dropout)

        x_inp, x_out = graphsage.in_out_tensors()

        prediction = link_classification(output_dim=1,
                                         output_act="relu",
                                         edge_embedding_method="ip")(x_out)

        self.model = keras.Model(inputs=x_inp, outputs=prediction)

        self.model.compile(
            optimizer=keras.optimizers.Adam(lr=lr),
            loss=keras.losses.binary_crossentropy,
            metrics=["acc"],
        )

        return self.model.get_weights()
Beispiel #4
0
def create_graphSAGE_model(graph, link_prediction=False):
    """Create a small GraphSAGE model with hard-coded toy training data.

    Args:
        graph: Graph handed to the StellarGraph generator.
        link_prediction: When true, build a link generator and a link
            classification head; otherwise build a node generator and a
            two-unit softmax head.

    Returns:
        A tuple ``(base_model, keras_model, generator, train_gen)``.
    """
    # We are going to train on the original graph in both modes.
    generator_cls = (
        GraphSAGELinkGenerator if link_prediction else GraphSAGENodeGenerator
    )
    generator = generator_cls(graph, batch_size=2, num_samples=[2, 2])

    if link_prediction:
        edge_ids_train = np.array([[1, 2], [2, 3], [1, 3]])
        train_gen = generator.flow(edge_ids_train, np.array([1, 1, 0]))
    else:
        train_gen = generator.flow([1, 2], np.array([[1, 0], [0, 1]]))

    base_model = GraphSAGE(
        layer_sizes=[8, 8], generator=generator, bias=True, dropout=0.5
    )

    # Expose input and output sockets of graphsage (for source and
    # destination nodes in the link-prediction case).
    x_inp, x_out = base_model.in_out_tensors()

    if link_prediction:
        head = link_classification(
            output_dim=1, output_act="relu", edge_embedding_method="ip"
        )
    else:
        head = layers.Dense(units=2, activation="softmax")

    keras_model = Model(inputs=x_inp, outputs=head(x_out))

    return base_model, keras_model, generator, train_gen
    def run_model(self):
        """Train, validate and evaluate a GraphSAGE node classifier.

        Data comes from ``self.prepare_data_for_stellargraph()`` and
        ``self.get_train_valid_test()``; settings come from
        ``self.hyperparams`` (keys ``batch_size``, ``num_samples``,
        ``layer_sizes``, ``bias``, ``dropout``, ``lr``, ``n_epochs`` and
        ``verbose``). Plots the training history and prints the test metrics.
        Returns nothing.
        """
        graph_sampled, label_series_sampled = self.prepare_data_for_stellargraph()
        (train_targets, valid_targets, test_targets,
         train_labels, valid_labels, test_labels) = self.get_train_valid_test(
             label_series_sampled)

        # One generator feeds the train, validation and test flows.
        generator = GraphSAGENodeGenerator(
            graph_sampled,
            self.hyperparams["batch_size"],
            self.hyperparams["num_samples"],
        )
        train_flow = generator.flow(train_labels.index, train_targets, shuffle=True)

        sage = GraphSAGE(
            layer_sizes=self.hyperparams["layer_sizes"],
            generator=generator,
            bias=self.hyperparams["bias"],
            dropout=self.hyperparams["dropout"],
        )
        x_inp, x_out = sage.in_out_tensors()

        # Softmax head with one unit per target column.
        softmax_head = layers.Dense(units=train_targets.shape[1],
                                    activation="softmax")
        prediction = softmax_head(x_out)

        model = Model(inputs=x_inp, outputs=prediction)
        model.compile(
            optimizer=optimizers.Adam(lr=self.hyperparams["lr"]),
            loss=losses.categorical_crossentropy,
            metrics=["acc"],
        )

        valid_flow = generator.flow(valid_labels.index, valid_targets)

        history = model.fit(
            train_flow,
            epochs=self.hyperparams["n_epochs"],
            validation_data=valid_flow,
            verbose=self.hyperparams["verbose"],
            shuffle=True,
            use_multiprocessing=True,
        )

        sg.utils.plot_history(history)

        test_flow = generator.flow(test_labels.index, test_targets)
        test_metrics = model.evaluate(test_flow)
        print("\nTest Set Metrics:")
        for metric_name, metric_value in zip(model.metrics_names, test_metrics):
            print("\t{}: {:0.4f}".format(metric_name, metric_value))
def get_dropout(input_tensor, p=0.1, mc=False):
    """Apply a ``Dropout(p)`` layer to ``input_tensor``.

    When ``mc`` is truthy the layer is called with ``training=True``;
    otherwise it is called without a ``training`` argument.
    """
    call_kwargs = {"training": True} if mc else {}
    return Dropout(p)(input_tensor, **call_kwargs)


# Three-layer GraphSAGE encoder using the max-pooling aggregator.
# `generator` is not defined in this snippet and must exist beforehand.
graphsage_model = GraphSAGE(layer_sizes=[64, 32, 16],
                            generator=generator,
                            activations=["relu", "relu", "linear"],
                            bias=True,
                            aggregator=MaxPoolingAggregator,
                            dropout=0.1)

# Stack two dense layers, a dropout layer and a softmax head on the
# GraphSAGE output tensor.
x_inp, x_out = graphsage_model.in_out_tensors()
x_out = layers.Dense(units=10, activation="relu")(x_out)
x_out = layers.Dense(units=10, activation="relu")(x_out)
# mc='mc' is a non-empty string and therefore truthy, so get_dropout calls
# the Dropout layer with training=True.
x_out = get_dropout(x_out, p=0.1, mc='mc')
# One output unit per column of `train_targets` (defined elsewhere).
prediction = layers.Dense(units=train_targets.shape[1],
                          activation="softmax")(x_out)

model = Model(inputs=x_inp, outputs=prediction)
model.summary()

# Alternative losses that were tried and are currently disabled:
# model.compile( optimizer=optimizers.Adam(), loss = noderankloss(), metrics=["acc"])
# model.compile( optimizer=optimizers.Adam(), loss="mean_squared_error", metrics=["acc"])
model.compile(optimizer=optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=["acc"])
# Settings for the link-prediction model. `epochs` is not used within this
# snippet.
epochs = 20
num_samples = [20, 10]
layer_sizes = [20, 20]

# Train iterators. `G_train`, `batch_size`, `edge_ids_train` and
# `edge_labels_train` are not defined in this snippet and must exist beforehand.
train_gen = GraphSAGELinkGenerator(G_train, batch_size, num_samples)
train_flow = train_gen.flow(edge_ids_train, edge_labels_train, shuffle=True)



# Model defining - Keras functional API + Stellargraph layers
graphsage = GraphSAGE(
    layer_sizes=layer_sizes, generator=train_gen, bias=True, dropout=0.3
)

x_inp, x_out = graphsage.in_out_tensors()

# Link classification head applied to the GraphSAGE output tensors.
prediction = link_classification(
    output_dim=1, output_act="relu", edge_embedding_method="ip"
)(x_out)

model = keras.Model(inputs=x_inp, outputs=prediction)

model.compile(
    optimizer=keras.optimizers.Adam(lr=1e-3),
    loss=keras.losses.binary_crossentropy,
    metrics=["acc"],
)

# Load previously saved weights from `path_weights`. This snippet only loads
# the array; it does not apply the weights to `model`.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load
# files from a trusted source.
weights = np.load(path_weights,allow_pickle=True)
Beispiel #8
0
def train(G_list,
          nodes_subjects_list,
          run_num=1,
          start_month_id=220,
          end_month_id=264):
    # 提前定义一些列表方便记录数据,大循环的列表存小循环的列表
    graph_history_list_list = []
    model_list_list = []
    train_gen_list_list = []
    time_list_list = []
    model_weight_list_list = []

    # 选择运行run_num次
    run_num = run_num
    # 选择进行训练的月份,end_month_id最多取
    start_month_id = start_month_id
    end_month_id = end_month_id

    # 创建文件夹保存model
    if not os.path.exists('model'):
        os.makedirs('model')

    # 创建文件夹保存history
    if not os.path.exists('history'):
        os.makedirs('history')

    # 创建文件夹保存figure
    if not os.path.exists('figure'):
        os.makedirs('figure')

    # 创建文件夹保存figure
    if not os.path.exists('figure_distribution'):
        os.makedirs('figure_distribution')

    # 创建文件夹保存test结果
    if not os.path.exists('test_result'):
        os.makedirs('test_result')

    # 大循环记录训练了几次,计算多次是为了减少variance
    # 小循环记录训练的月份
    for j in range(run_num):
        num_samples = [40]

        # 提前定义一些列表记录小循环的数据
        graph_history_list = []
        model_list = []
        train_gen_list = []
        time_list = []
        model_weight_list = []
        test_result = []

        # i为0代表220
        for i in range(start_month_id - 220, end_month_id - 220):
            start = time.time()

            # 前一个月训练,后一个月验证
            train_idx = i
            val_idx = i + 1
            test_idx = i + 2

            # 用train_idx的数据生成训练集的generator
            generator = GraphSAGENodeGenerator(
                G=G_list[train_idx],
                batch_size=len(nodes_subjects_list[train_idx]),
                num_samples=num_samples,
                seed=100)
            train_gen = generator.flow(list(
                nodes_subjects_list[train_idx].index),
                                       nodes_subjects_list[train_idx].values,
                                       shuffle=False)

            # 生成GraphSAGE模型
            graphsage_model = GraphSAGE(layer_sizes=[1],
                                        generator=generator,
                                        bias=True,
                                        aggregator=sg.layer.MeanAggregator,
                                        normalize=None)

            # 提取输出输出的tensor,用keras来构建模型
            x_inp, x_out = graphsage_model.in_out_tensors()
            #         prediction = layers.Dense(units=1)(x_out)

            # 用val_idx的数据生成验证集的generator
            generator = GraphSAGENodeGenerator(
                G=G_list[val_idx],
                batch_size=len(nodes_subjects_list[val_idx]),
                num_samples=num_samples,
                seed=100)
            val_gen = generator.flow(list(nodes_subjects_list[val_idx].index),
                                     nodes_subjects_list[val_idx].values)

            # 用test_idx的数据生成验证集的generator
            generator = GraphSAGENodeGenerator(
                G=G_list[test_idx],
                batch_size=len(nodes_subjects_list[test_idx]),
                num_samples=num_samples,
                seed=100)
            test_gen = generator.flow(
                list(nodes_subjects_list[test_idx].index),
                nodes_subjects_list[test_idx].values)

            # 通过输入输出的tensor构建model
            model = Model(inputs=x_inp, outputs=x_out)
            monitor = EarlyStopping(monitor='val_loss',
                                    min_delta=1e-3,
                                    patience=10,
                                    verbose=2,
                                    mode='auto',
                                    restore_best_weights=True)
            model.compile(optimizer=optimizers.Adam(lr=0.05),
                          loss=losses.mean_squared_error,
                          metrics=[pearson_r])

            history = model.fit(train_gen,
                                epochs=500,
                                validation_data=val_gen,
                                verbose=0,
                                shuffle=False,
                                callbacks=[monitor])

            test_metrics = model.evaluate(test_gen)
            test_result_dict = {}
            print("\n" + str(train_idx + 220) + "'s Test Set: " +
                  str(test_idx + 220) + "'s Metrics:")
            for name, val in zip(model.metrics_names, test_metrics):
                print("\t{}: {:0.4f}".format(name, val))
                test_result_dict[name] = val
            json.dump(
                test_result_dict,
                open(
                    'test_result/' + str(train_idx + 220) + "_" +
                    str(test_idx + 220) + '.json', 'w'))

            test_preds = model.predict(test_gen)

            end = time.time()

            # 保存一些结果
            graph_history_list.append(history)  # 保存训练过程
            model_list.append(model)  # 保存model
            train_gen_list.append(train_gen)  # 保存train_gen方便之后算中间层的结果
            time_list.append(end - start)  # 保存运行时间
            model_weight_list.append(model.weights)  # 保存model的参数
            test_result.append(test_metrics[1])

            # # 存模型model
            # model.save('model/' + str(train_idx + 220) + "_" + str(val_idx + 220) + '.h5')
            # # 存训练过程history
            # json.dump(history.history,
            #           open('history/' + str(train_idx + 220) + "_" + str(val_idx + 220) + '.json', 'w'))
            # # 存训练过程图片figure
            # sg.utils.plot_history(history)
            # plt.title(str(train_idx + 220) + '->' + str(val_idx + 220))
            # plt.savefig('figure/' + str(train_idx + 220) + "_" + str(val_idx + 220) + '.png')
            # plt.show()
            # 存test的prediction的distribution
            plt.figure(figsize=(5, 10))
            plt.subplot(211)
            plt.hist(test_preds, bins=500)
            plt.title("Distribution of Prediction of " + str(test_idx + 220))
            plt.subplot(212)
            plt.hist(nodes_subjects_list[test_idx].values, bins=500)
            plt.title("Distribution of Origin of " + str(test_idx + 220))
            plt.xlabel("ic=" + str(test_metrics[1]))
            plt.savefig('figure_distribution/distribution-' +
                        str(train_idx + 220) + "_" + str(test_idx + 220) +
                        '.png',
                        dpi=300)
            plt.show()

            print(str(i + 220) + "'s " + str(j + 1) + " run has finished")
            print()

        # 将小循环的数据保存
        graph_history_list_list.append(graph_history_list)
        model_list_list.append(model_list)
        train_gen_list_list.append(train_gen_list)
        time_list_list.append(time_list)
        model_weight_list_list.append(model_weight_list)

        return graph_history_list_list, model_list_list, train_gen_list_list, time_list_list, model_weight_list_list, test_result