Example #1
# Imports assumed by this example; `main_app` is a module-level reference to
# the running App instance, defined elsewhere in the original program.
import os

import matplotlib.pyplot as plt
from kivy.app import App
from kivy.garden.matplotlib.backend_kivyagg import FigureCanvasKivyAgg
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.label import Label
from tensorflow.keras import layers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.optimizers import RMSprop

class SixthPage(BoxLayout):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.orientation = "vertical"
        self.add_widget(Label())
        self.add_widget(Label())
        self.add_widget(Label())
        self.add_widget(Label())
        self.updating_btn = Button(text="Start Training",
                                   on_press=self.updating,
                                   pos_hint={
                                       "center_x": 0.5,
                                       "center_y": 0.5
                                   },
                                   size_hint=(0.5, 0.5),
                                   background_color=(0, 0, 0, 1),
                                   font_size="20sp")
        self.add_widget(self.updating_btn)
        self.add_widget(Label())
        self.add_widget(Label())
        self.add_widget(Label())
        self.add_widget(Label())

    def updating(self, obj):
        # print(main_app.fourth_page.dataset_folder_name)
        # print(int(main_app.fifth_page.epoch_inp.text))
        # print(int(main_app.fifth_page.batch_size_inp.text))
        # print(float(main_app.fifth_page.learning_rate_inp.text))
        # print(float(int(main_app.fifth_page.drop_out_rate_inp.text)/100))
        self.clear_widgets()

        self.orientation = "vertical"
        local_weights_file = "model_weights/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"

        pre_trained_model = InceptionV3(input_shape=(150, 150, 3),
                                        include_top=False,
                                        weights=None)

        pre_trained_model.load_weights(local_weights_file)

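        # Freeze every InceptionV3 layer; only the newly added head will train.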
        for layer in pre_trained_model.layers:
            layer.trainable = False

        last_layer = pre_trained_model.get_layer("mixed7")

        last_output = last_layer.output

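        # Attach a fresh classification head on top of the frozen "mixed7" features.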
        x = layers.Flatten()(last_output)
        x = layers.Dense(1024, activation="relu")(x)
        # The dropout rate is entered as a percentage, so divide by 100.
        x = layers.Dropout(
            int(main_app.fifth_page.drop_out_rate_inp.text) / 100)(x)
        x = layers.Dense(1, activation="sigmoid")(x)

        self.model = Model(pre_trained_model.input, x)

        # A single sigmoid output calls for binary cross-entropy.
        self.model.compile(optimizer=RMSprop(
            lr=float(main_app.fifth_page.learning_rate_inp.text)),
                           loss="binary_crossentropy",
                           metrics=["acc"])

        from tensorflow.keras.preprocessing.image import ImageDataGenerator

        train_datagen = ImageDataGenerator(rescale=1. / 255,
                                           rotation_range=40,
                                           width_shift_range=0.2,
                                           height_shift_range=0.2,
                                           shear_range=0.2,
                                           zoom_range=0.2,
                                           horizontal_flip=True)

        train_generator = train_datagen.flow_from_directory(
            main_app.fourth_page.dataset_folder_name + "train/",
            target_size=(150, 150),
            class_mode="binary")

        validation_datagen = ImageDataGenerator(rescale=1. / 255)

        self.validation_generator = validation_datagen.flow_from_directory(
            main_app.fourth_page.dataset_folder_name + "test/",
            target_size=(150, 150),
            class_mode="binary")
        history = self.model.fit_generator(
            train_generator,
            validation_data=self.validation_generator,
            epochs=int(main_app.fifth_page.epoch_inp.text),
            verbose=2)

        scores = self.model.evaluate_generator(self.validation_generator)
        print("%s: %.2f%%" % (self.model.metrics_names[1], scores[1] * 100))

        acc = history.history["acc"]
        val_acc = history.history["val_acc"]

        loss = history.history["loss"]
        val_loss = history.history["val_loss"]
        epochs = range(len(acc))
        plt.subplot(211)
        plt.title("Training History")
        plt.plot(epochs, acc, "-b", label="training_acc")
        plt.plot(epochs, val_acc, "-r", label="validation_acc")
        plt.legend(loc="upper left")

        plt.subplot(212)
        # self.add_widget(FigureCanvasKivyAgg(plt.gcf()))
        plt.plot(epochs, loss, "-b", label="training_loss")
        plt.plot(epochs, val_loss, "-r", label="validation_loss")
        plt.legend(loc="upper left")

        self.add_widget(FigureCanvasKivyAgg(plt.gcf()))

        self.model_save_btn = Button(text="Save model",
                                     background_color=(0, 0, 0, 1),
                                     font_size="20sp",
                                     on_press=self.save_model,
                                     size_hint=(0.2, 0.2),
                                     pos_hint={
                                         "center_x": 0.5,
                                         "center_y": 0.8
                                     })
        self.add_widget(self.model_save_btn)
        self.model_load_btn = Button(text="Load model",
                                     background_color=(0, 0, 0, 1),
                                     font_size="20sp",
                                     on_press=self.load_model,
                                     size_hint=(0.2, 0.2),
                                     pos_hint={
                                         "center_x": 0.5,
                                         "center_y": 0.8
                                     })
        self.add_widget(self.model_load_btn)
        self.add_widget(
            Button(text="Quit",
                   background_color=(0, 0, 0, 1),
                   font_size="20sp",
                   on_press=self.quit_app,
                   size_hint=(0.2, 0.2),
                   pos_hint={
                       "center_x": 0.9,
                       "center_y": 1.0
                   }))

    def path_check(self, path):
        # Create the directory if it does not already exist.
        os.makedirs(path, exist_ok=True)

    def quit_app(self, obj):
        App.get_running_app().stop()

    def save_model(self, obj):
        self.model_json = self.model.to_json()
        self.path_check("saved_models/")
        with open("saved_models/model.json", "w") as json_file:
            json_file.write(self.model_json)

        self.model.save_weights("saved_models/model.h5")
        print("Saved model to disk")

    def load_model(self, obj):
        with open('saved_models/model.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights("saved_models/model.h5")
        print("Loaded model from disk")

        # evaluate loaded model on test data
        loaded_model.compile(loss='categorical_crossentropy',
                             optimizer='rmsprop',
                             metrics=['accuracy'])
        score = loaded_model.evaluate_generator(self.validation_generator)
        print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))
Example #2
# Imports assumed by this excerpt (the method belongs to a larger class;
# `args` and `root_mean_square_error` are defined elsewhere in that script):
import multiprocessing

import stellargraph as sg
from sklearn import model_selection
from stellargraph.layer import HinSAGE, link_regression
from stellargraph.mapper import HinSAGELinkGenerator
from tensorflow.keras import Model, losses, metrics, optimizers

    def train(
        self,
        layer_size,
        num_samples,
        train_size=0.7,
        batch_size: int = 200,
        num_epochs: int = 20,
        learning_rate=5e-3,
        dropout=0.0,
        use_bias=True,
    ):
        """
        Build and train the HinSAGE model for link attribute prediction on the specified graph G
        with given parameters.

        Args:
            layer_size: a list of number of hidden nodes in each layer
            num_samples: number of neighbours to sample at each layer
            batch_size: size of mini batch
            num_epochs: number of epochs to train the model (epoch = all training batches are streamed through the model once)
            learning_rate: initial learning rate
            dropout: dropout probability in the range [0, 1)
            use_bias: tells whether to use a bias terms in HinSAGE model

        Returns:

        """

        # Training and test edges
        edges = list(self.g.edges(data=True))
        edges_train, edges_test = model_selection.train_test_split(
            edges, train_size=train_size)

        #  Edgelists:
        edgelist_train = [(e[0], e[1]) for e in edges_train]
        edgelist_test = [(e[0], e[1]) for e in edges_test]

        labels_train = [e[2]["score"] for e in edges_train]
        labels_test = [e[2]["score"] for e in edges_test]

        # Our machine learning task of learning user-movie ratings can be framed as a supervised Link Attribute Inference:
        # given a graph of user-movie ratings, we train a model for rating prediction using the ratings edges_train,
        # and evaluate it using the test ratings edges_test. The model also requires the user-movie graph structure.
        # To proceed, we need to create a StellarGraph object from the ingested graph, for training the model:
        # When sampling the GraphSAGE subgraphs, we want to treat user-movie links as undirected
        self.g = sg.StellarGraph(self.g, node_features="feature")

        # Next, we create the link generators for preparing and streaming training and testing data to the model.
        # The mappers essentially sample k-hop subgraphs of G with randomly selected head nodes, as required by
        # the HinSAGE algorithm, and generate minibatches of those samples to be fed to the input layer of the HinSAGE model.
        generator = HinSAGELinkGenerator(self.g,
                                         batch_size,
                                         num_samples,
                                         head_node_types=["user", "movie"])
        train_gen = generator.flow(edgelist_train, labels_train)
        test_gen = generator.flow(edgelist_test, labels_test)

        # Build the model by stacking a two-layer HinSAGE model and a link regression layer on top.
        assert len(layer_size) == len(
            num_samples
        ), "layer_size and num_samples must be of the same length! Stopping."

        hinsage = HinSAGE(layer_sizes=layer_size,
                          generator=generator,
                          bias=use_bias,
                          dropout=dropout)

        # Define input and output sockets of hinsage:
        x_inp, x_out = hinsage.build()

        # Final estimator layer
        score_prediction = link_regression(
            edge_embedding_method=args.edge_embedding_method)(x_out)

        # Create Keras model for training
        model = Model(inputs=x_inp, outputs=score_prediction)
        model.compile(
            optimizer=optimizers.Adam(lr=learning_rate),
            loss=losses.mean_squared_error,
            metrics=[root_mean_square_error, metrics.mae],
        )

        # Train model
        print("Training the model for {} epochs with initial learning rate {}".
              format(num_epochs, learning_rate))
        history = model.fit_generator(
            train_gen,
            validation_data=test_gen,
            epochs=num_epochs,
            verbose=2,
            shuffle=True,
            use_multiprocessing=True,
            workers=multiprocessing.cpu_count() // 2,
        )

        # Evaluate and print metrics
        test_metrics = model.evaluate_generator(test_gen)

        print("Test Evaluation:")
        for name, val in zip(model.metrics_names, test_metrics):
            print("\t{}: {:0.4f}".format(name, val))
Example #3
# Imports assumed by this fragment; x_inp, prediction, generator, train_gen,
# test_data/test_targets, node_data, target_encoding, validation_generator,
# and the epochs/acc/val_acc/loss/val_loss histories come from earlier cells
# of the original notebook.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import stellargraph as sg
from IPython.display import display
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras import Model, losses, optimizers

model = Model(inputs=x_inp, outputs=prediction)
model.compile(
    optimizer=optimizers.Adam(lr=0.005),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)

test_gen = generator.flow(test_data.index, test_targets)
history = model.fit_generator(
    train_gen, epochs=20, validation_data=test_gen, verbose=2, shuffle=False
)

sg.utils.plot_history(history)

test_metrics = model.evaluate_generator(test_gen)
print("\nTest Set Metrics:")
for name, val in zip(model.metrics_names, test_metrics):
    print("\t{}: {:0.4f}".format(name, val))

all_nodes = node_data.index
all_mapper = generator.flow(all_nodes)
all_predictions = model.predict_generator(all_mapper)

node_predictions = target_encoding.inverse_transform(all_predictions)

results = pd.DataFrame(node_predictions, index=all_nodes).idxmax(axis=1)
df = pd.DataFrame({"Predicted": results, "True": node_data["subject"]})
display(df.head(10))

# Accuracy curves (companion to the loss plot below; the plot calls assume the
# acc/val_acc histories from the training run).
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.savefig('transfer_acc_6.png')
plt.gcf().clear()

plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Number of Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('transfer_loss_6.png')

scores = model.evaluate_generator(validation_generator, 45)
print("  Test Accuracy = ", scores[1])
print("  Test Loss =", scores[0])

# Evaluate the predictions and print a confusion matrix and classification report.
# Note: this assumes the validation generator was created with shuffle=False, so
# that validation_generator.classes lines up with the prediction order.
batch_size = 5
# y_pred = model.predict_generator(test_generator, 1300 // batch_size+1)
y_pred = model.predict_generator(
    validation_generator,
    validation_generator.samples // validation_generator.batch_size)
y_pred = np.argmax(y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))

print('Classification Report')
num_classes = 9
# Hypothetical completion: a classification_report call presumably followed;
# the class names below are placeholders.
target_names = ['class_{}'.format(i) for i in range(num_classes)]
print(classification_report(validation_generator.classes, y_pred,
                            target_names=target_names))
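The metrics-printing loop used throughout these examples can be factored into
a small helper (a sketch, assuming any compiled Keras model and a matching
evaluation generator):

def print_generator_metrics(model, gen, header="Test Set Metrics:"):
    # evaluate_generator returns values in the same order as
    # model.metrics_names, so the two can be zipped together.
    values = model.evaluate_generator(gen)
    print(header)
    for name, val in zip(model.metrics_names, values):
        print("\t{}: {:0.4f}".format(name, val))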
Example #5
# Imports assumed by this function; `EPOCH` is a module-level constant defined
# elsewhere in the original script.
import numpy as np
import pandas as pd
import sklearn.metrics as skmetrics
import stellargraph as sg
from sklearn import feature_extraction
from sklearn.utils import class_weight
from stellargraph.layer import GraphSAGE
from stellargraph.mapper import GraphSAGENodeGenerator
from tensorflow.keras import Model, layers, losses, metrics, optimizers

def train_model(Gnx, train_data, test_data, all_features):
    output_results = {}
    from collections import Counter
    #TODO: save size of dataset, train_data, and test data
    #save the count of each subject in the blocks
    print(len(train_data), len(test_data))
    subject_groups_train = Counter(train_data['subject'])
    subject_groups_test = Counter(test_data['subject'])
    output_results['train_size'] = len(train_data)
    output_results['test_size'] = len(test_data)
    output_results['subject_groups_train'] = subject_groups_train
    output_results['subject_groups_test'] = subject_groups_test

    #node_features = train_data[feature_names]
    #print (feature_names)
    G = sg.StellarGraph(Gnx, node_features=all_features)
    #TODO: save graph info
    print(G.info())
    print("writing graph.dot")
    #write_dot(Gnx,"graph.dot")
    output_results['graph_info'] = G.info()
    print("building the graph generator...")

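    # Mini-batches of 50 head nodes; sample 10 first-hop and 5 second-hop neighbours.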
    batch_size = 50
    num_samples = [10, 5]
    generator = GraphSAGENodeGenerator(G, batch_size, num_samples)
    #generator = HinSAGENodeGenerator(G, batch_size, num_samples)

    target_encoding = feature_extraction.DictVectorizer(sparse=False)
    train_targets = target_encoding.fit_transform(
        train_data[["subject"]].to_dict('records'))
    print(np.unique(train_data["subject"].to_list()))
    # Newer scikit-learn requires keyword arguments here.
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(train_data["subject"].to_list()),
        y=train_data["subject"].to_list())
    print('class_weights', class_weights)
    test_targets = target_encoding.transform(
        test_data[["subject"]].to_dict('records'))
    train_gen = generator.flow(train_data.index, train_targets, shuffle=True)
    graphsage_model = GraphSAGE(
        #graphsage_model = HinSAGE(
        #layer_sizes=[32, 32],
        layer_sizes=[80, 80],
        generator=generator,  #train_gen,
        bias=True,
        dropout=0.5,
    )
    print("building model...")
    #x_inp, x_out = graphsage_model.build(flatten_output=True)
    x_inp, x_out = graphsage_model.build()
    prediction = layers.Dense(units=train_targets.shape[1],
                              activation="softmax")(x_out)

    model = Model(inputs=x_inp, outputs=prediction)
    print("compiling model...")
    model.compile(
        optimizer=optimizers.Adam(lr=0.005),
        loss=losses.categorical_crossentropy,
        metrics=["acc", metrics.categorical_accuracy],
    )
    print("testing the model...")
    test_gen = generator.flow(test_data.index, test_targets)
    history = model.fit_generator(
        train_gen,
        epochs=EPOCH,
        validation_data=test_gen,
        verbose=2,
        shuffle=True,
        # Keras expects class_weight as a dict keyed by class index.
        class_weight=dict(enumerate(class_weights)),
    )
    # save test metrics
    test_metrics = model.evaluate_generator(test_gen)
    print("\nTest Set Metrics:")
    output_results['test_metrics'] = []
    for name, val in zip(model.metrics_names, test_metrics):
        output_results['test_metrics'].append({'name': name, 'val': val})
        print("\t{}: {:0.4f}".format(name, val))

    test_nodes = test_data.index
    test_mapper = generator.flow(test_nodes)
    test_predictions = model.predict_generator(test_mapper)
    node_predictions = target_encoding.inverse_transform(test_predictions)
    results = pd.DataFrame(node_predictions, index=test_nodes).idxmax(axis=1)
    df = pd.DataFrame({
        "Predicted": results,
        "True": test_data['subject']
    })  #, "program":test_data['program']})
    clean_result_labels = df["Predicted"].map(
        lambda x: x.replace('subject=', ''))
    # save predicted labels
    pred_labels = np.unique(clean_result_labels.values)
    #pred_program = np.unique(df['program'].values)
    # save predictions per label
    precision, recall, f1, _ = skmetrics.precision_recall_fscore_support(
        df['True'].values,
        clean_result_labels.values,
        average=None,
        labels=pred_labels)
    output_results['classifier'] = []
    for lbl, prec, rec, fm in zip(pred_labels, precision, recall, f1):
        output_results['classifier'].append({
            'label': lbl,
            'precision': prec,
            'recall': rec,
            'fscore': fm
        })
    print(output_results['classifier'])
    print(pred_labels)
    print('precision: {}'.format(precision))
    print('recall: {}'.format(recall))
    print('fscore: {}'.format(f1))

    return generator, model, x_inp, x_out, history, target_encoding, output_results
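
Because train_model returns the model together with x_inp and x_out, the
trained GraphSAGE encoder can be reused beyond classification. A sketch of
extracting node embeddings from the returned handles (variable names follow
the function above; the embedding width matches layer_sizes=[80, 80]):

from tensorflow.keras import Model

generator, model, x_inp, x_out, history, target_encoding, results = train_model(
    Gnx, train_data, test_data, all_features)

# Drop the softmax head: a model from the GraphSAGE inputs to its final
# aggregation layer yields one embedding vector per node.
embedding_model = Model(inputs=x_inp, outputs=x_out)
node_embeddings = embedding_model.predict_generator(
    generator.flow(test_data.index))
print(node_embeddings.shape)  # (num_test_nodes, 80)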