# Imports assumed by this excerpt (the original file's header is not shown):
import os

import matplotlib.pyplot as plt
from kivy.app import App
from kivy.garden.matplotlib.backend_kivyagg import FigureCanvasKivyAgg
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.button import Button
from kivy.uix.label import Label
from tensorflow.keras import layers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.optimizers import RMSprop


class SixthPage(BoxLayout):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.orientation = "vertical"
        # Empty labels act as vertical spacers around the training button.
        for _ in range(4):
            self.add_widget(Label())
        self.updating_btn = Button(text="Start Training",
                                   on_press=self.updating,
                                   pos_hint={"center_x": 0.5, "center_y": 0.5},
                                   size_hint=(0.5, 0.5),
                                   background_color=(0, 0, 0, 1),
                                   font_size="20sp")
        self.add_widget(self.updating_btn)
        for _ in range(4):
            self.add_widget(Label())

    def updating(self, obj):
        self.clear_widgets()
        self.orientation = "vertical"

        # Build a transfer-learning model on a frozen InceptionV3 base.
        local_weights_file = "model_weights/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"
        pre_trained_model = InceptionV3(input_shape=(150, 150, 3),
                                        include_top=False,
                                        weights=None)
        pre_trained_model.load_weights(local_weights_file)
        for layer in pre_trained_model.layers:
            layer.trainable = False

        last_layer = pre_trained_model.get_layer("mixed7")
        last_output = last_layer.output

        x = layers.Flatten()(last_output)
        x = layers.Dense(1024, activation="relu")(x)
        x = layers.Dropout(
            float(int(main_app.fifth_page.drop_out_rate_inp.text) / 100))(x)
        # A single sigmoid unit is a binary classifier head, so the loss and
        # the generators' class_mode below are set to binary to match.
        x = layers.Dense(1, activation="sigmoid")(x)

        self.model = Model(pre_trained_model.input, x)
        self.model.compile(
            optimizer=RMSprop(
                lr=float(main_app.fifth_page.learning_rate_inp.text)),
            loss="binary_crossentropy",
            metrics=["acc"])

        from tensorflow.keras.preprocessing.image import ImageDataGenerator
        train_datagen = ImageDataGenerator(rescale=1. / 255,
                                           rotation_range=40,
                                           width_shift_range=0.2,
                                           height_shift_range=0.2,
                                           shear_range=0.2,
                                           zoom_range=0.2,
                                           horizontal_flip=True)
        train_generator = train_datagen.flow_from_directory(
            main_app.fourth_page.dataset_folder_name + "train/",
            target_size=(150, 150),
            class_mode="binary")

        validation_datagen = ImageDataGenerator(rescale=1. / 255)
        self.validation_generator = validation_datagen.flow_from_directory(
            main_app.fourth_page.dataset_folder_name + "test/",
            target_size=(150, 150),
            class_mode="binary")

        history = self.model.fit_generator(
            train_generator,
            validation_data=self.validation_generator,
            epochs=int(main_app.fifth_page.epoch_inp.text),
            verbose=2)

        scores = self.model.evaluate_generator(self.validation_generator)
        print("%s: %.2f%%" % (self.model.metrics_names[1], scores[1] * 100))

        # Plot the training history (accuracy on top, loss below) in the app.
        acc = history.history["acc"]
        val_acc = history.history["val_acc"]
        loss = history.history["loss"]
        val_loss = history.history["val_loss"]
        epochs = range(len(acc))

        plt.subplot(211)
        plt.title("Training History")
        plt.plot(epochs, acc, "-b", label="training_acc")
        plt.plot(epochs, val_acc, "-r", label="validation_acc")
        plt.legend(loc="upper left")

        plt.subplot(212)
        plt.plot(epochs, loss, "-b", label="training_loss")
        plt.plot(epochs, val_loss, "-r", label="validation_loss")
        plt.legend(loc="upper left")

        self.add_widget(FigureCanvasKivyAgg(plt.gcf()))

        self.model_save_btn = Button(text="Save model",
                                     background_color=(0, 0, 0, 1),
                                     font_size="20sp",
                                     on_press=self.save_model,
                                     size_hint=(0.2, 0.2),
                                     pos_hint={"center_x": 0.5,
                                               "center_y": 0.8})
        self.add_widget(self.model_save_btn)

        self.model_load_btn = Button(text="Load model",
                                     background_color=(0, 0, 0, 1),
                                     font_size="20sp",
                                     on_press=self.load_model,
                                     size_hint=(0.2, 0.2),
                                     pos_hint={"center_x": 0.5,
                                               "center_y": 0.8})
        self.add_widget(self.model_load_btn)

        self.add_widget(
            Button(text="Quit",
                   background_color=(0, 0, 0, 1),
                   font_size="20sp",
                   on_press=self.quit_app,
                   size_hint=(0.2, 0.2),
                   pos_hint={"center_x": 0.9, "center_y": 1.0}))

    def path_check(self, path):
        # Create the directory if it does not already exist.
        if not os.path.exists(path):
            os.makedirs(path)

    def quit_app(self, obj):
        App.get_running_app().stop()

    def save_model(self, obj):
        # Serialize the architecture to JSON and the weights to HDF5.
        self.model_json = self.model.to_json()
        self.path_check("saved_models/")
        with open("saved_models/model.json", "w") as json_file:
            json_file.write(self.model_json)
        self.model.save_weights("saved_models/model.h5")
        print("Saved model to disk")

    def load_model(self, obj):
        # Rebuild the architecture from JSON, then restore the weights.
        with open("saved_models/model.json", "r") as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        loaded_model.load_weights("saved_models/model.h5")
        print("Loaded model from disk")

        # Evaluate the restored model on the validation data.
        loaded_model.compile(loss="binary_crossentropy",
                             optimizer="rmsprop",
                             metrics=["accuracy"])
        score = loaded_model.evaluate_generator(self.validation_generator)
        print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1] * 100))
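
# --- Usage sketch (not from the source): one way SixthPage might be mounted
# --- in the surrounding app. The MainApp class and the module-level main_app
# --- handle are assumptions inferred from the main_app.fourth_page and
# --- main_app.fifth_page references above; those pages would need to be
# --- created elsewhere in the real app.
class MainApp(App):
    def build(self):
        self.sixth_page = SixthPage()
        return self.sixth_page


if __name__ == "__main__":
    main_app = MainApp()  # SixthPage.updating reads main_app.*_page globals
    main_app.run()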
def train(
    self,
    layer_size,
    num_samples,
    train_size=0.7,
    batch_size: int = 200,
    num_epochs: int = 20,
    learning_rate=5e-3,
    dropout=0.0,
    use_bias=True,
):
    """
    Build and train the HinSAGE model for link attribute prediction on the
    specified graph G with the given parameters.

    Args:
        layer_size: a list of the number of hidden nodes in each layer
        num_samples: number of neighbours to sample at each layer
        train_size: fraction of edges to use for training
        batch_size: size of a mini-batch
        num_epochs: number of epochs to train the model (one epoch streams
            all training batches through the model once)
        learning_rate: initial learning rate
        dropout: dropout probability in the range [0, 1)
        use_bias: whether to use bias terms in the HinSAGE model

    Returns:
        None; the test-set evaluation metrics are printed.
    """
    # Split the edges into training and test sets
    edges = list(self.g.edges(data=True))
    edges_train, edges_test = model_selection.train_test_split(
        edges, train_size=train_size)

    # Edgelists:
    edgelist_train = [(e[0], e[1]) for e in edges_train]
    edgelist_test = [(e[0], e[1]) for e in edges_test]

    labels_train = [e[2]["score"] for e in edges_train]
    labels_test = [e[2]["score"] for e in edges_test]

    # Our machine learning task of learning user-movie ratings can be framed
    # as supervised link attribute inference: given a graph of user-movie
    # ratings, we train a model for rating prediction on the edges in
    # edges_train and evaluate it on edges_test. The model also requires the
    # user-movie graph structure, so we create a StellarGraph object from the
    # ingested graph. When sampling the GraphSAGE subgraphs, we want to treat
    # user-movie links as undirected.
    self.g = sg.StellarGraph(self.g, node_features="feature")

    # Next, create the link generators that prepare and stream training and
    # testing data to the model. The mappers sample k-hop subgraphs of G with
    # randomly selected head nodes, as required by the HinSAGE algorithm, and
    # generate minibatches of those samples to feed the input layer of the
    # HinSAGE model.
    generator = HinSAGELinkGenerator(self.g,
                                     batch_size,
                                     num_samples,
                                     head_node_types=["user", "movie"])
    train_gen = generator.flow(edgelist_train, labels_train)
    test_gen = generator.flow(edgelist_test, labels_test)

    # Build the model by stacking a two-layer HinSAGE model and a link
    # regression layer on top.
    assert len(layer_size) == len(
        num_samples
    ), "layer_size and num_samples must be of the same length! Stopping."

    hinsage = HinSAGE(layer_sizes=layer_size,
                      generator=generator,
                      bias=use_bias,
                      dropout=dropout)

    # Define the input and output sockets of hinsage:
    x_inp, x_out = hinsage.build()

    # Final estimator layer
    score_prediction = link_regression(
        edge_embedding_method=args.edge_embedding_method)(x_out)

    # Create the Keras model for training
    model = Model(inputs=x_inp, outputs=score_prediction)
    model.compile(
        optimizer=optimizers.Adam(lr=learning_rate),
        loss=losses.mean_squared_error,
        metrics=[root_mean_square_error, metrics.mae],
    )

    # Train the model
    print("Training the model for {} epochs with initial learning rate {}".
          format(num_epochs, learning_rate))
    history = model.fit_generator(
        train_gen,
        validation_data=test_gen,
        epochs=num_epochs,
        verbose=2,
        shuffle=True,
        use_multiprocessing=True,
        workers=multiprocessing.cpu_count() // 2,
    )

    # Evaluate and print the test metrics
    test_metrics = model.evaluate_generator(test_gen)

    print("Test Evaluation:")
    for name, val in zip(model.metrics_names, test_metrics):
        print("\t{}: {:0.4f}".format(name, val))
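
# --- Usage sketch (not from the source): calling train() with illustrative
# --- hyperparameters. `recommender` is a hypothetical instance of the class
# --- that defines train(); note that layer_size and num_samples must have
# --- the same length (one entry per sampled hop).
recommender.train(
    layer_size=[32, 32],  # hidden units for each of the two HinSAGE layers
    num_samples=[8, 4],   # neighbours sampled at hop 1 and hop 2
    num_epochs=10,
    learning_rate=5e-3,
    dropout=0.1,
)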
model = Model(inputs=x_inp, outputs=prediction)
model.compile(
    optimizer=optimizers.Adam(lr=0.005),
    loss=losses.categorical_crossentropy,
    metrics=["acc"],
)

test_gen = generator.flow(test_data.index, test_targets)

history = model.fit_generator(
    train_gen, epochs=20, validation_data=test_gen, verbose=2, shuffle=False
)

sg.utils.plot_history(history)

test_metrics = model.evaluate_generator(test_gen)
print("\nTest Set Metrics:")
for name, val in zip(model.metrics_names, test_metrics):
    print("\t{}: {:0.4f}".format(name, val))

# Predict on all nodes and map the one-hot outputs back to subject labels.
all_nodes = node_data.index
all_mapper = generator.flow(all_nodes)
all_predictions = model.predict_generator(all_mapper)

node_predictions = target_encoding.inverse_transform(all_predictions)

results = pd.DataFrame(node_predictions, index=all_nodes).idxmax(axis=1)
df = pd.DataFrame({"Predicted": results, "True": node_data["subject"]})
display(df.head(10))
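
# --- Hedged follow-up (not from the source): overall agreement between the
# --- predicted and true subjects. DictVectorizer feature names carry a
# --- "subject=" prefix, so strip it before comparing against the raw labels.
accuracy = (df["Predicted"].str.replace("subject=", "") == df["True"]).mean()
print("Overall node-label accuracy: {:0.4f}".format(accuracy))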
plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.savefig('transfer_acc_6.png')
plt.gcf().clear()

plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Number of Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('transfer_loss_6.png')

scores = model.evaluate_generator(validation_generator, 45)
print(" Test Accuracy = ", scores[1])
print(" Test Loss =", scores[0])

# Evaluate the predictions; print a confusion matrix and classification report
y_pred = model.predict_generator(
    validation_generator,
    validation_generator.samples // validation_generator.batch_size)
y_pred = np.argmax(y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
num_classes = 9
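
# --- A plausible completion (the report call itself is missing from the
# --- excerpt): print the scikit-learn classification report, recovering the
# --- class names from the generator's class_indices mapping.
from sklearn.metrics import classification_report
target_names = list(validation_generator.class_indices.keys())
print(classification_report(validation_generator.classes, y_pred,
                            target_names=target_names))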
# Imports assumed by this excerpt (the original file's header is not shown):
import numpy as np
import pandas as pd
import stellargraph as sg
from sklearn import feature_extraction
from sklearn import metrics as skmetrics
from sklearn.utils import class_weight
from stellargraph.layer import GraphSAGE
from stellargraph.mapper import GraphSAGENodeGenerator
from tensorflow.keras import layers, losses, metrics, optimizers
from tensorflow.keras.models import Model


def train_model(Gnx, train_data, test_data, all_features):
    output_results = {}
    from collections import Counter

    # Record the dataset sizes and the count of each subject in the splits.
    print(len(train_data), len(test_data))
    subject_groups_train = Counter(train_data['subject'])
    subject_groups_test = Counter(test_data['subject'])
    output_results['train_size'] = len(train_data)
    output_results['test_size'] = len(test_data)
    output_results['subject_groups_train'] = subject_groups_train
    output_results['subject_groups_test'] = subject_groups_test

    G = sg.StellarGraph(Gnx, node_features=all_features)

    # Save the graph info
    print(G.info())
    output_results['graph_info'] = G.info()

    print("building the graph generator...")
    batch_size = 50
    num_samples = [10, 5]
    generator = GraphSAGENodeGenerator(G, batch_size, num_samples)

    # One-hot encode the target subjects.
    target_encoding = feature_extraction.DictVectorizer(sparse=False)
    train_targets = target_encoding.fit_transform(
        train_data[["subject"]].to_dict('records'))
    print(np.unique(train_data["subject"].to_list()))

    # Balance the classes via per-class weights.
    class_weights = class_weight.compute_class_weight(
        'balanced',
        np.unique(train_data["subject"].to_list()),
        train_data["subject"].to_list())
    # Keras expects class_weight as a dict mapping class index -> weight.
    class_weights = dict(enumerate(class_weights))
    print('class_weights', class_weights)

    test_targets = target_encoding.transform(
        test_data[["subject"]].to_dict('records'))
    train_gen = generator.flow(train_data.index, train_targets, shuffle=True)

    graphsage_model = GraphSAGE(
        layer_sizes=[80, 80],
        generator=generator,
        bias=True,
        dropout=0.5,
    )

    print("building model...")
    x_inp, x_out = graphsage_model.build()
    prediction = layers.Dense(units=train_targets.shape[1],
                              activation="softmax")(x_out)

    model = Model(inputs=x_inp, outputs=prediction)
    print("compiling model...")
    model.compile(
        optimizer=optimizers.Adam(lr=0.005),
        loss=losses.categorical_crossentropy,
        metrics=["acc", metrics.categorical_accuracy],
    )

    print("training the model...")
    test_gen = generator.flow(test_data.index, test_targets)
    history = model.fit_generator(
        train_gen,
        epochs=EPOCH,  # EPOCH: module-level constant defined elsewhere
        validation_data=test_gen,
        verbose=2,
        shuffle=True,
        class_weight=class_weights,
    )

    # Save the test metrics
    test_metrics = model.evaluate_generator(test_gen)
    print("\nTest Set Metrics:")
    output_results['test_metrics'] = []
    for name, val in zip(model.metrics_names, test_metrics):
        output_results['test_metrics'].append({'name': name, 'val': val})
        print("\t{}: {:0.4f}".format(name, val))

    # Predict on the test nodes and map the outputs back to subject labels.
    test_nodes = test_data.index
    test_mapper = generator.flow(test_nodes)
    test_predictions = model.predict_generator(test_mapper)
    node_predictions = target_encoding.inverse_transform(test_predictions)
    results = pd.DataFrame(node_predictions, index=test_nodes).idxmax(axis=1)
    df = pd.DataFrame({"Predicted": results, "True": test_data['subject']})
    # DictVectorizer feature names carry a "subject=" prefix; strip it.
    clean_result_labels = df["Predicted"].map(
        lambda x: x.replace('subject=', ''))

    # Save the predicted labels and per-label precision/recall/F1.
    pred_labels = np.unique(clean_result_labels.values)
    precision, recall, f1, _ = skmetrics.precision_recall_fscore_support(
        df['True'].values,
        clean_result_labels.values,
        average=None,
        labels=pred_labels)
    output_results['classifier'] = []
    for lbl, prec, rec, fm in zip(pred_labels, precision, recall, f1):
        output_results['classifier'].append({
            'label': lbl,
            'precision': prec,
            'recall': rec,
            'fscore': fm
        })

    print(output_results['classifier'])
    print(pred_labels)
    print('precision: {}'.format(precision))
    print('recall: {}'.format(recall))
    print('fscore: {}'.format(f1))

    return generator, model, x_inp, x_out, history, target_encoding, output_results
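
# --- Usage sketch (not from the source): invoking train_model and persisting
# --- output_results. Assumes Gnx, the train/test frames, the node feature
# --- matrix, and the module-level EPOCH constant are prepared upstream; the
# --- output filename is illustrative.
import json

generator, model, x_inp, x_out, history, target_encoding, output_results = \
    train_model(Gnx, train_data, test_data, all_features)

with open("training_results.json", "w") as f:
    # Counters serialize as dicts; default=str covers numpy scalars.
    json.dump(output_results, f, indent=2, default=str)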