def model_init(init_scheme, **flow):
    """
    Model initialization

    Input:
        ~ init_scheme (string) : specifies the initialization scheme to use
        ~ flow (dictionary) : flow control

    Returns:
        ~ model (keras.models.Sequential) : initialized model
    """

    if flow['mnist']:
        input_dim = (784,)
        output_dim = 10
    else:
        input_dim = (31,)
        output_dim = 4
    #end

    print('Model ``{}`` initialization'.format(init_scheme))
    model = Sequential()

    if init_scheme == 'orth':
        weight_init = Orthogonal(gain = 1.0, seed = flow['seed'])
    elif init_scheme == 'normal':
        weight_init = RandomNormal(mean = 0.0, stddev = 0.1, seed = flow['seed'])
    elif init_scheme == 'glorot':
        weight_init = glorot_normal(seed = flow['seed'])
    #end

    bias_init = RandomNormal(mean = 0.0, stddev = 0.1, seed = flow['seed'])

    model.add(Dense(flow['network'][flow['seed']][0], activation = 'relu',
                    input_shape = input_dim,
                    kernel_initializer = weight_init,
                    bias_initializer = bias_init))
    for hidden_units in flow['network'][flow['seed']][1:]:
        model.add(Dense(hidden_units, activation = 'relu',
                        kernel_initializer = weight_init,
                        bias_initializer = bias_init))
    #end
    model.add(Dense(output_dim, activation = 'softmax',
                    kernel_initializer = weight_init,
                    bias_initializer = bias_init))

    sgd = keras.optimizers.SGD(lr = 0.01, decay = 1e-6, momentum = 0.6, nesterov = True)
    model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])

    path_model_initialized = flow['path_output'] + r'\init'
    streams.check_create_directory(path_model_initialized)
    model.save(path_model_initialized + r'\model_init.h5')

    return model
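"""
A minimal usage sketch of ``model_init``. The flow dictionary below is
hypothetical and only illustrates the keys the function actually consults
('mnist', 'seed', 'network', 'path_output'); the real values are set by the
calling script.
"""
# flow_example = {
#     'mnist'       : False,
#     'seed'        : 618,
#     'network'     : {618 : [20, 10]},    # hidden layer sizes, keyed by seed
#     'path_output' : r'..\Results\orth'
# }
# model = model_init('orth', **flow_example)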
def model_initialisation(N_input, N_classes, initialiser, seed_value, path_init):
    """
    Model initialisation according to a scheme given as input

    Input:
        ~ N_input, N_classes    integers
        ~ initialiser           string, among 'normal', 'orth', 'glorot'
        ~ seed_value            seed for reproducibility; it also specifies the
                                directory in which to store the results
        ~ path_init             path where to store the initialised model

    Returns:
        ~ model                 keras.models.Sequential instance. A linear
                                stack of layers, whose parameters are
                                initialised according to the scheme specified

    Note that the tasks to be performed by the network are straightforward.
    An equally simple overall model and algorithmic setup suffices to capture
    the problem complexity
    """

    print("\nModel initialisation. Scheme:")
    model = Sequential()

    if (initialiser == 'orth'):
        print("Orthogonal weights initialisation")
        weights_initializer = Orthogonal(gain = 1.0, seed = seed_value)
    elif (initialiser == 'normal'):
        print("Normal weights initialisation")
        weights_initializer = RandomNormal(mean = 0.0, stddev = 0.1, seed = seed_value)
    elif (initialiser == 'glorot'):
        print("Glorot weights initialisation")
        weights_initializer = glorot_normal(seed = seed_value)
    elif (initialiser == 'zeros'):
        weights_initializer = Zeros()
    else:
        # fail early rather than hitting a NameError on the first Dense layer
        raise ValueError('NO initialiser match: ``{}``'.format(initialiser))
    #end

    model.add(Dense(input_dim = N_input, units = 20,
                    kernel_initializer = weights_initializer,
                    bias_initializer = RandomNormal(mean = 0.0, stddev = 0.1, seed = seed_value),
                    activation = 'relu'))
    model.add(Dense(input_dim = 20, units = 10,
                    kernel_initializer = weights_initializer,
                    bias_initializer = RandomNormal(mean = 0.0, stddev = 0.1, seed = seed_value),
                    activation = 'relu'))
    model.add(Dense(units = N_classes,
                    kernel_initializer = weights_initializer,
                    bias_initializer = RandomNormal(mean = 0.0, stddev = 0.1, seed = seed_value),
                    activation = 'softmax'))

    """
    The optimisation algorithm details are defined here once and for all; the
    model is returned with these details already embedded. Hereafter, in the
    actual training stage, it is ready to use with the model.fit(args) method
    """
    sgd = keras.optimizers.SGD(lr = 0.01, decay = 1e-6, momentum = 0.6, nesterov = True)
    model.compile(loss = 'categorical_crossentropy', optimizer = sgd, metrics = ['accuracy'])

    streams.check_create_directory(path_init + r'\init')
    model.save(path_init + r'\init' + r'\model_init.h5')

    return model
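"""
Aside: the defining property of the 'orth' scheme is that the initial weight
matrix W has orthonormal columns, W^T W = I, which preserves the norm of the
forward signal at initialisation. Orthogonal initialisers are commonly
implemented by orthogonalising a random Gaussian matrix, e.g. via QR. A
minimal numpy sketch of the property, independent of keras:
"""
# import numpy as np
# rng = np.random.RandomState(618)
# A = rng.normal(size = (20, 10))
# Q, _ = np.linalg.qr(A)                    # Q has orthonormal columns
# print(np.allclose(Q.T @ Q, np.eye(10)))   # True, up to float tolerance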
def model_train_multitask(path_save_model, dataset_id, split_fraction, plot, init_scheme):
    """
    *** D E P R E C A T E D ***

    In the original spirit of the work, multitask training was thought to be
    an interesting feature to analyse. Following the work by Kashtan and Alon,
    ``Spontaneous evolution of modularity and network motifs'', PNAS September
    27, 2005, 102 (39) 13773-13778; https://doi.org/10.1073/pnas.0503610102,
    training was performed periodically on both data sets, in order to observe
    a hypothetical topological response encoding commonality and/or difference
    information.

    In multitask training, the dataset_id flag is set to 'mvg'. The two data
    sets are fetched and training is performed cyclically on both. The call
    signature and return type are the same as in the model_training function
    above, modulo differences in the labelling conventions
    """

    print("\n\nMultitask training.\n\n")
    model = load_model(path_save_model + r'\init\model_init.h5')

    X_1,Y_1 = load_data(r'DataSets/TreeLev2_DS_list.pkl')
    X_2,Y_2 = load_data(r'DataSets/Clusters_DS_list.pkl')

    Xtrain_1, Xtest_1, Ytrain_1, Ytest_1 = train_test_split(
            X_1, Y_1, test_size = split_fraction, random_state = 20)
    Xtrain_2, Xtest_2, Ytrain_2, Ytest_2 = train_test_split(
            X_2, Y_2, test_size = split_fraction, random_state = 20)

    es1 = EarlyStopping(monitor='val_acc', mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', mode='auto', patience = 20, verbose = 0)

    params = model.get_weights()
    params_pre = params

    streams.check_create_directory(path_save_model + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}_'.format(init_scheme,dataset_id)

    if (plot):
        plotNet = npl.plotNet(params_pre, path_save_pic, trained = False, asGraph = False)
        plotNet.plotNetFunction()
    #end

    for I in range(10):

        # goal 1: resume from the current parameters and train on the first task
        model.set_weights(params)
        model.fit(Xtrain_1, Ytrain_1, validation_split = 0.1,
                  epochs = 100, verbose = 0, callbacks = [es1, es2])
        if (I % 2 == 0):
            print("\nSuperepoch: {}, goal 1\n".format(I))
            print("Model evaluation on test data: loss and accuracy : ",
                  model.evaluate(Xtest_1,Ytest_1, verbose = 2))

        # goal 2: carry the parameters over and train on the second task
        params = model.get_weights()
        model.set_weights(params)
        model.fit(Xtrain_2, Ytrain_2, validation_split = 0.1,
                  epochs = 100, verbose = 0, callbacks = [es1, es2])
        if (I % 2 == 0):
            print("\nSuperepoch: {}, goal 2".format(I))
            print("Model evaluation on test data: loss and accuracy : ",
                  model.evaluate(Xtest_2,Ytest_2, verbose = 2))

        params_post = model.get_weights()
        params = params_post
    #end
def model_training(path_save_model, dataset_id, split_fraction, plot, init_scheme):
    """
    Proper training stage. As usual, the proper directories are checked and,
    if not present, created, in order to store the trained model file in a
    devoted space.

    Input:
        ~ path_save_model   string, where to save the model (.h5)
        ~ dataset_id        string, environment specifier
        ~ split_fraction    float, the fraction of held-out samples used to
                            evaluate the model once trained
        ~ plot              dict, containing boolean flags that specify
                            whether to produce the relevant figures or not
        ~ init_scheme       string, specifies the initialisation scheme used

    Returns:
        ~ model             the keras.models.Sequential instance created in
                            the model_initialisation function, returned with
                            its parameters adjusted according to the task
                            learned
        ~ history.history['acc']
                            list, contains the accuracy value for each
                            training epoch, so that the learning profiles can
                            be plotted, if needed

    Note again that since the tasks are straightforward, the only anti-overfit
    measure adopted is early stopping.
    """

    print("\nModel Training with " + dataset_id + " data set.\n")
    model = load_model(path_save_model + r'\init\model_init.h5')
    params_pre = model.get_weights()

    streams.check_create_directory(path_save_model + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}_'.format(init_scheme,dataset_id)

    if (plot['network']):
        plotNet = npl.plotNet(params_pre, path_save_pic, trained = False, asGraph = False)
        plotNet.plotNetFunction()
    #end

    if (dataset_id == 'tree'):
        X,Y = load_data(r'DataSets/TreeLev2_DS_list.pkl')
    elif (dataset_id == 'clus'):
        X,Y = load_data(r'DataSets/Clusters_DS_list.pkl')
    #end

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
            X, Y, test_size = split_fraction, random_state = 20)

    es1 = EarlyStopping(monitor='val_acc', mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', mode='auto', patience = 20, verbose = 0)

    history = model.fit(Xtrain, Ytrain, validation_split = 0.1,
                        epochs = 100, verbose = 0, callbacks = [es1,es2])

    if (plot['training']):
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='lower right')
        plt.subplot(1,2,2)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.savefig(path_save_pic + "performance.png")
        plt.show()
    #end

    # parameters after training, available for the network plots
    params_post = model.get_weights()
    print("Model evaluation on test data: loss and accuracy\n",
          model.evaluate(Xtest,Ytest, verbose = 2))

    return model, history.history['acc']
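"""
An illustrative call of ``model_training``. The plot dictionary must expose
the 'network' and 'training' boolean flags consulted above; the path and the
split fraction below are placeholders.
"""
# plot_flags = {'network' : False, 'training' : True}
# model, acc_profile = model_training(r'..\Results\orth', 'tree',
#                                     0.2, plot_flags, 'orth')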
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# keras components used by the initialisation and training routines below
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.initializers import Orthogonal, RandomNormal, glorot_normal, Zeros
from keras.callbacks import EarlyStopping

# project-local helper module (directory bookkeeping)
import streams

sns.set_style("ticks")

"""
For the sake of simplicity, the path to the figures folder is set as a global
variable. The user is free to specify whatever directory is deemed useful
"""
path_save_figs = r'../figures'
streams.check_create_directory(path_save_figs)
import os

detail = 'mtm'

#seeds = [3,5,6]
seeds = [618]

initialisations = ['orth','normal','glorot']   # U ['zeros'] ?
#initialisations = ['orth']

#datasets = ['init','tree','clus','mvg']
datasets = ['init','tree','clus']
#datasets = ['tree']

path_in_results = os.getcwd() + r'\Results'
path_save_figs = r''  # *** absolute path where the figures are to be saved ***
streams.check_create_directory(path_save_figs)

"""
The idea is to loop over:
    ~ seeds
    ~ initialisation schemes
so as to execute the program once for each possible configuration; a loop
sketch is given below. Apposite directories are created on the fly, if not
already present, so as to store the information generated by the program
execution. Images are stored directly in the directory in which the project
itself is kept and developed.
"""
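"""
A sketch of the configurations loop described above. ``run_experiment`` is a
placeholder for the per-configuration pipeline (initialisation, training,
graph construction), not a function defined in this project; the directory
naming is likewise only illustrative.
"""
# for seed_value in seeds:
#     for initialiser in initialisations:
#         path_init = path_in_results + r'\seed_{}\{}'.format(seed_value, initialiser)
#         streams.check_create_directory(path_init)
#         run_experiment(seed_value, initialiser, path_init)   # placeholder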
def spectrum_discretize(bins_edges, dataset, init_scheme, **flow):
    """
    This is the core of the procedure: the keras model is turned into a graph,
    via the ``proGraphDataStructure'' module functionalities.

    Input:
        ~ bins_edges (list of floats) : see above
        ~ dataset (string) : see above
        ~ init_scheme (string) : see above
        ~ flow (dictionary) : see above

    Returns:
        ~ edges_df (pandas.DataFrame) : contains the edge meta-information,
          that is, the nodes each edge links, the category associated with the
          edge, and the connection strength. The category information is used
          to classify edges as strongly positive or negative, mildly positive
          or negative, or negligible.
    """

    print('\nWeights spectrum discretisation of ' + dataset + ' domain')
    model = load_model(flow['path_output'] + r'\{}\model_{}.h5'.format(dataset, dataset))
    path_save_figures = flow['path_output'] + r'\_Figures'

    graph = graphds.proGraph(model)
    edges = graph.GetEdges()
    edges_df = pd.DataFrame.from_dict(edges, orient='index',
                                      columns=['edge', 'src', 'trg', 'param'])
    weights = np.asarray(edges_df['param'])

    if flow['plot']['preprocess']:
        spectrum_split_plot(weights, path_save_figures, dataset, bins_edges)
    #end

    edges_df = parameters_categories(edges_df, bins_edges)

    """
    NOTE: owing to the choice of having the categories
        ~ to remove
        ~ mildly positive/negative
        ~ positive
        ~ negative
    and owing to the fact that the histogram exported by the bins_for_scheme
    function has five bins, the following remapping is in order:
        ~ category 4 contains the mildly positive values, so it is set to 2,
          the category that already contains the mildly negative values
        ~ category 3 contains the near-null values, so it is set to 4, the
          category that is subsequently removed
        ~ category 5 contains the positive values; now that the old category 3
          has been set to 4 and the old category 4 has been set to 2, it only
          remains to set category 5 to 3, the category of positive values
    Categories 1 and 2, respectively negative and mildly negative, remain
    untouched. A worked example follows the function body.
    """
    edges_df.loc[edges_df['cats'] == 4, 'cats'] = 2
    edges_df.loc[edges_df['cats'] == 3, 'cats'] = 4
    edges_df.loc[edges_df['cats'] == 5, 'cats'] = 3
    edges_df = edges_df[edges_df['cats'] != 4]

    # keep the full filtered frame to return; the reduced frame below only
    # carries the columns needed for the graph file
    df_copy = edges_df
    edges_df = edges_df[['src', 'trg', 'cats']]
    # _edges = {'df_{}'.format(dataset) : edges_df, 'values_{}'.format(dataset) : weights_values}

    streams.check_create_directory(flow['path_output'] + r'\{}'.format(dataset))

    if flow['write_graph']:
        print('Writing Graph File\n')
        filename = flow['path_output'] + r'\{}\_{}_{}_Graph.txt'.format(
                dataset, dataset, flow['weighted_graph'])
        np.savetxt(filename, edges_df.values, fmt='%d')
    #end

    # return edges_df
    return df_copy
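"""
A worked example of the category remapping above, on a toy frame. Before the
remap the five histogram bins map to cats 1 (negative), 2 (mildly negative),
3 (near-null), 4 (mildly positive), 5 (positive); after it, 1 = negative,
2 = mild (either sign), 3 = positive, and 4 = to remove.
"""
# toy = pd.DataFrame({'cats' : [1, 2, 3, 4, 5]})
# toy.loc[toy['cats'] == 4, 'cats'] = 2
# toy.loc[toy['cats'] == 3, 'cats'] = 4
# toy.loc[toy['cats'] == 5, 'cats'] = 3
# print(list(toy['cats']))      # [1, 2, 4, 2, 3]
# toy = toy[toy['cats'] != 4]   # the near-null edge is dropped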
path_results, path_summary_plots, \
path_latex_tables, path_serialized_dataframes = \
        streams.hierarchy(flow['seed'], **flow)
flow.update({'path_splots': path_summary_plots})

for init_scheme in initializations:

    """
    PREPROCESS: Initialization, training, graph construction
    _____________________________________________________________________________________________
    """

    path_results_initscheme = path_results + r'\{}'.format(init_scheme)
    path_save_figures = path_results_initscheme + r'\_Figures'
    streams.check_create_directory(path_results_initscheme)
    streams.check_create_directory(path_save_figures)

    flow.update({'path_figures': path_save_figures})
    flow.update({'path_output': path_results_initscheme})
    flow.update({'path_serialize': path_serialized_dataframes})

    if flow['initialize']:
        train.model_init(init_scheme, **flow)
    #end

    if flow['train']:
        for dataset in datasets[1:]:
            model, accuracy = train.model_train('', dataset, init_scheme, **flow)
        #end
    #end
def spectrum_discretize(path_in_dir, dataset_id, plot, weighted_graph, write_file, init_scheme, bins_edges):
    """
    This is the core of the procedure: the keras model is turned into a graph,
    via the ``proGraphDataStructure'' module functionalities.

    Input:
        ~ path_in_dir       same as above
        ~ dataset_id        string, as above
        ~ plot              dict, contains booleans instructing the program
                            flow about whether to display graphics or not
        ~ weighted_graph    char, instructs the program flow whether the graph
                            is treated as weighted or not
        ~ write_file        char, instructs the program flow whether to write
                            the graph structure to file or not
        ~ init_scheme       as above
        ~ bins_edges        list of floats, where to place the subdivisions
                            among categories

    Returns:
        ~ EdgesDF           pandas.DataFrame, contains the edge
                            meta-information, that is, the nodes each edge
                            links, the category associated with the edge, and
                            the connection strength. The category information
                            is used to classify edges as strongly positive or
                            negative, mildly positive or negative, or
                            negligible.
    """

    print("\nWeights spectrum discretisation of " + dataset_id + " domain")
    model = load_model(path_in_dir + r'\{}\model_{}.h5'.format(dataset_id, dataset_id))
    streams.check_create_directory(path_in_dir + r'\images')
    path_save_pic = path_save_figs + r'\{}\{}'.format(init_scheme, dataset_id)

    graph = pg.proGraph(model)
    Edges = graph.GetEdges()
    EdgesDF = pd.DataFrame.from_dict(Edges, orient="index", columns=["edge", "param"])
    weights = np.asarray(EdgesDF["param"])

    ssp.spectrum_split_plot(weights, path_save_pic, dataset_id, bins_edges)
    EdgesDF = ssp.CategoriseWeightsBiases(EdgesDF, bins_edges)

    """
    NOTE: owing to the choice of having the categories
        ~ to remove
        ~ mildly positive/negative
        ~ positive
        ~ negative
    and owing to the fact that the histogram exported by the bins_for_scheme
    function has five bins, the following remapping is in order:
        ~ category 4 contains the mildly positive values, so it is set to 2,
          the category that already contains the mildly negative values
        ~ category 3 contains the near-null values, so it is set to 4, the
          category that is subsequently removed
        ~ category 5 contains the positive values; now that the old category 3
          has been set to 4 and the old category 4 has been set to 2, it only
          remains to set category 5 to 3, the category of positive values
    Categories 1 and 2, respectively negative and mildly negative, remain
    untouched.
    """
    EdgesDF.loc[EdgesDF['cats'] == 4, 'cats'] = 2
    EdgesDF.loc[EdgesDF['cats'] == 3, 'cats'] = 4
    EdgesDF.loc[EdgesDF['cats'] == 5, 'cats'] = 3

    streams.check_create_directory(path_in_dir + r'\{}'.format(dataset_id))
    filename = path_in_dir + r'\{}\{}_{}_Graph.txt'.format(
            dataset_id, dataset_id, weighted_graph)

    if (write_file == "Y" or write_file == "y"):
        print("Writing Graph File\n")
        AdjLists = graph.GetAdjLists()
        with open(filename, 'w') as f:
            for i in AdjLists:
                for j in range(len(AdjLists[i])):
                    l = AdjLists[i][j]
                    # tmp = EdgesDF.loc[EdgesDF["edge"] == (i,l)]
                    # look the edge category up once; elements of category 4,
                    # that is weak connections, are preventively EXCLUDED
                    cat = int(EdgesDF[EdgesDF['edge'] == (i, l)]['cats'])
                    if (cat != 4):
                        if (weighted_graph == "u" or weighted_graph == "U"):
                            f.write("%s %s %s" % (i - 1, l - 1, 1))
                            f.write("\n")
                        else:
                            f.write("%s %s " % (i - 1, l - 1))
                            """ UNCOMMENT to account for nodes values (colors) """
                            # f.write("%s %s %s " % (int(NodesDF.loc[i,"cats"]),
                            #                        int(NodesDF.loc[l,"cats"]),
                            #                        int(tmp.at[tmp.index[0],"cats"])))
                            f.write("%s " % (cat))
                            f.write("\n")
                        #end
                    #end
                #end
            #end
        #end

    return EdgesDF
#end
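"""
For reference, an illustrative fragment of the graph file emitted above (node
indices are shifted to start from 0, and the edge values are made up). In the
unweighted case each line reads ``src trg 1``; in the weighted case it reads
``src trg cat``, with cat in {1, 2, 3} after the remapping:
"""
# 0 31 1      <- unweighted: edge 0 -> 31
# 0 32 1
# ---
# 0 31 3      <- weighted: edge 0 -> 31, category 3 (positive)
# 0 32 2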
def model_train(path_results, dataset, init_scheme, **flow):
    """
    Model training with Stochastic Gradient Descent. Note that the batch size
    is a tunable parameter.

    Input:
        ~ path_results (string) : main directory, from which the directory
          hierarchy stems
        ~ dataset (string) : see above
        ~ init_scheme (string) : see above

    Returns:
        ~ model (keras.models.Sequential) : trained model
        ~ history.history['acc'] : training accuracy, needed for the efficacy
          plots
    """

    if flow['mnist']:
        Xtrain,Ytrain, Xtest,Ytest = load_mnist_data()
        batch_size = 128
    else:
        Xtrain,Ytrain, Xtest,Ytest = load_synth_data(dataset)
        batch_size = 20
    #end

    print('Model train with {} data set'.format(dataset))
    if path_results == '':
        model = load_model(flow['path_output'] + r'\init\model_init.h5')
    else:
        model = load_model(path_results + r'\{}\init\model_init.h5'.format(init_scheme))
    #end

    es1 = EarlyStopping(monitor='val_acc', mode='auto', patience = 30, verbose = 0)
    es2 = EarlyStopping(monitor='val_loss', mode='auto', patience = 20, verbose = 0)

    history = model.fit(Xtrain, Ytrain, batch_size = batch_size,
                        validation_split = 0.1, epochs = 20,
                        verbose = 0, callbacks = [es1,es2])

    path_save_model = flow['path_output'] + r'\{}'.format(dataset)
    streams.check_create_directory(path_save_model)

    score = model.evaluate(Xtest,Ytest, verbose = 2)
    scores_log = 'Test loss and accuracy : {:.6f} ; {:.6f} '.format(score[0], score[1])
    with open(path_save_model + r'\train_log.txt','w') as f:
        f.write('Training log\n')
        f.write(scores_log)
    #end
    print('Saved on log:\n' + scores_log)

    if flow['plot']['train']:
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='lower right')
        plt.subplot(1,2,2)
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.savefig(flow['path_figures'] + r'\{}_{}_performance.png'.format(init_scheme, dataset),
                    dpi=300, bbox_inches = 'tight')
        plt.show()
    #end

    if flow['save_model']:
        model.save(path_save_model + r'\model_{}.h5'.format(dataset))
    #end

    return model,history.history['acc']
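"""
An illustrative call of ``model_train``. The flow dictionary below is
hypothetical and only lists the keys the function actually consults
('mnist', 'path_output', 'path_figures', 'plot', 'save_model'); all values
are placeholders.
"""
# flow_example = {'mnist'        : False,
#                 'path_output'  : r'..\Results\orth',
#                 'path_figures' : r'..\Results\orth\_Figures',
#                 'plot'         : {'train' : False},
#                 'save_model'   : True}
# model, acc_profile = model_train('', 'tree', 'orth', **flow_example)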