# Training driver for the SO2 AQI entity-embedding network.
# NOTE(review): this fragment was collapsed onto a single physical line
# (a syntax error as written); reformatted into valid Python.
import pickle  # added: pickle.load is used below but was never imported
import sys

from keras.models import load_model

sys.setrecursionlimit(10000)

num_networks = 1    # ensemble size: how many networks to fit
train_ratio = 0.95  # fraction of records used for training
train_file = 'feature_train_so2.pickle'

# Load the pre-extracted features (X) and targets (y) from the pickle.
# NOTE(review): pickle.load on an untrusted file is unsafe -- confirm the
# pickle is locally produced.
with open(train_file, 'rb') as f:
    X, y = pickle.load(f)
num_records = len(y)

models = []
for i in range(num_networks):
    print("Fitting NN_with_EntityEmbedding...")
    # NOTE(review): NN_with_EntityEmbedding is not imported in this
    # fragment -- presumably imported elsewhere in the file; confirm.
    models.append(NN_with_EntityEmbedding(train_ratio, train_file))

# with open('models.pickle', 'wb') as f:
#     pickle.dump(models, f, -1)


def evaluate_models(models, num_records):
    """Accumulate squared error of the first model over the held-out tail.

    NOTE(review): the original fragment is truncated here -- the loop body
    below is incomplete in the visible source, so only the visible
    statements are reproduced.
    """
    model0 = models[0]
    # NOTE(review): this local train_size is never used -- the loop below
    # reads model0.train_size instead; confirm which is intended.
    train_size = train_ratio * num_records
    total_sqe = 0
    num_real_test = 0
    if model0.train_ratio == 1:
        # Everything was used for training; nothing to evaluate on.
        return 0
    for i in range(model0.train_size, num_records):
        record = X[i]
        aqi = y[i]
# Build an entity-embedding NN for one OpenML task, reload its persisted
# architecture/weights, and extract the embedding-layer activations for
# the test set.
# NOTE(review): this fragment was collapsed onto a single physical line
# (a syntax error as written); reformatted into valid Python.
from keras.layers.embeddings import Embedding
from keras.models import model_from_json
from lime_tabular import LimeTabularExplainer
from models import NN_with_EntityEmbedding

# OpenML task ids this experiment is restricted to.
filtered_tasks_ids = [3, 20, 24, 41, 45, 49, 3492, 3493, 3494, 3560,
                      34537, 34539, 146195]
taskid = filtered_tasks_ids[0]

# NOTE(review): get_data is not imported in this fragment -- presumably
# defined/imported elsewhere in the file; confirm.
(X, X_train, X_test, y_train_str, y_test_str, y_train_int, y_test_int,
 feature_names, class_names, categorical_names,
 categorical_features) = get_data(taskid, random_state=0)

nn_with_embedding = NN_with_EntityEmbedding(
    X_train,
    y_train_int,
    categorical_features,
    categorical_names,
    class_names,
    epochs=1,
    batch_size=128,
)
model = nn_with_embedding.model

# Immediately replace the freshly-built model with the persisted one
# (the construction above presumably only reproduces the preprocessing
# state -- confirm).
with open('logs/nn_with_embedding_model_architecture.json', 'r') as f:
    model = model_from_json(f.read())
model.load_weights('logs/nn_with_embedding_model_weights.h5')

layer_name = 'fully-connected'  # NOTE(review): unused in this fragment
previous_layer_name = 'embedding'

# NOTE(review): KerasModel and preprocessing are not defined in this
# fragment -- presumably imported elsewhere; confirm.
intermediate_layer_model = KerasModel(
    inputs=model.input,
    outputs=model.get_layer(previous_layer_name).output)
intermediate_output = intermediate_layer_model.predict(preprocessing(X_test))
print("++++++++++++++++++++++++++++++++++++++++++++++++") print("++++++++++ tasks: " + str(j + 1) + "/" + str(len(filtered_tasks_ids)) + "+++") print("++++++++++++++++++++++++++++++++++++++++++++++++") # within different runs, split traning/testing sets with different # random_state. taskid = filtered_tasks_ids[j] X, X_train, X_test, y_train_str, y_test_str, y_train_int, y_test_int, feature_names, class_names, categorical_names, categorical_features = get_data( taskid, random_state=run_id) # nn with embedding related nn_with_embedding = NN_with_EntityEmbedding( X_train, y_train_int, categorical_features, categorical_names, class_names, epochs=epochs, batch_size=batch_size, ) nn_with_embedding_loss, nn_with_embedding_score = nn_with_embedding.evaluate( X_test, y_test_int) print("nn_with_embedding prediction score: ", str(nn_with_embedding_score)) logger.log('nn_with_embedding', taskid, run_id, nn_with_embedding_score) # nn related nn = NN( X_train, y_train_int, categorical_features, categorical_names,
# Train the entity-embedding NN on the store-sales features, time the fit,
# evaluate MAPE on the validation split, and persist the metrics to CSV.
# NOTE(review): this fragment was collapsed onto a single physical line
# (a syntax error as written); reformatted into valid Python.
# NOTE(review): time(), pd, os, calc_embedding_size, get_train_val,
# NN_with_EntityEmbedding, df_train, features_em and N_em are not defined
# in this fragment -- presumably imported/defined earlier in the file.
em_size = calc_embedding_size(df_train, features_em, N_em)
print("embedding size:")
print(em_size)

# features and targets for training
target = ['Sales']
# features used
features = ['Store', 'DayOfWeek', 'Promo', 'Year', 'Month', 'Day', 'State']

val_ratio = 0.1
X_train, X_val, y_train, y_val = get_train_val(df_train, features, target,
                                               val_ratio)

start = time()  # get starting time
print("Fitting NN_with_EntityEmbedding...")
epochs = 10
model_nne = NN_with_EntityEmbedding(X_train, y_train, X_val, y_val, epochs,
                                    features, features_em, em_size)
end = time()  # get ending time
train_time = (end - start) / 60  # time used, minutes

val_err = model_nne.evaluate(X_val, y_val)
print("validation error MAPE: {:.4f}".format(val_err))
print("training time: {:.4f} minutes".format(train_time))

# collect performance results for this run
results = {}
results['feature_dimension'] = X_train.shape[1]
results['train_time'] = train_time
results['val_error'] = val_err
rs = pd.Series(results, name="NN_with_embedding")

# save performance results
res_dir = 'results/'
res_path = os.path.join(res_dir, "res_NN_with_embedding.csv")