예제 #1
0
import pickle
import sys

from keras.models import load_model

# Deep recursion can occur when (un)pickling large nested Keras/model
# objects, so raise the limit up front.
sys.setrecursionlimit(10000)

# Ensemble size and train/validation split ratio.
num_networks = 1
train_ratio = 0.95
train_file = 'feature_train_so2.pickle'

# Load the pre-extracted features (X) and targets (y).
# NOTE(review): pickle.load on untrusted files is unsafe — only load
# feature files generated by this project.
with open(train_file, 'rb') as f:
    X, y = pickle.load(f)
    num_records = len(y)

# Train the ensemble.
# NOTE(review): NN_with_EntityEmbedding is not imported in this view —
# confirm it is defined/imported elsewhere in the original file.
models = []
for i in range(num_networks):
    print("Fitting NN_with_EntityEmbedding...")
    models.append(NN_with_EntityEmbedding(train_ratio, train_file))

#with open('models.pickle', 'wb') as f:
#    pickle.dump(models, f, -1)


def evaluate_models(models, num_records):
    """Evaluate the model ensemble on the held-out tail of the dataset.

    NOTE(review): this function appears truncated in this view — the loop
    reads ``record``/``aqi`` but never uses them, and nothing is returned
    after the loop. It also relies on the module-level globals ``X``,
    ``y`` and ``train_ratio``.
    """
    model0 = models[0]
    # NOTE(review): float-valued and unused here — the loop below uses
    # model0.train_size instead; verify which one is authoritative.
    train_size = train_ratio * num_records
    total_sqe = 0      # presumably a running sum of squared errors — TODO confirm
    num_real_test = 0  # presumably the count of evaluated test records — TODO confirm
    if model0.train_ratio == 1:
        # Everything was used for training: no held-out data to score.
        return 0
    for i in range(model0.train_size, num_records):
        record = X[i]
        aqi = y[i]
예제 #2
0
from keras.layers.embeddings import Embedding
from keras.models import model_from_json


from lime_tabular import LimeTabularExplainer
from models import NN_with_EntityEmbedding


# Task ids to experiment on (presumably OpenML task ids consumed by
# get_data — verify against its definition); use the first one here.
filtered_tasks_ids = [3, 20, 24, 41, 45, 49, 3492, 3493, 3494, 3560, 34537, 34539, 146195]
taskid = filtered_tasks_ids[0]
# NOTE(review): get_data is not imported in this view — confirm it is
# defined/imported elsewhere in the original file.
X, X_train, X_test, y_train_str, y_test_str, y_train_int, y_test_int, feature_names, class_names, categorical_names, categorical_features = get_data(taskid, random_state=0)

# Build the entity-embedding network (1 epoch: the trained weights are
# replaced from disk below, so this mainly constructs the architecture).
nn_with_embedding = NN_with_EntityEmbedding(X_train, y_train_int,
                                            categorical_features,
                                            categorical_names,
                                            class_names,
                                            epochs=1,
                                            batch_size=128,
)

# NOTE(review): this assignment is immediately overwritten two lines
# below by model_from_json — dead store; confirm whether the in-memory
# model was meant to be kept.
model = nn_with_embedding.model
with open('logs/nn_with_embedding_model_architecture.json', 'r') as f:
    model = model_from_json(f.read())
model.load_weights('logs/nn_with_embedding_model_weights.h5')

layer_name = 'fully-connected'  # NOTE(review): unused in this view

previous_layer_name = 'embedding'
# Truncate the network at the embedding layer to extract intermediate
# activations. NOTE(review): KerasModel and preprocessing are not
# imported in this view — confirm they exist in the original file.
intermediate_layer_model = KerasModel(inputs=model.input,
                                      outputs=model.get_layer(previous_layer_name).output)
intermediate_output = intermediate_layer_model.predict(preprocessing(X_test))
예제 #3
0
    # Progress banner for the current task. NOTE(review): this fragment
    # starts mid-loop — the loop header defining index j (and run_id,
    # epochs, batch_size, logger) is outside this view.
    print("++++++++++++++++++++++++++++++++++++++++++++++++")
    print("++++++++++ tasks: " + str(j + 1) + "/" +
          str(len(filtered_tasks_ids)) + "+++")
    print("++++++++++++++++++++++++++++++++++++++++++++++++")
    # Within different runs, split training/testing sets with a different
    # random_state so each run sees a different split.
    taskid = filtered_tasks_ids[j]
    X, X_train, X_test, y_train_str, y_test_str, y_train_int, y_test_int, feature_names, class_names, categorical_names, categorical_features = get_data(
        taskid, random_state=run_id)

    # Train and score the entity-embedding network for this task/run.
    nn_with_embedding = NN_with_EntityEmbedding(
        X_train,
        y_train_int,
        categorical_features,
        categorical_names,
        class_names,
        epochs=epochs,
        batch_size=batch_size,
    )
    nn_with_embedding_loss, nn_with_embedding_score = nn_with_embedding.evaluate(
        X_test, y_test_int)
    print("nn_with_embedding prediction score: ", str(nn_with_embedding_score))
    logger.log('nn_with_embedding', taskid, run_id, nn_with_embedding_score)

    # Baseline NN (no embeddings) for comparison.
    # NOTE(review): this call is truncated at the end of the fragment —
    # its remaining arguments and closing parenthesis are not visible.
    nn = NN(
        X_train,
        y_train_int,
        categorical_features,
        categorical_names,
예제 #4
0
# Compute per-feature embedding sizes for the categorical features.
# NOTE(review): calc_embedding_size, df_train, features_em and N_em are
# defined outside this view — confirm against the original file.
em_size = calc_embedding_size(df_train, features_em, N_em)
print("embedding size:")
print(em_size)

# features and targets for training
target = ['Sales']
# features used
features = ['Store', 'DayOfWeek', 'Promo', 'Year', 'Month', 'Day', 'State']
val_ratio = 0.1
X_train, X_val, y_train, y_val = get_train_val(df_train, features, target,
                                               val_ratio)

# Train the entity-embedding network and time the fit.
# NOTE(review): time, pd and os are not imported in this view — presumably
# imported at the top of the original file; verify.
start = time()  # get starting time
print("Fitting NN_with_EntityEmbedding...")
epochs = 10
model_nne = NN_with_EntityEmbedding(X_train, y_train, X_val, y_val, epochs,
                                    features, features_em, em_size)
end = time()  # get ending time
train_time = (end - start) / 60  # time used, minutes
val_err = model_nne.evaluate(X_val, y_val)  # MAPE on the validation split
print("validation error MAPE: {:.4f}".format(val_err))
print("training time: {:.4f} minutes".format(train_time))

# Collect metrics for this model into a named pandas Series.
results = {}
results['feature_dimension'] = X_train.shape[1]
results['train_time'] = train_time
results['val_error'] = val_err
rs = pd.Series(results, name="NN_with_embedding")

# save performance results
res_dir = 'results/'
res_path = os.path.join(res_dir, "res_NN_with_embedding.csv")