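# Imports assumed by this snippet (standalone-Keras-era API, judging by the
# use of evaluate_generator below); exact module paths are assumptions.
from os.path import join

from keras.layers import Input, Lambda
from keras.models import Model

import triplet  # project-local module providing TripletSequence
from retrieval import retrieval_test_in_memory  # hypothetical module path
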
def evaluate_model(model, args, data, training_report):
    # Get performance on each metric for each split
    if args.checkpoints:
        # if checkpointing was used, then make sure we use the 'best'
        # model for evaluation
        model.load_weights(join(training_report['cfg_folder'], 'model_weights.h5'))
    for split in data.splits.keys():
        if args.siamese:
            X = data.splits[split]['siam_X']
            y = data.splits[split]['siam_y']
        else:
            if args.nn == "DAE":
                X, y = data.get_data_for_neural_net_unsupervised(split, args.noise_level)
            else:
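                # one-hot targets are skipped for triplet training, which
                # presumably mines triplets from integer class labels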
                X, y = data.get_data_for_neural_net(split, one_hot=not args.triplet)
        if args.triplet:
            # TODO: remove copy/pasted code
            embedding_dim = model.layers[-1].output_shape[1]
            bh_P = args.batch_hard_P
            bh_K = args.batch_hard_K
            num_batches = args.num_batches_val
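            # TripletSequence presumably builds each batch from bh_P classes
            # with bh_K samples per class, the P*K sampling scheme used by
            # the batch-hard triplet loss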
            eval_data = triplet.TripletSequence(
                X, y, embedding_dim, bh_P, bh_K, num_batches)
            eval_results = model.evaluate_generator(eval_data, verbose=1)
        else:
            eval_results = model.evaluate(x=X, y=y)
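        # model.evaluate returns a bare scalar when only the loss is tracked;
        # normalize to a list so the zip over metrics_names works either way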
        try:  # Ensure eval_results is iterable
            _ = (i for i in eval_results)
        except TypeError:
            eval_results = [eval_results]
        for metric, res in zip(model.metrics_names, eval_results):
            training_report['res_{}_{}'.format(split, metric)] = res
            print('{}\t{}\t{}'.format(split, metric, res))
    # Additionally, test retrieval performance with valid and test splits as
    # queries
    print("Conducting retrieval testing...")
    database = data.get_expression_mat(split='train')
    if args.nn == "DAE":
        sample_in = Input(shape=model.layers[0].input_shape[1:],
                          name='sample_input')
        embedded = Lambda(lambda x: model.layers[1].encode(x),
                          output_shape=(training_report['cfg_DIMS'],),
                          name='encoder')(sample_in)
        embedded._uses_learning_phase = True  # Dropout ops use learning phase
        embedder = Model(sample_in, embedded)
    else:
        reducing_model = model
        if args.siamese:
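            # in the siamese wrapper, layers[0] and layers[1] are presumably
            # the two inputs and layers[2] the shared base network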
            reducing_model = model.layers[2]
            last_hidden_layer = reducing_model.layers[-1]
        elif args.triplet:
            last_hidden_layer = reducing_model.layers[-1]
        else:
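            # plain classifier: embed with the penultimate layer, skipping
            # the output layer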
            last_hidden_layer = reducing_model.layers[-2]
        embedder = Model(inputs=reducing_model.layers[0].input, outputs=last_hidden_layer.output)
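    # embed the training split to serve as the retrieval database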
    database = embedder.predict(database)
    database_labels = data.get_labels('train')
    for split in ['valid', 'test']:
        query = data.get_expression_mat(split)
        query = embedder.predict(query)
        query_labels = data.get_labels(split)
        avg_map, wt_avg_map, avg_mafp, wt_avg_mafp = retrieval_test_in_memory(
            database, database_labels, query, query_labels)
        training_report['res_{}_avg_map'.format(split)] = avg_map
        training_report['res_{}_wt_avg_map'.format(split)] = wt_avg_map
        training_report['res_{}_avg_mafp'.format(split)] = avg_mafp
        training_report['res_{}_wt_avg_mafp'.format(split)] = wt_avg_mafp
        print("{}\tAvg MAP\t{}".format(split, avg_map))
        print("{}\tWt Avg MAP\t{}".format(split, wt_avg_map))
        print("{}\tAvg MAFP\t{}".format(split, avg_mafp))
        print("{}\tWt Avg MAFP\t{}".format(split, wt_avg_mafp))
                          dropout=0.20,
                          use_bias=True)(inputs)

model = Model(inputs=inputs, outputs=x)
print(model.summary())

model.compile(loss='mean_squared_error', optimizer='sgd')
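# denoising objective: reconstruct the clean X from the corrupted input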
model.fit(X_corrupt, X, epochs=10)

# Use it to embed some data
print(model.layers[1].output_shape)  # layers expose output_shape, not shape
sample_in = Input(shape=model.layers[0].input_shape[1:], name='sample_input')
embedded = Lambda(lambda x: model.layers[1].encode(x),
                  output_shape=(20, ),
                  name='encode')(sample_in)
embedded._uses_learning_phase = True  # Dropout ops use learning phase
embedder = Model(sample_in, embedded)
print(embedder.summary())

X_transformed = embedder.predict(X)
print(X_transformed)
print("X_encoded shape: {}".format(X_transformed.shape))
assert X_transformed.shape[1] == 20

model.save('dense_autoencoder_test.h5')

del model

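# the custom layer class is not serialized with the model, so it must be
# supplied via custom_objects when reloading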
model = load_model(
    'dense_autoencoder_test.h5',
    custom_objects={'DenseLayerAutoencoder': DenseLayerAutoencoder})
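
# Quick sanity check on the reloaded model (a minimal sketch; assumes the
# hypothetical X defined above is still in scope): the autoencoder should
# accept the same inputs and return same-shaped reconstructions.
reconstructions = model.predict(X)
print("Reconstruction shape: {}".format(reconstructions.shape))
assert reconstructions.shape == X.shape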