Esempio n. 1
0
def mc_dropout_uq(predictor,
                  descriptors=False,
                  descriptor_names=False,
                  num=500):
    """Estimate prediction mean and uncertainty from repeated dropout passes.

    The Keras model for ``predictor`` is evaluated ``num`` times with the
    Keras learning phase set to 1, and the spread of the resulting
    predictions is combined with a fitted precision ``tau``.

    Args:
        predictor: Name of the predictor. Names containing ``'clf'`` are
            treated as classifiers (integer labels, outputs not rescaled).
        descriptors: Descriptor values to predict on. When either this or
            ``descriptor_names`` is falsy, the stored test data is used.
        descriptor_names: Names matching ``descriptors``.
        num: Number of forward passes.

    Returns:
        Tuple ``(result_mean, result_std, error_for_mean)``: the per-sample
        mean over passes, ``sqrt(1 / tau + std**2)`` over passes, and the
        absolute difference between the test labels and the mean.
    """
    is_regression = 'clf' not in predictor
    labels = np.array(load_test_labels(predictor),
                      dtype='float64' if is_regression else 'int')
    # Drop the singleton label axis up front. The predictions are squeezed
    # the same way inside the loop; subtracting a squeezed prediction vector
    # from un-squeezed column labels would broadcast to an outer difference
    # instead of the per-sample residual.
    labels = np.squeeze(labels, axis=1)
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(
        predictor)
    if descriptors and descriptor_names:
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation = data_normalize(excitation, train_mean_x, train_var_x)
    else:
        mat = np.array(load_test_data(predictor), dtype='float64')
        excitation = np.array(data_normalize(mat, train_mean_x, train_var_x))
    print(('excitation is ' + str(excitation.shape)))
    loaded_model = load_keras_ann(predictor)
    get_outputs = K.function(
        [loaded_model.layers[0].input,
         K.learning_phase()], [loaded_model.layers[-1].output])
    print(('LOADED MODEL HAS ' + str(len(loaded_model.layers)) +
           ' layers, so latent space measure will be from first ' +
           str(len(loaded_model.layers) - 1) + ' layers'))
    # Report progress roughly every 10%; never let the interval reach zero
    # (num < 10), which would make the modulo below divide by zero.
    progress_step = max(1, num // 10)
    results_list = []
    err_list = []
    for ii in range(num):
        if ii % progress_step == 0:
            print(('%d / %d' % (ii, num)))
        # Learning phase 1 keeps training-time behaviour switched on, so
        # each pass can differ from the previous one.
        raw_outputs = np.array(get_outputs([excitation, 1]))
        if is_regression:
            results = data_rescale(raw_outputs, train_mean_y, train_var_y)[0]
        else:
            results = raw_outputs[0]
        results = results.squeeze(axis=1)
        # Squared L2 norm of this pass's residual.
        err_list.append(np.linalg.norm(labels - results)**2)
        results_list.append(results)
    # Rows become samples, columns become passes.
    results_list = np.transpose(np.array(results_list))
    # Build the error array once rather than on every optimizer evaluation.
    errors = np.array(err_list)
    tau = sp.optimize.minimize(lambda t: mc_dropout_logp(t, errors), 10).x
    result_mean = np.mean(results_list, axis=1)
    result_std = np.std(results_list, axis=1)
    result_std = np.sqrt(1 / tau + result_std**2)
    error_for_mean = np.abs(labels - result_mean)
    return result_mean, result_std, error_for_mean
Esempio n. 2
0
def ensemble_maker(predictor, num=10):
    """Build an ensemble of models for ``predictor`` and save each member.

    The stored training data is normalized with the stored normalization
    statistics. Labels are normalized too unless the predictor name
    contains ``"clf"``. The prepared data is handed to
    ``ensemble_maker_inner`` and every returned model is passed to
    ``save_model`` together with its position in the list.

    Args:
        predictor: Name of the predictor.
        num: Forwarded to ``ensemble_maker_inner`` as ``num``.
    """
    norm_stats = load_normalization_data(predictor)
    train_mean_x, train_mean_y, train_var_x, train_var_y = norm_stats

    features = np.array(load_training_data(predictor), dtype='float64')
    train_mat = data_normalize(features, train_mean_x, train_var_x)

    labels = load_training_labels(predictor)
    if "clf" not in predictor:
        # Non-classifier targets are normalized like the inputs.
        labels = data_normalize(np.array(labels, dtype='float64'),
                                train_mean_y, train_var_y)

    info_dict = load_train_info(predictor)

    def _fresh_model():
        # Factory handed to ensemble_maker_inner as model_gen_function.
        return load_keras_ann(predictor)

    model_list = ensemble_maker_inner(train_mat=train_mat,
                                      labels=labels,
                                      model_gen_function=_fresh_model,
                                      info_dict=info_dict,
                                      num=num)
    for member_index, member in enumerate(model_list):
        save_model(member, predictor, member_index)
Esempio n. 3
0
def sklearn_supervisor(predictor, descriptors, descriptor_names, debug=False):
    """Predict with the stored scikit-learn model for ``predictor``.

    Args:
        predictor: Name of the predictor. Names containing ``"clf"`` use
            ``predict_proba``; all others use ``predict`` followed by
            ``data_rescale``.
        descriptors: Descriptor values for the input.
        descriptor_names: Names matching ``descriptors``.
        debug: When true, print progress messages.

    Returns:
        Tuple ``(result, model_uncertainty)``. ``model_uncertainty`` is
        always the placeholder list ``[-1]``.
    """
    print('scikitlearn models activated for ' + str(predictor))
    excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                           descriptor_names)
    if debug:
        print('excitation is ' + str(excitation.shape))
        print('fetching non-dimensionalization data... ')
    norm_stats = load_normalization_data(predictor)
    train_mean_x, train_mean_y, train_var_x, train_var_y = norm_stats
    if debug:
        print('rescaling input excitation...')
    excitation = data_normalize(excitation, train_mean_x, train_var_x)
    loaded_model = load_sklearn_model(predictor)

    if "clf" in predictor:
        # Collapse each probability pair into a single-value row.
        rows = []
        for probs in loaded_model.predict_proba(excitation):
            if probs[0] >= probs[1]:
                rows.append([1 - probs[0]])
            else:
                rows.append([probs[1]])
        result = np.array(rows)
    else:
        prediction = loaded_model.predict(excitation)
        result = data_rescale(prediction, train_mean_y, train_var_y)

    return result, [-1]
Esempio n. 4
0
def latent_space_uq(predictor,
                    layer_index=-2,
                    descriptors=False,
                    descriptor_names=False,
                    entropy=False):
    """Score test points by their latent-space distance to the training set.

    For every saved ensemble member (``*.h5`` weights with a matching
    ``.json`` architecture file) the activations of layer ``layer_index``
    are computed for the training and test inputs and passed to
    ``dist_neighbor``. Trains the ensemble first via ``ensemble_maker`` when
    the ensemble directory does not exist.

    Args:
        predictor: Name of the predictor. Names containing ``'clf'`` are
            treated as classifiers (integer labels, outputs not rescaled).
        layer_index: Index of the layer whose output is the latent space.
        descriptors: Descriptor values to use instead of the stored data.
            Only used when ``descriptor_names`` is also truthy.
        descriptor_names: Names matching ``descriptors``.
        entropy: When truthy, report ``get_entropy`` of the neighbour
            distances and labels instead of the distance measure.

    Returns:
        Tuple ``(result_mean, latent_dist, error_for_mean)``: the per-sample
        prediction mean over ensemble members, the per-sample mean of the
        distance (or entropy) measure, and the absolute difference between
        the test labels and the prediction mean.
    """
    key = get_key(predictor, suffix=False)
    base_path = resource_filename(Requirement.parse("molSimplify"),
                                  "molSimplify/tf_nn/" + key)
    base_path = base_path + 'ensemble_models'
    if not os.path.exists(base_path):
        print('Ensemble models do not exist now, training...')
        ensemble_maker(predictor)
    print(('ANN activated for ' + str(predictor)))
    model_list = glob.glob(base_path + '/*.h5')
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(
        predictor)
    if descriptors and descriptor_names:
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation = data_normalize(excitation, train_mean_x, train_var_x)
        # Use a separate name for the raw test descriptors so the normalized
        # reference input above is not replaced by un-normalized values.
        # NOTE(review): labels_train below still comes from the stored
        # training set in this branch - confirm that pairing is intended.
        raw_test = tf_ANN_excitation_prepare(predictor, descriptors,
                                             descriptor_names)
        excitation_test = data_normalize(raw_test, train_mean_x, train_var_x)
    else:
        train_raw = np.array(load_training_data(predictor), dtype='float64')
        excitation = np.array(
            data_normalize(train_raw, train_mean_x, train_var_x))
        test_raw = np.array(load_test_data(predictor), dtype='float64')
        excitation_test = np.array(
            data_normalize(test_raw, train_mean_x, train_var_x))
    label_dtype = 'float64' if 'clf' not in predictor else 'int'
    labels = np.array(load_test_labels(predictor), dtype=label_dtype)
    labels_train = np.array(load_training_labels(predictor),
                            dtype=label_dtype)
    # Freeze the flag once; the per-model entropy values live in their own
    # list so the caller's choice cannot change between ensemble members.
    use_entropy = bool(entropy)
    results_list = []
    dist_list = []
    for model in model_list:
        # splitext strips only the trailing ".h5"; splitting on the first
        # "." would cut the path at any dotted directory name.
        model_base = os.path.splitext(model)[0]
        with open(model_base + '.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        loaded_model.load_weights(model)
        loaded_model.compile(loss="mse",
                             optimizer='adam',
                             metrics=['mse', 'mae', 'mape'])
        get_outputs = K.function(
            [loaded_model.layers[0].input,
             K.learning_phase()], [loaded_model.layers[-1].output])
        get_latent = K.function(
            [loaded_model.layers[0].input,
             K.learning_phase()], [loaded_model.layers[layer_index].output])
        print(
            ('NOTE: you are choosing:', loaded_model.layers[layer_index],
             loaded_model.layers[layer_index].name, 'for the latence space!'))
        # Learning phase 0 is used for every evaluation in this function.
        raw_outputs = np.array(get_outputs([excitation_test, 0]))
        if 'clf' not in predictor:
            results = data_rescale(raw_outputs, train_mean_y, train_var_y)[0]
        else:
            results = raw_outputs[0]
        # First pass compares the training latent vectors with themselves;
        # the mean of that result is the dist_ref for the test pass.
        training_latent_distance = np.array(get_latent([excitation, 0]))[0]
        nn_latent_dist_train, _, __ = dist_neighbor(training_latent_distance,
                                                    training_latent_distance,
                                                    labels_train,
                                                    l=5,
                                                    dist_ref=1,
                                                    just_nn=True)
        nn_dist_avrg_train = np.mean(nn_latent_dist_train)
        test_latent_distance = np.array(get_latent([excitation_test, 0]))[0]
        nn_latent_dist_test, nn_dists, nn_labels = dist_neighbor(
            test_latent_distance,
            training_latent_distance,
            labels_train,
            l=5,
            dist_ref=nn_dist_avrg_train,
            just_nn=False)
        if use_entropy:
            entropy_values = [
                get_entropy(_dists, nn_labels[idx])
                for idx, _dists in enumerate(nn_dists)
            ]
            dist_list.append(np.array(entropy_values))
        else:
            dist_list.append(nn_latent_dist_test)
        results_list.append(results.squeeze(axis=1))
    # Rows become samples, columns become ensemble members.
    dist_list = np.transpose(np.array(dist_list))
    results_list = np.transpose(np.array(results_list))
    result_mean = np.mean(results_list, axis=1)
    latent_dist = np.mean(dist_list, axis=1)
    labels = np.squeeze(labels, axis=1)
    error_for_mean = np.abs(labels - result_mean)
    return result_mean, latent_dist, error_for_mean
Esempio n. 5
0
def ensemble_uq(predictor,
                descriptors=False,
                descriptor_names=False,
                suffix=False):
    """Estimate prediction mean and spread from the saved model ensemble.

    Every ensemble member (``*.h5`` weights with a matching ``.json``
    architecture file) predicts on the same input; the mean and standard
    deviation are taken across members. Trains the ensemble first via
    ``ensemble_maker`` when the ensemble directory does not exist.

    Args:
        predictor: Name of the predictor. Names containing ``'clf'`` are
            treated as classifiers (integer labels, outputs not rescaled).
        descriptors: Descriptor values to predict on. When either this or
            ``descriptor_names`` is falsy, the stored test data is used.
        descriptor_names: Names matching ``descriptors``.
        suffix: Forwarded to ``get_key``.

    Returns:
        Tuple ``(result_mean, result_std, error_for_mean)``: the per-sample
        mean and standard deviation over ensemble members, and the absolute
        difference between the test labels and the mean.
    """
    key = get_key(predictor, suffix)
    base_path = resource_filename(Requirement.parse("molSimplify"),
                                  "molSimplify/tf_nn/" + key)
    base_path = base_path + 'ensemble_models'
    if not os.path.exists(base_path):
        print('Ensemble models do not exist now, training...')
        ensemble_maker(predictor)
    print(('ANN activated for ' + str(predictor)))
    model_list = glob.glob(base_path + '/*.h5')

    is_regression = 'clf' not in predictor
    labels = np.array(load_test_labels(predictor),
                      dtype='float64' if is_regression else 'int')
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(
        predictor)
    if descriptors and descriptor_names:
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation = data_normalize(excitation, train_mean_x, train_var_x)
    else:
        test_raw = np.array(load_test_data(predictor), dtype='float64')
        excitation = np.array(
            data_normalize(test_raw, train_mean_x, train_var_x))
    print(('excitation is ' + str(excitation.shape)))
    print(('actual label:', labels[:3]))
    results_list = []
    for model in model_list:
        # splitext strips only the trailing ".h5"; splitting on the first
        # "." would cut the path at any dotted directory name.
        model_base = os.path.splitext(model)[0]
        with open(model_base + '.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        loaded_model.load_weights(model)
        loaded_model.compile(loss="mse",
                             optimizer='adam',
                             metrics=['mse', 'mae', 'mape'])
        if is_regression:
            result = data_rescale(loaded_model.predict(excitation),
                                  train_mean_y, train_var_y)
            # Flatten a single-column prediction, as the classifier branch
            # does, so the stacked array is (members, samples) and axis=1
            # below averages across members.
            # NOTE(review): assumes data_rescale keeps the (n, 1) shape of
            # predict(); the guard leaves any other shape untouched.
            if np.ndim(result) == 2 and np.shape(result)[1] == 1:
                result = np.squeeze(result, axis=1)
        else:
            result = np.squeeze(loaded_model.predict(excitation), axis=1)
        results_list.append(result)
    # Rows become samples, columns become ensemble members.
    results_list = np.transpose(np.array(results_list))
    result_mean = np.mean(results_list, axis=1)
    result_std = np.std(results_list, axis=1)
    labels = np.squeeze(labels, axis=1)
    print((labels.shape, result_mean.shape))
    error_for_mean = np.abs(labels - result_mean)
    return result_mean, result_std, error_for_mean