Example #1
def load_train_data(start, stop):
    cwd = Path(os.path.abspath(os.path.dirname(__file__)))
    data = cwd / 'data'
    hdf5_dir = (data / 'hdf5_data').as_posix()
    train_hdf5_dir = os.path.join(hdf5_dir, 'train')
    files = provider.getDataFiles(os.path.join(train_hdf5_dir,
                                               'all_files.txt'))

    data_batch_list = []
    label_batch_list = []

    count = 0
    for h5_filename in files[start:stop]:
        data_batch, label_batch = provider.load_h5(h5_filename)
        #       data_batch = provider.jitter_point_cloud(data_batch)
        print(f'h5_filename = {h5_filename}')
        print(f'data_batch.shape = {data_batch.shape}')
        count += data_batch.shape[0]
        data_batch_list.append(data_batch)
        label_batch_list.append(label_batch)

    data_batches = np.concatenate(data_batch_list, 0)
    label_batches = np.concatenate(label_batch_list, 0)
    print(f'data_batches.shape = {data_batches.shape}')
    print(f'label_batches.shape = {label_batches.shape}')

    print(f'count = {count}')
    train_idxs = list(range(0, count))

    train_data = data_batches[train_idxs, ...]
    train_label = label_batches[train_idxs]
    print(
        f'train_data.shape, train_label.shape = {train_data.shape}, {train_label.shape}'
    )
    return train_data, train_label
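The provider helpers called throughout these examples are not shown on this page. A minimal sketch of what getDataFiles and load_h5 presumably do, assuming the usual PointNet-style HDF5 layout with 'data' and 'label' datasets:

# Hedged sketch (not from this page): assumed provider helpers.
import h5py


def getDataFiles(list_filename):
    # 'all_files.txt' is assumed to list one HDF5 file path per line
    return [line.rstrip() for line in open(list_filename)]


def load_h5(h5_filename):
    with h5py.File(h5_filename, 'r') as f:
        data = f['data'][:]
        label = f['label'][:]
    return data, label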
Example #2
def load_test_data():
    """Load test data."""
    cwd = Path(os.path.abspath(os.path.dirname(__file__)))
    data = cwd / 'data'
    hdf5_dir = (data / 'hdf5_data').as_posix()
    test_hdf5_dir = os.path.join(hdf5_dir, 'test')
    test_files = provider.getDataFiles(
        os.path.join(test_hdf5_dir, 'all_files.txt'))

    data_batch_list = []
    label_batch_list = []
    count = 0
    for h5_filename in test_files:
        data_batch, label_batch = provider.load_h5(h5_filename)
        #       data_batch = provider.jitter_point_cloud(data_batch)
        print(f'h5_filename = {h5_filename}')
        print(f'data_batch.shape = {data_batch.shape}')
        count += data_batch.shape[0]
        data_batch_list.append(data_batch)
        label_batch_list.append(label_batch)

    data_batches = np.concatenate(data_batch_list, 0)
    label_batches = np.concatenate(label_batch_list, 0)
    print(f'data_batches.shape = {data_batches.shape}')
    print(f'label_batches.shape = {label_batches.shape}')

    test_idxs = list(range(0, count))
    print(f'len(test_idxs) = {len(test_idxs)}')

    test_data = data_batches[test_idxs, ...]
    test_label = label_batches[test_idxs]
    print(
        f'test_data.shape, test_label.shape = {test_data.shape}, {test_label.shape}'
    )
    return test_data, test_label
Example #3
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    # Shuffle train samples
    train_idxs = np.arange(0, len(TRAIN_FILES))
    np.random.shuffle(train_idxs)

    total_correct = total_seen = total_sig = loss_sum = 0

    for fn in range(len(TRAIN_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TRAIN_FILES[train_idxs[fn]])
        current_data, current_label, global_pl = provider.load_h5(current_file,
                                                                  'seg',
                                                                  glob=True)
        #print (current_data, current_label)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=global_pl)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        #log_string(str(datetime.now()))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx,
                end_idx)

            #print(batch_weight)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            summary, step, _, loss_val, pred_val, coefs = sess.run(
                [
                    ops['merged'], ops['step'], ops['train_op'], ops['loss'],
                    ops['pred'], ops['coefs']
                ],
                #ops['coefs2']],
                feed_dict=feed_dict)

            train_writer.add_summary(summary, step)
            pred_val = np.argmax(pred_val, 2)
            total_sig += np.sum(batch_label)
            correct = np.sum((pred_val == batch_label))
            total_correct += correct
            total_seen += BATCH_SIZE * NUM_POINT
            loss_sum += np.mean(loss_val)

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
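get_batch is also external to this snippet; assuming it only slices the already-shuffled per-file arrays, it could look like:

# Assumed sketch of get_batch: plain slicing of the per-file arrays.
def get_batch(data, label, global_feats, start_idx, end_idx):
    batch_data = data[start_idx:end_idx, ...]
    batch_label = label[start_idx:end_idx]
    batch_global = global_feats[start_idx:end_idx, ...]
    return batch_data, batch_label, batch_global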
Example #4
 def __init__(self,
              root,
              batch_size=16,
              npoints=1024,
              split='train',
              shuffle=True,
              augment=True):
     '''
     root: file path of the data
     batch_size: number of samples per batch
     npoints: number of points per cloud
     split: 'train' or 'test'
     shuffle: if true, shuffle the dataset
     augment: if true, do data augmentation
     '''
     self.root = root
     self.batch_size = batch_size
     self.npoints = npoints
     self.split = split
     self.augment = augment
     if shuffle is None:
         # if shuffle is not specified, shuffle only the training split
         self.shuffle = (split == 'train')
     else:
         self.shuffle = shuffle
     # load category names
     self.catfile = os.path.join(self.root, 'shape_names.txt')
     self.cat = [line.rstrip() for line in open(self.catfile)]
     shape_ids = {}
     # load data path
     shape_ids['train'] = [
         line.rstrip()
         for line in open(os.path.join(self.root, 'train_files.txt'))
     ]
     shape_ids['test'] = [
         line.rstrip()
         for line in open(os.path.join(self.root, 'test_files.txt'))
     ]
     # it's a small dataset, load all data in memory
     loaded = [provider.load_h5(x) for x in shape_ids[split]]
     datas = [x[0] for x in loaded]
     labels = [x[1] for x in loaded]
     self.datas = np.concatenate(datas,
                                 axis=0)  # concatenate list to numpy array
     self.labels = np.concatenate(labels, axis=0)
     self.on_epoch_end()
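The constructor ends by calling self.on_epoch_end(), which is not shown here; a plausible implementation that rebuilds and optionally reshuffles the sample order would be:

# Hypothetical companion method for the loader above; self.indices is assumed.
def on_epoch_end(self):
    self.indices = np.arange(len(self.datas))
    if self.shuffle:
        np.random.shuffle(self.indices)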
Example #5
def train_one_epoch(sess, ops, train_writer):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    loss_sum = 0

    current_data_pl, current_label = provider.load_h5(TRAIN_FILE, 'class')
    #,nevts=5e5
    if multi:
        current_label = np.argmax(current_label, axis=-1)
    current_data_pl, current_label, _ = provider.shuffle_data(
        current_data_pl, np.squeeze(current_label))

    file_size = current_data_pl.shape[0]
    num_batches = file_size // BATCH_SIZE
    #num_batches = 4
    log_string(str(datetime.now()))
    for batch_idx in range(num_batches):

        start_idx = batch_idx * (BATCH_SIZE)
        end_idx = (batch_idx + 1) * (BATCH_SIZE)
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        mask_padded = batch_data_pl[:, :, 2] == 0

        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['mask_pl']: mask_padded.astype(float),
            ops['is_training']: is_training,
        }

        summary, step, _, loss, attention = sess.run([
            ops['merged'], ops['step'], ops['train_op'], ops['loss'],
            ops['attention']
        ],
                                                     feed_dict=feed_dict)

        #print(attention)
        train_writer.add_summary(summary, step)
        loss_sum += np.mean(loss)

    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
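The padding mask above flags zero-padded points (third feature equal to zero). A tiny illustration with made-up values:

# Illustration only: how the padding mask behaves on a toy batch.
import numpy as np

batch = np.zeros((1, 4, 3))          # 1 event, 4 points, 3 features
batch[0, :2, :] = [[0.1, 0.2, 1.0],  # two real points; the rest stay zero-padded
                   [0.3, 0.1, 2.0]]
mask = batch[:, :, 2] == 0           # True where the point is padding
print(mask)                          # [[False False  True  True]]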
Example #6
def eval_one_epoch(sess, ops):
    is_training = False
    y_pred = []

    current_data_pl, current_label = provider.load_h5(EVALUATE_FILE, 'class')
    if multi:
        current_label = np.argmax(current_label, axis=-1)
    file_size = current_data_pl.shape[0]
    num_batches = file_size // BATCH_SIZE
    #num_batches = 4
    for batch_idx in range(num_batches):
        start_idx = batch_idx * (BATCH_SIZE)
        end_idx = (batch_idx + 1) * (BATCH_SIZE)
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        mask_padded = batch_data_pl[:, :, 2] == 0

        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['is_training']: is_training,
            ops['mask_pl']: mask_padded.astype(float),
        }

        atts1, atts2, atts3, pred = sess.run(
            [ops['atts1'], ops['atts2'], ops['atts3'], ops['pred']],
            feed_dict=feed_dict)
        if len(y_pred) == 0:
            y_pred = np.squeeze(pred)
        else:
            y_pred = np.concatenate((y_pred, pred), axis=0)

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)),
                   "w") as fh5:
        dset = fh5.create_dataset("DNN", data=y_pred)
        dset = fh5.create_dataset("pid",
                                  data=current_label[:num_batches *
                                                     (BATCH_SIZE)])
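The output file written above can be inspected afterwards with h5py; for example (file name illustrative):

# Reading back the prediction file written above.
import h5py

with h5py.File('evaluation_output.h5', 'r') as fh5:
    scores = fh5['DNN'][:]  # network outputs
    labels = fh5['pid'][:]  # matching truth labels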
Example #7
def eval_one_epoch(sess, ops):
    is_training = False

    total_correct = 0
    total_seen = 0
    loss_sum = 0
    ncorr = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_val = []

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'class', glob=True)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            scores = np.zeros(NUM_POINT)
            true = np.zeros(NUM_POINT)
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx,
                end_idx)

            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            #,beforemax
            loss, pred, coefs, coefs2 = sess.run(
                [ops['loss'], ops['pred'], ops['coefs'], ops['coefs2']],
                feed_dict=feed_dict)

            pred_val = np.argmax(pred, 1)
            correct = np.sum(pred_val == batch_label)

            total_correct += correct
            total_seen += (BATCH_SIZE)
            loss_sum += np.mean(loss)
            idx_batch = 0
            if len(y_val) == 0:
                y_val = batch_label
                y_coef1 = np.squeeze(np.max(coefs, -1))
                y_coef2 = np.squeeze(np.max(coefs2, -1))
                y_data = batch_data[:, :, :3]
                y_sc = pred[:, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_coef1 = np.concatenate(
                    (y_coef1, np.squeeze(np.max(coefs, -1))), axis=0)
                y_coef2 = np.concatenate(
                    (y_coef2, np.squeeze(np.max(coefs2, -1))), axis=0)
                y_data = np.concatenate((y_data, batch_data[:, :, :3]), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, 1]), axis=0)

    pos_label = 1
    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_sc, pos_label=pos_label)

    print('AUC: ', metrics.roc_auc_score(y_val, y_sc))

    signal = y_sc[y_val == 1]
    background = y_sc[y_val == 0]
    n, bins, patches = plt.hist([signal, background],
                                50,
                                color=['m', 'g'],
                                alpha=0.75,
                                range=(0, 1),
                                label=['Signal', 'Background'],
                                histtype='stepfilled')
    plt.grid(True)
    plt.savefig("{0}/output_{1}.pdf".format(FLAGS.plot_path, FLAGS.name),
                dpi=150)
    print('Saving DNN output histograms at: ',
          "{0}/output_{1}.pdf".format(FLAGS.plot_path, FLAGS.name))

    fig, base = plt.subplots(dpi=150)
    p = base.semilogy(tpr, 1.0 / fpr, color='m')
    bineff30 = np.argmax(tpr > 0.3)
    bineff50 = np.argmax(tpr > 0.5)
    print('1/effB at {0} effS: '.format(tpr[bineff30]), 1.0 / fpr[bineff30])
    print('1/effB at {0} effS: '.format(tpr[bineff50]), 1.0 / fpr[bineff50])
    base.set_xlabel("True Positive Rate")
    base.set_ylabel("1.0/False Positive Rate")
    plt.grid(True)
    plt.savefig("{0}/ROC_{1}.pdf".format(FLAGS.plot_path, FLAGS.name))

    total_loss = loss_sum * 1.0 / float(num_batches)
    print('The total accuracy is {0}'.format(total_correct /
                                             float(total_seen)))
    npyname = 'GapNet_{0}.npy'.format(FLAGS.name)
    np.save(npyname, y_sc)

    with h5py.File('{0}.h5'.format(FLAGS.name), "w") as fh5:
        dset = fh5.create_dataset("pid", data=y_val)
        dset = fh5.create_dataset("DNN", data=y_sc)
        dset = fh5.create_dataset("coef1", data=y_coef1)
        dset = fh5.create_dataset("coef2", data=y_coef2)
        dset = fh5.create_dataset("data", data=y_data)
Example #8
import tensorflow as tf
import numpy as np
import sys
import os
import math
import provider
import keras
from keras.models import load_model
from keras.utils import plot_model

#read file
# TEST_FILES = provider.getDataFiles('/Users/wangxue/gitpro/DL/pointcloud/pointtest/modelnet40_ply_hdf5_2048/test_files.txt')
#load model
model1 = load_model('pointtest/model/modelK11.h5')  #load model

predict_data, predict_label = provider.load_h5(
    'pointtest/modelnet40_ply_hdf5_2048/ply_data_test1.h5')
predict_data = predict_data[:, 0:2048, :]
# predict_data, predict_label, _ = provider.shuffle_data(test_data, np.squeeze(test_label))
predict_data = predict_data[:, :, :, np.newaxis]
predict_label = np.squeeze(predict_label)
predict_label = keras.utils.to_categorical(predict_label, num_classes=40)
pre = model1.predict(predict_data, batch_size=32, verbose=1)
print(pre)
print("----\n", predict_label)
max_probability = 0.0
index1 = 0
index2 = 0
accuracy = 0

#calculate the prediction accuracy
pre_objects = predict_data.shape[0]
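The snippet is cut off before the accuracy calculation it announces; one plausible continuation, comparing arg-max predictions with the one-hot labels, is:

# Hypothetical continuation: overall top-1 accuracy over the test objects.
pred_classes = np.argmax(pre, axis=1)
true_classes = np.argmax(predict_label, axis=1)
accuracy = np.mean(pred_classes == true_classes)
print('prediction accuracy =', accuracy)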
Example #9
os.system('cp %s %s' % (MODEL_FILE, LOG_DIR))  # bkp of model def
os.system('cp train_ours_triplet.py %s' % (LOG_DIR))  # bkp of train procedure
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS) + '\n')

BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

HOSTNAME = socket.gethostname()

OBJ_CAT = FLAGS.category
TRAIN_FILE = '../candidate_generation/train_' + OBJ_CAT + '.h5'
TEST_FILE = '../candidate_generation/test_' + OBJ_CAT + '.h5'
TRAIN_DATA = provider.load_h5(TRAIN_FILE)
TEST_DATA = provider.load_h5(TEST_FILE)

TRAIN_CANDIDATES_FILE = 'generate_deformed_candidates/arap_triplet_train_' + OBJ_CAT + '.pickle'
pickle_in = open(TRAIN_CANDIDATES_FILE, "rb")
TRAIN_DICT = pickle.load(pickle_in)

# TEST_CANDIDATES_FILE = FLAGS.test_candidates_file
# pickle_in = open(TEST_CANDIDATES_FILE,"rb")
# TEST_DICT = pickle.load(pickle_in)

OUTPUT_DIM = FLAGS.output_dim

np.random.seed(0)
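The BN_* constants defined here are conventionally fed into a batch-norm decay schedule; in PointNet-style training scripts (assumed here) the companion helper looks roughly like:

# Assumed companion helper (standard PointNet pattern); BATCH_SIZE is defined
# elsewhere in the same script.
def get_bn_decay(batch):
    bn_momentum = tf.train.exponential_decay(BN_INIT_DECAY,
                                             batch * BATCH_SIZE,
                                             BN_DECAY_DECAY_STEP,
                                             BN_DECAY_DECAY_RATE,
                                             staircase=True)
    bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum)
    return bn_decay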

Example #10
def train_one_epoch(sess, ops, train_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    train_idxs = np.arange(0, len(TRAIN_FILES))

    acc = loss_sum = 0
    y_pool = []
    y_assign = []
    for fn in range(len(TRAIN_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TRAIN_FILES[train_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')

        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using less global variables than possible")
            adds['global'] = adds['global'][:, :NUM_GLOB]

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        if FLAGS.nbatches > 0:
            num_batches = FLAGS.nbatches

        log_string(str(datetime.now()))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            #print(batch_weight)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['global_pl']: batch_global,
                ops['is_training_pl']: is_training,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, _, loss_val, pred_val, max_pool, dist = sess.run(
                    [
                        ops['merged'], ops['step'], ops['train_op_full'],
                        ops['kmeans_loss'], ops['pred'], ops['max_pool'],
                        ops['stack_dist']
                    ],
                    feed_dict=feed_dict)

                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]
                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)

            else:
                summary, step, _, loss_val, pred_val, max_pool = sess.run(
                    [
                        ops['merged'], ops['step'], ops['train_op'],
                        ops['classify_loss'], ops['pred'], ops['max_pool']
                    ],
                    feed_dict=feed_dict)

            loss_sum += np.mean(loss_val)
            if len(y_pool) == 0:
                y_pool = np.squeeze(max_pool)

            else:
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)

            train_writer.add_summary(summary, step)
    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('train clustering accuracy: %f' % (acc / float(num_batches)))
    return y_pool
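cluster_acc is not defined on this page; clustering accuracy of this kind is usually computed by matching predicted cluster indices to true labels with the Hungarian algorithm, roughly:

# Assumed sketch of cluster_acc (standard DEC-style clustering accuracy).
import numpy as np
from scipy.optimize import linear_sum_assignment


def cluster_acc(y_true, y_pred):
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)
    n_clusters = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((n_clusters, n_clusters), dtype=int)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / y_pred.size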
Example #11
def eval_one_epoch(sess, ops, test_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    loss_sum = acc = 0
    acc_seg = 0

    for fn in range(len(TEST_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')
        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using less global variables than possible")
            adds['global'] = adds['global'][:, :NUM_GLOB]

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, loss_val, pred_val, max_pool, dist = sess.run(
                    [
                        ops['merged'],
                        ops['step'],
                        ops['kmeans_loss'],
                        ops['pred'],
                        ops['max_pool'],
                        ops['stack_dist'],

                        #ops['pi']
                    ],
                    feed_dict=feed_dict)

                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]

                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)

            else:
                summary, step, loss_val, pred_val, max_pool = sess.run(
                    [
                        ops['merged'],
                        ops['step'],
                        ops['classify_loss'],
                        ops['pred'],
                        ops['max_pool'],
                    ],
                    feed_dict=feed_dict)

            test_writer.add_summary(summary, step)

            loss_sum += np.mean(loss_val)

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('testing clustering accuracy: %f' % (acc / float(num_batches)))

    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        # only clustering accuracy is tracked in this function
        return acc / float(num_batches)
    else:
        return total_loss
Example #12
def eval_one_epoch(sess, ops):
    is_training = False
    eval_idxs = np.arange(0, len(EVALUATE_FILES))

    y_assign = []
    y_glob = []
    acc = 0

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')

        adds = provider.load_add(current_file, ['global', 'masses'])

        if NUM_GLOB < adds['global'].shape[1]:
            print("Using less global variables than possible")
            current_glob = adds['global'][:, :NUM_GLOB]
        else:
            current_glob = adds['global']

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_glob, start_idx, end_idx)

            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 1,  #No impact during evaluation
                ops['is_training_pl']: is_training,
            }

            dist, mu, max_pool = sess.run(
                [ops['stack_dist'], ops['mu'], ops['max_pool']],
                feed_dict=feed_dict)

            cluster_assign = np.zeros((cur_batch_size), dtype=int)
            if RD:
                batch_cluster = current_cluster[start_idx:end_idx]

            for i in range(cur_batch_size):
                index_closest_cluster = np.argmin(dist[:, i])
                cluster_assign[i] = index_closest_cluster
                if RD:
                    acc += cluster_acc(batch_cluster, cluster_assign)

            if len(y_assign) == 0:
                if RD:
                    y_val = batch_cluster
                y_assign = cluster_assign
                y_pool = np.squeeze(max_pool)
            else:
                y_assign = np.concatenate((y_assign, cluster_assign), axis=0)
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)

                if RD:
                    y_val = np.concatenate((y_val, batch_cluster), axis=0)

        if len(y_glob) == 0:
            y_glob = adds['global'][:num_batches * BATCH_SIZE]
            y_mass = adds['masses'][:num_batches * BATCH_SIZE]
        else:
            y_glob = np.concatenate(
                (y_glob, adds['global'][:num_batches * BATCH_SIZE]), axis=0)
            y_mass = np.concatenate(
                (y_mass, adds['masses'][:num_batches * BATCH_SIZE]), axis=0)

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)),
                   "w") as fh5:
        if RD:
            dset = fh5.create_dataset("label", data=y_val)
        dset = fh5.create_dataset("pid", data=y_assign)
        dset = fh5.create_dataset("max_pool", data=y_pool)
        dset = fh5.create_dataset("global", data=y_glob)
        dset = fh5.create_dataset("masses", data=y_mass)
Example #13
def eval_one_epoch(sess, ops):
    is_training = False

    total_correct = total_correct_ones = total_seen = total_seen_ones = loss_sum = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_val = []
    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_truth = []
        current_mass = []
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'seg', glob=True)

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            scores = np.zeros(NUM_POINT)
            true = np.zeros(NUM_POINT)
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx,
                end_idx)

            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            #,beforemax
            loss, pred, coefs, coefs2 = sess.run(
                [ops['loss'], ops['pred'], ops['coefs'], ops['coefs2']],
                feed_dict=feed_dict)

            pred_val = np.argmax(pred, 2)

            correct = np.sum(pred_val == batch_label)
            correct_ones = np.sum(pred_val * batch_label)
            total_correct += correct
            total_correct_ones += correct_ones
            total_seen += (BATCH_SIZE * NUM_POINT)
            total_seen_ones += np.sum(batch_label)
            loss_sum += np.mean(loss)
            if len(y_val) == 0:
                y_val = batch_label
                y_data = batch_data[:, :, :]
                y_glob = batch_global
                y_sc = pred[:, :, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_data = np.concatenate((y_data, batch_data[:, :, :]), axis=0)
                y_glob = np.concatenate((y_glob, batch_global), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, :, 1]), axis=0)

    pos_label = 1
    total_loss = loss_sum * 1.0 / float(num_batches)
    print('The total accuracy is {0}'.format(total_correct /
                                             float(total_seen)))
    print('The signal accuracy is {0}'.format(total_correct_ones /
                                              float(total_seen_ones)))

    with h5py.File('{0}.h5'.format(FLAGS.name), "w") as fh5:
        dset = fh5.create_dataset("pid", data=y_val)
        dset = fh5.create_dataset("DNN", data=y_sc)
        dset = fh5.create_dataset("global", data=y_glob)
        dset = fh5.create_dataset("data", data=y_data)
Example #14
    def load_train_data(self):
        print("**** Loading dataset....................")
        train_files = provider.get_data_files(
            os.path.join(self.data_config.datadir_root,
                         self.data_config.dataset_name, 'train_files.txt'))
        test_files = provider.get_data_files(
            os.path.join(self.data_config.datadir_root,
                         self.data_config.dataset_name, 'test_files.txt'))
        self.label_map = provider.get_label_map(
            os.path.join(self.data_config.datadir_root,
                         self.data_config.dataset_name, 'label_map.yaml'))

        train_file_idxs = np.arange(0, len(train_files))
        np.random.shuffle(train_file_idxs)

        if self.is_train:
            pointcloud_data = []
            feature_data = []
            image_data = []
            labels = []
            for fn in range(len(train_files)):
                print(train_files[train_file_idxs[fn]])
                if ".h5" in train_files[train_file_idxs[fn]]:
                    pointcloud, image, mask_rgb, feature, label = provider.load_h5(
                        train_files[train_file_idxs[fn]],
                        cloud_color=self.model_config.pointcloud_color,
                        load_feature=False)
                elif ".pgz" in train_files[train_file_idxs[fn]]:
                    pointcloud, label = provider.load_pickle_file_with_label(
                        train_files[train_file_idxs[fn]],
                        compressed=True,
                        cloud_color=self.model_config.pointcloud_color)

                pointcloud, label, idx = provider.shuffle_data(
                    pointcloud, np.squeeze(label))
                label = np.squeeze(label)

                pointcloud_data.extend(pointcloud)
                labels.extend(label)

            print('**** Train dataset loaded....................')
            self.train_pointcloud_data = np.asarray(pointcloud_data)
            self.train_labels = np.asarray(labels)

        print("**** Loading test dataset....................")
        test_pointcloud_data = []
        test_image_data = []
        test_feature_data = []
        test_labels = []
        for fn in range(len(test_files)):
            if ".h5" in test_files[fn]:
                pointcloud, image, mask_rgb, feature, label = provider.load_h5(
                    test_files[fn],
                    cloud_color=self.model_config.pointcloud_color,
                    load_feature=False)
            elif ".pgz" in test_files[fn]:
                pointcloud, label = provider.load_pickle_file_with_label(
                    test_files[fn],
                    compressed=True,
                    cloud_color=self.model_config.pointcloud_color)

            label = np.squeeze(label)

            test_pointcloud_data.extend(pointcloud)
            test_labels.extend(label)

        test_pointcloud_data = np.asarray(test_pointcloud_data)
        test_labels = np.asarray(test_labels)
        test_pointcloud_data, test_labels, idx = provider.shuffle_data(
            test_pointcloud_data, test_labels)

        self.test_data = {}
        self.test_data['pointcloud_data'] = test_pointcloud_data
        self.test_data['labels'] = test_labels
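provider.shuffle_data, used here and in several other examples, typically permutes data and labels together and also returns the permutation; in its simplest form it is assumed to look like:

# Assumed sketch of provider.shuffle_data (PointNet-style, simplest form).
def shuffle_data(data, labels):
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    return data[idx, ...], labels[idx], idx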
Example #15
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    total_correct = total_correct_ones = total_seen = total_seen_ones = loss_sum = total_sig = 0

    for fn in range(len(TEST_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        current_data, current_label, global_pl = provider.load_h5(current_file,
                                                                  'seg',
                                                                  glob=True)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=global_pl)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            summary, step, loss_val, pred_val, coefs = sess.run(
                [
                    ops['merged'], ops['step'], ops['loss'], ops['pred'],
                    ops['coefs']
                ],
                #ops['coefs2'],],
                feed_dict=feed_dict)

            test_writer.add_summary(summary, step)

            pred_val = np.argmax(pred_val, 2)
            correct = np.sum((pred_val == batch_label))
            correct_ones = np.sum(pred_val * batch_label)

            total_sig += np.sum(batch_label)
            total_correct_ones += correct_ones
            #print (correct)
            total_correct += correct
            total_seen_ones += np.sum(batch_label)
            total_seen += BATCH_SIZE * NUM_POINT
            #total_seen += BATCH_SIZE*NUM_POINT
            loss_sum += np.mean(loss_val)

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))
    log_string('The signal accuracy is {0}'.format(total_correct_ones /
                                                   float(total_seen_ones)))

    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        return total_correct / float(total_seen)
    else:
        return total_loss
Example #16
LOG_DIR = FLAGS.log_dir
if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR)
os.system('cp model.py %s' % (LOG_DIR)) 
os.system('cp train.py %s' % (LOG_DIR)) 
LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
LOG_FOUT.write(str(FLAGS)+'\n')

MAX_NUM_POINT = 16384
NUM_CLASSES = 13

BN_INIT_DECAY = 0.5
BN_DECAY_DECAY_RATE = 0.5
BN_DECAY_DECAY_STEP = float(DECAY_STEP)
BN_DECAY_CLIP = 0.99

train_data, train_label = provider.load_h5('/home/chencan/data/KITTI/object/training/hdf5/train_data.h5')
train_label = train_label.reshape((-1, NUM_POINT))


def log_string(out_str):
  LOG_FOUT.write(out_str+'\n')
  LOG_FOUT.flush()
  print(out_str)


def get_learning_rate(batch):
  learning_rate = tf.train.exponential_decay(
            BASE_LEARNING_RATE,  # Base learning rate.
            batch * BATCH_SIZE,  # Current index into the dataset.
            DECAY_STEP,          # Decay step.
            DECAY_RATE,          # Decay rate.
            staircase=True)
  learning_rate = tf.maximum(learning_rate, 0.00001)  # clip the learning rate
  return learning_rate
Example #17
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    y_val = []
    for fn in range(len(TEST_FILES)):
        log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        current_data, current_label, current_global = provider.load_h5(
            current_file, 'class', glob=True)
        current_data, current_label, current_global, _ = provider.shuffle_data(
            current_data, np.squeeze(current_label), global_pl=current_global)
        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE

        log_string(str(datetime.now()))
        log_string('---- EPOCH %03d EVALUATION ----' % (EPOCH_CNT))
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_global, start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
            }
            if batch_idx == 0:
                start_time = time.time()

            summary, step, loss_val, pred_val, coefs, coefs2, adj = sess.run(
                [
                    ops['merged'], ops['step'], ops['loss'], ops['pred'],
                    ops['coefs'], ops['coefs2'], ops['adj']
                ],
                feed_dict=feed_dict)
            if batch_idx == 0:
                duration = time.time() - start_time
                log_string("Eval time: " + str(duration))

            test_writer.add_summary(summary, step)
            pred = pred_val
            pred_val = np.argmax(pred_val, 1)

            correct = np.sum(pred_val == batch_label)
            #print (correct)
            total_correct += correct
            total_seen += (BATCH_SIZE)
            loss_sum += np.mean(loss_val)
            if len(y_val) == 0:
                y_val = batch_label
                y_sc = pred[:, 1]
            else:
                y_val = np.concatenate((y_val, batch_label), axis=0)
                y_sc = np.concatenate((y_sc, pred[:, 1]), axis=0)

    fpr, tpr, thresholds = metrics.roc_curve(y_val, y_sc, pos_label=1)
    bineff30 = np.argmax(tpr > 0.3)
    log_string('1/effB at {0} effS: {1}'.format(tpr[bineff30],
                                                1.0 / fpr[bineff30]))

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('accuracy: %f' % (total_correct / float(total_seen)))

    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        return total_correct / float(total_seen)
    else:
        return total_loss
Example #18
FLAGS = parser.parse_args()

BATCH_SIZE = FLAGS.batch_size
NUM_POINT = FLAGS.num_point
MODEL_PATH = FLAGS.model_path
GPU_INDEX = FLAGS.gpu
DUMP_DIR = FLAGS.dump_dir
if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR)

NUM_CLASSES = 4

LOG_FOUT = open(os.path.join(DUMP_DIR, 'log_evaluate.txt'), 'w')
LOG_FOUT.write(str(FLAGS)+'\n')


test_data, test_label = provider.load_h5('/home/chencan/dataset/kitti/test_data.h5')
test_label = test_label.reshape((-1, NUM_POINT))



def log_string(out_str):
    LOG_FOUT.write(out_str + '\n')
    LOG_FOUT.flush()
    print(out_str)

def evaluate():

    with tf.device('/gpu:' + str(GPU_INDEX)):
        pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT)
        is_training_pl = tf.placeholder(tf.bool, shape=())
Example #19
def eval_one_epoch(sess, ops, test_writer):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    loss_sum = 0
    y_source = []

    current_data_pl, current_label = provider.load_h5(TEST_FILE, 'class')
    if multi:
        current_label = np.argmax(current_label, axis=-1)
    current_data_pl, current_label, _ = provider.shuffle_data(
        current_data_pl, np.squeeze(current_label))

    file_size = current_data_pl.shape[0]
    num_batches = file_size // (BATCH_SIZE)
    #num_batches = 4

    log_string(str(datetime.now()))
    log_string('---- EPOCH %03d EVALUATION ----' % (EPOCH_CNT))
    for batch_idx in range(num_batches):

        start_idx = batch_idx * (BATCH_SIZE)
        end_idx = (batch_idx + 1) * (BATCH_SIZE)
        batch_data_pl, batch_label = get_batch(current_data_pl, current_label,
                                               start_idx, end_idx)
        mask_padded = batch_data_pl[:, :, 2] == 0

        feed_dict = {
            ops['pointclouds_pl']: batch_data_pl,
            ops['labels_pl']: batch_label,
            ops['is_training']: is_training,
            ops['mask_pl']: mask_padded.astype(float),
        }

        if batch_idx == 0:
            start_time = time.time()

        summary, step, loss, pred, lr = sess.run([
            ops['merged'], ops['step'], ops['loss'], ops['pred'],
            ops['learning_rate']
        ],
                                                 feed_dict=feed_dict)

        if batch_idx == 0:
            duration = time.time() - start_time
            log_string("Eval time: " + str(duration))
            log_string("Learning rate: " + str(lr))
            #log_string("{}".format(sub_feat))

        test_writer.add_summary(summary, step)

        loss_sum += np.mean(loss)
        if len(y_source) == 0:
            y_source = np.squeeze(pred)
        else:
            y_source = np.concatenate((y_source, np.squeeze(pred)), axis=0)

    if multi:
        name_convert = {
            0: 'Gluon',
            1: 'Quark',
            2: 'Z',
            3: 'W',
            4: 'Top',
        }
        label = current_label[:num_batches * (BATCH_SIZE)]
        for isample in np.unique(label):
            fpr, tpr, _ = metrics.roc_curve(label == isample,
                                            y_source[:, isample],
                                            pos_label=1)
            log_string("Class: {}, AUC: {}".format(name_convert[isample],
                                                   metrics.auc(fpr, tpr)))
            bineff = np.argmax(fpr > 0.1)
            log_string('SOURCE: effS at {0} effB = {1}'.format(
                tpr[bineff], fpr[bineff]))
        log_string('mean loss: %f' % (loss_sum * 1.0 / float(num_batches)))
    else:
        fpr, tpr, _ = metrics.roc_curve(current_label[:num_batches *
                                                      (BATCH_SIZE)],
                                        y_source[:, 1],
                                        pos_label=1)
        log_string("AUC: {}".format(metrics.auc(fpr, tpr)))

        bineff = np.argmax(tpr > 0.3)

        log_string('SOURCE: 1/effB at {0} effS = {1}'.format(
            tpr[bineff], 1.0 / fpr[bineff]))
        log_string('mean loss: %f' % (loss_sum * 1.0 / float(num_batches)))
    EPOCH_CNT += 1

    return loss_sum * 1.0 / float(num_batches)
Example #20
def eval_one_epoch(sess, ops):
    is_training = False

    total_correct = total_sig = total_correct_ones = total_seen = total_seen_ones = loss_sum = 0
    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_pred = []
    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label = provider.load_h5(current_file, 'seg')
        full_data = current_data
        if current_data.shape[2] > NFEATURES:
            print('puppi not used')
            current_data = current_data[:, :, :NFEATURES]
        if current_data.shape[1] > NUM_POINT:
            print('Using less points')
            current_data = current_data[:, :NUM_POINT]
            current_label = current_label[:, :NUM_POINT]

        add_list = [
            'PFNoPU',
            'puppiPU',
            'chs',
            'NPU',
            'CHS_MET',
            'PUPPI_MET',
            #'puppiNoPU',
        ]
        adds = provider.load_add(current_file, add_list)
        if not FLAGS.is_data:
            current_truth = adds['PFNoPU']
            current_truth = preprocessing(current_data, current_truth)
        else:
            add_list.append('nLeptons')
            current_truth = np.zeros((current_data.shape))

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        #num_batches = 1
        # if FLAGS.is_data:
        #     num_batches = 600

        for batch_idx in range(num_batches):
            scores = np.zeros(NUM_POINT)
            true = np.zeros(NUM_POINT)
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label, batch_truth = get_batch(
                current_data, current_label, current_truth, start_idx, end_idx)

            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['truth_pl']: batch_truth,
                ops['labels_pl']: batch_label,
                ops['is_training_pl']: is_training,
            }
            #,beforemax
            loss, pred = sess.run([ops['loss'], ops['pred']],
                                  feed_dict=feed_dict)
            pred_val = np.argmax(pred, 2)

            correct_ones = pred_val * batch_label
            total_sig += np.sum(batch_label == 2)
            total_correct_ones += np.sum(correct_ones == 4)

            loss_sum += np.mean(loss)
            if len(y_pred) == 0:
                y_pred = pred[:, :, 2]
                y_data = full_data[start_idx:end_idx]
                y_lab = batch_label
                y_add = {}
                for add in adds:
                    y_add[add] = adds[add][start_idx:end_idx]
            else:
                y_pred = np.concatenate((y_pred, pred[:, :, 2]), axis=0)
                y_data = np.concatenate((y_data, full_data[start_idx:end_idx]),
                                        axis=0)
                y_lab = np.concatenate((y_lab, batch_label), axis=0)
                for add in adds:
                    y_add[add] = np.concatenate(
                        (y_add[add], adds[add][start_idx:end_idx]), axis=0)

    if not FLAGS.is_data:
        print('The signal accuracy is {0}'.format(total_correct_ones /
                                                  float(total_sig)))
        flat_pred = y_pred.flatten()
        flat_lab = y_lab.flatten()
        flat_lab = flat_lab == 2
        results = metrics.roc_curve(flat_lab, flat_pred)
        threshs = [0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]
        with open(os.path.join(MODEL_PATH, 'cut_eff.txt'), 'w') as f:
            for thresh in threshs:
                bin = np.argmax(results[1] > thresh)
                cut = results[2][bin]
                f.write('eff: {}, fpr: {}, cut: {} \n'.format(
                    results[1][bin], results[0][bin], cut))
    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)),
                   "w") as fh5:
        dset = fh5.create_dataset("DNN", data=y_pred)
        dset = fh5.create_dataset("data", data=y_data)
        dset = fh5.create_dataset("pid", data=y_lab)
        for add in adds:
            dset = fh5.create_dataset(add, data=y_add[add])
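The score cuts written to cut_eff.txt come from metrics.roc_curve, whose third return value holds the thresholds; applying one such working point afterwards would look roughly like this (names reused from the code above, values illustrative):

# Illustrative use of a chosen working point; 'results' and 'y_pred' as above.
fpr_vals, tpr_vals, thresholds = results
cut = thresholds[np.argmax(tpr_vals > 0.8)]  # score cut giving ~80% signal efficiency
selected = y_pred > cut                      # boolean mask of points passing the cut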