def train_one_epoch(train_file_idx, epoch_num):
            is_training = True

            for i in range(num_train_file):
                cur_train_filename = os.path.join(hdf5_data_dir, train_file_list[train_file_idx[i]])
                printout(flog, 'Loading train file ' + cur_train_filename)

                cur_data, cur_labels, cur_seg = provider.load_h5_data_label_seg(cur_train_filename)
                cur_data, cur_labels, order = provider.shuffle_data(cur_data, np.squeeze(cur_labels))
                cur_seg = cur_seg[order, ...]

                cur_labels_one_hot = convert_label_to_one_hot(cur_labels)

                num_data = len(cur_labels)
                num_batch = num_data // (FLAGS.num_gpu * batch_size) # For all working gpus

                total_loss = 0.0
                total_seg_acc = 0.0

                for j in range(num_batch):
                    begidx_0 = j * batch_size
                    endidx_0 = (j + 1) * batch_size

                    feed_dict = {
                            # For the first gpu
                            pointclouds_phs[0]: cur_data[begidx_0: endidx_0, ...], 
                            input_label_phs[0]: cur_labels_one_hot[begidx_0: endidx_0, ...], 
                            seg_phs[0]: cur_seg[begidx_0: endidx_0, ...],
                            is_training_phs[0]: is_training, 
                            }


                    # train_op updates all working gpus; the remaining fetched tensors come from the first gpu
                    _, loss_val, per_instance_seg_loss_val, seg_pred_val, pred_seg_res \
                            = sess.run([train_op, loss, per_instance_seg_loss, seg_pred, per_instance_seg_pred_res], \
                            feed_dict=feed_dict)

                    per_instance_part_acc = np.mean(pred_seg_res == cur_seg[begidx_0: endidx_0, ...], axis=1)
                    average_part_acc = np.mean(per_instance_part_acc)

                    total_loss += loss_val
                    total_seg_acc += average_part_acc

                total_loss = total_loss * 1.0 / num_batch
                total_seg_acc = total_seg_acc * 1.0 / num_batch

                lr_sum, bn_decay_sum, batch_sum, train_loss_sum, train_seg_acc_sum = sess.run(\
                        [lr_op, bn_decay_op, batch_op, total_train_loss_sum_op, seg_train_acc_sum_op], \
                        feed_dict={total_training_loss_ph: total_loss, seg_training_acc_ph: total_seg_acc})

                train_writer.add_summary(train_loss_sum, i + epoch_num * num_train_file)
                train_writer.add_summary(lr_sum, i + epoch_num * num_train_file)
                train_writer.add_summary(bn_decay_sum, i + epoch_num * num_train_file)
                train_writer.add_summary(train_seg_acc_sum, i + epoch_num * num_train_file)
                train_writer.add_summary(batch_sum, i + epoch_num * num_train_file)

                printout(flog, '\tTraining Total Mean_loss: %f' % total_loss)
                printout(flog, '\t\tTraining Seg Accuracy: %f' % total_seg_acc)
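# The loop above leans on two helpers that are not shown in this example:
# provider.load_h5_data_label_seg and convert_label_to_one_hot. A minimal
# sketch of what they are assumed to do (the HDF5 keys 'data'/'label'/'pid'
# and the 16-category default are assumptions, not taken from this snippet):
import h5py
import numpy as np

def load_h5_data_label_seg(h5_filename):
    # 'data': (N, num_point, 3), 'label': (N, 1) object category, 'pid': (N, num_point) part ids
    with h5py.File(h5_filename, 'r') as f:
        return f['data'][:], f['label'][:], f['pid'][:]

def convert_label_to_one_hot(labels, num_categories=16):
    # One row per shape; the column of the shape's object category is set to 1
    one_hot = np.zeros((len(labels), num_categories), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot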
Example #2
def predict():
    is_training = False
    
    with tf.device('/gpu:'+str(gpu_to_use)):
        pointclouds_ph, input_label_ph = placeholder_inputs()
        is_training_ph = tf.placeholder(tf.bool, shape=())

        # simple model
        pred, seg_pred, end_points = model.get_model(pointclouds_ph, input_label_ph, \
                cat_num=NUM_OBJ_CATS, part_num=NUM_PART_CATS, is_training=is_training_ph, \
                batch_size=batch_size, num_point=point_num, weight_decay=0.0, bn_decay=None)
        
    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Later, launch the model, use the saver to restore variables from disk, and
    # do some work with the model.
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        flog = open(os.path.join(output_dir, 'log.txt'), 'w')

        # Restore variables from disk.
        printout(flog, 'Loading model %s' % pretrained_model_path)
        saver.restore(sess, pretrained_model_path)
        printout(flog, 'Model restored.')
        
        # Note: evaluating a model with batch normalization requires batch statistics;
        # some test data are used to provide them.
        batch_data = np.zeros([batch_size, point_num, 3]).astype(np.float32)

        total_acc = 0.0
        total_seen = 0
        total_acc_iou = 0.0

        total_per_cat_acc = np.zeros((NUM_OBJ_CATS)).astype(np.float32)
        total_per_cat_iou = np.zeros((NUM_OBJ_CATS)).astype(np.float32)
        total_per_cat_seen = np.zeros((NUM_OBJ_CATS)).astype(np.int32)

        ffiles = open(test_file_list, 'r')
        files = [line.rstrip() for line in ffiles.readlines()]
        ffiles.close()

        len_pts_files = len(files)
        data, labels, segs = provider.load_h5_data_label_seg(
            os.path.join(hdf5_data_dir, files[0]))

        for i in range(len(data)):
            if i % 100 == 0:
                printout(flog, '%d/%d ...' % (i, len(data)))

            cur_gt_label = labels[i]

            cur_label_one_hot = np.zeros((1, NUM_OBJ_CATS), dtype=np.float32)
            cur_label_one_hot[0, cur_gt_label] = 1

            # pts_file_to_load = os.path.join(ply_data_dir, pts_files[shape_idx])
            # seg_file_to_load = os.path.join(ply_data_dir, seg_files[shape_idx])

            pts = data[i]
            seg = segs[i]
            ori_point_num = len(seg)

            batch_data[0, ...] = pc_augment_to_point_num(pc_normalize(pts), point_num)

            label_pred_val, seg_pred_res = sess.run([pred, seg_pred], feed_dict={
                        pointclouds_ph: batch_data,
                        input_label_ph: cur_label_one_hot, 
                        is_training_ph: is_training,
                    })

            label_pred_val = np.argmax(label_pred_val[0, :])
            
            seg_pred_res = seg_pred_res[0, ...]

            iou_oids = object2setofoid[objcats[cur_gt_label]]
            non_cat_labels = list(set(np.arange(NUM_PART_CATS)).difference(set(iou_oids)))

            mini = np.min(seg_pred_res)
            seg_pred_res[:, non_cat_labels] = mini - 1000

            seg_pred_val = np.argmax(seg_pred_res, axis=1)[:ori_point_num]

            seg_acc = np.mean(seg_pred_val == seg)

            total_acc += seg_acc
            total_seen += 1

            total_per_cat_seen[cur_gt_label] += 1
            total_per_cat_acc[cur_gt_label] += seg_acc

            mask = np.int32(seg_pred_val == seg)

            total_iou = 0.0
            iou_log = ''
            for oid in iou_oids:
                n_pred = np.sum(seg_pred_val == oid)
                n_gt = np.sum(seg == oid)
                n_intersect = np.sum(np.int32(seg == oid) * mask)
                n_union = n_pred + n_gt - n_intersect
                iou_log += '_' + str(n_pred)+'_'+str(n_gt)+'_'+str(n_intersect)+'_'+str(n_union)+'_'
                if n_union == 0:
                    total_iou += 1
                    iou_log += '_1\n'
                else:
                    total_iou += n_intersect * 1.0 / n_union
                    iou_log += '_'+str(n_intersect * 1.0 / n_union)+'\n'

            avg_iou = total_iou / len(iou_oids)
            total_acc_iou += avg_iou
            total_per_cat_iou[cur_gt_label] += avg_iou
            
            if output_verbose:
                output_color_point_cloud(pts, seg, os.path.join(output_dir, str(i)+'_gt.obj'))
                output_color_point_cloud(pts, seg_pred_val, os.path.join(output_dir, str(i)+'_pred.obj'))
                output_color_point_cloud_red_blue(pts, np.int32(seg == seg_pred_val), 
                        os.path.join(output_dir, str(i)+'_diff.obj'))

                with open(os.path.join(output_dir, str(i)+'.log'), 'w') as fout:
                    fout.write('Total Point: %d\n\n' % ori_point_num)
                    fout.write('Ground Truth: %s\n' % objnames[cur_gt_label])
                    fout.write('Predict: %s\n\n' % objnames[label_pred_val])
                    fout.write('Accuracy: %f\n' % seg_acc)
                    fout.write('IoU: %f\n\n' % avg_iou)
                    fout.write('IoU details: %s\n' % iou_log)

        printout(flog, 'Accuracy: %f' % (total_acc / total_seen))
        printout(flog, 'IoU: %f' % (total_acc_iou / total_seen))

        for cat_idx in range(NUM_OBJ_CATS):
            printout(flog, '\t ' + objcats[cat_idx] + ' Total Number: ' + str(total_per_cat_seen[cat_idx]))
            if total_per_cat_seen[cat_idx] > 0:
                printout(flog, '\t ' + objcats[cat_idx] + ' Accuracy: ' + \
                        str(total_per_cat_acc[cat_idx] / total_per_cat_seen[cat_idx]))
                printout(flog, '\t ' + objcats[cat_idx] + ' IoU: '+ \
                        str(total_per_cat_iou[cat_idx] / total_per_cat_seen[cat_idx]))
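# pc_augment_to_point_num(pc_normalize(pts), point_num) above relies on two
# helpers that are not shown. A rough sketch under the usual assumptions
# (center the cloud, scale it into the unit sphere, then repeat points until
# the target count is reached):
import numpy as np

def pc_normalize(pc):
    # Shift to the centroid and scale so the farthest point lies on the unit sphere
    pc = pc - np.mean(pc, axis=0)
    return pc / np.max(np.sqrt(np.sum(pc ** 2, axis=1)))

def pc_augment_to_point_num(pts, pn):
    # Tile the points until at least pn are available, then truncate to exactly pn
    assert pts.shape[0] <= pn
    res = np.array(pts)
    while res.shape[0] < pn:
        res = np.concatenate((res, pts), axis=0)
    return res[:pn, :]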
Example #3
        def eval_one_epoch(epoch_num):
            is_training = False

            total_loss = 0.0
            total_seg_acc = 0.0
            total_seen = 0

            total_seg_acc_per_cat = np.zeros((NUM_CATEGORIES)).astype(np.float32)
            total_seen_per_cat = np.zeros((NUM_CATEGORIES)).astype(np.int32)

            for i in range(num_test_file):
                cur_test_filename = os.path.join(hdf5_data_dir, test_file_list[i])
                printout(flog, 'Loading test file ' + cur_test_filename)

                cur_data, cur_labels, cur_seg = provider.load_h5_data_label_seg(cur_test_filename)
                cur_labels = np.squeeze(cur_labels)

                cur_labels_one_hot = convert_label_to_one_hot(cur_labels)

                num_data = len(cur_labels)
                num_batch = num_data // batch_size

                # Evaluation feeds the first gpu's placeholders, where the fetched loss/prediction tensors are defined
                for j in range(num_batch):
                    begidx = j * batch_size
                    endidx = (j + 1) * batch_size
                    feed_dict = {
                            pointclouds_phs[0]: cur_data[begidx: endidx, ...], 
                            input_label_phs[0]: cur_labels_one_hot[begidx: endidx, ...], 
                            seg_phs[0]: cur_seg[begidx: endidx, ...],
                            is_training_phs[0]: is_training}

                    loss_val, per_instance_seg_loss_val, seg_pred_val, pred_seg_res \
                            = sess.run([loss, per_instance_seg_loss, seg_pred, per_instance_seg_pred_res], \
                            feed_dict=feed_dict)

                    per_instance_part_acc = np.mean(pred_seg_res == cur_seg[begidx: endidx, ...], axis=1)
                    average_part_acc = np.mean(per_instance_part_acc)

                    total_seen += 1
                    total_loss += loss_val
                    
                    total_seg_acc += average_part_acc

                    for shape_idx in range(begidx, endidx):
                        total_seen_per_cat[cur_labels[shape_idx]] += 1
                        total_seg_acc_per_cat[cur_labels[shape_idx]] += per_instance_part_acc[shape_idx - begidx]

            total_loss = total_loss * 1.0 / total_seen
            total_seg_acc = total_seg_acc * 1.0 / total_seen

            test_loss_sum, test_seg_acc_sum = sess.run(\
                    [total_test_loss_sum_op, seg_test_acc_sum_op], \
                    feed_dict={total_testing_loss_ph: total_loss, \
                    seg_testing_acc_ph: total_seg_acc})

            test_writer.add_summary(test_loss_sum, (epoch_num+1) * num_train_file-1)
            test_writer.add_summary(test_seg_acc_sum, (epoch_num+1) * num_train_file-1)

            printout(flog, '\tTesting Total Mean_loss: %f' % total_loss)
            printout(flog, '\t\tTesting Seg Accuracy: %f' % total_seg_acc)

            for cat_idx in range(NUM_CATEGORIES):
                if total_seen_per_cat[cat_idx] > 0:
                    printout(flog, '\n\t\tCategory %s Object Number: %d' % (all_obj_cats[cat_idx][0], total_seen_per_cat[cat_idx]))
                    printout(flog, '\t\tCategory %s Seg Accuracy: %f' % (all_obj_cats[cat_idx][0], total_seg_acc_per_cat[cat_idx]/total_seen_per_cat[cat_idx]))
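# printout, used throughout these examples, is assumed to be a small logging
# helper that mirrors each message to the open log file and to stdout:
def printout(flog, data):
    print(data)
    flog.write(data + '\n')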
Example #4
    def eval_one_epoch(epoch_num):
      is_training = False

      total_loss = 0.0
      total_seg_acc = 0.0
      total_seen = 0

      total_seg_acc_per_cat = np.zeros((NUM_CATEGORIES)).astype(np.float32)  # [0. 0. ... 0.], 16 zeros
      total_seen_per_cat = np.zeros((NUM_CATEGORIES)).astype(np.int32)  # [0 0 0 0 ...0]

      for i in range(num_test_file):   # i= 0  num_test_file=1
        cur_test_filename = os.path.join(hdf5_data_dir, test_file_list[i])    # ply_data_val0.h5
        printout(flog, 'Loading test file ' + cur_test_filename)  # ply_data_val0.h5

        # Load the data
        # data = f['data'][:] # (1870, 2048, 3)
        # label = f['label'][:] # (1870, 1), object category (0-15) of each of the 1870 point clouds
        # seg = f['pid'][:] # (1870, 2048), per-point part label in 0-49 (50 part classes in total)
        cur_data, cur_labels, cur_seg = provider.load_h5_data_label_seg(cur_test_filename)
        cur_labels = np.squeeze(cur_labels)         # shape:(1870,)

        cur_labels_one_hot = convert_label_to_one_hot(cur_labels)   # 1870*16

        num_data = len(cur_labels)      # 1870 point clouds
        num_batch = num_data // batch_size    # 467 batches

        # Evaluation feeds the first gpu's placeholders, where the fetched loss/prediction tensors are defined
        for j in range(num_batch):      # 0-466
          begidx = j * batch_size
          endidx = (j + 1) * batch_size
          # feed_dict = {
          #     pointclouds_phs[1]: cur_data[begidx: endidx, ...],
          #     input_label_phs[1]: cur_labels_one_hot[begidx: endidx, ...],
          #     seg_phs[1]: cur_seg[begidx: endidx, ...],
          #     is_training_phs[1]: is_training}

          feed_dict = {
            pointclouds_phs[0]: cur_data[begidx: endidx, ...],      # 4*2048*3
            input_label_phs[0]: cur_labels_one_hot[begidx: endidx, ...],    # 4*16
            seg_phs[0]: cur_seg[begidx: endidx, ...],                # 4*2048
            is_training_phs[0]: is_training}

          # pred_seg_res: per-point segmentation prediction, shape 4*2048
          # seg_pred_val: segmentation scores over the 50 part classes, shape 4*2048*50
          # per_instance_seg_loss_val: loss of each point cloud, shape (4,)
          # loss_val: mean loss of the current batch, shape ()
          loss_val, per_instance_seg_loss_val, seg_pred_val, pred_seg_res \
              = sess.run([loss, per_instance_seg_loss, seg_pred, per_instance_seg_pred_res], \
              feed_dict=feed_dict)

          per_instance_part_acc = np.mean(pred_seg_res == cur_seg[begidx: endidx, ...], axis=1)  # segmentation accuracy of each of the 4 point clouds, shape (4,)
          average_part_acc = np.mean(per_instance_part_acc)     # mean segmentation accuracy over the 4 point clouds

          total_seen += 1   # counted per batch
          total_loss += loss_val
          
          total_seg_acc += average_part_acc     # average_part_acc: segmentation accuracy of the current batch

          for shape_idx in range(begidx, endidx):   # for each point cloud in the current batch
            total_seen_per_cat[cur_labels[shape_idx]] += 1      # count how many point clouds of each category have been seen
            total_seg_acc_per_cat[cur_labels[shape_idx]] += per_instance_part_acc[shape_idx - begidx]   # accumulate the accuracy of each category

      total_loss = total_loss * 1.0 / total_seen            # overall segmentation loss
      total_seg_acc = total_seg_acc * 1.0 / total_seen      # overall mean segmentation accuracy, averaged over batches

      # Write the summaries
      test_loss_sum, test_seg_acc_sum = sess.run(\
          [total_test_loss_sum_op, seg_test_acc_sum_op], \
          feed_dict={total_testing_loss_ph: total_loss, \
          seg_testing_acc_ph: total_seg_acc})

      test_writer.add_summary(test_loss_sum, (epoch_num+1) * num_train_file-1)
      test_writer.add_summary(test_seg_acc_sum, (epoch_num+1) * num_train_file-1)


      printout(flog, '\t\tTesting Total Mean_loss: %f' % total_loss)
      printout(flog, '\t\tTesting Seg Accuracy: %f' % total_seg_acc)

      for cat_idx in range(NUM_CATEGORIES):     # 0-15
        if total_seen_per_cat[cat_idx] > 0:  # if objects of this category have been seen, print how many
          printout(flog, '\n\t\tCategory %s Object Number: %d' % (all_obj_cats[cat_idx][0], total_seen_per_cat[cat_idx]))
          printout(flog, '\t\tCategory %s Seg Accuracy: %f' % (all_obj_cats[cat_idx][0], total_seg_acc_per_cat[cat_idx]/total_seen_per_cat[cat_idx]))
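# For reference, the accuracy bookkeeping inside the batch loop reduces to an
# element-wise comparison averaged per shape and then over the batch. A tiny
# standalone illustration with made-up labels:
import numpy as np

pred_seg = np.array([[1, 1, 2, 2],    # predicted part labels, 2 shapes x 4 points
                     [0, 3, 3, 3]])
gt_seg = np.array([[1, 2, 2, 2],      # ground-truth part labels
                   [0, 3, 3, 1]])

per_instance_part_acc = np.mean(pred_seg == gt_seg, axis=1)  # array([0.75, 0.75])
average_part_acc = np.mean(per_instance_part_acc)            # 0.75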
Example #5
def eval_one_epoch(sess, ops, test_writer,is_full_training):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    loss_sum = acc = 0
    acc_kmeans = 0


    for fn in range(len(TEST_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR,TEST_FILES[test_idxs[fn]])
        current_data, current_label, current_cluster = provider.load_h5_data_label_seg(current_file)
        current_label = np.squeeze(current_label)

            
            
        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        #num_batches = 5
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            batch_data, batch_label = get_batch(current_data, current_label,start_idx, end_idx)
            cur_batch_size = end_idx-start_idx
            
            feed_dict = {ops['pointclouds_pl']: batch_data,
                         ops['is_training_pl']: is_training,
                         ops['labels_pl']: batch_label,
                         ops['alpha']: 2*(EPOCH_CNT-MAX_PRETRAIN+1),

            }

            if is_full_training:
                summary, step, loss_val, max_pool,dist,mu= sess.run([ops['merged'], ops['step'],
                                                                     ops['kmeans_loss'],
                                                                     ops['max_pool'],ops['stack_dist'],
                                                                     ops['mu']
                                                                 ],
                                                                    feed_dict=feed_dict)
                if batch_idx==0:
                    log_string("mu: {}".format(mu))     
                batch_cluster = np.array([np.where(r==1)[0][0] for r in current_cluster[start_idx:end_idx]])
                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster

                acc+=cluster_acc(batch_cluster,cluster_assign)
                

            else:
                summary, step, loss_val= sess.run([ops['merged'], ops['step'],
                                                   ops['class_loss']
                                               ],
                                                  feed_dict=feed_dict)


            test_writer.add_summary(summary, step)
            


            loss_sum += np.mean(loss_val)
        
    total_loss = loss_sum*1.0 / float(num_batches)
    log_string('test mean loss: %f' % (total_loss))
    log_string('testing clustering accuracy: %f' % (acc / float(num_batches)))

    EPOCH_CNT += 1
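# cluster_acc is not shown in this example; it is assumed to be the usual
# unsupervised clustering accuracy, i.e. the best accuracy over all one-to-one
# matchings between predicted cluster ids and true labels (Hungarian matching):
import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_acc(y_true, y_pred):
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    n_classes = max(y_pred.max(), y_true.max()) + 1
    # Contingency matrix: w[i, j] counts points assigned to cluster i with true label j
    w = np.zeros((n_classes, n_classes), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    # Maximize matched counts by minimizing (max - w) with the Hungarian algorithm
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() / y_pred.size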
Example #6
    def train_one_epoch(train_file_idx, epoch_num):     # train_file_idx is the shuffled file order
      is_training = True

      # Iterate over every training file
      for i in range(num_train_file):
        cur_train_filename = os.path.join(hdf5_data_dir, train_file_list[train_file_idx[i]])  # path of the current training point-cloud file
        printout(flog, 'Loading train file ' + cur_train_filename)

        cur_data, cur_labels, cur_seg = provider.load_h5_data_label_seg(cur_train_filename)
        cur_data, cur_labels, order = provider.shuffle_data(cur_data, np.squeeze(cur_labels))
        cur_seg = cur_seg[order, ...]

        cur_labels_one_hot = convert_label_to_one_hot(cur_labels)

        num_data = len(cur_labels)
        num_batch = num_data // (FLAGS.num_gpu * batch_size) # For all working gpus; num_batch is how many batches this training file is split into

        total_loss = 0.0
        total_seg_acc = 0.0

        # For each batch
        for j in range(num_batch):
          begidx_0 = j * batch_size       # first gpu
          endidx_0 = (j + 1) * batch_size
          begidx_1 = (j + 1) * batch_size   # second gpu (unused while the second feed below stays commented out)
          endidx_1 = (j + 2) * batch_size

          feed_dict = {
              # For the first gpu
              pointclouds_phs[0]: cur_data[begidx_0: endidx_0, ...],      # 4*2048*3
              input_label_phs[0]: cur_labels_one_hot[begidx_0: endidx_0, ...],    # 4*16
              seg_phs[0]: cur_seg[begidx_0: endidx_0, ...],     # 4*2048, each value in 0-49
              is_training_phs[0]: is_training, 
              # # For the second gpu
              # pointclouds_phs[1]: cur_data[begidx_1: endidx_1, ...],
              # input_label_phs[1]: cur_labels_one_hot[begidx_1: endidx_1, ...],
              # seg_phs[1]: cur_seg[begidx_1: endidx_1, ...],
              # is_training_phs[1]: is_training,
              }


          # train_op updates all working gpus; the remaining fetched tensors come from the first gpu
          # mean loss of the batch, loss of each point cloud, and the segmentation predictions
          _, loss_val, per_instance_seg_loss_val, seg_pred_val, pred_seg_res \
              = sess.run([train_op, loss, per_instance_seg_loss, seg_pred, per_instance_seg_pred_res], \
              feed_dict=feed_dict)

          # per_instance_part_acc = np.mean(pred_seg_res == cur_seg[begidx_1: endidx_1, ...], axis=1)
          per_instance_part_acc = np.mean(pred_seg_res == cur_seg[begidx_0: endidx_0, ...], axis=1)     # segmentation accuracy of each point cloud

          average_part_acc = np.mean(per_instance_part_acc)        # mean accuracy of the current batch

          total_loss += loss_val
          total_seg_acc += average_part_acc
          # after this loop finishes, one training file has been fully processed

        total_loss = total_loss * 1.0 / num_batch     # each training file yields one loss and one seg_acc
        total_seg_acc = total_seg_acc * 1.0 / num_batch

        # Write the summaries
        lr_sum, bn_decay_sum, batch_sum, train_loss_sum, train_seg_acc_sum = sess.run(\
            [lr_op, bn_decay_op, batch_op, total_train_loss_sum_op, seg_train_acc_sum_op], \
            feed_dict={total_training_loss_ph: total_loss, seg_training_acc_ph: total_seg_acc})

        train_writer.add_summary(train_loss_sum, i + epoch_num * num_train_file)   # epoch_num keeps increasing across epochs
        train_writer.add_summary(lr_sum, i + epoch_num * num_train_file)
        train_writer.add_summary(bn_decay_sum, i + epoch_num * num_train_file)
        train_writer.add_summary(train_seg_acc_sum, i + epoch_num * num_train_file)
        train_writer.add_summary(batch_sum, i + epoch_num * num_train_file)

        printout(flog, '\tTrain_file: {}, Training Total Mean_loss: {}'.format(i, total_loss))    # loss and acc of each training file
        printout(flog, '\t\tTrain_file: {}, Training Seg Accuracy: {}'.format(i, total_seg_acc))
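# provider.shuffle_data, used at the top of this training loop, is assumed to
# shuffle shapes and labels with one shared permutation and to return that
# permutation so cur_seg can be reordered the same way:
import numpy as np

def shuffle_data(data, labels):
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    return data[idx, ...], labels[idx], idx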
Example #7
def train_one_epoch(sess, ops, train_writer,is_full_training):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    
    
    train_idxs = np.arange(0, len(TRAIN_FILES))
    
    acc = loss_sum = 0
    y_pool = []
    for fn in range(len(TRAIN_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR,TRAIN_FILES[train_idxs[fn]])
        current_data, current_label, current_cluster = provider.load_h5_data_label_seg(current_file)
        
        current_label = np.squeeze(current_label)
        
        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        #num_batches = 5
        log_string(str(datetime.now()))        

        
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx+1) * BATCH_SIZE
            batch_data, batch_label = get_batch(current_data, current_label,start_idx, end_idx)
            cur_batch_size = end_idx-start_idx
                

            #print(batch_weight) 
            feed_dict = {ops['pointclouds_pl']: batch_data,
                         ops['labels_pl']: batch_label,
                         ops['is_training_pl']: is_training,
                         ops['alpha']: 2*(EPOCH_CNT-MAX_PRETRAIN+1),
                         
                        
            }
            if is_full_training:
                summary, step, _, loss_val,dist,lr = sess.run([ops['merged'], ops['step'],
                                                            ops['train_op_full'], ops['kmeans_loss'],
                                                            ops['stack_dist'],ops['learning_rate']
                                                           ], 
                                                              feed_dict=feed_dict)

            
                batch_cluster = np.array([np.where(r==1)[0][0] for r in current_cluster[start_idx:end_idx]])
                cluster_assign = np.zeros((cur_batch_size), dtype=int)


                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster

                acc+=cluster_acc(batch_cluster,cluster_assign)
            else:
                summary, step, _, loss_val,max_pool,lr = sess.run([ops['merged'], ops['step'],
                                                                   ops['train_op'], ops['class_loss'],
                                                                   ops['max_pool'],ops['learning_rate']],
                                                               
                                                               feed_dict=feed_dict)
                
                
                
                if len(y_pool)==0:
                    y_pool=np.squeeze(max_pool)                
                else:
                    y_pool=np.concatenate((y_pool,np.squeeze(max_pool)),axis=0)
                
            loss_sum += np.mean(loss_val)

            train_writer.add_summary(summary, step)
    log_string('learning rate: %f' % (lr))
    log_string('train mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('train clustering accuracy: %f' % (acc/ float(num_batches)))
    return y_pool
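# get_batch here just slices the current batch out of the loaded file; a
# minimal sketch of the two-array variant used in this example (the later
# examples presumably also slice the per-event global features and return
# them as a third output):
def get_batch(data, label, start_idx, end_idx):
    return data[start_idx:end_idx, ...], label[start_idx:end_idx]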
Example #8
def eval_one_epoch(sess, ops, test_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    global EPOCH_CNT
    is_training = False
    test_idxs = np.arange(0, len(TEST_FILES))
    # Test on all data: last batch might be smaller than BATCH_SIZE
    loss_sum = acc = 0
    acc_seg = 0

    for fn in range(len(TEST_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TEST_FILES[test_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')
        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using less global variables than possible")
            adds['global'] = adds['global'][:, :NUM_GLOB]

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['is_training_pl']: is_training,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, loss_val, pred_val, max_pool, dist = sess.run(
                    [
                        ops['merged'],
                        ops['step'],
                        ops['kmeans_loss'],
                        ops['pred'],
                        ops['max_pool'],
                        ops['stack_dist'],

                        #ops['pi']
                    ],
                    feed_dict=feed_dict)

                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]

                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)

            else:
                summary, step, loss_val, pred_val, max_pool = sess.run(
                    [
                        ops['merged'],
                        ops['step'],
                        ops['classify_loss'],
                        ops['pred'],
                        ops['max_pool'],
                    ],
                    feed_dict=feed_dict)

            test_writer.add_summary(summary, step)

            loss_sum += np.mean(loss_val)

    total_loss = loss_sum * 1.0 / float(num_batches)
    log_string('mean loss: %f' % (total_loss))
    log_string('testing clustering accuracy: %f' % (acc / float(num_batches)))

    EPOCH_CNT += 1
    if FLAGS.min == 'acc':
        return acc / float(num_batches)  # accumulated clustering accuracy averaged over batches
    else:
        return total_loss
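# provider.load_add, used above to fetch the 'global' (and later 'masses')
# arrays, is assumed to read additional named datasets out of the same HDF5
# file and return them as a dict; roughly:
import h5py

def load_add(h5_filename, names):
    with h5py.File(h5_filename, 'r') as f:
        return {name: f[name][:] for name in names}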
Example #9
def train_one_epoch(sess, ops, train_writer, is_full_training):
    """ ops: dict mapping from string to tf ops """
    is_training = True

    train_idxs = np.arange(0, len(TRAIN_FILES))

    acc = loss_sum = 0
    y_pool = []
    y_assign = []
    for fn in range(len(TRAIN_FILES)):
        #log_string('----' + str(fn) + '-----')
        current_file = os.path.join(H5_DIR, TRAIN_FILES[train_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')

        adds = provider.load_add(current_file, ['global'])
        if NUM_GLOB < adds['global'].shape[1]:
            log_string("Using less global variables than possible")
            adds['global'] = adds['global'][:, :NUM_GLOB]

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        if FLAGS.nbatches > 0:
            num_batches = FLAGS.nbatches

        log_string(str(datetime.now()))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE
            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, adds['global'], start_idx,
                end_idx)
            cur_batch_size = end_idx - start_idx

            #print(batch_weight)
            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['global_pl']: batch_global,
                ops['is_training_pl']: is_training,
                ops['alpha']: 10 * (EPOCH_CNT - MAX_PRETRAIN + 1),
            }
            if is_full_training:
                summary, step, _, loss_val, pred_val, max_pool, dist = sess.run(
                    [
                        ops['merged'], ops['step'], ops['train_op_full'],
                        ops['kmeans_loss'], ops['pred'], ops['max_pool'],
                        ops['stack_dist']
                    ],
                    feed_dict=feed_dict)

                cluster_assign = np.zeros((cur_batch_size), dtype=int)
                for i in range(cur_batch_size):
                    index_closest_cluster = np.argmin(dist[:, i])
                    cluster_assign[i] = index_closest_cluster
                if RD:
                    batch_cluster = current_cluster[start_idx:end_idx]
                    if batch_cluster.size == cluster_assign.size:
                        acc += cluster_acc(batch_cluster, cluster_assign)

            else:
                summary, step, _, loss_val, pred_val, max_pool = sess.run(
                    [
                        ops['merged'], ops['step'], ops['train_op'],
                        ops['classify_loss'], ops['pred'], ops['max_pool']
                    ],
                    feed_dict=feed_dict)

            loss_sum += np.mean(loss_val)
            if len(y_pool) == 0:
                y_pool = np.squeeze(max_pool)

            else:
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)

            train_writer.add_summary(summary, step)
    log_string('mean loss: %f' % (loss_sum / float(num_batches)))
    log_string('train clustering accuracy: %f' % (acc / float(num_batches)))
    return y_pool
Example #10
def eval_one_epoch(sess, ops):
    is_training = False
    eval_idxs = np.arange(0, len(EVALUATE_FILES))

    y_assign = []
    y_glob = []
    acc = 0

    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        if RD:
            current_data, current_cluster, current_label = provider.load_h5_data_label_seg(
                current_file)
        else:
            current_data, current_label = provider.load_h5(current_file, 'seg')

        adds = provider.load_add(current_file, ['global', 'masses'])

        if NUM_GLOB < adds['global'].shape[1]:
            print("Using less global variables than possible")
            current_glob = adds['global'][:, :NUM_GLOB]
        else:
            current_glob = adds['global']

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label, batch_global = get_batch(
                current_data, current_label, current_glob, start_idx, end_idx)

            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['global_pl']: batch_global,
                ops['labels_pl']: batch_label,
                ops['alpha']: 1,  #No impact during evaluation
                ops['is_training_pl']: is_training,
            }

            dist, mu, max_pool = sess.run(
                [ops['stack_dist'], ops['mu'], ops['max_pool']],
                feed_dict=feed_dict)

            cluster_assign = np.zeros((cur_batch_size), dtype=int)
            if RD:
                batch_cluster = current_cluster[start_idx:end_idx]

            for i in range(cur_batch_size):
                index_closest_cluster = np.argmin(dist[:, i])
                cluster_assign[i] = index_closest_cluster
                if RD:
                    acc += cluster_acc(batch_cluster, cluster_assign)

            if len(y_assign) == 0:
                if RD:
                    y_val = batch_cluster
                y_assign = cluster_assign
                y_pool = np.squeeze(max_pool)
            else:
                y_assign = np.concatenate((y_assign, cluster_assign), axis=0)
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)

                if RD:
                    y_val = np.concatenate((y_val, batch_cluster), axis=0)

        if len(y_glob) == 0:
            y_glob = adds['global'][:num_batches * BATCH_SIZE]
            y_mass = adds['masses'][:num_batches * BATCH_SIZE]
        else:
            y_glob = np.concatenate(
                (y_glob, adds['global'][:num_batches * BATCH_SIZE]), axis=0)
            y_mass = np.concatenate(
                (y_mass, adds['masses'][:num_batches * BATCH_SIZE]), axis=0)

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)),
                   "w") as fh5:
        if RD:
            dset = fh5.create_dataset("label", data=y_val)
        dset = fh5.create_dataset("pid", data=y_assign)
        dset = fh5.create_dataset("max_pool", data=y_pool)
        dset = fh5.create_dataset("global", data=y_glob)
        dset = fh5.create_dataset("masses", data=y_mass)
Example #11
def eval_one_epoch(sess, ops):
    is_training = False

    eval_idxs = np.arange(0, len(EVALUATE_FILES))
    y_val = []
    for fn in range(len(EVALUATE_FILES)):
        current_file = os.path.join(H5_DIR, EVALUATE_FILES[eval_idxs[fn]])
        current_data, current_label, current_cluster = provider.load_h5_data_label_seg(
            current_file)
        adds = provider.load_add(current_file, ['masses'])

        current_label = np.squeeze(current_label)

        file_size = current_data.shape[0]
        num_batches = file_size // BATCH_SIZE
        num_batches = 5  # hard-coded cap: only the first 5 batches of each file are evaluated

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            batch_data, batch_label = get_batch(current_data, current_label,
                                                start_idx, end_idx)
            batch_cluster = current_cluster[start_idx:end_idx]
            cur_batch_size = end_idx - start_idx

            feed_dict = {
                ops['pointclouds_pl']: batch_data,
                ops['labels_pl']: batch_label,
                ops['alpha']: 1,  #No impact on evaluation,
                ops['is_training_pl']: is_training,
            }

            loss, dist, max_pool = sess.run(
                [ops['kmeans_loss'], ops['stack_dist'], ops['max_pool']],
                feed_dict=feed_dict)
            cluster_assign = np.zeros((cur_batch_size), dtype=int)
            for i in range(cur_batch_size):
                index_closest_cluster = np.argmin(dist[:, i])
                cluster_assign[i] = index_closest_cluster

            batch_cluster = np.array([
                np.where(r == 1)[0][0]
                for r in current_cluster[start_idx:end_idx]
            ])

            if len(y_val) == 0:
                y_val = batch_cluster
                y_assign = cluster_assign
                y_pool = np.squeeze(max_pool)
                y_mass = adds['masses'][start_idx:end_idx]
            else:
                y_val = np.concatenate((y_val, batch_cluster), axis=0)
                y_assign = np.concatenate((y_assign, cluster_assign), axis=0)
                y_pool = np.concatenate((y_pool, np.squeeze(max_pool)), axis=0)
                y_mass = np.concatenate(
                    (y_mass, adds['masses'][start_idx:end_idx]), axis=0)

    with h5py.File(os.path.join(H5_OUT, '{0}.h5'.format(FLAGS.name)),
                   "w") as fh5:
        dset = fh5.create_dataset("pid", data=y_val)  #Real jet categories
        dset = fh5.create_dataset("label", data=y_assign)  #Cluster labeling
        dset = fh5.create_dataset("max_pool", data=y_pool)
        dset = fh5.create_dataset("masses", data=y_mass)