Code Example #1
  def testTripletSemiHard(self):
    with self.test_session():
      num_data = 10
      feat_dim = 6
      margin = 1.0
      num_classes = 4

      embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
      labels = np.random.randint(
          0, num_classes, size=(num_data)).astype(np.float32)

      # Reshape labels to compute adjacency matrix.
      labels_reshaped = np.reshape(labels, (labels.shape[0], 1))
      # Compute the loss in NP.
      adjacency = np.equal(labels_reshaped, labels_reshaped.T)

      pdist_matrix = pairwise_distance_np(embedding, squared=True)
      loss_np = 0.0
      num_positives = 0.0
      for i in range(num_data):
        for j in range(num_data):
          if adjacency[i][j] > 0.0 and i != j:
            num_positives += 1.0

            pos_distance = pdist_matrix[i][j]
            neg_distances = []

            for k in range(num_data):
              if adjacency[i][k] == 0:
                neg_distances.append(pdist_matrix[i][k])

            # Sort negatives by distance, then pick the closest negative that is
            # farther than the positive (semi-hard); fall back to the farthest
            # negative if none exists.
            neg_distances.sort()
            chosen_neg_distance = neg_distances[0]

            for l in range(len(neg_distances)):
              chosen_neg_distance = neg_distances[l]
              if chosen_neg_distance > pos_distance:
                break

            loss_np += np.maximum(
                0.0, margin - chosen_neg_distance + pos_distance)

      loss_np /= num_positives

      # Compute the loss in TF.
      loss_tf = metric_loss_ops.triplet_semihard_loss(
          labels=ops.convert_to_tensor(labels),
          embeddings=ops.convert_to_tensor(embedding),
          margin=margin)
      loss_tf = loss_tf.eval()
      self.assertAllClose(loss_np, loss_tf)
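Both this test and the nearly identical one below rely on a pairwise_distance_np helper that is not shown in the excerpt. As a rough sketch, assuming it returns a [num_data, num_data] matrix of (optionally squared) Euclidean distances, it might look like this:

import numpy as np

def pairwise_distance_np(feature, squared=False):
    """Hypothetical NumPy reference: pairwise (squared) Euclidean distances.

    Assumes `feature` is a [num_data, feat_dim] array; returns a
    [num_data, num_data] matrix, matching how the test above calls it.
    """
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
    sq_norms = np.sum(np.square(feature), axis=1)
    dists = sq_norms[:, None] - 2.0 * feature.dot(feature.T) + sq_norms[None, :]
    dists = np.maximum(dists, 0.0)  # clamp tiny negatives from rounding
    if not squared:
        dists = np.sqrt(dists)
    return dists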
Code Example #2
    def testTripletSemiHard(self):
        with self.cached_session():
            num_data = 10
            feat_dim = 6
            margin = 1.0
            num_classes = 4

            embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
            labels = np.random.randint(0, num_classes,
                                       size=(num_data)).astype(np.float32)

            # Reshape labels to compute adjacency matrix.
            labels_reshaped = np.reshape(labels, (labels.shape[0], 1))
            # Compute the loss in NP.
            adjacency = np.equal(labels_reshaped, labels_reshaped.T)

            pdist_matrix = pairwise_distance_np(embedding, squared=True)
            loss_np = 0.0
            num_positives = 0.0
            for i in range(num_data):
                for j in range(num_data):
                    if adjacency[i][j] > 0.0 and i != j:
                        num_positives += 1.0

                        pos_distance = pdist_matrix[i][j]
                        neg_distances = []

                        for k in range(num_data):
                            if adjacency[i][k] == 0:
                                neg_distances.append(pdist_matrix[i][k])

                        # Sort negatives by distance, then pick the closest negative
                        # that is farther than the positive (semi-hard); fall back to
                        # the farthest negative if none exists.
                        neg_distances.sort()
                        chosen_neg_distance = neg_distances[0]

                        for l in range(len(neg_distances)):
                            chosen_neg_distance = neg_distances[l]
                            if chosen_neg_distance > pos_distance:
                                break

                        loss_np += np.maximum(
                            0.0, margin - chosen_neg_distance + pos_distance)

            loss_np /= num_positives

            # Compute the loss in TF.
            loss_tf = metric_loss_ops.triplet_semihard_loss(
                labels=ops.convert_to_tensor(labels),
                embeddings=ops.convert_to_tensor(embedding),
                margin=margin)
            loss_tf = loss_tf.eval()
            self.assertAllClose(loss_np, loss_tf)
Code Example #3
File: models.py  Project: lxxue/spherical-cnn
def sphcnn_afterconv(curr, net, args, l_or_h):
    """ Part of model after convolutional layers;
    should be common for different architectures. """
    # normalize by area before computing the mean
    with tf.name_scope('wsa'):
        if args.weighted_sph_avg:
            n = tfnp.shape(curr)[1]
            phi, theta = util.sph_sample(n)
            phi += np.diff(phi)[0]/2
            curr *= np.sin(phi)[np.newaxis, np.newaxis, :, np.newaxis]

    net['final_conv'] = curr

    if 'complex' in args.model:
        curr = tf.abs(curr)
        nlin = 'relu'
    else:
        nlin = args.nonlin

    # curr is last conv layer
    with tf.name_scope('final_pool'):
        net['gap'] = tf.reduce_mean(curr, axis=(1, 2))
        if args.final_pool in ['max', 'all']:
            net['max'] = tf.reduce_max(curr, axis=(1, 2))
        if args.final_pool in ['magnitudes', 'all']:
            net['final_coeffs'] = spherical.sph_harm_transform_batch(curr,
                                                                method=args.transform_method,
                                                                harmonics=l_or_h,
                                                                m0_only=False)
            # use per frequency magnitudes
            net['magnitudes'] = tf.contrib.layers.flatten(tf.reduce_sum(tf.square(net['final_coeffs']),
                                                                        axis=(1, 3)))
            net['magnitudes'] = tf.real(net['magnitudes'])
        if args.final_pool != 'all':
            curr = net[args.final_pool]
        else:
            curr = tf.concat([net['gap'], net['max'], net['magnitudes']], axis=-1)

    if args.dropout:
        curr = tf.nn.dropout(curr,
                             keep_prob=tf.cond(net['training'],
                                               lambda: 0.5,
                                               lambda: 1.0))

    if not args.no_final_fc:
        with tf.variable_scope('fc1') as scope:
            net['fc1'], curr = dup(block(AttrDict({**args.__dict__,
                                                   'batch_norm': False,
                                                   'nonlin': nlin}),
                                         tf.layers.dense, net['training'], curr, 64))
            if args.dropout:
                curr = tf.nn.dropout(curr,
                                     keep_prob=tf.cond(net['training'],
                                                       lambda: 0.5,
                                                       lambda: 1.0))
                for v in scope.trainable_variables():
                    tf.summary.histogram(v.name, v)

    net['descriptor'] = curr

    if args.triplet_loss:
        norm_desc = tf.nn.l2_normalize(curr, dim=-1)
        # this only works w/ fixed batch size
        triplet_loss = triplet_semihard_loss(tf.cast(tf.reshape(net['label'],
                                                                (args.train_bsize,)),
                                                     'int32'),
                                             norm_desc)
        # NaNs may appear if bsize is small:
        triplet_loss = tf.where(tf.is_nan(triplet_loss),
                                tf.zeros_like(triplet_loss),
                                triplet_loss)
        tf.add_to_collection(tf.GraphKeys.LOSSES, triplet_loss)
        net['triplet_loss'] = triplet_loss
    else:
        net['triplet_loss'] = 0

    with tf.variable_scope('out') as scope:
        if args.extra_loss:
            nch = tfnp.shape(curr)[-1]
            net['out'] = tf.layers.dense(curr, args.n_classes)
            second_branch_out = tf.layers.dense(curr[..., nch//2:], args.n_classes)
            tf.losses.softmax_cross_entropy(tf.one_hot(net['label'], args.n_classes),
                                            second_branch_out)
        else:
            net['out'], curr = dup(tf.layers.dense(curr, args.n_classes))
        for v in scope.trainable_variables():
            tf.summary.histogram(v.name, v)

    return net
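The triplet branch above shows the typical TF 1.x usage pattern: L2-normalize the descriptor, cast the labels to int32, and zero out the loss when a batch yields no valid triplets (the in-code comment notes that NaNs may appear with small batches). A minimal sketch of that guard in isolation, with hypothetical placeholder tensors standing in for the model's descriptor and labels:

import tensorflow as tf  # TF 1.x, where the contrib metric-learning losses live

# Hypothetical inputs: a batch of descriptors and integer class labels.
descriptor = tf.placeholder(tf.float32, shape=[32, 64])
labels = tf.placeholder(tf.int32, shape=[32])

# Operate on unit-length embeddings, as in the model above.
norm_desc = tf.nn.l2_normalize(descriptor, axis=-1)
triplet_loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
    labels=labels, embeddings=norm_desc, margin=1.0)

# If the batch happens to contain no valid triplets, the loss can come out
# as NaN; replace it with zero so the total loss stays finite.
triplet_loss = tf.where(tf.is_nan(triplet_loss),
                        tf.zeros_like(triplet_loss),
                        triplet_loss)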
Code Example #4
def main():

    cfg = TrainConfig().parse()
    print (cfg.name)
    result_dir = os.path.join(cfg.result_root, 
            cfg.name+'_'+datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    images_root = '/mnt/work/CUB_200_2011/images/'
    with open('/mnt/work/CUB_200_2011/images.txt', 'r') as fin:
        image_files = fin.read().strip().split('\n')
    with open('/mnt/work/CUB_200_2011/image_class_labels.txt', 'r') as fin:
        labels = fin.read().strip().split('\n')

    train_files = []
    train_labels = []
    val_files = []
    val_labels = []
    for i in range(len(image_files)):
        label = int(labels[i].split(' ')[1])
        if label <= 100:
            train_files.append(images_root+image_files[i].split(' ')[1])
            train_labels.append(label)
        else:
            val_files.append(images_root+image_files[i].split(' ')[1])
            val_labels.append(label)

    class_idx_dict = {}
    for i, l in enumerate(train_labels):
        l = int(l)
        if l not in class_idx_dict:
            class_idx_dict[l] = [i]
        else:
            class_idx_dict[l].append(i)
    C = len(list(class_idx_dict.keys()))

    val_images = np.zeros((len(val_files), 256, 256, 3), dtype=np.uint8)
    for i in range(len(val_files)):
        img = Image.open(val_files[i]).convert('RGB').resize((256,256))
        val_images[i] = np.array(img)

    # generate metadata.tsv for visualize embedding
    with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
        for l in val_labels:
            fout.write('{}\n'.format(int(l)))


    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        model_emb = networks.CUBLayer(n_input=1024, n_output=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
        label_ph = tf.placeholder(tf.int32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])

        pool5 = networks.Inception_V2(input_ph)
        model_emb.forward(pool5, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.logits, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.logits

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # use tensorflow implementation...
        if cfg.loss == 'triplet':
            metric_loss = metric_loss_ops.triplet_semihard_loss(
                          labels=label_ph,
                          embeddings=embedding,
                          margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss = metric_loss_ops.lifted_struct_loss(
                          labels=label_ph,
                          embeddings=embedding,
                          margin=cfg.alpha)
        elif cfg.loss == 'mylifted':
            metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(all_dist, label_ph, cfg.alpha, weighted=False)

        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            for epoch in range(cfg.max_epochs):

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))


                # sample images
                class_in_batch = set()
                idx_batch = np.array([], dtype=np.int32)
                while len(idx_batch) < cfg.batch_size:
                    sampled_class = np.random.choice(list(class_idx_dict.keys()))
                    if not sampled_class in class_in_batch:
                        class_in_batch.add(sampled_class)
                        subsample_size = np.random.choice(range(5, 11))
                        subsample = np.random.permutation(class_idx_dict[sampled_class])[:subsample_size]
                        idx_batch = np.append(idx_batch, subsample)
                idx_batch = idx_batch[:cfg.batch_size]

                image_batch = np.zeros((len(idx_batch), 256, 256, 3), dtype=np.uint8)
                lab_batch = np.zeros((len(idx_batch), ), dtype=np.int32)
                for i, idx in enumerate(idx_batch):
                    # load image with random flipping
                    if np.random.rand() < 0.5:
                        img = Image.open(train_files[idx]).convert('RGB').resize((256,256)).transpose(Image.FLIP_LEFT_RIGHT)
                    else:
                        img = Image.open(train_files[idx]).convert('RGB').resize((256,256))
                    image_batch[i] = np.array(img)
                    lab_batch[i] = train_labels[idx]

                # perform training on the selected triplets
                err, _, step, summ = sess.run([total_loss, train_op, global_step, summary_op],
                                feed_dict = {input_ph: image_batch,
                                            label_ph: lab_batch,
                                            dropout_ph: cfg.keep_prob,
                                            lr_ph: learning_rate})

                print ("%s\tEpoch: %d\tImages num: %d\tLoss %.4f" % \
                        (cfg.name, epoch+1, feat_batch.shape[0], err))

                summary = tf.Summary(value=[tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="images_num", simple_value=feat_batch.shape[0])])
                summary_writer.add_summary(summary, step)
                summary_writer.add_summary(summ, step)

                # validation on val_set
                if (epoch+1) % 1000 == 0:
                    val_embeddings, _ = sess.run([embedding,set_emb], feed_dict={input_ph: val_images, label_ph:val_labels, dropout_ph: 1.0})
                    mAP, mPrec, recall = utils.evaluate_simple(val_embeddings, val_labels)
                    summary = tf.Summary(value=[tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                                        tf.Summary.Value(tag="Validation Recall@1", simple_value=recall),
                                        tf.Summary.Value(tag="Validation [email protected]", simple_value=mPrec)])
                    print ("Epoch: [%d]\tmAP: %.4f\trecall: %.4f" % (epoch+1,mAP,recall))

                    # config for embedding visualization
                    config = projector.ProjectorConfig()
                    visual_embedding = config.embeddings.add()
                    visual_embedding.tensor_name = emb_var.name
                    visual_embedding.metadata_path = os.path.join(result_dir, 'metadata_val.tsv')
                    projector.visualize_embeddings(summary_writer, config)

                    summary_writer.add_summary(summary, step)


                    # save model
                    saver.save(sess, os.path.join(result_dir, cfg.name+'.ckpt'), global_step=step)
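Semi-hard mining only finds triplets when each class in the batch contributes multiple images, which is why the loop above samples whole groups of 5-10 indices per class rather than drawing images uniformly. A standalone sketch of that sampling step, with a hypothetical helper name and the same assumptions as the loop above:

import numpy as np

def sample_balanced_batch(class_idx_dict, batch_size, low=5, high=11):
    """Hypothetical helper mirroring the sampling loop above: keep adding
    groups of `low`..`high-1` indices from distinct classes until the
    batch is full, then truncate to `batch_size`."""
    classes_in_batch = set()
    idx_batch = np.array([], dtype=np.int32)
    while len(idx_batch) < batch_size:
        sampled_class = np.random.choice(list(class_idx_dict.keys()))
        if sampled_class not in classes_in_batch:
            classes_in_batch.add(sampled_class)
            group_size = np.random.choice(range(low, high))
            group = np.random.permutation(class_idx_dict[sampled_class])[:group_size]
            idx_batch = np.append(idx_batch, group)
    return idx_batch[:batch_size]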
Code Example #5
File: model.py  Project: dzorlu/humpback_whales
def _loss_fn(y_true, y_pred):
    y_true = tf.keras.backend.argmax(y_true, axis=-1)
    return triplet_semihard_loss(labels=y_true,
                                 embeddings=y_pred,
                                 margin=params.triplet_margin)
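This wrapper adapts triplet_semihard_loss to Keras's (y_true, y_pred) loss signature by collapsing one-hot labels back to class indices. A rough sketch of how such a wrapper might be wired into a small embedding model is shown below; the Params class, layer sizes, and model are placeholders for illustration, not the project's actual setup:

import tensorflow as tf  # TF 1.x, where the contrib metric-learning losses live

triplet_semihard_loss = tf.contrib.losses.metric_learning.triplet_semihard_loss

# Hypothetical stand-in for the `params` object referenced above.
class Params(object):
    triplet_margin = 0.5
params = Params()

def _loss_fn(y_true, y_pred):
    # Labels arrive one-hot from the data pipeline; the loss wants class ids.
    y_true = tf.keras.backend.argmax(y_true, axis=-1)
    return triplet_semihard_loss(labels=y_true,
                                 embeddings=y_pred,
                                 margin=params.triplet_margin)

# Minimal embedding model: the output is the (L2-normalized) embedding itself,
# so the custom loss compares embeddings rather than class scores.
inputs = tf.keras.Input(shape=(64,))
hidden = tf.keras.layers.Dense(32, activation='relu')(inputs)
embeddings = tf.keras.layers.Lambda(
    lambda t: tf.nn.l2_normalize(t, axis=-1))(tf.keras.layers.Dense(16)(hidden))
model = tf.keras.Model(inputs, embeddings)
model.compile(optimizer='adam', loss=_loss_fn)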