Python ConvBiRTSN Examples

Programming Language: Python

Namespace/Package Name: networks

Method/Function: ConvBiRTSN

Examples at hotexamples.com: 7

Python ConvBiRTSN - 7 examples found. These are the top rated real world Python examples of networks.ConvBiRTSN extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: multimodal_model.py Project: xyang35/multimodal_similarity_cleaned

def main():
    """Train the core embedding network with multimodal triplet mining.

    The function parses ``TrainConfig``, creates a time-stamped result
    directory, and builds a TensorFlow graph with three variable scopes:

    * ``modality_core``    -- the backbone selected by ``cfg.network``
      (ConvTSN / ConvRTSN / ConvBiRTSN) whose ``hidden`` output is the
      embedding being trained;
    * ``modality_sensors`` -- an RTSN + PDDM pair restored from
      ``cfg.sensors_path``;
    * ``modality_segment`` -- an RTSN + PDDM pair restored from
      ``cfg.segment_path``.

    Each training batch selects triplets from the labels
    (``utils.select_triplets_facenet``) and, once
    ``epoch >= cfg.multimodal_epochs``, additional triplets from the fused
    PDDM probabilities (``select_triplets_mul``).  After every epoch the
    validation set is embedded and evaluated, the projector config is
    written, and a checkpoint is saved under the result directory.

    Raises:
        NotImplementedError: if ``cfg.network`` is not one of
            ``"convtsn"``, ``"convrtsn"`` or ``"convbirtsn"``.
    """
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    if cfg.task == "supervised":  # fully supervised task
        train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        sensors_emb_dim = 32
        segment_emb_dim = 32

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph,
                              dropout_ph)  # for lstm has variable scope

        with tf.variable_scope("modality_sensors"):
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PDDM(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            # Map checkpoint names (without the scope prefix) to the scoped
            # variables so the sensors checkpoint can be restored here.
            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("modality_segment"):
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)
            model_pairsim_segment = networks.PDDM(n_input=segment_emb_dim)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            # Same prefix-stripping trick for the segment checkpoint.
            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # get the number of multimodal triplets (x3)
        mul_num_ph = tf.placeholder(tf.int32, shape=[])
        margins_ph = tf.placeholder(tf.float32, shape=[None])
        struct_num = tf.shape(margins_ph)[0] * 3

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # Split the embedding rows into three consecutive groups of
        # (anchor, positive, negative) triplets: everything before the last
        # mul_num_ph rows, then the rows up to the last struct_num rows, then
        # the last struct_num rows.
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding[:(tf.shape(embedding)[0] - mul_num_ph)],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_hard, positive_hard, negative_hard = tf.unstack(
            tf.reshape(embedding[-mul_num_ph:-struct_num],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_struct, positive_struct, negative_struct = tf.unstack(
            tf.reshape(embedding[-struct_num:], [-1, 3, cfg.emb_dim]), 3, 1)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        model_pairsim_sensors.forward(tf.stack([A_sensors, B_sensors], axis=1))
        pddm_AB_sensors = model_pairsim_sensors.prob[:, 1]
        model_pairsim_sensors.forward(tf.stack([A_sensors, C_sensors], axis=1))
        pddm_AC_sensors = model_pairsim_sensors.prob[:, 1]

        # Segment branch
        emb_segment = model_emb_segment.hidden
        A_segment, B_segment, C_segment = tf.unstack(
            tf.reshape(emb_segment, [-1, 3, segment_emb_dim]), 3, 1)
        model_pairsim_segment.forward(tf.stack([A_segment, B_segment], axis=1))
        pddm_AB_segment = model_pairsim_segment.prob[:, 1]
        model_pairsim_segment.forward(tf.stack([A_segment, C_segment], axis=1))
        pddm_AC_segment = model_pairsim_segment.prob[:, 1]

        # fuse prob from all modalities
        prob_AB = 0.5 * (pddm_AB_sensors + pddm_AB_segment)
        prob_AC = 0.5 * (pddm_AC_sensors + pddm_AC_segment)

        ############################# Calculate loss #############################

        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # triplet loss for hard examples from multimodal data
        metric_loss2 = networks.triplet_loss(anchor_hard, positive_hard,
                                             negative_hard, cfg.alpha)

        # margin-based triplet loss for structure mining from multimodal data
        metric_loss3 = networks.triplet_loss(anchor_struct, positive_struct,
                                             negative_struct, margins_ph)

        # whether to apply joint optimization
        if cfg.no_joint:
            # train only the core branch
            train_var_list = [
                v for v in tf.global_variables()
                if v.op.name.startswith("modality_core")
            ]
        else:
            # train everything except the two RTSN sub-networks of the
            # sensors / segment branches
            train_var_list = [
                v for v in tf.global_variables()
                if not (v.op.name.startswith("modality_sensors/RTSN")
                        or v.op.name.startswith("modality_segment/RTSN"))
            ]

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        # Three cases, chosen at run time from mul_num_ph:
        #   mul_num_ph == 0            -> labeled loss only
        #   mul_num_ph == batch size   -> multimodal losses only
        #   otherwise                  -> labeled + multimodal losses
        total_loss = tf.cond(
            tf.greater(mul_num_ph, 0), lambda: tf.cond(
                tf.equal(mul_num_ph,
                         tf.shape(embedding)[0]), lambda:
                (metric_loss2 + metric_loss3 * 0.3) * cfg.lambda_multimodal +
                regularization_loss * cfg.lambda_l2, lambda: metric_loss1 +
                (metric_loss2 + metric_loss3 * 0.3) * cfg.lambda_multimodal +
                regularization_loss * cfg.lambda_l2),
            lambda: metric_loss1 + regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, train_var_list)

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all(
        )  # not logging histogram of variables because it will cause problem when only unimodal_train_op is called

        # Created after merge_all() so they are not part of summary_op and
        # are only fetched when the sensors / segment inputs are fed.
        summ_prob_AB = tf.summary.histogram('Prob_AB_histogram', prob_AB)
        summ_prob_AC = tf.summary.histogram('Prob_AC_histogram', prob_AC)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            feat3_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input,
                model_emb_segment.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            ################## Training loop ##################

            # Initialize pairwise embedding distance for each class on validation set
            val_embeddings, _ = sess.run([embedding, set_emb],
                                         feed_dict={
                                             input_ph: val_feats,
                                             dropout_ph: 1.0
                                         })
            dist_dict = {}
            for i in range(np.max(val_labels) + 1):
                temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                dist_dict[i] = [
                    np.mean(
                        utils.cdist(utils.all_diffs(temp_emb, temp_emb),
                                    metric=cfg.metric))
                ]

            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                # group the shuffled sessions into tuples of sess_per_batch
                # (zip drops an incomplete trailing group)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             feat3_paths_ph: feat3_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = sess.run(
                            next_train)

                        # for memory concern, at most cfg.event_per_batch
                        # randomly chosen events are kept
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            lab = lab[idx]
                            batch_sess = batch_sess[idx]
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               dropout_ph: 1.0
                                           })
                            eve_embedding[start:end] = np.copy(emb)

                        # sample triplets within sampled sessions
                        all_diff = utils.all_diffs(eve_embedding,
                                                   eve_embedding)
                        triplet_selected, active_count = utils.select_triplets_facenet(
                            lab, utils.cdist(all_diff, metric=cfg.metric),
                            cfg.triplet_per_batch, cfg.alpha)

                        # Reset the per-batch counters so every name read
                        # below is bound on both branches.
                        hard_count = 0
                        struct_count = 0
                        multimodal_count = 0
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similarity of all events
                            sim_prob = np.zeros((eve.shape[0], eve.shape[0]),
                                                dtype='float32') * np.nan
                            comb = list(
                                itertools.combinations(range(eve.shape[0]), 2))
                            for start, end in zip(
                                    range(0, len(comb), cfg.batch_size),
                                    range(cfg.batch_size,
                                          len(comb) + cfg.batch_size,
                                          cfg.batch_size)):
                                end = min(end, len(comb))
                                # The branches reshape their input into
                                # (A, B, C) triples; only prob_AB is fetched,
                                # so the third slot just repeats the second.
                                comb_idx = []
                                for c in comb[start:end]:
                                    comb_idx.extend([c[0], c[1], c[1]])
                                sim = sess.run(prob_AB,
                                               feed_dict={
                                                   input_sensors_ph:
                                                   eve_sensors[comb_idx],
                                                   input_segment_ph:
                                                   eve_segment[comb_idx],
                                                   dropout_ph:
                                                   1.0
                                               })
                                # fill the symmetric similarity matrix
                                for i in range(sim.shape[0]):
                                    sim_prob[comb[start + i][0],
                                             comb[start + i][1]] = sim[i]
                                    sim_prob[comb[start + i][1],
                                             comb[start + i][0]] = sim[i]

                            # sample triplets from similarity prediction
                            # maximum number not exceed the cfg.triplet_per_batch

                            triplet_input_idx, margins, triplet_count, hard_count, struct_count = select_triplets_mul(
                                triplet_selected, lab, sim_prob, dist_dict,
                                cfg.triplet_per_batch, 3, 0.8, 0.2)

                            # add up all multimodal triplets
                            multimodal_count = hard_count + struct_count

                            sensors_input = eve_sensors[
                                triplet_input_idx[-(3 * multimodal_count):]]
                            segment_input = eve_segment[
                                triplet_input_idx[-(3 * multimodal_count):]]
                        else:
                            # Before cfg.multimodal_epochs only the labeled
                            # triplets are used.  Without this branch
                            # triplet_count and triplet_input_idx would be
                            # unbound below (UnboundLocalError).
                            # NOTE(review): assumes triplet_selected is a
                            # sequence of (anchor, positive, negative) index
                            # triplets -- confirm against
                            # utils.select_triplets_facenet.
                            triplet_input_idx = [
                                idx for triplet in triplet_selected
                                for idx in triplet
                            ]
                            triplet_count = len(triplet_selected)

                        print(triplet_count, hard_count, struct_count)
                        triplet_input = eve[triplet_input_idx]

                        select_time = time.time() - start_time

                        if len(triplet_input.shape) > 5:  # debugging
                            pdb.set_trace()

                        ##################### Start training  ########################

                        # supervised initialization
                        if multimodal_count == 0:
                            if triplet_count == 0:
                                # nothing to train on in this batch
                                continue
                            # NOTE(review): metric_loss2 / metric_loss3 are
                            # built outside the tf.cond branches, so this run
                            # may still need margins_ph to be fed -- confirm.
                            err, metric_err1, _, step, summ = sess.run(
                                [
                                    total_loss, metric_loss1, train_op,
                                    global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    mul_num_ph: 0,
                                    lr_ph: learning_rate
                                })
                            metric_err2 = 0
                            metric_err3 = 0
                        else:
                            err, metric_err1, metric_err2, metric_err3, _, step, summ, s_AB, s_AC = sess.run(
                                [
                                    total_loss, metric_loss1, metric_loss2,
                                    metric_loss3, train_op, global_step,
                                    summary_op, summ_prob_AB, summ_prob_AC
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    input_segment_ph: segment_input,
                                    mul_num_ph: multimodal_count * 3,
                                    margins_ph: margins,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })
                            summary_writer.add_summary(s_AB, step)
                            summary_writer.add_summary(s_AC, step)

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_count+multimodal_count, load_time, select_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(tag="triplet_count",
                                             simple_value=triplet_count),
                            tf.Summary.Value(tag="hard_count",
                                             simple_value=hard_count),
                            tf.Summary.Value(tag="struct_count",
                                             simple_value=struct_count),
                            tf.Summary.Value(tag="metric_loss1",
                                             simple_value=metric_err1),
                            tf.Summary.Value(tag="metric_loss3",
                                             simple_value=metric_err3),
                            tf.Summary.Value(tag="metric_loss2",
                                             simple_value=metric_err2)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        # the session iterator is exhausted: epoch finished
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec, recall = utils.evaluate_simple(
                    val_embeddings, val_labels)
                # NOTE(review): the third tag below looks garbled (e-mail
                # obfuscation of something like "<metric>@<threshold>");
                # left untouched because the intended text is not recoverable
                # here -- confirm against the upstream project.
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # update dist_dict
                if (epoch + 1) == 50 or (epoch + 1) % 200 == 0:
                    for i in dist_dict.keys():
                        temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                        dist_dict[i].append(
                            np.mean(
                                utils.cdist(utils.all_diffs(
                                    temp_emb, temp_emb),
                                            metric=cfg.metric)))

                    # use a context manager so the file handle is closed
                    # (and the data flushed) as soon as the dump completes
                    with open(os.path.join(result_dir, 'dist_dict.pkl'),
                              'wb') as fout:
                        pickle.dump(dist_dict, fout)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

Example #2

Show file

File: multitask_cross_prediction.py Project: xyang35/multimodal_similarity_cleaned

def main():
    """Train the core embedding model with triplet and cross-modal MSE losses.

    The function parses a ``TrainConfig``, builds a TensorFlow graph made of:

    * a trainable backbone under the ``modality_core`` scope, plus two output
      layers ("sensors" and "segment") that predict auxiliary embeddings from
      the core embedding;
    * two auxiliary ``networks.RTSN`` models under ``modality_sensors`` and
      ``modality_segment`` that are restored from ``cfg.sensors_path`` and
      ``cfg.segment_path`` and are excluded from the training variable list.

    The total loss is the triplet loss on the leading (labeled) part of the
    batch, plus ``lambda_mul_ph`` times the MSE between the predicted and the
    auxiliary embeddings on the trailing part of the batch, plus L2
    regularization. After every epoch the validation set is evaluated,
    summaries are written and a checkpoint is saved into the result directory.

    Raises:
        NotImplementedError: if ``cfg.network`` is not one of the supported
            backbone names.
    """

    cfg = TrainConfig().parse()
    print(cfg.name)
    # every run gets its own timestamped result directory
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    if cfg.task == "supervised":  # fully supervised task
        train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    # only the first cfg.label_num sessions are treated as labeled
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        sensors_emb_dim = 32
        segment_emb_dim = 32

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph,
                              dropout_ph)  # for lstm has variable scope

            # output layers mapping the core embedding to each auxiliary space
            with tf.variable_scope("sensors"):
                model_output_sensors = networks.OutputLayer(
                    n_input=cfg.emb_dim, n_output=sensors_emb_dim)
            with tf.variable_scope("segment"):
                model_output_segment = networks.OutputLayer(
                    n_input=cfg.emb_dim, n_output=segment_emb_dim)

        lambda_mul_ph = tf.placeholder(tf.float32, shape=[])
        with tf.variable_scope("modality_sensors"):
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            # strip the scope prefix so the names match the checkpoint that
            # is restored from cfg.sensors_path below
            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("modality_segment"):
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            # same prefix stripping for the checkpoint at cfg.segment_path
            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
            embedding_segment = tf.nn.l2_normalize(model_emb_segment.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_segment = model_emb_segment.hidden

        # get the number of unsupervised training
        unsup_num = tf.shape(input_sensors_ph)[0]

        # variable for visualizing the embeddings
        # NOTE(review): 1116 is a hard-coded row count; the assign below is
        # created with validate_shape=False -- confirm it matches the
        # validation set size if the projector output looks wrong.
        emb_var = tf.Variable(tf.zeros([1116, cfg.emb_dim], dtype=tf.float32),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        # (the last unsup_num rows are the cross-modal samples, not triplets)
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding[:-unsup_num], [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        model_output_sensors.forward(tf.nn.relu(embedding[-unsup_num:]),
                                     dropout_ph)
        logits_sensors = model_output_sensors.logits
        model_output_segment.forward(tf.nn.relu(embedding[-unsup_num:]),
                                     dropout_ph)
        logits_segment = model_output_segment.logits

        # MSE loss: each output layer regresses onto its own modality embedding
        MSE_loss_sensors = tf.losses.mean_squared_error(
            embedding_sensors, logits_sensors) / sensors_emb_dim
        MSE_loss_segment = tf.losses.mean_squared_error(
            embedding_segment, logits_segment) / segment_emb_dim
        MSE_loss = MSE_loss_sensors + MSE_loss_segment
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        # when the whole batch is cross-modal samples there are no triplets,
        # so the metric loss is left out of the total
        total_loss = tf.cond(
            tf.equal(unsup_num,
                     tf.shape(embedding)[0]), lambda: MSE_loss * lambda_mul_ph
            + regularization_loss * cfg.lambda_l2, lambda: metric_loss +
            MSE_loss * lambda_mul_ph + regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        # only train the core branch
        train_var_list = [
            v for v in tf.global_variables()
            if v.op.name.startswith("modality_core")
        ]
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, train_var_list)

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            feat3_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input,
                model_emb_segment.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())
            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                # the epoch index is derived from the global step counter
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                # group the sessions into tuples of cfg.sess_per_batch; any
                # incomplete trailing group is dropped by zip
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             feat3_paths_ph: feat3_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = sess.run(
                            next_train)

                        # for memory concern, 1000 events are used in maximum
                        if eve.shape[0] > 1000:
                            idx = np.random.permutation(eve.shape[0])[:1000]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            lab = lab[idx]
                            batch_sess = batch_sess[idx]
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # for labeled sessions, use facenet sampling
                        eve_labeled = []
                        lab_labeled = []
                        for i in range(eve.shape[0]):
                            # FIXME: use decode again to get session_id str
                            if batch_sess[i, 0].decode() in labeled_session:
                                eve_labeled.append(eve[i])
                                lab_labeled.append(lab[i])

                        if len(eve_labeled):  # if labeled sessions exist
                            eve_labeled = np.stack(eve_labeled, axis=0)
                            lab_labeled = np.stack(lab_labeled, axis=0)

                            # Get the embeddings of all events
                            eve_embedding = np.zeros(
                                (eve_labeled.shape[0], cfg.emb_dim),
                                dtype='float32')
                            for start, end in zip(
                                    range(0, eve_labeled.shape[0],
                                          cfg.batch_size),
                                    range(
                                        cfg.batch_size,
                                        eve_labeled.shape[0] + cfg.batch_size,
                                        cfg.batch_size)):
                                end = min(end, eve_labeled.shape[0])
                                emb = sess.run(embedding,
                                               feed_dict={
                                                   input_ph:
                                                   eve_labeled[start:end],
                                                   dropout_ph: 1.0
                                               })
                                eve_embedding[start:end] = np.copy(emb)

                            # Second, sample triplets within sampled sessions
                            all_diff = utils.all_diffs(eve_embedding,
                                                       eve_embedding)
                            triplet_input_idx, active_count = utils.select_triplets_facenet(
                                lab_labeled,
                                utils.cdist(all_diff, metric=cfg.metric),
                                cfg.triplet_per_batch,
                                cfg.alpha,
                                num_negative=cfg.num_negative)

                            # gather the triplet events only when the sampler
                            # returned indices; the same "is not None" test
                            # guards the concatenation further down
                            if triplet_input_idx is not None:
                                triplet_input = eve_labeled[triplet_input_idx]

                        else:
                            active_count = -1

                        # for all sessions in the batch
                        # (keep a multiple of 3 events, capped at
                        # 3 * cfg.triplet_per_batch)
                        perm_idx = np.random.permutation(eve.shape[0])
                        perm_idx = perm_idx[:min(3 * (len(perm_idx) // 3), 3 *
                                                 cfg.triplet_per_batch)]
                        mul_input = eve[perm_idx]

                        # triplets come first, cross-modal samples last; the
                        # graph splits the batch by the sensors batch size
                        if len(eve_labeled) and triplet_input_idx is not None:
                            triplet_input = np.concatenate(
                                (triplet_input, mul_input), axis=0)
                        else:
                            triplet_input = mul_input
                        sensors_input = eve_sensors[perm_idx]
                        segment_input = eve_segment[perm_idx]

                        ##################### Start training  ########################

                        # supervised initialization
                        if epoch < cfg.multimodal_epochs:
                            if not len(eve_labeled):
                                # if no labeled sessions exist
                                continue
                            # lambda is 0 here, so the MSE terms do not
                            # contribute to the loss; they are still fetched
                            # because the summary below logs them, and the
                            # segment input must be fed to evaluate them
                            err, mse_err1, mse_err2, _, step, summ = sess.run(
                                [
                                    total_loss, MSE_loss_sensors,
                                    MSE_loss_segment, train_op, global_step,
                                    summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    input_segment_ph: segment_input,
                                    dropout_ph: cfg.keep_prob,
                                    lambda_mul_ph: 0.0,
                                    lr_ph: learning_rate
                                })
                        else:
                            print(triplet_input.shape)
                            err, mse_err1, mse_err2, _, step, summ = sess.run(
                                [
                                    total_loss, MSE_loss_sensors,
                                    MSE_loss_segment, train_op, global_step,
                                    summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    input_segment_ph: segment_input,
                                    dropout_ph: cfg.keep_prob,
                                    lambda_mul_ph: cfg.lambda_multimodal,
                                    lr_ph: learning_rate
                                })
                        train_time = time.time() - start_time

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], load_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(
                                tag="triplet_num",
                                simple_value=(triplet_input.shape[0] -
                                              sensors_input.shape[0]) // 3),
                            tf.Summary.Value(tag="MSE_loss_sensors",
                                             simple_value=mse_err1),
                            tf.Summary.Value(tag="MSE_loss_segment",
                                             simple_value=mse_err2)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        # the iterator is exhausted: this epoch is finished
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_err1, val_err2, val_embeddings, _ = sess.run(
                    [MSE_loss_sensors, MSE_loss_segment, embedding, set_emb],
                    feed_dict={
                        input_ph: val_feats,
                        input_sensors_ph: val_feats2,
                        input_segment_ph: val_feats3,
                        dropout_ph: 1.0
                    })
                # keep only the first two metrics so this works whether
                # evaluate_simple returns (mAP, mPrec) or (mAP, mPrec, recall)
                mAP, mPrec = utils.evaluate_simple(val_embeddings,
                                                   val_labels)[:2]

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mPrec@0.5",
                                     simple_value=mPrec),
                    tf.Summary.Value(tag="Validation mse loss sensors",
                                     simple_value=val_err1),
                    tf.Summary.Value(tag="Validation mse loss segment",
                                     simple_value=val_err2)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

Example #3

Show file

def main():
    """Train a single-modality embedding model with a TF metric loss.

    The function parses a ``TrainConfig``, builds the embedding network
    selected by ``cfg.network``, and optimizes either the semi-hard triplet
    loss or the lifted-structure loss (``cfg.loss``) plus L2 regularization.
    Only the first ``cfg.label_num`` training sessions are used. After every
    epoch the validation set is embedded and evaluated, summaries and the
    projector configuration are written, and a checkpoint is saved into the
    result directory.

    Raises:
        NotImplementedError: if ``cfg.network``, ``cfg.feat`` or ``cfg.loss``
            is not one of the supported values.
    """

    # Load configurations and write to config.txt
    cfg = TrainConfig().parse()
    print(cfg.name)
    # every run gets its own timestamped result directory
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        label_ph = tf.placeholder(tf.int32, shape=[None], name="label")
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Define model here ########################

        # Load embedding model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # get the embedding
        # the rank of the input placeholder depends on the feature type
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        else:
            # fail here instead of with a NameError on input_ph below
            raise NotImplementedError
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # Use tensorflow implementation for loss functions
        if cfg.loss == 'triplet':
            metric_loss, active_count = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss, active_count = loss_tf.lifted_struct_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        ####################### Define data loader ############################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # Prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Variable for visualizing the embeddings
        # (one row per validation event)
        emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=10)

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                # the epoch index is derived from the global step counter
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                # (zip drops any incomplete trailing group)
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Get a batch
                        start_time_select = time.time()

                        eve, se, lab = sess.run(next_train)
                        # for memory concern, cfg.event_per_batch events are used in maximum
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            se = se[idx]
                            lab = lab[idx]

                        select_time = time.time() - start_time_select

                        start_time_train = time.time()

                        # perform training on the batch
                        # labels are flattened to shape [batch]; unlike a
                        # bare np.squeeze this keeps a 1-D array even when
                        # the batch holds a single event
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: eve,
                                label_ph: np.reshape(lab, [lab.shape[0]]),
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        # the iterator is exhausted: this epoch is finished
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec, recall = utils.evaluate_simple(
                    val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation mPrec@0.5",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

Example #4

Show file

File: multimodal_model_weak.py Project: xyang35/multimodal_similarity

def main():
    """Train the core embedding model with sensor-guided triplet selection.

    The function
      1. parses ``TrainConfig``, creates a time-stamped result directory under
         ``cfg.result_root`` and stores the configuration there;
      2. builds a TF graph with a "modality_core" embedding network (selected
         by ``cfg.network``) and a "modality_sensors" RTSN + PairSim branch
         whose weights are restored from ``cfg.sensors_path``;
      3. for every batch of sessions embeds all events, selects triplets with
         ``utils.select_triplets_facenet`` and, from epoch
         ``cfg.multimodal_epochs`` on, adds triplets chosen from the sensors
         pair-similarity predictions (strategy: ``cfg.multimodal_select``);
      4. after every epoch evaluates on the validation set, writes summaries
         and the projector config, and saves a checkpoint.

    Raises:
        NotImplementedError: unknown ``cfg.network`` or
            ``cfg.multimodal_select``.
        ValueError: the gathered triplet input has more than 5 dimensions.
    """

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph,
                              dropout_ph)  # for lstm has variable scope

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PairSim(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            # The sensors checkpoint is restored under names without the
            # "modality_sensors/" prefix, so strip the prefix for the saver.
            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)

        # Sensors branch: the input is laid out as (A, B, C) triplets; build
        # the (A, B) and (A, C) pairs and score both with the PairSim model.
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        AB_pairs_sensors = tf.stack([A_sensors, B_sensors], axis=1)
        AC_pairs_sensors = tf.stack([A_sensors, C_sensors], axis=1)
        pairs_sensors = tf.concat([AB_pairs_sensors, AC_pairs_sensors], axis=0)
        model_pairsim_sensors.forward(pairs_sensors, dropout_ph)
        prob_sensors = model_pairsim_sensors.prob
        num_triplets_sensors = tf.shape(A_sensors)[0]
        # put the AB and AC predictions side by side again
        prob_sensors = tf.concat([
            prob_sensors[:num_triplets_sensors],
            prob_sensors[num_triplets_sensors:]
        ], axis=1)  # shape: [N, 4]

        # fuse prob from all modalities
        prob = prob_sensors

        ############################# Calculate loss #############################

        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # weighted triplet loss for multimodal inputs; the multimodal triplets
        # are appended after the labeled ones, hence the [-mul_num:] slices
        mul_num = tf.shape(prob)[0]
        metric_loss2 = networks.triplet_loss(anchor[:mul_num],
                                             positive[:mul_num],
                                             negative[:mul_num], cfg.alpha)
        weighted_metric_loss, weights = networks.weighted_triplet_loss(
            anchor[-mul_num:], positive[-mul_num:], negative[-mul_num:],
            prob[:, 1], prob[:, 3], cfg.alpha)

        unimodal_var_list = [
            v for v in tf.global_variables()
            if v.op.name.startswith("modality_core")
        ]

        # whether to apply joint optimization
        if cfg.no_joint:
            multimodal_var_list = unimodal_var_list
        else:
            multimodal_var_list = tf.global_variables()

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        unimodal_loss = metric_loss1 + regularization_loss * cfg.lambda_l2
        multimodal_loss = (metric_loss2 +
                           cfg.lambda_multimodal * weighted_metric_loss +
                           regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        unimodal_train_op = utils.optimize(unimodal_loss, global_step,
                                           cfg.optimizer, lr_ph,
                                           unimodal_var_list)
        multimodal_train_op = utils.optimize(multimodal_loss, global_step,
                                             cfg.optimizer, lr_ph,
                                             multimodal_var_list)

        saver = tf.train.Saver(max_to_keep=10)
        # not logging histogram of variables because it will cause problem
        # when only unimodal_train_op is called
        summary_op = tf.summary.merge_all()

        # Created after merge_all() on purpose: these need the sensors input
        # and are therefore fetched separately.
        summ_prob = tf.summary.histogram('Prob_histogram', prob)
        summ_weights = tf.summary.histogram('Weights_histogram', weights)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph,
            feat2_paths_ph,
            label_paths_ph,
            sess_per_batch=cfg.sess_per_batch,
            num_threads=2,
            shuffled=False,
            preprocess_func=[
                model_emb.prepare_input, model_emb_sensors.prepare_input
            ])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(session[1], session[-1],
                                                   utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i, (sess_id, boundary) in enumerate(
                    zip(val_sess, val_boundaries)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], sess_id, boundary[0], boundary[1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            # the sensors branch is always restored from its own checkpoint
            restore_saver_sensors.restore(sess, cfg.sensors_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                # group sessions into full batches (a trailing partial batch
                # is dropped by zip)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))

                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             feat2_paths_ph: feat2_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, lab = sess.run(next_train)
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events, cfg.batch_size at a time
                        num_events = eve.shape[0]
                        eve_embedding = np.zeros((num_events, cfg.emb_dim),
                                                 dtype='float32')
                        for start in range(0, num_events, cfg.batch_size):
                            end = min(start + cfg.batch_size, num_events)
                            eve_embedding[start:end] = sess.run(
                                embedding,
                                feed_dict={
                                    input_ph: eve[start:end],
                                    dropout_ph: 1.0
                                })

                        # sample triplets within sampled sessions
                        triplet_input_idx, negative_count = utils.select_triplets_facenet(
                            lab,
                            eve_embedding,
                            cfg.triplet_per_batch,
                            cfg.alpha,
                            num_negative=cfg.num_negative)
                        if triplet_input_idx is None:
                            continue

                        multimodal_count = 0
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similairty prediction of all pos-neg pairs
                            pos_neg_idx = pos_neg_pairs(lab)
                            # entries that are never predicted stay NaN
                            sim_prob = np.full((num_events, num_events),
                                               np.nan,
                                               dtype='float32')
                            # pos_neg_idx is consumed in groups of three
                            # indices, so chunk in multiples of three
                            chunk = 3 * cfg.batch_size
                            for start in range(0, len(pos_neg_idx), chunk):
                                end = min(start + chunk, len(pos_neg_idx))
                                batch_idx = pos_neg_idx[start:end]
                                batch_prob, histo_prob = sess.run(
                                    [prob, summ_prob],
                                    feed_dict={
                                        input_sensors_ph:
                                        eve_sensors[batch_idx],
                                        dropout_ph: 1.0
                                    })
                                summary_writer.add_summary(histo_prob, step)

                                for i in range(batch_prob.shape[0]):
                                    sim_prob[batch_idx[i * 3],
                                             batch_idx[i * 3 + 1]] = batch_prob[i, 1]

                            # post-process the similarity prediction matrix [N,N]
                            # average two predictions sim(A,B) and sim(B,A)
                            # not implemented because of nan for backgrounds

                            # sample triplets from similarity prediction
                            # maximum number not exceed the number of triplet_input from facenet selection
                            max_multimodal = len(triplet_input_idx) // 3
                            if cfg.multimodal_select == "confidence":
                                multimodal_input_idx, multimodal_count = select_triplets_multimodal(
                                    sim_prob,
                                    threshold=0.9,
                                    max_num=max_multimodal)
                            elif cfg.multimodal_select == "nopos":
                                multimodal_input_idx, multimodal_count = nopos_triplets_multimodal(
                                    sim_prob, max_num=max_multimodal)
                            elif cfg.multimodal_select == "random":
                                multimodal_input_idx, multimodal_count = random_triplets_multimodal(
                                    sim_prob, max_num=max_multimodal)
                            else:
                                raise NotImplementedError

                            print(len(triplet_input_idx),
                                  len(multimodal_input_idx), multimodal_count)
                            sensors_input = eve_sensors[multimodal_input_idx]
                            # multimodal triplets go after the labeled ones
                            triplet_input_idx.extend(multimodal_input_idx)

                        triplet_input = eve[triplet_input_idx]

                        select_time = time.time() - start_time

                        # input_ph is 5-D; fail loudly instead of dropping
                        # into an interactive debugger
                        if len(triplet_input.shape) > 5:
                            raise ValueError(
                                "Unexpected triplet input shape: %s" %
                                (triplet_input.shape, ))

                        ##################### Start training  ########################

                        # be careful that for multimodal_count = 0 we just optimize unimodal part
                        if epoch < cfg.multimodal_epochs or multimodal_count == 0:
                            err, metric_err, _, step, summ = sess.run(
                                [
                                    unimodal_loss, metric_loss1,
                                    unimodal_train_op, global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })
                            mul_err = 0.0
                        else:
                            err, w, metric_err, mul_err, _, step, summ, histo_w = sess.run(
                                [
                                    multimodal_loss, weights, metric_loss2,
                                    weighted_metric_loss, multimodal_train_op,
                                    global_step, summary_op, summ_weights
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    input_sensors_ph: sensors_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })

                            # add summary of weights histogram
                            summary_writer.add_summary(histo_w, step)

                        print(
                            "%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f"
                            % (cfg.name, epoch + 1, batch_count,
                               batch_per_epoch, eve.shape[0],
                               triplet_input.shape[0] // 3, load_time,
                               select_time, err))

                        # tag spellings are kept as-is so existing logs stay
                        # comparable
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count),
                            tf.Summary.Value(tag="multimodal_count",
                                             simple_value=multimodal_count),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="weghted_metric_loss",
                                             simple_value=mul_err)
                        ])

                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        # the session iterator is exhausted: epoch finished
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mPrec@0.5",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

        # as_default() does not close the session; flush pending events and
        # release the session explicitly once training is finished
        summary_writer.close()
        sess.close()

Example #5

Show file

File: evaluate_model.py Project: xyang35/multimodal_similarity

def main():
    """Evaluate a trained embedding model on the test sessions.

    Parses ``EvalConfig``, rebuilds the backbone selected by ``cfg.network``,
    restores its variables from ``cfg.model_path`` (checkpoint names are
    prefixed with ``cfg.variable_name``), embeds every test session and
    passes the L2-normalised embeddings to ``evaluate``.  The metrics are
    printed and pickled to ``results.pkl`` next to the checkpoint.

    Raises:
        NotImplementedError: unknown ``cfg.network`` or ``cfg.feat``.
    """

    cfg = EvalConfig().parse()
    print("Evaluate the model: {}".format(os.path.basename(cfg.model_path)))
    np.random.seed(seed=cfg.seed)

    test_session = cfg.test_session
    test_set = prepare_dataset(cfg.feature_root, test_session, cfg.feat, cfg.label_root, cfg.label_type)

    # load backbone model
    if cfg.network == "tsn":
        model = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    elif cfg.network == "rtsn":
        model = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input)
    elif cfg.network == "convtsn":
        model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    elif cfg.network == "convrtsn":
        model = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input)
    elif cfg.network == "seq2seqtsn":
        # n_input is read from the config, like in the other branches
        model = networks.Seq2seqTSN(n_seg=cfg.num_seg, n_input=cfg.n_input, emb_dim=cfg.emb_dim, reverse=cfg.reverse)
    elif cfg.network == "convbirtsn":
        model = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    else:
        raise NotImplementedError

    # get the embedding; the placeholder rank depends on the feature type
    if cfg.feat == "sensors" or cfg.feat == "segment":
        input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None])
    elif cfg.feat == "resnet" or cfg.feat == "segment_down":
        input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
    else:
        # fail here instead of with a NameError on input_ph further down
        raise NotImplementedError
    dropout_ph = tf.placeholder(tf.float32, shape=[])
    model.forward(input_ph, dropout_ph)
    embedding = tf.nn.l2_normalize(model.hidden, axis=1, epsilon=1e-10, name='embedding')

    # Testing
    if cfg.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # restore variables: checkpoint name = cfg.variable_name + graph name
    var_list = {cfg.variable_name + v.op.name: v for v in tf.global_variables()}

    saver = tf.train.Saver(var_list)
    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains snapshot number
        saver.restore(sess, cfg.model_path)

        duration = 0.0  # accumulated time spent in the forward pass only
        eve_embeddings = []
        labels = []
        for i, session in enumerate(test_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print("{0} / {1}: {2}".format(i, len(test_set), session_id))

            # use prepare_input_test for testing time
            eve_batch, lab_batch, _ = load_data_and_label(session[0], session[1], model.prepare_input_test, transfer=cfg.transfer)

            start_time = time.time()
            emb = sess.run(embedding, feed_dict={input_ph: eve_batch, dropout_ph: 1.0})
            duration += time.time() - start_time

            eve_embeddings.append(emb)
            labels.append(lab_batch)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)
        labels = np.concatenate(labels, axis=0)

    # as_default() does not close the session
    sess.close()

    # evaluate the results
    mAP, mAP_event, mPrec, confusion, count, recall = evaluate(eve_embeddings, np.squeeze(labels))

    # macro average: unweighted mean of the per-event mAP values
    if mAP_event:
        mAP_macro = sum(mAP_event.values()) / len(mAP_event)
    else:
        mAP_macro = float('nan')  # undefined when no event was scored

    print("%d events with dim %d for evaluation, run time: %.3f." % (labels.shape[0], eve_embeddings.shape[1], duration))
    print("mAP = {:.4f}".format(mAP))
    print("mAP_macro = {:.4f}".format(mAP_macro))
    print("mPrec@0.5 = {:.4f}".format(mPrec))
    for idx, k in enumerate((1, 2, 4, 8, 16, 32)):
        print("Recall@{} = {:.4f}".format(k, recall[idx]))

    if cfg.label_type == 'goal':
        num2labels = honda_num2labels
    elif cfg.label_type == 'stimuli':
        num2labels = stimuli_num2labels
    else:
        # no name table for this label type: report the raw label id instead
        num2labels = None

    keys = confusion['labels']
    for i, key in enumerate(keys):
        if key not in mAP_event:
            continue
        label_name = num2labels[key] if num2labels is not None else key
        print("Event {0}: {1}, ratio = {2:.4f}, mAP = {3:.4f}, mPrec@0.5 = {4:.4f}".format(
            key,
            label_name,
            float(count[i]) / np.sum(count),
            mAP_event[key],
            confusion['confusion_matrix'][i, i]))

    # store results next to the checkpoint; the context manager makes sure
    # the file is flushed and closed
    results = {"mAP": mAP,
               "mAP_macro": mAP_macro,
               "mAP_event": mAP_event,
               "mPrec": mPrec,
               "confusion": confusion,
               "count": count,
               "recall": recall}
    results_path = os.path.join(os.path.dirname(cfg.model_path), "results.pkl")
    with open(results_path, 'wb') as fout:
        pkl.dump(results, fout)

Example #6

Show file

File: check_inconsistent_pddm.py Project: xyang35/multimodal_similarity

def main():
    """Report where the PDDM pair model disagrees with the validation labels.

    Restores the embedding backbone and the ``networks.PDDM`` verifier from
    ``cfg.model_path``, scores every pair (i, j >= i) whose first event has a
    non-zero label and counts the high-confidence predictions.  Column 1 of
    the PDDM output is treated as the "same label" probability: a confident
    column-0 prediction on a same-label pair is logged as a false negative
    (``val_fn.txt``), a confident column-1 prediction on a different-label
    pair as a false positive (``val_fp.txt``).  Both files are written next
    to the checkpoint.

    Raises:
        NotImplementedError: unknown ``cfg.network`` or ``cfg.feat``.
    """

    cfg = TrainConfig().parse()
    print(cfg.name)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError
        model_ver = networks.PDDM(n_input=cfg.emb_dim)

        # get the embedding; the placeholder rank depends on the feature type
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
        else:
            # fail here instead of with a NameError on input_ph further down
            raise NotImplementedError
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # split the embedding: first half of the batch is A, second half is B
        half = tf.shape(embedding)[0] // 2
        emb_A = embedding[:half]
        emb_B = embedding[half:]
        model_ver.forward(tf.stack((emb_A, emb_B), axis=1))
        pddm = model_ver.prob

        restore_saver = tf.train.Saver()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            # use prepare_input_test for testing time
            eve_batch, lab_batch, _ = load_data_and_label(session[0], session[-1], model_emb.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)

        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            print("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver.restore(sess, cfg.model_path)

            out_dir = os.path.dirname(cfg.model_path)
            header = 'id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n'
            row_fmt = "{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\n"

            # a prediction is "high confidence" when either class probability
            # exceeds this value; defined before the loop so the final report
            # works even when no event is processed
            threshold = 0.8
            count = 0
            count_high = 0
            count_fp = 0
            count_fn = 0

            num_events = val_feats.shape[0]
            with open(os.path.join(out_dir, 'val_fp.txt'), 'w') as fout_fp, \
                    open(os.path.join(out_dir, 'val_fn.txt'), 'w') as fout_fn:
                fout_fp.write(header)
                fout_fn.write(header)

                for i in range(num_events):
                    print("%d/%d" % (i, num_events))
                    # events labelled 0 are never used as the first element
                    if val_labels[i] == 0:
                        continue
                    # Repeat event i along the batch axis so that it is paired
                    # with every event j >= i; repeating a length-1 slice
                    # works for any feature rank.
                    A_input = np.repeat(val_feats[i:i + 1], num_events - i, axis=0)
                    AB_input = np.concatenate((A_input, val_feats[i:]), axis=0)
                    temp_prob = sess.run(pddm, feed_dict={input_ph: AB_input, dropout_ph: 1.0})
                    count += temp_prob.shape[0]

                    for j in range(temp_prob.shape[0]):
                        if temp_prob[j, 0] > threshold or temp_prob[j, 1] > threshold:
                            count_high += 1
                            row = row_fmt.format(i, i + j, val_labels[i, 0], val_labels[i + j, 0], temp_prob[j, 0], temp_prob[j, 1])
                            if val_labels[i] == val_labels[i + j] and temp_prob[j, 0] > threshold:
                                count_fn += 1
                                fout_fn.write(row)
                            elif val_labels[i] != val_labels[i + j] and temp_prob[j, 1] > threshold:
                                count_fp += 1
                                fout_fp.write(row)

            def ratio(numerator, denominator):
                # report 0 instead of dividing by zero when nothing was counted
                return float(numerator) / denominator if denominator else 0.0

            print("High confidence (%f) pairs ratio: %.4f" % (threshold, ratio(count_high, count)))
            print("Consistent pairs ratio: %.4f" % ratio(count_high - count_fp - count_fn, count_high))
            print("False positive pairs ratio: %.4f" % ratio(count_fp, count_high))
            print("False negative pairs ratio: %.4f" % ratio(count_fn, count_high))

        # as_default() does not close the session
        sess.close()

Example #7

Show file

def _pairwise_pddm_prob(sess, pddm_ap, input_ph, dropout_ph, feats,
                        batch_size):
    """Evaluate ``pddm_ap`` for every unordered pair of rows in ``feats``.

    Args:
        sess: session used to run the graph.
        pddm_ap: tensor giving one value per (anchor, positive) pair of the
            triplets fed through ``input_ph``.
        input_ph: input placeholder of the embedding network.
        dropout_ph: dropout placeholder; fed with 1.0 here.
        feats: numpy array of samples, indexed along axis 0.
        batch_size: number of pairs evaluated per ``sess.run`` call.

    Returns:
        A float32 ``(N, N)`` symmetric array of pair values, where ``N`` is
        ``feats.shape[0]``. The diagonal is left as NaN because a sample is
        never paired with itself.
    """
    num = feats.shape[0]
    prob = np.full((num, num), np.nan, dtype='float32')
    pairs = list(itertools.combinations(range(num), 2))
    for start in range(0, len(pairs), batch_size):
        chunk = pairs[start:start + batch_size]
        # The graph reshapes its input into (anchor, positive, negative)
        # triplets and pddm_ap only looks at anchor/positive, so each pair
        # (a, b) is fed as the dummy triplet (a, b, b).
        triplet_idx = []
        for a, b in chunk:
            triplet_idx.extend([a, b, b])
        values = sess.run(pddm_ap,
                          feed_dict={
                              input_ph: feats[triplet_idx],
                              dropout_ph: 1.0
                          })
        for (a, b), value in zip(chunk, values):
            prob[a, b] = value
            prob[b, a] = value
    return prob


def main():
    """Train an embedding network together with a ``networks.PDDM`` head.

    Steps, in order:
      1. Parse ``TrainConfig`` and create a timestamped result directory.
      2. Build the graph: backbone selected by ``cfg.network``, optional L2
         normalisation, triplet loss, a hinge loss on the PDDM outputs and
         the collected regularisation losses.
      3. For every epoch: shuffle the training sessions, and for each batch
         of sessions compute pairwise PDDM values, select triplets with
         ``utils.select_triplets_facenet`` and run one optimisation step.
      4. After every epoch: evaluate on the validation set, write summaries,
         register the embedding for the projector and save a checkpoint.

    Raises:
        NotImplementedError: if ``cfg.network`` or ``cfg.feat`` is not one
            of the values handled below.
    """

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset (only the first cfg.label_num training entries are used)
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg,
                                      emb_dim=cfg.emb_dim,
                                      n_input=cfg.n_input)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError
        model_ver = networks.PDDM(n_input=cfg.emb_dim)

        # get the embedding; the placeholder rank depends on the feature type
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        else:
            # Fail here instead of with an unbound input_ph further down.
            raise NotImplementedError(
                "Unsupported feature type: %s" % cfg.feat)
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        # Column 0 of the PDDM output is read for anchor/positive and
        # anchor/negative pairs; the loss is a hinge with margin 0.6 that
        # pushes pddm_ap below pddm_an.
        model_ver.forward(tf.stack((anchor, positive), axis=1))
        pddm_ap = model_ver.prob[:, 0]
        model_ver.forward(tf.stack((anchor, negative), axis=1))
        pddm_an = model_ver.prob[:, 0]
        pddm_loss = tf.reduce_mean(
            tf.maximum(tf.add(tf.subtract(pddm_ap, pddm_an), 0.6), 0.0), 0)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = pddm_loss + 0.5 * metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                # The epoch index is derived from the global step counter.
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    # float() keeps the exponent fractional even where "/"
                    # on two ints would truncate (Python 2).
                    decay_progress = float(epoch - cfg.static_epochs) / (
                        cfg.max_epochs - cfg.static_epochs)
                    learning_rate = cfg.learning_rate * 0.001**decay_progress

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                # (zip over one shared iterator; an incomplete last group is dropped)
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # Get the similarity of all events
                        sim_prob = _pairwise_pddm_prob(sess, pddm_ap,
                                                       input_ph, dropout_ph,
                                                       eve, cfg.batch_size)

                        # Second, sample triplets within sampled sessions
                        triplet_selected, active_count = utils.select_triplets_facenet(
                            lab, sim_prob, cfg.triplet_per_batch, cfg.alpha)

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        start_time_train = time.time()
                        # Flatten the triplets into one index list so rows
                        # arrive in (anchor, positive, negative) order.
                        triplet_input_idx = [
                            idx for triplet in triplet_selected
                            for idx in triplet
                        ]
                        triplet_input = eve[triplet_input_idx]
                        # perform training on the selected triplets
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: triplet_input,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train
                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, select_time1, select_time2, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(
                                tag="triplet_num",
                                simple_value=triplet_input.shape[0] // 3)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        # The iterator is exhausted: this epoch is finished.
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                val_sim_prob = _pairwise_pddm_prob(sess, pddm_ap, input_ph,
                                                   dropout_ph, val_feats,
                                                   cfg.batch_size)

                # Mean average precision of the PDDM ranking, computed only
                # for queries whose label is > 0.
                mAP_PDDM = 0.0
                count = 0
                for i in range(val_labels.shape[0]):
                    if val_labels[i] > 0:
                        # Drop entry i so the query (and its NaN self-pair)
                        # is excluded from its own ranking.
                        temp_labels = np.delete(val_labels, i, 0)
                        temp = np.delete(val_sim_prob, i, 1)
                        # Scores are 1 - value, so a lower PDDM output
                        # ranks higher.
                        mAP_PDDM += average_precision_score(
                            np.squeeze(temp_labels == val_labels[i, 0]),
                            np.squeeze(1 - temp[i]))
                        count += 1
                # Without any label > 0 there is nothing to average; keep 0.0
                # instead of dividing by zero.
                if count > 0:
                    mAP_PDDM /= count

                # NOTE(review): the third tag below looks mangled by e-mail
                # obfuscation during scraping (probably "<metric>@<k>") --
                # confirm against the upstream project before relying on it.
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mAP_PDDM",
                                     simple_value=mAP_PDDM),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f\tmAP_PDDM: %.4f" %
                      (epoch + 1, mAP, mPrec, mAP_PDDM))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)