Example #1
def main():

    cfg = EvalConfig().parse()
    print("Evaluate the model: {}".format(os.path.basename(cfg.model_path)))
    np.random.seed(seed=cfg.seed)

    test_session = cfg.test_session
    test_set = prepare_dataset(cfg.feature_root, test_session, cfg.feat,
                               cfg.label_root)

    # get the embedding
    with tf.variable_scope("modality_core"):
        # load backbone model
        if cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)  # forward inside this scope: the LSTM creates its variables here

    with tf.variable_scope("hallucination_sensors"):
        sensors_emb_dim = 32
        # load backbone model
        if cfg.network == "convtsn":
            hal_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg,
                                               emb_dim=sensors_emb_dim)
        elif cfg.network == "convrtsn":
            hal_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                emb_dim=sensors_emb_dim)
        else:
            raise NotImplementedError

        hal_emb_sensors.forward(input_ph,
                                dropout_ph)  # forward inside this scope: the LSTM creates its variables here

    # Core branch
    if cfg.normalized:
        embedding = tf.nn.l2_normalize(model_emb.hidden,
                                       axis=-1,
                                       epsilon=1e-10)
        embedding_hal_sensors = tf.nn.l2_normalize(hal_emb_sensors.hidden,
                                                   axis=-1,
                                                   epsilon=1e-10)
    else:
        embedding = model_emb.hidden
        embedding_hal_sensors = hal_emb_sensors.hidden

    embedding_fused = tf.concat((embedding, embedding_hal_sensors), axis=1)

    # Testing
    if cfg.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    saver = tf.train.Saver(tf.global_variables())
    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains the snapshot number)
        saver.restore(sess, cfg.model_path)

        duration = 0.0
        eve_embeddings = []
        labels = []
        for i, session in enumerate(test_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print("{0} / {1}: {2}".format(i, len(test_set), session_id))

            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time

            start_time = time.time()
            emb = sess.run(embedding_fused,
                           feed_dict={
                               input_ph: eve_batch,
                               dropout_ph: 1.0
                           })
            duration += time.time() - start_time

            eve_embeddings.append(emb)
            labels.append(lab_batch)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)
        labels = np.concatenate(labels, axis=0)

    # evaluate the results
    mAP, mAP_event, mPrec, confusion, count, recall = evaluate(
        eve_embeddings, np.squeeze(labels))

    mAP_macro = sum(mAP_event.values()) / len(mAP_event)

    print("%d events with dim %d for evaluation, run time: %.3f." %
          (labels.shape[0], eve_embeddings.shape[1], duration))
    print("mAP = {:.4f}".format(mAP))
    print("mAP_macro = {:.4f}".format(mAP_macro))
    print("[email protected] = {:.4f}".format(mPrec))
    print("Recall@1 = {:.4f}, Recall@10 = {:.4f}, Recall@100 = {:.4f}".format(
        recall[0], recall[1], recall[2]))

    keys = confusion['labels']
    for i, key in enumerate(keys):
        if key not in mAP_event:
            continue
        print(
            "Event {0}: {1}, ratio = {2:.4f}, mAP = {3:.4f}, [email protected] = {4:.4f}"
            .format(key, honda_num2labels[key],
                    float(count[i]) / np.sum(count), mAP_event[key],
                    confusion['confusion_matrix'][i, i]))

    # store results
    result_path = os.path.join(os.path.dirname(cfg.model_path), "results.pkl")
    with open(result_path, 'wb') as fout:
        pkl.dump(
            {
                "mAP": mAP,
                "mAP_macro": mAP_macro,
                "mAP_event": mAP_event,
                "mPrec": mPrec,
                "confusion": confusion,
                "count": count,
                "recall": recall
            }, fout)
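
The macro mAP above is the unweighted mean of the per-event average precisions, so rare events count as much as frequent ones. A minimal sketch with hypothetical per-event values:

# Macro-averaged mAP: every event class contributes equally,
# regardless of how many instances it has.
mAP_event = {"intersection_passing": 0.61, "left_turn": 0.48, "right_turn": 0.55}
mAP_macro = sum(mAP_event.values()) / len(mAP_event)
print("mAP_macro = {:.4f}".format(mAP_macro))  # 0.5467
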
Example #2
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        label_ph = tf.placeholder(tf.float32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(
            all_dist, label_ph, cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('active_ratio', num_active)
        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # First, sample sessions for a batch
                        start_time_select = time.time()
                        eve, se, lab = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        # Second, select samples for a batch
                        batch_idx = select_batch(lab, cfg.batch_size)
                        eve = eve[batch_idx]
                        lab = lab[batch_idx]

                        # Third, perform training on a batch
                        start_time_train = time.time()
                        err, _, step, summ, diff_v, weights_v, fp_v, cn_v, dist_v = sess.run(
                            [
                                total_loss, train_op, global_step, summary_op,
                                diff, weights, fp, cn, all_dist
                            ],
                            feed_dict={
                                input_ph: eve,
                                dropout_ph: cfg.keep_prob,
                                label_ph: np.squeeze(lab),
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train
                        print ("Epoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time1, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="select_time1",
                                             simple_value=select_time1)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
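
The path regrouping above (`zip(*[iter(xs)] * sess_per_batch)`, flagged as hacky in the comments) relies on zip consuming one shared iterator several times, so consecutive paths land in the same tuple. A standalone sketch with hypothetical file names (note that a leftover tail shorter than the group size is silently dropped):

feat_paths = ['s1.h5', 's2.h5', 's3.h5', 's4.h5', 's5.h5']  # hypothetical session files
sess_per_batch = 2

# zip pulls from the *same* iterator twice per tuple, grouping consecutive items;
# the trailing odd element 's5.h5' is dropped.
batches = list(zip(*[iter(feat_paths)] * sess_per_batch))
print(batches)  # [('s1.h5', 's2.h5'), ('s3.h5', 's4.h5')]
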
Example #3
def main():

    cfg = EvalConfig().parse()
    np.random.seed(seed=cfg.seed)

    test_session = cfg.test_session
    test_set = prepare_dataset(cfg.feature_root, test_session, cfg.feat, cfg.label_root)

    ####################### Load models here ########################

    input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
    dropout_ph = tf.placeholder(tf.float32, shape=[])

    with tf.variable_scope("modality_core"):
        # load backbone model
        if cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        model_emb.forward(input_ph, dropout_ph)    # forward inside this scope: the LSTM creates its variables here

        var_list = {}
        for v in tf.global_variables():
            if v.op.name.startswith("modality_core"):
                var_list[v.op.name.replace("modality_core/","")] = v
        restore_saver = tf.train.Saver(var_list)

    with tf.variable_scope("modality_sensors"):
        sensors_emb_dim = 128
        if cfg.network == "convtsn":
            model_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim)
        elif cfg.network == "convrtsn":
            model_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim)
        else:
            raise NotImplementedError
        model_output_sensors = networks.OutputLayer(n_input=sensors_emb_dim, n_output=8)

        model_emb_sensors.forward(input_ph, dropout_ph)
        model_output_sensors.forward(tf.nn.relu(model_emb_sensors.hidden), dropout_ph)

        var_list = {}
        for v in tf.global_variables():
            if v.op.name.startswith("modality_sensors"):
                var_list[v.op.name.replace("modality_sensors/","")] = v
        restore_saver_sensors = tf.train.Saver(var_list)

    ############################# Forward Pass #############################

    # get embeddings
    embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10)
    if cfg.use_output:
        if cfg.normalized:
            embedding_sensors = tf.nn.l2_normalize(model_output_sensors.logits, axis=-1, epsilon=1e-10)
        else:
            embedding_sensors = model_output_sensors.logits
    else:
        embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden, axis=-1, epsilon=1e-10)

    #########################################################################

    # Testing
    if cfg.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    saver = tf.train.Saver()
    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains the snapshot number)
        restore_saver.restore(sess, cfg.model_path)
        print("Restoring the model: {}".format(os.path.basename(cfg.model_path)))
        restore_saver_sensors.restore(sess, cfg.sensors_path)
        print("Restoring the model: {}".format(os.path.basename(cfg.sensors_path)))

        eve_embeddings = []
        sensors_embeddings = []
        labels = []
        for i, session in enumerate(test_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print ("{0} / {1}: {2}".format(i, len(test_set), session_id))

            eve_batch, lab_batch, _ = load_data_and_label(session[0], session[1], model_emb.prepare_input_test)    # use prepare_input_test at test time

            emb, emb_s = sess.run([embedding, embedding_sensors], 
                    feed_dict={input_ph: eve_batch, dropout_ph: 1.0})

            eve_embeddings.append(emb)
            sensors_embeddings.append(emb_s)
            labels.append(lab_batch)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)
        sensors_embeddings = np.concatenate(sensors_embeddings, axis=0)
        labels = np.concatenate(labels, axis=0)

    # evaluate the results
    fused_embeddings = np.concatenate((eve_embeddings, sensors_embeddings), axis=1)
    mAP, mAP_event, mPrec, confusion, count, recall = evaluate(fused_embeddings, np.squeeze(labels))

    mAP_macro = sum(mAP_event.values()) / len(mAP_event)

    print ("%d events with dim %d for evaluation." % (labels.shape[0], fused_embeddings.shape[1]))
    print ("mAP = {}".format(mAP))
    print ("mAP_macro = {}".format(mAP_macro))
    print ("[email protected] = {}".format(mPrec))
    print ("Recall@1 = {}, Recall@10 = {}, Recall@100 = {}".format(recall[0], recall[1], recall[2]))

    keys = confusion['labels']
    for i, key in enumerate(keys):
        if key not in mAP_event:
            continue
        print ("Event {0}: {1}, ratio = {2}, mAP = {3}, [email protected] = {4}".format(
            key,
            honda_num2labels[key],
            float(count[i]) / np.sum(count),
            mAP_event[key],
            confusion['confusion_matrix'][i, i]))

    # store results
    pkl.dump({"mAP": mAP,
              "mAP_macro": mAP_macro,
              "mAP_event": mAP_event,
              "mPrec": mPrec,
              "confusion": confusion,
              "count": count,
              "recall": recall},
              open(os.path.join(os.path.dirname(cfg.model_path), "results.pkl"), 'wb'))
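
Both branches above are restored from checkpoints whose variables were saved without the enclosing variable scope, which is why each tf.train.Saver gets an explicit dictionary mapping checkpoint names to in-graph variables. A minimal sketch of the same trick with a hypothetical variable and checkpoint path:

import tensorflow as tf

with tf.variable_scope("modality_core"):
    # in-graph name: "modality_core/kernel"
    w = tf.get_variable("kernel", shape=[4, 2])

# The checkpoint stores the variable under its unscoped name "kernel",
# so map that name to the scoped variable explicitly before restoring.
restore_saver = tf.train.Saver({"kernel": w})
# restore_saver.restore(sess, "/path/to/model.ckpt")  # hypothetical path
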
Example #4
def main():

    # Load configurations and write to config.txt
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        label_ph = tf.placeholder(tf.int32, shape=[None], name="label")
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Define model here ########################

        # Load embedding model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # get the embedding
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # Use tensorflow implementation for loss functions
        if cfg.loss == 'triplet':
            metric_loss, active_count = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss, active_count = loss_tf.lifted_struct_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        ####################### Define data loader ############################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # Prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Variable for visualizing the embeddings
        emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=10)

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Get a batch
                        start_time_select = time.time()

                        eve, se, lab = sess.run(next_train)
                        # for memory concern, cfg.event_per_batch events are used in maximum
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            se = se[idx]
                            lab = lab[idx]

                        select_time = time.time() - start_time_select

                        start_time_train = time.time()

                        # perform training on the batch
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: eve,
                                label_ph: np.squeeze(lab),
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec, recall = utils.evaluate_simple(
                    val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
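
The learning-rate schedule referenced in the comments ("In Defense of the Triplet Loss for Person Re-Identification") keeps the rate constant for static_epochs and then decays it exponentially, reaching a fixed fraction of the base rate at max_epochs (0.01x here, 0.001x in Example #2). A self-contained sketch of the same curve:

def scheduled_lr(epoch, base_lr, static_epochs, max_epochs, final_factor=0.01):
    """Constant for static_epochs, then exponential decay to final_factor * base_lr."""
    if epoch < static_epochs:
        return base_lr
    progress = (epoch - static_epochs) / (max_epochs - static_epochs)
    return base_lr * final_factor ** progress

# e.g. base_lr=1e-3, constant for 10 epochs, decayed to 1e-5 by epoch 50
for e in (0, 10, 30, 50):
    print(e, scheduled_lr(e, 1e-3, 10, 50))
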
Example #5
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim,
                                              n_input=cfg.n_input)
            model_pairsim_sensors = networks.PairSim(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, cfg.n_input])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors = emb_sensors[:(tf.shape(emb_sensors)[0] // 2)]
        B_sensors = emb_sensors[(tf.shape(emb_sensors)[0] // 2):]
        AB_pairs_sensors = tf.stack([A_sensors, B_sensors], axis=1)
        model_pairsim_sensors.forward(AB_pairs_sensors, dropout_ph)
        prob_sensors = model_pairsim_sensors.prob

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb_sensors.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            print("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver_sensors.restore(sess, cfg.model_path)

            fout = open(
                os.path.join(os.path.dirname(cfg.model_path),
                             'val_inconsistent.txt'), 'w')
            fout.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n')
            for i in range(val_feats.shape[0]):
                print("%d/%d" % (i, val_feats.shape[0]))
                if val_labels[i] == 0:
                    continue
                A_input = np.tile(val_feats[i], (val_feats.shape[0], 1, 1))
                AB_input = np.vstack((A_input, val_feats))

                temp_prob = sess.run(prob_sensors,
                                     feed_dict={
                                         input_sensors_ph: AB_input,
                                         dropout_ph: 1.0
                                     })

                for j in range(temp_prob.shape[0]):
                    same_label = (val_labels[i] == val_labels[j])
                    if (same_label and temp_prob[j, 0] > 0.95) or \
                            (not same_label and temp_prob[j, 1] > 0.95):
                        fout.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(
                            i, j, val_labels[i, 0], val_labels[j, 0],
                            temp_prob[j, 0], temp_prob[j, 1]))
            fout.close()
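
The pair batch above is laid out so that the first half of the rows is the query event repeated N times and the second half is the whole validation set; downstream, PairSim compares row j of each half. A small NumPy sketch of the layout (shapes are hypothetical):

import numpy as np

val_feats = np.random.rand(4, 8, 6)  # hypothetical: 4 events, 8 segments, 6-dim features
i = 2

A_input = np.tile(val_feats[i], (val_feats.shape[0], 1, 1))  # query repeated 4 times
AB_input = np.vstack((A_input, val_feats))  # first half: A copies, second half: B candidates
# After embedding, emb[:N // 2] are the A rows and emb[N // 2:] the B rows,
# so output row j scores the pair (event i, event j).
print(AB_input.shape)  # (8, 8, 6)
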
Example #6
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        with tf.variable_scope("test"):
            # load backbone model
            if cfg.network == "tsn":
                model_emb = networks.TSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
            elif cfg.network == "rtsn":
                model_emb = networks.RTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
            elif cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            # multitask loss (verification)
            #model_ver = networks.PairSim2(n_input=cfg.emb_dim)
            model_ver = networks.PairSim(n_input=cfg.emb_dim)

        var_list = {}
        for v in tf.global_variables():
            if v.op.name.startswith("test"):
                var_list[v.op.name.replace("test/", "")] = v
        restore_saver = tf.train.Saver(var_list)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # split embedding into anchor, positive and negative and calculate distance
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        dist = tf.concat([
            tf.reshape(utils.cdist_tf(anchor - positive), [-1, 1]),
            tf.reshape(utils.cdist_tf(anchor - negative), [-1, 1])
        ],
                         axis=1)

        # verification
        pos_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(positive, axis=1)],
            axis=1)
        pos_label = tf.ones((tf.shape(pos_pairs)[0], ), tf.int32)
        neg_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(negative, axis=1)],
            axis=1)
        neg_label = tf.zeros((tf.shape(neg_pairs)[0], ), tf.int32)

        ver_pairs = tf.concat([pos_pairs, neg_pairs], axis=0)
        ver_label = tf.concat([pos_label, neg_label], axis=0)

        model_ver.forward(ver_pairs, dropout_ph)
        prob = tf.reshape(model_ver.prob[:, 1], (-1, 1))
        sim = tf.concat(
            [prob[:tf.shape(pos_pairs)[0]], prob[tf.shape(pos_pairs)[0]:]],
            axis=1)
        pred = tf.argmax(model_ver.logits, -1)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            print("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver.restore(sess, cfg.model_path)

            fout = open(
                os.path.join(os.path.dirname(cfg.model_path),
                             'val_pairsim.txt'), 'w')
            for i, session in enumerate(val_set):
                session_id = os.path.basename(session[1]).split('_')[0]
                print("{0} / {1}: {2}".format(i, len(val_set), session_id))

                eve_batch, lab_batch, _ = load_data_and_label(
                    session[0], session[1], model_emb.prepare_input_test
                )  # use prepare_input_test at test time

                emb = sess.run(embedding,
                               feed_dict={
                                   input_ph: eve_batch,
                                   dropout_ph: 1.0
                               })

                triplet_per_batch = 10
                triplet_input_idx, negative_count = select_triplets(
                    lab_batch, emb, triplet_per_batch, 0.2)

                triplet_input = eve_batch[triplet_input_idx]
                dist_batch, sim_batch, pred_batch = sess.run(
                    [dist, sim, pred],
                    feed_dict={
                        input_ph: triplet_input,
                        dropout_ph: 1.0
                    })

                batch_label = np.hstack(
                    (np.ones((triplet_input.shape[0] // 3, ), dtype='int32'),
                     np.zeros((triplet_input.shape[0] // 3, ), dtype='int32')))
                acc = accuracy_score(batch_label, pred_batch)
                fout.write("{}: acc = {}\n".format(session_id, acc))
                for k in range(dist_batch.shape[0]):
                    fout.write("{}\t{}\t{}\t{}\n".format(
                        dist_batch[k, 0], dist_batch[k, 1], sim_batch[k, 0],
                        sim_batch[k, 1]))
            fout.close()
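
The reshape to [-1, 3, emb_dim] followed by tf.unstack assumes select_triplets returns indices ordered as consecutive (anchor, positive, negative) triples. A NumPy sketch of the same slicing convention:

import numpy as np

emb_dim = 4
emb = np.arange(6 * emb_dim, dtype=np.float32).reshape(6, emb_dim)  # 2 triplets, hypothetical values

triples = emb.reshape(-1, 3, emb_dim)  # (2, 3, emb_dim)
anchor, positive, negative = triples[:, 0], triples[:, 1], triples[:, 2]
# Row k of each array comes from rows 3k, 3k + 1, 3k + 2 of the flat batch,
# which is exactly the order select_triplets must produce.
print(anchor.shape)  # (2, 4)
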
Example #7
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session[:3]
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)

        # op to decrement global_step, so the extra hard-negative update below does not advance the step counter
        subtract_global_step_op = tf.assign(global_step, global_step - 1)

        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        model_ver = networks.PairSim(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        label_ph = tf.placeholder(tf.int32, shape=[None])
        model_emb.forward(input_ph, dropout_ph)
        embedding = model_emb.hidden

        # split embedding into A and B
        emb_A, emb_B = tf.unstack(tf.reshape(embedding, [-1, 2, cfg.emb_dim]),
                                  2, 1)
        pairs = tf.stack([emb_A, emb_B], axis=1)

        model_ver.forward(pairs, dropout_ph)
        logits = model_ver.logits
        prob = model_ver.prob
        pred = tf.argmax(logits, -1)

        ver_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ph,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = ver_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test at test time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        val_idx, val_labels = random_pairs(val_labels, 1000000, test=True)
        val_feats = val_feats[val_idx]
        val_labels = np.asarray(val_labels, dtype='int32')
        print("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # select pairs for training
                        pair_idx, train_labels = random_pairs(
                            lab, cfg.batch_size, cfg.num_negative)

                        train_input = eve[pair_idx]
                        train_labels = np.asarray(train_labels, dtype='int32')
                        select_time2 = time.time() - start_time_select - select_time1

                        start_time_train = time.time()
                        # perform training on the selected pairs
                        err, y_pred, y_prob, _, step, summ = sess.run(
                            [
                                total_loss, pred, prob, train_op, global_step,
                                summary_op
                            ],
                            feed_dict={
                                input_ph: train_input,
                                label_ph: train_labels,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })
                        acc = accuracy_score(train_labels, y_pred)

                        negative_count = 0
                        if epoch >= cfg.negative_epochs:
                            hard_idx, hard_labels, negative_count = hard_pairs(
                                train_labels, y_prob, 0.5)
                            if negative_count > 0:
                                hard_input = train_input[hard_idx]
                                hard_labels = np.asarray(hard_labels,
                                                         dtype='int32')

                                step = sess.run(subtract_global_step_op)
                                hard_err, y_pred, _, step = sess.run(
                                    [total_loss, pred, train_op, global_step],
                                    feed_dict={
                                        input_ph: hard_input,
                                        label_ph: hard_labels,
                                        dropout_ph: cfg.keep_prob,
                                        lr_ph: learning_rate
                                    })

                        train_time = time.time() - start_time_train

                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss: %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time1, select_time2, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="acc", simple_value=acc),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_err, val_pred, val_prob = sess.run(
                    [total_loss, pred, prob],
                    feed_dict={
                        input_ph: val_feats,
                        label_ph: val_labels,
                        dropout_ph: 1.0
                    })
                val_acc = accuracy_score(val_labels, val_pred)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation acc",
                                     simple_value=val_acc),
                    tf.Summary.Value(tag="Validation loss",
                                     simple_value=val_err)
                ])
                summary_writer.add_summary(summary, step)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

        # print log for analysis
        with open(os.path.join(result_dir, 'val_results.txt'), 'w') as fout:
            fout.write("acc = %.4f\n" % val_acc)
            fout.write("label\tprob_0\tprob_1\tA_idx\tB_idx\n")
            for i in range(val_prob.shape[0]):
                fout.write("%d\t%.4f\t%.4f\t%d\t%d\n" %
                           (val_labels[i], val_prob[i, 0], val_prob[i, 1],
                            val_idx[2 * i], val_idx[2 * i + 1]))
Example #8
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # multitask loss (verification)
        model_ver = networks.PairSim2(n_input=cfg.emb_dim)
        #model_ver = networks.PairSim(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        # verification loss
        pos_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(positive, axis=1)],
            axis=1)
        pos_label = tf.ones((tf.shape(pos_pairs)[0], ), tf.int32)
        neg_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(negative, axis=1)],
            axis=1)
        neg_label = tf.zeros((tf.shape(neg_pairs)[0], ), tf.int32)

        ver_pairs = tf.concat([pos_pairs, neg_pairs], axis=0)
        ver_label = tf.concat([pos_label, neg_label], axis=0)

        model_ver.forward(ver_pairs, dropout_ph)
        logits = model_ver.logits
        pred = tf.argmax(logits, -1)

        ver_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ver_label,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + cfg.lambda_ver * ver_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
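                # i.e. hold the base rate for static_epochs, then decay it
                # exponentially down to 0.001 * learning_rate at max_epochs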
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
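                        # embed events in chunks of cfg.batch_size: the zipped
                        # ranges give [start, end) windows, and the final end
                        # is clamped to eve.shape[0] below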
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               dropout_ph: 1.0
                                           })
                            eve_embedding[start:end] = emb

                        # Second, sample triplets within sampled sessions
                        triplet_input, negative_count = select_triplets_facenet(
                            eve,
                            lab,
                            eve_embedding,
                            cfg.triplet_per_batch,
                            cfg.alpha,
                            metric=cfg.metric)

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        if triplet_input is not None:
                            start_time_train = time.time()
                            # perform training on the selected triplets
                            err, metric_err, ver_err, y_pred, _, step, summ = sess.run(
                                [
                                    total_loss, metric_loss, ver_loss, pred,
                                    train_op, global_step, summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    dropout_ph: cfg.keep_prob,
                                    lr_ph: learning_rate
                                })

                            train_time = time.time() - start_time_train

                            # calculate accuracy
                            batch_label = np.hstack(
                                (np.ones((triplet_input.shape[0] // 3, ),
                                         dtype='int32'),
                                 np.zeros((triplet_input.shape[0] // 3, ),
                                          dtype='int32')))
                            acc = accuracy_score(batch_label, y_pred)
                            print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                    (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0], select_time1, select_time2, train_time, err))

                            summary = tf.Summary(value=[
                                tf.Summary.Value(tag="train_loss",
                                                 simple_value=err),
                                tf.Summary.Value(tag="metric_loss",
                                                 simple_value=metric_err),
                                tf.Summary.Value(tag="ver_loss",
                                                 simple_value=ver_err),
                                tf.Summary.Value(tag="acc", simple_value=acc),
                                tf.Summary.Value(tag="negative_count",
                                                 simple_value=negative_count)
                            ])
                            summary_writer.add_summary(summary, step)
                            summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
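networks.triplet_loss is called throughout these examples but never shown. A minimal FaceNet-style sketch consistent with how it is invoked (anchor/positive/negative batches plus an alpha margin); the distance and reduction details are assumptions:

import tensorflow as tf

def triplet_loss(anchor, positive, negative, alpha):
    # Squared L2 distances between anchor-positive and anchor-negative.
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    # Hinge: positives should sit at least alpha closer than negatives.
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))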
Example #9
0
def main():

    cfg = EvalConfig().parse()
    print ("Evaluate the model: {}".format(os.path.basename(cfg.model_path)))
    np.random.seed(seed=cfg.seed)

    test_session = cfg.test_session
    test_set = prepare_dataset(cfg.feature_root, test_session, cfg.feat, cfg.label_root, cfg.label_type)

    # load backbone model
    if cfg.network == "tsn":
        model = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    elif cfg.network == "rtsn":
        model = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input)
    elif cfg.network == "convtsn":
        model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    elif cfg.network == "convrtsn":
        model = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input)
    elif cfg.network == "seq2seqtsn":
        model = networks.Seq2seqTSN(n_seg=cfg.num_seg, n_input=cfg.n_input, emb_dim=cfg.emb_dim, reverse=cfg.reverse)
    elif cfg.network == "convbirtsn":
        model = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
    else:
        raise NotImplementedError


    # get the embedding
    if cfg.feat == "sensors" or cfg.feat == "segment":
        input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None])
    elif cfg.feat == "resnet" or cfg.feat == "segment_down":
        input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
    dropout_ph = tf.placeholder(tf.float32, shape=[])
    model.forward(input_ph, dropout_ph)
    embedding = tf.nn.l2_normalize(model.hidden, axis=1, epsilon=1e-10, name='embedding')

    # Testing
    if cfg.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # restore variables
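    # the checkpoint variables carry a scope prefix (cfg.variable_name) that
    # this eval graph does not use, so map prefixed names -> graph variables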
    var_list = {}
    for v in tf.global_variables():
        var_list[cfg.variable_name+v.op.name] = v

    saver = tf.train.Saver(var_list)
    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains the snapshot number)
        saver.restore(sess, cfg.model_path)

        duration = 0.0
        eve_embeddings = []
        labels = []
        for i, session in enumerate(test_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print ("{0} / {1}: {2}".format(i, len(test_set), session_id))

#            eve_batch, lab_batch, _ = load_data_and_label(session[0], session[1], mean_pool_input, transfer=cfg.transfer)    # use prepare_input_test for testing time
            eve_batch, lab_batch, _ = load_data_and_label(session[0], session[1], model.prepare_input_test, transfer=cfg.transfer)    # use prepare_input_test for testing time

            start_time = time.time()
            emb = sess.run(embedding, feed_dict={input_ph: eve_batch, dropout_ph: 1.0})
#            emb = eve_batch
            duration += time.time() - start_time

            eve_embeddings.append(emb)
            labels.append(lab_batch)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)
        labels = np.concatenate(labels, axis=0)

    # evaluate the results
    mAP, mAP_event, mPrec, confusion, count, recall = evaluate(eve_embeddings, np.squeeze(labels))

    mAP_macro = 0.0
    for key in mAP_event:
        mAP_macro += mAP_event[key]
    mAP_macro /= len(list(mAP_event.keys()))

    print ("%d events with dim %d for evaluation, run time: %.3f." % (labels.shape[0], eve_embeddings.shape[1], duration))
    print ("mAP = {:.4f}".format(mAP))
    print ("mAP_macro = {:.4f}".format(mAP_macro))
    print ("[email protected] = {:.4f}".format(mPrec))
    print ("Recall@1 = {:.4f}".format(recall[0]))
    print ("Recall@2 = {:.4f}".format(recall[1]))
    print ("Recall@4 = {:.4f}".format(recall[2]))
    print ("Recall@8 = {:.4f}".format(recall[3]))
    print ("Recall@16 = {:.4f}".format(recall[4]))
    print ("Recall@32 = {:.4f}".format(recall[5]))

    if cfg.label_type == 'goal':
        num2labels = honda_num2labels
    elif cfg.label_type == 'stimuli':
        num2labels = stimuli_num2labels

    keys = confusion['labels']
    for i, key in enumerate(keys):
        if key not in mAP_event:
            continue
        print ("Event {0}: {1}, ratio = {2:.4f}, mAP = {3:.4f}, [email protected] = {4:.4f}".format(
            key,
            num2labels[key],
            float(count[i]) / np.sum(count),
            mAP_event[key],
            confusion['confusion_matrix'][i, i]))

    # store results
    pkl.dump({"mAP": mAP,
              "mAP_macro": mAP_macro,
              "mAP_event": mAP_event,
              "mPrec": mPrec,
              "confusion": confusion,
              "count": count,
              "recall": recall},
              open(os.path.join(os.path.dirname(cfg.model_path), "results.pkl"), 'wb'))
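The Recall@K numbers printed above come from the unshown evaluate() helper. A minimal sketch, assuming it measures nearest-neighbour recall under L2 distance (a query counts as a hit at K if any of its K nearest neighbours, itself excluded, shares its label):

import numpy as np

def recall_at_k(embeddings, labels, ks=(1, 2, 4, 8, 16, 32)):
    # Pairwise L2 distances; the diagonal is masked so a query never
    # retrieves itself.
    labels = np.asarray(labels).squeeze()
    dists = np.linalg.norm(embeddings[:, None] - embeddings[None], axis=-1)
    np.fill_diagonal(dists, np.inf)
    order = np.argsort(dists, axis=1)
    return [np.mean([labels[i] in labels[order[i, :k]]
                     for i in range(len(labels))]) for k in ks]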
Example #10
0
def main():

    cfg = TrainConfig().parse()
    print (cfg.name)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root)


    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError
        model_ver = networks.PDDM(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # split the embedding
        emb_A = embedding[:(tf.shape(embedding)[0]//2)]
        emb_B = embedding[(tf.shape(embedding)[0]//2):]
        model_ver.forward(tf.stack((emb_A, emb_B), axis=1))
        pddm = model_ver.prob

        restore_saver = tf.train.Saver()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(session[0], session[-1], model_emb.prepare_input_test)    # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id]*eve_batch.shape[0])
            val_boundaries.extend(boundary)

        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print ("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            print ("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver.restore(sess, cfg.model_path)


            fout_fp = open(os.path.join(os.path.dirname(cfg.model_path), 'val_fp.txt'), 'w')
            fout_fn = open(os.path.join(os.path.dirname(cfg.model_path), 'val_fn.txt'), 'w')
            fout_fp.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n')
            fout_fn.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n')
            count = 0
            count_high = 0    # pairs where either class probability exceeds the threshold
            count_fp = 0
            count_fn = 0
            threshold = 0.8    # confidence threshold (hoisted out of the loop)

            for i in range(val_feats.shape[0]):
                print ("%d/%d" % (i,val_feats.shape[0]))
                if val_labels[i] == 0:
                    continue
                A_input = np.tile(val_feats[i], (val_feats.shape[0]-i,1,1))
                AB_input = np.vstack((A_input, val_feats[i:]))    # concatenate along axis 0
                temp_prob = sess.run(pddm, feed_dict={input_ph: AB_input, dropout_ph:1.0})
                count += temp_prob.shape[0]

                for j in range(temp_prob.shape[0]):
                    if temp_prob[j, 0] > threshold or temp_prob[j, 1] > threshold:
                        count_high += 1
                        if val_labels[i] == val_labels[i+j] and temp_prob[j, 0]>threshold:
                            count_fn += 1
                            fout_fn.write("{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\n".format(i,i+j,val_labels[i,0],val_labels[i+j,0],temp_prob[j,0],temp_prob[j,1]))
                        elif val_labels[i] != val_labels[i+j] and temp_prob[j,1] > threshold:
                            count_fp += 1
                            fout_fp.write("{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\n".format(i,i+j,val_labels[i,0],val_labels[i+j,0],temp_prob[j,0],temp_prob[j,1]))
            fout_fp.close()
            fout_fn.close()

            print ("High confidence (%f) pairs ratio: %.4f" % (threshold, float(count_high)/count))
            print ("Consistent pairs ratio: %.4f" % (float(count_high-count_fp-count_fn)/count_high))
            print ("False positive pairs ratio: %.4f" % (float(count_fp)/count_high))
            print ("False negative pairs ratio: %.4f" % (float(count_fn)/count_high))
Example #11
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg,
                                      emb_dim=cfg.emb_dim,
                                      n_input=cfg.n_input)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                         emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                          emb_dim=cfg.emb_dim,
                                          n_h=cfg.n_h,
                                          n_w=cfg.n_w,
                                          n_C=cfg.n_C,
                                          n_input=cfg.n_input)
        elif cfg.network == "convbirtsn":
            model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                            emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError
        model_ver = networks.PDDM(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors" or cfg.feat == "segment":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet" or cfg.feat == "segment_down":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        model_ver.forward(tf.stack((anchor, positive), axis=1))
        pddm_ap = model_ver.prob[:, 0]
        model_ver.forward(tf.stack((anchor, negative), axis=1))
        pddm_an = model_ver.prob[:, 0]
        pddm_loss = tf.reduce_mean(
            tf.maximum(tf.add(tf.subtract(pddm_ap, pddm_an), 0.6), 0.0), 0)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = pddm_loss + 0.5 * metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph,
                                       label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2,
                                       shuffled=False,
                                       preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)

                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a list to list of list
                # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] *
                                      cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={
                             feat_paths_ph: feat_paths,
                             label_paths_ph: label_paths
                         })

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn)
                        start_time_select = time.time()

                        # First, sample sessions for a batch
                        eve, se, lab = sess.run(next_train)

                        select_time1 = time.time() - start_time_select

                        # Get the similarity of all events
                        sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                           np.nan, dtype='float32')
                        comb = list(
                            itertools.combinations(range(eve.shape[0]), 2))
                        for start, end in zip(
                                range(0, len(comb), cfg.batch_size),
                                range(cfg.batch_size,
                                      len(comb) + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, len(comb))
                            comb_idx = []
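                            # each pair (a, b) is expanded to the pseudo-
                            # triplet (a, b, b): the graph reshapes inputs
                            # into triplets, and pddm_ap then scores the
                            # (anchor, positive) = (a, b) pair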
                            for c in comb[start:end]:
                                comb_idx.extend([c[0], c[1], c[1]])
                            emb = sess.run(pddm_ap,
                                           feed_dict={
                                               input_ph: eve[comb_idx],
                                               dropout_ph: 1.0
                                           })
                            for i in range(emb.shape[0]):
                                sim_prob[comb[start + i][0],
                                         comb[start + i][1]] = emb[i]
                                sim_prob[comb[start + i][1],
                                         comb[start + i][0]] = emb[i]

                        # Second, sample triplets within sampled sessions
                        triplet_selected, active_count = utils.select_triplets_facenet(
                            lab, sim_prob, cfg.triplet_per_batch, cfg.alpha)

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        start_time_train = time.time()
                        triplet_input_idx = [
                            idx for triplet in triplet_selected
                            for idx in triplet
                        ]
                        triplet_input = eve[triplet_input_idx]
                        # perform training on the selected triplets
                        err, _, step, summ = sess.run(
                            [total_loss, train_op, global_step, summary_op],
                            feed_dict={
                                input_ph: triplet_input,
                                dropout_ph: cfg.keep_prob,
                                lr_ph: learning_rate
                            })

                        train_time = time.time() - start_time_train
                        print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, select_time1, select_time2, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(
                                tag="triplet_num",
                                simple_value=triplet_input.shape[0] // 3)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 dropout_ph: 1.0
                                             })
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                val_sim_prob = np.full(
                    (val_feats.shape[0], val_feats.shape[0]),
                    np.nan, dtype='float32')
                val_comb = list(
                    itertools.combinations(range(val_feats.shape[0]), 2))
                for start, end in zip(
                        range(0, len(val_comb), cfg.batch_size),
                        range(cfg.batch_size,
                              len(val_comb) + cfg.batch_size, cfg.batch_size)):
                    end = min(end, len(val_comb))
                    comb_idx = []
                    for c in val_comb[start:end]:
                        comb_idx.extend([c[0], c[1], c[1]])
                    emb = sess.run(pddm_ap,
                                   feed_dict={
                                       input_ph: val_feats[comb_idx],
                                       dropout_ph: 1.0
                                   })
                    for i in range(emb.shape[0]):
                        val_sim_prob[val_comb[start + i][0],
                                     val_comb[start + i][1]] = emb[i]
                        val_sim_prob[val_comb[start + i][1],
                                     val_comb[start + i][0]] = emb[i]

                mAP_PDDM = 0.0
                count = 0
                for i in range(val_labels.shape[0]):
                    if val_labels[i] > 0:
                        temp_labels = np.delete(val_labels, i, 0)
                        temp = np.delete(val_sim_prob, i, 1)
                        mAP_PDDM += average_precision_score(
                            np.squeeze(temp_labels == val_labels[i, 0]),
                            np.squeeze(1 - temp[i]))
                        count += 1
                mAP_PDDM /= count

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation mAP_PDDM",
                                     simple_value=mAP_PDDM),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f\tmAP_PDDM: %.4f" %
                      (epoch + 1, mAP, mPrec, mAP_PDDM))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
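For reference, the PDDM ranking term above (pddm_loss, margin 0.6) reduces to a plain hinge on the two pair scores; restated in isolation:

import tensorflow as tf

def pddm_hinge(p_ap, p_an, margin=0.6):
    # Same hinge as pddm_loss above: drive the (anchor, positive) score
    # at least `margin` below the (anchor, negative) score. prob[:, 0]
    # appears to act as a dissimilarity score here, since the validation
    # code ranks pairs with 1 - prob.
    return tf.reduce_mean(tf.maximum(p_ap - p_an + margin, 0.0))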
Example #12
0
def main():

    cfg = EvalConfig().parse()
    print("Evaluate the model: {}".format(os.path.basename(cfg.model_path)))
    np.random.seed(seed=cfg.seed)

    all_session = cfg.train_session
    all_set = prepare_dataset(cfg.feature_root, all_session, cfg.feat,
                              cfg.label_root)

    n_input = cfg.feat_dim[cfg.feat]

    ########################### Extract features ###########################

    # load backbone model
    model = networks.Seq2seqTSN(n_seg=cfg.num_seg,
                                n_input=n_input,
                                emb_dim=cfg.emb_dim,
                                reverse=cfg.reverse)

    # get the embedding
    input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, n_input])
    dropout_ph = tf.placeholder(tf.float32, shape=[])
    model.forward(input_ph, dropout_ph)
    if cfg.normalized:
        embedding = tf.nn.l2_normalize(model.hidden, axis=-1, epsilon=1e-10)
    else:
        embedding = model.hidden

    # Testing
    if cfg.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    saver = tf.train.Saver()
    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains the snapshot number)
        saver.restore(sess, cfg.model_path)

        eve_embeddings = []
        sessions = []
        eids_all = []
        for i, session in enumerate(all_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print("{0} / {1}: {2}".format(i, len(all_set), session_id))

            eve_batch, _, boundary = load_data_and_label(
                session[0], session[1], model.prepare_input_test)
            for start, end in zip(
                    range(0, eve_batch.shape[0], cfg.batch_size),
                    range(cfg.batch_size, eve_batch.shape[0] + cfg.batch_size,
                          cfg.batch_size)):
                end = min(end, eve_batch.shape[0])
                emb = sess.run(embedding,
                               feed_dict={
                                   input_ph: eve_batch[start:end],
                                   dropout_ph: 1.0
                               })
                eve_embeddings.append(emb)

            # for tracking data sources
            sessions.extend([session_id] * eve_batch.shape[0])
            eids_all.extend(boundary)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)

    print("Feature extraction done!")

    ########################### Clustering ###########################

    NUM_CLUSTER = 20  # k for k-means
    NUM_HIGH = 100  # number of high-confidence points used

    kmeans = KMeans(n_clusters=NUM_CLUSTER, n_init=20)
    print("Fitting clustering... {} points with dim {}".format(
        eve_embeddings.shape[0], eve_embeddings.shape[1]))
    start_time = time.time()
    kmeans.fit(eve_embeddings)
    duration = time.time() - start_time
    print("Done. %.3f seconds used" % (duration))

    ################### Get high-confidence points ##########################

    cluster_idx = kmeans.predict(eve_embeddings)
    cluster_dist = kmeans.transform(eve_embeddings)

    feat = []
    label = []
    ses = []
    eids = []
    for i in range(NUM_CLUSTER):
        idx = np.where(cluster_idx == i)[0]
        dist = cluster_dist[idx, i]
        sorted_idx = np.argsort(dist)

        idx = idx[sorted_idx[:NUM_HIGH]]
        temp = eve_embeddings[idx]
        feat.append(temp)
        label.append(i * np.ones((temp.shape[0], 1), dtype='int32'))
        for j in idx:
            ses.append(sessions[j])
            eids.append(eids_all[j])
        print("Label {} with {} points".format(i, temp.shape[0]))

    feat = np.concatenate(feat, axis=0)
    label = np.concatenate(label, axis=0)

    #########################################################################

    # save results
    result_dir = os.path.join(
        os.path.dirname(cfg.model_path),
        'kmeans_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    pkl.dump(kmeans, open(os.path.join(result_dir, 'kmeans_model.pkl'), 'wb'))
    pkl.dump(
        {
            'feats': feat,
            'labels': label,
            'sessions': ses,
            'boundaries': eids
        }, open(os.path.join(result_dir, 'train_data.pkl'), 'wb'))

    ############################ Feature for validation #################################

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    with sess.as_default():
        sess.run(tf.global_variables_initializer())

        # load the model (note that model_path already contains the snapshot number)
        saver.restore(sess, cfg.model_path)

        eve_embeddings = []
        sessions = []
        eids_all = []
        for i, session in enumerate(val_set):
            session_id = os.path.basename(session[1]).split('_')[0]
            print("{0} / {1}: {2}".format(i, len(all_set), session_id))

            eve_batch, _, boundary = load_data_and_label(
                session[0], session[1], model.prepare_input_test)
            for start, end in zip(
                    range(0, eve_batch.shape[0], cfg.batch_size),
                    range(cfg.batch_size, eve_batch.shape[0] + cfg.batch_size,
                          cfg.batch_size)):
                end = min(end, eve_batch.shape[0])
                emb = sess.run(embedding,
                               feed_dict={
                                   input_ph: eve_batch[start:end],
                                   dropout_ph: 1.0
                               })
                eve_embeddings.append(emb)

            # for tracking data sources
            sessions.extend([session_id] * eve_batch.shape[0])
            eids_all.extend(boundary)

        eve_embeddings = np.concatenate(eve_embeddings, axis=0)

    cluster_idx = kmeans.predict(eve_embeddings)
    cluster_dist = kmeans.transform(eve_embeddings)
    NUM_HIGH = 20

    feat = []
    label = []
    ses = []
    eids = []
    for i in range(NUM_CLUSTER):
        idx = np.where(cluster_idx == i)[0]
        dist = cluster_dist[idx, i]
        sorted_idx = np.argsort(dist)

        idx = idx[sorted_idx[:NUM_HIGH]]
        temp = eve_embeddings[idx]
        feat.append(temp)
        label.append(i * np.ones((temp.shape[0], 1), dtype='int32'))
        for j in idx:
            ses.append(sessions[j])
            eids.append(eids_all[j])
        print("Label {} with {} points".format(i, temp.shape[0]))

    feat = np.concatenate(feat, axis=0)
    label = np.concatenate(label, axis=0)

    pkl.dump(
        {
            'feats': feat,
            'labels': label,
            'sessions': ses,
            'boundaries': eids
        }, open(os.path.join(result_dir, 'val_data.pkl'), 'wb'))
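A usage sketch for the pickles written above (hypothetical downstream step, not part of the example; the path is illustrative, the files are saved under result_dir):

import pickle as pkl

# Reload the pseudo-labelled high-confidence points for a downstream
# classifier. Keys match the dict dumped by the script above.
with open('train_data.pkl', 'rb') as f:
    data = pkl.load(f)
print(data['feats'].shape, data['labels'].shape, len(data['sessions']))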
Example #13
0
def main():

    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    tfrecords_files = glob.glob(cfg.tfrecords_root + '*.tfrecords')
    tfrecords_files = sorted(tfrecords_files)
    train_set = [
        f for f in tfrecords_files
        if os.path.basename(f).split('_')[0] in train_session
    ]
    print("Number of training events: %d" % len(train_set))

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model and get the embedding
        if cfg.network == "tsn":
            model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            seqlen_ph = tf.placeholder(tf.int32,
                                       shape=[None])  # fake, for consistency
            model.forward(input_ph)

        elif cfg.network == "lstm":
            model = networks.ConvLSTM(max_time=cfg.MAX_LENGTH_FRAMES,
                                      emb_dim=cfg.emb_dim)
            input_ph = tf.placeholder(
                tf.float32,
                shape=[None, cfg.MAX_LENGTH_FRAMES, None, None, None])
            seqlen_ph = tf.placeholder(tf.int32, shape=[None])
            model.forward(input_ph, seqlen_ph)

        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model.hidden,
                                           axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        triplet_loss = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = triplet_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)

        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        tf_paths_ph = tf.placeholder(tf.string, shape=[None])
        feat_dict = {'resnet': 98304}
        context_dict = {'label': 'int', 'length': 'int'}
        train_data = event_generator(tf_paths_ph,
                                     feat_dict,
                                     context_dict,
                                     event_per_batch=cfg.event_per_batch,
                                     num_threads=4,
                                     shuffled=True,
                                     preprocess_func=model.prepare_input_tf)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        val_lengths = []
        for session in val_set:
            eve_batch, lab_batch, bou_batch = load_data_and_label(
                session[0], session[1], model.prepare_input)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_lengths.extend([b[1] - b[0] for b in bou_batch])
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        val_lengths = np.asarray(val_lengths, dtype='int32')
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualize embedding
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():

            sess.run(tf.global_variables_initializer())

            # load pretrain model, if needed
            if cfg.pretrained_model:
                print("Restoring pretrained model: %s" % cfg.pretrained_model)
                saver.restore(sess, cfg.pretrained_model)

            ################## Training loop ##################
            epoch = 0
            while epoch < cfg.max_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch    # derive epoch from the global step, as in the other examples

                # learning rate schedule, reference: "In defense of Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                            0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={tf_paths_ph: train_set})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        start_time_select = time.time()

                        context, feature_lists = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        eve = feature_lists[cfg.feat].reshape(
                            (-1, cfg.num_seg) + cfg.feat_dim[cfg.feat])
                        lab = context['label']
                        seq_len = context['length']

                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={
                                               input_ph: eve[start:end],
                                               seqlen_ph: seq_len[start:end]
                                           })
                            eve_embedding[start:end] = emb

                        # Second, sample triplets within sampled sessions
                        # return the triplet input indices
                        if cfg.triplet_select == 'random':
                            triplet_input_idx = select_triplets_random(
                                eve, lab, cfg.triplet_per_batch)
                            negative_count = 0
                        elif cfg.triplet_select == 'facenet':
                            if epoch < cfg.negative_epochs:
                                triplet_input_idx = select_triplets_random(
                                    eve, lab, cfg.triplet_per_batch)
                                negative_count = 0
                            else:
                                triplet_input_idx, negative_count = select_triplets_facenet(
                                    lab,
                                    eve_embedding,
                                    cfg.triplet_per_batch,
                                    cfg.alpha,
                                    metric=cfg.metric)
                        else:
                            raise NotImplementedError

                        select_time2 = time.time(
                        ) - start_time_select - select_time1

                        if triplet_input_idx is not None:

                            triplet_input = eve[triplet_input_idx]
                            triplet_length = seq_len[triplet_input_idx]

                            start_time_train = time.time()
                            # perform training on the selected triplets
                            err, _, step, summ = sess.run(
                                [
                                    total_loss, train_op, global_step,
                                    summary_op
                                ],
                                feed_dict={
                                    input_ph: triplet_input,
                                    seqlen_ph: triplet_length,
                                    lr_ph: learning_rate
                                })

                            train_time = time.time() - start_time_train
                            print ("Epoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \
                                    (epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0], select_time1, select_time2, train_time, err))

                            summary = tf.Summary(value=[
                                tf.Summary.Value(tag="train_loss",
                                                 simple_value=err),
                                tf.Summary.Value(tag="negative_count",
                                                 simple_value=negative_count),
                                tf.Summary.Value(tag="select_time1",
                                                 simple_value=select_time1)
                            ])
                            summary_writer.add_summary(summary, step)
                            summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={
                                                 input_ph: val_feats,
                                                 seqlen_ph: val_lengths
                                             })
                mAP, _ = utils.evaluate(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Valiation mAP", simple_value=mAP)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess,
                           os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
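The select_triplets_random() sampler used above is not shown. A minimal sketch consistent with how it is called (returning a flat list of anchor/positive/negative indices, or None when no triplet can be formed; the sampling details are assumptions):

import numpy as np

def select_triplets_random(eve, lab, triplet_per_batch):
    # Pick an anchor/positive pair from one label and a negative from a
    # different label; return flat indices in (a, p, n) order.
    lab = np.squeeze(np.asarray(lab))
    labels = np.unique(lab)
    if len(labels) < 2:
        return None
    idx = []
    for _ in range(triplet_per_batch):
        pos_lab = np.random.choice(labels)
        pos_idx = np.where(lab == pos_lab)[0]
        if len(pos_idx) < 2:
            continue
        a, p = np.random.choice(pos_idx, 2, replace=False)
        n = np.random.choice(np.where(lab != pos_lab)[0])
        idx.extend([a, p, n])
    return idx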