# Shared imports assumed by the scripts below; each `main()` originally lives
# in its own file next to project-local modules (TrainConfig, networks, utils,
# metric_loss_ops, and data helpers such as prepare_dataset,
# prepare_multimodal_dataset, load_data_and_label, session_generator,
# multimodal_session_generator, event_generator, select_triplets*,
# pos_neg_pairs, prepare_val, enumerate_batch), so their import paths are not
# reproduced here.
import glob
import os
import pickle
import random
import time
from datetime import datetime

import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.metrics import accuracy_score
from tensorflow.contrib.tensorboard.plugins import projector


# ---------------------------------------------------------------------------
# Script 1: evaluate a pretrained embedding + PairSim verification model on
# the validation sessions and dump pairwise distances/similarities to a file.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # variables must live under "test/" so the name-remapped restore works
        with tf.variable_scope("test"):
            # load backbone model
            if cfg.network == "tsn":
                model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            elif cfg.network == "rtsn":
                model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            elif cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            # multitask loss (verification)
            #model_ver = networks.PairSim2(n_input=cfg.emb_dim)
            model_ver = networks.PairSim(n_input=cfg.emb_dim)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("test"):
                    var_list[v.op.name.replace("test/", "")] = v
            restore_saver = tf.train.Saver(var_list)

            # get the embedding
            if cfg.feat == "sensors":
                input_ph = tf.placeholder(tf.float32,
                                          shape=[None, cfg.num_seg, None])
            elif cfg.feat == "resnet":
                input_ph = tf.placeholder(
                    tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb.forward(input_ph, dropout_ph)
            if cfg.normalized:
                embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                               epsilon=1e-10)
            else:
                embedding = model_emb.hidden

            # split embedding into anchor, positive and negative and
            # calculate distances
            anchor, positive, negative = tf.unstack(
                tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
            dist = tf.concat([
                tf.reshape(utils.cdist_tf(anchor - positive), [-1, 1]),
                tf.reshape(utils.cdist_tf(anchor - negative), [-1, 1])
            ], axis=1)

            # verification
            pos_pairs = tf.concat(
                [tf.expand_dims(anchor, axis=1),
                 tf.expand_dims(positive, axis=1)], axis=1)
            pos_label = tf.ones((tf.shape(pos_pairs)[0],), tf.int32)
            neg_pairs = tf.concat(
                [tf.expand_dims(anchor, axis=1),
                 tf.expand_dims(negative, axis=1)], axis=1)
            neg_label = tf.zeros((tf.shape(neg_pairs)[0],), tf.int32)
            ver_pairs = tf.concat([pos_pairs, neg_pairs], axis=0)
            ver_label = tf.concat([pos_label, neg_label], axis=0)

            model_ver.forward(ver_pairs, dropout_ph)
            prob = tf.reshape(model_ver.prob[:, 1], (-1, 1))
            sim = tf.concat(
                [prob[:tf.shape(pos_pairs)[0]], prob[tf.shape(pos_pairs)[0]:]],
                axis=1)
            pred = tf.argmax(model_ver.logits, -1)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():
            sess.run(tf.global_variables_initializer())
            print("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver.restore(sess, cfg.model_path)

            fout = open(
                os.path.join(os.path.dirname(cfg.model_path),
                             'val_pairsim.txt'), 'w')
            for i, session in enumerate(val_set):
                session_id = os.path.basename(session[1]).split('_')[0]
                print("{0} / {1}: {2}".format(i, len(val_set), session_id))

                eve_batch, lab_batch, _ = load_data_and_label(
                    session[0], session[1], model_emb.prepare_input_test
                )  # use prepare_input_test for testing time

                emb = sess.run(embedding, feed_dict={input_ph: eve_batch,
                                                     dropout_ph: 1.0})

                triplet_per_batch = 10
                triplet_input_idx, negative_count = select_triplets(
                    lab_batch, emb, triplet_per_batch, 0.2)
                triplet_input = eve_batch[triplet_input_idx]

                dist_batch, sim_batch, pred_batch = sess.run(
                    [dist, sim, pred],
                    feed_dict={input_ph: triplet_input, dropout_ph: 1.0})

                batch_label = np.hstack(
                    (np.ones((triplet_input.shape[0] // 3,), dtype='int32'),
                     np.zeros((triplet_input.shape[0] // 3,), dtype='int32')))
                acc = accuracy_score(batch_label, pred_batch)
                fout.write("{}: acc = {}\n".format(session_id, acc))
                # use a separate loop variable so the session index `i`
                # above is not shadowed
                for j in range(dist_batch.shape[0]):
                    fout.write("{}\t{}\t{}\t{}\n".format(
                        dist_batch[j, 0], dist_batch[j, 1],
                        sim_batch[j, 0], sim_batch[j, 1]))
            fout.close()
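# --------------------------------------------------------------------------
# Hedged sketch (not from the original repo): the script above feeds raw
# difference tensors such as `anchor - positive` into utils.cdist_tf, which
# suggests that helper reduces a difference tensor to a Euclidean distance.
# A minimal TF1-style implementation under that assumption:
def cdist_tf_sketch(diff, epsilon=1e-12):
    """Euclidean norm over the last axis of a difference tensor.

    diff: [..., D] tensor of element-wise differences; epsilon keeps the
    sqrt differentiable at zero distance.
    """
    return tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1) + epsilon)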
# ---------------------------------------------------------------------------
# Script 2: train an embedding on CUB-200-2011 images (classes 1-100) with an
# Inception_V2 backbone and a lifted-structure / semi-hard triplet loss.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset: classes 1-100 for training, the rest for validation
    images_root = '/mnt/work/CUB_200_2011/images/'
    with open('/mnt/work/CUB_200_2011/images.txt', 'r') as fin:
        image_files = fin.read().strip().split('\n')
    with open('/mnt/work/CUB_200_2011/image_class_labels.txt', 'r') as fin:
        labels = fin.read().strip().split('\n')

    train_files = []
    train_labels = []
    val_files = []
    val_labels = []
    for i in range(len(image_files)):
        label = int(labels[i].split(' ')[1])
        if label <= 100:
            train_files.append(images_root + image_files[i].split(' ')[1])
            train_labels.append(label)
        else:
            val_files.append(images_root + image_files[i].split(' ')[1])
            val_labels.append(label)

    # index training images by class for class-balanced batch sampling
    class_idx_dict = {}
    for i, l in enumerate(train_labels):
        l = int(l)
        if l not in class_idx_dict:
            class_idx_dict[l] = [i]
        else:
            class_idx_dict[l].append(i)
    C = len(list(class_idx_dict.keys()))

    val_images = np.zeros((len(val_files), 256, 256, 3), dtype=np.uint8)
    for i in range(len(val_files)):
        img = Image.open(val_files[i]).convert('RGB').resize((256, 256))
        val_images[i] = np.array(img)

    # generate metadata.tsv for visualizing embeddings
    with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
        for l in val_labels:
            fout.write('{}\n'.format(int(l)))

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        model_emb = networks.CUBLayer(n_input=1024, n_output=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
        label_ph = tf.placeholder(tf.int32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        pool5 = networks.Inception_V2(input_ph)
        model_emb.forward(pool5, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.logits, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.logits

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # use the tensorflow implementations of the metric losses
        if cfg.loss == 'triplet':
            metric_loss = metric_loss_ops.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        elif cfg.loss == 'lifted':
            metric_loss = metric_loss_ops.lifted_struct_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        elif cfg.loss == 'mylifted':
            metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(
                all_dist, label_ph, cfg.alpha, weighted=False)
        else:
            raise NotImplementedError

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            for epoch in range(cfg.max_epochs):
                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # sample a batch: distinct classes, 5-10 images per class,
                # until the batch is full (see the sampler sketch below)
                class_in_batch = set()
                idx_batch = np.array([], dtype=np.int32)
                while len(idx_batch) < cfg.batch_size:
                    sampled_class = np.random.choice(list(class_idx_dict.keys()))
                    if sampled_class not in class_in_batch:
                        class_in_batch.add(sampled_class)
                        subsample_size = np.random.choice(range(5, 11))
                        subsample = np.random.permutation(
                            class_idx_dict[sampled_class])[:subsample_size]
                        idx_batch = np.append(idx_batch, subsample)
                idx_batch = idx_batch[:cfg.batch_size]

                image_batch = np.zeros((len(idx_batch), 256, 256, 3),
                                       dtype=np.uint8)
                lab_batch = np.zeros((len(idx_batch),), dtype=np.int32)
                for i, idx in enumerate(idx_batch):
                    # load image with random horizontal flipping
                    if np.random.rand() < 0.5:
                        img = Image.open(train_files[idx]).convert('RGB') \
                                   .resize((256, 256)) \
                                   .transpose(Image.FLIP_LEFT_RIGHT)
                    else:
                        img = Image.open(train_files[idx]).convert('RGB') \
                                   .resize((256, 256))
                    image_batch[i] = np.array(img)
                    lab_batch[i] = train_labels[idx]

                # perform training on the sampled batch
                # (the original had a stray pdb.set_trace() here and referred
                # to an undefined `feat_batch`; both fixed)
                err, _, step, summ = sess.run(
                    [total_loss, train_op, global_step, summary_op],
                    feed_dict={input_ph: image_batch,
                               label_ph: lab_batch,
                               dropout_ph: cfg.keep_prob,
                               lr_ph: learning_rate})

                print("%s\tEpoch: %d\tImages num: %d\tLoss %.4f" %
                      (cfg.name, epoch + 1, image_batch.shape[0], err))

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="train_loss", simple_value=err),
                    tf.Summary.Value(tag="images_num",
                                     simple_value=image_batch.shape[0])
                ])
                summary_writer.add_summary(summary, step)
                summary_writer.add_summary(summ, step)

                # validation on val_set
                if (epoch + 1) % 1000 == 0:
                    val_embeddings, _ = sess.run(
                        [embedding, set_emb],
                        feed_dict={input_ph: val_images,
                                   label_ph: val_labels,
                                   dropout_ph: 1.0})
                    mAP, mPrec, recall = utils.evaluate_simple(val_embeddings,
                                                               val_labels)
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                        tf.Summary.Value(tag="Validation Recall@1",
                                         simple_value=recall),
                        tf.Summary.Value(tag="Validation [email protected]",
                                         simple_value=mPrec)
                    ])
                    print("Epoch: [%d]\tmAP: %.4f\trecall: %.4f" %
                          (epoch + 1, mAP, recall))

                    # config for embedding visualization
                    config = projector.ProjectorConfig()
                    visual_embedding = config.embeddings.add()
                    visual_embedding.tensor_name = emb_var.name
                    visual_embedding.metadata_path = os.path.join(
                        result_dir, 'metadata_val.tsv')
                    projector.visualize_embeddings(summary_writer, config)
                    summary_writer.add_summary(summary, step)

                    # save model
                    saver.save(sess,
                               os.path.join(result_dir, cfg.name + '.ckpt'),
                               global_step=step)
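# --------------------------------------------------------------------------
# The batch construction above (reused verbatim in the feature-based CUB
# script later in this file) samples distinct classes and 5-10 instances per
# class until the batch is full. A standalone sketch of that sampler,
# factored out for clarity (the function name is ours, not the repo's):
def sample_class_balanced_batch(class_idx_dict, batch_size,
                                min_per_class=5, max_per_class=10):
    """Return indices for a batch with several instances per sampled class."""
    class_in_batch = set()
    idx_batch = np.array([], dtype=np.int32)
    while len(idx_batch) < batch_size:
        sampled_class = np.random.choice(list(class_idx_dict.keys()))
        if sampled_class not in class_in_batch:
            class_in_batch.add(sampled_class)
            subsample_size = np.random.choice(
                range(min_per_class, max_per_class + 1))
            subsample = np.random.permutation(
                class_idx_dict[sampled_class])[:subsample_size]
            idx_batch = np.append(idx_batch, subsample)
    return idx_batch[:batch_size]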
# ---------------------------------------------------------------------------
# Script 3: semi-supervised multimodal training. A pretrained sensors branch
# (RTSN + PairSim) scores pairs of events; its similarity predictions select
# and weight extra triplets for the visual core branch.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            # forward inside the scope: the lstm creates its variables here
            model_emb.forward(input_ph, dropout_ph)

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PairSim(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        ############################# Forward Pass #############################

        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative for the
        # triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        AB_pairs_sensors = tf.stack([A_sensors, B_sensors], axis=1)
        AC_pairs_sensors = tf.stack([A_sensors, C_sensors], axis=1)
        pairs_sensors = tf.concat([AB_pairs_sensors, AC_pairs_sensors], axis=0)
        model_pairsim_sensors.forward(pairs_sensors, dropout_ph)
        prob_sensors = model_pairsim_sensors.prob
        prob_sensors = tf.concat([
            prob_sensors[:tf.shape(A_sensors)[0]],
            prob_sensors[tf.shape(A_sensors)[0]:]
        ], axis=1)  # shape: [N, 4]

        # fuse prob from all modalities
        prob = prob_sensors

        ############################# Calculate loss #############################

        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # weighted triplet loss for multimodal inputs
        mul_num = tf.shape(prob)[0]
        metric_loss2 = networks.triplet_loss(anchor[:mul_num],
                                             positive[:mul_num],
                                             negative[:mul_num], cfg.alpha)
        weighted_metric_loss, weights = networks.weighted_triplet_loss(
            anchor[-mul_num:], positive[-mul_num:], negative[-mul_num:],
            prob[:, 1], prob[:, 3], cfg.alpha)

        unimodal_var_list = [
            v for v in tf.global_variables()
            if v.op.name.startswith("modality_core")
        ]
        # whether to apply joint optimization
        if cfg.no_joint:
            multimodal_var_list = unimodal_var_list
        else:
            multimodal_var_list = tf.global_variables()

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        unimodal_loss = metric_loss1 + regularization_loss * cfg.lambda_l2
        multimodal_loss = (metric_loss2 +
                           cfg.lambda_multimodal * weighted_metric_loss +
                           regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        unimodal_train_op = utils.optimize(unimodal_loss, global_step,
                                           cfg.optimizer, lr_ph,
                                           unimodal_var_list)
        multimodal_train_op = utils.optimize(multimodal_loss, global_step,
                                             cfg.optimizer, lr_ph,
                                             multimodal_var_list)

        saver = tf.train.Saver(max_to_keep=10)

        # not logging histograms of variables because that causes problems
        # when only unimodal_train_op is called
        summary_op = tf.summary.merge_all()
        summ_prob = tf.summary.histogram('Prob_histogram', prob)
        summ_weights = tf.summary.histogram('Weights_histogram', weights)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph, feat2_paths_ph, label_paths_ph,
            sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False,
            preprocess_func=[model_emb.prepare_input,
                             model_emb_sensors.prepare_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(session[1], session[-1],
                                                   utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i],
                    val_boundaries[i][0], val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)
            #print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))
                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    feat2_paths_ph: feat2_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, lab = sess.run(next_train)
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={input_ph: eve[start:end],
                                                      dropout_ph: 1.0})
                            eve_embedding[start:end] = np.copy(emb)

                        # sample triplets within sampled sessions
                        triplet_input_idx, negative_count = utils.select_triplets_facenet(
                            lab, eve_embedding, cfg.triplet_per_batch,
                            cfg.alpha, num_negative=cfg.num_negative)
                        if triplet_input_idx is None:
                            continue

                        multimodal_count = 0
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similarity prediction for all pos-neg pairs
                            pos_neg_idx = pos_neg_pairs(lab)
                            sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                               np.nan, dtype='float32')
                            for start, end in zip(
                                    range(0, len(pos_neg_idx),
                                          3 * cfg.batch_size),
                                    range(3 * cfg.batch_size,
                                          len(pos_neg_idx) + 3 * cfg.batch_size,
                                          3 * cfg.batch_size)):
                                end = min(end, len(pos_neg_idx))
                                batch_idx = pos_neg_idx[start:end]
                                batch_prob, histo_prob = sess.run(
                                    [prob, summ_prob],
                                    feed_dict={
                                        input_sensors_ph: eve_sensors[batch_idx],
                                        dropout_ph: 1.0
                                    })
                                summary_writer.add_summary(histo_prob, step)
                                for i in range(batch_prob.shape[0]):
                                    sim_prob[batch_idx[i * 3],
                                             batch_idx[i * 3 + 1]] = np.copy(
                                                 batch_prob[i, 1])

                            # post-process the similarity prediction matrix
                            # [N, N]: averaging sim(A,B) and sim(B,A) is not
                            # implemented because of nan entries for backgrounds
                            #sim_prob = 0.5 * (sim_prob + sim_prob.T)

                            # sample triplets from the similarity prediction;
                            # the number must not exceed the number of triplets
                            # from the facenet selection
                            if cfg.multimodal_select == "confidence":
                                multimodal_input_idx, multimodal_count = \
                                    select_triplets_multimodal(
                                        sim_prob, threshold=0.9,
                                        max_num=len(triplet_input_idx) // 3)
                            elif cfg.multimodal_select == "nopos":
                                multimodal_input_idx, multimodal_count = \
                                    nopos_triplets_multimodal(
                                        sim_prob,
                                        max_num=len(triplet_input_idx) // 3)
                            elif cfg.multimodal_select == "random":
                                multimodal_input_idx, multimodal_count = \
                                    random_triplets_multimodal(
                                        sim_prob,
                                        max_num=len(triplet_input_idx) // 3)
                            else:
                                raise NotImplementedError

                            print(len(triplet_input_idx),
                                  len(multimodal_input_idx), multimodal_count)
                            sensors_input = eve_sensors[multimodal_input_idx]
                            triplet_input_idx.extend(multimodal_input_idx)

                        triplet_input = eve[triplet_input_idx]
                        select_time = time.time() - start_time

                        ##################### Start training ########################
                        # be careful: for multimodal_count == 0 we only
                        # optimize the unimodal part
                        if epoch < cfg.multimodal_epochs or multimodal_count == 0:
                            err, metric_err, _, step, summ = sess.run(
                                [unimodal_loss, metric_loss1,
                                 unimodal_train_op, global_step, summary_op],
                                feed_dict={input_ph: triplet_input,
                                           dropout_ph: cfg.keep_prob,
                                           lr_ph: learning_rate})
                            mul_err = 0.0
                        else:
                            err, w, metric_err, mul_err, _, step, summ, histo_w = sess.run(
                                [multimodal_loss, weights, metric_loss2,
                                 weighted_metric_loss, multimodal_train_op,
                                 global_step, summary_op, summ_weights],
                                feed_dict={input_ph: triplet_input,
                                           input_sensors_ph: sensors_input,
                                           dropout_ph: cfg.keep_prob,
                                           lr_ph: learning_rate})
                            # add summary of weights histogram
                            summary_writer.add_summary(histo_w, step)

                        print("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f" %
                              (cfg.name, epoch + 1, batch_count,
                               batch_per_epoch, eve.shape[0],
                               triplet_input.shape[0] // 3, load_time,
                               select_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count),
                            tf.Summary.Value(tag="multimodal_count",
                                             simple_value=multimodal_count),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="weighted_metric_loss",
                                             simple_value=mul_err)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={input_ph: val_feats,
                                                        dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
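# --------------------------------------------------------------------------
# Hedged sketch (an assumption, not the repo's networks.weighted_triplet_loss):
# the call above passes the PairSim probabilities for the (anchor, positive)
# and (anchor, negative) pairs plus a margin, and receives (loss, weights)
# back. One plausible reading is a triplet hinge whose terms are confidence-
# weighted by the sensors-branch predictions:
def weighted_triplet_loss_sketch(anchor, positive, negative,
                                 prob_pos, prob_neg, alpha):
    """Triplet hinge loss with per-triplet confidence weights.

    prob_pos: probability that (anchor, positive) is a true match.
    prob_neg: probability that (anchor, negative) is a true match.
    """
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    # trust each triplet according to how confident the sensors branch is
    # that the positive matches and the negative does not
    weights = prob_pos * (1.0 - prob_neg)
    loss = tf.reduce_mean(
        weights * tf.maximum(pos_dist - neg_dist + alpha, 0.0))
    return loss, weights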
# ---------------------------------------------------------------------------
# Script 4: train a ConvTSN classifier from tfrecords with softmax
# cross-entropy; the L2-normalized features double as embeddings.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    tfrecords_files = glob.glob(cfg.tfrecords_root + '*.tfrecords')
    tfrecords_files = sorted(tfrecords_files)
    train_set = [
        f for f in tfrecords_files
        if os.path.basename(f).split('_')[0] in train_session
    ]
    print("Number of training events: %d" % len(train_set))

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model (guard added so `model` is never undefined)
        if cfg.network == "tsn":
            model = networks.ConvTSNClassifier(n_seg=cfg.num_seg,
                                               output_keep_prob=cfg.keep_prob)
        else:
            raise NotImplementedError

        # get prediction
        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        output_ph = tf.placeholder(tf.int32, shape=[None])
        model.forward(input_ph)
        embedding = tf.nn.l2_normalize(model.feat, axis=1, epsilon=1e-10,
                                       name='embedding')
        logits = model.logits
        pred = tf.argmax(logits, 1)

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=output_ph,
                                                           logits=logits))
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # session iterator for event sampling
        tf_paths_ph = tf.placeholder(tf.string, shape=[None])
        train_data = event_generator(tf_paths_ph, cfg.feat_dict,
                                     cfg.context_dict,
                                     event_per_batch=cfg.event_per_batch,
                                     num_threads=1, shuffled=True,
                                     preprocess_func=model.prepare_input_tf)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.pretrained_model:
                print("Restoring pretrained model: %s" % cfg.pretrained_model)
                saver.restore(sess, cfg.pretrained_model)

            ################## Training loop ##################
            epoch = 0
            while epoch < cfg.max_epochs:
                step = sess.run(global_step, feed_dict=None)

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={tf_paths_ph: train_set})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        start_time_select = time.time()
                        context, feature_lists = sess.run(next_train)
                        select_time = time.time() - start_time_select

                        eve = feature_lists[cfg.feat].reshape(
                            (-1, cfg.num_seg) + cfg.feat_dim[cfg.feat])
                        lab = context['label']

                        # perform training on the batch
                        start_time_train = time.time()
                        err, y_pred, _, step, summ = sess.run(
                            [total_loss, pred, train_op, global_step,
                             summary_op],
                            feed_dict={input_ph: eve,
                                       output_ph: lab,
                                       lr_ph: learning_rate})

                        # classification accuracy on the batch
                        acc = accuracy_score(lab, y_pred)
                        train_time = time.time() - start_time_train

                        print("Epoch: [%d: %d]\tSelect_time: %.3f\tTrain_time: %.3f\tLoss: %.4f\tAcc: %.4f" %
                              (epoch + 1, batch_count, select_time,
                               train_time, err, acc))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="train_acc",
                                             simple_value=acc)
                        ])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        epoch += 1
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, val_pred, _ = sess.run(
                    [embedding, pred, set_emb],
                    feed_dict={input_ph: val_feats})
                acc = accuracy_score(val_labels, val_pred)
                mAP, _ = utils.evaluate(val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation ACC", simple_value=acc)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # write summary and save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
# ---------------------------------------------------------------------------
# Script 5: train a PairSim verification model on pairs of precomputed
# embeddings (clustering results saved as pickles).
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)

    # use model_path to indicate the directory of clustering results
    result_dir = os.path.join(
        cfg.model_path,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # load data
    train_data = pickle.load(
        open(os.path.join(cfg.model_path, 'train_data.pkl'), 'rb'))
    val_data = pickle.load(
        open(os.path.join(cfg.model_path, 'val_data.pkl'), 'rb'))

    val_A_idx, val_B_idx = prepare_val(val_data['labels'])
    val_input = np.concatenate([
        np.expand_dims(val_data['feats'][val_A_idx], axis=1),
        np.expand_dims(val_data['feats'][val_B_idx], axis=1)
    ], axis=1)
    val_label = (val_data['labels'][val_A_idx] ==
                 val_data['labels'][val_B_idx]).astype('int32')
    print("Shape of validation data: {}".format(val_input.shape))

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        input_ph = tf.placeholder(tf.float32, shape=[None, 2, cfg.emb_dim])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model = networks.PairSim(n_input=cfg.emb_dim)
        model.forward(input_ph, dropout_ph)

        logits = model.logits
        prob = model.prob
        pred = tf.argmax(logits, -1)

        label_ph = tf.placeholder(tf.int32, shape=[None])
        CE_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ph,
                                                           logits=logits))
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = CE_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # define phase to control the number of negative samples
                if epoch < cfg.static_epochs:
                    phase = 1
                else:
                    phase = 1 + (epoch - cfg.static_epochs) / float(
                        (cfg.max_epochs - cfg.static_epochs) / 5)

                # loop over batches;
                # cfg.batch_size indicates num_pos chosen for a batch
                batch_count = 0
                for A_idx, B_idx in enumerate_batch(train_data['labels'],
                                                    cfg.batch_size, phase):
                    batch_input = np.concatenate([
                        np.expand_dims(train_data['feats'][A_idx], axis=1),
                        np.expand_dims(train_data['feats'][B_idx], axis=1)
                    ], axis=1)
                    batch_label = (train_data['labels'][A_idx] ==
                                   train_data['labels'][B_idx]).astype('int32')

                    start_time_train = time.time()
                    err, y_pred, _, step, summ = sess.run(
                        [total_loss, pred, train_op, global_step, summary_op],
                        feed_dict={input_ph: batch_input,
                                   dropout_ph: cfg.keep_prob,
                                   label_ph: np.squeeze(batch_label),
                                   lr_ph: learning_rate})

                    # calculate accuracy
                    acc = accuracy_score(batch_label, y_pred)
                    train_time = time.time() - start_time_train

                    print("Epoch: [%d][%d/%d]\tTrain_time: %.3f\tLoss %.4f\tAcc: %.4f" %
                          (epoch + 1, batch_label.sum(),
                           batch_label.shape[0], train_time, err, acc))
                    batch_count += 1

                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="train_acc", simple_value=acc),
                        tf.Summary.Value(
                            tag="pos_ratio",
                            simple_value=float(batch_label.sum()) /
                            batch_label.shape[0])
                    ])
                    summary_writer.add_summary(summary, step)
                    summary_writer.add_summary(summ, step)

                print("Epoch %d done!" % (epoch + 1))
                epoch += 1

                # validation on val_set
                print("Evaluating on validation set...")
                val_err, val_pred, val_prob = sess.run(
                    [total_loss, pred, prob],
                    feed_dict={input_ph: val_input,
                               dropout_ph: 1.0,
                               label_ph: np.squeeze(val_label)})
                val_acc = accuracy_score(val_label, val_pred)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation acc",
                                     simple_value=val_acc),
                    tf.Summary.Value(tag="Validation loss",
                                     simple_value=val_err)
                ])
                summary_writer.add_summary(summary, step)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)

            # print log for analysis
            with open(os.path.join(result_dir, 'val_results.txt'), 'w') as fout:
                fout.write("A_idx\tB_idx\tlabel\tprob_0\tprob_1\n")
                for i in range(val_prob.shape[0]):
                    fout.write("%d\t%d\t%d\t%.4f\t%.4f\n" %
                               (val_A_idx[i], val_B_idx[i], val_label[i],
                                val_prob[i, 0], val_prob[i, 1]))
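# --------------------------------------------------------------------------
# The pair batches above are built the same way for training and validation:
# stack two embeddings along a new axis and derive the binary label from
# class agreement. A small helper showing that construction (the helper name
# is ours, not the repo's):
def build_pair_batch(feats, labels, A_idx, B_idx):
    """Return ([N, 2, emb_dim] pairs, [N] binary same-class labels)."""
    pairs = np.concatenate([
        np.expand_dims(feats[A_idx], axis=1),
        np.expand_dims(feats[B_idx], axis=1)
    ], axis=1)
    pair_labels = (labels[A_idx] == labels[B_idx]).astype('int32')
    return pairs, pair_labels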
# ---------------------------------------------------------------------------
# Script 6: train an embedding on precomputed CUB features with FaceNet-style
# triplet selection over class-balanced batches.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    feat_train = np.load('/mnt/work/CUB_200_2011/data/feat_train.npy')
    val_feats = np.load('/mnt/work/CUB_200_2011/data/feat_test.npy')
    label_train = np.load('/mnt/work/CUB_200_2011/data/label_train.npy')
    label_train -= 1  # make labels start from 0
    val_labels = np.load('/mnt/work/CUB_200_2011/data/label_test.npy')

    class_idx_dict = {}
    for i, l in enumerate(label_train):
        l = int(l)
        if l not in class_idx_dict:
            class_idx_dict[l] = [i]
        else:
            class_idx_dict[l].append(i)
    C = len(list(class_idx_dict.keys()))

    val_triplet_idx = select_triplets_random(val_labels, 1000)

    # generate metadata.tsv for visualizing embeddings
    with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
        for l in val_labels:
            fout.write('{}\n'.format(int(l)))

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        model_emb = networks.CUBLayer(n_input=1024, n_output=cfg.emb_dim)
        #model_emb = networks.OutputLayer(n_input=1024, n_output=cfg.emb_dim)

        # get the embedding
        input_ph = tf.placeholder(tf.float32, shape=[None, 1024])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.logits, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.logits

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        # diffs = utils.all_diffs_tf(embedding, embedding)
        # all_dist = utils.cdist_tf(diffs)
        # tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate
        # the triplet loss (see the loss sketch below)
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)
        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            ################## Training loop ##################
            for epoch in range(cfg.max_epochs):
                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # sample a batch: distinct classes, 5-10 instances per class,
                # until the batch is full
                class_in_batch = set()
                idx_batch = np.array([], dtype=np.int32)
                while len(idx_batch) < cfg.batch_size:
                    sampled_class = np.random.choice(
                        list(class_idx_dict.keys()))
                    if sampled_class not in class_in_batch:
                        class_in_batch.add(sampled_class)
                        subsample_size = np.random.choice(range(5, 11))
                        subsample = np.random.permutation(
                            class_idx_dict[sampled_class])[:subsample_size]
                        idx_batch = np.append(idx_batch, subsample)
                idx_batch = idx_batch[:cfg.batch_size]

                feat_batch = feat_train[idx_batch]
                lab_batch = label_train[idx_batch]

                emb = sess.run(embedding, feed_dict={input_ph: feat_batch,
                                                     dropout_ph: 1.0})

                # get distances for all pairs
                all_diff = utils.all_diffs(emb, emb)
                triplet_input_idx, active_count = select_triplets_facenet(
                    lab_batch, utils.cdist(all_diff, metric=cfg.metric),
                    cfg.triplet_per_batch, cfg.alpha,
                    num_negative=cfg.num_negative)

                if triplet_input_idx is not None:
                    triplet_input = feat_batch[triplet_input_idx]

                    # perform training on the selected triplets
                    err, _, step, summ = sess.run(
                        [total_loss, train_op, global_step, summary_op],
                        feed_dict={input_ph: triplet_input,
                                   dropout_ph: cfg.keep_prob,
                                   lr_ph: learning_rate})

                    print("%s\tEpoch: %d\tImages num: %d\tTriplet num: %d\tLoss %.4f" %
                          (cfg.name, epoch + 1, feat_batch.shape[0],
                           triplet_input.shape[0] // 3, err))

                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_loss", simple_value=err),
                        tf.Summary.Value(tag="active_count",
                                         simple_value=active_count),
                        tf.Summary.Value(tag="images_num",
                                         simple_value=feat_batch.shape[0]),
                        tf.Summary.Value(
                            tag="triplet_num",
                            simple_value=triplet_input.shape[0] // 3)
                    ])
                    summary_writer.add_summary(summary, step)
                    summary_writer.add_summary(summ, step)

                # validation on val_set
                if (epoch + 1) % 100 == 0:
                    print("Evaluating on validation set...")
                    val_err = sess.run(
                        total_loss,
                        feed_dict={input_ph: val_feats[val_triplet_idx],
                                   dropout_ph: 1.0})
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="Validation loss",
                                         simple_value=val_err),
                    ])
                    # the original built this summary but never wrote it
                    summary_writer.add_summary(summary, step)
                    print("Epoch: [%d]\tloss: %.4f" % (epoch + 1, val_err))

                if (epoch + 1) % 1000 == 0:
                    val_embeddings, _ = sess.run(
                        [embedding, set_emb],
                        feed_dict={input_ph: val_feats, dropout_ph: 1.0})
                    mAP, mPrec, recall = utils.evaluate_simple(val_embeddings,
                                                               val_labels)
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag="Validation mAP",
                                         simple_value=mAP),
                        tf.Summary.Value(tag="Validation Recall@1",
                                         simple_value=recall),
                        tf.Summary.Value(tag="Validation [email protected]",
                                         simple_value=mPrec)
                    ])
                    print("Epoch: [%d]\tmAP: %.4f\trecall: %.4f" %
                          (epoch + 1, mAP, recall))

                    # config for embedding visualization
                    config = projector.ProjectorConfig()
                    visual_embedding = config.embeddings.add()
                    visual_embedding.tensor_name = emb_var.name
                    visual_embedding.metadata_path = os.path.join(
                        result_dir, 'metadata_val.tsv')
                    projector.visualize_embeddings(summary_writer, config)
                    summary_writer.add_summary(summary, step)

                    # save model
                    saver.save(sess,
                               os.path.join(result_dir, cfg.name + '.ckpt'),
                               global_step=step)
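# --------------------------------------------------------------------------
# Hedged sketch (an assumption, not the repo's networks.triplet_loss): given
# the FaceNet-style usage throughout these scripts, the triplet loss is
# presumably the standard hinge over squared distances with margin alpha:
def triplet_loss_sketch(anchor, positive, negative, alpha):
    """mean(max(0, d(a, p) - d(a, n) + alpha)) over the batch."""
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    return tf.reduce_mean(tf.maximum(pos_dist - neg_dist + alpha, 0.0))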
# ---------------------------------------------------------------------------
# Script 7: multitask training on event features: a triplet loss plus a
# PairSim verification loss over (anchor, positive) / (anchor, negative) pairs.
# ---------------------------------------------------------------------------
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convtsn":
            model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "convrtsn":
            model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        else:
            raise NotImplementedError

        # multitask loss (verification)
        model_ver = networks.PairSim2(n_input=cfg.emb_dim)
        #model_ver = networks.PairSim(n_input=cfg.emb_dim)

        # get the embedding
        if cfg.feat == "sensors":
            input_ph = tf.placeholder(tf.float32,
                                      shape=[None, cfg.num_seg, None])
        elif cfg.feat == "resnet":
            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model_emb.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate
        # the triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        metric_loss = networks.triplet_loss(anchor, positive, negative,
                                            cfg.alpha)

        # verification loss
        pos_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(positive, axis=1)], axis=1)
        pos_label = tf.ones((tf.shape(pos_pairs)[0],), tf.int32)
        neg_pairs = tf.concat(
            [tf.expand_dims(anchor, axis=1),
             tf.expand_dims(negative, axis=1)], axis=1)
        neg_label = tf.zeros((tf.shape(neg_pairs)[0],), tf.int32)
        ver_pairs = tf.concat([pos_pairs, neg_pairs], axis=0)
        ver_label = tf.concat([pos_label, neg_label], axis=0)

        model_ver.forward(ver_pairs, dropout_ph)
        logits = model_ver.logits
        pred = tf.argmax(logits, -1)
        ver_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ver_label,
                                                           logits=logits))

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = (metric_loss + cfg.lambda_ver * ver_loss +
                      regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(
            feat_paths_ph, label_paths_ph,
            sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False,
            preprocess_func=model_emb.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[1], model_emb.prepare_input_test
            )  # use prepare_input_test for testing time
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i],
                    val_boundaries[i][0], val_boundaries[i][1]))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a flat list into a list of lists; hacky trick from:
                # https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(zip(*[iter(feat_paths)] * cfg.sess_per_batch))
                label_paths = list(zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # Hierarchical sampling (same as fast rcnn):
                        # first, sample sessions for a batch
                        start_time_select = time.time()
                        eve, se, lab = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={input_ph: eve[start:end],
                                                      dropout_ph: 1.0})
                            eve_embedding[start:end] = emb

                        # Second, sample triplets within sampled sessions
                        # (see the selection sketch below)
                        triplet_input, negative_count = select_triplets_facenet(
                            eve, lab, eve_embedding, cfg.triplet_per_batch,
                            cfg.alpha, metric=cfg.metric)
                        select_time2 = time.time() - start_time_select - select_time1

                        if triplet_input is not None:
                            start_time_train = time.time()
                            # perform training on the selected triplets
                            err, metric_err, ver_err, y_pred, _, step, summ = sess.run(
                                [total_loss, metric_loss, ver_loss, pred,
                                 train_op, global_step, summary_op],
                                feed_dict={input_ph: triplet_input,
                                           dropout_ph: cfg.keep_prob,
                                           lr_ph: learning_rate})
                            train_time = time.time() - start_time_train

                            # verification accuracy on the batch
                            batch_label = np.hstack(
                                (np.ones((triplet_input.shape[0] // 3,),
                                         dtype='int32'),
                                 np.zeros((triplet_input.shape[0] // 3,),
                                          dtype='int32')))
                            acc = accuracy_score(batch_label, y_pred)

                            print("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" %
                                  (cfg.name, epoch + 1, batch_count,
                                   batch_per_epoch, eve.shape[0],
                                   triplet_input.shape[0], select_time1,
                                   select_time2, train_time, err))

                            summary = tf.Summary(value=[
                                tf.Summary.Value(tag="train_loss",
                                                 simple_value=err),
                                tf.Summary.Value(tag="metric_loss",
                                                 simple_value=metric_err),
                                tf.Summary.Value(tag="ver_loss",
                                                 simple_value=ver_err),
                                tf.Summary.Value(tag="acc", simple_value=acc),
                                tf.Summary.Value(tag="negative_count",
                                                 simple_value=negative_count)
                            ])
                            summary_writer.add_summary(summary, step)
                            summary_writer.add_summary(summ, step)

                        batch_count += 1

                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={input_ph: val_feats,
                                                        dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)
                ])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
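# --------------------------------------------------------------------------
# Hedged sketch of FaceNet-style triplet selection (the repo's
# select_triplets_facenet variants differ in signature and return value;
# this only illustrates the general idea): for every (anchor, positive) pair
# within a class, keep negatives that violate the margin, i.e.
# d(a, n) - d(a, p) < alpha.
def select_triplets_facenet_sketch(labels, dist, triplet_per_batch, alpha,
                                   num_negative=3):
    """Return a flat index list [a0, p0, n0, a1, p1, n1, ...] or None.

    labels: (N,) int array; dist: (N, N) pairwise distance matrix.
    """
    labels = np.asarray(labels).reshape(-1)
    triplets = []
    for a in range(len(labels)):
        pos_idx = np.where(labels == labels[a])[0]
        neg_idx = np.where(labels != labels[a])[0]
        for p in pos_idx:
            if p == a or len(neg_idx) == 0:
                continue
            # negatives violating the margin for this (a, p) pair
            viol = neg_idx[dist[a, neg_idx] - dist[a, p] < alpha]
            if len(viol) == 0:
                continue
            chosen = np.random.choice(viol, min(num_negative, len(viol)),
                                      replace=False)
            triplets.extend((a, p, n) for n in chosen)
    if not triplets:
        return None, 0
    np.random.shuffle(triplets)
    triplets = triplets[:triplet_per_batch]
    flat_idx = [i for t in triplets for i in t]
    return flat_idx, len(triplets)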
def main(): cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session train_set = prepare_multimodal_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root) if cfg.task == "supervised": # fully supervised task train_set = train_set[:cfg.label_num] batch_per_epoch = len(train_set) // cfg.sess_per_batch labeled_session = train_session[:cfg.label_num] val_session = cfg.val_session val_set = prepare_multimodal_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) lr_ph = tf.placeholder(tf.float32, name='learning_rate') ####################### Load models here ######################## sensors_emb_dim = 32 segment_emb_dim = 32 with tf.variable_scope("modality_core"): # load backbone model if cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convbirtsn": model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError input_ph = tf.placeholder( tf.float32, shape=[None, cfg.num_seg, None, None, None]) dropout_ph = tf.placeholder(tf.float32, shape=[]) model_emb.forward(input_ph, dropout_ph) # for lstm has variable scope with tf.variable_scope("sensors"): model_output_sensors = networks.OutputLayer( n_input=cfg.emb_dim, n_output=sensors_emb_dim) with tf.variable_scope("segment"): model_output_segment = networks.OutputLayer( n_input=cfg.emb_dim, n_output=segment_emb_dim) lambda_mul_ph = tf.placeholder(tf.float32, shape=[]) with tf.variable_scope("modality_sensors"): model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg, emb_dim=sensors_emb_dim) input_sensors_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, 8]) model_emb_sensors.forward(input_sensors_ph, dropout_ph) var_list = {} for v in tf.global_variables(): if v.op.name.startswith("modality_sensors"): var_list[v.op.name.replace("modality_sensors/", "")] = v restore_saver_sensors = tf.train.Saver(var_list) with tf.variable_scope("modality_segment"): model_emb_segment = networks.RTSN(n_seg=cfg.num_seg, emb_dim=segment_emb_dim, n_input=357) input_segment_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, 357]) model_emb_segment.forward(input_segment_ph, dropout_ph) var_list = {} for v in tf.global_variables(): if v.op.name.startswith("modality_segment"): var_list[v.op.name.replace("modality_segment/", "")] = v restore_saver_segment = tf.train.Saver(var_list) ############################# Forward Pass ############################# if cfg.normalized: embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10) embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden, axis=-1, epsilon=1e-10) embedding_segment = tf.nn.l2_normalize(model_emb_segment.hidden, axis=-1, epsilon=1e-10) else: embedding = model_emb.hidden embedding_sensors = model_emb_sensors.hidden embedding_segment = model_emb_segment.hidden # get the number of unsupervised training unsup_num = tf.shape(input_sensors_ph)[0] # variable for visualizing the embeddings emb_var = tf.Variable(tf.zeros([1116, cfg.emb_dim], 
dtype=tf.float32), name='embeddings') set_emb = tf.assign(emb_var, embedding, validate_shape=False) # calculated for monitoring all-pair embedding distance diffs = utils.all_diffs_tf(embedding, embedding) all_dist = utils.cdist_tf(diffs) tf.summary.histogram('embedding_dists', all_dist) # split embedding into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embedding[:-unsup_num], [-1, 3, cfg.emb_dim]), 3, 1) metric_loss = networks.triplet_loss(anchor, positive, negative, cfg.alpha) model_output_sensors.forward(tf.nn.relu(embedding[-unsup_num:]), dropout_ph) logits_sensors = model_output_sensors.logits model_output_segment.forward(tf.nn.relu(embedding[-unsup_num:]), dropout_ph) logits_segment = model_output_segment.logits # MSE loss MSE_loss_sensors = tf.losses.mean_squared_error( embedding_sensors, logits_sensors) / sensors_emb_dim MSE_loss_segment = tf.losses.mean_squared_error( embedding_sensors, logits_segment) / segment_emb_dim MSE_loss = MSE_loss_sensors + MSE_loss_segment regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = tf.cond( tf.equal(unsup_num, tf.shape(embedding)[0]), lambda: MSE_loss * lambda_mul_ph + regularization_loss * cfg.lambda_l2, lambda: metric_loss + MSE_loss * lambda_mul_ph + regularization_loss * cfg.lambda_l2) tf.summary.scalar('learning_rate', lr_ph) # only train the core branch train_var_list = [ v for v in tf.global_variables() if v.op.name.startswith("modality_core") ] train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, train_var_list) saver = tf.train.Saver(max_to_keep=10) summary_op = tf.summary.merge_all() ######################################################################### # session iterator for session sampling feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) feat2_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) feat3_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) train_data = multimodal_session_generator( feat_paths_ph, feat2_paths_ph, feat3_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False, preprocess_func=[ model_emb.prepare_input, model_emb_sensors.prepare_input, model_emb_segment.prepare_input ]) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # prepare validation data val_sess = [] val_feats = [] val_feats2 = [] val_feats3 = [] val_labels = [] val_boundaries = [] for session in val_set: session_id = os.path.basename(session[1]).split('_')[0] eve_batch, lab_batch, boundary = load_data_and_label( session[0], session[-1], model_emb.prepare_input_test ) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_sess.extend([session_id] * eve_batch.shape[0]) val_boundaries.extend(boundary) eve2_batch, _, _ = load_data_and_label( session[1], session[-1], model_emb_sensors.prepare_input_test) val_feats2.append(eve2_batch) eve3_batch, _, _ = load_data_and_label( session[2], session[-1], model_emb_segment.prepare_input_test) val_feats3.append(eve3_batch) val_feats = np.concatenate(val_feats, axis=0) val_feats2 = np.concatenate(val_feats2, axis=0) val_feats3 = np.concatenate(val_feats3, axis=0) val_labels = np.concatenate(val_labels, axis=0) print("Shape of val_feats: ", val_feats.shape) # generate metadata.tsv for visualize 
embedding with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: fout.write('id\tlabel\tsession_id\tstart\tend\n') for i in range(len(val_sess)): fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format( i, val_labels[i, 0], val_sess[i], val_boundaries[i][0], val_boundaries[i][1])) ######################################################################### # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) print("Restoring sensors model: %s" % cfg.sensors_path) restore_saver_sensors.restore(sess, cfg.sensors_path) print("Restoring segment model: %s" % cfg.segment_path) restore_saver_segment.restore(sess, cfg.segment_path) # load pretrain model, if needed if cfg.model_path: print("Restoring pretrained model: %s" % cfg.model_path) saver.restore(sess, cfg.model_path) ################## Training loop ################## epoch = -1 while epoch < cfg.max_epochs - 1: step = sess.run(global_step, feed_dict=None) epoch = step // batch_per_epoch # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) # prepare data for this epoch random.shuffle(train_set) paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch)) feat_paths = [[p[0] for p in path] for path in paths] feat2_paths = [[p[1] for p in path] for path in paths] feat3_paths = [[p[2] for p in path] for path in paths] label_paths = [[p[-1] for p in path] for path in paths] sess.run(train_sess_iterator.initializer, feed_dict={ feat_paths_ph: feat_paths, feat2_paths_ph: feat2_paths, feat3_paths_ph: feat3_paths, label_paths_ph: label_paths }) # for each epoch batch_count = 1 while True: try: ##################### Data loading ######################## start_time = time.time() eve, eve_sensors, eve_segment, lab, batch_sess = sess.run( next_train) # for memory concern, 1000 events are used in maximum if eve.shape[0] > 1000: idx = np.random.permutation(eve.shape[0])[:1000] eve = eve[idx] eve_sensors = eve_sensors[idx] eve_segment = eve_segment[idx] lab = lab[idx] batch_sess = batch_sess[idx] load_time = time.time() - start_time ##################### Triplet selection ##################### start_time = time.time() # for labeled sessions, use facenet sampling eve_labeled = [] lab_labeled = [] for i in range(eve.shape[0]): # FIXME: use decode again to get session_id str if batch_sess[i, 0].decode() in labeled_session: eve_labeled.append(eve[i]) lab_labeled.append(lab[i]) if len(eve_labeled): # if labeled sessions exist eve_labeled = np.stack(eve_labeled, axis=0) lab_labeled = np.stack(lab_labeled, axis=0) # Get the embeddings of all events eve_embedding = np.zeros( (eve_labeled.shape[0], cfg.emb_dim), dtype='float32') for start, end in zip( range(0, eve_labeled.shape[0], cfg.batch_size), range( cfg.batch_size, eve_labeled.shape[0] + cfg.batch_size, cfg.batch_size)): end = min(end, eve_labeled.shape[0]) emb = sess.run(embedding, feed_dict={ input_ph: eve_labeled[start:end], dropout_ph: 1.0 }) eve_embedding[start:end] = np.copy(emb) # Second, sample triplets within sampled sessions all_diff = utils.all_diffs(eve_embedding, eve_embedding) triplet_input_idx, active_count = 
utils.select_triplets_facenet( lab_labeled, utils.cdist(all_diff, metric=cfg.metric), cfg.triplet_per_batch, cfg.alpha, num_negative=cfg.num_negative) if triplet_input_idx is not None and len(triplet_input_idx) > 0: triplet_input = eve_labeled[triplet_input_idx] else: triplet_input_idx = None else: active_count = -1 # for all sessions in the batch perm_idx = np.random.permutation(eve.shape[0]) perm_idx = perm_idx[:min(3 * (len(perm_idx) // 3), 3 * cfg.triplet_per_batch)] mul_input = eve[perm_idx] if len(eve_labeled) and triplet_input_idx is not None: triplet_input = np.concatenate( (triplet_input, mul_input), axis=0) else: triplet_input = mul_input sensors_input = eve_sensors[perm_idx] segment_input = eve_segment[perm_idx] ##################### Start training ######################## # supervised initialization (lambda_mul_ph = 0 disables the multimodal term) if epoch < cfg.multimodal_epochs: if not len(eve_labeled): # if no labeled sessions exist continue err, mse_err1, mse_err2, _, step, summ = sess.run( [ total_loss, MSE_loss_sensors, MSE_loss_segment, train_op, global_step, summary_op ], feed_dict={ input_ph: triplet_input, input_sensors_ph: sensors_input, input_segment_ph: segment_input, dropout_ph: cfg.keep_prob, lambda_mul_ph: 0.0, lr_ph: learning_rate }) else: err, mse_err1, mse_err2, _, step, summ = sess.run( [ total_loss, MSE_loss_sensors, MSE_loss_segment, train_op, global_step, summary_op ], feed_dict={ input_ph: triplet_input, input_sensors_ph: sensors_input, input_segment_ph: segment_input, dropout_ph: cfg.keep_prob, lambda_mul_ph: cfg.lambda_multimodal, lr_ph: learning_rate }) train_time = time.time() - start_time print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \ (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], load_time, train_time, err)) summary = tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), tf.Summary.Value(tag="active_count", simple_value=active_count), tf.Summary.Value( tag="triplet_num", simple_value=(triplet_input.shape[0] - sensors_input.shape[0]) // 3), tf.Summary.Value(tag="MSE_loss_sensors", simple_value=mse_err1), tf.Summary.Value(tag="MSE_loss_segment", simple_value=mse_err2) ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!" % (epoch + 1)) break # validation on val_set print("Evaluating on validation set...") val_err1, val_err2, val_embeddings, _ = sess.run( [MSE_loss_sensors, MSE_loss_segment, embedding, set_emb], feed_dict={ input_ph: val_feats, input_sensors_ph: val_feats2, input_segment_ph: val_feats3, dropout_ph: 1.0 }) mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels) summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation mAP", simple_value=mAP), tf.Summary.Value(tag="Validation mPrec", simple_value=mPrec), tf.Summary.Value(tag="Validation mse loss sensors", simple_value=val_err1), tf.Summary.Value(tag="Validation mse loss segment", simple_value=val_err2) ]) summary_writer.add_summary(summary, step) print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" % (epoch + 1, mAP, mPrec)) # config for embedding visualization config = projector.ProjectorConfig() visual_embedding = config.embeddings.add() visual_embedding.tensor_name = emb_var.name visual_embedding.metadata_path = os.path.join( result_dir, 'metadata_val.tsv') projector.visualize_embeddings(summary_writer, config) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step)
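# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original scripts): every training loop
# in this file uses the same schedule from "In defense of Triplet Loss": the
# base rate is held constant for `static_epochs`, then decayed exponentially
# so it reaches base_lr * final_factor at `max_epochs` (final_factor is 0.01
# in the script above, 0.001 in most of the others).  A minimal standalone
# version, assuming the same hyperparameter names:
def triplet_lr_schedule(epoch, base_lr, static_epochs, max_epochs,
                        final_factor=0.01):
    if epoch < static_epochs:
        return base_lr
    progress = (epoch - static_epochs) / float(max_epochs - static_epochs)
    return base_lr * final_factor**progress
# e.g. base_lr=1e-3, static_epochs=10, max_epochs=20:
# epochs 0-9 -> 1e-3, epoch 15 -> 1e-4, epoch 20 -> 1e-5
# ---------------------------------------------------------------------------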
def main(): cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session train_set = prepare_multimodal_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root) batch_per_epoch = len(train_set) // cfg.sess_per_batch val_session = cfg.val_session val_set = prepare_multimodal_dataset( cfg.feature_root, val_session, cfg.feat, cfg.label_root) # only have one modality in testing time # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) lr_ph = tf.placeholder(tf.float32, name='learning_rate') # load backbone model if cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None]) output_ph = tf.placeholder(tf.float32, shape=(None, ) + cfg.feat_dim[cfg.feat[1]]) dropout_ph = tf.placeholder(tf.float32, shape=[]) model_emb.forward(input_ph, dropout_ph) hidden = model_emb.hidden embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10) # variable for visualizing the embeddings emb_var = tf.Variable([0.0], name='embeddings') set_emb = tf.assign(emb_var, embedding, validate_shape=False) model_output = networks.OutputLayer( n_input=cfg.emb_dim, n_output=cfg.feat_dim[cfg.feat[1]][0]) model_output.forward(tf.nn.relu(hidden), dropout_ph) logits = model_output.logits # MSE loss MSE_loss = tf.losses.mean_squared_error(output_ph, logits) regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = MSE_loss + regularization_loss * cfg.lambda_l2 tf.summary.scalar('learning_rate', lr_ph) train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, tf.global_variables()) saver = tf.train.Saver(max_to_keep=10) summary_op = tf.summary.merge_all() ######################################################################### # session iterator for session sampling feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) feat2_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) train_data = multimodal_session_generator( feat_paths_ph, feat2_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False, preprocess_func=[model_emb.prepare_input, utils.mean_pool_input]) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # prepare validation data val_sess = [] val_feats = [] val_feats2 = [] val_labels = [] val_boundaries = [] for session in val_set: session_id = os.path.basename(session[1]).split('_')[0] eve_batch, lab_batch, boundary = load_data_and_label( session[0], session[-1], model_emb.prepare_input_test ) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_sess.extend([session_id] * eve_batch.shape[0]) val_boundaries.extend(boundary) eve2_batch, _, _ = load_data_and_label(session[1], session[-1], utils.mean_pool_input) val_feats2.append(eve2_batch) val_feats = np.concatenate(val_feats, axis=0) 
val_feats2 = np.concatenate(val_feats2, axis=0) val_labels = np.concatenate(val_labels, axis=0) print("Shape of val_feats: ", val_feats.shape) # generate metadata.tsv for visualizing embeddings with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: fout.write('id\tlabel\tsession_id\tstart\tend\n') for i in range(len(val_sess)): fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format( i, val_labels[i, 0], val_sess[i], val_boundaries[i][0], val_boundaries[i][1])) ######################################################################### # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) # load pretrain model, if needed if cfg.model_path: print("Restoring pretrained model: %s" % cfg.model_path) saver.restore(sess, cfg.model_path) ################## Training loop ################## epoch = -1 while epoch < cfg.max_epochs - 1: step = sess.run(global_step, feed_dict=None) epoch = step // batch_per_epoch # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) # prepare data for this epoch random.shuffle(train_set) paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch)) feat_paths = [[p[0] for p in path] for path in paths] feat2_paths = [[p[1] for p in path] for path in paths] label_paths = [[p[-1] for p in path] for path in paths] sess.run(train_sess_iterator.initializer, feed_dict={ feat_paths_ph: feat_paths, feat2_paths_ph: feat2_paths, label_paths_ph: label_paths }) # for each epoch batch_count = 1 while True: try: ##################### Data loading ######################## start_time = time.time() eve, eve2, lab = sess.run(next_train) load_time = time.time() - start_time ##################### Start training ######################## start_time = time.time() err, _, step, summ = sess.run( [total_loss, train_op, global_step, summary_op], feed_dict={ input_ph: eve, output_ph: eve2, dropout_ph: cfg.keep_prob, lr_ph: learning_rate }) train_time = time.time() - start_time print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \ (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], load_time, train_time, err)) summary = tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!"
% (epoch + 1)) break # validation on val_set print("Evaluating on validation set...") val_err, val_embeddings, val_pred, _ = sess.run( [total_loss, embedding, logits, set_emb], feed_dict={ input_ph: val_feats, output_ph: val_feats2, dropout_ph: 1.0 }) mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels) mAP2, mPrec2 = utils.evaluate_simple( val_pred, val_labels) # use prediction for retrieval summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation mAP", simple_value=mAP), tf.Summary.Value(tag="Validation mPrec", simple_value=mPrec), tf.Summary.Value(tag="Validation mAP 2", simple_value=mAP2), tf.Summary.Value(tag="Validation mPrec 2", simple_value=mPrec2), tf.Summary.Value(tag="Validation loss", simple_value=val_err) ]) summary_writer.add_summary(summary, step) # config for embedding visualization config = projector.ProjectorConfig() visual_embedding = config.embeddings.add() visual_embedding.tensor_name = emb_var.name visual_embedding.metadata_path = os.path.join( result_dir, 'metadata_val.tsv') projector.visualize_embeddings(summary_writer, config) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step)
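# ---------------------------------------------------------------------------
# Illustrative sketch: the epoch preparation above relies on the
# list(zip(*[iter(train_set)] * cfg.sess_per_batch)) idiom to chunk a flat
# list of sessions into fixed-size groups.  The same iterator object is
# repeated n times, so zip() draws n consecutive items per tuple and silently
# drops any remainder:
paths = ['s1', 's2', 's3', 's4', 's5']
chunks = list(zip(*[iter(paths)] * 2))
assert chunks == [('s1', 's2'), ('s3', 's4')]  # 's5' is dropped
# ---------------------------------------------------------------------------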
def main(): cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root) batch_per_epoch = len(train_set) // cfg.sess_per_batch val_session = cfg.val_session[:3] val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) # subtract global_step by 1 if needed (for hard negative mining, keep global_step unchanged) subtract_global_step_op = tf.assign(global_step, global_step - 1) lr_ph = tf.placeholder(tf.float32, name='learning_rate') # load backbone model if cfg.network == "tsn": model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "rtsn": model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError model_ver = networks.PairSim(n_input=cfg.emb_dim) # get the embedding if cfg.feat == "sensors": input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None]) elif cfg.feat == "resnet": input_ph = tf.placeholder( tf.float32, shape=[None, cfg.num_seg, None, None, None]) dropout_ph = tf.placeholder(tf.float32, shape=[]) label_ph = tf.placeholder(tf.int32, shape=[None]) model_emb.forward(input_ph, dropout_ph) embedding = model_emb.hidden # split embedding into A and B emb_A, emb_B = tf.unstack(tf.reshape(embedding, [-1, 2, cfg.emb_dim]), 2, 1) pairs = tf.stack([emb_A, emb_B], axis=1) model_ver.forward(pairs, dropout_ph) logits = model_ver.logits prob = model_ver.prob pred = tf.argmax(logits, -1) ver_loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ph, logits=logits)) regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = ver_loss + regularization_loss * cfg.lambda_l2 tf.summary.scalar('learning_rate', lr_ph) train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, tf.global_variables()) saver = tf.train.Saver(max_to_keep=10) summary_op = tf.summary.merge_all() # session iterator for session sampling feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) train_data = session_generator(feat_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False, preprocess_func=model_emb.prepare_input) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # prepare validation data val_sess = [] val_feats = [] val_labels = [] val_boundaries = [] for session in val_set: session_id = os.path.basename(session[1]).split('_')[0] eve_batch, lab_batch, boundary = load_data_and_label( session[0], session[1], model_emb.prepare_input_test ) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_sess.extend([session_id] * eve_batch.shape[0]) val_boundaries.extend(boundary) val_feats = np.concatenate(val_feats, 
axis=0) val_labels = np.concatenate(val_labels, axis=0) # generate metadata.tsv for visualize embedding with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: fout.write('id\tlabel\tsession_id\tstart\tend\n') for i in range(len(val_sess)): fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format( i, val_labels[i, 0], val_sess[i], val_boundaries[i][0], val_boundaries[i][1])) val_idx, val_labels = random_pairs(val_labels, 1000000, test=True) val_feats = val_feats[val_idx] val_labels = np.asarray(val_labels, dtype='int32') print("Shape of val_feats: ", val_feats.shape) # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) # load pretrain model, if needed if cfg.model_path: print("Restoring pretrained model: %s" % cfg.model_path) saver.restore(sess, cfg.model_path) ################## Training loop ################## epoch = -1 while epoch < cfg.max_epochs - 1: step = sess.run(global_step, feed_dict=None) epoch = step // batch_per_epoch # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) # prepare data for this epoch random.shuffle(train_set) feat_paths = [path[0] for path in train_set] label_paths = [path[1] for path in train_set] # reshape a list to list of list # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python feat_paths = list(zip(*[iter(feat_paths)] * cfg.sess_per_batch)) label_paths = list( zip(*[iter(label_paths)] * cfg.sess_per_batch)) sess.run(train_sess_iterator.initializer, feed_dict={ feat_paths_ph: feat_paths, label_paths_ph: label_paths }) # for each epoch batch_count = 1 while True: try: # Hierarchical sampling (same as fast rcnn) start_time_select = time.time() # First, sample sessions for a batch eve, se, lab = sess.run(next_train) select_time1 = time.time() - start_time_select # select pairs for training pair_idx, train_labels = random_pairs( lab, cfg.batch_size, cfg.num_negative) train_input = eve[pair_idx] train_labels = np.asarray(train_labels, dtype='int32') select_time2 = time.time( ) - start_time_select - select_time1 start_time_train = time.time() # perform training on the selected pairs err, y_pred, y_prob, _, step, summ = sess.run( [ total_loss, pred, prob, train_op, global_step, summary_op ], feed_dict={ input_ph: train_input, label_ph: train_labels, dropout_ph: cfg.keep_prob, lr_ph: learning_rate }) acc = accuracy_score(train_labels, y_pred) negative_count = 0 if epoch >= cfg.negative_epochs: hard_idx, hard_labels, negative_count = hard_pairs( train_labels, y_prob, 0.5) if negative_count > 0: hard_input = train_input[hard_idx] hard_labels = np.asarray(hard_labels, dtype='int32') step = sess.run(subtract_global_step_op) hard_err, y_pred, _, step = sess.run( [total_loss, pred, train_op, global_step], feed_dict={ input_ph: hard_input, label_ph: hard_labels, dropout_ph: cfg.keep_prob, lr_ph: learning_rate }) train_time = time.time() - start_time_train print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss: %.4f" % \ (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], 
select_time1, select_time2, train_time, err)) summary = tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), tf.Summary.Value(tag="acc", simple_value=acc), tf.Summary.Value(tag="negative_count", simple_value=negative_count) ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!" % (epoch + 1)) break # validation on val_set print("Evaluating on validation set...") val_err, val_pred, val_prob = sess.run( [total_loss, pred, prob], feed_dict={ input_ph: val_feats, label_ph: val_labels, dropout_ph: 1.0 }) val_acc = accuracy_score(val_labels, val_pred) summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation acc", simple_value=val_acc), tf.Summary.Value(tag="Validation loss", simple_value=val_err) ]) summary_writer.add_summary(summary, step) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step) # print log for analysis with open(os.path.join(result_dir, 'val_results.txt'), 'w') as fout: fout.write("acc = %.4f\n" % val_acc) fout.write("label\tprob_0\tprob_1\tA_idx\tB_idx\n") for i in range(val_prob.shape[0]): fout.write("%d\t%.4f\t%.4f\t%d\t%d\n" % (val_labels[i], val_prob[i, 0], val_prob[i, 1], val_idx[2 * i], val_idx[2 * i + 1]))
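# ---------------------------------------------------------------------------
# Illustrative sketch: `hard_pairs` is a project helper whose source is not
# shown here.  Judging from the call site -- hard_pairs(train_labels, y_prob,
# 0.5) returning (event indices, pair labels, count) -- a plausible
# (hypothetical) version keeps the pairs whose predicted probability for the
# true class falls below the threshold, and maps each hard pair i back to its
# two events 2*i and 2*i+1 in the flattened batch:
import numpy as np

def hard_pairs_sketch(labels, prob, threshold=0.5):
    labels = np.asarray(labels)
    # probability assigned to the ground-truth class of each pair
    true_class_prob = prob[np.arange(len(labels)), labels]
    hard = np.where(true_class_prob < threshold)[0]
    event_idx = np.stack([2 * hard, 2 * hard + 1], axis=1).reshape(-1)
    return event_idx, labels[hard], len(hard)
# ---------------------------------------------------------------------------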
def main(): cfg = TrainConfig().parse() print (cfg.name) np.random.seed(seed=cfg.seed) # prepare dataset val_session = cfg.val_session val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) # load backbone model if cfg.network == "tsn": model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "rtsn": model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input) elif cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input) elif cfg.network == "convbirtsn": model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError model_ver = networks.PDDM(n_input=cfg.emb_dim) # get the embedding if cfg.feat == "sensors" or cfg.feat == "segment": input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None]) elif cfg.feat == "resnet" or cfg.feat == "segment_down": input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None, None, None]) dropout_ph = tf.placeholder(tf.float32, shape=[]) model_emb.forward(input_ph, dropout_ph) if cfg.normalized: embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10) else: embedding = model_emb.hidden # split the embedding emb_A = embedding[:(tf.shape(embedding)[0]//2)] emb_B = embedding[(tf.shape(embedding)[0]//2):] model_ver.forward(tf.stack((emb_A, emb_B), axis=1)) pddm = model_ver.prob restore_saver = tf.train.Saver() # prepare validation data val_sess = [] val_feats = [] val_labels = [] val_boundaries = [] for session in val_set: session_id = os.path.basename(session[1]).split('_')[0] eve_batch, lab_batch, boundary = load_data_and_label(session[0], session[-1], model_emb.prepare_input_test) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_sess.extend([session_id]*eve_batch.shape[0]) val_boundaries.extend(boundary) val_feats = np.concatenate(val_feats, axis=0) val_labels = np.concatenate(val_labels, axis=0) print ("Shape of val_feats: ", val_feats.shape) # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) with sess.as_default(): sess.run(tf.global_variables_initializer()) print ("Restoring pretrained model: %s" % cfg.model_path) restore_saver.restore(sess, cfg.model_path) fout_fp = open(os.path.join(os.path.dirname(cfg.model_path), 'val_fp.txt'), 'w') fout_fn = open(os.path.join(os.path.dirname(cfg.model_path), 'val_fn.txt'), 'w') fout_fp.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n') fout_fn.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n') count = 0 count_high = 0 # high confidence (0.9) count_fp = 0 count_fn = 0 for i in range(val_feats.shape[0]): print ("%d/%d" % (i,val_feats.shape[0])) if val_labels[i] == 0: continue A_input = np.tile(val_feats[i], (val_feats.shape[0]-i,1,1)) AB_input = np.vstack((A_input, val_feats[i:])) # concatenate along axis 0 temp_prob = sess.run(pddm, feed_dict={input_ph: AB_input, dropout_ph:1.0}) count += temp_prob.shape[0] threshold = 0.8 for j in range(temp_prob.shape[0]): if temp_prob[j, 0] > threshold or temp_prob[j, 1] > threshold: count_high += 1 if val_labels[i] == val_labels[i+j] and 
temp_prob[j, 0]>threshold: count_fn += 1 fout_fn.write("{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\n".format(i,i+j,val_labels[i,0],val_labels[i+j,0],temp_prob[j,0],temp_prob[j,1])) elif val_labels[i] != val_labels[i+j] and temp_prob[j,1] > threshold: count_fp += 1 fout_fp.write("{}\t{}\t{}\t{}\t{:.4f}\t{:.4f}\n".format(i,i+j,val_labels[i,0],val_labels[i+j,0],temp_prob[j,0],temp_prob[j,1])) fout_fp.close() fout_fn.close() print ("High confidence (%f) pairs ratio: %.4f" % (threshold, float(count_high)/count)) print ("Consistent pairs ratio: %.4f" % (float(count_high-count_fp-count_fn)/count_high)) print ("False positive pairs ratio: %.4f" % (float(count_fp)/count_high)) print ("False negative pairs ratio: %.4f" % (float(count_fn)/count_high))
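# ---------------------------------------------------------------------------
# Illustrative sketch: the evaluation loop above scores event i against every
# event j >= i in one forward pass.  The query is tiled to the length of the
# remaining events and the two stacks are concatenated along axis 0, because
# the graph splits its input batch in half (emb_A = first half, emb_B =
# second half).  With toy shapes:
import numpy as np

feats = np.random.rand(6, 10, 8)  # (num_events, num_seg, feat_dim)
i = 2
A_input = np.tile(feats[i], (feats.shape[0] - i, 1, 1))  # repeated query
AB_input = np.vstack((A_input, feats[i:]))               # [A ... A, B_i ... B_N]
assert AB_input.shape[0] == 2 * (feats.shape[0] - i)
# ---------------------------------------------------------------------------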
def main(): cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root) train_set = train_set[:cfg.label_num] batch_per_epoch = len(train_set) // cfg.sess_per_batch val_session = cfg.val_session val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) lr_ph = tf.placeholder(tf.float32, name='learning_rate') # load backbone model if cfg.network == "tsn": model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "rtsn": model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_input=cfg.n_input) elif cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input) elif cfg.network == "convbirtsn": model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError model_ver = networks.PDDM(n_input=cfg.emb_dim) # get the embedding if cfg.feat == "sensors" or cfg.feat == "segment": input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None]) elif cfg.feat == "resnet" or cfg.feat == "segment_down": input_ph = tf.placeholder( tf.float32, shape=[None, cfg.num_seg, None, None, None]) dropout_ph = tf.placeholder(tf.float32, shape=[]) model_emb.forward(input_ph, dropout_ph) if cfg.normalized: embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10) else: embedding = model_emb.hidden # variable for visualizing the embeddings emb_var = tf.Variable([0.0], name='embeddings') set_emb = tf.assign(emb_var, embedding, validate_shape=False) # calculated for monitoring all-pair embedding distance diffs = utils.all_diffs_tf(embedding, embedding) all_dist = utils.cdist_tf(diffs) tf.summary.histogram('embedding_dists', all_dist) # split embedding into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1) metric_loss = networks.triplet_loss(anchor, positive, negative, cfg.alpha) model_ver.forward(tf.stack((anchor, positive), axis=1)) pddm_ap = model_ver.prob[:, 0] model_ver.forward(tf.stack((anchor, negative), axis=1)) pddm_an = model_ver.prob[:, 0] pddm_loss = tf.reduce_mean( tf.maximum(tf.add(tf.subtract(pddm_ap, pddm_an), 0.6), 0.0), 0) regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = pddm_loss + 0.5 * metric_loss + regularization_loss * cfg.lambda_l2 tf.summary.scalar('learning_rate', lr_ph) train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, tf.global_variables()) saver = tf.train.Saver(max_to_keep=10) summary_op = tf.summary.merge_all() # session iterator for session sampling feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) train_data = session_generator(feat_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, 
shuffled=False, preprocess_func=model_emb.prepare_input) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # prepare validation data val_feats = [] val_labels = [] for session in val_set: eve_batch, lab_batch, _ = load_data_and_label( session[0], session[1], model_emb.prepare_input_test ) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_feats = np.concatenate(val_feats, axis=0) val_labels = np.concatenate(val_labels, axis=0) print("Shape of val_feats: ", val_feats.shape) # generate metadata.tsv for visualize embedding with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: for v in val_labels: fout.write('%d\n' % int(v)) # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) # load pretrain model, if needed if cfg.model_path: print("Restoring pretrained model: %s" % cfg.model_path) saver.restore(sess, cfg.model_path) ################## Training loop ################## epoch = -1 while epoch < cfg.max_epochs - 1: step = sess.run(global_step, feed_dict=None) epoch = step // batch_per_epoch # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) # prepare data for this epoch random.shuffle(train_set) feat_paths = [path[0] for path in train_set] label_paths = [path[1] for path in train_set] # reshape a list to list of list # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python feat_paths = list(zip(*[iter(feat_paths)] * cfg.sess_per_batch)) label_paths = list( zip(*[iter(label_paths)] * cfg.sess_per_batch)) sess.run(train_sess_iterator.initializer, feed_dict={ feat_paths_ph: feat_paths, label_paths_ph: label_paths }) # for each epoch batch_count = 1 while True: try: # Hierarchical sampling (same as fast rcnn) start_time_select = time.time() # First, sample sessions for a batch eve, se, lab = sess.run(next_train) select_time1 = time.time() - start_time_select # Get the similarity of all events sim_prob = np.zeros((eve.shape[0], eve.shape[0]), dtype='float32') * np.nan comb = list( itertools.combinations(range(eve.shape[0]), 2)) for start, end in zip( range(0, len(comb), cfg.batch_size), range(cfg.batch_size, len(comb) + cfg.batch_size, cfg.batch_size)): end = min(end, len(comb)) comb_idx = [] for c in comb[start:end]: comb_idx.extend([c[0], c[1], c[1]]) emb = sess.run(pddm_ap, feed_dict={ input_ph: eve[comb_idx], dropout_ph: 1.0 }) for i in range(emb.shape[0]): sim_prob[comb[start + i][0], comb[start + i][1]] = emb[i] sim_prob[comb[start + i][1], comb[start + i][0]] = emb[i] # Second, sample triplets within sampled sessions triplet_selected, active_count = utils.select_triplets_facenet( lab, sim_prob, cfg.triplet_per_batch, cfg.alpha) select_time2 = time.time( ) - start_time_select - select_time1 start_time_train = time.time() triplet_input_idx = [ idx for triplet in triplet_selected for idx in triplet ] triplet_input = eve[triplet_input_idx] # perform training on the selected triplets err, _, step, summ = sess.run( [total_loss, train_op, global_step, 
summary_op], feed_dict={ input_ph: triplet_input, dropout_ph: cfg.keep_prob, lr_ph: learning_rate }) train_time = time.time() - start_time_train print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \ (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], triplet_input.shape[0]//3, select_time1, select_time2, train_time, err)) summary = tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), tf.Summary.Value(tag="active_count", simple_value=active_count), tf.Summary.Value( tag="triplet_num", simple_value=triplet_input.shape[0] // 3) ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!" % (epoch + 1)) break # validation on val_set print("Evaluating on validation set...") val_embeddings, _ = sess.run([embedding, set_emb], feed_dict={ input_ph: val_feats, dropout_ph: 1.0 }) mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels) val_sim_prob = np.zeros( (val_feats.shape[0], val_feats.shape[0]), dtype='float32') * np.nan val_comb = list( itertools.combinations(range(val_feats.shape[0]), 2)) for start, end in zip( range(0, len(val_comb), cfg.batch_size), range(cfg.batch_size, len(val_comb) + cfg.batch_size, cfg.batch_size)): end = min(end, len(val_comb)) comb_idx = [] for c in val_comb[start:end]: comb_idx.extend([c[0], c[1], c[1]]) emb = sess.run(pddm_ap, feed_dict={ input_ph: val_feats[comb_idx], dropout_ph: 1.0 }) for i in range(emb.shape[0]): val_sim_prob[val_comb[start + i][0], val_comb[start + i][1]] = emb[i] val_sim_prob[val_comb[start + i][1], val_comb[start + i][0]] = emb[i] mAP_PDDM = 0.0 count = 0 for i in range(val_labels.shape[0]): if val_labels[i] > 0: temp_labels = np.delete(val_labels, i, 0) temp = np.delete(val_sim_prob, i, 1) mAP_PDDM += average_precision_score( np.squeeze(temp_labels == val_labels[i, 0]), np.squeeze(1 - temp[i])) count += 1 mAP_PDDM /= count summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation mAP", simple_value=mAP), tf.Summary.Value(tag="Validation mAP_PDDM", simple_value=mAP_PDDM), tf.Summary.Value(tag="Validation mPrec", simple_value=mPrec) ]) summary_writer.add_summary(summary, step) print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f\tmAP_PDDM: %.4f" % (epoch + 1, mAP, mPrec, mAP_PDDM)) # config for embedding visualization config = projector.ProjectorConfig() visual_embedding = config.embeddings.add() visual_embedding.tensor_name = emb_var.name visual_embedding.metadata_path = os.path.join( result_dir, 'metadata_val.tsv') projector.visualize_embeddings(summary_writer, config) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step)
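# ---------------------------------------------------------------------------
# Illustrative sketch: both the triplet-selection and validation code above
# fill a symmetric similarity matrix by scoring all 2-combinations of events
# in mini-batches.  Each pair (a, b) is packed as the triple [a, b, b] because
# the graph reshapes its input into (anchor, positive, negative) triplets and
# pddm_ap only reads the (anchor, positive) slot.  `score_fn` below is a
# stand-in for the sess.run(pddm_ap, ...) call:
import itertools
import numpy as np

def fill_similarity_matrix(n, batch_size, score_fn):
    sim = np.full((n, n), np.nan, dtype='float32')
    comb = list(itertools.combinations(range(n), 2))
    for start in range(0, len(comb), batch_size):
        batch = comb[start:start + batch_size]
        idx = [k for a, b in batch for k in (a, b, b)]
        scores = score_fn(idx)  # one score per pair in `batch`
        for (a, b), s in zip(batch, scores):
            sim[a, b] = sim[b, a] = s
    return sim
# ---------------------------------------------------------------------------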
def main(): cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session tfrecords_files = glob.glob(cfg.tfrecords_root + '*.tfrecords') tfrecords_files = sorted(tfrecords_files) train_set = [ f for f in tfrecords_files if os.path.basename(f).split('_')[0] in train_session ] print("Number of training events: %d" % len(train_set)) val_session = cfg.val_session val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) lr_ph = tf.placeholder(tf.float32, name='learning_rate') # load backbone model and get the embdding if cfg.network == "tsn": model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) input_ph = tf.placeholder( tf.float32, shape=[None, cfg.num_seg, None, None, None]) seqlen_ph = tf.placeholder(tf.int32, shape=[None]) # fake, for consistency model.forward(input_ph) elif cfg.network == "lstm": model = networks.ConvLSTM(max_time=cfg.MAX_LENGTH_FRAMES, emb_dim=cfg.emb_dim) input_ph = tf.placeholder( tf.float32, shape=[None, cfg.MAX_LENGTH_FRAMES, None, None, None]) seqlen_ph = tf.placeholder(tf.int32, shape=[None]) model.forward(input_ph, seqlen_ph) if cfg.normalized: embedding = tf.nn.l2_normalize(model.hidden, axis=-1, epsilon=1e-10) else: embedding = model.hidden # variable for visualizing the embeddings emb_var = tf.Variable([0.0], name='embeddings') set_emb = tf.assign(emb_var, embedding, validate_shape=False) # calculated for monitoring all-pair embedding distance diffs = utils.all_diffs_tf(embedding, embedding) all_dist = utils.cdist_tf(diffs) tf.summary.histogram('embedding_dists', all_dist) # split embedding into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1) triplet_loss = networks.triplet_loss(anchor, positive, negative, cfg.alpha) regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = triplet_loss + regularization_loss * cfg.lambda_l2 tf.summary.scalar('learning_rate', lr_ph) train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, tf.global_variables()) saver = tf.train.Saver(max_to_keep=10) summary_op = tf.summary.merge_all() # session iterator for session sampling tf_paths_ph = tf.placeholder(tf.string, shape=[None]) feat_dict = {'resnet': 98304} context_dict = {'label': 'int', 'length': 'int'} train_data = event_generator(tf_paths_ph, feat_dict, context_dict, event_per_batch=cfg.event_per_batch, num_threads=4, shuffled=True, preprocess_func=model.prepare_input_tf) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # prepare validation data val_feats = [] val_labels = [] val_lengths = [] for session in val_set: eve_batch, lab_batch, bou_batch = load_data_and_label( session[0], session[1], model.prepare_input) val_feats.append(eve_batch) val_labels.append(lab_batch) val_lengths.extend([b[1] - b[0] for b in bou_batch]) val_feats = np.concatenate(val_feats, axis=0) val_labels = np.concatenate(val_labels, axis=0) val_lengths = np.asarray(val_lengths, dtype='int32') print("Shape of val_feats: ", val_feats.shape) # 
generate metadata.tsv for visualizing embeddings with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: for v in val_labels: fout.write('%d\n' % int(v)) # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) # load pretrain model, if needed if cfg.pretrained_model: print("Restoring pretrained model: %s" % cfg.pretrained_model) saver.restore(sess, cfg.pretrained_model) ################## Training loop ################## epoch = 0 while epoch < cfg.max_epochs: step = sess.run(global_step, feed_dict=None) # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.001**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) sess.run(train_sess_iterator.initializer, feed_dict={tf_paths_ph: train_set}) # for each epoch batch_count = 1 while True: try: start_time_select = time.time() context, feature_lists = sess.run(next_train) select_time1 = time.time() - start_time_select eve = feature_lists[cfg.feat].reshape( (-1, cfg.num_seg) + cfg.feat_dim[cfg.feat]) lab = context['label'] seq_len = context['length'] # Get the embeddings of all events eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim), dtype='float32') for start, end in zip( range(0, eve.shape[0], cfg.batch_size), range(cfg.batch_size, eve.shape[0] + cfg.batch_size, cfg.batch_size)): end = min(end, eve.shape[0]) emb = sess.run(embedding, feed_dict={ input_ph: eve[start:end], seqlen_ph: seq_len[start:end] }) eve_embedding[start:end] = emb # Second, sample triplets within sampled sessions # return the triplet input indices if cfg.triplet_select == 'random': triplet_input_idx = select_triplets_random( eve, lab, cfg.triplet_per_batch) negative_count = 0 elif cfg.triplet_select == 'facenet': if epoch < cfg.negative_epochs: triplet_input_idx = select_triplets_random( eve, lab, cfg.triplet_per_batch) negative_count = 0 else: triplet_input_idx, negative_count = select_triplets_facenet( lab, eve_embedding, cfg.triplet_per_batch, cfg.alpha, metric=cfg.metric) else: raise NotImplementedError select_time2 = time.time() - start_time_select - select_time1 if triplet_input_idx is not None: triplet_input = eve[triplet_input_idx] triplet_length = seq_len[triplet_input_idx] start_time_train = time.time() # perform training on the selected triplets err, _, step, summ = sess.run( [ total_loss, train_op, global_step, summary_op ], feed_dict={ input_ph: triplet_input, seqlen_ph: triplet_length, lr_ph: learning_rate }) train_time = time.time() - start_time_train print ("Epoch: [%d][%d]\tEvent num: %d\tTriplet num: %d\tSelect_time1: %.3f\tSelect_time2: %.3f\tTrain_time: %.3f\tLoss %.4f" % \ (epoch+1, batch_count, eve.shape[0], triplet_input.shape[0]//3, select_time1, select_time2, train_time, err)) summary = tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), tf.Summary.Value(tag="negative_count", simple_value=negative_count), tf.Summary.Value(tag="select_time1", simple_value=select_time1) ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!"
% (epoch + 1)) break epoch += 1 # epoch is not derived from global_step in this script, so advance it explicitly # validation on val_set print("Evaluating on validation set...") val_embeddings, _ = sess.run([embedding, set_emb], feed_dict={ input_ph: val_feats, seqlen_ph: val_lengths }) mAP, _ = utils.evaluate(val_embeddings, val_labels) summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation mAP", simple_value=mAP) ]) summary_writer.add_summary(summary, step) # config for embedding visualization config = projector.ProjectorConfig() visual_embedding = config.embeddings.add() visual_embedding.tensor_name = emb_var.name visual_embedding.metadata_path = os.path.join( result_dir, 'metadata_val.tsv') projector.visualize_embeddings(summary_writer, config) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step)
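# ---------------------------------------------------------------------------
# Illustrative sketch: the embedding-extraction loops in these scripts all
# iterate in fixed windows via zip(range(0, n, b), range(b, n + b, b)) and
# clamp the final window with end = min(end, n).  The same pattern as a small
# reusable helper:
import numpy as np

def batched_apply(fn, data, batch_size):
    n = len(data)
    out = []
    for start, end in zip(range(0, n, batch_size),
                          range(batch_size, n + batch_size, batch_size)):
        end = min(end, n)  # clamp the last, possibly short, window
        out.append(fn(data[start:end]))
    return np.concatenate(out, axis=0)
# ---------------------------------------------------------------------------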
def main(): # Load configurations and write to config.txt cfg = TrainConfig().parse() print(cfg.name) result_dir = os.path.join( cfg.result_root, cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')) if not os.path.isdir(result_dir): os.makedirs(result_dir) utils.write_configure_to_file(cfg, result_dir) np.random.seed(seed=cfg.seed) # prepare dataset train_session = cfg.train_session train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat, cfg.label_root) train_set = train_set[:cfg.label_num] batch_per_epoch = len(train_set) // cfg.sess_per_batch val_session = cfg.val_session val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat, cfg.label_root) # construct the graph with tf.Graph().as_default(): tf.set_random_seed(cfg.seed) global_step = tf.Variable(0, trainable=False) label_ph = tf.placeholder(tf.int32, shape=[None], name="label") lr_ph = tf.placeholder(tf.float32, name='learning_rate') ####################### Define model here ######################## # Load embedding model if cfg.network == "tsn": model_emb = networks.TSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "rtsn": model_emb = networks.RTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convtsn": model_emb = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) elif cfg.network == "convrtsn": model_emb = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim, n_h=cfg.n_h, n_w=cfg.n_w, n_C=cfg.n_C, n_input=cfg.n_input) elif cfg.network == "convbirtsn": model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim) else: raise NotImplementedError # get the embedding if cfg.feat == "sensors" or cfg.feat == "segment": input_ph = tf.placeholder(tf.float32, shape=[None, cfg.num_seg, None]) elif cfg.feat == "resnet" or cfg.feat == "segment_down": input_ph = tf.placeholder( tf.float32, shape=[None, cfg.num_seg, None, None, None]) dropout_ph = tf.placeholder(tf.float32, shape=[]) model_emb.forward(input_ph, dropout_ph) if cfg.normalized: embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1, epsilon=1e-10) else: embedding = model_emb.hidden # Use tensorflow implementation for loss functions if cfg.loss == 'triplet': metric_loss, active_count = loss_tf.triplet_semihard_loss( labels=label_ph, embeddings=embedding, margin=cfg.alpha) elif cfg.loss == 'lifted': metric_loss, active_count = loss_tf.lifted_struct_loss( labels=label_ph, embeddings=embedding, margin=cfg.alpha) else: raise NotImplementedError regularization_loss = tf.reduce_sum( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) total_loss = metric_loss + regularization_loss * cfg.lambda_l2 tf.summary.scalar('learning_rate', lr_ph) train_op = utils.optimize(total_loss, global_step, cfg.optimizer, lr_ph, tf.global_variables()) ####################### Define data loader ############################ # session iterator for session sampling feat_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) label_paths_ph = tf.placeholder(tf.string, shape=[None, cfg.sess_per_batch]) train_data = session_generator(feat_paths_ph, label_paths_ph, sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False, preprocess_func=model_emb.prepare_input) train_sess_iterator = train_data.make_initializable_iterator() next_train = train_sess_iterator.get_next() # Prepare validation data val_sess = [] val_feats = [] val_labels = [] val_boundaries = [] for session in val_set: session_id = os.path.basename(session[1]).split('_')[0] eve_batch, lab_batch, boundary = load_data_and_label( session[0], 
session[-1], model_emb.prepare_input_test ) # use prepare_input_test for testing time val_feats.append(eve_batch) val_labels.append(lab_batch) val_sess.extend([session_id] * eve_batch.shape[0]) val_boundaries.extend(boundary) val_feats = np.concatenate(val_feats, axis=0) val_labels = np.concatenate(val_labels, axis=0) print("Shape of val_feats: ", val_feats.shape) # generate metadata.tsv for visualize embedding with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout: fout.write('id\tlabel\tsession_id\tstart\tend\n') for i in range(len(val_sess)): fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format( i, val_labels[i, 0], val_sess[i], val_boundaries[i][0], val_boundaries[i][1])) # Variable for visualizing the embeddings emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]), name='embeddings') set_emb = tf.assign(emb_var, embedding, validate_shape=False) # calculated for monitoring all-pair embedding distance diffs = utils.all_diffs_tf(embedding, embedding) all_dist = utils.cdist_tf(diffs) tf.summary.histogram('embedding_dists', all_dist) summary_op = tf.summary.merge_all() saver = tf.train.Saver(max_to_keep=10) ######################################################################### # Start running the graph if cfg.gpu: os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) summary_writer = tf.summary.FileWriter(result_dir, sess.graph) with sess.as_default(): sess.run(tf.global_variables_initializer()) # load pretrain model, if needed if cfg.model_path: print("Restoring pretrained model: %s" % cfg.model_path) saver.restore(sess, cfg.model_path) ################## Training loop ################## epoch = -1 while epoch < cfg.max_epochs - 1: step = sess.run(global_step, feed_dict=None) epoch = step // batch_per_epoch # learning rate schedule, reference: "In defense of Triplet Loss" if epoch < cfg.static_epochs: learning_rate = cfg.learning_rate else: learning_rate = cfg.learning_rate * \ 0.01**((epoch-cfg.static_epochs)/(cfg.max_epochs-cfg.static_epochs)) # prepare data for this epoch random.shuffle(train_set) feat_paths = [path[0] for path in train_set] label_paths = [path[1] for path in train_set] # reshape a list to list of list # interesting hacky code from: https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python feat_paths = list(zip(*[iter(feat_paths)] * cfg.sess_per_batch)) label_paths = list( zip(*[iter(label_paths)] * cfg.sess_per_batch)) sess.run(train_sess_iterator.initializer, feed_dict={ feat_paths_ph: feat_paths, label_paths_ph: label_paths }) # for each epoch batch_count = 1 while True: try: # Get a batch start_time_select = time.time() eve, se, lab = sess.run(next_train) # for memory concern, cfg.event_per_batch events are used in maximum if eve.shape[0] > cfg.event_per_batch: idx = np.random.permutation( eve.shape[0])[:cfg.event_per_batch] eve = eve[idx] se = se[idx] lab = lab[idx] select_time = time.time() - start_time_select start_time_train = time.time() # perform training on the batch err, _, step, summ = sess.run( [total_loss, train_op, global_step, summary_op], feed_dict={ input_ph: eve, label_ph: np.squeeze(lab), dropout_ph: cfg.keep_prob, lr_ph: learning_rate }) train_time = time.time() - start_time_train print ("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" % \ (cfg.name, epoch+1, batch_count, batch_per_epoch, eve.shape[0], select_time, train_time, err)) summary = 
tf.Summary(value=[ tf.Summary.Value(tag="train_loss", simple_value=err), ]) summary_writer.add_summary(summary, step) summary_writer.add_summary(summ, step) batch_count += 1 except tf.errors.OutOfRangeError: print("Epoch %d done!" % (epoch + 1)) break # validation on val_set print("Evaluating on validation set...") val_embeddings, _ = sess.run([embedding, set_emb], feed_dict={ input_ph: val_feats, dropout_ph: 1.0 }) mAP, mPrec, recall = utils.evaluate_simple( val_embeddings, val_labels) summary = tf.Summary(value=[ tf.Summary.Value(tag="Validation mAP", simple_value=mAP), tf.Summary.Value(tag="Validation Recall@1", simple_value=recall), tf.Summary.Value(tag="Validation mPrec", simple_value=mPrec) ]) summary_writer.add_summary(summary, step) print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" % (epoch + 1, mAP, mPrec)) # config for embedding visualization config = projector.ProjectorConfig() visual_embedding = config.embeddings.add() visual_embedding.tensor_name = emb_var.name visual_embedding.metadata_path = os.path.join( result_dir, 'metadata_val.tsv') projector.visualize_embeddings(summary_writer, config) # save model saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'), global_step=step)
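# ---------------------------------------------------------------------------
# Illustrative sketch: `loss_tf` is a project module; the comment "Use
# tensorflow implementation for loss functions" suggests it mirrors the stock
# TF 1.x metric-learning losses, extended to also return an `active_count` of
# margin-violating triplets.  The stock op (single return value) is used like
# this; per its documentation it expects integer labels and l2-normalized
# embeddings, which matches the cfg.normalized branch above:
import tensorflow as tf

labels_ph = tf.placeholder(tf.int32, shape=[None])
emb_ph = tf.placeholder(tf.float32, shape=[None, 128])
semihard_loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
    labels=labels_ph, embeddings=emb_ph, margin=0.2)
# ---------------------------------------------------------------------------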
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')
        label_ph = tf.placeholder(tf.int32, shape=[None], name="label")

        ####################### Load models here ########################
        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            # forward pass (LSTM variants define their variable scope here)
            model_emb.forward(input_ph, dropout_ph)

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_sensors"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg,
                                                   emb_dim=sensors_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                    emb_dim=sensors_emb_dim)
            else:
                raise NotImplementedError
            # hallucination branch takes the core (visual) input
            hal_emb_sensors.forward(input_ph, dropout_ph)

        with tf.variable_scope("modality_segment"):
            segment_emb_dim = 32
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)
            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_segment"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_segment = networks.ConvTSN(n_seg=cfg.num_seg,
                                                   emb_dim=segment_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_segment = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                    emb_dim=segment_emb_dim)
            else:
                raise NotImplementedError
            hal_emb_segment.forward(input_ph, dropout_ph)

        ############################# Forward Pass #############################
        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                           epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden,
                                                   axis=-1, epsilon=1e-10)
            embedding_hal_sensors = tf.nn.l2_normalize(hal_emb_sensors.hidden,
                                                       axis=-1, epsilon=1e-10)
            embedding_segment = tf.nn.l2_normalize(model_emb_segment.hidden,
                                                   axis=-1, epsilon=1e-10)
            embedding_hal_segment = tf.nn.l2_normalize(hal_emb_segment.hidden,
                                                       axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_hal_sensors = hal_emb_sensors.hidden
            embedding_segment = model_emb_segment.hidden
            embedding_hal_segment = hal_emb_segment.hidden

        # calculated for monitoring the all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # a fusion embedding
        embedding_fused = tf.concat(
            (embedding, embedding_hal_sensors, embedding_hal_segment), axis=1)

        ############################# Calculate loss #############################
        # Use tensorflow implementation for loss functions
        if cfg.loss == 'triplet':
            metric_loss1, active_count = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding, margin=cfg.alpha)
            metric_loss2, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_sensors,
                margin=cfg.alpha)
            metric_loss3, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_hal_sensors,
                margin=cfg.alpha)
            metric_loss4, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_segment,
                margin=cfg.alpha)
            metric_loss5, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_hal_segment,
                margin=cfg.alpha)
            metric_loss6, _ = loss_tf.triplet_semihard_loss(
                labels=label_ph, embeddings=embedding_fused, margin=cfg.alpha)
            metric_loss = (metric_loss1 + metric_loss2 + metric_loss3 +
                           metric_loss4 + metric_loss5 + metric_loss6)
        # elif cfg.loss == 'lifted':
        #     metric_loss, active_count = loss_tf.lifted_struct_loss(
        #         labels=label_ph, embeddings=embedding, margin=cfg.alpha)
        else:
            raise NotImplementedError

        # hallucination loss (regression loss)
        hal_loss_sensors = tf.nn.l2_loss(embedding_sensors -
                                         embedding_hal_sensors)
        hal_loss_segment = tf.nn.l2_loss(embedding_segment -
                                         embedding_hal_segment)
        hal_loss = hal_loss_sensors + hal_loss_segment

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        # use lambda_multimodal for hal_loss
        total_loss = (metric_loss + cfg.lambda_multimodal * hal_loss +
                      regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph, feat2_paths_ph, feat3_paths_ph, label_paths_ph,
            sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False,
            preprocess_func=[model_emb.prepare_input,
                             model_emb_sensors.prepare_input,
                             model_emb_segment.prepare_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            # use prepare_input_test at testing time
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        # Variable for visualizing the embeddings
        emb_var = tf.Variable(tf.zeros([val_feats.shape[0], cfg.emb_dim]),
                              name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=10)

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.01**((epoch - cfg.static_epochs) /
                               (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))
                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    feat2_paths_ph: feat2_paths,
                                    feat3_paths_ph: feat3_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = \
                            sess.run(next_train)
                        # to limit memory use, at most cfg.event_per_batch
                        # events are kept
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            batch_sess = batch_sess[idx]
                            lab = lab[idx]
                        load_time = time.time() - start_time

                        ##################### Start training ########################
                        # active_count is fetched here so the summary below
                        # logs a value rather than a tensor
                        err, metric_err, hal_err, active_np, _, step, summ = \
                            sess.run([total_loss, metric_loss, hal_loss,
                                      active_count, train_op, global_step,
                                      summary_op],
                                     feed_dict={input_ph: eve,
                                                input_sensors_ph: eve_sensors,
                                                input_segment_ph: eve_segment,
                                                label_ph: np.squeeze(lab),
                                                dropout_ph: cfg.keep_prob,
                                                lr_ph: learning_rate})

                        print("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tLoad time: %.3f\tMetric Loss %.4f\tHal Loss %.4f" %
                              (cfg.name, epoch + 1, batch_count,
                               batch_per_epoch, eve.shape[0], load_time,
                               metric_err, hal_err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_np),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="hallucination_loss",
                                             simple_value=hal_err)])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1
                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, hal_err, _ = sess.run(
                    [embedding, hal_loss, set_emb],
                    feed_dict={input_ph: val_feats,
                               input_sensors_ph: val_feats2,
                               input_segment_ph: val_feats3,
                               dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec),
                    tf.Summary.Value(tag="Validation hal loss",
                                     simple_value=hal_err)])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
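# ----------------------------------------------------------------------------
# Note on the hallucination loss above: tf.nn.l2_loss(a - b) computes
# 0.5 * sum((a - b)**2), i.e. an unnormalized squared-error regression of the
# hallucinated embedding onto the real modality embedding. A minimal numpy
# sketch of that quantity (illustrative only; the shapes below are made up):
import numpy as np

def hal_loss_np(emb_real, emb_hal):
    """0.5 * sum of squared differences, matching tf.nn.l2_loss(a - b)."""
    d = emb_real - emb_hal
    return 0.5 * np.sum(d * d)

# toy check: a batch of 4 events with 32-dim sensor embeddings
_rng = np.random.RandomState(0)
_a, _b = _rng.randn(4, 32), _rng.randn(4, 32)
assert np.isclose(hal_loss_np(_a, _b), 0.5 * ((_a - _b) ** 2).sum())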
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    if cfg.task == "supervised":    # fully supervised task
        train_set = train_set[:cfg.label_num]
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        ####################### Load models here ########################
        sensors_emb_dim = 32
        segment_emb_dim = 32

        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            elif cfg.network == "convbirtsn":
                model_emb = networks.ConvBiRTSN(n_seg=cfg.num_seg,
                                                emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            # forward pass (LSTM variants define their variable scope here)
            model_emb.forward(input_ph, dropout_ph)

        with tf.variable_scope("modality_sensors"):
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)
            model_pairsim_sensors = networks.PDDM(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("modality_segment"):
            model_emb_segment = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=segment_emb_dim,
                                              n_input=357)
            model_pairsim_segment = networks.PDDM(n_input=segment_emb_dim)

            input_segment_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 357])
            model_emb_segment.forward(input_segment_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_segment"):
                    var_list[v.op.name.replace("modality_segment/", "")] = v
            restore_saver_segment = tf.train.Saver(var_list)

        ############################# Forward Pass #############################
        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model_emb.hidden

        # get the number of multimodal triplets (x3)
        mul_num_ph = tf.placeholder(tf.int32, shape=[])
        margins_ph = tf.placeholder(tf.float32, shape=[None])
        struct_num = tf.shape(margins_ph)[0] * 3

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring the all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate
        # triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding[:(tf.shape(embedding)[0] - mul_num_ph)],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_hard, positive_hard, negative_hard = tf.unstack(
            tf.reshape(embedding[-mul_num_ph:-struct_num],
                       [-1, 3, cfg.emb_dim]), 3, 1)
        anchor_struct, positive_struct, negative_struct = tf.unstack(
            tf.reshape(embedding[-struct_num:], [-1, 3, cfg.emb_dim]), 3, 1)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors, B_sensors, C_sensors = tf.unstack(
            tf.reshape(emb_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        model_pairsim_sensors.forward(tf.stack([A_sensors, B_sensors], axis=1))
        pddm_AB_sensors = model_pairsim_sensors.prob[:, 1]
        model_pairsim_sensors.forward(tf.stack([A_sensors, C_sensors], axis=1))
        pddm_AC_sensors = model_pairsim_sensors.prob[:, 1]

        # Segment branch
        emb_segment = model_emb_segment.hidden
        A_segment, B_segment, C_segment = tf.unstack(
            tf.reshape(emb_segment, [-1, 3, segment_emb_dim]), 3, 1)
        model_pairsim_segment.forward(tf.stack([A_segment, B_segment], axis=1))
        pddm_AB_segment = model_pairsim_segment.prob[:, 1]
        model_pairsim_segment.forward(tf.stack([A_segment, C_segment], axis=1))
        pddm_AC_segment = model_pairsim_segment.prob[:, 1]

        # fuse prob from all modalities
        prob_AB = 0.5 * (pddm_AB_sensors + pddm_AB_segment)
        prob_AC = 0.5 * (pddm_AC_sensors + pddm_AC_segment)

        ############################# Calculate loss #############################
        # triplet loss for labeled inputs
        metric_loss1 = networks.triplet_loss(anchor, positive, negative,
                                             cfg.alpha)

        # weighted triplet loss for multimodal inputs
        # if cfg.weighted:
        #     metric_loss2, _ = networks.weighted_triplet_loss(
        #         anchor_hard, positive_hard, negative_hard,
        #         prob_AB, prob_AC, cfg.alpha)
        # else:

        # triplet loss for hard examples from multimodal data
        metric_loss2 = networks.triplet_loss(anchor_hard, positive_hard,
                                             negative_hard, cfg.alpha)
        # margin-based triplet loss for structure mining from multimodal data
        metric_loss3 = networks.triplet_loss(anchor_struct, positive_struct,
                                             negative_struct, margins_ph)

        # whether to apply joint optimization
        if cfg.no_joint:
            unimodal_var_list = [
                v for v in tf.global_variables()
                if v.op.name.startswith("modality_core")]
            train_var_list = unimodal_var_list
        else:
            multimodal_var_list = [
                v for v in tf.global_variables()
                if not (v.op.name.startswith("modality_sensors/RTSN") or
                        v.op.name.startswith("modality_segment/RTSN"))]
            train_var_list = multimodal_var_list

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = tf.cond(
            tf.greater(mul_num_ph, 0),
            lambda: tf.cond(
                tf.equal(mul_num_ph, tf.shape(embedding)[0]),
                lambda: (metric_loss2 + metric_loss3 * 0.3) *
                cfg.lambda_multimodal + regularization_loss * cfg.lambda_l2,
                lambda: metric_loss1 +
                (metric_loss2 + metric_loss3 * 0.3) * cfg.lambda_multimodal +
                regularization_loss * cfg.lambda_l2),
            lambda: metric_loss1 + regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, train_var_list)

        saver = tf.train.Saver(max_to_keep=10)
        # not logging histograms of variables because it causes problems when
        # only unimodal_train_op is called
        summary_op = tf.summary.merge_all()

        summ_prob_AB = tf.summary.histogram('Prob_AB_histogram', prob_AB)
        summ_prob_AC = tf.summary.histogram('Prob_AC_histogram', prob_AC)
        # summ_weights = tf.summary.histogram('Weights_histogram', weights)

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        feat3_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph, feat2_paths_ph, feat3_paths_ph, label_paths_ph,
            sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False,
            preprocess_func=[model_emb.prepare_input,
                             model_emb_sensors.prepare_input,
                             model_emb_segment.prepare_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_feats3 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            # use prepare_input_test at testing time
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(
                session[1], session[-1], model_emb_sensors.prepare_input_test)
            val_feats2.append(eve2_batch)

            eve3_batch, _, _ = load_data_and_label(
                session[2], session[-1], model_emb_segment.prepare_input_test)
            val_feats3.append(eve3_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_feats3 = np.concatenate(val_feats3, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)
            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)
            print("Restoring segment model: %s" % cfg.segment_path)
            restore_saver_segment.restore(sess, cfg.segment_path)

            ################## Training loop ##################

            # Initialize pairwise embedding distance for each class on the
            # validation set
            val_embeddings, _ = sess.run([embedding, set_emb],
                                         feed_dict={input_ph: val_feats,
                                                    dropout_ph: 1.0})
            dist_dict = {}
            for i in range(np.max(val_labels) + 1):
                temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                dist_dict[i] = [np.mean(utils.cdist(
                    utils.all_diffs(temp_emb, temp_emb), metric=cfg.metric))]

            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.01**((epoch - cfg.static_epochs) /
                               (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))
                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                feat3_paths = [[p[2] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    feat2_paths_ph: feat2_paths,
                                    feat3_paths_ph: feat3_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, eve_segment, lab, batch_sess = \
                            sess.run(next_train)
                        # to limit memory use, at most cfg.event_per_batch
                        # events are kept
                        if eve.shape[0] > cfg.event_per_batch:
                            idx = np.random.permutation(
                                eve.shape[0])[:cfg.event_per_batch]
                            eve = eve[idx]
                            eve_sensors = eve_sensors[idx]
                            eve_segment = eve_segment[idx]
                            lab = lab[idx]
                            batch_sess = batch_sess[idx]
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()
                        # Get the embeddings of all events
                        eve_embedding = np.zeros((eve.shape[0], cfg.emb_dim),
                                                 dtype='float32')
                        for start, end in zip(
                                range(0, eve.shape[0], cfg.batch_size),
                                range(cfg.batch_size,
                                      eve.shape[0] + cfg.batch_size,
                                      cfg.batch_size)):
                            end = min(end, eve.shape[0])
                            emb = sess.run(embedding,
                                           feed_dict={input_ph: eve[start:end],
                                                      dropout_ph: 1.0})
                            eve_embedding[start:end] = np.copy(emb)

                        # sample triplets within sampled sessions
                        all_diff = utils.all_diffs(eve_embedding,
                                                   eve_embedding)
                        triplet_selected, active_count = \
                            utils.select_triplets_facenet(
                                lab, utils.cdist(all_diff, metric=cfg.metric),
                                cfg.triplet_per_batch, cfg.alpha)

                        hard_count = 0
                        struct_count = 0
                        if epoch >= cfg.multimodal_epochs:
                            # Get the similarity of all events
                            sim_prob = np.full((eve.shape[0], eve.shape[0]),
                                               np.nan, dtype='float32')
                            comb = list(itertools.combinations(
                                range(eve.shape[0]), 2))
                            for start, end in zip(
                                    range(0, len(comb), cfg.batch_size),
                                    range(cfg.batch_size,
                                          len(comb) + cfg.batch_size,
                                          cfg.batch_size)):
                                end = min(end, len(comb))
                                comb_idx = []
                                for c in comb[start:end]:
                                    comb_idx.extend([c[0], c[1], c[1]])
                                sim = sess.run(
                                    prob_AB,
                                    feed_dict={
                                        input_sensors_ph:
                                            eve_sensors[comb_idx],
                                        input_segment_ph:
                                            eve_segment[comb_idx],
                                        dropout_ph: 1.0})
                                for i in range(sim.shape[0]):
                                    sim_prob[comb[start + i][0],
                                             comb[start + i][1]] = sim[i]
                                    sim_prob[comb[start + i][1],
                                             comb[start + i][0]] = sim[i]

                            # sample triplets from similarity prediction;
                            # the maximum number does not exceed
                            # cfg.triplet_per_batch
                            (triplet_input_idx, margins, triplet_count,
                             hard_count, struct_count) = select_triplets_mul(
                                 triplet_selected, lab, sim_prob, dist_dict,
                                 cfg.triplet_per_batch, 3, 0.8, 0.2)

                            # add up all multimodal triplets
                            multimodal_count = hard_count + struct_count
                            sensors_input = eve_sensors[
                                triplet_input_idx[-(3 * multimodal_count):]]
                            segment_input = eve_segment[
                                triplet_input_idx[-(3 * multimodal_count):]]
                            print(triplet_count, hard_count, struct_count)
                        else:
                            # unimodal warm-up: fall back to the
                            # facenet-selected triplets only (reconstructed
                            # fallback so these names are defined before
                            # cfg.multimodal_epochs is reached)
                            triplet_input_idx = triplet_selected
                            triplet_count = len(triplet_selected) // 3
                            multimodal_count = 0

                        triplet_input = eve[triplet_input_idx]
                        select_time = time.time() - start_time

                        if len(triplet_input.shape) > 5:    # debugging
                            pdb.set_trace()

                        ##################### Start training ########################
                        # supervised initialization
                        if multimodal_count == 0:
                            if triplet_count == 0:
                                continue
                            err, metric_err1, _, step, summ = sess.run(
                                [total_loss, metric_loss1, train_op,
                                 global_step, summary_op],
                                feed_dict={input_ph: triplet_input,
                                           dropout_ph: cfg.keep_prob,
                                           mul_num_ph: 0,
                                           lr_ph: learning_rate})
                            metric_err2 = 0
                            metric_err3 = 0
                        else:
                            (err, metric_err1, metric_err2, metric_err3, _,
                             step, summ, s_AB, s_AC) = sess.run(
                                 [total_loss, metric_loss1, metric_loss2,
                                  metric_loss3, train_op, global_step,
                                  summary_op, summ_prob_AB, summ_prob_AC],
                                 feed_dict={input_ph: triplet_input,
                                            input_sensors_ph: sensors_input,
                                            input_segment_ph: segment_input,
                                            mul_num_ph: multimodal_count * 3,
                                            margins_ph: margins,
                                            dropout_ph: cfg.keep_prob,
                                            lr_ph: learning_rate})
                            summary_writer.add_summary(s_AB, step)
                            summary_writer.add_summary(s_AC, step)

                        print("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tLoss %.4f" %
                              (cfg.name, epoch + 1, batch_count,
                               batch_per_epoch, eve.shape[0],
                               triplet_count + multimodal_count, load_time,
                               select_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="active_count",
                                             simple_value=active_count),
                            tf.Summary.Value(tag="triplet_count",
                                             simple_value=triplet_count),
                            tf.Summary.Value(tag="hard_count",
                                             simple_value=hard_count),
                            tf.Summary.Value(tag="struct_count",
                                             simple_value=struct_count),
                            tf.Summary.Value(tag="metric_loss1",
                                             simple_value=metric_err1),
                            tf.Summary.Value(tag="metric_loss3",
                                             simple_value=metric_err3),
                            tf.Summary.Value(tag="metric_loss2",
                                             simple_value=metric_err2)])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1
                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={input_ph: val_feats,
                                                        dropout_ph: 1.0})
                mAP, mPrec, recall = utils.evaluate_simple(val_embeddings,
                                                           val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation Recall@1",
                                     simple_value=recall),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # update dist_dict
                if (epoch + 1) == 50 or (epoch + 1) % 200 == 0:
                    for i in dist_dict.keys():
                        temp_emb = val_embeddings[np.where(val_labels == i)[0]]
                        dist_dict[i].append(np.mean(utils.cdist(
                            utils.all_diffs(temp_emb, temp_emb),
                            metric=cfg.metric)))
                    pickle.dump(dist_dict,
                                open(os.path.join(result_dir,
                                                  'dist_dict.pkl'), 'wb'))

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
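# ----------------------------------------------------------------------------
# The combination loop above fills a symmetric event-by-event similarity
# matrix from pairwise PDDM probabilities. A minimal sketch of the same
# pattern with a stand-in scorer (score_pair here is a hypothetical
# placeholder for the fused PDDM probability, not part of the codebase):
import itertools
import numpy as np

def build_sim_matrix(n_events, score_pair):
    """score_pair(i, j) -> similarity in [0, 1]; the diagonal stays NaN."""
    sim = np.full((n_events, n_events), np.nan, dtype='float32')
    for i, j in itertools.combinations(range(n_events), 2):
        sim[i, j] = sim[j, i] = score_pair(i, j)  # symmetric by construction
    return sim

_rng = np.random.RandomState(0)
print(build_sim_matrix(4, lambda i, j: float(_rng.rand())))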
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim,
                                              n_input=cfg.n_input)
            model_pairsim_sensors = networks.PairSim(n_input=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, cfg.n_input])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        # Sensors branch
        emb_sensors = model_emb_sensors.hidden
        A_sensors = emb_sensors[:(tf.shape(emb_sensors)[0] // 2)]
        B_sensors = emb_sensors[(tf.shape(emb_sensors)[0] // 2):]
        AB_pairs_sensors = tf.stack([A_sensors, B_sensors], axis=1)
        model_pairsim_sensors.forward(AB_pairs_sensors, dropout_ph)
        prob_sensors = model_pairsim_sensors.prob

        # prepare validation data
        val_sess = []
        val_feats = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            # use prepare_input_test for testing time
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1],
                model_emb_sensors.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            print("Restoring pretrained model: %s" % cfg.model_path)
            restore_saver_sensors.restore(sess, cfg.model_path)

            fout = open(os.path.join(os.path.dirname(cfg.model_path),
                                     'val_inconsistent.txt'), 'w')
            fout.write('id_A\tid_B\tlabel_A\tlabel_B\tprob_0\tprob_1\n')
            for i in range(val_feats.shape[0]):
                print("%d/%d" % (i, val_feats.shape[0]))
                if val_labels[i] == 0:
                    continue

                A_input = np.tile(val_feats[i], (val_feats.shape[0], 1, 1))
                AB_input = np.vstack((A_input, val_feats))

                temp_prob = sess.run(prob_sensors,
                                     feed_dict={input_sensors_ph: AB_input,
                                                dropout_ph: 1.0})
                for j in range(temp_prob.shape[0]):
                    if val_labels[i] == val_labels[j] and \
                            temp_prob[j, 0] > 0.95:
                        fout.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(
                            i, j, val_labels[i, 0], val_labels[j, 0],
                            temp_prob[j, 0], temp_prob[j, 1]))
                    elif val_labels[i] != val_labels[j] and \
                            temp_prob[j, 1] > 0.95:
                        fout.write("{}\t{}\t{}\t{}\t{}\t{}\n".format(
                            i, j, val_labels[i, 0], val_labels[j, 0],
                            temp_prob[j, 0], temp_prob[j, 1]))
            fout.close()
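# ----------------------------------------------------------------------------
# The 0.95 thresholding above flags "inconsistent" pairs: same-label pairs
# that the verifier confidently scores as dissimilar (prob_0) and
# different-label pairs it confidently scores as similar (prob_1). A compact
# restatement of that criterion (sketch only, same threshold as above):
def is_inconsistent(label_a, label_b, prob_dissim, prob_sim, thresh=0.95):
    if label_a == label_b:
        return prob_dissim > thresh  # should match, predicted apart
    return prob_sim > thresh         # should differ, predicted alike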
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_dataset(cfg.feature_root, train_session, cfg.feat,
                                cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch

    val_session = cfg.val_session
    val_set = prepare_dataset(cfg.feature_root, val_session, cfg.feat,
                              cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')

        # load backbone model
        if cfg.network == "tsn":
            model = networks.ConvTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        elif cfg.network == "rtsn":
            model = networks.ConvRTSN(n_seg=cfg.num_seg, emb_dim=cfg.emb_dim)
        else:
            # added so `model` cannot be silently left undefined
            raise NotImplementedError

        # get the embedding
        input_ph = tf.placeholder(tf.float32,
                                  shape=[None, cfg.num_seg, None, None, None])
        label_ph = tf.placeholder(tf.float32, shape=[None])
        dropout_ph = tf.placeholder(tf.float32, shape=[])
        model.forward(input_ph, dropout_ph)
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model.hidden, axis=-1,
                                           epsilon=1e-10)
        else:
            embedding = model.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring the all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        metric_loss, num_active, diff, weights, fp, cn = networks.lifted_loss(
            all_dist, label_ph, cfg.alpha)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = metric_loss + regularization_loss * cfg.lambda_l2

        tf.summary.scalar('active_ratio', num_active)
        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        summary_op = tf.summary.merge_all()

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = session_generator(feat_paths_ph, label_paths_ph,
                                       sess_per_batch=cfg.sess_per_batch,
                                       num_threads=2, shuffled=False,
                                       preprocess_func=model.prepare_input)
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_feats = []
        val_labels = []
        for session in val_set:
            # use prepare_input_test at testing time
            eve_batch, lab_batch, _ = load_data_and_label(
                session[0], session[1], model.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            for v in val_labels:
                fout.write('%d\n' % int(v))

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                feat_paths = [path[0] for path in train_set]
                label_paths = [path[1] for path in train_set]
                # reshape a flat list to a list of lists
                # interesting hacky code from:
                # https://stackoverflow.com/questions/10124751/convert-a-flat-list-to-list-of-list-in-python
                feat_paths = list(
                    zip(*[iter(feat_paths)] * cfg.sess_per_batch))
                label_paths = list(
                    zip(*[iter(label_paths)] * cfg.sess_per_batch))

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        # First, sample sessions for a batch
                        start_time_select = time.time()
                        eve, se, lab = sess.run(next_train)
                        select_time1 = time.time() - start_time_select

                        # Second, select samples for a batch
                        batch_idx = select_batch(lab, cfg.batch_size)
                        eve = eve[batch_idx]
                        lab = lab[batch_idx]

                        # Third, perform training on a batch
                        start_time_train = time.time()
                        (err, _, step, summ, diff_v, weights_v, fp_v, cn_v,
                         dist_v) = sess.run(
                             [total_loss, train_op, global_step, summary_op,
                              diff, weights, fp, cn, all_dist],
                             feed_dict={input_ph: eve,
                                        dropout_ph: cfg.keep_prob,
                                        label_ph: np.squeeze(lab),
                                        lr_ph: learning_rate})
                        train_time = time.time() - start_time_train

                        print("Epoch: [%d][%d/%d]\tEvent num: %d\tSelect_time: %.3f\tTrain_time: %.3f\tLoss %.4f" %
                              (epoch + 1, batch_count, batch_per_epoch,
                               eve.shape[0], select_time1, train_time, err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="select_time1",
                                             simple_value=select_time1)])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1
                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={input_ph: val_feats,
                                                        dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)
                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)])
                summary_writer.add_summary(summary, step)

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
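# ----------------------------------------------------------------------------
# The schedule above holds the base rate for cfg.static_epochs epochs, then
# decays it exponentially so it reaches base_lr * floor at cfg.max_epochs
# ("In Defense of the Triplet Loss" style; these scripts use a floor of 0.01
# or 0.001). Standalone form for reference:
def triplet_lr_schedule(epoch, base_lr, static_epochs, max_epochs,
                        floor=0.001):
    if epoch < static_epochs:
        return base_lr
    frac = (epoch - static_epochs) / (max_epochs - static_epochs)
    return base_lr * floor ** frac

# e.g. a base of 1e-3 held for 50 epochs decays to 1e-3 * 0.001 at epoch 600
assert triplet_lr_schedule(600, 1e-3, 50, 600) == 1e-3 * 0.001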
def main():
    cfg = TrainConfig().parse()
    print(cfg.name)
    result_dir = os.path.join(
        cfg.result_root,
        cfg.name + '_' + datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S'))
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)
    utils.write_configure_to_file(cfg, result_dir)
    np.random.seed(seed=cfg.seed)

    # prepare dataset
    train_session = cfg.train_session
    train_set = prepare_multimodal_dataset(cfg.feature_root, train_session,
                                           cfg.feat, cfg.label_root)
    batch_per_epoch = len(train_set) // cfg.sess_per_batch
    labeled_session = train_session[:cfg.label_num]

    val_session = cfg.val_session
    val_set = prepare_multimodal_dataset(cfg.feature_root, val_session,
                                         cfg.feat, cfg.label_root)

    # construct the graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.seed)
        global_step = tf.Variable(0, trainable=False)
        lr_ph = tf.placeholder(tf.float32, name='learning_rate')
        # used to cancel one increment when two train steps run per batch
        subtract_global_step_op = tf.assign(global_step, global_step - 1)

        ####################### Load models here ########################
        with tf.variable_scope("modality_core"):
            # load backbone model
            if cfg.network == "convtsn":
                model_emb = networks.ConvTSN(n_seg=cfg.num_seg,
                                             emb_dim=cfg.emb_dim)
            elif cfg.network == "convrtsn":
                model_emb = networks.ConvRTSN(n_seg=cfg.num_seg,
                                              emb_dim=cfg.emb_dim)
            else:
                raise NotImplementedError

            input_ph = tf.placeholder(
                tf.float32, shape=[None, cfg.num_seg, None, None, None])
            dropout_ph = tf.placeholder(tf.float32, shape=[])
            # forward pass (LSTM variants define their variable scope here)
            model_emb.forward(input_ph, dropout_ph)

        with tf.variable_scope("modality_sensors"):
            sensors_emb_dim = 32
            model_emb_sensors = networks.RTSN(n_seg=cfg.num_seg,
                                              emb_dim=sensors_emb_dim)

            input_sensors_ph = tf.placeholder(tf.float32,
                                              shape=[None, cfg.num_seg, 8])
            model_emb_sensors.forward(input_sensors_ph, dropout_ph)

            var_list = {}
            for v in tf.global_variables():
                if v.op.name.startswith("modality_sensors"):
                    var_list[v.op.name.replace("modality_sensors/", "")] = v
            restore_saver_sensors = tf.train.Saver(var_list)

        with tf.variable_scope("hallucination_sensors"):
            # load backbone model
            if cfg.network == "convtsn":
                hal_emb_sensors = networks.ConvTSN(n_seg=cfg.num_seg,
                                                   emb_dim=sensors_emb_dim)
            elif cfg.network == "convrtsn":
                hal_emb_sensors = networks.ConvRTSN(n_seg=cfg.num_seg,
                                                    emb_dim=sensors_emb_dim)
            else:
                raise NotImplementedError
            # hallucination branch takes the core (visual) input
            hal_emb_sensors.forward(input_ph, dropout_ph)

        ############################# Forward Pass #############################
        # Core branch
        if cfg.normalized:
            embedding = tf.nn.l2_normalize(model_emb.hidden, axis=-1,
                                           epsilon=1e-10)
            embedding_sensors = tf.nn.l2_normalize(model_emb_sensors.hidden,
                                                   axis=-1, epsilon=1e-10)
            embedding_hal_sensors = tf.nn.l2_normalize(
                hal_emb_sensors.hidden, axis=-1, epsilon=1e-10)
        else:
            embedding = model_emb.hidden
            embedding_sensors = model_emb_sensors.hidden
            embedding_hal_sensors = hal_emb_sensors.hidden

        # variable for visualizing the embeddings
        emb_var = tf.Variable([0.0], name='embeddings')
        set_emb = tf.assign(emb_var, embedding, validate_shape=False)

        # calculated for monitoring the all-pair embedding distance
        diffs = utils.all_diffs_tf(embedding, embedding)
        all_dist = utils.cdist_tf(diffs)
        tf.summary.histogram('embedding_dists', all_dist)

        # split embedding into anchor, positive and negative and calculate
        # triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embedding, [-1, 3, cfg.emb_dim]), 3, 1)
        anc_sensors, pos_sensors, neg_sensors = tf.unstack(
            tf.reshape(embedding_sensors, [-1, 3, sensors_emb_dim]), 3, 1)
        anc_hal_sensors, pos_hal_sensors, neg_hal_sensors = tf.unstack(
            tf.reshape(embedding_hal_sensors, [-1, 3, sensors_emb_dim]), 3, 1)

        # a fusion embedding
        anc_fused = tf.concat((anchor, anc_hal_sensors), axis=1)
        pos_fused = tf.concat((positive, pos_hal_sensors), axis=1)
        neg_fused = tf.concat((negative, neg_hal_sensors), axis=1)

        ############################# Calculate loss #############################
        # triplet loss
        metric_loss = networks.triplet_loss(anchor, positive, negative, cfg.alpha) + \
            networks.triplet_loss(anc_sensors, pos_sensors, neg_sensors, cfg.alpha) + \
            networks.triplet_loss(anc_hal_sensors, pos_hal_sensors, neg_hal_sensors, cfg.alpha) + \
            networks.triplet_loss(anc_fused, pos_fused, neg_fused, cfg.alpha)

        # hallucination loss (regression loss)
        hal_loss = tf.nn.l2_loss(embedding_sensors - embedding_hal_sensors)

        regularization_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

        # the lambdas are placeholders so each loss term can be switched on
        # and off per train step; lambda_multimodal weights hal_loss
        lambda_metric_ph = tf.placeholder(tf.float32, shape=[])
        lambda_hal_ph = tf.placeholder(tf.float32, shape=[])
        total_loss = (lambda_metric_ph * metric_loss +
                      lambda_hal_ph * hal_loss +
                      regularization_loss * cfg.lambda_l2)

        tf.summary.scalar('learning_rate', lr_ph)
        train_op = utils.optimize(total_loss, global_step, cfg.optimizer,
                                  lr_ph, tf.global_variables())

        saver = tf.train.Saver(max_to_keep=10)
        # not logging histograms of variables because it causes problems when
        # only unimodal_train_op is called
        summary_op = tf.summary.merge_all()

        #########################################################################

        # session iterator for session sampling
        feat_paths_ph = tf.placeholder(tf.string,
                                       shape=[None, cfg.sess_per_batch])
        feat2_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        label_paths_ph = tf.placeholder(tf.string,
                                        shape=[None, cfg.sess_per_batch])
        train_data = multimodal_session_generator(
            feat_paths_ph, feat2_paths_ph, label_paths_ph,
            sess_per_batch=cfg.sess_per_batch, num_threads=2, shuffled=False,
            preprocess_func=[model_emb.prepare_input,
                             model_emb_sensors.prepare_input])
        train_sess_iterator = train_data.make_initializable_iterator()
        next_train = train_sess_iterator.get_next()

        # prepare validation data
        val_sess = []
        val_feats = []
        val_feats2 = []
        val_labels = []
        val_boundaries = []
        for session in val_set:
            session_id = os.path.basename(session[1]).split('_')[0]
            # use prepare_input_test at testing time
            eve_batch, lab_batch, boundary = load_data_and_label(
                session[0], session[-1], model_emb.prepare_input_test)
            val_feats.append(eve_batch)
            val_labels.append(lab_batch)
            val_sess.extend([session_id] * eve_batch.shape[0])
            val_boundaries.extend(boundary)

            eve2_batch, _, _ = load_data_and_label(session[1], session[-1],
                                                   utils.mean_pool_input)
            val_feats2.append(eve2_batch)
        val_feats = np.concatenate(val_feats, axis=0)
        val_feats2 = np.concatenate(val_feats2, axis=0)
        val_labels = np.concatenate(val_labels, axis=0)
        print("Shape of val_feats: ", val_feats.shape)

        # generate metadata.tsv for visualizing embeddings
        with open(os.path.join(result_dir, 'metadata_val.tsv'), 'w') as fout:
            fout.write('id\tlabel\tsession_id\tstart\tend\n')
            for i in range(len(val_sess)):
                fout.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    i, val_labels[i, 0], val_sess[i], val_boundaries[i][0],
                    val_boundaries[i][1]))

        #########################################################################

        # Start running the graph
        if cfg.gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu

        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        summary_writer = tf.summary.FileWriter(result_dir, sess.graph)

        with sess.as_default():
            sess.run(tf.global_variables_initializer())

            # load pretrained model, if needed
            if cfg.model_path:
                print("Restoring pretrained model: %s" % cfg.model_path)
                saver.restore(sess, cfg.model_path)
            print("Restoring sensors model: %s" % cfg.sensors_path)
            restore_saver_sensors.restore(sess, cfg.sensors_path)

            ################## Training loop ##################
            epoch = -1
            while epoch < cfg.max_epochs - 1:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // batch_per_epoch

                # learning rate schedule, reference: "In Defense of the Triplet Loss"
                if epoch < cfg.static_epochs:
                    learning_rate = cfg.learning_rate
                else:
                    learning_rate = cfg.learning_rate * \
                        0.001**((epoch - cfg.static_epochs) /
                                (cfg.max_epochs - cfg.static_epochs))

                # prepare data for this epoch
                random.shuffle(train_set)
                paths = list(zip(*[iter(train_set)] * cfg.sess_per_batch))
                feat_paths = [[p[0] for p in path] for path in paths]
                feat2_paths = [[p[1] for p in path] for path in paths]
                label_paths = [[p[-1] for p in path] for path in paths]

                sess.run(train_sess_iterator.initializer,
                         feed_dict={feat_paths_ph: feat_paths,
                                    feat2_paths_ph: feat2_paths,
                                    label_paths_ph: label_paths})

                # for each epoch
                batch_count = 1
                while True:
                    try:
                        ##################### Data loading ########################
                        start_time = time.time()
                        eve, eve_sensors, lab, batch_sess = \
                            sess.run(next_train)
                        load_time = time.time() - start_time

                        ##################### Triplet selection #####################
                        start_time = time.time()

                        # for labeled sessions, use facenet sampling
                        eve_labeled = []
                        eve_sensors_labeled = []
                        lab_labeled = []
                        for i in range(eve.shape[0]):
                            if batch_sess[i, 0] in labeled_session:
                                # i:i+1 slices keep the batch axis so the
                                # np.concatenate calls below stack events
                                eve_labeled.append(eve[i:i + 1])
                                eve_sensors_labeled.append(
                                    eve_sensors[i:i + 1])
                                lab_labeled.append(lab[i:i + 1])

                        # defaults for the log line when no labeled sessions
                        # appear in this batch
                        triplet_num = 0
                        negative_count = 0
                        if len(eve_labeled):    # if labeled sessions exist
                            eve_labeled = np.concatenate(eve_labeled, axis=0)
                            eve_sensors_labeled = np.concatenate(
                                eve_sensors_labeled, axis=0)
                            lab_labeled = np.concatenate(lab_labeled, axis=0)

                            # Get the embeddings of all events
                            eve_embedding = np.zeros(
                                (eve_labeled.shape[0], cfg.emb_dim),
                                dtype='float32')
                            for start, end in zip(
                                    range(0, eve_labeled.shape[0],
                                          cfg.batch_size),
                                    range(cfg.batch_size,
                                          eve_labeled.shape[0] +
                                          cfg.batch_size,
                                          cfg.batch_size)):
                                end = min(end, eve_labeled.shape[0])
                                emb = sess.run(
                                    embedding,
                                    feed_dict={
                                        input_ph: eve_labeled[start:end],
                                        dropout_ph: 1.0})
                                eve_embedding[start:end] = np.copy(emb)

                            # sample triplets within sampled sessions
                            triplet_input_idx, negative_count = \
                                utils.select_triplets_facenet(
                                    lab_labeled, eve_embedding,
                                    cfg.triplet_per_batch, cfg.alpha,
                                    num_negative=cfg.num_negative)
                            if triplet_input_idx is None:
                                continue

                            triplet_input = eve_labeled[triplet_input_idx]
                            sensors_input = \
                                eve_sensors_labeled[triplet_input_idx]
                            triplet_num = triplet_input.shape[0] // 3
                            if len(triplet_input.shape) > 5:    # debugging
                                pdb.set_trace()

                        # for all sessions
                        temp_num = (eve.shape[0] // 3) * 3  # for triplet shape
                        all_triplet_input = eve[:temp_num]
                        all_sensors_input = eve_sensors[:temp_num]
                        select_time = time.time() - start_time

                        ##################### Start training ########################
                        # supervised initialization
                        if epoch < cfg.multimodal_epochs:
                            err, metric_err, hal_err, _, step, summ = sess.run(
                                [total_loss, metric_loss, hal_loss, train_op,
                                 global_step, summary_op],
                                feed_dict={input_ph: triplet_input,
                                           input_sensors_ph: sensors_input,
                                           dropout_ph: cfg.keep_prob,
                                           lr_ph: learning_rate,
                                           # only metric learning
                                           lambda_metric_ph: 1.0,
                                           lambda_hal_ph: 0.0})
                        else:
                            # supervised training if labeled sessions are
                            # available
                            if len(eve_labeled):
                                err, metric_err, hal_err, _, step, summ = \
                                    sess.run(
                                        [total_loss, metric_loss, hal_loss,
                                         train_op, global_step, summary_op],
                                        feed_dict={
                                            input_ph: triplet_input,
                                            input_sensors_ph: sensors_input,
                                            dropout_ph: cfg.keep_prob,
                                            lr_ph: learning_rate,
                                            lambda_metric_ph: 1.0,
                                            lambda_hal_ph:
                                                cfg.lambda_multimodal})
                                # the unsupervised run below increments
                                # global_step again, so cancel one increment
                                sess.run(subtract_global_step_op)

                            # unsupervised learning on all sessions
                            err, metric_err, hal_err, _, step, summ = sess.run(
                                [total_loss, metric_loss, hal_loss, train_op,
                                 global_step, summary_op],
                                feed_dict={input_ph: all_triplet_input,
                                           input_sensors_ph:
                                               all_sensors_input,
                                           dropout_ph: cfg.keep_prob,
                                           lr_ph: learning_rate,
                                           # no metric learning, only hal loss
                                           lambda_metric_ph: 0.0,
                                           lambda_hal_ph: 1.0})

                        print("%s\tEpoch: [%d][%d/%d]\tEvent num: %d\tTriplet num: %d\tLoad time: %.3f\tSelect time: %.3f\tMetric Loss %.4f\tHal Loss %.4f" %
                              (cfg.name, epoch + 1, batch_count,
                               batch_per_epoch, eve.shape[0], triplet_num,
                               load_time, select_time, metric_err, hal_err))

                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag="train_loss",
                                             simple_value=err),
                            tf.Summary.Value(tag="negative_count",
                                             simple_value=negative_count),
                            tf.Summary.Value(tag="metric_loss",
                                             simple_value=metric_err),
                            tf.Summary.Value(tag="hallucination_loss",
                                             simple_value=hal_err)])
                        summary_writer.add_summary(summary, step)
                        summary_writer.add_summary(summ, step)

                        batch_count += 1
                    except tf.errors.OutOfRangeError:
                        print("Epoch %d done!" % (epoch + 1))
                        break

                # validation on val_set
                print("Evaluating on validation set...")
                val_embeddings, _ = sess.run([embedding, set_emb],
                                             feed_dict={input_ph: val_feats,
                                                        dropout_ph: 1.0})
                mAP, mPrec = utils.evaluate_simple(val_embeddings, val_labels)

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="Validation mAP", simple_value=mAP),
                    tf.Summary.Value(tag="Validation [email protected]",
                                     simple_value=mPrec)])
                summary_writer.add_summary(summary, step)
                print("Epoch: [%d]\tmAP: %.4f\tmPrec: %.4f" %
                      (epoch + 1, mAP, mPrec))

                # config for embedding visualization
                config = projector.ProjectorConfig()
                visual_embedding = config.embeddings.add()
                visual_embedding.tensor_name = emb_var.name
                visual_embedding.metadata_path = os.path.join(
                    result_dir, 'metadata_val.tsv')
                projector.visualize_embeddings(summary_writer, config)

                # save model
                saver.save(sess, os.path.join(result_dir, cfg.name + '.ckpt'),
                           global_step=step)
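# ----------------------------------------------------------------------------
# Why subtract_global_step_op exists: when a batch triggers both the
# supervised and the unsupervised sess.run above, the optimizer increments
# global_step twice, and `step // batch_per_epoch` would then advance epochs
# at double speed. Decrementing once keeps one counted step per batch. A
# minimal TF1-style sketch of that bookkeeping (assumes TensorFlow 1.x):
import tensorflow as tf

with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    inc = tf.assign_add(global_step, 1)           # stands in for train_op
    dec = tf.assign(global_step, global_step - 1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(inc)                  # supervised pass
        sess.run(dec)                  # cancel its increment
        sess.run(inc)                  # unsupervised pass
        print(sess.run(global_step))   # -> 1: counted as a single batch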