def start_reid_model():
    """Lazily initialise the 'pcb' and 'dense' re-identification models on first call."""
    global model_pcb
    global model_dense
    if model_pcb is None:
        opt_reid = get_opt('pcb')
        model_pcb = prepare_model(opt_reid)
    if model_dense is None:
        opt_reid_dense = get_opt('dense')
        model_dense = prepare_model(opt_reid_dense)
    return jsonify({'success': True, 'modelLoaded': True})
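# Usage sketch (assumption): the handler above looks like a Flask view (it returns
# jsonify(...)), so it could be wired up roughly as below. The app object, route
# path, and module-level globals are illustrative; only start_reid_model(),
# get_opt(), and prepare_model() come from the snippet above.
from flask import Flask, jsonify

app = Flask(__name__)

model_pcb = None    # filled in lazily on the first call to start_reid_model()
model_dense = None

app.add_url_rule('/reid/start', 'start_reid_model', start_reid_model, methods=['POST'])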
def process_command_line_args(args):
    """
    e.g. dump.py [-f | --from <log-folder|identifier>] [-n | --name <dump-filename>]

    :param args: raw command-line arguments (without the program name)
    """
    try:
        # short-opts: "ha:i" means opts '-h' & '-i' take no arg, '-a' takes an arg
        # long-opts: ["help", "add="] means opt '--add' takes an arg
        pairs, unknowns = utils.get_opt(
            args, "f:n:c:", longopts=["from=", "node=", "config="])
        arg_root, arg_from, arg_epoch, arg_node, arg_to = None, None, None, None, None
        mandatory_args = [('-f', '--from')]
        optional_args = [('-n', '--node')]
        opts = [each_pair[0] for each_pair in pairs]
        for some_arg in mandatory_args:
            # if some_opt[2] is None:
            if some_arg[0] not in opts and some_arg[1] not in opts:
                raise ValueError("Argument '%s|%s' is mandatory." % some_arg)
        for opt, val in pairs:
            if opt in ('-f', '--from'):
                try:
                    val = utils.literal_eval(val)
                except ValueError:
                    pass
                except SyntaxError:
                    pass
def process_command_line_args(args):
    """
    e.g. dump.py [-p | --path <path-log-folders>] [-f | --from <folder-name-log|identifier>]
                 [-e | --epoch <iepoch>] [-n | --name <filename-dump>] [-t | --to <folder-name-dump>]

    :return:
    """
    try:
        # short-opts: "ha:i" means opts '-h' & '-i' take no arg, '-a' takes an arg
        # long-opts: ["help", "add="] means opt '--add' takes an arg
        pairs, unknowns = utils.get_opt(args, "p:f:e:n:t:c:", longopts=[
            "path=", "from=", "epoch=", "name=", "to=", "config="
        ])
        arg_root, arg_from, arg_epoch, arg_name, arg_to = None, None, None, None, None
        mandatory_args = [('-p', '--path'), ('-f', '--from'), ('-e', '--epoch'),
                          ('-n', '--name'), ('-t', '--to')]
        optional_args = [('-c', '--config')]
        opts = [each_pair[0] for each_pair in pairs]
        for some_arg in mandatory_args:
            # if some_opt[2] is None:
            if some_arg[0] not in opts and some_arg[1] not in opts:
                raise ValueError("Argument '%s|%s' is mandatory." % some_arg)
        for opt, val in pairs:
            if opt in ('-p', '--path'):
                try:
                    val = utils.literal_eval(val)
                except ValueError:
                    pass
                except SyntaxError:
                    pass
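# Usage sketch (assumption): how dump.py might invoke the parser above from a
# __main__ guard. Only process_command_line_args() comes from the snippet; the
# command line shown just mirrors the docstring, e.g.
#   python dump.py -p ./logs -f run_2020 -e 10 -n dump.pkl -t ./dumps
if __name__ == '__main__':
    import sys
    process_command_line_args(sys.argv[1:])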
    extract words set

    Raises:
        ValueError: if corpus_file is not specified.
    """
    return WordExtract(corpus_file,
                       common_words_file=common_words_file,
                       min_candidate_len=min_candidate_len,
                       max_candidate_len=max_candidate_len,
                       least_cnt_threshold=least_cnt_threshold,
                       solid_rate_threshold=solid_rate_threshold,
                       entropy_threshold=entropy_threshold,
                       all_words=all_words).extract(save_file=save_file)


if __name__ == '__main__':
    config = get_opt()
    # Verbose runs log at INFO level; otherwise only errors are reported.
    if config.verbose:
        level = logging.INFO
    else:
        level = logging.ERROR
    logging.basicConfig(level=level,
                        format="%(asctime)s-%(filename)s-%(levelname)s: %(message)s")
    extractor = WordExtract(corpus_file=config.corpus_file,
                            common_words_file=config.common_words_file,
                            min_candidate_len=config.min_candidate_len,
                            max_candidate_len=config.max_candidate_len,
                            least_cnt_threshold=config.least_cnt_threshold,
                            solid_rate_threshold=config.solid_rate_threshold,
                            entropy_threshold=config.entropy_threshold,
                            all_words=config.all_words)
    extractor.extract(config.save_file)
def main(args):
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    np.random.seed(seed=args.seed)

    print('load data...')
    if args.dataset == 'webface':
        train_set = utils.get_dataset(args.data_dir)
    elif args.dataset == 'mega':
        train_set = utils.dataset_from_cache(args.data_dir)
    #train_set.extend(ic_train_set)
    print('Loaded dataset: {} persons'.format(len(train_set)))

    def _sample_people(x):
        '''Sample people through tf.data so that transform and prefetch can be used.'''
        scale = 1 if args.mine_method != 'simi_online' else args.scale
        image_paths, num_per_class = sample_people(
            train_set, args.people_per_batch * args.num_gpus * scale,
            args.images_per_person)
        labels = []
        for i in range(len(num_per_class)):
            labels.extend([i] * num_per_class[i])
        return (np.array(image_paths), np.array(labels, dtype=np.int32))

    def _parse_function(filename, label):
        file_contents = tf.read_file(filename)
        image = tf.image.decode_image(file_contents, channels=3)
        #image = tf.image.decode_jpeg(file_contents, channels=3)
        if args.random_flip:
            image = tf.image.random_flip_left_right(image)
        #pylint: disable=no-member
        image.set_shape((args.image_size, args.image_size, 3))
        image = tf.cast(image, tf.float32)
        image = tf.subtract(image, 127.5)
        image = tf.div(image, 128.)
        return image, label

    gpus = range(args.num_gpus)
    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        # the image batches are generated by the tf.data pipeline below
        with tf.device("/cpu:0"):
            dataset = tf_data.Dataset.range(args.epoch_size * args.max_nrof_epochs * 100)
            #dataset.repeat(args.max_nrof_epochs)
            # sample people via a map over the range dataset
            dataset = dataset.map(lambda x: tf.py_func(_sample_people, [x], [tf.string, tf.int32]))
            dataset = dataset.flat_map(_from_tensor_slices)
            dataset = dataset.map(_parse_function, num_parallel_calls=8)
            dataset = dataset.batch(args.num_gpus * args.people_per_batch * args.images_per_person)
            iterator = dataset.make_initializable_iterator()
            next_element = iterator.get_next()
            batch_image_split = tf.split(next_element[0], args.num_gpus)
            batch_label = next_element[1]

        global trip_thresh
        trip_thresh = args.num_gpus * args.people_per_batch * args.images_per_person * 10

        #learning_rate = tf.train.exponential_decay(args.learning_rate, global_step,
        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)
        opt = utils.get_opt(args.optimizer, learning_rate)

        tower_embeddings = []
        tower_feats = []
        for i in range(len(gpus)):
            with tf.device("/gpu:" + str(gpus[i])):
                with tf.name_scope("tower_" + str(gpus[i])) as scope:
                    with slim.arg_scope([slim.model_variable, slim.variable], device="/cpu:0"):
                        # Build the inference graph
                        with tf.variable_scope(tf.get_variable_scope()) as var_scope:
                            reuse = False if i == 0 else True
                            if args.network == 'resnet_v2':
                                with slim.arg_scope(resnet_v2.resnet_arg_scope(args.weight_decay)):
                                    #prelogits, end_points = resnet_v1.resnet_v1_50(batch_image_split[i], is_training=phase_train_placeholder, output_stride=16, num_classes=args.embedding_size, reuse=reuse)
                                    prelogits, end_points = resnet_v2.resnet_v2_50(
                                        batch_image_split[i],
                                        is_training=True,
                                        output_stride=16,
                                        num_classes=args.embedding_size,
                                        reuse=reuse)
                                    prelogits = tf.squeeze(prelogits, [1, 2], name='SpatialSqueeze')
                            elif args.network == 'resface':
                                prelogits, end_points = resface.inference(
                                    batch_image_split[i],
                                    1.0,
                                    bottleneck_layer_size=args.embedding_size,
                                    weight_decay=args.weight_decay,
                                    reuse=reuse)
                                print('res face prelogits', prelogits)
                            elif args.network == 'mobilenet':
                                prelogits, net_points = mobilenet.inference(
                                    batch_image_split[i],
                                    bottleneck_layer_size=args.embedding_size,
                                    phase_train=True,
                                    weight_decay=args.weight_decay,
                                    reuse=reuse)
                            embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
                            tf.get_variable_scope().reuse_variables()
                    tower_embeddings.append(embeddings)

        embeddings_gather = tf.concat(tower_embeddings, axis=0, name='embeddings_concat')

        # select triplet pairs with a tf op
        with tf.name_scope('triplet_part'):
            embeddings_norm = tf.nn.l2_normalize(embeddings_gather, axis=1)
            distances = utils._pairwise_distances(embeddings_norm, squared=True)
            if args.strategy == 'min_and_min':
                pair = tf.py_func(select_triplets_min_min,
                                  [distances, batch_label, args.alpha], tf.int64)
            elif args.strategy == 'min_and_max':
                pair = tf.py_func(select_triplets_min_max,
                                  [distances, batch_label, args.alpha], tf.int64)
            elif args.strategy == 'hardest':
                pair = tf.py_func(select_triplets_hardest,
                                  [distances, batch_label, args.alpha], tf.int64)
            elif args.strategy == 'batch_random':
                pair = tf.py_func(select_triplets_batch_random,
                                  [distances, batch_label, args.alpha], tf.int64)
            elif args.strategy == 'batch_all':
                pair = tf.py_func(select_triplets_batch_all,
                                  [distances, batch_label, args.alpha], tf.int64)
            else:
                raise ValueError('Not supported strategy {}'.format(args.strategy))

        triplet_handle = {}
        triplet_handle['embeddings'] = embeddings_gather
        triplet_handle['labels'] = batch_label
        triplet_handle['pair'] = pair

        if args.mine_method == 'online':
            pair_reshape = tf.reshape(pair, [-1])
            embeddings_gather = tf.gather(embeddings_gather, pair_reshape)
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings_gather, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss, pos_d, neg_d = utils.triplet_loss(anchor, positive, negative, args.alpha)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        triplet_loss = tf.add_n([triplet_loss])
        total_loss = triplet_loss + tf.add_n(regularization_losses)
        #total_loss = tf.add_n(regularization_losses)

        losses = {}
        losses['triplet_loss'] = triplet_loss
        losses['total_loss'] = total_loss

        update_vars = tf.trainable_variables()
        with tf.device("/gpu:" + str(gpus[0])):
            grads = opt.compute_gradients(total_loss, update_vars,
                                          colocate_gradients_with_ops=True)
            apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        #update_ops = [op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if 'pair_part' in op.name]
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print('update ops', update_ops)
        with tf.control_dependencies(update_ops):
            train_op_dep = tf.group(apply_gradient_op)
        # Skip the parameter update when the triplet loss comes out as NaN
        train_op = tf.cond(tf.is_nan(triplet_loss),
                           lambda: tf.no_op('no_train'),
                           lambda: train_op_dep)

        save_vars = [
            var for var in tf.global_variables()
            if 'Adagrad' not in var.name and 'global_step' not in var.name
        ]
        restore_vars = [
            var for var in tf.global_variables()
            if 'Adagrad' not in var.name and 'global_step' not in var.name
            and 'pair_part' not in var.name
        ]
        saver = tf.train.Saver(save_vars, max_to_keep=3)
        restorer = tf.train.Saver(restore_vars, max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                allow_soft_placement=True))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(iterator.initializer)

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        forward_embeddings = []
        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                if args.mine_method == 'simi_online':
                    train_simi_online(args, sess, epoch, len(gpus), embeddings_gather,
                                      batch_label, next_element[0], batch_image_split,
                                      learning_rate_placeholder, learning_rate,
                                      phase_train_placeholder, global_step, pos_d, neg_d,
                                      triplet_handle, losses, train_op, summary_op,
                                      summary_writer, args.learning_rate_schedule_file)
                elif args.mine_method == 'online':
                    train_online(args, sess, epoch, learning_rate,
                                 phase_train_placeholder, global_step, losses, train_op,
                                 summary_op, summary_writer,
                                 args.learning_rate_schedule_file)
                else:
                    raise ValueError('Not supported mine method {}'.format(args.mine_method))
                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir,
                                             subdir, step)
    return model_dir
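# Sketch (assumption): an argparse front end covering the main attributes that
# main() reads above. The flag names mirror the attribute names and the choice
# lists mirror the branches in main(); every default value is an assumption, and
# the remaining attributes (epoch_size, max_nrof_epochs, learning_rate_decay_*,
# optimizer, weight_decay, gpu_memory_fraction, learning_rate_schedule_file,
# pretrained_model, random_flip, scale, ...) would be added the same way.
def parse_arguments(argv):
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--logs_base_dir', type=str, default='~/logs')
    parser.add_argument('--models_base_dir', type=str, default='~/models')
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--dataset', type=str, choices=['webface', 'mega'], default='webface')
    parser.add_argument('--network', type=str, choices=['resnet_v2', 'resface', 'mobilenet'], default='resface')
    parser.add_argument('--mine_method', type=str, choices=['simi_online', 'online'], default='online')
    parser.add_argument('--strategy', type=str, default='batch_random')
    parser.add_argument('--num_gpus', type=int, default=1)
    parser.add_argument('--people_per_batch', type=int, default=45)
    parser.add_argument('--images_per_person', type=int, default=5)
    parser.add_argument('--image_size', type=int, default=112)
    parser.add_argument('--embedding_size', type=int, default=128)
    parser.add_argument('--alpha', type=float, default=0.2)
    parser.add_argument('--seed', type=int, default=666)
    return parser.parse_args(argv)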
def run(opt_path,
        n_jobs=16,
        N_STEP=16,
        N_BATCH=8,
        N_REPEAT=1,
        run_cuml=False,
        quick_check=False,
        data_loaders=DATA_LOADERS,
        model_names=MODEL_NAMES,
        must_have_tag=None):
    start = time()
    in_path, out_path, name = get_paths_and_run_name()
    opt_root, opt = get_opt(opt_path)

    cmd = f"bayesmark-init -dir {out_path} -b {name}"
    run_cmd(cmd)
    copy_baseline(in_path, out_path, name, opt, N_STEP, N_BATCH, run_cuml)

    cmds = []
    if quick_check:
        data_loaders = {'boston': (2, 2)}
        if run_cuml:
            model_names = ['xgb-cuml']  # ['MLP-sgd-cuml']
        else:
            model_names = ['xgb']  # ['MLP-adam']
    if run_cuml:
        model_names = [i for i in model_names if i.endswith('-cuml')]  # and 'MLP' not in i and 'xgb' not in i]
    if must_have_tag is not None:
        if isinstance(must_have_tag, list):
            model_names = [i for i in model_names if isin(i, must_have_tag)]
        else:
            model_names = [i for i in model_names if must_have_tag in i]
    print(model_names)

    for data in data_loaders:
        metrics = ['nll', 'acc'] if data_loaders[data][1] == 1 else ['mse', 'mae']
        for metric in metrics:
            for model in model_names:
                for _ in range(N_REPEAT):
                    if not run_cuml and '-cuml' in model:
                        continue
                    if run_cuml and model in no_multi_class_cuml and data in multi_class_data:
                        continue
                    if run_cuml and model == 'SVM-cuml' and data_loaders[data][1] == 1:
                        continue
                    cmd = f"bayesmark-launch -dir {out_path} -b {name} -n {N_STEP} -r 1 -p {N_BATCH} -o {opt} --opt-root {opt_root} -v -c {model} -d {data} -m {metric} -dr ./more_data&"
                    cmds.append(cmd)

    N = len(cmds)
    cmds = run_cmds(cmds, min(n_jobs, N))
    last = 0
    while True:
        done, n = check_complete(N, out_path, name)
        sofar = time() - start
        print(f"{sofar:.1f} seconds passed, {N - len(cmds)} tasks launched, {n} out of {N} tasks finished ...")
        if done:
            break
        sleep(3)
        if last < n:
            lc = len(cmds)
            cmds = run_cmds(cmds, min(n - last, lc))
            last = n

    cmd = f"bayesmark-agg -dir {out_path} -b {name}"
    run_cmd(cmd)
    cmd = f"bayesmark-anal -dir {out_path} -b {name} -v"
    run_cmd(cmd)

    duration = time() - start
    print(f"All done!! {name} Total time: {duration:.1f} seconds")
    return name, duration
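# Usage sketch (assumption): a smoke test of the benchmark driver above.
# quick_check=True restricts the sweep to the small 'boston' loader and a single
# xgb model, as handled inside run(); the opt_path value is a placeholder for
# whatever get_opt() expects in this project.
if __name__ == '__main__':
    name, duration = run('./configs/my_optimizer', n_jobs=4, N_REPEAT=1, quick_check=True)
    print(name, duration)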
def train(args):
    assert os.path.exists(args.cfg)
    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)
    cfg = dict2namespace(cfg)

    set_random_seed(getattr(cfg.trainer, "seed", 666))
    os.makedirs(cfg.log.save_dir, exist_ok=True)
    if USE_WANDB:
        setup_wandb(cfg)
    logger = get_logger(logpath=os.path.join(cfg.log.save_dir, 'logs'),
                        filepath=os.path.abspath(__file__))
    logger.info(args.cfg)

    # sigmas
    if hasattr(cfg.trainer, "sigmas"):
        np_sigmas = cfg.trainer.sigmas
    else:
        sigma_begin = float(cfg.trainer.sigma_begin)
        sigma_end = float(cfg.trainer.sigma_end)
        num_classes = int(cfg.trainer.sigma_num)
        np_sigmas = np.exp(
            np.linspace(np.log(sigma_begin), np.log(sigma_end), num_classes))
    sigmas = torch.tensor(np.array(np_sigmas)).float().to(device).view(-1, 1)
    sigmas = sigmas[-1:]  # TODO: Just with one sigma for now!
    if USE_WANDB:
        wandb.config.sigma = sigmas.item()

    if cfg.models.scorenet.type == 'small_mlp':
        score_net = SmallMLP(in_dim=3)
    else:
        score_net = Scorenet()
    print(score_net)
    if cfg.models.criticnet.type == 'small_mlp':
        critic_net = SmallMLP(in_dim=3)
    else:
        critic_net = Criticnet()
    print(critic_net)
    critic_net.to(device)
    score_net.to(device)

    # Note: both optimizers are built from the score-net optimizer settings.
    opt_scorenet, scheduler_scorenet = get_opt(score_net.parameters(),
                                               cfg.trainer.opt_scorenet)
    opt_criticnet, scheduler_criticnet = get_opt(critic_net.parameters(),
                                                 cfg.trainer.opt_scorenet)

    itr = 0
    data_lib = importlib.import_module(cfg.data.type)
    loaders = data_lib.get_data_loaders(cfg.data, args)
    train_loader = loaders['train_loader']
    test_loader = loaders['test_loader']

    for epoch in range(cfg.trainer.epochs):
        for data in train_loader:
            score_net.train()
            critic_net.train()
            opt_scorenet.zero_grad()
            opt_criticnet.zero_grad()

            tr_pts = data['tr_points'].to(device)
            tr_pts.requires_grad_()
            batch_size = tr_pts.size(0)

            # Randomly sample sigma
            labels = torch.randint(0, len(sigmas), (batch_size,), device=tr_pts.device)
            used_sigmas = sigmas[labels].float()
            perturbed_points = tr_pts + torch.randn_like(tr_pts) * used_sigmas.view(batch_size, 1, 1)

            score_pred = score_net(perturbed_points, used_sigmas)
            critic_output = critic_net(perturbed_points, used_sigmas)

            t1 = (score_pred * critic_output).sum(-1)
            t2 = exact_jacobian_trace(critic_output, perturbed_points)
            stein = t1 + t2
            l2_penalty = (critic_output * critic_output).sum(-1).mean()
            loss = stein.mean()

            cycle_iter = itr % (cfg.trainer.c_iters + cfg.trainer.s_iters)
            cpu_loss = loss.detach().cpu().item()
            cpu_t1 = t1.mean().detach().cpu().item()
            cpu_t2 = t2.mean().detach().cpu().item()
            if USE_WANDB:
                wandb.log({'epoch': epoch, 'loss_term1': cpu_t1, 'loss_term2': cpu_t2,
                           'loss': cpu_loss, 'iteration': itr})

            if cycle_iter < cfg.trainer.c_iters:
                # Critic update: maximize the Stein discrepancy minus an L2 penalty
                (-loss + l2_penalty).backward()
                opt_criticnet.step()
                log_message = "Epoch %d itr %d (critic), Loss=%2.5f t1=%2.5f t2=%2.5f" % (
                    epoch, itr, cpu_loss, cpu_t1, cpu_t2)
            else:
                # Score update: minimize the Stein discrepancy
                loss.backward()
                opt_scorenet.step()
                log_message = "Epoch %d itr %d (score), Loss=%2.5f t1=%2.5f t2=%2.5f" % (
                    epoch, itr, cpu_loss, cpu_t1, cpu_t2)

            if itr % cfg.log.log_freq == 0:
                logger.info(log_message)
            if itr % cfg.log.save_freq == 0:
                score_net.cpu()
                torch.save({
                    'args': args,
                    'state_dict': score_net.state_dict(),
                }, os.path.join(cfg.log.save_dir, 'checkpt.pth'))
                score_net.to(device)
            if itr % cfg.log.viz_freq == 0:
                plt.clf()
                pt_cl, _ = langevin_dynamics(score_net, sigmas, eps=1e-4,
                                             num_steps=cfg.inference.num_steps)
                fig, ax = visualize(pt_cl, return_fig=True)
                if USE_WANDB:
                    wandb.log({"langevin_dynamics": wandb.Image(ax)})
                fig_filename = os.path.join(cfg.log.save_dir, 'figs',
                                            '{:04d}.png'.format(itr))
                os.makedirs(os.path.dirname(fig_filename), exist_ok=True)
                plt.savefig(fig_filename)
            itr += 1
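# A minimal sketch (assumption) of the exact_jacobian_trace() helper used in the
# Stein term above: it returns sum_i d f_i / d x_i computed with autograd, one
# backward pass per coordinate. The real helper may differ; only the call
# signature (f_x, x) is taken from the training loops.
import torch

def exact_jacobian_trace(f_x, x):
    trace = 0.0
    for i in range(x.shape[-1]):
        # gradient of the i-th output coordinate w.r.t. the inputs, keeping the
        # graph so the resulting trace can still be backpropagated through
        grad_i = torch.autograd.grad(f_x[..., i].sum(), x, create_graph=True)[0]
        trace = trace + grad_i[..., i]
    return trace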
def train(args):
    assert os.path.exists(args.cfg)
    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)
    cfg = dict2namespace(cfg)

    os.makedirs(cfg.log.save_dir, exist_ok=True)
    logger = get_logger(logpath=os.path.join(cfg.log.save_dir, 'logs'),
                        filepath=os.path.abspath(__file__))
    logger.info(args.cfg)

    # sigmas
    if hasattr(cfg.trainer, "sigmas"):
        np_sigmas = cfg.trainer.sigmas
    else:
        sigma_begin = float(cfg.trainer.sigma_begin)
        sigma_end = float(cfg.trainer.sigma_end)
        num_classes = int(cfg.trainer.sigma_num)
        np_sigmas = np.exp(np.linspace(np.log(sigma_begin), np.log(sigma_end), num_classes))
    sigmas = torch.tensor(np.array(np_sigmas)).float().to(device).view(-1, 1)

    score_net = Scorenet(in_dim=2)
    critic_net = Criticnet(in_dim=2)
    critic_net.to(device)
    score_net.to(device)

    opt_scorenet, scheduler_scorenet = get_opt(score_net.parameters(), cfg.trainer.opt_scorenet)
    opt_criticnet, scheduler_criticnet = get_opt(critic_net.parameters(), cfg.trainer.opt_scorenet)

    itr = 0
    for epoch in range(cfg.trainer.epochs):
        tr_pts = sample_data('pinwheel', 2048).view(1, -1, 2)
        score_net.train()
        critic_net.train()
        opt_scorenet.zero_grad()
        opt_criticnet.zero_grad()
        #tr_pts = data.to(device)
        #tr_pts = tr_pts.view(1, -1, 2)
        tr_pts.requires_grad_()
        batch_size = tr_pts.size(0)

        # Randomly sample sigma
        labels = torch.randint(0, len(sigmas), (batch_size,), device=tr_pts.device)
        used_sigmas = sigmas[labels].float()
        perturbed_points = tr_pts + torch.randn_like(tr_pts) * used_sigmas.view(batch_size, 1, 1)

        score_pred = score_net(perturbed_points, used_sigmas)
        critic_output = critic_net(perturbed_points, used_sigmas)

        t1 = (score_pred * critic_output).sum(-1)
        t2 = exact_jacobian_trace(critic_output, perturbed_points)
        stein = t1 + t2
        l2_penalty = (critic_output * critic_output).sum(-1).mean()
        loss = stein.mean()

        cycle_iter = itr % (cfg.trainer.c_iters + cfg.trainer.s_iters)
        cpu_loss = loss.detach().cpu().item()
        cpu_t1 = t1.mean().detach().cpu().item()
        cpu_t2 = t2.mean().detach().cpu().item()

        if cycle_iter < cfg.trainer.c_iters:
            (-loss + l2_penalty).backward()
            opt_criticnet.step()
            log_message = "Epoch %d itr %d (critic), Loss=%2.5f t1=%2.5f t2=%2.5f" % (
                epoch, itr, cpu_loss, cpu_t1, cpu_t2)
        else:
            loss.backward()
            opt_scorenet.step()
            log_message = "Epoch %d itr %d (score), Loss=%2.5f t1=%2.5f t2=%2.5f" % (
                epoch, itr, cpu_loss, cpu_t1, cpu_t2)
        logger.info(log_message)

        if itr % cfg.log.save_freq == 0:
            score_net.cpu()
            torch.save({
                'args': args,
                'state_dict': score_net.state_dict(),
            }, os.path.join(cfg.log.save_dir, 'checkpt.pth'))
            score_net.to(device)
        if itr % cfg.log.viz_freq == 0:
            plt.clf()
            pt_cl, _ = langevin_dynamics(score_net, sigmas, dim=2, eps=1e-4,
                                         num_steps=cfg.inference.num_steps)
            visualize_2d(pt_cl)
            fig_filename = os.path.join(cfg.log.save_dir, 'figs', '{:04d}.png'.format(itr))
            os.makedirs(os.path.dirname(fig_filename), exist_ok=True)
            plt.savefig(fig_filename)
        itr += 1