def load_config(args):
    """
    Load the config .yml file.
    """
    if args.cfg is None:
        raise Exception("No config file specified.")
    cfg_from_file(args.cfg)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime("%Y_%m_%d_%H_%M_%S")
    print("timestamp: {}".format(timestamp))

    cfg.TIMESTAMP = timestamp
    cfg.INPUT_DIR = args.dataset_dir
    cfg.METADATA_FILENAME = args.metadata_filename
    cfg.OUTPUT_DIR = os.path.join(
        args.results_dir,
        "%s_%s_%s" % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp),
    )

    mkdir_p(cfg.OUTPUT_DIR)
    copyfile(args.cfg, os.path.join(cfg.OUTPUT_DIR, "config.yml"))

    print("Data dir: {}".format(cfg.INPUT_DIR))
    print("Output dir: {}".format(cfg.OUTPUT_DIR))
    print("Using config:")
    pprint.pprint(cfg)
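# Every snippet in this section leans on a `mkdir_p` helper whose definition
# is not shown. A minimal sketch of a compatible implementation (an
# assumption; the original helper may differ), mirroring `mkdir -p`:
import os

def mkdir_p(path):
    """Create `path` and any missing parents; do nothing if it exists."""
    os.makedirs(path, exist_ok=True)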
def load_config():
    '''
    Load the config .yml file.
    '''
    args = parse_args()
    if args.cfg is None:
        raise Exception("No config file specified.")
    cfg_from_file(args.cfg)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    print('timestamp: {}'.format(timestamp))

    cfg.TIMESTAMP = timestamp
    cfg.INPUT_DIR = args.dataset_dir
    cfg.METADATA_FILENAME = args.metadata_filename
    cfg.OUTPUT_DIR = os.path.join(args.results_dir,
                                  '%s_%s' % (cfg.DATASET_NAME, timestamp))

    mkdir_p(cfg.OUTPUT_DIR)
    copyfile(args.cfg, os.path.join(cfg.OUTPUT_DIR, 'config.yml'))

    print('Data dir: {}'.format(cfg.INPUT_DIR))
    print('Output dir: {}'.format(cfg.OUTPUT_DIR))
    print('Using config:')
    pprint.pprint(cfg)
def load_config():
    '''
    Load the config .yml file.
    '''
    args = parse_args()

    # If a previous configuration is available in the results directory, load it.
    previous_config = os.path.join(args.results_dir, 'config.yml')
    if os.path.isfile(previous_config):
        args.cfg = previous_config

    # If no config was specified and no previous config is available, give up.
    if args.cfg is None:
        raise Exception("No config file specified or available.")

    # Load the configuration into memory.
    cfg_from_file(args.cfg)

    cfg.TIMESTAMP = TIMESTAMP
    cfg.INPUT_DIR = args.dataset_dir
    cfg.METADATA_FILENAME = args.metadata_filename
    cfg.OUTPUT_DIR = args.results_dir

    if args.cfg != previous_config:
        mkdir_p(cfg.OUTPUT_DIR)
        copyfile(args.cfg, os.path.join(cfg.OUTPUT_DIR, 'config.yml'))

    print('Data dir: {}'.format(cfg.INPUT_DIR))
    print('Output dir: {}'.format(cfg.OUTPUT_DIR))
    print('Using config {}:'.format(args.cfg))
    pprint.pprint(cfg)
def run_SFC(seq, rp, bSaveImage, sess, tracker):
    # Wall-clock timer (time.clock() was used originally; it no longer
    # exists in Python >= 3.8).
    tic = time.perf_counter()
    # sorted_filenames = [osp.join(seq.path, f) for f in sorted(os.listdir(seq.path))]
    # sorted_filenames = sorted_filenames[seq.startFrame - 1: seq.endFrame]
    sorted_filenames = seq.s_frames
    raw_bb = seq.init_rect
    x, y, width, height = raw_bb  # OTB format
    # Subtract one from x, y since Python indexing starts at zero.
    init_bb = Rectangle(x - 1, y - 1, width, height)

    handle = Sequence(sorted_filenames, init_bb)
    video_name = sorted_filenames[0].split(osp.sep)[-3]
    video_log_dir = '/tmp/OTB/tmp'
    mkdir_p(video_log_dir)
    tracker.track(sess, handle, video_log_dir)
    trajectory_py = handle.quit()
    # Add one back to x, y to match the OTB format.
    trajectory = [Rectangle(val.x + 1, val.y + 1, val.width, val.height)
                  for val in trajectory_py]
    duration = time.perf_counter() - tic

    result = dict()
    result['res'] = trajectory
    result['type'] = 'rect'
    result['fps'] = round(seq.len / duration, 3)
    return result
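# `Rectangle` is used throughout the tracking snippets but never defined
# here. A plausible sketch (an assumption, since the definition is not
# shown) is a 4-field namedtuple, matching the `.x`, `.y`, `.width`,
# `.height` accesses above:
from collections import namedtuple

Rectangle = namedtuple('Rectangle', ['x', 'y', 'width', 'height'])

# Example of the OTB 1-indexed -> Python 0-indexed conversion used above:
otb_box = (10, 20, 50, 80)  # x, y, width, height in OTB format
init_bb = Rectangle(otb_box[0] - 1, otb_box[1] - 1, otb_box[2], otb_box[3])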
def load_config(args):
    """
    Load the config .yml file.
    """
    if args.cfg is None:
        raise Exception("No config file specified.")
    cfg = cfg_from_file(args.cfg)

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')

    cfg.TIMESTAMP = timestamp
    cfg.INPUT_DIR = args.dataset_dir
    cfg.CHECKPOINT_DIR = args.checkpoint_dir
    cfg.METADATA_FILENAME = args.metadata_filename
    cfg.OUTPUT_DIR = os.path.join(
        args.results_dir,
        '%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp))

    mkdir_p(cfg.OUTPUT_DIR)
    copyfile(args.cfg, os.path.join(cfg.OUTPUT_DIR, 'config.yml'))
    return cfg
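# A minimal driver sketch for the load_config variant above. The flag names
# are inferred from the attributes read off `args`; the defaults are
# illustrative assumptions, not values from the original code:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cfg', default=None, help='path to the .yml config')
parser.add_argument('--dataset_dir', default='data')
parser.add_argument('--checkpoint_dir', default='checkpoints')
parser.add_argument('--metadata_filename', default='metadata.pkl')
parser.add_argument('--results_dir', default='results')

cfg = load_config(parser.parse_args())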
def write_video(frames, outfpath):
    # Following instructions from
    # https://github.com/ContinuumIO/anaconda-issues/issues/223#issuecomment-285523938  # noQA
    # Define the codec and create VideoWriter object
    misc.mkdir_p(osp.dirname(outfpath))
    if 0:  # Tried writing video with opencv; it clearly didn't work
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(outfpath, fourcc, 20.0,
                              frames.shape[-3:-1][::-1])
        for i in range(frames.shape[0]):
            out.write(frames[i].astype(np.uint8))
        out.release()
        logger.warning('Written to %s', outfpath)
    else:
        # Dump the frames as JPEGs, then encode them with ffmpeg.
        frame_dir = outfpath + '_frames'
        misc.mkdir_p(frame_dir)
        for i in range(frames.shape[0]):
            cv2.imwrite(osp.join(frame_dir, '{0:06d}.jpg'.format(i)),
                        frames[i].astype(np.uint8))
        # Convert to video and delete the frames.
        misc.run_cmd(
            'ffmpeg -loglevel panic -i {0}/%06d.jpg -crf {2} {1}'.format(
                frame_dir, outfpath, 24 / cfg.TEST.SAMPLE_RATE))
        misc.run_cmd('rm -r {}'.format(frame_dir))
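# `misc.run_cmd` above is assumed to be a thin shell wrapper; a minimal
# compatible sketch (the real helper may log or handle errors differently):
import subprocess

def run_cmd(cmd):
    """Run a shell command string, raising CalledProcessError on failure."""
    subprocess.check_call(cmd, shell=True)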
def main():
    if not osp.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    L = Lifter().to(device)
    D = Discriminator().to(device)
    T = Discriminator().to(device)

    optim_L = optim.Adam(L.parameters(), lr=args.lift_lr)
    optim_D = optim.Adam(D.parameters(), lr=args.disc_lr)
    optim_T = optim.Adam(T.parameters(), lr=args.disc_lr)

    # Use 2D results from the Stacked Hourglass network.
    train_loader = data.DataLoader(
        H36M(length=args.length, action='all', is_train=True,
             use_sh_detection=True),
        batch_size=1024,
        shuffle=True,
        pin_memory=True,
    )
    test_loader = data.DataLoader(
        H36M(length=1, action='all', is_train=False, use_sh_detection=True),
        batch_size=512,
        shuffle=False,
    )

    # Loggers
    logger = Logger(osp.join(args.checkpoint, 'log.txt'), title='Human3.6M')
    logger_err = Logger(osp.join(args.checkpoint, 'log_err.txt'),
                        title='Human3.6M MPJPE err')
    logger.set_names(['2d_loss ', '3d_loss ', 'adv_loss ', 'temporal_loss '])
    logger_err.set_names(['err'])

    for epoch in range(args.epoches):
        print('\nEpoch: [%d / %d]' % (epoch + 1, args.epoches))
        loss_2d, loss_3d, loss_adv, loss_t = train(
            train_loader, L, D, T, optim_L, optim_D, optim_T,
            epoch + 1, device, args)
        logger.append([loss_2d, loss_3d, loss_adv, loss_t])

        if (epoch + 1) % args.checkpoint_save_interval == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict_L': L.state_dict(),
                'state_dict_D': D.state_dict(),
                'state_dict_T': T.state_dict(),
            }, checkpoint=args.checkpoint)

        if (epoch + 1) % args.eval_interval == 0:
            ttl_err = test(test_loader, L, epoch, device, args)
            logger_err.append([ttl_err])

    logger.close()
    logger_err.close()
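# `save_checkpoint` is not defined in this section. A minimal sketch
# compatible with the call above (the filename is an illustrative
# assumption; other snippets in this section use different
# save_checkpoint helpers):
import os.path as osp
import torch

def save_checkpoint(state, checkpoint, filename='checkpoint.pth.tar'):
    """Serialize a state dict (epoch + model weights) into `checkpoint`."""
    torch.save(state, osp.join(checkpoint, filename))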
def main():
    if len(args.gpu_id.split(',')) == 1:
        local_rank = int(args.gpu_id.split(',')[0])
    else:
        local_rank = -1
    args.local_rank = local_rank
    num_gpus = len(args.gpu_id.split(','))
    multi_gpu_testing = num_gpus > 1

    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)

    if not os.path.isdir(os.path.join(cfg.CKPT, 'test')):
        mkdir_p(os.path.join(cfg.CKPT, 'test'))
    if cfg.VIS.ENABLED:
        if not os.path.exists(os.path.join(cfg.CKPT, 'vis')):
            mkdir_p(os.path.join(cfg.CKPT, 'vis'))

    assert_and_infer_cfg(make_immutable=False)
    args.test_net_file, _ = os.path.splitext(__file__)
    run_inference(args, ind_range=args.range,
                  multi_gpu_testing=multi_gpu_testing)
def main(checkpoint, input_files):
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    model_config, _, track_config = load_cfgs(checkpoint)
    track_config['log_level'] = 1

    g = tf.Graph()
    with g.as_default():
        model = inference_wrapper.InferenceWrapper()
        restore_fn = model.build_graph_from_config(model_config,
                                                   track_config, checkpoint)
    g.finalize()

    if not osp.isdir(track_config['log_dir']):
        logging.info('Creating inference directory: %s',
                     track_config['log_dir'])
        mkdir_p(track_config['log_dir'])

    video_dirs = []
    for file_pattern in input_files.split(","):
        video_dirs.extend(glob(file_pattern))
    logging.info("Running tracking on %d videos matching %s",
                 len(video_dirs), input_files)

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)

    with tf.Session(graph=g, config=sess_config) as sess:
        restore_fn(sess)
        tracker = Tracker(model, model_config=model_config,
                          track_config=track_config)

        for video_dir in video_dirs:
            if not osp.isdir(video_dir):
                logging.warning(
                    '{} is not a directory, skipping...'.format(video_dir))
                continue

            video_name = osp.basename(video_dir)
            video_log_dir = osp.join(track_config['log_dir'], video_name)
            mkdir_p(video_log_dir)

            filenames = sort_nicely(glob(video_dir + '/img/*.jpg'))
            first_line = open(video_dir + '/groundtruth_rect.txt').readline()
            bb = [int(v) for v in first_line.strip().split(',')]
            # Convert to 0-indexed coordinates for Python.
            init_bb = Rectangle(bb[0] - 1, bb[1] - 1, bb[2], bb[3])

            trajectory = tracker.track(sess, init_bb, filenames,
                                       video_log_dir)
            with open(osp.join(video_log_dir, 'track_rect.txt'), 'w') as f:
                for region in trajectory:
                    rect_str = '{},{},{},{}\n'.format(
                        region.x + 1, region.y + 1,
                        region.width, region.height)
                    f.write(rect_str)
def test(test_loader, L, epoch, device, args):
    L.eval()
    if not osp.isdir(args.eval_dir):
        mkdir_p(args.eval_dir)

    all_dist = []
    for batch_idx, (xy, X, ls) in enumerate(test_loader):
        bs = xy.shape[0]
        xy = xy.squeeze(1)  # (bs, 17*2)
        xy = xy.to(device)
        z_pred = L(xy)  # (bs, 17)

        # Assemble (x, y, z_pred) into a flattened 3D pose.
        pose_3d_t = torch.cat((xy[:, 0::2, None],
                               xy[:, 1::2, None],
                               z_pred[:, :, None]), dim=2)
        pose_3d_t = pose_3d_t.view(-1, 17 * 3).cpu().detach().numpy()
        X = X.squeeze(1).numpy()
        ls = ls.squeeze(1).numpy()

        # Center each predicted pose on its root joint.
        pose_3d_t[:, 0::3] = pose_3d_t[:, 0::3] - pose_3d_t[:, 0][:, None]
        pose_3d_t[:, 1::3] = pose_3d_t[:, 1::3] - pose_3d_t[:, 1][:, None]
        pose_3d_t[:, 2::3] = pose_3d_t[:, 2::3] - pose_3d_t[:, 2][:, None]

        if batch_idx == 0:
            vis_3d_skeleton(pose_3d_t[0].reshape(17, 3), np.ones((17, 1)),
                            epoch=epoch + 1)

        # Use Protocol 1.
        # for ba in range(bs):
        #     gt = X[ba].reshape(-1, 3)
        #     out = pose_3d_t[ba].reshape(-1, 3)
        #     _, Z, T, b, c = get_transformation(gt, out, True)
        #     out = (b * out.dot(T)) + c
        #     pose_3d_t[ba, :] = out.reshape(51)

        # Per-joint Euclidean error, rescaled by ls.
        sqerr = (pose_3d_t - X) ** 2
        distance = np.zeros((bs, 17))
        dist_idx = 0
        for k in range(0, 17 * 3, 3):
            distance[:, dist_idx] = np.sqrt(np.sum(sqerr[:, k:k + 3],
                                                   axis=1)) * ls
            dist_idx += 1
        all_dist.append(distance)

    all_dist = np.vstack(all_dist)
    ttl_err = np.mean(all_dist)
    return ttl_err
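# The metric assembled above is MPJPE (mean per-joint position error): the
# Euclidean distance between predicted and ground-truth joints, averaged
# over joints and examples. A self-contained toy check of the same math:
import numpy as np

pred = np.zeros((2, 17, 3))                      # two poses, 17 joints, xyz
gt = np.ones((2, 17, 3))                         # every joint off by (1, 1, 1)
per_joint = np.linalg.norm(pred - gt, axis=2)    # shape (2, 17)
assert np.isclose(per_joint.mean(), np.sqrt(3))  # MPJPE = sqrt(3)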
def main():
    if not os.path.isdir(cfg.CKPT):
        mkdir_p(cfg.CKPT)
    if args.cfg_file is not None:
        shutil.copyfile(args.cfg_file,
                        os.path.join(cfg.CKPT, args.cfg_file.split('/')[-1]))
    assert_and_infer_cfg(make_immutable=False)

    # Create model
    model = Generalized_RCNN()
    logging_rank(model, distributed=args.distributed,
                 local_rank=args.local_rank)

    # Create checkpointer
    checkpointer = CheckPointer(cfg.CKPT, weights_path=cfg.TRAIN.WEIGHTS,
                                auto_resume=cfg.TRAIN.AUTO_RESUME,
                                local_rank=args.local_rank)

    # Load model or random-initialization
    model = checkpointer.load_model(model,
                                    convert_conv1=cfg.MODEL.CONV1_RGB2BGR)
    if cfg.MODEL.BATCH_NORM == 'freeze':
        model = convert_bn2affine_model(model, merge=not checkpointer.resume)
    elif cfg.MODEL.BATCH_NORM == 'sync':
        model = convert_bn2syncbn_model(model)
    model.to(args.device)

    # Create optimizer
    optimizer = Optimizer(model, cfg.SOLVER,
                          local_rank=args.local_rank).build()
    optimizer = checkpointer.load_optimizer(optimizer)
    logging_rank('The mismatch keys: {}'.format(
        mismatch_params_filter(sorted(checkpointer.mismatch_keys))),
        distributed=args.distributed, local_rank=args.local_rank)

    # Create scheduler
    scheduler = LearningRateScheduler(optimizer, cfg.SOLVER, start_iter=0,
                                      local_rank=args.local_rank)
    scheduler = checkpointer.load_scheduler(scheduler)

    # Create training dataset and loader
    datasets = build_dataset(cfg.TRAIN.DATASETS, is_train=True,
                             local_rank=args.local_rank)
    train_loader = make_train_data_loader(datasets,
                                          is_distributed=args.distributed,
                                          start_iter=scheduler.iteration)

    # Model Distributed
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
        )
    else:
        model = torch.nn.DataParallel(model)

    # Build hooks
    all_hooks = build_train_hooks(cfg, optimizer, scheduler,
                                  max_iter=cfg.SOLVER.MAX_ITER,
                                  warmup_iter=cfg.SOLVER.WARM_UP_ITERS,
                                  ignore_warmup_time=False)

    # Train
    train(model, train_loader, optimizer, scheduler, checkpointer, all_hooks)
def save(self, model, epoch, cfg):
    """
    Save the model and the config.

    :param model: The PyTorch model
    :param epoch: The current epoch
    :param cfg: The training configuration of the model
    """
    mkdir_p(self.checkpoint_dir)
    checkpoint_path = os.path.join(self.checkpoint_dir,
                                   "checkpoint_epoch{}.pth".format(epoch))
    torch.save(model, checkpoint_path)

    # Augment the current config file with useful parameters for resuming
    # training. Save the current epoch in both train and train_extra
    # because we do not know which will be used.
    cfg.TRAIN.CURRENT_EPOCH = epoch
    config_path = os.path.join(self.checkpoint_dir,
                               "checkpoint_epoch{}.yml".format(epoch))
    with open(config_path, 'w') as config_file:
        yaml.dump(cfg, config_file, default_flow_style=False)
    print("Checkpointing new model ...")
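# A matching resume sketch for save() above. The helper is hypothetical:
# torch.save above stores the whole model object, so torch.load returns it
# directly (recent PyTorch may require weights_only=False for that), and
# the dumped cfg is an arbitrary Python object, so it needs an unsafe
# YAML load:
import os
import torch
import yaml

def load(checkpoint_dir, epoch):
    checkpoint_path = os.path.join(checkpoint_dir,
                                   "checkpoint_epoch{}.pth".format(epoch))
    config_path = os.path.join(checkpoint_dir,
                               "checkpoint_epoch{}.yml".format(epoch))
    model = torch.load(checkpoint_path)
    with open(config_path) as config_file:
        cfg = yaml.load(config_file, Loader=yaml.UnsafeLoader)
    return model, cfg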
def fitness(learning_rate, num_dense_layers, dropout, Weigth_Decay):
    '''
    Create and run a model with the specified hyperparameter setting.
    Used for the hyperparameter optimization.

    Parameters
    ----------
    learning_rate: float
        The learning rate
    num_dense_layers: int
        Number of fully connected layers
    dropout: float
        Amount of dropout
    Weigth_Decay: float
        Amount of weight decay
    '''
    # Print the hyper-parameters.
    print("............................")
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('num_dense_layers:', num_dense_layers)
    print('Dropout:', dropout)
    print('Weight Decay:', Weigth_Decay)
    print()

    # Create the neural network with these hyper-parameters.
    model = ConvModel(num_dense_layers=num_dense_layers, dropout=dropout)

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(learning_rate, num_dense_layers, dropout,
                           Weigth_Decay)
    output_dir = cfg.OUTPUT_DIR + "/" + log_dir

    # Create the directory
    mkdir_p(output_dir)

    # Create the SummaryWriter for TensorBoard
    writer = SummaryWriter(output_dir.replace("checkpoint", "logs"))

    # Train the model.
    best_model, accuracy = train_model(model,
                                       train_loader=train_loader,
                                       valid_loader=valid_loader,
                                       device=device,
                                       writer=writer,
                                       num_epochs=cfg.TRAIN.NUM_EPOCHS,
                                       lr=learning_rate,
                                       weight_decay=Weigth_Decay,
                                       output_dir=output_dir)

    # Save the model if it improves on the best-found performance.
    # We use the global keyword so we update the variable outside
    # of this function.
    global best_accuracy

    # If the classification accuracy of the saved model improved ...
    if accuracy > best_accuracy:
        print("Updating best Model")
        # Save the new model to the hard disk.
        torch.save(best_model, path_best_model)
        # Update the best classification accuracy.
        best_accuracy = accuracy

    # Delete the model with these hyper-parameters from memory.
    del model

    # NOTE: Scikit-optimize does minimization, so it tries to find a set of
    # hyper-parameters with the LOWEST fitness value. Because we are
    # interested in the HIGHEST classification accuracy, we negate this
    # number so it can be minimized.
    return -accuracy
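# fitness() above is shaped for scikit-optimize, which minimizes the
# returned value (hence the negated accuracy). A minimal driver sketch;
# the search-space bounds and n_calls are illustrative assumptions, not
# values from the original code:
from skopt import gp_minimize
from skopt.space import Integer, Real

dimensions = [
    Real(1e-6, 1e-2, prior='log-uniform', name='learning_rate'),
    Integer(1, 5, name='num_dense_layers'),
    Real(0.0, 0.9, name='dropout'),
    Real(0.0, 1e-2, name='Weigth_Decay'),
]

result = gp_minimize(func=lambda params: fitness(*params),
                     dimensions=dimensions,
                     n_calls=40)
print('Best hyper-parameters:', result.x)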
def process_split(root_dir, save_dir, split, subdir=''):
    data_dir = osp.join(root_dir, 'Data', 'VID', split)
    anno_dir = osp.join(root_dir, 'Annotations', 'VID', split, subdir)
    video_names = os.listdir(anno_dir)

    for idx, video in enumerate(video_names):
        print('{split}-{subdir} ({idx}/{total}): Processing {video}...'
              .format(split=split, subdir=subdir, idx=idx,
                      total=len(video_names), video=video))
        video_path = osp.join(anno_dir, video)
        xml_files = glob(osp.join(video_path, '*.xml'))

        for xml in xml_files:
            tree = ET.parse(xml)
            root = tree.getroot()

            folder = root.find('folder').text
            filename = root.find('filename').text

            # Read the image lazily, only once a crop actually needs it.
            img_file = osp.join(data_dir, folder, filename + '.JPEG')
            img = None

            # Get all object bounding boxes.
            bboxs = []
            for obj in root.iter('object'):
                bbox = obj.find('bndbox')
                xmax = float(bbox.find('xmax').text)
                xmin = float(bbox.find('xmin').text)
                ymax = float(bbox.find('ymax').text)
                ymin = float(bbox.find('ymin').text)
                width = xmax - xmin + 1
                height = ymax - ymin + 1
                bboxs.append([xmin, ymin, width, height])

            for obj_idx, obj in enumerate(root.iter('object')):
                trackid = obj.find('trackid').text
                class_name = obj.find('name').text

                track_save_dir = get_track_save_directory(
                    save_dir, 'train', subdir, video)
                mkdir_p(track_save_dir)
                annotation_save_dir = get_annotation_save_directory(
                    save_dir, 'train', subdir, video)
                mkdir_p(annotation_save_dir)

                name = '{}.{:02d}.crop.x.jpg'.format(filename, int(trackid))
                savename = osp.join(track_save_dir, name)
                if osp.isfile(savename):
                    continue  # Skip existing images.
                # Where the annotation xml is saved.
                annotation_name = osp.join(annotation_save_dir, name)

                if img is None:
                    img = imread(img_file)

                # Get the crop, jittered by a random offset off center.
                offsetx = random.randint(-30, 30)
                offsety = random.randint(-30, 30)
                target_box = convert_bbox(Rectangle(*bboxs[obj_idx]),
                                          'center-based', offsetx, offsety)
                crop, _ = get_crops(img, target_box,
                                    size_z=127, size_x=255,
                                    context_amount=0.5)

                imwrite(savename, crop,
                        [int(cv2.IMWRITE_JPEG_QUALITY), 90])
                # Save the annotation: the crop it belongs to and its
                # offset off center.
                write_xml(annotation_name, savename, offsetx, offsety)
import argparse
import os.path as osp
import pickle as pkl

import torch

from models.model import Lifter, Discriminator
from datasets.H36M import H36M

parser = argparse.ArgumentParser(description='PyTorch Evaluation Human3.6M')
parser.add_argument('--model_path',
                    default='/home/lyuheng/vision/unsupervised_3d_pose_lift/checkpoints/checkpoint_50000.pth.tar',
                    help='model path')
parser.add_argument('--demo_dir',
                    default='/home/lyuheng/vision/unsupervised_3d_pose_lift/demo',
                    help='demo path')
args = parser.parse_args()

if not osp.isdir(args.demo_dir):
    mkdir_p(args.demo_dir)

with open('./data/sh_detect_2d.pkl', 'rb') as f:
    p2d_sh = pkl.load(f)

L = Lifter()

def load_model(model, path):
    state = torch.load(path)
    model.load_state_dict(state['state_dict_L'])

def normalize_2d(pose):
    """
if __name__ == '__main__':
    if args.dataset == "cifar100":
        num_classes = 100
        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
    else:
        num_classes = 10
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    x_train, x_test = color_preprocessing(x_train, x_test)

    is_se = '_se' if args.is_se else ''
    base_path = ('logs/' + args.dataset + '/' + args.resnet + is_se +
                 '/' + args.block + '/')
    mkdir_p(base_path)
    index = str(len(os.listdir(base_path)) + 1)
    base_path = base_path + index + '/'
    mkdir_p(base_path + 'board/')
    mkdir_p(base_path + 'check/')

    print(args)
    if str(args.resnet).startswith('resnet_'):
        model = resnet((3, 32, 32), num_classes, block=block,
                       is_se=args.is_se)
    else:
        model = resnet((32, 32, 3), num_classes)

    with open(base_path + 'args.txt', 'w') as f:
        for arg in vars(args):
            f.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
def main(args):
    # Seed
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    np.random.seed(args.seed)

    if args.featurize_mode:
        msg = "To perform featurization, use evaluation mode"
        assert args.evaluate and args.evaluate_video, msg
        msg = (
            f"Until we fully understand the implications of multi-worker caching, we "
            f"should avoid using multiple workers (requested {args.workers})")
        assert args.workers <= 1, msg

    # Create checkpoint dir
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Overload print statement to log to file
    setup_verbose_logging(Path(args.checkpoint))
    logger_name = "train" if not args.evaluate else "eval"
    plog = logging.getLogger(logger_name)

    opts.print_args(args)
    opts.save_args(args, save_folder=args.checkpoint)

    if not args.debug:
        plt.switch_backend("agg")

    # Create model
    plog.info(f"==> creating model '{args.arch}', out_dim={args.num_classes}")
    if args.arch == "InceptionI3d":
        model = models.__dict__[args.arch](
            num_classes=args.num_classes,
            spatiotemporal_squeeze=True,
            final_endpoint="Logits",
            name="inception_i3d",
            in_channels=3,
            dropout_keep_prob=0.5,
            num_in_frames=args.num_in_frames,
            include_embds=args.include_embds,
        )
        if args.save_features:
            msg = "Set --include_embds 1 to save_features"
            assert args.include_embds, msg
    elif args.arch == "Pose2Sign":
        model = models.Pose2Sign(num_classes=args.num_classes)
    else:
        model = models.__dict__[args.arch](num_classes=args.num_classes)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Adjust opts for multi-gpu training. Note that we also apply warmup to
    # the learning rate. Can technically remove this if-statement, but
    # leaving for now to make the change explicit.
    if args.num_gpus > 1:
        num_gpus = torch.cuda.device_count()
        msg = f"Requested {args.num_gpus}, but {num_gpus} were visible"
        assert num_gpus == args.num_gpus, msg
        args.train_batch = args.train_batch * args.num_gpus
        args.test_batch = args.test_batch * args.num_gpus
        device_ids = list(range(args.num_gpus))
        args.lr = args.lr * args.num_gpus
    else:
        device_ids = [0]
    model = torch.nn.DataParallel(model, device_ids=device_ids)
    model = model.to(device)

    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # Optionally resume from a checkpoint
    tic = time.time()
    title = f"{args.datasetname} - {args.arch}"
    if args.resume:
        if os.path.isfile(args.resume):
            plog.info(f"=> loading checkpoint '{args.resume}'")
            checkpoint = load_checkpoint(args.resume)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            args.start_epoch = checkpoint["epoch"]
            plog.info(
                f"=> loaded checkpoint '{args.resume}' (epoch {checkpoint['epoch']})"
            )
            logger = Logger(os.path.join(args.checkpoint, "log.txt"),
                            title=title, resume=True)
            del checkpoint
        else:
            plog.info(f"=> no checkpoint found at '{args.resume}'")
            raise ValueError(f"Checkpoint not found at {args.resume}!")
    else:
        logger = Logger(os.path.join(args.checkpoint, "log.txt"), title=title)
        logger_names = ["Epoch", "LR", "train_loss", "val_loss"]
        for p in range(0, args.nloss - 1):
            logger_names.append("train_loss%d" % p)
            logger_names.append("val_loss%d" % p)
        for p in range(args.nperf):
            logger_names.append("train_perf%d" % p)
            logger_names.append("val_perf%d" % p)
        logger.set_names(logger_names)

    if args.pretrained:
        load_checkpoint_flexible(model, optimizer, args, plog)

    param_count = humanize.intword(sum(p.numel() for p in model.parameters()))
    plog.info(f"    Total params: {param_count}")
    duration = time.strftime("%Hh%Mm%Ss", time.gmtime(time.time() - tic))
    plog.info(f"Loaded parameters for model in {duration}")

    mdl = MultiDataLoader(
        train_datasets=args.datasetname,
        val_datasets=args.datasetname,
    )
    train_loader, val_loader, meanstd = mdl._get_loaders(args)
    train_mean = meanstd[0]
    train_std = meanstd[1]
    val_mean = meanstd[2]
    val_std = meanstd[3]

    save_feature_dir = args.checkpoint
    save_fig_dir = Path(args.checkpoint) / "figs"
    if args.featurize_mode:
        save_feature_dir = Path(
            args.checkpoint) / "filtered" / args.featurize_mask
        save_feature_dir.mkdir(exist_ok=True, parents=True)
        save_fig_dir = Path(args.checkpoint) / "figs" / args.featurize_mask
        save_fig_dir.mkdir(exist_ok=True, parents=True)

    # Define criterion
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")
    criterion = criterion.to(device)

    if args.evaluate or args.evaluate_video:
        plog.info("\nEvaluation only")
        loss, acc = do_epoch(
            "val",
            val_loader,
            model,
            criterion,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=val_mean,
            std=val_std,
            feature_dim=args.feature_dim,
            save_logits=True,
            save_features=args.save_features,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )
        if args.featurize_mode:
            plog.info("Featurizing without metric evaluation")
            return
        # Summarize/save results
        evaluate.evaluate(args, val_loader.dataset, plog)
        logger_epoch = [0, 0]
        for p in range(len(loss)):
            logger_epoch.append(float(loss[p].avg))
            logger_epoch.append(float(loss[p].avg))
        for p in range(len(acc)):
            logger_epoch.append(float(acc[p].avg))
            logger_epoch.append(float(acc[p].avg))
        # Append logger file
        logger.append(logger_epoch)
        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma, num_gpus=args.num_gpus)
        plog.info("\nEpoch: %d | LR: %.8f" % (epoch + 1, lr))

        # Train for one epoch
        train_loss, train_perf = do_epoch(
            "train",
            train_loader,
            model,
            criterion,
            epochno=epoch,
            optimizer=optimizer,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=train_mean,
            std=train_std,
            feature_dim=args.feature_dim,
            save_logits=False,
            save_features=False,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )

        # Evaluate on validation set
        valid_loss, valid_perf = do_epoch(
            "val",
            val_loader,
            model,
            criterion,
            epochno=epoch,
            num_classes=args.num_classes,
            debug=args.debug,
            checkpoint=args.checkpoint,
            mean=val_mean,
            std=val_std,
            feature_dim=args.feature_dim,
            save_logits=False,
            save_features=False,
            num_figs=args.num_figs,
            topk=args.topk,
            save_feature_dir=save_feature_dir,
            save_fig_dir=save_fig_dir,
        )

        logger_epoch = [epoch + 1, lr]
        for p in range(len(train_loss)):
            logger_epoch.append(float(train_loss[p].avg))
            logger_epoch.append(float(valid_loss[p].avg))
        for p in range(len(train_perf)):
            logger_epoch.append(float(train_perf[p].avg))
            logger_epoch.append(float(valid_perf[p].avg))

        # Append logger file
        logger.append(logger_epoch)

        # Save checkpoint
        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": args.arch,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            },
            checkpoint=args.checkpoint,
            snapshot=args.snapshot,
        )

        plt.clf()
        plt.subplot(121)
        logger.plot(["train_loss", "val_loss"])
        plt.subplot(122)
        logger.plot(["train_perf0", "val_perf0"])
        savefig(os.path.join(args.checkpoint, "log.pdf"))

    logger.close()
def download_or_skip(download_url, save_path):
    if not osp.exists(save_path):
        print('Downloading: {}'.format(download_url))
        urllib.request.urlretrieve(download_url, save_path)
    else:
        print('File {} exists, skip downloading.'.format(save_path))


if __name__ == '__main__':
    assets_dir = osp.join(ROOT_DIR, 'assets')

    # Make assets directory
    mkdir_p(assets_dir)

    # Download the pretrained color model
    download_base = 'https://www.robots.ox.ac.uk/~luca/stuff/siam-fc_nets/'
    model_name = '2016-08-17.net.mat'
    download_or_skip(download_base + model_name,
                     osp.join(assets_dir, model_name))

    # Download the pretrained gray model
    download_base = 'https://www.robots.ox.ac.uk/~luca/stuff/siam-fc_nets/'
    model_name = '2016-08-17_gray025.net.mat'
    download_or_skip(download_base + model_name,
                     osp.join(assets_dir, model_name))

    # Download one test sequence
    download_base = "http://cvlab.hanyang.ac.kr/tracker_benchmark/seq_new/"
    seq_name = 'KiteSurf.zip'
    download_or_skip(download_base + seq_name,
                     osp.join(assets_dir, seq_name))
def main(model_config, train_config, track_config):
    # Create the training directory, which will hold configurations,
    # model files, and TensorBoard logs.
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(model_config['seed'])
        np.random.seed(model_config['seed'])
        tf.set_random_seed(model_config['seed'])

        # Build the training and validation model
        model = siamese_model.SiameseModel(model_config, train_config,
                                           mode='train')
        model.build()
        model_va = siamese_model.SiameseModel(model_config, train_config,
                                              mode='val')
        model_va.build(reuse=True)

        # Save configurations for future reference
        _save_cfgs(train_dir, model_config, train_config, track_config)

        learning_rate = _configure_learning_rate(train_config,
                                                 model.global_step)
        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops
        opt_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=optimizer,
            clip_gradients=train_config['clip_gradients'],
            learning_rate_decay_fn=None,
            summaries=['learning_rate'])
        with tf.control_dependencies([opt_op]):
            train_op = tf.no_op(name='train')

        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)

        sess = tf.Session(config=sess_config)
        model_path = tf.train.latest_checkpoint(train_config['save_dir'])

        if not model_path:
            # Fresh start: initialize everything, optionally loading a
            # pretrained embedding.
            sess.run(global_variables_init_op)
            sess.run(local_variables_init_op)
            start_step = 0
            if model_config['embed_config']['embedding_checkpoint_file']:
                model.init_fn(sess)
        else:
            logging.info('Restore from last checkpoint: {}'.format(model_path))
            sess.run(local_variables_init_op)
            sess.run(global_variables_init_op)
            # Filter out optimizer bookkeeping variables so only model
            # weights are restored from the checkpoint.
            e1 = re.compile(".*def.*")
            e3 = re.compile(".*global_step.*")
            e4 = re.compile(".*Momentum")
            e2 = re.compile(".*OptimizeLoss.*")
            variables_to_restore1 = tf.global_variables()
            var_0 = [
                v for v in variables_to_restore1
                if not e1.match(v.name) and not e4.match(v.name)
                and not e3.match(v.name) and not e2.match(v.name)
            ]
            # var_0 = [v for v in variables_to_restore1 if not e3.match(v.name)
            #          and not e4.match(v.name) and not e2.match(v.name)]
            # var_0 = [v for v in variables_to_restore1
            #          if (re1.match(v.name) or re2.match(v.name)
            #              or re3.match(v.name) or re4.match(v.name))
            #          and not e4.match(v.name) and not e2]
            # va = [v for v in variables_to_restore1 if v not in var_0]
            # start_step = 10001
            # var_0 = [v for v in variables_to_restore1]
            # var_0 = [v for v in variables_to_restore1 if not e4.match(v.name)]
            saver0 = tf.train.Saver(
                var_0, max_to_keep=train_config['max_checkpoints_to_keep'])
            saver0.restore(sess, model_path)
            start_step = tf.train.global_step(sess, model.global_step.name) + 1

        tf.train.start_queue_runners(sess=sess)
        model.dataloader.start_threads(sess=sess)  # start customized queue runner
        model_va.dataloader.start_threads(sess=sess)  # start customized queue runner

        total_steps = int(train_config['epoch'] *
                          train_config['num_examples_per_epoch'] /
                          train_config['batch_size'])
        logging.info('training for {} steps'.format(total_steps))

        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])
        # The tf.Print call below adds ops at run time, so the graph is
        # deliberately left unfinalized here.
        # g.finalize()  # Finalize graph to avoid adding ops by mistake

        for step in range(0, total_steps):
            start_time = time.time()
            _, loss, batch_loss = sess.run(
                [train_op, model.total_loss, model.batch_loss])
            duration = time.time() - start_time

            if step % 10 == 0:
                examples_per_sec = model_config['batch_size'] / float(duration)
                time_remain = train_config['batch_size'] * (
                    total_steps - step) / examples_per_sec
                m, s = divmod(time_remain, 60)
                h, m = divmod(m, 60)
                format_str = ('%s: step %d, loss = %.2f, batch loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch; '
                              '%dh:%02dm:%02ds remains)')
                logging.info(format_str % (datetime.now(), step, loss,
                                           batch_loss, examples_per_sec,
                                           duration, h, m, s))

            # # Training loop (alternative version resuming from start_step):
            # data_config = train_config['train_data_config']
            # total_steps = int(data_config['epoch'] *
            #                   data_config['num_examples_per_epoch'] /
            #                   data_config['batch_size'])
            # logging.info('Train for {} steps'.format(total_steps))
            # for step in range(start_step, total_steps):
            #     start_time = time.time()
            #     _, loss, batch_loss = sess.run([train_op, model.total_loss,
            #                                     model.batch_loss])
            #     duration = time.time() - start_time
            #
            #     if step % 10 == 0:
            #         examples_per_sec = data_config['batch_size'] / float(duration)
            #         time_remain = data_config['batch_size'] * (total_steps - step) / examples_per_sec
            #         m, s = divmod(time_remain, 60)
            #         h, m = divmod(m, 60)
            #         format_str = ('%s: step %d, total loss = %.2f, batch loss = %.2f '
            #                       '(%.1f examples/sec; %.3f sec/batch; %dh:%02dm:%02ds remains)')
            #         logging.info(format_str % (datetime.now(), step, loss, batch_loss,
            #                                    examples_per_sec, duration, h, m, s))

            if step % 100 == 0:
                sess.run(tf.Print(model.gt, [model.gt], summarize=15 * 15))
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if (step % train_config['save_model_every_n_step'] == 0
                    or (step + 1) == total_steps):
                checkpoint_path = osp.join(train_config['train_dir'],
                                           'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)