def main():
    create_exp_dir(config.save, scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(config.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)
    logging.info("args = %s", str(config))

    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Model #######################################
    lasts = []
    for idx, arch_idx in enumerate(config.arch_idx):
        # load the searched architecture parameters for this arch
        if config.load_epoch == "last":
            state = torch.load(os.path.join(config.load_path, "arch_%d.pt" % arch_idx))
        else:
            state = torch.load(os.path.join(config.load_path, "arch_%d_%d.pt" % (arch_idx, int(config.load_epoch))))
        model = Network(
            [state["alpha_%d_0" % arch_idx].detach(), state["alpha_%d_1" % arch_idx].detach(), state["alpha_%d_2" % arch_idx].detach()],
            [None, state["beta_%d_1" % arch_idx].detach(), state["beta_%d_2" % arch_idx].detach()],
            [state["ratio_%d_0" % arch_idx].detach(), state["ratio_%d_1" % arch_idx].detach(), state["ratio_%d_2" % arch_idx].detach()],
            num_classes=config.num_classes, layers=config.layers, Fch=config.Fch,
            width_mult_list=config.width_mult_list, stem_head_width=config.stem_head_width[idx],
            ignore_skip=arch_idx == 0)
        # pick the better of the two candidate branch layouts by the accuracy/latency objective
        mIoU02 = state["mIoU02"]; latency02 = state["latency02"]; obj02 = objective_acc_lat(mIoU02, latency02)
        mIoU12 = state["mIoU12"]; latency12 = state["latency12"]; obj12 = objective_acc_lat(mIoU12, latency12)
        if obj02 > obj12:
            last = [2, 0]
        else:
            last = [2, 1]
        lasts.append(last)
        model.build_structure(last)
        logging.info("net: " + str(model))
        for b in last:
            if len(config.width_mult_list) > 1:
                plot_op(getattr(model, "ops%d" % b), getattr(model, "path%d" % b), width=getattr(model, "widths%d" % b),
                        head_width=config.stem_head_width[idx][1], F_base=config.Fch).savefig(
                    os.path.join(config.save, "ops_%d_%d.png" % (arch_idx, b)), bbox_inches="tight")
            else:
                plot_op(getattr(model, "ops%d" % b), getattr(model, "path%d" % b), F_base=config.Fch).savefig(
                    os.path.join(config.save, "ops_%d_%d.png" % (arch_idx, b)), bbox_inches="tight")
        plot_path_width(model.lasts, model.paths, model.widths).savefig(os.path.join(config.save, "path_width%d.png" % arch_idx))
        plot_path_width([2, 1, 0], [model.path2, model.path1, model.path0],
                        [model.widths2, model.widths1, model.widths0]).savefig(os.path.join(config.save, "path_width_all%d.png" % arch_idx))
        flops, params = profile(model, inputs=(torch.randn(1, 3, 1024, 2048),), verbose=False)
        logging.info("params = %fM, FLOPs = %fG", params / 1e6, flops / 1e9)
        logging.info("ops:" + str(model.ops))
        logging.info("path:" + str(model.paths))
        model = model.cuda()

    #####################################################
    print(config.save)
    latency = compute_latency(model, (1, 3, config.image_height, config.image_width))
    logging.info("FPS:" + str(1000. / latency))
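# The choice between the [2, 0] and [2, 1] branch layouts above is made by
# objective_acc_lat(), which scores an architecture from its mIoU and latency.
# Its definition is not part of this listing; the sketch below is only an
# assumption for illustration, using a latency-aware objective of the common
# NAS form acc * (lat / target)**w with made-up target and exponent values.
def objective_acc_lat_sketch(acc, lat, lat_target=8.3, w=-0.07):
    # higher accuracy and lower latency both increase the score
    return acc * (lat / lat_target) ** w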
def infer(models, evaluators, logger):
    mIoUs = []
    for model, evaluator in zip(models, evaluators):
        model.eval()
        logging.info('model latency: %s' % compute_latency(model, (1, 3, config.image_height, config.image_width)))
        _, mIoU = evaluator.run_online()
        # _, mIoU = evaluator.run_online_multiprocess()
        mIoUs.append(mIoU)
    return mIoUs
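# Hypothetical call site for infer(): the models are the networks built in
# main(), and the evaluators are assumed to be the project's online mIoU
# evaluators constructed elsewhere; only the call pattern is illustrated here.
def run_validation_sketch(models, evaluators):
    mIoUs = infer(models, evaluators, logger=None)
    for i, miou in enumerate(mIoUs):
        logging.info("model %d: mIoU = %.3f", i, miou)
    return mIoUs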
def _latency(h, w, C_in, C_out, kernel_size=3, stride=1, dilation=1, groups=1):
    layer = BasicResidual2x(C_in, C_out, kernel_size, stride, dilation, groups, slimmable=False)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out, kernel_size=3, stride=1, padding=None, dilation=1, groups=1, bias=False):
    layer = ConvNorm(C_in, C_out, kernel_size, stride, padding, dilation, groups, bias, slimmable=False)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out=19):
    layer = Head(C_in, C_out)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out):
    layer = FeatureFusion(C_in, C_out)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out, stride=1):
    layer = FactorizedReduce(C_in, C_out, stride, slimmable=False)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
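# These per-op _latency() helpers (BasicResidual2x, ConvNorm, Head,
# FeatureFusion, FactorizedReduce above) are typically swept over the feature
# map sizes and channel widths that can occur in the search space, and the
# measurements are cached so the search never times the same op twice. The
# table layout, key format, and file name below are assumptions for
# illustration, not the project's actual format; the loop calls whichever
# _latency() variant is in scope in the given op's module.
def build_latency_table_sketch(configs, out_file="latency_lookup_table_sketch.npy"):
    table = {}
    for h, w, c_in, c_out in configs:
        key = "H%d_W%d_Cin%d_Cout%d" % (h, w, c_in, c_out)
        if key not in table:
            table[key] = _latency(h, w, c_in, c_out)
    np.save(out_file, table)  # the dict is pickled inside the .npy file
    return table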
def main():
    print("begin")
    # preparation ################
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    seed = 12345
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

    # Configuration ##############
    use_boundary_2 = False
    use_boundary_4 = False
    use_boundary_8 = True
    use_boundary_16 = False
    use_conv_last = False
    n_classes = 19

    # STDC1Seg-50 250.4FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet813'
    # methodName = 'STDC1-Seg'
    # inputSize = 512
    # inputScale = 50
    # inputDimension = (1, 3, 512, 1024)

    # STDC1Seg-75 126.7FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet813'
    # methodName = 'STDC1-Seg'
    # inputSize = 768
    # inputScale = 75
    # inputDimension = (1, 3, 768, 1536)

    # STDC2Seg-50 188.6FPS on NVIDIA GTX 1080Ti
    backbone = 'STDCNet1446'
    methodName = 'STDC2-Seg'
    inputSize = 512
    inputScale = 50
    inputDimension = (1, 3, 512, 1024)

    # STDC2Seg-75 97.0FPS on NVIDIA GTX 1080Ti
    # backbone = 'STDCNet1446'
    # methodName = 'STDC2-Seg'
    # inputSize = 768
    # inputScale = 75
    # inputDimension = (1, 3, 768, 1536)

    model = BiSeNet(backbone=backbone, n_classes=n_classes,
                    use_boundary_2=use_boundary_2, use_boundary_4=use_boundary_4,
                    use_boundary_8=use_boundary_8, use_boundary_16=use_boundary_16,
                    input_size=inputSize, use_conv_last=use_conv_last)

    print('loading parameters...')
    respth = '../checkpoints/{}/'.format(methodName)
    save_pth = os.path.join(respth, 'model_maxmIOU{}.pth'.format(inputScale))
    model.load_state_dict(torch.load(save_pth))
    model = model.cuda()
    #####################################################
    latency = compute_latency(model, inputDimension)
    print("{}{} FPS:".format(methodName, inputScale) + str(1000. / latency))
    logging.info("{}{} FPS:".format(methodName, inputScale) + str(1000. / latency))

    # calculate FLOPS and params
    '''
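# The commented-out block above ("calculate FLOPS and params", opened with a
# triple-quoted string) is truncated in this listing. A plausible version,
# mirroring the thop-based profiling used in the other main() shown earlier,
# might look like the following sketch (an assumption, not the original code):
def profile_flops_sketch(model, input_dimension):
    from thop import profile
    dummy = torch.randn(*input_dimension)
    dummy = dummy.to(next(model.parameters()).device)  # match the model's device
    flops, params = profile(model, inputs=(dummy,), verbose=False)
    print("params = %.2fM, FLOPs = %.2fG" % (params / 1e6, flops / 1e9))
    return flops, params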
def _latency(h, w, C_in, C_out, stride=1):
    layer = SkipConnect(C_in, C_out, stride, slimmable=False)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out, kernel_size=7, stride=1, dilation=1, groups=1):
    layer = Conv7x7(C_in, C_out, kernel_size, stride, dilation, groups, slimmable=False)
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
def _latency(h, w, C_in, C_out, kernel_size=3, stride=1, dilation=1, groups=1):
    assert stride in [1, 2]
    layer = Self_Attn(dim=C_in, fmap_size=(128, 256), dim_out=C_out, downsample=(stride == 2))
    latency = compute_latency(layer, (1, C_in, h, w))
    return latency
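# Illustrative only: a helper like the Self_Attn variant above can be timed at
# a fixed feature-map size to compare it against other candidate ops. The
# resolution and channel counts here are arbitrary assumptions; note that
# (h, w) must match the fmap_size hard-coded in the layer, and latency is
# reported in milliseconds (the mains above compute FPS as 1000 / latency).
def compare_self_attn_latency_sketch():
    lat = _latency(128, 256, C_in=64, C_out=64, stride=1)
    print("Self_Attn @ 128x256, 64->64 channels: %.3f ms" % lat)
    return lat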