# Shared imports for the get_model variants below. The module paths for the
# custom networks assume a FastAutoAugment-style layout and may need adjusting;
# variant-specific names (PyramidSkipNet, preresnet200, EfficientNet, RoutingFn)
# likewise come from their respective project modules.
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch import nn
from torch.nn.parallel import DataParallel, DistributedDataParallel
from torchvision import models

from networks.resnet import ResNet
from networks.wideresnet import WideResNet
from networks.shakeshake.shake_resnet import ShakeResNet
from networks.shakeshake.shake_resnext import ShakeResNeXt
from networks.pyramidnet import PyramidNet


# Variant 1: custom ResNet/WideResNet/Shake-Shake/PyramidNet models, wrapped in
# DataParallel or, when data_parallel=False, pinned to this Horovod worker's GPU.
def get_model(conf, num_class=10, data_parallel=True):
    name = conf['type']

    if name == 'resnet50':
        model = ResNet(dataset='imagenet', depth=50, num_classes=num_class, bottleneck=True)
    elif name == 'resnet200':
        model = ResNet(dataset='imagenet', depth=200, num_classes=num_class, bottleneck=True)
    elif name == 'wresnet40_2':
        # dropout_rate must be numeric (0.0 disables dropout), as in the other variants.
        model = WideResNet(40, 2, dropout_rate=0.0, num_classes=num_class)
    elif name == 'wresnet28_10':
        model = WideResNet(28, 10, dropout_rate=0.0, num_classes=num_class)
    elif name == 'shakeshake26_2x32d':
        model = ShakeResNet(26, 32, num_class)
    elif name == 'shakeshake26_2x64d':
        model = ShakeResNet(26, 64, num_class)
    elif name == 'shakeshake26_2x96d':
        model = ShakeResNet(26, 96, num_class)
    elif name == 'shakeshake26_2x112d':
        model = ShakeResNet(26, 112, num_class)
    elif name == 'shakeshake26_2x96d_next':
        model = ShakeResNeXt(26, 96, 4, num_class)
    elif name == 'pyramid':
        model = PyramidNet('cifar10', depth=conf['depth'], alpha=conf['alpha'],
                           num_classes=num_class, bottleneck=conf['bottleneck'])
    else:
        raise NameError('no model named %s' % name)

    if data_parallel:
        model = model.cuda()
        model = DataParallel(model)
    else:
        # Horovod path: pin the model to this process's local GPU.
        import horovod.torch as hvd
        device = torch.device('cuda', hvd.local_rank())
        model = model.to(device)

    cudnn.benchmark = True
    return model
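
# Minimal usage sketch for the variant above. The conf layout ('type', plus
# 'depth'/'alpha'/'bottleneck' for PyramidNet) is inferred from the branches;
# the concrete values are illustrative, not taken from the source.
conf = {'type': 'pyramid', 'depth': 272, 'alpha': 200, 'bottleneck': True}
model = get_model(conf, num_class=10, data_parallel=True)   # DataParallel-wrapped
logits = model(torch.randn(8, 3, 32, 32).cuda())            # CIFAR-sized batch
print(logits.shape)                                         # torch.Size([8, 10])
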
# Variant 2: adds pyramid_skip and uses torchvision/pytorchcv ImageNet models
# in place of the custom ResNets.
def get_model(conf, num_class=10, data_parallel=True):
    name = conf['type']

    if name == 'wresnet40_2':
        model = WideResNet(40, 2, dropout_rate=0.0, num_classes=num_class)
    elif name == 'wresnet28_10':
        model = WideResNet(28, 10, dropout_rate=0.0, num_classes=num_class)
    elif name == 'shakeshake26_2x32d':
        model = ShakeResNet(26, 32, num_class)
    elif name == 'shakeshake26_2x96d':
        model = ShakeResNet(26, 96, num_class)
    elif name == 'shakeshake26_2x112d':
        model = ShakeResNet(26, 112, num_class)
    elif name == 'pyramid':
        model = PyramidNet('cifar10', depth=conf['depth'], alpha=conf['alpha'],
                           num_classes=num_class, bottleneck=conf['bottleneck'])
    elif name == 'pyramid_skip':
        model = PyramidSkipNet(depth=conf['depth'], alpha=conf['alpha'],
                               num_classes=num_class, bottleneck=conf['bottleneck'])
    elif name == 'resnet50':
        model = models.resnet50(num_classes=num_class, pretrained=False)
        model.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # accept non-224 inputs
        # Bottleneck expansion is 4, so the classifier sees 512 * 4 = 2048 features.
        model.fc = nn.Linear(model.fc.in_features, num_class)
    elif name == 'resnet200':
        model = preresnet200(num_classes=num_class)
        model.features._modules['final_pool'] = nn.AdaptiveAvgPool2d((1, 1))
    else:
        raise NameError('no model named %s' % name)

    if data_parallel:
        model = model.cuda()
        model = DataParallel(model)
    else:
        # Horovod path: pin the model to this process's local GPU.
        import horovod.torch as hvd
        device = torch.device('cuda', hvd.local_rank())
        model = model.to(device)

    cudnn.benchmark = True
    return model
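
# Sanity check for the resnet50 head replacement above, assuming torchvision's
# ResNet-50 (Bottleneck expansion 4): the adaptive pool admits non-224 inputs,
# and fc.in_features confirms the 2048-dim feature vector. CPU-only sketch.
_m = models.resnet50(num_classes=10)
assert _m.fc.in_features == 512 * 4             # Bottleneck.expansion == 4
_m.avgpool = nn.AdaptiveAvgPool2d((1, 1))       # pool any spatial size to 1x1
_m.fc = nn.Linear(_m.fc.in_features, 10)
print(_m(torch.randn(2, 3, 160, 160)).shape)    # torch.Size([2, 10])
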
# Variant 3: adds EfficientNet (CondConv) with TF-TPU-style weight init and
# DistributedDataParallel placement keyed on local_rank.
def get_model(conf, num_class=10, local_rank=-1):
    """Build the training model.

    :param conf: model configuration
    :param num_class: number of classes in the training set
    :param local_rank: local GPU rank for distributed training; -1 disables DDP
    :return: the constructed model, moved onto the GPU
    """
    name = conf['type']

    if name == 'resnet50':
        model = ResNet(dataset='imagenet', depth=50, num_classes=num_class, bottleneck=True)
    elif name == 'resnet200':
        model = ResNet(dataset='imagenet', depth=200, num_classes=num_class, bottleneck=True)
    elif name == 'wresnet40_2':
        model = WideResNet(40, 2, dropout_rate=0.0, num_classes=num_class)
    elif name == 'wresnet28_10':
        model = WideResNet(28, 10, dropout_rate=0.0, num_classes=num_class)
    elif name == 'shakeshake26_2x32d':
        model = ShakeResNet(26, 32, num_class)
    elif name == 'shakeshake26_2x64d':
        model = ShakeResNet(26, 64, num_class)
    elif name == 'shakeshake26_2x96d':
        model = ShakeResNet(26, 96, num_class)
    elif name == 'shakeshake26_2x112d':
        model = ShakeResNet(26, 112, num_class)
    elif name == 'shakeshake26_2x96d_next':
        model = ShakeResNeXt(26, 96, 4, num_class)
    elif name == 'pyramid':
        model = PyramidNet('cifar10', depth=conf['depth'], alpha=conf['alpha'],
                           num_classes=num_class, bottleneck=conf['bottleneck'])
    elif 'efficientnet' in name:
        model = EfficientNet.from_name(name, condconv_num_expert=conf['condconv_num_expert'],
                                       norm_layer=None)  # TpuBatchNormalization
        if local_rank >= 0:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

        def kernel_initializer(module):
            def get_fan_in_out(module):
                num_input_fmaps = module.weight.size(1)
                num_output_fmaps = module.weight.size(0)
                receptive_field_size = 1
                if module.weight.dim() > 2:
                    receptive_field_size = module.weight[0][0].numel()
                fan_in = num_input_fmaps * receptive_field_size
                fan_out = num_output_fmaps * receptive_field_size
                return fan_in, fan_out

            if isinstance(module, torch.nn.Conv2d):
                # https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py#L58
                fan_in, fan_out = get_fan_in_out(module)
                torch.nn.init.normal_(module.weight, mean=0.0, std=np.sqrt(2.0 / fan_out))
                if module.bias is not None:
                    torch.nn.init.constant_(module.bias, val=0.)
            elif isinstance(module, RoutingFn):
                torch.nn.init.xavier_uniform_(module.weight)
                torch.nn.init.constant_(module.bias, val=0.)
            elif isinstance(module, torch.nn.Linear):
                # https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py#L82
                fan_in, fan_out = get_fan_in_out(module)
                delta = 1.0 / np.sqrt(fan_out)
                torch.nn.init.uniform_(module.weight, a=-delta, b=delta)
                if module.bias is not None:
                    torch.nn.init.constant_(module.bias, val=0.)

        model.apply(kernel_initializer)
    else:
        raise NameError('no model named %s' % name)

    if local_rank >= 0:
        device = torch.device('cuda', local_rank)
        model = model.to(device)
        model = DistributedDataParallel(model, device_ids=[local_rank], output_device=local_rank)
    else:
        model = model.cuda()
        # model = DataParallel(model)

    cudnn.benchmark = True
    return model
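
# Hedged sketch of driving the local_rank path above, e.g. under torchrun,
# which sets the LOCAL_RANK environment variable. The conf values are
# illustrative; the EfficientNet branch additionally expects a
# 'condconv_num_expert' key, per the branch logic above.
import os
import torch.distributed as dist

local_rank = int(os.environ.get('LOCAL_RANK', -1))
if local_rank >= 0:
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend='nccl')  # must precede DistributedDataParallel

conf = {'type': 'efficientnet-b0', 'condconv_num_expert': 1}  # illustrative
model = get_model(conf, num_class=1000, local_rank=local_rank)
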