def __init__(self, size):
    self._cpu_storage = torch.Storage(size)
    self._gpu_storages = []
    if torch.cuda.is_available():
        for device_idx in range(torch.cuda.device_count()):
            with torch.cuda.device(device_idx):
                self._gpu_storages.append(torch.Storage(size).cuda())
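A minimal sketch of how a shared pre-allocated buffer like the one above might be consumed, assuming the surrounding efficient-DenseNet-style code resizes it lazily and views tensors into it; `shared_storage`, `scratch`, and the sizes are illustrative names, not part of the original class.

import torch

# Hypothetical usage of a shared pre-allocated buffer (names and sizes are illustrative):
shared_storage = torch.Storage(1024)       # one flat float buffer, reused across ops
scratch = torch.Tensor(shared_storage)     # tensor that views the shared buffer
scratch.resize_(4, 8, 16, 16)              # grows the underlying storage in place if needed
assert scratch.storage().data_ptr() == shared_storage.data_ptr()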
def __init__(self, in_num, out_num, layer_num, max_link, storage_size=1024):
    input_storage_1 = torch.Storage(storage_size)
    input_storage_2 = torch.Storage(storage_size)
    self.shared_allocation_1 = _SharedAllocation(input_storage_1)
    self.shared_allocation_2 = _SharedAllocation(input_storage_2)
    max_in_num = in_num + out_num * max_link
    self.final_num_features = max_in_num
    self.saved_features = []
    self.max_link = max_link
    super(_IntermediaBlock, self).__init__()
    print('creating intermedia block ...')
    self.adapters = []
    for i in range(0, layer_num - 1):
        if i < max_link:
            tmp_in_num = in_num + (i + 1) * out_num
        else:
            tmp_in_num = max_in_num
        print('intermedia layer %d input channel number is %d' % (i, tmp_in_num))
        self.adapters.append(
            _EfficientDensenetBottleneck(self.shared_allocation_1, self.shared_allocation_2,
                                         tmp_in_num, out_num))
    self.adapters = nn.ModuleList(self.adapters)
    print('intermedia layer output channel number is %d' % out_num)
def __init__(self, in_num, neck_size, growth_rate, layer_num, max_link, storage_size=1024,
             requires_skip=True, is_up=False):
    input_storage_1 = torch.Storage(storage_size)
    input_storage_2 = torch.Storage(storage_size)
    self.shared_allocation_1 = _SharedAllocation(input_storage_1)
    self.shared_allocation_2 = _SharedAllocation(input_storage_2)
    self.saved_features = []
    self.max_link = max_link
    self.requires_skip = requires_skip
    super(_DenseBlock, self).__init__()
    max_in_num = in_num + max_link * growth_rate
    self.final_num_features = max_in_num
    self.layers = []
    # print('layer number is %d' % layer_num)
    for i in range(0, layer_num):
        if i < max_link:
            tmp_in_num = in_num + i * growth_rate
        else:
            tmp_in_num = max_in_num
        # print('layer %d input channel number is %d' % (i, tmp_in_num))
        self.layers.append(
            _DenseLayer(self.shared_allocation_1, self.shared_allocation_2,
                        tmp_in_num, neck_size, growth_rate))
    self.layers = nn.ModuleList(self.layers)
    self.adapters_ahead = []
    adapter_in_nums = []
    adapter_out_num = in_num
    if is_up:
        adapter_out_num = adapter_out_num // 2  # integer division keeps the channel count an int under Python 3
    for i in range(0, layer_num):
        if i < max_link:
            tmp_in_num = in_num + (i + 1) * growth_rate
        else:
            tmp_in_num = max_in_num + growth_rate
        adapter_in_nums.append(tmp_in_num)
        # print('adapter %d input channel number is %d' % (i, adapter_in_nums[i]))
        self.adapters_ahead.append(
            _EfficientDensenetBottleneck(self.shared_allocation_1, self.shared_allocation_2,
                                         adapter_in_nums[i], adapter_out_num))
    self.adapters_ahead = nn.ModuleList(self.adapters_ahead)
    # print('adapter output channel number is %d' % adapter_out_num)
    if requires_skip:
        print('creating skip layers ...')
        self.adapters_skip = []
        for i in range(0, layer_num):
            self.adapters_skip.append(
                _EfficientDensenetBottleneck(self.shared_allocation_1, self.shared_allocation_2,
                                             adapter_in_nums[i], adapter_out_num))
        self.adapters_skip = nn.ModuleList(self.adapters_skip)
def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, storage_size=1024):
    input_storage_1 = torch.Storage(storage_size)
    input_storage_2 = torch.Storage(storage_size)
    self.final_num_features = num_input_features + (growth_rate * num_layers)
    self.shared_allocation_1 = _SharedAllocation(input_storage_1)
    self.shared_allocation_2 = _SharedAllocation(input_storage_2)
    super(_DenseBlock, self).__init__()
    for i in range(num_layers):
        layer = _DenseLayer(self.shared_allocation_1, self.shared_allocation_2,
                            num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate)
        self.add_module('denselayer%d' % (i + 1), layer)
def test_backward_computes_backward_pass():
    bn_weight = torch.randn(8).cuda()
    bn_bias = torch.randn(8).cuda()
    bn_running_mean = torch.randn(8).cuda()
    bn_running_var = torch.randn(8).abs().cuda()
    conv_weight = torch.randn(4, 8, 1, 1).cuda()
    input_1 = torch.randn(4, 6, 4, 4).cuda()
    input_2 = torch.randn(4, 2, 4, 4).cuda()

    layer = nn.Sequential(OrderedDict([
        ('norm', nn.BatchNorm2d(8)),
        ('relu', nn.ReLU(inplace=True)),
        ('conv', nn.Conv2d(8, 4, bias=None, kernel_size=1, stride=1)),
    ])).cuda()
    layer.train()
    layer.norm.weight.data.copy_(bn_weight)
    layer.norm.bias.data.copy_(bn_bias)
    layer.norm.running_mean.copy_(bn_running_mean)
    layer.norm.running_var.copy_(bn_running_var)
    layer.conv.weight.data.copy_(conv_weight)

    input_1_var = Variable(input_1, requires_grad=True)
    input_2_var = Variable(input_2, requires_grad=True)
    out_var = layer(torch.cat([input_1_var, input_2_var], dim=1))
    out_var.sum().backward()

    storage_1 = torch.Storage(4 * 8 * 3 * 3).cuda()
    storage_2 = torch.Storage(4 * 8 * 3 * 3).cuda()
    layer_efficient = _EfficientDensenetBottleneck(
        _SharedAllocation(storage_1), _SharedAllocation(storage_2), 8, 4
    ).cuda()
    layer_efficient.train()
    layer_efficient.norm_weight.data.copy_(bn_weight)
    layer_efficient.norm_bias.data.copy_(bn_bias)
    layer_efficient.norm_running_mean.copy_(bn_running_mean)
    layer_efficient.norm_running_var.copy_(bn_running_var)
    layer_efficient.conv_weight.data.copy_(conv_weight)

    input_efficient_1_var = Variable(input_1, requires_grad=True)
    input_efficient_2_var = Variable(input_2, requires_grad=True)
    out_efficient_var = layer_efficient([input_efficient_1_var, input_efficient_2_var])
    out_efficient_var.sum().backward()

    # print(input_1_var.grad.data[:, 0], input_efficient_1_var.grad.data[:, 0])
    assert(almost_equal(out_var.data, out_efficient_var.data))
    assert(almost_equal(layer.norm.running_mean, layer_efficient.norm_running_mean))
    assert(almost_equal(layer.norm.running_var, layer_efficient.norm_running_var))
    assert(almost_equal(layer.conv.weight.grad.data, layer_efficient.conv_weight.grad.data))
    assert(almost_equal(layer.norm.weight.grad.data, layer_efficient.norm_weight.grad.data))
    assert(almost_equal(layer.norm.bias.grad.data, layer_efficient.norm_bias.grad.data))
    assert(almost_equal(input_1_var.grad.data, input_efficient_1_var.grad.data))
    assert(almost_equal(input_2_var.grad.data, input_efficient_2_var.grad.data))
def create_multi_gpu_storage(size=1024):
    multi_storage = []
    device_cnt = torch.cuda.device_count()
    for device_no in range(device_cnt):
        with torch.cuda.device(device_no):
            multi_storage.append(torch.Storage(size).cuda())
    return multi_storage
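For context, a hedged example of how such a per-device storage list might be inspected; the loop below is an assumption for illustration, not taken from the original code.

# Hypothetical consumer of the per-device storages (illustrative only):
storages = create_multi_gpu_storage(size=2048)
for device_no, storage in enumerate(storages):
    # each entry lives on its own GPU; size() reports the element count
    print(device_no, storage.size(), storage.get_device())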
def test_parse_numpy_int(self, device):
    # Only concrete class can be given where "Type[number[_64Bit]]" is expected
    self.assertRaisesRegex(
        RuntimeError, "Overflow",
        lambda: torch.mean(torch.randn(1, 1), np.uint64(-1)))  # type: ignore[call-overload]

    # https://github.com/pytorch/pytorch/issues/29252
    for nptype in [np.int16, np.int8, np.uint8, np.int32, np.int64]:
        scalar = 3
        np_arr = np.array([scalar], dtype=nptype)
        np_val = np_arr[0]

        # np integral type can be treated as a python int in native functions with
        # int parameters:
        self.assertEqual(torch.ones(5).diag(scalar), torch.ones(5).diag(np_val))
        self.assertEqual(torch.ones([2, 2, 2, 2]).mean(scalar),
                         torch.ones([2, 2, 2, 2]).mean(np_val))

        # numpy integral type parses like a python int in custom python bindings:
        self.assertEqual(torch.Storage(np_val).size(), scalar)  # type: ignore[attr-defined]

        tensor = torch.tensor([2], dtype=torch.int)
        tensor[0] = np_val
        self.assertEqual(tensor[0], np_val)

        # Original reported issue, np integral type parses to the correct
        # PyTorch integral type when passed for a `Scalar` parameter in
        # arithmetic operations:
        t = torch.from_numpy(np_arr)
        self.assertEqual((t + np_val).dtype, t.dtype)
        self.assertEqual((np_val + t).dtype, t.dtype)
def get_up_network(env_name, num):
    import sys
    import os
    sys.path.append(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), 'PT/policy_transfer/uposi'))
    sys.path.append(
        os.path.join(os.path.abspath(os.path.dirname(__file__)), 'PT/baselines'))
    from a2c_ppo_acktr import algo, utils
    from a2c_ppo_acktr.algo import gail
    from a2c_ppo_acktr.arguments import get_args
    from a2c_ppo_acktr.envs import make_vec_envs
    from a2c_ppo_acktr.model import Policy
    from a2c_ppo_acktr.storage import RolloutStorage

    env_name = env_name[:-5]
    # Note: both branches currently resolve to the same checkpoint path.
    if 'Dart' in env_name:
        path = f"/home/hza/policy_transfer/PT/trained_models/ppo/UP_{env_name}_{num}.pt"
    else:
        path = f"/home/hza/policy_transfer/PT/trained_models/ppo/UP_{env_name}_{num}.pt"
    # map_location intended to place the checkpoint's storages on the GPU
    result = torch.load(path, map_location=lambda a, b: torch.Storage().cuda())
    actor_critic = result[0]
    actor_critic.cuda()
    ob_rms = result[1]
    return UP(actor_critic, ob_rms)
def test_forward_eval_mode_computes_forward_pass():
    momentum = 0.1
    eps = 1e-5

    weight = torch.randn(10).cuda()
    bias = torch.randn(10).cuda()
    running_mean = torch.randn(10).cuda()
    running_var = torch.randn(10).abs().cuda()

    input_1 = torch.randn(4, 5).cuda()
    input_2 = torch.randn(4, 5).cuda()
    storage = torch.Storage(40).cuda()

    bn = F.batch_norm(input=Variable(torch.cat([input_1, input_2], dim=1)),
                      running_mean=running_mean,
                      running_var=running_var,
                      weight=Parameter(weight),
                      bias=Parameter(bias),
                      training=False,
                      momentum=momentum,
                      eps=eps).data

    input_efficient = torch.cat([input_1, input_2], dim=1)
    func = _EfficientBatchNorm(storage=storage,
                               running_mean=running_mean,
                               running_var=running_var,
                               training=False,
                               momentum=momentum,
                               eps=eps)
    bn_efficient = func.forward(weight, bias, input_efficient)

    assert(almost_equal(bn, bn_efficient))
    assert(bn_efficient.storage().data_ptr() == storage.data_ptr())
def test_backward_train_mode_computes_forward_pass():
    momentum = 0.1
    eps = 1e-5

    weight = torch.randn(10).cuda()
    bias = torch.randn(10).cuda()
    running_mean = torch.randn(10).cuda()
    running_var = torch.randn(10).abs().cuda()
    weight_efficient = weight.clone()
    bias_efficient = bias.clone()
    running_mean_efficient = running_mean.clone()
    running_var_efficient = running_var.clone()

    input_1 = torch.randn(4, 5).cuda()
    input_2 = torch.randn(4, 5).cuda()
    storage = torch.Storage(40).cuda()

    # Reference path: standard F.batch_norm forward and backward
    input_var = Variable(torch.cat([input_1, input_2], dim=1), requires_grad=True)
    weight_var = Parameter(weight)
    bias_var = Parameter(bias)
    bn_var = F.batch_norm(input=input_var,
                          running_mean=running_mean,
                          running_var=running_var,
                          weight=weight_var,
                          bias=bias_var,
                          training=True,
                          momentum=momentum,
                          eps=eps)
    bn = bn_var.data
    bn_var.backward(gradient=input_var.data.clone().fill_(1))
    input_grad = input_var.grad.data
    weight_grad = weight_var.grad.data
    bias_grad = bias_var.grad.data

    # Efficient path: batch norm backed by the shared storage
    input_efficient = torch.cat([input_1, input_2], dim=1)
    input_efficient_orig = input_efficient.clone()
    func = _EfficientBatchNorm(storage=storage,
                               running_mean=running_mean_efficient,
                               running_var=running_var_efficient,
                               training=True,
                               momentum=momentum,
                               eps=eps)
    bn_efficient = func.forward(weight_efficient, bias_efficient, input_efficient)
    grad_out_efficient = bn_efficient.clone().fill_(1)
    weight_grad_efficient, bias_grad_efficient, input_grad_efficient = func.backward(
        weight_efficient, bias_efficient, input_efficient_orig, grad_out_efficient)

    assert(almost_equal(bn, bn_efficient))
    assert(grad_out_efficient.storage().data_ptr() == input_grad_efficient.storage().data_ptr())
    assert(almost_equal(input_grad, input_grad_efficient))
    assert(almost_equal(weight_grad, weight_grad_efficient))
    assert(almost_equal(bias_grad, bias_grad_efficient))
def test_forward_training_false_computes_forward_pass():
    bn_weight = torch.randn(8).cuda()
    bn_bias = torch.randn(8).cuda()
    bn_running_mean = torch.randn(8).cuda()
    bn_running_var = torch.randn(8).abs().cuda()
    conv_weight = torch.randn(4, 8, 3, 3).cuda()
    input_1 = torch.randn(4, 6, 4, 4).cuda()
    input_2 = torch.randn(4, 2, 4, 4).cuda()

    layer = nn.Sequential(OrderedDict([
        ('norm', nn.BatchNorm2d(8)),
        ('relu', nn.ReLU(inplace=True)),
        ('conv', nn.Conv2d(8, 4, bias=None, kernel_size=3, stride=1, padding=1)),
    ])).cuda()
    layer.eval()
    layer.norm.weight.data.copy_(bn_weight)
    layer.norm.bias.data.copy_(bn_bias)
    layer.norm.running_mean.copy_(bn_running_mean)
    layer.norm.running_var.copy_(bn_running_var)
    layer.conv.weight.data.copy_(conv_weight)

    input_1_var = Variable(input_1)
    input_2_var = Variable(input_2)
    out_var = layer(torch.cat([input_1_var, input_2_var], dim=1))

    storage_1 = torch.Storage(4 * 8 * 3 * 3).cuda()
    storage_2 = torch.Storage(4 * 8 * 3 * 3).cuda()
    layer_efficient = _EfficientDensenetBottleneck(
        _SharedAllocation(storage_1), _SharedAllocation(storage_2), 8, 4
    ).cuda()
    layer_efficient.eval()
    layer_efficient.norm_weight.data.copy_(bn_weight)
    layer_efficient.norm_bias.data.copy_(bn_bias)
    layer_efficient.norm_running_mean.copy_(bn_running_mean)
    layer_efficient.norm_running_var.copy_(bn_running_var)
    layer_efficient.conv_weight.data.copy_(conv_weight)

    input_efficient_1_var = Variable(input_1)
    input_efficient_2_var = Variable(input_2)
    out_efficient_var = layer_efficient([input_efficient_1_var, input_efficient_2_var])

    assert(almost_equal(out_var.data, out_efficient_var.data))
def forward(self, x):
    shared_alloc = torch.Storage().type(x.storage().type()) if self.efficient else None
    for module in self.features.children():
        if isinstance(module, _DenseBlock):
            x = module(x, shared_alloc)
        else:
            x = module(x)
    x = F.relu(x)
    x = F.adaptive_avg_pool2d(x, output_size=1).view(x.size(0), -1)
    # x = F.dropout(x, p=0.5, training=self.training)
    out = self.classifier(x)
    return out
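A small hedged illustration of the allocation trick in the forward pass above: an empty torch.Storage converted via .type() takes on the same element type (and device class) as the input's storage, so the dense blocks can resize it lazily to whatever scratch space they need. The tensor and names below are illustrative, not from the original model.

x = torch.randn(2, 3)                                     # stand-in input; use .cuda() for the GPU case
shared_alloc = torch.Storage().type(x.storage().type())   # empty storage, same type as x's storage
print(type(shared_alloc).__name__, shared_alloc.size())   # e.g. FloatStorage, 0 elements
shared_alloc.resize_(x.numel())                           # a block can grow it to the size it needs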
def test_tolist(self, device):
    list0D = []
    tensor0D = torch.Tensor(list0D)
    self.assertEqual(tensor0D.tolist(), list0D)

    table1D = [1, 2, 3]
    tensor1D = torch.Tensor(table1D)
    storage = torch.Storage(table1D)
    self.assertEqual(tensor1D.tolist(), table1D)
    self.assertEqual(storage.tolist(), table1D)
    self.assertEqual(tensor1D.tolist(), table1D)
    self.assertEqual(storage.tolist(), table1D)

    table2D = [[1, 2], [3, 4]]
    tensor2D = torch.Tensor(table2D)
    self.assertEqual(tensor2D.tolist(), table2D)

    tensor3D = torch.Tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
    tensorNonContig = tensor3D.select(1, 1)
    self.assertFalse(tensorNonContig.is_contiguous())
    self.assertEqual(tensorNonContig.tolist(), [[3, 4], [7, 8]])
def test_storage_can_be_converted_to_python_object(self):
    with enable_torch_dispatch_mode(LoggingTensorMode):
        s = torch.Storage()
        z = LoggingTensorMode(torch.empty([]))
        z.set_(s)