import torch.distributed as dist


def reduce_gradients(model, _type='sum'):
    # All-reduce gradients across workers; _type selects a plain sum or an average.
    types = ['sum', 'avg']
    assert _type in types, 'gradients method must be in "{}"'.format(types)
    log_once("gradients method is {}".format(_type))
    if get_world_size() > 1:
        for param in model.parameters():
            if param.requires_grad:
                dist.all_reduce(param.grad.data)
                if _type == 'avg':
                    param.grad.data /= get_world_size()
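
# Hedged usage sketch, not from the original source: in a multi-worker
# training step, reduce_gradients would typically be called between
# backward() and optimizer.step(). The names model, optimizer, criterion,
# inputs, and targets below are hypothetical placeholders.
def train_step(model, optimizer, criterion, inputs, targets):
    loss = criterion(model(inputs), targets)
    optimizer.zero_grad()
    loss.backward()
    # Average gradients across workers so every replica takes the same step.
    reduce_gradients(model, _type='avg')
    optimizer.step()
    return loss
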
def forward(self, x):
    # Stem: conv -> BN -> ReLU -> max-pool.
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    # Residual stages; the three deepest feature maps are returned.
    p1 = self.layer1(x)
    p2 = self.layer2(p1)
    p3 = self.layer3(p2)
    # p3 = torch.cat([p2, p3], 1)
    log_once("p3 {}".format(p3.size()))
    p4 = self.layer4(p3)
    return p2, p3, p4
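
# Hedged shape-check sketch, not from the original source: it assumes a
# torchvision-style ResNet built as ResNet(Bottleneck, [3, 4, 6, 3]); the
# actual class name, block type, and input size in this repo may differ.
if __name__ == '__main__':
    import torch
    net = ResNet(Bottleneck, [3, 4, 6, 3])  # hypothetical constructor
    p2, p3, p4 = net(torch.randn(1, 3, 224, 224))
    # With standard ResNet strides these come out at 1/8, 1/16, and 1/32 of
    # the input resolution; tracker variants often dilate layer3/layer4 to
    # keep a stride of 8 instead.
    for name, feat in zip(('p2', 'p3', 'p4'), (p2, p3, p4)):
        print(name, tuple(feat.size()))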