def set_swish(self, memory_efficient=True):
    """Sets swish function as memory efficient (for training) or standard (for export).

    Args:
        memory_efficient (bool): Whether to use the memory-efficient version of swish.
    """
    self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
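# For context, a minimal sketch of the two swish variants this setter toggles
# between, following the common EfficientNet-PyTorch pattern (an assumption;
# the real definitions live elsewhere in this codebase):
import torch
import torch.nn as nn

class Swish(nn.Module):
    """Standard swish, x * sigmoid(x); export-friendly (e.g. ONNX-traceable)."""
    def forward(self, x):
        return x * torch.sigmoid(x)

class SwishImplementation(torch.autograd.Function):
    """Recomputes sigmoid in backward instead of storing the intermediate
    activation, trading a little compute for lower training memory."""
    @staticmethod
    def forward(ctx, i):
        ctx.save_for_backward(i)
        return i * torch.sigmoid(i)

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]
        sig = torch.sigmoid(i)
        # d/dx [x * sigmoid(x)] = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
        return grad_output * (sig * (1 + i * (1 - sig)))

class MemoryEfficientSwish(nn.Module):
    def forward(self, x):
        return SwishImplementation.apply(x)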
def __init__(self, in_planes, out_planes, stride=1, activation='swish', dummy=False):
    super(SpMbBlock, self).__init__()
    # Depthwise 3x3 convolution (groups=in_planes) followed by batch norm.
    self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride,
                           padding=1, groups=in_planes, bias=False)
    self.bn1 = nn.BatchNorm2d(in_planes)
    # Pointwise (1x1) sparse convolution block.
    self.sp_conv = SpConvBlock(in_planes, out_planes, kernel_size=1, stride=1,
                               padding=0, activation=activation, dummy=dummy)
    self.dummy = []
    self.nn_act = Swish() if activation == 'swish' else nn.ReLU(inplace=True)
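# Parameter-count intuition for the depthwise/pointwise split used above
# (self-contained illustration; SpConvBlock itself is defined elsewhere in
# this codebase):
import torch.nn as nn

in_planes, out_planes = 64, 128
depthwise = nn.Conv2d(in_planes, in_planes, 3, padding=1, groups=in_planes, bias=False)
pointwise = nn.Conv2d(in_planes, out_planes, 1, bias=False)
dense = nn.Conv2d(in_planes, out_planes, 3, padding=1, bias=False)
separable_params = sum(p.numel() for p in depthwise.parameters()) \
                 + sum(p.numel() for p in pointwise.parameters())
dense_params = sum(p.numel() for p in dense.parameters())
print(separable_params, dense_params)  # 8768 vs. 73728 for a dense 3x3 conv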
def __init__(self, in_planes, out_planes, stride=1, activation='swish'):
    super(ConvBlock, self).__init__()
    self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                          padding=1, bias=False)
    self.bn = nn.BatchNorm2d(out_planes)
    self.nn_act = Swish() if activation == 'swish' else nn.ReLU(inplace=True)
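# A minimal smoke test for ConvBlock, calling its layers directly so no
# assumption about an unshown forward() is needed (requires torch and a
# Swish implementation in scope):
block = ConvBlock(in_planes=3, out_planes=16, stride=2)
x = torch.randn(1, 3, 32, 32)
# stride=2 with padding=1 on a 3x3 kernel halves the spatial dims: 32 -> 16.
out = block.nn_act(block.bn(block.conv(x)))
print(out.shape)  # torch.Size([1, 16, 16, 16])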
def __init__(self, writer, state_dim=172, action_dim=5, n_latent_var=512,
             lr=3e-4, betas=(0.9, 0.999), gamma=0.99, ppo_epochs=3,
             icm_epochs=1, eps_clip=0.2, ppo_batch_size=128,
             icm_batch_size=16, intr_reward_strength=0.02, lamb=0.95,
             device='cpu'):
    self.lr = lr
    self.betas = betas
    self.gamma = gamma
    self.lambd = lamb
    self.eps_clip = eps_clip
    self.ppo_epochs = ppo_epochs
    self.icm_epochs = icm_epochs
    self.ppo_batch_size = ppo_batch_size
    self.icm_batch_size = icm_batch_size
    self.intr_reward_strength = intr_reward_strength
    self.device = device
    self.writer = writer
    self.timestep = 0

    # Curiosity module and the two policy copies (the old policy serves rollouts).
    self.icm = ICM(activation=Swish()).to(self.device)
    self.policy = ActorCritic(state_dim=state_dim,
                              action_dim=action_dim,
                              n_latent_var=n_latent_var,
                              activation=Swish(),
                              device=self.device).to(self.device)
    self.policy_old = ActorCritic(state_dim, action_dim, n_latent_var,
                                  activation=Swish(),
                                  device=self.device).to(self.device)

    self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr, betas=betas)
    self.optimizer_icm = torch.optim.Adam(self.icm.parameters(), lr=lr, betas=betas)
    self.policy_old.load_state_dict(self.policy.state_dict())

    # Unreduced MSE: per-element errors are needed downstream.
    self.MseLoss = nn.MSELoss(reduction='none')
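# Why reduction='none' above: with an ICM, the per-transition forward-model
# error typically serves as the intrinsic reward, so the MSE must stay
# unreduced. A self-contained sketch (the 0.02 scale restates the
# intr_reward_strength default; the feature dim of 8 is made up):
import torch
import torch.nn as nn

mse = nn.MSELoss(reduction='none')
pred_phi = torch.randn(4, 8)  # predicted next-state features
phi = torch.randn(4, 8)       # encoded actual next-state features
intr_reward = 0.02 * mse(pred_phi, phi).mean(dim=1)  # one scalar per transition
print(intr_reward.shape)  # torch.Size([4])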
def linear_block(in_features, out_features, activation, dropout_rate,
                 adaptive_rate, adaptive_rate_scaler):
    # Map activation names to module instances; ModuleDict keeps them registered.
    activation_dispatcher = nn.ModuleDict([['lrelu', nn.LeakyReLU()],
                                           ['relu', nn.ReLU()],
                                           ['tanh', nn.Tanh()],
                                           ['sigmoid', nn.Sigmoid()],
                                           ['swish', Swish()]])
    return nn.Sequential(
        AdaptiveLinear(in_features, out_features,
                       adaptive_rate=adaptive_rate,
                       adaptive_rate_scaler=adaptive_rate_scaler),
        activation_dispatcher[activation],
        nn.Dropout(dropout_rate),
    )
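# Hypothetical usage of linear_block; AdaptiveLinear is defined elsewhere and
# is assumed to behave like nn.Linear with a learnable activation slope scaled
# by adaptive_rate_scaler (the rate values below are made up):
block = linear_block(in_features=64, out_features=32, activation='swish',
                     dropout_rate=0.1, adaptive_rate=0.1, adaptive_rate_scaler=10.0)
y = block(torch.randn(8, 64))
print(y.shape)  # torch.Size([8, 32])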
def set_swish(self, memory_efficient=True):
    """Sets swish function as memory efficient (for training) or standard (for export)."""
    self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
    for block in self._blocks:
        block.set_swish(memory_efficient)
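# Export-time usage sketch: the autograd.Function-based swish is not
# traceable, so the standard Swish is swapped in before export. `model`
# stands for an EfficientNet-style network exposing the set_swish above and
# is hypothetical here:
model.set_swish(memory_efficient=False)  # traceable swish for export
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, 'model.onnx')
model.set_swish(memory_efficient=True)   # restore for further training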
torch.cuda.empty_cache()

ROW_AXIS = 2
COL_AXIS = 3
CHANNEL_AXIS = 1
BATCH_AXIS = 0

activation_dict = {'relu': nn.ReLU(),
                   'relu6': nn.ReLU6(),
                   'prelu': nn.PReLU(),
                   'hardtanh': nn.Hardtanh(),
                   'tanh': nn.Tanh(),
                   'elu': nn.ELU(),
                   'selu': nn.SELU(),
                   'gelu': nn.GELU(),
                   'glu': nn.GLU(),
                   'swish': Swish(),
                   'sigmoid': nn.Sigmoid(),
                   'leakyrelu': nn.LeakyReLU(),
                   # 'hardsigmoid': nn.Hardsigmoid(),
                   'softsign': nn.Softsign(),
                   'softplus': nn.Softplus(),
                   # Softmin/Softmax without an explicit dim trigger a PyTorch
                   # warning and infer the dim from the input's shape.
                   'softmin': nn.Softmin(),
                   'softmax': nn.Softmax()}

optimizer_dict = {'adadelta': optim.Adadelta,
                  'adagrad': optim.Adagrad,
                  'adam': optim.Adam,
                  'adamw': optim.AdamW,
                  'sparse_adam': optim.SparseAdam,
                  'adamax': optim.Adamax,
                  'asgd': optim.ASGD,
                  'sgd': optim.SGD,
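# How these registries are typically consumed: activations are shared,
# pre-built module instances (note that parametric ones such as PReLU share
# their weights if the same instance is reused across layers), while
# optimizers are stored as classes and instantiated per model. A sketch,
# assuming the optimizer_dict above (truncated in this excerpt) is closed:
_model = nn.Linear(4, 2)  # hypothetical stand-in model
_act = activation_dict['swish']
_opt = optimizer_dict['adam'](_model.parameters(), lr=1e-3)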