def __init__(self, size):
    super().__init__()
    # Sinusoidal embeddings for the conditioning signal and the skip path.
    self.condify = SineEmbedding(2 * size)
    self.skip = SineEmbedding(2 * size)
    # Two 3x3 convolutions keeping the channel count fixed.
    self.blocks = nn.ModuleList(
        [nn.Conv2d(size, size, 3, padding=1) for idx in range(2)])
    # ReZero gate: the residual branch starts with zero contribution.
    self.zero = ReZero(size)
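# `SineEmbedding` itself is not shown in this section. The sketch below is
# an assumption about its shape, not the actual implementation: a standard
# sinusoidal embedding mapping a scalar condition to `size` channels of
# sines and cosines at geometrically spaced frequencies, as in transformer
# positional encodings.
import math

import torch
import torch.nn as nn


class SineEmbeddingSketch(nn.Module):
    def __init__(self, size):
        super().__init__()
        # Inverse frequencies from 1 down to 1/10000, half of the channels
        # for sin and half for cos.
        freqs = torch.exp(
            -math.log(10000.0) * torch.arange(size // 2) / (size // 2))
        self.register_buffer("freqs", freqs)

    def forward(self, value):
        # value: (batch,) scalar condition -> (batch, size) embedding.
        angles = value[:, None] * self.freqs[None, :]
        return torch.cat([torch.sin(angles), torch.cos(angles)], dim=-1)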
def __init__(self, depth=4, level_repeat=2, scale=4, base=32, z=32):
    super().__init__()
    # Project the global latent into the base feature space.
    self.first = nn.Linear(z, base)
    # Mean / log-variance heads for the top-level latent, each with a
    # zero-initialized scaling factor.
    self.first_mean = nn.Linear(base, z)
    self.first_mean_factor = nn.Parameter(torch.zeros(1, z))
    self.first_logvar = nn.Linear(base, z)
    self.first_logvar_factor = nn.Parameter(torch.zeros(1, z))
    # One residual block, latent modifier and ReZero gate per level.
    count = depth * level_repeat
    self.blocks = nn.ModuleList(
        [ResBlock(base, 3, depth=2) for idx in range(count)])
    self.modifiers = nn.ModuleList(
        [nn.Conv2d(z, base, 1, bias=False) for idx in range(count)])
    self.zeros = nn.ModuleList([ReZero(base) for idx in range(count)])
    # Per-level mean / log-variance projections, again with
    # zero-initialized scaling factors.
    self.mean = nn.ModuleList(
        [z_project(2 * base, z) for idx in range(count)])
    self.logvar = nn.ModuleList(
        [z_project(2 * base, z) for idx in range(count)])
    self.mean_factor = nn.ParameterList(
        [nn.Parameter(torch.zeros(1, z, 1, 1)) for idx in range(count)])
    self.logvar_factor = nn.ParameterList(
        [nn.Parameter(torch.zeros(1, z, 1, 1)) for idx in range(count)])
    self.level_repeat = level_repeat
    self.scale = scale
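# How the mean / log-variance heads above are typically consumed: the
# standard VAE reparameterization trick. The zero-initialized `*_factor`
# parameters suggest each head is gated so its level starts out inactive,
# analogous to ReZero; that gating is an assumption, while the
# reparameterization itself is standard.
import torch


def reparameterize_sketch(mean, logvar, mean_factor, logvar_factor):
    # Gate the raw heads by their zero-initialized factors (assumed usage).
    mean = mean_factor * mean
    logvar = logvar_factor * logvar
    # z = mu + sigma * eps, with eps ~ N(0, I).
    eps = torch.randn_like(mean)
    return mean + torch.exp(0.5 * logvar) * eps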
def __init__(self, size, kernel_size, depth=1):
    super().__init__()
    self.blocks = nn.ModuleList([
        nn.Conv2d(size, size, kernel_size, padding=kernel_size // 2)
        for idx in range(depth)
    ])
    self.zero = ReZero(size)
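# `ReZero` caps every residual branch in this file. The published ReZero
# trick (Bachlechner et al., 2020) gates the residual branch with a learned
# weight initialized to zero, so each block starts as the identity map. A
# minimal sketch; the per-channel gate shape and the two-argument call
# signature are assumptions, since `ReZero` is constructed with a channel
# count here but its implementation is not shown.
import torch
import torch.nn as nn


class ReZeroSketch(nn.Module):
    def __init__(self, size):
        super().__init__()
        # Zero-initialized gate: the residual branch contributes nothing
        # at the start of training.
        self.alpha = nn.Parameter(torch.zeros(1, size, 1, 1))

    def forward(self, x, residual):
        return x + self.alpha * residual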
def __init__(self, in_size, out_size, activation=None):
    super().__init__()
    self.activation = activation or nn.LeakyReLU(0.2)
    # Two 3x3 convolutions with equalized learning rate.
    self.blocks = nn.ModuleList([
        lr_equal(nn.Conv2d(in_size, out_size, 3, padding=1)),
        lr_equal(nn.Conv2d(out_size, out_size, 3, padding=1))
    ])
    self.zero = ReZero(out_size)
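# `lr_equal` is not shown here. The name matches the equalized learning
# rate trick of ProGAN / StyleGAN: initialize weights from N(0, 1) and
# rescale at runtime by the He constant, so every layer sees a comparable
# effective learning rate. A minimal sketch of that trick, as a wrapper
# module (an assumption about the actual helper, which may patch the
# module in place instead):
import math

import torch.nn as nn


class LrEqualSketch(nn.Module):
    def __init__(self, module):
        super().__init__()
        nn.init.normal_(module.weight)
        if module.bias is not None:
            nn.init.zeros_(module.bias)
        # He constant sqrt(2 / fan_in), applied at runtime rather than
        # baked into the initialization. Scaling the input of a linear or
        # convolutional layer is equivalent to scaling its weight.
        fan_in = module.weight[0].numel()
        self.scale = math.sqrt(2.0 / fan_in)
        self.module = module

    def forward(self, x):
        return self.module(x * self.scale)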
def __init__(self, base_channels=128, cond_size=512, channel_factors=None,
             block_depth=2, scale=50.0, activation=None, add_noise=True):
    super().__init__()
    if channel_factors is None:
        # channel_factors is required: it is indexed immediately below.
        raise ValueError(
            "channel_factors must be a sequence of channel multipliers.")
    self.channel_factors = channel_factors
    self.base_channels = base_channels
    self.block_depth = block_depth
    # Coordinate projection and local mapping for the conditioning signal.
    self.project = ProjectCoordinates(cond_size=cond_size, in_size=2,
                                      out_size=cond_size, scale=scale)
    self.project_local = LocalMapping(cond_size=cond_size)
    self.combine = lr_equal(
        nn.Conv2d(2 * cond_size, base_channels * channel_factors[0], 1))
    # One StyleGAN2-style block per pair of consecutive channel factors.
    self.blocks = nn.ModuleList([
        IndependentStyleGAN2Block(in_factor * base_channels,
                                  out_factor * base_channels,
                                  cond_size=cond_size,
                                  depth=block_depth,
                                  activation=activation,
                                  kernel_size=3,
                                  add_noise=add_noise)
        for in_factor, out_factor in zip(channel_factors[:-1],
                                         channel_factors[1:])
    ])
    # One ReZero gate per block for the 3-channel (RGB) outputs.
    self.zeros = nn.ModuleList(
        [ReZero(3) for idx in range(len(channel_factors) - 1)])
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # One ReZero gate per block; `self.depth` is set by the parent's
    # __init__.
    self.zeros = nn.ModuleList([ReZero() for idx in range(self.depth)])
def __init__(self, node_in_size, node_out_size, edge_in_size, edge_out_size,
             attention_size=32, heads=8, value_size=32, dropout=0.1,
             kernel_size=1, dilation=1, activation=nn.ReLU(), full=False,
             similarity=None):
    r"""Transformer block with materialized attention maps and
    "edge features" on these attention maps.

    Warning: implementing this on a hunch after seeing the AlphaFold
    blog post; this may not train or make sense at all.

    Args:
        node_in_size (int): number of sequence feature maps. These
            correspond to "node features" if one interprets an attention
            map as a soft adjacency matrix.
        node_out_size (int): number of sequence output feature maps.
        edge_in_size (int): number of feature maps of a materialized
            attention map. Interpreting the attention map as a soft
            adjacency matrix, its entries correspond to edges, to which
            we can attach learned edge features.
        edge_out_size (int): number of output feature maps of a
            materialized attention map.
        attention_size (int): size of vectors compared in dot-product
            attention.
        heads (int): number of attention heads.
        value_size (int): size of the value embedding.
        dropout (float): dropout of the transformer block.
        kernel_size (int): kernel size of the local block.
        dilation (int): dilation of the local block, if applicable.
        activation (nn.Module): nonlinear activation function. Defaults
            to ReLU.
        full (bool): passed through to MaterializedMultiHeadAttention.
        similarity: passed through to MaterializedMultiHeadAttention.
    """
    super().__init__()
    padding = kernel_size // 2 * dilation
    self.dropout = nn.Dropout(dropout)
    self.attention = MaterializedMultiHeadAttention(
        node_in_size, node_out_size, edge_in_size, edge_out_size,
        attention_size=attention_size, heads=heads, value_size=value_size,
        full=full, similarity=similarity)
    # Project residual connections if input and output widths differ.
    if node_in_size != node_out_size:
        self.project_node = nn.Conv1d(node_in_size, node_out_size, 1,
                                      bias=False)
    else:
        self.project_node = nn.Identity()
    if edge_in_size != edge_out_size:
        self.project_edge = nn.Conv2d(edge_in_size, edge_out_size, 1,
                                      bias=False)
    else:
        self.project_edge = nn.Identity()
    # ReZero gates for the attention and MLP residual branches of the
    # node and edge streams.
    self.zero_node = nn.ModuleList(
        [ReZero(node_out_size), ReZero(node_out_size)])
    self.zero_edge = nn.ModuleList(
        [ReZero(edge_out_size), ReZero(edge_out_size)])
    # Local feed-forward blocks: 1D convolutions over the sequence for
    # node features, 2D convolutions over the attention map for edge
    # features.
    self.node_mlp = nn.Sequential(
        nn.Conv1d(node_in_size, node_in_size, kernel_size,
                  padding=padding, dilation=dilation),
        activation,
        nn.Conv1d(node_in_size, node_in_size, kernel_size,
                  padding=padding, dilation=dilation),
        activation)
    self.edge_mlp = nn.Sequential(
        nn.Conv2d(edge_in_size, edge_in_size, kernel_size,
                  padding=padding, dilation=dilation),
        activation,
        nn.Conv2d(edge_in_size, edge_in_size, kernel_size,
                  padding=padding, dilation=dilation),
        activation)
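# A hypothetical usage sketch for the block above. The class name and the
# forward signature are assumptions (neither is shown in this section); the
# sketch assumes forward takes node features of shape
# (batch, node_in_size, length) and edge features of shape
# (batch, edge_in_size, length, length), returning tensors of the
# corresponding output widths.
import torch

block = MaterializedTransformerBlock(  # hypothetical name for this __init__
    node_in_size=64, node_out_size=64,
    edge_in_size=16, edge_out_size=16)
nodes = torch.randn(2, 64, 100)        # (batch, channels, length)
edges = torch.randn(2, 16, 100, 100)   # (batch, channels, length, length)
nodes, edges = block(nodes, edges)     # assumed forward signature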