# __init__ of an nn.Module wrapper (class header not shown in this excerpt):
# wraps an SE3Transformer and adds a linear head that contracts the per-point
# feature channel down to one, producing a refined-coordinate delta

def __init__(self, *args, **kwargs):
    super().__init__()
    self.net = SE3Transformer(*args, **kwargs)
    self.to_refined_coords_delta = nn.Linear(kwargs['dim'], 1)
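# a minimal, self-contained sketch of the "predict a coordinate delta" pattern used
# by the head above. the SE(3)-equivariant backbone is replaced here by a plain
# linear stand-in (hypothetical, not the repo's module); the head works the same
# way: type-1 (vector) features of shape (b, n, dim, 3) are contracted over the
# feature channel by Linear(dim, 1) to give one 3-vector delta per point, which is
# added to the input coordinates.

import torch
from torch import nn

class ToyCoordinateRefiner(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.to_vector_feats = nn.Linear(dim, dim * 3)      # stand-in for the equivariant net
        self.to_refined_coords_delta = nn.Linear(dim, 1)

    def forward(self, feats, coords):
        b, n, d = feats.shape
        vec_feats = self.to_vector_feats(feats).view(b, n, d, 3)          # (b, n, dim, 3)
        # contract the feature channel: (b, n, dim, 3) -> (b, n, 1, 3) -> (b, n, 3)
        delta = self.to_refined_coords_delta(vec_feats.transpose(-1, -2)).transpose(-1, -2).squeeze(2)
        return coords + delta

refiner = ToyCoordinateRefiner(dim=32)
feats = torch.randn(2, 16, 32)
coords = torch.randn(2, 16, 3)
refined = refiner(feats, coords)   # (2, 16, 3)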
dl = cycle(data, data_cond)

# model

model = Alphafold2(
    dim=256,
    pos_tokens=3,   # N-term, C-alpha, C-term
    depth=1,
    heads=8,
    dim_head=64
).to(DEVICE)

refiner = SE3Transformer(
    num_tokens=10,  # 10 unique atoms ([N-term, C-alpha, C-term, C-beta, =O, -O], C, O, N, S)
    dim=64,
    depth=2,
    input_degrees=1,
    num_degrees=2,
    output_degrees=2,
    reduce_dim_out=True
)

# optimizer

dispersion_weight = 0.1
criterion = nn.MSELoss()
optim = Adam(model.parameters(), lr=LEARNING_RATE)

# training loop

for _ in range(NUM_BATCHES):
    for _ in range(GRADIENT_ACCUMULATE_EVERY):
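# the body of the accumulation loop above is truncated in this excerpt. as a hedged,
# standalone illustration only (not the script's actual body), a typical
# gradient-accumulation step scales the loss by the number of accumulation steps and
# defers the optimizer step until the inner loop finishes; a toy linear model and
# random tensors are used so the snippet runs on its own.

import torch
from torch import nn
from torch.optim import Adam

toy_model = nn.Linear(8, 3)
toy_optim = Adam(toy_model.parameters(), lr=3e-4)
toy_criterion = nn.MSELoss()
GRAD_ACCUM = 4

for _ in range(GRAD_ACCUM):
    inputs, targets = torch.randn(2, 8), torch.randn(2, 3)
    loss = toy_criterion(toy_model(inputs), targets)
    (loss / GRAD_ACCUM).backward()     # accumulate scaled gradients

toy_optim.step()                       # single parameter update after accumulation
toy_optim.zero_grad()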
# __init__ of another nn.Module wrapper (class header not shown in this excerpt):
# wraps an SE3Transformer and adds a learned sidechain projection parameter

def __init__(self, *args, **kwargs):
    super().__init__()
    self.net = SE3Transformer(*args, **kwargs)
    self.sidechains_proj = nn.Parameter(torch.randn(1, kwargs['dim']))
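# a hedged sketch of how a learned (1, dim) parameter like sidechains_proj can lift
# per-point scalar features into the model dimension via an outer product; the exact
# usage inside the wrapper's forward is not shown in this excerpt, so the einsum
# below is illustrative only.

import torch
from torch import nn

dim = 64
sidechains_proj = nn.Parameter(torch.randn(1, dim))

scalar_feats = torch.randn(2, 16, 1)   # (batch, points, 1): one scalar per point
lifted = torch.einsum('b n i, i d -> b n d', scalar_feats, sidechains_proj)
print(lifted.shape)                    # torch.Size([2, 16, 64])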
def __init__(
    self,
    *,
    dim,
    max_seq_len=2048,
    depth=6,
    heads=8,
    dim_head=64,
    pos_token=3,
    num_tokens=constants.NUM_AMINO_ACIDS,
    num_embedds=constants.NUM_EMBEDDS_TR,
    max_num_msas=constants.MAX_NUM_MSA,
    max_num_templates=constants.MAX_NUM_TEMPLATES,
    attn_dropout=0.,
    ff_dropout=0.,
    reversible=False,
    sparse_self_attn=False,
    cross_attn_compress_ratio=1,
    msa_tie_row_attn=False,
    template_attn_depth=2
):
    super().__init__()
    layers_sparse_attn = cast_tuple(sparse_self_attn, depth)

    # token and positional embeddings for the primary sequence

    self.token_emb = nn.Embedding(num_tokens, dim)
    self.pos_emb = nn.Embedding(max_seq_len, dim)
    self.pos_emb_ax = nn.Embedding(max_seq_len, dim)

    # multiple sequence alignment position embedding

    self.msa_pos_emb = nn.Embedding(max_seq_len, dim)
    self.msa_num_pos_emb = nn.Embedding(max_num_msas, dim)

    # template embedding

    self.template_dist_emb = nn.Embedding(constants.DISTOGRAM_BUCKETS, dim)
    self.template_num_pos_emb = nn.Embedding(max_num_templates, dim)
    self.template_pos_emb = nn.Embedding(max_seq_len, dim)
    self.template_pos_emb_ax = nn.Embedding(max_seq_len, dim)

    # template sidechain encoding

    self.sidechains_proj = nn.Parameter(torch.randn(1, dim))
    self.template_sidechain_emb = SE3Transformer(
        dim=dim,
        dim_head=dim,
        heads=1,
        num_neighbors=12,
        depth=4,
        input_degrees=2,
        num_degrees=2,
        output_degrees=1,
        reversible=True
    )

    # custom embedding projection

    self.embedd_project = nn.Linear(num_embedds, dim)

    # main trunk modules

    prenorm = partial(PreNorm, dim)
    prenorm_cross = partial(PreNormCross, dim)

    # template attention stack

    template_layers = nn.ModuleList([])
    for _ in range(template_attn_depth):
        template_layers.append(nn.ModuleList([
            prenorm(AxialAttention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout)),
            prenorm(AxialAttention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout)),
            prenorm(Attention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout)),
            prenorm(FeedForward(dim=dim, dropout=ff_dropout))
        ]))

    self.template_attn_net = template_layers

    layers = nn.ModuleList([])
    for _, layer_sparse_attn in zip(range(depth), layers_sparse_attn):
        # self attention, for both main sequence and msa
        layers.append(nn.ModuleList([
            prenorm(AxialAttention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout, sparse_attn=layer_sparse_attn)),
            prenorm(FeedForward(dim=dim, dropout=ff_dropout)),
            prenorm(AxialAttention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout, tie_row_attn=msa_tie_row_attn)),
            prenorm(FeedForward(dim=dim, dropout=ff_dropout)),
        ]))

        # cross attention, for main sequence -> msa and then msa -> sequence
        layers.append(nn.ModuleList([
            prenorm_cross(Attention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout, compress_ratio=cross_attn_compress_ratio)),
            prenorm(FeedForward(dim=dim, dropout=ff_dropout)),
            prenorm_cross(Attention(dim=dim, seq_len=max_seq_len, heads=heads, dim_head=dim_head, dropout=attn_dropout, compress_ratio=cross_attn_compress_ratio)),
            prenorm(FeedForward(dim=dim, dropout=ff_dropout)),
        ]))

    if not reversible:
        # remove last feed forward of each block if not reversible
        layers = nn.ModuleList(list(map(lambda t: t[:3], layers)))

    trunk_class = SequentialSequence if not reversible else ReversibleSequence
    self.net = trunk_class(layers)

    # to output

    self.to_distogram_logits = nn.Sequential(
        nn.LayerNorm(dim),
        nn.Linear(dim, constants.DISTOGRAM_BUCKETS)
    )
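# a minimal, hedged instantiation sketch of the constructor above; the hyperparameter
# values are hypothetical and the forward signature is not shown in this excerpt, so
# only construction and a parameter count are demonstrated.

model = Alphafold2(
    dim=128,
    max_seq_len=1024,
    depth=2,
    heads=4,
    dim_head=32,
    template_attn_depth=1
)
num_params = sum(p.numel() for p in model.parameters())
print(f'trainable parameters: {num_params:,}')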