def __init__(
        self,
        config,
):
    super(P, self).__init__()

    self.device = torch.device(config.get('device'))
    self.hidden_size = \
        config.get('th2vec_transformer_hidden_size')

    layers = []

    layers += [
        nn.Linear(2 * self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(config.get('th2vec_mlp_dropout')),
        LayerNorm(self.hidden_size),
    ]
    for _ in range(4):
        layers += [
            nn.Linear(self.hidden_size, self.hidden_size),
            GeLU(),
            # nn.Dropout(config.get('th2vec_mlp_dropout')),
            LayerNorm(self.hidden_size),
        ]
    layers += [
        nn.Linear(self.hidden_size, 1),
        nn.Sigmoid(),
    ]

    self.layers = nn.Sequential(*layers)
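# A minimal forward sketch (not in the original): since the first Linear is
# 2x-wide, we assume P scores a pair of fixed-size embeddings of shape
# (batch, hidden_size), concatenated on the feature axis.
def forward(
        self,
        cnj,
        thr,
):
    # (batch, 2 * hidden_size) -> (batch, 1) sigmoid probability.
    return self.layers(torch.cat([cnj, thr], dim=1))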
def __init__(
        self,
        config,
):
    super(DP, self).__init__()

    self.device = torch.device(config.get('device'))
    self.hidden_size = \
        config.get('th2vec_transformer_hidden_size')

    # Shared embedder for both inputs, one inner MLP per side, and a
    # sigmoid head over the concatenated projections.
    self._E = E(config)

    self.inner_cnj = nn.Sequential(*[
        nn.Linear(self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(0.1),
        LayerNorm(self.hidden_size),
    ])
    self.inner_thr = nn.Sequential(*[
        nn.Linear(self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(0.1),
        LayerNorm(self.hidden_size),
    ])

    self.head = nn.Sequential(*[
        nn.Linear(2 * self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(0.1),
        LayerNorm(self.hidden_size),
        nn.Linear(self.hidden_size, 1),
        nn.Sigmoid(),
    ])
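# A minimal forward sketch (an assumption, since E's interface is defined
# elsewhere): we assume the shared embedder maps each input to a
# (batch, hidden_size) vector.
def forward(
        self,
        cnj,
        thr,
):
    e_cnj = self.inner_cnj(self._E(cnj))
    e_thr = self.inner_thr(self._E(thr))

    # (batch, 2 * hidden_size) -> (batch, 1) sigmoid probability.
    return self.head(torch.cat([e_cnj, e_thr], dim=1))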
def __init__(
        self,
        config,
):
    super(D, self).__init__()

    self.device = torch.device(config.get('device'))
    self.token_count = \
        config.get('th2vec_token_count')
    self.theorem_length = \
        config.get('th2vec_theorem_length')
    self.embedding_size = \
        config.get('th2vec_cnn_embedding_size')
    self.hidden_size = \
        config.get('th2vec_cnn_hidden_size')
    self.dropout = \
        config.get('th2vec_cnn_dropout')

    self.input_embedding = nn.Embedding(
        self.token_count, self.embedding_size,
    )
    # self.position_embedding = nn.Embedding(
    #     self.theorem_length, self.embedding_size
    # )

    layers = []

    layers += [
        LayerNorm(self.embedding_size),
        nn.Linear(self.embedding_size, self.hidden_size),
    ]

    # One strided Downsample block per halving of theorem_length (which is
    # therefore assumed to be a power of two).
    n = self.theorem_length
    while n > 1:
        layers += [
            Downsample(self.hidden_size, 3, 2),
            GeLU(),
            nn.Dropout(self.dropout),
            LayerNorm(self.hidden_size),
        ]
        n = n // 2

    layers += [
        nn.Linear(self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(self.dropout),
        LayerNorm(self.hidden_size),
        nn.Linear(self.hidden_size, 1),
        nn.Sigmoid(),
    ]

    self.layers = nn.Sequential(*layers)
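# A minimal forward sketch (not in the original). Downsample's exact layout
# is defined elsewhere; we assume it consumes (batch, length, hidden_size)
# and halves the length axis, so the stack reduces the sequence to a single
# position before the sigmoid head.
def forward(
        self,
        thr,
):
    # thr: (batch, theorem_length) token ids.
    embeds = self.input_embedding(thr)
    return self.layers(embeds)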
def __init__(
        self,
        sequence_max_length,
        hidden_size,
        attention_head_count,
        dropout=0.1,
        attn_mask=None,
):
    super(TransformerBlock, self).__init__()

    # Note: sequence_max_length is accepted but unused here; the optional
    # attn_mask is captured for use at attention time.
    self.attention = nn.MultiheadAttention(
        hidden_size, attention_head_count,
        dropout=dropout,
    )
    self.attention_layer_norm = nn.LayerNorm(hidden_size, eps=1e-12)

    self.mlp = nn.Sequential(
        nn.Linear(hidden_size, 4 * hidden_size),
        GeLU(),
        nn.Linear(4 * hidden_size, hidden_size),
    )
    self.mlp_layer_norm = nn.LayerNorm(hidden_size, eps=1e-12)

    self._attn_mask = attn_mask
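# A minimal forward sketch (not in the original), assuming the post-norm
# residual layout implied by the two LayerNorms. Without batch_first,
# nn.MultiheadAttention expects (sequence, batch, hidden_size) inputs.
def forward(
        self,
        x,
):
    attn, _ = self.attention(x, x, x, attn_mask=self._attn_mask)
    x = self.attention_layer_norm(x + attn)

    return self.mlp_layer_norm(x + self.mlp(x))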
def __init__(
        self,
        config,
):
    super(VH, self).__init__()

    self.device = torch.device(config.get('device'))
    self.hidden_size = \
        config.get('prooftrace_hidden_size')
    self.head_hidden_size = \
        config.get('prooftrace_head_hidden_size')

    # Adapter down to the head width, then a value regression whose final
    # ReLU clamps the output to be non-negative.
    self.adapter = nn.Linear(self.hidden_size, self.head_hidden_size)

    self.value_head = nn.Sequential(
        nn.LayerNorm(self.head_hidden_size),
        nn.Linear(
            self.head_hidden_size,
            self.head_hidden_size,
        ),
        GeLU(),
        nn.Linear(self.head_hidden_size, 1),
        nn.ReLU(),
    )
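# A minimal forward sketch (not in the original): the value head is a plain
# composition of the adapter and the regression stack, so it accepts any
# (..., hidden_size) input and returns a (..., 1) non-negative value.
def forward(
        self,
        hiddens,
):
    return self.value_head(self.adapter(hiddens))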
def __init__(
        self,
        config,
):
    super(S, self).__init__()

    self.device = torch.device(config.get('device'))
    self.variable_count = \
        config.get('sat_dataset_variable_count')
    self.embedding_size = \
        config.get('sat_solver_transformer_embedding_size')
    self.hidden_size = \
        config.get('sat_solver_transformer_hidden_size')
    self.attention_head_count = \
        config.get('sat_solver_transformer_attention_head_count')
    self.layer_count = \
        config.get('sat_solver_transformer_layer_count')

    # One extra embedding row (presumably a padding/sentinel token).
    self.embedding = nn.Embedding(
        self.variable_count + 1, self.embedding_size,
    )

    layers = []

    layers += [
        nn.Linear(self.embedding_size, self.hidden_size),
    ]
    # Transformer trunk; this call assumes a TransformerBlock variant taking
    # (hidden_size, attention_head_count, dropout), without the leading
    # sequence_max_length of the block defined above.
    for _ in range(self.layer_count):
        layers += [
            TransformerBlock(
                self.hidden_size,
                self.attention_head_count,
                dropout=0.1,
            ),
        ]

    head = [
        nn.Linear(self.hidden_size, self.hidden_size),
        GeLU(),
        nn.Dropout(0.1),
        LayerNorm(self.hidden_size),
        nn.Linear(self.hidden_size, 1),
        nn.Sigmoid(),
    ]

    self.layers = nn.Sequential(*layers)
    self.head = nn.Sequential(*head)
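# A minimal forward sketch (not in the original). The shapes and the pooling
# step are assumptions: we take (batch, sequence) variable tokens, run the
# transformer trunk, and average over the sequence axis before the sigmoid
# satisfiability head.
def forward(
        self,
        cnf,
):
    hiddens = self.layers(self.embedding(cnf))
    return self.head(hiddens.mean(dim=1))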
def __init__(
        self,
        config,
):
    super(G, self).__init__()

    self.device = torch.device(config.get('device'))
    self.token_count = \
        config.get('th2vec_token_count')
    self.theorem_length = \
        config.get('th2vec_theorem_length')
    self.embedding_size = \
        config.get('th2vec_cnn_embedding_size')
    self.hidden_size = \
        config.get('th2vec_cnn_hidden_size')
    self.dropout = \
        config.get('th2vec_cnn_dropout')

    layers = []

    # Mirror of D's Downsample stack: one Upsample block per halving of
    # theorem_length, growing the sequence back to full length.
    n = self.theorem_length
    while n > 1:
        layers += [
            Upsample(self.hidden_size, 3, 2),
            GeLU(),
            nn.Dropout(self.dropout),
            LayerNorm(self.hidden_size),
        ]
        n = n // 2

    layers += [
        nn.Linear(self.hidden_size, self.token_count),
        nn.LogSoftmax(dim=2),
    ]

    self.layers = nn.Sequential(*layers)
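# A minimal forward sketch (not in the original). Upsample's exact layout is
# defined elsewhere; we assume it consumes (batch, length, hidden_size) and
# doubles the length axis, so a (batch, 1, hidden_size) latent comes out as
# (batch, theorem_length, token_count) log-probabilities.
def forward(
        self,
        z,
):
    return self.layers(z)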
def __init__(
        self,
        config,
):
    super(PH, self).__init__()

    self.device = torch.device(config.get('device'))
    self.sequence_length = \
        config.get('prooftrace_sequence_length')
    self.hidden_size = \
        config.get('prooftrace_hidden_size')
    self.head_hidden_size = \
        config.get('prooftrace_head_hidden_size')

    # Action head: log-probabilities over the action vocabulary (sized to
    # exclude the PREPARE tokens).
    self.action_head = nn.Sequential(
        nn.Linear(
            self.head_hidden_size,
            self.head_hidden_size,
        ),
        GeLU(),
        nn.LayerNorm(self.head_hidden_size),
        nn.Linear(
            self.head_hidden_size,
            len(PROOFTRACE_TOKENS) - len(PREPARE_TOKENS)
        ),
        nn.LogSoftmax(dim=2),
    )

    # Left pointer: separate projections for the query ("heads") and target
    # ("hiddens") sides, combined by a scalar-scoring projection.
    self.left_ptr_heads = nn.Linear(
        self.head_hidden_size,
        self.head_hidden_size,
    )
    self.left_ptr_hiddens = nn.Linear(
        self.head_hidden_size,
        self.head_hidden_size,
    )
    self.left_ptr_proj = nn.Sequential(
        GeLU(),
        nn.LayerNorm(self.head_hidden_size),
        nn.Linear(
            self.head_hidden_size,
            1,
        ),
    )

    # Right pointer: same structure as the left pointer.
    self.right_ptr_heads = nn.Linear(
        self.head_hidden_size,
        self.head_hidden_size,
    )
    self.right_ptr_hiddens = nn.Linear(
        self.head_hidden_size,
        self.head_hidden_size,
    )
    self.right_ptr_proj = nn.Sequential(
        GeLU(),
        nn.LayerNorm(self.head_hidden_size),
        nn.Linear(
            self.head_hidden_size,
            1,
        ),
    )

    self.log_softmax = nn.LogSoftmax(dim=2)
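# A minimal forward sketch (not in the original), assuming additive
# (Bahdanau-style) pointer scoring: each query position is combined with
# every target position by broadcasting, projected to a scalar score, and
# normalized over the target axis. The input shapes
# (batch, sequence, head_hidden_size) are assumptions.
def forward(
        self,
        heads,
        hiddens,
):
    actions = self.action_head(heads)

    # (batch, seq, 1, H) + (batch, 1, seq, H) -> (batch, seq, seq, H),
    # projected to (batch, seq, seq) scores, log-softmaxed over targets.
    lefts = self.log_softmax(self.left_ptr_proj(
        self.left_ptr_heads(heads).unsqueeze(2) +
        self.left_ptr_hiddens(hiddens).unsqueeze(1)
    ).squeeze(3))
    rights = self.log_softmax(self.right_ptr_proj(
        self.right_ptr_heads(heads).unsqueeze(2) +
        self.right_ptr_hiddens(hiddens).unsqueeze(1)
    ).squeeze(3))

    return actions, lefts, rights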