Example #1
    def __init__(self, config: Munch):
        r""" Init a new base-bert synapse.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munch namespace config item.
        """
        super(BertSynapseBase, self).__init__( config = config )

        # Hugging face config item.
        huggingface_config = BertConfig(    vocab_size=bittensor.__vocab_size__, 
                                            hidden_size=bittensor.__network_dim__, 
                                            num_hidden_layers=config.synapse.num_hidden_layers, 
                                            num_attention_heads=config.synapse.num_attention_heads, 
                                            intermediate_size=bittensor.__network_dim__, 
                                            is_decoder=False)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite( config, query_dim = bittensor.__network_dim__ )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = BertModel( huggingface_config, add_pooling_layer=True )

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_dim, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = torch.nn.Linear( bittensor.__network_dim__, bittensor.__network_dim__ )

        # pooling_layer: transforms the hidden layer into a pooled representation by taking the encoding of the first token
        # [batch_size, sequence_dim,  bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.pooler = BertPooler( huggingface_config )

        self.to(self.device)
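A minimal torch-only sketch of the shape flow the comments above describe (encode a sequence, pool the first token, project through the hidden layer); network_dim stands in for bittensor.__network_dim__ and all names and sizes here are illustrative:

import torch

network_dim = 8          # stand-in for bittensor.__network_dim__
batch_size, seq_len = 2, 5

# encoder stand-in: tokens -> [batch_size, seq_len, network_dim]
encoding = torch.randn(batch_size, seq_len, network_dim)

# pooler stand-in: take the encoding of the first token -> [batch_size, network_dim]
pooled = encoding[:, 0, :]

# hidden layer: network_dim -> network_dim, applied per token.
hidden_layer = torch.nn.Linear(network_dim, network_dim)
hidden = hidden_layer(encoding)

print(pooled.shape)   # torch.Size([2, 8])
print(hidden.shape)   # torch.Size([2, 5, 8])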
Example #2
    def __init__(self, config: Munch):
        r""" Init a new GPT2 language-model synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munched config class.
        """
        super(GPT2LMSynapse, self).__init__(config=config)

        # Build hugging face config.
        huggingface_config = GPT2Config(
            vocab_size=bittensor.__vocab_size__,
            n_embd=bittensor.__network_dim__,
            n_layer=config.synapse.n_layer,
            n_head=config.synapse.n_head,
            n_inner=config.synapse.n_inner,
            activation_function=config.synapse.activation_function,
            resid_pdrop=config.synapse.resid_pdrop,
            embd_pdrop=config.synapse.embd_pdrop,
            attn_pdrop=config.synapse.attn_pdrop,
            layer_norm_epsilon=config.synapse.layer_norm_epsilon,
            initializer_range=config.synapse.initializer_range,
            summary_type=config.synapse.summary_type,
            summary_use_proj=config.synapse.summary_use_proj,
            summary_activation=config.synapse.summary_activation,
            summary_proj_to_labels=config.synapse.summary_proj_to_labels,
            summary_first_dropout=config.synapse.summary_first_dropout,
        )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = GPT2Model(huggingface_config)

        # pooler_layer: pools the hidden units for use by the pkm dendrite rpc query.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.pooler = GPT2Pooler(huggingface_config)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, bittensor.__network_dim__] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    query_dim=bittensor.__network_dim__)

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = torch.nn.Linear(bittensor.__network_dim__,
                                            bittensor.__network_dim__)

        # target_layer: maps from hidden layer to vocab dimension for each token. Used by MLM loss.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__vocab_size__]
        self.target_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__vocab_size__,
                                      bias=False)

        # Loss function: MLM cross-entropy loss.
        # predicted: [batch_size, sequence_len, bittensor.__vocab_size__], targets: [batch_size, sequence_len] -> [1]
        self.loss_fct = torch.nn.CrossEntropyLoss()

        self.to(self.device)
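A self-contained sketch of the shifted cross-entropy that loss_fct computes in this synapse; the sizes are toy values and logits stands in for local_target:

import torch

vocab_size, batch_size, seq_len = 11, 2, 6
logits = torch.randn(batch_size, seq_len, vocab_size)         # target_layer output
tokens = torch.randint(0, vocab_size, (batch_size, seq_len))   # input ids double as labels

# Predict token t+1 from position t: drop the last logit and the first label.
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = tokens[..., 1:].contiguous()

loss_fct = torch.nn.CrossEntropyLoss()
loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
print(loss.item())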
Example #3
 def add_args( parser: argparse.ArgumentParser ):    
     r""" Add custom params to the parser.
     """
     parser.add_argument('--synapse.num_hidden_layers', default=2, type=int, 
                         help='Number of hidden layers in the Transformer encoder.')
     parser.add_argument('--synapse.num_attention_heads', default=2, type=int, 
                         help='Number of attention heads for each attention layer in the Transformer encoder.')
     parser.add_argument('--synapse.n_block_filter', default=100, type=int, help='Stale neurons are filtered after this many blocks.')
     PKMDendrite.add_args(parser)
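Because the flags above use dotted names, argparse stores them under dests that contain dots; a stdlib-only sketch of how they parse and are read back (the values shown are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--synapse.num_hidden_layers', default=2, type=int)
parser.add_argument('--synapse.num_attention_heads', default=2, type=int)

args = parser.parse_args(['--synapse.num_hidden_layers', '4'])

# argparse keeps the literal dest 'synapse.num_hidden_layers', so read it via vars().
flat = vars(args)
print(flat['synapse.num_hidden_layers'])    # 4
print(flat['synapse.num_attention_heads'])  # 2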
Example #4
 def add_args(parser: argparse.ArgumentParser):
     parser.add_argument(
         '--synapse.target_dim',
         default=10,
         type=int,
         help='Final logit layer dimension. i.e. 10 for MNIST.')
     parser = PKMDendrite.add_args(parser)
Example #5
    def __init__(self, config: Munch):
        r""" Init a new ffnn synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munch namespace config item.
        """
        super(FFNNSynapse, self).__init__(config=config)

        # transform_layer: transforms images to common dimension.
        # [batch_size, -1, -1, -1] -> [batch_size, self.transform_dim]
        self.transform = Normalize((0.1307, ), (0.3081, ), device=self.device)
        self.transform_pool = nn.AdaptiveAvgPool2d((28, 28))
        self.transform_conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.transform_conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.transform_drop = nn.Dropout2d()
        self.transform_dim = 320

        # context_layer: distills the remote_context from the transform layer.
        # [batch_size, transform_dim] -> [batch_size, bittensor.__network_dim__]
        self.context_layer1 = nn.Linear(self.transform_dim, 256)
        self.context_layer2 = nn.Linear(256, bittensor.__network_dim__)

        # hidden_layer: learns hidden units for network and target.
        # [batch_size, transform_dim + bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.hidden_layer1 = nn.Linear(
            self.transform_dim + bittensor.__network_dim__,
            bittensor.__network_dim__)
        self.hidden_layer2 = nn.Linear(bittensor.__network_dim__,
                                       bittensor.__network_dim__)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    query_dim=bittensor.__network_dim__)

        # target_layer: Maps from hidden layer to target dimension
        # [batch_size, bittensor.__network_dim__] -> [batch_size, self.target_dim]
        self.target_layer1 = nn.Linear(bittensor.__network_dim__, 256)
        self.target_layer2 = nn.Linear(256, self.config.synapse.target_dim)

        self.to(self.device)
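A small torch-only check that the conv/pool stack above really flattens a 28x28 single-channel image into the 320 features assumed by self.transform_dim:

import torch
import torch.nn as nn
import torch.nn.functional as F

conv1 = nn.Conv2d(1, 10, kernel_size=5)
conv2 = nn.Conv2d(10, 20, kernel_size=5)

x = torch.randn(4, 1, 28, 28)              # [batch, channels, rows, cols]
x = F.relu(F.max_pool2d(conv1(x), 2))      # -> [4, 10, 12, 12]
x = F.relu(F.max_pool2d(conv2(x), 2))      # -> [4, 20, 4, 4]
x = x.view(-1, 320)                        # 20 * 4 * 4 = 320
print(x.shape)                             # torch.Size([4, 320])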
Example #6
    def add_args(parser: argparse.ArgumentParser):
        r""" This function adds the configuration items for the DPN synapse.
        These args are used to instantiate a Dual Path model. 
        Instantiating a configuration with the defaults will yield a "shallow" DPN-26 configuration. 

        For deeper network configurations, it is possible to set the num_blocks parameter to (3, 4, 20, 3) for a
        DPN-92. 
        
        For DPN-98 set the following:
            in_planes: (160, 320, 640, 1280)
            out_planes: (256, 512, 1024, 2048)
            num_blocks: (3, 6, 20, 3)
            dense_depth: (16, 32, 32, 128)
        """
        def to_list(arg):
            return [int(i) for i in arg.split(",")]

        parser.add_argument('--synapse.in_planes',
                            default='160, 320, 640, 1280',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.out_planes',
                            default='256, 512, 1024, 2048',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.num_blocks',
                            default='3, 6, 20, 3',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.dense_depth',
                            default='16, 32, 32, 128',
                            action="append",
                            type=to_list)
        parser.add_argument(
            '--synapse.target_dim',
            default=10,
            type=int,
            help='Final logit layer dimension. i.e. 10 for CIFAR-10.')
        parser = PKMDendrite.add_args(parser)
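A minimal sketch of the to_list converter above, parsing the DPN-92 block counts mentioned in the docstring (the append action is omitted here to keep the sketch self-contained):

import argparse

def to_list(arg):
    return [int(i) for i in arg.split(",")]

parser = argparse.ArgumentParser()
parser.add_argument('--synapse.num_blocks', default='3, 6, 20, 3', type=to_list)

args = parser.parse_args(['--synapse.num_blocks', '3,4,20,3'])  # DPN-92 depths
print(vars(args)['synapse.num_blocks'])  # [3, 4, 20, 3]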
Example #7
 def check_config(config: Munch):
     assert config.synapse.target_dim > 0, "target dimension must be greater than 0."
     config = PKMDendrite.check_config(config)
Example #8
class FFNNSynapse(Synapse):
    """ Simple feed forward NN for images.
    """
    def __init__(self, config: Munch):
        r""" Init a new ffnn synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munch namespace config item.
        """
        super(FFNNSynapse, self).__init__(config=config)

        # transform_layer: transforms images to common dimension.
        # [batch_size, -1, -1, -1] -> [batch_size, self.transform_dim]
        self.transform = Normalize((0.1307, ), (0.3081, ), device=self.device)
        self.transform_pool = nn.AdaptiveAvgPool2d((28, 28))
        self.transform_conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.transform_conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.transform_drop = nn.Dropout2d()
        self.transform_dim = 320

        # context_layer: distills the remote_context from the transform layer.
        # [batch_size, transform_dim] -> [batch_size, bittensor.__network_dim__]
        self.context_layer1 = nn.Linear(self.transform_dim, 256)
        self.context_layer2 = nn.Linear(256, bittensor.__network_dim__)

        # hidden_layer: learns hidden units for network and target.
        # [batch_size, transform_dim + bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.hidden_layer1 = nn.Linear(
            self.transform_dim + bittensor.__network_dim__,
            bittensor.__network_dim__)
        self.hidden_layer2 = nn.Linear(bittensor.__network_dim__,
                                       bittensor.__network_dim__)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    query_dim=bittensor.__network_dim__)

        # target_layer: Maps from hidden layer to target dimension
        # [batch_size, bittensor.__network_dim__] -> [batch_size, self.target_dim]
        self.target_layer1 = nn.Linear(bittensor.__network_dim__, 256)
        self.target_layer2 = nn.Linear(256, self.config.synapse.target_dim)

        self.to(self.device)

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        parser.add_argument(
            '--synapse.target_dim',
            default=10,
            type=int,
            help='Final logit layer dimension. i.e. 10 for MNIST.')
        parser = PKMDendrite.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        assert config.synapse.target_dim > 0, "target dimension must be greater than 0."
        config = PKMDendrite.check_config(config)

    def forward_image(self, images: torch.Tensor):
        r""" Forward image inputs through the FFNN synapse.

            Args:
                images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_dim, channels, rows, cols)`, `required`): 
                    Image tensors produced by calling PIL.toTensor() and with sequence dimension.
            
            Returns:
                hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_dim, bittensor.__network_dim__)`, `required`): 
                    Hidden layer encoding produced by using local_context.
        """
        # images: remove sequence dimension from images.
        # images.shape = [batch_size, channels, rows, cols]
        images = images.view(images.shape[0] * images.shape[1],
                             images.shape[2], images.shape[3],
                             images.shape[4]).to(self.device)

        # hidden: hidden layer using local_context for local computation only.
        # hidden.shape = [batch_size, __network_dim__]
        hidden = self.local_forward(images=images).local_hidden

        # hidden: re-add sequence dimension to outputs.
        # hidden.shape = [batch_size, sequence_dim, __network_dim__]
        hidden = torch.unsqueeze(hidden, 1)

        return hidden

    def local_forward(self,
                      images: torch.Tensor,
                      targets: torch.Tensor = None) -> SimpleNamespace:
        r""" Forward pass non-sequential image inputs and targets through the FFNN Synapse. The call does not make 
        remote queries to the network and returns only local hidden, target and losses.

            Args:
                images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, channels, rows, cols)`, `required`): 
                    PIL.toTensor() encoded images.

                targets (:obj:`torch.FloatTensor`  of shape :obj:`(batch_size, target_dim)`, `optional`, defaults to None): 
                    Image labels.

            Returns:
                SimpleNamespace ( 
                    local_context (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                        Pre-Hidden layer context, trained to match the remote context.

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                        Hidden layer produced from the context.

                    local_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, target_dim)`, `optional`):
                        FFNN Target predictions using local_context. 

                    local_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        FFNN Classification loss using local_context.

                    local_accuracy (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Accuracy of target predictions.
                )
        """

        # Return vars to be filled.
        output = SimpleNamespace()

        # transform: transform images to common shape.
        # transform.shape = [batch_size, self.transform_dim]
        transform = self.transform(images).to(self.device)
        transform = F.relu(F.max_pool2d(self.transform_conv1(transform), 2))
        transform = F.relu(
            F.max_pool2d(self.transform_drop(self.transform_conv2(transform)),
                         2))
        output.transform = transform.view(-1, self.transform_dim)

        # local_context: distillation model for remote_context.
        # local_context.shape = [batch_size, bittensor.__network_dim__]
        local_context = self.context_layer1(output.transform.detach())
        output.local_context = self.context_layer2(local_context)

        # local_hidden: hidden layer encoding using local_context.
        # local_hidden.shape = [batch_size, bittensor.__network_dim__]
        local_hidden = torch.cat(
            (output.transform, output.local_context.detach()), dim=1)
        local_hidden = F.relu(self.hidden_layer1(local_hidden))
        output.local_hidden = F.relu(self.hidden_layer2(local_hidden))

        if targets is not None:
            # local_target: projection of local_hidden onto target dimension.
            # local_target.shape = [batch_size, target_dim]
            targets = targets.to(self.device)
            local_target = self.target_layer1(output.local_hidden)
            local_target = self.target_layer2(local_target)
            output.local_target = F.log_softmax(local_target, dim=1)

            # local_target_loss: loss between local_target and passed targets.
            # local_target_loss.shape = [1]
            output.local_target_loss = F.nll_loss(output.local_target, targets)

            # Record extra metadata accuracy.
            max_logit = local_target.data.max(1, keepdim=True)[1]
            correct = max_logit.eq(targets.data.view_as(max_logit)).sum()
            output.local_accuracy = (100.0 * correct) / targets.shape[0]

        return output

    def remote_forward(self,
                       neuron: Neuron,
                       images: torch.Tensor,
                       targets: torch.Tensor = None) -> SimpleNamespace:
        """
            Forward pass non-sequential image inputs and targets through the remote context of the synapse. The call
            makes RPC queries across the network using the passed neuron's metagraph and dendrite.
            
            Args:
                neuron (:obj: `bittensor.Neuron`, `required`):
                    Bittensor neuron, used for making queries to the remote network.

                images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, channels, rows, cols)`, `required`): 
                    PIL.toTensor() encoded images.
                                
                targets (:obj:`torch.FloatTensor`  of shape :obj:`(batch_size, target_dim)`, `optional`, defaults to None): 
                    Image labels.
            
            Returns:
                self.local_forward() + SimpleNamespace ( 

                    dendrite (:obj:`SimpleNamespace`, `required`): 
                        Outputs from the pkm dendrite remote call.

                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between the local and remote context.

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote context.

                    remote_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, target_dim)`, `optional`):
                        FFNN Target predictions using the remote_context.

                    remote_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`):
                        FFNN Classification loss using the remote_context.
                )
        """
        # Call the local forward pass.
        # output = SimpleNamespace
        output = self.local_forward(images, targets)

        # Make remote queries using the PKMDendrite.
        # remote_context: responses from a bittensor remote network call.
        # remote_context.shape = [batch_size, bittensor.__network_dim__]
        images = torch.unsqueeze(images, 1)
        output.dendrite = self.dendrite.forward_image(neuron, images,
                                                      output.local_hidden)
        remote_context = torch.squeeze(output.dendrite.response,
                                       1).to(self.device)

        # Distill the local context to match the remote context.
        # distillation_loss: distillation loss between local_context and remote_context
        # distillation_loss.shape = [1]
        output.distillation_loss = F.mse_loss(output.local_context,
                                              remote_context.detach())

        # remote_hidden: hidden layer encoding using remote_context.
        # remote_hidden.shape = [batch_size, bittensor.__network_dim__]
        remote_hidden = torch.cat([output.transform, remote_context], dim=1)
        remote_hidden = self.hidden_layer1(remote_hidden)
        output.remote_hidden = self.hidden_layer2(remote_hidden)

        if targets is not None:
            # Project hidden units onto the targets.
            # remote_target: projection of remote_hidden onto target dimension.
            # remote_target.shape = [batch_size, target_dim]
            remote_target = self.target_layer1(remote_hidden)
            remote_target = self.target_layer2(remote_target)
            output.remote_target = F.log_softmax(remote_target, dim=1)

            # Compute the target loss.
            # remote_target_loss: loss between remote_target and passed targets.
            # remote_target_loss.shape = [1]
            output.remote_target_loss = F.nll_loss(output.remote_target,
                                                   targets)

            # Add extra metrics
            # Record extra metadata accuracy.
            max_logit = output.remote_target.data.max(1, keepdim=True)[1]
            correct = max_logit.eq(targets.data.view_as(max_logit)).sum()
            output.remote_accuracy = (100.0 * correct) / targets.shape[0]

        return output
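A condensed, torch-only sketch of the distillation pattern in remote_forward above: the remote context is detached, so the MSE gradient flows only into the local (student) context; sizes are toy values:

import torch
import torch.nn.functional as F

network_dim, batch_size = 8, 4

local_context = torch.randn(batch_size, network_dim, requires_grad=True)   # student
remote_context = torch.randn(batch_size, network_dim, requires_grad=True)  # teacher (network response)

# Detach the teacher: only the local context receives the distillation gradient.
distillation_loss = F.mse_loss(local_context, remote_context.detach())
distillation_loss.backward()

print(local_context.grad is not None)   # True
print(remote_context.grad is None)      # True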
Example #9
class GPT2LMSynapse(Synapse):
    """ A Bittensor Synapse training GPT2 with Masked Language Modelling (MLM)
    """
    def __init__(self, config: Munch):
        r""" Init a new GPT2 language-model synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`): 
                    munched config class.
        """
        super(GPT2LMSynapse, self).__init__(config=config)

        # Build hugging face config.
        huggingface_config = GPT2Config(
            vocab_size=bittensor.__vocab_size__,
            n_embd=bittensor.__network_dim__,
            n_layer=config.synapse.n_layer,
            n_head=config.synapse.n_head,
            n_inner=config.synapse.n_inner,
            activation_function=config.synapse.activation_function,
            resid_pdrop=config.synapse.resid_pdrop,
            embd_pdrop=config.synapse.embd_pdrop,
            attn_pdrop=config.synapse.attn_pdrop,
            layer_norm_epsilon=config.synapse.layer_norm_epsilon,
            initializer_range=config.synapse.initializer_range,
            summary_type=config.synapse.summary_type,
            summary_use_proj=config.synapse.summary_use_proj,
            summary_activation=config.synapse.summary_activation,
            summary_proj_to_labels=config.synapse.summary_proj_to_labels,
            summary_first_dropout=config.synapse.summary_first_dropout,
        )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = GPT2Model(huggingface_config)

        # pooler_layer: pools the hidden units for use by the pkm dendrite rpc query.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.pooler = GPT2Pooler(huggingface_config)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, bittensor.__network_dim__] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    query_dim=bittensor.__network_dim__)

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = torch.nn.Linear(bittensor.__network_dim__,
                                            bittensor.__network_dim__)

        # target_layer: maps from hidden layer to vocab dimension for each token. Used by MLM loss.
        # [batch_size, sequence_len, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__vocab_size__]
        self.target_layer = nn.Linear(bittensor.__network_dim__,
                                      bittensor.__vocab_size__,
                                      bias=False)

        # Loss function: MLM cross-entropy loss.
        # predicted: [batch_size, sequence_len, bittensor.__vocab_size__], targets: [batch_size, sequence_len] -> [1]
        self.loss_fct = torch.nn.CrossEntropyLoss()

        self.to(self.device)

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        r""" Add custom params to the parser.
        """
        parser.add_argument(
            '--synapse.n_head',
            default=1,
            type=int,
            help=
            'Number of attention heads for each attention layer in the Transformer encoder.'
        )
        parser.add_argument(
            '--synapse.n_layer',
            default=2,
            type=int,
            help='Number of hidden layers in the Transformer encoder.')
        parser.add_argument(
            '--synapse.n_inner',
            default=8,
            type=int,
            help=
            'The dimensionality of the inner feed-forward layers. :obj:`None` will set it to 4 times n_embd'
        )
        parser.add_argument(
            '--synapse.activation_function',
            default='gelu_new',
            type=str,
            help=
            'Activation function, to be selected in the list :obj:`["relu", "silu", "gelu", "tanh", "gelu_new"]`.'
        )
        parser.add_argument('--synapse.resid_pdrop',
                            default=0.1,
                            type=float,
                            help='GPT residual dropout probability.')
        parser.add_argument('--synapse.embd_pdrop',
                            default=0.1,
                            type=float,
                            help='GPT embedding dropout probability.')
        parser.add_argument('--synapse.attn_pdrop',
                            default=0.1,
                            type=float,
                            help='GPT attention dropout probability.')
        parser.add_argument(
            '--synapse.layer_norm_epsilon',
            default=1e-05,
            type=float,
            help='The epsilon to use in the layer normalization layers.')
        parser.add_argument(
            '--synapse.summary_type',
            default='cls_index',
            type=str,
            help=
            'Supply a Tensor of classification token position (like GPT/GPT-2).'
        )
        parser.add_argument(
            '--synapse.initializer_range',
            default=0.02,
            type=float,
            help=
            'The standard deviation of the truncated_normal_initializer for initializing all weight matrices.'
        )
        parser.add_argument(
            '--synapse.summary_use_proj',
            default=True,
            type=bool,
            help=
            'Whether or not to add a projection after the vector extraction.')
        parser.add_argument(
            '--synapse.summary_activation',
            type=str,
            help=
            'Pass "tanh" for a tanh activation to the output, any other value will result in no activation.'
        )
        parser.add_argument(
            '--synapse.summary_proj_to_labels',
            default=True,
            type=bool,
            help=
            'Whether the projection outputs should have config.num_labels or config.hidden_size classes.'
        )
        parser.add_argument(
            '--synapse.summary_first_dropout',
            default=0.1,
            type=float,
            help=
            'The dropout ratio to be used after the projection and activation.'
        )
        parser.add_argument(
            '--synapse.n_block_filter',
            default=100,
            type=int,
            help='Stale neurons are filtered after this many blocks.')
        PKMDendrite.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        pass

    def forward_text(self, inputs: torch.LongTensor):
        """ Local forward inputs through the MLM GPT Synapse.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of tokenized sentences.
            
            Returns:
                hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`): 
                    Hidden layer representation produced using the local_context.
        """
        hidden = self.local_forward(inputs=inputs.to(self.device),
                                    training=False).local_hidden
        return hidden

    def local_forward(self,
                      inputs: torch.LongTensor,
                      training: bool = True) -> SimpleNamespace:
        r""" Forward pass through GPT synapse.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.

                training (:obj:`bool`, `optional`, defaults to True):
                    Switch to True if this forward pass computes an MLM loss.

            Returns:
                SimpleNamespace {
                    local_context (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer context.

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer encoding produced using local_context.

                    local_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__vocab_size__)`, `optional`):
                        GPT MLM Target predictions produced using local_context. 

                    local_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        GPT MLM loss using local_context.
                }
        """
        # Return vars to be filled.
        output = SimpleNamespace()

        # local_context: distilled version of remote_context.
        # local_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_context = self.transformer(
            input_ids=inputs, return_dict=True).last_hidden_state

        # local_hidden: hidden layer encoding of sequence with local_context.
        # local_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_hidden = self.hidden_layer(output.local_context)

        if training:
            # local_target: projection of local_hidden onto target dimension.
            # local_target.shape = [batch_size, sequence_len, bittensor.__vocab_size__]
            output.local_target = self.target_layer(output.local_hidden)

            # local_target_loss: MLM loss between local_target and passed targets.
            # local_target_loss.shape = [1]
            shift_logits = output.local_target[..., :-1, :].contiguous()
            shift_labels = inputs[..., 1:].contiguous()
            output.local_target_loss = self.loss_fct(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1))

        return output

    def remote_forward(self, neuron, inputs, training) -> SimpleNamespace:
        """ Forward pass inputs and labels through the GPT2 module.


        Args:
            neuron (:obj: `bittensor.Neuron`, `required`):
                    Bittensor neuron, used for making queries to the remote network.

            inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.

            training (:obj:`bool`, `optional`, defaults to True):
                Switch to True if this forward pass computes an MLM loss.

        Returns:
            self.local_forward() + SimpleNamespace ( 

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote_context.

                    remote_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size,  bittensor.__vocab_size__)`, `optional`):
                        GPT MLM Target predictions using the remote_context.

                    remote_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`):
                        GPT MLM loss using the remote_context.

                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between local_context and remote_context.

                    dendrite (:obj:`SimpleNamespace`, `required`): 
                        Outputs from the pkm dendrite.
            )
        """
        # Run the local model.
        # output = SimpleNamespace
        output = self.local_forward(inputs, training)

        # pooled: pooled hidden layer from local run, used as our query context.
        # pooled.shape = [batch_size, bittensor.__network_dim__]
        pooled = self.pooler(output.local_hidden.detach())

        # remote_context: joined responses from a dendrite.forward_text call.
        # remote_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.dendrite = self.dendrite.forward_text(neuron,
                                                     inputs.to(self.device),
                                                     pooled)
        remote_context = output.dendrite.response

        # distillation_loss: distillation loss between local_context and remote_context
        # distillation_loss.shape = [1]
        output.distillation_loss = F.mse_loss(output.local_context,
                                              remote_context.detach())

        # remote_hidden: hidden layer encoding using remote_context.
        # remote_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.remote_hidden = self.hidden_layer(remote_context)

        if training:
            # remote_target: projection of remote_hidden onto target dimension.
            # remote_target.shape = [batch_size, sequence_len, bittensor.__vocab_size__]
            output.remote_target = self.target_layer(output.remote_hidden)

            # remote_target_loss: MLM loss between remote_target and passed targets.
            # remote_target_loss.shape = [1]
            shift_logits = output.remote_target[..., :-1, :].contiguous()
            shift_labels = inputs[..., 1:].contiguous()
            output.remote_target_loss = self.loss_fct(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1))

        return output
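GPT2Pooler is a project-side module whose internals are not shown here; a hedged sketch of one plausible pooling strategy, taking the detached hidden state at the final token as the query context (this is an illustrative assumption, not the actual GPT2Pooler implementation):

import torch

network_dim, batch_size, seq_len = 8, 2, 5
local_hidden = torch.randn(batch_size, seq_len, network_dim)

# One plausible pooling: use the hidden state of the final token as the query context.
pooled = local_hidden.detach()[:, -1, :]    # [batch_size, network_dim]
print(pooled.shape)                          # torch.Size([2, 8])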
Example #10
 def add_args(parser: argparse.ArgumentParser):
     r""" Add custom params to the parser.
     """
     parser.add_argument(
         '--synapse.n_head',
         default=1,
         type=int,
         help=
         'Number of attention heads for each attention layer in the Transformer encoder.'
     )
     parser.add_argument(
         '--synapse.n_layer',
         default=2,
         type=int,
         help='Number of hidden layers in the Transformer encoder.')
     parser.add_argument(
         '--synapse.n_inner',
         default=8,
         type=int,
         help=
         'The dimensionality of the inner feed-forward layers. :obj:`None` will set it to 4 times n_embd'
     )
     parser.add_argument(
         '--synapse.activation_function',
         default='gelu_new',
         type=str,
         help=
         'Activation function, to be selected in the list :obj:`["relu", "silu", "gelu", "tanh", "gelu_new"]`.'
     )
     parser.add_argument('--synapse.resid_pdrop',
                         default=0.1,
                         type=float,
                         help='GPT residual dropout probability.')
     parser.add_argument('--synapse.embd_pdrop',
                         default=0.1,
                         type=float,
                         help='GPT embedding dropout probability.')
     parser.add_argument('--synapse.attn_pdrop',
                         default=0.1,
                         type=float,
                         help='GPT attention dropout probability.')
     parser.add_argument(
         '--synapse.layer_norm_epsilon',
         default=1e-05,
         type=float,
         help='The epsilon to use in the layer normalization layers.')
     parser.add_argument(
         '--synapse.summary_type',
         default='cls_index',
         type=str,
         help=
         'Supply a Tensor of classification token position (like GPT/GPT-2).'
     )
     parser.add_argument(
         '--synapse.initializer_range',
         default=0.02,
         type=float,
         help=
         'The standard deviation of the truncated_normal_initializer for initializing all weight matrices.'
     )
     parser.add_argument(
         '--synapse.summary_use_proj',
         default=True,
         type=bool,
         help=
         'Whether or not to add a projection after the vector extraction.')
     parser.add_argument(
         '--synapse.summary_activation',
         type=str,
         help=
         'Pass "tanh" for a tanh activation to the output, any other value will result in no activation.'
     )
     parser.add_argument(
         '--synapse.summary_proj_to_labels',
         default=True,
         type=bool,
         help=
         'Whether the projection outputs should have config.num_labels or config.hidden_size classes.'
     )
     parser.add_argument(
         '--synapse.summary_first_dropout',
         default=0.1,
         type=float,
         help=
         'The dropout ratio to be used after the projection and activation.'
     )
     parser.add_argument(
         '--synapse.n_block_filter',
         default=100,
         type=int,
         help='Stale neurons are filtered after this many blocks.')
     PKMDendrite.add_args(parser)
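Assuming the transformers library is installed, the parsed --synapse.* values above map directly onto standard GPT2Config fields; a minimal sketch with illustrative stand-ins for the bittensor vocabulary and network dimensions:

import argparse
from transformers import GPT2Config

parser = argparse.ArgumentParser()
parser.add_argument('--synapse.n_layer', default=2, type=int)
parser.add_argument('--synapse.n_head', default=1, type=int)
parser.add_argument('--synapse.n_inner', default=8, type=int)
args = vars(parser.parse_args([]))

config = GPT2Config(
    vocab_size=256,                  # stand-in for bittensor.__vocab_size__
    n_embd=32,                       # stand-in for bittensor.__network_dim__
    n_layer=args['synapse.n_layer'],
    n_head=args['synapse.n_head'],
    n_inner=args['synapse.n_inner'],
)
print(config.n_layer, config.n_head, config.n_inner)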
Example #11
    def __init__(
        self,
        config: Munch,
        neuron: Neuron,
    ):
        r""" Init a new DPN synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`):
                    munch namespace config item.

                 neuron (:obj:`bittensor.Neuron`, `required`): 
                    bittensor neuron object. 
        """
        super(DPNSynapse, self).__init__(config=config, neuron=neuron)

        in_planes, out_planes = config.synapse.in_planes, config.synapse.out_planes
        num_blocks, dense_depth = config.synapse.num_blocks, config.synapse.dense_depth

        # Transform Network
        """ Transform network.
                Layers take in image inputs, normalize them, and apply
                4 convolutional layers.
            Image encoder: transforms PIL-encoded tensors to a common shape.
            [batch_size, channels, rows, cols] -> [batch_size, -1, -1, -1] 

            Output: [batch_size, self.transform_dim (9728)]
        """
        self.transform = Normalize((0.1307, ), (0.3081, ), device=self.device)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((32, 32))
        self.transform_conv1 = nn.Conv2d(3,
                                         64,
                                         kernel_size=3,
                                         stride=1,
                                         padding=1,
                                         bias=False)
        self.transform_bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        self.transform_layer1 = self._make_layer(in_planes[0],
                                                 out_planes[0],
                                                 num_blocks[0],
                                                 dense_depth[0],
                                                 stride=1)
        self.transform_layer2 = self._make_layer(in_planes[1],
                                                 out_planes[1],
                                                 num_blocks[1],
                                                 dense_depth[1],
                                                 stride=2)
        self.transform_layer3 = self._make_layer(in_planes[2],
                                                 out_planes[2],
                                                 num_blocks[2],
                                                 dense_depth[2],
                                                 stride=1)
        self.transform_layer4 = self._make_layer(in_planes[3],
                                                 out_planes[3],
                                                 num_blocks[3],
                                                 dense_depth[3],
                                                 stride=2)
        self.transform_dim = (out_planes[3] * 4) + ((
            (num_blocks[3] + 1) * 4) * dense_depth[3])

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    neuron,
                                    query_dim=self.transform_dim)

        # Context layers.
        """
            Distillation model for remote context. This layer takes input 
            coming from transform layer, and runs it through 3 linear layers,
            projecting it to bittensor.__network_dim__.  
        """
        self.context_layer1 = nn.Linear(self.transform_dim, 512)
        self.context_layer2 = nn.Linear(512, 256)
        self.context_layer3 = nn.Linear(256, bittensor.__network_dim__)

        # hidden layer.
        self.hidden_layer1 = nn.Linear(
            self.transform_dim + bittensor.__network_dim__, 512)
        self.hidden_layer2 = nn.Linear(512, 256)
        self.hidden_layer3 = nn.Linear(256, bittensor.__network_dim__)

        # Layers to project target down to target size passed by config
        # (number of classes)
        self.target_layer1 = nn.Linear(bittensor.__network_dim__, 128)
        self.target_layer2 = nn.Linear(128, self.config.synapse.target_dim)

        self.to(self.device)
Example #12
class DPNSynapse(Synapse):
    """ Bittensor endpoint trained on PIL images to detect objects using a DPN.
    """
    def __init__(
        self,
        config: Munch,
        neuron: Neuron,
    ):
        r""" Init a new DPN synapse module.

            Args:
                config (:obj:`munch.Munch`, `required`):
                    munch namespace config item.

                 neuron (:obj:`bittensor.Neuron`, `required`): 
                    bittensor neuron object. 
        """
        super(DPNSynapse, self).__init__(config=config, neuron=neuron)

        in_planes, out_planes = config.synapse.in_planes, config.synapse.out_planes
        num_blocks, dense_depth = config.synapse.num_blocks, config.synapse.dense_depth

        # Transform Network
        """ Transform network.
                Layers take in image inputs, normalize them, and apply
                4 convolutional layers.
            Image encoder: transforms PIL-encoded tensors to a common shape.
            [batch_size, channels, rows, cols] -> [batch_size, -1, -1, -1] 

            Output: [batch_size, self.transform_dim (9728)]
        """
        self.transform = Normalize((0.1307, ), (0.3081, ), device=self.device)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((32, 32))
        self.transform_conv1 = nn.Conv2d(3,
                                         64,
                                         kernel_size=3,
                                         stride=1,
                                         padding=1,
                                         bias=False)
        self.transform_bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        self.transform_layer1 = self._make_layer(in_planes[0],
                                                 out_planes[0],
                                                 num_blocks[0],
                                                 dense_depth[0],
                                                 stride=1)
        self.transform_layer2 = self._make_layer(in_planes[1],
                                                 out_planes[1],
                                                 num_blocks[1],
                                                 dense_depth[1],
                                                 stride=2)
        self.transform_layer3 = self._make_layer(in_planes[2],
                                                 out_planes[2],
                                                 num_blocks[2],
                                                 dense_depth[2],
                                                 stride=1)
        self.transform_layer4 = self._make_layer(in_planes[3],
                                                 out_planes[3],
                                                 num_blocks[3],
                                                 dense_depth[3],
                                                 stride=2)
        self.transform_dim = (out_planes[3] * 4) + ((
            (num_blocks[3] + 1) * 4) * dense_depth[3])

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite(config,
                                    neuron,
                                    query_dim=self.transform_dim)

        # Context layers.
        """
            Distillation model for remote context. This layer takes input 
            coming from transform layer, and runs it through 3 linear layers,
            projecting it to bittensor.__network_dim__.  
        """
        self.context_layer1 = nn.Linear(self.transform_dim, 512)
        self.context_layer2 = nn.Linear(512, 256)
        self.context_layer3 = nn.Linear(256, bittensor.__network_dim__)

        # hidden layer.
        self.hidden_layer1 = nn.Linear(
            self.transform_dim + bittensor.__network_dim__, 512)
        self.hidden_layer2 = nn.Linear(512, 256)
        self.hidden_layer3 = nn.Linear(256, bittensor.__network_dim__)

        # Layers to project target down to target size passed by config
        # (number of classes)
        self.target_layer1 = nn.Linear(bittensor.__network_dim__, 128)
        self.target_layer2 = nn.Linear(128, self.config.synapse.target_dim)

        self.to(self.device)

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        r""" This function adds the configuration items for the DPN synapse.
        These args are used to instantiate a Dual Path model. 
        Instantiating a configuration with the defaults will yield a "shallow" DPN-26 configuration. 

        For deeper network configurations, it is possible to set the num_blocks parameter to (3, 4, 20, 3) for a
        DPN-92. 
        
        For DPN-98 set the following:
            in_planes: (160, 320, 640, 1280)
            out_planes: (256, 512, 1024, 2048)
            num_blocks: (3, 6, 20, 3)
            dense_depth: (16, 32, 32, 128)
        """
        def to_list(arg):
            return [int(i) for i in arg.split(",")]

        parser.add_argument('--synapse.in_planes',
                            default='160, 320, 640, 1280',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.out_planes',
                            default='256, 512, 1024, 2048',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.num_blocks',
                            default='3, 6, 20, 3',
                            action="append",
                            type=to_list)
        parser.add_argument('--synapse.dense_depth',
                            default='16, 32, 32, 128',
                            action="append",
                            type=to_list)
        parser.add_argument(
            '--synapse.target_dim',
            default=10,
            type=int,
            help='Final logit layer dimension. i.e. 10 for CIFAR-10.')
        parser = PKMDendrite.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        assert isinstance(
            config.synapse.in_planes,
            list), 'synapse.in_planes must be a tuple, got {}'.format(
                config.synapse.in_planes)
        assert isinstance(
            config.synapse.out_planes,
            list), 'synapse.out_planes must be a tuple, got {}'.format(
                config.synapse.out_planes)
        assert isinstance(
            config.synapse.num_blocks,
            list), 'synapse.num_blocks must be a tuple, got {}'.format(
                config.synapse.num_blocks)
        assert isinstance(
            config.synapse.dense_depth,
            list), 'synapse.dense_depth must be a tuple, got {}'.format(
                config.synapse.dense_depth)
        assert all(
            isinstance(el, int) for el in config.synapse.in_planes
        ), 'synapse.in_planes must be a tuple of ints, got {}'.format(
            config.synapse.in_planes)
        assert all(
            isinstance(el, int) for el in config.synapse.out_planes
        ), 'synapse.out_planes must be a tuple of ints, got {}'.format(
            config.synapse.out_planes)
        assert all(
            isinstance(el, int) for el in config.synapse.num_blocks
        ), 'synapse.num_blocks must be a tuple of ints, got {}'.format(
            config.synapse.num_blocks)
        assert all(
            isinstance(el, int) for el in config.synapse.dense_depth
        ), 'synapse.dense_depth must be a tuple of ints, got {}'.format(
            config.synapse.dense_depth)

    def forward_image(self, images: torch.Tensor):
        r""" Forward image inputs through the DPN synapse.

            Args:
                images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_dim, channels, rows, cols)`, `required`): 
                    Image tensors produced by calling PIL.toTensor() and with sequence dimension.
            
            Returns:
                hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_dim, bittensor.__network_dim__)`, `required`): 
                    Hidden layer encoding produced by using local_context.
        """
        # images: remove sequence dimension from images.
        # images.shape = [batch_size, channels, rows, cols]
        images = images.view(images.shape[0] * images.shape[1],
                             images.shape[2], images.shape[3], images.shape[4])

        # hidden: hidden layer using local context for local computation only.
        # hidden.shape = [batch_size, __network_dim__]
        hidden = self.forward(images=images.to(self.device),
                              remote=False).local_hidden

        # hidden: re-add sequence dimension to outputs.
        # hidden.shape = [batch_size, sequence_dim, __network_dim__]
        hidden = torch.unsqueeze(hidden, 1)

        return hidden

    def forward(self,
                images: torch.Tensor,
                targets: torch.Tensor = None,
                remote: bool = False):
        r""" Forward pass non-sequential image inputs and targets through the DPN Synapse.

            Args:
                images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, channels, rows, cols)`, `required`): 
                    PIL.toTensor() encoded images.

                targets (:obj:`torch.FloatTensor`  of shape :obj:`(batch_size, config.target_size)`, `optional`): 
                    Image labels.

                remote (:obj:`bool`, `optional`):
                    Switch between local and remote context. If true, the function makes queries to the remote network.

            Returns:
                bittensor.SynapseOutput  (
                    loss  (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `required`):
                        Total loss accumulation to be used by loss.backward()

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                        Hidden layer encoding produced by using local_context.

                    local_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.target_size)`, `optional`):
                        DPN Target predictions using local_context. 

                    local_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        DPN Classification loss using local_context.

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote_context.

                    remote_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.target_size)`, `optional`):
                        DPN Target predictions using the remote_context.

                    remote_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`):
                        DPN Classification loss using the remote_context.

                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between local_context and remote_context.

                    weights (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, metagraph.state.n)`, `optional`): 
                        weights for each active neuron.

                    return_codes (:obj:`List[torch.LongTensor]` of shape :obj:`[metagraph.state.n]`, `required`):
                        dendrite call return codes. 0 for success.

                    metadata (:obj:`dict {'accuracy', torch.FloatTensor} ` of shape :obj:`(1)`, `optional`):
                        additional metadata output, specifically accuracy.
                )
        """
        # Return vars to be filled.
        output = SynapseOutput(loss=torch.tensor(0.0))
        r"""
            Transform the images into a common shape (32x32)
        """
        # transform: transform images to common shape.
        # transform.shape = [batch_size, self.transform_dim]
        transform = self.transform(images)
        transform = self.adaptive_pool(transform)
        transform = F.relu(
            self.transform_bn1(self.transform_conv1(transform.detach())))
        transform = self.transform_layer1(transform)
        transform = self.transform_layer2(transform)
        transform = self.transform_layer3(transform)
        transform = self.transform_layer4(transform)
        transform = F.avg_pool2d(transform, 4)
        transform = torch.flatten(transform, start_dim=1)

        # local_context: distillation model for remote_context.
        # local_context.shape = [batch_size, bittensor.__network_dim__]
        local_context = self.context_layer1(transform.detach())
        local_context = self.context_layer2(local_context)
        local_context = self.context_layer3(local_context)

        # local_hidden: hidden layer encoding using local_context.
        # local_hidden.shape = [batch_size, bittensor.__network_dim__]
        local_hidden = torch.cat([transform, local_context], dim=1)
        local_hidden = self.hidden_layer1(local_hidden)
        local_hidden = self.hidden_layer2(local_hidden)
        local_hidden = self.hidden_layer3(local_hidden)
        output.local_hidden = local_hidden

        if targets is not None:
            # local_target: projection of local_hidden onto target dimension.
            # local_target.shape = [batch_size, target_dim]
            targets = targets.to(self.device)
            local_target = self.target_layer1(local_hidden)
            local_target = self.target_layer2(local_target)
            local_target = F.log_softmax(local_target, dim=1)
            output.local_target = local_target

            # local_target_loss: loss between local_target and passed targets.
            # local_target_loss.shape = [1]
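            # Note: F.nll_loss on log_softmax outputs is equivalent to F.cross_entropy on raw logits.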
            local_target_loss = F.nll_loss(local_target, targets)
            output.local_target_loss = local_target_loss
            output.loss = output.loss + local_target_loss

            # Record extra metadata accuracy.
            max_logit = local_target.data.max(1, keepdim=True)[1]
            correct = max_logit.eq(targets.data.view_as(max_logit)).sum()
            local_accuracy = (100.0 * correct) / targets.shape[0]
            output.metadata['local_accuracy'] = local_accuracy

        if remote:
            output = self.forward_remote(local_context, output, images,
                                         transform, targets)

        return output
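
    # A minimal usage sketch (names assumed, not taken from this file): with a constructed
    # `synapse = DPNSynapse(config)` and CIFAR-10 style data, the local pass above could be
    # exercised roughly as:
    #
    #   images  = torch.rand(8, 3, 32, 32)          # [batch_size, channels, rows, cols]
    #   targets = torch.randint(0, 10, (8,))        # integer class labels
    #   output  = synapse.forward(images, targets=targets, remote=False)
    #   output.loss.backward()                      # backpropagates local_target_loss
    #
    # The keyword names above are illustrative; check the actual forward() signature.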

    def forward_remote(self, local_context, output, images, transform,
                       targets):
        """  Forward pass non-sequential image inputs and targets through the remote context of the synapse.

        Args:
            local_context (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                    Distillation model for remote_context.

            output (:obj:`bittensor.SynapseOutput`, `required`):
                    The object containing the outputs accumulated so far by the local context run.

            images (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, channels, rows, cols)`, `required`): 
                    PIL.toTensor() encoded images.

            transform (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, self.transform_dim)`, `required`):
                Images transformed to the common shape and flattened.
            
            targets (:obj:`torch.FloatTensor`  of shape :obj:`(batch_size, target_dim)`, `optional`, defaults to None): 
                Image labels.

        Returns:
            bittensor.SynapseOutput ( 
                    loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `required`):
                        Total loss accumulation to be used by loss.backward()

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                        Hidden layer encoding produced using local_context.

                    local_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, target_dim)`, `optional`):
                        DPN Target predictions using local_context. 

                    local_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        DPN Classification loss using local_context.

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote_context.

                    remote_target (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, target_dim)`, `optional`):
                        DPN Target predictions using the remote_context.

                    remote_target_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`):
                        DPN Classification loss using the remote_context.

                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between local_context and remote_context.

                    weights (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, metagraph.state.n)`, `optional`): 
                        weights for each active neuron.

                    request_sizes (:obj:`torch.LongTensor` of shape :obj:`(metagraph.state.n)`, `optional`): 
                        number of requests sent to each uid in this batch.

                    return_codes (:obj:`List[torch.LongTensor]` of shape :obj:`[metagraph.state.n]`, `required`):
                        dendrite call return codes. 0 for success.

                    metadata (:obj:`dict {'accuracy': torch.FloatTensor}` of shape :obj:`(1)`, `optional`):
                        additional metadata output, specifically accuracy.
                )
        """
        # remote_context: responses from a bittensor remote network call.
        # remote_context.shape = [batch_size, bittensor.__network_dim__]
        # make a remote call.
        images = torch.unsqueeze(images, 1)
        remote_context, weights, sizes, return_codes = self.dendrite.forward_image(
            images, transform)
        remote_context = torch.squeeze(remote_context, 1)
        output.weights = weights
        output.request_sizes = sizes
        output.return_codes = return_codes
        remote_context = remote_context.to(self.device)

        # distillation_loss: distillation loss between local_context and remote_context
        # distillation_loss.shape = [1]
        distillation_loss = F.mse_loss(local_context, remote_context.detach())
        output.distillation_loss = distillation_loss
        output.loss = output.loss + distillation_loss
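        # Because the remote_context is detached above, this MSE gradient only updates the
        # local context layers, training them to imitate the remote network response.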

        # remote_hidden: hidden layer encoding using remote_context.
        # remote_hidden.shape = [batch_size, bittensor.__network_dim__]
        remote_hidden = torch.cat([transform, remote_context], dim=1)
        remote_hidden = self.hidden_layer1(remote_hidden)
        remote_hidden = self.hidden_layer2(remote_hidden)
        remote_hidden = self.hidden_layer3(remote_hidden)
        output.remote_hidden = remote_hidden

        if targets is not None:
            # remote_target: projection of remote_hidden onto target dimension.
            # remote_target.shape = [batch_size, config.target_size]
            remote_target = self.target_layer1(remote_hidden)
            remote_target = self.target_layer2(remote_target)
            remote_target = F.log_softmax(remote_target, dim=1)
            output.remote_target = remote_target

            # remote_target_loss: loss between remote_target and passed targets.
            # remote_target_loss.shape = [1]
            remote_target_loss = F.nll_loss(remote_target, targets)
            output.loss = output.loss + remote_target_loss
            output.remote_target_loss = remote_target_loss

        return output

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth,
                    stride):
        """ Generates a sequential container containing Bottleneck layers.  

        Args:
            in_planes (int): 
                Number of input channels for the Bottleneck blocks at this layer.

            out_planes (int): 
                Number of output channels for the Bottleneck blocks at this layer.

            num_blocks (int): 
                Number of Bottleneck blocks stacked at this layer.

            dense_depth (int): 
                Number of densely concatenated channels added by each block at this layer.
           
            stride (int): 
                Convolutional stride length.

        Returns:
            nn.Sequential: A torch.nn sequential container containing the layers outlined in the inputs.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for i, stride in enumerate(strides):
            layers.append(
                self.Bottleneck(self.last_planes, in_planes, out_planes,
                                dense_depth, stride, i == 0))
            self.last_planes = out_planes + (i + 2) * dense_depth
        return nn.Sequential(*layers)
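
    # Illustrative sketch (values assumed): with num_blocks=3 and stride=2 the strides list
    # above becomes [2, 1, 1], so only the first Bottleneck downsamples, while every block
    # grows self.last_planes by dense_depth channels:
    #
    #   layer = self._make_layer(in_planes=96, out_planes=256, num_blocks=3,
    #                            dense_depth=16, stride=2)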

    class Bottleneck(nn.Module):
        def __init__(self, last_planes, in_planes, out_planes, dense_depth,
                     stride, first_layer):
            super(DPNSynapse.Bottleneck, self).__init__()
            self.out_planes = out_planes
            self.dense_depth = dense_depth

            self.conv1 = nn.Conv2d(last_planes,
                                   in_planes,
                                   kernel_size=1,
                                   bias=False)
            self.bn1 = nn.BatchNorm2d(in_planes)
            self.conv2 = nn.Conv2d(in_planes,
                                   in_planes,
                                   kernel_size=3,
                                   stride=stride,
                                   padding=1,
                                   groups=32,
                                   bias=False)
            self.bn2 = nn.BatchNorm2d(in_planes)
            self.conv3 = nn.Conv2d(in_planes,
                                   out_planes + dense_depth,
                                   kernel_size=1,
                                   bias=False)
            self.bn3 = nn.BatchNorm2d(out_planes + dense_depth)

            self.shortcut = nn.Sequential()
            if first_layer:
                self.shortcut = nn.Sequential(
                    nn.Conv2d(last_planes,
                              out_planes + dense_depth,
                              kernel_size=1,
                              stride=stride,
                              bias=False),
                    nn.BatchNorm2d(out_planes + dense_depth))

        def forward(self, x):
            out = F.relu(self.bn1(self.conv1(x)))
            out = F.relu(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))
            x = self.shortcut(x)
            d = self.out_planes
            out = torch.cat([
                x[:, :d, :, :] + out[:, :d, :, :], x[:, d:, :, :],
                out[:, d:, :, :]
            ], 1)
            out = F.relu(out)
            return out
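
        # Dual-path combine: the first out_planes channels form a residual sum
        # (x[:, :d] + out[:, :d]), while the remaining dense channels of the shortcut and of the
        # new block are concatenated, so each block grows the feature map by dense_depth channels.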
Example #13
class BertSynapseBase (Synapse):
    def __init__(self, config: Munch):
        r""" Init a new base-bert synapse.

            Args:
                config (:obj:`munch.Munch`, `required`): 
        """
        super(BertSynapseBase, self).__init__( config = config )

        # Hugging face config item.
        huggingface_config = BertConfig(    vocab_size=bittensor.__vocab_size__, 
                                            hidden_size=bittensor.__network_dim__, 
                                            num_hidden_layers=config.synapse.num_hidden_layers, 
                                            num_attention_heads=config.synapse.num_attention_heads, 
                                            intermediate_size=bittensor.__network_dim__, 
                                            is_decoder=False)

        # dendrite: (PKM layer) queries network using pooled embeddings as context.
        # [batch_size, -1] -> topk * [batch_size, bittensor.__network_dim__]
        self.dendrite = PKMDendrite( config, query_dim = bittensor.__network_dim__ )

        # encoder_layer: encodes tokenized sequences to network dim.
        # [batch_size, sequence_len] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.transformer = BertModel( huggingface_config, add_pooling_layer=True )

        # hidden_layer: transforms context and encoding to network_dim hidden units.
        # [batch_size, sequence_dim, bittensor.__network_dim__] -> [batch_size, sequence_len, bittensor.__network_dim__]
        self.hidden_layer = torch.nn.Linear( bittensor.__network_dim__, bittensor.__network_dim__ )

        # pooling_layer: transforms the hidden layer into a pooled representation by taking the encoding of the first token.
        # [batch_size, sequence_dim,  bittensor.__network_dim__] -> [batch_size, bittensor.__network_dim__]
        self.pooler = BertPooler( huggingface_config )

        self.to(self.device)

    @staticmethod
    def add_args( parser: argparse.ArgumentParser ):    
        r""" Add custom params to the parser.
        """
        parser.add_argument('--synapse.num_hidden_layers', default=2, type=int, 
                            help='Number of hidden layers in the Transformer encoder.')
        parser.add_argument('--synapse.num_attention_heads', default=2, type=int, 
                            help='Number of attention heads for each attention layer in the Transformer encoder.')
        parser.add_argument('--synapse.n_block_filter', default=100, type=int, help='Stale neurons are filtered after this many blocks.')
        PKMDendrite.add_args(parser)
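
    # A minimal CLI sketch (script name hypothetical): these flags would be supplied as, e.g.,
    #
    #   python miner.py --synapse.num_hidden_layers 4 --synapse.num_attention_heads 4 --synapse.n_block_filter 100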

    @staticmethod
    def check_config( config: Munch ):    
        r""" Add custom checks to the config.
        """
        pass

    def forward_text(self, inputs: torch.LongTensor):
        """ Local forward inputs through the BERT NSP Synapse.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of tokenized sentences.
            
            Returns:
                hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`): 
                    Hidden layer representation produced using the local_context.
        """
        hidden = self.base_local_forward( inputs=inputs ).local_hidden
        return hidden
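
    # A minimal usage sketch (setup assumed): given `synapse = BertSynapseBase(config)` and
    # token ids below bittensor.__vocab_size__, forward_text returns one network-dim vector per token:
    #
    #   inputs = torch.randint(0, bittensor.__vocab_size__, (2, 16))   # [batch_size, sequence_len]
    #   hidden = synapse.forward_text(inputs)
    #   assert hidden.shape == (2, 16, bittensor.__network_dim__)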

    def base_local_forward(self, inputs: torch.LongTensor, attention_mask: torch.LongTensor = None):
        r""" Forward pass inputs and labels through the NSP BERT module.

            Args:
                inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.

                attention_mask (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `optional`): 
                    Mask to avoid performing attention on padding token indices.
                    Mask values selected in ``[0, 1]``:
                        - 1 for tokens that are **not masked**,
                        - 0 for tokens that are **masked**.

            Returns:
                SimpleNamespace {
                    local_context (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer context.

                    local_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                        Hidden layer encoding produced using local_context.

                    local_pooled (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, bittensor.__network_dim__)`, `required`):
                        Local hidden state pooled by returning the encoding of the first token.
                }
        """        
        # Return vars to be filled.
        output = SimpleNamespace()
   
        # local_context: distilled version of remote_context.
        # local_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_context = self.transformer( input_ids = inputs, return_dict = True, attention_mask = attention_mask ).last_hidden_state

        # local_hidden: hidden layer encoding of sequence using local context
        # local_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.local_hidden = self.hidden_layer( output.local_context )
        output.local_pooled = self.pooler( output.local_hidden )

        return output
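
    # Note: BertPooler takes the encoding of the first token of the hidden sequence and passes
    # it through a dense + tanh layer, producing the fixed-size vector used as the dendrite query.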

    def base_remote_forward(self, neuron, inputs: torch.LongTensor, attention_mask: torch.LongTensor = None):
        """Forward pass inputs and labels through the remote BERT networks.

        Args:
            neuron (:obj: `bittensor.Neuron`, `required`):
                    Bittensor neuron, used for making queries to the remote network.

            inputs (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `required`): 
                    Batch_size length list of text sentences.                

            attention_mask (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_len)`, `optional`): 
                    Mask to avoid performing attention on padding token indices.
                    Mask values selected in ``[0, 1]``:
                        - 1 for tokens that are **not masked**,
                        - 0 for tokens that are **masked**.

        Returns:
            SimpleNamespace ( 
                    distillation_loss (:obj:`torch.FloatTensor` of shape :obj:`(1)`, `optional`): 
                        Distillation loss between local_context and remote_context.

                    remote_hidden (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `optional`): 
                        Hidden layer encoding produced using the remote_context.

                    dendrite (:obj:`SimpleNamespace`, `required`): 
                        Outputs from the pkm dendrite.
                )
        """
        output = self.base_local_forward( inputs = inputs, attention_mask = attention_mask )

        # remote_context: joined responses from a bittensor.forward_text call.
        # remote_context.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.dendrite = self.dendrite.forward_text( neuron = neuron, text = inputs, query = output.local_pooled )

        # distillation_loss: distillation loss between local_context and remote_context
        # distillation_loss.shape = [1]
        output.distillation_loss = F.mse_loss( output.local_context, output.dendrite.response.detach() )

        # remote_hidden: hidden layer encoding using remote_context.
        # remote_hidden.shape = [batch_size, sequence_len, bittensor.__network_dim__]
        output.remote_hidden = self.hidden_layer( output.dendrite.response )
        output.remote_pooled = self.pooler( output.remote_hidden )

        return output
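
    # A minimal training-step sketch (setup assumed): with a connected `neuron`, tokenized
    # `inputs`, and a torch.optim optimizer named `optimizer` over synapse.parameters(), the
    # distillation objective can be optimized roughly as:
    #
    #   output = synapse.base_remote_forward(neuron, inputs=inputs)
    #   output.distillation_loss.backward()
    #   optimizer.step()
    #   optimizer.zero_grad()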