Example #1
import argparse
import math
import os
import time

import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from loguru import logger
from munch import Munch
from termcolor import colored
from torch.utils.tensorboard import SummaryWriter

# NOTE: the bittensor-internal import paths below are assumptions for
# illustration; adjust them to the bittensor version you are running.
from bittensor.config import Config
from bittensor.neuron import Neuron
from bittensor.synapses.ffnn import FFNNSynapse
class Session:
    def __init__(self, config: Munch):
        if config is None:
            config = Session.default_config()
        self.config = config

        # ---- Neuron ----
        self.neuron = Neuron(self.config)

        # ---- Model ----
        self.model = FFNNSynapse(self.config)  # Feedforward neural network with PKMRouter.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)  # Set model to device

        # ---- Optimizer ----
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.config.miner.learning_rate,
                                   momentum=self.config.miner.momentum)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=10,
                                                         gamma=0.1)
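        # StepLR multiplies the learning rate by gamma every step_size epochs:
        # here, lr drops to 10% after every 10 scheduler.step() calls (one per epoch in run()).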

        # ---- Dataset ----
        self.train_data = torchvision.datasets.MNIST(
            root=self.config.miner.root_dir + "datasets/",
            train=True,
            download=True,
            transform=transforms.ToTensor())
        self.trainloader = torch.utils.data.DataLoader(
            self.train_data,
            batch_size=self.config.miner.batch_size_train,
            shuffle=True,
            num_workers=2)
        self.test_data = torchvision.datasets.MNIST(
            root=self.config.miner.root_dir + "datasets/",
            train=False,
            download=True,
            transform=transforms.ToTensor())
        self.testloader = torch.utils.data.DataLoader(
            self.test_data,
            batch_size=self.config.miner.batch_size_test,
            shuffle=False,
            num_workers=2)

        # ---- Tensorboard ----
        self.global_step = 0
        self.tensorboard = SummaryWriter(log_dir=self.config.miner.full_path)

    @staticmethod
    def default_config() -> Munch:
        parser = argparse.ArgumentParser()
        Session.add_args(parser)
        config = Config.to_config(parser)
        Session.check_config(config)
        return config

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        parser.add_argument('--miner.learning_rate',
                            default=0.01,
                            type=float,
                            help='Training initial learning rate.')
        parser.add_argument('--miner.momentum',
                            default=0.9,
                            type=float,
                            help='Training initial momentum for SGD.')
        parser.add_argument('--miner.batch_size_train',
                            default=64,
                            type=int,
                            help='Training batch size.')
        parser.add_argument('--miner.batch_size_test',
                            default=64,
                            type=int,
                            help='Testing batch size.')
        parser.add_argument(
            '--miner.log_interval',
            default=150,
            type=int,
            help='Batches until session prints log statements.')
        parser.add_argument(
            '--miner.sync_interval',
            default=150,
            type=int,
            help='Batches before we sync with chain and emit new weights.')
        parser.add_argument(
            '--miner.root_dir',
            default='data/',
            type=str,
            help='Root path to load and save data associated with each session'
        )
        parser.add_argument(
            '--miner.name',
            default='mnist',
            type=str,
            help='Trials for this session go in miner.root_dir / miner.name')
        parser.add_argument(
            '--miner.uid',
            default=str(time.time()).split('.')[0],
            type=str,
            help='Saved models go in miner.root_dir / miner.name / miner.uid')
        Neuron.add_args(parser)
        FFNNSynapse.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        assert config.miner.log_interval > 0, "log_interval must be positive"
        assert 0 < config.miner.momentum < 1, "momentum must be a value between 0 and 1"
        assert config.miner.batch_size_train > 0, "batch_size_train must be a positive value"
        assert config.miner.batch_size_test > 0, "batch_size_test must be a positive value"
        assert config.miner.learning_rate > 0, "learning rate must be a positive value."
        full_path = '{}/{}/{}/'.format(config.miner.root_dir,
                                       config.miner.name, config.miner.uid)
        config.miner.full_path = full_path
        if not os.path.exists(config.miner.full_path):
            os.makedirs(config.miner.full_path)
        FFNNSynapse.check_config(config)
        Neuron.check_config(config)

    # --- Main loop ----
    def run(self):

        # ---- Subscribe neuron ----
        with self.neuron:

            # ---- Loop forever ----
            start_time = time.time()
            self.epoch = -1
            self.best_test_loss = math.inf
            self.global_step = 0
            self.weights = self.neuron.metagraph.row  # Trained weights.
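            # metagraph.row is this node's outgoing weight vector over peers; training updates it below.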
            while True:
                self.epoch += 1

                # ---- Emit ----
                self.neuron.metagraph.set_weights(
                    self.weights,
                    wait_for_inclusion=True)  # Sets my row-weights on the chain.

                # ---- Sync ----
                self.neuron.metagraph.sync()  # Pulls the latest metagraph state (with my update).
                self.weights = self.neuron.metagraph.row.to(self.device)

                # ---- Train ----
                self.train()
                self.scheduler.step()

                # ---- Test ----
                test_loss, test_accuracy = self.test()

                # ---- Test checks ----
                time_elapsed = time.time() - start_time
                assert test_accuracy > 0.8
                assert test_loss < 0.2
                assert len(self.neuron.metagraph.state.neurons) > 0
                assert time_elapsed < 300  # 1 epoch of MNIST should take less than 5 mins.

                # ---- End test ----
                break

    # ---- Train epoch ----
    def train(self):
        # ---- Init training state ----
        self.model.train()  # Turn on dropout etc.
        for batch_idx, (images, targets) in enumerate(self.trainloader):
            self.global_step += 1

            # ---- Remote Forward pass ----
            output = self.model.remote_forward(
                neuron=self.neuron,
                images=images.to(self.device),
                targets=torch.LongTensor(targets).to(self.device),
            )

            # ---- Remote Backward pass ----
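            # Combined objective: the routed remote target loss, the local target loss,
            # and a distillation loss that trains the local model to mimic the network's responses.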
            loss = output.remote_target_loss + output.local_target_loss + output.distillation_loss
            loss.backward()  # Accumulates gradients on the model.
            self.optimizer.step()  # Applies accumulated gradients.
            self.optimizer.zero_grad()  # Zeros out gradients for the next accumulation.

            # ---- Train weights ----
            batch_weights = torch.mean(output.router.weights, dim=0)  # Average over batch.
            self.weights = (1 - 0.03) * self.weights + 0.03 * batch_weights  # Moving avg update.
            self.weights = F.normalize(self.weights, p=1, dim=0)  # Ensure normalization.
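            # This is an exponential moving average with alpha = 0.03: each batch nudges
            # the row weights toward the router's observed scores, and the L1 normalization
            # keeps them summing to one.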

            # ---- Step Logs + Tensorboard ----
            processed = ((batch_idx + 1) * self.config.miner.batch_size_train)
            progress = (100. * processed) / len(self.train_data)
            logger.info(
                'GS: {}\t Epoch: {} [{}/{} ({})]\t Loss: {}\t Acc: {}\t Axon: {}\t Dendrite: {}',
                colored('{}'.format(self.global_step), 'blue'),
                colored('{}'.format(self.epoch), 'blue'),
                colored('{}'.format(processed), 'green'),
                colored('{}'.format(len(self.train_data)), 'red'),
                colored('{:.2f}%'.format(progress), 'green'),
                colored('{:.4f}'.format(output.local_target_loss.item()),
                        'green'),
                colored('{:.4f}'.format(output.local_accuracy.item()),
                        'green'), self.neuron.axon, self.neuron.dendrite)
            self.tensorboard.add_scalar('Rloss',
                                        output.remote_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Lloss',
                                        output.local_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Dloss',
                                        output.distillation_loss.item(),
                                        self.global_step)

    # --- Test epoch ----
    def test(self):
        with torch.no_grad():  # Turns off gradient computation for inference speed-up.
            self.model.eval()  # Turns off dropout layers, BatchNorm updates, etc.
            loss = 0.0
            accuracy = 0.0
            for _, (images, labels) in enumerate(self.testloader):

                # ---- Local Forward pass ----
                outputs = self.model.local_forward(
                    images=images.to(self.device),
                    targets=torch.LongTensor(labels).to(self.device),
                )
                loss += outputs.local_target_loss.item()
                accuracy += outputs.local_accuracy.item()

            return loss / len(self.testloader), accuracy / len(self.testloader)
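
A minimal launch sketch for the Session above. The __main__ wrapper and the CLI flags mentioned in the comment are assumptions for illustration, not part of the original listing:

if __name__ == "__main__":
    # Passing None makes Session build and validate its default config, so CLI
    # flags such as --miner.learning_rate 0.05 are picked up automatically.
    session = Session(config=None)
    session.run()  # Trains one epoch, tests, and checks the quality asserts before exiting.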
Example #2
import argparse
import math
import os
import sys
import time

import bittensor
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from loguru import logger
from munch import Munch
from termcolor import colored
from torch.utils.tensorboard import SummaryWriter

# NOTE: these bittensor-internal import paths are assumptions for
# illustration; adjust them to the bittensor version you are running.
from bittensor.synapses.ffnn import FFNNSynapse
from bittensor.utils.model_utils import ModelToolbox
class Miner:
    def __init__(self, config: Munch = None, **kwargs):
        if config is None:
            config = Miner.default_config()
        bittensor.config.Config.update_with_kwargs(config.miner, kwargs)
        Miner.check_config(config)
        self.config = config

        # ---- Neuron ----
        self.neuron = bittensor.neuron.Neuron(self.config)

        # ---- Model ----
        self.model = FFNNSynapse(self.config)  # Feedforward neural network with PKMRouter.
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)  # Set model to device

        # ---- Optimizer ----
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.config.miner.learning_rate,
                                   momentum=self.config.miner.momentum)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=10,
                                                         gamma=0.1)

        # ---- Model Load/Save tools ----
        self.model_toolbox = ModelToolbox(FFNNSynapse, optim.SGD)

        # ---- Dataset ----
        self.train_data = torchvision.datasets.MNIST(
            root=self.config.miner.root_dir + "datasets/",
            train=True,
            download=True,
            transform=transforms.ToTensor())
        self.trainloader = torch.utils.data.DataLoader(
            self.train_data,
            batch_size=self.config.miner.batch_size_train,
            shuffle=True,
            num_workers=2)
        self.test_data = torchvision.datasets.MNIST(
            root=self.config.miner.root_dir + "datasets/",
            train=False,
            download=True,
            transform=transforms.ToTensor())
        self.testloader = torch.utils.data.DataLoader(
            self.test_data,
            batch_size=self.config.miner.batch_size_test,
            shuffle=False,
            num_workers=2)

        # ---- Tensorboard ----
        self.global_step = 0
        self.tensorboard = SummaryWriter(log_dir=self.config.miner.full_path)
        if self.config.miner.record_log:
            logger.add(
                self.config.miner.full_path + "/{}_{}.log".format(
                    self.config.miner.name, self.config.miner.trial_uid),
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}")

    @staticmethod
    def default_config() -> Munch:
        parser = argparse.ArgumentParser()
        Miner.add_args(parser)
        config = bittensor.config.Config.to_config(parser)
        return config

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        parser.add_argument('--miner.learning_rate',
                            default=0.01,
                            type=float,
                            help='Training initial learning rate.')
        parser.add_argument('--miner.momentum',
                            default=0.9,
                            type=float,
                            help='Training initial momentum for SGD.')
        parser.add_argument('--miner.n_epochs',
                            default=int(sys.maxsize),
                            type=int,
                            help='Number of training epochs.')
        parser.add_argument(
            '--miner.epoch_length',
            default=int(sys.maxsize),
            type=int,
            help='Iterations of training per epoch (or dataset EOF)')
        parser.add_argument('--miner.batch_size_train',
                            default=64,
                            type=int,
                            help='Training batch size.')
        parser.add_argument('--miner.batch_size_test',
                            default=64,
                            type=int,
                            help='Testing batch size.')
        parser.add_argument('--miner.log_interval',
                            default=150,
                            type=int,
                            help='Batches until miner prints log statements.')
        parser.add_argument(
            '--miner.sync_interval',
            default=10,
            type=int,
            help='Batches before we sync with chain and emit new weights.')
        parser.add_argument(
            '--miner.root_dir',
            default='~/.bittensor/miners/',
            type=str,
            help='Root path to load and save data associated with each miner')
        parser.add_argument(
            '--miner.name',
            default='mnist',
            type=str,
            help='Trials for this miner go in miner.root / miner.name')
        parser.add_argument(
            '--miner.trial_uid',
            default=str(time.time()).split('.')[0],
            type=str,
            help='Saved models go in miner.root_dir / miner.name / miner.trial_uid')
        parser.add_argument('--miner.record_log',
                            action='store_true',
                            default=False,
                            help='Record all logs when running this miner')
        parser.add_argument(
            '--miner.config_file',
            type=str,
            help=
            'config file to run this neuron, if not using cmd line arguments.')
        bittensor.neuron.Neuron.add_args(parser)
        FFNNSynapse.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        assert config.miner.log_interval > 0, "log_interval must be positive"
        assert 0 < config.miner.momentum < 1, "momentum must be a value between 0 and 1"
        assert config.miner.batch_size_train > 0, "batch_size_train must be a positive value"
        assert config.miner.batch_size_test > 0, "batch_size_test must be a positive value"
        assert config.miner.learning_rate > 0, "learning rate must be a positive value."
        full_path = '{}/{}/{}/'.format(config.miner.root_dir,
                                       config.miner.name,
                                       config.miner.trial_uid)
        config.miner.full_path = os.path.expanduser(full_path)
        if not os.path.exists(config.miner.full_path):
            os.makedirs(config.miner.full_path)

    # --- Main loop ----
    def run(self):

        # ---- Subscribe neuron ----
        with self.neuron:

            # ---- Weights ----
            self.row = self.neuron.metagraph.row.to(self.device)

            # --- Loop for epochs ---
            self.best_test_loss = math.inf
            self.global_step = 0
            for self.epoch in range(self.config.miner.n_epochs):
                # ---- Serve ----
                self.neuron.axon.serve(self.model)
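                # serve() points the axon's request handlers at this model so remote peers can query it.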

                # ---- Train ----
                self.train()
                self.scheduler.step()

                # If the model has borked for some reason, make sure it doesn't emit weights.
                # Instead, reload the previously saved version of the model.
                if any(torch.isnan(p).any() for p in self.model.parameters()):
                    self.model, self.optimizer = self.model_toolbox.load_model(self.config)
                    continue

                # ---- Test ----
                test_loss, test_accuracy = self.test()

                # ---- Emit ----
                self.neuron.metagraph.set_weights(
                    self.row, wait_for_inclusion=True
                )  # Sets my row-weights on the chain.

                # ---- Sync ----
                self.neuron.metagraph.sync(
                )  # Pulls the latest metagraph state (with my update.)
                self.row = self.neuron.metagraph.row.to(self.device)

                # --- Display Epoch ----
                print(self.neuron.axon.__full_str__())
                print(self.neuron.dendrite.__full_str__())
                print(self.neuron.metagraph)

                # ---- Update Tensorboard ----
                self.neuron.dendrite.__to_tensorboard__(
                    self.tensorboard, self.global_step)
                self.neuron.metagraph.__to_tensorboard__(
                    self.tensorboard, self.global_step)
                self.neuron.axon.__to_tensorboard__(self.tensorboard,
                                                    self.global_step)

                # ---- Save ----
                if test_loss < self.best_test_loss:
                    self.best_test_loss = test_loss  # Update best loss.
                    self.model_toolbox.save_model(
                        self.config.miner.full_path, {
                            'epoch': self.epoch,
                            'model_state_dict': self.model.state_dict(),
                            'loss': self.best_test_loss,
                            'optimizer_state_dict':
                            self.optimizer.state_dict(),
                        })
                    self.tensorboard.add_scalar('Test loss', test_loss,
                                                self.global_step)

    # ---- Train epoch ----
    def train(self):
        # ---- Init training state ----
        self.model.train()  # Turn on dropout etc.
        for batch_idx, (images, targets) in enumerate(self.trainloader):
            if batch_idx >= self.config.miner.epoch_length:
                break
            self.global_step += 1

            # ---- Remote Forward pass ----
            output = self.model.remote_forward(
                neuron=self.neuron,
                images=images.to(self.device),
                targets=torch.LongTensor(targets).to(self.device),
            )

            # ---- Remote Backward pass ----
            loss = output.remote_target_loss + output.local_target_loss + output.distillation_loss
            loss.backward()  # Accumulates gradients on the model.
            self.optimizer.step()  # Applies accumulated gradients.
            self.optimizer.zero_grad()  # Zeros out gradients for the next accumulation.

            # ---- Train weights ----
            batch_weights = torch.mean(output.router.weights, dim=0).to(self.device)  # Average over batch.
            self.row = (1 - 0.03) * self.row + 0.03 * batch_weights  # Moving avg update.
            self.row = F.normalize(self.row, p=1, dim=0)  # Ensure normalization.

            # ---- Step Logs + Tensorboard ----
            processed = ((batch_idx + 1) * self.config.miner.batch_size_train)
            progress = (100. * processed) / len(self.train_data)
            logger.info(
                'GS: {}\t Epoch: {} [{}/{} ({})]\tLoss: {}\tAcc: {}\tAxon: {}\tDendrite: {}',
                colored('{}'.format(self.global_step), 'blue'),
                colored('{}'.format(self.epoch), 'blue'),
                colored('{}'.format(processed), 'green'),
                colored('{}'.format(len(self.train_data)), 'red'),
                colored('{:.2f}%'.format(progress), 'green'),
                colored('{:.4f}'.format(output.local_target_loss.item()),
                        'green'),
                colored('{:.4f}'.format(output.local_accuracy.item()),
                        'green'), self.neuron.axon, self.neuron.dendrite)
            self.tensorboard.add_scalar('Rloss',
                                        output.remote_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Lloss',
                                        output.local_target_loss.item(),
                                        self.global_step)
            self.tensorboard.add_scalar('Dloss',
                                        output.distillation_loss.item(),
                                        self.global_step)

    # --- Test epoch ----
    def test(self):
        with torch.no_grad():  # Turns off gradient computation for inference speed-up.
            self.model.eval()  # Turns off dropout layers, BatchNorm updates, etc.
            loss = 0.0
            accuracy = 0.0
            for _, (images, labels) in enumerate(self.testloader):

                # ---- Local Forward pass ----
                outputs = self.model.local_forward(
                    images=images.to(self.device),
                    targets=torch.LongTensor(labels).to(self.device),
                )
                loss += outputs.local_target_loss.item()
                accuracy += outputs.local_accuracy.item()

            return loss / len(self.testloader), accuracy / len(self.testloader)
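
A hedged usage sketch for this Miner. Because __init__ merges keyword arguments into config.miner via Config.update_with_kwargs, hyperparameters can be overridden straight from Python; the values below are illustrative:

if __name__ == "__main__":
    # kwargs are merged into config.miner before check_config runs.
    miner = Miner(n_epochs=2, epoch_length=100)
    miner.run()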
Example #3
import argparse
import os
import sys
import time

import bittensor
import torch
from loguru import logger
from munch import Munch
from torch.utils.tensorboard import SummaryWriter

# NOTE: these bittensor-internal import paths are assumptions for
# illustration; adjust them to the bittensor version you are running.
from bittensor.synapses.ffnn import FFNNSynapse
from bittensor.utils.model_utils import ModelToolbox
class Miner:
    def __init__(self, config: Munch = None, **kwargs):
        if config is None:
            config = Miner.default_config()
        bittensor.config.Config.update_with_kwargs(config.miner, kwargs)
        Miner.check_config(config)
        self.config = config

        # ---- Build Neuron ----
        self.neuron = bittensor.neuron.Neuron(config)

        # ---- Build FFNN Model ----
        self.model = FFNNSynapse(self.config)
        self.model.to(
            torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        self.neuron.axon.serve(self.model)

        # ---- Optimizer ----
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.config.miner.learning_rate,
                                         momentum=self.config.miner.momentum)

        # ---- Model Load/Save tools ----
        self.model_toolbox = ModelToolbox(FFNNSynapse, torch.optim.SGD)

        # ---- Logging ----
        self.tensorboard = SummaryWriter(log_dir=self.config.miner.full_path)
        if self.config.miner.record_log:
            logger.add(
                self.config.miner.full_path + "/{}_{}.log".format(
                    self.config.miner.name, self.config.miner.trial_uid),
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}")

    @staticmethod
    def default_config() -> Munch:
        parser = argparse.ArgumentParser()
        Miner.add_args(parser)
        config = bittensor.config.Config.to_config(parser)
        return config

    @staticmethod
    def add_args(parser: argparse.ArgumentParser):
        parser.add_argument('--miner.learning_rate',
                            default=0.01,
                            type=float,
                            help='Training initial learning rate.')
        parser.add_argument('--miner.momentum',
                            default=0.9,
                            type=float,
                            help='Training initial momentum for SGD.')
        parser.add_argument('--miner.n_epochs',
                            default=int(sys.maxsize),
                            type=int,
                            help='Number of training epochs.')
        parser.add_argument(
            '--miner.sync_interval',
            default=150,
            type=int,
            help='Batches before we sync with chain and emit new weights.')
        parser.add_argument(
            '--miner.root_dir',
            default='~/.bittensor/miners/',
            type=str,
            help='Root path to load and save data associated with each miner')
        parser.add_argument(
            '--miner.name',
            default='ffnn-grunt',
            type=str,
            help='Trials for this miner go in miner.root / miner.name')
        parser.add_argument(
            '--miner.trial_uid',
            default=str(time.time()).split('.')[0],
            type=str,
            help='Saved models go in miner.root_dir / miner.name / miner.trial_uid')
        parser.add_argument('--miner.record_log',
                            action='store_true',
                            default=False,
                            help='Record all logs when running this miner')
        parser.add_argument(
            '--miner.config_file',
            type=str,
            help=
            'config file to run this neuron, if not using cmd line arguments.')
        bittensor.neuron.Neuron.add_args(parser)
        FFNNSynapse.add_args(parser)

    @staticmethod
    def check_config(config: Munch):
        assert 0 < config.miner.momentum < 1, "momentum must be a value between 0 and 1"
        assert config.miner.learning_rate > 0, "learning rate must be a positive value."
        full_path = '{}/{}/{}/'.format(config.miner.root_dir,
                                       config.miner.name,
                                       config.miner.trial_uid)
        config.miner.full_path = os.path.expanduser(full_path)
        if not os.path.exists(config.miner.full_path):
            os.makedirs(config.miner.full_path)

    # ---- Main loop ----
    def run(self):

        # --- Subscribe / Update neuron ---
        with self.neuron:

            # ---- Loop for epochs ----
            self.model.train()
            for self.epoch in range(self.config.miner.n_epochs):

                # ---- Poll until gradients ----
                public_key, inputs_x, grads_dy, modality_x = self.neuron.axon.gradients.get(
                    block=True)
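                # get(block=True) waits until a remote peer pushes a (key, inputs, grads, modality) tuple into the axon's gradient queue.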

                # ---- Backward Gradients ----
                # TODO (const): batch normalization over the gradients for consistency.
                grads_dy = torch.where(torch.isnan(grads_dy),
                                       torch.zeros_like(grads_dy), grads_dy)
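                # Zero out NaN entries so a single malformed remote gradient cannot poison the model.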
                self.model.backward(inputs_x, grads_dy, modality_x)

                # ---- Apply Gradients ----
                self.optimizer.step()  # Apply accumulated gradients.
                self.optimizer.zero_grad()  # Clear any lingering gradients

                # If the model has borked for some reason, make sure it doesn't emit weights.
                # Instead, reload the previously saved version of the model.
                if any(torch.isnan(p).any() for p in self.model.parameters()):
                    self.model, self.optimizer = self.model_toolbox.load_model(self.config)

                # ---- Serve latest model ----
                self.neuron.axon.serve(self.model)  # Serve the newest model.
                logger.info('Step: {} \t Key: {} \t sum(W[:,0]): {}',
                            self.epoch, public_key,
                            torch.sum(self.neuron.metagraph.col).item())

                # ---- Sync State ----
                if (self.epoch + 1) % self.config.miner.sync_interval == 0:

                    # --- Display Epoch ----
                    print(self.neuron.axon.__full_str__())
                    print(self.neuron.dendrite.__full_str__())
                    print(self.neuron.metagraph)

                    # ---- Sync metagraph from chain ----
                    self.neuron.metagraph.sync()  # Sync with the chain.

                    # --- Save Model ----
                    self.model_toolbox.save_model(
                        self.config.miner.full_path, {
                            'epoch': self.epoch,
                            'model_state_dict': self.model.state_dict(),
                            'optimizer_state_dict':
                            self.optimizer.state_dict(),
                        })
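
A minimal launch sketch for this gradient-serving miner. Unlike the previous examples it owns no dataset: run() blocks on the axon's gradient queue and only learns from gradients pushed by remote peers, so it is typically left running unattended:

if __name__ == "__main__":
    miner = Miner()  # A None config falls back to Miner.default_config().
    miner.run()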