Example #1
    def query(self, inputs):

        # ---- Get active peers and their weights ----
        active_uids = torch.where(self.metagraph().active > 0)[0]
        active_peer_weights = self.peer_weights[active_uids]

        # ---- Topk Weights ---- (TODO: check if the gaussians are enough to disrupt the chain weights)
        real_topk = min(self.config.nucleus.topk,
                        self.metagraph().n.item(), len(active_uids))
        noise = torch.normal(
            0,
            torch.std(active_peer_weights).item() + self.noise_offset,
            size=active_peer_weights.size()).to(self.config.neuron.device)
        topk_weights, topk_idx = bittensor.unbiased_topk(active_peer_weights +
                                                         noise,
                                                         real_topk,
                                                         dim=0)
        topk_uids = active_uids[topk_idx]

        # ---- Query network ----
        responses, return_ops, query_times = self.dendrite.forward_text(
            endpoints=self.metagraph().endpoints[topk_uids], inputs=inputs)

        # ---- Join based on weights ----
        joining_uids = torch.where(
            return_ops == bittensor.proto.ReturnCode.Success)[0]
        joining_weights = F.softmax(
            topk_weights[(return_ops == bittensor.proto.ReturnCode.Success)],
            dim=0)
        output = torch.zeros((inputs.shape[0], inputs.shape[1],
                              bittensor.__network_dim__)).to(self.device)
        for index, joining_weight in enumerate(joining_weights):
            output += responses[joining_uids[index]].to(
                self.device) * joining_weight

        # ---- Punish peers with non-successful return ops ----
        with torch.no_grad():
            self.peer_weights[topk_uids[(return_ops !=
                                         bittensor.proto.ReturnCode.Success
                                         )]] -= self.config.nucleus.punishment
            self.peer_weights[
                self.peer_weights < -1] = -1  # lower bound for chain weights

        return output
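The selection step above is the core of this example: Gaussian noise scaled by the spread of the active peer weights is added before taking the top-k, so low-weight peers still get queried occasionally. A minimal self-contained sketch of that pattern, with torch.topk standing in for bittensor.unbiased_topk (all names here are illustrative, not part of the original code):

import torch

def noisy_topk(peer_weights: torch.Tensor, k: int, noise_offset: float = 1e-3):
    # Scale the noise by the spread of the weights, with a small offset so
    # selection is still randomized when all weights are nearly equal.
    std = peer_weights.std().item() + noise_offset
    noise = torch.normal(0.0, std, size=peer_weights.size())
    k = min(k, peer_weights.numel())
    # torch.topk is a stand-in for bittensor.unbiased_topk here.
    return torch.topk(peer_weights + noise, k, dim=0)

weights = torch.tensor([0.9, 0.1, 0.5, 0.05])
topk_weights, topk_idx = noisy_topk(weights, k=2)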
Example #2
    def remote(self, inputs: torch.int64 ) -> torch.float32:
        """ Forwards the inputs through the network, selects the topk peers based on self.peer_weights.
        Args:
            inputs (:obj:`torch.int64` of shape :obj:`(batch_size, sequence_len)`, `required`):
                Batch of tokenized text sentences.
        Returns:
            outputs (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_len, bittensor.__network_dim__)`, `required`):
                Joined hidden layer responses from peers.
            joined_uids (:obj:`torch.LongTensor`, `required`):
                Uids of the peers whose responses were joined into the output.
        """

        # ---- Get active peers and their weights ---- 
        active_uids = torch.where(self.metagraph().active > 0)[0]
        active_peer_weights = self.peer_weights[active_uids]

        # ---- Topk Weights ---- (TODO: check if the gaussians are enough to disrupt the chain weights)
        real_topk = min( self.config.nucleus.topk, self.metagraph().n.item(), len(active_uids))
        std = torch.std(active_peer_weights).item()
        if not std:
            std = self.config.nucleus.noise_offset
        noise = torch.normal( 0, std, size=active_peer_weights.size() ).to( self.config.neuron.device ) * self.noise_multiplier
        topk_weights, topk_idx = bittensor.unbiased_topk(active_peer_weights + noise , real_topk, dim=0)
        topk_uids = active_uids[topk_idx]

        # ---- Filter endpoints ----
        endpoints = self.metagraph().endpoints[ topk_uids ]

        # ---- Query network ----
        responses, return_ops, query_times = self.dendrite.forward_text(
            endpoints = endpoints.to('cpu'),
            inputs = inputs
        )

        # ---- Join based on weights ----
        joining_uids = torch.where( return_ops == bittensor.proto.ReturnCode.Success )[0]
        joining_weights = F.softmax( topk_weights[(return_ops == bittensor.proto.ReturnCode.Success)], dim = 0 ) 
        output = torch.zeros( (inputs.shape[0], inputs.shape[1], bittensor.__network_dim__)).to( self.config.neuron.device )
        for index, joining_weight in enumerate( joining_weights ):
            output += responses[joining_uids[index]].to( self.config.neuron.device ) * joining_weight

        # ---- Punish peers with non-successful return ops ----
        with torch.no_grad():
            self.peer_weights[topk_uids[(return_ops != bittensor.proto.ReturnCode.Success)]] -= self.config.nucleus.punishment
            self.peer_weights[self.peer_weights < -1] = -1  # lower bound for chain weights
        
        return output, topk_uids[joining_uids]
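The join step can also be read in isolation: only responses with a Success return code participate, their weights are re-normalized with a softmax, and the output is their weighted sum. A minimal sketch under those assumptions (shapes and names are illustrative):

import torch
import torch.nn.functional as F

def join_responses(responses, topk_weights, success_mask, batch, seq, dim, device='cpu'):
    # responses: list of (batch, seq, dim) tensors, one per queried peer.
    # success_mask: bool tensor marking peers that returned Success.
    joining_idx = torch.where(success_mask)[0]
    joining_weights = F.softmax(topk_weights[success_mask], dim=0)
    output = torch.zeros((batch, seq, dim), device=device)
    for i, w in zip(joining_idx, joining_weights):
        output += responses[i].to(device) * w
    return output, joining_idx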
Example #3
    def set_peer_weights( self ):
        r""" Sets the fisher ema score to peers.
        """

        try:
            k = min( self.config.neuron.n_topk_peer_weights, self.metagraph.n.item() )
            inactive_uids = torch.where(self.metagraph.active == 0)[0]
            self.stats.ema_scores[inactive_uids] = 0
            topk_scores, topk_uids = bittensor.unbiased_topk( self.stats.ema_scores , k = k )
            topk_uids = topk_uids.detach().to('cpu')
            topk_scores = topk_scores.detach().to('cpu')
            self.subtensor.set_weights(
                uids = topk_uids,
                weights = topk_scores,
                wait_for_inclusion = False,
                wallet = self.wallet,
            )

        except Exception as e:
            logger.error('Failure setting weights on chain with error: {}', e)
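A compact sketch of the same selection flow, with torch.topk in place of bittensor.unbiased_topk; the subtensor.set_weights call itself is as in the example above, and the helper name here is illustrative:

import torch

def select_chain_weights(ema_scores: torch.Tensor, active: torch.Tensor, k: int):
    # Inactive peers are zeroed so they cannot receive chain weight.
    scores = ema_scores.clone()
    scores[active == 0] = 0.0
    k = min(k, scores.numel())
    topk_scores, topk_uids = torch.topk(scores, k)
    # Chain calls expect detached CPU tensors, as in the example above.
    return topk_uids.detach().cpu(), topk_scores.detach().cpu()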
Example #4
def run( config, validator, subtensor, wallet, metagraph, dataset, device, uid, dendrite ):
    
    print(config)
    config.to_defaults()
    validator = validator.to(device)
    optimizer = torch.optim.SGD(
        validator.parameters(),
        lr = config.neuron.learning_rate,
        momentum = config.neuron.momentum,
    )
    if config.wandb.api_key != 'default':
        # Create wandb for telemetry.
        bittensor.wandb(
            config = config,
            cold_pubkey = wallet.coldkeypub.ss58_address,
            hot_pubkey = wallet.hotkey.ss58_address,
            root_dir = config.neuron.full_path
        )

    # Optionally resume.
    if not config.neuron.no_restart:
        try:
            validator.load_state_dict( torch.load("{}/validator.torch".format( config.neuron.full_path ))['validator'], strict=False )
        except Exception as e:
            logger.error('Error reloading model: {} '.format(e))

    # --- last sync block 
    last_sync_block = subtensor.get_current_block()

    # --- Run Forever.
    epoch = 0
    global_step = 0
    best_loss = math.inf
    ema_score_decay = 0.995
    # Initialize EMA scores uniformly at 1/n (ones_like, not zeros_like, so the uniform prior is nonzero).
    ema_scores = torch.nn.Parameter(torch.ones_like(validator.peer_weights, device = device) * (1 / metagraph.n.item()), requires_grad = False)

    while True:

        # --- Run epoch.
        start_block = subtensor.get_current_block() + 1
        end_block = start_block + config.neuron.blocks_per_epoch
        blocks = [ block for block in range(start_block, end_block) ]
        progress = qqdm( blocks, total=len(blocks), desc=format_str('white', 'Epoch'))
        progress.set_bar = partial(progress.set_bar,  element='#')

        # --- Reset the epoch logs
        total_epoch_score = torch.zeros(metagraph.n.item(), device = device)
        total_epoch_loss = 0
        batch_count = 0
        
        for block in progress:
            
            # --- Training step.
            current_block = subtensor.get_current_block()
            while block >= current_block:
                loss, _, query_uids = validator( next( dataset ) )
                val_score = validator.scores()
                scores = torch.nn.functional.normalize ( torch.relu( val_score ), p=1, dim = 0 )
                scores[query_uids] += 1e-6
                loss.backward()
                clip_grad_norm_(validator.parameters(), config.neuron.clip_gradients)
                optimizer.step()
                optimizer.zero_grad() 
                global_step += 1
                batch_count += 1
                total_epoch_score += scores.detach()
                total_epoch_loss += loss.item()
                ema_scores = (ema_score_decay * ema_scores) + (1 - ema_score_decay) * scores.detach()
                current_block = subtensor.get_current_block()

            # --- Step logs.
            info = {
                'Step': colored('{}'.format(global_step), 'red'),
                'Epoch': colored('{}'.format(epoch), 'yellow'),
                'Best-loss': colored('{:.4f}'.format(best_loss), 'green'),            
                'Loss': colored('{:.4f}'.format(loss.item()), 'blue'),            
                'nPeers': colored(metagraph.n.item(), 'red'),
                'Stake(\u03C4)': colored('{:.3f}'.format(metagraph.S[uid].item()), 'yellow'),
                'Rank(\u03C4)': colored('{:.3f}'.format(metagraph.R[uid].item()), 'green'),
                'Incentive(\u03C4/block)': colored('{:.6f}'.format(metagraph.I[uid].item()), 'blue'),
                'Dividends': colored('{:.4f}'.format(metagraph.D[ uid ].item()), 'red'),
                'Current Block': colored('{}'.format(block), 'yellow')
            }
            
            topk_scores, topk_idx = bittensor.unbiased_topk(ema_scores, 5, dim=0)
            for idx, ema_score in zip(topk_idx, topk_scores):
                color = 'green' if scores[idx] - ema_score > 0 else 'red'
                info[f'uid_{idx.item()}'] = colored('{:.4f}'.format(ema_score), color)

            progress.set_infos( info )
        
        # --- End of epoch
        # --- Set mechanism weights.
        inactive_uids = torch.where(metagraph.active == 0)[0]
        ema_scores[inactive_uids] = 0
        topk_scores, topk_uids = bittensor.unbiased_topk( ema_scores.detach().to('cpu'), k = min(config.neuron.n_topk_peer_weights, metagraph.n.item()))
        subtensor.set_weights(
            uids = topk_uids,
            weights = topk_scores,
            wait_for_inclusion = False,
            wallet = wallet,
        )

        # --- Log.
        epoch_loss = total_epoch_loss / batch_count
        active_uids = torch.where(metagraph.active > 0)[0]

        nn = subtensor.neuron_for_pubkey(wallet.hotkey.ss58_address)
                
        if config.wandb.api_key != 'default':
            wandb_data = {
                'stake': nn.stake,
                'dividends': nn.dividends,
                'epoch_loss': epoch_loss,
                'STD in scores': torch.std(ema_scores[active_uids]).item(),
            } 
            df = pandas.concat( [
                bittensor.utils.indexed_values_to_dataframe( prefix = 'fisher_ema_score', index = topk_uids, values = ema_scores ),
                dendrite.to_dataframe( metagraph = metagraph )
            ], axis = 1)
            df['uid'] = df.index
            wandb_dendrite = dendrite.to_wandb()
            wandb.log( {**wandb_data, **wandb_dendrite}, step = current_block )
            wandb.log( { 'stats': wandb.Table( dataframe = df ) }, step = current_block )

        # --- Save.
        if best_loss > epoch_loss:
            best_loss = epoch_loss
            torch.save( { 'validator': validator.state_dict() }, "{}/validator.torch".format( config.neuron.full_path ))

        if current_block - last_sync_block > config.neuron.metagraph_sync:
            metagraph.sync()
            last_sync_block = current_block
            validator.sync_with_chain_state()
            chain_growth = max(0, metagraph.n.item() - torch.numel( ema_scores ))
            ema_scores = torch.nn.Parameter(torch.cat([ema_scores, torch.zeros([chain_growth], dtype=torch.float32, requires_grad=False, device = device)]))

        epoch += 1
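The per-batch scores in the loop above feed an exponential moving average, ema = decay * ema + (1 - decay) * scores, with decay 0.995 as in the source. A tiny worked example of a single update (the tensors here are illustrative):

import torch

ema_score_decay = 0.995  # decay constant from the example above
ema_scores = torch.zeros(4)
scores = torch.tensor([0.2, 0.0, 0.7, 0.1])

# Old scores decay geometrically; fresh per-batch scores enter with weight (1 - decay).
ema_scores = ema_score_decay * ema_scores + (1 - ema_score_decay) * scores
print(ema_scores)  # -> [0.0010, 0.0000, 0.0035, 0.0005]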
Example #5
    def logs( self, progress_bar, iteration:int, output: SimpleNamespace ):
        r""" Called after every training step. Displays miner state to screen.
        """
        self_neuron = self.subtensor.neuron_for_pubkey( self.wallet.hotkey.ss58_address )
        self_uid = self_neuron.uid
        stake = self_neuron.stake
        rank = self_neuron.rank
        incentive = self_neuron.incentive
        normalized_peer_weights = F.softmax( self.nucleus.peer_weights.detach(), dim=0 )
        current_block = self.subtensor.get_current_block()

        # ---- Progress bar log
        info = {
            'Step': colored('{}'.format(self.stats.global_step), 'red'),
            'Epoch': colored('{}'.format(self.epoch+1), 'yellow'),
            'Best-loss': colored('{:.4f}'.format(self.stats.best_epoch_loss), 'green'),          
            'L-loss': colored('{:.4f}'.format(output.local_target_loss.item()), 'blue'),
            'R-loss': colored('{:.4f}'.format(output.remote_target_loss.item()), 'red'),
            'D-loss': colored('{:.4f}'.format(output.distillation_loss.item()), 'yellow'),
            'L-acc': colored('{:.4f}'.format(output.local_accuracy), 'green'),
            'nPeers': colored(self.metagraph.n.item(), 'blue'),
            'Stake(\u03C4)': colored('{:.3f}'.format(stake), 'red'),
            'Rank(\u03C4)': colored('{:.3f}'.format(rank), 'yellow'),
            'Incentive(\u03C4/block)': colored('{:.6f}'.format(incentive), 'green'),
            'Current Block': colored('{}'.format(current_block), 'blue'),
            'Synced Block': colored('{}'.format(self.stats.last_sync_block), 'yellow'),
        }
        # ---- Miner summary per peer for progress bar
        k = min( self.config.neuron.n_topk_peer_weights, self.metagraph.n.item() )
        topk_scores, topk_uids = bittensor.unbiased_topk( self.stats.ema_scores, k, dim=0 )
        for uid, ema_score in zip( topk_uids, topk_scores ):
            color = 'green' if self.stats.scores[uid] - ema_score > 0 else 'red'
            info[f'uid_{uid.item()}'] = colored('{:.4f}'.format(ema_score), color)

        progress_bar.set_infos( info )

        # ---- wandb log at the end of each epoch
        if self.config.neuron.use_wandb and (iteration + 1) % self.config.neuron.epoch_length == 0:
            # ---- Miner summary for wandb
            wandb_info = {
                'neuron/stake':stake,
                'neuron/rank':rank,
                'neuron/incentive':incentive,
                'neuron/num_peers':self.metagraph.n.item(),
                'nucleus/remote_target_epoch_loss': self.stats.remote_target_epoch_loss,
                'nucleus/distillation_epoch_loss': self.stats.distillation_epoch_loss,
                'nucleus/local_target_epoch_loss': self.stats.local_target_epoch_loss,
                'nucleus/local_epoch_acc': self.stats.local_epoch_acc,
                'neuron/num_sync_metagraph': self.stats.epoch_sync_count,
                'neuron/data_size': self.stats.epoch_data_size,
            }

            # Build stats dataframe.
            df = pandas.concat( [
                bittensor.utils.indexed_values_to_dataframe( prefix = 'fisher_ema_score', index = topk_uids, values = self.stats.ema_scores, filter_zeros = True),
                bittensor.utils.indexed_values_to_dataframe( prefix = 'raw_peer_weight', index = topk_uids, values = self.nucleus.peer_weights, filter_zeros = True),
                bittensor.utils.indexed_values_to_dataframe( prefix = 'normalized_peer_weight', index = topk_uids, values = normalized_peer_weights, filter_zeros = True),
                bittensor.utils.indexed_values_to_dataframe( prefix = 'w_{}_i'.format(self_uid), index = topk_uids, values = self.metagraph.W[ self_uid, : ], filter_zeros = True),
                bittensor.utils.indexed_values_to_dataframe( prefix = 'w_i_{}'.format(self_uid), index = topk_uids, values = self.metagraph.W[ :, self_uid ], filter_zeros = True),
                self.axon.to_dataframe( metagraph = self.metagraph ),
                self.dendrite.to_dataframe( metagraph = self.metagraph )
            ], axis = 1)
            df['uid'] = df.index
            stats_data_table = wandb.Table( dataframe = df)

            wandb_info_axon = self.axon.to_wandb()
            wandb_info_dend = self.dendrite.to_wandb()
            wandb.log( { **wandb_info, **wandb_info_axon, **wandb_info_dend }, step = current_block)
            wandb.log( { 'stats': stats_data_table}, step = current_block)
            wandb.log( { 'axon_query_times': wandb.plot.scatter( stats_data_table, "uid", "axon_query_time", title="Axon Query time vs UID") } )
            wandb.log( { 'dendrite_query_times': wandb.plot.scatter( stats_data_table, "uid", "dendrite_query_time", title="Dendrite Query time vs UID") } )
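The stats table above is built by concatenating one prefixed column per metric, indexed by uid. A minimal sketch of that pattern with plain pandas, using a hypothetical helper in place of bittensor.utils.indexed_values_to_dataframe (column names are illustrative):

import pandas as pd
import torch

def indexed_values_to_df(prefix, index, values):
    # One column named `prefix`, with rows keyed by uid (hypothetical helper).
    return pd.DataFrame({prefix: values[index].tolist()}, index=index.tolist())

topk_uids = torch.tensor([3, 7])
ema_scores = torch.rand(10)
peer_weights = torch.rand(10)

df = pd.concat([
    indexed_values_to_df('fisher_ema_score', topk_uids, ema_scores),
    indexed_values_to_df('raw_peer_weight', topk_uids, peer_weights),
], axis=1)
df['uid'] = df.index  # as in the example above, uid becomes an explicit column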