Esempio n. 1
0
 def _load_model_and_optimizer(fea_dict,model,config,arch_dict,use_cuda,multi_gpu,to_do):
     inp_out_dict = fea_dict
     nns, costs = model_init(inp_out_dict,model,config,arch_dict,use_cuda,multi_gpu,to_do)
     optimizers = optimizer_init(nns,config,arch_dict)
     for net in nns.keys():
         pt_file_arch=config[arch_dict[net][0]]['arch_pretrain_file']
         if pt_file_arch!='none':        
             if use_cuda:
                 checkpoint_load = torch.load(pt_file_arch)
             else:
                 checkpoint_load = torch.load(pt_file_arch, map_location='cpu')
             nns[net].load_state_dict(checkpoint_load['model_par'])
             if net in optimizers:
                 optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
                 optimizers[net].param_groups[0]['lr']=float(config[arch_dict[net][0]]['arch_lr']) # loading lr of the cfg file for pt
         if multi_gpu:
             nns[net] = torch.nn.DataParallel(nns[net])
     return nns, costs, optimizers, inp_out_dict
Esempio n. 2
0
def grid_search(train_loader, test_loader, comm_matrix, num_rounds, epochs, num_clients,
                net='net', optimizer='sgd', lrs=np.logspace(-12, -1, 13, base=2.0)):
    """
    Runs a decentralized optimization algorithme for the given learning rates for
    a number of rounds, over some network. Outputs the accuracies and returns them.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matric modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lrs (array): the list of stepsizes to test

    Returns:
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    accs = []
    for lr in lrs:
        global_model, client_models = model_init(num_clients, net)
        opt = optimizer_init(client_models, lr, optimizer)

        loss, test_loss, acc = 0.0, 0.0, 0.0
        for r in range(num_rounds):
            for i in range(num_clients):
                loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
            diffuse_params(client_models, comm_matrix)
            average_models(global_model, client_models)
            test_loss, acc = evaluate(global_model, test_loader)

        print('lr %g | average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (lr, loss / num_clients, test_loss, acc))
        accs.append(acc)
    return accs
Esempio n. 3
0
def run_nn(data_name,data_set,data_end_index,fea_dict,lab_dict,arch_dict,cfg_file,processed_first,next_config_file):
    
    # This function processes the current chunk using the information in cfg_file. In parallel, the next chunk is load into the CPU memory
    
    # Reading chunk-specific cfg file (first argument-mandatory file) 
    if not(os.path.exists(cfg_file)):
         sys.stderr.write('ERROR: The config file %s does not exist!\n'%(cfg_file))
         sys.exit(0)
    else:
        config = configparser.ConfigParser()
        config.read(cfg_file)
    
    # Setting torch seed
    seed=int(config['exp']['seed'])
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    
    
    # Reading config parameters
    output_folder=config['exp']['out_folder']
    use_cuda=strtobool(config['exp']['use_cuda'])
    multi_gpu=strtobool(config['exp']['multi_gpu'])
    
    to_do=config['exp']['to_do']
    info_file=config['exp']['out_info']
    
    model=config['model']['model'].split('\n')
    
    forward_outs=config['forward']['forward_out'].split(',')
    forward_normalize_post=list(map(strtobool,config['forward']['normalize_posteriors'].split(',')))
    forward_count_files=config['forward']['normalize_with_counts_from'].split(',')
    require_decodings=list(map(strtobool,config['forward']['require_decoding'].split(',')))
    
    use_cuda=strtobool(config['exp']['use_cuda'])
    save_gpumem=strtobool(config['exp']['save_gpumem'])
    is_production=strtobool(config['exp']['production'])

    if to_do=='train':
        batch_size=int(config['batches']['batch_size_train'])
    
    if to_do=='valid':
        batch_size=int(config['batches']['batch_size_valid'])
    
    if to_do=='forward':
        batch_size=1
        
    
    # ***** Reading the Data********
    if processed_first:
        
        # Reading all the features and labels for this chunk
        shared_list=[]
        
        p=threading.Thread(target=read_lab_fea, args=(cfg_file,is_production,shared_list,output_folder,))
        p.start()
        p.join()
        
        data_name=shared_list[0]
        data_end_index=shared_list[1]
        fea_dict=shared_list[2]
        lab_dict=shared_list[3]
        arch_dict=shared_list[4]
        data_set=shared_list[5]


        
        # converting numpy tensors into pytorch tensors and put them on GPUs if specified
        if not(save_gpumem) and use_cuda:
           data_set=torch.from_numpy(data_set).float().cuda()
        else:
           data_set=torch.from_numpy(data_set).float()
                           
    # Reading all the features and labels for the next chunk
    shared_list=[]
    p=threading.Thread(target=read_lab_fea, args=(next_config_file,is_production,shared_list,output_folder,))
    p.start()
    
    # Reading model and initialize networks
    inp_out_dict=fea_dict
    
    [nns,costs]=model_init(inp_out_dict,model,config,arch_dict,use_cuda,multi_gpu,to_do)
       
    # optimizers initialization
    optimizers=optimizer_init(nns,config,arch_dict)
           
    
    # pre-training and multi-gpu init
    for net in nns.keys():
      pt_file_arch=config[arch_dict[net][0]]['arch_pretrain_file']
            
      if pt_file_arch!='none':        
          checkpoint_load = torch.load(pt_file_arch)
          nns[net].load_state_dict(checkpoint_load['model_par'])
          optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
          optimizers[net].param_groups[0]['lr']=float(config[arch_dict[net][0]]['arch_lr']) # loading lr of the cfg file for pt
       
      if multi_gpu:
        nns[net] = torch.nn.DataParallel(nns[net])
          
    
    
    
    if to_do=='forward':
        
        post_file={}
        for out_id in range(len(forward_outs)):
            if require_decodings[out_id]:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'_to_decode.ark')
            else:
                out_file=info_file.replace('.info','_'+forward_outs[out_id]+'.ark')
            post_file[forward_outs[out_id]]=open_or_fd(out_file,output_folder,'wb')


    # check automatically if the model is sequential
    seq_model=is_sequential_dict(config,arch_dict)
    
    # ***** Minibatch Processing loop********
    if seq_model or to_do=='forward':
        N_snt=len(data_name)
        N_batches=int(N_snt/batch_size)
    else:
        N_ex_tr=data_set.shape[0]
        N_batches=int(N_ex_tr/batch_size)
        
    
    beg_batch=0
    end_batch=batch_size 
    
    snt_index=0
    beg_snt=0 
    

    start_time = time.time()
    
    # array of sentence lengths
    arr_snt_len=shift(shift(data_end_index, -1,0)-data_end_index,1,0)
    arr_snt_len[0]=data_end_index[0]
    
    
    loss_sum=0
    err_sum=0
    
    inp_dim=data_set.shape[1]
    for i in range(N_batches):   
        
        max_len=0
    
        if seq_model:
         
         max_len=int(max(arr_snt_len[snt_index:snt_index+batch_size]))  
         inp= torch.zeros(max_len,batch_size,inp_dim).contiguous()
    
            
         for k in range(batch_size):
              
                  snt_len=data_end_index[snt_index]-beg_snt
                  N_zeros=max_len-snt_len
                  
                  # Appending a random number of initial zeros, tge others are at the end. 
                  N_zeros_left=random.randint(0,N_zeros)
                 
                  # randomizing could have a regularization effect
                  inp[N_zeros_left:N_zeros_left+snt_len,k,:]=data_set[beg_snt:beg_snt+snt_len,:]
                  
                  beg_snt=data_end_index[snt_index]
                  snt_index=snt_index+1
                
        else:
            # features and labels for batch i
            if to_do!='forward':
                inp= data_set[beg_batch:end_batch,:].contiguous()
            else:
                snt_len=data_end_index[snt_index]-beg_snt
                inp= data_set[beg_snt:beg_snt+snt_len,:].contiguous()
                beg_snt=data_end_index[snt_index]
                snt_index=snt_index+1
    
        # use cuda
        if use_cuda:
            inp=inp.cuda()
    
        if to_do=='train':
            # Forward input, with autograd graph active
            outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)
            
            for opt in optimizers.keys():
                optimizers[opt].zero_grad()
                
    
            outs_dict['loss_final'].backward()
            
            # Gradient Clipping (th 0.1)
            #for net in nns.keys():
            #    torch.nn.utils.clip_grad_norm_(nns[net].parameters(), 0.1)
            
            
            for opt in optimizers.keys():
                if not(strtobool(config[arch_dict[opt][0]]['arch_freeze'])):
                    optimizers[opt].step()
        else:
            with torch.no_grad(): # Forward input without autograd graph (save memory)
                outs_dict=forward_model(fea_dict,lab_dict,arch_dict,model,nns,costs,inp,inp_out_dict,max_len,batch_size,to_do,forward_outs)
    
                    
        if to_do=='forward':
            for out_id in range(len(forward_outs)):
                
                out_save=outs_dict[forward_outs[out_id]].data.cpu().numpy()
                
                if forward_normalize_post[out_id]:
                    # read the config file
                    counts = load_counts(forward_count_files[out_id])
                    out_save=out_save-np.log(counts/np.sum(counts))             
                    
                # save the output    
                write_mat(output_folder,post_file[forward_outs[out_id]], out_save, data_name[i])
        else:
            loss_sum=loss_sum+outs_dict['loss_final'].detach()
            err_sum=err_sum+outs_dict['err_final'].detach()
           
        # update it to the next batch 
        beg_batch=end_batch
        end_batch=beg_batch+batch_size
        
        # Progress bar
        if to_do == 'train':
          status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"+" | L:" +str(round(loss_sum.cpu().item()/(i+1),3))
          if i==N_batches-1:
             status_string="Training | (Batch "+str(i+1)+"/"+str(N_batches)+")"

             
        if to_do == 'valid':
          status_string="Validating | (Batch "+str(i+1)+"/"+str(N_batches)+")"
        if to_do == 'forward':
          status_string="Forwarding | (Batch "+str(i+1)+"/"+str(N_batches)+")"
          
        progress(i, N_batches, status=status_string)
    
    elapsed_time_chunk=time.time() - start_time 
    
    loss_tot=loss_sum/N_batches
    err_tot=err_sum/N_batches
    
    # clearing memory
    del inp, outs_dict, data_set
    
    # save the model
    if to_do=='train':
     
    
         for net in nns.keys():
             checkpoint={}
             if multi_gpu:
                checkpoint['model_par']=nns[net].module.state_dict()
             else:
                checkpoint['model_par']=nns[net].state_dict()
             
             checkpoint['optimizer_par']=optimizers[net].state_dict()
             
             out_file=info_file.replace('.info','_'+arch_dict[net][0]+'.pkl')
             torch.save(checkpoint, out_file)
         
    if to_do=='forward':
        for out_name in forward_outs:
            post_file[out_name].close()
         
    
         
    # Write info file
    with open(info_file, "w") as text_file:
        text_file.write("[results]\n")
        if to_do!='forward':
            text_file.write("loss=%s\n" % loss_tot.cpu().numpy())
            text_file.write("err=%s\n" % err_tot.cpu().numpy())
        text_file.write("elapsed_time_chunk=%f\n" % elapsed_time_chunk)
    
    text_file.close()
    
    
    # Getting the data for the next chunk (read in parallel)    
    p.join()
    data_name=shared_list[0]
    data_end_index=shared_list[1]
    fea_dict=shared_list[2]
    lab_dict=shared_list[3]
    arch_dict=shared_list[4]
    data_set=shared_list[5]
    
    
    # converting numpy tensors into pytorch tensors and put them on GPUs if specified
    if not(save_gpumem) and use_cuda:
       data_set=torch.from_numpy(data_set).float().cuda()
    else:
       data_set=torch.from_numpy(data_set).float()
       
       
    return [data_name,data_set,data_end_index,fea_dict,lab_dict,arch_dict]
Esempio n. 4
0
def run_probas(train_loader,
               test_loader,
               comm_matrix,
               num_rounds,
               epochs,
               num_clients,
               failure_rounds,
               corr='global',
               net='net',
               optimizer='sgd',
               lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for
    a number of rounds, over some network. Links may fail for some rounds according
    to a pre-defined probabilistic model. Outputs the accuracies and returns them.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matric modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        failure_rounds (array): list representing the number of failing links at each round
        corr (string): the correction policy, global by default
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimizaion algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    assert corr in ['global', 'local', 'none']

    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        num_failures = np.count_nonzero(failure_rounds == r)
        if corr == 'global':
            actual_comm_matrix = network_failures_global(
                comm_matrix, num_failures)
        elif corr == 'local':
            actual_comm_matrix = network_failures_local(
                comm_matrix, num_failures)
        else:  # corr == 'none'
            actual_comm_matrix = network_failures_no_correction(
                comm_matrix, num_failures)

        for i in range(num_clients):
            loss += client_update(client_models[i],
                                  opt[i],
                                  train_loader[i],
                                  epoch=epochs)
        diffuse_params(client_models, actual_comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' %
              (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
Esempio n. 5
0
def run_latency_changing_topo(train_loader,
                              test_loader,
                              num_rounds,
                              epochs,
                              num_clients,
                              latency_nodes,
                              net='net',
                              optimizer='sgd',
                              lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over some network. Some nodes send their weights with a one-rounds
    latency, for the entire execution. The network topology evolves over time. Outputs
    the accuracies and returns them.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matric modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimizaion algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)
    topos = ['centralized', 'ring', 'grid']
    topo = np.random.choice(topos)
    comm_matrix = create_mixing_matrix(topo, num_clients)

    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        old_client_models = client_models
        old_topo = topo
        old_comm_matrix = comm_matrix
        topo = np.random.choice(topos)

        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i],
                                  opt[i],
                                  train_loader[i],
                                  epoch=epochs)

        # diffuse params
        diffuse_params_latency(client_models, comm_matrix, latency_nodes)
        if (r > 0):
            diffuse_params_latency(
                old_client_models, old_comm_matrix,
                np.setdiff1d(np.array(range(num_clients)), latency_nodes))
        print("old topo: {}, new topo: {}".format(old_topo, topo))

        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' %
              (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
Esempio n. 6
0
def run_latency_per_round(train_loader,
                          test_loader,
                          comm_matrix,
                          num_rounds,
                          epochs,
                          num_clients,
                          latency_nodes,
                          latency_rounds,
                          net='net',
                          optimizer='sgd',
                          lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for a
    number of rounds, over some network. Some nodes send their weights with a one-rounds
    latency, only during specific rounds. Outputs the accuracies and returns them.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        comm_matrix (numpy.array): the communication matric modeling the network
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        latency_nodes (array): the list of delayed nodes
        latency_rounds (array): the rounds at which latency will occur across the network
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimizaion algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        old_client_models = client_models

        # client update
        for i in range(num_clients):
            loss += client_update(client_models[i],
                                  opt[i],
                                  train_loader[i],
                                  epoch=epochs)

        # diffuse params
        if (r in latency_rounds):
            diffuse_params_latency(client_models, comm_matrix, latency_nodes)
            print("round {}, delay".format(r))
        elif (r in latency_rounds + 1):
            diffuse_params(client_models, comm_matrix)
            diffuse_params_latency(
                old_client_models, comm_matrix,
                np.setdiff1d(np.array(range(num_clients)), latency_nodes))
            print("round {}, delay recovery".format(r))
        else:
            diffuse_params(client_models, comm_matrix)
            print("round {}, normal".format(r))

        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round' % r)
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' %
              (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
Esempio n. 7
0
elapsed_time_reading = time.time() - start_time

# converting numpy tensors into pytorch tensors and put them on GPUs if specified
start_time = time.time()
if not (save_gpumem) and use_cuda:
    data_set = torch.from_numpy(data_set).float().cuda()
else:
    data_set = torch.from_numpy(data_set).float()

elapsed_time_load = time.time() - start_time

# Reading model and initialize networks
inp_out_dict = fea_dict

[nns, costs] = model_init(inp_out_dict, model, config, arch_dict, use_cuda,
                          multi_gpu, to_do)

# optimizers initialization
optimizers = optimizer_init(nns, config, arch_dict)

# pre-training
for net in nns.keys():
    pt_file_arch = config[arch_dict[net][0]]['arch_pretrain_file']

    if pt_file_arch != 'none':
        checkpoint_load = torch.load(pt_file_arch)
        nns[net].load_state_dict(checkpoint_load['model_par'])
        optimizers[net].load_state_dict(checkpoint_load['optimizer_par'])
        optimizers[net].param_groups[0]['lr'] = float(config[
            arch_dict[net][0]]['arch_lr'])  # loading lr of the cfg file for pt
Esempio n. 8
0
def run_topos(train_loader,
              test_loader,
              num_rounds,
              epochs,
              num_clients,
              topos,
              shuffle='random',
              net='net',
              optimizer='sgd',
              lr=0.1):
    """
    Runs a decentralized optimization algorithm for the given learning rate for
    a number of rounds, over some network. Outputs the accuracies and returns them.

    Params:
        train_loader (array): the list of all train datasets, one per client
        test_loader (array): the list of test datasets, one per client
        num_rounds (int): the number of data exchanges between nodes
        epochs (int): the number of optimization steps between each communication (minimum 1)
        num_clients (int): the number of clients in the network
        topos (array): list of possible network topologies
        shuffle (string): defines how topology evolves over time, randomly by default
        net (string): the neural network framework we use
        optimizer (string): the chosen optimizer, SGD by default
        lr (double): the learning rate for the optimizaion algorithm

    Returns:
        global_model (nn.Module): the final global neural network averaging all the clients
        client_models (array of Net): the list of all the final client neural networks
        accs (array): the corresponding accuracies, with the same shape as lrs
    """
    assert shuffle in ['random', 'modulo', 'fraction']

    accs = []
    global_model, client_models = model_init(num_clients, net)
    opt = optimizer_init(client_models, lr, optimizer)

    loss, test_loss, acc = 0.0, 0.0, 0.0
    for r in range(num_rounds):
        for i in range(num_clients):
            loss += client_update(client_models[i],
                                  opt[i],
                                  train_loader[i],
                                  epoch=epochs)

        if shuffle == 'fraction':
            t = int(r * 5 / num_rounds)
            comm_matrix = create_mixing_matrix(topos[t], num_clients)
        elif shuffle == 'modulo':
            t = r % 5
            comm_matrix = create_mixing_matrix(topos[t], num_clients)
        else:  # shuffle == 'random'
            t = np.random.choice(range(5))
            comm_matrix = create_mixing_matrix(topos[t], num_clients)

        diffuse_params(client_models, comm_matrix)
        average_models(global_model, client_models)
        test_loss, acc = evaluate(global_model, test_loader)

        print('%d-th round, %s topology' % (r, topos[t]))
        print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' %
              (loss / num_clients, test_loss, acc))
        accs.append(acc)
    return global_model, client_models, accs
Esempio n. 9
0
              pf_dim=PF_DIM,
              decode_layer=DecoderLayer,
              self_attention=SelfAttention,
              positionwise_feedforward=PositionFeedForward,
              dropout=DROPOUT,
              device=device,
              PositionalEncoding=True).to(device)

pad_idx = SRC.vocab.stoi['<pad>']

model = Seq2Seq(encoder=enc, decoder=dec, pad_idx=pad_idx,
                device=device).to(device)

#-------------------------train & valid--------------------------------

model_init(model)

optimizer = NoamOpt(HID_DIM,
                    factor=1,
                    warmup=2000,
                    optimizer=optim.Adam(model.parameters(),
                                         lr=0,
                                         betas=(0.9, 0.98),
                                         eps=1e-9))
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

for epoch in range(N_EPOCH):

    start_time = time.time()

    train_loss, train_score = train(model, train_iterator, optimizer,
Esempio n. 10
0
import fwdProp
import backProp
import modelMods

if __name__ == '__main__':
    d = 10  # Number of data samples
    L0 = 10  # Width of input vector - X[L0, m]
    margin = 0.35
    learning_rate = 0.0002
    epochs = 1000000

    # Load the data and define the network
    X = dataUtils.load_simple_data(d, L0)
    d = X.shape[1]
    cache = utils.cache_init(X)
    model = utils.model_init()

    for e in range(epochs):
        layer = 1
        model, cache, A = fwdProp.forward_prop_one_layer(model, cache, 1)
        error, target = backProp.compute_error(A, margin)
        model = modelMods.set_win_count(model, layer, target)

        # For every sample with no winner (target[...,d] == 0) create new unit
        Ln = A.shape[0]
        num_winners = np.sum(target, axis=0)  # Number of winner in each column
        for index in range(d):
            if num_winners[index] == 0:
                memory = True
                model = modelDefinition.add_unit(model, layer, memory, X,
                                                 index)