def gen_model_dict(bopmodel, dir):
    """Build a ``{BOPS: filename}`` lookup for the model files in a folder.

    Every entry of ``dir`` is visited in sorted order and loaded into
    ``bopmodel`` with ``load_state_dict``. The integer BOPS value computed by
    ``calc_BOPS`` becomes the key and the bare filename the value. Entries
    that cannot be loaded are reported on stdout and skipped.

    Args:
        bopmodel: Model instance each state dict is loaded into. It is
            mutated in place and is left holding the last weights that
            loaded successfully.
        dir: Path of the folder containing the model files. (The name
            shadows the ``dir`` builtin but is kept for caller
            compatibility.)

    Returns:
        Tuple ``(model_dict, total_param)``. ``total_param`` is the total
        parameter count reported by ``countNonZeroWeights`` for the first
        file that loaded, or 0 if no file loaded.

    Raises:
        RuntimeError: If ``dir`` is not a directory, or if a file loads but
            reports a different total parameter count than the first one.
    """
    if not os.path.isdir(dir):
        raise RuntimeError("Error! Unable to find directory " + dir)

    print("Directory found! Loading dir: " + dir)
    model_dict = {}
    # None until the first file loads; that file defines the reference count.
    # Sorted order means "first" is the alphabetically first loadable file.
    total_param = None
    for filename in sorted(os.listdir(dir)):
        try:
            bopmodel.load_state_dict(
                torch.load(os.path.join(dir, filename), map_location=device))
            _, total_cnt, _, _ = countNonZeroWeights(bopmodel)
            bops = int(calc_BOPS(bopmodel))
        except Exception as e:
            # Best effort: the folder may hold non-model files, so a failed
            # load is only a warning.
            print("Warning! Failed to load file " + filename)
            print(e)
            continue

        # The consistency check lives outside the try block on purpose:
        # inside it, the mismatch error was swallowed by the handler above
        # and downgraded to a "failed to load" warning.
        if total_param is None:
            total_param = total_cnt
        elif total_cnt != total_param:
            raise RuntimeError(
                "Error! Model mismatch while creating model dict! Expected {} total params, found {}"
                .format(total_param, total_cnt))

        # A later file with the same BOPS value replaces an earlier one.
        model_dict[bops] = filename

    return model_dict, (0 if total_param is None else total_param)
Beispiel #2
0
def gen_bo_model_dict(dir, bits=32, loadmodel=None):
    """Build a ``{BOPS: filename}`` lookup for a folder of BO models.

    Unlike a fixed-architecture loader, each file gets its own freshly built
    model: the three hidden-layer sizes are parsed out of the filename, a
    matching model is constructed, and the file's state dict is loaded into
    it. Entries that fail at any step are reported on stdout and skipped.

    Args:
        dir: Path of the folder containing the model files.
        bits: Bit width in the filenames. Values above 9 are treated as two
            characters wide when slicing the name; values below 32 select
            the ``three_layer_model_bv_tunable`` constructor.
        loadmodel: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(model_dict, total_param)``. ``total_param`` is the total
        count reported by ``countNonZeroWeights`` for the last file that got
        that far, or 0 if none did.

    Raises:
        RuntimeError: If ``dir`` is not a directory.
    """
    # Number of leading filename characters to drop before the size list:
    # six fixed characters plus the width of the bit count.
    prefix_len = 6 + (2 if bits > 9 else 1)

    if not os.path.isdir(dir):
        raise RuntimeError("Error! Unable to find directory " + dir)

    print("Directory found! Loading dir: " + dir)
    bops_to_file = {}
    total_param = 0
    for entry in sorted(os.listdir(dir)):
        try:
            # Drop the prefix and the last five characters; what remains is
            # parsed as a comma-separated list of layer sizes.
            size_text = entry[prefix_len:][:-5]
            dims = [
                int(tok) for tok in size_text.replace(' ', '').split(',')
            ]
            print(dims)
            # All-ones masks, i.e. nothing pruned.
            prune_masks = {
                "fc1": torch.ones(dims[0], 16),
                "fc2": torch.ones(dims[1], dims[0]),
                "fc3": torch.ones(dims[2], dims[1]),
                "fc4": torch.ones(5, dims[2])
            }
            if bits < 32:
                bomodel = models.three_layer_model_bv_tunable(
                    prune_masks, dims, bits)
            else:
                # 32b, non quantized model
                bomodel = models.three_layer_model_tunable(prune_masks, dims)
            bomodel.load_state_dict(
                torch.load(os.path.join(dir, entry), map_location=device))
            _, total_param, _, _ = countNonZeroWeights(bomodel)
            bops_to_file[int(calc_BOPS(bomodel))] = entry
        except Exception as e:
            print("Warning! Failed to load file " + entry)
            print(e)
    return bops_to_file, total_param
Beispiel #3
0
def post_bo_train(dims, train_loader, val_loader, eval_loader, best=False):
    """Train one model of the given layer sizes and return its final AiQ entry.

    Builds a model via ``create_model(dims, post=True)``, trains it for up to
    ``options.epochs`` epochs with early stopping, reloads ``checkpoint.pt``,
    then writes to ``options.outputDir``: a loss plot, optionally an
    efficiency plot, the model state dict, and a JSON file of the per-epoch
    losses/efficiencies.

    Relies on module-level names not passed in: ``options`` (``patience``,
    ``epochs``, ``efficiency_calc``, ``outputDir``, ``bits``), ``device`` and
    ``test_dataset``.

    Args:
        dims: Mapping read with the keys ``'fc1s'``, ``'fc2s'`` and
            ``'fc3s'``; also passed as-is to ``create_model``.
        train_loader: Passed to ``train`` each epoch.
        val_loader: Passed to ``val`` each epoch.
        eval_loader: Passed to ``calc_AiQ`` for the efficiency metrics.
        best: Stored verbatim under the ``'best'`` key of the returned
            metrics.

    Returns:
        Single-entry dict ``{int(calc_BOPS(model)): final_aiq}`` where
        ``final_aiq`` is the ``calc_AiQ`` result extended with ``'dims'`` and
        ``'best'``.
    """
    val_losses = []
    train_losses = []
    # NOTE(review): roc_auc_scores and accuracy_scores are never used below.
    roc_auc_scores = []
    avg_precision_scores = []
    avg_train_losses = []
    avg_valid_losses = []
    accuracy_scores = []
    # One calc_AiQ result dict per measurement (epoch 0 plus every epoch).
    iter_eff = []

    early_stopping = EarlyStopping(patience=options.patience, verbose=True)
    # dims = {'fc1':dims[0],'fc2':dims[1],'fc3':dims[2]}
    model, prune_mask = create_model(dims, post=True)
    # String form of the three layer sizes; reused in plot titles and filenames.
    dims_str = str([dims['fc1s'], dims['fc2s'], dims['fc3s']])
    model.update_masks(
        prune_mask)  # Make sure to update the masks within the model

    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    criterion = nn.BCELoss()

    L1_factor = 0.0001  # Default Keras L1 Loss
    # Set to True only if the loop below exits via early stopping.
    estop = False
    epoch_counter = 0

    model.to(device)
    model.mask_to_device(device)

    print(
        "~~~~~~~~~~~~~~~~~Starting Post BO Training for Model Size {}~~~~~~~~~~~~~~~~~~~~"
        .format(dims))

    # Baseline efficiency of the untrained model, recorded as "epoch 0".
    # epoch_counter is always 0 here, so only options.efficiency_calc matters.
    if options.efficiency_calc and epoch_counter == 0:  # Get efficiency of uninitialized model
        aiq_dict, aiq_time = calc_AiQ(model, eval_loader, True, device=device)
        epoch_eff = aiq_dict['net_efficiency']
        iter_eff.append(aiq_dict)
        print('[epoch 0] Model Efficiency: %.7f' % epoch_eff)
        for layer in aiq_dict["layer_metrics"]:
            print('[epoch 0]\t Layer %s Efficiency: %.7f' %
                  (layer, aiq_dict['layer_metrics'][layer]['efficiency']))

    for epoch in range(options.epochs):  # loop over the dataset multiple times
        epoch_counter += 1
        # Train
        model, train_losses = train(model,
                                    optimizer,
                                    criterion,
                                    train_loader,
                                    L1_factor=L1_factor)

        # Validate
        val_losses, val_avg_precision_list, val_roc_auc_scores_list = val(
            model, criterion, val_loader, L1_factor=L1_factor)

        # Calculate average epoch statistics.
        # Fallback path stacks the losses as torch tensors when np.average
        # fails on them.
        # NOTE(review): bare except also hides unrelated errors.
        try:
            train_loss = np.average(train_losses)
        except:
            train_loss = torch.mean(torch.stack(train_losses)).cpu().numpy()

        try:
            valid_loss = np.average(val_losses)
        except:
            valid_loss = torch.mean(torch.stack(val_losses)).cpu().numpy()

        val_roc_auc_score = np.average(val_roc_auc_scores_list)
        val_avg_precision = np.average(val_avg_precision_list)

        if options.efficiency_calc:
            aiq_dict, aiq_time = calc_AiQ(model,
                                          eval_loader,
                                          True,
                                          device=device)
            epoch_eff = aiq_dict['net_efficiency']
            iter_eff.append(aiq_dict)

        # .tolist() yields plain Python values, which the JSON dump below needs.
        avg_train_losses.append(train_loss.tolist())
        avg_valid_losses.append(valid_loss.tolist())
        avg_precision_scores.append(val_avg_precision)

        # Print epoch statistics
        print('[epoch %d] train batch loss: %.7f' % (epoch + 1, train_loss))
        print('[epoch %d] val batch loss: %.7f' % (epoch + 1, valid_loss))
        print('[epoch %d] val ROC AUC Score: %.7f' %
              (epoch + 1, val_roc_auc_score))
        print('[epoch %d] val Avg Precision Score: %.7f' %
              (epoch + 1, val_avg_precision))
        if options.efficiency_calc:
            print('[epoch %d] Model Efficiency: %.7f' % (epoch + 1, epoch_eff))
            print('[epoch %d] aIQ Calc Time: %.7f seconds' %
                  (epoch + 1, aiq_time))
            for layer in aiq_dict["layer_metrics"]:
                print('[epoch %d]\t Layer %s Efficiency: %.7f' %
                      (epoch + 1, layer,
                       aiq_dict['layer_metrics'][layer]['efficiency']))
        # Check if we need to early stop
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            estop = True
            epoch_counter -= options.patience
            break

    # Load last/best checkpoint model saved via earlystopping.
    # The path is relative to the current working directory.
    # NOTE(review): presumably EarlyStopping writes 'checkpoint.pt' - confirm.
    model.load_state_dict(torch.load('checkpoint.pt'))

    # Time for plots
    # NOTE(review): `time` is not used anywhere else in this function.
    now = datetime.now()
    time = now.strftime("%d-%m-%Y_%H-%M-%S")

    # Plot & save losses for this iteration (x axis is 1-based epoch numbers)
    loss_plt = plt.figure()
    loss_ax = loss_plt.add_subplot()

    loss_ax.plot(range(1,
                       len(avg_train_losses) + 1),
                 avg_train_losses,
                 label='Training Loss')
    loss_ax.plot(range(1,
                       len(avg_valid_losses) + 1),
                 avg_valid_losses,
                 label='Validation Loss')

    # find position of lowest validation loss
    # NOTE(review): on early stop this is a 0-based list index, while the loss
    # curves above are drawn at x = 1..N, so the marker line below lands one
    # epoch to the left of the minimum. Without early stop it is simply the
    # configured epoch count, not the position of the minimum.
    if estop:
        minposs = avg_valid_losses.index(min(avg_valid_losses))
    else:
        minposs = options.epochs

    # update our epoch counter to represent where the model actually stopped training
    # NOTE(review): on early stop options.patience was already subtracted
    # inside the loop, so this looks like a second adjustment - confirm the
    # intended value, since it ends up in the plot titles and filenames.
    epoch_counter -= ((len(avg_valid_losses)) - minposs)

    # Models without a weight_precision attribute are reported as 32-bit.
    nbits = model.weight_precision if hasattr(model,
                                              'weight_precision') else 32
    # Plot losses for this iter

    loss_ax.axvline(minposs,
                    linestyle='--',
                    color='r',
                    label='Early Stopping Checkpoint')
    loss_ax.set_xlabel('epochs')
    loss_ax.set_ylabel('loss')
    loss_ax.grid(True)
    loss_ax.legend()
    filename = 'loss_plot_{}b_e{}_{}_.png'.format(nbits, epoch_counter,
                                                  str(dims_str))
    loss_ax.set_title('Loss from epoch 1 to {}, {}b model'.format(
        epoch_counter, nbits))
    loss_plt.savefig(path.join(options.outputDir, filename),
                     bbox_inches='tight')
    loss_plt.show()
    plt.close(loss_plt)
    if options.efficiency_calc:
        # Plot & save eff for this iteration. The x axis starts at 0 here
        # because iter_eff also holds the pre-training measurement.
        loss_plt = plt.figure()
        loss_ax = loss_plt.add_subplot()
        loss_ax.set_title('Net Eff. from epoch 0 to {}, {}b {} model'.format(
            epoch_counter, nbits, dims_str))
        loss_ax.plot(range(0, len(iter_eff)),
                     [z['net_efficiency'] for z in iter_eff],
                     label='Net Efficiency',
                     color='green')

        # loss_ax.plot(range(1, len(iter_eff) + 1), [z["layer_metrics"][layer]['efficiency'] for z in iter_eff])
        loss_ax.axvline(minposs,
                        linestyle='--',
                        color='r',
                        label='Early Stopping Checkpoint')
        loss_ax.set_xlabel('epochs')
        loss_ax.set_ylabel('Net Efficiency')
        loss_ax.grid(True)
        loss_ax.legend()
        filename = 'eff_plot_{}b_e{}_{}_.png'.format(nbits, epoch_counter,
                                                     dims_str)
        loss_plt.savefig(path.join(options.outputDir, filename),
                         bbox_inches='tight')
        loss_plt.show()
        plt.close(loss_plt)

    # Only the state dict is saved. The "full_models" directory is still
    # created, but model_filename2 is unused while the full-model save below
    # stays commented out.
    model_filename = "BO_{}b_{}.pth".format(nbits, dims_str)
    model_filename2 = "BO_{}b_{}_full.pth".format(nbits, dims_str)
    os.makedirs(path.join(options.outputDir, "full_models"), exist_ok=True)
    torch.save(model.state_dict(), path.join(options.outputDir,
                                             model_filename))
    #torch.save(model,path.join(options.outputDir,"full_models", model_filename2))

    # With full_results=True the result is used as a single dict (see the
    # .update call below), unlike the two-value unpacking of earlier calls.
    # NOTE(review): test_dataset is a module-level name; presumably
    # eval_loader iterates that same dataset - confirm.
    final_aiq = calc_AiQ(model,
                         eval_loader,
                         batnorm=True,
                         device=device,
                         full_results=True,
                         testlabels=test_dataset.labels_list)

    # Layout: {bits: [[train losses, val losses], per-epoch AiQ dicts, [minposs]]}
    # NOTE(review): keyed by options.bits, whereas the plot and model
    # filenames above use nbits taken from the model - confirm they agree.
    model_totalloss_json_dict = {
        options.bits: [[avg_train_losses, avg_valid_losses], iter_eff,
                       [minposs]]
    }

    filename = 'model_losses_{}_{}.json'.format(options.bits, dims_str)
    with open(path.join(options.outputDir, filename), 'w') as fp:
        json.dump(model_totalloss_json_dict, fp)
    final_aiq.update({'dims': str(dims_str), 'best': best})
    # Key the metrics by the model's integer BOPS value.
    aiq_entry = {int(calc_BOPS(model)): final_aiq}

    return aiq_entry
Beispiel #4
0
        plt.close(loss_plt)

    model_filename = "BO_{}b_best_{}.pth".format(nbits, time)
    torch.save(model.state_dict(), path.join(options.outputDir,
                                             model_filename))
    final_aiq = calc_AiQ(model,
                         test_loader,
                         batnorm=True,
                         device=device,
                         full_results=True,
                         testlabels=test_dataset.labels_list)

    model_totalloss_json_dict = {
        options.bits: [[avg_train_losses, avg_valid_losses], iter_eff,
                       [minposs]]
    }

    filename = 'model_losses_{}_{}.json'.format(options.size, options.bits)
    with open(path.join(options.outputDir, filename), 'w') as fp:
        json.dump(model_totalloss_json_dict, fp)

    filename = 'model_AIQ_{}_{}.json'.format(options.size, options.bits)
    with open(path.join(options.outputDir, filename), 'w') as fp:
        json.dump(
            {
                '{}b'.format(options.bits): {
                    int(calc_BOPS(model)): final_aiq,
                    'dims': str(model_size)
                }
            }, fp)