def runQualityBenchForModel(arch: str, only_best: bool = False, persist_results: bool = False) -> None:
    data_path = DATA_PATH

    arch_states = [d for d in os.listdir(STATE_DICT_PATH) if arch.split("-")[0] in d]
    if only_best:
        arch_states = [d for d in arch_states if 'best' in d]
    
    logging.debug(arch_states)

    _, _, val_loader = get_zipped_dataloaders(data_path, BATCH_SIZE, use_valid=True)
    # the label-to-class mapping does not depend on the checkpoint, so build it once
    classes = getLabelToClassMapping(os.path.join(os.getcwd(), data_path))
    with torch.no_grad():
        for state in arch_states:
            measurements = []
            for _ in range(QUALITY_RUNS):
                model = getModelWithOptimized(arch, n=LAYERS_TO_SKIP, batch_size=BATCH_SIZE)
                model.eval()
                model, _, _, _ = resumeFromPath(os.path.join(STATE_DICT_PATH, state), model)

                grndT, pred = evaluateModel(model, val_loader, classes, BATCH_SIZE)
                measurements.append(metrics.accuracy_score(grndT, pred))
                if persist_results:
                    logging.info(metrics.classification_report(grndT, pred, digits=3))
                    generateAndStoreClassificationReportCSV(grndT, pred, f'{arch}_report.csv')
                logging.debug(measurements)
            logging.info('')
            logging.info(measurements)
            logging.info(f'Precision: {sum(measurements) / len(measurements):.4f}')
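A minimal usage sketch, assuming the module-level constants referenced above (DATA_PATH, STATE_DICT_PATH, BATCH_SIZE, QUALITY_RUNS, LAYERS_TO_SKIP) are already configured; the architecture string follows the skip-last naming used in the later examples:

import logging

logging.basicConfig(level=logging.INFO)

# Hypothetical invocation: benchmark only the best checkpoints of the
# skip-last DenseNet variant and persist per-class reports as CSV.
runQualityBenchForModel('densenet121-skip-last', only_best=True, persist_results=True)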
Example #2
def loadAndEvaluate(args):
    model = getModel(args.arch)

    if os.path.exists(
            os.path.join(CHECKPOINT_DIR, args.arch + '_model_best.pth.tar')):
        logging.debug("Loading best model")
        load_path = os.path.join(CHECKPOINT_DIR,
                                 args.arch + '_model_best.pth.tar')
    else:
        logging.debug("Loading default model")
        load_path = os.path.join(CHECKPOINT_DIR,
                                 args.arch + '_checkpoint.pth.tar')

    logging.debug(f'Loading: {load_path}')

    model, _, _, _ = resumeFromPath(load_path, model)

    logging.debug('Loading test data...')

    _, _, testLoader = get_zipped_dataloaders(DATA_PATH,
                                              BATCH_SIZE,
                                              use_valid=True)
    grndT, pred = evaluateModel(model, testLoader)

    printStats(grndT, pred)
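A hedged wiring sketch: the only attribute loadAndEvaluate reads from its argument is args.arch, so an argparse namespace like the following would be enough (the flag name and default are assumptions):

import argparse

# Hypothetical CLI entry point for loadAndEvaluate.
parser = argparse.ArgumentParser(description='Load a checkpoint and evaluate it')
parser.add_argument('--arch', default='densenet121')

loadAndEvaluate(parser.parse_args())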
Example #3
    def test000_testDenseNet121Output_withLoss_noException(self):
        test_batch = 1
        test_loader, _, _ = get_zipped_dataloaders(self.TEST_DATASET_PATH, test_batch)

        test_criterion = nn.CrossEntropyLoss()

        model = getModel('densenet121')
        # a single batch is enough to check that forward and loss run without exceptions
        img, target = next(iter(test_loader))
        output = model(img)
        test_criterion(output, target)
Example #4
    def test040_DenseNetWithDenseNetDropLastNPolicy_NoException_OnForwardingWithBatchSize(self):
        test_batch_size = 8
        test_loader, _ , _ = get_zipped_dataloaders(self.TEST_DATASET_PATH, test_batch_size)

        img, _ = next(iter(test_loader))

        test_net = getModelWithOptimized('densenet121-skip-last', 3, test_batch_size)

        with torch.no_grad():
            output = test_net(img)

            self.assertIsNotNone(output)
            # the output batch dimension should match the requested batch size
            self.assertEqual(output.shape[0], test_batch_size)
Example #5
    def test030_labelAndIndexMapping(self):
        test_batch = 1
        test_loader, _, _ = get_zipped_dataloaders(self.TEST_DATASET_PATH, test_batch)
        img, target = next(iter(test_loader))
        
        index_path = os.path.join(self.TEST_DATASET_PATH, 'index-train.txt')
        class_to_global_index = getClassToIndexMapping(index_path)

        label_to_class = list(set(class_to_global_index))
        label_to_class.sort()

        self.assertEqual(len(label_to_class), 40)
        self.assertEqual(len(class_to_global_index), len(test_loader))
    
        index_path = os.path.join(self.TEST_DATASET_PATH, 'index-val.txt')
        class_to_global_val_index = getClassToIndexMapping(index_path)

        label_to_class_val = list(set(class_to_global_val_index))
        label_to_class_val.sort()
        self.assertEqual(len(label_to_class_val), len(label_to_class))
        self.assertEqual(label_to_class_val, label_to_class)
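getClassToIndexMapping is not shown in these examples; a minimal sketch of what it might look like, under the assumption that each line of index-train.txt / index-val.txt names the class of one sample, so list position equals the global sample index:

def getClassToIndexMapping(index_path):
    # Hypothetical helper: one class name per line, position == sample index.
    with open(index_path) as index_file:
        return [line.strip() for line in index_file if line.strip()]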
Example #6
def executeBenchmark(args):

    _, _, loader = get_zipped_dataloaders(args.data_root,
                                          args.batch_size,
                                          use_valid=True)
    label_to_classes = getLabelToClassMapping(
        os.path.join(os.getcwd(), args.data_root))

    for bench_type in args.bench_types:
        for arch, pol in args.arch_pol_tupl_ls:
            d = {
                'run': [],
                'skip_n': [],
                'bench_type': [],
                'arch': [],
                'pol': [],
                'prec': [],
                'rec': [],
                'acc': [],
                'f1': [],
                'time': []
            }
            logging.info(f'Running {bench_type}-Bench on {arch}-{pol}...')

            skip_layers_list = args.skip_layers_values
            runs = args.runs

            arch_name = f'{arch}-{pol}'
            if pol == 'none':
                arch_name = arch
                skip_layers_list = [0]
                if bench_type == 'quality':
                    runs = 1

            with tqdm(total=(len(skip_layers_list) * runs),
                      ncols=80,
                      desc=f'Progress-{bench_type}-{arch}-{pol}') as pbar:
                for skip_n in skip_layers_list:
                    for run in range(runs):
                        prec = 0.0
                        rec = 0.0
                        acc = 0.0
                        f1 = 0.0
                        time = 0.0

                        try:
                            if bench_type == 'quality':
                                prec, acc, rec, f1 = executeQualityBench(
                                    arch_name, loader, skip_n,
                                    label_to_classes, args.batch_size)
                            elif bench_type == 'speed':
                                time = executeSpeedBench(arch_name, skip_n)
                            else:
                                logging.error('Benchmark type not supported')
                                quit(1)
                        except Exception as e:
                            logging.info(
                                f'Exception occurred in {bench_type}: {e}\n continuing...'
                            )
                            logging.info(f'run: {run}, skip: {skip_n}, '
                                         f'bench_type: {bench_type}, '
                                         f'arch: {arch}, pol: {pol}')
                            traceback.print_exc()
                            continue

                        d['run'].append(run)
                        d['skip_n'].append(skip_n)
                        d['bench_type'].append(bench_type)
                        d['arch'].append(arch)
                        d['pol'].append(pol)
                        d['prec'].append(prec)
                        d['rec'].append(rec)
                        d['acc'].append(acc)
                        d['f1'].append(f1)
                        d['time'].append(time)

                        pbar.update(1)

            filename = f'{bench_type}-{arch}-{pol}-run.csv'
            storeReportToCSV(args.reports_path, filename, d)
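A hedged configuration sketch for executeBenchmark, listing only the attributes the function actually reads; the concrete values are illustrative:

from argparse import Namespace

# Hypothetical benchmark configuration.
args = Namespace(
    data_root='data/imagenet_full',
    batch_size=16,
    bench_types=['quality', 'speed'],
    arch_pol_tupl_ls=[('densenet121', 'skip-last'), ('densenet121', 'none')],
    skip_layers_values=[1, 2, 3],
    runs=3,
    reports_path='reports',
)

executeBenchmark(args)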
Example #7
def main(args):

    torch.cuda.empty_cache()

    n_gpus_per_node = torch.cuda.device_count()
    logging.info(f"Found {n_gpus_per_node} GPU(-s)")

    model = msdnet.models.msdnet(args)

    criterion = nn.CrossEntropyLoss()

    if torch.cuda.is_available():
        logging.debug("Cuda is available.")
        logging.info("Using all available GPUs")
        for i in range(torch.cuda.device_count()):
            logging.info(f"gpu:{i} - {torch.cuda.get_device_name(i)}")
        model = nn.DataParallel(model).cuda()
        logging.info("Moving criterion to device.")
        criterion = criterion.cuda()
        cudnn.benchmark = True
    else:
        logging.info("Using slow CPU training.")

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # LambdaLR multiplies the returned factor onto the initial lr (args.lr),
    # so the effective lr is args.lr ** (1 + epoch // 30)
    calc_lr = lambda epoch: args.lr ** (epoch // 30)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=calc_lr)

    train_loader, val_loader, test_loader = get_zipped_dataloaders(
        args.data_root, args.batch_size, use_valid=True)

    best_prec1, best_epoch, start_epoch = 0.0, 0, 0

    if hasattr(args, 'epoch') and args.epoch:  # RESUME
        model, optimizer, start_epoch, best_prec1 = resumeFromPath(
            os.path.join(os.getcwd(), CHECKPOINT_DIR,
                         f'msdnet10_{args.epoch}{CHECKPOINT_POSTFIX}'), model,
            optimizer)

    for epoch in range(start_epoch, args.epochs):
        logging.info(f"Started Epoch {epoch + 1}/{args.epochs}")
        train_loss, train_prec1, train_prec5, lr = train(
            train_loader, model, criterion, optimizer, scheduler, epoch)
        logging.info('******** Train result: *********')
        logging.info(f'Train - Epoch: [{epoch}]\t'
                     f'Loss {train_loss:.4f}\t'
                     f'Acc@1 {train_prec1:.4f}\t'
                     f'Acc@5 {train_prec5:.4f}\t'
                     f'LR {lr}\t')
        val_loss, val_prec1, val_prec5 = validate(val_loader, model, criterion)
        scheduler.step()

        is_best = val_prec1 > best_prec1
        if is_best:
            best_prec1 = val_prec1
            best_epoch = epoch
            logging.info(f'Best val_prec1 {best_prec1}')

        if is_best or epoch % CHECKPOINT_INTERVALL == 0:
            save_checkpoint(
                getStateDict(model, epoch, 'msdnet10', best_prec1, optimizer),
                is_best, 'msdnet10', CHECKPOINT_DIR)

        if epoch % args.test_interval == 0:
            test_loss, test_top1, test_top5 = validate(test_loader, model,
                                                       criterion)
            logging.info(f'Test - Epoch: [{epoch}]\t'
                         f'Loss {test_loss:.4f}\tAcc@1 {test_top1:.4f}\tAcc@5 {test_top5:.4f}')

    logging.info(f'Best val_prec1: {best_prec1:.4f} at epoch {best_epoch}')

    logging.info('*************** Final prediction results ***************')
    validate(test_loader, model, criterion)
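LambdaLR treats the lambda's return value as a factor multiplied onto the optimizer's initial lr, which is why the exponent above starts at epoch // 30 rather than 1 + epoch // 30. A small standalone check of that semantics, here with a fixed tenfold decay; all names are local to this sketch:

import torch

# One dummy parameter, initial lr 0.1, tenfold step decay every 30 epochs.
param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.1)
sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=lambda e: 0.1 ** (e // 30))

for epoch in range(90):
    if epoch % 30 == 0:
        print(epoch, sched.get_last_lr())  # [0.1], then [0.01], then [0.001]
    opt.step()
    sched.step()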
Example #8
def main(args):
    torch.cuda.empty_cache()

    n_gpus_per_node = torch.cuda.device_count()
    logging.info(f"Found {n_gpus_per_node} GPU(-s)")

    # create model
    model = getModel(args.arch)

    logging.info(f"Training Arch:{args.arch}")

    if not torch.cuda.is_available():
        logging.warning("Using CPU for slow training process")
    else:
        logging.debug("Cuda is available")
        if GPU_ID is not None:
            logging.info(f"Using specific GPU: {GPU_ID}")
            logging.warning(
                "This will reduce the training speed significantly.")
            torch.cuda.set_device(GPU_ID)
            model.cuda(GPU_ID)
        else:
            logging.info("Using all available GPUs")
            for i in range(torch.cuda.device_count()):
                logging.info(f"gpu:{i} - {torch.cuda.get_device_name(i)}")
            model = nn.DataParallel(model).cuda()

    # loss function (criterion) and optimizer
    if torch.cuda.is_available():
        logging.info("Move cross entropy to device")
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(),
                                LEARNING_RATE,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY)

    cudnn.benchmark = True

    batch_size = args.batch if 'batch' in args else BATCH_SIZE

    train_loader, test_loader, _ = get_zipped_dataloaders(
        os.path.join(os.getcwd(), "data", "imagenet_full"),
        batch_size,
        use_valid=True)

    # size of batch:
    logging.debug(get_batch_size_stats(train_loader))

    if args.resume:
        model, optimizer, start_epoch, best_acc = resumeFromPath(
            os.path.join(os.getcwd(), CHECKPOINT_DIR,
                         f"{args.arch}_{args.epoch}{CHECKPOINT_POSTFIX}"),
            model, optimizer)
    else:
        start_epoch = START_EPOCH
        best_acc = 0.0

    checkpoint_time = AverageMeter('Checkpoint Time', ':6.3f')
    epoch_time = AverageMeter('Epoch Time', ':6.3f')
    # train loop
    end = time.time()
    for epoch in range(start_epoch, EPOCHS):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        logging.debug('Running train loop')
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate the network on the test set
        logging.debug('Compute accuracy')
        acc = validate(test_loader, model, criterion)

        # remember top acc
        is_best = acc > best_acc
        best_acc = max(acc, best_acc)

        # save model
        if epoch % CHECKPOINT_INTERVALL == 0 or is_best or IS_DEBUG or epoch == EPOCHS - 1:
            start = time.time()
            save_checkpoint(
                getStateDict(model, epoch, args.arch, best_acc, optimizer),
                is_best, args.arch, os.path.join(os.getcwd(), CHECKPOINT_DIR))
            checkpoint_time.update(time.time() - start)
            logging.info(checkpoint_time)
        if IS_DEBUG:
            break
        epoch_time.update(time.time() - end)
        end = time.time()
        logging.info(f"Finished epoch {epoch}")
        logging.info(
            f"Avg-Epoch={epoch_time.avg}sec, Avg-Checkp.={checkpoint_time.avg}sec"
        )
    logging.info(f"Best accuracy: {best_acc}")
Example #9
def getDataLoader(args):
    _, _, loader = get_zipped_dataloaders(args.data_root, args.batch_size, use_valid=True)
    return loader
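A short usage sketch; the namespace attributes match the two the helper reads, and the returned loader is the held-out evaluation split:

from argparse import Namespace

# Hypothetical call: fetch the evaluation loader and inspect one batch.
loader = getDataLoader(Namespace(data_root='data/imagenet_full', batch_size=16))
img, target = next(iter(loader))
print(img.shape, target.shape)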