Example #1
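The snippets on this page are excerpts from a larger script, so their imports are not shown. Roughly the following imports are assumed; the exact module path behind the `classifier` alias is an assumption (Distiller's image-classifier application), and `get_default_args`, `FinetuningTask`, and `FinetuningProcess` appear to be defined elsewhere in the project and are not shown here.

import os
import glob
import math
import shutil
import logging
import multiprocessing
from collections import OrderedDict
from copy import deepcopy

import torch
# Assumption: the `classifier` alias refers to Distiller's image-classifier application module.
import distiller.apputils.image_classifier as classifier
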
def finetune_checkpoint(ckpt_file, gpu, app_args, loaders):
    # Usually when we train, we also want to look at (and graph) the validation score of each epoch.
    # When we run many fine-tuning sessions at once, we don't need to inspect every validation score,
    # but we do want a sort of "early stopping": we keep the checkpoint of the best-performing
    # training epoch, not the checkpoint created by the last epoch.
    # The "best" checkpoint is the one with the highest validation (top-1) accuracy.
    name = os.path.basename(ckpt_file)
    print("Fine-tuning checkpoint %s" % name)

    app_args.gpus = str(gpu)
    app_args.name = name
    app_args.deprecated_resume = ckpt_file
    app = classifier.ClassifierCompressor(app_args,
                                          script_dir=os.path.dirname(__file__))
    app.train_loader, app.val_loader, app.test_loader = loaders
    best = [float("-inf"), float("-inf"), float("inf")]
    for epoch in range(app_args.epochs):
        validate = epoch >= math.floor(
            (1 - app_args.validate_enable_factor) * app_args.epochs)
        top1, top5, loss = app.train_validate_with_scheduling(
            epoch, validate=validate, verbose=False)
        if validate:
            if top1 > best[0]:
                best = [top1, top5, loss]
    if app_args.validate_enable_factor == 0:
        # We did not validate, so our score is the performance on the Test dataset
        return (name, app.test())
    return (name, best)
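
# Sketch (hypothetical helper, not part of the original source): which epochs get
# validated for a given validate_enable_factor.  Validation runs only on the
# trailing `validate_enable_factor` fraction of the epochs; earlier epochs only train.
def _validation_epochs(epochs, validate_enable_factor):
    start = math.floor((1 - validate_enable_factor) * epochs)
    return [epoch for epoch in range(epochs) if epoch >= start]

# Example: _validation_epochs(10, 0.5) -> [5, 6, 7, 8, 9]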

def fine_module_by_name(model, name_in):
    """Return the module whose weight parameter is named `name_in`, or None if not found."""
    for module_name, module in model.named_modules():
        q_weight_name = module_name + ".weight"
        if q_weight_name == name_in:
            return module
    return None
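
# Usage sketch (toy model, not from the original source): map a parameter name
# such as "features.0.weight" back to the module that owns that weight tensor.
def _demo_fine_module_by_name():
    import torch.nn as nn
    model = nn.Sequential()
    model.add_module("features", nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU()))
    conv = fine_module_by_name(model, "features.0.weight")
    assert isinstance(conv, nn.Conv2d)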


if __name__ == "__main__":
    print("START")
    args = get_default_args()
    # args.epochs = float('inf')  # hack for args parsing so there's no error in epochs
    cc = classifier.ClassifierCompressor(args,
                                         script_dir=os.path.dirname(__file__))
    args = deepcopy(
        cc.args
    )  # Get back args after modifications in ClassifierCompressor.__init__
    eval_data_loader = classifier.load_data(args,
                                            load_train=False,
                                            load_val=False,
                                            load_test=True)

    # logging
    logging.getLogger().setLevel(logging.WARNING)
    msglogger = logging.getLogger(__name__)
    msglogger.setLevel(logging.INFO)

    def test_fn(model):
        top1, top5, losses = classifier.test(eval_data_loader, model,
Example #3
def finetune_directory(ft_dir,
                       stats_file,
                       app_args,
                       cleanup_ft_dir=False,
                       checkpoints=None):
    """Fine tune all the checkpoint files we find in the immediate-directory specified.

    For each checkpoint file we find, we create and queue a FinetuningTask.  
    A FinetuningProcess will pickup the FinetuningTask and process it.
    """
    print("Fine-tuning directory %s" % ft_dir)
    if not checkpoints:
        # Get a list of the checkpoint files
        checkpoints = glob.glob(os.path.join(ft_dir, "*checkpoint.pth.tar"))
    assert checkpoints

    # We create a subdirectory, where we will write all of our output
    ft_output_dir = os.path.join(ft_dir, "ft")
    os.makedirs(ft_output_dir, exist_ok=True)
    print("Writing results to directory %s" % ft_output_dir)
    app_args.output_dir = ft_output_dir

    # Multi-process queues
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    # Create and launch the fine-tuning processes
    processes = []
    n_processes = min(app_args.processes, len(checkpoints))
    for i in range(n_processes):
        # Pre-load the data-loaders of each fine-tuning process once
        app = classifier.ClassifierCompressor(
            app_args, script_dir=os.path.dirname(__file__))
        data_loader = classifier.load_data(app.args)
        # Remove the log directory that ClassifierCompressor just created; this
        # app instance exists only to build the data loaders for the worker process
        shutil.rmtree(app.logdir)
        processes.append(FinetuningProcess(tasks, results, data_loader))
        # Start the process
        processes[-1].start()

    n_gpus = torch.cuda.device_count()

    # Enqueue all of the fine-tuning tasks
    for (instance, ckpt_file) in enumerate(checkpoints):
        tasks.put(FinetuningTask(args=(ckpt_file, instance % n_gpus,
                                       app_args)))

    # Push an end-of-tasks marker
    for i in range(len(processes)):
        tasks.put(None)

    # Wait until all tasks finish
    tasks.join()

    # Start printing results
    results_dict = OrderedDict()
    while not results.empty():
        result = results.get()
        results_dict[result[0]] = result[1]

    # Read the results of the AMC experiment (we'll want to use some of the data)
    import pandas as pd
    df = pd.read_csv(os.path.join(ft_dir, "amc.csv"))
    assert len(results_dict) > 0
    # Log some info for each checkpoint
    for ckpt_name in sorted(results_dict.keys()):
        net_search_results = df[df["ckpt_name"] ==
                                ckpt_name[:-len("_checkpoint.pth.tar")]]
        search_top1 = net_search_results["top1"].iloc[0]
        normalized_macs = net_search_results["normalized_macs"].iloc[0]
        log_entry = (ft_output_dir, ckpt_name, normalized_macs, search_top1,
                     *results_dict[ckpt_name])
        print("%s <>  %s: %.2f %.2f %.2f %.2f %.2f" % log_entry)
        stats_file.add_record(log_entry)
    if cleanup_ft_dir:
        # cleanup: remove the "ft" directory
        shutil.rmtree(ft_output_dir)
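
The FinetuningTask and FinetuningProcess classes used above are not shown on this page. The pattern is the standard multiprocessing.JoinableQueue producer/consumer loop: the parent enqueues one task per checkpoint plus one None marker per worker, and each worker pulls tasks until it sees the marker. A minimal sketch of such a worker is below; the internals (attribute names, the exact call into finetune_checkpoint) are assumptions, not the project's actual definitions.

import multiprocessing


class FinetuningTask(object):
    # Assumed shape: a task simply carries the positional args for one checkpoint.
    def __init__(self, args):
        self.args = args


class FinetuningProcess(multiprocessing.Process):
    """Sketch of a queue consumer: pulls FinetuningTask items until it sees None."""
    def __init__(self, tasks, results, data_loaders):
        super().__init__()
        self.tasks = tasks              # multiprocessing.JoinableQueue of FinetuningTask
        self.results = results          # multiprocessing.Queue collecting (name, score) tuples
        self.data_loaders = data_loaders

    def run(self):
        while True:
            task = self.tasks.get()
            if task is None:            # end-of-tasks marker pushed by the parent
                self.tasks.task_done()
                break
            # The real worker would call finetune_checkpoint() with the pre-loaded
            # data loaders; here we only show the queue bookkeeping.
            name, score = finetune_checkpoint(*task.args, loaders=self.data_loaders)
            self.results.put((name, score))
            self.tasks.task_done()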