Ejemplo n.º 1
0
def train_individual_model(run_args):
    """Train a single fold by handing the training command to ``run``.

    ``run_args`` is a dict with two entries:

    * ``"args"``: the options of this fold. ``id`` and ``train_script`` are
      required; ``run``, ``num_threads`` and ``verbose`` are optional and
      default to ``None``, ``-1`` and ``False`` respectively.
    * ``"json"``: the json file passed to the training script via ``--files``.
      It contains all training parameters, including the files for training
      and validation.

    The command is wrapped by ``prefix_run_command`` because it might be
    prefixed (e.g. with slurm as job scheduler, to train all folds on
    different machines).

    Returns the ``"args"`` dict that was passed in.
    """
    fold_args = run_args["args"]
    json_path = run_args["json"]

    # One logger per fold; its handlers get an empty terminator so that no
    # extra line ending is appended to the forwarded output.
    log = logging.getLogger(f"FOLD {fold_args['id']}")
    for handler in log.handlers:
        handler.terminator = ''

    command = prefix_run_command(
        [sys.executable, "-u", fold_args["train_script"], "--files", json_path],
        fold_args.get("run", None),
        {"threads": fold_args.get('num_threads', -1)},
    )

    for out, err in run(command, verbose=fold_args.get("verbose", False)):
        # Output is only forwarded in verbose mode
        if not fold_args.get("verbose", False):
            continue
        # Both streams are logged at INFO level, stdout chunk first
        for chunk in (out, err):
            if chunk:
                log.info(chunk.rstrip("\n"))

    return fold_args
Ejemplo n.º 2
0
def train_individual_model(run_args):
    """Train a single fold by handing the training command to ``run``.

    ``run_args`` is a dict with two entries:

    * ``"args"``: the options of this fold; the keys ``train_script``,
      ``run``, ``num_threads`` and ``verbose`` are all required.
    * ``"json"``: the json file passed to the training script via ``--files``.
      It contains all training parameters, including the files for training
      and validation.

    The command is wrapped by ``prefix_run_command`` because it might be
    prefixed (e.g. with slurm as job scheduler, to train all folds on
    different machines).

    Returns the ``"args"`` dict that was passed in.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # Local import: keeps this function independent of the module's imports.
    import sys

    args = run_args["args"]
    train_args_json = run_args["json"]

    command = prefix_run_command(
        [
            # Use the interpreter that runs this process instead of whatever
            # "python3" resolves to on PATH (which may be a different
            # environment, or missing entirely).
            sys.executable,
            "-u",  # unbuffered output, so lines arrive as they are produced
            args["train_script"],
            "--files",
            train_args_json,
        ],
        args["run"],
        {"threads": args['num_threads']},
    )

    for line in run(command, verbose=args["verbose"]):
        # Print the output of the training command
        if args["verbose"]:
            print(line)

    return args
Ejemplo n.º 3
0
def train_individual_model(run_args):
    """Train a single fold by handing the training command to ``run``.

    ``run_args`` is a dict with two entries:

    * ``"args"``: the options of this fold; the keys ``train_script``,
      ``run``, ``num_threads`` and ``verbose`` are required, and ``id`` is
      required when ``verbose`` is truthy.
    * ``"json"``: the json file passed to the training script via ``--files``.
      It contains all training parameters, including the files for training
      and validation.

    The command is wrapped by ``prefix_run_command`` because it might be
    prefixed (e.g. with slurm as job scheduler, to train all folds on
    different machines).

    Returns the ``"args"`` dict that was passed in.

    Raises:
        KeyError: if one of the required keys is missing.
    """
    # Local import: keeps this function independent of the module's imports.
    import sys

    args = run_args["args"]
    train_args_json = run_args["json"]

    command = prefix_run_command(
        [
            # Use the interpreter that runs this process instead of whatever
            # "python3" resolves to on PATH (which may be a different
            # environment, or missing entirely).
            sys.executable,
            "-u",  # unbuffered output, so lines arrive as they are produced
            args["train_script"],
            "--files",
            train_args_json,
        ],
        args["run"],
        {"threads": args['num_threads']},
    )

    for line in run(command, verbose=args["verbose"]):
        # Print the output, tagged with the fold id; no line ending is added
        if args["verbose"]:
            print("FOLD {} | {}".format(args["id"], line), end="")

    return args
def train_individual_model(run_args):
    """Train a single fold by handing the training command to ``run``.

    ``run_args`` is a dict with two entries:

    * ``"args"``: the options of this fold. ``train_script`` is required
      (and ``id`` when output is printed); ``run``, ``num_threads`` and
      ``verbose`` are optional and default to ``None``, ``-1`` and ``False``.
    * ``"json"``: the json file passed to the training script via ``--files``.
      It contains all training parameters, including the files for training
      and validation.

    The command is wrapped by ``prefix_run_command`` because it might be
    prefixed (e.g. with slurm as job scheduler, to train all folds on
    different machines).

    Returns the ``"args"`` dict that was passed in.
    """
    fold_args = run_args["args"]
    json_path = run_args["json"]

    command = prefix_run_command(
        [sys.executable, "-u", fold_args["train_script"], "--files", json_path],
        fold_args.get("run", None),
        {"threads": fold_args.get('num_threads', -1)},
    )

    for output_line in run(command, verbose=fold_args.get("verbose", False)):
        # Output is only echoed in verbose mode
        if not fold_args.get("verbose", False):
            continue
        # Tag every line with the fold id; no line ending is added
        print("FOLD {} | {}".format(fold_args["id"], output_line), end="")

    return fold_args
Ejemplo n.º 5
0
def run_for_single_line(args):
    """Run cross-fold training and evaluation for one ``n_lines`` setting.

    The working directory is
    ``<base_dir>/<"all" or n_lines>/<network>/<pretraining>``, where
    ``<pretraining>`` is ``"scratch"`` or a comma-joined name derived from
    ``args.weights``. ``args.base_dir`` is overwritten with that path, and
    the sub-directories ``tmp``, ``models`` and ``predictions`` are created
    inside it.

    Args:
        args: Namespace-like object. Attributes read here: ``base_dir``,
            ``n_lines``, ``weights``, ``network``, ``train_files``,
            ``max_parallel_models``, ``skip_train``, ``skip_eval``,
            ``single_fold``, ``n_folds``, ``num_threads``, ``batch_size``,
            ``eval_files``, ``verbose`` and ``run``. Several attributes are
            set on it before it is passed to ``cross_fold_train.main``.

    Returns:
        The object unpickled from ``<working dir>/tmp/prediction.pkl``.

    Raises:
        Exception: if a model selected via ``single_fold`` does not exist, or
            if the number of models found differs from ``n_folds``.
    """
    # Local imports: keep this function independent of the module's imports.
    import pickle
    import sys

    # lines/network/pretraining as base dir
    args.base_dir = os.path.join(args.base_dir, "all" if args.n_lines < 0 else str(args.n_lines))
    pretrain_prefix = "scratch"
    if args.weights:
        pretrain_prefix = ",".join([split_all_ext(os.path.basename(path))[0] for path in args.weights])

    args.base_dir = os.path.join(args.base_dir, args.network, pretrain_prefix)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several runs share a directory
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files: a random subset of n_lines files, or everything
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    # Self-assignment: leaves the value unchanged, but fails early with an
    # AttributeError if the option is missing.
    args.max_parallel_models = args.max_parallel_models
    args.best_models_dir = best_models_dir
    args.temporary_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.text_files = None
    args.gt_extension = None
    args.dataset = DataSetType.FILE
    args.best_model_label = "{id}"
    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        if args.single_fold:
            # Only the explicitly requested folds are evaluated; each must exist
            models = [os.path.join(best_models_dir, "{}.ckpt.json".format(sf)) for sf in args.single_fold]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            # Otherwise every json file in the models dir counts as one fold's model
            models = [os.path.join(best_models_dir, d) for d in sorted(os.listdir(best_models_dir)) if d.endswith("json")]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        command = [
            # Use the interpreter that runs this process instead of whatever
            # "python3" resolves to on PATH.
            sys.executable, "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--eval_imgs", *args.eval_files,
        ]
        if args.verbose:
            command.append("--verbose")
        command += ["--checkpoint", *models]

        for line in run(prefix_run_command(command, args.run, {"threads": args.num_threads}),
                        verbose=args.verbose):
            # Print the output of the eval command
            if args.verbose:
                print(line)

    # NOTE(review): pickle.load can execute arbitrary code from a crafted file.
    # The dump path is the one handed to the eval script via --dump (or left
    # over from an earlier run when skip_eval is set); only use trusted dirs.
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction
Ejemplo n.º 6
0
def run_for_single_line(args):
    """Run cross-fold training and evaluation for one ``n_lines`` setting.

    The working directory is
    ``<base_dir>/<"all" or n_lines>/<network>/<pretraining>``, where
    ``<pretraining>`` is ``"scratch"`` or a comma-joined name derived from
    ``args.weights``. ``args.base_dir`` is overwritten with that path, and
    the sub-directories ``tmp``, ``models`` and ``predictions`` are created
    inside it.

    Args:
        args: Namespace-like object. Attributes read here: ``base_dir``,
            ``n_lines``, ``weights``, ``network``, ``train_files``,
            ``max_parallel_models``, ``skip_train``, ``skip_eval``,
            ``single_fold``, ``n_folds``, ``num_threads``, ``batch_size``,
            ``eval_files``, ``verbose`` and ``run``. Several attributes are
            set on it before it is passed to ``cross_fold_train.main``.

    Returns:
        The object unpickled from ``<working dir>/tmp/prediction.pkl``.

    Raises:
        Exception: if a model selected via ``single_fold`` does not exist, or
            if the number of models found differs from ``n_folds``.
    """
    # Local imports: keep this function independent of the module's imports.
    import pickle
    import sys

    # lines/network/pretraining as base dir
    args.base_dir = os.path.join(args.base_dir, "all" if args.n_lines < 0 else str(args.n_lines))
    pretrain_prefix = "scratch"
    if args.weights:
        pretrain_prefix = ",".join([split_all_ext(os.path.basename(path))[0] for path in args.weights])

    args.base_dir = os.path.join(args.base_dir, args.network, pretrain_prefix)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several runs share a directory
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files: a random subset of n_lines files, or everything
    files = args.train_files
    if args.n_lines > 0:
        all_files = glob_all(args.train_files)
        files = random.sample(all_files, args.n_lines)

    # run the cross-fold-training
    # Self-assignment: leaves the value unchanged, but fails early with an
    # AttributeError if the option is missing.
    args.max_parallel_models = args.max_parallel_models
    args.best_models_dir = best_models_dir
    args.temporary_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.best_model_label = "{id}"
    if not args.skip_train:
        cross_fold_train.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "experiment_eval.py")

        if args.single_fold:
            # Only the explicitly requested folds are evaluated; each must exist
            models = [os.path.join(best_models_dir, "{}.ckpt.json".format(sf)) for sf in args.single_fold]
            for m in models:
                if not os.path.exists(m):
                    raise Exception("Expected model at '{}', but file does not exist".format(m))
        else:
            # Otherwise every json file in the models dir counts as one fold's model
            models = [os.path.join(best_models_dir, d) for d in sorted(os.listdir(best_models_dir)) if d.endswith("json")]
            if len(models) != args.n_folds:
                raise Exception("Expected {} models, one for each fold respectively, but only {} models were found".format(
                    args.n_folds, len(models)
                ))

        command = [
            # Use the interpreter that runs this process instead of whatever
            # "python3" resolves to on PATH.
            sys.executable, "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--eval_imgs", *args.eval_files,
        ]
        if args.verbose:
            command.append("--verbose")
        command += ["--checkpoint", *models]

        for line in run(prefix_run_command(command, args.run, {"threads": args.num_threads}),
                        verbose=args.verbose):
            # Print the output of the eval command
            if args.verbose:
                print(line)

    # NOTE(review): pickle.load can execute arbitrary code from a crafted file.
    # The dump path is the one handed to the eval script via --dump (or left
    # over from an earlier run when skip_eval is set); only use trusted dirs.
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction
Ejemplo n.º 7
0
def run_for_single_line(args):
    """Run training and evaluation of a single model for one ``n_lines`` setting.

    The working directory is
    ``<base_dir>/<"all" or n_lines>/<network>/<pretraining>``, where
    ``<pretraining>`` is ``"scratch"`` or a comma-joined name derived from
    ``args.weights``. ``args.base_dir`` is overwritten with that path, and
    the sub-directories ``tmp``, ``models`` and ``predictions`` are created
    inside it.

    Args:
        args: Namespace-like object. Attributes read here: ``base_dir``,
            ``n_lines``, ``weights``, ``network``, ``files``, ``skip_train``,
            ``skip_eval``, ``num_threads``, ``batch_size``, ``eval_files``,
            ``verbose`` and ``run``. Several attributes (including ``files``)
            are set on it before it is passed to ``train_script.main``.

    Returns:
        The object unpickled from ``<working dir>/tmp/prediction.pkl``.

    Raises:
        Exception: if ``<working dir>/models/best.ckpt.json`` does not exist
            when the evaluation is about to start.
    """
    # Local imports: keep this function independent of the module's imports.
    import pickle
    import sys

    # lines/network/pretraining as base dir
    args.base_dir = os.path.join(args.base_dir, "all" if args.n_lines < 0 else str(args.n_lines))
    pretrain_prefix = "scratch"
    if args.weights:
        pretrain_prefix = ",".join([split_all_ext(os.path.basename(path))[0] for path in args.weights])

    args.base_dir = os.path.join(args.base_dir, args.network, pretrain_prefix)

    tmp_dir = os.path.join(args.base_dir, "tmp")
    best_models_dir = os.path.join(args.base_dir, "models")
    prediction_dir = os.path.join(args.base_dir, "predictions")
    # exist_ok avoids the check-then-create race when several runs share a directory
    for directory in (args.base_dir, tmp_dir, best_models_dir, prediction_dir):
        os.makedirs(directory, exist_ok=True)

    # select number of files: a random subset of n_lines files, or everything
    files = args.files
    if args.n_lines > 0:
        all_files = glob_all(args.files)
        files = random.sample(all_files, args.n_lines)

    # run the training
    args.early_stopping_best_model_output_dir = best_models_dir
    args.output_dir = tmp_dir
    args.keep_temporary_files = False
    args.files = files
    args.text_files = None
    args.gt_extension = None
    args.dataset = DataSetType.FILE
    args.best_model_label = "{id}"
    if not args.skip_train:
        train_script.main(args)

    dump_file = os.path.join(tmp_dir, "prediction.pkl")

    # run the prediction
    if not args.skip_eval:
        # locate the eval script (must be in the same dir as "this")
        predict_script_path = os.path.join(this_absdir, "predict_and_eval.py")

        model = os.path.join(best_models_dir, "best.ckpt.json")
        if not os.path.exists(model):
            raise Exception(f"Expected model at '{model}', but file does not exist")

        command = [
            # Use the interpreter that runs this process instead of whatever
            # "python3" resolves to on PATH.
            sys.executable, "-u",
            predict_script_path,
            "-j", str(args.num_threads),
            "--batch_size", str(args.batch_size),
            "--dump", dump_file,
            "--files", *args.eval_files,
        ]
        if args.verbose:
            command.append("--verbose")
        command += ["--checkpoint", model]

        for line in run(prefix_run_command(command, args.run, {"threads": args.num_threads}),
                        verbose=args.verbose):
            # Print the output of the eval command
            if args.verbose:
                print(line)

    # NOTE(review): pickle.load can execute arbitrary code from a crafted file.
    # The dump path is the one handed to the eval script via --dump (or left
    # over from an earlier run when skip_eval is set); only use trusted dirs.
    with open(dump_file, 'rb') as f:
        prediction = pickle.load(f)

    return prediction