Esempio n. 1
0
    )
    parser.add_argument(
        "indexfile",
        nargs="?",
        default=TRAINING_INDEX,
        help="path to index of resulting parquet files",
    )
    parser.add_argument(
        "outdir",
        nargs="?",
        default=TRAINING_DIR,
        help="directory of parquet files",
    )
    parser.add_argument(
        "--max-token-count",
        type=int,
        default=5,
        help="maximum number of contiguous tokens to match against each label",
    )
    parser.add_argument("--log-level", dest="log_level", default="INFO")
    args = parser.parse_args()
    logger.setLevel(args.log_level.upper())

    logger.info(f"Reading {Path(args.manifest).resolve()}")
    manifest = pd.read_csv(args.manifest)

    indir, index, outdir = Path(args.indir), Path(args.indexfile), Path(args.outdir)
    index.parent.mkdir(parents=True, exist_ok=True)
    outdir.mkdir(parents=True, exist_ok=True)
    extend_and_write_docs(indir, manifest, index, outdir, args.max_token_count)
Esempio n. 2
0
    # First read in the initial configuration.
    os.environ["WANDB_CONFIG_PATHS"] = "config-defaults.yaml"
    run = wandb.init(
        project=WANDB_PROJECT,
        job_type="train",
        allow_val_change=True,
    )
    config = run.config

    def _parse_bool(text):
        """Convert a command-line string such as "false" or "1" to a bool.

        ``bool`` itself is unusable as an argparse ``type`` because every
        non-empty string, including "False", is truthy.
        """
        lowered = text.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        if lowered in ("false", "f", "no", "n", "0", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {text!r}")

    def _converter_for(default):
        """Pick the argparse ``type`` callable matching a config default."""
        # bool must be tested explicitly; type(True) would give plain `bool`.
        if isinstance(default, bool):
            return _parse_bool
        # NoneType cannot be called with a string, so a None default would
        # make the flag impossible to set; accept the raw string instead.
        if default is None:
            return str
        return type(default)

    # Then override it with any parameters passed along the command line.
    parser = argparse.ArgumentParser()

    # Anything in the config is fair game to be overridden by a command line flag.
    for key, value in config.items():
        cli_flag = f"--{key}".replace("_", "-")
        parser.add_argument(cli_flag,
                            dest=key,
                            type=_converter_for(value),
                            default=value)

    args = parser.parse_args()
    config.update(args, allow_val_change=True)

    if not config.use_wandb:
        # NOTE(review): these variables are exported after wandb.init() has
        # already been called above -- confirm they still have the intended
        # effect on the active run.
        os.environ["WANDB_SILENT"] = "true"
        os.environ["WANDB_MODE"] = "dryrun"
        # Replace wandb.log with a no-op so later logging calls do nothing.
        wandb.log = lambda *_args, **_kwargs: None

    # logging only recognises upper-case level names, so normalise strings
    # ("info" -> "INFO") and pass numeric levels through untouched.
    log_level = config.log_level
    if isinstance(log_level, str):
        log_level = log_level.upper()
    logger.setLevel(log_level)

    main(config)