예제 #1
0
def main(args, uargs=None):
    """Run the ``catalyst-data split-dataframe`` script.

    Reads the CSV at ``args.in_csv``, passes it to
    ``utils.split_dataframe`` and writes the four returned dataframes
    to CSV files derived from ``args.out_csv``.

    Args:
        args: namespace-like object exposing the attributes
            ``in_csv``, ``out_csv``, ``train_folds``, ``valid_folds``,
            ``infer_folds``, ``tag2class``, ``tag_column``,
            ``class_column``, ``seed`` and ``num_folds``.
            Each ``*_folds`` value that is not ``None`` is converted
            with ``utils.folds_to_list``; ``tag2class``, when not
            ``None``, is a path to a JSON file.
        uargs: not used by this function.

    Output files (``<base>`` is ``args.out_csv`` with a trailing
    ``.csv`` removed): ``<base>.csv``, ``<base>_train.csv``,
    ``<base>_valid.csv`` and ``<base>_infer.csv``, all written without
    the index column.
    """
    dataframe = pd.read_csv(args.in_csv)

    train_folds = (utils.folds_to_list(args.train_folds)
                   if args.train_folds is not None else None)
    valid_folds = (utils.folds_to_list(args.valid_folds)
                   if args.valid_folds is not None else None)
    infer_folds = (utils.folds_to_list(args.infer_folds)
                   if args.infer_folds is not None else None)

    # Load the optional mapping inside a context manager so the file
    # handle is closed deterministically instead of being leaked.
    tag2class = None
    if args.tag2class is not None:
        with open(args.tag2class) as tag2class_file:
            tag2class = json.load(tag2class_file)

    df_all, train, valid, infer = utils.split_dataframe(
        dataframe,
        train_folds=train_folds,
        valid_folds=valid_folds,
        infer_folds=infer_folds,
        tag2class=tag2class,
        tag_column=args.tag_column,
        class_column=args.class_column,
        seed=args.seed,
        n_folds=args.num_folds,
    )

    # Strip a trailing ".csv" so the suffixes below are appended to
    # the bare base name rather than producing "name.csv_train.csv".
    out_csv: str = args.out_csv
    if out_csv.endswith(".csv"):
        out_csv = out_csv[:-4]

    df_all.to_csv(f"{out_csv}.csv", index=False)
    train.to_csv(f"{out_csv}_train.csv", index=False)
    valid.to_csv(f"{out_csv}_valid.csv", index=False)
    infer.to_csv(f"{out_csv}_infer.csv", index=False)
def main(args, uargs=None):
    """Split the dataframe stored in a CSV file and save the parts.

    Reads the CSV at ``args.in_csv``, passes it to ``split_dataframe``
    and writes the four returned dataframes to CSV files derived from
    ``args.out_csv``.

    Args:
        args: namespace-like object exposing the attributes
            ``in_csv``, ``out_csv``, ``train_folds``, ``valid_folds``,
            ``infer_folds``, ``tag2class``, ``tag_column``,
            ``class_column``, ``seed`` and ``num_folds``.
            Each ``*_folds`` value that is not ``None`` is converted
            with ``folds_to_list``; ``tag2class``, when not ``None``,
            is a path to a JSON file.
        uargs: not used by this function.

    Output files (``<base>`` is ``args.out_csv`` with a trailing
    ``.csv`` removed): ``<base>.csv``, ``<base>_train.csv``,
    ``<base>_valid.csv`` and ``<base>_infer.csv``, all written without
    the index column.
    """
    dataframe = pd.read_csv(args.in_csv)

    train_folds = (
        folds_to_list(args.train_folds)
        if args.train_folds is not None else None
    )
    valid_folds = (
        folds_to_list(args.valid_folds)
        if args.valid_folds is not None else None
    )
    infer_folds = (
        folds_to_list(args.infer_folds)
        if args.infer_folds is not None else None
    )

    # Load the optional mapping inside a context manager so the file
    # handle is closed deterministically instead of being leaked.
    tag2class = None
    if args.tag2class is not None:
        with open(args.tag2class) as tag2class_file:
            tag2class = json.load(tag2class_file)

    df_all, train, valid, infer = split_dataframe(
        dataframe,
        train_folds=train_folds,
        valid_folds=valid_folds,
        infer_folds=infer_folds,
        tag2class=tag2class,
        tag_column=args.tag_column,
        class_column=args.class_column,
        seed=args.seed,
        n_folds=args.num_folds,
    )

    # Strip a trailing ".csv" so the suffixes below are appended to
    # the bare base name rather than producing "name.csv_train.csv".
    out_csv: str = args.out_csv
    if out_csv.endswith(".csv"):
        out_csv = out_csv[:-4]

    df_all.to_csv(f"{out_csv}.csv", index=False)
    train.to_csv(f"{out_csv}_train.csv", index=False)
    valid.to_csv(f"{out_csv}_valid.csv", index=False)
    infer.to_csv(f"{out_csv}_infer.csv", index=False)