def split_processing(dataset, api, args, resume, multi_label_data=None,
                     session_file=None, path=None, log=None):
    """Split a dataset into train and test datasets.

    Builds the two sampled halves of ``dataset`` by delegating to
    ``alternative_dataset_processing`` once per part: the train part keeps
    ``1 - args.test_split`` of the rows (in-bag) and the test part takes the
    complementary out-of-bag sample.

    Returns a ``(train_dataset, test_dataset, resume)`` tuple.
    """
    datasets = {"train": None, "test": None}
    sample_rate = 1 - args.test_split
    # (part name, rate shown in the dataset name, out_of_bag flag)
    for part, shown_rate, out_of_bag in (("train", sample_rate, False),
                                         ("test", args.test_split, True)):
        split_args = r.set_dataset_split_args(
            "%s - %s (%s %%)" % (args.name, part, int(shown_rate * 100)),
            args.description_, args, sample_rate, out_of_bag=out_of_bag,
            multi_label_data=multi_label_data)
        datasets[part], resume = alternative_dataset_processing(
            dataset, part, split_args, api, args, resume,
            session_file=session_file, path=path, log=log)
    return datasets["train"], datasets["test"], resume
def split_range_processing(dataset, api, args, resume, multi_label_data=None,
                           session_file=None, path=None, log=None):
    """Split a dataset into train and test datasets using row ranges.

    Unlike the sampled split, this assigns the first ``1 - args.test_split``
    fraction of rows to the train dataset and the remaining rows to the test
    dataset, via the ``range_`` argument.

    Returns a ``(train_dataset, test_dataset, resume)`` tuple.

    NOTE(review): mutates ``args`` in place — ``args.range_`` is rewritten
    for each part and ``args.test_split`` is zeroed so that no additional
    sampling is applied; callers that reuse ``args`` afterwards see these
    changes.
    """
    rows = dataset["object"]["rows"]
    held_out_rate = args.test_split
    kept_rate = 1 - held_out_rate
    boundary = int(rows * kept_rate)
    # Train part: rows [1, boundary]; disable the sampled split.
    args.range_ = [1, boundary]
    args.test_split = 0
    train_args = r.set_dataset_split_args(
        "%s - train (%s %%)" % (args.name, int(kept_rate * 100)),
        args.description_, args, multi_label_data=multi_label_data)
    train_dataset, resume = alternative_dataset_processing(
        dataset, "train", train_args, api, args, resume,
        session_file=session_file, path=path, log=log)
    # Test part: the remaining rows [boundary + 1, rows].
    args.range_ = [boundary + 1, rows]
    test_args = r.set_dataset_split_args(
        "%s - test (%s %%)" % (args.name, int(held_out_rate * 100)),
        args.description_, args, multi_label_data=multi_label_data)
    test_dataset, resume = alternative_dataset_processing(
        dataset, "test", test_args, api, args, resume,
        session_file=session_file, path=path, log=log)
    return train_dataset, test_dataset, resume
# NOTE(review): this re-defines split_range_processing (see the earlier
# definition in this file); at import time this later definition wins.
# Consider removing one of the two copies.
def split_range_processing(dataset, api, args, resume, multi_label_data=None,
                           session_file=None, path=None, log=None):
    """Splits a dataset into train and test datasets using ranges

    The first ``1 - args.test_split`` fraction of the rows becomes the
    train dataset and the remaining rows become the test dataset.
    Returns a ``(train_dataset, test_dataset, resume)`` tuple.

    Mutates ``args`` in place: ``args.range_`` is rewritten for each part
    and ``args.test_split`` is set to 0 so no sampled split is applied.
    """
    train_dataset = None
    test_dataset = None
    test_rate = args.test_split
    train_rate = 1 - test_rate
    # Row index (1-based, inclusive) where the train part ends.
    split_row = int(dataset["object"]["rows"] * train_rate)
    args.range_ = [1, split_row]
    # Zero the sampled-split rate: the range alone defines each part.
    args.test_split = 0
    dataset_alternative_args = r.set_dataset_split_args(
        "%s - train (%s %%)" % (args.name, int(train_rate * 100)),
        args.description_, args,
        multi_label_data=multi_label_data)
    train_dataset, resume = alternative_dataset_processing(
        dataset, "train", dataset_alternative_args, api, args, resume,
        session_file=session_file, path=path, log=log)
    # Test part covers the rows after the split point.
    args.range_ = [split_row + 1, dataset["object"]["rows"]]
    dataset_alternative_args = r.set_dataset_split_args(
        "%s - test (%s %%)" % (args.name, int(test_rate * 100)),
        args.description_, args,
        multi_label_data=multi_label_data)
    test_dataset, resume = alternative_dataset_processing(
        dataset, "test", dataset_alternative_args, api, args, resume,
        session_file=session_file, path=path, log=log)
    return train_dataset, test_dataset, resume
def split_processing(dataset, name, description, api, args, resume,
                     session_file=None, path=None, log=None):
    """Split a dataset into train and test datasets.

    The train part keeps ``1 - args.test_split`` of the rows (in-bag) and
    the test part takes the complementary out-of-bag sample. When
    ``resume`` is set, each part is first looked up in the checkpoint log
    files and only created if it is not found there.

    Returns a ``(train_dataset, test_dataset, resume)`` tuple.
    """
    sample_rate = 1 - args.test_split
    train_dataset, resume = _dataset_split(
        dataset, name, description, api, args, resume, "train",
        sample_rate, sample_rate, False, session_file, path, log)
    test_dataset, resume = _dataset_split(
        dataset, name, description, api, args, resume, "test",
        sample_rate, args.test_split, True, session_file, path, log)
    return train_dataset, test_dataset, resume


def _dataset_split(dataset, name, description, api, args, resume, part,
                   sample_rate, shown_rate, out_of_bag, session_file, path,
                   log):
    """Resume or create one part ("train"/"test") of a dataset split.

    Returns a ``(part_dataset, resume)`` tuple. ``shown_rate`` is only
    used to label the new dataset; ``sample_rate``/``out_of_bag`` define
    the actual sample.
    """
    part_dataset = None
    # if resuming, try to extract the part's dataset from log files
    if resume:
        message = u.dated("Dataset not found. Resuming.\n")
        resume, part_dataset = c.checkpoint(
            c.is_dataset_created, path, "_%s" % part, debug=args.debug,
            message=message, log_file=session_file, console=args.verbosity)
    if part_dataset is None:
        dataset_split_args = r.set_dataset_split_args(
            "%s - %s (%s %%)" % (name, part, int(shown_rate * 100)),
            description, args, sample_rate, out_of_bag=out_of_bag)
        part_dataset = r.create_dataset(
            dataset, dataset_split_args, args, api, path, session_file,
            log, part)
        # Only fetch the finished resource when creation returned one.
        if part_dataset:
            part_dataset = r.get_dataset(
                part_dataset, api, args.verbosity, session_file)
    return part_dataset, resume
# NOTE(review): this re-defines split_processing (see the earlier
# definition in this file); at import time this later definition wins.
# Consider removing one of the two copies.
def split_processing(dataset, name, description, api, args, resume,
                     session_file=None, path=None, log=None):
    """Splits a dataset into train and test datasets

    The train part keeps ``1 - args.test_split`` of the rows (in-bag) and
    the test part takes the complementary out-of-bag sample. When
    ``resume`` is set, each part is first looked up in the checkpoint log
    files and only created if it is not found there.
    Returns a ``(train_dataset, test_dataset, resume)`` tuple.
    """
    train_dataset = None
    test_dataset = None
    sample_rate = 1 - args.test_split
    # if resuming, try to extract train dataset from log files
    if resume:
        message = u.dated("Dataset not found. Resuming.\n")
        resume, train_dataset = c.checkpoint(
            c.is_dataset_created, path, "_train", debug=args.debug,
            message=message, log_file=session_file,
            console=args.verbosity)
    if train_dataset is None:
        dataset_split_args = r.set_dataset_split_args(
            "%s - train (%s %%)" % (name, int(sample_rate * 100)),
            description, args, sample_rate, out_of_bag=False)
        train_dataset = r.create_dataset(
            dataset, dataset_split_args, args, api, path, session_file,
            log, "train")
        # Only fetch the finished resource when creation returned one.
        if train_dataset:
            train_dataset = r.get_dataset(
                train_dataset, api, args.verbosity, session_file)
    # if resuming, try to extract test dataset from log files
    if resume:
        message = u.dated("Dataset not found. Resuming.\n")
        resume, test_dataset = c.checkpoint(
            c.is_dataset_created, path, "_test", debug=args.debug,
            message=message, log_file=session_file,
            console=args.verbosity)
    if test_dataset is None:
        dataset_split_args = r.set_dataset_split_args(
            "%s - test (%s %%)" % (name, int(args.test_split * 100)),
            description, args, sample_rate, out_of_bag=True)
        test_dataset = r.create_dataset(
            dataset, dataset_split_args, args, api, path, session_file,
            log, "test")
        if test_dataset:
            test_dataset = r.get_dataset(
                test_dataset, api, args.verbosity, session_file)
    return train_dataset, test_dataset, resume