def _write(source: ArraySource,
           array: tables.CArray,
           batchrows: int,
           n_workers: int,
           transform: Worker) -> None:
    """Copy `source` into `array` batch by batch, then flush `array`.

    The rows of `source` are split into slices produced by
    `batch_slices(batchrows, len(source))`. Those slices are handed to
    `task_list` together with `source`, `transform` and `n_workers`, and
    each result is assigned into the matching row range of `array`.

    Parameters
    ----------
    source
        Data to read from; its length determines the number of rows.
    array
        Destination array; assigned to by row range and flushed at the end.
    batchrows
        Batch size passed to `batch_slices`.
    n_workers
        Worker count passed to `task_list`.
    transform
        Worker passed to `task_list` to process each batch.
    """
    total_rows = len(source)
    row_batches = list(batch_slices(batchrows, total_rows))
    results = task_list(row_batches, source, transform, n_workers)

    # `with_slices` pairs every result with the slice it belongs to, so the
    # data can be written back to the same row range it came from.
    for row_slice, block in with_slices(results):
        array[row_slice.start:row_slice.stop] = block

    array.flush()
def write_trainingdata(args: ProcessTrainingArgs) -> None:
    """Process the training targets in batches and pass them to `tfwrite.training`.

    Logs the test fold and the batch size, builds a
    `_TrainingDataProcessor` from the feature path, image spec and
    halfwidth in `args`, runs it over batches of `args.target_src` via
    `task_list`, and forwards the resulting iterator, the row count, the
    output directory, the test fold and the fold iterator to
    `tfwrite.training`.

    Parameters
    ----------
    args
        Bundle of settings; this function reads `testfold`, `folds`,
        `batchsize`, `target_src`, `feature_path`, `image_spec`,
        `halfwidth`, `nworkers` and `directory` from it.
    """
    fold_msg = "Testing data is fold {} of {}".format(args.testfold,
                                                      args.folds.K)
    log.info(fold_msg)
    batch_msg = "Writing training data to tfrecord in {}-point batches".format(
        args.batchsize)
    log.info(batch_msg)

    total_points = len(args.target_src)
    processor = _TrainingDataProcessor(args.feature_path,
                                       args.image_spec,
                                       args.halfwidth)
    batches = list(batch_slices(args.batchsize, total_points))
    results = task_list(batches, args.target_src, processor, args.nworkers)

    # The fold iterator is built with the same batch size as the data batches.
    fold_batches = args.folds.iterator(args.batchsize)
    tfwrite.training(results, total_points, args.directory,
                     args.testfold, fold_batches)
def write_querydata(args: ProcessQueryArgs) -> None:
    """Process one strip of query points in batches and pass them to `tfwrite.query`.

    Logs the strip index and the batch size, obtains the batched indices
    and total count for the strip from `indices_strip`, runs a
    `_QueryDataProcessor` over those batches via `task_list` (using an
    `IdReader` as the data source), and forwards the resulting iterator,
    the total count, the output directory and the tag to `tfwrite.query`.

    Parameters
    ----------
    args
        Bundle of settings; this function reads `strip_idx`,
        `total_strips`, `batchsize`, `image_spec`, `feature_path`,
        `halfwidth`, `nworkers`, `directory` and `tag` from it.
    """
    strip_msg = "Query data is strip {} of {}".format(args.strip_idx,
                                                      args.total_strips)
    log.info(strip_msg)
    batch_msg = "Writing query data to tfrecord in {}-point batches".format(
        args.batchsize)
    log.info(batch_msg)

    identity_reader = IdReader()
    index_batches, total_points = indices_strip(args.image_spec,
                                                args.strip_idx,
                                                args.total_strips,
                                                args.batchsize)
    processor = _QueryDataProcessor(args.feature_path,
                                    args.image_spec,
                                    args.halfwidth)

    # Materialise the index batches up front, as `task_list` is given a list.
    batches = list(index_batches)
    results = task_list(batches, identity_reader, processor, args.nworkers)
    tfwrite.query(results, total_points, args.directory, args.tag)
def _run(self, index_list: List[np.ndarray]) -> Iterator[XData]:
    """Run the worker over `index_list` and yield the results as XData.

    `task_list` is called immediately with `index_list`, an `IdReader`,
    `self.worker` and `self.nworkers`. Each item it produces is converted
    lazily with `dataarrays_to_xdata(item, self.meta)` as the returned
    iterator is consumed.
    """
    worker_output = task_list(index_list, IdReader(),
                              self.worker, self.nworkers)
    # Generator expression: conversion happens only as the caller iterates.
    return (dataarrays_to_xdata(arrays, self.meta)
            for arrays in worker_output)