Example #1
def _create_dataset(uri, batch_size, shuffle, no_image_normalization, cache_dir, overwrite_cache, create_cache_explicitly, prepare_data_iterator):
    class Dataset:
        pass
    dataset = Dataset()
    dataset.uri = uri
    dataset.normalize = not no_image_normalization

    if prepare_data_iterator:
        if cache_dir == '':
            cache_dir = None
        if cache_dir and create_cache_explicitly:
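            # Explicit cache creation: populate cache_dir first, then always read from the cache.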
            if not os.path.exists(cache_dir) or len(os.listdir(cache_dir)) == 0 or overwrite_cache:
                if not os.path.exists(cache_dir):
                    os.mkdir(cache_dir)
                logger.log(99, 'Creating cache data for "' + uri + '"')
                with data_iterator_csv_dataset(uri, batch_size, shuffle, normalize=False, cache_dir=cache_dir) as di:
                    index = 0
                    while index < di.size:
                        progress('', (1.0 * di.position) / di.size)
                        di.next()
                        index += batch_size
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
        elif not cache_dir or overwrite_cache or not os.path.exists(cache_dir) or len(os.listdir(cache_dir)) == 0:
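            # No usable cache yet: iterate the CSV directly (a file cache may be written as a side effect).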
            if cache_dir and not os.path.exists(cache_dir):
                os.mkdir(cache_dir)
            dataset.data_iterator = (lambda: data_iterator_csv_dataset(
                uri, batch_size, shuffle, normalize=dataset.normalize, cache_dir=cache_dir))
        else:
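            # A populated cache directory already exists: read from it directly.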
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
    else:
        dataset.data_iterator = None
    return dataset
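A minimal usage sketch for the helper above, assuming the nnabla imports it relies on (os, logger, progress, and data_iterator_csv_dataset / data_iterator_cache from nnabla.utils.data_iterator); 'train.csv' and './csv_cache' are placeholder paths:

# Hedged usage sketch; paths and batch size are illustrative only.
dataset = _create_dataset(
    uri='train.csv',
    batch_size=64,
    shuffle=True,
    no_image_normalization=False,
    cache_dir='./csv_cache',
    overwrite_cache=False,
    create_cache_explicitly=True,
    prepare_data_iterator=True)

with dataset.data_iterator() as di:
    batch = di.next()  # one list of arrays, ordered as in di.variables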
Example #2
def _create_dataset(uri, batch_size, shuffle, no_image_normalization, cache_dir, overwrite_cache, create_cache_explicitly, prepare_data_iterator):
    class Dataset:
        pass
    dataset = Dataset()
    dataset.uri = uri
    dataset.normalize = not no_image_normalization

    if prepare_data_iterator:
        if cache_dir == '':
            cache_dir = None
        if cache_dir and create_cache_explicitly:
            if not os.path.exists(cache_dir) or overwrite_cache:
                if not os.path.exists(cache_dir):
                    os.mkdir(cache_dir)
                logger.info('Creating cache data for "' + uri + '"')
                with data_iterator_csv_dataset(uri, batch_size, shuffle, normalize=False, cache_dir=cache_dir) as di:
                    index = 0
                    while index < di.size:
                        progress('', (1.0 * di.position) / di.size)
                        di.next()
                        index += batch_size
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
        elif not cache_dir or overwrite_cache or not os.path.exists(cache_dir):
            if cache_dir and not os.path.exists(cache_dir):
                os.mkdir(cache_dir)
            dataset.data_iterator = (lambda: data_iterator_csv_dataset(
                uri, batch_size, shuffle, normalize=dataset.normalize, cache_dir=cache_dir))
        else:
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, normalize=dataset.normalize))
    else:
        dataset.data_iterator = None
    return dataset
Example #3
def _create_dataset(uri, batch_size, shuffle, no_image_normalization, cache_dir, overwrite_cache, create_cache_explicitly, prepare_data_iterator):
    class Dataset:
        pass
    dataset = Dataset()
    dataset.uri = uri
    dataset.normalize = not no_image_normalization

    comm = current_communicator()

    # use same random state for each process until slice is called
    rng = numpy.random.RandomState(0)
    use_memory_cache = comm.size == 1 if comm else True

    if prepare_data_iterator:
        if cache_dir == '':
            cache_dir = None

        # Disable implicit cache creation when MPI is available.
        if cache_dir and (create_cache_explicitly or comm):
            cache_index = os.path.join(cache_dir, "cache_index.csv")
            if not os.path.exists(cache_index) or overwrite_cache:
                if single_or_rankzero():
                    logger.log(99, 'Creating cache data for "' + uri + '"')

                    try:
                        os.makedirs(cache_dir)
                    except OSError:
                        pass  # Python 2's os.makedirs has no exist_ok argument

                    with data_iterator_csv_dataset(uri, batch_size, shuffle, rng=rng, normalize=False, cache_dir=cache_dir, with_memory_cache=False) as di:
                        pass

            rng = numpy.random.RandomState(0)
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, rng=rng, normalize=dataset.normalize, with_memory_cache=use_memory_cache))
        elif not cache_dir or overwrite_cache or not os.path.exists(cache_dir) or len(os.listdir(cache_dir)) == 0:
            if comm:
                logger.critical(
                    'Implicit cache creation is not supported with MPI')
                import sys
                sys.exit(-1)
            else:
                if cache_dir:
                    try:
                        os.makedirs(cache_dir)
                    except OSError:
                        pass  # Python 2's os.makedirs has no exist_ok argument
                dataset.data_iterator = (lambda: data_iterator_csv_dataset(
                    uri, batch_size, shuffle, rng=rng, normalize=dataset.normalize, cache_dir=cache_dir))
        else:
            dataset.data_iterator = (lambda: data_iterator_cache(
                cache_dir, batch_size, shuffle, rng=rng, normalize=dataset.normalize, with_memory_cache=use_memory_cache))
    else:
        dataset.data_iterator = None
    return dataset
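Example #3 seeds every process with the same RandomState(0) so that all ranks build an identical iterator; the intended follow-up, hinted at by the comment in the code, is to slice that iterator per MPI rank. A minimal sketch, assuming comm was obtained from current_communicator() as above:

# Hedged sketch: split the shared iterator across ranks once it has been created.
di = dataset.data_iterator()
if comm and comm.size > 1:
    di = di.slice(numpy.random.RandomState(0), comm.size, comm.rank)
batch = di.next()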
Example #4
def loadData(batch_size):

    cache_dir = "./cache"
    if not os.path.isdir(cache_dir):
        os.mkdir(cache_dir)
        dataset = data_iterator_csv_dataset(
            "../AIStudy.BoardGame/experience.csv",
            batch_size,
            shuffle=True,
            normalize=True,
            cache_dir=cache_dir)
    else:
        dataset = data_iterator_cache(cache_dir,
                                      batch_size,
                                      shuffle=True,
                                      normalize=True)

    variables = dataset.variables
    print(variables)
    """

    s0Index = variables.index('s0')
    print(("index(s0)={}").format(s0Index))
    for n in range(1000):
        data = dataset.next()
        if n==0:
            print(("shape(s0)={}").format(data[s0Index].shape))
        print(("epoch={},position={},size={}, data_size={}").format(dataset.epoch, dataset.position,dataset.size, len(data[0])))
    """

    return dataset
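The commented-out block above already sketches how the returned iterator is consumed; a shorter version, assuming the experience.csv schema really contains a variable named 's0':

dataset = loadData(batch_size=32)
s0_index = dataset.variables.index('s0')  # 's0' is taken from the commented-out block
data = dataset.next()
print(data[s0_index].shape)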
Example #5
def test_sliced_data_iterator_race_condition(num_of_slices, size, batch_size, shuffle):
    from nnabla.utils.data_source_implements import CacheDataSource
    from nnabla.utils.data_iterator import data_iterator_cache

    with generate_cache_dir(size) as cache_dir:
        rng = np.random.RandomState(313)
        iterator = data_iterator_cache(cache_dir, batch_size, shuffle=True)
        sliced_it = iterator.slice(rng, num_of_slices, 1)

        for i in range(size + 5):
            d = sliced_it.next()
        iterator.close()
Example #6
def imagenet_iterator(config, comm, train=True):
    if config['dataset']['dali']:
        if train:
            pipe = DataPipeline(config['dataset']['path'],
                                config['train']['batch_size'], config['dataset']['dali_threads'], comm.rank,
                                num_gpus=comm.n_procs, seed=1, train=train)
        else:
            pipe = DataPipeline(config['dataset']['val_path'],
                                config['train']['batch_size'], config['dataset']['dali_threads'], comm.rank,
                                num_gpus=comm.n_procs, seed=1, train=train)

        data_iterator_ = dali_iterator.DaliIterator(pipe)
        data_iterator_.size = np.ceil(pipe.epoch_size("Reader")/comm.n_procs)
        data_iterator_.batch_size = config['train']['batch_size']

        return data_iterator_
    else:
        return data_iterator_cache(config['dataset']['cache_dir'], config['train']['batch_size'],
                                   shuffle=True, normalize=True)
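imagenet_iterator only reads a handful of keys from config; the layout below is a hedged reconstruction from those accesses, with placeholder values:

# Keys mirror the lookups in imagenet_iterator; every value here is illustrative.
config = {
    'dataset': {
        'dali': False,                      # True selects the DALI pipeline branch
        'path': '/data/imagenet/train',
        'val_path': '/data/imagenet/val',
        'dali_threads': 4,
        'cache_dir': '/data/imagenet/cache',
    },
    'train': {
        'batch_size': 128,
    },
}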
Example #7
def forward_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass

    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset or d.cache_dir == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    orders = {}
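    # Maps each produced row back to a line of the source CSV:
    # identity for CSV input, taken from order.csv for cache input.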
    # With CSV
    if os.path.splitext(args.dataset)[1] == '.csv':
        data_iterator = (lambda: data_iterator_csv_dataset(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize,
            with_memory_cache=False,
            with_file_cache=False))

        # load dataset as csv
        filereader = FileReader(args.dataset)
        with filereader.open(textmode=True, encoding='utf-8-sig') as f:
            rows = [row for row in csv.reader(f)]
        row0 = rows.pop(0)
        if args.replace_path:
            root_path = os.path.dirname(args.dataset)
            root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
        else:
            root_path = '.'
        rows = [row for row in rows if len(row)]
        rows = list(
            map(
                lambda row: list(
                    map(
                        lambda i, x: x if row0[i][0] == '#' or is_float(
                            x) else compute_full_path(root_path, x),
                        range(len(row)), row)), rows))
        for i in range(len(rows)):
            orders[i] = i
    # With Cache
    elif os.path.splitext(args.dataset)[1] == '.cache':
        data_iterator = (lambda: data_iterator_cache(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize))

        # Get original CSV
        original_csv = os.path.join(args.dataset, 'original.csv')
        try:
            # load dataset as csv
            filereader = FileReader(original_csv)
            with filereader.open(textmode=True, encoding='utf-8-sig') as f:
                rows = [row for row in csv.reader(f)]
            row0 = rows.pop(0)
            root_path = '.'
            rows = list(
                map(
                    lambda row: list(
                        map(
                            lambda x: x if is_float(x) else compute_full_path(
                                root_path, x), row)), rows))
        except:
            print('Cannot open', original_csv)
            pass

        # Get original Data order.
        order_csv = os.path.join(args.dataset, 'order.csv')
        try:
            filereader = FileReader(order_csv)
            with filereader.open(textmode=True) as f:
                for original, shuffled in [[int(x) for x in row]
                                           for row in csv.reader(f)]:
                    orders[original] = shuffled
        except:
            print('Cannot open', order_csv)
            for i in range(len(rows)):
                orders[i] = i
    else:
        print('Unsupported extension "{}" in "{}".'.format(
            os.path.splitext(args.dataset)[1], args.dataset))

    callback.update_status(('data.max', len(rows)))
    callback.update_status(('data.current', 0))
    callback.update_status('processing', True)

    result_csv_filename = os.path.join(args.outdir, args.outfile)
    with open(result_csv_filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            if e.repeat_evaluation_type == "std":
                                name = "Uncertainty(Std)"
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[orders[index + i]])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)

                callback.update_status(('data.current', min([index,
                                                             len(rows)])))
                callback.update_forward_time()
                callback.update_status()

                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    callback.process_evaluation_result(args.outdir, result_csv_filename)

    logger.log(99, 'Forward Completed.')
    progress(None)

    callback.update_status(('output_result.csv_header', ','.join(row0)))
    callback.update_status(('output_result.column_num', len(row0)))
    callback.update_status(('output_result.data_num', len(rows)))
    callback.update_status('finished')

    return True
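forward_command expects an argparse-style namespace; the attributes it actually reads are collected below as a hedged sketch (names come from the code, values are placeholders):

import argparse

# Placeholder values only; the attribute names are those accessed by forward_command.
args = argparse.Namespace(
    config='net.nntxt',            # network/executor definition loaded via load.load
    param='parameters.h5',         # optional parameter file; may be falsy
    dataset='input.csv',           # .csv or .cache dataset to run forward on
    outdir='./result',
    outfile='output_result.csv',
    batch_size=-1,                 # values < 1 fall back to the batch size in the config
    replace_path=False)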
Example #8
    if args.uri == 'TINY_IMAGENET_TRAIN':
        sys.path.append(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), '..',
                         'vision', 'imagenet'))
        from tiny_imagenet_data import data_iterator_tiny_imagenet
        with data_iterator_tiny_imagenet(args.batch_size, 'train') as di:
            test_data_iterator(di, args)
    elif args.uri == 'TINY_IMAGENET_VAL':
        sys.path.append(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), '..',
                         'vision', 'imagenet'))
        from tiny_imagenet_data import data_iterator_tiny_imagenet
        with data_iterator_tiny_imagenet(args.batch_size, 'val') as di:
            test_data_iterator(di, args)
    else:
        if os.path.splitext(args.uri)[1].lower() == '.cache':
            from nnabla.utils.data_iterator import data_iterator_cache
            with data_iterator_cache(uri=args.uri,
                                     batch_size=args.batch_size,
                                     shuffle=args.shuffle,
                                     with_memory_cache=args.memory_cache,
                                     normalize=args.normalize) as di:
                test_data_iterator(di, args)
        else:
            from nnabla.utils.data_iterator import data_iterator_csv_dataset
            with data_iterator_csv_dataset(uri=args.uri,
                                           batch_size=args.batch_size,
                                           shuffle=args.shuffle,
                                           normalize=args.normalize,
                                           with_memory_cache=args.memory_cache,
                                           with_file_cache=args.file_cache,
                                           cache_dir=args.output) as di:
                test_data_iterator(di, args)
Example #9
def data_iterator_imagenet(batch_size, cache_dir, rng=None):
    return data_iterator_cache(cache_dir,
                               batch_size,
                               shuffle=True,
                               normalize=False,
                               rng=rng)
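A minimal sketch of calling Example #9's helper, assuming './imagenet_cache' is an existing nnabla file-cache directory:

import numpy as np

di = data_iterator_imagenet(batch_size=64, cache_dir='./imagenet_cache',
                            rng=np.random.RandomState(407))
for _ in range(10):
    batch = di.next()  # arrays ordered as in di.variables; typically (image, label)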