Example #1
    def __init__(self, input_csv_filename, rng=None, shuffle=False):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # Binary mode is required to use the seek and tell functions.
        self._file = open(input_csv_filename, 'rb')

        self._line_positions = []
        line = self._file.readline().decode('utf-8')
        csvreader = csv.reader([line])
        self._process_header(next(csvreader))

        # Store the file position of each data row.
        self._size = 0
        while True:
            self._line_positions.append(self._file.tell())
            line = self._file.readline()
            if not line:  # readline() returns b'' at EOF
                break
            self._size += 1

        # rewind
        self._file.seek(0)

        self._cache_file_order = []
        self._cache_file_data_orders = []
        self._cache_file_names = []

        # Adjust the cache bookkeeping lists to the data size. In most cases
        # this means a multiple of the bunch (mini-batch) size.
        num_of_cache_files = int(
            numpy.ceil(float(self._size) / self._cache_size))
        self._cache_file_order = self._cache_file_order[0:num_of_cache_files]
        self._cache_file_data_orders = self._cache_file_data_orders[
            0:num_of_cache_files]
        if self._size % self._cache_size != 0:
            self._cache_file_data_orders[num_of_cache_files -
                                         1] = self._cache_file_data_orders[
                                             num_of_cache_files -
                                             1][0:self._size %
                                                self._cache_size]

        self._original_order = list(range(self._size))
        self._order = list(range(self._size))
        self._variables = tuple(self._variables_dict.keys())

        # Shuffle
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))
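
The constructors above are normally consumed through nnabla's data iterator API rather than called directly, as Examples #4 and #5 do via data_iterator_csv_dataset. A minimal consumption sketch with a hypothetical 'train.csv' and batch size; di.size, di.variables and di.next() are the iterator members used later on this page:

from nnabla.utils.data_iterator import data_iterator_csv_dataset

# Iterate a CSV dataset batch by batch; variable names come from the CSV header.
with data_iterator_csv_dataset(uri='train.csv', batch_size=64, shuffle=True) as di:
    print(di.size, di.variables)  # number of rows, tuple of variable names
    batch = di.next()             # one mini-batch, one array per variable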
Example #2
    def __init__(self,
                 input_csv_filename,
                 rng=None,
                 shuffle=False,
                 num_of_threads=None):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # read index.csv
        self._file = open(input_csv_filename, 'r', encoding='utf-8')
        csvreader = csv.reader(self._file)

        header = next(csvreader)

        # Store all CSV rows in memory (this variant keeps the data itself,
        # not file positions).
        self._csv_data = list(csvreader)
        self._size = len(self._csv_data)

        self._file.close()

        self._remove_comment_cols(header, self._csv_data)
        self._process_header(header)
        self._variables = tuple(self._variables_dict.keys())

        self._original_order = list(range(self._size))

        # Shuffle; self._order is the CSV row processing order.
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))

        if num_of_threads:
            self._num_of_threads = num_of_threads
        else:
            self._num_of_threads = int(
                nnabla_config.get('DATA_ITERATOR',
                                  'data_source_file_cache_num_of_threads'))
        logger.info('Num of thread is {}'.format(self._num_of_threads))
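
Both the cache size and the default thread count are read from nnabla's configuration. A minimal sketch of those lookups, assuming nnabla_config is the ConfigParser-style object exposed by nnabla.config (the section and option names are exactly the ones used above):

from nnabla.config import nnabla_config

cache_size = int(nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
num_threads = int(nnabla_config.get('DATA_ITERATOR',
                                    'data_source_file_cache_num_of_threads'))
print(cache_size, num_threads)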
Example #3
    def __init__(self,
                 input_csv_filename,
                 rng=None,
                 shuffle=False,
                 process_num=None):
        self._cache_size = int(
            nnabla_config.get('DATA_ITERATOR', 'data_source_file_cache_size'))
        logger.info('Cache size is {}'.format(self._cache_size))

        self._filereader = FileReader(input_csv_filename)
        self._original_source_uri = input_csv_filename
        if rng is None:
            self._rng = numpy.random.RandomState(313)
        else:
            self._rng = rng
        self._shuffle = shuffle

        # read index.csv
        self._file = open(input_csv_filename, 'r')
        csvreader = csv.reader(self._file)

        self._process_header(next(csvreader))
        self._variables = tuple(self._variables_dict.keys())

        # Store all CSV rows in memory.
        self._csv_data = list(csvreader)
        self._size = len(self._csv_data)

        self._file.close()

        self._original_order = list(range(self._size))

        # Shuffle; self._order is the CSV row processing order.
        if self._shuffle:
            self._order = list(self._rng.permutation(list(range(self._size))))
        else:
            self._order = list(range(self._size))

        # Number of worker processes.
        if process_num:
            self._process_num = process_num
        else:
            self._process_num = multiprocessing.cpu_count()
        logger.info('Num of process is {}'.format(self._process_num))
Example #4
def forward_command(args):
    callback.update_status(args)

    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass

    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset or d.cache_dir == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    orders = {}
    # With CSV
    if os.path.splitext(args.dataset)[1] == '.csv':
        data_iterator = (lambda: data_iterator_csv_dataset(
            uri=args.dataset,
            batch_size=config.networks[0].batch_size,
            shuffle=False,
            normalize=normalize,
            with_memory_cache=False,
            with_file_cache=False))

        # load dataset as csv
        filereader = FileReader(args.dataset)
        with filereader.open(textmode=True, encoding='utf-8-sig') as f:
            rows = [row for row in csv.reader(f)]
        row0 = rows.pop(0)
        if args.replace_path:
            root_path = os.path.dirname(args.dataset)
            root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
        else:
            root_path = '.'
        rows = [row for row in rows if len(row)]
        rows = list(
            map(
                lambda row: list(
                    map(
                        lambda i, x: x if row0[i][0] == '#' or is_float(
                            x) else compute_full_path(root_path, x),
                        range(len(row)), row)), rows))
        for i in range(len(rows)):
            orders[i] = i
    # With Cache
    elif os.path.splitext(args.dataset)[1] == '.cache':
        data_iterator = (lambda: data_iterator_cache(uri=args.dataset,
                                                     batch_size=config.
                                                     networks[0].batch_size,
                                                     shuffle=False,
                                                     normalize=normalize))

        # Get original CSV
        original_csv = os.path.join(args.dataset, 'original.csv')
        try:
            # load dataset as csv
            filereader = FileReader(original_csv)
            with filereader.open(textmode=True, encoding='utf-8-sig') as f:
                rows = [row for row in csv.reader(f)]
            row0 = rows.pop(0)
            root_path = '.'
            rows = list(
                map(
                    lambda row: list(
                        map(
                            lambda x: x if is_float(x) else compute_full_path(
                                root_path, x), row)), rows))
        except:
            print('Cannot open', original_csv)
            pass

        # Get original Data order.
        order_csv = os.path.join(args.dataset, 'order.csv')
        try:
            filereader = FileReader(order_csv)
            with filereader.open(textmode=True) as f:
                for original, shuffled in [[int(x) for x in row]
                                           for row in csv.reader(f)]:
                    orders[original] = shuffled
        except:
            print('Cannot open', order_csv)
            for i in range(len(rows)):
                orders[i] = i
    else:
        print('Unsupported extension "{}" in "{}".'.format(
            os.path.splitext(args.dataset)[1], args.dataset))
        return False

    callback.update_status(('data.max', len(rows)))
    callback.update_status(('data.current', 0))
    callback.update_status('processing', True)

    result_csv_filename = os.path.join(args.outdir, args.outfile)
    with open(result_csv_filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            if e.repeat_evaluation_type == "std":
                                name = "Uncertainty(Std)"
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[orders[index + i]])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)

                callback.update_status(('data.current', min([index,
                                                             len(rows)])))
                callback.update_forward_time()
                callback.update_status()

                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    callback.process_evaluation_result(args.outdir, result_csv_filename)

    logger.log(99, 'Forward Completed.')
    progress(None)

    callback.update_status(('output_result.csv_header', ','.join(row0)))
    callback.update_status(('output_result.column_num', len(row0)))
    callback.update_status(('output_result.data_num', len(rows)))
    callback.update_status('finished')

    return True
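
forward_command only needs an object exposing the attributes it reads from args. A hypothetical driver sketch; the attribute names are taken from the code above, while the concrete file names are placeholders:

from argparse import Namespace

args = Namespace(
    config='net.nntxt',            # network/executor definition
    param='parameters.h5',         # trained parameters; falsy value to skip
    dataset='test.csv',            # CSV file or .cache directory
    outdir='result',               # progress.txt and the result CSV go here
    outfile='output_result.csv',   # result file name inside outdir
    batch_size=-1,                 # values < 1 mean "use the config's batch size"
    replace_path=False,            # keep dataset paths relative to '.'
)
forward_command(args)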
Example #5
def forward_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))
    files = []
    files.append(args.config)
    if args.param:
        files.append(args.param)
    batch_size = args.batch_size
    if batch_size < 1:
        batch_size = None

    class ForwardConfig:
        pass

    config = ForwardConfig
    info = load.load(files, prepare_data_iterator=False, batch_size=batch_size)
    config.global_config = info.global_config

    config.executors = info.executors.values()

    config.networks = []
    for e in config.executors:
        if e.network.name in info.networks.keys():
            config.networks.append(info.networks[e.network.name])
        else:
            logger.critical('Network {} is not found.'.format(
                e.network.name))
            return False

    normalize = True
    for d in info.datasets.values():
        if d.uri == args.dataset:
            normalize = d.normalize
    for e in config.executors:
        normalize = normalize and not e.no_image_normalization

    data_iterator = (lambda: data_iterator_csv_dataset(uri=args.dataset,
                                                       batch_size=config.
                                                       networks[0].batch_size,
                                                       shuffle=False,
                                                       normalize=normalize,
                                                       with_memory_cache=False,
                                                       with_file_cache=False))

    # load dataset as csv
    filereader = FileReader(args.dataset)
    with filereader.open(textmode=True) as f:
        rows = [row for row in csv.reader(f)]
    row0 = rows.pop(0)
    root_path = os.path.dirname(args.dataset)
    root_path = os.path.abspath(root_path.replace('/|\\', os.path.sep))
    rows = list(
        map(
            lambda row: list(
                map(
                    lambda x: x
                    if is_float(x) else compute_full_path(root_path, x), row)),
            rows))

    with open(os.path.join(args.outdir, 'output_result.csv'), 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        with data_iterator() as di:
            index = 0
            while index < di.size:
                data = di.next()
                result, outputs = _forward(args, index, config, data,
                                           di.variables)
                if index == 0:
                    for name, dim in zip(result.names, result.dims):
                        if dim == 1:
                            row0.append(name)
                        else:
                            for d in range(dim):
                                row0.append(name + '__' + str(d))
                    writer.writerow(row0)
                for i, output in enumerate(outputs):
                    if index + i < len(rows):
                        import copy
                        row = copy.deepcopy(rows[index + i])
                        row.extend(output)
                        writer.writerow(row)
                index += len(outputs)
                logger.log(
                    99, 'data {} / {}'.format(min([index, len(rows)]),
                                              len(rows)))

    logger.log(99, 'Forward Completed.')
    progress(None)
    return True
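
Both forward_command variants rely on an is_float helper to decide whether a CSV cell is a numeric value or a path that must be expanded with compute_full_path. The helper is not shown on this page; a plausible stand-in with the same contract would be:

def is_float(x):
    # True if the string parses as a float, False otherwise.
    try:
        float(x)
        return True
    except (TypeError, ValueError):
        return False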