def __init__(self, data_dir, split, headers=True, columns=None, **kwargs):
    """Set up a CSV-backed reader for one split of a dataset.

    Two things are expected under `data_dir`: an annotation file named
    `<split>.csv` and an image directory named `<split>`. Both are checked
    for existence right away.

    Arguments:
        data_dir: Base directory holding the annotation file and the image
            directory.
        split: Name of the split to read; used to build both paths.
        headers (boolean): Whether the CSV file carries a header row with
            the field names.
        columns (list or str): Column names, given either as a list or as
            a single comma-separated string. Falls back to
            `DEFAULT_COLUMNS` when `None`. Meant for CSV files without
            headers; the value is stored regardless of `headers`.
        **kwargs: Forwarded untouched to the parent constructor.

    Raises:
        InvalidDataDirectory: If the annotation file or the image
            directory does not exist.
    """
    super(CSVReader, self).__init__(**kwargs)

    self._data_dir = data_dir
    self._split = split

    # Annotations live next to the image directory, as `<split>.csv`.
    annotations_path = os.path.join(data_dir, '{}.csv'.format(split))
    self._annotations_path = annotations_path
    if not tf.gfile.Exists(annotations_path):
        raise InvalidDataDirectory(
            'CSV annotation file not found. Should be located at '
            '`{}`'.format(annotations_path)
        )

    # Images for the split are stored in a directory named after it.
    images_dir = os.path.join(data_dir, split)
    self._images_dir = images_dir
    if not tf.gfile.Exists(images_dir):
        raise InvalidDataDirectory(
            'Image directory not found. Should be located at '
            '`{}`'.format(images_dir)
        )

    # Normalize `columns`: default when missing, split when given as a
    # comma-separated string, otherwise keep whatever was passed in.
    if columns is None:
        columns = self.DEFAULT_COLUMNS
    elif is_basestring(columns):
        columns = columns.split(',')
    self._columns = columns
    self._column_names = set(columns)

    self._has_headers = headers

    # Cache for the records.
    # TODO: Don't read it all upfront.
    self._records = None

    # Whether the structure of the CSV file has been checked already.
    self._csv_checked = False

    # Running counters, both starting at zero.
    self.errors = 0
    self.yielded_records = 0
def __init__(self, data_dir, split, columns=DEFAULT_COLUMNS,
             field_mapper=FIELD_MAPPER, with_header=False, **kwargs):
    """Store the reader configuration and reset its counters.

    Arguments:
        data_dir: Base directory of the dataset; stored as-is.
        split: Name of the split to read; stored as-is.
        columns (list or str): Column names, given either as a list or as
            a single comma-separated string (which is split on `,`).
        field_mapper: Stored as-is on the instance.
            NOTE(review): presumably maps CSV fields to the reader's own
            field names -- confirm against its usage.
        with_header (boolean): Stored as-is on the instance.
            NOTE(review): presumably flags that the CSV file has a header
            row -- confirm against its usage.
        **kwargs: Forwarded untouched to the parent constructor.
    """
    super(CSVReader, self).__init__(**kwargs)

    self._data_dir = data_dir
    self._split = split
    # Resolved only after `_data_dir` and `_split` have been assigned.
    self._labels_filename = self._get_labels_filename()

    # A comma-separated string becomes a list; anything else is kept.
    self._columns = (
        columns.split(',') if is_basestring(columns) else columns
    )
    self._field_mapper = field_mapper
    self._with_header = with_header

    # Not populated at construction time.
    self._files = None

    # Running counters, both starting at zero.
    self.errors = 0
    self.yielded_records = 0