def __init__(self, data, checklines=10, transform=None,
             force_dialect_check=False, dialect=None):
    """
    Set up iterator state and decide how the dialect is determined.

    Parameters
    ----------
    data
        Stored unchanged as ``self.data``; this method does not inspect it.
    checklines : int
        Number of items handed to ``peek`` when the dialect has to be
        auto-detected (i.e. no `dialect` given and `force_dialect_check`
        is False).
    transform
        Stored as ``self.transform``; not used by this method
        (presumably applied to items by the iteration code -- confirm
        against ``_custom_iter``).
    force_dialect_check : bool
        If True, ``self.dialect`` is left as None and nothing is peeked.
    dialect
        An explicit dialect. Mutually exclusive with
        `force_dialect_check`.

    Raises
    ------
    ValueError
        If `force_dialect_check` is True and a `dialect` is also given.
    """
    # Bookkeeping that every code path below starts from.
    self.data = data
    self.transform = transform
    self.checklines = checklines
    self.current_item = None
    self.current_item_number = None
    self.directives = []
    self.warnings = []
    self._observed_dialects = []
    self.dialect = None

    if force_dialect_check:
        if dialect is not None:
            raise ValueError(
                "force_dialect_check is True, but a dialect is provided")
        # In this case, self.dialect remains None.  When
        # parser._split_keyvals gets None as a dialect, it tries to infer
        # a dialect.
        self._iter = self._custom_iter()
        return

    if dialect is None:
        # Nothing was specified: look at the first `checklines` items and
        # pick a dialect from whatever was observed on them.
        # NOTE(review): self.peek is only ever assigned on this path.
        peeked, remaining = peek(self._custom_iter(), checklines)
        self.peek = peeked
        self._iter = remaining
        self._observed_dialects = [item.dialect for item in peeked]
        self.dialect = helpers._choose_dialect(self._observed_dialects)
        return

    # An explicit dialect was supplied; it is the only "observed" one.
    self._observed_dialects = [dialect]
    self.dialect = helpers._choose_dialect(self._observed_dialects)
    self._iter = self._custom_iter()
def __init__(self, data, checklines=10, transform=None,
             force_dialect_check=False, dialect=None):
    """
    Base class for iterating over features.

    DataIterator is the usual entry point, and its docstring describes
    the arguments; this base class is normally not used directly.

    Every subclass (_FileIterator, _URLIterator, _FeatureIterator,
    _StringIterator) inherits the following behavior:

    - ``self.current_item`` and ``self.current_item_number`` are updated
      on each iteration, which helps when debugging or when telling the
      user exactly which item or line number caused a problem.

    - a Feature can be transformed, or filtered out, before it is
      yielded.

    - the dialect is auto-detected by peeking `checklines` items into
      the iterator; those items are then re-read with the detected
      dialect applied.  When more than one dialect is seen,
      ``helpers._choose_dialect`` decides which one to use.

    - directives are tracked.

    Raises
    ------
    ValueError
        If `force_dialect_check` is True and a `dialect` is also given.
    """
    # Bookkeeping that every code path below starts from.
    self.data = data
    self.transform = transform
    self.checklines = checklines
    self.current_item = None
    self.current_item_number = None
    self.directives = []
    self.warnings = []
    self._observed_dialects = []
    self.dialect = None

    if force_dialect_check:
        if dialect is not None:
            raise ValueError(
                "force_dialect_check is True, but a dialect is provided")
        # In this case, self.dialect remains None.  When
        # parser._split_keyvals gets None as a dialect, it tries to infer
        # a dialect.
        self._iter = self._custom_iter()
        return

    if dialect is None:
        # Nothing was specified: look at the first `checklines` items and
        # pick a dialect from whatever was observed on them.
        # NOTE(review): self.peek is only ever assigned on this path.
        peeked, remaining = peek(self._custom_iter(), checklines)
        self.peek = peeked
        self._iter = remaining
        self._observed_dialects = [item.dialect for item in peeked]
        self.dialect = helpers._choose_dialect(self._observed_dialects)
        return

    # An explicit dialect was supplied; it is the only "observed" one.
    self._observed_dialects = [dialect]
    self.dialect = helpers._choose_dialect(self._observed_dialects)
    self._iter = self._custom_iter()