def iget_records(custom_headers=None, **keywords):
    """Return a generator of records (dicts) read from an excel source.

    Similar to :meth:`pyexcel.get_records` but with a smaller memory
    footprint. Requires the headers to be in the first row and the data
    matrix to be of equal length; works well with large files.

    :param custom_headers: optional list selecting and ordering the
        columns that appear in each yielded record
    :param keywords: forwarded to ``sources.get_sheet_stream``
    """
    sheet_stream = sources.get_sheet_stream(on_demand=True, **keywords)
    headers = None
    for index, row in enumerate(sheet_stream.payload):
        if index == 0:
            # first row supplies the column names for all later rows
            headers = row
            continue
        if custom_headers:
            # re-project each record onto the caller-supplied column order
            lookup = dict(
                zip_longest(headers, row, fillvalue=constants.DEFAULT_NA))
            record = OrderedDict(
                (name, lookup[name]) for name in custom_headers)
        else:
            # keep the natural header order from the first row
            record = OrderedDict(
                zip_longest(headers, row, fillvalue=constants.DEFAULT_NA))
        yield record
def yield_dict_to_array(the_dict, with_keys=True):
    """Generate the rows of an array built from a dictionary of columns.

    The example dict is::

        {
            "Column 1": [1, 2, 3],
            "Column 2": [5, 6, 7, 8],
            "Column 3": [9, 10, 11, 12, 13],
        }

    The output will be::

        [
            ["Column 1", "Column 2", "Column 3"],
            [1, 5, 9],
            [2, 6, 10],
            [3, 7, 11],
            ['', 8, 12],
            ['', '', 13]
        ]

    :param dict the_dict: the dictionary to be converted.
    :param bool with_keys: to write the keys as the first row or not
    """
    column_names = the_dict.keys()
    if not PY2:
        # Python 3 returns a view; make it an indexable list
        column_names = list(column_names)
    if not isinstance(the_dict, OrderedDict):
        # plain dicts get a deterministic, alphabetical column order
        column_names = sorted(column_names)
    if with_keys:
        yield column_names
    columns = [the_dict[name] for name in column_names]
    # shorter columns are padded with '' so every row has equal length
    for padded_row in zip_longest(*columns, fillvalue=''):
        yield list(padded_row)
def row_iterator(self):
    """Yield the native sheet's content row by row.

    The first yielded row is the list of keys unless the ``with_keys``
    keyword was set to ``False``; columns of unequal length are padded
    with ``constants.DEFAULT_NA``.
    """
    column_names = self._native_sheet.keys()
    if not PY2:
        # Python 3 returns a view; make it a concrete list
        column_names = list(column_names)
    if not isinstance(self._native_sheet, OrderedDict):
        # plain dicts get a deterministic, alphabetical column order
        column_names = sorted(column_names)
    if self._keywords.get('with_keys', True):
        yield column_names
    columns = [self._native_sheet[name] for name in column_names]
    for padded_row in zip_longest(*columns, fillvalue=constants.DEFAULT_NA):
        yield padded_row
def iget_records(**keywords):
    """Return a generator of records (dicts) read from an excel source.

    Similar to :meth:`pyexcel.get_records` but with a smaller memory
    footprint. Requires the headers to be in the first row and the data
    matrix to be of equal length; works well with large files.

    :param keywords: forwarded to ``sources.get_sheet_stream``
    """
    sheet_stream = sources.get_sheet_stream(**keywords)
    headers = None
    for index, row in enumerate(sheet_stream.payload):
        if index == 0:
            # first row supplies the column names for all later rows
            headers = row
            continue
        # short rows are padded with the NA marker
        yield dict(zip_longest(headers, row, fillvalue=constants.DEFAULT_NA))
def iget_records(**keywords):
    """Return a generator of records (dicts) read from an excel source.

    Similar to :meth:`pyexcel.get_records` but with a smaller memory
    footprint. Requires the headers to be in the first row and the data
    matrix to be of equal length; works well with large files.

    :param keywords: forwarded to ``sources.get_sheet_stream``
    """
    sheet_stream = sources.get_sheet_stream(on_demand=True, **keywords)
    headers = None
    for index, row in enumerate(sheet_stream.payload):
        if index == 0:
            # first row supplies the column names for all later rows
            headers = row
            continue
        # short rows are padded with the NA marker
        yield dict(zip_longest(headers, row, fillvalue=constants.DEFAULT_NA))
def row_iterator(self):
    """Yield the native sheet's content row by row.

    The first yielded row is the list of keys unless the ``with_keys``
    keyword was set to ``False``. When the dict values are lists
    (columns), rows are produced column-wise with short columns padded
    with ``constants.DEFAULT_NA``; otherwise the dict is treated as a
    single record and one row of its values is yielded.
    """
    keys = self._native_sheet.keys()
    if not PY2:
        # Python 3 returns a view; make it an indexable list
        keys = list(keys)
    if not isinstance(self._native_sheet, OrderedDict):
        # plain dicts get a deterministic, alphabetical column order
        keys = sorted(keys)
    if self._keywords.get('with_keys', True):
        yield keys
    if not keys:
        # Empty sheet: nothing more to yield. Previously this fell
        # through to keys[0] and raised IndexError.
        return
    if isinstance(self._native_sheet[keys[0]], list):
        # list-valued: treat values as columns, pad to equal length
        sorted_values = (self._native_sheet[key] for key in keys)
        for row in zip_longest(
                *sorted_values, fillvalue=constants.DEFAULT_NA):
            yield row
    else:
        # scalar-valued: the dict is a single record, one data row
        row = [self._native_sheet[key] for key in keys]
        yield row