Beispiel #1
0
    def test_skip(self):
        """Check an XLSX object opened with explicit fields and skip_rows=2.

        The object must expose the given field names, report two rows both
        through ``len()`` and through ``rows()``, and yield ``[3.0, "Cecil"]``
        as its first row.
        """
        expected_names = ["number", "name"]
        source = XLSXObject(
            data_path("data.xlsx"),
            FieldList(*expected_names),
            skip_rows=2,
        )
        self.assertEqual(expected_names, source.fields.names())

        # Materialize the rows before asking for the length, in the same
        # order as the assertions below rely on.
        fetched = list(source.rows())
        expected_count = 2
        self.assertEqual(expected_count, len(source))
        self.assertEqual(expected_count, len(fetched))

        first_record = fetched[0]
        self.assertSequenceEqual([3.0, "Cecil"], first_record)
Beispiel #2
0
    def __init__(self,
                 resource=None,
                 fields=None,
                 sheet=0,
                 encoding=None,
                 skip_rows=0,
                 has_header=True):
        """Creates a XLSX spreadsheet data source stream.

        Attributes:

        * resource: file name, URL or file-like object; an already opened
          ``openpyxl.Workbook`` instance is used as is
        * fields: field list describing the columns. When empty or not
          given, fields are derived from the sheet: names come from the
          header row (or are generated as ``col0``, ``col1``, ... when
          `has_header` is false) and storage types come from the cells of
          the first data row.
        * sheet: sheet index number (as int) or sheet name
        * encoding: not used by this constructor
        * skip_rows: number of rows to skip; the first data row is at
          zero-based index `skip_rows`, or `skip_rows` + 1 when the sheet
          has a header
        * has_header: flag determining whether first line contains header or
          not. ``True`` by default.

        Raises `ArgumentError` if `sheet` is neither a string nor an integer,
        or if fields have to be derived and the sheet has no row at the
        first data row index.
        """
        if isinstance(resource, openpyxl.Workbook):
            self.workbook = resource
        else:
            self.workbook = _load_workbook(resource)

        if isinstance(sheet, int):
            self.sheet = self.workbook.worksheets[sheet]
        elif isinstance(sheet, str):
            self.sheet = self.workbook[sheet]
        else:
            raise ArgumentError('sheet has to be a string or an integer')

        # Zero-based index of the first data row.
        if has_header:
            self.first_row = skip_rows + 1
        else:
            self.first_row = skip_rows

        if fields:
            self.fields = fields
        else:
            # Advance to the first data row; its cells determine the
            # storage types of the derived fields.
            indexed_rows = enumerate(self.sheet.rows)
            located = next(dropwhile(lambda x: x[0] < self.first_row,
                                     indexed_rows),
                           None)
            if located is None:
                # Without the default above, an empty or too short sheet
                # would leak a bare StopIteration out of the constructor.
                raise ArgumentError('sheet has no row at index %d to derive '
                                    'fields from' % self.first_row)
            first_row = located[1]

            if has_header:
                # iter() keeps this working whether `rows` is a generator or
                # a plain sequence. At least one row exists at this point,
                # so next() cannot run dry.
                # NOTE(review): the header is always read from the very first
                # sheet row, even when skip_rows > 0 -- confirm whether it
                # should be the row just before the first data row instead.
                header_row = next(iter(self.sheet.rows))
                names = [str(c.value) for c in header_row]
            else:
                names = ['col%d' % i for i in range(len(first_row))]

            self.fields = FieldList()
            # zip() stops at the shorter sequence, so surplus header names
            # or surplus data cells are ignored.
            for name, cell in zip(names, first_row):
                if cell.is_date:
                    storage_type = 'date'
                else:
                    storage_type = CELL_TYPES.get(cell.data_type, 'unknown')
                field = Field(name, storage_type=storage_type)
                self.fields.append(field)
Beispiel #3
0
    def test_no_header(self):
        """Check CSVSource behaviour when ``read_header`` is false.

        Without explicit fields the constructor must raise ArgumentError.
        With explicit fields the source exposes exactly those field names,
        yields 17 rows, and its first row equals ``["id", "fruit", "type"]``
        (presumably the file's own header line returned as data -- verify
        against the fixture).
        """
        # Only the raised exception matters here, so the result is not bound.
        with self.assertRaises(ArgumentError):
            CSVSource(data_path("fruits-sk.csv"), read_header=False)

        fields = FieldList("id", "fruit", "type")
        obj = CSVSource(data_path("fruits-sk.csv"),
                        read_header=False,
                        fields=fields)
        # Release the source even when one of the assertions fails.
        try:
            self.assertEqual(["id", "fruit", "type"], obj.fields.names())

            rows = list(obj.rows())
            self.assertEqual(17, len(rows))

            self.assertEqual(["id", "fruit", "type"], rows[0])
        finally:
            obj.release()