Beispiel #1
0
    def initialize(self):
        """Initialize the XLS source stream.

        Opens the resource, loads the workbook with ``xlrd``, selects the
        sheet given by `sheet_reference`, stores the number of rows in
        `row_count` and reads the stream fields.

        `sheet_reference` may be either a sheet index (integer) or a sheet
        name. If it is not set (or is otherwise falsy), the first sheet
        (index 0) is used.
        """

        self.file, self.close_file = open_resource(self.resource)

        # The whole resource is read and handed to xlrd as `file_contents`.
        self.workbook = xlrd.open_workbook(file_contents=self.file.read(),
                                           encoding_override=self.encoding)

        # Fall back to the first sheet when no reference was provided.
        if not self.sheet_reference:
            self.sheet_reference = 0

        # Integers select a sheet by position; anything else is treated as a
        # sheet name. isinstance() is used so that int subclasses work too.
        if isinstance(self.sheet_reference, int):
            self.sheet = self.workbook.sheet_by_index(self.sheet_reference)
        else:
            self.sheet = self.workbook.sheet_by_name(self.sheet_reference)

        self.row_count = self.sheet.nrows

        self.read_fields()
Beispiel #2
0
    def initialize(self):
        """Initialize the CSV source stream.

        #. open the resource
        #. if `detect_header` is set, sniff a sample of `sample_size` to
           decide whether the CSV has a header row (sets `read_header`)
        #. resolve `dialect` (a registered dialect name or a dialect object)
           and store it in `reader_args`
        #. create the CSV reader object
        #. skip `skip_rows` leading rows if requested
        #. read the CSV header if requested and initialize stream fields

        If fields are explicitly set prior to initialization, and header
        reading is requested, then the header row is just skipped and fields
        that were set before are used. Do not set fields if you want to read
        the header.

        Fields created from the header are built from
        ``(name, "string", "default")`` tuples.

        Raises `RuntimeError` if no fields are available after the header
        has been processed.
        """

        self.file, self.close_file = open_resource(self.resource)

        if self.detect_header:
            sample = self.file.read(self.sample_size)

            # NOTE(review): encode() assumes the sample is text that can be
            # encoded as UTF-8; confirm what open_resource() returns for
            # non-ASCII input.
            sample = sample.encode('utf-8')
            self.read_header = csv.Sniffer().has_header(sample)

            # Rewind so the reader below starts from the beginning again.
            self.file.seek(0)

        if self.dialect:
            # A string is looked up among the registered csv dialects;
            # anything else is taken to be a dialect object already.
            if isinstance(self.dialect, str):
                dialect = csv.get_dialect(self.dialect)
            else:
                dialect = self.dialect

            self.reader_args["dialect"] = dialect

        # NOTE(review): `reader_args` is not passed to UnicodeReader here, so
        # the dialect resolved above is not used by this call - confirm
        # whether it is consumed elsewhere.
        self.reader = UnicodeReader(self.file, encoding=self.encoding,
                                    empty_as_null=self.empty_as_null,
                                    delim=self.delim)

        if self.skip_rows:
            for _ in range(self.skip_rows):
                self.reader.next()

        # Initialize field list
        if self.read_header:
            # The header row is always consumed, even when it is not used.
            field_names = self.reader.next()

            # Fields set explicitly take priority over what is read from the
            # header. (Issue #17 might be somehow related)
            if not self.fields:
                fields = [(name, "string", "default") for name in field_names]
                self.fields = FieldList(fields)

        if not self.fields:
            raise RuntimeError("Fields are not initialized. "
                               "Either read fields from CSV header or "
                               "set them manually")

        # Normalize whatever was supplied (or just built) into a FieldList.
        self.fields = FieldList(self.fields)

        self.reader.set_fields(self.fields)