def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    Only the first sheet of the workbook is inspected. For every column the
    first row is skipped (presumably a header row -- confirm against callers)
    and at most ``sample_size`` of the following rows are sampled.

    :param path: Location of the xls file, handed to ``xlrd.open_workbook``.
    :param dialect: Not used by this function.
    :param sample_size: Maximum number of rows sampled per column.
    :param encoding: Not used by this function.
    :returns: A list with one entry per column: the ``__name__`` of the
        guessed type, or ``None`` when the column's nominal type is empty.
    :raises TypeInferenceError: If a column's nominal type is not one of the
        xlrd cell types handled below.
    """
    book = xlrd.open_workbook(path, on_demand=True)

    try:
        sheet = book.sheet_by_index(0)
        column_types = []

        for i in range(sheet.ncols):
            # Row 0 is excluded from the sample; rows 1..sample_size are used.
            values = sheet.col_values(i)[1:sample_size + 1]
            types = sheet.col_types(i)[1:sample_size + 1]

            nominal_type = determine_column_type(types)

            if nominal_type == xlrd.biffh.XL_CELL_EMPTY:
                column_types.append(None)
            elif nominal_type == xlrd.biffh.XL_CELL_TEXT:
                column_types.append(unicode)
            elif nominal_type == xlrd.biffh.XL_CELL_NUMBER:
                column_types.append(determine_number_type(values))
            elif nominal_type == xlrd.biffh.XL_CELL_DATE:
                column_types.append(
                    determine_date_type(values, datemode=book.datemode))
            elif nominal_type == xlrd.biffh.XL_CELL_BOOLEAN:
                column_types.append(bool)
            elif nominal_type == xlrd.biffh.XL_CELL_ERROR:
                # Error cells are reported as text.
                column_types.append(unicode)
            else:
                raise TypeInferenceError(
                    _('Unknown column type found in xls file: %s') % nominal_type)
    finally:
        # A workbook opened with on_demand=True keeps its underlying
        # resources until they are explicitly released, so always release
        # them -- including when type inference raises.
        book.release_resources()

    return [t.__name__ if t else None for t in column_types]
def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    Only the first sheet of the workbook is inspected. For every column the
    first row is skipped (presumably a header row -- confirm against callers)
    and at most ``sample_size`` of the following rows are sampled.

    :param path: Location of the xls file, handed to ``xlrd.open_workbook``.
    :param dialect: Not used by this function.
    :param sample_size: Maximum number of rows sampled per column.
    :param encoding: Not used by this function.
    :returns: A list with one entry per column: the ``__name__`` of the
        guessed type, or ``None`` when the column's nominal type is empty.
    :raises TypeInferenceError: If a column's nominal type is not one of the
        xlrd cell types handled below.
    """
    book = xlrd.open_workbook(path, on_demand=True)

    try:
        sheet = book.sheet_by_index(0)
        column_types = []

        for i in range(sheet.ncols):
            # Row 0 is excluded from the sample; rows 1..sample_size are used.
            values = sheet.col_values(i)[1:sample_size + 1]
            types = sheet.col_types(i)[1:sample_size + 1]

            nominal_type = determine_column_type(types)

            if nominal_type == xlrd.biffh.XL_CELL_EMPTY:
                column_types.append(None)
            elif nominal_type == xlrd.biffh.XL_CELL_TEXT:
                column_types.append(unicode)
            elif nominal_type == xlrd.biffh.XL_CELL_NUMBER:
                column_types.append(determine_number_type(values))
            elif nominal_type == xlrd.biffh.XL_CELL_DATE:
                column_types.append(
                    determine_date_type(values, datemode=book.datemode))
            elif nominal_type == xlrd.biffh.XL_CELL_BOOLEAN:
                column_types.append(bool)
            elif nominal_type == xlrd.biffh.XL_CELL_ERROR:
                # Error cells are reported as text.
                column_types.append(unicode)
            else:
                raise TypeInferenceError(
                    'Unknown column type found in xls file: %s' % nominal_type)
    finally:
        # A workbook opened with on_demand=True keeps its underlying
        # resources until they are explicitly released, so always release
        # them -- including when type inference raises.
        book.release_resources()

    return [t.__name__ if t else None for t in column_types]
def test_determine_column_type_empty(self):
    # A sample made up solely of empty cells must resolve to the empty type.
    cell_types = [
        xlrd.biffh.XL_CELL_EMPTY,
        xlrd.biffh.XL_CELL_EMPTY,
        xlrd.biffh.XL_CELL_EMPTY,
    ]

    column_type = xls.determine_column_type(cell_types)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(column_type, xlrd.biffh.XL_CELL_EMPTY)
def test_determine_column_type_multiple(self):
    # A sample mixing number, text and empty cells must resolve to text.
    cell_types = [
        xlrd.biffh.XL_CELL_NUMBER,
        xlrd.biffh.XL_CELL_TEXT,
        xlrd.biffh.XL_CELL_EMPTY,
    ]

    column_type = xls.determine_column_type(cell_types)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(column_type, xlrd.biffh.XL_CELL_TEXT)
def test_determine_column_type_single(self):
    # Number cells plus an empty cell must still resolve to the number type.
    cell_types = [
        xlrd.biffh.XL_CELL_NUMBER,
        xlrd.biffh.XL_CELL_NUMBER,
        xlrd.biffh.XL_CELL_EMPTY,
    ]

    column_type = xls.determine_column_type(cell_types)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(column_type, xlrd.biffh.XL_CELL_NUMBER)
def test_determine_column_type_empty(self):
    # A sample made up solely of empty cells must resolve to the empty type.
    cell_types = [
        xlrd.biffh.XL_CELL_EMPTY,
        xlrd.biffh.XL_CELL_EMPTY,
        xlrd.biffh.XL_CELL_EMPTY,
    ]

    column_type = xls.determine_column_type(cell_types)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(column_type, xlrd.biffh.XL_CELL_EMPTY)