Exemple #1
0
def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    Opens the workbook at ``path`` with xlrd and inspects only its first
    sheet. For each column, the cells in rows 1 through ``sample_size``
    are sampled (row 0 is skipped -- presumably a header row).

    ``dialect`` and ``encoding`` are accepted but not used by this function.

    Returns a list with one entry per column: the ``__name__`` of the type
    chosen for that column, or None when no type was chosen (e.g. the
    column's nominal type is empty).

    Raises TypeInferenceError when the nominal cell type of a column is not
    one of the xlrd cell types handled below.
    """
    book = xlrd.open_workbook(path, on_demand=True)

    try:
        sheet = book.sheet_by_index(0)

        column_types = []

        for i in range(sheet.ncols):
            # Skip the first row and look at no more than sample_size cells.
            values = sheet.col_values(i)[1:sample_size + 1]
            types = sheet.col_types(i)[1:sample_size + 1]
            nominal_type = determine_column_type(types)

            if nominal_type == xlrd.biffh.XL_CELL_EMPTY:
                column_types.append(None)
            elif nominal_type == xlrd.biffh.XL_CELL_TEXT:
                column_types.append(unicode)
            elif nominal_type == xlrd.biffh.XL_CELL_NUMBER:
                column_types.append(determine_number_type(values))
            elif nominal_type == xlrd.biffh.XL_CELL_DATE:
                column_types.append(determine_date_type(values, datemode=book.datemode))
            elif nominal_type == xlrd.biffh.XL_CELL_BOOLEAN:
                column_types.append(bool)
            elif nominal_type == xlrd.biffh.XL_CELL_ERROR:
                # Error cells are reported as text.
                column_types.append(unicode)
            else:
                raise TypeInferenceError(_('Unknown column type found in xls file: %s') % nominal_type)
    finally:
        # A workbook opened with on_demand=True holds on to its resources
        # until they are released explicitly, so do that on every exit path.
        book.release_resources()

    return [t.__name__ if t else None for t in column_types]
Exemple #2
0
def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    The workbook at ``path`` is opened with xlrd and only its first sheet is
    examined. Each column is sampled from row 1 up to and including row
    ``sample_size``; row 0 is left out (presumably a header row).

    ``dialect`` and ``encoding`` are part of the signature but are not used
    by this function.

    Returns a list of type names (``__name__``), one per column, with None
    for any column for which no type was chosen (e.g. an empty column).

    Raises TypeInferenceError if a column's nominal cell type is not one of
    the xlrd cell types handled below.
    """
    book = xlrd.open_workbook(path, on_demand=True)

    try:
        sheet = book.sheet_by_index(0)
        sample = slice(1, sample_size + 1)

        column_types = []

        for i in range(sheet.ncols):
            values = sheet.col_values(i)[sample]
            nominal_type = determine_column_type(sheet.col_types(i)[sample])

            if nominal_type == xlrd.biffh.XL_CELL_EMPTY:
                column_types.append(None)
            elif nominal_type == xlrd.biffh.XL_CELL_TEXT:
                column_types.append(unicode)
            elif nominal_type == xlrd.biffh.XL_CELL_NUMBER:
                column_types.append(determine_number_type(values))
            elif nominal_type == xlrd.biffh.XL_CELL_DATE:
                column_types.append(
                    determine_date_type(values, datemode=book.datemode))
            elif nominal_type == xlrd.biffh.XL_CELL_BOOLEAN:
                column_types.append(bool)
            elif nominal_type == xlrd.biffh.XL_CELL_ERROR:
                # Error cells are reported as text.
                column_types.append(unicode)
            else:
                raise TypeInferenceError(
                    'Unknown column type found in xls file: %s' % nominal_type)
    finally:
        # With on_demand=True the workbook keeps its resources until they
        # are released explicitly; release them whether or not we succeeded.
        book.release_resources()

    return [t.__name__ if t else None for t in column_types]
Exemple #3
0
 def test_determine_column_type_empty(self):
     # Three empty cell types in, the empty cell type expected back.
     cell_types = [
         xlrd.biffh.XL_CELL_EMPTY,
         xlrd.biffh.XL_CELL_EMPTY,
         xlrd.biffh.XL_CELL_EMPTY,
     ]
     column_type = xls.determine_column_type(cell_types)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(column_type, xlrd.biffh.XL_CELL_EMPTY)
Exemple #4
0
 def test_determine_column_type_multiple(self):
     # A mix of number, text and empty cell types is expected to yield text.
     cell_types = [
         xlrd.biffh.XL_CELL_NUMBER,
         xlrd.biffh.XL_CELL_TEXT,
         xlrd.biffh.XL_CELL_EMPTY,
     ]
     column_type = xls.determine_column_type(cell_types)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(column_type, xlrd.biffh.XL_CELL_TEXT)
Exemple #5
0
 def test_determine_column_type_single(self):
     # Two number cell types plus one empty are expected to yield number.
     cell_types = [
         xlrd.biffh.XL_CELL_NUMBER,
         xlrd.biffh.XL_CELL_NUMBER,
         xlrd.biffh.XL_CELL_EMPTY,
     ]
     column_type = xls.determine_column_type(cell_types)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(column_type, xlrd.biffh.XL_CELL_NUMBER)
Exemple #6
0
 def test_determine_column_type_empty(self):
     # Only empty cell types are supplied; the empty type is expected back.
     cell_types = [xlrd.biffh.XL_CELL_EMPTY] * 3
     column_type = xls.determine_column_type(cell_types)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(column_type, xlrd.biffh.XL_CELL_EMPTY)