Python CSVTableSet Examples

Programming Language: Python

Namespace/Package Name: messytables

Method/Function: CSVTableSet

Examples at hotexamples.com: 6

Python CSVTableSet - 6 examples found. These are the top-rated real-world Python examples of messytables.CSVTableSet, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: csv_helpers.py Project: google/report2bq

def get_column_types(data: io.BytesIO) \
        -> Tuple[List[str], List[types.CellType]]:
    """Guess the header names and column types of a CSV sample.

    The sample is parsed with messytables' CSV API: the header row is guessed
    first, then the column types are guessed (with ``strict=True``) from the
    row set's sample.

    This is still a WIP due to the parlous state of the DV360/CM CSV data
    formats in general.

    Arguments:
        data (io.BytesIO):  sample of the CSV file

    Returns:
        (List[str], List[types.CellType]): tuple of the list of header names
                                           and the list of guessed column
                                           types
    """
    # Only the first table of the set is inspected.
    rows = messytables.CSVTableSet(data).tables[0]
    header_offset, header_names = messytables.headers_guess(rows.sample)

    # Register the guessed headers, then start one row past the header offset.
    rows.register_processor(messytables.headers_processor(header_names))
    rows.register_processor(messytables.offset_processor(header_offset + 1))

    guessed_types = messytables.type_guess(rows.sample, strict=True)
    return (header_names, guessed_types)

Example #2

Show file

File: csv_to_h5vaex.py Project: cycle13/h5toGrid

def determine_messytables_types(file_handle, types=messytables.types.TYPES):
    """
    Guess the headers and column types of a CSV file with messytables and
    register the matching processors on its first row set.

    :param file_handle: file handle opened in binary mode
    :param types: candidate types passed on to ``messytables.type_guess``
    :return: (headers, types, row_set)
    """

    # The file is treated as CSV; messytables.any_tableset(file_handle) is the
    # alternative when the kind of file is not known.
    # A table set is a collection of tables -- only the first one is used.
    rows = messytables.CSVTableSet(file_handle).tables[0]

    # The row set itself can only be iterated once, so peek at its sample
    # instead and show the first sampled row.
    print(next(rows.sample))

    # Guess the header names and where the header row sits.
    header_offset, header_names = messytables.headers_guess(rows.sample)
    rows.register_processor(messytables.headers_processor(header_names))

    # One past the header offset, so iteration begins with content.
    rows.register_processor(messytables.offset_processor(header_offset + 1))

    # Guess the column types and have them applied to each row on traversal.
    guessed_types = messytables.type_guess(rows.sample, types, strict=True)
    rows.register_processor(messytables.types_processor(guessed_types))

    return header_names, guessed_types, rows

Example #3

Show file

File: CSVtoSQLite_ManyColumns.py Project: timpjohns/python-pandas

def headersDataTypes(CSV):
    '''Get column headers and data types using messytables.

    The file name ``CSV`` is appended to ``path[0]`` (``path`` is a name
    defined outside this function) and opened in binary mode.

    Returns a tuple ``(headers, dtypedict)``: the guessed header names as a
    list, and a dictionary mapping each header to the type guessed for its
    column (needed for the pandas csv-to-dataframe conversion).  Headers and
    types are paired by position; a guessed type with no matching header is
    dropped.
    '''
    # Everything that reads from the file (header and type guessing) happens
    # inside the with-block, so the handle is closed before returning instead
    # of being leaked.
    with open(path[0]+CSV, 'rb') as table:
        # Creates a set of tables as file object, although it'll just be one
        tableset = messytables.CSVTableSet(table)
        rowset = tableset.tables[0]  # get first and only table as iterator
        # guesses header names and offset of header, returns headers as list
        offset, headers = messytables.headers_guess(rowset.sample)
        # Single-argument print(...) is valid on both Python 2 and Python 3
        # and emits the same text as the former print statement.
        print("Here is the offset %s \nHere are the headers:\n%s"
              % (str(offset), str(headers)))  # test
        # establish headers in table
        rowset.register_processor(messytables.headers_processor(headers))
        # begin iterator at content, rather than header
        rowset.register_processor(messytables.offset_processor(offset + 1))
        # guess column types, return as list
        types = messytables.type_guess(rowset.sample, strict=True)
    print("Here are the data types %s" % str(types))
    # Pair each header with the type guessed for the same column position.
    dtypedict = dict(zip(headers, types))
    return headers, dtypedict

Example #4

Show file

File: sniff_format.py Project: GeoinformationSystems/ckanext-qa

def is_psv(buf, log):
    '''Report whether the buffer is a PSV file, i.e. parses as a
    spreadsheet when read with '|' as the delimiter.'''
    pipe_tables = messytables.CSVTableSet(StringIO.StringIO(buf),
                                          delimiter='|')
    return _is_spreadsheet(pipe_tables, 'PSV', log)

Example #5

Show file

File: sniff_format.py Project: GeoinformationSystems/ckanext-qa

def is_csv(buf, log):
    '''Report whether the buffer is a CSV file, i.e. parses as a
    spreadsheet when read with messytables' default CSV settings.'''
    csv_tables = messytables.CSVTableSet(StringIO.StringIO(buf))
    return _is_spreadsheet(csv_tables, 'CSV', log)

Example #6

Show file

File: sniff_format.py Project: qld-gov-au/ckanext-qa

def is_psv(buf):
    '''Report whether the buffer is a PSV file, i.e. parses as a
    spreadsheet when read with '|' as the delimiter.'''
    # The buffer is coerced to bytes before being wrapped in a stream.
    raw_stream = six.BytesIO(six.ensure_binary(buf))
    return _is_spreadsheet(
        messytables.CSVTableSet(raw_stream, delimiter='|'), 'PSV')