def main(self):
    """
    Render the CSV read from ``self.input_file`` as a fixed-width text table.

    The header row, the data rows and three divider lines (top, below the
    header, bottom) are written to ``self.output_file``. Each column is as
    wide as its longest cell.

    Reads ``self.args.no_header_row`` (generate default column names and
    treat the first row as data) and ``self.args.line_numbers`` (prepend a
    ``line_number`` column counting data rows from 1).

    NOTE(review): ``next(rows)`` is called unguarded, so empty input
    presumably raises ``StopIteration`` -- confirm against the reader.
    """
    rows = CSVToolsReader(self.input_file, **self.reader_kwargs)

    # Make a default header row if none exists
    if self.args.no_header_row:
        first_row = next(rows)
        column_names = make_default_headers(len(first_row))
        # Put the peeked row back on top so it is rendered as data
        rows = itertools.chain([first_row], rows)
    else:
        column_names = next(rows)

    column_names = list(column_names)

    # Prepend a 'line_number' column with line numbers if --linenumbers option
    if self.args.line_numbers:
        column_names.insert(0, 'line_number')
        rows = [[str(number)] + list(row) for number, row in enumerate(rows, 1)]

    # Materialise the rows and put the column names at the top, so the header
    # takes part in the width calculation like any other row.
    rows = list(rows)
    rows.insert(0, column_names)

    # widths[i] is the longest cell seen in column i; rows may be ragged, so
    # the list grows whenever a row is longer than any row seen before.
    widths = []
    for row in rows:
        for i, value in enumerate(row):
            if i >= len(widths):
                widths.append(len(value))
            elif len(value) > widths[i]:
                widths[i] = len(value)

    # Dashes span each width with '+' character at intersection of
    # horizontal and vertical dividers.
    divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'

    write = self.output_file.write
    write('%s\n' % divider)

    last_index = len(rows) - 1
    for i, row in enumerate(rows):
        cells = []
        for j, value in enumerate(row):
            if value is None:
                value = ''
            cells.append(' %s ' % six.text_type(value).ljust(widths[j]))

        write('| %s |\n' % ('|'.join(cells)))

        # Divider under the header row and again after the final row
        if i == 0 or i == last_index:
            write('%s\n' % divider)
def main(self):
    """
    Render the CSV read from ``self.input_file`` as a fixed-width text table.

    The header row, the data rows and three divider lines (top, below the
    header, bottom) are written to ``self.output_file``. Each column is as
    wide as its longest cell.

    Reads ``self.args.no_header_row`` (generate default column names and
    treat the first row as data) and ``self.args.line_numbers`` (prepend a
    ``line_number`` column counting data rows from 1).

    NOTE(review): ``next(rows)`` is called unguarded, so empty input
    presumably raises ``StopIteration`` -- confirm against the reader.
    """
    rows = CSVToolsReader(self.input_file, **self.reader_kwargs)

    # Make a default header row if none exists
    if self.args.no_header_row:
        first_row = next(rows)
        column_names = make_default_headers(len(first_row))
        # Put the peeked row back on top so it is rendered as data
        rows = itertools.chain([first_row], rows)
    else:
        column_names = next(rows)

    column_names = list(column_names)

    # Prepend a 'line_number' column with line numbers if --linenumbers option
    if self.args.line_numbers:
        column_names.insert(0, 'line_number')
        rows = [[str(number)] + list(row) for number, row in enumerate(rows, 1)]

    # Materialise the rows and put the column names at the top, so the header
    # takes part in the width calculation like any other row.
    rows = list(rows)
    rows.insert(0, column_names)

    # widths[i] is the longest cell seen in column i; rows may be ragged, so
    # the list grows whenever a row is longer than any row seen before.
    widths = []
    for row in rows:
        for i, value in enumerate(row):
            if i >= len(widths):
                widths.append(len(value))
            elif len(value) > widths[i]:
                widths[i] = len(value)

    # Dashes span each width with '+' character at intersection of
    # horizontal and vertical dividers.
    divider = '|--' + '-+-'.join('-' * w for w in widths) + '--|'

    write = self.output_file.write
    write('%s\n' % divider)

    last_index = len(rows) - 1
    for i, row in enumerate(rows):
        cells = []
        for j, value in enumerate(row):
            if value is None:
                value = ''
            cells.append(' %s ' % six.text_type(value).ljust(widths[j]))

        write('| %s |\n' % ('|'.join(cells)))

        # Divider under the header row and again after the final row
        if i == 0 or i == last_index:
            write('%s\n' % divider)
def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, blanks_as_nulls=True, zero_based=False, infer_types=True, no_header_row=False, **kwargs):
    """
    Creates a new Table from a file-like object containing CSV data.

    Note: the column_ids argument will cause only those columns with a
    matching identifier to be parsed, type inferred, etc. However, their
    order/index property will reflect the original data (e.g. column 8
    will still be "order" 7, even if it's the third column in the
    resulting Table).

    ``snifflimit`` controls dialect sniffing: ``None`` sniffs the whole
    input, a positive number sniffs only that many leading characters, and
    ``0`` disables sniffing. A sniffed dialect is passed to the reader as
    ``kwargs['dialect']``; remaining ``kwargs`` are forwarded unchanged.

    When ``no_header_row`` is true, default headers are generated from the
    width of the first row and that row is kept as data.

    Every selected column receives exactly one value per input row: the
    stripped cell text, or ``None`` when the row is too short to contain
    that column. Cells beyond the selected columns are ignored.
    """
    # This bit of nonsense is to deal with "files" from stdin,
    # which are not seekable and thus must be buffered
    contents = f.read()

    # snifflimit == 0 means do not sniff
    if snifflimit is None:
        kwargs['dialect'] = sniffer.sniff_dialect(contents)
    elif snifflimit > 0:
        kwargs['dialect'] = sniffer.sniff_dialect(contents[:snifflimit])

    f = six.StringIO(contents)
    rows = CSVToolsReader(f, **kwargs)

    if no_header_row:
        # Peek at a row to infer column names from
        first_row = next(rows)

        headers = make_default_headers(len(first_row))
        column_ids = parse_column_identifiers(column_ids, headers, zero_based)
        headers = [headers[c] for c in column_ids]

        # Put row back on top
        rows = itertools.chain([first_row], rows)
    else:
        headers = next(rows)

        if column_ids:
            column_ids = parse_column_identifiers(column_ids, headers, zero_based)
            headers = [headers[c] for c in column_ids]
        else:
            column_ids = range(len(headers))

    data_columns = [[] for _ in headers]

    for row in rows:
        # Pair each output column with its source index so that a short
        # (non-rectangular) row pads with None instead of leaving a column
        # one value short and misaligning every later row.
        for values, source_index in zip(data_columns, column_ids):
            try:
                cell = row[source_index]
            except IndexError:
                values.append(None)
            else:
                values.append(cell.strip())

    columns = [
        Column(column_id, header, values, blanks_as_nulls=blanks_as_nulls, infer_types=infer_types)
        for column_id, header, values in zip(column_ids, headers, data_columns)
    ]

    return Table(columns, name=name)