def main(self):
    """Write the input CSV to the output with its rows sorted.

    When ``self.args.names_only`` is set, only the column names are printed
    and nothing is sorted.  Otherwise the input is loaded into a
    ``table.Table``, its rows are ordered by the columns selected through
    ``self.args.columns`` (in reverse when ``self.args.reverse`` is set), and
    the header followed by the sorted rows is written via ``CSVKitWriter``.
    """
    args = self.args

    if args.names_only:
        print_column_names(args.file, self.output_file, **self.reader_kwargs)
        return

    # The table is named after the input file (directory and extension
    # stripped); input read from '<stdin>' gets a fixed placeholder name.
    if args.file.name == '<stdin>':
        table_name = 'csvsql_table'
    else:
        filename = os.path.split(args.file.name)[1]
        table_name = os.path.splitext(filename)[0]

    tab = table.Table.from_csv(
        args.file,
        name=table_name,
        snifflimit=args.snifflimit,
        **self.reader_kwargs
    )
    column_ids = parse_column_identifiers(args.columns, tab.headers())

    def sort_key(record):
        # Compare rows by the values of the selected columns, in order.
        return [record[column_id] for column_id in column_ids]

    rows = tab.to_rows(serialize_dates=True)
    rows.sort(key=sort_key, reverse=args.reverse)

    # The header is placed in front only after sorting so it stays first.
    rows.insert(0, tab.headers())

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    for record in rows:
        output.writerow(record)
def main(self):
    """Copy to the output only the CSV rows whose selected columns match.

    When ``self.args.names_only`` is set, only the column names are printed.
    Otherwise exactly one kind of matcher is built, in this order of
    precedence:

    * ``self.args.regex``     -- compiled with ``re.compile``;
    * ``self.args.matchfile`` -- a cell matches when it equals one of the
      file's lines (trailing whitespace stripped from each line);
    * ``self.args.pattern``   -- passed through unchanged.

    The same matcher is applied to every column selected through
    ``self.args.columns``.  The header row is always written; data rows are
    filtered by ``FilteringCSVReader`` (inverted when ``self.args.inverse``
    is set).  ``self.input_line_number`` is updated for each row written.

    If none of the three matcher options is given, the problem is reported
    through ``self.argparser.error``.
    """
    if self.args.names_only:
        print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
        return

    if not self.args.regex and not self.args.pattern and not self.args.matchfile:
        self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)
    # Builtin next() rather than the Python 2-only ``.next()`` method.
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names)

    if self.args.regex:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile:
        # A set gives constant-time membership tests; the matcher is called
        # for every examined cell, so a list here meant a linear scan each
        # time.
        lines = set(line.rstrip() for line in self.args.matchfile)

        def pattern(value):
            return value in lines
    else:
        pattern = self.args.pattern

    # Every selected column is tested against the same matcher.
    patterns = dict.fromkeys(column_ids, pattern)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    # header=False: the header row was already consumed from ``rows`` above.
    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    for line_number, row in enumerate(filter_reader, 1):
        self.input_line_number = line_number
        output.writerow(row)
def main(self):
    """Write only the selected columns of the input CSV to the output.

    When ``self.args.names_only`` is set, only the column names are printed.
    Otherwise the first row is taken as the header, the columns named by
    ``self.args.columns`` are resolved against it, and each input row is
    reduced to those columns, in the selected order.  Rows too short to
    contain a selected column get ``None`` in that position.

    When ``self.args.delete_empty`` is set, rows whose selected cells are all
    empty are skipped.  ``self.input_line_number`` is updated for every data
    row read, including skipped ones.
    """
    if self.args.names_only:
        print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
        return

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)
    # Builtin next() rather than the Python 2-only ``.next()`` method.
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow([column_names[c] for c in column_ids])

    for i, row in enumerate(rows):
        self.input_line_number = i + 1

        # Pad with None where the row is shorter than the selected index.
        out_row = [row[c] if c < len(row) else None for c in column_ids]

        # any() treats '' and the None padding alike as empty.  The earlier
        # ``''.join(out_row)`` raised TypeError as soon as a short row had
        # been padded with None.  (Assumes the reader yields text cells, for
        # which the two checks agree -- confirm if cells can be non-strings.)
        if self.args.delete_empty and not any(out_row):
            continue

        output.writerow(out_row)
def main(self):
    """Sort the rows of a CSV file by the requested columns and write them.

    If ``self.args.names_only`` is set, the column names are printed and the
    method returns without sorting.  Otherwise the input is parsed into a
    ``table.Table``, the sort columns are resolved from ``self.args.columns``
    against the table headers, and the header row followed by the rows in
    sorted order (descending when ``self.args.reverse`` is set) is written
    with ``CSVKitWriter``.
    """
    if self.args.names_only:
        print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
        return

    input_name = self.args.file.name
    if input_name == '<stdin>':
        table_name = 'csvsql_table'
    else:
        # Use the file's base name, without its extension, as the table name.
        _, tail = os.path.split(input_name)
        table_name, _ = os.path.splitext(tail)

    tab = table.Table.from_csv(self.args.file,
                               name=table_name,
                               snifflimit=self.args.snifflimit,
                               **self.reader_kwargs)

    column_ids = parse_column_identifiers(self.args.columns, tab.headers())

    # Rows are ordered by the list of their values in the selected columns.
    body = tab.to_rows(serialize_dates=True)
    body.sort(key=lambda entry: [entry[idx] for idx in column_ids],
              reverse=self.args.reverse)

    header = tab.headers()

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    # Header first, then the sorted data rows.
    output.writerow(header)
    for entry in body:
        output.writerow(entry)