def main(self):
    """Stack multiple CSV files vertically, optionally prefixing each row
    with a grouping value that identifies its source file.

    Writes the stacked CSV to ``self.output_file``. Exits via
    ``argparser.error`` when no files are given or the number of ``--groups``
    values does not match the number of input files.
    """
    self.input_files = []

    for path in self.args.input_paths:
        self.input_files.append(self._open_input_file(path))

    if not self.input_files:
        self.argparser.error('You must specify at least one file to stack.')

    if self.args.group_by_filenames:
        # Use each file's basename as its group value.
        groups = [os.path.basename(f.name) for f in self.input_files]
    elif self.args.groups:
        groups = self.args.groups.split(',')

        if len(groups) != len(self.input_files):
            self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
    else:
        groups = None

    group_name = self.args.group_name if self.args.group_name else 'group'

    output = agate.csv.writer(self.output_file, **self.writer_kwargs)

    for i, f in enumerate(self.input_files):
        rows = agate.csv.reader(f, **self.reader_kwargs)

        # If we have header rows, use them
        if not self.args.no_header_row:
            headers = next(rows, [])

            # Only the first file's header row is echoed to the output.
            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)
        # If we don't, generate simple column names based on the first row
        else:
            row = next(rows, [])
            headers = make_default_headers(len(row))

            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            # BUG FIX: an empty input file used to emit a spurious
            # (group-only or blank) data row here; only write the peeked
            # row when the file actually contained one.
            if row:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

        for row in rows:
            if groups:
                row.insert(0, groups[i])

            output.writerow(row)

        f.close()
def main(self):
    """Copy CSV rows from input to output, synthesizing default column
    names when the input has no header row.
    """
    # Warn the user when we are about to block on an interactive stdin.
    if self.additional_input_expected():
        sys.stderr.write(
            'No input file or piped data provided. Waiting for standard input:\n'
        )

    csv_in = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)
    csv_out = agate.csv.writer(self.output_file, **self.writer_kwargs)

    if self.args.no_header_row:
        # Peek at the first row to learn the column count, emit generated
        # headers, then write the peeked row back out as data.
        first = next(csv_in)
        csv_out.writerow(make_default_headers(len(first)))
        csv_out.writerow(first)

    for record in csv_in:
        csv_out.writerow(record)
def main(self):
    """Stack multiple CSV files vertically, optionally adding a leading
    grouping column that identifies each row's source file.

    Honors ``--skip-lines`` per input file. Exits via ``argparser.error``
    when the number of ``--groups`` values does not match the number of
    input files; raises ValueError if ``skip_lines`` is not an int.
    """
    # Warn the user when we are about to block on an interactive stdin.
    if sys.stdin.isatty() and not self.args.input_paths:
        sys.stderr.write(
            'No input file or piped data provided. Waiting for standard input:\n'
        )

    has_groups = self.args.group_by_filenames or self.args.groups

    if self.args.groups and not self.args.group_by_filenames:
        groups = self.args.groups.split(',')

        if len(groups) != len(self.args.input_paths):
            self.argparser.error(
                'The number of grouping values must be equal to the number of CSV files being stacked.'
            )
    else:
        # --filenames takes precedence; group values then come from each
        # file's basename inside the loop below.
        groups = None

    group_name = self.args.group_name if self.args.group_name else 'group'

    output = agate.csv.writer(self.output_file, **self.writer_kwargs)

    for i, path in enumerate(self.args.input_paths):
        f = self._open_input_file(path)

        if not isinstance(self.args.skip_lines, int):
            raise ValueError('skip_lines argument must be an int')

        # Discard leading non-CSV lines before handing the file to the reader.
        for _ in range(self.args.skip_lines):
            f.readline()

        rows = agate.csv.reader(f, **self.reader_kwargs)

        if has_groups:
            group = groups[i] if groups else os.path.basename(f.name)

        # If we have header rows, use them
        if not self.args.no_header_row:
            headers = next(rows, [])

            # Only the first file's header row is echoed to the output.
            if i == 0:
                if has_groups:
                    headers.insert(0, group_name)

                output.writerow(headers)
        # If we don't, generate simple column names based on the first row
        else:
            row = next(rows, [])
            headers = make_default_headers(len(row))

            if i == 0:
                if has_groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            # BUG FIX: an empty input file used to emit a spurious
            # (group-only or blank) data row here; only write the peeked
            # row when the file actually contained one.
            if row:
                if has_groups:
                    row.insert(0, group)

                output.writerow(row)

        for row in rows:
            if has_groups:
                row.insert(0, group)

            output.writerow(row)

        f.close()
def from_csv(cls, f, name='from_csv_table', sniff_limit=None, column_ids=None,
             blanks_as_nulls=True, column_offset=1, infer_types=True,
             no_header_row=False, **kwargs):
    """
    Creates a new Table from a file-like object containing CSV data.

    Note: the column_ids argument will cause only those columns with a
    matching identifier to be parsed, type inferred, etc. However, their
    order/index property will reflect the original data (e.g. column 8
    will still be "order" 7, even if it's the third column in the
    resulting Table.
    """
    # This bit of nonsense is to deal with "files" from stdin,
    # which are not seekable and thus must be buffered
    contents = f.read()

    # sniff_limit == 0 means do not sniff
    if sniff_limit is None:
        kwargs['dialect'] = sniff_dialect(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = sniff_dialect(contents[:sniff_limit])

    f = six.StringIO(contents)
    rows = agate.csv.reader(f, **kwargs)

    try:
        if no_header_row:
            # Peek at a row to infer column names from, and put it back on top
            row = next(rows)
            rows = itertools.chain([row], rows)
            headers = make_default_headers(len(row))
        else:
            headers = next(rows)
    except StopIteration:
        # The file is `/dev/null`.
        headers = []

    if no_header_row or column_ids:
        column_ids = parse_column_identifiers(column_ids, headers, column_offset)
        headers = [headers[c] for c in column_ids]
    else:
        column_ids = range(len(headers))

    data_columns = [[] for c in headers]
    width = len(data_columns)

    for row in rows:
        j = 0

        while j < width:
            try:
                value = row[column_ids[j]].strip()
            except IndexError:
                # Non-rectangular data is truncated
                break

            data_columns[j].append(value)
            j += 1

        # BUG FIX: pad *all* remaining columns with None, including the one
        # the break occurred on. The original did `j += 1` before padding,
        # which left one column short for every truncated or empty row and
        # produced ragged (unequal-length) columns.
        while j < width:
            data_columns[j].append(None)
            j += 1

    columns = []

    for i, c in enumerate(data_columns):
        columns.append(Column(column_ids[i], headers[i], c,
                              blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

    return Table(columns, name=name)
def main(self):
    """Stack multiple CSV files vertically, optionally adding a leading
    grouping column that identifies each row's source file.

    Honors ``--skip-lines`` per input file. Exits via ``argparser.error``
    when the number of ``--groups`` values does not match the number of
    input files; raises ValueError if ``skip_lines`` is not an int.
    """
    # Warn the user when we are about to block on an interactive stdin.
    if sys.stdin.isatty() and not self.args.input_paths:
        sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

    has_groups = self.args.group_by_filenames or self.args.groups

    if self.args.groups and not self.args.group_by_filenames:
        groups = self.args.groups.split(',')

        if len(groups) != len(self.args.input_paths):
            self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
    else:
        # --filenames takes precedence; group values then come from each
        # file's basename inside the loop below.
        groups = None

    group_name = self.args.group_name if self.args.group_name else 'group'

    output = agate.csv.writer(self.output_file, **self.writer_kwargs)

    for i, path in enumerate(self.args.input_paths):
        f = self._open_input_file(path)

        if not isinstance(self.args.skip_lines, int):
            raise ValueError('skip_lines argument must be an int')

        # Discard leading non-CSV lines before handing the file to the reader.
        for _ in range(self.args.skip_lines):
            f.readline()

        rows = agate.csv.reader(f, **self.reader_kwargs)

        if has_groups:
            group = groups[i] if groups else os.path.basename(f.name)

        # If we have header rows, use them
        if not self.args.no_header_row:
            headers = next(rows, [])

            # Only the first file's header row is echoed to the output.
            if i == 0:
                if has_groups:
                    headers.insert(0, group_name)

                output.writerow(headers)
        # If we don't, generate simple column names based on the first row
        else:
            row = next(rows, [])
            headers = make_default_headers(len(row))

            if i == 0:
                if has_groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            # BUG FIX: an empty input file used to emit a spurious
            # (group-only or blank) data row here; only write the peeked
            # row when the file actually contained one.
            if row:
                if has_groups:
                    row.insert(0, group)

                output.writerow(row)

        for row in rows:
            if has_groups:
                row.insert(0, group)

            output.writerow(row)

        f.close()
def from_csv(cls, f, name='from_csv_table', sniff_limit=None, column_ids=None,
             blanks_as_nulls=True, column_offset=1, infer_types=True,
             no_header_row=False, **kwargs):
    """
    Creates a new Table from a file-like object containing CSV data.

    Note: the column_ids argument will cause only those columns with a
    matching identifier to be parsed, type inferred, etc. However, their
    order/index property will reflect the original data (e.g. column 8
    will still be "order" 7, even if it's the third column in the
    resulting Table.
    """
    # This bit of nonsense is to deal with "files" from stdin,
    # which are not seekable and thus must be buffered
    contents = f.read()

    # sniff_limit == 0 means do not sniff
    if sniff_limit is None:
        kwargs['dialect'] = sniff_dialect(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = sniff_dialect(contents[:sniff_limit])

    f = six.StringIO(contents)
    rows = agate.csv.reader(f, **kwargs)

    try:
        if no_header_row:
            # Peek at a row to infer column names from, and put it back on top
            row = next(rows)
            rows = itertools.chain([row], rows)
            headers = make_default_headers(len(row))
        else:
            headers = next(rows)
    except StopIteration:
        # The file is `/dev/null`.
        headers = []

    if no_header_row or column_ids:
        column_ids = parse_column_identifiers(column_ids, headers, column_offset)
        headers = [headers[c] for c in column_ids]
    else:
        column_ids = range(len(headers))

    data_columns = [[] for c in headers]
    width = len(data_columns)

    for row in rows:
        j = 0

        while j < width:
            try:
                value = row[column_ids[j]].strip()
            except IndexError:
                # Non-rectangular data is truncated
                break

            data_columns[j].append(value)
            j += 1

        # BUG FIX: pad *all* remaining columns with None, including the one
        # the break occurred on. The original did `j += 1` before padding,
        # which left one column short for every truncated or empty row and
        # produced ragged (unequal-length) columns.
        while j < width:
            data_columns[j].append(None)
            j += 1

    columns = []

    for i, c in enumerate(data_columns):
        columns.append(
            Column(column_ids[i], headers[i], c,
                   blanks_as_nulls=blanks_as_nulls, infer_types=infer_types))

    return Table(columns, name=name)