def main(self):
    """Check the input CSV and either report on it or write cleaned copies.

    In dry-run mode (``self.args.dryrun``) nothing is written to disk: every
    recorded error is printed to ``self.output_file`` as ``Line N: msg``, or
    ``No errors.`` when there are none, followed by a note about joined rows
    if the checker counted any.

    Otherwise the checked rows are written to ``<base>_out.csv`` and, if the
    checker recorded errors, those go to ``<base>_err.csv``; ``<base>`` is
    the input file's name with its extension removed.
    """
    source_rows = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.dryrun:
        validator = RowChecker(source_rows)

        # Exhaust the generator; the rows themselves are not needed here,
        # only the attributes read from the checker afterwards.
        for _ in validator.checked_rows():
            pass

        if not validator.errors:
            self.output_file.write("No errors.\n")
        else:
            for problem in validator.errors:
                self.output_file.write(
                    "Line %i: %s\n" % (problem.line_number, problem.msg)
                )

        if validator.joins:
            summary = (
                "%i rows would have been joined/reduced to %i rows after "
                "eliminating expected internal line breaks.\n"
            )
            self.output_file.write(
                summary % (validator.rows_joined, validator.joins)
            )
        return

    stem = splitext(self.args.file.name)[0]

    with open("%s_out.csv" % stem, "w") as cleaned_fh:
        cleaned_out = CSVKitWriter(cleaned_fh, **self.writer_kwargs)
        validator = RowChecker(source_rows)
        cleaned_out.writerow(validator.column_names)
        for good_row in validator.checked_rows():
            cleaned_out.writerow(good_row)

    if not validator.errors:
        self.output_file.write("No errors.\n")
    else:
        errors_path = "%s_err.csv" % stem

        with open(errors_path, "w") as errors_fh:
            errors_out = CSVKitWriter(errors_fh, **self.writer_kwargs)
            errors_out.writerow(
                ["line_number", "msg"] + list(validator.column_names)
            )
            total = len(validator.errors)
            for problem in validator.errors:
                errors_out.writerow(self._format_error_row(problem))

        suffix = "s" if total != 1 else ""
        self.output_file.write(
            "%i error%s logged to %s\n" % (total, suffix, errors_path)
        )

    if validator.joins:
        summary = (
            "%i rows were joined/reduced to %i rows after "
            "eliminating expected internal line breaks.\n"
        )
        self.output_file.write(
            summary % (validator.rows_joined, validator.joins)
        )
def main(self):
    """Validate the input CSV; report problems or write cleaned/error files.

    A hint is printed to standard error first when
    ``self.additional_input_expected()`` is true.  With ``self.args.dryrun``
    set, only a report is written to ``self.output_file``.  Otherwise the
    checked rows are saved to ``<base>_out.csv`` and any recorded errors to
    ``<base>_err.csv``, where ``<base>`` is ``stdin`` for standard input and
    the input file's name without extension in every other case.
    """
    if self.additional_input_expected():
        sys.stderr.write(
            'No input file or piped data provided. '
            'Waiting for standard input:\n'
        )

    rows_in = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)

    if self.args.dryrun:
        inspector = RowChecker(rows_in)

        # Run the checker to completion; only the attributes read from it
        # afterwards are of interest.
        for _ in inspector.checked_rows():
            pass

        if not inspector.errors:
            self.output_file.write('No errors.\n')
        else:
            for issue in inspector.errors:
                self.output_file.write(
                    'Line %i: %s\n' % (issue.line_number, issue.msg)
                )

        if inspector.joins:
            template = (
                '%i rows would have been joined/reduced to %i rows after '
                'eliminating expected internal line breaks.\n'
            )
            self.output_file.write(
                template % (inspector.rows_joined, inspector.joins)
            )
        return

    # "<stdin>_out.csv" is invalid on Windows, so use a plain name for stdin.
    prefix = (
        'stdin'
        if self.input_file == sys.stdin
        else splitext(self.input_file.name)[0]
    )

    with open('%s_out.csv' % prefix, 'w') as out_fh:
        out_csv = agate.csv.writer(out_fh, **self.writer_kwargs)
        inspector = RowChecker(rows_in)
        out_csv.writerow(inspector.column_names)
        for checked in inspector.checked_rows():
            out_csv.writerow(checked)

    if not inspector.errors:
        self.output_file.write('No errors.\n')
    else:
        err_path = '%s_err.csv' % prefix

        with open(err_path, 'w') as err_fh:
            err_csv = agate.csv.writer(err_fh, **self.writer_kwargs)
            err_csv.writerow(
                ['line_number', 'msg'] + list(inspector.column_names)
            )
            n_errors = len(inspector.errors)
            for issue in inspector.errors:
                err_csv.writerow(self._format_error_row(issue))

        plural = 's' if n_errors != 1 else ''
        self.output_file.write(
            '%i error%s logged to %s\n' % (n_errors, plural, err_path)
        )

    if inspector.joins:
        template = (
            '%i rows were joined/reduced to %i rows after '
            'eliminating expected internal line breaks.\n'
        )
        self.output_file.write(
            template % (inspector.rows_joined, inspector.joins)
        )
def iterColumn(self, col_idx):
    """Yield the cell at index ``col_idx`` from each checked row.

    The file at ``self.fpath`` is opened with ``self.encoding`` and parsed
    using ``self.delimiter`` and ``self.quoting``.  The first parsed row is
    read and discarded before the remaining reader is handed to
    ``RowChecker``.  Rows that have no element at ``col_idx`` are skipped.
    An empty file yields nothing.

    Args:
        col_idx: Index of the wanted cell within each row.

    Yields:
        The value at ``row[col_idx]`` for every checked row that has one.
    """
    with open(self.fpath, 'r', encoding=self.encoding) as f:
        reader = csv.reader(f, delimiter=self.delimiter, quoting=self.quoting)

        # Discard the first row.  A bare next() on an empty file would raise
        # StopIteration inside this generator, which Python 3.7+ converts
        # into RuntimeError (PEP 479); use a default and stop cleanly.
        if next(reader, None) is None:
            return

        checker = RowChecker(reader)
        for row in checker.checked_rows():
            # Keep the try body down to the lookup so that only a short row
            # is treated as "skip", not an error raised while suspended.
            try:
                value = row[col_idx]
            except IndexError:
                continue
            yield value
def transform(self):
    """Yield one ``OrderedDict`` per non-empty checked row of the input file.

    The file at ``self.file_path`` is read as latin1, tab-delimited text with
    quoting disabled.  Every cell is stripped of surrounding whitespace and
    cells that end up empty become ``None``.  Each cleaned row is paired
    with ``self.header`` to build the mapping; empty rows are skipped.
    """
    with open(self.file_path, "r", encoding="latin1") as source:
        tsv = csv.reader(source, delimiter="\t", quoting=csv.QUOTE_NONE)
        validator = RowChecker(tsv)

        for record in validator.checked_rows():
            if not record:
                continue

            # Slice assignment keeps the clean-up in place on the same list
            # object, as the element-by-element version did.
            record[:] = [value.strip() or None for value in record]

            yield OrderedDict(zip(self.header, record))
def writeRawToDisk(self):
    """Copy the checked rows of the input file to ``<file_path>_raw.csv``.

    The input at ``self.file_path`` is read as latin-1, tab-delimited text
    with quoting disabled.  Its first row is stored on ``self.raw_header``
    and written as the first row of the output; the remaining rows are
    passed through ``RowChecker`` and written with the default ``csv``
    writer settings.

    Raises:
        StopIteration: If the input file has no rows (propagated from
            ``next(reader)``, as before).
    """
    with open(self.file_path, 'r', encoding='latin-1') as inp:
        reader = csv.reader(inp, delimiter='\t', quoting=csv.QUOTE_NONE)
        self.raw_header = next(reader)
        checker = RowChecker(reader)

        # newline='' stops the text layer from translating the '\r\n' that
        # csv.writer emits; without it every row ends in '\r\r\n' on
        # platforms whose line separator is '\r\n'.
        with open('%s_raw.csv' % self.file_path, 'w', newline='') as outp:
            writer = csv.writer(outp)
            writer.writerow(self.raw_header)
            for row in checker.checked_rows():
                writer.writerow(row)
def main(self):
    """Run the row checker over the input and report or persist the outcome.

    Dry run (``self.args.dryrun``): write each recorded error, or
    ``No errors.``, to ``self.output_file`` plus a joined-rows note when the
    checker reports joins.  Nothing is saved to disk.

    Normal run: write the checked rows to ``<base>_out.csv`` and, when
    errors were recorded, the error rows to ``<base>_err.csv``; ``<base>``
    is ``self.input_file.name`` without its extension.
    """
    incoming = agate.reader(self.input_file, **self.reader_kwargs)

    if self.args.dryrun:
        row_check = RowChecker(incoming)

        # Consume every row; the loop body has nothing to do because only
        # the checker attributes read below are reported.
        for _ in row_check.checked_rows():
            pass

        if not row_check.errors:
            self.output_file.write('No errors.\n')
        else:
            for fault in row_check.errors:
                self.output_file.write(
                    'Line %i: %s\n' % (fault.line_number, fault.msg)
                )

        if row_check.joins:
            note = (
                '%i rows would have been joined/reduced to %i rows after '
                'eliminating expected internal line breaks.\n'
            )
            self.output_file.write(
                note % (row_check.rows_joined, row_check.joins)
            )
        return

    root = splitext(self.input_file.name)[0]

    with open('%s_out.csv' % root, 'w') as clean_fh:
        clean_csv = agate.writer(clean_fh, **self.writer_kwargs)
        row_check = RowChecker(incoming)
        clean_csv.writerow(row_check.column_names)
        for kept in row_check.checked_rows():
            clean_csv.writerow(kept)

    if not row_check.errors:
        self.output_file.write('No errors.\n')
    else:
        fault_path = '%s_err.csv' % root

        with open(fault_path, 'w') as fault_fh:
            fault_csv = agate.writer(fault_fh, **self.writer_kwargs)
            fault_csv.writerow(
                ['line_number', 'msg'] + list(row_check.column_names)
            )
            fault_total = len(row_check.errors)
            for fault in row_check.errors:
                fault_csv.writerow(self._format_error_row(fault))

        ending = 's' if fault_total != 1 else ''
        self.output_file.write(
            '%i error%s logged to %s\n' % (fault_total, ending, fault_path)
        )

    if row_check.joins:
        note = (
            '%i rows were joined/reduced to %i rows after '
            'eliminating expected internal line breaks.\n'
        )
        self.output_file.write(
            note % (row_check.rows_joined, row_check.joins)
        )
def main(self):
    """Check the input CSV and report on it or write cleaned/error files.

    In dry-run mode (``self.args.dryrun``) each recorded error is written to
    ``self.output_file`` as ``Line N: msg`` (or ``No errors.``), followed by
    a joined-rows note when the checker reports joins.

    Otherwise the checked rows are written to ``<base>_out.csv`` and, when
    errors were recorded, the error rows go to ``<base>_err.csv``;
    ``<base>`` is the input file's name without its extension.  Both files
    are opened through ``with`` so they are flushed and closed before the
    summary is printed, instead of being left to garbage collection.
    """
    reader = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.dryrun:
        checker = RowChecker(reader)

        # Exhaust the generator; only the checker attributes read below
        # are reported.
        for _ in checker.checked_rows():
            pass

        if checker.errs:
            for e in checker.errs:
                self.output_file.write(
                    "Line %i: %s\n" % (e.line_number, e.msg)
                )
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows would have been joined/reduced to %i rows after "
                "eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
    else:
        base = splitext(self.args.file.name)[0]

        # should we preserve delimiters and other dialect args from CLI?
        with open("%s_out.csv" % base, "w") as cleaned_fh:
            cleaned_file = CSVKitWriter(cleaned_fh, **self.writer_kwargs)

            checker = RowChecker(reader)
            cleaned_file.writerow(checker.column_names)

            for row in checker.checked_rows():
                cleaned_file.writerow(row)

        if checker.errs:
            # should we preserve delimiters and other dialect args from CLI?
            err_filename = "%s_err.csv" % base

            with open(err_filename, "w") as err_fh:
                err_file = CSVKitWriter(err_fh, **self.writer_kwargs)

                err_header = ['line_number', 'msg']
                err_header.extend(checker.column_names)
                err_file.writerow(err_header)

                for e in checker.errs:
                    err_file.writerow(self._format_error_row(e))

            err_count = len(checker.errs)
            self.output_file.write(
                "%i error%s logged to %s\n"
                % (err_count, "" if err_count == 1 else "s", err_filename)
            )
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows were joined/reduced to %i rows after "
                "eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
def main(self):
    """Clean the input CSV, or just describe what cleaning would do.

    When ``self.additional_input_expected()`` is true a prompt is written to
    standard error before reading.  In dry-run mode (``self.args.dryrun``)
    the recorded errors and any joined-row counts are reported on
    ``self.output_file`` only.  Otherwise checked rows are written to
    ``<base>_out.csv`` and recorded errors to ``<base>_err.csv``; ``<base>``
    is ``stdin`` when reading standard input, else the input file's name
    without its extension.
    """
    if self.additional_input_expected():
        sys.stderr.write(
            'No input file or piped data provided. '
            'Waiting for standard input:\n'
        )

    csv_rows = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)

    if self.args.dryrun:
        audit = RowChecker(csv_rows)

        # Drive the checker to the end of the input; the yielded rows are
        # discarded and only its attributes are consulted below.
        for _ in audit.checked_rows():
            pass

        if not audit.errors:
            self.output_file.write('No errors.\n')
        else:
            for err in audit.errors:
                self.output_file.write(
                    'Line %i: %s\n' % (err.line_number, err.msg)
                )

        if audit.joins:
            would_join = (
                '%i rows would have been joined/reduced to %i rows after '
                'eliminating expected internal line breaks.\n'
            )
            self.output_file.write(
                would_join % (audit.rows_joined, audit.joins)
            )
        return

    reading_stdin = self.input_file == sys.stdin
    if reading_stdin:
        # "<stdin>_out.csv" is invalid on Windows
        name_root = 'stdin'
    else:
        name_root = splitext(self.input_file.name)[0]

    with open('%s_out.csv' % name_root, 'w') as good_fh:
        good_csv = agate.csv.writer(good_fh, **self.writer_kwargs)
        audit = RowChecker(csv_rows)
        good_csv.writerow(audit.column_names)
        for clean_row in audit.checked_rows():
            good_csv.writerow(clean_row)

    if not audit.errors:
        self.output_file.write('No errors.\n')
    else:
        bad_path = '%s_err.csv' % name_root

        with open(bad_path, 'w') as bad_fh:
            bad_csv = agate.csv.writer(bad_fh, **self.writer_kwargs)
            bad_csv.writerow(
                ['line_number', 'msg'] + list(audit.column_names)
            )
            how_many = len(audit.errors)
            for err in audit.errors:
                bad_csv.writerow(self._format_error_row(err))

        tail = 's' if how_many != 1 else ''
        self.output_file.write(
            '%i error%s logged to %s\n' % (how_many, tail, bad_path)
        )

    if audit.joins:
        did_join = (
            '%i rows were joined/reduced to %i rows after '
            'eliminating expected internal line breaks.\n'
        )
        self.output_file.write(did_join % (audit.rows_joined, audit.joins))
def main(self):
    """Check ``self.input_file`` and report on it or write cleaned output.

    With ``self.args.dryrun`` set, the recorded errors (or ``No errors.``)
    and an optional joined-rows note are written to ``self.output_file`` and
    no files are created.

    Without it, checked rows are written to ``<base>_out.csv`` and any
    recorded errors to ``<base>_err.csv``, where ``<base>`` is
    ``self.input_file.name`` minus its extension; a one-line summary is then
    written to ``self.output_file``.
    """
    raw_rows = CSVKitReader(self.input_file, **self.reader_kwargs)

    if self.args.dryrun:
        checked = RowChecker(raw_rows)

        # Nothing to do per row: iterate only so the checker sees the
        # whole input before its attributes are read.
        for _ in checked.checked_rows():
            pass

        if not checked.errors:
            self.output_file.write('No errors.\n')
        else:
            for item in checked.errors:
                self.output_file.write(
                    'Line %i: %s\n' % (item.line_number, item.msg)
                )

        if checked.joins:
            text = (
                '%i rows would have been joined/reduced to %i rows after '
                'eliminating expected internal line breaks.\n'
            )
            self.output_file.write(text % (checked.rows_joined, checked.joins))
        return

    file_root = splitext(self.input_file.name)[0]

    with open('%s_out.csv' % file_root, 'w') as target:
        target_csv = CSVKitWriter(target, **self.writer_kwargs)
        checked = RowChecker(raw_rows)
        target_csv.writerow(checked.column_names)
        for valid_row in checked.checked_rows():
            target_csv.writerow(valid_row)

    if not checked.errors:
        self.output_file.write('No errors.\n')
    else:
        log_path = '%s_err.csv' % file_root

        with open(log_path, 'w') as log_fh:
            log_csv = CSVKitWriter(log_fh, **self.writer_kwargs)
            log_csv.writerow(
                ['line_number', 'msg'] + list(checked.column_names)
            )
            count = len(checked.errors)
            for item in checked.errors:
                log_csv.writerow(self._format_error_row(item))

        plural_s = 's' if count != 1 else ''
        self.output_file.write(
            '%i error%s logged to %s\n' % (count, plural_s, log_path)
        )

    if checked.joins:
        text = (
            '%i rows were joined/reduced to %i rows after '
            'eliminating expected internal line breaks.\n'
        )
        self.output_file.write(text % (checked.rows_joined, checked.joins))
def main(self):
    """Validate the input CSV; report problems or write cleaned/error files.

    Dry run (``self.args.dryrun``): every recorded error is written to
    ``self.output_file`` as ``Line N: msg``, or ``No errors.`` when there
    are none, then a joined-rows note if the checker reports joins.

    Normal run: checked rows are written to ``<base>_out.csv`` and recorded
    errors to ``<base>_err.csv`` (``<base>`` is the input file's name without
    extension).  The output handles are managed by ``with`` blocks so their
    contents are flushed and the descriptors released deterministically;
    previously they were opened inline and never closed.
    """
    reader = CSVKitReader(self.args.file, **self.reader_kwargs)

    if self.args.dryrun:
        checker = RowChecker(reader)

        # Iterate for the checker's benefit only; the rows are discarded.
        for _ in checker.checked_rows():
            pass

        if checker.errs:
            for e in checker.errs:
                self.output_file.write(
                    "Line %i: %s\n" % (e.line_number, e.msg)
                )
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows would have been joined/reduced to %i rows after "
                "eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
    else:
        base = splitext(self.args.file.name)[0]

        # should we preserve delimiters and other dialect args from CLI?
        with open("%s_out.csv" % base, "w") as cleaned_handle:
            cleaned_file = CSVKitWriter(cleaned_handle, **self.writer_kwargs)

            checker = RowChecker(reader)
            cleaned_file.writerow(checker.column_names)

            for row in checker.checked_rows():
                cleaned_file.writerow(row)

        if checker.errs:
            # should we preserve delimiters and other dialect args from CLI?
            err_filename = "%s_err.csv" % base

            with open(err_filename, "w") as err_handle:
                err_file = CSVKitWriter(err_handle, **self.writer_kwargs)

                err_header = ['line_number', 'msg']
                err_header.extend(checker.column_names)
                err_file.writerow(err_header)

                for e in checker.errs:
                    err_file.writerow(self._format_error_row(e))

            err_count = len(checker.errs)
            self.output_file.write(
                "%i error%s logged to %s\n"
                % (err_count, "" if err_count == 1 else "s", err_filename)
            )
        else:
            self.output_file.write("No errors.\n")

        if checker.joins:
            self.output_file.write(
                "%i rows were joined/reduced to %i rows after "
                "eliminating expected internal line breaks.\n"
                % (checker.rows_joined, checker.joins)
            )
def main(self, dryRun):
    """Check the CSV named by ``self.file_name``; optionally write cleaned output.

    Args:
        dryRun: When truthy, only validate and report; no files are
            written.  When falsy, checked rows are written to
            ``<base>_out.csv`` and recorded errors to ``<base>_err.csv``,
            where ``<base>`` is ``self.input_file.name`` without its
            extension.

    Returns:
        Dry run: the error report (one ``Line N: msg`` line per recorded
        error) when errors were found, otherwise ``None``.
        Write mode: the string ``'Non ci sono errori'`` when no errors were
        recorded, otherwise ``None``.
    """
    # NOTE(review): the reader is built from ``self.file_name`` while the
    # output names below come from ``self.input_file.name`` -- confirm both
    # refer to the same file.
    reader = CSVKitReader(self.file_name)

    # Debug trace.  Single-argument print() calls produce the same output
    # on Python 2 and Python 3, unlike the print statement.
    print('dryRun: ')
    print(dryRun)
    print('....\n')
    print(reader)

    if dryRun:
        checker = RowChecker(reader)

        # Exhaust the generator; only the checker attributes are used.
        for _ in checker.checked_rows():
            pass

        if checker.errors:
            # NOTE(review): the report is assumed to be printed and returned
            # once, after all errors are collected -- confirm against the
            # original layout.
            csvErrorCheked = ''.join(
                'Line %i: %s\n' % (e.line_number, e.msg)
                for e in checker.errors
            )
            print(csvErrorCheked)
            return csvErrorCheked

        # No "No errors." message is written in dry-run mode.
        if checker.joins:
            self.output_file.write(
                '%i rows would have been joined/reduced to %i rows after '
                'eliminating expected internal line breaks.\n'
                % (checker.rows_joined, checker.joins)
            )
        return None

    base = splitext(self.input_file.name)[0]

    with open('%s_out.csv' % base, 'w') as f:
        clean_writer = CSVKitWriter(f, **self.writer_kwargs)

        checker = RowChecker(reader)
        clean_writer.writerow(checker.column_names)

        for row in checker.checked_rows():
            clean_writer.writerow(row)

    if not checker.errors:
        # The 'No errors.' write that used to follow this return could
        # never execute and has been removed.
        return 'Non ci sono errori'

    error_filename = '%s_err.csv' % base

    with open(error_filename, 'w') as f:
        error_writer = CSVKitWriter(f, **self.writer_kwargs)

        error_header = ['line_number', 'msg']
        error_header.extend(checker.column_names)
        error_writer.writerow(error_header)

        error_count = len(checker.errors)

        for e in checker.errors:
            error_writer.writerow(self._format_error_row(e))

    self.output_file.write(
        '%i error%s logged to %s\n'
        % (error_count, '' if error_count == 1 else 's', error_filename)
    )

    if checker.joins:
        self.output_file.write(
            '%i rows were joined/reduced to %i rows after '
            'eliminating expected internal line breaks.\n'
            % (checker.rows_joined, checker.joins)
        )
    return None