Beispiel #1
0
    def main(self):
        if self.additional_input_expected():
            sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

        reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass

            if checker.errors:
                for e in checker.errors:
                    self.output_file.write('Line %i: %s\n' % (e.line_number, e.msg))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows would have been joined/reduced to %i rows after eliminating expected '
                                       'internal line breaks.\n' % (checker.rows_joined, checker.joins))
        else:
            if self.input_file == sys.stdin:
                base = 'stdin'  # "<stdin>_out.csv" is invalid on Windows
            else:
                base = splitext(self.input_file.name)[0]

            with open('%s_out.csv' % base, 'w') as f:
                clean_writer = agate.csv.writer(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = agate.csv.writer(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write('%i error%s logged to %s\n' % (
                    error_count, '' if error_count == 1 else 's', error_filename))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal '
                                       'line breaks.\n' % (checker.rows_joined, checker.joins))
Beispiel #2
0
    def main(self):
        reader = CSVKitReader(self.input_file, **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass

            if checker.errors:
                for e in checker.errors:
                    self.output_file.write('Line %i: %s\n' %
                                           (e.line_number, e.msg))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write(
                    '%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n'
                    % (checker.rows_joined, checker.joins))
        else:
            base, ext = splitext(self.input_file.name)

            with open('%s_out.csv' % base, 'w') as f:
                clean_writer = CSVKitWriter(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = CSVKitWriter(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write(
                    '%i error%s logged to %s\n' %
                    (error_count, '' if error_count == 1 else 's',
                     error_filename))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write(
                    '%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n'
                    % (checker.rows_joined, checker.joins))
Beispiel #3
0
    def main(self):
        reader = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass

            if checker.errors:
                for e in checker.errors:
                    self.output_file.write("Line %i: %s\n" % (e.line_number, e.msg))
            else:
                self.output_file.write("No errors.\n")

            if checker.joins:
                self.output_file.write(
                    "%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                    % (checker.rows_joined, checker.joins)
                )
        else:
            base, ext = splitext(self.args.file.name)

            with open("%s_out.csv" % base, "w") as f:
                clean_writer = CSVKitWriter(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = "%s_err.csv" % base

                with open(error_filename, "w") as f:
                    error_writer = CSVKitWriter(f, **self.writer_kwargs)

                    error_header = ["line_number", "msg"]
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write(
                    "%i error%s logged to %s\n" % (error_count, "" if error_count == 1 else "s", error_filename)
                )
            else:
                self.output_file.write("No errors.\n")

            if checker.joins:
                self.output_file.write(
                    "%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                    % (checker.rows_joined, checker.joins)
                )
Beispiel #4
0
    def main(self):
        if self.additional_input_expected():
            sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

        reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass

            if checker.errors:
                for e in checker.errors:
                    self.output_file.write('Line %i: %s\n' % (e.line_number, e.msg))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
        else:
            if self.input_file == sys.stdin:
                base = 'stdin'  # "<stdin>_out.csv" is invalid on Windows
            else:
                base = splitext(self.input_file.name)[0]

            with open('%s_out.csv' % base, 'w') as f:
                clean_writer = agate.csv.writer(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = agate.csv.writer(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write('%i error%s logged to %s\n' % (error_count, '' if error_count == 1 else 's', error_filename))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
Beispiel #5
0
    def main(self):
        reader = agate.reader(self.input_file, **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass

            if checker.errors:
                for e in checker.errors:
                    self.output_file.write('Line %i: %s\n' % (e.line_number, e.msg))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
        else:
            base, ext = splitext(self.input_file.name)

            with open('%s_out.csv' % base, 'w') as f:
                clean_writer = agate.writer(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = agate.writer(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write('%i error%s logged to %s\n' % (error_count, '' if error_count == 1 else 's', error_filename))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
Beispiel #6
0
    def main(self):
        reader = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)
            for row in checker.checked_rows():
                pass
            if checker.errs:
                for e in checker.errs:
                    self.output_file.write("Line %i: %s\n" %
                                           (e.line_number, e.msg))
            else:
                self.output_file.write("No errors.\n")
            if checker.joins:
                self.output_file.write(
                    "%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                    % (checker.rows_joined, checker.joins))
        else:
            base, ext = splitext(self.args.file.name)
            # should we preserve delimiters and other dialect args from CLI?
            cleaned_file = CSVKitWriter(open("%s_out.csv" % base, "w"),
                                        **self.writer_kwargs)

            checker = RowChecker(reader)
            cleaned_file.writerow(checker.column_names)
            for row in checker.checked_rows():
                cleaned_file.writerow(row)

            if checker.errs:
                # should we preserve delimiters and other dialect args from CLI?
                err_filename = "%s_err.csv" % base
                err_file = CSVKitWriter(open(err_filename, "w"),
                                        **self.writer_kwargs)
                err_header = ['line_number', 'msg']
                err_header.extend(checker.column_names)
                err_file.writerow(err_header)
                for e in checker.errs:
                    err_file.writerow(self._format_error_row(e))
                    err_count = len(checker.errs)
                self.output_file.write(
                    "%i error%s logged to %s\n" %
                    (err_count, "" if err_count == 1 else "s", err_filename))
            else:
                self.output_file.write("No errors.\n")

            if checker.joins:
                self.output_file.write(
                    "%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n"
                    % (checker.rows_joined, checker.joins))
Beispiel #7
0
 def iterColumn(self, col_idx):
     with open(self.fpath, 'r', encoding=self.encoding) as f:
         reader = csv.reader(f, delimiter=self.delimiter, quoting=self.quoting)
         header = next(reader)
         checker = RowChecker(reader)
         for row in checker.checked_rows():
             try:
                 yield row[col_idx]
             except IndexError:
                 continue
 def transform(self):
     with open(self.file_path, "r", encoding="latin1") as f:
         reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
         checker = RowChecker(reader)
         for row in checker.checked_rows():
             if row:
                 for idx, cell in enumerate(row):
                     row[idx] = cell.strip()
                     if not row[idx]:
                         row[idx] = None
                 yield OrderedDict(zip(self.header, row))
Beispiel #9
0
 def iterColumn(self, col_idx):
     with open(self.fpath, 'r', encoding=self.encoding) as f:
         reader = csv.reader(f,
                             delimiter=self.delimiter,
                             quoting=self.quoting)
         header = next(reader)
         checker = RowChecker(reader)
         for row in checker.checked_rows():
             try:
                 yield row[col_idx]
             except IndexError:
                 continue
Beispiel #10
0
    def main(self):
        reader = CSVKitReader(self.args.file, **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)
            for row in checker.checked_rows():
                pass
            if checker.errs:
                for e in checker.errs:
                    self.output_file.write("Line %i: %s\n" % (e.line_number,e.msg))
            else:
                self.output_file.write("No errors.\n")
            if checker.joins:
                self.output_file.write("%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n" % (checker.rows_joined, checker.joins))
        else:
            base,ext = splitext(self.args.file.name)
            # should we preserve delimiters and other dialect args from CLI?
            cleaned_file = CSVKitWriter(open("%s_out.csv" % base,"w"), **self.writer_kwargs)

            checker = RowChecker(reader)
            cleaned_file.writerow(checker.column_names)
            for row in checker.checked_rows():
                cleaned_file.writerow(row)
            
            if checker.errs:
                # should we preserve delimiters and other dialect args from CLI?
                err_filename = "%s_err.csv" % base
                err_file = CSVKitWriter(open(err_filename, "w"), **self.writer_kwargs)
                err_header = ['line_number','msg']
                err_header.extend(checker.column_names)
                err_file.writerow(err_header)
                for e in checker.errs:
                    err_file.writerow(self._format_error_row(e))
                    err_count = len(checker.errs)
                self.output_file.write("%i error%s logged to %s\n" % (err_count,"" if err_count == 1 else "s", err_filename))
            else:
                self.output_file.write("No errors.\n")

            if checker.joins:
                self.output_file.write("%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n" % (checker.rows_joined, checker.joins))
Beispiel #11
0
    def writeRawToDisk(self):
        with open(self.file_path, 'r', encoding='latin-1') as inp:
            reader = csv.reader(inp, delimiter='\t', quoting=csv.QUOTE_NONE)
            self.raw_header = next(reader)
            checker = RowChecker(reader)
            
            with open('%s_raw.csv' % self.file_path, 'w') as outp:
                writer = csv.writer(outp)

                writer.writerow(self.raw_header)
                
                for row in checker.checked_rows():
                    writer.writerow(row)
Beispiel #12
0
    def writeRawToDisk(self):
        with open(self.file_path, 'r', encoding='latin-1') as inp:
            reader = csv.reader(inp, delimiter='\t', quoting=csv.QUOTE_NONE)
            self.raw_header = next(reader)
            checker = RowChecker(reader)

            with open('%s_raw.csv' % self.file_path, 'w') as outp:
                writer = csv.writer(outp)

                writer.writerow(self.raw_header)

                for row in checker.checked_rows():
                    writer.writerow(row)
Beispiel #13
0
    def main(self, dryRun):
#        reader = CSVKitReader(self.input_file, **self.reader_kwargs)
        reader = CSVKitReader(self.file_name)
        csvErrorCheked = ''
        
        print 'dryRun: '
        print dryRun
        print '....\n'
        print reader

        if dryRun:
            checker = RowChecker(reader)

            for row in checker.checked_rows():
                pass
            
            if checker.errors:
                for e in checker.errors:
#                    self.output_file.write('Line %i: %s\n' % (e.line_number, e.msg))
                    csvErrorCheked += ('Line %i: %s\n' % (e.line_number, e.msg))
                    print csvErrorCheked
                    return csvErrorCheked
#             else:
#                 self.output_file.write('No errors.\n')
            
            if checker.joins:
                self.output_file.write('%i rows would have been joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
        else:
            base, ext = splitext(self.input_file.name)

            with open('%s_out.csv' % base,'w') as f:
                clean_writer = CSVKitWriter(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)
            
            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = CSVKitWriter(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write('%i error%s logged to %s\n' % (error_count,'' if error_count == 1 else 's', error_filename))
            else:
                return 'Non ci sono errori'
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))