Example #1
0
 def test_extract_joinable_row_errors_4(self):
     # One generic error on line 1, then length mismatches on lines 2 and 4
     # (no error is recorded for line 3).
     generic = CSVTestException(1, ['foo', 'bar', 'baz'], "A throwaway message.")
     mismatch_line_2 = LengthMismatchError(2, ['foo', 'bar', 'baz'], 10)
     mismatch_line_4 = LengthMismatchError(4, ['foo', 'bar', 'baz'], 10)

     result = extract_joinable_row_errors(
         [generic, mismatch_line_2, mismatch_line_4])

     # Exactly one error must come back, and it must be the very same object
     # as the last mismatch (presumably because line 4 is not adjacent to
     # line 2 -- confirm against extract_joinable_row_errors).
     self.assertEqual(1, len(result))
     only_entry = next(iter(result))
     self.assertTrue(only_entry is mismatch_line_4)
Example #2
0
 def test_extract_joinable_row_errors(self):
     # Three length mismatches on lines 1, 2 and 3.
     errs = [
         LengthMismatchError(line, ['foo', 'bar', 'baz'], 10)
         for line in (1, 2, 3)
     ]

     joinable = extract_joinable_row_errors(errs)

     # All three must be returned, in order, as the identical objects.
     self.assertEqual(3, len(joinable))
     for original, returned in zip(errs, joinable):
         self.assertTrue(original is returned)
Example #3
0
 def test_extract_joinable_row_errors_3(self):
     # A generic error on line 1 followed by length mismatches on lines 2
     # and 3.
     generic = CSVTestException(1, ['foo', 'bar', 'baz'], "A throwaway message.")
     mismatch_line_2 = LengthMismatchError(2, ['foo', 'bar', 'baz'], 10)
     mismatch_line_3 = LengthMismatchError(3, ['foo', 'bar', 'baz'], 10)

     result = extract_joinable_row_errors(
         [generic, mismatch_line_2, mismatch_line_3])

     # Only the two length mismatches are expected back, in their original
     # order and as the identical objects.
     self.assertEqual(2, len(result))
     as_list = list(result)
     for position, expected in enumerate((mismatch_line_2, mismatch_line_3)):
         self.assertTrue(as_list[position] is expected)
Example #4
0
 def test_fix_length_errors_basic(self):
     # Two short rows (3 and 2 fields) that should merge into a single
     # 4-field row, with the touching fields joined by a space.
     expected_length = 4
     first_error = LengthMismatchError(
         1, ['alpha', 'beta', 'gam'], expected_length)
     second_error = LengthMismatchError(
         2, ['ma', 'delta'], expected_length)

     fixed = fix_length_errors([first_error, second_error], expected_length)

     self.assertEqual(1, len(fixed))
     merged_row = fixed[0]
     wanted_fields = ('alpha', 'beta', 'gam ma', 'delta')
     for index, wanted in enumerate(wanted_fields):
         self.assertEqual(wanted, merged_row[index])
Example #5
0
    def checked_rows(self):
        """A generator which yields OK rows which are ready to write to output.

        Each row read from ``self.reader`` is compared against the number of
        entries in ``self.column_names``. Rows of the right length are yielded
        unchanged. A row of the wrong length is recorded in ``self.errs`` as a
        ``LengthMismatchError``; the joinable errors collected so far are then
        merged with ``join_rows`` and, when the merged row has exactly the
        expected length, it is yielded and the errors it was built from are
        removed from ``self.errs``.

        Side effects: increments ``self.input_rows`` for every row read, and
        updates ``self.rows_joined`` and ``self.joins`` on a successful join.
        """
        for row in self.reader:
            self.input_rows += 1
            line_number = self.input_rows + 1  # add one for 1-based counting

            try:
                if len(row) != len(self.column_names):
                    raise LengthMismatchError(line_number, row,
                                              len(self.column_names))
                # any other tests?
                yield row
            # ``except X as e`` is valid on Python 2.6+ and Python 3; the old
            # ``except X, e`` form is a SyntaxError on Python 3.
            except LengthMismatchError as e:
                self.errs.append(e)
                # see if we can actually clean up those length mismatches
                joinable_row_errors = extract_joinable_row_errors(self.errs)
                while joinable_row_errors:
                    fixed_row = join_rows(
                        [err.row for err in joinable_row_errors], joiner=' ')
                    # Still too short: wait for more rows before retrying.
                    if len(fixed_row) < len(self.column_names):
                        break
                    if len(fixed_row) == len(self.column_names):
                        self.rows_joined += len(joinable_row_errors)
                        self.joins += 1
                        yield fixed_row
                        # The joined rows are no longer errors.
                        for fixed in joinable_row_errors:
                            self.errs.remove(fixed)
                        break
                    # keep trying in case we're too long because of a straggler
                    joinable_row_errors = joinable_row_errors[1:]

            except CSVTestException as e:
                self.errs.append(e)
Example #6
0
    def checked_rows(self):
        """
        A generator which yields rows which are ready to write to output.

        Rows whose length matches ``self.column_names`` are yielded as-is.
        A row of any other length is recorded in ``self.errors`` as a
        ``LengthMismatchError``. Consecutive short rows are accumulated and
        merged with ``join_rows``; when the merged row reaches exactly the
        expected length it is yielded, ``self.rows_joined`` and ``self.joins``
        are updated, and the errors it was built from are removed from
        ``self.errors``. A valid row, an over-long row, or any other
        ``CSVTestException`` resets the accumulated short rows.
        """
        length = len(self.column_names)
        line_number = self.reader.line_num
        joinable_row_errors = []

        for row in self.reader:
            try:
                if len(row) != length:
                    raise LengthMismatchError(line_number, row, length)

                yield row

                # Don't join rows across valid rows.
                joinable_row_errors = []
            except LengthMismatchError as e:
                self.errors.append(e)

                # Don't join with long rows.
                if len(row) > length:
                    joinable_row_errors = []
                else:
                    joinable_row_errors.append(e)

                    while joinable_row_errors:
                        fixed_row = join_rows(
                            [error.row for error in joinable_row_errors],
                            joiner=' ')

                        # Still too short: wait for more rows.
                        if len(fixed_row) < length:
                            break

                        if len(fixed_row) == length:
                            self.rows_joined += len(joinable_row_errors)
                            self.joins += 1

                            yield fixed_row

                            # The joined rows are no longer errors. Only
                            # self.errors is mutated inside the loop: removing
                            # from the list being iterated would skip every
                            # other entry and leave stale errors behind.
                            for fixed in joinable_row_errors:
                                self.errors.remove(fixed)

                            joinable_row_errors = []

                            break

                        # keep trying in case we're too long because of a straggler
                        joinable_row_errors = joinable_row_errors[1:]

            except CSVTestException as e:
                self.errors.append(e)

                # Don't join rows across other errors.
                joinable_row_errors = []

            line_number = self.reader.line_num
Example #7
0
    def checked_rows(self):
        """
        Generate the rows that are fit to be written to output.

        Rows with as many fields as ``self.column_names`` are yielded
        untouched. Any other row is stored in ``self.errors`` as a
        ``LengthMismatchError``; the joinable errors reported by
        ``extract_joinable_row_errors`` are then merged with ``join_rows`` and
        the merged row is yielded once it has exactly the expected width, at
        which point its source errors are dropped from ``self.errors`` and
        ``self.rows_joined`` / ``self.joins`` are updated.
        """
        current_line = self.reader.line_num

        for record in self.reader:
            try:
                if len(record) != len(self.column_names):
                    raise LengthMismatchError(
                        current_line, record, len(self.column_names))

                yield record
            except LengthMismatchError as mismatch:
                self.errors.append(mismatch)

                candidates = extract_joinable_row_errors(self.errors)

                while candidates:
                    merged = join_rows(
                        [candidate.row for candidate in candidates],
                        joiner=' ')
                    merged_width = len(merged)
                    wanted_width = len(self.column_names)

                    # Not enough fields yet: more rows are needed.
                    if merged_width < wanted_width:
                        break

                    if merged_width > wanted_width:
                        # keep trying in case we're too long because of a
                        # straggler
                        candidates = candidates[1:]
                        continue

                    # Exact fit: emit the merged row and forget its errors.
                    self.rows_joined += len(candidates)
                    self.joins += 1

                    yield merged

                    for resolved in candidates:
                        self.errors.remove(resolved)

                    break

            except CSVTestException as other_error:
                self.errors.append(other_error)

            current_line = self.reader.line_num