def test_extract_joinable_row_errors_4(self):
    """Only the final error is reported as joinable for lines 1, 2 and 4.

    The input is a generic ``CSVTestException`` on line 1 followed by
    ``LengthMismatchError`` instances on lines 2 and 4; the helper is
    expected to return exactly one error, and it must be the line-4 one.
    NOTE(review): presumably the gap between lines 2 and 4 is what breaks
    the run -- confirm against ``extract_joinable_row_errors``.
    """
    generic_error = CSVTestException(
        1, ['foo', 'bar', 'baz'], "A throwaway message.")
    line_two_error = LengthMismatchError(2, ['foo', 'bar', 'baz'], 10)
    line_four_error = LengthMismatchError(4, ['foo', 'bar', 'baz'], 10)

    result = extract_joinable_row_errors(
        [generic_error, line_two_error, line_four_error])

    self.assertEqual(1, len(result))
    only_joinable = next(iter(result))
    self.assertTrue(only_joinable is line_four_error)
def test_extract_joinable_row_errors(self):
    """Three length mismatches on lines 1-3 are all returned, in order.

    Each returned item must be the very same object that was passed in.
    """
    errs = [
        LengthMismatchError(line, ['foo', 'bar', 'baz'], 10)
        for line in (1, 2, 3)
    ]

    result = extract_joinable_row_errors(errs)

    self.assertEqual(3, len(result))
    for expected, actual in zip(errs, result):
        self.assertTrue(expected is actual)
def test_extract_joinable_row_errors_3(self):
    """A leading non-length error is left out of the joinable errors.

    The input is a generic ``CSVTestException`` on line 1 followed by
    ``LengthMismatchError`` instances on lines 2 and 3; only the two
    length mismatches are expected back, in their original order.
    """
    generic_error = CSVTestException(
        1, ['foo', 'bar', 'baz'], "A throwaway message.")
    line_two_error = LengthMismatchError(2, ['foo', 'bar', 'baz'], 10)
    line_three_error = LengthMismatchError(3, ['foo', 'bar', 'baz'], 10)

    result = extract_joinable_row_errors(
        [generic_error, line_two_error, line_three_error])

    self.assertEqual(2, len(result))
    # Safe to unpack: the length was asserted to be exactly two above.
    first, second = result
    self.assertTrue(first is line_two_error)
    self.assertTrue(second is line_three_error)
def test_fix_length_errors_basic(self):
    """Two short rows are merged into a single row of the expected length.

    A three-field row and a two-field row, both reported against an
    expected length of four, should come back as one row whose touching
    fields are joined with a space ('gam' + 'ma' -> 'gam ma').
    """
    expected_length = 4
    errs = [
        LengthMismatchError(1, ['alpha', 'beta', 'gam'], expected_length),
        LengthMismatchError(2, ['ma', 'delta'], expected_length),
    ]

    repaired = fix_length_errors(errs, expected_length)

    self.assertEqual(1, len(repaired))
    row = repaired[0]
    for position, value in enumerate(['alpha', 'beta', 'gam ma', 'delta']):
        self.assertEqual(value, row[position])
def checked_rows(self):
    """A generator which yields OK rows which are ready to write to output.

    Rows whose length equals ``len(self.column_names)`` are yielded
    unchanged.  Any other row is recorded in ``self.errs`` as a
    ``LengthMismatchError``; the joinable errors found in ``self.errs`` are
    then merged with ``join_rows`` and, when the merged row has exactly the
    expected length, it is yielded and the errors it was built from are
    removed from ``self.errs``.  Any other ``CSVTestException`` raised while
    checking a row is appended to ``self.errs``.

    Side effects: ``self.input_rows`` is incremented for every row read;
    ``self.rows_joined`` and ``self.joins`` are updated on every
    successful join.
    """
    for row in self.reader:
        self.input_rows += 1
        line_number = self.input_rows + 1  # add one for 1-based counting
        expected_length = len(self.column_names)
        try:
            if len(row) != expected_length:
                raise LengthMismatchError(line_number, row, expected_length)
            # any other tests?
            yield row
        # ``except X as e`` is valid on Python 2.6+ and Python 3; the old
        # ``except X, e`` spelling is a SyntaxError on Python 3.
        except LengthMismatchError as e:
            self.errs.append(e)
            # see if we can actually clean up those length mismatches
            joinable_row_errors = extract_joinable_row_errors(self.errs)
            while joinable_row_errors:
                fixed_row = join_rows(
                    [err.row for err in joinable_row_errors], joiner=' ')
                if len(fixed_row) < expected_length:
                    # Still too short: wait for more rows before joining.
                    break
                if len(fixed_row) == expected_length:
                    self.rows_joined += len(joinable_row_errors)
                    self.joins += 1
                    yield fixed_row
                    # Iterate over a snapshot so removal stays correct even
                    # if the helper handed back ``self.errs`` itself.
                    for fixed in list(joinable_row_errors):
                        self.errs.remove(fixed)
                    break
                # keep trying in case we're too long because of a straggler
                joinable_row_errors = joinable_row_errors[1:]
        except CSVTestException as e:
            self.errs.append(e)
def checked_rows(self):
    """
    A generator which yields rows which are ready to write to output.

    Rows whose length equals ``len(self.column_names)`` are yielded
    unchanged.  A row of any other length is recorded in ``self.errors`` as
    a ``LengthMismatchError``.  Consecutive short rows are accumulated and
    merged with ``join_rows``; when the merged row has exactly the expected
    length it is yielded and every error it was built from is removed from
    ``self.errors``.  A valid row, an over-long row, or any other
    ``CSVTestException`` resets the accumulated run.

    Side effects: ``self.rows_joined`` and ``self.joins`` are updated on
    every successful join.
    """
    length = len(self.column_names)
    line_number = self.reader.line_num
    joinable_row_errors = []

    for row in self.reader:
        try:
            if len(row) != length:
                raise LengthMismatchError(line_number, row, length)

            yield row

            # Don't join rows across valid rows.
            joinable_row_errors = []
        except LengthMismatchError as e:
            self.errors.append(e)

            # Don't join with long rows.
            if len(row) > length:
                joinable_row_errors = []
            else:
                joinable_row_errors.append(e)

            while joinable_row_errors:
                fixed_row = join_rows(
                    [error.row for error in joinable_row_errors], joiner=' ')

                if len(fixed_row) < length:
                    # Still too short: wait for more rows before joining.
                    break

                if len(fixed_row) == length:
                    self.rows_joined += len(joinable_row_errors)
                    self.joins += 1

                    yield fixed_row

                    # Every joined row is now resolved.  Do not remove items
                    # from ``joinable_row_errors`` while iterating over it:
                    # that skips every second element and leaves resolved
                    # errors behind.  Clear the run afterwards instead.
                    for fixed in joinable_row_errors:
                        self.errors.remove(fixed)
                    joinable_row_errors = []

                    break

                # keep trying in case we're too long because of a straggler
                joinable_row_errors = joinable_row_errors[1:]
        except CSVTestException as e:
            self.errors.append(e)

            # Don't join rows across other errors.
            joinable_row_errors = []

        # Snapshot the reader position before the next row is pulled.
        line_number = self.reader.line_num
def checked_rows(self):
    """
    Generate the rows that are ready to be written to output.

    A row whose length equals ``len(self.column_names)`` is yielded as-is.
    Any other row is recorded in ``self.errors`` as a
    ``LengthMismatchError``; the joinable errors found in ``self.errors``
    are then merged with ``join_rows`` and, if the merged row has exactly
    the expected length, it is yielded and its source errors are removed
    from ``self.errors``.  Any other ``CSVTestException`` is appended to
    ``self.errors``.
    """
    current_line = self.reader.line_num

    for record in self.reader:
        try:
            if len(record) == len(self.column_names):
                yield record
            else:
                raise LengthMismatchError(
                    current_line, record, len(self.column_names))
        except LengthMismatchError as mismatch:
            self.errors.append(mismatch)
            candidates = extract_joinable_row_errors(self.errors)

            while candidates:
                merged = join_rows(
                    [candidate.row for candidate in candidates], joiner=' ')

                if len(merged) < len(self.column_names):
                    # Not enough fields yet; more rows are needed.
                    break

                if len(merged) != len(self.column_names):
                    # Too long, possibly because of a straggler at the
                    # front: drop it and try again.
                    candidates = candidates[1:]
                    continue

                self.rows_joined += len(candidates)
                self.joins += 1

                yield merged

                for resolved in candidates:
                    self.errors.remove(resolved)
                break
        except CSVTestException as failure:
            self.errors.append(failure)

        # Snapshot the reader position before the next row is pulled.
        current_line = self.reader.line_num