def testTooFewItems(self):
    # Rows that leave the required second field empty or omit it entirely
    # must be counted as rejected, not accepted.
    icdText = ""","Interface with two fields with the second being required"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","customer_id",,,,Integer,0:
"F","first_name","John",,,"Text"
"""
    document = interface.InterfaceControlDocument()
    document.read(StringIO.StringIO(icdText))

    # "123," has an explicitly empty second item, "234" lacks the second
    # item altogether; both are validated the same way.
    for brokenRow in ("123,", "234"):
        rowReadable = StringIO.StringIO(brokenRow)
        document.addValidationListener(_defaultIcdListener)
        try:
            document.validate(rowReadable)
            self.assertEqual(document.acceptedCount, 0)
            self.assertEqual(document.rejectedCount, 1)
        finally:
            # Always detach the listener, even if an assertion failed.
            document.removeValidationListener(_defaultIcdListener)
def createDefaultTestFixedIcd():
    """
    Build the customer ICD used by tests for data in "Fixed" format.

    The specification declares six fixed-length fields and two checks
    (``IsUnique`` and ``DistinctCount``) and is read from an in-memory
    readable.

    Returns the ``interface.InterfaceControlDocument`` that read the spec.
    """
    fixedSpec = u""",Interface: customer
,
,Data format
,
D,Format,Fixed
D,Line delimiter,any
D,Encoding,ISO-8859-1
D,Allowed characters,32:
,
,Fields
,
,Name,Example,Empty,Length,Type,Rule
F,branch_id,38123,,5,RegEx,38\d\d\d
F,customer_id,12345,,5,Integer,0:99999
F,first_name,John,X,15,Text
F,surname,Doe,,15,Text
F,gender,male,,7,Choice,"male, female, unknown"
F,date_of_birth,08.03.1957,,10,DateTime,DD.MM.YYYY
,
,Checks
,
,Description,Type,Rule
C,customer must be unique,IsUnique,"branch_id, customer_id"
C,distinct branches must be within limit,DistinctCount,branch_id <= 3
"""
    fixedIcd = interface.InterfaceControlDocument()
    specReadable = StringIO.StringIO(fixedSpec)
    fixedIcd.read(specReadable)
    return fixedIcd
def setIcdFromFile(self, newIcdPath):
    """
    Read the ICD stored in ``newIcdPath`` and make it the current one.

    The file is read using ``self.icdEncoding``. If ``self.options`` is
    set, its ``isLogTrace`` value is passed on to the new ICD before
    reading. ``self.icd`` and ``self.interfaceSpecificationPath`` are only
    updated after reading finished without raising.
    """
    assert newIcdPath is not None

    loadedIcd = interface.InterfaceControlDocument()
    currentOptions = self.options
    if currentOptions is not None:
        loadedIcd.logTrace = currentOptions.isLogTrace
    loadedIcd.read(newIcdPath, self.icdEncoding)

    # Reading succeeded, so it is safe to replace the current state.
    self.icd = loadedIcd
    self.interfaceSpecificationPath = newIcdPath
def testCanSniffAndValidateUsingMain(self):
    # Sniff an ICD from "valid_customers.csv" using the command line entry
    # point, then read the ICD written and run the same data through it.
    sniffedIcdPath = dev_test.getTestOutputPath("icd_sniffed_valid_customers.csv")
    customersPath = dev_test.getTestInputPath("valid_customers.csv")

    sniffArguments = ["test", sniffedIcdPath, customersPath]
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromFile = interface.InterfaceControlDocument()
    icdFromFile.read(sniffedIcdPath)
    # Iterate to the end so every row gets processed; the rows themselves
    # are of no interest here.
    for _row in interface.validatedRows(icdFromFile, customersPath):
        pass
def testValidatedRowsWithBrokenDataFormat(self):
    # Reading the "native_excel_formats.ods" ICD and iterating the CSV
    # customers through it must end with an ``xlrd.XLRDError``.
    try:
        excelIcdPath = dev_test.getTestIcdPath("native_excel_formats.ods")
        excelIcd = interface.InterfaceControlDocument()
        excelIcd.read(excelIcdPath)
        for _row in excelIcd.validatedRows(self._validCostumersCsvPath):
            pass
    except xlrd.XLRDError:
        # Expected: the data are not in the format the ICD asks for.
        pass
    else:
        self.fail("XLRDError expected")
def testCanSniffAndValidateUsingMainWithHeaderAndEncoding(self):
    # Same as the plain sniff-and-validate test, but the data are encoded
    # in ISO-8859-15 and "--head 1" is passed to the sniffer.
    sniffedIcdPath = dev_test.getTestOutputPath(
        "icd_sniffed_valid_customers_with_header_iso-8859-15.csv")
    customersPath = dev_test.getTestInputPath(
        "valid_customers_with_header_iso-8859-15.csv")

    sniffArguments = ["test", "--data-encoding", "iso-8859-15", "--head", "1"]
    sniffArguments += [sniffedIcdPath, customersPath]
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromFile = interface.InterfaceControlDocument()
    icdFromFile.read(sniffedIcdPath)
    # Iterate to the end so every row gets processed.
    for _row in interface.validatedRows(icdFromFile, customersPath):
        pass
def testTooManyItems(self):
    # A data row with two items must be rejected by an ICD that declares
    # only a single field.
    singleFieldSpec = ""","Interface with a single field"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","first_name","John","X",,"Text"
"""
    singleFieldIcd = interface.InterfaceControlDocument()
    singleFieldIcd.read(StringIO.StringIO(singleFieldSpec))

    rowWithTwoItems = StringIO.StringIO("John, Doe")
    singleFieldIcd.validate(rowWithTwoItems)
    self.assertEqual(singleFieldIcd.rejectedCount, 1)
def testFieldTypeWithModule(self):
    # The field type is given with its module ("fields.Text"); reading the
    # ICD and validating two rows must not raise.
    qualifiedTypeSpec = ""","Interface with field using a fully qualified type"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","first_name","John","X",,"fields.Text"
"""
    qualifiedTypeIcd = interface.InterfaceControlDocument()
    qualifiedTypeIcd.read(StringIO.StringIO(qualifiedTypeSpec))

    namesText = """"John"
Jane"""
    qualifiedTypeIcd.validate(StringIO.StringIO(namesText))
def testCanSniffAndValidateUsingMainWithFieldNames(self):
    # Pass explicit field names (with stray blanks around some of them) to
    # the sniffer and expect the sniffed ICD to carry the trimmed names.
    sniffedIcdPath = dev_test.getTestOutputPath("icd_sniffed_valid_customers.csv")
    customersPath = dev_test.getTestInputPath("valid_customers.csv")

    namesOption = " branchId,customerId, firstName,surName ,gender,dateOfBirth "
    sniffArguments = ["test", "--names", namesOption, sniffedIcdPath, customersPath]
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromFile = interface.InterfaceControlDocument()
    icdFromFile.read(sniffedIcdPath)
    expectedFieldNames = [
        "branchId",
        "customerId",
        "firstName",
        "surName",
        "gender",
        "dateOfBirth",
    ]
    self.assertEqual(icdFromFile.fieldNames, expectedFieldNames)
    # Iterate to the end so every row gets processed.
    for _row in interface.validatedRows(icdFromFile, customersPath):
        pass
def testBrokenFieldWithTooFewItems(self):
    # A field row consisting of nothing but the "F" marker must raise a
    # ``fields.FieldSyntaxError``.
    baseSpec = ""","Broken Interface with a field that has too few items"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","branch_id",,,,"RegEx","38\d\d\d"
"""
    # Sanity check: with a field row that at least has a name, the base
    # spec can be read without error.
    readableSpec = StringIO.StringIO(baseSpec + "F,customer_id")
    workingIcd = interface.InterfaceControlDocument()
    workingIcd.read(readableSpec)

    # The actual test: the final field row lacks even a name.
    self._testBroken(baseSpec + "F", fields.FieldSyntaxError)
def testSkipHeader(self):
    # The ICD sets the "Header" property to 1 and the data start with a
    # heading row; reading and validating must not raise.
    headerSpec = ""","Interface for data with header rows"
"D","Format","CSV"
"D","Line delimiter","Any"
"D","Item delimiter",","
"D","Header","1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","first_name","John","X",,"Text"
"F","gender","male",X,,"Choice","female, male"
"F","date_of_birth",08.03.1957,,,"DateTime","DD.MM.YYYY"
"""
    headerIcd = interface.InterfaceControlDocument()
    headerIcd.read(StringIO.StringIO(headerSpec))

    dataWithHeading = """First Name,Gender,Date of birth
John,male,08.03.1957
Mike,male,23.12.1974"""
    headerIcd.validate(StringIO.StringIO(dataWithHeading))
def testEmptyChoiceWithLength(self):
    # The "gender" choice may be empty although it declares a length of
    # 4:6; validating a row with an empty gender must not raise.
    choiceSpec = ""","Interface with a Choice field (gender) that can be empty and has a field length > 0"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","first_name","John","X",,"Text"
"F","gender","male",X,4:6,"Choice","female, male"
"F","date_of_birth",08.03.1957,,,"DateTime","DD.MM.YYYY"
"""
    choiceIcd = interface.InterfaceControlDocument()
    choiceIcd.read(StringIO.StringIO(choiceSpec))

    customersText = """"John",,"08.03.1957"
"Jane","female","04.10.1946"
"""
    choiceIcd.validate(StringIO.StringIO(customersText))
def testCanValidateFieldFormatFromPlugin(self):
    # After importing the test plugins, an ICD using the field type
    # "CapitalizedText" must be readable and usable for validation.
    pluginSpec = ""","Interface for data with plugged field format"
"D","Format","CSV"
"D","Line delimiter","Any"
"D","Item delimiter",","
,
,"Name","Example","Empty","Length","Type","Rule"
"F","first_name","John","X",,"Text"
"F","sirname","Smith","X",,"CapitalizedText"
"""
    # Log the known field formats before and after the import to make it
    # visible what the plugins contributed.
    formatsBefore = sorted(fields.AbstractFieldFormat.__subclasses__())  # @UndefinedVariable
    _log.info(u"subclasses before=%s", formatsBefore)
    interface.importPlugins(dev_test.getTestPluginsPath())
    formatsAfter = sorted(fields.AbstractFieldFormat.__subclasses__())  # @UndefinedVariable
    _log.info(u"subclasses after=%s", formatsAfter)

    pluginIcd = interface.InterfaceControlDocument()
    pluginIcd.read(StringIO.StringIO(pluginSpec))

    namesText = """First Name,Gender,Date of birth
John,Smith
Bärbel,Müller"""
    pluginIcd.validate(StringIO.StringIO(namesText))
def testBrokenCheckTooFewItems(self):
    # Check rows that stop after the "C" marker or after the description
    # must raise a ``checks.CheckSyntaxError``.
    baseSpec = ""","Broken Interface with duplicate check description"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Rule","Type","Example"
"F","branch_id",,,,"RegEx","38\d\d\d"
"F","customer_id",,,,"Integer","0:99999"
,
,Description,Type,Rule
C,customer must be unique,IsUnique,"branch_id, customer_id"
"""
    # Sanity check: the base spec on its own can be read without error.
    workingIcd = interface.InterfaceControlDocument()
    workingIcd.read(StringIO.StringIO(baseSpec))

    # Append one incomplete check row at a time and expect it to fail.
    for incompleteCheckRow in ("C", "C,incomplete check"):
        self._testBroken(baseSpec + incompleteCheckRow, checks.CheckSyntaxError)
def testBrokenCheckDuplicateDescription(self): spec = ""","Broken Interface with duplicate check description" "D","Format","CSV" "D","Line delimiter","LF" "D","Item delimiter","," "D","Encoding","ISO-8859-1" , ,"Name","Example","Empty","Length","Rule","Type","Example" "F","branch_id",,,,"RegEx","38\d\d\d" "F","customer_id",,,,"Integer","0:99999" , ,Description,Type,Rule C,customer must be unique,IsUnique,"branch_id, customer_id" C,distinct branches must be within limit,DistinctCount,branch_id <= 3 C,customer must be unique,IsUnique,"branch_id, customer_id" """ icd = interface.InterfaceControlDocument() try: icd.read(StringIO.StringIO(spec), "iso-8859-15") except checks.CheckSyntaxError, error: errorText = str(error) self.assertTrue("check description must be used only once" in errorText, "unexpected error text: %r" % errorText) self.assertTrue("see also:" in errorText, "unexpected error text: %r" % errorText)
def createDefaultTestIcd(dataFormatName, lineDelimiter="\n"):
    """
    Build the customer ICD used by tests for CSV, Excel or ODS data.

    ``dataFormatName`` must be one of ``data.FORMAT_CSV``,
    ``data.FORMAT_EXCEL`` or ``data.FORMAT_ODS`` and ends up in the
    "Format" property. For CSV, the line delimiter, item delimiter,
    encoding and allowed characters are declared as well.
    ``lineDelimiter`` must not be empty but is otherwise not used here.

    The upper limit of the ``DistinctCount`` check is taken from the
    length of ``_TooManyTestBranchIds``.

    Returns the ``interface.InterfaceControlDocument`` that read the spec.
    """
    supportedFormats = [data.FORMAT_CSV, data.FORMAT_EXCEL, data.FORMAT_ODS]
    assert dataFormatName in supportedFormats, "dataFormatName=%r" % dataFormatName
    assert lineDelimiter

    # Title and data format common to all formats.
    specParts = [u""","Interface: customer"
,
,"Data dataFormatName"
,
"D","Format","%s",
""" % dataFormatName]

    # Properties only CSV needs.
    if dataFormatName.lower() == data.FORMAT_CSV:
        specParts.append(u""""D","Line delimiter","LF"
"D","Item delimiter",44
"D","Encoding","ISO-8859-1"
"D","Allowed characters","32:"
""")

    # Fields and checks common to all formats.
    specParts.append(u""",
,"Fields"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","branch_id",38123,,,"RegEx","38\d\d\d"
"F","customer_id",12345,,,"Integer","0:99999"
"F","first_name","John","X",,"Text"
"F","surname","Doe",,"1:60","Text"
"F","gender","male",,,"Choice","female, male, other, unknown"
"F","date_of_birth",08.03.1957,,,"DateTime","DD.MM.YYYY"
,
,"Checks"
,
,"Description","Type","Rule"
"C","customer must be unique","IsUnique","branch_id, customer_id"
"C","number of branches must be in range","DistinctCount","branch_id < %d"
""" % len(_TooManyTestBranchIds))

    defaultIcd = interface.InterfaceControlDocument()
    defaultIcd.read(StringIO.StringIO(u"".join(specParts)))
    return defaultIcd
def testLastOptionalField(self):
    # The second field may be empty, so a row with a name and a row with
    # an empty name must both be accepted.
    optionalSpec = ""","Interface with two fields with the second being optional"
"D","Format","CSV"
"D","Line delimiter","LF"
"D","Item delimiter",","
"D","Encoding","ISO-8859-1"
,
,"Name","Example","Empty","Length","Type","Rule"
"F","customer_id",,,,Integer,0:
"F","first_name","John","X",,"Text"
"""
    optionalIcd = interface.InterfaceControlDocument()
    optionalIcd.read(StringIO.StringIO(optionalSpec))

    customersText = """123,John
234,
"""
    customersReadable = StringIO.StringIO(customersText)
    optionalIcd.addValidationListener(_defaultIcdListener)
    try:
        optionalIcd.validate(customersReadable)
        self.assertEqual(optionalIcd.acceptedCount, 2)
        self.assertEqual(optionalIcd.rejectedCount, 0)
    finally:
        # Always detach the listener, even if an assertion failed.
        optionalIcd.removeValidationListener(_defaultIcdListener)
def do_POST(self):
    """
    Handle a POST request that submits an ICD and, optionally, data to
    validate against it.

    The form fields "icd" and "data" are taken from a
    ``multipart/form-data`` or ``application/x-www-form-urlencoded`` body;
    with any other content type no fields are available. Only the first
    value of each field is used.

    Responses sent by this method:

    * "icd" missing or empty: error 400, "ICD file must be specified."
    * reading the ICD raises: error 400, "cannot parse ICD: ..."
    * ICD read and no data given: 200 with the text "ICD file is valid."
    * ICD read and data validated: 200 with an HTML page showing the
      accepted/rejected/failed-check counts followed by the table the
      validation listener wrote to a temporary file
    * validating or writing the page raises: error 400,
      "cannot validate data: ..."
    """
    log = logging.getLogger("cutplace.web")
    log.info("%s %r" % (self.command, self.path))

    # Parse POST option. Based on code by Pierre Quentel.
    ctype, pdict = cgi.parse_header(self.headers.getheader('content-type'))
    # NOTE(review): presumably getheader() yields None for a request
    # without content-length, which would make int() raise before any
    # response is sent - confirm whether that can happen.
    length = int(self.headers.getheader('content-length'))
    if ctype == 'multipart/form-data':
        fileMap = cgi.parse_multipart(self.rfile, pdict)
    elif ctype == 'application/x-www-form-urlencoded':
        qs = self.rfile.read(length)
        fileMap = cgi.parse_qs(qs, keep_blank_values=1)
    else:
        fileMap = {}  # Unknown content-type

    # Use only the first value submitted for each of the two form fields.
    if "icd" in fileMap:
        icdContent = fileMap["icd"][0]
    else:
        icdContent = None
    if "data" in fileMap:
        dataContent = fileMap["data"][0]
    else:
        dataContent = None

    if icdContent:
        try:
            icdData = StringIO.StringIO(icdContent)
            icd = interface.InterfaceControlDocument()
            icd.read(icdData)
            if dataContent:
                # The table of validated rows is collected in a temporary
                # file first because the counts shown above the table are
                # only known once validation has finished.
                validationHtmlFile = tempfile.TemporaryFile(
                    suffix=".html", prefix="cutplace-web-")
                try:
                    log.debug(u"writing html to temporary file: %r",
                              validationHtmlFile.name)
                    validationHtmlFile.write(u"<table><tr>")
                    # Write table headings.
                    for title in icd.fieldNames:
                        validationHtmlFile.write(u"<th>%s</th>" % cgi.escape(title))
                    validationHtmlFile.write(u"</tr>")

                    # Start listening to validation events.
                    htmlListener = _HtmlWritingValidationListener(
                        validationHtmlFile, len(icd.fieldNames))
                    icd.addValidationListener(htmlListener)
                    try:
                        dataReadable = StringIO.StringIO(dataContent)
                        icd.validate(dataReadable)
                        # NOTE(review): the listener is only removed when
                        # validate() returns normally; `icd` is local to
                        # this request, so this looks harmless - confirm.
                        icd.removeValidationListener(htmlListener)
                        validationHtmlFile.write("</table>")

                        self.send_response(200)
                        self.send_header("Content-type", "text/html")
                        self.end_headers()

                        # Write the contents of the temporary HTML file to the web page.
                        self.wfile.write(
                            u"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Validation results</title>
<style type="text/css">%s
</style>
</head><body>
<h1>Validation results</h1>
""" % (Handler._STYLE))
                        self.wfile.write(u"""<table>
<tr><td>Rows accepted:</td><td>%d</td></tr>
<tr><td>Rows rejected:</td><td>%d</td></tr>
<tr><td>Checks at end failed:</td><td>%d</td></tr>
</table>
""" % (htmlListener.acceptedCount, htmlListener.rejectedCount,
       htmlListener.checkAtEndFailedCount))
                        # Rewind and copy the collected table in chunks of
                        # Handler._IO_BUFFER_SIZE to the response.
                        validationHtmlFile.seek(0)
                        htmlFileBuffer = validationHtmlFile.read(
                            Handler._IO_BUFFER_SIZE)
                        while htmlFileBuffer:
                            self.wfile.write(htmlFileBuffer)
                            htmlFileBuffer = validationHtmlFile.read(
                                Handler._IO_BUFFER_SIZE)
                        self.wfile.write(Handler._FOOTER)
                    except:
                        # NOTE(review): bare except; it also fires when
                        # writing the page fails after the 200 status has
                        # already been sent - confirm this is acceptable.
                        self.send_error(
                            400, u"cannot validate data: %s" %
                            cgi.escape(str(sys.exc_info()[1])))
                finally:
                    validationHtmlFile.close()
            else:
                log.info("ICD is valid")
                self.send_response(200)
                self.send_header("Content-type", "text/html")
                self.end_headers()
                self.wfile.write(u"ICD file is valid.")
        except:
            # NOTE(review): bare except; besides errors from icd.read() it
            # also covers failures while creating the temporary file or
            # writing the table headings, which are then reported as
            # "cannot parse ICD" as well.
            log.error(u"cannot parse ICD", exc_info=1)
            self.send_error(
                400, u"cannot parse ICD: %s" %
                cgi.escape(str(sys.exc_info()[1])))
    else:
        errorMessage = "ICD file must be specified"
        log.error(errorMessage)
        self.send_error(400, u"%s." % cgi.escape(errorMessage))
def setUp(self):
    # Remember the paths of the valid and the broken customer data and
    # read the "customers.csv" ICD the tests validate them with.
    self._validCostumersCsvPath = dev_test.getTestInputPath("valid_customers.csv")
    self._brokenCostumersCsvPath = dev_test.getTestInputPath("broken_customers.csv")

    customersIcdPath = dev_test.getTestIcdPath("customers.csv")
    customersIcd = interface.InterfaceControlDocument()
    self._icd = customersIcd
    customersIcd.read(customersIcdPath)
def _testBroken(self, spec, expectedError):
    """
    Assert that reading the ICD text ``spec`` (using the encoding
    "iso-8859-1") raises ``expectedError``.
    """
    assert spec is not None
    assert expectedError is not None

    brokenIcd = interface.InterfaceControlDocument()
    specReadable = StringIO.StringIO(spec)
    self.assertRaises(expectedError, brokenIcd.read, specReadable, "iso-8859-1")
def createCid(readable, **keywords):
    """
    Create an ``interface.InterfaceControlDocument`` from the rows that
    ``createCidRows(readable, **keywords)`` provides.
    """
    # Imported here rather than at module level, as in the original code.
    import interface

    cid = interface.InterfaceControlDocument()
    cid.readFromRows(createCidRows(readable, **keywords))
    return cid
def testBrokenAsciiIcd(self):
    # Reading an ICD that contains a non ASCII character (U+00FD) without
    # specifying an encoding must raise ``tools.CutplaceUnicodeError``.
    nonAsciiSpec = u",Broken ASCII interface with with non ASCII character\n,\u00fd"
    asciiIcd = interface.InterfaceControlDocument()
    specReadable = StringIO.StringIO(nonAsciiSpec)
    self.assertRaises(tools.CutplaceUnicodeError, asciiIcd.read, specReadable)