def testCanCreateInterfaceControlDocument(self):
    """
    Create CID rows for each sample customer file and check the types stored for some fields.
    """
    def assertFieldTypeEquals(cidRows, fieldName, typeName):
        # Find the first row that has at least 6 items, starts with the marker u"f" and
        # carries ``fieldName`` as second item; its 6th item is compared with ``typeName``.
        matchingRow = None
        for candidateRow in cidRows:
            isFieldRow = (len(candidateRow) >= 6) and (candidateRow[0] == u"f")
            if isFieldRow and (candidateRow[1] == fieldName):
                matchingRow = candidateRow
                break
        assert matchingRow is not None, "field must be found in cid rows: %r <-- %s" % (fieldName, cidRows)
        self.assertEqual(typeName, matchingRow[5])

    for testFileName in ("valid_customers.csv", "valid_customers.ods", "valid_customers.xls"):
        testFile = open(dev_test.getTestInputPath(testFileName), "rb")
        try:
            sniffedCidRows = sniff.createCidRows(testFile)
            assertFieldTypeEquals(sniffedCidRows, u"column_a", u"Integer")  # branch
            assertFieldTypeEquals(sniffedCidRows, u"column_c", u"Text")  # first name
            assertFieldTypeEquals(sniffedCidRows, u"column_f", u"Text")  # date of birth
        finally:
            testFile.close()
def testCanCreateInterfaceControlDocument(self):
    """
    Run the sniffer on every sample customer file and verify the type it stores for
    selected fields in the generated CID rows.
    """
    def assertFieldTypeEquals(cidRows, fieldName, typeName):
        # Search for the first sufficiently long row marked u"f" whose name matches.
        fieldRow = None
        for cidRow in cidRows:
            if len(cidRow) < 6:
                continue
            if (cidRow[0] != u"f") or (cidRow[1] != fieldName):
                continue
            fieldRow = cidRow
            break
        assert fieldRow is not None, "field must be found in cid rows: %r <-- %s" % (fieldName, cidRows)
        # The type name is read from the 6th item of the field row.
        self.assertEqual(typeName, fieldRow[5])

    expectedFieldTypes = (
        (u"column_a", u"Integer"),  # branch
        (u"column_c", u"Text"),  # first name
        (u"column_f", u"Text"),  # date of birth
    )
    testFileNames = ["valid_customers.csv", "valid_customers.ods", "valid_customers.xls"]
    for testFileName in testFileNames:
        testFilePath = dev_test.getTestInputPath(testFileName)
        testFile = open(testFilePath, "rb")
        try:
            cidRows = sniff.createCidRows(testFile)
            for expectedFieldName, expectedTypeName in expectedFieldTypes:
                assertFieldTypeEquals(cidRows, expectedFieldName, expectedTypeName)
        finally:
            testFile.close()
def main(argv=None): """ Main routine that might raise errors but won't ``sys.exit()`` unless ``argv`` is broken. Before calling this, module ``logging`` has to be set up properly. For example, by calling ``logging.basicConfig()``. """ if argv is None: argv = sys.argv assert argv programName = os.path.basename(argv[0]) usage = u"""usage: %s [options] ICDFILE DATAFILE Write interface control document to ICDFILE describing the data found in DATAFILE. The resulting ICD is stored in CSV format.""" % programName epilog = u""" Example: %s --data-format=delimited --data-encoding iso-8859-15 icd_customers.csv some_customers.csv Analyze data file some_customers.csv assuming ISO-8859-15 as character encoding and store the resulting ICD in icd_customers.csv """ % programName parser = _tools.OptionParserWithPreformattedEpilog(usage=usage, epilog=epilog, version="%prog " + version.VERSION_NUMBER) parser.add_option("-d", "--icd-delimiter", default=',', metavar="DELIMITER", type="choice", choices=(",", ";"), dest="icdDelimiter", help="delimiter to separate rows in ICDFILE (default: %default)") parser.add_option("-e", "--data-encoding", default="ascii", metavar="ENCODING", dest="dataEncoding", help="character encoding to use when reading the data (default: %default)") parser.add_option("-f", "--data-format", default=sniff.FORMAT_AUTO, metavar="FORMAT", type="choice", choices=(sniff.FORMAT_AUTO, data.FORMAT_CSV, data.FORMAT_DELIMITED, data.FORMAT_EXCEL, data.FORMAT_ODS), dest="dataFormat", help="data format to assume for DATAFILE (default: %default)") parser.add_option("-a", "--stop-after", default=0, metavar="NUMBER", type="long", dest="stopAfter", help="number of data rows after which to stop analyzing; 0=analyze all data (default: %default)") parser.add_option("-H", "--head", default=0, metavar="NUMBER", type="long", help="number of header rows to skip before to start analyzing (default: %default)") parser.add_option("-n", "--names", metavar="FIELDNAMES", dest="fieldNameList", 
help="comma separated list of field names (default: use row specified by --head or generate names)") parser.add_option("--log", default=logging.getLevelName(logging.INFO).lower(), metavar="LEVEL", type="choice", choices=_tools.LogLevelNameToLevelMap.keys(), dest="logLevel", help="set log level to LEVEL (default: %default)") (options, others) = parser.parse_args(argv[1:]) logging.getLogger("cutplace").setLevel(_tools.LogLevelNameToLevelMap[options.logLevel]) othersCount = len(others) if othersCount == 0: parser.error(u"ICDFILE and DATAFILE must be specified") elif othersCount == 1: parser.error(u"DATAFILE must be specified") elif othersCount > 2: parser.error(u"only ICDFILE and DATAFILE must be specified but also found: %s" % others[2:]) if options.fieldNameList: fieldNames = [fieldName.strip() for fieldName in options.fieldNameList.split(",")] else: fieldNames = None icdPath = others[0] dataPath = others[1] exitCode = 1 try: with open(icdPath, "wb") as icdFile: icdCsvWriter = _tools.UnicodeCsvWriter( icdFile, delimiter=options.icdDelimiter, encoding="utf-8" ) with open(dataPath, "rb") as dataFile: for icdRowToWrite in sniff.createCidRows(dataFile, dataFormat=options.dataFormat, encoding=options.dataEncoding, header=options.head, fieldNames=fieldNames, stopAfter=options.stopAfter): icdCsvWriter.writerow(icdRowToWrite) exitCode = 0 except EnvironmentError, error: exitCode = 3 _log.error(u"%s", error)
def main(argv=None): """ Main routine that might raise errors but won't ``sys.exit()`` unless ``argv`` is broken. Before calling this, module ``logging`` has to be set up properly. For example, by calling ``logging.basicConfig()``. """ if argv is None: argv = sys.argv assert argv programName = os.path.basename(argv[0]) usage = u"""usage: %s [options] ICDFILE DATAFILE Write interface control document to ICDFILE describing the data found in DATAFILE. The resulting ICD is stored in CSV format.""" % programName epilog = u""" Example: %s --data-format=delimited --data-encoding iso-8859-15 icd_customers.csv some_customers.csv Analyze data file some_customers.csv assuming ISO-8859-15 as character encoding and store the resulting ICD in icd_customers.csv """ % programName parser = _tools.OptionParserWithPreformattedEpilog(usage=usage, epilog=epilog, version="%prog " + version.VERSION_NUMBER) parser.add_option( "-d", "--icd-delimiter", default=',', metavar="DELIMITER", type="choice", choices=(",", ";"), dest="icdDelimiter", help="delimiter to separate rows in ICDFILE (default: %default)") parser.add_option( "-e", "--data-encoding", default="ascii", metavar="ENCODING", dest="dataEncoding", help= "character encoding to use when reading the data (default: %default)") parser.add_option( "-f", "--data-format", default=sniff.FORMAT_AUTO, metavar="FORMAT", type="choice", choices=(sniff.FORMAT_AUTO, data.FORMAT_CSV, data.FORMAT_DELIMITED, data.FORMAT_EXCEL, data.FORMAT_ODS), dest="dataFormat", help="data format to assume for DATAFILE (default: %default)") parser.add_option( "-a", "--stop-after", default=0, metavar="NUMBER", type="long", dest="stopAfter", help= "number of data rows after which to stop analyzing; 0=analyze all data (default: %default)" ) parser.add_option( "-H", "--head", default=0, metavar="NUMBER", type="long", help= "number of header rows to skip before to start analyzing (default: %default)" ) parser.add_option( "-n", "--names", metavar="FIELDNAMES", 
dest="fieldNameList", help= "comma separated list of field names (default: use row specified by --head or generate names)" ) parser.add_option("--log", default=logging.getLevelName(logging.INFO).lower(), metavar="LEVEL", type="choice", choices=_tools.LogLevelNameToLevelMap.keys(), dest="logLevel", help="set log level to LEVEL (default: %default)") (options, others) = parser.parse_args(argv[1:]) logging.getLogger("cutplace").setLevel( _tools.LogLevelNameToLevelMap[options.logLevel]) othersCount = len(others) if othersCount == 0: parser.error(u"ICDFILE and DATAFILE must be specified") elif othersCount == 1: parser.error(u"DATAFILE must be specified") elif othersCount > 2: parser.error( u"only ICDFILE and DATAFILE must be specified but also found: %s" % others[2:]) if options.fieldNameList: fieldNames = [ fieldName.strip() for fieldName in options.fieldNameList.split(",") ] else: fieldNames = None icdPath = others[0] dataPath = others[1] exitCode = 1 try: with open(icdPath, "wb") as icdFile: icdCsvWriter = _tools.UnicodeCsvWriter( icdFile, delimiter=options.icdDelimiter, encoding="utf-8") with open(dataPath, "rb") as dataFile: for icdRowToWrite in sniff.createCidRows( dataFile, dataFormat=options.dataFormat, encoding=options.dataEncoding, header=options.head, fieldNames=fieldNames, stopAfter=options.stopAfter): icdCsvWriter.writerow(icdRowToWrite) exitCode = 0 except EnvironmentError, error: exitCode = 3 _log.error(u"%s", error)