def _icrSubFileToHtml(self, output, icrJson, subFile):
    """Write the entries of one sub-file to ``output`` as HTML list items.

    Each entry in ``icrJson`` becomes one ``<li>`` element. Fields are
    written in the order given by ``SUBFILE_FIELDS[subFile]`` (with
    ``subFile`` itself appended when it is not already listed). A field
    that is itself a different sub-file is rendered recursively inside a
    nested ``<dl>``/``<ol>``; any other field is converted with
    ``self._convertIndividualFieldValue`` and written as a ``<dt>``.

    Args:
        output: object with a ``write`` method receiving the HTML text.
        icrJson: iterable of entries; each entry supports ``in`` and
            indexing by field name.
        subFile: name of the sub-file being rendered; must be a key of
            ``SUBFILE_FIELDS``.
    """
    logger.debug('subFile is %s', subFile)
    # TODO: Is 'icrJson' the correct name for this variable?
    logger.debug('icrJson is %s', icrJson)
    # Copy the field list: appending to SUBFILE_FIELDS[subFile] directly
    # would permanently alter the shared module-level table.
    fieldList = list(SUBFILE_FIELDS[subFile])
    if subFile not in fieldList:
        fieldList.append(subFile)
    for icrEntry in icrJson:
        output.write ("<li>\n")
        for field in fieldList:
            if field not in icrEntry:
                continue
            # we have this field
            value = icrEntry[field]
            logger.debug('current field is %s', field)
            if isSubFile(field) and field != subFile:
                # avoid recursive subfile for now
                logger.debug('field is a subfile %s', field)
                output.write ("<dl><dt>%s:</dt>\n" % field)
                output.write ("<dd>\n")
                output.write ("<ol>\n")
                self._icrSubFileToHtml(output, value, field)
                output.write ("</ol>\n")
                output.write ("</dd></dl>\n")
                continue
            value = self._convertIndividualFieldValue(field, icrEntry, value)
            output.write ("<dt>%s: %s</dt>\n" % (field, value))
        output.write ("</li>\n")
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    Every input line is classified, in this order, as: an ignored line,
    the start of a new record, a keyword (field) line, a continuation of
    the current field, or a line with no associated field. After the last
    line the stack is rewound (when non-empty), the current record is
    appended to ``self._outObject`` and that list is dumped to
    ``outputFilename``.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = START_OF_RECORD.match(line)
            if match:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # fall back to the dedicated DBA comments pattern
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            # keyword is part of the free text; keep it there
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning(
                        'Ignore blank line for current field: [%s]',
                        self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single-argument call form prints the same text under
                    # both the Python 2 statement and Python 3 function.
                    print('No field associated with line %s: %s ' % (
                        self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    # pprint.pprint(self._outObject);
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def _icrSubFileToPDF(self, pdf, icrJson, subFile):
    """Append the fields of every entry of one sub-file to ``pdf``.

    Fields are visited in the order given by ``SUBFILE_FIELDS[subFile]``
    (with ``subFile`` itself appended when it is not already listed). A
    field that is itself a different sub-file is handled by a recursive
    call; any other field is converted with
    ``self._convertIndividualFieldValuePDF(field, value, True)`` and the
    result is appended to ``pdf``.

    Args:
        pdf: list-like object (needs ``append``) collecting the output.
        icrJson: iterable of entries; each entry supports ``in`` and
            indexing by field name.
        subFile: name of the sub-file being rendered; must be a key of
            ``SUBFILE_FIELDS``.
    """
    # Copy the field list: appending to SUBFILE_FIELDS[subFile] directly
    # would permanently alter the shared module-level table.
    fieldList = list(SUBFILE_FIELDS[subFile])
    if subFile not in fieldList:
        fieldList.append(subFile)
    for icrEntry in icrJson:
        for field in fieldList:
            if field not in icrEntry:
                continue
            # we have this field
            value = icrEntry[field]
            if isSubFile(field) and field != subFile:
                # avoid recursive subfile for now
                self._icrSubFileToPDF(pdf, value, field)
                continue
            pdf.append(self._convertIndividualFieldValuePDF(field, value, True))
def _icrSubFileToPDF(pdf, icrJson, subFile):
    """Append the fields of every entry of one sub-file to ``pdf``.

    Fields are visited in the order given by ``SUBFILE_FIELDS[subFile]``
    (with ``subFile`` itself appended when it is not already listed). A
    field that is itself a different sub-file is handled by a recursive
    call; any other field is converted with
    ``_convertIndividualFieldValuePDF(field, value, True)`` and the result
    is appended to ``pdf``.

    Args:
        pdf: list-like object (needs ``append``) collecting the output.
        icrJson: iterable of entries; each entry supports ``in`` and
            indexing by field name.
        subFile: name of the sub-file being rendered; must be a key of
            ``SUBFILE_FIELDS``.
    """
    # Copy the field list: appending to SUBFILE_FIELDS[subFile] directly
    # would permanently alter the shared module-level table.
    fieldList = list(SUBFILE_FIELDS[subFile])
    if subFile not in fieldList:
        fieldList.append(subFile)
    for icrEntry in icrJson:
        for field in fieldList:
            if field not in icrEntry:
                continue
            # we have this field
            value = icrEntry[field]
            if isSubFile(field) and field != subFile:
                # avoid recursive subfile for now
                _icrSubFileToPDF(pdf, value, field)
                continue
            pdf.append(_convertIndividualFieldValuePDF(field, value, True))
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    Every input line is classified, in this order, as: an ignored line,
    the start of a new record, a keyword (field) line, a continuation of
    the current field, or a line with no associated field. After the last
    line the stack is rewound (when non-empty), the current record is
    appended to ``self._outObject`` and that list is dumped to
    ``outputFilename``.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, "r") as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = START_OF_RECORD.match(line)
            if match:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # fall back to the dedicated DBA comments pattern
                match = DBA_COMMENTS.match(line)
            if match and match.group("name") in ICR_FILE_KEYWORDS:
                fieldName = match.group("name")
                if isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug("field name is: %s", fieldName)
                    logger.debug("cur field is: %s", self._curField)
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            # keyword is part of the free text; keep it there
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning("Ignore blank line for current field: [%s]", self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single-argument call form prints the same text under
                    # both the Python 2 statement and Python 3 function.
                    print("No field associated with line %s: %s " % (self._curLineNo, line))
    logger.info("End of file now")
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info("Add last record: %s", self._curRecord)
        self._outObject.append(self._curRecord)
    # pprint.pprint(self._outObject);
    with open(outputFilename, "w") as out_file:
        json.dump(self._outObject, out_file, indent=4)
def _icrDataEntryToPDF(self, pdf, icrJson):
    """Append the flowables describing one ICR entry to ``pdf``.

    Fields are visited in the order of ``ICR_FILE_KEYWORDS_LIST``; fields
    missing from ``icrJson`` are skipped. 'GLOBAL REFERENCE' and
    'COMPONENT/ENTRY POINT' are delegated to dedicated writers,
    'GENERAL DESCRIPTION' becomes a heading plus one escaped paragraph per
    line, sub-files are delegated to ``self._icrSubFileToPDF`` and every
    other field is written as a heading followed by its converted value.

    Args:
        pdf: list-like object (needs ``append``) collecting the flowables.
        icrJson: mapping from field name to field value for one ICR entry.
    """
    # Local import: same '&', '<', '>' escaping as cgi.escape(), which was
    # removed from the standard library in Python 3.8.
    from xml.sax.saxutils import escape

    # Write the ICR data as a document (list) instead of
    # a table. Otherwise, the rows can become taller than
    # a page and reportlab will fail to create the pdf.
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in ICR_FILE_KEYWORDS_LIST:
        if field not in icrJson:
            continue
        # we have this field
        value = icrJson[field]
        if "GLOBAL REFERENCE" == field:
            self._writeGlobalReferenceToPDF(value, pdf)
            continue
        if "COMPONENT/ENTRY POINT" == field:
            self._writeComponentEntryPointToPDF(value, pdf)
            continue
        if "GENERAL DESCRIPTION" == field:
            description = [Paragraph('GENERAL DESCRIPTION', styles['Heading3'])]
            if isinstance(value, list):
                for line in value:
                    description.append(Paragraph(escape(line), styles['Normal']))
            else:
                description.append(Paragraph(escape(value), styles['Normal']))
            # The heading is always present, so the list is never empty.
            pdf.append(KeepTogether(description))
            continue
        if isSubFile(field):
            pdf.append(Paragraph(field, styles['Heading3']))
            self._icrSubFileToPDF(pdf, value, field)
            continue
        value = self._convertIndividualFieldValuePDF(field, value)
        row = [Paragraph(field, styles['Heading3']), value]
        pdf.append(KeepTogether(row))
def _icrDataEntryToPDF(self, pdf, icrJson):
    """Append the flowables describing one ICR entry to ``pdf``.

    Fields are visited in the order of ``ICR_FILE_KEYWORDS_LIST``; fields
    missing from ``icrJson`` are skipped. 'GLOBAL REFERENCE' and
    'COMPONENT/ENTRY POINT' are delegated to dedicated writers,
    'GENERAL DESCRIPTION' becomes a heading plus one escaped paragraph per
    line, sub-files are delegated to ``self._icrSubFileToPDF`` and every
    other field is written as a heading followed by its converted value.

    Args:
        pdf: list-like object (needs ``append``) collecting the flowables.
        icrJson: mapping from field name to field value for one ICR entry.
    """
    # Local import: same '&', '<', '>' escaping as cgi.escape(), which was
    # removed from the standard library in Python 3.8.
    from xml.sax.saxutils import escape

    heading = styles['Heading3']
    normal = styles['Normal']
    # Write the ICR data as a document (list) instead of
    # a table. Otherwise, the rows can become taller than
    # a page and reportlab will fail to create the pdf.
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in ICR_FILE_KEYWORDS_LIST:
        if field not in icrJson:
            continue
        # we have this field
        value = icrJson[field]
        if "GLOBAL REFERENCE" == field:
            self._writeGlobalReferenceToPDF(value, pdf)
        elif "COMPONENT/ENTRY POINT" == field:
            self._writeComponentEntryPointToPDF(value, pdf)
        elif "GENERAL DESCRIPTION" == field:
            # A single string is treated like a one-line description.
            lines = value if isinstance(value, list) else [value]
            description = [Paragraph('GENERAL DESCRIPTION', heading)]
            for line in lines:
                description.append(Paragraph(escape(line), normal))
            # The heading is always present, so the list is never empty.
            pdf.append(KeepTogether(description))
        elif isSubFile(field):
            pdf.append(Paragraph(field, heading))
            self._icrSubFileToPDF(pdf, value, field)
        else:
            value = self._convertIndividualFieldValuePDF(field, value)
            pdf.append(KeepTogether([Paragraph(field, heading), value]))
def _icrSubFileToHtml(output, icrJson, subFile, crossRef):
    """Write the entries of one sub-file to ``output`` as HTML list items.

    Each entry in ``icrJson`` becomes one ``<li>`` element. Fields are
    written in the order given by ``SUBFILE_FIELDS[subFile]`` (with
    ``subFile`` itself appended when it is not already listed). A field
    that is itself a different sub-file is rendered recursively inside a
    nested ``<dl>``/``<ol>``; any other field is converted with
    ``_convertIndividualFieldValue`` and written as a ``<dt>``.

    Args:
        output: object with a ``write`` method receiving the HTML text.
        icrJson: iterable of entries; each entry supports ``in`` and
            indexing by field name.
        subFile: name of the sub-file being rendered; must be a key of
            ``SUBFILE_FIELDS``.
        crossRef: passed through unchanged to
            ``_convertIndividualFieldValue`` and to recursive calls.
    """
    # Copy the field list: appending to SUBFILE_FIELDS[subFile] directly
    # would permanently alter the shared module-level table.
    fieldList = list(SUBFILE_FIELDS[subFile])
    if subFile not in fieldList:
        fieldList.append(subFile)
    for icrEntry in icrJson:
        output.write ("<li>\n")
        for field in fieldList:
            if field not in icrEntry:
                continue
            # we have this field
            value = icrEntry[field]
            if isSubFile(field) and field != subFile:
                # avoid recursive subfile for now
                output.write ("<dl><dt>%s:</dt>\n" % field)
                output.write ("<dd>\n")
                output.write ("<ol>\n")
                _icrSubFileToHtml(output, value, field, crossRef)
                output.write ("</ol>\n")
                output.write ("</dd></dl>\n")
                continue
            value = _convertIndividualFieldValue(field, icrEntry, value, crossRef)
            output.write ("<dt>%s: %s</dt>\n" % (field, value))
        output.write ("</li>\n")
def _icrSubFileToHtml(output, icrJson, subFile, crossRef):
    """Write the entries of one sub-file to ``output`` as HTML list items.

    Each entry in ``icrJson`` becomes one ``<li>`` element. Fields are
    written in the order given by ``SUBFILE_FIELDS[subFile]`` (with
    ``subFile`` itself appended when it is not already listed). A field
    that is itself a different sub-file is rendered with
    ``_writeTableOfValue`` when its value is a list, otherwise recursively
    inside a nested ``<dl>``; any other field is converted with
    ``_convertIndividualFieldValue`` and written as a ``<dt>``.

    Args:
        output: object with a ``write`` method receiving the HTML text.
        icrJson: iterable of entries; each entry supports ``in`` and
            indexing by field name.
        subFile: name of the sub-file being rendered; must be a key of
            ``SUBFILE_FIELDS``.
        crossRef: passed through unchanged to the helper functions and to
            recursive calls.
    """
    # Copy the field list: appending to SUBFILE_FIELDS[subFile] directly
    # would permanently alter the shared module-level table.
    fieldList = list(SUBFILE_FIELDS[subFile])
    if subFile not in fieldList:
        fieldList.append(subFile)
    for icrEntry in icrJson:
        output.write ("<li>\n")
        for field in fieldList:
            if field not in icrEntry:
                continue
            # we have this field
            value = icrEntry[field]
            if isSubFile(field) and field != subFile:
                # avoid recursive subfile for now
                if isinstance(value, list):
                    _writeTableOfValue(output, field, value, crossRef)
                else:
                    output.write ("<dl><dt>%s:</dt>\n" % field)
                    output.write ("<dd>\n")
                    _icrSubFileToHtml(output, value, field, crossRef)
                    output.write ("</dd></dl>\n")
                continue
            value = _convertIndividualFieldValue(field, icrEntry, value, crossRef)
            output.write ("<dt>%s: %s</dt>\n" % (field, value))
        output.write ("</li>\n")
def _icrDataEntryToHtml(self, output, icrJson):
    """Write one ICR entry to ``output`` as a sequence of HTML table rows.

    One ``<tr>`` is produced for every keyword of
    ``ICR_FILE_KEYWORDS_LIST`` that is present in ``icrJson``. Sub-file
    fields are rendered as an ordered list via ``self._icrSubFileToHtml``;
    all other fields are converted with
    ``self._convertIndividualFieldValue`` and written into a single cell.
    """
    emit = output.write
    # There is no real schema defining the field order, so the order of
    # the keyword list is used as a best guess.
    for keyword in ICR_FILE_KEYWORDS_LIST:
        if keyword not in icrJson:
            continue
        rawValue = icrJson[keyword]
        if not isSubFile(keyword):
            # Plain field: convert first, then emit a two-cell row.
            cellText = self._convertIndividualFieldValue(keyword, icrJson, rawValue)
            emit("<tr>\n")
            emit("<td>%s</td>\n" % keyword)
            emit("<td>%s</td>\n" % cellText)
            emit("</tr>\n")
        else:
            # Sub-file: the second cell holds an ordered list of entries.
            emit("<tr>\n")
            emit("<td>%s</td>\n" % keyword)
            emit("<td>\n")
            emit("<ol>\n")
            self._icrSubFileToHtml(output, rawValue, keyword)
            emit("</ol>\n")
            emit("</td>\n")
            emit("</tr>\n")
def _icrDataEntryToHtml(output, icrJson, crossRef):
    """Write one ICR entry to ``output`` as a sequence of HTML table rows.

    One ``<tr>`` is produced for 'NUMBER' and for every keyword of
    ``ICR_FILE_KEYWORDS_LIST`` that is present in ``icrJson``. A sub-file
    whose value is a non-empty list of dicts is rendered with
    ``_writeTableOfValue``; any other sub-file value goes through
    ``_icrSubFileToHtml``. Non-sub-file fields are converted with
    ``_convertIndividualFieldValue`` and written into a single cell.

    Args:
        output: object with a ``write`` method receiving the HTML text.
        icrJson: mapping from field name to field value for one ICR entry.
        crossRef: passed through unchanged to the helper functions.
    """
    fieldList = ['NUMBER'] + ICR_FILE_KEYWORDS_LIST
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in fieldList:
        if field not in icrJson:
            continue
        # we have this field
        value = icrJson[field]
        if isSubFile(field):
            output.write ("<tr>\n")
            output.write("<td>%s</td>\n" % field)
            output.write("<td>\n")
            # Test for emptiness before peeking at value[0]; an empty list
            # would otherwise raise IndexError.
            if isinstance(value, list) and value and isinstance(value[0], dict):
                _writeTableOfValue(output, field, value, crossRef)
            else:
                _icrSubFileToHtml(output, value, field, crossRef)
            output.write("</td>\n")
            output.write ("</tr>\n")
            continue
        value = _convertIndividualFieldValue(field, icrJson, value, crossRef)
        output.write ("<tr>\n")
        output.write ("<td>%s</td>\n" % field)
        output.write ("<td>%s</td>\n" % value)
        output.write ("</tr>\n")
def _icrDataEntryToHtml(output, icrJson, crossRef):
    """Write one ICR entry to ``output`` as a sequence of HTML table rows.

    One ``<tr>`` is produced for 'NUMBER' and for every keyword of
    ``ICR_FILE_KEYWORDS_LIST`` that is present in ``icrJson``. A sub-file
    whose value is a non-empty list of dicts is rendered with
    ``_writeTableOfValue``; any other sub-file value goes through
    ``_icrSubFileToHtml``. Non-sub-file fields are converted with
    ``_convertIndividualFieldValue`` and written into a single cell.

    Args:
        output: object with a ``write`` method receiving the HTML text.
        icrJson: mapping from field name to field value for one ICR entry.
        crossRef: passed through unchanged to the helper functions.
    """
    fieldList = ['NUMBER'] + ICR_FILE_KEYWORDS_LIST
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in fieldList:
        if field not in icrJson:
            continue
        # we have this field
        value = icrJson[field]
        if isSubFile(field):
            # Test for emptiness before peeking at value[0]; an empty list
            # would otherwise raise IndexError.
            isTable = (isinstance(value, list) and bool(value)
                       and isinstance(value[0], dict))
            output.write("<tr>\n")
            output.write("<td>%s</td>\n" % field)
            output.write("<td>\n")
            if isTable:
                _writeTableOfValue(output, field, value, crossRef)
            else:
                _icrSubFileToHtml(output, value, field, crossRef)
            output.write("</td>\n")
            output.write("</tr>\n")
            continue
        value = _convertIndividualFieldValue(field, icrJson, value, crossRef)
        output.write("<tr>\n")
        output.write("<td>%s</td>\n" % field)
        output.write("<td>%s</td>\n" % value)
        output.write("</tr>\n")
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    The file is read line by line. Four local flags record whether the
    parser is currently inside one of the free text fields ('DBA Comments',
    'GENERAL DESCRIPTION', 'SUBSCRIBING DETAILS', 'COMPONENT DESCRIPTION'),
    because those fields may contain text that looks like a field name.
    After the last line the stack is rewound, the current record is
    appended to ``self._outObject`` and that list is dumped to
    ``outputFilename``; the output directory is created when missing.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    with open(inputFilename,'r') as ICRFile:
        curLineNo = 0
        curNumber = None
        # Free text fields may contain field names and
        # need special parsing rules
        DBAComments = False
        generalDescription = False
        subscribingDetails = False
        componentDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo +=1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match:
                name = match.group('name')
                number = match.group('number')
                skipField = False
                isFreeTextField = DBAComments or generalDescription or \
                                  subscribingDetails or componentDescription
                if isFreeTextField:
                    # Check if the number is matches what
                    # we're currently processing
                    skipField = number == curNumber
                if not skipField:
                    curNumber = number
                    DBAComments = False
                    generalDescription = False
                    subscribingDetails = False
                    componentDescription = False
                    self._startOfNewItem(name, number, match, line)
                    continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # DBA Comments doesn't match regex for other fields,
                # check separately. Even if we get a match here, can't
                # assume that we're in a DBA Comments field, might be in
                # a different free text field
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                # First check if we are at the end of a free text field
                if DBAComments:
                    if fieldName in ['DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED']:
                        DBAComments = False
                elif generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): confirm the leading whitespace of this
                    # literal matches the "2 spaces" stated above.
                    if line.startswith(" STATUS:") or fieldName == 'VIEWER':
                        generalDescription = False
                elif subscribingDetails:
                    # This assumes that 'Subscribing Details' may start
                    # with a field name or may contain 'GLOBAL REFERENCE'
                    # but won't contain any other field names in the middle
                    if fieldName in ICR_FILE_KEYWORDS and \
                       fieldName != 'GLOBAL REFERENCE' and \
                       'SUBSCRIBING DETAILS' in self._curRecord:
                        subscribingDetails = False
                elif componentDescription:
                    # At most one space before 'VARIABLES:'
                    if line.startswith("VARIABLES:") or \
                       line.startswith(" VARIABLES:") or \
                       fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                        componentDescription = False
                # Are we at the beginning of a free text field?
                if DBAComments or generalDescription or \
                   subscribingDetails or componentDescription:
                    # Free text fields are never nested
                    pass
                elif fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                elif fieldName == 'SUBSCRIBING DETAILS':
                    subscribingDetails = True
                elif fieldName == 'COMPONENT DESCRIPTION':
                    componentDescription = True
                # Process line
                # Start with free text fields
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        self._rewindStack()
                        name = match.group('name') # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif subscribingDetails:
                    fieldName = 'SUBSCRIBING DETAILS'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name') # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif componentDescription:
                    fieldName = 'COMPONENT DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process component description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Let logging do the formatting (same message text)
                    logger.error('No field associated with line %s: %s ',
                                 curLineNo, line)
    # TODO: Copy + paste from '_startOfNewItem()'
    self._curField = None
    self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject,out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    The file is read line by line. ``self._DBAComments`` and
    ``self._generalDescription`` record whether the parser is inside the
    'DBA Comments' or 'GENERAL DESCRIPTION' field; while either is set, a
    line matching ``START_OF_RECORD`` does not start a new record. A line
    matching ``INTEGRATION_REFERENCES_LIST`` stores its first group in the
    module-level ``date``. After the last line the stack is rewound (when
    non-empty), the current record is appended to ``self._outObject`` and
    that list is dumped to ``outputFilename``.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    global date
    with open(inputFilename,'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo +=1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                date = match.group(1).strip()
                continue
            match = START_OF_RECORD.match(line)
            if match and not self._DBAComments and not self._generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    self._DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    self._DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    self._generalDescription = True
                # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS in
                # this branch, so the flag is cleared whenever it is set --
                # confirm this is the intended end-of-field rule.
                if self._DBAComments:
                    if fieldName in ICR_FILE_KEYWORDS:
                        self._DBAComments = False
                elif self._generalDescription:
                    # NOTE(review): original comment says "Starts with
                    # exactly 2 spaces"; confirm the literal's whitespace.
                    if line.startswith(" STATUS:"):
                        self._generalDescription = False
                if self._DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name') # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif self._generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line, self._curRecord)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning('Ignore blank line for current field: [%s]', self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single-argument call form prints the same text under
                    # both the Python 2 statement and Python 3 function.
                    print('No field associated with line %s: %s ' % (self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    # pprint.pprint(self._outObject);
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject,out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    The file is read line by line. Four local flags record whether the
    parser is currently inside one of the free text fields ('DBA Comments',
    'GENERAL DESCRIPTION', 'SUBSCRIBING DETAILS', 'COMPONENT DESCRIPTION'),
    because those fields may contain text that looks like a field name.
    After the last line the stack is rewound, the current record is
    appended to ``self._outObject`` and that list is dumped to
    ``outputFilename``; the output directory is created when missing.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        curLineNo = 0
        curNumber = None
        # Free text fields may contain field names and
        # need special parsing rules
        DBAComments = False
        generalDescription = False
        subscribingDetails = False
        componentDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match:
                name = match.group('name')
                number = match.group('number')
                skipField = False
                isFreeTextField = DBAComments or generalDescription or \
                    subscribingDetails or componentDescription
                if isFreeTextField:
                    # Check if the number is matches what
                    # we're currently processing
                    skipField = number == curNumber
                if not skipField:
                    curNumber = number
                    DBAComments = False
                    generalDescription = False
                    subscribingDetails = False
                    componentDescription = False
                    self._startOfNewItem(name, number, match, line)
                    continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # DBA Comments doesn't match regex for other fields,
                # check separately. Even if we get a match here, can't
                # assume that we're in a DBA Comments field, might be in
                # a different free text field
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                # First check if we are at the end of a free text field
                if DBAComments:
                    if fieldName in [
                            'DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED'
                    ]:
                        DBAComments = False
                elif generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): confirm the leading whitespace of this
                    # literal matches the "2 spaces" stated above.
                    if line.startswith(
                            " STATUS:") or fieldName == 'VIEWER':
                        generalDescription = False
                elif subscribingDetails:
                    # This assumes that 'Subscribing Details' may start
                    # with a field name or may contain 'GLOBAL REFERENCE'
                    # but won't contain any other field names in the middle
                    if fieldName in ICR_FILE_KEYWORDS and \
                       fieldName != 'GLOBAL REFERENCE' and \
                       'SUBSCRIBING DETAILS' in self._curRecord:
                        subscribingDetails = False
                elif componentDescription:
                    # At most one space before 'VARIABLES:'
                    if line.startswith("VARIABLES:") or \
                       line.startswith(" VARIABLES:") or \
                       fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                        componentDescription = False
                # Are we at the beginning of a free text field?
                if DBAComments or generalDescription or \
                   subscribingDetails or componentDescription:
                    # Free text fields are never nested
                    pass
                elif fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                elif fieldName == 'SUBSCRIBING DETAILS':
                    subscribingDetails = True
                elif fieldName == 'COMPONENT DESCRIPTION':
                    componentDescription = True
                # Process line
                # Start with free text fields
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        self._rewindStack()
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif subscribingDetails:
                    fieldName = 'SUBSCRIBING DETAILS'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif componentDescription:
                    fieldName = 'COMPONENT DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process component description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Let logging do the formatting (same message text)
                    logger.error('No field associated with line %s: %s ',
                                 curLineNo, line)
    # TODO: Copy + paste from '_startOfNewItem()'
    self._curField = None
    self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    The file is read line by line. The local flags ``DBAComments`` and
    ``generalDescription`` record whether the parser is inside the
    'DBA Comments' or 'GENERAL DESCRIPTION' field; while either is set, a
    line matching ``START_OF_RECORD`` does not start a new record. After
    the last line the stack is rewound (when non-empty), the current
    record is appended to ``self._outObject`` and that list is dumped to
    ``outputFilename``; the output directory is created when missing.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        curLineNo = 0
        DBAComments = False
        generalDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match and not DBAComments and not generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS in
                # this branch, so the flag is cleared whenever it is set --
                # confirm this is the intended end-of-field rule.
                if DBAComments:
                    if fieldName in ICR_FILE_KEYWORDS:
                        DBAComments = False
                elif generalDescription:
                    # NOTE(review): original comment says "Starts with
                    # exactly 2 spaces"; confirm the literal's whitespace.
                    if line.startswith(
                            " STATUS:"):
                        generalDescription = False
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line, self._curRecord)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Let logging do the formatting (same message text)
                    logger.debug('No field associated with line %s: %s ',
                                 curLineNo, line)
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    The file is read line by line. ``self._DBAComments`` and
    ``self._generalDescription`` record whether the parser is inside the
    'DBA Comments' or 'GENERAL DESCRIPTION' field; while either is set, a
    line matching ``START_OF_RECORD`` does not start a new record. A line
    matching ``INTEGRATION_REFERENCES_LIST`` stores its first group in the
    module-level ``date``. After the last line the stack is rewound (when
    non-empty), the current record is appended to ``self._outObject`` and
    that list is dumped to ``outputFilename``.

    Args:
        inputFilename: path of the ICR text file to read.
        outputFilename: path of the JSON file to write.
    """
    global date
    with open(inputFilename,'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo +=1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                date = match.group(1).strip()
                continue
            match = START_OF_RECORD.match(line)
            if match and not self._DBAComments and not self._generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    self._DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    self._DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    self._generalDescription = True
                # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS in
                # this branch, so the flag is cleared whenever it is set --
                # confirm this is the intended end-of-field rule.
                if self._DBAComments:
                    if fieldName in ICR_FILE_KEYWORDS:
                        self._DBAComments = False
                elif self._generalDescription:
                    # NOTE(review): original comment says "Starts with
                    # exactly 2 spaces"; confirm the literal's whitespace.
                    if line.startswith(" STATUS:"):
                        self._generalDescription = False
                if self._DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name') # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif self._generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # First line of the description: store the text
                        # following the matched field name directly.
                        self._curField = fieldName
                        name = match.group('name') # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # logger.warn is a deprecated alias of logger.warning
                    logger.warning('Ignore blank line for current field: [%s]', self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single-argument call form prints the same text under
                    # both the Python 2 statement and Python 3 function.
                    print('No field associated with line %s: %s ' % (self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    # pprint.pprint(self._outObject);
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject,out_file, indent=4)