def __init__(self, MRepositDir, crossRef):
    """Initialize the parser state and eagerly load the FileMan schema.

    :param MRepositDir: root of the M repository checkout; ZWR globals are
        discovered under ``<MRepositDir>/Packages`` by _getAllFileManZWRFiles.
    :param crossRef: cross-reference object used to resolve package/file names.

    NOTE(review): this fragment appears outside any visible class header in
    this chunk; it matches the __init__ of FileManGlobalDataParser defined
    later in the file — confirm which class it belongs to.
    """
    self.patchDir = None  # patch repository dir; set externally by callers
    self.MRepositDir = MRepositDir
    self._dataRoot = None  # root node of the ZWR global currently being read
    self._crossRef = crossRef
    self._curFileNo = None  # file number currently being parsed/indexed
    self._glbData = {}  # fileNo => FileManData
    self._pointerRef = {}  # fileNo => ien => set of fields awaiting self-pointer resolution
    self._fileKeyIndex = {}  # File: => ien => Value
    # NOTE(review): this aliases (does not copy) the module-level
    # initGlobalLocationMap, so later writes mutate the shared map — confirm
    # that is intentional.
    self._glbLocMap = initGlobalLocationMap  # File: => Global Location
    self._rtnRefDict = {}  # dict of rtn => fileNo => Details
    # Scanning the repository and parsing the schema DD file both happen at
    # construction time, so building this object performs file I/O.
    self.allFiles = self._getAllFileManZWRFiles()  # Dict of fileNum => Global file
    self.schemaParser = FileManSchemaParser()
    self._allSchemaDict = self.schemaParser.parseSchemaDDFileV2(self.allFiles['0']['path'])
def testGlobalParser(crosRef=None):
    """Command-line driver: parse FileMan globals per schema and emit output.

    Parses CLI args, builds the cross-reference, discovers all ZWR global
    files, parses the schema (file 0) and file 1, then parses each requested
    file number, writing HTML when an output dir is given.

    NOTE(review): the ``crosRef`` parameter (spelling?) is never used — the
    cross reference is rebuilt from the parsed args instead.
    """
    parser = createArgParser()
    result = parser.parse_args()
    print result  # Python 2 print statement; dumps the parsed args namespace
    # Imports deferred to call time, presumably to avoid import cycles.
    from InitCrossReferenceGenerator import parseCrossRefGeneratorWithArgs
    from FileManDataToHtml import FileManDataToHtml
    # NOTE(review): outputFile is opened but never closed in this function;
    # ownership presumably passes to __generateGitRepositoryKey__ — confirm.
    outputFile = open(os.path.join(result.outdir, "filesInfo.json"), 'wb')
    __generateGitRepositoryKey__(result.gitPath, result.MRepositDir, outputFile)
    crossRef = parseCrossRefGeneratorWithArgs(result)
    # NOTE(review): the FileManGlobalDataParser defined in this file takes
    # (MRepositDir, crossRef); this single-argument call suggests this driver
    # targets a different version of the class — confirm before running.
    glbDataParser = FileManGlobalDataParser(crossRef)
    #glbDataParser.parseAllZWRGlobaFilesBySchema(result.MRepositDir, allSchemaDict)
    allFiles = glbDataParser.getAllFileManZWRFiles(os.path.join(result.MRepositDir, 'Packages'), "*/Globals/*.zwr")
    # Schema (file 0) and file-of-files (file 1) must exist, and every
    # requested file number must have a discovered global file.
    assert '0' in allFiles and '1' in allFiles and set(result.fileNos).issubset(allFiles)
    schemaParser = FileManSchemaParser()
    allSchemaDict = schemaParser.parseSchemaDDFileV2(allFiles['0']['path'])
    isolatedFiles = schemaParser.isolatedFiles
    # Parse file 1 first: it populates the file-number -> global-location map.
    # NOTE(review): four arguments here vs. the three-parameter
    # parseZWRGlobalFileBySchemaV2(inputFileName, fileNumber, glbLoc) defined
    # in this file — another signature-version mismatch to confirm.
    glbDataParser.parseZWRGlobalFileBySchemaV2(allFiles['1']['path'], allSchemaDict, '1', '^DIC(')
    glbDataParser._allFiles = allFiles
    glbDataParser._allSchemaDict = allSchemaDict
    for fileNo in result.fileNos:
        assert fileNo in glbDataParser.globalLocationMap
    if result.outdir:
        glbDataParser.outDir = result.outdir
    if result.patchRepositDir:
        glbDataParser.patchDir = result.patchRepositDir
    htmlGen = FileManDataToHtml(crossRef, result.outdir)
    if not result.all or set(result.fileNos).issubset(isolatedFiles):
        for fileNo in result.fileNos:
            gdFile = allFiles[fileNo]['path']
            logging.info("Parsing file: %s at %s" % (fileNo, gdFile))
            glbDataParser.parseZWRGlobalFileBySchemaV2(gdFile, allSchemaDict, fileNo)
            if result.outdir:
                htmlGen.outputFileManDataAsHtml(glbDataParser)
            else:
                fileManDataMap = glbDataParser.outFileManData
                for file in getKeys(fileManDataMap.iterkeys(), float):
                    printFileManFileData(fileManDataMap[file])
            # Free the parsed data for this file before moving to the next.
            del glbDataParser.outFileManData[fileNo]
    glbDataParser.outRtnReferenceDict()
    return
    # -----------------------------------------------------------------
    # Everything below is UNREACHABLE (dead code after the return above).
    # It walks the schema's strongly-connected components to parse files
    # that depend on each other. Kept as-is; confirm whether it should be
    # re-enabled or deleted.
    # -----------------------------------------------------------------
    """ Also generate all required files as well """
    sccSet = schemaParser.sccSet
    fileSet = set(result.fileNos)
    for idx, value in enumerate(sccSet):
        fileSet.difference_update(value)
        if not fileSet:
            break
    for i in xrange(0, idx + 1):
        fileSet = sccSet[i]
        fileSet &= set(allFiles.keys())
        fileSet -= isolatedFiles
        fileSet.discard('757')
        if len(fileSet) > 1:
            # Pre-build key indexes so cross-file pointers resolve to names.
            for file in fileSet:
                zwrFile = allFiles[file]['path']
                globalSub = allFiles[file]['name']
                logging.info("Generate file key index for: %s at %s" % (file, zwrFile))
                glbDataParser.generateFileIndex(zwrFile, allSchemaDict, file)
        for file in fileSet:
            zwrFile = allFiles[file]['path']
            globalSub = allFiles[file]['name']
            logging.info("Parsing file: %s at %s" % (file, zwrFile))
            glbDataParser.parseZWRGlobalFileBySchemaV2(zwrFile, allSchemaDict, file)
            if result.outdir:
                htmlGen.outputFileManDataAsHtml(glbDataParser)
            del glbDataParser.outFileManData[file]
def getAllSchema(allFiles):
    """Parse the FileMan Data Dictionary and return the full schema mapping.

    :param allFiles: dict of fileNo => {'name': ..., 'path': ...}; entry '0'
        must point at the DD (schema) ZWR file.
    :return: whatever FileManSchemaParser.parseSchemaDDFileV2 produces
        (a dict of fileNo => schema, per its use elsewhere in this file).
    :raises KeyError: if '0' is missing from allFiles.
    """
    # Fixed: dropped the stray trailing semicolon and the redundant local.
    schemaParser = FileManSchemaParser()
    return schemaParser.parseSchemaDDFileV2(allFiles['0']['path'])
def testGlobalParser(crosRef=None):
    """Command-line driver: parse FileMan globals per schema and emit output.

    NOTE(review): this is a SECOND definition of testGlobalParser in this
    file (an earlier copy also writes filesInfo.json via
    __generateGitRepositoryKey__; this one does not). At import time this
    definition rebinds the name and wins — confirm which copy is intended.
    NOTE(review): the ``crosRef`` parameter is never used.
    """
    parser = createArgParser()
    result = parser.parse_args()
    print result  # Python 2 print statement; dumps the parsed args namespace
    # Imports deferred to call time, presumably to avoid import cycles.
    from InitCrossReferenceGenerator import parseCrossRefGeneratorWithArgs
    from FileManDataToHtml import FileManDataToHtml
    crossRef = parseCrossRefGeneratorWithArgs(result)
    # NOTE(review): the FileManGlobalDataParser defined in this file takes
    # (MRepositDir, crossRef); this single-argument call suggests a different
    # class version — confirm before running.
    glbDataParser = FileManGlobalDataParser(crossRef)
    #glbDataParser.parseAllZWRGlobaFilesBySchema(result.MRepositDir, allSchemaDict)
    allFiles = glbDataParser.getAllFileManZWRFiles(
        os.path.join(result.MRepositDir, 'Packages'), "*/Globals/*.zwr")
    # Schema (file 0) and file-of-files (file 1) must exist, and every
    # requested file number must have a discovered global file.
    assert '0' in allFiles and '1' in allFiles and set(
        result.fileNos).issubset(allFiles)
    schemaParser = FileManSchemaParser()
    allSchemaDict = schemaParser.parseSchemaDDFileV2(allFiles['0']['path'])
    isolatedFiles = schemaParser.isolatedFiles
    # Parse file 1 first: it populates the file-number -> global-location map.
    # NOTE(review): four arguments here vs. the three-parameter method defined
    # in this file — signature-version mismatch to confirm.
    glbDataParser.parseZWRGlobalFileBySchemaV2(allFiles['1']['path'], allSchemaDict, '1', '^DIC(')
    glbDataParser._allFiles = allFiles
    glbDataParser._allSchemaDict = allSchemaDict
    for fileNo in result.fileNos:
        assert fileNo in glbDataParser.globalLocationMap
    if result.outdir:
        glbDataParser.outDir = result.outdir
    if result.patchRepositDir:
        glbDataParser.patchDir = result.patchRepositDir
    htmlGen = FileManDataToHtml(crossRef, result.outdir)
    if not result.all or set(result.fileNos).issubset(isolatedFiles):
        for fileNo in result.fileNos:
            gdFile = allFiles[fileNo]['path']
            logging.info("Parsing file: %s at %s" % (fileNo, gdFile))
            glbDataParser.parseZWRGlobalFileBySchemaV2(gdFile, allSchemaDict, fileNo)
            if result.outdir:
                htmlGen.outputFileManDataAsHtml(glbDataParser)
            else:
                fileManDataMap = glbDataParser.outFileManData
                for file in getKeys(fileManDataMap.iterkeys(), float):
                    printFileManFileData(fileManDataMap[file])
            # Free the parsed data for this file before moving to the next.
            del glbDataParser.outFileManData[fileNo]
    glbDataParser.outRtnReferenceDict()
    return
    # -----------------------------------------------------------------
    # Everything below is UNREACHABLE (dead code after the return above).
    # Kept as-is; confirm whether it should be re-enabled or deleted.
    # -----------------------------------------------------------------
    """ Also generate all required files as well """
    sccSet = schemaParser.sccSet
    fileSet = set(result.fileNos)
    for idx, value in enumerate(sccSet):
        fileSet.difference_update(value)
        if not fileSet:
            break
    for i in xrange(0, idx + 1):
        fileSet = sccSet[i]
        fileSet &= set(allFiles.keys())
        fileSet -= isolatedFiles
        fileSet.discard('757')
        if len(fileSet) > 1:
            # Pre-build key indexes so cross-file pointers resolve to names.
            for file in fileSet:
                zwrFile = allFiles[file]['path']
                globalSub = allFiles[file]['name']
                logging.info("Generate file key index for: %s at %s" % (file, zwrFile))
                glbDataParser.generateFileIndex(zwrFile, allSchemaDict, file)
        for file in fileSet:
            zwrFile = allFiles[file]['path']
            globalSub = allFiles[file]['name']
            logging.info("Parsing file: %s at %s" % (file, zwrFile))
            glbDataParser.parseZWRGlobalFileBySchemaV2(zwrFile, allSchemaDict, file)
            if result.outdir:
                htmlGen.outputFileManDataAsHtml(glbDataParser)
            del glbDataParser.outFileManData[file]
class FileManGlobalDataParser(object):
    """Parse FileMan data stored in ZWR global dumps, driven by the schema.

    Discovers ZWR global files under <MRepositDir>/Packages, loads the schema
    from the DD file (file 0), and parses individual FileMan files into
    FileManData structures; also back-fills cross-reference info (RPCs, HL7,
    HLO, installs) when a cross-reference object is supplied.
    """

    def __init__(self, MRepositDir, crossRef):
        """Initialize parser state; scans the repository and parses the
        schema DD file at construction time (performs file I/O)."""
        self.patchDir = None  # patch repository dir; set externally by callers
        self.MRepositDir = MRepositDir
        self._dataRoot = None  # root node of the ZWR global currently being read
        self._crossRef = crossRef
        self._curFileNo = None  # file number currently being parsed/indexed
        self._glbData = {}  # fileNo => FileManData
        self._pointerRef = {}  # fileNo => ien => set of fields awaiting self-pointer fixup
        self._fileKeyIndex = {}  # File: => ien => Value
        # NOTE(review): aliases (does not copy) the module-level
        # initGlobalLocationMap; _parseFileDetail writes into it, mutating the
        # shared map — confirm that is intentional.
        self._glbLocMap = initGlobalLocationMap  # File: => Global Location
        self._rtnRefDict = {}  # dict of rtn => fileNo => Details
        self.allFiles = self._getAllFileManZWRFiles()  # Dict of fileNum => Global file
        self.schemaParser = FileManSchemaParser()
        self._allSchemaDict = self.schemaParser.parseSchemaDDFileV2(self.allFiles['0']['path'])

    @property
    def outFileManData(self):
        # Parsed data accumulated so far: fileNo => FileManFileData.
        return self._glbData

    @property
    def globalLocationMap(self):
        # fileNo => normalized global location (e.g. '^DIC(').
        return self._glbLocMap

    def getFileNoByGlobalLocation(self, glbLoc):
        """ get the file no by global location
            return fileNo if found, otherwise return None
        """
        # Linear scan over the location map; normalize first so formats match.
        outLoc = normalizeGlobalLocation(glbLoc)
        for key, value in self._glbLocMap.iteritems():
            if value == outLoc:
                return key
        return None

    def getFileManFileNameByFileNo(self, fileNo):
        """Return the FileMan file name for fileNo via the cross reference,
        or "" when unknown."""
        fileManFile = self._crossRef.getGlobalByFileNo(fileNo)
        if fileManFile:
            return fileManFile.getFileManName()
        return ""

    def _createDataRootByZWRFile(self, inputFileName):
        # Eagerly load an entire ZWR file into an in-memory global tree.
        self._dataRoot = createGlobalNodeByZWRFile(inputFileName)

    def _getAllFileManZWRFiles(self):
        """Scan <MRepositDir>/Packages for */Globals/*.zwr and return
        fileNo => {'name': description, 'path': absolute path}.

        'DD.zwr' maps to key '0' (the schema). Split continuation files
        (suffix '-2' etc.) are skipped; a leading '0' is stripped from file
        numbers (e.g. '0.2' becomes '.2').
        """
        dirName = os.path.join(self.MRepositDir, 'Packages')
        pattern = "*/Globals/*.zwr"
        searchFiles = glob.glob(os.path.join(dirName, pattern))
        outFiles = {}
        for file in searchFiles:
            fileName = os.path.basename(file)
            if fileName == 'DD.zwr':
                outFiles['0'] = {
                    'name': 'Schema File',
                    'path': os.path.normpath(os.path.abspath(file))
                }
                continue
            # e.g. "44+HOSPITAL LOCATION.zwr" -> fileNo '44', des 'HOSPITAL LOCATION'
            result = re.search(
                "(?P<fileNo>^[0-9.]+)(-[1-9])?\+(?P<des>.*)\.zwr$", fileName)
            if result:
                if result.groups()[1]:
                    # A "-N" suffix marks a continuation chunk; only the first
                    # chunk is indexed.
                    logging.info("Ignore file %s" % fileName)
                    continue
                fileNo = result.group('fileNo')
                if fileNo.startswith('0'):
                    fileNo = fileNo[1:]
                globalDes = result.group('des')
                outFiles[fileNo] = {'name': globalDes,
                                    'path': os.path.normpath(os.path.abspath(file))}
        return outFiles

    def generateFileIndex(self, inputFileName, fileNumber):
        """Build the ien => .01-field-value index for fileNumber so pointer
        fields parsed later can be resolved to names. No-op (with an error
        log) when the schema lacks a located .01 field."""
        schemaFile = self._allSchemaDict[fileNumber]
        if not schemaFile.hasField('.01'):
            logging.error("File does not have a .01 field, ignore")
            return
        keyField = schemaFile.getFileManFieldByFieldNo('.01')
        keyLoc = keyField.getLocation()
        if not keyLoc:
            logging.error(".01 field does not have a location")
            return
        self._curFileNo = fileNumber
        if fileNumber in self._glbLocMap:
            glbLoc = self._glbLocMap[fileNumber]
            for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
                if not dataRoot:
                    continue
                self._dataRoot = dataRoot
                fileDataRoot = dataRoot
                (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField)
                if detail:
                    self._addFileKeyIndex(fileNumber, ien, detail)
                elif ien:
                    logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber))
                else:
                    logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber))

    """ Generate a map Field Value => IEN """
    def generateFileFieldMap(self, inputFileName, fileNumber, fieldNo):
        """Return {field value => ien} for fieldNo of fileNumber, reading the
        ZWR file at the file's known global location. Returns None early
        (implicitly) if the field is missing or has no location.

        NOTE(review): unlike generateFileIndex, the _glbLocMap lookup here is
        unguarded — a fileNumber without a known location raises KeyError.
        """
        schemaFile = self._allSchemaDict[fileNumber]
        if not schemaFile.hasField(fieldNo):
            logging.error("File does not have a [%s] field, ignore", fieldNo)
            return
        keyField = schemaFile.getFileManFieldByFieldNo(fieldNo)
        keyLoc = keyField.getLocation()
        if not keyLoc:
            logging.error("[%s] field does not have a location", fieldNo)
            return
        glbLoc = self._glbLocMap[fileNumber]
        fieldMap = {}
        for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
            if not dataRoot:
                continue
            fileDataRoot = dataRoot
            (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField)
            if detail:
                fieldMap[detail] = ien
            elif ien:
                logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber))
            else:
                logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber))
        return fieldMap

    def _getKeyNameBySchema(self, dataRoot, keyLoc, keyField):
        """Extract (ien, parsed field detail) for the FIRST positive-ien entry
        under dataRoot, using keyLoc ("node;piece") to locate the value.
        Returns (ien, None) when the node exists but has no value, and
        (None, None) when nothing matched."""
        floatKey = getKeys(dataRoot, float)
        logging.debug('Total # of entry is %s' % len(floatKey))
        for ien in floatKey:
            if float(ien) <= 0:
                continue
            dataEntry = dataRoot[ien]
            index, loc = keyLoc.split(';')
            if not index or index not in dataEntry:
                continue
            dataEntry = dataEntry[index]
            if not dataEntry.value:
                return (ien, None)
            values = dataEntry.value.split('^')
            dataValue = None
            if convertToType(loc, int):
                # Numeric piece position: pick that '^'-piece (1-based).
                intLoc = int(loc)
                if intLoc > 0 and intLoc <= len(values):
                    dataValue = values[intLoc - 1]
            else:
                # Non-numeric location: the whole node value is the key.
                dataValue = str(dataEntry.value)
            if dataValue:
                return (ien, self._parseIndividualFieldDetail(dataValue, keyField, None))
        return (None, None)

    def parseZWRGlobalFileBySchemaV2(self, inputFileName, fileNumber, glbLoc=None):
        """Parse one FileMan file's ZWR dump into self._glbData[fileNumber].

        glbLoc defaults to the known location for fileNumber; after parsing,
        self-pointers are resolved and (when a cross reference is present)
        cross-reference structures are updated."""
        schemaFile = self._allSchemaDict[fileNumber]
        self._glbData[fileNumber] = FileManFileData(fileNumber, self.getFileManFileNameByFileNo(fileNumber))
        self._curFileNo = fileNumber
        if not glbLoc:
            glbLoc = self._glbLocMap.get(fileNumber)
            logging.info("File: %s global loc: %s" % (fileNumber, glbLoc))
        elif fileNumber in self._glbLocMap:
            logging.info("global loc %s, %s" % (glbLoc, self._glbLocMap[fileNumber]))
        for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
            if not dataRoot:
                continue
            self._dataRoot = dataRoot
            fileDataRoot = dataRoot
            self._parseDataBySchema(fileDataRoot, schemaFile, self._glbData[fileNumber])
        self._resolveSelfPointer()
        if self._crossRef:
            self._updateCrossReference()

    def _updateCrossReference(self):
        # Dispatch per known file number: 8994=RPC, 101=Protocol/HL7,
        # 779.2=HLO, 9.7=Install.
        if '8994' in self._glbData:
            self._updateRPCRefence()
        if '101' in self._glbData:
            self._updateHL7Reference()
        if '779.2' in self._glbData:
            self._updateHLOReference()
        if '9.7' in self._glbData:
            self._updateInstallReference()

    def outRtnReferenceDict(self):
        """Dump the routine => fileNo => details map to Routine-Ref.json in
        self.outdir (only when non-empty).

        NOTE(review): self.outdir is not set by __init__; callers assign it
        externally — confirm it is always set before this runs.
        """
        if len(self._rtnRefDict):
            """ generate the dependency in json file """
            with open(os.path.join(self.outdir, "Routine-Ref.json"), 'w') as output:
                logging.info("Generate File: %s" % output.name)
                json.dump(self._rtnRefDict, output)

    def _updateHLOReference(self):
        """Attach parsed HLO entries (file 779.2) to their owning packages,
        categorized by entry-name namespace."""
        hlo = self._glbData['779.2']
        for ien in sorted(hlo.dataEntries.keys(), key=lambda x: float(x)):
            hloEntry = hlo.dataEntries[ien]
            entryName = hloEntry.name
            namespace, package = \
                self._crossRef.__categorizeVariableNameByNamespace__(entryName)
            if package:
                package.hlo.append(hloEntry)
                logging.info("Adding hlo: %s to Package: %s" % (entryName, package.getName()))

    def _updateHL7Reference(self):
        """Categorize protocol entries (file 101): 'event driver'/'subscriber'
        types become HL7 entries (with routine references from fields 771/772);
        all other types become plain protocol entries."""
        protocol = self._glbData['101']
        outJSON = {}  # NOTE(review): written nowhere — appears unused
        for ien in sorted(protocol.dataEntries.keys(), key=lambda x: float(x)):
            protocolEntry = protocol.dataEntries[ien]
            if '4' in protocolEntry.fields:
                # Field 4 is the protocol TYPE (shadows the 'type' builtin).
                type = protocolEntry.fields['4'].value
                if (type != 'event driver' and type != 'subscriber'):
                    logging.info("Adding Protocol Entry of type: %s" % (type))
                    entryName = protocolEntry.name
                    namespace, package = \
                        self._crossRef.__categorizeVariableNameByNamespace__(entryName)
                    if package:
                        package.protocol.append(protocolEntry)
                        logging.info("Adding Protocol Entry: %s to Package: %s" % (entryName, package.getName()))
                # only care about the event drive and subscriber type
                elif (type == 'event driver' or type == 'subscriber'):
                    entryName = protocolEntry.name
                    namespace, package = \
                        self._crossRef.__categorizeVariableNameByNamespace__(entryName)
                    if package:
                        package.hl7.append(protocolEntry)
                        logging.info("Adding HL7: %s to Package: %s" % (entryName, package.getName()))
                    elif '12' in protocolEntry.fields:  # check the packge it belongs
                        pass
                    else:
                        logging.warn("Cannot find a package for HL7: %s" % entryName)
                    # Fields 771/772 hold M routine invocations; record each
                    # referenced routine under file '101'.
                    for field in ('771', '772'):
                        if field not in protocolEntry.fields:
                            continue
                        hl7Rtn = protocolEntry.fields[field].value
                        if not hl7Rtn:
                            continue
                        for rtn, tag, pos in getMumpsRoutine(hl7Rtn):
                            hl7Info = {"name": entryName, "ien": ien}
                            if tag:
                                hl7Info['tag'] = tag
                            self._rtnRefDict.setdefault(rtn, {}).setdefault('101', []).append(hl7Info)

    def _updateRPCRefence(self):
        """Attach parsed RPC entries (file 8994) to packages (by name
        namespace, falling back to routine namespace) and record the
        RPC-to-routine reference from field .03 (tag from .02).

        NOTE(review): the fallback branch tests ``rpcRoutine`` BEFORE it can
        have been assigned in that branch (it is still None there), so the
        "categorize by routine" path looks unreachable — confirm intent.
        """
        rpcData = self._glbData['8994']
        for ien in sorted(rpcData.dataEntries.keys(), key=lambda x: float(x)):
            rpcEntry = rpcData.dataEntries[ien]
            rpcRoutine = None
            if rpcEntry.name:
                namespace, package = \
                    self._crossRef.__categorizeVariableNameByNamespace__(rpcEntry.name)
                if package:
                    package.rpcs.append(rpcEntry)
                    logging.info("Adding RPC: %s to Package: %s" % (rpcEntry.name, package.getName()))
                if '.03' in rpcEntry.fields:
                    rpcRoutine = rpcEntry.fields['.03'].value
                else:
                    if rpcRoutine:
                        """ try to categorize by routine called """
                        namespace, package = \
                            self._crossRef.__categorizeVariableNameByNamespace__(rpcRoutine)
                        if package:
                            package.rpcs.append(rpcEntry)
                            logging.info("Adding RPC: %s to Package: %s based on routine calls" % (rpcEntry.name, package.getName()))
                    else:
                        logging.error("Cannot find package for RPC: %s" % (rpcEntry.name))
                """ Generate the routine referenced based on RPC Call """
                if rpcRoutine:
                    rpcInfo = {"name": rpcEntry.name, "ien": ien}
                    if '.02' in rpcEntry.fields:
                        rpcTag = rpcEntry.fields['.02'].value
                        rpcInfo['tag'] = rpcTag
                    self._rtnRefDict.setdefault(rpcRoutine, {}).setdefault('8994', []).append(rpcInfo)

    def _findInstallPackage(self, packageList, installEntryName, checkNamespace=True):
        """Map an install entry name to a package name string.

        Tries (1) namespace categorization (when checkNamespace), (2) the
        INSTALL_PACKAGE_FIX override table, (3) a title-cased regex capture
        checked against packageList then INSTALL_RENAME_DICT, else "Unknown".
        Always returns a stripped string (may be "None" if nothing matched
        and the regex failed — str(None)).
        """
        package = None
        """ checkNamespace is used by the "version change" check to match the package name in the install name but not the namespace in the install name, which should help eliminate multibuilds from being found as package changes """
        if checkNamespace:
            namespace, package = self._crossRef.__categorizeVariableNameByNamespace__(installEntryName)
        # A check to remove the mis-categorized installs which happen to fall in a namespace
        if installEntryName in INSTALL_PACKAGE_FIX:
            package = INSTALL_PACKAGE_FIX[installEntryName]
        # If it cannot match a package by namespace, capture the name via Regular Expression
        if package is None:
            pkgMatch = re.match("[A-Z./ \&\-\']+", installEntryName)
            if pkgMatch:
                # if a match is found, switch to title case and remove extra spaces
                targetName = pkgMatch.group(0).title().strip()
                # First check it against the list of package names
                if targetName in packageList:
                    package = targetName
                # Then check it against the dictionary above for some odd spellings or capitalization
                elif targetName in INSTALL_RENAME_DICT:
                    package = INSTALL_RENAME_DICT[targetName]
                # If all else fails, assign it to the "Unknown"
                else:
                    package = "Unknown"
        package = str(package).strip()
        return package

    def _updateInstallReference(self):
        """Write install_information.json from parsed INSTALL data (file 9.7),
        grouped by package, including KIDS multi-build children and a
        heuristic 'packageSwitch' flag for version-change installs.

        NOTE(review): relies on self.outdir and self.patchDir being set by
        callers before this runs; also creates <outdir>/9_7 eagerly.
        """
        if not os.path.exists(self.outdir + "/9_7"):
            os.mkdir(self.outdir + "/9_7")
        installData = self._glbData['9.7']
        output = os.path.join(self.outdir, "install_information.json")
        installJSONData = {}
        packageList = self._crossRef.getAllPackages()
        patchOrderGen = PatchOrderGenerator()
        patchOrderGen.analyzeVistAPatchDir(self.patchDir + "/Packages")
        with open(output, 'w') as installDataOut:
            logging.warn("inside the _updateInstallReference")
            for ien in sorted(installData.dataEntries.keys(), key=lambda x: float(x)):
                installItem = {}
                installEntry = installData.dataEntries[ien]
                package = self._findInstallPackage(packageList, installEntry.name)
                # if this is the first time the package is found, add an entry in the install JSON data.
                if package not in installJSONData:
                    installJSONData[package] = {}
                if installEntry.name:
                    logging.warn("Gathering info for: %s" % installEntry.name)
                    installItem['name'] = installEntry.name
                    installItem['ien'] = installEntry.ien
                    installItem['label'] = installEntry.name
                    installItem['value'] = installEntry.name
                    installItem['parent'] = package
                    if installEntry.name in patchOrderGen._kidsDepBuildDict:
                        # Record multi-build children with their own packages.
                        installchildren = []
                        for child in patchOrderGen._kidsDepBuildDict[installEntry.name]:
                            childPackage = self._findInstallPackage(packageList, child)
                            installchildren.append({"name": child, "package": childPackage})
                        installItem['children'] = installchildren
                    if '11' in installEntry.fields:
                        installItem['installDate'] = installEntry.fields['11'].value.strftime("%Y-%m-%d")
                    if '1' in installEntry.fields:
                        installItem['packageLink'] = installEntry.fields['1'].value
                    if '40' in installEntry.fields:
                        installItem['numRoutines'] = len(installEntry.fields['40'].value.dataEntries)
                    if '14' in installEntry.fields:
                        installItem['numFiles'] = len(installEntry.fields['14'].value.dataEntries)
                    # Checks for the absence of asterisks which usually denotes a package change, also make it more specific to
                    # eliminate the multibuilds that are being marked as package changes
                    testMatch = re.search("\*+", installEntry.name)
                    if testMatch is None:
                        # Assume a package switch name will be just a package name and a version
                        capture = re.match("(?P<packageName>[A-Z./ \&\-\']+) (?P<packageVal>[.0-9]+)", installEntry.name)
                        if capture:
                            checkPackage = self._findInstallPackage(packageList, capture.groups()[0], False)
                            if (not (checkPackage == "Unknown") or (len(capture.groups()[0]) <= 4)):
                                installItem['packageSwitch'] = True
                    installJSONData[package][installEntry.name] = installItem
            installJSONData['MultiBuild'] = {}
            for multiBuildFile in patchOrderGen._multiBuildDict:
                multibuildItem = {}
                multibuildItem['name'] = os.path.basename(multiBuildFile)
                multibuildItem['children'] = []
                for installName in patchOrderGen._multiBuildDict[multiBuildFile]:
                    package = self._findInstallPackage(packageList, installName)
                    multibuildItem['children'].append({"name": installName, "package": package})
                installJSONData['MultiBuild'][os.path.basename(multiBuildFile)] = multibuildItem
            logging.warn("About to dump data into %s" % output)
            json.dump(installJSONData, installDataOut)

    def _resolveSelfPointer(self):
        """ Replace self-reference with meaningful data """
        # Fields queued in _pointerRef point back into their own file; now
        # that the file is fully parsed, append the target entry's name (or
        # its ien as a fallback) to each queued field value.
        for fileNo in self._pointerRef:
            if fileNo in self._glbData:
                fileData = self._glbData[fileNo]
                for ien, fields in self._pointerRef[fileNo].iteritems():
                    if ien in fileData.dataEntries:
                        name = fileData.dataEntries[ien].name
                        if not name:
                            name = str(ien)
                        for field in fields:
                            field.value = "^".join((field.value, name))
        del self._pointerRef
        self._pointerRef = {}

    def _parseFileDetail(self, dataEntry, ien):
        # For file 1 entries: harvest the GL node (global location) and
        # register it in the shared location map under this ien (= fileNo).
        if 'GL' in dataEntry:
            loc = dataEntry['GL'].value
            loc = normalizeGlobalLocation(loc)
            self._glbLocMap[ien] = loc

    def _parseDataBySchema(self, dataRoot, fileSchema, outGlbData):
        """Parse every positive-ien entry under dataRoot per fileSchema and
        add the resulting FileManDataEntry objects to outGlbData. Also feeds
        the key index when parsing the current file, and the location map
        when parsing file 1."""
        """ first sort the schema Root by location """
        locFieldDict = sortSchemaByLocation(fileSchema)
        """ for each data entry, parse data by location """
        floatKey = getKeys(dataRoot, float)
        for ien in floatKey:
            if float(ien) <= 0:
                continue
            dataEntry = dataRoot[ien]
            outDataEntry = FileManDataEntry(fileSchema.getFileNo(), ien)
            dataKeys = [x for x in dataEntry]
            sortedKey = sorted(dataKeys, cmp=sortDataEntryFloatFirst)
            for locKey in sortedKey:
                if locKey == '0' and fileSchema.getFileNo() == '1':
                    self._parseFileDetail(dataEntry[locKey], ien)
                if locKey in locFieldDict:
                    fieldDict = locFieldDict[locKey]  # a dict of {pos: field}
                    curDataRoot = dataEntry[locKey]
                    if len(fieldDict) == 1:
                        fieldAttr = fieldDict.values()[0]
                        if fieldAttr.isSubFilePointerType():  # Multiple
                            self._parseSubFileField(curDataRoot, fieldAttr, outDataEntry)
                        else:
                            self._parseSingleDataValueField(curDataRoot, fieldAttr, outDataEntry)
                    else:
                        self._parseDataValueField(curDataRoot, fieldDict, outDataEntry)
            outGlbData.addFileManDataEntry(ien, outDataEntry)
            if fileSchema.getFileNo() == self._curFileNo:
                self._addFileKeyIndex(self._curFileNo, ien, outDataEntry.name)

    def _parseSingleDataValueField(self, dataEntry, fieldAttr, outDataEntry):
        """Extract the single field stored at this node. A numeric piece in
        the location ("node;piece") selects one '^'-piece; otherwise the whole
        node value is used."""
        if not dataEntry.value:
            return
        values = dataEntry.value.split('^')
        location = fieldAttr.getLocation()
        dataValue = None
        if location:
            index, loc = location.split(';')
            if loc:
                if convertToType(loc, int):
                    intLoc = int(loc)
                    if intLoc > 0 and intLoc <= len(values):
                        dataValue = values[intLoc - 1]
                else:
                    dataValue = str(dataEntry.value)
        else:
            dataValue = str(dataEntry.value)
        if dataValue:
            self._parseIndividualFieldDetail(dataValue, fieldAttr, outDataEntry)

    def _parseDataValueField(self, dataRoot, fieldDict, outDataEntry):
        """Split the node value on '^' and parse each piece whose 1-based
        position appears in fieldDict ({pos: field})."""
        if not dataRoot.value:
            return
        values = dataRoot.value.split('^')
        if not values:
            return  # guard: nothing to parse for an empty split result
        for idx, value in enumerate(values, 1):
            if value and str(idx) in fieldDict:
                fieldAttr = fieldDict[str(idx)]
                self._parseIndividualFieldDetail(value, fieldAttr, outDataEntry)

    def _parseIndividualFieldDetail(self, value, fieldAttr, outDataEntry):
        """Convert a raw field value per its schema type (set membership,
        file/variable pointer resolution, date/time, timestamp), attach the
        resulting FileManDataField to outDataEntry (when given), and return
        the converted detail. Self-file pointers are queued in _pointerRef
        for later resolution."""
        if not value.strip(' '):
            return
        value = value.strip(' ')
        fieldDetail = value
        pointerFileNo = None
        if fieldAttr.isSetType():
            # SET OF CODES: translate the code to its display member.
            setDict = fieldAttr.getSetMembers()
            if setDict and value in setDict:
                fieldDetail = setDict[value]
        elif fieldAttr.isFilePointerType() or fieldAttr.isVariablePointerType():
            fileNo = None
            ien = None
            if fieldAttr.isFilePointerType():
                filePointedTo = fieldAttr.getPointedToFile()
                if filePointedTo:
                    fileNo = filePointedTo.getFileNo()
                    ien = value
                else:
                    fieldDetail = 'No Pointed to File'
            else:  # for variable pointer type: value is "ien;globalRoot"
                vpInfo = value.split(';')
                if len(vpInfo) != 2:
                    logging.error("Unknown variable pointer format: %s" % value)
                    fieldDetail = "Unknow Variable Pointer"
                else:
                    fileNo = self.getFileNoByGlobalLocation(vpInfo[1])
                    ien = vpInfo[0]
                    if not fileNo:
                        logging.warn("Could not find File for %s" % value)
                        fieldDetail = 'Global Root: %s, IEN: %s' % (vpInfo[1], ien)
            if fileNo and ien:
                fieldDetail = '^'.join((fileNo, ien))
                idxName = self._getFileKeyIndex(fileNo, ien)
                if idxName:
                    idxes = str(idxName).split('^')
                    if len(idxes) == 1:
                        fieldDetail = '^'.join((fieldDetail, str(idxName)))
                    elif len(idxes) == 3:
                        fieldDetail = '^'.join((fieldDetail, str(idxes[-1])))
                elif fileNo == self._curFileNo:
                    # Pointer into the file being parsed right now: defer
                    # name resolution until the file is complete.
                    pointerFileNo = fileNo
                else:
                    logging.warn("Cannot find value for %s, %s" % (ien, fileNo))
        elif fieldAttr.getType() == FileManField.FIELD_TYPE_DATE_TIME:  # datetime
            # A ',' marks $HOROLOG format; otherwise FileMan date format.
            if value.find(',') >= 0:
                fieldDetail = horologToDateTime(value)
            else:
                outDt = fmDtToPyDt(value)
                if outDt:
                    fieldDetail = outDt
                else:
                    logging.warn("Could not parse Date/Time: %s" % value)
        elif fieldAttr.getName().upper().startswith("TIMESTAMP"):  # timestamp field
            if value.find(',') >= 0:
                fieldDetail = horologToDateTime(value)
        if outDataEntry:
            dataField = FileManDataField(fieldAttr.getFieldNo(), fieldAttr.getType(), fieldAttr.getName(), fieldDetail)
            if pointerFileNo:
                self._addDataFieldToPointerRef(pointerFileNo, value, dataField)
            outDataEntry.addField(dataField)
            if fieldAttr.getFieldNo() == '.01':
                # The .01 field doubles as the entry's display name.
                outDataEntry.name = fieldDetail
                outDataEntry.type = fieldAttr.getType()
        return fieldDetail

    def _addDataFieldToPointerRef(self, fileNo, ien, dataField):
        # Queue a field for later self-pointer resolution.
        self._pointerRef.setdefault(fileNo, {}).setdefault(ien, set()).add(dataField)

    def _addFileKeyIndex(self, fileNo, ien, value):
        # First write wins: do not overwrite an existing index entry.
        ienDict = self._fileKeyIndex.setdefault(fileNo, {})
        if ien not in ienDict:
            ienDict[ien] = value

    def _getFileKeyIndex(self, fileNo, ien):
        # Return the indexed value for (fileNo, ien), or None when absent.
        if fileNo in self._fileKeyIndex:
            if ien in self._fileKeyIndex[fileNo]:
                return self._fileKeyIndex[fileNo][ien]
        return None

    def _addFileFieldMap(self, fileNo, ien, value):
        # NOTE(review): BUG — 'ienDict' is never assigned in this method (the
        # setdefault result is bound to 'fldDict'), so any call raises
        # NameError. Looks like a copy of _addFileKeyIndex with a half-done
        # rename; appears to have no callers in this file — confirm and fix
        # or remove.
        fldDict = self._fileKeyIndex.setdefault(fileNo, {})
        if ien not in ienDict:
            ienDict[ien] = value

    def _parseSubFileField(self, dataRoot, fieldAttr, outDataEntry):
        """Parse a multiple (sub-file) field: word-processing nodes become a
        list of lines; real sub-files are parsed recursively via
        _parseDataBySchema into a nested FileManFileData."""
        logging.debug("%s" % (fieldAttr.getName() + ':'))
        subFile = fieldAttr.getPointedToSubFile()
        if fieldAttr.hasSubType(FileManField.FIELD_TYPE_WORD_PROCESSING):
            outLst = self._parsingWordProcessingNode(dataRoot)
            outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(), FileManField.FIELD_TYPE_WORD_PROCESSING, fieldAttr.getName(), outLst))
        elif subFile:
            subFileData = FileManFileData(subFile.getFileNo(), subFile.getFileManName())
            self._parseDataBySchema(dataRoot, subFile, subFileData)
            outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(), FileManField.FIELD_TYPE_SUBFILE_POINTER, fieldAttr.getName(), subFileData))
        else:
            logging.info("Sorry, do not know how to intepret the schema %s" % fieldAttr)

    def _parsingWordProcessingNode(self, dataRoot):
        # Collect the '0'-node value of each integer-keyed child as one line
        # of word-processing text, in key order.
        outLst = []
        for key in getKeys(dataRoot, int):
            if '0' in dataRoot[key]:
                outLst.append("%s" % dataRoot[key]['0'].value)
        return outLst
class FileManGlobalDataParser(object): def __init__(self, MRepositDir, crossRef): self.patchDir = None self.MRepositDir = MRepositDir self._dataRoot = None self._crossRef = crossRef self._curFileNo = None self._glbData = {} # fileNo => FileManData self._pointerRef = {} self._fileKeyIndex = {} # File: => ien => Value self._glbLocMap = initGlobalLocationMap # File: => Global Location self._rtnRefDict = {} # dict of rtn => fileNo => Details self.allFiles = self._getAllFileManZWRFiles() # Dict of fileNum => Global file self.schemaParser = FileManSchemaParser() self._allSchemaDict = self.schemaParser.parseSchemaDDFileV2(self.allFiles['0']['path']) @property def outFileManData(self): return self._glbData @property def globalLocationMap(self): return self._glbLocMap def getFileNoByGlobalLocation(self, glbLoc): """ get the file no by global location return fileNo if found, otherwise return None """ outLoc = normalizeGlobalLocation(glbLoc) for key, value in self._glbLocMap.iteritems(): if value == outLoc: return key return None def getFileManFileNameByFileNo(self, fileNo): fileManFile = self._crossRef.getGlobalByFileNo(fileNo) if fileManFile: return fileManFile.getFileManName() return "" def _createDataRootByZWRFile(self, inputFileName): self._dataRoot = createGlobalNodeByZWRFile(inputFileName) def _getAllFileManZWRFiles(self): dirName = os.path.join(self.MRepositDir,'Packages') pattern = "*/Globals/*.zwr" searchFiles = glob.glob(os.path.join(dirName, pattern)) outFiles = {} for file in searchFiles: fileName = os.path.basename(file) if fileName == 'DD.zwr': outFiles['0'] = {'name': 'Schema File', 'path': os.path.normpath(os.path.abspath(file))} continue result = re.search("(?P<fileNo>^[0-9.]+)(-[1-9])?\+(?P<des>.*)\.zwr$", fileName) if result: if result.groups()[1]: logging.info("Ignore file %s" % fileName) continue fileNo = result.group('fileNo') if fileNo.startswith('0'): fileNo = fileNo[1:] globalDes = result.group('des') outFiles[fileNo] = {'name': globalDes, 'path': 
os.path.normpath(os.path.abspath(file))} return outFiles def generateFileIndex(self, inputFileName, fileNumber): schemaFile = self._allSchemaDict[fileNumber] if not schemaFile.hasField('.01'): logging.error("File does not have a .01 field, ignore") return keyField = schemaFile.getFileManFieldByFieldNo('.01') keyLoc = keyField.getLocation() if not keyLoc: logging.error(".01 field does not have a location") return self._curFileNo = fileNumber if fileNumber in self._glbLocMap: glbLoc = self._glbLocMap[fileNumber] for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc): if not dataRoot: continue self._dataRoot = dataRoot fileDataRoot = dataRoot (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField) if detail: self._addFileKeyIndex(fileNumber, ien, detail) elif ien: logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber)) else: logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber)) """ Generate a map Field Value => IEN """ def generateFileFieldMap(self, inputFileName, fileNumber, fieldNo): schemaFile = self._allSchemaDict[fileNumber] if not schemaFile.hasField(fieldNo): logging.error("File does not have a [%s] field, ignore", fieldNo) return keyField = schemaFile.getFileManFieldByFieldNo(fieldNo) keyLoc = keyField.getLocation() if not keyLoc: logging.error("[%s] field does not have a location", fieldNo) return glbLoc = self._glbLocMap[fileNumber] fieldMap = {} for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc): if not dataRoot: continue fileDataRoot = dataRoot (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField) if detail: fieldMap[detail] = ien elif ien: logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber)) else: logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber)) return fieldMap def _getKeyNameBySchema(self, dataRoot, keyLoc, keyField): floatKey = getKeys(dataRoot, float) logging.debug('Total # of entry is %s' % 
len(floatKey)) for ien in floatKey: if float(ien) <=0: continue dataEntry = dataRoot[ien] index, loc = keyLoc.split(';') if not index or index not in dataEntry: continue dataEntry = dataEntry[index] if not dataEntry.value: return (ien, None) values = dataEntry.value.split('^') dataValue = None if convertToType(loc, int): intLoc = int(loc) if intLoc > 0 and intLoc <= len(values): dataValue = values[intLoc-1] else: dataValue = str(dataEntry.value) if dataValue: return (ien, self._parseIndividualFieldDetail(dataValue, keyField, None)) return (None, None) def parseZWRGlobalFileBySchemaV2(self, inputFileName, fileNumber, glbLoc=None): schemaFile = self._allSchemaDict[fileNumber] self._glbData[fileNumber] = FileManFileData(fileNumber, self.getFileManFileNameByFileNo(fileNumber)) self._curFileNo = fileNumber if not glbLoc: glbLoc = self._glbLocMap.get(fileNumber) logging.info("File: %s global loc: %s" % (fileNumber, glbLoc)) elif fileNumber in self._glbLocMap: logging.info("global loc %s, %s" % (glbLoc, self._glbLocMap[fileNumber])) for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc): if not dataRoot: continue self._dataRoot = dataRoot fileDataRoot = dataRoot self._parseDataBySchema(fileDataRoot, schemaFile, self._glbData[fileNumber]) self._resolveSelfPointer() if self._crossRef: self._updateCrossReference() def _updateCrossReference(self): if '8994' in self._glbData: self._updateRPCRefence() if '101' in self._glbData: self._updateHL7Reference() if '779.2' in self._glbData: self._updateHLOReference() if '9.7' in self._glbData: self._updateInstallReference() def outRtnReferenceDict(self): if len(self._rtnRefDict): """ generate the dependency in json file """ with open(os.path.join(self.outdir, "Routine-Ref.json"), 'w') as output: logging.info("Generate File: %s" % output.name) json.dump(self._rtnRefDict, output) def _updateHLOReference(self): hlo = self._glbData['779.2'] for ien in sorted(hlo.dataEntries.keys(),key=lambda x: float(x)): hloEntry = 
hlo.dataEntries[ien] entryName = hloEntry.name namespace, package = \ self._crossRef.__categorizeVariableNameByNamespace__(entryName) if package: package.hlo.append(hloEntry) logging.info("Adding hlo: %s to Package: %s" % (entryName, package.getName())) def _updateHL7Reference(self): protocol = self._glbData['101'] outJSON = {} for ien in sorted(protocol.dataEntries.keys(), key=lambda x: float(x)): protocolEntry = protocol.dataEntries[ien] if '4' in protocolEntry.fields: type = protocolEntry.fields['4'].value if (type != 'event driver' and type != 'subscriber'): logging.info("Adding Protocol Entry of type: %s" % (type)) entryName = protocolEntry.name namespace, package = \ self._crossRef.__categorizeVariableNameByNamespace__(entryName) if package: package.protocol.append(protocolEntry) logging.info("Adding Protocol Entry: %s to Package: %s" % (entryName, package.getName())) # only care about the event drive and subscriber type elif (type == 'event driver' or type == 'subscriber'): entryName = protocolEntry.name namespace, package = \ self._crossRef.__categorizeVariableNameByNamespace__(entryName) if package: package.hl7.append(protocolEntry) logging.info("Adding HL7: %s to Package: %s" % (entryName, package.getName())) elif '12' in protocolEntry.fields: # check the packge it belongs pass else: logging.warn("Cannot find a package for HL7: %s" % entryName) for field in ('771', '772'): if field not in protocolEntry.fields: continue hl7Rtn = protocolEntry.fields[field].value if not hl7Rtn: continue for rtn, tag, pos in getMumpsRoutine(hl7Rtn): hl7Info = {"name": entryName, "ien": ien} if tag: hl7Info['tag'] = tag self._rtnRefDict.setdefault(rtn,{}).setdefault('101',[]).append(hl7Info) def _updateRPCRefence(self): rpcData = self._glbData['8994'] for ien in sorted(rpcData.dataEntries.keys(), key=lambda x: float(x)): rpcEntry = rpcData.dataEntries[ien] rpcRoutine = None if rpcEntry.name: namespace, package = \ 
self._crossRef.__categorizeVariableNameByNamespace__(rpcEntry.name) if package: package.rpcs.append(rpcEntry) logging.info("Adding RPC: %s to Package: %s" % (rpcEntry.name, package.getName())) if '.03' in rpcEntry.fields: rpcRoutine = rpcEntry.fields['.03'].value else: if rpcRoutine: """ try to categorize by routine called """ namespace, package = \ self._crossRef.__categorizeVariableNameByNamespace__(rpcRoutine) if package: package.rpcs.append(rpcEntry) logging.info("Adding RPC: %s to Package: %s based on routine calls" % (rpcEntry.name, package.getName())) else: logging.error("Cannot find package for RPC: %s" % (rpcEntry.name)) """ Generate the routine referenced based on RPC Call """ if rpcRoutine: rpcInfo = {"name": rpcEntry.name, "ien" : ien } if '.02' in rpcEntry.fields: rpcTag = rpcEntry.fields['.02'].value rpcInfo['tag'] = rpcTag self._rtnRefDict.setdefault(rpcRoutine,{}).setdefault('8994',[]).append(rpcInfo) def _findInstallPackage(self,packageList, installEntryName,checkNamespace=True): package=None """ checkNamespace is used by the "version change" check to match the package name in the install name but not the namespace in the install name, which should help eliminate multibuilds from being found as package changes """ if checkNamespace: namespace, package = self._crossRef.__categorizeVariableNameByNamespace__(installEntryName) # A check to remove the mis-categorized installs which happen to fall in a namespace if installEntryName in INSTALL_PACKAGE_FIX: package = INSTALL_PACKAGE_FIX[installEntryName] # If it cannot match a package by namespace, capture the name via Regular Expression if package is None: pkgMatch = re.match("[A-Z./ \&\-\']+",installEntryName) if pkgMatch: # if a match is found, switch to title case and remove extra spaces targetName = pkgMatch.group(0).title().strip() # First check it against the list of package names if targetName in packageList: package = targetName # Then check it against the dictionary above for some odd spellings 
or capitalization elif targetName in INSTALL_RENAME_DICT: package = INSTALL_RENAME_DICT[targetName] # If all else fails, assign it to the "Unknown" else: package = "Unknown" package = str(package).strip() return package def _updateInstallReference(self): if not os.path.exists(self.outdir+"/9_7"): os.mkdir(self.outdir+"/9_7") installData = self._glbData['9.7'] output = os.path.join(self.outdir, "install_information.json") installJSONData = {} packageList = self._crossRef.getAllPackages() patchOrderGen = PatchOrderGenerator() patchOrderGen.analyzeVistAPatchDir(self.patchDir +"/Packages") with open(output, 'w') as installDataOut: logging.warn("inside the _updateInstallReference") for ien in sorted(installData.dataEntries.keys(), key=lambda x: float(x)): installItem = {} installEntry = installData.dataEntries[ien] package = self._findInstallPackage(packageList, installEntry.name) # if this is the first time the package is found, add an entry in the install JSON data. if package not in installJSONData: installJSONData[package]={} if installEntry.name: logging.warn("Gathering info for: %s" % installEntry.name) installItem['name'] = installEntry.name installItem['ien'] = installEntry.ien installItem['label'] = installEntry.name installItem['value'] = installEntry.name installItem['parent']= package if installEntry.name in patchOrderGen._kidsDepBuildDict: installchildren = [] for child in patchOrderGen._kidsDepBuildDict[installEntry.name]: childPackage = self._findInstallPackage(packageList,child) installchildren.append({"name": child, "package": childPackage}); installItem['children'] = installchildren if '11' in installEntry.fields: installItem['installDate'] = installEntry.fields['11'].value.strftime("%Y-%m-%d") if '1' in installEntry.fields: installItem['packageLink'] = installEntry.fields['1'].value if '40' in installEntry.fields: installItem['numRoutines'] = len(installEntry.fields['40'].value.dataEntries) if '14' in installEntry.fields: installItem['numFiles'] = 
len(installEntry.fields['14'].value.dataEntries) # Checks for the absence of asterisks which usually denotes a package change, also make it more specific to # eliminate the multibuilds that are being marked as package changes testMatch = re.search("\*+",installEntry.name) if testMatch is None: # Assume a package switch name will be just a package name and a version capture = re.match("(?P<packageName>[A-Z./ \&\-\']+) (?P<packageVal>[.0-9]+)",installEntry.name) if capture: checkPackage = self._findInstallPackage(packageList, capture.groups()[0],False) if (not (checkPackage == "Unknown") or (len(capture.groups()[0]) <= 4 )): installItem['packageSwitch'] = True installJSONData[package][installEntry.name] = installItem installJSONData['MultiBuild']={} for multiBuildFile in patchOrderGen._multiBuildDict: multibuildItem = {} multibuildItem['name']=os.path.basename(multiBuildFile); multibuildItem['children'] = [] for installName in patchOrderGen._multiBuildDict[multiBuildFile]: package = self._findInstallPackage(packageList, installName) multibuildItem['children'].append({"name": installName, "package": package}); installJSONData['MultiBuild'][os.path.basename(multiBuildFile)] = multibuildItem logging.warn("About to dump data into %s" % output) json.dump(installJSONData,installDataOut) def _resolveSelfPointer(self): """ Replace self-reference with meaningful data """ for fileNo in self._pointerRef: if fileNo in self._glbData: fileData = self._glbData[fileNo] for ien, fields in self._pointerRef[fileNo].iteritems(): if ien in fileData.dataEntries: name = fileData.dataEntries[ien].name if not name: name = str(ien) for field in fields: field.value = "^".join((field.value, name)) del self._pointerRef self._pointerRef = {} def _parseFileDetail(self, dataEntry, ien): if 'GL' in dataEntry: loc = dataEntry['GL'].value loc = normalizeGlobalLocation(loc) self._glbLocMap[ien] = loc def _parseDataBySchema(self, dataRoot, fileSchema, outGlbData): """ first sort the schema Root by 
location """ locFieldDict = sortSchemaByLocation(fileSchema) """ for each data entry, parse data by location """ floatKey = getKeys(dataRoot, float) for ien in floatKey: if float(ien) <=0: continue dataEntry = dataRoot[ien] outDataEntry = FileManDataEntry(fileSchema.getFileNo(), ien) dataKeys = [x for x in dataEntry] sortedKey = sorted(dataKeys, cmp=sortDataEntryFloatFirst) for locKey in sortedKey: if locKey == '0' and fileSchema.getFileNo() == '1': self._parseFileDetail(dataEntry[locKey], ien) if locKey in locFieldDict: fieldDict = locFieldDict[locKey] # a dict of {pos: field} curDataRoot = dataEntry[locKey] if len(fieldDict) == 1: fieldAttr = fieldDict.values()[0] if fieldAttr.isSubFilePointerType(): # Multiple self._parseSubFileField(curDataRoot, fieldAttr, outDataEntry) else: self._parseSingleDataValueField(curDataRoot, fieldAttr, outDataEntry) else: self._parseDataValueField(curDataRoot, fieldDict, outDataEntry) outGlbData.addFileManDataEntry(ien, outDataEntry) if fileSchema.getFileNo() == self._curFileNo: self._addFileKeyIndex(self._curFileNo, ien, outDataEntry.name) def _parseSingleDataValueField(self, dataEntry, fieldAttr, outDataEntry): if not dataEntry.value: return values = dataEntry.value.split('^') location = fieldAttr.getLocation() dataValue = None if location: index, loc = location.split(';') if loc: if convertToType(loc, int): intLoc = int(loc) if intLoc > 0 and intLoc <= len(values): dataValue = values[intLoc-1] else: dataValue = str(dataEntry.value) else: dataValue = str(dataEntry.value) if dataValue: self._parseIndividualFieldDetail(dataValue, fieldAttr, outDataEntry) def _parseDataValueField(self, dataRoot, fieldDict, outDataEntry): if not dataRoot.value: return values = dataRoot.value.split('^') if not values: return # this is very import to check for idx, value in enumerate(values, 1): if value and str(idx) in fieldDict: fieldAttr = fieldDict[str(idx)] self._parseIndividualFieldDetail(value, fieldAttr, outDataEntry) def 
_parseIndividualFieldDetail(self, value, fieldAttr, outDataEntry): if not value.strip(' '): return value = value.strip(' ') fieldDetail = value pointerFileNo = None if fieldAttr.isSetType(): setDict = fieldAttr.getSetMembers() if setDict and value in setDict: fieldDetail = setDict[value] elif fieldAttr.isFilePointerType() or fieldAttr.isVariablePointerType(): fileNo = None ien = None if fieldAttr.isFilePointerType(): filePointedTo = fieldAttr.getPointedToFile() if filePointedTo: fileNo = filePointedTo.getFileNo() ien = value else: fieldDetail = 'No Pointed to File' else: # for variable pointer type vpInfo = value.split(';') if len(vpInfo) != 2: logging.error("Unknown variable pointer format: %s" % value) fieldDetail = "Unknow Variable Pointer" else: fileNo = self.getFileNoByGlobalLocation(vpInfo[1]) ien = vpInfo[0] if not fileNo: logging.warn("Could not find File for %s" % value) fieldDetail = 'Global Root: %s, IEN: %s' % (vpInfo[1], ien) if fileNo and ien: fieldDetail = '^'.join((fileNo, ien)) idxName = self._getFileKeyIndex(fileNo, ien) if idxName: idxes = str(idxName).split('^') if len(idxes) == 1: fieldDetail = '^'.join((fieldDetail, str(idxName))) elif len(idxes) == 3: fieldDetail = '^'.join((fieldDetail, str(idxes[-1]))) elif fileNo == self._curFileNo: pointerFileNo = fileNo else: logging.warn("Cannot find value for %s, %s" % (ien, fileNo)) elif fieldAttr.getType() == FileManField.FIELD_TYPE_DATE_TIME: # datetime if value.find(',') >=0: fieldDetail = horologToDateTime(value) else: outDt = fmDtToPyDt(value) if outDt: fieldDetail = outDt else: logging.warn("Could not parse Date/Time: %s" % value) elif fieldAttr.getName().upper().startswith("TIMESTAMP"): # timestamp field if value.find(',') >=0: fieldDetail = horologToDateTime(value) if outDataEntry: dataField = FileManDataField(fieldAttr.getFieldNo(), fieldAttr.getType(), fieldAttr.getName(), fieldDetail) if pointerFileNo: self._addDataFieldToPointerRef(pointerFileNo, value, dataField) 
outDataEntry.addField(dataField) if fieldAttr.getFieldNo() == '.01': outDataEntry.name = fieldDetail outDataEntry.type = fieldAttr.getType() return fieldDetail def _addDataFieldToPointerRef(self, fileNo, ien, dataField): self._pointerRef.setdefault(fileNo, {}).setdefault(ien, set()).add(dataField) def _addFileKeyIndex(self, fileNo, ien, value): ienDict = self._fileKeyIndex.setdefault(fileNo, {}) if ien not in ienDict: ienDict[ien] = value def _getFileKeyIndex(self, fileNo, ien): if fileNo in self._fileKeyIndex: if ien in self._fileKeyIndex[fileNo]: return self._fileKeyIndex[fileNo][ien] return None def _addFileFieldMap(self, fileNo, ien, value): fldDict = self._fileKeyIndex.setdefault(fileNo, {}) if ien not in ienDict: ienDict[ien] = value def _parseSubFileField(self, dataRoot, fieldAttr, outDataEntry): logging.debug ("%s" % (fieldAttr.getName() + ':')) subFile = fieldAttr.getPointedToSubFile() if fieldAttr.hasSubType(FileManField.FIELD_TYPE_WORD_PROCESSING): outLst = self._parsingWordProcessingNode(dataRoot) outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(), FileManField.FIELD_TYPE_WORD_PROCESSING, fieldAttr.getName(), outLst)) elif subFile: subFileData = FileManFileData(subFile.getFileNo(), subFile.getFileManName()) self._parseDataBySchema(dataRoot, subFile, subFileData) outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(), FileManField.FIELD_TYPE_SUBFILE_POINTER, fieldAttr.getName(), subFileData)) else: logging.info ("Sorry, do not know how to intepret the schema %s" % fieldAttr) def _parsingWordProcessingNode(self, dataRoot): outLst = [] for key in getKeys(dataRoot, int): if '0' in dataRoot[key]: outLst.append("%s" % dataRoot[key]['0'].value) return outLst
def getAllSchema(allFiles):
  """Parse the FileMan schema (DD) file and return the full schema dict.

  allFiles: dict of fileNum -> {'path': ..., ...}; entry '0' is the DD file.
  """
  ddFilePath = allFiles['0']['path']
  parser = FileManSchemaParser()
  return parser.parseSchemaDDFileV2(ddFilePath)