Example #2
import os
import re
import glob
import json
import logging

# Project helpers referenced below (createArgParser, getKeys, printFileManFileData,
# createGlobalNodeByZWRFile, readGlobalNodeFromZWRFileV2, normalizeGlobalLocation,
# sortSchemaByLocation, sortDataEntryFloatFirst, convertToType, getMumpsRoutine,
# horologToDateTime, fmDtToPyDt, FileManSchemaParser, FileManFileData,
# FileManDataEntry, FileManDataField, FileManField, PatchOrderGenerator,
# INSTALL_PACKAGE_FIX, INSTALL_RENAME_DICT, initGlobalLocationMap) come from the
# surrounding OSEHRA VistA FileMan parser module.
def testGlobalParser(crossRef=None):
  parser = createArgParser()
  result = parser.parse_args()
  print result
  from InitCrossReferenceGenerator import parseCrossRefGeneratorWithArgs
  from FileManDataToHtml import FileManDataToHtml
  outputFile = open(os.path.join(result.outdir, "filesInfo.json"), 'wb')
  __generateGitRepositoryKey__(result.gitPath, result.MRepositDir, outputFile)
  outputFile.close()
  crossRef = parseCrossRefGeneratorWithArgs(result)
  # the class __init__ (below) takes the M repository dir plus the cross-reference
  glbDataParser = FileManGlobalDataParser(result.MRepositDir, crossRef)

  # __init__ already collects all ZWR global files and parses the schema (DD)
  allFiles = glbDataParser.allFiles
  assert '0' in allFiles and '1' in allFiles and set(result.fileNos).issubset(allFiles)
  schemaParser = glbDataParser.schemaParser
  allSchemaDict = glbDataParser._allSchemaDict
  isolatedFiles = schemaParser.isolatedFiles
  glbDataParser.parseZWRGlobalFileBySchemaV2(allFiles['1']['path'], '1', '^DIC(')
  for fileNo in result.fileNos:
    assert fileNo in glbDataParser.globalLocationMap
  if result.outdir:
    glbDataParser.outdir = result.outdir
  if result.patchRepositDir:
    glbDataParser.patchDir = result.patchRepositDir
  htmlGen = FileManDataToHtml(crossRef, result.outdir)
  if not result.all or set(result.fileNos).issubset(isolatedFiles):
    for fileNo in result.fileNos:
      gdFile = allFiles[fileNo]['path']
      logging.info("Parsing file: %s at %s" % (fileNo, gdFile))
      glbDataParser.parseZWRGlobalFileBySchemaV2(gdFile, fileNo)
      if result.outdir:
        htmlGen.outputFileManDataAsHtml(glbDataParser)
      else:
        fileManDataMap = glbDataParser.outFileManData
        for file in getKeys(fileManDataMap.iterkeys(), float):
          printFileManFileData(fileManDataMap[file])
      del glbDataParser.outFileManData[fileNo]
    glbDataParser.outRtnReferenceDict()
    return
  """ Also generate all required files as well """
  sccSet = schemaParser.sccSet
  fileSet = set(result.fileNos)
  for idx, value in enumerate(sccSet):
    fileSet.difference_update(value)
    if not fileSet:
      break
  for i in xrange(0, idx + 1):
    fileSet = sccSet[i]
    fileSet &= set(allFiles.keys())
    fileSet -= isolatedFiles
    fileSet.discard('757')
    if len(fileSet) > 1:
      for file in fileSet:
        zwrFile = allFiles[file]['path']
        globalSub = allFiles[file]['name']
        logging.info("Generate file key index for: %s at %s" % (file, zwrFile))
        glbDataParser.generateFileIndex(zwrFile, file)
    for file in fileSet:
      zwrFile = allFiles[file]['path']
      globalSub = allFiles[file]['name']
      logging.info("Parsing file: %s at %s" % (file, zwrFile))
      glbDataParser.parseZWRGlobalFileBySchemaV2(zwrFile, file)
      if result.outdir:
        htmlGen.outputFileManDataAsHtml(glbDataParser)
      del glbDataParser.outFileManData[file]
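
A minimal driver sketch (assuming createArgParser defines the options referenced above: MRepositDir, outdir, patchRepositDir, fileNos, all, gitPath):

if __name__ == '__main__':
  # surface the logging.info/logging.warn output produced during parsing
  logging.basicConfig(level=logging.INFO)
  testGlobalParser()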
class FileManGlobalDataParser(object):
  def __init__(self, MRepositDir, crossRef):
    self.patchDir = None
    self.MRepositDir = MRepositDir
    self._dataRoot = None
    self._crossRef = crossRef
    self._curFileNo = None
    self._glbData = {} # fileNo => FileManData
    self._pointerRef = {}
    self._fileKeyIndex = {} # File: => ien => Value
    self._glbLocMap = initGlobalLocationMap # File: => Global Location
    self._rtnRefDict = {} # dict of rtn => fileNo => Details
    self.allFiles = self._getAllFileManZWRFiles()  # Dict of fileNum => Global file
    self.schemaParser = FileManSchemaParser()
    self._allSchemaDict = self.schemaParser.parseSchemaDDFileV2(self.allFiles['0']['path'])

  @property
  def outFileManData(self):
    return self._glbData

  @property
  def globalLocationMap(self):
    return self._glbLocMap

  def getFileNoByGlobalLocation(self, glbLoc):
    """
      get the file no by global location
      return fileNo if found, otherwise return None
    """
    outLoc = normalizeGlobalLocation(glbLoc)
    for key, value in self._glbLocMap.iteritems():
      if value == outLoc:
        return key
    return None
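
  # Illustrative reverse lookup (entry hypothetical): if the map contains
  # {'9.4': '^DIC(9.4,'}, getFileNoByGlobalLocation('^DIC(9.4,') returns '9.4'.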

  def getFileManFileNameByFileNo(self, fileNo):
    fileManFile = self._crossRef.getGlobalByFileNo(fileNo)
    if fileManFile:
      return fileManFile.getFileManName()
    return ""

  def _createDataRootByZWRFile(self, inputFileName):
    self._dataRoot = createGlobalNodeByZWRFile(inputFileName)

  def _getAllFileManZWRFiles(self):
    dirName = os.path.join(self.MRepositDir,'Packages')
    pattern = "*/Globals/*.zwr"
    searchFiles = glob.glob(os.path.join(dirName, pattern))
    outFiles = {}
    for file in searchFiles:
      fileName = os.path.basename(file)
      if fileName == 'DD.zwr':
        outFiles['0'] = {'name': 'Schema File',
                         'path': os.path.normpath(os.path.abspath(file))}
        continue
      result = re.search(r"(?P<fileNo>^[0-9.]+)(-[1-9])?\+(?P<des>.*)\.zwr$", fileName)
      if result:
        if result.groups()[1]:
          logging.info("Ignore file %s" % fileName)
          continue
        fileNo = result.group('fileNo')
        if fileNo.startswith('0'): fileNo = fileNo[1:]
        globalDes = result.group('des')
        outFiles[fileNo] = {'name': globalDes,
                            'path': os.path.normpath(os.path.abspath(file))}
    return outFiles
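
  # Filename convention the regex above expects (example names illustrative):
  #   "200+NEW PERSON.zwr"   -> fileNo '200', description 'NEW PERSON'
  #   "200-1+NEW PERSON.zwr" -> '-1' continuation suffix, skipped
  #   "DD.zwr"               -> the schema file itself, stored under key '0'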

  def generateFileIndex(self, inputFileName, fileNumber):
    schemaFile = self._allSchemaDict[fileNumber]
    if not schemaFile.hasField('.01'):
      logging.error("File does not have a .01 field, ignore")
      return
    keyField = schemaFile.getFileManFieldByFieldNo('.01')
    keyLoc = keyField.getLocation()
    if not keyLoc:
      logging.error(".01 field does not have a location")
      return
    self._curFileNo = fileNumber
    if fileNumber in self._glbLocMap:
      glbLoc = self._glbLocMap[fileNumber]
      for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
        if not dataRoot: continue
        self._dataRoot = dataRoot
        fileDataRoot = dataRoot
        (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField)
        if detail:
          self._addFileKeyIndex(fileNumber, ien, detail)
        elif ien:
          logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber))
        else:
          logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber))

  """
  Generate a map Field Value => IEN
  """
  def generateFileFieldMap(self, inputFileName, fileNumber, fieldNo):
    schemaFile = self._allSchemaDict[fileNumber]
    if not schemaFile.hasField(fieldNo):
      logging.error("File does not have a [%s] field, ignore", fieldNo)
      return
    keyField = schemaFile.getFileManFieldByFieldNo(fieldNo)
    keyLoc = keyField.getLocation()
    if not keyLoc:
      logging.error("[%s] field does not have a location", fieldNo)
      return
    glbLoc = self._glbLocMap[fileNumber]
    fieldMap = {}
    for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
      if not dataRoot: continue
      fileDataRoot = dataRoot
      (ien, detail) = self._getKeyNameBySchema(fileDataRoot, keyLoc, keyField)
      if detail:
        fieldMap[detail] = ien
      elif ien:
        logging.info("No name associated with ien: %s, file: %s" % (ien, fileNumber))
      else:
        logging.info("No index for data with ien: %s, file: %s" % (ien, fileNumber))
    return fieldMap
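
  # Usage sketch (path and numbers illustrative): map RPC names to IENs in the
  # REMOTE PROCEDURE file (#8994) via its .01 NAME field:
  #   rpcNameToIen = parser.generateFileFieldMap(zwrPath, '8994', '.01')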

  def _getKeyNameBySchema(self, dataRoot, keyLoc, keyField):
    floatKey = getKeys(dataRoot, float)
    logging.debug('Total # of entries is %s' % len(floatKey))
    for ien in floatKey:
      if float(ien) <= 0:
        continue
      dataEntry = dataRoot[ien]
      index, loc = keyLoc.split(';')
      if not index or index not in dataEntry:
        continue
      dataEntry = dataEntry[index]
      if not dataEntry.value:
        return (ien, None)
      values = dataEntry.value.split('^')
      dataValue = None
      if convertToType(loc, int):
        intLoc = int(loc)
        if intLoc > 0 and intLoc <= len(values):
          dataValue = values[intLoc-1]
      else:
        dataValue = str(dataEntry.value)
      if dataValue:
        return (ien, self._parseIndividualFieldDetail(dataValue, keyField, None))
    return (None, None)
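
  # keyLoc uses FileMan's 'node;piece' convention: '0;1' reads '^'-piece 1 of
  # subscript '0', so a node value of "SMITH,JOHN^M^2970101" yields
  # 'SMITH,JOHN' as the key name (values illustrative).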

  def parseZWRGlobalFileBySchemaV2(self, inputFileName,
                                   fileNumber, glbLoc=None):
    schemaFile = self._allSchemaDict[fileNumber]
    self._glbData[fileNumber] = FileManFileData(fileNumber,
                                                self.getFileManFileNameByFileNo(fileNumber))
    self._curFileNo = fileNumber
    if not glbLoc:
      glbLoc = self._glbLocMap.get(fileNumber)
      logging.info("File: %s global loc: %s" % (fileNumber, glbLoc))
    elif fileNumber in self._glbLocMap:
      logging.info("global loc %s, %s" % (glbLoc, self._glbLocMap[fileNumber]))
    for dataRoot in readGlobalNodeFromZWRFileV2(inputFileName, glbLoc):
      if not dataRoot:
        continue
      self._dataRoot = dataRoot
      fileDataRoot = dataRoot
      self._parseDataBySchema(fileDataRoot, schemaFile,
                              self._glbData[fileNumber])
    self._resolveSelfPointer()
    if self._crossRef:
      self._updateCrossReference()

  def _updateCrossReference(self):
    # file numbers: 8994 = REMOTE PROCEDURE, 101 = PROTOCOL,
    # 779.2 = HLO APPLICATION REGISTRY, 9.7 = INSTALL
    if '8994' in self._glbData:
      self._updateRPCRefence()
    if '101' in self._glbData:
      self._updateHL7Reference()
    if '779.2' in self._glbData:
      self._updateHLOReference()
    if '9.7' in self._glbData:
      self._updateInstallReference()

  def outRtnReferenceDict(self):
    if len(self._rtnRefDict):
      """ generate the dependency in json file """
      with open(os.path.join(self.outdir, "Routine-Ref.json"), 'w') as output:
        logging.info("Generate File: %s" % output.name)
        json.dump(self._rtnRefDict, output)

  def _updateHLOReference(self):
    hlo = self._glbData['779.2']
    for ien in sorted(hlo.dataEntries.keys(), key=lambda x: float(x)):
      hloEntry = hlo.dataEntries[ien]
      entryName = hloEntry.name
      namespace, package = \
        self._crossRef.__categorizeVariableNameByNamespace__(entryName)
      if package:
        package.hlo.append(hloEntry)
        logging.info("Adding hlo: %s to Package: %s" %
                     (entryName, package.getName()))

  def _updateHL7Reference(self):
    protocol = self._glbData['101']
    outJSON = {}
    for ien in sorted(protocol.dataEntries.keys(), key=lambda x: float(x)):
      protocolEntry = protocol.dataEntries[ien]
      if '4' in protocolEntry.fields:
        type = protocolEntry.fields['4'].value
        if (type != 'event driver' and type != 'subscriber'):
          logging.info("Adding Protocol Entry of type: %s" % (type))
          entryName = protocolEntry.name
          namespace, package = \
            self._crossRef.__categorizeVariableNameByNamespace__(entryName)
          if package:
            package.protocol.append(protocolEntry)
            logging.info("Adding Protocol Entry: %s to Package: %s" %
                         (entryName, package.getName()))
        # only care about the event driver and subscriber types
        elif (type == 'event driver' or type == 'subscriber'):
          entryName = protocolEntry.name
          namespace, package = \
            self._crossRef.__categorizeVariableNameByNamespace__(entryName)
          if package:
            package.hl7.append(protocolEntry)
            logging.info("Adding HL7: %s to Package: %s" %
                         (entryName, package.getName()))
          elif '12' in protocolEntry.fields: # check the package it belongs to
            pass
          else:
            logging.warn("Cannot find a package for HL7: %s" % entryName)
          for field in ('771', '772'):
            if field not in protocolEntry.fields:
              continue
            hl7Rtn = protocolEntry.fields[field].value
            if not hl7Rtn:
              continue
            for rtn, tag, pos in getMumpsRoutine(hl7Rtn):
              hl7Info = {"name": entryName,
                         "ien": ien}
              if tag:
                hl7Info['tag'] = tag
              self._rtnRefDict.setdefault(rtn, {}).setdefault('101', []).append(hl7Info)

  def _updateRPCRefence(self):
    rpcData = self._glbData['8994']
    for ien in sorted(rpcData.dataEntries.keys(), key=lambda x: float(x)):
      rpcEntry = rpcData.dataEntries[ien]
      rpcRoutine = None
      if rpcEntry.name:
        namespace, package = \
        self._crossRef.__categorizeVariableNameByNamespace__(rpcEntry.name)
        if package:
          package.rpcs.append(rpcEntry)
          logging.info("Adding RPC: %s to Package: %s" %
                      (rpcEntry.name, package.getName()))

        if '.03' in rpcEntry.fields:
          rpcRoutine = rpcEntry.fields['.03'].value
        else:
          if rpcRoutine:
            """ try to categorize by routine called """
            namespace, package = \
            self._crossRef.__categorizeVariableNameByNamespace__(rpcRoutine)
            if package:
              package.rpcs.append(rpcEntry)
              logging.info("Adding RPC: %s to Package: %s based on routine calls" %
                          (rpcEntry.name, package.getName()))
          else:
            logging.error("Cannot find package for RPC: %s" %
                          (rpcEntry.name))
        """ Generate the routine referenced based on RPC Call """
        if rpcRoutine:
          rpcInfo = {"name": rpcEntry.name,
                     "ien" : ien
                    }
          if '.02' in rpcEntry.fields:
            rpcTag = rpcEntry.fields['.02'].value
            rpcInfo['tag'] = rpcTag
          self._rtnRefDict.setdefault(rpcRoutine, {}).setdefault('8994', []).append(rpcInfo)

  def _findInstallPackage(self, packageList, installEntryName, checkNamespace=True):
    package = None
    """
      checkNamespace is used by the "version change" check to match the
      package name in the install name but not the namespace in the install
      name, which should help eliminate multibuilds from being found as
      package changes
    """
    if checkNamespace:
      namespace, package = self._crossRef.__categorizeVariableNameByNamespace__(installEntryName)
    # A check to remove the mis-categorized installs which happen to fall in a namespace
    if installEntryName in INSTALL_PACKAGE_FIX:
      package = INSTALL_PACKAGE_FIX[installEntryName]
    # If it cannot match a package by namespace, capture the name via Regular Expression
    if package is None:
      pkgMatch = re.match(r"[A-Z./ \&\-\']+", installEntryName)
      if pkgMatch:
        # if a match is found, switch to title case and remove extra spaces
        targetName = pkgMatch.group(0).title().strip()
        # First check it against the list of package names
        if targetName in packageList:
          package = targetName
        # Then check it against the dictionary above for some odd spellings or capitalization
        elif targetName in INSTALL_RENAME_DICT:
          package = INSTALL_RENAME_DICT[targetName]
        # If all else fails, assign it to the "Unknown"
        else:
          package = "Unknown"
    package = str(package).strip()
    return package
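
  # Fallback sketch (install name illustrative): "TOOLKIT*7.3*1" matches the
  # regex up to the first '*'; .title().strip() gives "Toolkit", which is then
  # checked against packageList and INSTALL_RENAME_DICT before "Unknown".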

  def _updateInstallReference(self):
    if not os.path.exists(self.outdir+"/9_7"):
      os.mkdir(self.outdir+"/9_7")
    installData = self._glbData['9.7']
    output = os.path.join(self.outdir, "install_information.json")
    installJSONData = {}
    packageList = self._crossRef.getAllPackages()
    patchOrderGen = PatchOrderGenerator()
    patchOrderGen.analyzeVistAPatchDir(os.path.join(self.patchDir, "Packages"))
    with open(output, 'w') as installDataOut:
      logging.warn("inside the _updateInstallReference")
      for ien in sorted(installData.dataEntries.keys(), key=lambda x: float(x)):
        installItem = {}
        installEntry = installData.dataEntries[ien]
        package = self._findInstallPackage(packageList, installEntry.name)
        # if this is the first time the package is found, add an entry in the install JSON data.
        if package not in installJSONData:
          installJSONData[package] = {}
        if installEntry.name:
          logging.warn("Gathering info for: %s" % installEntry.name)
          installItem['name'] = installEntry.name
          installItem['ien'] = installEntry.ien
          installItem['label'] = installEntry.name
          installItem['value'] = installEntry.name
          installItem['parent'] = package
          if installEntry.name in patchOrderGen._kidsDepBuildDict:
            installchildren = []
            for child in patchOrderGen._kidsDepBuildDict[installEntry.name]:
              childPackage = self._findInstallPackage(packageList, child)
              installchildren.append({"name": child, "package": childPackage})
            installItem['children'] = installchildren
          if '11' in installEntry.fields:
            installItem['installDate'] = installEntry.fields['11'].value.strftime("%Y-%m-%d")
          if '1' in installEntry.fields:
            installItem['packageLink'] = installEntry.fields['1'].value
          if '40' in installEntry.fields:
            installItem['numRoutines'] = len(installEntry.fields['40'].value.dataEntries)
          if '14' in installEntry.fields:
            installItem['numFiles'] = len(installEntry.fields['14'].value.dataEntries)
          # Checks for the absence of asterisks which usually denotes a package change, also make it more specific to
          # eliminate the multibuilds that are being marked as package changes
          testMatch = re.search("\*+",installEntry.name)
          if testMatch is None:
            # Assume a package switch name will be just a package name and a version
            capture = re.match(r"(?P<packageName>[A-Z./ \&\-\']+) (?P<packageVal>[.0-9]+)",
                               installEntry.name)
            if capture:
              checkPackage = self._findInstallPackage(packageList,
                                                      capture.groups()[0], False)
              if checkPackage != "Unknown" or len(capture.groups()[0]) <= 4:
                installItem['packageSwitch'] = True
          installJSONData[package][installEntry.name] = installItem
      installJSONData['MultiBuild'] = {}
      for multiBuildFile in patchOrderGen._multiBuildDict:
        multibuildItem = {}
        multibuildItem['name'] = os.path.basename(multiBuildFile)
        multibuildItem['children'] = []
        for installName in patchOrderGen._multiBuildDict[multiBuildFile]:
          package = self._findInstallPackage(packageList, installName)
          multibuildItem['children'].append({"name": installName,
                                             "package": package})
        installJSONData['MultiBuild'][os.path.basename(multiBuildFile)] = multibuildItem
      logging.warn("About to dump data into %s" % output)
      json.dump(installJSONData, installDataOut)

  def _resolveSelfPointer(self):
    """ Replace self-reference with meaningful data """
    for fileNo in self._pointerRef:
      if fileNo in self._glbData:
        fileData = self._glbData[fileNo]
        for ien, fields in self._pointerRef[fileNo].iteritems():
          if ien in fileData.dataEntries:
            name = fileData.dataEntries[ien].name
            if not name: name = str(ien)
            for field in fields:
              field.value = "^".join((field.value, name))
    del self._pointerRef
    self._pointerRef = {}
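
  # Schematically (values illustrative): a self-pointer stored as '200^123'
  # becomes '200^123^SMITH,JOHN' once IEN 123's .01 name is available.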

  def _parseFileDetail(self, dataEntry, ien):
    if 'GL' in dataEntry:
      loc = dataEntry['GL'].value
      loc = normalizeGlobalLocation(loc)
      self._glbLocMap[ien] = loc

  def _parseDataBySchema(self, dataRoot, fileSchema, outGlbData):
    """ first sort the schema Root by location """
    locFieldDict = sortSchemaByLocation(fileSchema)
    """ for each data entry, parse data by location """
    floatKey = getKeys(dataRoot, float)
    for ien in floatKey:
      if float(ien) <= 0:
        continue
      dataEntry = dataRoot[ien]
      outDataEntry = FileManDataEntry(fileSchema.getFileNo(), ien)
      dataKeys = [x for x in dataEntry]
      sortedKey = sorted(dataKeys, cmp=sortDataEntryFloatFirst)
      for locKey in sortedKey:
        if locKey == '0' and fileSchema.getFileNo() == '1':
          self._parseFileDetail(dataEntry[locKey], ien)
        if locKey in locFieldDict:
          fieldDict = locFieldDict[locKey] # a dict of {pos: field}
          curDataRoot = dataEntry[locKey]
          if len(fieldDict) == 1:
            fieldAttr = fieldDict.values()[0]
            if fieldAttr.isSubFilePointerType(): # Multiple
              self._parseSubFileField(curDataRoot, fieldAttr, outDataEntry)
            else:
              self._parseSingleDataValueField(curDataRoot, fieldAttr,
                                              outDataEntry)
          else:
            self._parseDataValueField(curDataRoot, fieldDict, outDataEntry)
      outGlbData.addFileManDataEntry(ien, outDataEntry)
      if fileSchema.getFileNo() == self._curFileNo:
        self._addFileKeyIndex(self._curFileNo, ien, outDataEntry.name)

  def _parseSingleDataValueField(self, dataEntry, fieldAttr, outDataEntry):
    if not dataEntry.value:
      return
    values = dataEntry.value.split('^')
    location = fieldAttr.getLocation()
    dataValue = None
    if location:
      index, loc = location.split(';')
      if loc:
        if convertToType(loc, int):
          intLoc = int(loc)
          if intLoc > 0 and intLoc <= len(values):
            dataValue = values[intLoc-1]
        else:
          dataValue = str(dataEntry.value)
    else:
      dataValue = str(dataEntry.value)
    if dataValue:
      self._parseIndividualFieldDetail(dataValue, fieldAttr, outDataEntry)

  def _parseDataValueField(self, dataRoot, fieldDict, outDataEntry):
    if not dataRoot.value:
      return
    values = dataRoot.value.split('^')
    if not values: return # this is very important to check
    for idx, value in enumerate(values, 1):
      if value and str(idx) in fieldDict:
        fieldAttr = fieldDict[str(idx)]
        self._parseIndividualFieldDetail(value, fieldAttr, outDataEntry)
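
  # Dispatch sketch (values illustrative): a node value "SMITH,JOHN^M^2970101"
  # with fieldDict {'1': NAME, '2': SEX, '3': DOB} sends piece 1 to the NAME
  # field, piece 2 to SEX, and piece 3 to DOB.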

  def _parseIndividualFieldDetail(self, value, fieldAttr, outDataEntry):
    if not value.strip(' '):
      return
    value = value.strip(' ')
    fieldDetail = value
    pointerFileNo = None
    if fieldAttr.isSetType():
      setDict = fieldAttr.getSetMembers()
      if setDict and value in setDict:
        fieldDetail = setDict[value]
    elif fieldAttr.isFilePointerType() or fieldAttr.isVariablePointerType():
      fileNo = None
      ien = None
      if fieldAttr.isFilePointerType():
        filePointedTo = fieldAttr.getPointedToFile()
        if filePointedTo:
          fileNo = filePointedTo.getFileNo()
          ien = value
        else:
          fieldDetail = 'No Pointed to File'
      else: # for variable pointer type
        vpInfo = value.split(';')
        if len(vpInfo) != 2:
          logging.error("Unknown variable pointer format: %s" % value)
          fieldDetail = "Unknow Variable Pointer"
        else:
          fileNo = self.getFileNoByGlobalLocation(vpInfo[1])
          ien = vpInfo[0]
          if not fileNo:
            logging.warn("Could not find File for %s" % value)
            fieldDetail = 'Global Root: %s, IEN: %s' % (vpInfo[1], ien)
      if fileNo and ien:
        fieldDetail = '^'.join((fileNo, ien))
        idxName = self._getFileKeyIndex(fileNo, ien)
        if idxName:
          idxes = str(idxName).split('^')
          if len(idxes) == 1:
            fieldDetail = '^'.join((fieldDetail, str(idxName)))
          elif len(idxes) == 3:
            fieldDetail = '^'.join((fieldDetail, str(idxes[-1])))
        elif fileNo == self._curFileNo:
          pointerFileNo = fileNo
        else:
          logging.warn("Cannot find value for %s, %s" % (ien, fileNo))
    elif fieldAttr.getType() == FileManField.FIELD_TYPE_DATE_TIME: # datetime
      # FileMan dates encode the year as (year - 1700), e.g. '2970101' is
      # 1997-01-01; a value containing a comma is a $HOROLOG 'days,seconds'
      # pair instead, hence the comma test below
      if value.find(',') >= 0:
        fieldDetail = horologToDateTime(value)
      else:
        outDt = fmDtToPyDt(value)
        if outDt:
          fieldDetail = outDt
        else:
          logging.warn("Could not parse Date/Time: %s" % value)
    elif fieldAttr.getName().upper().startswith("TIMESTAMP"): # timestamp field
      if value.find(',') >= 0:
        fieldDetail = horologToDateTime(value)
    if outDataEntry:
      dataField = FileManDataField(fieldAttr.getFieldNo(),
                                   fieldAttr.getType(),
                                   fieldAttr.getName(),
                                   fieldDetail)
      if pointerFileNo:
        self._addDataFieldToPointerRef(pointerFileNo, value, dataField)
      outDataEntry.addField(dataField)
      if fieldAttr.getFieldNo() == '.01':
        outDataEntry.name = fieldDetail
        outDataEntry.type = fieldAttr.getType()
    return fieldDetail

  def _addDataFieldToPointerRef(self, fileNo, ien, dataField):
    self._pointerRef.setdefault(fileNo, {}).setdefault(ien, set()).add(dataField)

  def _addFileKeyIndex(self, fileNo, ien, value):
    ienDict = self._fileKeyIndex.setdefault(fileNo, {})
    if ien not in ienDict:
      ienDict[ien] = value

  def _getFileKeyIndex(self, fileNo, ien):
    if fileNo in self._fileKeyIndex:
      if ien in self._fileKeyIndex[fileNo]:
        return self._fileKeyIndex[fileNo][ien]
    return None

  def _addFileFieldMap(self, fileNo, ien, value):
    ienDict = self._fileKeyIndex.setdefault(fileNo, {})
    if ien not in ienDict:
      ienDict[ien] = value

  def _parseSubFileField(self, dataRoot, fieldAttr, outDataEntry):
    logging.debug ("%s" % (fieldAttr.getName() + ':'))
    subFile = fieldAttr.getPointedToSubFile()
    if fieldAttr.hasSubType(FileManField.FIELD_TYPE_WORD_PROCESSING):
      outLst = self._parsingWordProcessingNode(dataRoot)
      outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(),
                                       FileManField.FIELD_TYPE_WORD_PROCESSING,
                                       fieldAttr.getName(),
                                       outLst))
    elif subFile:
      subFileData = FileManFileData(subFile.getFileNo(),
                                    subFile.getFileManName())
      self._parseDataBySchema(dataRoot, subFile, subFileData)
      outDataEntry.addField(FileManDataField(fieldAttr.getFieldNo(),
                                        FileManField.FIELD_TYPE_SUBFILE_POINTER,
                                        fieldAttr.getName(),
                                        subFileData))
    else:
      logging.info ("Sorry, do not know how to intepret the schema %s" %
                    fieldAttr)

  def _parsingWordProcessingNode(self, dataRoot):
    outLst = []
    for key in getKeys(dataRoot, int):
      if '0' in dataRoot[key]:
        outLst.append("%s" % dataRoot[key]['0'].value)
    return outLst
Example #7
def getAllSchema(allFiles):
  schemaParser = FileManSchemaParser()
  allSchemaDict = schemaParser.parseSchemaDDFileV2(allFiles['0']['path'])
  return allSchemaDict
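
A usage sketch (assumes a FileManGlobalDataParser instance named glbDataParser, as in the example above):

# allSchemaDict = getAllSchema(glbDataParser.allFiles)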