def reportInterfaceAssembly():
    """
    Write the markdown report for the integrated RPC interface definition.

    Loads ../Definitions/rpcInterfaceDefinition.bjsn (key order preserved),
    renders it with muRPCInterfaceDefinition, appends a per-station table
    (how many definitions name the station in "inVistAs" and how many name
    it in "fromVistA"), prints the markdown and writes it to
    ../Reports/rpcInterfaceDefinition.md.
    """
    # Context managers so the handles are closed (and the report flushed)
    # deterministically rather than whenever the objects are collected.
    with open("../Definitions/rpcInterfaceDefinition.bjsn") as defnFile:
        rpcInterfaceDefinition = json.load(
            defnFile, object_pairs_hook=OrderedDict)

    # NOTE(review): the markdown literals below contain no line break after
    # their headings - confirm that is intended and not lost formatting.
    mu = "## VA VistA RPC Interface Definition "
    mu += muRPCInterfaceDefinition(rpcInterfaceDefinition)

    # Add in information on Sources
    cntSSOs = Counter()  # station -> definitions listing it in "inVistAs"
    cntFromSSO = Counter()  # station -> definitions with it as "fromVistA"
    for defn in rpcInterfaceDefinition:
        cntSSOs.update(defn["inVistAs"])
        cntFromSSO[defn["fromVistA"]] += 1

    totalDefns = len(rpcInterfaceDefinition)
    mu += (
        "### Source Information for Integrated Definition "
        "The integrated interface definition combines RPC definitions from "
        "multiple real VistA and FOIA. "
        "Overall __{}__ RPCs are not in FOIA. "
    ).format(
        # "999" is the FOIA station number
        reportAbsAndPercent(totalDefns - cntSSOs["999"], totalDefns))

    tbl = MarkdownTable(["Station", "RPCs", "Definition Contribution"])
    for sso in sorted(cntSSOs):
        tbl.addRow([sso, cntSSOs[sso], cntFromSSO[sso]])
    mu += tbl.md() + "\n\n"

    # Single parenthesised argument: same output under Python 2's print
    # statement and the print function.
    print(mu)

    with open("../Reports/rpcInterfaceDefinition.md", "w") as reportFile:
        reportFile.write(mu)
def reportRPCOptions(stationNo):
    """
    Write the "RPC Options" markdown report for one station.

    Reads four reductions located with VISTA_RED_LOCN_TEMPL.format(stationNo)
    (_19Reduction, _rpcOptionsWithUse, _rpcBPIs and _8994Reduction .json),
    works out which options are "active and used" (no "isRemoved" key and an
    "sUsersCount" key), compares their RPCs with the RPCs named by Builds and
    by 8994, tabulates active and excluded options, and writes the result to
    VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptions.md".

    stationNo -- station number as a string; "999" (FOIA) gets a caveat
                 paragraph and a reduced first table (no user columns).
    """
    redLocn = VISTA_RED_LOCN_TEMPL.format(stationNo)

    def loadReduction(suffix):
        # Close every input as soon as it has been parsed.
        with open(redLocn + suffix) as redFile:
            return json.load(redFile)

    mu = (
        "## RPC Options of {} "
        "Using _Active, Used RPC Options_ to subset 8994 and Build named "
        "RPCs. Expect a __15% reduction__ if we add a requirement that an "
        "RPC needs to belong to [1] an active option [2] belonging to a "
        "recently signed on user. "
    ).format(stationNo)

    if stationNo == "999":
        mu += "__Exception: with its inconsisent builds and 8994 and its lack of user sign ons and types, FOIA (999) does not follow a regular VistA pattern so many observations below don't apply to it.__\n\n"

    # Raw RPC Broker Options to give # that don't have "rpcs" (purely for
    # reporting)
    _19Reductions = loadReduction("_19Reduction.json")

    # Form: {"label": rpc, "options": [{"label" "isRemoved", sUsersCount,
    # usersCount}] ... note: not ALL broker options as some have no RPCs
    # (must get from raw 19)
    #
    # and will flip to byOption:
    #     {"label" (option), ... "rpcs": []}
    # ie/ so options under rpcs and rpcs under options
    #
    # and then two sets: activeNUsedOptions and the RPCs of those options
    rpcOptionsWithUse = loadReduction("_rpcOptionsWithUse.json")
    rpcOptionInfoByLabel = {}  # includes RPCs of options
    for roi in rpcOptionsWithUse:
        rpc = roi["label"]
        for oi in roi["options"]:
            if oi["label"] not in rpcOptionInfoByLabel:
                # flip: the first record seen for an option becomes its info
                oi["rpcs"] = []
                rpcOptionInfoByLabel[oi["label"]] = oi
            rpcOptionInfoByLabel[oi["label"]]["rpcs"].append(rpc)

    activeUsedOptions = set(
        option for option, info in rpcOptionInfoByLabel.items()
        if "isRemoved" not in info and "sUsersCount" in info)
    rpcsOfActiveUsedOptions = set(
        rpc for option in activeUsedOptions
        for rpc in rpcOptionInfoByLabel[option]["rpcs"])

    optionInfos = list(rpcOptionInfoByLabel.values())
    removedOptionCount = sum(1 for info in optionInfos if "isRemoved" in info)
    rpcsOfUnremovedOptions = set(
        rpc for info in optionInfos if "isRemoved" not in info
        for rpc in info["rpcs"])
    noSOUserOptionCount = sum(
        1 for info in optionInfos
        if not ("isRemoved" in info or "sUsersCount" in info))
    oldUsersOnlyOptionCount = sum(
        1 for info in optionInfos
        if "usersCount" in info and "sUsersCount" not in info)
    keyedOptionsMU = ", ".join(sorted(
        "\"{}\"".format(option)
        for option, info in rpcOptionInfoByLabel.items()
        if "keyRequired" in info and "sUsersCount" in info))
    proxyOptionCount = sum(
        1 for info in optionInfos if "proxyUsersCount" in info)

    # The explicit "\n" reproduces the line break present inside the
    # original triple-quoted literal.
    mu += (
        "There are {:,} RPC Broker options, {:,} of which name __{:,}__ "
        "RPCs. {:,} of these options are marked 'deleted', leaving "
        "__{:,}__ of such option-backed RPCs. \n"
        "A further {:,} options are not assigned to an active, recently "
        "signed on user - of these, {:,} had older, no longer active users. "
        "When those without signed-on users are removed, we're left with "
        "__{}__ RPCs backed by __{:,}__ active options with users who "
        "recently signed on. "
        "__Note__: options _{}_ require keys and {:,} options have Proxy "
        "Users - both need testing and analysis. "
    ).format(
        len(_19Reductions),
        len(rpcOptionInfoByLabel),
        len(rpcOptionsWithUse),
        removedOptionCount,
        len(rpcsOfUnremovedOptions),
        noSOUserOptionCount,
        oldUsersOnlyOptionCount,
        reportAbsAndPercent(
            len(rpcsOfActiveUsedOptions), len(rpcOptionsWithUse)),
        len(activeUsedOptions),
        keyedOptionsMU,
        proxyOptionCount
    )

    # Let's see how many of these option RPCs are [in Builds and in 8994].
    # Labels in the Build and 8994 reductions have "_" mapped to "/" before
    # being compared with the option RPC names.
    bpis = loadReduction("_rpcBPIs.json")
    _buildActiveRPCs = set(
        bpi["label"].replace("_", "/") for bpi in bpis
        if "isDeleted" not in bpi)
    _inOptionsButNotInBuilds = rpcsOfActiveUsedOptions - _buildActiveRPCs  # few
    _inBuildsButNotOptions = _buildActiveRPCs - rpcsOfActiveUsedOptions

    _8994Reduction = loadReduction("_8994Reduction.json")
    _8994Labels = set(red["label"].replace("_", "/") for red in _8994Reduction)
    _allBuildActiveAnd8994RPCs = _buildActiveRPCs.union(_8994Labels)
    _inOptionsButNot8994 = rpcsOfActiveUsedOptions - _8994Labels
    _in8994ButNotOptions = _8994Labels - rpcsOfActiveUsedOptions

    mu += (
        "When compared to _Build RPCs_ and _8994 RPCs_: "
        "* Installed Builds name __{}__ RPCs not in used options while "
        "those options name __{:,}__ RPCs not in these Builds ({}). "
        "* 8994 defines __{}__ RPCs not in user options while those options "
        "name __{:,}__ RPCs not in 8994. "
        "__Conclusion:__ _Used Options_ reduce the __{:,}__ RPCs named by "
        "both Builds and 8994s to __{}__. "
    ).format(
        reportAbsAndPercent(
            len(_inBuildsButNotOptions), len(_buildActiveRPCs)),
        len(_inOptionsButNotInBuilds),
        # sorted so the rendered list does not depend on set iteration order
        ", ".join(
            "\"{}\"".format(rpc) for rpc in sorted(_inOptionsButNotInBuilds)),
        reportAbsAndPercent(len(_in8994ButNotOptions), len(_8994Labels)),
        len(_inOptionsButNot8994),
        len(_allBuildActiveAnd8994RPCs),
        reportAbsAndPercent(
            len(rpcsOfActiveUsedOptions), len(_allBuildActiveAnd8994RPCs))
    )

    # Show Active RPC Option details
    cols = ["Option", "RPC \\#", "Exclusive RPC \\#"]
    if stationNo != "999":
        cols.append("\\# User / SO / SO0 / Proxy")
    tbl = MarkdownTable(cols)

    # Number of ACTIVE/SO options naming each RPC, computed once. An RPC is
    # exclusive to an active option exactly when that count is 1.
    activeOptionCountByRPC = Counter(
        rpc for option in activeUsedOptions
        for rpc in set(rpcOptionInfoByLabel[option]["rpcs"]))

    def activeOptionSortKey(option):
        # FOIA is ordered by RPC count, every other station by "sUsersCount"
        if stationNo == "999":
            return len(rpcOptionInfoByLabel[option]["rpcs"])
        return rpcOptionInfoByLabel[option]["sUsersCount"]

    for option in sorted(
            activeUsedOptions, key=activeOptionSortKey, reverse=True):
        optionInfo = rpcOptionInfoByLabel[option]
        exclusiveRPCCount = sum(
            1 for rpc in optionInfo["rpcs"]
            if activeOptionCountByRPC[rpc] == 1)
        row = [
            "__{}__".format(option),
            len(optionInfo["rpcs"]),
            exclusiveRPCCount
        ]
        if stationNo != "999":
            if "_0SUsersCount" in optionInfo:
                _0SUsersCountMU = "{:,}".format(optionInfo["_0SUsersCount"])
            else:
                _0SUsersCountMU = "-"
            if "proxyUsersCount" in optionInfo:
                proxyUserCountMU = "{:,}".format(
                    optionInfo["proxyUsersCount"])
            else:
                proxyUserCountMU = "-"
            row.append("{:,} / {:,} / {} / {}".format(
                optionInfo["usersCount"],
                optionInfo["sUsersCount"],
                _0SUsersCountMU,
                proxyUserCountMU))
        tbl.addRow(row)

    mu += "{:,} Active, SO User Options ...\n\n".format(
        len(activeUsedOptions))
    mu += tbl.md() + "\n\n"

    # Excluded Options, their RPCs, exclusive or otherwise
    excludedOptions = set(
        option for option, info in rpcOptionInfoByLabel.items()
        if "isRemoved" in info or "sUsersCount" not in info)
    rpcsOfExcludedOptions = set(
        rpc for option in excludedOptions
        for rpc in rpcOptionInfoByLabel[option]["rpcs"])
    rpcsExclusiveToExcludedOptions = \
        rpcsOfExcludedOptions - rpcsOfActiveUsedOptions

    tbl = MarkdownTable([
        "Option", "RPC \\#", "E+E RPC \\#", "(No SO) User \\#", "Is Deleted"])
    for option in sorted(
            excludedOptions,
            key=lambda x: len(rpcOptionInfoByLabel[x]["rpcs"]),
            reverse=True):
        oInfo = rpcOptionInfoByLabel[option]
        userCountMU = oInfo["usersCount"] if "usersCount" in oInfo else ""
        isRemovedMU = "__YES__" if "isRemoved" in oInfo else ""
        exclusiveExcludedRPCCount = sum(
            1 for rpc in oInfo["rpcs"]
            if rpc in rpcsExclusiveToExcludedOptions)
        # zero is shown as an empty cell
        exclusiveExcludedRPCCountMU = \
            exclusiveExcludedRPCCount if exclusiveExcludedRPCCount > 0 else ""
        tbl.addRow([
            option,
            len(oInfo["rpcs"]),
            exclusiveExcludedRPCCountMU,
            userCountMU,
            isRemovedMU
        ])

    mu += (
        "{:,} Excluded (removed or no SO User) Options with {:,} RPCs, "
        "{:,} of which don't appear in active options. "
        "Note that only a small minority of these options are formally "
        "deleted ...\n\n"
    ).format(
        len(excludedOptions),
        len(rpcsOfExcludedOptions),
        len(rpcsExclusiveToExcludedOptions))
    mu += tbl.md() + "\n\n"

    mu += (
        "__TODO__: "
        "* Enhance: Add Build data for options using option info in builds "
        "=> see first introduction etc "
        "* Besides the CPRS option, pay attention to Active/SO options with "
        "a high proproportion of 0 users: MAG WINDOWS, CAPRI, MAGJ VISTARAD "
        "WINDOWS, KPA VRAM GUI, VPR APPLICATION PROXY "
        "* Focus on options with many 'Exclusive RPCs' like CAPRI, MAG "
        "DICOM VISA, YS BROKER1, R1SDCI and others which also have a "
        "highish number of users - unlike the OVERLAPPING options, these "
        "introduce whole new sets of RPCs "
        "* SO0 is responsible for most of the logins for many of the most "
        "significant SO's (MAG, KPA etc) "
        "* PROXY users (see user class in user reduction): see the proxy "
        "users count. If close to all then very special option "
        "* Implication of DELETING Excluded Options and their exclusive "
        "RPCs - reducing VistA size "
    )

    reportPath = VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptions.md"
    with open(reportPath, "w") as reportFile:
        reportFile.write(mu)
def reportRPCOptionDetails(stationNo): if stationNo == "999": raise Exception("Can't support FOIA 999 as not enough representative sign ons") # Four inputs: users (with sign ons inside), RPC options, Apps and Options userInfos = json.load(open(VISTA_RED_LOCN_TEMPL.format(stationNo) + "_200Reduction.json")) _19Reductions = json.load(open(VISTA_RED_LOCN_TEMPL.format(stationNo) + "_19Reduction.json")) rpcsOfOptions = dict((_19Reduction["label"], _19Reduction["rpcs"]) for _19Reduction in _19Reductions if "rpcs" in _19Reduction) remoteAppsAndOptions = dict((_8994_5Reduction["label"], _8994_5Reduction["option"]) for _8994_5Reduction in json.load(open(VISTA_RED_LOCN_TEMPL.format(stationNo) + "_8994_5Reduction.json"))) moUserCounts = Counter() mo0UserCount = Counter() moNon0UserCount = Counter() qualifierMOUserCountByMO = defaultdict(lambda: Counter()) moAloneUserCounts = Counter() menuOptionCombinationCounts = defaultdict(lambda: Counter()) # ie/ # of combos expected jmoUserCounts = Counter() cntSOUsers = 0 cntSOUsersRPCOptions = 0 cntSOUsersNoRPCOptions = 0 cntNon0SOUsersRPCOptions = 0 for userInfo in userInfos: if "signOnCount" not in userInfo: continue cntSOUsers += 1 if "menuOptions" not in userInfo: raise Exception("Expect ALL signed on users to have at least one menu option") # Leaving in "userClasses"/ proxies userHasRPCMenuOption = False jMenuOptions = "/".join([mo for mo in sorted(userInfo["menuOptions"]) if mo in rpcsOfOptions]) if jMenuOptions != "": jmoUserCounts[jMenuOptions] += 1 for menuOption in userInfo["menuOptions"]: if menuOption not in rpcsOfOptions: continue if menuOption == "CG FMQL QP USER": continue moUserCounts[menuOption] += 1 if "isCreatedBy0" in userInfo: mo0UserCount[menuOption] += 1 else: moNon0UserCount[menuOption] += 1 userHasRPCMenuOption = True combined = False for cmenuOption in userInfo["menuOptions"]: if cmenuOption not in rpcsOfOptions: continue if cmenuOption == menuOption: continue if cmenuOption == "CG FMQL QP USER": continue 
qualifierMOUserCountByMO[menuOption][cmenuOption] += 1 combined = True if not combined: moAloneUserCounts[menuOption] += 1 else: menuOptionCombinationCounts[menuOption][len(userInfo["menuOptions"]) - 1] += 1 # There are SO users with no RPC menu options (important subset!) if userHasRPCMenuOption: cntSOUsersRPCOptions += 1 if "isCreatedBy0" not in userInfo: cntNon0SOUsersRPCOptions += 1 else: cntSOUsersNoRPCOptions += 1 pureAlones = set(mo for mo in moAloneUserCounts if float(moAloneUserCounts[mo])/float(moUserCounts[mo]) > 0.15) pureQualifierOptions = set(mo for mo in moUserCounts if mo not in moAloneUserCounts) otherQualifierOptions = set(mo for mo in moUserCounts if mo in moAloneUserCounts and float(moAloneUserCounts[mo])/float(moUserCounts[mo]) <= 0.15) allQualifierOptions = pureQualifierOptions.union(otherQualifierOptions) mu = """## RPC Options of {} Classified Based on active (SO) users use. """.format(stationNo) tbl = MarkdownTable(["Type", "\#"]) tbl.addRow(["Users", len(userInfos)]) tbl.addRow(["SO Users", cntSOUsers]) tbl.addRow(["SO Users with RPC Options", cntSOUsersRPCOptions]) tbl.addRow(["SO Users with other than RPC Options", cntSOUsersNoRPCOptions]) tbl.addRow(["Zero SO Users w/RPC Options", cntSOUsersRPCOptions - cntNon0SOUsersRPCOptions]) tbl.addRow(["Non Zero SO Users w/RPC Options", cntNon0SOUsersRPCOptions]) tbl.addRow(["RPC Options", len(rpcsOfOptions)]) tbl.addRow(["Used RPC Options", len(moUserCounts)]) tbl.addRow(["Zero SO Users RPC Options", len(mo0UserCount)]) tbl.addRow(["Non Zero SO Users RPC Options", len(moNon0UserCount)]) tbl.addRow(["Pure Alone Options", len(pureAlones)]) tbl.addRow(["Singleton Alone Options (NEVER COMBINED)", sum(1 for mo in moUserCounts if mo not in qualifierMOUserCountByMO)]) tbl.addRow(["Pure Qualifier Options - NEVER on their own", len(pureQualifierOptions)]) tbl.addRow(["Other Qualifier Options - < 15% on their own (so not _Pure Alones_)", len(otherQualifierOptions)]) mu += tbl.md() + "\n\n" 
optionsWithGT1PRCTUsers = set(option for option in moUserCounts if float(moUserCounts[option])/float(cntSOUsersRPCOptions) > 0.01) mu += """Despite there being __{:,}__ employed RPC Options, only __{:,}__ are used by more than 1% of sign on users, the vast majority of which are remote/0 users. """.format(len(moUserCounts), len(optionsWithGT1PRCTUsers)) tbl = MarkdownTable(["Option", "Total Users", "0 Users", "Non 0 Users", "RPCs"]) for mo in sorted(list(optionsWithGT1PRCTUsers), key=lambda x: moUserCounts[x], reverse=True): tbl.addRow([ mo, moUserCounts[mo], reportAbsAndPercent(mo0UserCount[mo], moUserCounts[mo]) if mo in mo0UserCount else "", reportAbsAndPercent(moNon0UserCount[mo], moUserCounts[mo]) if mo in moNon0UserCount else "", len(rpcsOfOptions[mo]) ]) mu += tbl.md() + "\n\n" mu += "The {:,} active Non zero users need separate consideration. The top non Zero user options - > 10% non Zeros have it - are listed below. Note that other than the mainstream options, most are highlighted as they are mainly in Non Zero users\n\n".format(cntNon0SOUsersRPCOptions) optionsWithGT10PRCTUsers = set(option for option in moNon0UserCount if float(moNon0UserCount[option])/float(cntNon0SOUsersRPCOptions) > 0.1) tbl = MarkdownTable(["Option", "Non 0 Users", "Of Total", "RPCs"]) for mo in sorted(list(optionsWithGT10PRCTUsers), key=lambda x: moNon0UserCount[x], reverse=True): level = round(float(moNon0UserCount[mo])/float(moUserCounts[mo]), 2) tbl.addRow([ "__{}__".format(mo) if level > 0.75 else mo, reportAbsAndPercent(moNon0UserCount[mo], cntNon0SOUsersRPCOptions), level, len(rpcsOfOptions[mo]) ]) mu += tbl.md() + "\n\n" mu += "\nThere are {:,} _Pure Alones_, options that can exist on their own (> 15% of users with them have only them)\n\n".format(len(pureAlones)) tbl = MarkdownTable(["Option", "Total Users", "0 Users", "Alone Users", "CPRS Combos", "Other Combos", "Quals", "Alone Quals", "Top Quals", "RPCs"]) for i, mo in enumerate(sorted(list(pureAlones), key=lambda x: 
moUserCounts[x], reverse=True), 1): cntNonCPRSQuals = sum(jmoUserCounts[jmo] for jmo in jmoUserCounts if not re.search(r'OR CPRS GUI CHART', jmo) and jmo != mo and re.search(mo, jmo)) thres = .1 if moUserCounts[mo] > 100 else .9 topQualsMU = "" if mo in qualifierMOUserCountByMO: topQuals = sorted([cmo for cmo in qualifierMOUserCountByMO[mo] if float(qualifierMOUserCountByMO[mo][cmo])/float(moUserCounts[mo]) > thres], key=lambda x: qualifierMOUserCountByMO[mo][x], reverse=True) if len(topQuals): topQualsMU = ", ".join(["{} ({:,})".format(cmo, qualifierMOUserCountByMO[mo][cmo]) for cmo in topQuals]) _0Level = float(mo0UserCount[mo])/float(moUserCounts[mo]) tbl.addRow([ "__{}__".format(mo) if _0Level > 0.5 else mo, # only highlight if a lot of 0's moUserCounts[mo], reportAbsAndPercent(mo0UserCount[mo], moUserCounts[mo]) if mo in mo0UserCount else "", reportAbsAndPercent(moAloneUserCounts[mo], moUserCounts[mo]), reportAbsAndPercent( qualifierMOUserCountByMO[mo]["OR CPRS GUI CHART"], moUserCounts[mo] ) if mo in qualifierMOUserCountByMO and "OR CPRS GUI CHART" in qualifierMOUserCountByMO[mo] else "", reportAbsAndPercent( cntNonCPRSQuals, moUserCounts[mo] ) if cntNonCPRSQuals > 0 else "", len(qualifierMOUserCountByMO[mo].keys()), reportAbsAndPercent( sum(1 for cmo in qualifierMOUserCountByMO[mo] if cmo in pureAlones), len(qualifierMOUserCountByMO[mo].keys()) ), topQualsMU, len(rpcsOfOptions[mo]) ]) mu += tbl.md() + "\n\n" mu += """__Note__ (mostly put into the Source Artifact manualRPCApplications): * _KPA VRAM GUI_ belongs to __VistA Remote Access Management (VRAM) Graphical User Interface (GUI)__ according to this [patch](https://github.com/OSEHRA/VistA/blob/master/Packages/Kernel/Patches/XU_8.0_629/XU-8_SEQ-502_PAT-629.TXT). It has a 8995 application entry and seems to sync credentials from the VBA 'VistA' to a local VistA - check out the RPCs it allows. Note that half its users are stand alone while the rest use CAPRI and very few use CPRS. 
Note too that this option DOES NOT HAVE MANY QUALIFIERS (unlike other 'alones') * _MAGJ VISTARAD WINDOWS_ is a __VistARad__ option according [to](https://www.va.gov/vdl/documents/clinical/vista_imaging_sys/imginstallgd_f.pdf). Additionally, note that the _Rad/Nuc Med Personnel menu_ defines further user permissions (where stored?) and there are a series of security keys guarding actions. Note that this option DOES NOT HAVE MANY QUALIFIERS (unlike other 'alones') * _MAG WINDOWS_ for __VistA Imaging and Capture Software__ according to [this](https://www.va.gov/vdl/documents/clinical/vista_imaging_sys/imginstallgd_f.pdf). Note that ala Rad, there are keys to further restrict options. * _DSIY ABOVE PAR_ belongs to __Above PAR (APAR)__ by the [TRM](https://www.oit.va.gov/Services/TRM/ToolPage.aspx?tid=7725) * _RMPR PURCHASE ORDER GUI_ is part of __PROSTHETICS PURCHASE ORDER GUI__ * _OOP GUI EMPLOYEE_ is from __ASISTS__ which is being decommissioned in Jan 2019. """ mu += "The balance of the \"Qualifier\" Options are defined at the end of this report.\n\n" # 8994_5 and use mu += "### File 8994_5 Applications and their options\n\n" mu += """File 8994_5 defines 'Remote Applications'. Each is given a (default) option. There are {:,} applications using/sharing {:,} options. Note that {:,} of these options are NOT RPC options and {:,} are not assigned to any active user. Note that _JLV_ (for now) lacks an entry here or its own option (it uses CPRS, CAPRI and VPR options). The following shows the applications by option ... 
""".format( len(remoteAppsAndOptions), len(set(remoteAppsAndOptions.values())), sum(1 for option in set(remoteAppsAndOptions.values()) if option not in rpcsOfOptions), sum(1 for option in set(remoteAppsAndOptions.values()) if option not in moUserCounts) ) byOption = defaultdict(list) for label, option in remoteAppsAndOptions.iteritems(): byOption[option].append(label) tbl = MarkdownTable(["Option", "RPCs", "Users", "Applications"]) for option in sorted(byOption): optionMU = "__{}__".format(option) if option in moUserCounts else option tbl.addRow([optionMU, "NO" if option not in rpcsOfOptions else "", moUserCounts[option] if option in moUserCounts else "", ", ".join(sorted(byOption[option]))]) mu += tbl.md() + "\n\n" stats = {} for userInfo in userInfos: if "signOnCount" not in userInfo: continue if "menuOptions" not in userInfo: raise Exception("Expect ALL signed on users to have at least one menu option") # Leaving in "userClasses" / proxies rpcMOs = [mo for mo in userInfo["menuOptions"] if mo in rpcsOfOptions] if not len(rpcMOs): continue if "remoteApps" in userInfo["signOnDetails"]: for rapp in userInfo["signOnDetails"]["remoteApps"]: if rapp not in remoteAppsAndOptions: raise Exception("New Unexpected Remote App {}".format(rapp)) # for lbl, app, mo in appMOPairs: for app, mo in remoteAppsAndOptions.iteritems(): lbl = app if lbl not in stats: stats[lbl] = {"moLabel": mo, "appLabel": app, "mo": set(), "app": set(), "app0User": 0} if mo in rpcMOs: stats[lbl]["mo"].add(userInfo["userId"]) if "remoteApps" in userInfo["signOnDetails"] and app in userInfo["signOnDetails"]["remoteApps"]: stats[lbl]["app"].add(userInfo["userId"]) if "isCreatedBy0" in userInfo: stats[lbl]["app0User"] += 1 # note if app use by 0 user tblRowCount = 0 tbl = MarkdownTable(["App", "Option", "App Users", "App 0 Users", "App+MO", "!App MO", "App !MO"]) for lbl in sorted(stats, key=lambda x: len(stats[x]["app"]), reverse=True): stat = stats[lbl] # clean if all mo and no app if len(stat["app"]) 
== 0: continue if float(len(stats[lbl]["mo"].intersection(stats[lbl]["app"])))/float(len(stats[lbl]["app"])) < 0.9: moAndAppMU = "{:,} [UNDER MATCH]".format(len(stats[lbl]["mo"].intersection(stats[lbl]["app"]))) else: moAndAppMU = reportAbsAndPercent(len(stats[lbl]["mo"].intersection(stats[lbl]["app"])), len(stats[lbl]["app"])) moNoApp = len(stats[lbl]["mo"] - stats[lbl]["app"]) moNoAppMU = "{:,} [APP-OPTION MATCH]".format(moNoApp) if float(moNoApp)/float(len(stat["app"])) < 0.1 else moNoApp row = [ stat["appLabel"], stat["moLabel"], len(stat["app"]), reportAbsAndPercent(stat["app0User"], len(stat["app"])), moAndAppMU, moNoAppMU, len(stats[lbl]["app"] - stats[lbl]["mo"]) ] tbl.addRow(row) tblRowCount += 1 mu += """What 8994.5 applications are used? It's {:,} out of the {:,}. What option best matches an app - does the __presence of an option predict the use of a (8994.5) application?__ Note that even 8994.5 shows option sharing and such sharing is borne out in the table of signon and user information below. The low counts in the _App !MO_ column shows that the apps are good indicators that an option is present but in general _!App MO_ shows many cases where an option is too broadly given (CPRS, MAG WINDOWS ...) to predict app use. _VRAM_ is the only clear exception though VISTARAD and its singular option is probably an exception too. Note that _DVBA CAPRI GUI_ is the only _qualifier_ option here. It is actually a _stand alone_ but is always paired with _OR CPRS GUI CHART_ by the CAPRI-style setup code. It's high _!App MO_ count is because of this pairing which is used by JLV and other apps. 
""".format(tblRowCount, len(remoteAppsAndOptions)) mu += tbl.md() + "\n\n" # Back to Qualifier Details tblMU = MarkdownTable(["Option", "Total Users", "0 Users", "Others Quals", "Alone Quals", "Top Quals", "RPCs"]) tblLU = MarkdownTable(["Option", "Total Users", "0 Users", "Others Quals", "Alone Quals", "Top Quals", "RPCs"]) lessUsedThreshold = 30 tblMUCount = 0 tblLUCount = 0 for i, mo in enumerate(sorted(list(allQualifierOptions), key=lambda x: moUserCounts[x], reverse=True), 1): row = [ "__{}__".format(mo) if len(qualifierMOUserCountByMO[mo]) < moUserCounts[mo] else mo, moUserCounts[mo], reportAbsAndPercent(mo0UserCount[mo], moUserCounts[mo]) if mo in mo0UserCount else "", len(qualifierMOUserCountByMO[mo]), sum(1 for cmo in qualifierMOUserCountByMO[mo] if cmo in pureAlones) ] tcmus = [] for cmo in sorted(qualifierMOUserCountByMO[mo], key=lambda x: qualifierMOUserCountByMO[mo][x], reverse=True): level = round(float(qualifierMOUserCountByMO[mo][cmo]) / float(moUserCounts[mo]), 2) # want Alones or High Match if not (level > 0.25 or cmo in pureAlones): continue if level == 1: tcmu = "__{}__ (ALL)".format(cmo) elif cmo in moAloneUserCounts: tcmu = "__{}__ ({})".format(cmo, level) else: tcmu = "{} ({})".format(cmo, level) tcmus.append(tcmu) if len(tcmus) < len(qualifierMOUserCountByMO[mo]): tcmus.append("...") row.append(", ".join(tcmus)) row.append(len(rpcsOfOptions[mo])) if moUserCounts[mo] < lessUsedThreshold: tblLU.addRow(row) tblLUCount += 1 continue tblMU.addRow(row) tblMUCount += 1 # Back to Qualifiers mu += "### {:,} Qualifier Option Details\n\n".format(tblMUCount + tblLUCount) mu += "There are {:,} more used (> {:,} users) qualifiers. 
Those with more users than other qualifiers are highlighted as are combinations with primary/alone options ...\n\n".format(tblMUCount, lessUsedThreshold) mu += tblMU.md() + "\n\n" mu += "There are {:,} less used (< {:,} users) qualifiers ...\n\n".format(tblLUCount, lessUsedThreshold) mu += tblLU.md() + "\n\n" byCombo = Counter() for userInfo in userInfos: if "signOnCount" not in userInfo: continue if "menuOptions" not in userInfo: continue # note: leaving in userClasses if sum(1 for mo in userInfo["menuOptions"] if mo in rpcsOfOptions) == 0: continue if "CG FMQL QP USER" in userInfo["menuOptions"]: continue if sum(1 for mo in userInfo["menuOptions"] if mo in moAloneUserCounts) == 0: combo = "/".join(sorted([mo for mo in userInfo["menuOptions"] if mo in moUserCounts])) byCombo[combo] += 1 if len(byCombo): mu += """There are {:,} users w/o Alones - ie/ their 'apps' are option combos. """.format(sum(byCombo[combo] for combo in byCombo)) tbl = MarkdownTable(["Combination", "Users"]) for combo in sorted(byCombo, key=lambda x: byCombo[x], reverse=True): tbl.addRow([combo, byCombo[combo]]) mu += tbl.md() + "\n\n" qualifiersWithoutAlones = [mo for mo in allQualifierOptions if sum(1 for cmo in qualifierMOUserCountByMO[mo] if cmo in moAloneUserCounts) == 0] if len(qualifiersWithoutAlones): mu += "__Note__: the following Qualifiers (ie/ not alones) are NOT combined with Alones: {}\n\n\n".format(", ".join(qualifiersWithoutAlones)) open(VISTA_REP_LOCN_TEMPL.format(stationNo) + "rpcOptionDetails.md", "w").write(mu) """
def assembleIntegrated():
    # Merge the per-station RPC interface definitions into one integrated
    # definition, print a summary and write the result to
    # ../Definitions/rpcInterfaceDefinition.bjsn.
    #
    # For every station in SNOS the definition list is loaded from
    # VISTA_RPCD_LOCN_TEMPL.format(sno) + "_rpcInterfaceDefinition.json".
    # Stations are then visited by the date of their latest install, newest
    # first, with "999" (FOIA) forced last. The first station to supply an
    # RPC label provides its definition ("fromVistA"); later stations are
    # only appended to that definition's "inVistAs".
    #
    # NOTE(review): the line before this definition ends with an unmatched
    # triple quote, so this function may sit inside a string literal (ie/ be
    # disabled) - confirm. Documentation is kept in '#' comments for that
    # reason.
    rpcInterfaceDefinitionBySNO = {}
    lastInstallBySNO = {}
    for sno in SNOS:
        defnPath = \
            VISTA_RPCD_LOCN_TEMPL.format(sno) + "_rpcInterfaceDefinition.json"
        # context manager so each input is closed once parsed
        with open(defnPath) as defnFile:
            rpcInterfaceDefinitionBySNO[sno] = json.load(defnFile)
        # date part (before the "T") of every install, ignoring CG FMQL QP
        installs = set(
            defn["installed"].split("T")[0]
            for defn in rpcInterfaceDefinitionBySNO[sno]
            if "installed" in defn and defn["label"] != "CG FMQL QP")
        lastInstallBySNO[sno] = sorted(installs)[-1]

    rpcDefinitionsById = {}
    for sno in sorted(
            SNOS,
            key=lambda x: lastInstallBySNO[x] if x != "999" else "1900-01-01",
            reverse=True):
        for rpcDefinition in rpcInterfaceDefinitionBySNO[sno]:
            rpc = rpcDefinition["label"]
            if rpc in rpcDefinitionsById:
                rpcDefinitionsById[rpc]["inVistAs"].append(sno)
                continue
            rpcDefinition["fromVistA"] = sno
            rpcDefinition["inVistAs"] = [sno]
            rpcDefinitionsById[rpc] = rpcDefinition

    integratedRPCInterfaceDefinition = sorted(
        rpcDefinitionsById.values(), key=lambda x: x["label"])
    integratedCount = len(integratedRPCInterfaceDefinition)

    # Every print below passes one parenthesised argument, so the output is
    # the same under Python 2's print statement and the print function.
    print("Integrated Definitions: __{:,}__\n".format(integratedCount))

    # contribution per station, counted in one pass
    cntFromSNO = Counter(
        defn["fromVistA"] for defn in integratedRPCInterfaceDefinition)
    for sno in sorted(SNOS):
        print(" * {}: {} / Last: {} / {}".format(
            sno if sno != "999" else "FOIA (999)",
            reportAbsAndPercent(cntFromSNO[sno], integratedCount),
            lastInstallBySNO[sno],
            reportAbsAndPercent(
                len(rpcInterfaceDefinitionBySNO[sno]), integratedCount)
        ))

    # Could add appearances ie/ in 1, 2 or 3
    iActives = set(
        defn["label"] for defn in integratedRPCInterfaceDefinition
        if "isActive" in defn)
    print("\nActive: __{}__\n".format(
        reportAbsAndPercent(len(iActives), integratedCount)))
    for sno in sorted(SNOS):
        sActives = set(
            defn["label"] for defn in rpcInterfaceDefinitionBySNO[sno]
            if "isActive" in defn)
        print(" * {}: {:,} - not I {:,}".format(
            sno if sno != "999" else "FOIA (999)",
            len(sActives),
            len(sActives - iActives)  # 0 if base!
        ))
    print("")  # blank line

    # close (and so flush) the output deterministically
    with open("../Definitions/rpcInterfaceDefinition.bjsn", "w") as outFile:
        json.dump(integratedRPCInterfaceDefinition, outFile, indent=4)
def reportBuildsNInstallsOld(stationNo):
    """Write the "RPCs According to Builds and Installs" report of a station.

    Reads the station's "_9_6Reduction.json" (builds), "_8994Reduction.json"
    (RPC labels) and "_rpcBPIs.json" reductions located by
    VISTA_RED_LOCN_TEMPL and writes the markdown report
    "rpcsByBuildsNInstalls.md" to the location given by VISTA_REP_LOCN_TEMPL.

    The report covers: a summary, RPCs per package, RPC builds per year,
    active RPCs with their distribution/install gap, RPCs deleted by builds
    and "rogue" RPCs where file 8994 and the builds disagree.

    stationNo -- station number string, "999" for FOIA.
    """

    def loadReduction(suffix):
        # Load one JSON reduction of this station, closing the file after
        with open(VISTA_RED_LOCN_TEMPL.format(stationNo) + suffix) as redFile:
            return json.load(redFile)

    buildsReduction = loadReduction("_9_6Reduction.json")
    buildsRPCReduction = [bi for bi in buildsReduction if "rpcs" in bi]

    # For report - will OVERRIDE based on ACTIVE from Builds or Not (soon
    # options too)
    _8994Labels = set(
        red["label"] for red in loadReduction("_8994Reduction.json"))

    buildsByRPC = defaultdict(list)
    buildsWNewRPCReduction = []
    buildsByPackage = defaultdict(list)
    packagesByRPC = defaultdict(Counter)  # want latest to come out
    dateDistributeds = []
    countBuildsByYr = Counter()
    countNewRPCBuildsByYr = Counter()
    # Builds per RPC form an "audit" trail of RPC introduction, change and
    # deletion
    rpcsSeen = set()
    for buildInfo in buildsRPCReduction:
        newRPCSeen = False
        if "package" in buildInfo:
            buildsByPackage[buildInfo["package"]].append(buildInfo)
        for actionType in buildInfo["rpcs"]:
            for rpc in buildInfo["rpcs"][actionType]:
                info = {"build": buildInfo["label"], "action": actionType}
                if "package" in buildInfo:
                    info["package"] = buildInfo["package"]
                    packagesByRPC[rpc][buildInfo["package"]] += 1
                if "dateDistributed" in buildInfo:
                    info["distributed"] = buildInfo["dateDistributed"]
                if rpc not in rpcsSeen:
                    newRPCSeen = True
                    rpcsSeen.add(rpc)
                if "dateInstalledFirst" in buildInfo:
                    info["installed"] = buildInfo["dateInstalledFirst"]
                buildsByRPC[rpc].append(info)
        if newRPCSeen:
            buildsWNewRPCReduction.append(buildInfo)
        if "dateDistributed" in buildInfo:
            distributed = buildInfo["dateDistributed"]
            # FMQL builds don't count for the first/last distribution dates
            if not re.search(r'FMQL', buildInfo["label"]):
                dateDistributeds.append(distributed)
            yr = distributed.split("-")[0]
            countBuildsByYr[yr] += 1
            if newRPCSeen:
                countNewRPCBuildsByYr[yr] += 1
        else:
            # Was "print x" with x undefined, ie/ a NameError.
            # NOTE(review): which "if" this else belonged to can't be told
            # from the collapsed source - confirm it is the undated build case.
            print("Build {} has no distribution date - not in year counts".
                  format(buildInfo["label"]))

    # Flips from Builds to view from RPC side: only considers Builds that are
    # installed and if the first build seen for an RPC, it must be a
    # SEND TO SITE (create) build.
    # NOTE(review): rpcBPIByRPC and the nix counters aren't used by the
    # report below - kept as the intent (dump vs load of _rpcBPIs.json) is
    # unclear.
    rpcBPIByRPC = {}
    nixBuildAsNotInstalled = 0
    nixBuildForRPCAsFirstButNotSend = 0
    for buildInfo in buildsRPCReduction:
        if buildInfo["isInstalled"] == False:  # let's not count it!
            nixBuildAsNotInstalled += 1
            continue
        for actionType in buildInfo["rpcs"]:
            for rpc in buildInfo["rpcs"][actionType]:
                if rpc not in rpcBPIByRPC:
                    # let's not count until get a SEND
                    if actionType != "SEND TO SITE":
                        nixBuildForRPCAsFirstButNotSend += 1
                        continue
                    rpcBPIByRPC[rpc] = {
                        "label": rpc,
                        "installed": buildInfo["dateInstalledFirst"],
                        "builds": []
                    }
                    if "dateDistributed" in buildInfo:
                        rpcBPIByRPC[rpc]["distributed"] = buildInfo[
                            "dateDistributed"]
                bir = {
                    "label": buildInfo["label"],
                    "action": actionType,
                    "installed": buildInfo["dateInstalledFirst"]
                }
                rpcBPIByRPC[rpc]["builds"].append(bir)
                if "package" in buildInfo:
                    bir["package"] = buildInfo["package"]
                if "dateDistributed" in buildInfo:
                    bir["distributed"] = buildInfo["dateDistributed"]
    for rpc in rpcBPIByRPC:
        bpi = rpcBPIByRPC[rpc]
        lastBuild = bpi["builds"][-1]
        if lastBuild["action"] == "DELETE AT SITE":
            bpi["isDeleted"] = True
            bpi["deleteInstalled"] = lastBuild["installed"]
            if "distributed" in lastBuild:
                bpi["deleteDistributed"] = lastBuild["distributed"]
        packages = [
            info["package"] for info in buildsByRPC[rpc] if "package" in info
        ]
        if len(set(packages)) == 1:
            bpi["package"] = packages[0]
        elif len(packages):  # can be none!
            packages.reverse()
            # ex override: [u'ORDER ENTRY/RESULTS REPORTING',
            # u'GEN. MED. REC. - VITALS']
            bpi["package"] = [
                pkg for pkg in packages
                if pkg != "ORDER ENTRY/RESULTS REPORTING"
            ][0]  # last which isn't the overused OE
    # json.load takes no "indent" (it raised TypeError) - dropped.
    # NOTE(review): the loaded value isn't used further in this function.
    rpcBPIs = loadReduction("_rpcBPIs.json")

    # PICK ANYTHING BUT 'ORDER ENTRY/RESULTS REPORTING' if another there
    rpcsByPackage = defaultdict(list)
    for rpc in packagesByRPC:
        if (len(packagesByRPC[rpc]) > 1 and
                "ORDER ENTRY/RESULTS REPORTING" in packagesByRPC[rpc]):
            del packagesByRPC[rpc]["ORDER ENTRY/RESULTS REPORTING"]
        for pkg in packagesByRPC[rpc]:
            rpcsByPackage[pkg].append(rpc)
    noPackageRPCCount = sum(
        1 for rpc in buildsByRPC if rpc not in packagesByRPC)

    dateDistributeds = sorted(dateDistributeds)
    rpcWithMostBuilds = max(buildsByRPC, key=lambda x: len(buildsByRPC[x]))
    packageWithTheMostRPCs = max(
        rpcsByPackage, key=lambda x: len(rpcsByPackage[x]))

    # this will include those never even installed!
    deletedRPCs = set(
        rpc for rpc in buildsByRPC
        if buildsByRPC[rpc][-1]["action"] == "DELETE AT SITE")
    activeRPCs = set(buildsByRPC) - deletedRPCs
    _8994MissingActiveRPCs = activeRPCs - _8994Labels  # should be there but not
    _8994DeletedRPCs = _8994Labels.intersection(deletedRPCs)
    _8994NoBuildRPCs = _8994Labels - activeRPCs  # beyond active/still there

    mu = (
        "## RPCs According to Builds and Installs of {} There are __{}__ "
        "builds defining __{:,}__ RPCs distributed from _{}_ to _{}_, "
        "__{}__ of which introduce new RPCs. RPC _{}_ appears in the most "
        "builds, __{:,}__. The median number of RPCs per Build is __{:,}__. "
        "RPCs are spread across __{:,}__ packages. Package _{}_ has the most "
        "RPCs, __{:,}__. __{:,}__ RPCs have more than one Package usually "
        "because of re-organization and splitting of Packages over the "
        "years. __{:,}__ RPCs have no package because their builds weren't "
        "assigned a package (yet). Builds can delete as well as add RPCs - "
        "__{:,}__ of the RPCs were deleted by the final Build they appeared "
        "in, leaving __{:,}__ RPCs active and installed. File _8994_ is "
        "suppossed to define the active RPCs in a VistA. However the 8994 of "
        "this system has __{:,}__ deleted RPCs, is missing __{:,}__ active "
        "RPCs and has __{:,}__ extra RPCs that never appear in a Build. "
    ).format(
        stationNo,
        reportAbsAndPercent(len(buildsRPCReduction), len(buildsReduction)),
        len(buildsByRPC),
        dateDistributeds[0],
        dateDistributeds[-1],
        reportAbsAndPercent(
            len(buildsWNewRPCReduction), len(buildsRPCReduction)),
        rpcWithMostBuilds,
        len(buildsByRPC[rpcWithMostBuilds]),
        numpy.percentile([len(buildsByRPC[rpc]) for rpc in buildsByRPC], 50),
        len(buildsByPackage),
        packageWithTheMostRPCs,
        len(rpcsByPackage[packageWithTheMostRPCs]),
        sum(1 for rpc in packagesByRPC if len(packagesByRPC[rpc]) > 1),
        noPackageRPCCount,
        len(deletedRPCs),
        len(activeRPCs),
        len(_8994DeletedRPCs),
        len(_8994MissingActiveRPCs),
        len(_8994NoBuildRPCs))

    # Packages by RPC
    tbl = MarkdownTable(["Package", "\\# RPCs", "Example RPC"])
    for pkg in sorted(rpcsByPackage,
                      key=lambda x: len(rpcsByPackage[x]),
                      reverse=True):
        tbl.addRow([
            "__{}__".format(pkg),
            len(rpcsByPackage[pkg]),
            sorted(rpcsByPackage[pkg])[0]
        ])
    mu += (
        "{:,} Packages have RPCs, while {:,} RPCs have no Package. Clearly "
        "the top Packages (ORDERS, IMAGES and some COTS packages) need to be "
        "examined first ...\n\n"
    ).format(len(rpcsByPackage), noPackageRPCCount)
    mu += tbl.md() + "\n\n"

    # Builds by Year - may be restated subsequently so distinguish builds
    # introducing fresh RPCs from those just restating. Note that the total
    # of "new RPCs" is roughly the total of RPCs (some builds lack a date
    # which accounts for the discrepancy)
    tbl = MarkdownTable(["Year", "All RPC Builds", "New RPC Builds"])
    for yr in sorted(countBuildsByYr, key=int, reverse=True):
        tbl.addRow([
            yr,
            reportAbsAndPercent(countBuildsByYr[yr], len(buildsRPCReduction)),
            reportAbsAndPercent(countNewRPCBuildsByYr[yr],
                                len(buildsWNewRPCReduction))
        ])
    mu += (
        "RPC Builds by distribution year. Note that as builds often restate "
        "pre-existing RPCs, the following distinguishes all builds with RPCs "
        "from those that introduce new RPCs ... "
    )
    mu += tbl.md() + "\n\n"

    # distributed | installed
    def muDate(buildInfos, dtProp):
        # (first, last) day of dtProp over buildInfos, "" when never present.
        # Uses its own argument; the original read buildsByRPC[rpc] through
        # the enclosing loop variable.
        def calcDate(first):
            dt = ""
            for buildInfo in buildInfos:
                if dtProp in buildInfo:
                    dt = buildInfo[dtProp].split("T")[0]  # only day
                    if first:
                        break
            return dt

        return calcDate(True), calcDate(False)

    tbl = MarkdownTable([
        "RPC", "Builds", "(Latest) Package", "Distributed",
        "[First] Install Gap"
    ])
    gaps = []
    noGapRPCs = []
    badGapRPCs = []
    for rpc in sorted(activeRPCs):
        firstD, lastD = muDate(buildsByRPC[rpc], "distributed")
        distribMU = lastD
        if firstD != lastD:
            distribMU = "{} - {}".format(firstD, lastD)
        firstI = muDate(buildsByRPC[rpc], "installed")[0]
        if firstI == "" or firstD == "":
            installGapMU = "__N/A__"
            noGapRPCs.append(rpc)
        elif firstI > firstD:
            gap = (datetime.strptime(firstI, "%Y-%m-%d") -
                   datetime.strptime(firstD, "%Y-%m-%d"))
            installGapMU = str(gap).split(",")[0]  # "N days"
            gaps.append(gap.days)
        elif firstI == firstD:
            installGapMU = ""
            gaps.append(0)
        else:
            installGapMU = "__RERELEASE: D > I__: {} > {}".format(
                firstD, firstI)
            badGapRPCs.append(rpc)
        # Alt: move out of 'ORDER ENTRY/RESULTS REPORTING' if another as more
        # precise too
        packageMU = ""
        if rpc in packagesByRPC:
            packageMU = max(packagesByRPC[rpc],
                            key=lambda x: packagesByRPC[rpc][x])
        tbl.addRow([
            "__{}__".format(rpc),
            len(buildsByRPC[rpc]), packageMU, distribMU, installGapMU
        ])
    mu += (
        "__{:,}__ Active/Installed RPCs. The maximum gap in days between "
        "distribution and install is {:,}, the median is {:,}, {:,} have no "
        "gap at all. The gap isn't available if necessary dates are missing "
        "({:,}) or the first install date comes BEFORE the build "
        "distribution date (__{:,}__) ...\n\n"
    ).format(
        len(activeRPCs),
        max(gaps) if gaps else 0,  # no measurable gap at all
        numpy.percentile(gaps, 50) if gaps else 0,
        sum(1 for g in gaps if g == 0),
        len(noGapRPCs),
        len(badGapRPCs))
    mu += tbl.md() + "\n\n"

    # Deleted RPCs - note that there are probably more? or should be more
    # (retired packages). Note that only 'SHOULD BE DELETED' RPCs (see below)
    # need concern the integrated RPC Interface definition.
    # TODO: work out retirement better to enforce more retireds
    tbl = MarkdownTable(
        ["RPC", "(Last) Deleting Build", "When (Dist/Install)"])
    for rpc in sorted(deletedRPCs):
        lastDelBuildInfo = buildsByRPC[rpc][-1]
        # a/cs for when no distribution or install info
        whenMU = "{} / {}".format(
            lastDelBuildInfo.get("distributed", "-"),
            lastDelBuildInfo["installed"].split("T")[0]
            if "installed" in lastDelBuildInfo else "-")
        tbl.addRow(
            [rpc, lastDelBuildInfo["build"].replace("*", "\\*"), whenMU])
    mu += "{:,} Deleted/Uninstalled RPCs ...\n\n".format(len(deletedRPCs))
    mu += tbl.md() + "\n\n"

    # 8994 tie: Rogue RPCs those [1] builds says SHOULD be there but aren't
    # ("MISSING") and [2] builds don't account for them or should be deleted
    # ("EXTRA") and [3] builds delete but are still in 8994
    # ("SHOULD BE DELETED")
    # Note: possible build logic wrong OR builds badly built (remote of RPC
    # not done but code removed?) etc
    rogueRPCs = _8994MissingActiveRPCs | _8994NoBuildRPCs | _8994DeletedRPCs
    # _8994NoBuildRPCs takes in the deleted RPCs too: count those once, as
    # "SHOULD BE DELETED", so the counts line up with the table rows
    extraRPCs = _8994NoBuildRPCs - _8994DeletedRPCs
    tbl = MarkdownTable(["RPC", "Problem"])
    for rpc in sorted(rogueRPCs):
        # A trailing reassignment used to turn every "SHOULD BE DELETED"
        # back into "EXTRA" - removed.
        if rpc in _8994MissingActiveRPCs:
            problem = "MISSING"
        elif rpc in _8994DeletedRPCs:
            problem = "SHOULD BE DELETED"
        else:
            problem = "EXTRA"
        tbl.addRow([rpc, problem])
    mu += (
        "__8994 Rogue RPCs__ are [1] in 8994 but are not active according to "
        "Builds (\"EXTRA\" {:,}) or active by builds but not in 8994 "
        "(\"MISSING\" {:,}) or deleted by builds but in 8994 "
        "(\"SHOULD BE DELETED\" {:,}). __IMPORTANT__: must __test__ if the "
        "extra are still active (have code etc) and if so, why ...\n\n"
    ).format(
        len(extraRPCs), len(_8994MissingActiveRPCs), len(_8994DeletedRPCs))
    mu += tbl.md() + "\n\n"

    if stationNo == "999":
        mu += (
            "__Note__: FOIA (999) has MANY _Rogues_. It seems that redaction "
            "is partial for non Open Source RPCs. It seems that the code is "
            "removed but the RPC remains.\n\n"
        )

    reportLocn = (VISTA_REP_LOCN_TEMPL.format(stationNo) +
                  "rpcsByBuildsNInstalls.md")
    with open(reportLocn, "w") as reportFile:
        reportFile.write(mu)
def muRPCInterfaceDefinition(rpcInterfaceDefinition, isFOIA=False):
    """Return the markdown describing an RPC interface definition.

    The markdown has a summary line followed by sections on: the stages in
    which RPCs are marked inactive, RPC distribution (and deletion) by year,
    the MUMPS routines implementing the RPCs and the packages the RPCs
    belong to.

    rpcInterfaceDefinition -- list of RPC definition dicts. Properties read
        here: "label", "distributed", "installed", "deleteDistributed",
        "isActive", "has8994FullEntry", "hasInstalledBuild",
        "hasActiveSOUsedOptions", "options", "inVistAs", "routine" and
        "package". Must not be empty and must have dated definitions.
    isFOIA -- if True, the last stage counts RPCs with a non removed option
        instead of those with "hasActiveSOUsedOptions".
    """
    # allows global use

    # First Install/Last Install; first distrib/ last distrib (not FMQL)
    distributeds = set(
        rpcDefn["distributed"] for rpcDefn in rpcInterfaceDefinition
        if "distributed" in rpcDefn
        and not re.search(r'FMQL', rpcDefn["label"]))
    firstDistributed = min(distributeds)
    lastDistributed = max(distributeds)
    installeds = set(
        rpcDefn["installed"] for rpcDefn in rpcInterfaceDefinition
        if "installed" in rpcDefn
        and not re.search(r'CG FMQL', rpcDefn["label"]))
    lastInstalled = max(installeds).split("T")[0]

    noDefns = len(rpcInterfaceDefinition)
    noInactive = sum(
        1 for rpcDefn in rpcInterfaceDefinition if "isActive" not in rpcDefn)

    mu = ""
    # TODO: may add 'lastSignon' for user that may use RPC => can see last
    # possible use
    mu += (
        "There are __{:,}__ RPCs, __{}__ of which are active. The first RPCs "
        "were distributed on _{}_, the last on _{}_. The last installation "
        "happened on _{}_."
    ).format(
        noDefns,
        reportAbsAndPercent(noDefns - noInactive, noDefns),
        firstDistributed, lastDistributed, lastInstalled)

    # Walk the deletion/marking of inactive
    mu += " RPCs are marked inactive in stages ... "

    def hasCurrentOption(defn):
        # at least one option of the RPC isn't marked removed
        return "options" in defn and any(
            "isRemoved" not in optionInfo for optionInfo in defn["options"])

    # every stage after the first needs both of these
    inInstalledBuild = [
        defn for defn in rpcInterfaceDefinition
        if "has8994FullEntry" in defn and "hasInstalledBuild" in defn
    ]

    tbl = MarkdownTable(["Stage", "\\# At/After"])
    tbl.addRow(["Total", noDefns])
    # has8994FullEntry
    tbl.addRow([
        "8994 Full Entry",
        sum(1 for defn in rpcInterfaceDefinition
            if "has8994FullEntry" in defn)
    ])
    # Is in installed build
    tbl.addRow(["Installed Build", len(inInstalledBuild)])
    # Is in ActiveSO Option (999 just in active option)
    if isFOIA:
        tbl.addRow([
            "Has Current Option",
            sum(1 for defn in inInstalledBuild if hasCurrentOption(defn))
        ])
    elif "inVistAs" in rpcInterfaceDefinition[0]:  # merged defns
        # issue is that FOIA only RPCs are marked active JUST for having
        # options and these needed to be added in
        baseSet = set(
            defn["label"] for defn in inInstalledBuild
            if "hasActiveSOUsedOptions" in defn)
        foiaOnlySet = set(
            defn["label"] for defn in inInstalledBuild
            if defn["inVistAs"] == ["999"] and hasCurrentOption(defn))
        tbl.addRow(
            ["Has currently used Active Option", len(baseSet | foiaOnlySet)])
    else:
        tbl.addRow([
            "Has currently used Active Option",
            sum(1 for defn in inInstalledBuild
                if "hasActiveSOUsedOptions" in defn)
        ])
    mu += tbl.md() + "\n\n"

    # Summarize RPC introduction over the years ...
    # Can add in 'for HMP for 2016?' etc + add in # removed each year too as
    # extra col
    byYrDistrib = Counter()
    byYrDeleteDistrib = Counter()
    byYrDistribInactive = Counter()
    noDistrib = 0
    withDistribYr = 0
    totalDeleted = 0
    for defn in rpcInterfaceDefinition:
        # allow for edge case that delete distributed date but no distrib
        # date: count under the year of the DELETE's distribution. (The
        # original indexed "distributed" here, which put the deletion in the
        # wrong year and raised KeyError in exactly this edge case.)
        if "deleteDistributed" in defn:
            byYrDeleteDistrib[int(
                defn["deleteDistributed"].split("-")[0])] += 1
            totalDeleted += 1
        if "distributed" not in defn:
            noDistrib += 1
            continue
        yr = int(defn["distributed"].split("-")[0])
        withDistribYr += 1
        byYrDistrib[yr] += 1
        if "isActive" not in defn:
            byYrDistribInactive[yr] += 1
    mu += (
        "### RPC Distribution by Year {:,} RPCs have no 'first distributed' "
        "date as their first builds lacked a date - the other {:,} all have "
        "dates. Here is RPC distribution year by year, along with the small "
        "amount of deletion too. Note that only __{}__ RPCs are formally "
        "deleted though __{}__ should be. "
    ).format(
        noDistrib,
        withDistribYr,
        reportAbsAndPercent(totalDeleted, noDefns),
        reportAbsAndPercent(noInactive, noDefns))
    # Note: deleted = deleted in a year while Inactive == of the active
    tbl = MarkdownTable(["Year", "Added \\#", "Deleted \\#", "Inactive \\#"])
    for yr in sorted(byYrDistrib, reverse=True):
        tbl.addRow([
            str(yr),
            reportAbsAndPercent(byYrDistrib[yr], withDistribYr),
            byYrDeleteDistrib[yr] if yr in byYrDeleteDistrib else "",
            reportAbsAndPercent(byYrDistribInactive[yr], byYrDistrib[yr])
            if yr in byYrDistribInactive else ""
        ])
    mu += tbl.md() + "\n\n"

    # Want MUMPS entry pts for ALL
    cntMUMPSEntry = Counter()
    noMUMPSEntry = 0
    activeRoutines = set()
    inactiveRoutines = set()  # overlap == MIX
    for defn in rpcInterfaceDefinition:
        if "routine" not in defn:
            noMUMPSEntry += 1
            continue
        cntMUMPSEntry[defn["routine"]] += 1
        if "isActive" in defn:
            activeRoutines.add(defn["routine"])
        else:
            inactiveRoutines.add(defn["routine"])
    # a real list: numpy can't reduce a dict view
    rpcCounts = list(cntMUMPSEntry.values())
    mu += (
        "### MUMPS Routine Implementation __{}__ RPCs are implemented in "
        "__{}__ separate MUMPS routines, while __{}__ identified RPCs lack "
        "an implementation. The highest number of RPCs per routine is "
        "__{}__ (_{}_), the median is __{}__, the lowest is __{}__. __{}__ "
        "routines implement only active RPCs, __{:,}__ only inactive RPCs "
        "(candidates for deletion?), while __{:,}__ implement a mix of "
        "active and inactive RPCs.\n\n"
    ).format(
        sum(rpcCounts),
        len(cntMUMPSEntry),
        noMUMPSEntry,
        max(rpcCounts),
        max(cntMUMPSEntry, key=lambda x: cntMUMPSEntry[x]),
        numpy.percentile(rpcCounts, 50),
        min(rpcCounts),
        reportAbsAndPercent(
            len(activeRoutines - inactiveRoutines), len(cntMUMPSEntry)),
        len(inactiveRoutines - activeRoutines),
        len(activeRoutines & inactiveRoutines))

    mu += "The (outliers) that implement the most RPCs are ...\n\n"
    routinesByRPCCnt = defaultdict(list)
    for routine in cntMUMPSEntry:
        routinesByRPCCnt[cntMUMPSEntry[routine]].append(routine)
    tbl = MarkdownTable(["\\# RPCs", "Routine(s)"])
    # outlier threshold: 75th percentile + 3 times the interquartile range
    pct25 = numpy.percentile(rpcCounts, 25)
    pct75 = numpy.percentile(rpcCounts, 75)
    ohto = pct75 + (3 * (pct75 - pct25))
    for cnt in sorted(routinesByRPCCnt, reverse=True):
        if cnt < ohto:
            break
        tbl.addRow([
            cnt, ", ".join(
                "__{}__ [INACTIVE]".format(routine)
                if routine in inactiveRoutines else routine
                for routine in sorted(routinesByRPCCnt[cnt]))
        ])
    mu += tbl.md() + "\n\n"

    # Packages
    noPackageRPCs = []
    rpcByPackage = defaultdict(list)
    firstRPCDistribByPackage = {}
    activeRPCs = set()
    for defn in rpcInterfaceDefinition:
        if "isActive" in defn:
            activeRPCs.add(defn["label"])
        if "package" not in defn:
            noPackageRPCs.append(defn["label"])
            continue
        package = defn["package"]
        rpcByPackage[package].append(defn["label"])
        # keep the earliest distribution seen for the package
        if "distributed" in defn and (
                package not in firstRPCDistribByPackage or
                defn["distributed"] < firstRPCDistribByPackage[package]):
            firstRPCDistribByPackage[package] = defn["distributed"]
    # counted once per package, used by the sets and the tables below
    noActivesByPackage = dict(
        (package,
         sum(1 for rpc in rpcByPackage[package] if rpc in activeRPCs))
        for package in rpcByPackage)
    inactiveOnlyPackages = set(
        package for package in rpcByPackage
        if noActivesByPackage[package] == 0)
    someInactivesPackages = set(
        package for package in rpcByPackage
        if package not in inactiveOnlyPackages and
        noActivesByPackage[package] < len(rpcByPackage[package]))

    # Based on reduction alg - needs to evolve as see effectiveness
    mu += (
        "### Packages _Package_ is a sometimes inconsistently used breakdown "
        "of VistA into a set of cooperating applications. All but __{}__ "
        "RPCs are assigned to __{}__ different packages, __{:,}__ of which "
        "only have _inactive_ RPCs and __{:,}__ more have a mix of active "
        "and inactive RPCs. Those with at least one active RPC are - note "
        "ORDER ENTRY has a huge proportion which MAY be due to "
        "redundant/overlapping purposes of individual RPCs ... "
    ).format(
        reportAbsAndPercent(len(noPackageRPCs), noDefns),
        len(rpcByPackage),
        len(inactiveOnlyPackages),
        len(someInactivesPackages))
    packagesBySize = sorted(
        rpcByPackage, key=lambda x: len(rpcByPackage[x]), reverse=True)
    tbl = MarkdownTable(
        ["Package", "First Distributed RPC", "Active RPCs", "Inactive RPCs"])
    for package in packagesBySize:
        if package in inactiveOnlyPackages:
            continue
        noActives = noActivesByPackage[package]
        noInactives = len(rpcByPackage[package]) - noActives
        tbl.addRow([
            package,
            firstRPCDistribByPackage.get(package, ""),
            noActives if noActives > 0 else "",
            noInactives if noInactives > 0 else ""
        ])
    mu += tbl.md() + "\n\n"

    mu += "The 'inactive-only' Packages are ...\n\n"
    tbl = MarkdownTable(
        ["Package", "First Distributed RPC", "RPCs (Inactive)"])
    for package in packagesBySize:
        if package not in inactiveOnlyPackages:
            continue
        tbl.addRow([
            package,
            firstRPCDistribByPackage.get(package, ""),
            len(rpcByPackage[package])
        ])
    mu += tbl.md() + "\n\n"

    return mu
def reportPackagesNBuilds(stationNo):
    """Write the "Packages and Builds" markdown report of a station.

    Reads the station's "_9_6Reduction.json" (builds) located by
    VISTA_RED_LOCN_TEMPL and writes "packagesAndBuilds.md" to the location
    given by VISTA_REP_LOCN_TEMPL. The report has a summary, a table of
    packages with their build counts and build year range, and a table of
    the builds that have RPCs but no package.

    stationNo -- station number string used to fill the location templates.
    """
    allBuilds = json.load(
        open(VISTA_RED_LOCN_TEMPL.format(stationNo) + "_9_6Reduction.json"))

    # Split builds by package; FMQL builds don't count for the date range
    buildsByPackage = defaultdict(list)
    noPackageBuilds = []
    distribDates = set()
    for build in allBuilds:
        if "dateDistributed" in build and not re.search(
                r'FMQL', build["label"]):
            distribDates.add(build["dateDistributed"])
        if "package" in build:
            buildsByPackage[build["package"]].append(build)
        else:
            noPackageBuilds.append(build)

    def hasBuildFrom2013On(pkg):
        # all dated builds are examined, as in a plain count
        return sum(
            1 for bi in buildsByPackage[pkg] if "dateDistributed" in bi and
            int(bi["dateDistributed"].split("-")[0]) >= 2013) > 0

    def rpcBuildsOf(builds):
        return [bi for bi in builds if "rpcs" in bi]

    packagesWith2013OnBuilds = [
        pkg for pkg in buildsByPackage if hasBuildFrom2013On(pkg)
    ]

    orderedDates = sorted(distribDates)
    firstDateDistributed = orderedDates[0]
    lastDateDistributed = orderedDates[-1]

    countBuildsPerPackage = {}
    for pkg in buildsByPackage:
        countBuildsPerPackage[pkg] = len(buildsByPackage[pkg])
    perPackageCounts = list(countBuildsPerPackage.values())
    medianBuildsPerPackage = numpy.percentile(perPackageCounts, 50)
    maxBuildsPerPackage = max(perPackageCounts)
    pkgWithMostBuilds = sorted(countBuildsPerPackage,
                               key=lambda x: countBuildsPerPackage[x],
                               reverse=True)[0]

    noBuilds = len(allBuilds)
    noPackages = len(buildsByPackage)
    mu = (
        "## Packages and Builds There are {:,} builds, distributed between "
        "{} and {}. {:,} packages cover {} of the builds, median number of "
        "builds per package is {:,}, maximum is {:,} in __{}__. Only {} "
        "packages have builds distributed from 2013 on (_should the balance "
        "be retired?_). {} builds have no package and only {} builds have "
        "RPCs. "
    ).format(
        noBuilds,
        firstDateDistributed,
        lastDateDistributed,
        noPackages,
        reportAbsAndPercent(sum(perPackageCounts), noBuilds),
        medianBuildsPerPackage,
        maxBuildsPerPackage,
        pkgWithMostBuilds,
        reportAbsAndPercent(len(packagesWith2013OnBuilds), noPackages),
        reportAbsAndPercent(len(noPackageBuilds), noBuilds),
        reportAbsAndPercent(len(rpcBuildsOf(allBuilds)), noBuilds))

    # Packages, biggest first - those with RPC builds are highlighted
    noPackagesWRPCBuilds = len(
        [pkg for pkg in buildsByPackage if rpcBuildsOf(buildsByPackage[pkg])])
    mu += (
        "{:,} Packages and their builds, highlight for the {:,} packages "
        "with at least one RPC build ...\n\n"
    ).format(noPackages, noPackagesWRPCBuilds)
    tbl = MarkdownTable([
        "Package", "Build \\#", "Build Dates", "Build w/RPC \\#",
        "Build w/RPC Delete \\#"
    ])
    packagesBySize = sorted(
        buildsByPackage, key=lambda x: len(buildsByPackage[x]), reverse=True)
    for pkg in packagesBySize:
        pkgBuilds = buildsByPackage[pkg]
        rpcBuilds = rpcBuildsOf(pkgBuilds)
        deletingBuilds = [
            bi for bi in rpcBuilds if "DELETE AT SITE" in bi["rpcs"]
        ]
        # year range of the package's dated builds
        years = sorted(
            set(bi["dateDistributed"].split("-")[0] for bi in pkgBuilds
                if "dateDistributed" in bi))
        if not years:
            ddMU = ""
        elif len(years) == 1:
            ddMU = years[0]
        else:
            ddMU = "{} - {}".format(years[0], years[-1])
        tbl.addRow([
            "__{}__".format(pkg) if rpcBuilds else pkg,
            len(pkgBuilds),
            ddMU,
            len(rpcBuilds) or "",
            len(deletingBuilds) or ""
        ])
    mu += tbl.md() + "\n\n"

    # Builds with RPCs that weren't assigned a package
    noPackageBuildsWRPCs = rpcBuildsOf(noPackageBuilds)
    mu += "{:,} Builds without a Package but with RPCs ...\n\n".format(
        len(noPackageBuildsWRPCs))
    tbl = MarkdownTable(["Build", "RPC \\#s"])
    for build in sorted(noPackageBuildsWRPCs, key=lambda x: x["label"]):
        noRPCs = sum(len(build["rpcs"][action]) for action in build["rpcs"])
        tbl.addRow([build["label"], noRPCs])
    mu += tbl.md() + "\n\n"

    open(VISTA_REP_LOCN_TEMPL.format(stationNo) + "packagesAndBuilds.md",
         "w").write(mu)