def __init__(self, node):
    """
    Collect the status history, role, and update person of a lead org.

    Pass:
        node - DOM node whose children are scanned for the
               LeadOrgProtocolStatuses, LeadOrgRole, and
               LeadOrgPersonnel elements

    Sets:
        statuses - sorted list of Protocol.Status objects, each with
                   its endDate filled in
        role     - stripped text of LeadOrgRole (None if empty/missing)
        pupLink  - cdr:ref of the person whose role is "Update Person"
                   (None if no such person was found)
    """

    self.statuses = []
    self.role = None
    self.pupLink = None
    status_elements = ("PreviousOrgStatus", "CurrentOrgStatus")

    for child in node.childNodes:
        tag = child.nodeName
        if tag == "LeadOrgProtocolStatuses":
            for status_node in child.childNodes:
                if status_node.nodeName not in status_elements:
                    continue
                name = date = ""
                for part in status_node.childNodes:
                    if part.nodeName == "StatusDate":
                        date = cdr.getTextContent(part)
                    elif part.nodeName == "StatusName":
                        name = cdr.getTextContent(part)
                # Only complete name/date pairs make it into the history.
                if name and date:
                    self.statuses.append(Protocol.Status(name, date))
        elif tag == "LeadOrgRole":
            self.role = cdr.getTextContent(child).strip() or None
        elif tag == "LeadOrgPersonnel":
            role = link = ""
            for part in child.childNodes:
                if part.nodeName == "PersonRole":
                    role = cdr.getTextContent(part).strip()
                elif part.nodeName == "Person":
                    link = part.getAttribute("cdr:ref").strip()
            if role.upper() == "UPDATE PERSON":
                self.pupLink = link

    self.statuses.sort()

    # Each status ends when its successor starts; the final one is
    # treated as running through today.
    for current, following in zip(self.statuses, self.statuses[1:]):
        current.endDate = following.startDate
    if self.statuses:
        self.statuses[-1].endDate = time.strftime("%Y-%m-%d")
def __init__(self, xmlFragment, personTitleHandling=TITLE_OMITTED):
    """
    Parameters:
        xmlFragment         - Either DOM object for parsed address XML,
                              or the string (str or bytes) containing
                              the XML for the address.
                              The top node should be <AddressElements>
        personTitleHandling - Stored on the object unchanged; defaults
                              to TITLE_OMITTED.
    """

    self.__addressee = None
    self.__personalName = None
    self.__orgs = []            # Main + parent orgs in right order
    self.__street = []
    self.__city = None
    self.__citySuffix = None
    self.__state = None
    self.__country = None
    self.__postalCode = None
    self.__codePos = None
    self.__personTitle = None
    self.__phone = None
    self.__fax = None
    self.__email = None
    self.__ptHandling = personTitleHandling

    # Serialized XML gets parsed here; anything else is assumed to be
    # an already-parsed DOM object.  isinstance() (rather than an exact
    # type comparison) lets subclasses of str/bytes be parsed as well.
    if isinstance(xmlFragment, (str, bytes)):
        dom = xml.dom.minidom.parseString(xmlFragment)
    else:
        dom = xmlFragment

    # No organization name nodes identified yet
    orgParentNode = None

    # Parse parts of an address
    if dom:
        for node in dom.documentElement.childNodes:
            name = node.nodeName
            if name == 'PostalAddress':
                self.__parsePostalAddress(node)
            elif name in ('Name', 'PersonName'):
                self.__personalName = PersonalName(node)
                self.__addressee = self.__personalName.format()
            elif name == 'OrgName':
                self.__orgs.append(cdr.getTextContent(node).strip())
            elif name == 'ParentNames':
                # Deferred until all children have been seen, so that
                # the parents are merged into the complete org list.
                orgParentNode = node
            elif name == 'PersonTitle':
                self.__personTitle = cdr.getTextContent(node).strip()
            elif name == 'Phone':
                self.__phone = cdr.getTextContent(node).strip()
            elif name == 'Fax':
                self.__fax = cdr.getTextContent(node).strip()
            elif name == 'Email':
                self.__email = cdr.getTextContent(node).strip()

    # If we got them, get org parent names to __orgs in right order
    if orgParentNode:
        self.__parseOrgParents(orgParentNode)
def __parseBoardMemberAssistantInfo(self, node):
    """
    Copy the assistant's contact details from the children of `node`.

    Pass:
        node - DOM node whose AssistantName, AssistantPhone,
               AssistantFax, and AssistantEmail children supply the
               text stored in asstName, asstPhone, asstFax, and
               asstEmail respectively
    """

    # Element name -> attribute which receives that element's text.
    targets = {
        "AssistantName": "asstName",
        "AssistantPhone": "asstPhone",
        "AssistantFax": "asstFax",
        "AssistantEmail": "asstEmail",
    }
    for child in node.childNodes:
        attribute = targets.get(child.nodeName)
        if attribute is not None:
            setattr(self, attribute, cdr.getTextContent(child))
def __init__(self, nameNode, phoneNode, emailNode):
    """
    Capture the name, phone, and email of a board manager.

    Pass:
        nameNode  - DOM node holding the manager's name
        phoneNode - DOM node holding the manager's phone
        emailNode - DOM node holding the manager's email

    Raises:
        Exception if any node is missing, or if any of the three
        values is empty once surrounding whitespace has been removed.
    """

    # All three nodes must be present before any text is extracted.
    required_nodes = (
        (nameNode, "Missing BoardManager element"),
        (phoneNode, "Missing required phone for board manager"),
        (emailNode, "Missing required email for board manager"),
    )
    for dom_node, complaint in required_nodes:
        if not dom_node:
            raise Exception(complaint)

    self.name = cdr.getTextContent(nameNode).strip()
    self.phone = cdr.getTextContent(phoneNode).strip()
    self.email = cdr.getTextContent(emailNode).strip()

    # Present but blank is just as unacceptable as absent.
    required_values = (
        (self.name, "Name required for board manager"),
        (self.phone, "Phone required for board manager"),
        (self.email, "Email required for board manager"),
    )
    for value, complaint in required_values:
        if not value:
            raise Exception(complaint)
def collectErrors(node):
    """
    Gather the text of every <Err/> element directly under `node`.

    Pass:
        node - DOM node whose immediate children are examined

    Return:
        list with one string per Err child, in document order
        (empty if there are none)
    """

    return [
        cdr.getTextContent(child)
        for child in node.childNodes
        if child.nodeName == "Err"
    ]
def __init__(self, node):
    """
    Pull the pieces of a personal name out of a DOM node.

    Parameters:
        node - PersonName or Name subelement DOM node

    Every piece defaults to an empty string (or an empty list for the
    professional suffixes) when the corresponding element is absent.
    """

    def text_of(element):
        """Return the whitespace-trimmed text content of `element`."""
        return cdr.getTextContent(element).strip()

    self.__givenName = ""
    self.__middleInitial = ""
    self.__surname = ""
    self.__prefix = ""
    self.__genSuffix = ""
    self.__proSuffixes = []

    professional = ("StandardProfessionalSuffix",
                    "CustomProfessionalSuffix")

    for child in node.childNodes:
        tag = child.nodeName
        if tag == "GivenName":
            self.__givenName = text_of(child)
        elif tag == "MiddleInitial":
            self.__middleInitial = text_of(child)
        elif tag == "SurName":
            self.__surname = text_of(child)
        elif tag == "ProfessionalSuffix":
            # Keep only the non-empty standard/custom suffix values.
            candidates = (
                text_of(grandchild)
                for grandchild in child.childNodes
                if grandchild.nodeName in professional
            )
            self.__proSuffixes.extend(s for s in candidates if s)
        elif tag == "Prefix":
            self.__prefix = text_of(child)
        elif tag == "GenerationSuffix":
            self.__genSuffix = text_of(child)
def __parseBoardMembershipDetails(self, node):
    """
    Record the term renewal frequency for this member's board.

    Pass:
        node - DOM node whose BoardName and TermRenewalFrequency
               children are examined

    The renewalFrequency attribute is only assigned when the block
    names a board (via cdr:ref) matching self.board.id and carries a
    non-empty TermRenewalFrequency.  A block which refers to some
    other board is abandoned immediately.
    """

    matched_board = None
    term_frequency = None
    for child in node.childNodes:
        tag = child.nodeName
        if tag == "BoardName":
            ref = child.getAttribute("cdr:ref")
            if not ref:
                continue
            matched_board = cdr.exNormalize(ref)[1]
            if matched_board != self.board.id:
                # These details belong to a different board.
                return
        elif tag == "TermRenewalFrequency":
            term_frequency = cdr.getTextContent(child)
    if matched_board and term_frequency:
        self.renewalFrequency = term_frequency
def __parseMemberDoc(self, id, ver):
    """
    Load one version of a board member document and harvest its data.

    Pass:
        id  - document ID handed to cdr.getDoc()
        ver - version to fetch (converted to a string for the call)

    Raises:
        Exception if cdr.getErrors() reports problems with the
        retrieved document.

    Picks up the contact ID from BoardMemberContact/PersonContactID,
    and hands the BoardMembershipDetails and BoardMemberAssistant
    blocks to their dedicated parsers.
    """

    doc = cdr.getDoc('guest', id, version=str(ver), getObject=True)
    problems = cdr.getErrors(doc, errorsExpected=False, asSequence=True)
    if problems:
        raise Exception("loading member doc: %s" % "; ".join(problems))

    root = xml.dom.minidom.parseString(doc.xml).documentElement
    for section in root.childNodes:
        tag = section.nodeName
        if tag == "BoardMemberContact":
            for item in section.childNodes:
                if item.nodeName == "PersonContactID":
                    self.contactId = cdr.getTextContent(item)
        elif tag == "BoardMembershipDetails":
            self.__parseBoardMembershipDetails(section)
        elif tag == "BoardMemberAssistant":
            self.__parseBoardMemberAssistantInfo(section)
def __parseOrgParents(self, node):
    """
    Add the names found in a ParentNames element to the list of
    organizations, arranging the list in the requested order.

    Pass:
        node - DOM node of ParentNames element
    """

    # The attribute says whether parents belong ahead of the org.
    parents_first = node.getAttribute("OrderParentNameFirst") == "Yes"

    parent_names = [
        cdr.getTextContent(child).strip()
        for child in node.childNodes
        if child.nodeName == "ParentName"
    ]
    self.__orgs.extend(parent_names)

    # Flipping the whole list puts the parents in front.
    if parents_first:
        self.__orgs.reverse()
def __parsePostalAddress(self, node):
    """
    Extracts individual elements from street address, storing
    each in a field of the Address object.

    Pass:
        node - DOM node of PostalAddress element

    Street may occur more than once (each occurrence is appended to
    the list of street lines); for every other element the last
    occurrence wins.
    """

    for child in node.childNodes:

        # Skip text, comments, etc.  The test must be applied to the
        # child; checking the parent's node type (as was done before)
        # is always true and so never filtered anything.
        if child.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
            continue

        name = child.nodeName
        if name == "Street":
            self.__street.append(cdr.getTextContent(child).strip())
        elif name == "City":
            self.__city = cdr.getTextContent(child).strip()
        elif name == "CitySuffix":
            self.__citySuffix = cdr.getTextContent(child).strip()
        elif name in ("State", "PoliticalSubUnit_State"):
            self.__state = cdr.getTextContent(child).strip()
        elif name == "Country":
            self.__country = cdr.getTextContent(child).strip()
        elif name == "PostalCode_ZIP":
            self.__postalCode = cdr.getTextContent(child).strip()
        elif name == "CodePosition":
            self.__codePos = cdr.getTextContent(child).strip()
def __init__(self, id, node):
    """
    Create a protocol object from the XML document.

    Pass:
        id   - stored unchanged as self.id
        node - DOM node whose children carry the protocol's sponsors,
               sources, designs, IDs, titles, admin info (lead orgs
               and current status), detail (categories, study types)
               and phases

    The display title prefers the professional title, then the
    original title, then the patient title.

    After the XML has been read, the protocol-level status history
    is built by replaying the lead organizations' status changes in
    date order and asking self.getProtStatus() for the overall status
    after each date.  The firstPub, closed, and completed dates are
    derived during that replay.
    """

    self.id = id
    self.leadOrgs = []
    self.statuses = []
    self.status = ""
    self.primaryId = ""
    self.otherIds = []
    self.firstPub = ""
    self.closed = ""
    self.completed = ""
    self.studyTypes = []
    self.categories = []
    self.sources = []
    self.designs = []
    self.pupLink = []
    self.sponsors = []
    self.title = ""
    self.origTitle = ""
    self.phases = []
    profTitle = ""
    patientTitle = ""
    originalTitle = ""

    for child in node.childNodes:
        if child.nodeName == "ProtocolSponsors":
            for grandchild in child.childNodes:
                if grandchild.nodeName == "SponsorName":
                    value = cdr.getTextContent(grandchild)
                    if value:
                        self.sponsors.append(value)
        elif child.nodeName == 'ProtocolSources':
            for grandchild in child.childNodes:
                if grandchild.nodeName == 'ProtocolSource':
                    for greatgrandchild in grandchild.childNodes:
                        if greatgrandchild.nodeName == 'SourceName':
                            source = cdr.getTextContent(greatgrandchild)
                            source = source.strip()
                            if source:
                                self.sources.append(source)
        elif child.nodeName == 'ProtocolDesign':
            design = cdr.getTextContent(child).strip()
            if design:
                self.designs.append(design)
        elif child.nodeName == "ProtocolIDs":
            for grandchild in child.childNodes:
                if grandchild.nodeName == "PrimaryID":
                    for greatgrandchild in grandchild.childNodes:
                        if greatgrandchild.nodeName == "IDString":
                            value = cdr.getTextContent(greatgrandchild)
                            self.primaryId = value
                elif grandchild.nodeName == "OtherID":
                    for greatgrandchild in grandchild.childNodes:
                        if greatgrandchild.nodeName == "IDString":
                            value = cdr.getTextContent(greatgrandchild)
                            if value:
                                self.otherIds.append(value)
        elif child.nodeName == "ProtocolTitle":
            titleType = child.getAttribute("Type")
            value = cdr.getTextContent(child)
            if value:
                if titleType == "Professional":
                    profTitle = value
                elif titleType == "Patient":
                    patientTitle = value
                elif titleType == "Original":
                    originalTitle = self.origTitle = value
        elif child.nodeName == "ProtocolAdminInfo":
            for grandchild in child.childNodes:
                if grandchild.nodeName == "ProtocolLeadOrg":
                    self.leadOrgs.append(self.LeadOrg(grandchild))
                elif grandchild.nodeName == "CurrentProtocolStatus":
                    value = cdr.getTextContent(grandchild)
                    if value:
                        self.status = value
        elif child.nodeName == "ProtocolDetail":
            for grandchild in child.childNodes:
                if grandchild.nodeName == 'StudyCategory':
                    for greatgrandchild in grandchild.childNodes:
                        if greatgrandchild.nodeName == 'StudyCategoryName':
                            cat = cdr.getTextContent(greatgrandchild)
                            cat = cat.strip()
                            if cat:
                                self.categories.append(cat)
                elif grandchild.nodeName == 'StudyType':
                    studyType = cdr.getTextContent(grandchild).strip()
                    if studyType:
                        self.studyTypes.append(studyType)
        elif child.nodeName == 'ProtocolPhase':
            self.phases.append(cdr.getTextContent(child))

    # Pick the title to show: professional, else original, else patient.
    if profTitle:
        self.title = profTitle
    elif originalTitle:
        self.title = originalTitle
    elif patientTitle:
        self.title = patientTitle

    # orgStatuses holds the most recent status of each lead org (by
    # position in self.leadOrgs); statuses maps each start date to the
    # (lead org position, status name) changes taking effect that day.
    orgStatuses = []
    statuses = {}
    for position, leadOrg in enumerate(self.leadOrgs):
        if leadOrg.role == 'Primary' and leadOrg.pupLink:
            self.pupLink = leadOrg.pupLink
        orgStatuses.append("")
        for orgStatus in leadOrg.statuses:
            change = (position, orgStatus.name)
            statuses.setdefault(orgStatus.startDate, []).append(change)

    # Replay the changes in date order, recomputing the protocol-level
    # status after all of a given day's changes have been applied.
    for startDate in sorted(statuses):
        for position, orgStatus in statuses[startDate]:
            try:
                orgStatuses[position] = orgStatus
            except IndexError:
                # Should be impossible; dump the state to aid diagnosis.
                print("statuses: %s" % repr(statuses))
                print("orgStatuses: %s" % repr(orgStatuses))
                raise
        protStatus = self.getProtStatus(orgStatuses)
        if protStatus == "Active" and not self.firstPub:
            self.firstPub = startDate
        if protStatus in ("Active", "Approved-not yet active",
                          "Temporarily closed"):
            # An open protocol has no closing date.
            self.closed = ""
        elif not self.closed:
            self.closed = startDate
        if protStatus == 'Completed':
            self.completed = startDate
        else:
            self.completed = ""
        # The previous status ends where this one begins.
        if self.statuses:
            self.statuses[-1].endDate = startDate
        self.statuses.append(Protocol.Status(protStatus, startDate))

    # The latest status is treated as running through today.
    if self.statuses:
        self.statuses[-1].endDate = time.strftime("%Y-%m-%d")
def __init__(self, node):
    """
    Remember the date of a board meeting.

    Pass:
        node - DOM node whose MeetingDate child supplies the date

    The date falls back to "0000-00-00" if there is no MeetingDate
    child; if there are several, the last one is used.
    """

    meeting_nodes = [
        child for child in node.childNodes
        if child.nodeName == 'MeetingDate'
    ]
    self.date = "0000-00-00"
    for meeting_node in meeting_nodes:
        self.date = cdr.getTextContent(meeting_node)
def __parseBoardDoc(self, id):
    """
    Load the last version of a board's document and populate this
    object from it.

    Pass:
        id - integer CDR document ID of the board

    Raises:
        Exception if the document cannot be loaded cleanly, if no
        board name or board manager is found, or if the board type
        is neither 'PDQ Advisory Board' nor 'PDQ Editorial Board'
        (compared without regard to case).

    Only meetings dated today (self.today) or later are kept, sorted
    by date.  Depending on the board type, the matching advisory or
    editorial board's ID and name are looked up as well.
    """

    cutoff = str(self.today)
    cdr_id = "CDR%d" % id
    last_versions = cdr.lastVersions('guest', cdr_id)
    version = str(last_versions[0])
    doc = cdr.getDoc('guest', cdr_id, version=version, getObject=True)
    problems = cdr.getErrors(doc, errorsExpected=False, asSequence=True)
    if problems:
        raise Exception("loading doc for board %d: %s" %
                        (id, "; ".join(problems)))

    root = xml.dom.minidom.parseString(doc.xml).documentElement
    for node in root.childNodes:
        tag = node.nodeName
        if tag == "OrganizationNameInformation":
            for child in node.childNodes:
                if child.nodeName != "OfficialName":
                    continue
                for grandchild in child.childNodes:
                    if grandchild.nodeName == "Name":
                        self.name = cdr.getTextContent(grandchild)
        elif tag == "OrganizationType":
            self.boardType = cdr.getTextContent(node)
        elif tag == "PDQBoardInformation":

            # Nodes needed to build the manager; None until found.
            found = {
                "BoardManager": None,
                "BoardManagerPhone": None,
                "BoardManagerEmail": None,
            }
            for child in node.childNodes:
                name = child.nodeName
                if name in found:
                    found[name] = child
                elif name == "BoardMeetings":
                    for grandchild in child.childNodes:
                        if grandchild.nodeName == 'BoardMeeting':
                            meeting = self.MeetingDate(grandchild)
                            if meeting.date >= cutoff:
                                self.meetingDates.append(meeting)
                elif name == "AdvisoryBoardFor":
                    ref = child.getAttribute("cdr:ref")
                    self.edBoardId = cdr.exNormalize(ref)[1]
            self.manager = self.Manager(found["BoardManager"],
                                        found["BoardManagerPhone"],
                                        found["BoardManagerEmail"])

    if not (self.name and self.name.strip()):
        raise Exception("no name found for board in document %d" % id)
    if not self.manager:
        raise Exception("no board manager found in document %d" % id)

    # The board-specific values are looked up by the name as found in
    # the document; the name is only trimmed afterwards.
    self.boardValues = BoardValues.findBoardValues(self.name)
    self.summaryType = self.boardValues.summaryType
    self.workingGroups = self.boardValues.workingGroupBlock
    self.invitePara = self.boardValues.invitationParagraph
    self.meetingDates.sort(key=lambda meeting: meeting.date)
    self.name = self.name.strip()

    board_type = self.boardType.upper()
    if board_type == 'PDQ ADVISORY BOARD':
        self.advBoardId = self.id
        self.advBoardName = self.name
        self.edBoardName = self.__getBoardName(self.edBoardId)
    elif board_type == 'PDQ EDITORIAL BOARD':
        self.edBoardId = self.id
        self.edBoardName = self.name
        self.advBoardId = self.__findAdvBoardFor(self.id)
        self.advBoardName = self.__getBoardName(self.advBoardId)
    else:
        raise Exception('Board type: %s' % self.boardType)
def __init__(self, specNode):
    """
    Constructor loads SweepSpec from a dom node.

    Pass:
        DOM node of a SweepSpec element in a configuration file.

    Recognized child elements are Name, Action, InputRoot, InputFiles
    (containing File elements), OutputFile, Oldest, Youngest, Biggest,
    Smallest, CustomProc, and Comment.  The optional Tiers attribute
    is a whitespace-separated list of tiers the spec applies to.

    Any problem found in the specification is reported through
    fatalError().
    """

    # Initialize specification invalid values
    self.specName = "Unknown"   # Name for report
    self.action = None          # What to do with files
    self.root = None            # Where to look for files
    self.inFiles = []           # File paths to look for
    self.outFile = None         # Output file for archive
    self.oldSpec = None         # If at least one file older than this
    self.youngSpec = None       # Files must be older than this
    self.maxSizeSpec = None     # If file bigger than this
    self.truncSizeSpec = None   # Truncate file to this size
    self.customProc = None      # Name of custom sweep routine, if any

    # Start with the assumption that spec applies to all tiers.
    self.tiers = None

    # Set this flag to true when the archive is successfully saved
    self.okayToDelete = False

    # These fields track what actually matches the specification
    # Initialized to invalid values, filled in by self.statFiles()
    self.oldestDate = 0         # Date of oldest file found in root/inFiles
    self.youngestDate = 0       # Date of youngest found
    self.biggestSize = 0        # Size of biggest file found
    self.smallestSize = 0       # Size of smallest
    self.totalList = []         # All names of files found in root/inFiles
    self.qualifiedList = []     # qualFile objects qualified for action
    self.totalBytes = 0         # Total bytes in all files
    self.qualifiedBytes = 0     # Total bytes in qualified files
    self.archivedFiles = 0      # Number successfully archived
    self.archivedBytes = 0      #   "          "         "
    self.truncFiles = 0         # Number successfully truncated
    self.truncBytes = 0         #   "          "         "
    self.msgs = []              # Messages accrued during processing
    self.statted = False        # Info has been collected

    # All times relative to right now, normalized to previous midnight
    now = normTime(time.time())

    # Find out if this spec only applies to specific tiers.
    tiers = specNode.getAttribute("Tiers")
    if tiers:
        self.tiers = set(tiers.split())

    # The placeholders are printf-style, so the message has to be
    # filled in with the % operator; str.format() would leave the
    # "%r" markers in place and drop the values.
    unrecognized = "Unrecognized element %r in SweepSpec %r"

    # Load all significant fields
    for node in specNode.childNodes:
        if node.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
            continue
        elem = node.nodeName
        if elem == 'Name':
            self.specName = cdr.getTextContent(node)
        elif elem == 'Action':
            self.action = cdr.getTextContent(node)
        elif elem == 'InputRoot':
            self.root = cdr.getTextContent(node)
        elif elem == 'InputFiles':
            for child in node.childNodes:
                if child.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
                    continue
                if child.nodeName == 'File':
                    self.inFiles.append(cdr.getTextContent(child))
                elif child.nodeName == 'Comment':
                    pass
                else:
                    fatalError(unrecognized %
                               (child.nodeName, self.specName))
        elif elem == 'OutputFile':
            self.outFile = cdr.getTextContent(node)
        elif elem == 'Oldest':
            # Convert to UNIX time = seconds since epoch
            days = int(cdr.getTextContent(node))
            self.oldSpec = now - (days * DAY_SECS)
        elif elem == 'Youngest':
            days = int(cdr.getTextContent(node))
            self.youngSpec = now - (days * DAY_SECS)
        elif elem == 'Biggest':
            self.maxSizeSpec = int(cdr.getTextContent(node))
        elif elem == 'Smallest':
            self.truncSizeSpec = int(cdr.getTextContent(node))
        elif elem == 'CustomProc':
            self.customProc = cdr.getTextContent(node)
        elif elem == 'Comment':
            pass
        else:
            fatalError(unrecognized % (elem, self.specName))

    # Validate
    if self.specName == "Unknown":
        fatalError("No Name subelement in one of the SweepSpec elements")
    if not self.action:
        fatalError(f"No Action in SweepSpec {self.specName!r}")
    if self.action not in ('Archive', 'Delete', 'TruncateArchive',
                           'TruncateDelete', 'Custom'):
        msg = "Invalid Action '{}' in SweepSpec '{}'"
        fatalError(msg.format(self.action, self.specName))
    if not self.inFiles:
        # Custom procedures are not required to name any input files.
        if self.action != 'Custom':
            msg = "No File (or InputFiles?) in SweepSpec '{}'"
            fatalError(msg.format(self.specName))

    # Validate combinations of specs
    if not self.outFile and self.action in ('Archive', 'TruncateArchive'):
        msg = "No output file specified for SweepSpec {} with Action={}"
        fatalError(msg.format(self.specName, self.action))
    if not (self.oldSpec and self.youngSpec):
        if self.action == 'Archive':
            msg = "Must specify Oldest/Youngest for Archive SweepSpec {}"
            fatalError(msg.format(repr(self.specName)))
    if not (self.maxSizeSpec and self.truncSizeSpec):
        if self.action.startswith('Truncate'):
            msg = "Must specify Biggest/Smallest for Truncate "
            msg += "SweepSpec '{}'"
            fatalError(msg.format(self.specName))
    if self.customProc == 'expireMeetingRecordings':
        if not self.oldSpec:
            msg = "Must specify Oldest for Custom SweepSpec '{}'"
            fatalError(msg.format(self.specName))

    # Times should be reasonable e.g., now until 10 years before now
    if self.oldSpec:
        if self.oldSpec >= now or self.youngSpec and self.youngSpec >= now:
            fatalError('A date >= current date in SweepSpec "%s"' %
                       self.specName)
    longAgo = now - LONG_TIME
    if self.oldSpec and self.oldSpec < longAgo:
        fatalError(
            '"Oldest" date is older than %d years in SweepSpec "%s"' %
            (YEARS_OLD, self.specName))
    if self.youngSpec and self.youngSpec < longAgo:
        fatalError(
            '"Youngest" date is older than %d years in SweepSpec "%s"' %
            (YEARS_OLD, self.specName))

    # Age-based and size-based selection are mutually exclusive.
    if self.oldSpec and self.maxSizeSpec:
        fatalError("Can't specify both big/small and old/young")