def doc(self):
    """The subject of the report.

    Built on first use from the "id" form field, using a guest session.
    The value is `None` when no document ID was submitted. The result
    is cached in `self._doc`.
    """

    if hasattr(self, "_doc"):
        return self._doc
    requested_id = self.fields.getvalue("id")
    if not requested_id:
        self._doc = None
    else:
        self._doc = Doc(Session("guest"), id=requested_id)
    return self._doc
def compare(self):
    """Compare the definitions on two tiers.

    Collects the query term definition paths from the `self.lower` and
    `self.upper` tiers using guest sessions, then sends a page listing
    the paths which exist on only one of the tiers. If neither tier has
    a path the other lacks, the page says the tiers match instead.
    """

    def tier_paths(tier):
        """Return the set of definition paths found on one tier."""
        session = Session("guest", tier=tier)
        return {d.path for d in QueryTermDef.get_definitions(session)}

    lower = tier_paths(self.lower)
    upper = tier_paths(self.upper)
    buttons = (
        self.HTMLPage.button(self.MANAGE),
        self.HTMLPage.button(self.DEVMENU),
        self.HTMLPage.button(self.ADMINMENU),
        self.HTMLPage.button(self.LOG_OUT),
    )
    opts = dict(
        action=self.script,
        buttons=buttons,
        subtitle=self.subtitle,
        session=self.session,
        method=self.method,
    )
    page = self.HTMLPage(self.title, **opts)

    # One fieldset per tier which has paths the other tier lacks. Both
    # lists use the same case-insensitive sort key so that they are
    # ordered identically (upper-casing and lower-casing rank characters
    # such as "_" differently relative to letters).
    differences = (
        (self.lower, lower - upper),
        (self.upper, upper - lower),
    )
    tiers_match = True
    for tier, paths in differences:
        if not paths:
            continue
        tiers_match = False
        fieldset = page.fieldset(f"Only on {tier}")
        ul = page.B.UL()
        for path in sorted(paths, key=str.lower):
            ul.append(page.B.LI(path))
        fieldset.append(ul)
        page.form.append(fieldset)
    if tiers_match:
        p = page.B.P(f"{self.lower} and {self.upper} match.")
        p.set("class", "news info center")
        page.form.append(p)
    page.send()
def transform(self):
    """XSL/T filter used for this load.

    The filter document is found by its title, which is derived from
    `self.type`, and is compiled once; the compiled transform is cached
    in `self._transform`.
    """

    if hasattr(self, "_transform"):
        return self._transform
    filter_title = f"Index {self.type.capitalize()} Dictionary"
    filter_id = Doc.id_from_title(filter_title, self.cursor)
    guest = Session("guest", tier=self.tier)
    filter_doc = Doc(guest, id=filter_id)
    self._transform = etree.XSLT(filter_doc.root)
    self.logger.info("Loaded %r filter", filter_title)
    return self._transform
def __init__(self, session, job_id, spec_id, *docs, **opts):
    """
    Fetch information for this run from the database

    Pass:
      session - value handed to the `Session` constructor
      job_id - selects the `export_spec` row (with `spec_id`)
      spec_id - selects the `export_spec` row (with `job_id`)
      docs - strings in the form "id/version", one per document
      opts - keyword options, stored unchanged as `self.opts`
    """

    self.session = Session(session)
    self.opts = opts
    self.job_id = job_id

    # Load the filters and subdirectory for this export specification.
    query = cdrdb.Query("export_spec", "filters", "subdir")
    for column, value in (("job_id", job_id), ("spec_id", spec_id)):
        query.where(query.Condition(column, value))
    spec = query.execute(self.cursor).fetchone()

    # NOTE(review): eval() executes whatever expression is stored in the
    # `filters` column; this is only safe if nothing untrusted can write
    # to that column -- confirm.
    self.filters = eval(spec.filters)
    self.subdir = spec.subdir

    # Build a `Doc` object for each "id/version" string.
    self.docs = []
    for doc_spec in docs:
        doc_id, doc_version = doc_spec.split("/")
        doc_opts = {"id": doc_id, "version": doc_version}
        if doc_version == "lastp":
            doc_opts["before"] = self.job_start
        self.docs.append(Doc(self.session, **doc_opts))
def session(self):
    """Guest session for fetching blobs from media documents.

    Created on first use and cached in `self._session`.
    """

    if hasattr(self, "_session"):
        return self._session
    self._session = Session("guest")
    return self._session
def session(self):
    """CDR Session object (guest account) for retrieving documents.

    Created on first use and cached in `self._session`.
    """

    try:
        return self._session
    except AttributeError:
        self._session = Session("guest")
    return self._session
parser.add_argument("--comment", "-c", help="save comment") parser.add_argument("--version", "-v", help="create version", action="store_true") parser.add_argument("--publishable", "-p", help="make version publishable", action="store_true") parser.add_argument("--force", "-f", help="force checkout", action="store_true") opts = parser.parse_args() if opts.session: session = Session(opts.session, tier=opts.tier) else: session = Session.create_session(opts.user, tier=opts.tier) print(session) with open(opts.xml, "rb") as fp: xml = fp.read() doc = Doc(session, id=opts.id, doctype=opts.doctype, xml=xml) if opts.id: doc.check_out(force=opts.force) version = opts.version or opts.publishable val_types = ["schema", "links"] if opts.publishable else None save_opts = dict( version=version, publishable=opts.publishable, val_types=val_types, comment=opts.comment,
#!/usr/bin/env python
#----------------------------------------------------------------------
# Sends the raw XML for a document to a browser.  Useful with IE5.x,
# which by default shows a hierarchical tree display for the data.
#----------------------------------------------------------------------
from cgi import FieldStorage
from cdrcgi import sendPage, bail, DOCID
from cdrapi.docs import Doc
from cdrapi.users import Session

#----------------------------------------------------------------------
# Find out which document was requested (a document ID is required).
#----------------------------------------------------------------------
title = "CDR Document XML"
fields = FieldStorage()
doc_id = fields.getvalue(DOCID) or bail("No Document", title)

#----------------------------------------------------------------------
# Fetch the document's XML as a guest (no filtering is applied) and
# send it back to the browser.
#----------------------------------------------------------------------
sendPage(Doc(Session("guest"), id=doc_id).xml, "xml")
class Video:
    """Information about a YouTube video."""

    IMAGE_URL = "https://img.youtube.com/vi/{}/hqdefault.jpg"
    VIDEO_URL = "https://www.youtube.com/watch?v={}"
    SESSION = Session("guest")

    def __init__(self, node):
        """Capture the caller's information.

        Pass:
            node - wrapper node for the video information
        """

        self.__node = node

    @property
    def id(self):
        """CDR ID for the video's Media document.

        `None` if there is no `VideoID` child, or if an ID cannot be
        extracted from its `ref` attribute.
        """

        if not hasattr(self, "_id"):
            node = self.node.find("VideoID")
            self._id = None
            if node is not None:
                try:
                    self._id = Doc.extract_id(node.get(f"{{{Doc.NS}}}ref"))
                except Exception:
                    # Best effort: an unusable reference leaves the ID
                    # as None. Catching `Exception` instead of using a
                    # bare `except` lets KeyboardInterrupt and
                    # SystemExit propagate.
                    pass
        return self._id

    @property
    def img(self):
        """Still image displayed for the video."""

        if not hasattr(self, "_img"):
            url = self.IMAGE_URL.format(self.youtube_id)
            self._img = builder.IMG(src=url)
        return self._img

    @property
    def link(self):
        """Link for playing the YouTube video."""

        if not hasattr(self, "_link"):
            url = self.VIDEO_URL.format(self.youtube_id)
            self._link = builder.A("Watch video on YouTube", href=url)
        return self._link

    @property
    def node(self):
        """Wrapper element for the video information."""
        return self.__node

    @property
    def row(self):
        """HTML markup for displaying the video info and link."""

        if not hasattr(self, "_row"):
            B = builder
            args = self.text, B.BR(), self.img, B.BR(), self.link
            self._row = B.TR(B.TD("Video Link"), B.TD(*args))
        return self._row

    @property
    def text(self):
        """String describing the video, displayed at the top.

        Taken from `SpecificMediaTitle`, falling back on the text of
        the `VideoID` element when that title is missing or empty.
        """

        if not hasattr(self, "_text"):
            self._text = None
            node = self.node.find("SpecificMediaTitle")
            if node is not None:
                self._text = Doc.get_text(node, "").strip()
            if not self._text:
                node = self.node.find("VideoID")
                if node is not None:
                    self._text = Doc.get_text(node, "").strip()
        return self._text

    @property
    def youtube_id(self):
        """Token for the URL to play the video.

        Read from the `HostingID` element of the linked Media document;
        `None` if that text is empty.
        """

        if not hasattr(self, "_youtube_id"):
            doc = Doc(self.SESSION, id=self.id)
            node = doc.root.find("PhysicalMedia/VideoData/HostingID")
            self._youtube_id = Doc.get_text(node, "").strip() or None
        return self._youtube_id
DrugInformationSummary=Control.assemble_values_for_dis, ) # Collect the options for this run. parser = ArgumentParser() parser.add_argument("--session", required=True, help="CDR login key") parser.add_argument("--tier", help="publish from another tier") parser.add_argument("--base", help="override base URL for Drupal site") parser.add_argument("--password", help="override password for PDQ account") parser.add_argument("--dumpfile", help="where to store the serialized doc") parser.add_argument("--id", type=int, help="CDR ID for Summary", required=True) opts = parser.parse_args() auth = ("PDQ", opts.password) if opts.password else None # Make sure we are allowed to publish to the CMS. session = Session(opts.session, tier=opts.tier) if not session.can_do("USE PUBLISHING SYSTEM"): raise Exception("Not authorized") # Prepare the document. doc = Doc(session, id=opts.id) print(("Pushing {} document {}".format(doc.doctype.name, doc.cdr_id))) root = Control.fetch_exported_doc(session, doc.id, "pub_proc_cg") xsl = Doc.load_single_filter(session, FILTERS[doc.doctype.name]) values = ASSEMBLE[doc.doctype.name](session, doc.id, xsl, root) if opts.dumpfile: with open(opts.dumpfile, "w") as fp: fp.write(dumps(values)) # Store the document and mark it publishable. client = DrupalClient(session, auth=auth, base=opts.base, tier=opts.tier)
def setUp(self):
    """Log in as `self.USERNAME` and store the session on `Tests`.

    The account's password is looked up through a `Tier` object.
    """

    credentials = {
        "comment": "filter testing",
        "password": Tier().password(self.USERNAME),
    }
    Tests.session = Session.create_session(self.USERNAME, **credentials)
# only identifies which schema files do not match the corresponding # CDR document and reports errors. # #---------------------------------------------------------------------- import cdr, sys, glob, difflib, os.path from argparse import ArgumentParser from cdrapi.docs import Doc from cdrapi.users import Session differ = difflib.Differ() parser = ArgumentParser() parser.add_argument("--quiet", "-q", action="store_true") parser.add_argument("--tier", "-t") parser.add_argument("files", nargs="*", default="*.xml") opts = parser.parse_args() session = Session('guest', tier=opts.tier) for pattern in opts.files: for name in glob.glob(pattern): baseName = os.path.basename(name) try: with open(name, encoding="utf-8") as fp: localDoc = fp.read().replace("\r", "").splitlines(True) except Exception as e: print(f"... unable to open {name}: {e}") continue query = f"CdrCtl/Title = {baseName}" results = cdr.search(session, query, doctypes=["schema"], tier=opts.tier) if len(results) < 1: print(f"... schema {baseName} not found in CDR") else:
def session(self):
    """Session for the "pdqcontent" account.

    Created on first use, with the password obtained from
    `cdr.getpw()`, and cached in `self._session`.
    """

    if hasattr(self, "_session"):
        return self._session
    password = cdr.getpw("pdqcontent")
    self._session = Session.create_session("pdqcontent", password=password)
    return self._session
#!/usr/bin/env python

"""Check for English summaries on the CMS for publishable Spanish translations.

Two problems are reported: a Spanish summary whose English original has
no row in the `pub_proc_cg` table, and a Spanish summary whose English
original is missing from the catalog returned by the CMS.
"""

from argparse import ArgumentParser
from cdrapi import db
from cdrapi.publishing import DrupalClient
from cdrapi.users import Session

# Collect the command-line options.
parser = ArgumentParser()
for option in ("--tier", "--cms"):
    parser.add_argument(option)
opts = parser.parse_args()

# Pair each Spanish summary in pub_proc_cg with its English original;
# the outer join leaves the third column empty when the English summary
# has no pub_proc_cg row of its own.
cursor = db.connect(user="******", tier=opts.tier).cursor()
query = db.Query("query_term_pub t", "t.doc_id", "t.int_val", "e.id")
query.join("pub_proc_cg s", "s.id = t.doc_id")
query.outer("pub_proc_cg e", "e.id = t.int_val")
query.where("t.path = '/Summary/TranslationOf/@cdr:ref'")
rows = query.execute(cursor).fetchall()
for spanish_id, english_id, english_pub_id in rows:
    if not english_pub_id:
        print(f"CDR{spanish_id} is translation of unpublished CDR{english_id}")

# Compare against what the CMS says it has.
# NOTE(review): the --tier option is not passed to this guest Session,
# only to the database connection -- confirm that is intended.
client = DrupalClient(Session("guest"), base=opts.cms)
on_cms = {summary.cdr_id for summary in client.list()}
for spanish_id, english_id, _ in rows:
    if english_id not in on_cms:
        print(f"CDR{spanish_id} is translation of CDR{english_id} "
              "which is not on the CMS")
def populate_form(self, page):
    """Fill in the menu page with links to the reports, by section.

    Pass:
        page - object on which the menu is built (provides the `B`
               element builder, `form`, `body`, and `menu_link()`)
    """

    # Make sure the user is allowed to use this menu.
    user = Session.User(self.session, id=self.session.user_id)
    if self.GROUP not in user.groups:
        bail("User not authorized for this menu")

    B = page.B

    def start_section(heading):
        """Add a section header and an empty ordered list to the form."""
        page.form.append(B.H3(heading))
        items = B.OL()
        page.form.append(items)
        return items

    def add_links(items, links):
        """Append one list item per (display, script) pair."""
        for display, script in links:
            items.append(B.LI(page.menu_link(script, display)))

    # Section 1: summary and module reports.
    add_links(start_section("Summary and Module Reports"), (
        ("Most Recent Changes to Summaries", "ChangesToSummaries.py"),
        ("History of Changes to a Single Summary", "SummaryChanges.py"),
        ("Date Last Modified", "SummaryDateLastModified.py"),
        ("Comprehensive Review Dates", "SummaryCRD.py"),
        ("Titles in Alphabetical Order", "SummariesLists.py"),
        ("Metadata", "SummaryMetaData.py"),
        ("TOC Levels", "SummariesTocReport.py"),
        ("Type of Comments", "SummaryComments.py"),
        ("Current Markup", "SummariesWithMarkup.py"),
        ("Type Of Change", "SummaryTypeChangeReport.py"),
        ("Standard Wording", "SummaryStandardWording.py"),
        ("Citations in Alphabetical Order", "SummaryCitations.py"),
        ("Non-Journal Article Citations Report",
         "SummariesWithNonJournalArticleCitations.py"),
    ))

    # Section 2: management reports.
    add_links(start_section("PCIB Management Reports"), (
        ("Board Meeting Dates", "BoardMeetingDates.py"),
        ("PCIB Statistics Report", "RunPCIBStatReport.py"),
    ))

    # Section 3: other reports.
    page.body.set("class", "admin-menu")
    add_links(start_section("Quick Links to Other Reports and Report Menus"), (
        ("Checked Out Documents", "CheckedOutDocs.py"),
        ("Drug Information", "DrugInfoReports.py"),
        ("Glossary Terms", "GlossaryTermReports.py"),
        ("Linked Documents", "LinkedDocs.py"),
        ("Media", "MediaReports.py"),
        ("General Use Reports", "GeneralReports.py"),
    ))

    # Section 4: board members. (The body class is set a second time
    # here, to the same value as in section 3.)
    page.body.set("class", "admin-menu")
    board_items = start_section("Board Member Information Reports")
    qc_link = page.menu_link(
        "QcReport.py",
        "Board Member Information QC Report",
        DocType="PDQBoardMemberInfo",
    )
    board_items.append(B.LI(qc_link))
    add_links(board_items, (
        ("Board Roster Reports", "BoardRoster.py"),
        ("Board Roster Reports (Combined)", "BoardRosterFull.py"),
        ("Invitation History Report", "BoardInvitationHistory.py"),
        ("Board Members and Topics", "PdqBoards.py"),
    ))

    # Section 5: mailers (the list is assembled before it is attached).
    page.form.append(B.H3("Mailers"))
    mailer_links = (
        page.menu_link(
            "BoardMemberMailerReqForm.py",
            "Board Member Correspondence Mailers"
        ),
        page.menu_link(
            "SummaryMailerReport.py",
            "Summary Mailer History Report",
            flavor="history"
        ),
        page.menu_link(
            "SummaryMailerReport.py",
            "Summary Mailer Report",
            flavor="standard"
        ),
    )
    page.form.append(B.OL(*[B.LI(link) for link in mailer_links]))

    # Section 6: miscellaneous docs.
    misc_items = start_section("Miscellaneous Document QC Report")
    misc_link = page.menu_link("MiscSearch.py", "Miscellaneous Documents")
    misc_items.append(B.LI(misc_link))

    # Section 7: QC (one link per report type, all for the QC script).
    qc_items = start_section("Summary QC Reports")
    for display, report_key in (
        ("HP Bold/Underline QC Report", "bu"),
        ("HP Redline/Strikeout QC Report", "rs"),
        ("PT Bold/Underline QC Report", "patbu"),
        ("PT Redline/Strikeout QC Report", "pat"),
        ("Publish Preview Report", "pp")
    ):
        link = page.menu_link(
            "QcReport.py",
            display,
            DocType="Summary",
            DocVersion="-1",
            ReportType=report_key,
        )
        qc_items.append(B.LI(link))
def get_sets(tier):
    """Return a dictionary of the tier's filter sets, indexed by name.

    Pass:
        tier - passed to the guest `Session` used to fetch the sets
    """

    guest = Session("guest", tier=tier)
    return {
        set_name: FilterSet(guest, id=set_id)
        for set_id, set_name in FilterSet.get_filter_sets(guest)
    }
class Media:
    """Subject of the report."""

    TITLE = "Media QC Report (Title)"
    SUBTITLE = "Media QC Report"
    MEDIA_ELEMENTS = "ContentDescription", "MediaCaption"
    GUEST = Session("guest")
    LANGUAGES = dict(en="English", es="Spanish")
    # Only "Patients" is listed; the two-audience alternative
    # ("Patients", "Health_professionals") is disabled.
    AUDIENCES = "Patients",
    FILTER = "set:QC Media Set"
    EN_INGLES = " (en ingl\xe9s)"
    CSS = "../../stylesheets/MediaSideBySide.css"

    def __init__(self, control):
        """Save the control object, which has everything we need.

        Pass:
            control - access to the database and the report parameters
        """

        self.__control = control

    def show_report(self):
        """Send the report back to the browser."""

        opts = dict(
            pretty_print=True,
            doctype="<!DOCTYPE html>",
            encoding="utf-8",
        )
        sys.stdout.buffer.write(b"Content-type: text/html;charset=utf-8\n\n")
        sys.stdout.buffer.write(html.tostring(self.report, **opts))
        sys.exit(0)

    @property
    def control(self):
        """Access to all the information we need for the report."""
        return self.__control

    @property
    def doc(self):
        """Pair of CDR `Doc` objects for the Media documents.

        The list holds the documents for the first and second IDs in
        `control.idpair` (stored as `_doc_en` and `_doc_es`), in that
        order. Only the first document's type is checked.
        """

        if not hasattr(self, "_doc"):
            session = self.control.session
            self._doc_en = Doc(session, id=self.control.idpair[0])
            self._doc_es = Doc(session, id=self.control.idpair[1])
            if self._doc_en.doctype.name != "Media":
                self.control.bail("Not a Media document")
            self._doc = [self._doc_en, self._doc_es]
        return self._doc

    @property
    def report(self):
        """HTML element tree for the report, one block per document."""

        if not hasattr(self, "_report"):
            B = builder
            meta = B.META(charset="utf-8")
            link = B.LINK(href=self.CSS, rel="stylesheet")
            icon = B.LINK(href="/favicon.ico", rel="icon")
            head = B.HEAD(meta, B.TITLE(self.TITLE), icon, link)
            time = B.SPAN(self.control.started.ctime())
            args = self.SUBTITLE, B.BR(), "Side-by-Side", B.BR(), time
            cdrId = self.control.fields.getvalue("DocId")
            orig_id = B.P(f"{cdrId}", id="media-id")
            body = B.BODY(B.E("header", B.H1(*args)), orig_id)
            self._report = B.HTML(head, body)
            for cdrdoc in self.doc:
                self._append_document(body, cdrdoc)
        return self._report

    def _append_document(self, body, cdrdoc):
        """Add the block for one Media document to the report body."""

        B = builder
        wrapper = B.DIV(B.CLASS("lang-wrapper"))
        body.append(wrapper)

        # Display the language if uniquely identified
        lang = self.getLanguage(cdrdoc.id)
        if not lang:
            self.control.bail("Found none or multiple languages")

        # Display the CDR-ID
        wrapper.append(B.P(f"{lang} - CDR{cdrdoc.id}", id="media-id"))

        # Display the image title
        media_title = self.getTitle(cdrdoc.id)
        wrapper.append(B.P(media_title, B.CLASS("media-title")))

        # Display the image (or the still for a video)
        if self.isImage(cdrdoc.id):
            image = f"/cgi-bin/cdr/GetCdrImage.py?id=CDR{cdrdoc.id}-400.jpg"
            wrapper.append(B.IMG(src=image))
        else:
            host_id = self.getHostID(cdrdoc.id)
            image = f"https://img.youtube.com/vi/{host_id}/hqdefault.jpg"
            wrapper.append(B.P(B.IMG(src=image)))

        # Display the image labels (the header is omitted without labels)
        labels = self.getLabel(cdrdoc.id)
        if labels:
            ul = B.UL()
            for label in labels:
                ul.append(B.LI(label))
            wrapper.append(B.P("Label", B.CLASS("section-hdr")))
            wrapper.append(ul)

        # Display the descriptions and captions (headers always shown)
        base_path = "/Media/MediaContent"
        sections = (
            ("Content Description",
             "/ContentDescriptions/ContentDescription"),
            ("Caption", "/Captions/MediaCaption"),
        )
        for heading, path in sections:
            wrapper.append(B.P(heading, B.CLASS("section-hdr")))
            rows = self.getInfo(cdrdoc.id, f"{base_path}{path}")
            if rows:
                for row in rows:
                    block = B.P(B.B(f"{row[0]}:"), B.BR(), f" {row[1]}")
                    wrapper.append(block)

    def _query_term_rows(self, column, path_test, doc_id):
        """Fetch `query_term` rows for one document.

        Pass:
            column - column expression to select
            path_test - SQL condition selecting the path(s)
            doc_id - CDR ID of the document
        """

        query = self.control.Query("query_term", column)
        query.where(path_test)
        query.where(f"doc_id = {doc_id}")
        return query.execute(self.control.cursor).fetchall()

    def getLanguage(self, id):
        """Select the language of the document.

        Each document for which this report is used should only include
        one language code (en/es). For documents including both
        languages there won't exist a translated Spanish document with
        the TranslationOf reference to the current "English" version.
        Those documents will be QC'ed using the original Media QC report.

        Return:
            "English" if the only language code is "en", "Spanish" for
            any other single code, or `None` if the document does not
            have exactly one distinct language code
        """

        test = "path like '/Media%@language'"
        rows = self._query_term_rows("DISTINCT value", test, id)
        if rows and len(rows) == 1:
            return 'English' if rows[0][0] == 'en' else 'Spanish'
        return None

    def getTitle(self, id):
        """Get the Media title (`None` if no title is found)."""

        rows = self._query_term_rows(
            "value", "path = '/Media/MediaTitle'", id)
        return rows[0][0] if rows else None

    def isImage(self, id):
        """Return True for an image document (False for a video).

        A document counts as an image if it has an image encoding.
        """

        test = "path = '/Media/PhysicalMedia/ImageData/ImageEncoding'"
        return True if self._query_term_rows("value", test, id) else False

    def getHostID(self, id):
        """Grab the YouTube hosting ID (`None` if there isn't one)."""

        test = "path = '/Media/PhysicalMedia/VideoData/HostingID'"
        rows = self._query_term_rows("value", test, id)
        return rows[0][0] if rows else None

    def getLabel(self, id):
        """Create a list containing all of the labels for this image.

        Return:
            list of label strings, or `None` if there are no labels
        """

        test = "path = '/Media/PhysicalMedia/ImageData/LabelName'"
        rows = self._query_term_rows("value", test, id)
        return [row[0] for row in rows] if rows else None

    def getInfo(self, id, path):
        """Create a list of the descriptions or captions.

        Which of the two is collected depends on the path passed.
        There could be two elements in the list (Patients or
        Health_professionals).

        Pass:
            id - CDR ID of the Media document
            path - path of the description or caption elements

        Return:
            sequence of (audience, text) rows, ordered by audience
            descending, or `None` if nothing was found
        """

        query = self.control.Query(
            "query_term q", "a.value", "q.value").join(
            "query_term a", "q.doc_id = a.doc_id").order("a.value DESC")
        query.where(f"q.path = '{path}'")
        query.where(f"a.path = '{path}/@audience'")
        # Pair each value with the audience attribute of the same node.
        query.where("left(a.node_loc, 12) = left(q.node_loc, 12)")
        query.where(f"q.doc_id = {id}")
        rows = query.execute(self.control.cursor).fetchall()
        return rows if rows else None
class Concept: """Subject of the report.""" TITLE = "Glossary Term Concept" SUBTITLE = "Glossary Term Concept - Full" DEFINITION_ELEMENTS = "TermDefinition", "TranslatedTermDefinition" GUEST = Session("guest") LANGUAGES = dict(en="English", es="Spanish") AUDIENCES = "Patient", "Health professional" FILTER = "name:Glossary Term Definition Update" EN_INGLES = " (en ingl\xe9s)" CSS = "../../stylesheets/GlossaryConceptFull.css" def __init__(self, control): """Save the control object, which has everything we need. Pass: control - access to the database and the report parameters """ self.__control = control def show_report(self): """Send the report back to the browser.""" opts = dict( pretty_print=True, doctype="<!DOCTYPE html>", encoding="utf-8", ) sys.stdout.buffer.write(b"Content-type: text/html;charset=utf-8\n\n") sys.stdout.buffer.write(html.tostring(self.report, **opts)) sys.exit(0) @property def control(self): """Access to all the information we need for the report.""" return self.__control @property def definitions(self): """`Definition` objects for the concept, indexed by langcode.""" if not hasattr(self, "_definitions"): self._definitions = {} for langcode in self.LANGUAGES: self._definitions[langcode] = {} for name in self.DEFINITION_ELEMENTS: for node in self.doc.root.findall(name): definition = self.Definition(self, node) self._definitions[definition.langcode].update( {definition.audience: definition}) return self._definitions @property def doc(self): """CDR `Doc` object for the GlossaryTermConcept document.""" if not hasattr(self, "_doc"): self._doc = Doc(self.control.session, id=self.control.id) if self._doc.doctype.name != "GlossaryTermConcept": self.control.bail("Not a GlossaryTermConcept document") return self._doc @property def drug_links(self): """Drug summary links for the concept.""" if not hasattr(self, "_drug_links"): self._drug_links = self.Link.get_links(self, "drug") return self._drug_links @property def external_refs(self): """Links from this 
concept to pages outside the CDR.""" if not hasattr(self, "_external_refs"): self._external_refs = self.Link.get_links(self, "xref") return self._external_refs @property def media_links(self): """Links to images not associated with a specific definition.""" if not hasattr(self, "_media_links"): nodes = self.doc.root.findall("MediaLink") self._media_links = [self.MediaLink(node) for node in nodes] return self._media_links @property def media_table(self): """Table showing all the non-definition-specific images.""" if not hasattr(self, "_media_table"): self._media_table = builder.TABLE() for link in self.media_links: self._media_table.append(link.row) return self._media_table @property def name_links(self): """Links to other glossary term names.""" if not hasattr(self, "_name_links"): self._name_links = self.Link.get_links(self, "term") return self._name_links @property def names(self): """Objects for the concept's GlossaryTermName documents.""" if not hasattr(self, "_names"): query = self.control.Query("query_term", "doc_id") query.where(query.Condition("path", self.control.CONCEPT_PATH)) query.where(query.Condition("int_val", self.doc.id)) rows = query.execute(self.control.cursor).fetchall() self._names = [self.Name(self, row.doc_id) for row in rows] return self._names @property def pdq_terms(self): """Links to PDQ term documents.""" if not hasattr(self, "_pdq_terms"): self._pdq_terms = self.Link.get_links(self, "pdqt") return self._pdq_terms @property def related_info_table(self): """Table at the bottom of the report to links to other information.""" if not hasattr(self, "_related_info_table"): self._related_info_table = None rows = [drug_link.row for drug_link in self.drug_links] rows += [summary_ref.row for summary_ref in self.summary_refs] rows += [external_ref.row for external_ref in self.external_refs] rows += [name_link.row for name_link in self.name_links] rows += [pdq_term.row for pdq_term in self.pdq_terms] if self.thesaurus_ids: label = "NCI Thesaurus ID" 
args = [self.thesaurus_ids[0]] for id in self.thesaurus_ids[1:]: args += [builder.BR(), id] rows.append(builder.TR(builder.TD(label), builder.TD(*args))) if rows: self._related_info_table = builder.TABLE(*rows) self._related_info_table.set("class", "related-info-table") return self._related_info_table @property def report(self): """`HTMLPage` object for the report.""" if not hasattr(self, "_report"): B = builder meta = B.META(charset="utf-8") link = B.LINK(href=self.CSS, rel="stylesheet") icon = B.LINK(href="/favicon.ico", rel="icon") jqry = B.SCRIPT(src=self.control.HTMLPage.JQUERY) head = B.HEAD(meta, B.TITLE(self.TITLE), icon, link, jqry) time = B.SPAN(self.control.started.ctime()) args = self.SUBTITLE, B.BR(), "QC Report", B.BR(), time concept_id = B.P(f"CDR{self.doc.id}", id="concept-id") wrapper = body = B.BODY(B.E("header", B.H1(*args)), concept_id) self._report = B.HTML(head, body) for langcode in sorted(self.definitions): language = self.LANGUAGES[langcode] if self.parallel: wrapper = B.DIV(B.CLASS("lang-wrapper")) body.append(wrapper) for audience in sorted(self.definitions[langcode], reverse=True): aud = self.definitions[langcode][audience].audience section = f"{language} - {aud.title()}" wrapper.append(B.H2(section)) wrapper.append( self.definitions[langcode][audience].term_table) wrapper.append( self.definitions[langcode][audience].info_table) if self.media_table is not None: body.append(self.media_table) body.append(self.term_type_table) if self.related_info_table is not None: body.append(B.H2("Related Information")) body.append(self.related_info_table) body.append( B.SCRIPT("""\ jQuery(function() { jQuery("a.sound").click(function() { var url = jQuery(this).attr("href"); var audio = document.createElement("audio"); audio.setAttribute("src", url); audio.load(); audio.addEventListener("canplay", function() { audio.play(); }); return false; }); });""")) return self._report @property def parallel(self): """True if the English and Spanish should be 
side-by-side.""" if not hasattr(self, "_parallel"): self._parallel = self.control.layout == Control.SIDE_BY_SIDE return self._parallel @property def summary_refs(self): """Links to Cancer Information Summary documents.""" if not hasattr(self, "_summary_refs"): self._summary_refs = self.Link.get_links(self, "sref") return self._summary_refs @property def term_type_table(self): """Table showing all of the term type string for the concept.""" args = [self.term_types[0]] for term_type in self.term_types[1:]: args += [builder.BR(), term_type] term_types = builder.TD(*args) table = builder.TABLE(builder.TR(builder.TD("Term Type"), term_types)) table.set("class", "term-type-table") return table @property def term_types(self): """Sequence of term type strings for the concept, in document order.""" if not hasattr(self, "_term_types"): self._term_types = [] for node in self.doc.root.findall("TermType"): self._term_types.append(Doc.get_text(node, "").strip()) return self._term_types @property def thesaurus_ids(self): """Links to concepts in the NCI thesaurus.""" if not hasattr(self, "_thesaurus_ids"): self._thesaurus_ids = [] for node in self.doc.root.findall("NCIThesaurusID"): self._thesaurus_ids.append(Doc.get_text(node, "").strip()) return self._thesaurus_ids @property def videos(self): """Embedded videos not associated with a specific definition. Note that we're collecting these, but never displaying them. That is surely a mistake. """ if not hasattr(self, "_videos"): nodes = self.doc.root.findall("EmbeddedVideo") self._videos = [Concept.Video(node) for node in nodes] return self._videos class Definition: """One concept definition for a specifc language/audience combo.""" ROWS = ( ("definitions", "Definition Resource"), ("media_links", "Media Link"), ) RESOURCES = dict(en="Definition Resource", es="Translation Resource") STATUSES = dict(en="Definition Status", es="Translation Status") def __init__(self, concept, node): """Capture the caller's information. 
Pass: concept - `Concept` to which this definition belongs node - wrapper element for this definition """ self.__concept = concept self.__node = node @property def audience(self): """Audience for this definition.""" if not hasattr(self, "_audience"): self._audience = Doc.get_text(self.node.find("Audience")) return self._audience @property def comments(self): """`Comment` objects belonging to the definition.""" if not hasattr(self, "_comments"): self._comments = [] for node in self.node.findall("Comment"): self._comments.append(self.Comment(node)) return self._comments @property def concept(self): """Access to the terms connected with the concept.""" return self.__concept @property def dictionaries(self): """Dictionaries in which this definition should appear.""" if not hasattr(self, "_dictionaries"): self._dictionaries = [] for node in self.node.findall("Dictionary"): self._dictionaries.append(Doc.get_text(node, "").strip()) return self._dictionaries @property def info_table(self): """Table with meta information about this definition.""" table = builder.TABLE(builder.CLASS("definition-info")) for row in self.rows: table.append(row) return table @property def langcode(self): """Language code for this definition ("en" or "es").""" return "en" if self.node.tag == "TermDefinition" else "es" @property def last_modified(self): """Date the definition was last modified.""" if not hasattr(self, "_last_modified"): self._last_modified = None node = self.node.find("DateLastModified") if node is not None: self._last_modified = Doc.get_text(node, "").strip() return self._last_modified @property def last_reviewed(self): """Date the definition was last reviewed.""" if not hasattr(self, "_last_reviewed"): self._last_reviewed = None node = self.node.find("DateLastReviewed") if node is not None: self._last_reviewed = Doc.get_text(node, "").strip() return self._last_reviewed @property def media_links(self): """Links to images for the definition.""" if not hasattr(self, 
"_media_links"): nodes = self.node.findall("MediaLink") self._media_links = [Concept.MediaLink(node) for node in nodes] return self._media_links @property def node(self): """Parsed XML node for the definition.""" return self.__node @property def replacements(self): """Replacement strings not specific to any term name.""" if not hasattr(self, "_replacements"): self._replacements = {} for node in self.node.findall("ReplacementText"): self._replacements[node.get("name")] = node return self._replacements @property def resources(self): """Resources used for this definition.""" if not hasattr(self, "_resources"): self._resources = [] for tag in ("DefinitionResource", "TranslationResource"): for node in self.node.findall(tag): self._resources.append(Doc.get_text(node, "").strip()) return self._resources @property def rows(self): """Table rows for this definition's meta data.""" if not hasattr(self, "_rows"): rows = [] resources = self.RESOURCES[self.langcode] self.__add_row(rows, "resources", resources) for media_link in self.media_links: rows.append(media_link.row) for video in self.videos: rows.append(video.row) self.__add_row(rows, "dictionaries", "Dictionary") self.__add_row(rows, "status", self.STATUSES[self.langcode]) self.__add_row(rows, "status_date", "Status Date") # Only include the topmost comment for comment in self.comments[:1]: rows.append(comment.row) self.__add_row(rows, "last_modified", "Date Last Modified") self.__add_row(rows, "last_reviewed", "Date Last Reviewed") self._rows = rows return self._rows @property def status(self): """String for the definition's status.""" if not hasattr(self, "_status"): self._status = None for tag in ("DefinitionStatus", "TranslatedStatus"): self._status = self.node.find(tag) if self._status is not None: self._status = Doc.get_text(self._status, "").strip() break return self._status @property def status_date(self): """Date of the definition's current status.""" if not hasattr(self, "_status_date"): self._status_date = 
None for tag in ("StatusDate", "TranslatedStatusDate"): node = self.node.find(tag) if node is not None: self._status_date = Doc.get_text(node, "").strip() break return self._status_date @property def term_table(self): """Table showing term names and customized definitions. The definition is included via the resolve_placeholders() method and by transforming the text via a XSLT filter """ B = builder table = B.TABLE(B.CLASS("name-and-def")) for name in self.concept.names: if self.langcode == "en": langname = name.english_name elif name.spanish_name is not None: langname = name.spanish_name else: langname = name.english_name markup = name.markup_for_name(langname) if markup.tag == "p": markup.tag = "span" args = [markup, f" (CDR{name.id})"] if self.langcode == "es": if name.spanish_pronunciation is not None: args.append(" ") args.append(name.spanish_pronunciation) if name.spanish_name is None: markup = B.SPAN(" (en ingl\xe9s)", B.CLASS("special")) args.append(markup) elif name.alternate_spanish_names: separator = None args.append(" \xa0[alternate: ") for alt_name in name.alternate_spanish_names: if separator: args.append(separator) separator = ", " markup = name.markup_for_name(alt_name) if markup.tag == "p": markup.tag = "span" args.append(markup) args.append("]") elif name.english_pronunciation is not None: args.append(" ") args.append(name.english_pronunciation) if name.blocked: args = ["BLOCKED - "] + args + [B.CLASS("blocked")] table.append(B.TR(B.TD("Name"), B.TD(*args), B.CLASS("name"))) if not name.blocked: table.append(name.resolve_placeholders(self)) return table @property def text(self): """Definition text with placeholders to be resolved.""" if not hasattr(self, "_text"): self._text = self.node.find("DefinitionText") return self._text @property def videos(self): """Sequence of embedded videos for this definition.""" if not hasattr(self, "_videos"): nodes = self.node.findall("EmbeddedVideo") self._videos = [Concept.Video(node) for node in nodes] return 
self._videos def __add_row(self, rows, name, label): """Helper method to create a row for the definition meta table.""" values = getattr(self, name) if values: label = builder.TD(label) if not isinstance(values, list): values = [values] args = [values[0]] for value in values[1:]: args.append(builder.BR()) args.append(value) values = builder.TD(*args) rows.append(builder.TR(label, values)) class Comment: """Comment associated with the definition.""" def __init__(self, node): """Remember the node for this comment. Pass: node - parsed XML for the comment's element """ self.__node = node @property def row(self): """HTML markup for this comment.""" if not hasattr(self, "_row"): wrapper = etree.Element("GlossaryTermDef") wrapper.append(self.__node) doc = Doc(Concept.GUEST, xml=etree.tostring(wrapper)) result = doc.filter(Concept.FILTER) self._row = html.fromstring(str(result.result_tree)) return self._row class Link: """Link to be displayed in the concept's table for related info.""" RELINFO = "RelatedInformation" DRUG_SUMMARY_LINK = "RelatedInformation/RelatedDrugSummaryLink" EXTERNAL_REF = "RelatedInformation/RelatedExternalRef" SUMMARY_REF = "RelatedInformation/RelatedSummaryRef" TERM_NAME_LINK = "RelatedInformation/RelatedGlossaryTermNameLink" PDQ_TERM = "PDQTerm" TYPES = dict( drug=(DRUG_SUMMARY_LINK, "Rel Drug Summary Link", "ref", True), xref=(EXTERNAL_REF, "Rel External Ref", "xref", True), sref=(SUMMARY_REF, "Rel Summary Ref", "ref", True), term=(TERM_NAME_LINK, "Rel Glossary Term", "ref", True), pdqt=(PDQ_TERM, "PDQ Term", "ref", False), ) def __init__(self, label, value, text, external, indent): """Capture the caller's values. 
Pass: label - string for the left-side column value - string extracted from the linking attribute text - text displayed in the right-side column external - True if this is a link outside the CDR indent - whether the label should be offset from the left """ self.__label = label self.__value = value self.__text = text self.__external = external self.__indent = indent @property def row(self): """HTML markup for the link.""" if not hasattr(self, "_row"): label = builder.TD(self.__label) if self.__indent: label.set("class", "indent") if self.__external: display = url = self.__value else: doc_id = Doc.extract_id(self.__value) display = f"CDR{doc_id:d}" url = f"QcReport.py?Session=guest&DocId={doc_id:d}" link = builder.A(display, href=url) args = f"{self.__text} (", link, ")" self._row = builder.TR(label, builder.TD(*args)) return self._row @classmethod def get_links(cls, concept, key): """Find all the links of a given type for the concept. Pass: concept - subject of the report key - index into the values for this type of link """ path, label, name, indent = cls.TYPES[key] external = name == "xref" name = f"{{{Doc.NS}}}{name}" links = [] for node in concept.doc.root.findall(path): text = Doc.get_text(node, "").strip() value = node.get(name) links.append(cls(label, value, text, external, indent)) label = "" return links class MediaLink: """Link to an image used by the glossary term.""" CDR_REF = f"{{{Doc.NS}}}ref" CGI = "https://cdr.cancer.gov/cgi-bin/cdr" URL = f"{CGI}/GetCdrImage.py?id={{}}-300.jpg" def __init__(self, node): """Remember the XML node for this link. 
Pass: node - wrapper element for the image information """ self.__node = node @property def id(self): """CDR ID for the image document.""" if not hasattr(self, "_id"): node = self.node.find("MediaID") try: self._id = Doc.extract_id(node.get(f"{{{Doc.NS}}}ref")) except: self._id = None if not hasattr(self, "_text"): self._text = Doc.get_text(node, "").strip() return self._id @property def node(self): """Wrapper node for the media link.""" return self.__node @property def row(self): """HTML markup for the the image's table row.""" if not hasattr(self, "_row"): B = builder img = B.IMG(src=self.URL.format(self.id)) args = f"{self.text} (CDR{self.id})", B.BR(), img self._row = B.TR(B.TD("Media Link"), B.TD(*args)) return self._row @property def text(self): """Text to be displayed above the image.""" if not hasattr(self, "_text"): node = self.node.find("MediaID") self._text = Doc.get_text(node, "").strip() if not hasattr(self, "_id"): try: self._id = Doc.extract_id(node.get(self.CDR_REF)) except: self._id = None return self._text class Name: """Information needed from a GlossaryTermName document.""" NAME_TAGS = "TermName", "TranslatedName" def __init__(self, concept, id): """Remember the caller's information. Pass: concept - `Concept` to which this name belongs id - CDR ID for the GlossaryTermName document """ self.__concept = concept self.__id = id def resolve_placeholders(self, definition): """Assemble the definition using our name and replacements. 
Pass: definition - definition with placeholders to be resolved Return: marked-up definition row """ if definition.langcode == "en": name = deepcopy(self.english_name) elif self.spanish_name is None: name = deepcopy(self.english_name) self.__append_en_ingles(name) else: name = deepcopy(self.spanish_name) root = etree.Element("GlossaryTermDef") root.append(name) root.append(self.__make_capped_name(name)) root.append(deepcopy(definition.node.find("DefinitionText"))) if self.replacements: node = etree.Element("GlossaryTermPlaceHolder") for replacement in self.replacements.values(): node.append(deepcopy(replacement)) root.append(node) if definition.replacements: node = etree.Element("GlossaryConceptPlaceHolder") for replacement in definition.replacements.values(): node.append(deepcopy(replacement)) root.append(node) doc = Doc(Concept.GUEST, xml=etree.tostring(root)) result = doc.filter(Concept.FILTER) return html.fromstring(str(result.result_tree)) @property def alternate_spanish_names(self): """Extra spanish names.""" if not hasattr(self, "_alternate_spanish_names"): self._alternate_spanish_names = [] path = "TranslatedName/TermNameString" for node in self.doc.root.findall(path): if node.get("NameType") == "alternate": self._alternate_spanish_names.append(node) elif not hasattr(self, "_spanish_name"): self._spanish_name = node return self._alternate_spanish_names @property def blocked(self): """True if the name document can't be published.""" return self.doc.active_status != Doc.ACTIVE @property def doc(self): """CDR `Doc` object for the GlossaryTermName document.""" if not hasattr(self, "_doc"): self._doc = Doc(Concept.GUEST, id=self.id) return self._doc @property def id(self): """CDR ID for the GlossaryTermName document.""" return self.__id @property def english_name(self): """English name for the glossary term.""" if not hasattr(self, "_english_name"): node = self.doc.root.find("TermName/TermNameString") self._english_name = node return self._english_name @property 
def english_pronunciation(self): """Link to the audio file for pronunciation of the English name.""" if self.english_pronunciation_url: B = builder url = self.english_pronunciation_url img = B.IMG(B.CLASS("sound"), src="/images/audio.png") return B.A(img, B.CLASS("sound"), href=url) return None @property def english_pronunciation_url(self): """URL for the audio file for pronunciation of the English name.""" if not hasattr(self, "_english_pronunciation_url"): self._english_pronunciation_url = None node = self.doc.root.find("TermName/MediaLink/MediaID") if node is not None: id = node.get(f"{{{Doc.NS}}}ref") if id: url = f"GetCdrBlob.py?disp=inline&id={id}" self._english_pronunciation_url = url return self._english_pronunciation_url @property def replacements(self): """The name's replacement strings for definition placeholders.""" if not hasattr(self, "_replacements"): self._replacements = {} for node in self.doc.root.findall("ReplacementText"): self._replacements[node.get("name")] = node return self._replacements @property def spanish_name(self): """Primary (non-"alternate") Spanish name for the term.""" if not hasattr(self, "_spanish_name"): self._spanish_name = None alternates = [] for node in self.doc.root.findall("TranslatedName"): child = node.find("TermNameString") if child is not None: if node.get("NameType") != "alternate": self._spanish_name = child else: alternates.append(child) if not hasattr(self, "_alternate_spanish_names"): self._alternate_spanish_names = alternates return self._spanish_name @property def spanish_pronunciation(self): """Link to the audio file for pronunciation of the Spanish name.""" if self.spanish_pronunciation_url: B = builder url = self.spanish_pronunciation_url img = B.IMG(B.CLASS("sound"), src="/images/audio.png") return B.A(img, B.CLASS("sound"), href=url) return None @property def spanish_pronunciation_url(self): """URL for the audio file for pronunciation of the Spanish name.""" if not hasattr(self, 
"_spanish_pronunciation_url"): self._spanish_pronunciation_url = None node = self.doc.root.find("TranslatedName/MediaLink/MediaID") if node is not None: id = node.get(f"{{{Doc.NS}}}ref") if id: url = f"GetCdrBlob.py?disp=inline&id={id}" self._spanish_pronunciation_url = url return self._spanish_pronunciation_url @staticmethod def markup_for_name(name): """Highlight insertion/deletion markup for the term name. Pass: name - parsed XML node for the term name string """ doc = Doc(Concept.GUEST, xml=etree.tostring(name)) result = doc.filter(Concept.FILTER) return html.fromstring(str(result.result_tree).strip()) @staticmethod def __make_capped_name(node): """Helper method for uppercasing the first character of a name. Pass: node - parsed XML node containing the term name """ node = deepcopy(node) node.tag = "CappedNameString" for n in node.iter("*"): if n.text is not None and n.text.strip(): n.text = n.text[0].upper() + n.text[1:] break elif n is not node and n.tail is not None and n.tail.strip(): n.tail = n.tail[0].upper() + n.tail[1:] break return node @staticmethod def __append_en_ingles(node): """Helper method for marking this name as an English substitute. Pass: node - XML node to which the suffix is added """ last_child = None for child in node.findall("*"): last_child = child if last_child is not None: if last_child.tail is not None: last_child.tail += Concept.EN_INGLES else: last_child.tail = Concept.EN_INGLES else: node.text += Concept.EN_INGLES class Video: """Information about a YouTube video.""" IMAGE_URL = "https://img.youtube.com/vi/{}/hqdefault.jpg" VIDEO_URL = "https://www.youtube.com/watch?v={}" SESSION = Session("guest") def __init__(self, node): """Capture the caller's information. 
Pass: node - wrapper node for the video information """ self.__node = node @property def id(self): """CDR ID for the video's Media document.""" if not hasattr(self, "_id"): node = self.node.find("VideoID") self._id = None if node is not None: try: self._id = Doc.extract_id(node.get(f"{{{Doc.NS}}}ref")) except: pass return self._id @property def img(self): """Still image displayed for the video.""" if not hasattr(self, "_img"): url = self.IMAGE_URL.format(self.youtube_id) self._img = builder.IMG(src=url) return self._img @property def link(self): """Link for playing the YouTube video.""" if not hasattr(self, "_link"): url = self.VIDEO_URL.format(self.youtube_id) self._link = builder.A("Watch video on YouTube", href=url) return self._link @property def node(self): """Wrapper element for the video information.""" return self.__node @property def row(self): """HTML markup for displaying the video info and link.""" if not hasattr(self, "_row"): B = builder args = self.text, B.BR(), self.img, B.BR(), self.link self._row = B.TR(B.TD("Video Link"), B.TD(*args)) return self._row @property def text(self): """String describing the video, displayed at the top.""" if not hasattr(self, "_text"): self._text = None node = self.node.find("SpecificMediaTitle") if node is not None: self._text = Doc.get_text(node, "").strip() if not self._text: node = self.node.find("VideoID") if node is not None: self._text = Doc.get_text(node, "").strip() return self._text @property def youtube_id(self): """Token for the URL to play the video.""" if not hasattr(self, "_youtube_id"): doc = Doc(self.SESSION, id=self.id) node = doc.root.find("PhysicalMedia/VideoData/HostingID") self._youtube_id = Doc.get_text(node, "").strip() or None return self._youtube_id