def __get_glossary_title(self, doc_id, doc_type): """ Fetch or construct title for Glossary document For GlossaryTermConcept documents we construct a title in the form: "GTC for [title of first GTN document] Return: string representing document's title """ if doc_type == "GlossaryTermConcept": path = "/GlossaryTermName/GlossaryTermConcept/@cdr:ref" query = db.Query("document d", "d.title").limit(1) query.join("query_term q", "q.doc_id = d.id") query.where(query.Condition("q.path", path)) query.where(query.Condition("q.int_val", doc_id)) query.order("d.title") row = query.execute(self.cursor).fetchone() if row: return "GTC for {}".format(row[0]) return "GTC CDR{:d}".format(doc_id) query = db.Query("document", "title") query.where(query.Condition("id", doc_id)) return query.execute(self.cursor).fetchone()[0]
def ever_validated(self): """ Boolean flag indicating whether this doc has ever been validated It is important to avoid running validation on a document for the first time unless a publishable version is being created (in which case validation is mandatory), because the validation wipes out the XMetaL prompt PIs. """ if not hasattr(self, "_ever_validated"): if "lastp" in self.doc_objects: self._ever_validated = True if not hasattr(self, "_ever_validated"): query = db.Query("document", "val_status") query.where(query.Condition("id", self.id)) row = query.execute(self.job.cursor).fetchone() if not row: message = "Failure retrieving val_status for {}" raise Exception(message.format(self.cdr_id)) if row.val_status in ("I", "V"): self._ever_validated = True if not hasattr(self, "_ever_validated"): query = db.Query("doc_version", "COUNT(*) AS n") query.where(query.Condition("id", self.id)) query.where("val_status IN ('I', 'V')") row = query.execute(self.job.cursor).fetchone() self._ever_validated = row.n > 0 return self._ever_validated
def __init__(self, control, user_id=None): """ Look up the information for the specified user in the database. If no user ID is passed, use that of the user running this script. """ if not user_id: query = db.Query("session", "usr") query.where(query.Condition("name", control.session.name)) user_id = query.execute(control.cursor).fetchone()[0] self.id = user_id query = db.Query("open_usr", "email", "fullname") query.where(query.Condition("id", user_id)) self.email, self.name = query.execute(control.cursor).fetchone()
def defaultLevel(self): query = db.Query("ctl", "val") query.where("grp = 'WebService'") query.where("name = 'DebugLevel'") query.where("inactivated IS NULL") row = query.execute().fetchone() return row.val if row else "1"
def concepts(self): """ Dictionary information for the term concepts. """ if not hasattr(self, "_concepts"): class Concept: """ CDR GlossaryTermConcept document. Attributes: - id: integer for the document's CDR ID - dictionaries: English and Spanish dictionaries for which we have definitions """ def __init__(self, doc_id): self.id = doc_id self.dictionaries = dict(en=set(), es=set()) self._concepts = {} tags = dict(en="TermDefinition", es="TranslatedTermDefinition") for lang in tags: path = "/GlossaryTermConcept/{}/Dictionary".format(tags[lang]) query = db.Query("query_term_pub", "doc_id", "value") query.where(query.Condition("path", path)) rows = query.execute(self.cursor).fetchall() self.logger.debug("fetched %d %s dictionaries", len(rows), lang) for doc_id, dictionary in rows: concept = self._concepts.get(doc_id) if not concept: concept = self._concepts[doc_id] = Concept(doc_id) concept.dictionaries[lang].add(dictionary.strip()) return self._concepts
def get_summaries(self, language): """ Fetch the IDs and titles for published CDR summary documents. OCECDR-4240: all modules to be queued for translation jobs. Pass: language - string representing language of summaries to be returned Return: dictionary of titles of summaries in the specified language indexed by the summary document IDs """ query = db.Query("active_doc d", "d.id", "d.title") query.join("query_term l", "l.doc_id = d.id") query.where("l.path = '/Summary/SummaryMetaData/SummaryLanguage'") query.where(query.Condition("l.value", language)) if language == "English": query.outer("pub_proc_cg c", "c.id = d.id") query.outer("query_term m", "m.doc_id = d.id", "m.path = '/Summary/@ModuleOnly'") query.where("(c.id IS NOT NULL OR m.value = 'Yes')") query.unique() self.logger.debug("query: %s", query) return dict(query.execute(self.cursor).fetchall())
def board(self): """String for the summary's PDQ Editorial board. A summary typically has multiple board links. Only one will match the controller's list of editorial boards shown on the picklist. For a Spanish summary, we have to find the English summary of which it is a translation, as that is the document which will have the link to the editorial board we need. """ if not hasattr(self, "_board"): self._board = "Board not found" b_path = "/Summary/SummaryMetaData/PDQBoard/Board/@cdr:ref" t_path = "/Summary/TranslationOf/@cdr:ref" query = db.Query("query_term_pub b", "b.int_val").unique() if self.control.language == "english": query.where(query.Condition("b.path", b_path)) query.where(query.Condition("b.doc_id", self.id)) else: query.join("query_term_pub t", "t.int_val = b.doc_id") query.where(query.Condition("t.path", t_path)) query.where(query.Condition("t.doc_id", self.id)) for row in query.execute(self.control.cursor).fetchall(): board = self.control.boards.get(row.int_val) if board: self._board = str(board) return self._board
def check_unique_title(self, tier, parser): """ Look for the title in the target server Make sure the document is not already installed in the target server. This check is actually redundant, as the CDR server will enforce the assumption. Can't hurt to check twice, though. Pass: tier - location of target server (if not localhost) parser - used for reporting errors Raise: exception if filter is already install on specified tier """ cursor = db.connect(name="CdrGuest", tier=tier).cursor() query = db.Query("document d", "d.id") query.join("doc_type t", "t.id = d.doc_type") query.where(query.Condition("d.title", self.title)) query.where("t.name = 'Filter'") rows = query.execute(cursor).fetchall() cursor.close() if rows: ids = ", ".join([str(row[0]) for row in rows]) args = self.title, ids parser.error("{!r} already present ({}) in the CDR".format(*args))
def find_changed_terms(cls, args): """ Find all of the CDR Term documents which need to be refreshed. Writes the concept code and CDR ID to the standard output on a single line, separated by the tab character. Writes errors to the standard error file (most of these will be caused by a mismatch in the concept's preferred name string). Can take as much as a couple of hours, unless the --limit option is used. Pass: args - dictionary of command line arguments (this test uses only the --limit option) """ start = datetime.datetime.now() query = db.Query("query_term", "doc_id", "value") query.where("path = '/Term/NCIThesaurusConcept'") query.where("value LIKE 'C%'") query.where("value NOT LIKE 'CDR%'") if args.limit: query.limit(args.limit) for cdr_id, concept_id in query.execute().fetchall(): try: concept = cls(code=concept_id) term_doc = TermDoc(concept, cdr_id=cdr_id) if term_doc.changes: print("%s\tCDR%s" % (concept_id, cdr_id)) except Exception as e: error = "comparing CDR%d to %r" % (cdr_id, concept_id) Concept.logger.exception(error) sys.stderr.write("%s: %s\n" % (error, e)) elapsed = (datetime.datetime.now() - start).total_seconds() sys.stderr.write("elapsed time: %s seconds\n" % elapsed)
def _load_glossary_jobs(self): """ Find Glossary jobs which have the state "Ready for Translation" Complicated by the fact that we have to construct a title for GlossaryTermConcept documents using the title of one of its GlossaryTermName documents. Return: Sequence of tuples of values """ fields = "d.id", "t.name", "u.fullname", "j.state_date" query = db.Query("glossary_translation_job j", *fields) query.join("usr u", "u.id = j.assigned_to") query.join("document d", "d.id = j.doc_id") query.join("doc_type t", "t.id = d.doc_type") query.join("glossary_translation_state s", "s.value_id = j.state_id") query.where("s.value_name = 'Ready for Translation'") jobs = {} rows = query.execute(self.cursor).fetchall() for doc_id, doc_type, name, date in rows: title = self.__get_glossary_title(doc_id, doc_type) jobs[(title.lower(), doc_id)] = (title, doc_id, name, date) return [jobs[key] for key in sorted(jobs)]
def activeCount(jobName): """ Check to see if any jobs with a certain name are in any kind of active state - i.e., not permanently ended. Only looks at the last 24 hours so that, if something crashes and leaves a status of in-process in the table, it will eventually (in 24 hours) clear itself from this query. Pass: jobName - Name of job in batch_job table. Name may include SQL wildcards. '%' finds any in-process batch jobs, regardless of name. Return: Number of active batch jobs. 0 = nothing currently active. """ # Are there any jobs not in one of the active statuses? statuses = (ST_QUEUED, ST_INITIATING, ST_IN_PROCESS) query = db.Query("batch_job", "COUNT(*)") query.where(query.Condition("status", statuses, "IN")) query.where(query.Condition("name", jobName, "LIKE")) query.where("started >= DATEADD(DAY, -1, GETDATE())") try: return query.execute().fetchone()[0] except Exception as e: raise BatchException("Unable to get batch job activity info: %s" % e)
def _load_glossary_users(self): """ Collect information on users with active Glossary translation jobs Documents can be of type GlossaryTermName or GlossaryTermConcept. Return: sequence of database resultset rows sequence of `User` objects """ fields = ("d.id", "t.name", "s.value_name", "u.name", "u.fullname", "u.id", "s.value_pos", "u.email", "j.state_date") query = db.Query("glossary_translation_job j", *fields) query.join("usr u", "u.id = j.assigned_to") query.join("document d", "d.id = j.doc_id") query.join("doc_type t", "t.id = d.doc_type") query.join("glossary_translation_state s", "s.value_id = j.state_id") rows = query.execute(self.cursor).fetchall() users = {} for doc_id, doc_type, state, name, full, uid, pos, email, date in rows: title = self.__get_glossary_title(doc_id, doc_type) key = uid, pos, str(date)[:10], title values = doc_id, title, state, name, full, uid, email, date users[key] = values return [users[key] for key in sorted(users)]
def fragment_matches(self): """Information about summaries which match the title fragment.""" if not hasattr(self, "_fragment_matches"): if not self.fragment: bail("No title fragment to match") pattern = f"{self.fragment}%" query = db.Query("document d", "d.id", "d.title").order(2, 1) query.join("doc_type t", "t.id = d.doc_type") query.where("t.name = 'Summary'") query.where(query.Condition("d.title", pattern, "LIKE")) self._fragment_matches = [] class Summary: def __init__(self, doc_id, display, tooltip=None): self.id = doc_id self.display = display self.tooltip = tooltip for doc_id, title in query.execute(self.cursor).fetchall(): if len(title) > 60: short_title = title[:57] + "..." summary = Summary(doc_id, short_title, title) else: summary = Summary(doc_id, title) self._fragment_matches.append(summary) return self._fragment_matches
def row(self): """Current values for an existing job, if applicable.""" if not hasattr(self, "_row"): query = db.Query(self.TABLE, *self.FIELDS) query.where(query.Condition(self.KEY, self.english_id)) self._row = query.execute(self.__control.cursor).fetchone() return self._row
def queries(self): """Dictionary of the stored SQL queries, indexed by unique name.""" if not hasattr(self, "_queries"): query = db.Query("query", "name", "value") rows = query.execute(self.cursor).fetchall() self._queries = dict(tuple(row) for row in rows) return self._queries
def indications(self): """Approved indications for this drug.""" if not hasattr(self, "_indications"): query = db.Query("query_term_pub", "value").unique() query.where(query.Condition("doc_id", self.id)) query.where(f"path = '{self.APPROVED}'") rows = query.execute(self.control.cursor) self._indications = [row.value for row in rows] return self._indications
def get_published_doc(self, doctype): fields = "d.doc_id", "d.doc_version" query = db.Query("pub_proc_doc d", *fields).limit(1).order("d.doc_id") query.join("pub_proc p", "p.id = d.pub_proc") query.join("doc_version v", "v.id = d.doc_id", "v.num = d.doc_version") query.join("doc_type t", "t.id = v.doc_type") query.where(query.Condition("t.name", doctype)) row = query.execute(self.CURSOR).fetchone() return Doc(self.session, id=row.doc_id, version=row.doc_version)
def build_tables(self): """Serve up the table.""" query = db.Query("session s", *self.FIELDS).order("s.last_act") query.join("usr u", "u.id = s.usr") query.where("s.ended IS NULL") rows = query.execute(self.cursor).fetchall() desc = self.cursor.description cols = [d[0].replace("_", " ").title() for d in desc] return Reporter.Table(rows, columns=cols)
def job_start(self): """ Date/time the export job was created (so a bit of a misnomer) """ if not hasattr(self, "_job_start"): query = cdrdb.Query("pub_proc", "started") query.where(query.Condition("id", self.job_id)) self._job_start = query.execute(self.cursor).fetchone().started return self._job_start
def root(self): """Fetch and parse xml and clean up unwanted elements/markup""" if not hasattr(self, "_root"): query = db.Query("document", "xml") query.where(query.Condition("id", self.cdr_id)) xml = query.execute(self.__control.cursor).fetchone().xml self._root = etree.fromstring(xml.encode("utf-8")) etree.strip_elements(self._root, *self.DROP, with_tail=False) etree.strip_tags(self._root, *self.STRIP) return self._root
def cms_only(self): """List of summary documents we don't give to the data partners.""" if not hasattr(self, "_cms_only"): query = db.Query("query_term_pub", "doc_id") query.where("path = '/Summary/@SVPC'") query.where("value = 'Yes'") rows = query.execute().fetchall() self._cms_only = {row.doc_id for row in rows} return self._cms_only
def populate_form(self, page): page.body.set("class", "admin-menu") page.form.append(page.B.H3("Documentation Categories")) ol = page.B.OL() page.form.append(ol) query = db.Query("query_term", "doc_id", "value").order("value") query.where("path = '/DocumentationToC/ToCTitle'") for row in query.execute(self.cursor).fetchall(): link = page.menu_link("Help.py", row.value, id=row.doc_id) ol.append(page.B.LI(link))
def spanish_id(self): """CDR ID for the translated summary document (if it exists).""" if not hasattr(self, "_spanish_id"): query = db.Query("query_term", "doc_id") query.where("path = '/Summary/TranslationOf/@cdr:ref'") query.where(query.Condition("int_val", self.english_id)) row = query.execute(self.__control.cursor).fetchone() self._spanish_id = row.doc_id if row else None return self._spanish_id
def __init__(self, **opts): """ Capture control settings for job Invoke the base class constructor with the user's options, then determine the list of documents to be transformed. Typically, the actual determination as to which documents are to be processed will have been made by the time the constructor has completed. However, there would be no harm done if the work to make this determination were deferred until the `select()` method is invoked. Different types of global change jobs call for various techniques for selecting documents. For example, for some requests, the users will supply a spreadsheet file containing a column containing the CDR IDs of the documents to be transformed (and frequently other columns with information about specific values to be used in that transformation). Optional keyword arguments: session - string representing CDR login user - CDR user account name (exactly one of `sesssion` or `user` must be provided) mode - "test" (the default) or "live" tier - "DEV" | "QA" | "STAGE" | "PROD" docs - sequence of document IDs, used to optionally override (for testing) the documents which would be identified by the SQL query below) """ # Pull out the list of documents from the command line (if any). self.__doc_ids = opts.pop("docs") # Invoke the base class constructor with the user's options. Job.__init__(self, **opts) # If no documents were specified on the command line, use SQL. # Some things to note: # - query_term_pub.path takes care of determining document type # - the `Job` class comes with its own read-only cursor # - when document IDs are specified on the command line, the # order of specification is preserved, but when we use the # database we're sorting by document ID (at least in this case). # Typically, the order doesn't really matter, but it might in # some cases. Do whatever you need to do for the requirements # of the specific job you're working on. if not self.__doc_ids: query = db.Query("pub_proc_cg c", "c.id").unique() query.join("query_term_pub u", "u.doc_id = c.id") query.where("u.path LIKE '/Summary%ExternalRef/@cdr:xref'") query.where("u.value LIKE '%NCT[0-9]%'") query.order("c.id") rows = query.execute(self.cursor).fetchall() self.__doc_ids = [row[0] for row in rows]
def create_query(self, agent_type): """Create a customized database query depending on the agent type. Pass: agent_type: one of `SINGLE_AGENT` or `COMBINATION` Return: `cdrapi.db.Query` object """ fields = ["d.id", "t.value AS title"] if self.include_fda_approval: fields.append("a.value AS accelerated") fields.append("k.value AS approved_in_children") query = db.Query("active_doc d", *fields).unique().order(2, 1) query.join("query_term_pub t", "t.doc_id = d.id") query.where("t.path = '/DrugInformationSummary/Title'") if self.drug_type: query.join("query_term_pub r", "r.doc_id = d.id") query.where(f"r.path = '{self.TYPE_PATH}'") query.where(query.Condition("r.value", self.drug_type, "IN")) if self.ACCELERATED_APPROVAL in self.fda_approval_status: if self.APPROVED_IN_CHILDREN in self.fda_approval_status: query.outer("query_term_pub a", "a.doc_id = d.id", "a.value = 'Yes'", f"a.path = '{self.ACCEL_PATH}'") query.outer("query_term_pub k","k.doc_id = d.id", "k.value = 'Yes'", f"k.path = '{self.KIDS_PATH}'") query.where("(a.doc_id IS NOT NULL OR k.doc_id IS NOT NULL)") else: query.join("query_term_pub a", "a.doc_id = d.id") query.where(f"a.path = '{self.ACCEL_PATH}'") query.where("a.value = 'Yes'") if self.include_fda_approval: query.outer("query_term_pub k","k.doc_id = d.id", "k.value = 'Yes'", f"k.path = '{self.KIDS_PATH}'") elif self.APPROVED_IN_CHILDREN in self.fda_approval_status: query.join("query_term_pub k", "k.doc_id = d.id") query.where(f"k.path = '{self.KIDS_PATH}'") query.where("k.value = 'Yes'") if self.include_fda_approval: query.outer("query_term_pub a", "a.doc_id = d.id", "a.value = 'Yes'", f"a.path = '{self.ACCEL_PATH}'") elif self.include_fda_approval: query.outer("query_term_pub a", "a.doc_id = d.id", "a.value = 'Yes'", f"a.path = '{self.ACCEL_PATH}'") query.outer("query_term_pub k","k.doc_id = d.id", "k.value = 'Yes'", f"k.path = '{self.KIDS_PATH}'") query.outer("query_term_pub c", "c.doc_id = d.id", f"c.path = '{self.COMBO_PATH}'") if agent_type == self.SINGLE_AGENT: query.where("c.value IS NULL") else: query.where("c.value = 'Yes'") return query
def week(self): """Get the YYYYWW string for the job's ISO week.""" if not hasattr(self, "_week"): self._week = self.opts.week if not self._week: query = db.Query("pub_proc", "started") query.where(query.Condition("id", self.job_id)) started = query.execute().fetchone().started year, week, dow = started.isocalendar() self._week = "{:04d}{:02d}".format(year, week) return self._week
def local_filters(self): """Dictionary of local filters, indexed by title.""" if not hasattr(self, "_local_filters"): self._local_filters = {} query = db.Query("document d", "d.title", "d.xml") query.join("doc_type t", "t.id = d.doc_type") query.where("t.name = 'Filter'") for title, xml in query.execute().fetchall(): self._local_filters[title] = xml return self._local_filters
def get_group_email_addresses(group_name="Developers Notification"): """ Replacement for cdr.getEmailList() which does not exclude retired accounts. """ query = db.Query("usr u", "u.email") query.join("grp_usr gu", "gu.usr = u.id") query.join("grp g", "g.id = gu.grp") query.where(query.Condition("g.name", group_name)) query.where("u.expired IS NULL") return [row[0] for row in query.execute().fetchall() if row[0]]
def summaries(self): """PDQ summaries (`Summary` objects) to be displayed on the report.""" if not hasattr(self, "_summaries"): fields = "q.doc_id", "q.value AS title" if self.id: query = db.Query("query_term q", *fields).unique() query.where(query.Condition("doc_id", self.id)) else: audience = f"{self.AUDIENCES.get(self.audience)}s" language = self.language.title() b_path = "/Summary/SummaryMetaData/PDQBoard/Board/@cdr:ref" t_path = "/Summary/TranslationOf/@cdr:ref" a_path = "/Summary/SummaryMetaData/SummaryAudience" l_path = "/Summary/SummaryMetaData/SummaryLanguage" query = db.Query("query_term q", *fields).unique() query.join("active_doc d", "d.id = q.doc_id") query.join("doc_version v", "v.id = d.id") query.where("v.publishable = 'Y'") query.join("query_term a", "a.doc_id = d.id") query.where(query.Condition("a.path", a_path)) query.where(query.Condition("a.value", audience)) query.join("query_term l", "l.doc_id = d.id") query.where(query.Condition("l.path", l_path)) query.where(query.Condition("l.value", language)) if "all" not in self.board: if language == "English": query.join("query_term_pub b", "b.doc_id = d.id") else: query.join("query_term_pub t", "t.doc_id = d.id") query.where(query.Condition("t.path", t_path)) query.join("query_term b", "b.doc_id = t.int_val") query.where(query.Condition("b.path", b_path)) query.where(query.Condition("b.int_val", self.board, "IN")) query.where("q.path = '/Summary/SummaryTitle'") self.logger.debug("query=\n%s", query) self.logger.debug("self.board = %s", self.board) query.log() rows = query.execute(self.cursor).fetchall() self._summaries = [Summary(self, row) for row in rows] return self._summaries
def url(self): """URL for the linked DrugInformationSummary document, if any.""" if not hasattr(self, "_url"): query = db.Query("query_term u", "u.value") query.join("query_term t", "t.doc_id = u.doc_id") query.where(f"u.path = '{self.URL_PATH}'") query.where(f"t.path = '{self.TERM_PATH}'") query.where(query.Condition("t.int_val", self.id)) rows = query.execute(self.loader.cdr_cursor).fetchall() self._url = rows[0].value if rows else "" return self._url