def rows(self):
    """Values for the report table."""
    rows = []
    for database in sorted(self.DATABASES):
        for account in self.DATABASES[database]:
            try:
                db.connect(user=account, database=database)
                status = self.__control.ok_image
            except:
                status = self.__control.HTMLPage.B.SPAN("LOGIN FAILED")
                status.set("class", "error")
            status = self.__control.Reporter.Cell(status, center=True)
            rows.append([database, account, status])
    return rows

def __init__(self, options, logger):
    """
    Save the logger object and extract and validate the settings:

    mode        must be "test" or "live" (required); test mode restricts
                recipient list for report
    recip       optional email address, used when testing so we don't
                spam anyone else
    skip-email  optional Boolean, defaults to False; if True, don't email
                the report to anyone
    log-level   "info", "debug", or "error"; defaults to "info"
    """
    self.options = options
    self.mode = options["mode"]
    self.recip = options.get("recip")
    self.skip_email = options.get("skip-email") or False
    self.test = self.mode == "test"
    self.logger = logger
    if self.mode not in self.MODES:
        raise Exception("invalid mode %s" % repr(self.mode))
    self.cursor = db.connect(user="******").cursor()

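# Hypothetical usage sketch (not part of the original source): "Control"
# stands in for whichever class defines the constructor above, and the
# option values are made up for illustration.
import logging

options = {"mode": "test", "recip": "developer@example.gov", "skip-email": True}
logger = logging.getLogger("nightly-report")
control = Control(options, logger)   # raises Exception if mode is not "test"/"live"
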
def checkJobStatus(jobId):
    # Defensive programming.
    tries = MAX_RETRIES
    row = None  # make sure "row" is defined even if every attempt fails
    while tries:
        try:
            conn = db.connect(timeout=300)
            cursor = conn.cursor()
            cursor.execute("""\
                SELECT id, status, started, completed, messages
                  FROM pub_proc
                 WHERE id = %d""" % int(jobId))
            row = cursor.fetchone()

            # We can stop trying now, we got it.
            tries = 0
        except Exception as e:
            LOGGER.exception("*** Failure connecting to DB ***")
            LOGGER.info("*** Unable to check status for PubJob%s", jobId)
            waitSecs = (MAX_RETRIES + 1 - tries) * RETRY_MULTIPLIER
            LOGGER.info("    RETRY: %d retries left; waiting %f seconds",
                        tries, waitSecs)
            time.sleep(waitSecs)
            tries -= 1
    if not row:
        raise Exception("*** (3) Tried to connect %d times. No Pub Job-ID."
                        % MAX_RETRIES)
    return row

def get_mssql_settings(self):
    """Return a dictionary of SQL Server properties reported by sp_server_info."""
    cursor = db.connect().cursor()
    cursor.execute("EXEC sp_server_info")
    settings = {}
    for attr_id, attr_name, attr_value in cursor.fetchall():
        settings[attr_name] = attr_value
    return settings

def check_unique_title(self, tier, parser):
    """
    Look for the title in the target server

    Make sure the document is not already installed in the target server.
    This check is actually redundant, as the CDR server will enforce the
    assumption. Can't hurt to check twice, though.

    Pass:
        tier - location of target server (if not localhost)
        parser - used for reporting errors

    Raise:
        exception if the filter is already installed on the specified tier
    """
    cursor = db.connect(name="CdrGuest", tier=tier).cursor()
    query = db.Query("document d", "d.id")
    query.join("doc_type t", "t.id = d.doc_type")
    query.where(query.Condition("d.title", self.title))
    query.where("t.name = 'Filter'")
    rows = query.execute(cursor).fetchall()
    cursor.close()
    if rows:
        ids = ", ".join([str(row[0]) for row in rows])
        args = self.title, ids
        parser.error("{!r} already present ({}) in the CDR".format(*args))

def dictionary_conn(self):
    """Connection to the PDQ dictionary tables."""
    if not hasattr(self, "_dictionary_conn"):
        opts = dict(database="pdq_dictionaries", tier=self.tier)
        self._dictionary_conn = db.connect(**opts)
    return self._dictionary_conn

def cdr_cursor(self):
    """Database cursor for the CDR tables."""
    if not hasattr(self, "_cdr_cursor"):
        opts = dict(tier=self.tier, user="******")
        self._cdr_cursor = db.connect(**opts).cursor()
    return self._cdr_cursor

def __init__(self, credentials, **opts):
    """
    Instantiates a new object for republishing CDR documents to Cancer.gov.

    Pass:
        credentials - public session identifier for a CDR login with
                      sufficient permissions to create a publishing job,
                      or a tuple with two members: the CDR account ID and
                      password with which such a session can be created
        host        - optional string identifying the CDR tier on which
                      the re-publishing job is to be submitted; defaults
                      to None (which falls back on the local tier)
    """
    self.__credentials = credentials
    self.__tier = opts.get("host")
    self.__conn = db.connect()
    self.__cursor = self.__conn.cursor()
    self.__onCG = self.__getDocsOnCG()
    self.__logger = cdr.Logging.get_logger("publish")

def __init__(self, opts):
    """
    Collect and validate runtime settings and set up logging.
    """
    self.logger = cdr.Logging.get_logger("deploy", console=True)
    self.opts = opts
    self.session = self.login()
    self.cursor = cdrdb.connect(name="CdrGuest").cursor()

def conn(self):
    """
    Connection to the CDR database
    """
    if not hasattr(self, "_conn"):
        opts = dict(user="******")
        self._conn = cdrdb.connect(**opts)
    return self._conn

def cursor(self):
    """
    Reference to read-only CDR database cursor
    """
    if not hasattr(self, "_cursor"):
        conn = db.connect(user="******", tier=self.tier)
        self._cursor = conn.cursor()
    return self._cursor

def statusPubJobFailure(jobId):
    # Defensive programming.
    tries = MAX_RETRIES
    row = cursor = None
    while tries:
        try:
            conn = db.connect(timeout=300)
            cursor = conn.cursor()
            cursor.execute("""\
                SELECT id, status, started, completed, messages
                  FROM pub_proc
                 WHERE id = %d""" % int(jobId))
            row = cursor.fetchone()
            LOGGER.info("Job%d status: %s", row[0], row[1])

            # We can stop trying now, we got it.
            tries = 0
        except Exception:
            LOGGER.exception("*** Failure connecting to DB ***")
            LOGGER.warning("*** Unable to set job status to 'Failure'.")
            LOGGER.info("*** PubJob%s", jobId)
            waitSecs = (MAX_RETRIES + 1 - tries) * RETRY_MULTIPLIER
            LOGGER.info("    RETRY: %d retries left; waiting %f seconds",
                        tries, waitSecs)
            time.sleep(waitSecs)
            tries -= 1
    if cursor is None:
        raise Exception("Unable to connect to the database")

    # Setting the job status to 'Failure' rather than leaving it as
    # 'In process'.  That way a new job won't fail until the job
    # status has been manually updated.
    # -------------------------------------------------------------
    try:
        cursor.execute("""\
            UPDATE pub_proc
               SET status = 'Failure'
             WHERE id = %d
               AND status = 'In process'""" % int(jobId))
        conn.commit()
    except Exception:
        LOGGER.exception("*** Failure updating job status ***")
        LOGGER.info("*** Manually set the job status to 'Failure'.")
        LOGGER.info("*** PubJob%s", jobId)
    if not row:
        raise Exception("*** (3) Tried to connect %d times. No Pub Job-ID."
                        % MAX_RETRIES)
    return row

def getVersions(ids, sDate, eDate, type=''):
    rows = ""

    # Looking for publishable versions only if type='pub'
    # ---------------------------------------------------
    if type == 'pub':
        pubType = """ AND dv.publishable = 'Y' """
    else:
        pubType = ""

    try:
        conn = db.connect()
        cursor = conn.cursor()
        # dv.id          -> CDR-ID
        # dv.num         -> version number
        # t.value        -> title
        # fp.dt          -> date of first publishable version
        # dv.dt          -> date of last publishable version
        # dv.publishable ->
        # dv.comment     -> version comment
        cursor.execute("""\
            SELECT dv.id, dv.num, t.value, fp.dt, dv.dt,
                   v.value, dv.comment
              FROM doc_version dv
   LEFT OUTER JOIN query_term v
                ON dv.id = v.doc_id
               AND v.path = '/Media/@BlockedFromVOL'
              JOIN query_term t
                ON t.doc_id = dv.id
               AND t.path = '/Media/MediaTitle'
              JOIN doc_version fp
                ON fp.id = dv.id
               AND fp.dt = (SELECT MIN(i.dt)
                              FROM doc_version i
                             WHERE i.id = dv.id
                               AND i.publishable = 'Y')
             WHERE dv.id IN (%s)
               %s
               AND dv.dt BETWEEN '%s' AND '%s'
             ORDER BY id, num
        """ % (', '.join(str(x) for x in ids), pubType, sDate, eDate))
        rows = cursor.fetchall()
    except Exception:
        LOGGER.exception("Failure finding media data")
        raise
    return rows

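# The query above interpolates the IDs and dates directly into the SQL
# string.  A minimal sketch of the same lookup using the "?" placeholders
# seen elsewhere in these snippets (this is an assumption about the
# driver's parameter style, not a change made to the original code, and
# the column list is trimmed for brevity):
placeholders = ", ".join("?" for _ in ids)
sql = f"""\
    SELECT dv.id, dv.num, dv.dt, dv.comment
      FROM doc_version dv
     WHERE dv.id IN ({placeholders})
       AND dv.dt BETWEEN ? AND ?"""
cursor.execute(sql, list(ids) + [sDate, eDate])
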
def getPushJobId(jobId):
    # Defensive programming.
    tries = MAX_RETRIES
    row = None  # make sure "row" is defined even if every attempt fails
    time.sleep(15)
    while tries:
        try:
            conn = db.connect()
            cursor = conn.cursor()
            cursor.execute("""\
                SELECT id, status, started, completed
                  FROM pub_proc
                 WHERE id > %d
                   AND pub_system = 178
                   AND (pub_subset LIKE '%%_Interim-Export'
                        OR pub_subset LIKE '%%_Export')
            """ % int(jobId))
            row = cursor.fetchone()

            # If the SELECT returns nothing a push job was not submitted
            # because another job is still pending.
            # Otherwise the push job may already have completed.
            # -----------------------------------------------------------
            if row is None:
                LOGGER.error("*** Error - No push job waiting. "
                             "Check for pending job")
                cursor.execute("""\
                    SELECT id, messages
                      FROM pub_proc
                     WHERE id = %d""" % int(jobId))
                row = cursor.fetchone()
                LOGGER.info("%s", row[1])
                raise Exception("No push job waiting")

            # We can stop trying now, we got it.
            tries = 0
        except Exception:
            LOGGER.exception("*** Failure connecting to DB ***")
            LOGGER.info("*** Unable to find status for PushJob%s", jobId)
            waitSecs = (MAX_RETRIES + 1 - tries) * RETRY_MULTIPLIER
            LOGGER.info("    RETRY: %d retries left; waiting %f seconds",
                        tries, waitSecs)
            time.sleep(waitSecs)
            tries -= 1
    if not row:
        raise Exception("*** (1) Tried to connect %d times. No Push Job-ID."
                        % MAX_RETRIES)
    return row[0]

def __init__(self, options):
    """
    Find out how far back to ask for trials, and create logging and
    database query objects.
    """
    self.logger = cdr.Logging.get_logger(self.NAME)
    self.conn = db.connect(as_dict=True)
    self.cursor = self.conn.cursor()
    cutoff = options.get("cutoff")
    if cutoff:
        self.cutoff = self.parse_date(cutoff)
    else:
        self.cutoff = self.get_default_cutoff(self.cursor)

def delete_job(self):
    """
    Drop the table row for a job (we already have confirmation from
    the user).
    """
    query = f"DELETE FROM {Job.TABLE} WHERE english_id = ?"
    conn = db.connect()
    cursor = conn.cursor()
    cursor.execute(query, self.english_id)
    query = f"DELETE FROM {Job.ATTACHMENT} WHERE english_id = ?"
    cursor.execute(query, self.english_id)
    conn.commit()
    self.logger.info("removed translation job for CDR%d", self.english_id)
    navigateTo("translation-jobs.py", self.session.name)

def __init__(self, options, logger):
    """
    Validate the settings:

    reports     "english", "spanish", and/or "trials"; defaults to all three
    mode        must be "test" or "live" (required); test mode restricts
                recipient list for report
    skip-email  optional Boolean, defaults to False; if True, don't email
                the report to anyone
    log-level   "info", "debug", or "error"; defaults to "info"
    start       overrides the default start of the date range (a week ago)
    end         overrides the default end of the date range (yesterday)
    recip       optional email address for testing so we don't spam others
    timeout     how many seconds we'll wait for a connection or a query
    """
    self.TODAY = datetime.date.today()
    self.DEFAULT_END = self.TODAY - datetime.timedelta(1)
    self.DEFAULT_START = self.TODAY - datetime.timedelta(7)
    self.logger = logger
    self.logger.info("====================================")
    self.reports = options.get("reports") or self.REPORTS
    self.mode = options["mode"]
    self.skip_email = options.get("skip-email", False)
    self.start = options.get("start") or str(self.DEFAULT_START)
    self.end = options.get("end") or str(self.DEFAULT_END)
    self.test = self.mode == "test"
    self.recip = options.get("recip")
    timeout = int(options.get("timeout", 300))
    self.cursor = db.connect(user="******", timeout=timeout).cursor()
    if self.skip_email:
        self.logger.info("skipping email of reports")

def __init__(self, logger=None, recip=None):
    """
    Collect the glossary term information.

    Pass:
        logger - the scheduled job's logger (unless testing from the
                 command line)
        recip  - optional email address for testing without spamming
                 the users
    """
    self.tier = Tier()
    self.logger = logger
    self.recip = recip
    if self.logger is None:
        self.logger = cdr.Logging.get_logger("glossifier", level="debug")
    self.conn = db.connect()
    self.cursor = self.conn.cursor()

def checkForBlockedImages(sDate, eDate):
    """
    Find Media (image) documents with a version saved in the given date
    range which are currently blocked but have a publishable version.
    """

    # Select all Media (Image) documents with a new version
    # which is currently blocked and a publishable version exists.
    # -----------------------------------------------------------------
    try:
        conn = db.connect(timeout=300)
        cursor = conn.cursor()
        cursor.execute("""\
            SELECT d.id
              FROM document d
              JOIN doc_version dv
                ON dv.id = d.id
              JOIN query_term q
                ON d.id = q.doc_id
              JOIN doc_type dt
                ON d.doc_type = dt.id
               AND dt.name = 'Media'
             WHERE d.active_status = 'I'
               AND dv.dt BETWEEN '%s' AND '%s'
               AND q.path = '/Media/MediaContent/Categories/Category'
               AND q.value NOT IN ('pronunciation', 'meeting recording')
               AND EXISTS (SELECT 'x'
                             FROM doc_version i
                            WHERE i.id = d.id
                              AND i.publishable = 'Y')
             ORDER BY d.id
        """ % (sDate, eDate))
        rows = cursor.fetchall()
        ids = []
        for row in rows:
            ids.append(row[0])
    except:
        LOGGER.exception("Failure finding blocked media documents")
        raise
    return ids

def show_report(self):
    """
    Override the base class because we're storing data, not creating
    a report.  Modified to also populate the history table.
    """
    if self.have_required_values:
        self.process_attachments()
        if self.job.changed:
            conn = db.connect()
            cursor = conn.cursor()
            params = [getattr(self, name) for name in Job.FIELDS]
            params.append(getattr(self, Job.KEY))
            self.logger.info("storing translation job state %s", params)
            placeholders = ", ".join(["?"] * len(params))
            cols = ", ".join(Job.FIELDS + (Job.KEY,))
            strings = Job.HISTORY, cols, placeholders
            cursor.execute(self.INSERT.format(*strings), params)
            if self.job.new:
                strings = (Job.TABLE, cols, placeholders)
                query = self.INSERT.format(*strings)
            else:
                cols = ", ".join([("%s = ?" % name) for name in Job.FIELDS])
                strings = (Job.TABLE, cols, Job.KEY)
                query = self.UPDATE.format(*strings)
            try:
                cursor.execute(query, params)
                conn.commit()
            except Exception as e:
                if "duplicate key" in str(e).lower():
                    self.logger.error("duplicate translation job ID")
                    self.bail("attempt to create duplicate job")
                else:
                    self.logger.error("database failure: %s", e)
                    self.bail(f"database failure: {e}")
            self.logger.info("translation job state stored successfully")
        job = Job(self)
        if self.alert_needed(job):
            self.alert(job)
        navigateTo("translation-jobs.py", self.session.name)
    else:
        self.show_form()

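# The INSERT and UPDATE templates are not shown in this snippet; judging
# from how they are filled in above, they presumably look something like
# the following (an assumption for illustration, not taken from the
# original source):
INSERT = "INSERT INTO {} ({}) VALUES ({})"
UPDATE = "UPDATE {} SET {} WHERE {} = ?"
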
def run(self):
    """Launch any batch jobs which are in the queue."""
    conn = db.connect(user="******")
    cursor = conn.cursor()
    query = db.Query("batch_job", "id", "command")
    query.where(query.Condition("status", cdrbatch.ST_QUEUED))
    for job in query.execute(cursor).fetchall():
        command = job.command
        if not os.path.isabs(command):
            command = f"{cdr.BASEDIR}/{command}"
        script = f"{command} {job.id}"
        if command.endswith(".py"):
            command = cdr.PYTHON
        else:
            command, script = script, ""
        args = conn, job.id, cdrbatch.ST_INITIATING, cdrbatch.PROC_DAEMON
        cdrbatch.sendSignal(*args)
        conn.commit()
        os.spawnv(os.P_NOWAIT, command, (command, script))
        self.logger.info("processed %s", command)

def main():
    cursor = db.connect(user="******").cursor()
    query = db.Query("document d", "d.id", "d.title", "d.xml").order(2)
    query.join("doc_type t", "t.id = d.doc_type")
    query.where("t.name = 'Filter'")
    filters = []
    for doc_id, doc_title, doc_xml in query.execute(cursor).fetchall():
        filters.append(Filter(doc_id, doc_title, doc_xml))
    tbody = builder.TBODY()
    caption = builder.CAPTION(TITLE)
    for name in sorted(Parameter.parameters, key=str.lower):
        parm = Parameter.parameters[name]
        parm.add_rows(tbody)
    page = builder.HTML(
        builder.HEAD(
            builder.TITLE(TITLE),
            builder.LINK(rel="stylesheet", href="/stylesheets/cdr.css"),
            builder.STYLE("th { text-align: right; vertical-align: top; }")),
        builder.BODY(
            builder.TABLE(caption, tbody),
            builder.CLASS("report")))
    print("Content-type: text/html\n")
    print(etree.tostring(page, pretty_print=True).decode("ascii"))

def __init__(self):
    """
    Constructs job control object for restoring data on CDR DEV server.

    1. Make sure we're running on the DEV tier.
    2. Get the parameters for this job.
    3. Create the control object for the job.
    """

    # 1. Safety check.
    if Tier().name != "DEV":
        raise Exception("This script must only be run on the DEV tier.")

    # 2. Get what we need from the command line.
    parser = ArgumentParser()
    parser.add_argument("--directory", required=True,
                        help="directory to restore from")
    parser.add_argument("--user", required=True, help="user ID")
    parser.add_argument("--session", required=True, help="user session")
    parser.add_argument("--skip-content", action="store_true",
                        help="exclude practice documents from being restored")
    opts = parser.parse_args()

    # 3. Create objects used to do the job's work.
    self._logger = cdr.Logging.get_logger("PushDevData", console=True)
    self._conn = db.connect(user="******")
    self._cursor = self._conn.cursor()
    self._dir = opts.directory
    self._skip_content = opts.skip_content or False
    self._old = cdr_dev_data.Data(self._dir)
    self._new = cdr_dev_data.Data(self._cursor, self._old)
    self._uid = opts.user
    self._session = opts.session
    self._logger.info("session %s", self._session)
    self._logger.info("using data preserved in %s", self._dir)
    self._new_doc_types = []

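# Hypothetical invocation (the script file name is assumed from the logger
# name above, and the argument values are illustrative only):
#
#   python PushDevData.py --directory DevData-20240101120000 \
#       --user alice --session <session-id> --skip-content
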
def run(self):
    """Launch any publishing jobs which are in the queue.

    Make sure we don't do any real work if not on a Windows server.
    """
    conn = db.connect(user="******")
    cursor = conn.cursor()
    query = db.Query("pub_proc", "id", "pub_subset")
    query.where("status = 'Ready'")
    rows = query.execute(cursor).fetchall()
    if rows and os.name == "nt":
        cursor.execute("""\
            UPDATE pub_proc
               SET status = 'Started'
             WHERE status = 'Ready'""")
        conn.commit()
    for job_id, pub_subset in rows:
        self.logger.info("starting job %d (%s)", job_id, pub_subset)
        args = ("CdrPublish", self.PUBSCRIPT, str(job_id))
        if os.name == "nt":
            os.spawnv(os.P_NOWAIT, cdr.PYTHON, args)

def process_attachments(self):
    """Update the attachment table from the current form information."""
    conn = db.connect()
    cursor = conn.cursor()
    drop = self.fields.getlist(self.DROP)
    if drop:
        sql = f"DELETE FROM {Job.ATTACHMENT} WHERE attachment_id = ?"
        for attachment_id in drop:
            if int(attachment_id) not in self.attachments:
                bail()
            cursor.execute(sql, (attachment_id,))
        conn.commit()
    nfiles = int(self.fields.getvalue("nfiles", "0"))
    keys = set(self.fields.keys())
    columns = "english_id, file_bytes, file_name, registered"
    values = "?, ?, ?, GETDATE()"
    insert = self.INSERT.format(Job.ATTACHMENT, columns, values)
    for i in range(nfiles):
        name = f"file-{i+1}"
        if name in keys:
            f = self.fields[name]
            if f.file:
                file_bytes = []
                while True:
                    more_bytes = f.file.read()
                    if not more_bytes:
                        break
                    file_bytes.append(more_bytes)
                file_bytes = b"".join(file_bytes)
            else:
                file_bytes = f.value
            if file_bytes:
                self.logger.info("filename=%s", f.filename)
                values = self.english_id, file_bytes, f.filename
                cursor.execute(insert, values)
                conn.commit()
            else:
                self.logger.warning("%s empty", name)

def checkPubJob():
    # Defensive programming.
    tries = MAX_RETRIES
    done = False  # track whether the query ever completed successfully
    while tries:
        try:
            conn = db.connect()
            cursor = conn.cursor()
            cursor.execute("""\
                SELECT id, pub_subset, status, started, completed
                  FROM pub_proc
                 WHERE status NOT IN ('Failure', 'Success')
                   AND pub_system = 178
                   AND pub_subset LIKE '%%Export'
            """)
            # AND pub_subset LIKE '%%_%s' """ % pubType)
            row = cursor.fetchone()
            if row:
                return row

            # We can stop trying now, we got it.
            done = True
            tries = 0
        except Exception:
            LOGGER.exception("*** Failure connecting to DB ***")
            LOGGER.info("*** Unable to check for an in-process export job")
            waitSecs = (MAX_RETRIES + 1 - tries) * RETRY_MULTIPLIER
            LOGGER.info("    RETRY: %d retries left; waiting %f seconds",
                        tries, waitSecs)
            LOGGER.info("waitSecs: %d", waitSecs)
            time.sleep(waitSecs)
            tries -= 1
    if not done:
        raise Exception("*** (2) Tried to connect %d times. No Pub Job-ID."
                        % MAX_RETRIES)
    return 0

def main():
    pull_tables = ("action", "active_status", "ctl", "doc_type",
                   "filter_set", "filter_set_member", "format", "grp",
                   "grp_action", "grp_usr", "link_prop_type",
                   "link_properties", "link_target", "link_type",
                   "link_xml", "query", "query_term_def",
                   "query_term_rule", "usr")
    outputDir = time.strftime('DevData-%Y%m%d%H%M%S')
    cursor = db.connect(user="******").cursor()
    os.makedirs("%s/tables" % outputDir)
    print(f"Saving files to {outputDir}")

    # Saving scheduled jobs
    # ---------------------
    saveJobs(outputDir)

    for table in pull_tables:
        saveTable(cursor, outputDir, table)

    for docType in ["Filter", "PublishingSystem", "Schema"] + sys.argv[1:]:
        saveDocs(cursor, outputDir, docType)

    # Saving individual test/training documents marked for preservation
    # ------------------------------------------------------------------
    saveTestDocs(cursor, outputDir)

    sys.exit(1)

# Session id for access to server filtering
session = "guest"

# If tracing requested, filter the filter to add tracing
if traceDbg:

    # If filter is in database, have to fetch it
    if not inline:

        # Filter supplied by title
        if type(filter) == type(""):

            # Strip off "name:" that we know must be there
            filterTitle = filter[5:]

            # Fetch filter xml from the database, fail if exception
            conn = db.connect()
            cursor = conn.cursor()
            cursor.execute("""
                SELECT xml
                  FROM document d
                  JOIN doc_type t
                    ON d.doc_type = t.id
                 WHERE d.title = ?
                   AND t.name = 'Filter'
            """, (filterTitle,))
            row = cursor.fetchone()
            if not row:
                sys.stderr.write("Unable to find filter '%s'" % filter)
                sys.exit(1)

"""Fetch the current FileSweeper configuration. """ from argparse import ArgumentParser from sys import stdout, stderr from lxml import etree from cdrapi import db # Find out what we're supposed to do parser = ArgumentParser() parser.add_argument("--tier", "-t") parser.add_argument("--raw", "-r", action="store_true") opts = parser.parse_args() # Fetch the XML document cursor = db.connect(user="******", tier=opts.tier).cursor() query = db.Query("document d", "d.id", "d.xml") query.join("doc_type t", "t.id = d.doc_type") query.where("t.name = 'SweepSpecifications'") rows = query.execute(cursor).fetchall() if len(rows) > 1: ids = ", ".join([f"CDR{row.id}" for row in rows]) raise Exception(f"Multiple spec docs: {ids}") if not rows: raise Exception("No sweep specification document found") xml = rows[0].xml stderr.write(f"fetched CDR{rows[0].id}\n") # Print it if opts.raw: stdout.buffer.write(xml.encode("utf-8")) else:
    def normalize(me):
        if me is None:
            return ""
        return re.sub(r"\s+", " ", me).strip()


class Contact:
    def __init__(self, node):
        self.type = node.get("Type")
        self.name = Partner.normalize(get_text(node.find("ContactName")))
        self.id = self.email = None
        detail = node.find("ContactDetail")
        if detail is not None:
            self.id = detail.get("{cips.nci.nih.gov/cdr}id")
            self.email = get_text(detail.find("Email"), "").strip()


cursor = db.connect(user="******").cursor()
query = db.Query("query_term", "doc_id")
query.where("path = '/Licensee/LicenseeInformation/LicenseeStatus'")
query.where("value NOT LIKE '%inactive%'")
doc_ids = [row.doc_id for row in query.execute(cursor).fetchall()]
partners = []
select = "SELECT xml FROM document WHERE id = ?"
for doc_id in doc_ids:
    cursor.execute(select, (doc_id,))
    try:
        xml = cursor.fetchone().xml
        root = etree.fromstring(xml.encode("utf-8"))
        partner = Partner(doc_id, root)
        if partner.status and not partner.deactivated:
            partners.append(partner)
    except: