예제 #1
0
    def send_report(self, jobs):
        """
        Build the weekly new-translation-jobs report and email it.

        The recipient is `self.recip` when that is set; otherwise the
        addresses come from a named email group (a test group when
        `self.test` is set).

        Pass:
          jobs - sequence of tuples of values, handed to create_report()
        """

        body = self.create_report(jobs)
        self.logger.debug("report\n%s", body)

        # Work out who gets the message.
        if self.recip:
            recips = [self.recip]
        else:
            if self.test:
                group = "Test Translation Queue Recips"
            else:
                group = "Spanish Translation Leads"
            recips = Job.get_group_email_addresses(group)

        # Without recipients there is nothing to send; just record the fact.
        if not recips:
            self.logger.error("no email recipients for %s", group)
            return

        subject = "[%s] %s" % (cdr.Tier().name, self.title)
        message = cdr.EmailMessage(
            self.SENDER,
            recips,
            subject=subject,
            body=body,
            subtype="html",
        )
        message.send()
        self.logger.info("sent %s", subject)
        self.logger.info("recips: %s", ", ".join(recips))
예제 #2
0
def sendErrorMessage(msg):
    """
    Email the developers that CheckHotfixRemove failed.

    We want to send an email so that the query doesn't silently fail.

    Parameters:
        msg - text describing the most likely cause; it is inserted
              as-is into the HTML body of the message.

    The SMTP connection is opened as a context manager so that it is
    closed even when sendmail() raises.
    """
    args = cdr.Tier().name, "*** Error: Program CheckHotfixRemove failed!"
    subject = "[%s] %s" % args

    recips = cdr.getEmailList("Developers Notification")
    mailHeader = """\
From: %s
To: %s
Subject: %s
""" % (STR_FROM, ", ".join(recips), subject)

    mailHeader += "Content-type: text/html; charset=utf-8\n"
    mailBody = "<b>Error running HotfixRemove.py</b><br>"
    mailBody += "Most likely %s<br>" % msg
    mailBody += "See log file for details."

    # A blank line separates the headers from the body.
    message = mailHeader + "\n" + mailBody

    # Leaving the with-block sends QUIT and closes the socket, on both
    # the success path and the error path.
    with smtplib.SMTP(SMTP_RELAY) as server:
        server.sendmail(STR_FROM, recips, message.encode('utf-8'))
예제 #3
0
 def send_report(self, control):
     """
     Build this object's report and email it.

     The recipient is `self.recip` when set; in test mode the addresses
     come from the test email group; otherwise `self.email` is used.

     control - object supplying the logger, the test flag, and the
               title used in the subject line
     """
     body = self.create_report(control)
     log = control.logger
     log.debug("report\n%s", body)

     # Pick the recipient list.
     if self.recip:
         recips = [self.recip]
     elif control.test:
         group = "Test Translation Queue Recips"
         recips = Job.get_group_email_addresses(group)
     else:
         recips = [self.email]

     if not recips:
         log.error("no email recipients for %s", group)
     else:
         subject = "[%s] %s" % (cdr.Tier().name, control.title)
         message = cdr.EmailMessage(
             self.SENDER,
             recips,
             subject=subject,
             body=body,
             subtype="html",
         )
         message.send()
         log.info("sent %s", subject)
         log.info("recips: %s", ", ".join(recips))
예제 #4
0
    def send_report(self, report):
        """
        Mail the finished report to the appropriate recipients.

        The recipient is `self.recip` when set; otherwise the addresses
        are looked up from an email group (a test group when `self.test`
        is set).

        report    Serialized HTML document for the report.
        """

        if self.recip:
            recips = [self.recip]
        else:
            group = (
                "Test Publishing Notification"
                if self.test
                else "Licensee Report Notification"
            )
            recips = Job.get_group_email_addresses(group)

        # The subject is prefixed with the name of the current tier.
        subject = "[%s] %s" % (cdr.Tier().name,
                               "PDQ Distribution Partner List")
        message = cdr.EmailMessage(
            self.SENDER,
            recips,
            subject=subject,
            body=report,
            subtype="html",
        )
        message.send()
        self.logger.info("sent %s", subject)
        self.logger.info("recips: %s", ", ".join(recips))
예제 #5
0
class MailerJob:
    """
    Base class for mailer job processing.  Cannot be used directly.
    Public methods include:

        run()
            Top-level method to invoke job processing.

        log(message)
            Appends progress or error information to logfile.

        fillQueue()
            Overridden by derived classes to define processing
            appropriate to each mailer type.  See documentation
            for this method below.

        addToQueue(job)
            Called by the derived classes' implementations of
            fillQueue() to add a PrintJob object to the print
            queue for the mailer job.

        addMailerTrackingDoc(doc, recipient, mailerType)
            Invoked by the derived classes' implementations of
            fillQueue() to insert a Mailer document into the CDR.

        formatAddress(addr)
            Formats address information into a block of printable
            text lines and returns the block.

        getCipsContactAddress(id)
            Returns an Address object for the ContactDetail information
            in a Person document identified as the CIPS contact address.

        getOrganizationAddress(id)
            Returns an Address object for the ContactDetail information
            in an Organization document.  Handles cases of directly
            referenced CIPSContactPerson, and generic Administrators.

        makeIndex()
            Builds an index list of recipients from the dictionary
            of Recipient objects and sorts it by country and
            postalCode.

        getId()
            Returns the ID for the publishing job.

        getSubset()
            Returns the string identifying the specific type
            of publishing job.

        getCursor()
            Returns object for executing database queries.

        getSession()
            Returns key for current CDR session.

        getDocIds()
            Returns the tuple of document IDs found in the
            pub_proc_doc table.

        getRecipients()
            Returns the dictionary containing the Recipient
            objects associated with this job.  Populated by
            the derived classes during the process of filling
            the print queue.  For jobs which use a single
            address for all packages sent to a given person,
            the Person document ID is used as the dictionary
            key.  For jobs in which different addresses can
            be used for the same person, the keys used for
            the dictionary are the fragment links which
            identify a person and a specific address, so
            the same person can appear more than once if
            multiple addresses are used.

        getDocuments()
            Returns the dictionary containing the Document objects
            for the documents which will be mailed out for this job.
            Populated by the derived classes during the process
            of filling the print queue.

        getParm(name)
            Returns a possibly empty tuple of values stored in
            the pub_proc_parm table for this job.  Filled
            by the base class.

        getDeadline()
            Returns a string in the form YYYY-MM-DD for the deadline
            by which the mailer must be responded to.  Can be
            overridden by the derived classes as appropriate.

        getJobTime()
            Returns a string in the form YYYY-MM-DDTHH:MM:SS
            representing the date/time the job processing began.

        commit()
            Commits the current open database transaction.
    """

    #------------------------------------------------------------------
    # Class-level values.
    #------------------------------------------------------------------
    __TIER = cdr.Tier()
    __CDR_EMAIL = "PDQ Operator <*****@*****.**"
    __SMTP_RELAY = "MAILFWD.NIH.GOV"
    __LOGFILE = _LOGFILE
    __DEF_PRINTER = "\\\\CIPSFS1\\HP8100"
    __INCLUDE_PATH = f"{cdr.WORK_DRIVE}:/cdr/Mailers/include"
    __ERR_PATTERN = re.compile("<Err>(.*)</Err>")

    #------------------------------------------------------------------
    # Constructor for base class.
    #------------------------------------------------------------------
    def __init__(self, jobId, batchPrinting=1):
        """
        Remember the job ID and start with empty per-job state.

        Parameters:
            jobId               - Integer for publication job number.
            batchPrinting       - stored as given; run() hands it to the
                                  print step (default 1).
        """
        # Values supplied by the caller, plus the default printer.
        self.__id = jobId
        self.__batchPrinting = batchPrinting
        self.__printer = MailerJob.__DEF_PRINTER

        # Collections populated while the job runs.
        self.__docIds = []
        self.__index = []
        self.__recipients = {}
        self.__documents = {}
        self.__parms = {}

        # Counters and values filled in later.
        self.__nMailers = 0
        self.__letterLink = ""
        self.__cursor = None
        self.__email = None

    #------------------------------------------------------------------
    # Public access methods.
    #------------------------------------------------------------------
    def getId(self):
        """Return the job ID given to the constructor."""
        return self.__id

    def getCursor(self):
        """Return the database cursor (None until the connection is made)."""
        return self.__cursor

    def getSubset(self):
        """Return the pub_subset value read from the job's pub_proc row.

        The attribute is not set by the constructor; it exists only
        after the job's settings have been loaded.
        """
        return self.__subset

    def getSession(self):
        """Return the CDR session string obtained at login.

        The attribute is not set by the constructor; it exists only
        after the job's settings have been loaded.
        """
        return self.__session

    def getDeadline(self):
        """Return the response deadline as a YYYY-MM-DD string.

        The attribute is not set by the constructor; it exists only
        after the job's settings have been loaded.
        """
        return self.__deadline

    def getDocIds(self):
        """Return the document IDs for the job.

        This is an empty list at construction and a tuple once the
        pub_proc_doc rows have been loaded.
        """
        return self.__docIds

    def getRecipients(self):
        """Return the dictionary of recipients (empty at construction)."""
        return self.__recipients

    def getIndex(self):
        """Return the index list (empty until makeIndex() fills it)."""
        return self.__index

    def getDocuments(self):
        """Return the dictionary of Document objects keyed by document ID."""
        return self.__documents

    def getJobTime(self):
        """Return the job start time as a YYYY-MM-DDTHH:MM:SS string.

        The attribute is not set by the constructor; it exists only
        after the job's settings have been loaded.
        """
        return self.__now

    def getCount(self):
        """Return the number of mailers counted so far."""
        return self.__nMailers

    def getMailerIncludePath(self):
        """Return the class-level include path for mailers."""
        return self.__INCLUDE_PATH

    def bumpCount(self):
        """Add one to the mailer counter."""
        self.__nMailers += 1

    def printDirect(self):
        """Turn batch printing off for this job."""
        self.__batchPrinting = 0

    def addToQueue(self, job):
        """Append `job` to the print queue.

        The queue is not created by the constructor; it exists only
        after run() has created it.
        """
        self.__queue.append(job)

    def commit(self):
        """Commit the open transaction on the job's database connection."""
        self.__conn.commit()

    def getParm(self, name):
        """
        Return the values stored for job parameter `name` as a tuple.

        An empty tuple is returned when the parameter has no values.
        """
        values = self.__parms.get(name)
        if not values:
            return ()
        return tuple(values)

    #------------------------------------------------------------------
    # Driver for mailer job processing.
    #------------------------------------------------------------------
    def run(self):
        """
        Top-level driver for a CDR mailer job.

        Runs each processing step in order.  Any exception is logged
        with a traceback, the failure files are packaged, and the job
        is cleaned up as a failure.  Returns 0 for success and 1 for
        failure.
        """
        try:
            self.log("******** starting mailer job ********")
            self.__loadSettings()
            self.log("~~Finished __loadSettings")
            self.__mailerCleanup()
            self.log("~~Finished __mailerCleanup")
            self.__createQueue()
            self.log("~~Finished __createQueue")
            self.fillQueue()
            self.__printQueue(self.__batchPrinting)
            self.createRtfMailers()
            summary = "Processed %d mailers" % self.__nMailers
            self.__cleanup("Success", summary)
            self.log("******** finished mailer job ********")
        except:
            # Deliberately catches everything: the caller only sees 0 or 1.
            eType, eValue = sys.exc_info()[:2]
            errMessage = eValue or eType
            self.log("ERROR: %s" % errMessage, tback=1)
            self.__packageFailureFiles()
            self.__cleanup("Failure", errMessage)
            return 1
        return 0

    #------------------------------------------------------------------
    # Append message to logfile.
    #------------------------------------------------------------------
    def log(self, message, tback=None):
        """
        Write a progress or error message to the log.

        The message is prefixed with the number of the current
        publication job.  When `tback` is true the entry is written
        with the logger's exception method; otherwise it is written at
        info level.  No return value.  No exceptions raised.
        """
        try:
            emit = _LOGGER.exception if tback else _LOGGER.info
            emit("Job %d: %s" % (self.__id, message))
        except:
            # Logging must never break the job.
            pass

    #------------------------------------------------------------------
    # Placeholder for method to populate the print queue for the job.
    #------------------------------------------------------------------
    def fillQueue(self):
        """
        Populate the print queue; must be overridden by derived classes.

        An implementation calls addToQueue() with PrintJob instances,
        one per file that is ready to go straight to the printer (for
        example PostScript or plain text, but not RTF files or
        Microsoft Word documents).  For every copy of every document
        queued it must also call addMailerTrackingDoc() so that a
        tracking document is added to the repository; the mailerType
        passed to that method must be one of the valid values for
        MailerType enumerated in the schema for Mailer documents.

        Queue files, and any intermediate working files, belong in the
        current working directory, and the filenames given to the
        PrintJob constructor must not include any path information.

        The base-class version always raises an exception.
        """
        message = "fillQueue() must be defined by derived class"
        raise Exception(message)

    #------------------------------------------------------------------
    # Placeholder for method to create rtf mailers (if any).
    #------------------------------------------------------------------
    def createRtfMailers(self):
        """
        Hook for derived classes which produce RTF mailers.

        The base-class version does nothing.
        """
        return None

    #------------------------------------------------------------------
    # Generate a document for tracking a mailer.
    #------------------------------------------------------------------
    def addMailerTrackingDoc(self,
                             doc,
                             recipient,
                             mailerType,
                             remailerFor=None,
                             protOrgId=None,
                             email=None):
        """
        Parameters:
            doc         - Object of type Document, defined below
            recipient   - Object of type Recipient, defined below
            mailerType  - String containing a values matching the
                          list of valid values for MailerType
                          enumerated in the schema for Mailer docs.
            remailerFor - optional integer for the document ID of
                          an earlier mailer that was sent out and
                          never responded to, and for which this
                          is a followup remailer.
            protOrgId   - string or integer form of CDR ID for a
                          protocol's lead organization (the one to
                          which this mailer is being sent); used to
                          distinguish between Status and Participant
                          mailers for the same protocol in the
                          same job.
            email       - address used for electronic mailer.
        Return value:
            Integer ID for the newly inserted Mailer document.
        """

        if remailerFor:
            remailerFor = "\n   <RemailerFor cdr:ref='%s'/>" % \
                          cdr.normalize(remailerFor)
        else:
            remailerFor = ""
        if protOrgId:
            protOrg     = "\n   <ProtocolOrg cdr:ref='%s'/>" % \
                          cdr.normalize(protOrgId)
        else:
            protOrg = ""
        if email:
            mode = "Web-based"
            address = """\
   <MailerAddress>
    <Email>%s</Email>
   </MailerAddress>""" % email
        else:
            mode = "Mail"
            address = recipient.getAddress().getXml()
        recipId = "CDR%010d" % recipient.getId()
        docId = "CDR%010d" % doc.getId()
        xml = """\
<CdrDoc Type="Mailer">
 <CdrDocCtl>
  <DocTitle>Mailer for document %s sent to %s</DocTitle>
 </CdrDocCtl>
 <CdrDocXml><![CDATA[
  <Mailer xmlns:cdr="cips.nci.nih.gov/cdr">
   <Type Mode='%s'>%s</Type>%s
   <JobId>%d</JobId>
   <Recipient cdr:ref="%s">%s</Recipient>%s
%s
   <Document cdr:ref="%s">%s</Document>
   <Sent>%s</Sent>
   <Deadline>%s</Deadline>
  </Mailer>]]>
 </CdrDocXml>
</CdrDoc>
""" % (docId, recipId, mode, mailerType, remailerFor, self.__id, recipId,
        recipient.getName(), protOrg, address, docId, doc.getTitle(),
        self.__now, self.getDeadline())
        rsp = cdr.addDoc(self.__session,
                         doc=xml.encode('utf-8'),
                         checkIn="Y",
                         ver="Y",
                         val='Y')
        match = self.__ERR_PATTERN.search(rsp)
        if match:
            err = match.group(1)
            raise Exception("failure adding tracking document for %s: %s" %
                            (docId, err))
        self.__nMailers += 1
        digits = re.sub("[^\d]", "", rsp)
        return int(digits)

    #------------------------------------------------------------------
    # Convert Unicode string to Latin-1 character set.
    #------------------------------------------------------------------
    def encodeLatin1(self, unicodeString):
        """Return `unicodeString` encoded with the latin-1 character set."""
        encoded = unicodeString.encode('latin-1')
        return encoded

    #------------------------------------------------------------------
    # Retrieve the CIPS contact Address for a mailer recipient.
    #------------------------------------------------------------------
    def getCipsContactAddress(self, id, withPersonTitle=TITLE_OMITTED):
        """
        Constructs and returns a new Address object for the document.

        Parameters:
            id              - Integer ID for CDR Person document.
            withPersonTitle - For Address constructor.
        Return value:
            Returns an Address object for the CIPS contact.
        Raises:
            Exception if the document has no CIPSContact value, or if
            the filter returns an error string instead of a result.
        """
        # Make string version of id
        docId = cdr.normalize(id)

        # Find fragment ID for CIPS contact location in Person doc
        rows = cdr.getQueryTermValueForId(
            '/Person/PersonLocations/CIPSContact', id, self.__conn)
        if not rows:
            raise Exception("no CIPSContact for %s" % docId)
        fragId = rows[0]

        # Filter to create AddressElement XML
        filters = ["name:Person Address Fragment With Name"]
        result = cdr.filterDoc(self.__session,
                               filters,
                               docId,
                               parm=(('fragId', fragId), ))

        # Expecting a sequence of xml fragment, messages.  A single string
        # (text or bytes) is an error; this is the same check that
        # getBoardMemberAddress() uses.
        if isinstance(result, (str, bytes)):
            raise Exception("failure extracting contact address "
                            "for %s: %s" % (docId, result))
        return Address(result[0], withPersonTitle)

    #------------------------------------------------------------------
    # Retrieve the contact address for a board member.
    #------------------------------------------------------------------
    def getBoardMemberAddress(self, personId, memberId):
        """
        Build the contact Address for a board member.

        When the board member document names a contact fragment, the
        address is filtered from the Person document using that
        fragment; otherwise it is filtered from the board member
        document itself.

        Parameters:
            personId - ID of the Person document
            memberId - ID of the PDQBoardMemberInfo document
        Raises:
            Exception if the filter returns an error string.
        """

        # Look for the fragment ID of the contact location.
        path = '/PDQBoardMemberInfo/BoardMemberContact/PersonContactID'
        fragIds = cdr.getQueryTermValueForId(path, memberId, self.__conn)

        # Choose the document, filter, and parameters to use.
        if not fragIds:
            docId = cdr.normalize(memberId)
            parms = ()
            filters = ["name:Board Member Address Fragment With Name"]
        else:
            docId = cdr.normalize(personId)
            parms = (('fragId', fragIds[0]), )
            filters = ["name:Person Address Fragment With Name"]

        result = cdr.filterDoc(self.__session, filters, docId, parm=parms)

        # A bare string (text or bytes) coming back signals an error.
        if isinstance(result, (str, bytes)):
            raise Exception("failure extracting contact address "
                            "for %s: %s" % (docId, result))
        return Address(result[0])

    #------------------------------------------------------------------
    # Retrieve the CIPS contact Address object for an Organization
    #------------------------------------------------------------------
    def getOrganizationAddress(self, id):
        """
        Build the CIPS contact Address for an Organization document.

        The addressee is the name of the organization's CIPS contact
        person when one is recorded, and 'Administrator' otherwise.

        Parameters:
            id - Integer ID of the organization document.
        Return value:
            Address object for organization.
        Raises:
            Exception if the organization has no CIPSContact value, or
            if the filter returns an error string instead of a result.
        """
        # Default 'name' of the recipient
        nameStr = 'Administrator'

        # See if we have a CIPS contact person whose real name we can use
        rows = cdr.getQueryTermValueForId(
            '/Organization/OrganizationDetails/CIPSContactPerson/@cdr:ref', id,
            self.__conn)
        if rows:
            # Construct an Address object for person, to get real name
            # Fatal error if we can't find one
            personAddr = self.getCipsContactAddress(rows[0])
            nameStr = personAddr.getAddressee()

        # Find the fragment id in the Organization doc for
        #   the address we need to send to
        rows = cdr.getQueryTermValueForId(
            '/Organization/OrganizationLocations/CIPSContact', id, self.__conn)
        if not rows:
            raise Exception("No CIPSContact element found for "
                            "Organization %d" % id)

        # Filter the organization to construct an address
        filters = ["name:Organization Address Fragment"]
        parms = (("fragId", rows[0]), )
        result = cdr.filterDoc(self.__session, filters, id, parm=parms)

        # Expecting a sequence of xml fragment, messages.  A single string
        # (text or bytes) is an error; this is the same check that
        # getBoardMemberAddress() uses.
        if isinstance(result, (str, bytes)):
            raise Exception("failure extracting contact address "
                            "for %d: %s" % (id, result))

        # Construct an address from returned XML
        orgAddr = Address(result[0])

        # Add or replace the name string with the one we constructed above
        orgAddr.setAddressee(nameStr)

        return orgAddr

    #------------------------------------------------------------------
    # Generate an index of the mailers in order of country + postal code.
    #------------------------------------------------------------------
    def makeIndex(self):
        """
        Rebuild the index of mailers and sort it.

        Each entry is a (country, postalCode, recipient, document)
        tuple, one per document per recipient; the list is sorted with
        the default tuple ordering.
        """
        self.__index = []
        for recip in self.getRecipients().values():
            where = recip.getAddress()
            country = where.getCountry()
            postalCode = where.getPostalCode()
            for doc in recip.getDocs():
                entry = (country, postalCode, recip, doc)
                self.__index.append(entry)
        self.__index.sort()

    #------------------------------------------------------------------
    # Create a formatted address block from an Address object.
    #------------------------------------------------------------------
    def formatAddress(self, addr):
        """Return the block produced by formatting the address `addr`."""
        formatted = addr.format()
        return formatted.getBlock()

    #------------------------------------------------------------------
    # Create the directory for RTF mailers.  Callback used by the
    # derived class where appropriate.
    #------------------------------------------------------------------
    def initRtfMailers(self):
        """
        Make the RTF mailer directory current and record the letter link.

        Tries to change into the job's RTF mailer directory, creating
        it first if the change fails.  Then stores a short message
        holding the URL from which the letters for this job can be
        retrieved.

        Raises:
            Exception if the directory cannot be created or entered.
        """

        # Does the output directory already exist
        try:
            os.chdir(self.__rtfMailerDir)
        except:
            # Doesn't exist, try to create it
            try:
                os.makedirs(self.__rtfMailerDir)
            except:
                self.log("Unable to create rtf mailer directory", tback=1)
                raise Exception("failure creating rtf mailer directory %s" %
                                self.__rtfMailerDir)
            # Now that it exists, make it the working directory
            try:
                os.chdir(self.__rtfMailerDir)
            except:
                self.log("Unable to change to rtf mailer directory", tback=1)
                raise Exception("failure setting working directory to %s" %
                                self.__rtfMailerDir)

        # Specify the hostname based on the environment we're in
        # ------------------------------------------------------
        args = cdr.APPC, "GetBoardMemberLetters.py", self.__id
        url = "https://{}/cgi-bin/cdr/{}?job={:d}".format(*args)

        # Text telling the reader where the letters can be picked up
        self.__letterLink = """
You can retrieve the letters at:

    %s
""" % url

    #------------------------------------------------------------------
    # Clear out orphaned mailer tracking documents (from failed jobs).
    #------------------------------------------------------------------
    def __mailerCleanup(self):
        """
        Ask the CDR to clean up mailer tracking documents.

        Skipped entirely when the SKIP_MAILER_CLEANUP environment
        variable is set (for faster testing).  Logs how many tracking
        documents were marked as deleted and each reported error.  A
        failure of the cleanup itself is logged, not raised.
        """
        if not os.getenv('SKIP_MAILER_CLEANUP'):
            try:
                outcome = cdr.mailerCleanup(self.__session)
                deleted = outcome[0]
                if deleted:
                    self.log("%d tracking document(s) marked as deleted" %
                             len(deleted))
                for problem in outcome[1]:
                    self.log("__mailerCleanup: %s" % problem)
            except:
                self.log("mailerCleanup failure", 1)

    #------------------------------------------------------------------
    # Prepare initial settings for job.
    #------------------------------------------------------------------
    def __loadSettings(self):
        """
        Run the setup steps for the job, in order: dates, database
        connection, CDR session, then the job's rows from the database.
        """
        steps = (
            self.__getDates,
            self.__getDbConnection,
            self.__getCdrSession,
            self.__loadDbInfo,
        )
        for step in steps:
            step()

    #------------------------------------------------------------------
    # Calculate needed dates (now and two months from now).
    #------------------------------------------------------------------
    def __getDates(self):
        """
        Record the job start time and the response deadline.

        The deadline is the start date plus 60 days; the out-of-range
        day number is passed through time.mktime() and time.localtime()
        to obtain the calendar date.
        """
        started = time.localtime(time.time())
        year, month, day = started[0], started[1], started[2]
        due = time.mktime((year, month, day + 60, 0, 0, 0, 0, 0, -1))
        self.__now = time.strftime("%Y-%m-%dT%H:%M:%S", started)
        self.__deadline = time.strftime("%Y-%m-%d", time.localtime(due))

    #------------------------------------------------------------------
    # Log into the CDR server.
    #------------------------------------------------------------------
    def __getCdrSession(self):
        """
        Log into the CDR as the mailer account and keep the session.

        Raises:
            Exception if the login response contains an <Err> element.
        """
        account = "cdrmailers"
        rsp = str(cdr.login(account, cdr.getpw(account)))
        failure = self.__ERR_PATTERN.search(rsp)
        if failure is not None:
            raise Exception("CDR login failure: %s" % failure.group(1))
        self.__session = rsp

    #------------------------------------------------------------------
    # Log into the CDR database.
    #------------------------------------------------------------------
    def __getDbConnection(self):
        """
        Connect to the CDR database and create a cursor.

        Raises:
            Exception describing the failure; the original error is
            attached as the cause.
        """
        try:
            self.__conn = db.connect(user="******")
            self.__cursor = self.__conn.cursor()
        except Exception as e:
            # The message previously ended with a stray "%s" left over
            # from the old %-formatting.
            raise Exception(f"database connection failure: {e}") from e

    #------------------------------------------------------------------
    # Load the settings for this job from the database.
    #------------------------------------------------------------------
    def __loadDbInfo(self):
        """
        Load the job's row, its document rows, and its parameter rows
        from the database, in that order.
        """
        loaders = (
            self.__getPubProcRow,
            self.__getPubProcDocRows,
            self.__getPubProcParmRows,
        )
        for load in loaders:
            load()

    #------------------------------------------------------------------
    # Load the row which matches this job from the pub_proc table.
    #------------------------------------------------------------------
    def __getPubProcRow(self):
        """
        Load this job's output directory, email, and subset from pub_proc.

        Also derives the RTF mailer directory name by appending "-r"
        to the output directory.

        Raises:
            Exception if the row is missing or the query fails; the
            original error is attached as the cause.
        """
        try:
            self.__cursor.execute(
                """\
                SELECT output_dir, email, pub_subset
                  FROM pub_proc
                 WHERE id = ?""", (self.__id, ))
            row = self.__cursor.fetchone()
            if not row:
                raise Exception("unable to find job %d" % self.__id)
            (self.__outputDir, self.__email, self.__subset) = row
            self.__rtfMailerDir = self.__outputDir + "-r"
        except Exception as e:
            # Must be an f-string, otherwise the message shows a literal
            # "{e}" instead of the underlying error.
            raise Exception(
                f"database error retrieving pub_proc row: {e}") from e

    #------------------------------------------------------------------
    # Load the list of document IDs and other descriptive information
    # for each document to be mailed by this job.
    #------------------------------------------------------------------
    def __getPubProcDocRows(self):
        """
        Load the documents selected for mailing by this job.

        Fills the list of document IDs (converted to a tuple at the
        end) and the dictionary of Document objects keyed by ID.

        Raises:
            Exception if no documents are found or the query fails;
            the original error is attached as the cause.
        """

        try:
            # Find id, version, title, document type name
            #   for each document previously selected for mailing
            self.__cursor.execute(
                """\
                SELECT pub.doc_id, pub.doc_version,
                       doc.title, type.name
                  FROM pub_proc_doc pub
                  JOIN document doc
                    ON pub.doc_id = doc.id
                  JOIN doc_type type
                    ON doc.doc_type = type.id
                 WHERE pub_proc = ?""", (self.__id, ))
            docDescriptorList = self.__cursor.fetchall()

            # Can't continue if there aren't any
            if not docDescriptorList:
                raise Exception("no documents found for job %d" % self.__id)

            # Build a list of pure docIds (used by some software)
            #   and of fuller information
            for row in docDescriptorList:

                # Append the id to plain list of ids
                self.__docIds.append(row[0])

                # Create a document object and add it to list of objects
                # (arguments passed as id, title, type name, version)
                self.__documents[row[0]] = \
                    Document(row[0], row[2], row[3], row[1])

            # Convert the id list to a tuple
            self.__docIds = tuple(self.__docIds)

        except Exception as e:
            # Must be an f-string, otherwise the message shows a literal
            # "{e}" instead of the underlying error.
            raise Exception(
                f"database error retrieving pub_proc_doc rows: {e}") from e

    #------------------------------------------------------------------
    # Load the parameters stored in the pub_proc_parm table for this job.
    #------------------------------------------------------------------
    def __getPubProcParmRows(self):
        """
        Load this job's parameters from the pub_proc_parm table.

        Values are collected per parameter name in row order.  A
        "Printer" parameter also replaces the default printer; the last
        such row wins.

        Raises:
            Exception if the query fails; the original error is
            attached as the cause.
        """
        try:
            # The parameters are passed as a one-element tuple, matching
            # the other queries in this class.
            self.__cursor.execute(
                """\
                SELECT parm_name, parm_value
                  FROM pub_proc_parm
                 WHERE pub_proc = ?
              ORDER BY id""", (self.__id, ))
            for row in self.__cursor.fetchall():
                self.__parms.setdefault(row[0], []).append(row[1])
                if row[0] == "Printer":
                    self.__printer = row[1]
        except Exception as e:
            # Must be an f-string, otherwise the message shows a literal
            # "{e}" instead of the underlying error.
            raise Exception(
                f"database error retrieving job parms: {e}") from e

    #------------------------------------------------------------------
    # Create and populate the print queue.
    # Also creates and changes to the output directory.
    #------------------------------------------------------------------
    def __createQueue(self):
        """
        Start an empty print queue and enter the job's output directory.

        Tries to change into the output directory, creating it first
        if the change fails.

        Raises:
            Exception if the directory cannot be created or entered.
        """
        self.__queue = []

        # Does the output directory already exist
        try:
            os.chdir(self.__outputDir)
        except:
            # Doesn't exist, try to create it
            try:
                os.makedirs(self.__outputDir)
            except:
                self.log("Unable to create working directory", tback=1)
                raise Exception("failure creating working directory %s" %
                                self.__outputDir)
            # Now that it exists, make it the working directory
            try:
                os.chdir(self.__outputDir)
            except:
                self.log("Unable to change to working directory", tback=1)
                raise Exception("failure setting working directory to %s" %
                                self.__outputDir)

    #------------------------------------------------------------------
    # Print the jobs in the queue.
    #------------------------------------------------------------------
    def __printQueue(self, batchPrint=1):
        """
        Print the jobs in the queue, or write a script which prints them.

        When no mailers were counted, the current directory is emptied
        and the output directory is removed instead.  With batchPrint
        set, a PrintJob.cmd script is written, each job adds its part
        to it, and the job's files are then packaged.  Otherwise each
        job is handed the printer name directly.

        Parameters:
            batchPrint - true to write the batch script (default 1)
        """

        # If no mailers at this point, we're just doing electronic mailers.
        if not self.__nMailers:
            for name in os.listdir("."):
                os.unlink("./%s" % name)
            os.chdir("..")
            os.rmdir(self.__outputDir)

            # Nothing to print.
            return

        # Direct printing: each job gets the printer name.
        if not batchPrint:
            for i, job in enumerate(self.__queue, start=1):
                job.Print(self.__printer, self.log, batchPrint, i)
            return

        # Batch printing: the with-block makes sure the script file is
        # closed even if one of the jobs raises while printing.
        count = len(self.__queue)
        with open("PrintJob.cmd", "w") as outputFile:
            write = outputFile.write
            write("@echo off\n")
            write("if %1. == . goto usage\n")
            write("if %1. == howmany. goto showcount\n")
            write("if %2. == . goto L1\n")
            for n in range(1, count + 1):
                write("if %%2. == %d. goto L%d\n" % (n, n))
            write("goto usage\n")
            for i, job in enumerate(self.__queue, start=1):
                job.Print(outputFile, self.log, batchPrint, i)
            write("goto done\n")
            write(":usage\n")
            write("echo usage: PrintJob path-to-printer "
                  "[first [last]]\n")
            write("echo    or: PrintJob howmany\n")
            write("echo     (to show how many files the script "
                  "has without printing anything)\n")
            write("echo  e.g.: PrintJob \\\\CIPSFS1\\HP8100\n")
            write("echo    or: PrintJob \\\\CIPSFS1\\HP8100 "
                  "201 400\n")
            write("echo     (to print the second 200 files)\n")
            write(":showcount\n")
            write("echo this script contains %d files\n" % count)
            write(":done\n")
        self.__packageFiles()

    #------------------------------------------------------------------
    # Create archive packages for the job's files.
    # Assumption: the current working directory is the job's
    # output directory.  We switch to the parent of that directory.
    # This side effect should have no undesirable consequences,
    # because this is the last thing we do for the job.
    # Note: all files with the extensions '.xml', '.tex', '.log',
    # '.aux', '.dvi', and '.toc' are packaged in a separate compressed
    # tar archive for intermediate files.  Everything else goes into a
    # second tar archive, used to print the actual mailer documents.
    # Make sure nothing needed by this second archive gets a filename
    # extension used for the intermediate file archive.
    #------------------------------------------------------------------
    def __packageFiles(self):
        """
        Bundle the job's files into two bzip2-compressed tar archives.

        Moves to the parent of the current directory, where the job's
        `Job<id>` directory is expected. Files with the intermediate
        extensions listed in `workExt` go into the support archive;
        whatever is left goes into the print archive. Archived files
        are deleted and the emptied job directory is removed.

        Raises:
          Exception if the job directory is missing or packing fails
        """

        self.log("~~In packageFiles")
        workExt = ('xml', 'tex', 'log', 'aux', 'dvi', 'toc')
        job_dir = "Job%d" % self.getId()
        workName = "SupportFilesForJob%d.tar.bz2" % self.getId()
        printName = "PrintFilesForJob%d.tar.bz2" % self.getId()
        os.chdir("..")
        if not os.path.isdir(job_dir):
            raise Exception("INTERNAL ERROR: cannot find "
                            "directory %s" % job_dir)

        # List the files once, so we only delete what we archived.
        try:
            work_paths = []
            for ext in workExt:
                work_paths.extend(glob.glob('%s/*.%s' % (job_dir, ext)))
            with tarfile.open(workName, 'w:bz2') as workFile:
                for path in work_paths:
                    workFile.add(path)
            for path in work_paths:
                os.unlink(path)
        except Exception as e:
            raise Exception("failure packing working files for job") from e

        try:
            print_paths = ["%s/%s" % (job_dir, name)
                           for name in os.listdir(job_dir)]
            with tarfile.open(printName, 'w:bz2') as printFile:
                for path in print_paths:
                    printFile.add(path)
            for path in print_paths:
                os.unlink(path)
        except Exception as e:
            raise Exception("failure creating print job package") from e
        os.rmdir(job_dir)

    #------------------------------------------------------------------
    # Create single archive package for a failed job's files.
    #------------------------------------------------------------------
    def __packageFailureFiles(self):
        """
        Bundle a failed job's files into a single compressed archive.

        This is a best-effort operation: problems are logged (or, if
        we can't even reach the parent of the output directory,
        ignored) rather than raised.
        """

        self.log("~~In packageFailureFiles")
        job_dir = "Job%d" % self.getId()
        name = "FailedJob%d.tar.bz2" % self.getId()
        try:
            os.chdir(self.__outputDir)
            os.chdir("..")
        except Exception:
            # Nothing we can package if we can't get to the directory.
            return
        if not os.path.isdir(job_dir):
            self.log("Cannot find directory %s" % job_dir)
            return
        try:
            # List the files once, so we only delete what we archived.
            paths = glob.glob('%s/*' % job_dir)
            with tarfile.open(name, 'w:bz2') as archive:
                for path in paths:
                    archive.add(path)
            for path in paths:
                os.unlink(path)
        except Exception as e:
            self.log("failure packing files for failed job: %s" % str(e))
            return
        os.rmdir(job_dir)

    #------------------------------------------------------------------
    # Clean up.
    #------------------------------------------------------------------
    def __cleanup(self, status, message):
        """
        Wrap up the job.

        If an RTF mailer directory is in use, run the permission-fixing
        script from that directory. Then record the job's status, send
        the notification email, and log out of any open session.
        Failures in those last steps are logged, not raised.

        Pass:
          status - passed through to __updateStatus()
          message - passed through to __updateStatus()
        """

        self.log("~~In cleanup")
        if self.__rtfMailerDir:
            os.chdir(self.__rtfMailerDir)
            command = f"{cdr.BASEDIR}/bin/fix-permissions.cmd"
            command = command.replace("/", "\\")
            process = cdr.run_command(command, merge_output=True)
            if process.returncode:
                args = self.__rtfMailerDir, process.stdout
                _LOGGER.error("fixing %s permissions: %s", *args)
            else:
                self.log(f"fixed permissions for {self.__rtfMailerDir}")
        try:
            self.__updateStatus(status, message)
            self.__sendMail()
            if self.__session:
                cdr.logout(self.__session)
        except Exception:
            # Don't trap KeyboardInterrupt/SystemExit here.
            self.log("__cleanup failed, status was '%s'" % status, tback=1)

    #------------------------------------------------------------------
    # Update the pub_proc table's status.
    #------------------------------------------------------------------
    def __updateStatus(self, status, message=None):
        """
        Store the job's status, message, and completion time.

        Updates the job's row in the pub_proc table and commits.
        Failures are logged, not raised.

        Pass:
          status - value for the row's status column
          message - optional value for the messages column (converted
                    to a string if it is not empty)
        """

        self.log("~~In update status, status=%s" % status)
        if message:
            message = str(message)
        try:
            if message:
                self.log("  (message: %s)" % message)
            update = """\
                UPDATE pub_proc
                   SET status = ?,
                       messages = ?,
                       completed = GETDATE()
                 WHERE id = ?"""
            values = status, message, self.__id
            self.__cursor.execute(update, values)
            self.__conn.commit()
        except:
            self.log("__updateStatus failed, status was '%s'" % status,
                     tback=1)

    #------------------------------------------------------------------
    # Inform the user that the job has completed.
    #------------------------------------------------------------------
    def __sendMail(self):
        try:
            if self.__email:
                self.log("Sending mail to %s" % self.__email)
                sender = MailerJob.__CDR_EMAIL
                subject = "[%s] CDR Mailer Job Status" % self.__TIER.name

                # Specify the hostname based on the environment we're in
                # ------------------------------------------------------
                args = cdr.APPC, "PubStatus.py", self.__id
                url = "https://{}/cgi-bin/cdr/{}?id={:d}".format(*args)

                message = """\
Job %d has completed.  You can view a status report for this job at:

    %s
%s
Please do not reply to this message.
""" % (self.__id, url, self.__letterLink)
                opts = dict(subject=subject, body=message)
                cdr.EmailMessage(sender, [self.__email], **opts).send()
        except:
            self.log("failure sending email to %s: %s" %
                     (self.__email, cdr.exceptionInfo()))
예제 #6
0
 </body>
</html>
"""

    # In Testmode we don't want to send the notification to the world
    # ---------------------------------------------------------------
    # Email constants
    # ---------------
    SMTP_RELAY = "MAILFWD.NIH.GOV"
    strFrom = "PDQ Operator <*****@*****.**>"
    if testMode:
        strTo = cdr.getEmailList('Test Publishing Notification')
    else:
        strTo = cdr.getEmailList('Licensee Report Notification')

    args = cdr.Tier().name, 'PDQ Distribution Partner List'
    subject = "[%s] %s" % args

    mailHeader = """\
From: %s
To: %s
Subject: %s
""" % (strFrom, ", ".join(strTo), subject)

    mailHeader += "Content-type: text/html; charset=iso-8859-1\n"

    # Add a Separator line + body
    # ---------------------------
    message = mailHeader + "\n" + mailBody

    #print message
예제 #7
0
class Control:
    """
    This is the class that does the real work. It is separated out so that
    we can provide a way to run this task from the command line.

    Class constants:

    TITLES          Map of report key to distinguishing part of report title.
    DEFAULT_START   Fall back on this for beginning of date range for report.
    DEFAULT_END     Fall back on this for end of date range.
    REPORTS         Full set of reports to be run by default (in order).
    SENDER          First argument to cdr.EmailMessage constructor.
    CHARSET         Used in HTML page.
    TSTYLE          CSS formatting rules for table elements.
    TO_STRING_OPTS  Options used for serializing HTML report object.
    CG              DNS name for this tier's Cancer.gov host.
    B               HTML builder module imported at Control class scope.
    HTML            HTML module imported at Control class scope.

    Instance properties:

    reports         Reports to be run in sequence specified.
    mode            Required report mode ("test" or "live").
    skip_email      If true, don't send report to recipients; just save it.
    start           Beginning of date range for selecting documents for report.
    end             End of date range for selecting documents for report.
    test            Convenience Boolean reflecting whether mode is 'test'.
    logger          Object for recording log information about the report.
    cursor          Object for submitting queries to the database.
    """

    import lxml.html.builder as B
    import lxml.html as HTML
    TITLES = {
        "trials": "Trials",
        "english": "New/Changed English Summaries",
        "spanish": "New/Changed Spanish Summaries",
    }
    REPORTS = ["english", "spanish", "trials"]
    SENDER = "PDQ Operator <*****@*****.**>"
    CHARSET = "utf-8"
    TSTYLE = ("width: 80%", "border: 1px solid #999",
              "border-collapse: collapse", "margin-top: 30px")
    TSTYLE = "; ".join(TSTYLE)
    TO_STRING_OPTS = {
        "pretty_print": True,
        "encoding": CHARSET,
        "doctype": "<!DOCTYPE html>"
    }
    TIER = cdr.Tier()
    CG = TIER.hosts["CG"]

    def __init__(self, options, logger):
        """
        Capture the settings for this run and connect to the database.

        options   Dictionary of runtime settings:

                  reports      sequence of report keys; falls back on
                               Control.REPORTS if missing or empty
                  mode         "test" or "live" (required)
                  skip-email   if true, don't email the reports
                               (default is False)
                  start        start of the date range; defaults to
                               a week before today
                  end          end of the date range; defaults to
                               yesterday
                  recip        optional single email address to use
                               instead of the group recipients
                  timeout      seconds passed to the database
                               connection (default is 300)

        logger    Object used to record what we do.
        """

        today = datetime.date.today()
        self.TODAY = today
        self.DEFAULT_END = today - datetime.timedelta(days=1)
        self.DEFAULT_START = today - datetime.timedelta(days=7)
        self.logger = logger
        logger.info("====================================")
        self.reports = options.get("reports") or self.REPORTS
        self.mode = options["mode"]
        self.skip_email = options.get("skip-email", False)
        self.start = options.get("start") or str(self.DEFAULT_START)
        self.end = options.get("end") or str(self.DEFAULT_END)
        self.test = self.mode == "test"
        self.recip = options.get("recip")
        seconds = int(options.get("timeout", 300))
        connection = db.connect(user="******", timeout=seconds)
        self.cursor = connection.cursor()
        if self.skip_email:
            logger.info("skipping email of reports")

    def run(self):
        """
        Generate the requested reports, in the order requested.

        A failure in one report is logged (with its traceback) and
        does not prevent the remaining reports from running.
        """

        for report_key in self.reports:
            try:
                self.do_report(report_key)
            except Exception as error:
                self.logger.exception("do_report(%s): %s", report_key, error)
        self.logger.info("%s job completed", self.mode)

    def do_report(self, key):
        """
        Build one report, email it (unless suppressed), and save it.

        key       Name of the report to be processed; must be one of
                  the keys of Control.TITLES.
        """

        label = self.TITLES[key]
        self.title = "GovDelivery %s Report (%s to %s)" % (label, self.start,
                                                           self.end)
        self.key = key
        html = self.create_report()
        self.logger.debug("report\n%s", html)
        if not self.skip_email:
            self.send_report(html)
        self.save_report(html)
        self.logger.info(self.title)

    def create_report(self):
        """
        Assemble the serialized HTML page for the current report.

        The trials report is delegated completely to a TrialSet
        object. A summary report is assembled here, one table for
        each combination of audience and new/changed; the English
        report also gets the two drug information summary tables.
        Each table comes from the summary_table() method.
        """

        if self.key == "trials":
            return TrialSet(self).report()
        B = self.B
        heading = B.H3(self.title, style="color: navy; font-family: Arial;")
        date_style = "font-size: .9em; font-style: italic; font-family: Arial"
        date_line = B.P("Report date: %s" % datetime.date.today(),
                        style=date_style)
        body = B.BODY(heading, date_line)
        flags = True, False
        for audience in ("Health professionals", "Patients"):
            for new in flags:
                body.append(self.summary_table("Summary", new, audience))
        if self.key == "english":
            for new in flags:
                table = self.summary_table("DrugInformationSummary", new)
                body.append(table)
        page = B.HTML(self.html_head(), body)
        return self.serialize(page)

    def summary_table(self, doc_type, new, audience=None):
        """
        Get the table for one slice of the report's documents.

        The work is done by a SummarySet object; for "Summary"
        documents we also tell it the language, derived from the
        current report key.

        doc_type    Either "Summary" or "DrugInformationSummary."
        new         If true, find documents first published in the
                    date range. Otherwise, find documents whose
                    DateLastModified value falls within this range.
        audience    Either "Health professionals" or "Patients"
                    (only used for summaries).
        """

        opts = dict(doc_type=doc_type, new=new, audience=audience)
        if doc_type == "Summary":
            opts["language"] = self.key.capitalize()
        summary_set = SummarySet(self, **opts)
        return summary_set.table()

    def save_report(self, report):
        """
        Write the generated report to the cdr/reports directory.

        The file name contains the report key and a timestamp, with
        ".test" inserted before the extension for test runs.

        report    Serialized HTML document for the report (written
                  in binary mode).
        """

        now = datetime.datetime.now().isoformat()
        stamp = now.split(".")[0].replace(":", "").replace("-", "")
        test = ".test" if self.test else ""
        name = "gd-%s-%s%s.html" % (self.key, stamp, test)
        path = "%s/reports/%s" % (cdr.BASEDIR, name)

        # Make sure the file gets closed even if the write fails.
        with open(path, "wb") as fp:
            fp.write(report)
        self.logger.info("created %s", path)

    def html_head(self):
        """
        Create the head element shared by the summary reports.

        The element carries the character set and the report's title.
        """

        meta = self.B.META(charset=self.CHARSET)
        title = self.B.TITLE(self.title)
        return self.B.HEAD(meta, title)

    def send_report(self, report):
        """
        Email the report to the right recipient list.

        An explicit `recip` address overrides the groups. Otherwise
        test runs use the test group and live runs use the group
        matching the current report key. If we end up with no
        recipients, that is logged as an error.

        report    Serialized HTML document for the report.
        """

        # Remember the group name (if any) for the error message below.
        group = None
        if self.recip:
            recips = [self.recip]
        else:
            if self.test:
                group = "Test Publishing Notification"
            else:
                group = {
                    "spanish": "GovDelivery ES Docs Notification",
                    "english": "GovDelivery EN Docs Notification",
                    "trials": "GovDelivery Trials Notification"
                }.get(self.key)

            # Look up the addresses for test runs as well as live runs.
            recips = Job.get_group_email_addresses(group)
        if recips:
            subject = "[%s] %s" % (self.TIER.name, self.title)
            opts = dict(subject=subject, body=report, subtype="html")
            message = cdr.EmailMessage(self.SENDER, recips, **opts)
            message.send()
            self.logger.info("sent %s", subject)
            self.logger.info("recips: %s", ", ".join(recips))
        else:
            self.logger.error("no email recipients for %s", group)

    @classmethod
    def th(cls, label, **styles):
        """
        Build a column header cell for a report table.

        label      Display string for the column header
        styles     Optional additions to (or replacements for) the
                   default styles. See merge_styles() method.
        """

        base = {
            "font-family": "Arial",
            "border": "1px solid #999",
            "margin": "auto",
            "padding": "2px",
        }
        css = cls.merge_styles(base, **styles)
        return cls.B.TH(label, style=css)

    @classmethod
    def td(cls, data, url=None, **styles):
        """
        Build a data cell for a report table.

        data       Data string to be displayed in the cell
        url        Optional URL, causing the data to be wrapped
                   in a link element.
        styles     Optional additions to (or replacements for) the
                   default styles. See merge_styles() method.
        """

        base = {
            "font-family": "Arial",
            "border": "1px solid #999",
            "vertical-align": "top",
            "padding": "2px",
            "margin": "auto"
        }
        css = cls.merge_styles(base, **styles)
        if not url:
            return cls.B.TD(data, style=css)
        link = cls.B.A(data, href=url)
        return cls.B.TD(link, style=css)

    @classmethod
    def li(cls, text, url=None):
        """
        Build a list item element.

        text       Display string for the list item.
        url        Optional URL; if given, the text is wrapped in a
                   link and the font style is put on the link rather
                   than on the list item.
        """

        if not url:
            return cls.B.LI(text, style="font-family: Arial")
        link = cls.B.A(text, href=url, style="font-family: Arial")
        return cls.B.LI(link)

    @classmethod
    def serialize(cls, html):
        """
        Convert the report's tree to its serialized form.

        The serialization options come from Control.TO_STRING_OPTS.

        html       Tree object created using lxml HTML builder.
        """

        options = cls.TO_STRING_OPTS
        return cls.HTML.tostring(html, **options)

    @staticmethod
    def merge_styles(defaults, **styles):
        """
        Allow the default styles for an element to be overridden.

        defaults   Dictionary of style settings for a given element.
        styles     Dictionary of additional or replacement style
                   settings. If passed as separate arguments the
                   setting names with hyphens will have to have been
                   given with underscores instead of hyphens. We
                   restore the names which CSS expects.
        """

        d = dict(defaults, **styles)
        s = ["%s:%s" % (k.replace("_", "-"), v) for k, v in d.items()]
        return ";".join(s)
예제 #8
0
# Open Log file and enter start message
# -------------------------------------
LOGFILE = 'PubEmail.log'
LOGGER = cdr.Logging.get_logger("PubEmail")
LOGGER.info('PubEmail Notification - Started')
LOGGER.info('Arguments: %s', sys.argv)

# Retrieve the Email addresses from the specified group
# -----------------------------------------------------
emailDL = sorted(cdr.getEmailList('Operator Publishing Notification'))
emailDev = sorted(cdr.getEmailList("Developers Notification"))

# Set the variables and send the message.
# The first command-line argument supplies the subject, the second
# supplies the body of the message.
# --------------------------------------
sender = "*****@*****.**"
subject = "[%s] %s" % (cdr.Tier().name, sys.argv[1])
message = """\
Automated Publishing Email Notification:

%s""" % sys.argv[2]

try:
    # Somebody needs to get the message if the group is empty
    if not emailDL:
        emailDL = emailDev
        subject = '*** DL Missing *** %s' % subject

    opts = dict(subject=subject, body=message)
    cdr.EmailMessage(sender, emailDL, **opts).send()
except Exception:
    # Log the failure (with traceback) instead of crashing the caller.
    LOGGER.exception('*** Failure sending email message')
예제 #9
0
</html>
"""
    else:
        raise NoNewDocumentsError('NoNewDocumentsError')

    # In Testmode we don't want to send the notification to the world
    # ---------------------------------------------------------------
    # Email constants
    # ---------------
    if testMode:
        strTo = cdr.getEmailList('Test Publishing Notification')
    else:
        strTo = cdr.getEmailList('Hotfix Remove Notification')
        #strTo.append(u'*****@*****.**')

    args = cdr.Tier().name, "Document Candidates to be removed from Cancer.gov"
    subject = "[%s] %s" % args

    mailHeader = """\
From: %s
To: %s
Subject: %s
""" % (STR_FROM, ', '.join(strTo), subject)

    mailHeader += "Content-type: text/html; charset=utf-8\n"

    # Add a Separator line + body
    # ---------------------------
    message = mailHeader + "\n" + mailBody

    #print message
예제 #10
0
class Control:
    """
    Wrap the processing logic in a single namespace.

    Properties:
        cms_only - set of document IDs for summaries we don't send to partners
        job_id - ID of the most recent licensee job
        job_ids - IDs of all of the licensee jobs, sorted chronologically
        job_path - location of the filtered CDR documents
        logger - object for writing to the log file
        newsums - nested dictionary of checksums for docs we're transferring
        oldsums - nested dictionary of checksums for docs already transferred
        prev_job_id - ID of the job we transferred the last time
        prev_job_path - location of documents transferred by the last run
        opts - command-line argument values controlling runtime options
        types - sequence of exported document types
        week - YYYYWW string representing the ISO week
    """

    CHECKSUMS = "CHECKSUMS"
    LICENSEE_DOCS = "d:/cdr/Output/LicenseeDocs"
    PUB_SHADOW = "d:/cdr/sftp_shadow"
    PUB_SHADOW_FULL = "{}/full".format(PUB_SHADOW)
    LANGUAGES = dict(English="en", Spanish="es")
    MEDIA_CATALOG = "media_catalog.txt"
    LOGGER = cdr.Logging.get_logger("sftp-export")
    SSH = ("d:/cygwin/bin/ssh.exe -i d:/etc/cdroperator_rsa "
           "-o LogLevel=error "
           "-o StrictHostKeyChecking=no")
    TIER = cdr.Tier()
    HOST = TIER.hosts["SFTP"]
    USER = "******"
    PATH = "/sftp/sftphome/cdrstaging/pdq-{}".format(TIER.name.lower())

    def __init__(self):
        """Record the start of the run and the settings we'll use."""
        banner = 47 * "*"
        log = self.logger
        log.info(banner)
        log.info("sftp-export-data.py - Started")
        log.info(banner)
        log.info("Processing %s", self.job_path)
        log.info("week %s", self.week)
        log.info("path is %s", os.environ.get("PATH"))

    def run(self):
        """
        Drive the export from the job's directory.

        There are four steps, some of which can be suppressed by
        the command-line options:

           1. Create catalog files for what we're publishing.
           2. Create compressed archives for the files.
           3. Stage the files to be synced.
           4. Populate the public sFTP data share.
        """

        started = datetime.now()
        os.chdir(self.job_path)

        # The first three steps are skipped for a push-only run.
        if not self.opts.push_only:

            # 1. Create the catalog (manifest) files, unless told not to.
            if not self.opts.skip_catalogs:
                self.create_catalogs()

            # 2. Create the tar files.
            self.create_archives()

            # 3. Copy files and move tar files to the shadow location.
            self.copy_files()

        # 4. Sync the staging area to the sFTP server.
        if not self.opts.create_only:
            self.push_files()
            self.fix_permissions()

        seconds = (datetime.now() - started).total_seconds()
        banner = 47 * "*"
        self.logger.info("")  # Blank line to format log output
        self.logger.info("completed in %f seconds", seconds)

        self.logger.info(banner)
        self.logger.info("sftp-export-data.py - Finished")
        self.logger.info(banner)

    def create_catalogs(self):
        """
        Record the differences between the previous export and this one.

        Catalog files left over for this week are removed first. Then,
        for each document type, the old and new checksums are compared
        to find the added, dropped, and modified files. A document type
        with any differences gets a manifest file (one `name:action`
        line per document). The counts for all of the document types
        go into a single `<week>.changes` file. The summaries are
        also cataloged by language.
        """

        stale = "{}/*.{}".format(self.job_path, self.week)
        for path in glob(stale):
            self.logger.debug("removing %r", path)
            os.remove(path)
        changes = []
        self.logger.info("Processing doctype directories:")
        for doctype in self.types:
            self.logger.info("...processing %s", doctype)
            oldsums = self.oldsums.get(doctype, {})
            newsums = self.newsums.get(doctype, {})
            olddocs, newdocs = set(oldsums), set(newsums)
            added = newdocs - olddocs
            dropped = olddocs - newdocs

            # Each entry starts with the document ID to control the sort.
            docs = [(self.extract_id(name), name, "added") for name in added]
            for name in dropped:
                docs.append((self.extract_id(name), name, "dropped"))
            changed = 0
            for name in olddocs & newdocs:
                if oldsums[name] == newsums[name]:
                    continue
                changed += 1
                docs.append((self.extract_id(name), name, "modified"))
            if docs:
                path = "{}/{}.{}".format(self.job_path, doctype, self.week)
                with open(path, "w") as fp:
                    for _, name, action in sorted(docs):
                        fp.write("{}:{}\n".format(name, action))
                self.logger.debug("%d line(s) in %s", len(docs), path)
            prefix = "{}.{}".format(doctype, self.week)
            changes.extend([
                "{}:added:{:d}\n".format(prefix, len(added)),
                "{}:removed:{:d}\n".format(prefix, len(dropped)),
                "{}:modified:{:d}\n".format(prefix, changed),
            ])
            if doctype == "Summary":
                self.catalog_summaries(newdocs)
        with open("{}/{}.changes".format(self.job_path, self.week), "w") as fp:
            fp.writelines(sorted(changes))
        self.logger.info("catalogs created")

    def catalog_summaries(self, filenames):
        """
        Create Summary.en and Summary.es catalog files.

        Each file gets one line for each summary document in that
        language, giving the document's file name. The lines are
        sorted on the values returned by extract_id().

        Pass:
          filenames - names of the summary document files
        """

        summaries = {"en": [], "es": []}
        for filename in filenames:
            language = self.get_summary_language(filename)
            entry = self.extract_id(filename), filename
            summaries[language].append(entry)
        for language, entries in summaries.items():
            path = "{}/Summary.{}".format(self.job_path, language)
            with open(path, "w") as fp:
                for _, filename in sorted(entries):
                    fp.write("{}\n".format(filename))
        self.logger.info("cataloged summaries by language")

    def get_summary_language(self, filename):
        """
        Find out which language a summary document is written in.

        Pass:
          filename - string naming the file to examine

        Return:
          language code mapped by Control.LANGUAGES from the
          document's SummaryLanguage value
        """

        path = "{}/Summary/{}".format(self.job_path, filename)
        document = etree.parse(path)
        node = document.getroot().find("SummaryMetaData/SummaryLanguage")
        return self.LANGUAGES[cdr.get_text(node)]

    def create_archives(self):
        """
        Build the compressed tar files for the export.

        The `full.tar.gz` archive holds every document type's
        directory, the catalog files which exist for them, the media
        catalog (if present), and the week's `.changes` file. Each
        document type also gets its own separate archive.
        """

        log = self.logger.info
        log("")  # Blank line to format log output
        log("Creating full.tar.gz")
        os.chdir(self.job_path)
        with tarfile.open("full.tar.gz", "w:gz") as full:
            for doctype in self.types:
                log("...adding %s", doctype)
                full.add(doctype)
                manifest = "{}.{}".format(doctype, self.week)
                if os.path.exists(manifest):
                    full.add(manifest)
                if doctype == "Summary":
                    for catalog in ("Summary.en", "Summary.es"):
                        full.add(catalog)
            if os.path.exists(self.MEDIA_CATALOG):
                full.add(self.MEDIA_CATALOG)
            full.add("{}.changes".format(self.week))
        log("")
        log("Creating doctype tar files:")
        for doctype in self.types:
            with tarfile.open("{}.tar.gz".format(doctype), "w:gz") as single:
                log("...creating %s.tar.gz", doctype)
                single.add(doctype)

    def copy_files(self):
        """
        Populate the local sFTP shadow directory.

        Copy the individual document files and catalogs to the shadow
        directory, and move the compressed archive files to that
        location. The shadow's `full` directory is rebuilt from
        scratch each time.
        """

        self.logger.info("")  # Blank line to format log output
        self.logger.info("Copying files to shadow location")
        os.chdir(self.job_path)
        destination = "{}/full.tar.gz".format(self.PUB_SHADOW)

        # The old archive may legitimately be missing; note it and move on.
        try:
            os.remove(destination)
        except OSError as e:
            self.logger.warning("Can't remove %s: %s", destination, e)
        shutil.move("full.tar.gz", destination)
        full = self.PUB_SHADOW_FULL
        shutil.rmtree(full, ignore_errors=True)
        os.mkdir(full)
        shutil.copy("{}.changes".format(self.week), full)
        if os.path.exists(self.MEDIA_CATALOG):
            shutil.copy(self.MEDIA_CATALOG, full)
        for name in self.types:
            destination = "{}/{}".format(full, name)
            self.logger.info("...copying %s", name)
            shutil.copytree(name, destination)

            # The type's archive lands next to the copied directory.
            args = "{}.tar.gz".format(name), "{}.tar.gz".format(destination)
            shutil.move(*args)
            catalog_name = "{}.{}".format(name, self.week)
            if os.path.exists(catalog_name):
                shutil.copy(catalog_name, full)
            if name == "Summary":
                shutil.copy("Summary.en", full)
                shutil.copy("Summary.es", full)

    def push_files(self):
        """
        Sync the shadow directory's content to the sFTP server.

        Runs `rsync` (over ssh) from the shadow directory, logs what
        the command wrote to its output streams, and returns to the
        job's directory.
        """

        template = 'rsync --delete -rae "{}" full* {}@{}:{}'
        command = template.format(self.SSH, self.USER, self.HOST, self.PATH)
        log = self.logger
        log.info("")  # Blank line to format log output
        log.info("ssh host: %s", self.HOST)
        log.debug("ssh user: %s", self.USER)
        log.info("rsync command: %s", command)

        os.chdir(self.PUB_SHADOW)

        process = cdr.run_command(command)
        log.info("")  # Blank line to format log output
        log.info("*** run_command output")
        log.info(process.stdout)

        # Anything on the error stream is treated as a failed sync.
        if not process.stderr:
            log.info("finished syncing files on FTP server")
        else:
            log.info("*** Error:")
            log.info(process.stderr)
            log.info("finished syncing files with errors!!!")

        os.chdir(self.job_path)

    def load_checksums(self, persist=True, prune_cms_only=False):
        """
        Get the checksums for the CDR documents in the job tree.

        Assumes we have already made the current working directory
        the top-level directory for the job.

        If the checksums have already been calculated (as will typically
        be the case for the previous week's files), just load them from
        the file where they have been persisted. In that case neither
        `persist` nor `prune_cms_only` has any effect. Blank lines in
        the persisted file are ignored.

        Pass:
            persist - if True (the default), save the calculated checksums
                      to save us from having to calculate the sums for this
                      job's files in a subsequent run
            prune_cms_only - if True, drop documents we don't send to the
                      partners (the files are deleted from the disk and
                      get no checksum)

        Return:
            nested dictionary of checksums, top level indexed by document
            type name, inner dictionaries indexed by file name, with values
            of hex strings for SHA256 digest hashes
        """

        checksums = {}

        # Fast path: reuse the sums saved by an earlier run.
        if os.path.exists(self.CHECKSUMS):
            with open(self.CHECKSUMS) as fp:
                for line in fp:
                    line = line.strip()
                    if not line:
                        # A stray empty line must not abort the load with
                        # an unpacking error.
                        continue
                    checksum, path = line.split(None, 1)
                    directory, filename = path.split("/")
                    checksums.setdefault(directory, {})[filename] = checksum
            return checksums

        # Slow path: calculate the sums from the files themselves.
        for directory in self.types:
            sums = checksums[directory] = {}
            for path in glob("{}/CDR*".format(directory)):
                filename = os.path.split(path)[-1]
                if prune_cms_only:
                    doc_id = self.extract_id(filename)
                    if doc_id in self.cms_only:
                        os.remove(path)
                        continue
                sums[filename] = self.checksum(path)
            self.logger.debug("calculated %d checksums for %s files",
                              len(sums), directory)
        if persist:
            with open(self.CHECKSUMS, "w") as fp:
                for directory in sorted(checksums):
                    sums = checksums[directory]
                    # Sort numerically by document ID, not by file name.
                    for filename in sorted(sums, key=self.extract_id):
                        path = "{}/{}".format(directory, filename)
                        fp.write("{} {}\n".format(sums[filename], path))
        return checksums

    def fix_permissions(self):
        """
        Open up the transferred files so the data partners can fetch them.

        Runs `chmod -R 755` on the remote host, through the configured
        SSH command, against everything matching `full*` under the
        remote path, and logs the outcome.
        """

        template = '{} {}@{} "chmod -R 755 {}/full*"'
        command = template.format(self.SSH, self.USER, self.HOST, self.PATH)
        self.logger.info("chmod command: %s", command)
        outcome = cdr.run_command(command)

        # Anything on the error stream is treated as a failure.
        if not outcome.stderr:
            self.logger.info("finished fixing permissions on FTP server")
        else:
            self.logger.info("*** Error:")
            self.logger.info(outcome.stderr)
            self.logger.info("finished fixing permissions with errors!!!")

    @property
    def cms_only(self):
        """
        Set of IDs for summary documents we don't give to the data partners.

        These are the documents having a `/Summary/@SVPC` value of `Yes`
        in the `query_term_pub` table. The set is fetched once and cached.
        """

        if hasattr(self, "_cms_only"):
            return self._cms_only
        lookup = db.Query("query_term_pub", "doc_id")
        lookup.where("path = '/Summary/@SVPC'")
        lookup.where("value = 'Yes'")
        matches = lookup.execute().fetchall()
        self._cms_only = set(match.doc_id for match in matches)
        return self._cms_only

    @property
    def job_id(self):
        """
        ID of the licensee job whose documents we are working with.

        A job ID given explicitly on the command line wins; without one
        we fall back on the last (highest) job ID found in the
        LicenseeDocs directory. The value is cached after the first use.
        """

        if hasattr(self, "_job_id"):
            return self._job_id
        requested = self.opts.job
        self._job_id = requested if requested else self.job_ids[-1]
        return self._job_id

    @property
    def job_ids(self):
        """
        Collect all of the job IDs in the LicenseeDocs directory.

        Only entries named `Job` followed by nothing but digits count.
        The sorted list of integer IDs is cached after the first use.

        If we don't find at least one job, there's nothing to transfer,
        so we'll bail.

        Side effects:
            on first use, leaves the current working directory set to
            the job directory

        Raises:
            SystemExit (status 1) if no job directories are found
        """

        if not hasattr(self, "_job_ids"):
            os.chdir(self.LICENSEE_DOCS)
            job_ids = set()
            for name in glob("Job*"):
                match = re.match(r"^Job(\d+)$", name)
                if match:
                    job_ids.add(int(match.group(1)))
            if not job_ids:
                self.logger.error("*** Error: No PDQ partner data found")
                # Raise directly: the `exit()` helper is installed by the
                # `site` module for interactive use and is not guaranteed
                # to exist in every runtime.
                raise SystemExit(1)

            # The cache must be populated before `job_path` is consulted,
            # because `job_path` may itself need the list of job IDs.
            self._job_ids = sorted(job_ids)
            os.chdir(self.job_path)
        return self._job_ids

    @property
    def job_path(self):
        """Directory (under LicenseeDocs) holding the job's documents."""
        if hasattr(self, "_job_path"):
            return self._job_path
        root, number = self.LICENSEE_DOCS, self.job_id
        self._job_path = "{}/Job{:d}".format(root, number)
        return self._job_path

    @property
    def logger(self):
        """Shared logger, with its level set from the `--level` option."""
        if hasattr(self, "_logger"):
            return self._logger
        self._logger = log = Control.LOGGER
        log.setLevel(self.opts.level.upper())
        return log

    @property
    def newsums(self):
        """
        Checksums for the documents we are about to transfer.

        Calculated (or loaded) from the job directory on first use, with
        the documents we don't send to the partners pruned, then cached.
        """

        if hasattr(self, "_newsums"):
            return self._newsums
        os.chdir(self.job_path)
        self._newsums = self.load_checksums(prune_cms_only=True)
        self.logger.info("loaded new checksums from %s", self.job_path)
        return self._newsums

    @property
    def oldsums(self):
        """
        Checksums for the documents we transferred last time.

        The previous job's directory is the preferred source; when there
        is no previous job we use the shadow sFTP directory instead. We
        return to the current job's directory once the sums are loaded.
        """

        if hasattr(self, "_oldsums"):
            return self._oldsums
        source = self.prev_job_path
        if not source:
            source = self.PUB_SHADOW_FULL
        os.chdir(source)
        self._oldsums = self.load_checksums()
        os.chdir(self.job_path)
        self.logger.info("loaded old checksums from %s", source)
        return self._oldsums

    @property
    def opts(self):
        """
        Command-line options, parsed on first use and cached.

        Recognized options are `--job`, `--level`, `--push-only`,
        `--create-only`, `--skip-catalogs`, and `--week`.
        """

        if hasattr(self, "_opts"):
            return self._opts

        # Simple on/off switches, in the order they appear in the help.
        switches = (
            ("--push-only", "copy the latest existing data set"),
            ("--create-only", "create a new data set but do not copy"),
            ("--skip-catalogs", "skip creating auxilliary files"),
        )
        job_help = "enter job-id to process, default: last"
        level_help = "specify log level (debug, warn, [info], error)"

        cli = ArgumentParser()
        cli.add_argument("--job", type=int, help=job_help)
        cli.add_argument("--level", default="info", help=level_help)
        for flag, description in switches:
            cli.add_argument(flag, action="store_true", help=description)
        cli.add_argument("--week", help="use at your own risk")
        self._opts = cli.parse_args()
        return self._opts

    @property
    def prev_job_id(self):
        """
        ID of the job whose documents we transferred the last time.

        This is the second-to-last entry in the sorted list of job IDs,
        or None when fewer than two jobs were found.
        """

        if hasattr(self, "_prev_job_id"):
            return self._prev_job_id
        known = self.job_ids
        self._prev_job_id = known[-2] if len(known) > 1 else None
        return self._prev_job_id

    @property
    def prev_job_path(self):
        """
        Directory holding the documents we transferred the last time.

        None when there is no previous job.
        """

        if hasattr(self, "_prev_job_path"):
            return self._prev_job_path
        if self.prev_job_id is None:
            self._prev_job_path = None
            return self._prev_job_path
        root, number = self.LICENSEE_DOCS, self.prev_job_id
        self._prev_job_path = "{}/Job{:d}".format(root, number)
        return self._prev_job_path

    @property
    def types(self):
        """
        Names of the document types we're transferring.

        Each subdirectory of the job directory counts as one document
        type. Looking them up makes the job directory the current
        working directory; the list is cached after the first use.
        """

        if hasattr(self, "_types"):
            return self._types
        os.chdir(self.job_path)
        self._types = list(filter(os.path.isdir, os.listdir(".")))
        return self._types

    @property
    def week(self):
        """
        YYYYWW string for the job's ISO week.

        A value supplied with the `--week` option is used as is.
        Otherwise the string is built from the ISO year and ISO week
        number of the `started` value in the job's `pub_proc` row.
        """

        if hasattr(self, "_week"):
            return self._week
        self._week = self.opts.week
        if not self._week:
            lookup = db.Query("pub_proc", "started")
            lookup.where(lookup.Condition("id", self.job_id))
            row = lookup.execute().fetchone()

            # The day of the week is not part of the string.
            iso_year, iso_week, _ = row.started.isocalendar()
            self._week = "{:04d}{:02d}".format(iso_year, iso_week)
        return self._week

    @staticmethod
    def checksum(path):
        """
        Calculate the SHA256 checksum for the bytes of a file.

        Comparing checksums, rather than loading the old and new files
        into memory and comparing their contents, cut the time needed
        for catalog generation from 5 1/2 minutes to 1 1/2 minutes on
        the CDR DEV server. It also avoids the problems with aborted
        runs reported in OCECDR-4348.

        SHA256 was chosen over SHA-1 because `git` is expected to move
        from SHA-1 to SHA256 hashes to avoid collisions, and we see no
        reason to settle for less.

        The file is read in 4096-byte blocks, so it is never held in
        memory all at once.

        Pass:
            path - string for relative path of file to checksum

        Return:
            Hex representation for SHA256 hash of file contents
        """

        digest = sha256()
        with open(path, "rb") as stream:
            while True:
                chunk = stream.read(4096)
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def extract_id(name):
        """
        Pull the integer CDR document ID out of a file name.

        The final extension and the first three characters of what
        remains (the `CDR` prefix) are discarded; the rest is converted
        to an integer.

        Note:
            Used as a sort key so that the names are ordered
            numerically rather than alphabetically.

        Pass:
           name - string for the file's name

        Return:
           integer extracted from the name
        """

        stem = os.path.splitext(name)[0]
        digits = stem[len("CDR"):]
        return int(digits)
예제 #11
0
# ---------------------------------------------------------------------
# Created:          2007-04-03        Volker Englisch
# *********************************************************************
import sys, re, cdr, os, shutil, time, getopt
from cdrapi import db
# Host name used to build the URL that is linked from the error report
# --------------------------------------------------------------------
host = cdr.APPC
url = 'https://%s' % host

# Setting directory and file names
# --------------------------------
PUBPATH = os.path.join('d:\\cdr', 'publishing')
# PUBPATH    = os.path.join('d:\\home', 'venglisch', 'cdr', 'publishing')

# Name of the tier we are running on.
TIER = cdr.Tier().name

# NOTE(review): retry settings; the code using them is not visible in
# this part of the file. Presumably a maximum retry count and a factor
# applied to the delay between retries -- confirm against the callers.
MAX_RETRIES = 10
RETRY_MULTIPLIER = 5.0
wait = 60  # number of seconds to wait between status checks

# The performance of the publishing job has greatly improved, allowing
# us to cancel a running job much sooner if it fails to finish.
# Optionally overridden below once we know the publishing subset.
# The limit is expressed in seconds (10800 seconds = 3 hours); the
# production and development tiers currently share the same limit, and
# every other tier gets an extra hour.
# --------------------------------------------------------------------
if cdr.isProdHost():
    waitTotal = 10800  #  3.0 hours
elif cdr.isDevHost():
    waitTotal = 10800  #  3.0 hours
else:
    waitTotal = 14400  #  4.0 hours