Esempio n. 1
0
class AbstractApplication(object):
    """Framework class for writing applications for PydPiper.

    This class defines the default behaviour for accepting common
    command-line options, and executing the application under various
    queueing systems.

    Subclasses should extend the following methods:
        setup_appName()
        setup_logger() [optional, default method is defined here]
        setup_options()
        run()

    Usage:
        class MyApplication(AbstractApplication):
            ...

        if __name__ == "__main__":
            application = MyApplication()
            application.start()
    """

    def __init__(self):
        """Create the option parser and record the installed pydpiper version."""
        # use an environment variable to look for a default config file
        # Alternately, we could use a default location for the file
        # (say `files = ['/etc/pydpiper.cfg', '~/pydpiper.cfg', './pydpiper.cfg']`)
        default_config_file = os.getenv("PYDPIPER_CONFIG_FILE")
        files = [] if default_config_file is None else [default_config_file]
        self.parser = MyParser(default_config_files=files)
        self.__version__ = get_distribution("pydpiper").version  # pylint: disable=E1101

    def _setup_options(self):
        """Register the PydPiper executor and application argument groups."""
        addExecutorArgumentGroup(self.parser)
        addApplicationArgumentGroup(self.parser)

    def _print_version(self):
        """Print the version and exit the process if `show_version` is set."""
        if self.options.show_version:
            print(self.__version__)
            sys.exit()

    def _setup_pipeline(self, options):
        """Create the Pipeline for this application from the parsed options."""
        self.pipeline = Pipeline(options)

    # FIXME check that only one server is running with a given output directory
    def _setup_directories(self):
        """Output and backup directories setup here.

        Uses the current working directory when no output directory was
        given; otherwise uses whatever makedirsIgnoreExisting returns for
        the requested directory.
        """
        if self.options.output_directory:
            self.outputDir = makedirsIgnoreExisting(self.options.output_directory)
        else:
            self.outputDir = os.getcwd()
        self.pipeline.setBackupFileLocation(self.outputDir)

    def _command_file_name(self, when=None):
        """Return the name of the script recording the command and version.

        `when` is an optional time tuple as accepted by time.strftime; the
        current local time is used when it is omitted.
        """
        if when is None:
            when = time.localtime()
        # %M is the minute; %m (month) is already part of the date portion.
        stamp = time.strftime("%d-%m-%Y-at-%H-%M-%S", when)
        return self.options.pipeline_name + "-command-and-version-" + stamp + ".sh"

    def reconstructCommand(self):
        """Log the command line and version, and save them to a shell script.

        The script is written relative to the current working directory and
        also lists the parsed options as a trailing comment.
        """
        reconstruct = ' '.join(sys.argv)
        logger.info("Command is: " + reconstruct)
        logger.info("Command version : " + self.__version__)
        # also, because this is probably a better file for it (also has similar
        # naming conventions as the pipeline-stages.txt file:
        with open(self._command_file_name(), "w") as pf:
            pf.write("#!/usr/bin/env bash\n")
            pf.write("# Command version is: " + self.__version__ + "\n")
            pf.write("# Command was: \n")
            pf.write(reconstruct + '\n')
            pf.write("# options were: \n# %s" % self.options)

    def start(self):
        """Parse the options, build the pipeline and run or submit it.

        Depending on the options this constructs the pipeline via run(),
        optionally writes a dot file of the pipeline graph, and then either
        stops (not executing), submits PBS job scripts, or starts the
        pipeline daemon.
        """
        logger.info("Calling `start`")
        self._setup_options()
        self.setup_options()
        self.options = self.parser.parse_args()
        self.args = self.options.files

        self._print_version()

        # Check to make sure some executors have been specified if we are
        # actually going to run:
        if self.options.execute:
            noExecSpecified(self.options.num_exec)

        self._setup_pipeline(self.options)
        self._setup_directories()

        self.appName = self.setup_appName()
        self.setup_logger()

        # TODO this doesn't capture environment variables
        # or contents of any config file so isn't really complete
        self.reconstructCommand()

        pbs_submit = self.options.queue_type == "pbs" and not self.options.local

        # --create-graph causes the pipeline to be constructed
        # both at PBS submit time and on the grid; this may be an extremely
        # expensive duplication
        if (self.options.execute and not pbs_submit) or self.options.create_graph:
            logger.debug("Calling `run`")
            self.run()
            logger.debug("Calling `initialize`")
            self.pipeline.initialize()
            self.pipeline.printStages(self.options.pipeline_name)

        if self.options.create_graph:
            logger.debug("Writing dot file...")
            nx.write_dot(self.pipeline.G, str(self.options.pipeline_name) + "_labeled-tree.dot")
            logger.debug("Done.")

        if not self.options.execute:
            print("Not executing the command (--no-execute is specified).\nDone.")
            return

        if pbs_submit:
            roq = runOnQueueingSystem(self.options, sys.argv)
            roq.createAndSubmitPbsScripts()
            logger.info("Finished submitting PBS job scripts...quitting")
            return

        # pipelineDaemon runs pipeline, launches Pyro client/server and executors (if specified)
        # if use_ns is specified, Pyro NameServer must be started.
        logger.info("Starting pipeline daemon...")
        pipelineDaemon(self.pipeline, self.options, sys.argv[0])
        logger.info("Server has stopped.  Quitting...")

    def setup_appName(self):
        """sets the name of the application

        The return value is stored as self.appName by start(); this default
        returns None, so subclasses are expected to override it.
        """
        pass

    def setup_logger(self):
        """sets logging info specific to application

        Logs at DEBUG level to a file named after the application name, the
        current time and the process id.
        """
        FORMAT = '%(asctime)-15s %(name)s %(levelname)s %(process)d/%(threadName)s: %(message)s'
        now = datetime.now().strftime("%Y-%m-%d-at-%H:%M:%S")
        FILENAME = str(self.appName) + "-" + now + '-pid-' + str(os.getpid()) + ".log"
        logging.basicConfig(filename=FILENAME, format=FORMAT, level=logging.DEBUG)

    def setup_options(self):
        """Set up the self.options option parser with options this application needs."""
        pass

    def run(self):
        """Run this application.

        Called by start() to construct the pipeline; the default does nothing.
        """
        pass
Esempio n. 2
0
class AbstractApplication(object):
    """Framework class for writing applications for PydPiper.

    This class defines the default behaviour for accepting common
    command-line options, and executing the application under various
    queueing systems.

    Subclasses should extend the following methods:
        setup_appName()
        setup_logger() [optional, default method is defined here]
        setup_options()
        run()

    Usage:
        class MyApplication(AbstractApplication):
            ...

        if __name__ == "__main__":
            application = MyApplication()
            application.start()
    """

    def __init__(self):
        """Configure Pyro, create the option parser and record the version."""
        Pyro.config.PYRO_MOBILE_CODE = 1
        self.parser = MyParser()
        self.__version__ = get_distribution("pydpiper").version

    def _setup_options(self):
        """Register the PydPiper executor and application option groups."""
        addExecutorOptionGroup(self.parser)
        addApplicationOptionGroup(self.parser)

    def _print_version(self):
        """Print the version and exit the process if `show_version` is set."""
        if self.options.show_version:
            # Single-argument call form behaves the same on Python 2 and 3.
            print(self.__version__)
            sys.exit()

    def _setup_pipeline(self):
        """Create the Pipeline object for this application."""
        self.pipeline = Pipeline()

    def _setup_directories(self):
        """Output and backup directories setup here.

        Uses the current working directory when no output directory was
        given; otherwise uses whatever makedirsIgnoreExisting returns for
        the requested directory.
        """
        if self.options.output_directory:
            self.outputDir = makedirsIgnoreExisting(
                self.options.output_directory)
        else:
            self.outputDir = os.getcwd()
        self.pipeline.setBackupFileLocation(self.outputDir)

    def reconstructCommand(self):
        """Log the command line this application was invoked with."""
        # Each argument is followed by a space (including the last one),
        # which keeps the logged text identical to earlier versions.
        reconstruct = "".join(arg + " " for arg in sys.argv)
        logger.info("Command is: " + reconstruct)

    def start(self):
        """Parse the options, build the pipeline and run or submit it.

        With --queue=pbs only the PBS scripts are created. Otherwise the
        pipeline is restarted or constructed via run(), optionally written
        out as a dot file, and executed by the pipeline daemon unless
        execution was disabled.
        """
        self._setup_options()
        self.setup_options()

        self.options, self.args = self.parser.parse_args()

        self._print_version()

        # Check to make sure some executors have been specified.
        noExecSpecified(self.options.num_exec)

        self._setup_pipeline()
        self._setup_directories()

        self.appName = self.setup_appName()
        self.setup_logger()

        if self.options.queue == "pbs":
            roq = runOnQueueingSystem(self.options, sys.argv)
            roq.createPbsScripts()
            return

        if self.options.restart:
            logger.info("Restarting pipeline from pickled files.")
            self.pipeline.restart()
        else:
            self.reconstructCommand()
            self.run()
        # Both paths finish by initializing the pipeline and listing its stages.
        self.pipeline.initialize()
        self.pipeline.printStages(self.appName)

        if self.options.create_graph:
            logger.debug("Writing dot file...")
            nx.write_dot(self.pipeline.G, "labeled-tree.dot")
            logger.debug("Done.")

        if not self.options.execute:
            print("Not executing the command (--no-execute is specified).\nDone.")
            return

        # pipelineDaemon runs pipeline, launches Pyro client/server and executors (if specified)
        # if use_ns is specified, Pyro NameServer must be started.
        logger.info("Starting pipeline daemon...")
        pipelineDaemon(self.pipeline, self.options, sys.argv[0])
        logger.info("Server has stopped.  Quitting...")

    def setup_appName(self):
        """sets the name of the application

        The return value is stored as self.appName by start(); this default
        returns None, so subclasses are expected to override it.
        """
        pass

    def setup_logger(self):
        """sets logging info specific to application

        Logs at DEBUG level to a file named after the application name and
        the current time (down to microseconds).
        """
        FORMAT = '%(asctime)-15s %(name)s %(levelname)s: %(message)s'
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        FILENAME = str(self.appName) + "-" + stamp + ".log"
        logging.basicConfig(filename=FILENAME,
                            format=FORMAT,
                            level=logging.DEBUG)

    def setup_options(self):
        """Set up the self.options option parser with options this application needs."""
        pass

    def run(self):
        """Run this application.

        Called by start() to construct the pipeline; the default does nothing.
        """
        pass
Esempio n. 3
0
class AbstractApplication(object):
    """Framework class for writing applications for PydPiper.

    This class defines the default behaviour for accepting common
    command-line options, and executing the application under various
    queueing systems.

    Subclasses should extend the following methods:
        setup_appName()
        setup_logger() [optional, default method is defined here]
        setup_options()
        run()

    Usage:
        class MyApplication(AbstractApplication):
            ...

        if __name__ == "__main__":
            application = MyApplication()
            application.start()
    """

    def __init__(self):
        """Configure Pyro and create the option parser."""
        Pyro.config.PYRO_MOBILE_CODE = 1
        self.parser = MyParser()

    def _setup_options(self):
        """Register the common PydPiper options on self.parser.

        All options live in one "Basic execution control" group. `execute`
        defaults to True and is toggled by --execute / --no-execute.
        """
        # PydPiper options
        basic_group = OptionGroup(self.parser, "Basic execution control",
                                  "Options controlling how and where the code is run.")
        basic_group.add_option("--uri-file", dest="urifile",
                               type="string", default=None,
                               help="Location for uri file if NameServer is not used. If not specified, default is current working directory.")
        basic_group.add_option("--use-ns", dest="use_ns",
                               action="store_true",
                               help="Use the Pyro NameServer to store object locations")
        basic_group.add_option("--create-graph", dest="create_graph",
                               action="store_true", default=False,
                               help="Create a .dot file with graphical representation of pipeline relationships [default = %default]")
        basic_group.add_option("--num-executors", dest="num_exec",
                               type="int", default=0,
                               help="Launch executors automatically without having to run pipeline_excutor.py independently.")
        basic_group.add_option("--time", dest="time",
                               type="string", default="2:00:00:00",
                               help="Wall time to request for each executor in the format dd:hh:mm:ss")
        basic_group.add_option("--proc", dest="proc",
                               type="int", default=8,
                               help="Number of processes per executor. Default is 8. Also sets max value for processor use per executor.")
        basic_group.add_option("--mem", dest="mem",
                               type="float", default=16,
                               help="Total amount of requested memory. Default is 16G.")
        basic_group.add_option("--ppn", dest="ppn",
                               type="int", default=8,
                               help="Number of processes per node. Default is 8. Used when --queue=pbs")
        basic_group.add_option("--queue", dest="queue",
                               type="string", default=None,
                               help="Use specified queueing system to submit jobs. Default is None.")
        basic_group.add_option("--sge-queue-opts", dest="sge_queue_opts",
                               type="string", default=None,
                               help="For --queue=sge, allows you to specify different queues. If not specified, default is used.")
        basic_group.add_option("--restart", dest="restart",
                               action="store_true",
                               help="Restart pipeline using backup files.")
        basic_group.add_option("--output-dir", dest="output_directory",
                               type="string", default=None,
                               help="Directory where output data and backups will be saved.")
        # --execute and --no-execute share one destination; execute by default.
        self.parser.set_defaults(execute=True)
        basic_group.add_option("--execute", dest="execute",
                               action="store_true",
                               help="Actually execute the planned commands [default]")
        basic_group.add_option("--no-execute", dest="execute",
                               action="store_false",
                               help="Opposite of --execute")
        self.parser.add_option_group(basic_group)

    def _setup_pipeline(self):
        """Create the Pipeline object for this application."""
        self.pipeline = Pipeline()

    def _setup_directories(self):
        """Output and backup directories setup here.

        Uses the current working directory when no output directory was
        given; otherwise uses whatever makedirsIgnoreExisting returns for
        the requested directory.
        """
        if self.options.output_directory:
            self.outputDir = makedirsIgnoreExisting(self.options.output_directory)
        else:
            self.outputDir = os.getcwd()
        self.pipeline.setBackupFileLocation(self.outputDir)

    def reconstructCommand(self):
        """Log the command line this application was invoked with."""
        # Each argument is followed by a space (including the last one),
        # which keeps the logged text identical to earlier versions.
        reconstruct = "".join(arg + " " for arg in sys.argv)
        logger.info("Command is: " + reconstruct)

    def start(self):
        """Parse the options, build the pipeline and run or submit it.

        With --queue=pbs only the PBS scripts are created. Otherwise the
        pipeline is restarted or constructed via run(), optionally written
        out as a dot file, and executed by the pipeline daemon unless
        execution was disabled.
        """
        self._setup_options()
        self.setup_options()

        self.options, self.args = self.parser.parse_args()
        self._setup_pipeline()
        self._setup_directories()

        self.appName = self.setup_appName()
        self.setup_logger()

        if self.options.queue == "pbs":
            roq = runOnQueueingSystem(self.options, sys.argv)
            roq.createPbsScripts()
            return

        if self.options.restart:
            logger.info("Restarting pipeline from pickled files.")
            self.pipeline.restart()
        else:
            self.reconstructCommand()
            self.run()
        # Both paths finish by initializing the pipeline and listing its stages.
        self.pipeline.initialize()
        self.pipeline.printStages(self.appName)

        if self.options.create_graph:
            logger.debug("Writing dot file...")
            nx.write_dot(self.pipeline.G, "labeled-tree.dot")
            logger.debug("Done.")

        if not self.options.execute:
            # Single-argument call form behaves the same on Python 2 and 3.
            print("Not executing the command (--no-execute is specified).\nDone.")
            return

        # pipelineDaemon runs pipeline, launches Pyro client/server and executors (if specified)
        # if use_ns is specified, Pyro NameServer must be started.
        logger.info("Starting pipeline daemon...")
        pipelineDaemon(self.pipeline, self.options, sys.argv[0])
        logger.info("Server has stopped.  Quitting...")

    def setup_appName(self):
        """sets the name of the application

        The return value is stored as self.appName by start(); this default
        returns None, so subclasses are expected to override it.
        """
        pass

    def setup_logger(self):
        """sets logging info specific to application

        Logs at DEBUG level to a file named after the application name and
        the current time (down to microseconds).
        """
        FORMAT = '%(asctime)-15s %(name)s %(levelname)s: %(message)s'
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S%f")
        FILENAME = str(self.appName) + "-" + stamp + ".log"
        logging.basicConfig(filename=FILENAME, format=FORMAT, level=logging.DEBUG)

    def setup_options(self):
        """Set up the self.options option parser with options this application needs."""
        pass

    def run(self):
        """Run this application.

        Called by start() to construct the pipeline; the default does nothing.
        """
        pass
Esempio n. 4
0
class AbstractApplication(object):
    """Framework class for writing applications for PydPiper.

    This class defines the default behaviour for accepting common
    command-line options, and executing the application under various
    queueing systems.

    Subclasses should extend the following methods:
        setup_appName()
        setup_logger() [optional, default method is defined here]
        setup_options()
        run()

    Usage:
        class MyApplication(AbstractApplication):
            ...

        if __name__ == "__main__":
            application = MyApplication()
            application.start()
    """

    def __init__(self):
        """Create the option parser and record the installed pydpiper version."""
        self.parser = MyParser()
        self.__version__ = get_distribution("pydpiper").version

    def _setup_options(self):
        """Register the PydPiper executor and application option groups."""
        addExecutorOptionGroup(self.parser)
        addApplicationOptionGroup(self.parser)

    def _print_version(self):
        """Print the version and exit the process if `show_version` is set."""
        if self.options.show_version:
            # Single-argument call form behaves the same on Python 2 and 3.
            print(self.__version__)
            sys.exit()

    def _setup_pipeline(self):
        """Create the Pipeline object for this application."""
        self.pipeline = Pipeline()

    def _setup_directories(self):
        """Output and backup directories setup here.

        Uses the current working directory when no output directory was
        given; otherwise uses whatever makedirsIgnoreExisting returns for
        the requested directory.
        """
        if self.options.output_directory:
            self.outputDir = makedirsIgnoreExisting(self.options.output_directory)
        else:
            self.outputDir = os.getcwd()
        self.pipeline.setBackupFileLocation(self.outputDir)

    def reconstructCommand(self):
        """Log the command line this application was invoked with."""
        # Each argument is followed by a space (including the last one),
        # which keeps the logged text identical to earlier versions.
        reconstruct = "".join(arg + " " for arg in sys.argv)
        logger.info("Command is: " + reconstruct)

    def start(self):
        """Parse the options, build the pipeline and run or submit it.

        With --queue=pbs only the PBS scripts are created. Otherwise the
        pipeline is constructed via run(), optionally written out as a dot
        file, and executed by the pipeline daemon unless execution was
        disabled.
        """
        self._setup_options()
        self.setup_options()

        self.options, self.args = self.parser.parse_args()

        self._print_version()

        # Check to make sure some executors have been specified.
        noExecSpecified(self.options.num_exec)

        self._setup_pipeline()
        self._setup_directories()

        self.appName = self.setup_appName()
        self.setup_logger()

        if self.options.queue == "pbs":
            roq = runOnQueueingSystem(self.options, sys.argv)
            roq.createPbsScripts()
            return

        # --restart no longer restores pickled pipelines; it only prints a
        # notice, after which the pipeline is built exactly as in a normal run.
        if self.options.restart:
            print("\nThe restart option is deprecated (pipelines are not pickled anymore, because it takes too much time). Will restart based on which files exists already\n")
        self.reconstructCommand()
        self.run()
        self.pipeline.initialize()
        self.pipeline.printStages(self.appName)

        if self.options.create_graph:
            logger.debug("Writing dot file...")
            nx.write_dot(self.pipeline.G, "labeled-tree.dot")
            logger.debug("Done.")

        if not self.options.execute:
            print("Not executing the command (--no-execute is specified).\nDone.")
            return

        # pipelineDaemon runs pipeline, launches Pyro client/server and executors (if specified)
        # if use_ns is specified, Pyro NameServer must be started.
        logger.info("Starting pipeline daemon...")
        pipelineDaemon(self.pipeline, self.options, sys.argv[0])
        logger.info("Server has stopped.  Quitting...")

    def setup_appName(self):
        """sets the name of the application

        The return value is stored as self.appName by start(); this default
        returns None, so subclasses are expected to override it.
        """
        pass

    def setup_logger(self):
        """sets logging info specific to application

        Logs at DEBUG level to a file named after the application name, the
        current time and the process id.
        """
        FORMAT = '%(asctime)-15s %(name)s %(levelname)s %(process)d/%(threadName)s: %(message)s'
        now = datetime.now().strftime("%Y-%m-%d-at-%H:%M:%S")
        FILENAME = str(self.appName) + "-" + now + '-pid-' + str(os.getpid()) + ".log"
        logging.basicConfig(filename=FILENAME, format=FORMAT, level=logging.DEBUG)

    def setup_options(self):
        """Set up the self.options option parser with options this application needs."""
        pass

    def run(self):
        """Run this application.

        Called by start() to construct the pipeline; the default does nothing.
        """
        pass
Esempio n. 5
0
class AbstractApplication(object):
    """Framework class for writing applications for PydPiper.

    This class defines the default behaviour for accepting common
    command-line options, and executing the application under various
    queueing systems.

    Subclasses should extend the following methods:
        setup_appName()
        setup_logger() [optional, default method is defined here]
        setup_options()
        run()

    Usage:
        class MyApplication(AbstractApplication):
            ...

        if __name__ == "__main__":
            application = MyApplication()
            application.start()
    """

    def __init__(self):
        """Create the option parser and record the installed pydpiper version."""
        # use an environment variable to look for a default config file
        # Alternately, we could use a default location for the file
        # (say `files = ['/etc/pydpiper.cfg', '~/pydpiper.cfg', './pydpiper.cfg']`)
        default_config_file = os.getenv("PYDPIPER_CONFIG_FILE")
        files = [] if default_config_file is None else [default_config_file]
        self.parser = MyParser(default_config_files=files)
        self.__version__ = get_distribution("pydpiper").version

    def _setup_options(self):
        """Register the PydPiper executor and application argument groups."""
        addExecutorArgumentGroup(self.parser)
        addApplicationArgumentGroup(self.parser)

    def _print_version(self):
        """Print the version and exit the process if `show_version` is set."""
        if self.options.show_version:
            # Single-argument call form behaves the same on Python 2 and 3.
            print(self.__version__)
            sys.exit()

    def _setup_pipeline(self, options):
        """Create the Pipeline and attach the parsed options to it."""
        self.pipeline = Pipeline()
        self.pipeline.main_options_hash = options

    # FIXME check that only one server is running with a given output directory
    def _setup_directories(self):
        """Output and backup directories setup here.

        Uses the current working directory when no output directory was
        given; otherwise uses whatever makedirsIgnoreExisting returns for
        the requested directory.
        """
        if self.options.output_directory:
            self.outputDir = makedirsIgnoreExisting(
                self.options.output_directory)
        else:
            self.outputDir = os.getcwd()
        self.pipeline.setBackupFileLocation(self.outputDir)

    def _command_file_name(self, when=None):
        """Return the absolute path of the script recording command and version.

        `when` is an optional time tuple as accepted by time.strftime; the
        current local time is used when it is omitted.
        """
        if when is None:
            when = time.localtime()
        # %M is the minute; %m (month) is already part of the date portion.
        stamp = time.strftime("%d-%m-%Y-at-%H-%M-%S", when)
        return os.path.abspath(
            os.curdir + "/" + self.options.pipeline_name +
            "-command-and-version-" + stamp + ".sh")

    def reconstructCommand(self):
        """Log the command line and version, and save them to a shell script."""
        reconstruct = ' '.join(sys.argv)
        logger.info("Command is: " + reconstruct)
        logger.info("Command version : " + self.__version__)
        # also, because this is probably a better file for it (also has similar
        # naming conventions as the pipeline-stages.txt file:
        with open(self._command_file_name(), "w") as pf:
            pf.write("#!/usr/bin/env bash\n")
            pf.write("# Command version is: " + self.__version__ + "\n")
            pf.write("# Command was: \n")
            pf.write(reconstruct + '\n')

    def start(self):
        """Parse the options, build the pipeline and run or submit it.

        Depending on the options this constructs the pipeline via run(),
        optionally writes a dot file of the pipeline graph, and then either
        stops (not executing), submits PBS job scripts, or starts the
        pipeline daemon.
        """
        logger.info("Calling `start`")
        self._setup_options()
        self.setup_options()
        self.options = self.parser.parse_args()
        self.args = self.options.files

        self._print_version()

        # Check to make sure some executors have been specified.
        noExecSpecified(self.options.num_exec)

        self._setup_pipeline(self.options)
        self._setup_directories()

        self.appName = self.setup_appName()
        self.setup_logger()

        # NB this doesn't capture environment variables
        # or contents of any config file so isn't really complete
        self.reconstructCommand()

        # Either option may select PBS; --local overrides both.
        uses_pbs = (self.options.queue == "pbs"
                    or self.options.queue_type == "pbs")
        pbs_submit = uses_pbs and not self.options.local

        if (self.options.execute
                and not pbs_submit) or self.options.create_graph:
            logger.debug("Calling `run`")
            self.run()
            logger.debug("Calling `initialize`")
            self.pipeline.initialize()
            self.pipeline.printStages(self.options.pipeline_name)

        if self.options.create_graph:
            logger.debug("Writing dot file...")
            nx.write_dot(self.pipeline.G, "labeled-tree.dot")
            logger.debug("Done.")

        if not self.options.execute:
            print("Not executing the command (--no-execute is specified).\nDone.")
            return

        if pbs_submit:
            roq = runOnQueueingSystem(self.options, sys.argv)
            roq.createAndSubmitPbsScripts()
            logger.info("Finished submitting PBS job scripts...quitting")
            return

        # pipelineDaemon runs pipeline, launches Pyro client/server and executors (if specified)
        # if use_ns is specified, Pyro NameServer must be started.
        logger.info("Starting pipeline daemon...")
        pipelineDaemon(self.pipeline, self.options, sys.argv[0])
        logger.info("Server has stopped.  Quitting...")

    def setup_appName(self):
        """sets the name of the application

        The return value is stored as self.appName by start(); this default
        returns None, so subclasses are expected to override it.
        """
        pass

    def setup_logger(self):
        """sets logging info specific to application

        Logs at DEBUG level to a file named after the application name, the
        current time and the process id.
        """
        FORMAT = '%(asctime)-15s %(name)s %(levelname)s %(process)d/%(threadName)s: %(message)s'
        now = datetime.now().strftime("%Y-%m-%d-at-%H:%M:%S")
        FILENAME = str(self.appName) + "-" + now + '-pid-' + str(
            os.getpid()) + ".log"
        logging.basicConfig(filename=FILENAME,
                            format=FORMAT,
                            level=logging.DEBUG)

    def setup_options(self):
        """Set up the self.options option parser with options this application needs."""
        pass

    def run(self):
        """Run this application.

        Called by start() to construct the pipeline; the default does nothing.
        """
        pass