Example #1
    def __init__(self):
        self.set_argparse()
        self._set_rmq()
        self.am = entk.AppManager(hostname=self.rmq_hostname,
                                  port=self.rmq_port)
        self.p = entk.Pipeline()
        self.s = entk.Stage()
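Example #2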
    def __init__(self):
        self.set_argparse()
        self._set_rmq()
        self.am = entk.AppManager(hostname=self.rmq_hostname,
                                  port=self.rmq_port,
                                  username=self.rmq_username,
                                  password=self.rmq_password)
        self.pipelines = []
        self.p1 = entk.Pipeline()
        self.p2 = entk.Pipeline()
        self.pipelines.append(self.p1)
        self.pipelines.append(self.p2)
        self.s1 = entk.Stage()
        self.s2 = entk.Stage()
        self.s3 = entk.Stage()
        self.s4 = entk.Stage()
        self.s5 = entk.Stage()
        self.s6 = entk.Stage()
        self.s7 = entk.Stage()
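
A minimal sketch of how an AppManager built this way is typically driven, assuming the stages above have been filled with tasks and added to the two pipelines, and that a local resource is used (the resource description values are placeholders):

    def run(self):
        # Hypothetical driver: describe the target resource, hand the
        # pipelines to the AppManager, and execute the workflow.
        self.am.resource_desc = {'resource': 'local.localhost',  # assumption
                                 'walltime': 10,                 # minutes
                                 'cpus': 1}
        self.am.workflow = set(self.pipelines)
        self.am.run()

Example #3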
    def generate_discover_pipe(self, filetype='csv', img_ftype='tif'):
        '''
        This function takes as input paths on Bridges and returns a pipeline
        that produces a file listing all the images that exist under each path.
        '''
        pipeline = re.Pipeline()
        pipeline.name = 'Disc'
        stage = re.Stage()
        stage.name = 'Disc.S0'

        if self._paths is None:
            raise RuntimeError('Images paths are not set.')

        # Create the module load list
        modules_load = list()
        if self._modules:
            for module in self._modules:
                tmp_load = 'module load %s' % module
                modules_load.append(tmp_load)

        tmp_pre_execs = ['unset PYTHONPATH']
        if self._pre_execs:
            tmp_pre_execs = tmp_pre_execs + modules_load + self._pre_execs
        else:
            tmp_pre_execs = tmp_pre_execs + modules_load

        for i in range(len(self._paths)):
            task = re.Task()
            task.name = 'Disc.T%d' % i
            task.pre_exec = tmp_pre_execs
            task.executable = 'python'  # Assign executable to the task
            task.arguments = [
                'image_disc.py',
                '%s' % self._paths[i],
                '--image_ftype=%s' % img_ftype,
                '--filename=images%d' % i,
                '--filetype=%s' % filetype, '--filesize'
            ]
            task.download_output_data = ['images%d.csv' % i]
            task.upload_input_data = [
                os.path.dirname(os.path.abspath(__file__)) + '/image_disc.py'
            ]
            task.cpu_reqs = {
                'cpu_processes': 1,
                'cpu_process_type': '',
                'cpu_threads': 1,
                'cpu_thread_type': 'OpenMP'
            }
            stage.add_tasks(task)
        # Add Stage to the Pipeline
        pipeline.add_stages(stage)

        return pipeline
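
A short sketch of how this discovery pipeline might be submitted, assuming the surrounding class exposes an EnTK AppManager as self.am (a hypothetical attribute) and that self._paths has been set:

    def run_discovery(self):
        # Build the discovery pipeline and run it as a one-off workflow.
        pipeline = self.generate_discover_pipe(filetype='csv',
                                               img_ftype='tif')
        self.am.workflow = set([pipeline])
        self.am.run()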
Example #4
    def _generate_pipeline(self, name, pre_execs, image, gpu_id):
        '''
        This function creates a pipeline for an image that will be analyzed.

        :Arguments:
            :name: Pipeline name, str
            :pre_execs: Commands to run before the task executes, list
            :image: Image path, str
            :gpu_id: ID of the GPU the task should use, str
        '''
        # Create a Pipeline object
        entk_pipeline = re.Pipeline()
        entk_pipeline.name = name
        # Create a Stage object
        stage0 = re.Stage()
        stage0.name = '%s-S0' % (name)
        # Create Task 1, training
        task1 = re.Task()
        task1.name = '%s-T0' % stage0.name
        task1.pre_exec = pre_execs
        task1.executable = 'iceberg_penguins.detect'  # Assign task executable
        # Assign arguments for the task executable
        task1.arguments = ['--gpu_ids', gpu_id,
                           '--name', self._model_name,
                           '--epoch', self._epoch,
                           '--checkpoints_dir', self._model_path,
                           '--output', self._output_path,
                           '--testset', 'GE',
                           '--input_im', image.split('/')[-1]]
        task1.link_input_data = ['%s' % image]
        task1.cpu_reqs = {'cpu_processes': 1, 'cpu_threads': 1,
                          'cpu_process_type': None, 'cpu_thread_type': 'OpenMP'}
        task1.gpu_reqs = {'gpu_processes': 1, 'gpu_threads': 1,
                          'gpu_process_type': None, 'gpu_thread_type': 'OpenMP'}
        # Download resulting images
        # task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
        #                                            split('.')[0],
        #                                            image.split('/')[-1])]
        # task1.tag = task0.name

        stage0.add_tasks(task1)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage0)

        return entk_pipeline
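
A hedged sketch of how per-image pipelines like the one above can be fanned out over the available GPUs; self._images, self._pre_execs and the GPU count are assumed attributes of the surrounding class:

    def generate_pipelines(self, n_gpus=4):
        # One pipeline per image, round-robining images over the GPUs.
        pipelines = []
        for idx, image in enumerate(self._images):
            pipelines.append(self._generate_pipeline(name='P%d' % idx,
                                                     pre_execs=self._pre_execs,
                                                     image=image,
                                                     gpu_id=str(idx % n_gpus)))
        return pipelines

Example #5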
    def setup(self):

        # prepare input for all replicas
        writeInputs.writeInputs(max_temp=self._max_temp,
                                min_temp=self._min_temp,
                                replicas=self._en_size,
                                timesteps=self._timesteps,
                                basename=self._basename)

        # and tar it up
        tar = tarfile.open("input_files.tar", "w")
        for name in [self._basename + ".prmtop",
                     self._basename + ".inpcrd",
                     self._basename + ".mdin"]:
            tar.add(name)

        for replica in self._replicas:
            tar.add('mdin-%s-0' % replica.rid)     # add the per-replica input file
            os.remove('mdin-%s-0' % replica.rid)   # then remove the local copy

        tar.close()

        # create a single pipeline with one stage to transfer the tarball
        task = re.Task()
        task.name              = 'untarTsk'
        task.executable        = ['python']
        task.upload_input_data = ['untar_input_files.py', 'input_files.tar']
        task.arguments         = ['untar_input_files.py', 'input_files.tar']
        task.cpu_reqs          = {'processes'          : 1,
                                  'process_type'       : None,
                                  'threads_per_process': 1,
                                  'thread_type'        : None}
        task.post_exec         = []

        stage = re.Stage()
        stage.name = 'untarStg'
        stage.add_tasks(task)

        setup_pipeline = re.Pipeline()
        setup_pipeline.name = 'untarPipe'
        setup_pipeline.add_stages(stage)
        return [setup_pipeline]
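Example #6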
def setup_replicas(replicas, min_temp, max_temp, timesteps, basename):

    writeInputs.writeInputs(max_temp=max_temp,
                            min_temp=min_temp,
                            replicas=replicas,
                            timesteps=timesteps,
                            basename=basename)

    tar = tarfile.open("input_files.tar", "w")
    for name in [
            basename + ".prmtop", basename + ".inpcrd", basename + ".mdin"
    ]:
        tar.add(name)

    for r in range(replicas):
        tar.add('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
        os.remove('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))

    tar.close()

    setup_p = re.Pipeline()
    setup_p.name = 'untarPipe'

    untar_stg = re.Stage()
    untar_stg.name = 'untarStg'

    # Untar Task

    untar_tsk = re.Task()
    untar_tsk.name = 'untarTsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.cpu_reqs = {'processes': 1, 'process_type': None,
                          'threads_per_process': 1, 'thread_type': None}
    untar_tsk.post_exec = []

    untar_stg.add_tasks(untar_tsk)
    setup_p.add_stages(untar_stg)

    replica_sandbox = '$Pipeline_%s_Stage_%s_Task_%s' \
                    % (setup_p.name, untar_stg.name, untar_tsk.name)

    return setup_p, replica_sandbox
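
A brief sketch of how the returned pipeline and sandbox reference are typically used; appman and the argument values are placeholders:

    setup_p, replica_sandbox = setup_replicas(replicas=4, min_temp=300,
                                              max_temp=350, timesteps=1000,
                                              basename='ala2')
    appman.workflow = set([setup_p])
    appman.run()
    # Later MD tasks can then link inputs out of the untar task's sandbox:
    #   md_tsk.link_input_data = ['%s/inpcrd-%s-0 > inpcrd'
    #                             % (replica_sandbox, rid)]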
Example #7
    def setup(self):

        self._log.debug('=== data staging')

        # prepare input for all replicas
        writeInputs.writeInputs(max_temp=self._max_temp,
                                min_temp=self._min_temp,
                                replicas=self._size,
                                timesteps=self._timesteps,
                                basename=self._basename)

        # and tar it up
        tar = tarfile.open("input_files.tar", "w")
        for name in [
                self._basename + ".prmtop", self._basename + ".inpcrd",
                self._basename + ".mdin"
        ]:
            tar.add(name)

        for replica in self._replicas:
            tar.add('mdin-%s-0' % replica.rid)
            os.remove('mdin-%s-0' % replica.rid)

        tar.close()

        # create a single pipeline with one stage to transfer the tarball
        task = re.Task()
        task.name = 'untarTsk'
        task.executable = 'python'
        task.upload_input_data = ['untar_input_files.py', 'input_files.tar']
        task.arguments = ['untar_input_files.py', 'input_files.tar']
        task.cpu_reqs = {'processes': 1, 'process_type': None,
                         'threads_per_process': 1, 'thread_type': None}
        task.post_exec = []

        stage = re.Stage()
        stage.name = 'untarStg'
        stage.add_tasks(task)

        setup = re.Pipeline()
        setup.name = 'untarPipe'
        setup.add_stages(stage)

        # run the setup pipeline
        self.workflow = set([setup])
        self.run()
Example #8
def get_wf3_input(appman, cfg):
    # Assuming shared filesystem on login node this can be executed by the
    # script instead of EnTK.
    p = entk.Pipeline()
    p.name = 'get_wf3_input'
    s = entk.Stage()

    t = entk.Task()
    t.executable = ['python3']
    t.arguments = [
        'gather.py', '-f', cfg['outlier_path'], '-p', cfg['top_path']
    ]

    s.add_tasks(t)
    p.add_stages(s)
    appman.workflow = [p]

    appman.run()
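
A minimal sketch of the configuration this helper expects; cfg is a plain dict and both paths are placeholders:

    cfg = {'outlier_path': '/path/to/outliers',    # scanned by gather.py
           'top_path':     '/path/to/topologies'}  # assumption: topology dir
    get_wf3_input(appman, cfg)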
Example #9
    def _generate_pipeline(self, name, pre_execs, image, image_size):
        '''
        This function creates a pipeline for an image that will be analyzed.

        :Arguments:
            :name: Pipeline name, str
            :pre_execs: Commands to run before the tasks execute, list
            :image: Image path, str
            :image_size: Image size in MB, int
        '''
        # Create a Pipeline object
        entk_pipeline = re.Pipeline()
        entk_pipeline.name = name
        # Create a Stage object
        stage0 = re.Stage()
        stage0.name = '%s.S0' % (name)
        # Create Task 1, training
        task0 = re.Task()
        task0.name = '%s.T0' % stage0.name
        task0.pre_exec = pre_execs
        task0.executable = 'iceberg_seals.tiling'  # Assign task executable
        # Assign arguments for the task executable
        task0.arguments = [
            '--input_image=%s' % image.split('/')[-1],
            '--output_folder=$NODE_LFS_PATH/%s' % task0.name,
            '--bands=%s' % self._bands,
            '--stride=%s' % self._stride,
            '--patch_size=%s' % self._patch_size,
            '--geotiff=%s' % self._geotiff
        ]
        task0.link_input_data = [image]
        task0.cpu_reqs = {
            'cpu_processes': 1,
            'cpu_threads': 4,
            'cpu_process_type': None,
            'cpu_thread_type': 'OpenMP'
        }
        task0.lfs_per_process = image_size

        stage0.add_tasks(task0)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage0)

        # Create a Stage object
        stage1 = re.Stage()
        stage1.name = '%s.S1' % (name)
        # Create Task 1, training
        task1 = re.Task()
        task1.name = '%s.T1' % stage1.name
        task1.pre_exec = pre_execs
        task1.executable = 'iceberg_seals.predicting'  # Assign task executable
        # Assign arguments for the task executable
        task1.arguments = [
            '--input_dir=$NODE_LFS_PATH/%s' % task0.name,
            '--model_architecture=%s' % self._model_arch,
            '--hyperparameter_set=%s' % self._hyperparam,
            '--model_name=%s' % self._model_name,
            '--models_folder=./',
            '--output_dir=./%s' % image.split('/')[-1].split('.')[0],
        ]
        task1.link_input_data = ['$SHARED/%s' % self._model_name]
        task1.cpu_reqs = {
            'cpu_processes': 1,
            'cpu_threads': 1,
            'cpu_process_type': None,
            'cpu_thread_type': 'OpenMP'
        }
        task1.gpu_reqs = {
            'gpu_processes': 1,
            'gpu_threads': 1,
            'gpu_process_type': None,
            'gpu_thread_type': 'OpenMP'
        }
        # Download resulting images
        # task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
        #                                            split('.')[0],
        #                                            image.split('/')[-1])]
        task1.tags = {'colocate': task0.name}

        stage1.add_tasks(task1)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage1)

        return entk_pipeline
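
A sketch of how these per-image pipelines are commonly wired to an AppManager from inside the same class, assuming amgr, pre_execs, model_path and images (a list of (path, size) pairs) are defined by the caller; EnTK's shared_data is what the $SHARED reference in task1 resolves against:

    amgr.shared_data = [model_path]   # staged once, linked by every task1
    amgr.workflow = set(self._generate_pipeline('P%d' % i, pre_execs,
                                                img, size)
                        for i, (img, size) in enumerate(images))
    amgr.run()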
Example #10
    def _generate_pipeline(self, name, pre_execs, image, image_size):
        '''
        This function creates a pipeline for an image that will be analyzed.

        :Arguments:
            :name: Pipeline name, str
            :pre_execs: things need to happen before execution
            :image: image path, str
            :image_size: image size in MBs, int
        '''
        # Create a Pipeline object
        entk_pipeline = re.Pipeline()
        entk_pipeline.name = name
        # Create a Stage object
        stage0 = re.Stage()
        stage0.name = '%s.S0' % (name)
        # Create Task 1, training
        task0 = re.Task()
        task0.name = '%s.T0' % stage0.name
        task0.pre_exec = pre_execs
        task0.executable = 'iceberg_rivers.tiling'  # Assign task executable
        task0.arguments = [
            '--input=%s' % image.split('/')[-1],
            '--output=$NODE_LFS_PATH/%s/' % task0.name,
            '--tile_size=%s' % self._tile_size,
            '--step=%s' % self._step
        ]
        task0.link_input_data = [image]
        task0.cpu_reqs = {
            'cpu_processes': 1,
            'cpu_threads': 4,
            'cpu_process_type': None,
            'cpu_thread_type': None
        }
        task0.lfs_per_process = image_size
        stage0.add_tasks(task0)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage0)

        # Create a Stage object
        stage1 = re.Stage()
        stage1.name = '%s.S1' % (name)
        # Create Task 1, training
        task1 = re.Task()
        task1.name = '%s.T1' % stage1.name
        task1.pre_exec = pre_execs
        task1.executable = 'iceberg_rivers.predicting'  # Assign task executable
        # Assign arguments for the task executable
        task1.arguments = [
            '--input=$NODE_LFS_PATH/%s/' % task0.name,
            '--weights_path=%s' % self._weights_path,
            '--output_folder=$NODE_LFS_PATH/%s/' % task1.name
        ]
        # task1.link_input_data = ['$SHARED/%s' % self._model_name]
        task1.cpu_reqs = {
            'processes': 1,
            'threads_per_process': 1,
            'process_type': None,
            'thread_type': None
        }
        task1.gpu_reqs = {
            'processes': 1,
            'threads_per_process': 1,
            'process_type': None,
            'thread_type': None
        }
        # Download resulting images
        # task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
        #                                            split('.')[0],
        #                                            image.split('/')[-1])]
        task1.tags = {'colocate': task0.name}

        stage1.add_tasks(task1)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage1)

        # Create a Stage object
        stage2 = re.Stage()
        stage2.name = '%s.S2' % (name)
        # Create Task 1, training
        task2 = re.Task()
        task2.name = '%s.T2' % stage2.name
        task2.pre_exec = pre_execs
        task2.executable = 'iceberg_rivers.mosaic'  # Assign task executable
        # Assign arguments for the task executable
        task2.arguments = [
            '--input=$NODE_LFS_PATH/%s/' % task1.name,
            '--input_WV=%s' % image.split('/')[-1],
            '--tile_size=%s' % self._tile_size,
            '--step=%s' % self._step, '--output_folder=./'
        ]
        task2.cpu_reqs = {
            'processes': 1,
            'threads_per_process': 1,
            'process_type': None,
            'thread_type': None
        }
        task2.link_input_data = [image]
        task2.tags = {'colocate': task0.name}
        stage2.add_tasks(task2)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage2)

        return entk_pipeline
Example #11
    def _generate_pipeline(self, name, pre_execs, image, image_size):
        '''
        This function creates a pipeline for an image that will be analyzed.

        :Arguments:
            :name: Pipeline name, str
            :pre_execs: Commands to run before the tasks execute, list
            :image: Image path, str
            :image_size: Image size in MB, int
        '''
        # Create a Pipeline object
        entk_pipeline = re.Pipeline()
        entk_pipeline.name = name
        # Create a Stage object
        stage0 = re.Stage()
        stage0.name = '%s-S0' % (name)
        # Create Task 1, training
        task0 = re.Task()
        task0.name = '%s-T0' % stage0.name
        task0.pre_exec = pre_execs
        task0.executable = 'iceberg_seals.tiling'  # Assign task executable
        # Assign arguments for the task executable
        task0.arguments = ['--scale_bands=%s' % self._scale_bands,
                           '--input_image=%s' % image.split('/')[-1],
                           # This line points to the local filesystem of the
                           # node that the tiling of the image happened.
                           '--output_folder=$NODE_LFS_PATH/%s' % task0.name]
        task0.link_input_data = [image]
        task0.cpu_reqs = {'processes': 1, 'threads_per_process': 4,
                          'process_type': None, 'thread_type': 'OpenMP'}
        task0.lfs_per_process = image_size

        stage0.add_tasks(task0)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage0)

        # Create a Stage object
        stage1 = re.Stage()
        stage1.name = '%s-S1' % (name)
        # Create Task 1, training
        task1 = re.Task()
        task1.name = '%s-T1' % stage1.name
        task1.pre_exec = pre_execs
        task1.executable = 'iceberg_seals.predicting'  # Assign task executable
        # Assign arguments for the task executable
        task1.arguments = ['--input_image', image.split('/')[-1],
                           '--model_architecture', self._model_arch,
                           '--hyperparameter_set', self._hyperparam,
                           '--training_set', 'test_vanilla',
                           '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
                           '--model_path', './',
                           '--output_folder', './%s' % image.split('/')[-1].
                           split('.')[0]]
        task1.link_input_data = ['$SHARED/%s' % self._model_name]
        task1.cpu_reqs = {'processes': 1, 'threads_per_process': 1,
                          'process_type': None, 'thread_type': 'OpenMP'}
        task1.gpu_reqs = {'processes': 1, 'threads_per_process': 1,
                          'process_type': None, 'thread_type': 'OpenMP'}
        # Download resulting images
        task1.download_output_data = ['%s/ > %s' % (image.split('/')[-1].
                                                    split('.')[0],
                                                    image.split('/')[-1])]
        task1.tag = task0.name

        stage1.add_tasks(task1)
        # Add Stage to the Pipeline
        entk_pipeline.add_stages(stage1)

        return entk_pipeline
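Example #12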
    def replica_pipeline(self, rid, cycle, replica_cores, md_executable,
                         timesteps, replica_sandbox):

        # ----------------------------------------------------------------------
        def add_md_stg(rid, cycle):

            # md stg here
            print('cycle:', self.cycle)

            md_tsk = re.Task()
            md_stg = re.Stage()
            md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=rid,
                                                           cycle=self.cycle)

            md_tsk.link_input_data = [
                '%s/inpcrd > inpcrd-{replica}-{cycle}'.format(
                    replica=rid, cycle=self.cycle) % replica_sandbox,
                '%s/prmtop' % replica_sandbox,
                '%s/mdin-{replica}-{cycle} > mdin'.format(
                    replica=rid, cycle=self.cycle) % replica_sandbox
            ]
            md_tsk.arguments = [
                '-O', '-i', 'mdin', '-p', 'prmtop', '-c',
                'inpcrd-{replica}-{cycle}'.format(replica=rid,
                                                  cycle=self.cycle), '-o',
                'out', '-x', 'mdcrd', '-r',
                '%s/inpcrd-{replica}-{cycle}'.format(
                    replica=rid, cycle=self.cycle + 1) % replica_sandbox,
                '-inf', '%s/mdinfo-{replica}-{cycle}'.format(
                    replica=rid, cycle=self.cycle) % replica_sandbox
            ]
            md_tsk.executable = [md_executable]
            md_tsk.cpu_reqs = {
                'processes': replica_cores,
                'process_type': '',
                'threads_per_process': 1,
                'thread_type': None
            }
            md_tsk.pre_exec = ['echo $SHARED']

            md_stg.add_tasks(md_tsk)
            md_stg.post_exec = {
                'condition': post_md,
                'on_true': start_ex,
                'on_false': suspend_replica
            }
            return md_stg

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def add_ex_stg(rid, cycle):

            # ex stg here
            ex_tsk = re.Task()
            ex_stg = re.Stage()
            ex_tsk.name = 'extsk-{replica}-{cycle}'.format(replica=rid,
                                                           cycle=cycle)

            for rid in range(len(waiting_replicas)):
                ex_tsk.link_input_data += [
                    '%s/mdinfo-{replica}-{cycle}'.format(
                        replica=rid, cycle=self.cycle) % replica_sandbox
                ]

            ex_tsk.arguments = ['t_ex_gibbs.py',
                                len(waiting_replicas)
                                ]  # This needs to be fixed
            ex_tsk.executable = ['python']
            ex_tsk.cpu_reqs = {
                'processes': 1,
                'process_type': '',
                'threads_per_process': 1,
                'thread_type': None
            }
            ex_stg.add_tasks(ex_tsk)
            ex_stg.post_exec = {
                'condition': post_ex,
                'on_true': terminate_replicas,
                'on_false': continue_md
            }
            return ex_stg

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def post_md():

            global replica_cycles
            print('replica cycles: %s [%s]' % (replica_cycles, rid))

            self.cycle += 1
            replica_cycles[rid] += 1
            print('replica cycles: %s' % replica_cycles)

            waiting_replicas.append(rid)

            if len(waiting_replicas) < max_waiting_list:
                return False
            return True

        # ----------------------------------------------------------------------
        def suspend_replica():
            p_replica.suspend()

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def start_ex():
            ex_stg = add_ex_stg(rid, cycle=self.cycle)
            p_replica.add_stages(ex_stg)

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def post_ex():

            if cycle > min_completed_cycles:
                return True
            return False

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def terminate_replicas():

            # Resume all replicas in list without adding stages
            for rid in waiting_replicas:
                replica_pipelines[rid].resume()
            print "DONE"

        # ----------------------------------------------------------------------

        # ----------------------------------------------------------------------
        def continue_md():

            # This needs to resume replica_pipelines[rid]
            # for all rid's in wait list
            print "continuing replicas"
            global waiting_replicas

            for rid in waiting_replicas:
                try:
                    md_stg = add_md_stg(rid, cycle)
                    replica_pipelines[rid].add_stages(md_stg)
                    if replica_pipelines[rid] is rid:
                        pass
                    else:
                        replica_pipelines[rid].resume()
                        # This is throwing an error: cannot resume itself since
                        # it is not suspended.  Since the pipeline that is
                        # triggering this choice is NOT suspended,
                        # pipeline.resume() fails. This seems to be happening on
                        # ALL pipelines somehow.

                except Exception:
                    print("replica is not suspended, cannot resume")

            waiting_replicas = []

        # ----------------------------------------------------------------------

        p_replica = re.Pipeline()
        p_replica.name = 'p_{rid}'.format(rid=rid)

        md_stg = add_md_stg(rid, cycle)
        p_replica.add_stages(md_stg)

        return p_replica
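
A hedged sketch of the driver that would sit around replica_pipeline, assuming a setup step (such as setup_replicas above) has produced replica_sandbox, that ex is an instance of the surrounding class, and that n_replicas, replica_cores, md_exec and timesteps are defined by the caller:

    replica_pipelines = []
    for rid in range(n_replicas):
        replica_pipelines.append(
            ex.replica_pipeline(rid, cycle=0,
                                replica_cores=replica_cores,
                                md_executable=md_exec,
                                timesteps=timesteps,
                                replica_sandbox=replica_sandbox))
    appman.workflow = set(replica_pipelines)
    appman.run()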