Example #1
0
 def get_total_templates(self, maps, **kwargs):
     """
     Return the total number of templates for the given variation maps.

     For each map the count is the product of the lengths of its value
     ranges, multiplied by the number of input directories for this
     iteration.
     NOTE(review): only the count for the *last* map in ``maps`` is
     returned (each iteration overwrites the previous total) — confirm
     this is the intended behaviour for multi-map input.

     :param maps: iterable of dicts mapping template variable -> range
     :param kwargs: must contain run_settings, output_storage_settings
         and job_dir
     :return: template count (0 when ``maps`` is empty)
     """
     run_settings = kwargs['run_settings']
     output_storage_settings = kwargs['output_storage_settings']
     job_dir = kwargs['job_dir']
     # Fall back to iteration 0 when the system id is missing or malformed.
     # (renamed from ``id`` to avoid shadowing the builtin)
     try:
         iter_id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
     except (SettingNotFoundException, ValueError) as e:
         logger.debug(e)
         iter_id = 0
     iter_inputdir = os.path.join(job_dir, "input_%s" % iter_id)
     url_with_pkey = get_url_with_credentials(
         output_storage_settings,
         '%s://%s@%s' % (output_storage_settings['scheme'],
                         output_storage_settings['type'], iter_inputdir),
         is_relative_path=False)
     logger.debug(url_with_pkey)
     input_dirs = list_dirs(url_with_pkey)
     # Initialised up front so an empty ``maps`` returns 0 instead of
     # raising UnboundLocalError at the return statement.
     total_templates = 0
     for template_map in maps:
         logger.debug("template_map=%s" % template_map)
         map_keys = template_map.keys()
         logger.debug("map_keys %s" % map_keys)
         map_ranges = [list(template_map[x]) for x in map_keys]
         product = 1
         for value_range in map_ranges:
             product = product * len(value_range)
         total_templates = product * len(input_dirs)
         logger.debug("total_templates=%d" % (total_templates))
     return total_templates
Example #2
0
    def prepare_inputs(self, local_settings, output_storage_settings,
                       computation_platform_settings, mytardis_settings, run_settings):
        """
        Upload all input directories for this iteration to the ready
        processes, one variation upload per sorted input subdirectory.
        """
        logger.debug("preparing inputs")
        # TODO: to ensure reproducibility, may want to precalculate all
        # random numbers and store them rather than rely on canonical
        # execution of the rest of this function.
        ready_processes = []
        for proc in self.schedule_procs:
            if proc['status'] == 'ready':
                ready_processes.append(proc)
        self.node_ind = 0
        logger.debug("Iteration Input dir %s" % self.iter_inputdir)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix + self.iter_inputdir,
            is_relative_path=False)

        input_dirs = list_dirs(url_with_pkey)
        if not input_dirs:
            raise BadInputException(
                "require an initial subdirectory of input directory")

        for input_dir in sorted(input_dirs):
            self._upload_input_dir_variations(
                ready_processes, local_settings,
                computation_platform_settings, output_storage_settings,
                mytardis_settings, input_dir, run_settings)
 def get_total_templates(self, maps, **kwargs):
     """
     Return the total number of templates for the given variation maps.

     For each map the count is the product of the lengths of its value
     ranges, multiplied by the number of input directories for this
     iteration.
     NOTE(review): only the count for the *last* map in ``maps`` is
     returned (each iteration overwrites the previous total) — confirm
     this is the intended behaviour for multi-map input.

     :param maps: iterable of dicts mapping template variable -> range
     :param kwargs: must contain run_settings, output_storage_settings
         and job_dir
     :return: template count (0 when ``maps`` is empty)
     """
     run_settings = kwargs['run_settings']
     output_storage_settings = kwargs['output_storage_settings']
     job_dir = kwargs['job_dir']
     # Fall back to iteration 0 when the system id is missing or malformed.
     # (renamed from ``id`` to avoid shadowing the builtin)
     try:
         iter_id = int(getval(run_settings,
                              '%s/system/id' % RMIT_SCHEMA))
     except (SettingNotFoundException, ValueError) as e:
         logger.debug(e)
         iter_id = 0
     iter_inputdir = os.path.join(job_dir, "input_%s" % iter_id)
     url_with_pkey = get_url_with_credentials(
         output_storage_settings,
         '%s://%s@%s' % (output_storage_settings['scheme'],
                         output_storage_settings['type'],
                         iter_inputdir),
         is_relative_path=False)
     logger.debug(url_with_pkey)
     input_dirs = list_dirs(url_with_pkey)
     # Initialised up front so an empty ``maps`` returns 0 instead of
     # raising UnboundLocalError at the return statement.
     total_templates = 0
     for template_map in maps:
         logger.debug("template_map=%s" % template_map)
         map_keys = template_map.keys()
         logger.debug("map_keys %s" % map_keys)
         map_ranges = [list(template_map[x]) for x in map_keys]
         product = 1
         for value_range in map_ranges:
             product = product * len(value_range)
         total_templates = product * len(input_dirs)
         logger.debug("total_templates=%d" % (total_templates))
     return total_templates
Example #4
0
 def prepare_inputs(self, local_settings, output_storage_settings,
                     computation_platform_settings, mytardis_settings, run_settings):
     """
     Upload all input files for this run: for each sorted input
     subdirectory, generate its variations and upload them to the
     ready processes.
     """
     logger.debug("preparing inputs")
     # TODO: to ensure reproducibility, consider precalculating and
     # storing all random numbers rather than relying on canonical
     # execution of the rest of this function.
     ready_procs = [proc for proc in self.schedule_procs
                    if proc['status'] == 'ready']
     self.node_ind = 0
     logger.debug("Iteration Input dir %s" % self.iter_inputdir)
     output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                   output_storage_settings['type'])
     url_with_pkey = get_url_with_credentials(
         output_storage_settings,
         output_prefix + self.iter_inputdir,
         is_relative_path=False)
     logger.debug("url_with_pkey=%s" % url_with_pkey)
     input_dirs = list_dirs(url_with_pkey)
     if not input_dirs:
         raise BadInputException("require an initial subdirectory of input directory")
     for input_dir in sorted(input_dirs):
         logger.debug("Input dir %s" % input_dir)
         variations = self.generate_variations(
             input_dir, local_settings, output_storage_settings, run_settings)
         self.upload_variation_inputs(
             run_settings, local_settings, variations, ready_procs,
             input_dir, output_storage_settings,
             computation_platform_settings, mytardis_settings)
Example #5
0
def _instantiate_context(source_url, settings, context):
    """
    Render every ``*_template`` file found at source_url against
    ``context`` and return the results.

    :param source_url: storage URL of the directory holding the templates
    :param settings: credentials/settings for the storage backend
    :param context: dict of template variables
    :return: dict mapping each template's base filename (the name without
        the ``_template`` suffix) to its rendered content
    """
    templ_pat = re.compile("(.*)_template")
    encoded_s_url = storage.get_url_with_credentials(settings,
        source_url, is_relative_path=False)

    logger.debug("encoded_s_url=%s" % encoded_s_url)
    fnames = storage.list_dirs(encoded_s_url, list_files=True)

    logger.debug("fnames=%s" % fnames)
    new_content = {}
    for fname in fnames:
        logger.debug("fname=%s" % fname)
        templ_mat = templ_pat.match(fname)
        if templ_mat:
            base_fname = templ_mat.group(1)
            basename_url_with_pkey = storage.get_url_with_credentials(
                settings,
                os.path.join(
                    source_url,
                    fname),
                is_relative_path=False)
            logger.debug("basename_url_with_pkey=%s" % basename_url_with_pkey)
            cont = storage.get_file(basename_url_with_pkey)
            try:
                t = Template(cont)
            # was Python-2-only ``except TemplateSyntaxError, e`` —
            # ``as e`` works on Python 2.6+ and 3.
            except TemplateSyntaxError as e:
                logger.error(e)
                #FIXME: should detect this during submission of job,
                #as no sensible way to recover here.
                #TODO: signal error conditions in job status
                continue
            con = Context(context)
            logger.debug("context=%s" % context)
            new_content[base_fname] = t.render(con)
    # Previously the rendered results were built and then discarded;
    # return them so callers can actually use the instantiated content.
    return new_content
Example #6
0
    def _upload_input_dir_variations(self, processes, local_settings,
                                     computation_platform_settings,
                                     output_storage_settings,
                                     mytardis_settings,
                                     input_dir, run_settings):
        """
        Build every variation context for ``input_dir`` and upload the
        rendered templates, plain input files and a per-context values
        file to the matching ready process.

        :param processes: list of process dicts (must contain 'id',
            'ip_address'); a context's run_counter is matched against
            the process 'id'
        :raises BadInputException: when a context's run_counter matches
            no process in ``processes``
        """
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        input_url_with_credentials = get_url_with_credentials(
            output_storage_settings, output_prefix + os.path.join(
                self.iter_inputdir, input_dir),
            is_relative_path=False)
        logger.debug('input_url_with_credentials=%s' %
                     input_url_with_credentials)
        # Optionally register this input as a MyTardis dataset; a missing
        # platform hook is logged but does not abort the upload.
        if local_settings['curate_data']:

            try:
                mytardis_platform = jobs.safe_import('chiminey.platform.mytardis.MyTardisPlatform', [], {})
                self.experiment_id = mytardis_platform.create_dataset_for_input(self.experiment_id,
                                                      run_settings, local_settings,
                                                      output_storage_settings,
                                                      mytardis_settings,
                                                      input_url_with_credentials)
            except ImproperlyConfigured as e:
                logger.error("Cannot load mytardis platform hook %s" % e)

        else:
            logger.warn('Data curation is off')

        # get run Map
        parent_stage = self.import_parent_stage(run_settings)
        run_map, self.rand_index = parent_stage.get_internal_sweep_map(local_settings,
                                                                       run_settings=run_settings)

        # load value_map
        values_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir,
                                         input_dir,
                                         self.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("initial values_file=%s" % values_url_with_pkey)
        values = {}
        try:
            values_content = storage.get_file(values_url_with_pkey)
        except IOError:
            # A missing values file is an expected, non-fatal condition.
            logger.warn("no values file found")
        else:
            logger.debug("values_content = %s" % values_content)
            values = dict(json.loads(values_content))
        logger.debug("values=%s" % values)

        # generates a set of variations for the template fname
        logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)
        contexts = self._get_variation_contexts(
            [run_map], values,  self.initial_numbfile)
        self.initial_numbfile += len(contexts)
        logger.debug('contexts = %s ' % contexts)
        logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)

        # for each context, copy each file to dest and any
        # templates to be instantiated, then store in values.

        template_pat = re.compile("(.*)_template")
        relative_path_suffix = self.get_relative_output_path(local_settings)

        for context in contexts:
            logger.debug("context=%s" % context)
            # get list of all files in input_dir
            fname_url_with_pkey = get_url_with_credentials(
                output_storage_settings,
                output_prefix + os.path.join(self.iter_inputdir, input_dir),
                is_relative_path=False)
            input_files = storage.list_dirs(fname_url_with_pkey,
                                            list_files=True)

            # get process information
            run_counter = context['run_counter']
            logger.debug("run_counter=%s" % run_counter)
            proc = None
            for p in processes:
                # TODO: how to handle invalid run_counter
                pid = int(p['id'])
                logger.debug("pid=%s" % pid)
                if pid == run_counter:
                    proc = p
                    break
            else:
                # for-else: no break means no process claimed this context
                logger.error("no process found matching run_counter")
                raise BadInputException()
            logger.debug("proc=%s" % pformat(proc))

            for fname in input_files:
                logger.debug("fname=%s" % fname)
                templ_mat = template_pat.match(fname)
                fname_url_with_credentials = storage.get_url_with_credentials(
                    output_storage_settings,
                    output_prefix +
                    os.path.join(self.iter_inputdir, input_dir, fname),
                    is_relative_path=False)
                logger.debug("fname_url_with_credentials=%s" %
                             fname_url_with_credentials)

                def put_dest_file(proc, fname,
                                  dest_file_location, resched_file_location,
                                  content):
                    # Write ``content`` to the process's input dir and,
                    # when rescheduling of failed procs is enabled, to a
                    # backup location on output storage.
                    dest_url = get_url_with_credentials(
                        computation_platform_settings, os.path.join(
                            dest_file_location, fname),
                        is_relative_path=True, ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" %
                                     output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                            output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")

                outputs = []
                if templ_mat:
                    base_fname = templ_mat.group(1)
                    template_content = storage.get_file(
                        fname_url_with_credentials)
                    try:
                        templ = Template(template_content)
                    # was Python-2-only comma syntax; ``as e`` matches the
                    # handler style used elsewhere in this method.
                    except TemplateSyntaxError as e:
                        logger.error(e)
                        # FIXME: should detect this during submission of job,
                        # as no sensible way to recover here.
                        # TODO: signal error conditions in job status
                        continue
                    new_context = Context(context)
                    logger.debug("new_content=%s" % new_context)
                    render_output = templ.render(new_context)
                    render_output = render_output.encode('utf-8')
                    # upload both the rendered file and the raw template
                    outputs.append((base_fname, render_output))
                    outputs.append((fname, template_content))

                else:
                    content = storage.get_file(fname_url_with_credentials)
                    outputs.append((fname, content))

                for (new_fname, content) in outputs:
                    dest_file_location = computation_platform_settings['type']\
                        + "@" + os.path.join(relative_path_suffix,
                                             proc['id'],
                                             local_settings['smart_connector_input'])
                    logger.debug("dest_file_location =%s" % dest_file_location)
                    resched_file_location = "%s%s" % (output_prefix, os.path.join(
                        self.job_dir, "input_backup", proc['id']))

                    logger.debug("resched_file_location=%s" %
                                 resched_file_location)
                    put_dest_file(proc, new_fname, dest_file_location,
                                  resched_file_location, content)

            # then copy context new values file
            logger.debug("writing values file")
            values_dest_location = computation_platform_settings['type']\
                + "@" + os.path.join(relative_path_suffix,
                                     proc['id'],
                                     local_settings['smart_connector_input'],
                                     self.VALUES_FNAME)
            logger.debug("values_dest_location =%s" % values_dest_location)

            values_dest_url = get_url_with_credentials(
                computation_platform_settings, values_dest_location,
                is_relative_path=True, ip_address=proc['ip_address'])

            storage.put_file(values_dest_url, json.dumps(context, indent=4))
Example #7
0
    def generate_variations(self, input_dir, local_settings, output_storage_settings, run_settings):
        """
        For each templated file in input_dir, generate all variations.

        :return: dict mapping each template's base filename (name without
            the ``_template`` suffix) to its variation set; non-template
            files are skipped.
        """
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        template_pat = re.compile("(.*)_template")
        fname_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir, input_dir),
            is_relative_path=False)
        input_files = storage.list_dirs(fname_url_with_pkey,
                                        list_files=True)

        variations = {}
        # TODO: only tested with single template file per input
        parent_stage = self.import_parent_stage(run_settings)

        for fname in input_files:
            logger.debug("trying %s/%s/%s" % (self.iter_inputdir, input_dir,
                                              fname))
            template_mat = template_pat.match(fname)
            # Non-template files are simply skipped.  (The original code
            # carried a dangling for-else with ``pass`` here, which had
            # no effect and misleadingly suggested per-file handling.)
            if template_mat:
                # get the template
                basename_url_with_pkey = get_url_with_credentials(
                    output_storage_settings,
                    output_prefix + os.path.join(self.iter_inputdir, input_dir, fname),
                    is_relative_path=False)
                template = storage.get_file(basename_url_with_pkey)
                base_fname = template_mat.group(1)
                logger.debug("base_fname=%s" % base_fname)

                # find associated values file and generator_counter
                values_map = {}
                try:
                    values_url_with_pkey = get_url_with_credentials(
                        output_storage_settings,
                        output_prefix + os.path.join(self.iter_inputdir,
                                                     input_dir,
                                                     '%s_values' % base_fname),
                        is_relative_path=False)

                    logger.debug("values_file=%s" % values_url_with_pkey)
                    values_content = storage.get_file(values_url_with_pkey)
                except IOError:
                    # missing values file is expected and non-fatal
                    logger.warn("no values file found")
                else:
                    logger.debug("values_content = %s" % values_content)
                    values_map = dict(json.loads(values_content))

                    # TODO: rather than loading up specific vars for info
                    # to send to next set of variations, pass whole values_map
                    # and then override with map.  This means we need no
                    # special variables here, could easily propagate values
                    # between iterations and we might also pass a list
                    # of values...

                # renamed from ``map`` to avoid shadowing the builtin
                run_map, self.rand_index = parent_stage.get_run_map(
                    local_settings, run_settings=run_settings)

                if not template_mat.groups():
                    logger.info("found odd template matching file %s" % fname)
                else:

                    logger.debug("self.initial_numbfile=%s" % self.initial_numbfile)
                    # generates a set of variations for the template fname
                    variation_set = self._expand_variations(template,
                                                            [run_map], values_map,  self.initial_numbfile)
                    self.initial_numbfile += len(variation_set)
                    logger.debug('variation_set=%d' % len(variation_set))
                    logger.debug("self.initial_numbfile=%s" % self.initial_numbfile)
                    variations[base_fname] = variation_set
                logger.debug("map=%s" % run_map)
        logger.debug('Variations %s' % variations)
        logger.debug("Variations items %d" % len(variations.items()))
        return variations
Example #8
0
    def _upload_input_dir_variations(self, processes, local_settings,
                                     computation_platform_settings,
                                     output_storage_settings,
                                     mytardis_settings, input_dir,
                                     run_settings):
        """
        Build every variation context for ``input_dir`` and upload the
        rendered templates, plain input files and a per-context values
        file to the matching ready process.

        :param processes: list of process dicts (must contain 'id',
            'ip_address'); a context's run_counter is matched against
            the process 'id'
        :raises BadInputException: when a context's run_counter matches
            no process in ``processes``
        """
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        input_url_with_credentials = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir, input_dir),
            is_relative_path=False)
        logger.debug('input_url_with_credentials=%s' %
                     input_url_with_credentials)
        # Optionally register this input with the curation service.
        if local_settings['curate_data']:
            self.experiment_id = self.curate_data(self.experiment_id,
                                                  local_settings,
                                                  output_storage_settings,
                                                  mytardis_settings,
                                                  input_url_with_credentials)
        else:
            logger.warn('Data curation is off')

        # get run Map
        parent_stage = self.import_parent_stage(run_settings)
        run_map, self.rand_index = parent_stage.get_run_map(
            local_settings, run_settings=run_settings)

        # load value_map
        values_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix +
            os.path.join(self.iter_inputdir, input_dir, self.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("initial values_file=%s" % values_url_with_pkey)
        values = {}
        try:
            values_content = storage.get_file(values_url_with_pkey)
        except IOError:
            # A missing values file is an expected, non-fatal condition.
            logger.warn("no values file found")
        else:
            logger.debug("values_content = %s" % values_content)
            values = dict(json.loads(values_content))
        logger.debug("values=%s" % values)

        # generates a set of variations for the template fname
        contexts = self._get_variation_contexts([run_map], values,
                                                self.initial_numbfile)
        self.initial_numbfile += len(contexts)

        # for each context, copy each file to dest and any
        # templates to be instantiated, then store in values.

        template_pat = re.compile("(.*)_template")
        relative_path_suffix = self.get_relative_output_path(local_settings)

        for context in contexts:
            logger.debug("context=%s" % context)
            # get list of all files in input_dir
            fname_url_with_pkey = get_url_with_credentials(
                output_storage_settings,
                output_prefix + os.path.join(self.iter_inputdir, input_dir),
                is_relative_path=False)
            input_files = storage.list_dirs(fname_url_with_pkey,
                                            list_files=True)

            # get process information
            run_counter = context['run_counter']
            logger.debug("run_counter=%s" % run_counter)
            proc = None
            for p in processes:
                # TODO: how to handle invalid run_counter
                pid = int(p['id'])
                logger.debug("pid=%s" % pid)
                if pid == run_counter:
                    proc = p
                    break
            else:
                # for-else: no break means no process claimed this context
                logger.error("no process found matching run_counter")
                raise BadInputException()
            logger.debug("proc=%s" % pformat(proc))

            for fname in input_files:
                logger.debug("fname=%s" % fname)
                templ_mat = template_pat.match(fname)
                fname_url_with_credentials = storage.get_url_with_credentials(
                    output_storage_settings,
                    output_prefix +
                    os.path.join(self.iter_inputdir, input_dir, fname),
                    is_relative_path=False)
                logger.debug("fname_url_with_credentials=%s" %
                             fname_url_with_credentials)

                def put_dest_file(proc, fname, dest_file_location,
                                  resched_file_location, content):
                    # Write ``content`` to the process's input dir and,
                    # when rescheduling of failed procs is enabled, to a
                    # backup location on output storage.
                    dest_url = get_url_with_credentials(
                        computation_platform_settings,
                        os.path.join(dest_file_location, fname),
                        is_relative_path=True,
                        ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" %
                                     output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                            output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")

                outputs = []
                if templ_mat:
                    base_fname = templ_mat.group(1)
                    template_content = storage.get_file(
                        fname_url_with_credentials)
                    try:
                        templ = Template(template_content)
                    # was Python-2-only comma syntax; ``as e`` matches the
                    # handler style used elsewhere in this file.
                    except TemplateSyntaxError as e:
                        logger.error(e)
                        #FIXME: should detect this during submission of job,
                        #as no sensible way to recover here.
                        #TODO: signal error conditions in job status
                        continue
                    new_context = Context(context)
                    logger.debug("new_content=%s" % new_context)
                    render_output = templ.render(new_context)
                    render_output = render_output.encode('utf-8')
                    # upload both the rendered file and the raw template
                    outputs.append((base_fname, render_output))
                    outputs.append((fname, template_content))

                else:
                    content = storage.get_file(fname_url_with_credentials)
                    outputs.append((fname, content))

                for (new_fname, content) in outputs:
                    dest_file_location = computation_platform_settings['type']\
                        + "@" + os.path.join(relative_path_suffix,
                                             proc['id'],
                                             local_settings['payload_cloud_dirname'])
                    logger.debug("dest_file_location =%s" % dest_file_location)
                    resched_file_location = "%s%s" % (
                        output_prefix,
                        os.path.join(self.job_dir, "input_backup", proc['id']))

                    logger.debug("resched_file_location=%s" %
                                 resched_file_location)
                    put_dest_file(proc, new_fname, dest_file_location,
                                  resched_file_location, content)

            # then copy context new values file
            logger.debug("writing values file")
            values_dest_location = computation_platform_settings['type']\
                + "@" + os.path.join(relative_path_suffix,
                                     proc['id'],
                                     local_settings['payload_cloud_dirname'],
                                     self.VALUES_FNAME)
            logger.debug("values_dest_location =%s" % values_dest_location)

            values_dest_url = get_url_with_credentials(
                computation_platform_settings,
                values_dest_location,
                is_relative_path=True,
                ip_address=proc['ip_address'])

            storage.put_file(values_dest_url, json.dumps(context, indent=4))