Example #1
    def copy_to_scratch_space(self, run_settings, local_settings):
        bdp_username = run_settings['http://rmit.edu.au/schemas/bdp_userprofile']['username']
        output_storage_url = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['platform_url']
        output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)

        run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset'] = self.output_loc_offset
        offset = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset']
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)
        iter_inputdir = os.path.join(self.job_dir, "input_0")
        logger.debug("iter_inputdir=%s" % iter_inputdir)

        input_location = run_settings[
            RMIT_SCHEMA + '/input/system']['input_location']
        logger.debug("input_location=%s" % input_location)
        # TODO: input location will eventually be replaced by the scratch space that was used by the sweep
        # TODO: the sweep will indicate the location of the scratch space in the run_settings
        # TODO: add scheme (ssh) to input_location
        source_url = get_url_with_credentials(local_settings, input_location)
        logger.debug("source_url=%s" % source_url)

        destination_url = get_url_with_credentials(
            output_storage_settings,
            '%s://%s@%s' % (output_storage_settings['scheme'],
                            output_storage_settings['type'],
                            iter_inputdir),
            is_relative_path=False)
        logger.debug("destination_url=%s" % destination_url)
        storage.copy_directories(source_url, destination_url)
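The destination URL built above follows the scheme://type@path convention that recurs throughout these examples. Below is a minimal, self-contained sketch of that construction, using hypothetical settings values (get_url_with_credentials itself is a Chiminey helper and is not reproduced here):

import os

# Hypothetical output-storage settings mirroring the keys used above.
output_storage_settings = {'scheme': 'ssh', 'type': 'unix'}
job_dir = '/var/chiminey/jobs/42'
iter_inputdir = os.path.join(job_dir, 'input_0')

# Same '%s://%s@%s' pattern as the destination_url in the example.
destination = '%s://%s@%s' % (output_storage_settings['scheme'],
                              output_storage_settings['type'],
                              iter_inputdir)
print(destination)  # ssh://unix@/var/chiminey/jobs/42/input_0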
Example #2
    def copy_to_scratch_space(self, run_settings, local_settings):
        bdp_username = run_settings[
            'http://rmit.edu.au/schemas/bdp_userprofile']['username']
        output_storage_url = run_settings[
            'http://rmit.edu.au/schemas/platform/storage/output'][
                'platform_url']
        output_storage_settings = manage.get_platform_settings(
            output_storage_url, bdp_username)

        run_settings['http://rmit.edu.au/schemas/platform/storage/output'][
            'offset'] = self.output_loc_offset
        offset = run_settings[
            'http://rmit.edu.au/schemas/platform/storage/output']['offset']
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)
        iter_inputdir = os.path.join(self.job_dir, "input_0")
        logger.debug("iter_inputdir=%s" % iter_inputdir)

        input_location = run_settings[RMIT_SCHEMA +
                                      '/input/system']['input_location']
        logger.debug("input_location=%s" % input_location)
        # TODO: input location will eventually be replaced by the scratch space that was used by the sweep
        # TODO: the sweep will indicate the location of the scratch space in the run_settings
        # TODO: add scheme (ssh) to input_location
        source_url = get_url_with_credentials(local_settings, input_location)
        logger.debug("source_url=%s" % source_url)

        destination_url = get_url_with_credentials(
            output_storage_settings,
            '%s://%s@%s' % (output_storage_settings['scheme'],
                            output_storage_settings['type'], iter_inputdir),
            is_relative_path=False)
        logger.debug("destination_url=%s" % destination_url)
        storage.copy_directories(source_url, destination_url)
Example #3
    def copy_to_scratch_space(self, run_settings, local_settings, result_offset):
        bdp_username = run_settings['%s/bdp_userprofile' % django_settings.SCHEMA_PREFIX]['username']
        output_storage_url = run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['platform_url']
        output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)

        run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['offset'] = self.output_loc_offset
        offset = run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['offset']
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)
        iter_inputdir = os.path.join(self.job_dir, result_offset)
        logger.debug("iter_inputdir=%s" % iter_inputdir)

        input_storage_settings = self.get_platform_settings(run_settings, '%s/platform/storage/input' % django_settings.SCHEMA_PREFIX)
        #input_location = run_settings[django_settings.SCHEMA_PREFIX + '/input/system']['input_location']

        try:
            input_location = getval(run_settings, django_settings.SCHEMA_PREFIX + '/input/system/input_location')
        except SettingNotFoundException:
            try:
                input_location = getval(run_settings, django_settings.SCHEMA_PREFIX + '/input/location/input_location')
            except SettingNotFoundException:
                input_location = getval(run_settings, django_settings.SCHEMA_PREFIX + '/input/location/input/input_location')
        logger.debug("input_location=%s" % input_location)
        # TODO: input location will eventually be replaced by the scratch space that was used by the sweep
        # TODO: the sweep will indicate the location of the scratch space in the run_settings
        # TODO: add scheme (ssh) to input_location

        #source_url = get_url_with_credentials(local_settings, input_location)

        input_offset = run_settings['%s/platform/storage/input' % django_settings.SCHEMA_PREFIX]['offset']
        input_url = "%s://%s@%s/%s" % (input_storage_settings['scheme'],
                                       input_storage_settings['type'],
                                       input_storage_settings['host'], input_offset)
        source_url = get_url_with_credentials(
            input_storage_settings, input_url, is_relative_path=False)

        logger.debug("source_url=%s" % source_url)

        destination_url = get_url_with_credentials(
            output_storage_settings,
            '%s://%s@%s' % (output_storage_settings['scheme'],
                            output_storage_settings['type'],
                            iter_inputdir),
            is_relative_path=False)
        logger.debug("destination_url=%s" % destination_url)
        storage.copy_directories(source_url, destination_url)
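Example #3 resolves input_location by trying progressively older schema key paths. The sketch below reproduces that fallback chain against stand-in versions of getval and SettingNotFoundException (the real Chiminey implementations are not shown in these snippets, so both stubs are assumptions):

class SettingNotFoundException(Exception):
    """Stand-in for Chiminey's exception for a missing key path."""

def getval(run_settings, key):
    """Minimal stand-in for Chiminey's getval: resolves '<namespace>/<name>'."""
    namespace, _, name = key.rpartition('/')
    try:
        return run_settings[namespace][name]
    except KeyError:
        raise SettingNotFoundException(key)

SCHEMA_PREFIX = 'http://rmit.edu.au/schemas'
run_settings = {
    '%s/input/location' % SCHEMA_PREFIX:
        {'input_location': 'file://local@/data/input'},  # hypothetical value
}

# Same fallback chain as Example #3: newest key path first.
try:
    input_location = getval(run_settings, '%s/input/system/input_location' % SCHEMA_PREFIX)
except SettingNotFoundException:
    try:
        input_location = getval(run_settings, '%s/input/location/input_location' % SCHEMA_PREFIX)
    except SettingNotFoundException:
        input_location = getval(run_settings, '%s/input/location/input/input_location' % SCHEMA_PREFIX)

print(input_location)  # file://local@/data/input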
Example #4
    def process(self, run_settings):
        try:
            id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            id = 0
        messages.info(run_settings, '%d: transforming' % (id+1))

        # self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        bdp_username = getval(run_settings, '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)

        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
        output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
        logger.debug("output_storage_settings=%s" % output_storage_settings)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                    output_storage_settings['type'])
        offset = getval(run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)

        try:
            self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
            self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
            self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
            self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
        except (SettingNotFoundException, ValueError):
            # FIXME: Not clear that this is a valid path through stages
            self.output_dir = os.path.join(self.job_dir, "output")
            self.input_dir = os.path.join(self.job_dir, "input")
            self.new_input_dir = os.path.join(self.job_dir, "input_1")

        logger.debug('self.output_dir=%s' % self.output_dir)

        try:
            self.experiment_id = int(getval(run_settings, '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.experiment_id = 0

        output_url = get_url_with_credentials(
            output_storage_settings,
            output_prefix + self.output_dir, is_relative_path=False)

        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)

        outputs = self.process_outputs(run_settings, self.job_dir, output_url, output_storage_settings, offset)
        try:
            curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:

            mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
            mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            all_settings['contextid'] = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)

            try:
                mytardis_platform = jobs.safe_import('chiminey.platform.mytardis.MyTardisPlatform', [], {})
                logger.debug('outputs=%s' % outputs)
                self.experiment_id = mytardis_platform.create_dataset_for_intermediate_output(
                    run_settings, self.experiment_id, self.job_dir,
                    output_url, all_settings, outputs=outputs)
            except ImproperlyConfigured as e:
                logger.error("Cannot load mytardis platform hook %s" % e)

        else:
            logger.warn('Data curation is off')
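jobs.safe_import loads the MyTardis platform hook from a dotted class path at runtime. A rough standard-library equivalent is sketched below; the function body here is an assumption, only the dotted-path idea comes from the example, and the real Chiminey helper may add error handling beyond this:

import importlib

def safe_import(dotted_path, args, kwargs):
    """Resolve 'pkg.module.ClassName' and instantiate it; sketch only."""
    module_path, _, class_name = dotted_path.rpartition('.')
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls(*args, **kwargs)

# Demo with a stdlib class; Example #4 passes
# 'chiminey.platform.mytardis.MyTardisPlatform' instead.
d = safe_import('collections.OrderedDict', [], {})
print(type(d))  # <class 'collections.OrderedDict'>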
Example #5
    def process(self, run_settings):
        try:
            id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            id = 0
        messages.info(run_settings, '%d: converging' % (id+1))

        def retrieve_local_settings(run_settings, local_settings):

            update(local_settings, run_settings
                    # '%s/stages/setup/payload_source' % RMIT_SCHEMA,
                    # '%s/stages/setup/payload_destination' % RMIT_SCHEMA,
                    # '%s/system/platform' % RMIT_SCHEMA,
                    # # '%s/stages/create/custom_prompt' % RMIT_SCHEMA,
                    # # '%s/stages/create/cloud_sleep_interval' % RMIT_SCHEMA,
                    # # '%s/stages/create/created_nodes' % RMIT_SCHEMA,
                    # '%s/stages/run/payload_cloud_dirname' % RMIT_SCHEMA,
                    # '%s/system/max_seed_int' % RMIT_SCHEMA,
                    # '%s/stages/run/compile_file' % RMIT_SCHEMA,
                    # '%s/stages/run/retry_attempts' % RMIT_SCHEMA,
                    # '%s/input/system/cloud/number_vm_instances' % RMIT_SCHEMA,
                    # '%s/input/hrmc/iseed' % RMIT_SCHEMA,
                    # '%s/input/hrmc/optimisation_scheme' % RMIT_SCHEMA,
                    # '%s/input/hrmc/threshold' % RMIT_SCHEMA,
            )
            local_settings['bdp_username'] = getval(run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)

        local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        retrieve_local_settings(run_settings, local_settings)

        bdp_username = local_settings['bdp_username']

        # get output
        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
        output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        job_dir = manage.get_job_dir(output_storage_settings, offset)

        # get mytardis
        #mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
        #mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

        # setup new paths
        try:
            self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
            self.output_dir = os.path.join(job_dir, "output_%d" % self.id)
            self.iter_inputdir = os.path.join(job_dir, "input_%d" % (self.id + 1))
            #self.new_iter_inputdir = "input_%d" % (self.id + 1)
        except (SettingNotFoundException, ValueError):
            self.output_dir = os.path.join(job_dir, "output")
            self.iter_inputdir = os.path.join(job_dir, "input")
            self.id = 0

        logger.debug('output_dir=%s iter_inputdir=%s' % (self.output_dir, self.iter_inputdir))

        try:
            self.experiment_id = int(getval(run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            self.experiment_id = 0

        inputdir_url = get_url_with_credentials(output_storage_settings,
            output_prefix + self.iter_inputdir, is_relative_path=False)
        logger.debug('input_dir_url=%s' % inputdir_url)

        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(inputdir_url)
        # fsys = storage.get_filesystem(inputdir_url)
        # logger.debug('mypath=%s' % mypath)
        # input_dirs, _ = fsys.listdir(mypath)
        # logger.debug('input_dirs=%s' % input_dirs)

        (self.done_iterating, self.criterion) = self.process_outputs(run_settings, job_dir, inputdir_url, output_storage_settings)

        if self.done_iterating:
            logger.debug("Total Iterations: %d" % self.id)

            # get source url
            iter_output_dir = os.path.join(job_dir, "output_%s" % self.id)
            source_url = get_url_with_credentials(output_storage_settings,
                output_prefix + iter_output_dir, is_relative_path=False)

            # get dest url
            new_output_dir = os.path.join(job_dir, 'output')
            dest_url = get_url_with_credentials(output_storage_settings,
                output_prefix + new_output_dir, is_relative_path=False)

            storage.copy_directories(source_url, dest_url)

            # curate
            try:
                curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
            except SettingNotFoundException:
                curate_data = 0
            if curate_data:

                mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
                mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

                all_settings = dict(mytardis_settings)
                all_settings.update(output_storage_settings)

                logger.debug("source_url=%s" % source_url)
                logger.debug("dest_url=%s" % dest_url)
                logger.debug("job_dir=%s" % job_dir)
                self.experiment_id = self.curate_dataset(run_settings, self.experiment_id,
                                                         job_dir, dest_url,
                                                         all_settings)
            else:
                logger.warn('Data curation is off')
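Once converged, Example #5 promotes the last per-iteration output_<id> directory to a plain output directory via storage.copy_directories over credentialed URLs. A local-filesystem sketch of the same promotion (paths and iteration count are hypothetical):

import os
import shutil

job_dir = '/var/chiminey/jobs/42'  # hypothetical job directory
final_id = 3                       # hypothetical last iteration

iter_output_dir = os.path.join(job_dir, 'output_%d' % final_id)
new_output_dir = os.path.join(job_dir, 'output')

# Local stand-in for storage.copy_directories(source_url, dest_url).
shutil.copytree(iter_output_dir, new_output_dir)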
Example #6
        local_settings = getvals(
            run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        self.set_execute_settings(run_settings, local_settings)

        self.contextid = getval(
            run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        # NB: Don't catch SettingNotFoundException because we can't recover
        # run_settings['%s/system' % django_settings.SCHEMA_PREFIX][u'contextid']
        logger.debug('contextid=%s' % self.contextid)
        output_storage_url = getval(
            run_settings, '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
        output_storage_settings = manage.get_platform_settings(
            output_storage_url, local_settings['bdp_username'])
        offset = getval(
            run_settings, '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)
        # TODO: we assume initial input is in "%s/input_0" % self.job_dir
        # in configure stage we could copy initial data in 'input_location'
        # into this location
        try:
            self.id = int(getval(run_settings, '%s/system/id' %
                                 django_settings.SCHEMA_PREFIX))
            self.iter_inputdir = os.path.join(
                self.job_dir, "input_%s" % self.id)
        except (SettingNotFoundException, ValueError):
            self.id = 0
            self.iter_inputdir = os.path.join(self.job_dir, "input_location")
        messages.info(run_settings, "%s: Executing" % (self.id + 1))
        logger.debug("id = %s" % self.id)

        try:
Example #7
        logger.debug("processing execute stage")
        local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        #self.retrieve_boto_settings(run_settings, local_settings)
        self.set_execute_settings(run_settings, local_settings)

        self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
        # NB: Don't catch SettingNotFoundException because we can't recover
        # run_settings['http://rmit.edu.au/schemas/system'][u'contextid']

        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
        output_storage_settings = manage.get_platform_settings(output_storage_url, local_settings['bdp_username'])
        offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        # offset = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset']
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)
        # TODO: we assume initial input is in "%s/input_0" % self.job_dir
        # in configure stage we could copy initial data in 'input_location' into this location
        try:
            self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
            self.iter_inputdir = os.path.join(self.job_dir, "input_%s" % self.id)
        except (SettingNotFoundException, ValueError):
            self.id = 0
            self.iter_inputdir = os.path.join(self.job_dir, "input_location")
        messages.info(run_settings, "%s: execute" % (self.id + 1))
        logger.debug("id = %s" % self.id)

        try:
            self.initial_numbfile = int(getval(run_settings, '%s/stages/run/initial_numbfile' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            logger.warn("setting initial_numbfile for first iteration")
Example #8
    def process(self, run_settings):

        # self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
        bdp_username = getval(run_settings,
                              '%s/bdp_userprofile/username' % RMIT_SCHEMA)

        output_storage_url = getval(
            run_settings,
            '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
        output_storage_settings = manage.get_platform_settings(
            output_storage_url, bdp_username)
        logger.debug("output_storage_settings=%s" % output_storage_settings)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        offset = getval(run_settings,
                        '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)

        try:
            self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
            self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
            self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
            self.new_input_dir = os.path.join(
                self.job_dir, "input_%d" % (self.id + 1))
        except (SettingNotFoundException, ValueError):
            # FIXME: Not clear that this is a valid path through stages
            self.output_dir = os.path.join(self.job_dir, "output")
            self.input_dir = os.path.join(self.job_dir, "input")
            self.new_input_dir = os.path.join(self.job_dir, "input_1")

        logger.debug('self.output_dir=%s' % self.output_dir)

        try:
            self.experiment_id = int(
                getval(run_settings,
                       '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            self.experiment_id = 0

        output_url = get_url_with_credentials(output_storage_settings,
                                              output_prefix + self.output_dir,
                                              is_relative_path=False)

        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)

        self.process_outputs(run_settings, self.job_dir, output_url,
                             output_storage_settings, offset)

        # logger.debug("output_url=%s" % output_url)
        # # Should this be output_dir or root of remotesys?
        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)
        # logger.debug("fsys=%s" % fsys)
        # logger.debug("mypath=%s" % mypath)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)
        # self.audit = ""
        # outputs = []

        # Node_info = namedtuple('Node_info',
        #     ['dir', 'index', 'number', 'criterion'])

        # # gather node_infos
        # for node_output_dir in node_output_dirs:
        #     base_fname = "HRMC.inp"
        #     try:
        #         values_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.output_dir, node_output_dir,
        #             '%s_values' % base_fname), is_relative_path=False)
        #         values_content = storage.get_file(values_url)
        #         logger.debug("values_file=%s" % values_url)
        #     except IOError:
        #         logger.warn("no values file found")
        #         values_map = {}
        #     else:
        #         values_map = dict(json.loads(values_content))
        #     criterion = self.compute_psd_criterion(
        #         node_output_dir, fsys,
        #         output_storage_settings)
        #     #criterion = self.compute_hrmc_criterion(values_map['run_counter'], node_output_dir, fs,)
        #     logger.debug("criterion=%s" % criterion)
        #     index = 0   # FIXME: as node_output_dirs in particular order, then index is not useful.
        #     outputs.append(Node_info(dir=node_output_dir,
        #         index=index, number=values_map['run_counter'], criterion=criterion))

        # outputs.sort(key=lambda x: int(x.criterion))
        # logger.debug("outputs=%s" % outputs)

        # logger.debug('threshold=%s' % self.threshold)
        # total_picks = 1
        # if len(self.threshold) > 1:
        #     for i in self.threshold:
        #         total_picks *= self.threshold[i]
        # else:
        #     total_picks = self.threshold[0]

        # if not outputs:
        #     logger.error("no ouput found for this iteration")
        #     return

        # for index in range(0, total_picks):
        #     Node_info = outputs[index]
        #     logger.debug("node_info.dir=%s" % Node_info.dir)
        #     logger.debug("Node_info=%s" % str(Node_info))
        #     self.new_input_node_dir = os.path.join(self.new_input_dir,
        #         Node_info.dir)
        #     logger.debug("New input node dir %s" % self.new_input_node_dir)

        #     # Move all existing domain input files unchanged to next input directory
        #     for f in self.DOMAIN_INPUT_FILES:
        #         source_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.output_dir, Node_info.dir, f), is_relative_path=False)
        #         dest_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.new_input_node_dir, f),
        #             is_relative_path=False)
        #         logger.debug('source_url=%s, dest_url=%s' % (source_url, dest_url))

        #         content = storage.get_file(source_url)
        #         logger.debug('content collected')
        #         storage.put_file(dest_url, content)
        #         logger.debug('put successfully')

        #     logger.debug('put file successfully')
        #     pattern = "*_values"
        #     self.copy_files_with_pattern(fsys, os.path.join(self.output_dir, Node_info.dir),
        #         self.new_input_node_dir, pattern,
        #         output_storage_settings)

        #     pattern = "*_template"
        #     self.copy_files_with_pattern(fsys, os.path.join(self.output_dir, Node_info.dir),
        #         self.new_input_node_dir, pattern,
        #         output_storage_settings)

        #     # NB: Converge stage triggers based on criterion value from audit.

        #     info = "Run %s preserved (error %s)\n" % (Node_info.number, Node_info.criterion)
        #     audit_url = get_url_with_credentials(
        #         output_storage_settings,
        #             output_prefix + os.path.join(self.new_input_node_dir, 'audit.txt'), is_relative_path=False)
        #     storage.put_file(audit_url, info)
        #     logger.debug("audit=%s" % info)
        #     self.audit += info

        #     # move xyz_final.xyz to initial.xyz
        #     source_url = get_url_with_credentials(
        #         output_storage_settings,
        #         output_prefix + os.path.join(self.output_dir, Node_info.dir, "xyz_final.xyz"), is_relative_path=False)
        #     dest_url = get_url_with_credentials(
        #         output_storage_settings,
        #         output_prefix + os.path.join(self.new_input_node_dir, 'input_initial.xyz'), is_relative_path=False)
        #     content = storage.get_file(source_url)
        #     storage.put_file(dest_url, content)
        #     self.audit += "spawning diamond runs\n"

        # audit_url = get_url_with_credentials(
        #     output_storage_settings,
        #                 output_prefix + os.path.join(self.new_input_dir, 'audit.txt'), is_relative_path=False)
        # storage.put_file(audit_url, self.audit)

        # curate dataset into mytardis
        try:
            curate_data = getval(run_settings,
                                 '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:

            mytardis_url = getval(
                run_settings,
                '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
            mytardis_settings = manage.get_platform_settings(
                mytardis_url, bdp_username)

            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)

            self.experiment_id = self.curate_dataset(run_settings,
                                                     self.experiment_id,
                                                     self.job_dir, output_url,
                                                     all_settings)
        else:
            logger.warn('Data curation is off')
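all_settings in these curation branches is a plain dict merge: start from the MyTardis settings, then let the output-storage settings win on any shared keys. A quick illustration with hypothetical values:

mytardis_settings = {'host': 'mytardis.example.org', 'scheme': 'http'}
output_storage_settings = {'host': 'store.example.org', 'type': 'unix'}

all_settings = dict(mytardis_settings)        # copy; don't mutate the original
all_settings.update(output_storage_settings)  # storage values win on clashes

print(all_settings)
# {'host': 'store.example.org', 'scheme': 'http', 'type': 'unix'}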
Example #9
    def process(self, run_settings):
        try:
            id = int(
                getval(run_settings,
                       '%s/system/id' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            id = 0
        messages.info(run_settings, '%d: transforming' % (id + 1))

        # self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        bdp_username = getval(
            run_settings,
            '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)

        output_storage_url = getval(
            run_settings, '%s/platform/storage/output/platform_url' %
            django_settings.SCHEMA_PREFIX)
        output_storage_settings = manage.get_platform_settings(
            output_storage_url, bdp_username)
        logger.debug("output_storage_settings=%s" % output_storage_settings)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        offset = getval(
            run_settings, '%s/platform/storage/output/offset' %
            django_settings.SCHEMA_PREFIX)
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)

        try:
            self.id = int(
                getval(run_settings,
                       '%s/system/id' % django_settings.SCHEMA_PREFIX))
            self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
            self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
            self.new_input_dir = os.path.join(
                self.job_dir, "input_%d" % (self.id + 1))
        except (SettingNotFoundException, ValueError):
            # FIXME: Not clear that this is a valid path through stages
            self.output_dir = os.path.join(self.job_dir, "output")
            self.input_dir = os.path.join(self.job_dir, "input")
            self.new_input_dir = os.path.join(self.job_dir, "input_1")

        logger.debug('self.output_dir=%s' % self.output_dir)

        try:
            self.experiment_id = int(
                getval(
                    run_settings, '%s/input/mytardis/experiment_id' %
                    django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.experiment_id = 0

        output_url = get_url_with_credentials(output_storage_settings,
                                              output_prefix + self.output_dir,
                                              is_relative_path=False)

        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)

        outputs = self.process_outputs(run_settings, self.job_dir, output_url,
                                       output_storage_settings, offset)
        try:
            curate_data = getval(
                run_settings, '%s/input/mytardis/curate_data' %
                django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:

            mytardis_url = getval(
                run_settings, '%s/input/mytardis/mytardis_platform' %
                django_settings.SCHEMA_PREFIX)
            mytardis_settings = manage.get_platform_settings(
                mytardis_url, bdp_username)

            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            all_settings['contextid'] = getval(
                run_settings,
                '%s/system/contextid' % django_settings.SCHEMA_PREFIX)

            try:
                mytardis_platform = jobs.safe_import(
                    'chiminey.platform.mytardis.MyTardisPlatform', [], {})
                logger.debug('outputs=%s' % outputs)
                self.experiment_id = mytardis_platform.create_dataset_for_intermediate_output(
                    run_settings,
                    self.experiment_id,
                    self.job_dir,
                    output_url,
                    all_settings,
                    outputs=outputs)
            except ImproperlyConfigured as e:
                logger.error("Cannot load mytardis platform hook %s" % e)

        else:
            logger.warn('Data curation is off')
Example #10
    def process(self, run_settings):

        # self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
        bdp_username = getval(run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)

        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
        output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
        logger.debug("output_storage_settings=%s" % output_storage_settings)
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                    output_storage_settings['type'])
        offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
        self.job_dir = manage.get_job_dir(output_storage_settings, offset)

        try:
            self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
            self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
            self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
            self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
        except (SettingNotFoundException, ValueError):
            # FIXME: Not clear that this is a valid path through stages
            self.output_dir = os.path.join(self.job_dir, "output")
            self.input_dir = os.path.join(self.job_dir, "input")
            self.new_input_dir = os.path.join(self.job_dir, "input_1")

        logger.debug('self.output_dir=%s' % self.output_dir)

        try:
            self.experiment_id = int(getval(run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            self.experiment_id = 0

        output_url = get_url_with_credentials(
            output_storage_settings,
            output_prefix + self.output_dir, is_relative_path=False)

        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)

        self.process_outputs(run_settings, self.job_dir, output_url, output_storage_settings, offset)

        # logger.debug("output_url=%s" % output_url)
        # # Should this be output_dir or root of remotesys?
        # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
        # fsys = storage.get_filesystem(output_url)
        # logger.debug("fsys=%s" % fsys)
        # logger.debug("mypath=%s" % mypath)

        # node_output_dirs, _ = fsys.listdir(mypath)
        # logger.debug("node_output_dirs=%s" % node_output_dirs)
        # self.audit = ""
        # outputs = []

        # Node_info = namedtuple('Node_info',
        #     ['dir', 'index', 'number', 'criterion'])

        # # gather node_infos
        # for node_output_dir in node_output_dirs:
        #     base_fname = "HRMC.inp"
        #     try:
        #         values_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.output_dir, node_output_dir,
        #             '%s_values' % base_fname), is_relative_path=False)
        #         values_content = storage.get_file(values_url)
        #         logger.debug("values_file=%s" % values_url)
        #     except IOError:
        #         logger.warn("no values file found")
        #         values_map = {}
        #     else:
        #         values_map = dict(json.loads(values_content))
        #     criterion = self.compute_psd_criterion(
        #         node_output_dir, fsys,
        #         output_storage_settings)
        #     #criterion = self.compute_hrmc_criterion(values_map['run_counter'], node_output_dir, fs,)
        #     logger.debug("criterion=%s" % criterion)
        #     index = 0   # FIXME: as node_output_dirs in particular order, then index is not useful.
        #     outputs.append(Node_info(dir=node_output_dir,
        #         index=index, number=values_map['run_counter'], criterion=criterion))

        # outputs.sort(key=lambda x: int(x.criterion))
        # logger.debug("outputs=%s" % outputs)

        # logger.debug('threshold=%s' % self.threshold)
        # total_picks = 1
        # if len(self.threshold) > 1:
        #     for i in self.threshold:
        #         total_picks *= self.threshold[i]
        # else:
        #     total_picks = self.threshold[0]

        # if not outputs:
        #     logger.error("no ouput found for this iteration")
        #     return

        # for index in range(0, total_picks):
        #     Node_info = outputs[index]
        #     logger.debug("node_info.dir=%s" % Node_info.dir)
        #     logger.debug("Node_info=%s" % str(Node_info))
        #     self.new_input_node_dir = os.path.join(self.new_input_dir,
        #         Node_info.dir)
        #     logger.debug("New input node dir %s" % self.new_input_node_dir)

        #     # Move all existing domain input files unchanged to next input directory
        #     for f in self.DOMAIN_INPUT_FILES:
        #         source_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.output_dir, Node_info.dir, f), is_relative_path=False)
        #         dest_url = get_url_with_credentials(
        #             output_storage_settings,
        #             output_prefix + os.path.join(self.new_input_node_dir, f),
        #             is_relative_path=False)
        #         logger.debug('source_url=%s, dest_url=%s' % (source_url, dest_url))

        #         content = storage.get_file(source_url)
        #         logger.debug('content collected')
        #         storage.put_file(dest_url, content)
        #         logger.debug('put successfully')

        #     logger.debug('put file successfully')
        #     pattern = "*_values"
        #     self.copy_files_with_pattern(fsys, os.path.join(self.output_dir, Node_info.dir),
        #         self.new_input_node_dir, pattern,
        #         output_storage_settings)

        #     pattern = "*_template"
        #     self.copy_files_with_pattern(fsys, os.path.join(self.output_dir, Node_info.dir),
        #         self.new_input_node_dir, pattern,
        #         output_storage_settings)

        #     # NB: Converge stage triggers based on criterion value from audit.

        #     info = "Run %s preserved (error %s)\n" % (Node_info.number, Node_info.criterion)
        #     audit_url = get_url_with_credentials(
        #         output_storage_settings,
        #             output_prefix + os.path.join(self.new_input_node_dir, 'audit.txt'), is_relative_path=False)
        #     storage.put_file(audit_url, info)
        #     logger.debug("audit=%s" % info)
        #     self.audit += info

        #     # move xyz_final.xyz to initial.xyz
        #     source_url = get_url_with_credentials(
        #         output_storage_settings,
        #         output_prefix + os.path.join(self.output_dir, Node_info.dir, "xyz_final.xyz"), is_relative_path=False)
        #     dest_url = get_url_with_credentials(
        #         output_storage_settings,
        #         output_prefix + os.path.join(self.new_input_node_dir, 'input_initial.xyz'), is_relative_path=False)
        #     content = storage.get_file(source_url)
        #     storage.put_file(dest_url, content)
        #     self.audit += "spawning diamond runs\n"

        # audit_url = get_url_with_credentials(
        #     output_storage_settings,
        #                 output_prefix + os.path.join(self.new_input_dir, 'audit.txt'), is_relative_path=False)
        # storage.put_file(audit_url, self.audit)

        # curate dataset into mytardis
        try:
            curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:

            mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
            mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)

            self.experiment_id = self.curate_dataset(run_settings, self.experiment_id,
                                                     self.job_dir, output_url,
                                                     all_settings)
        else:
            logger.warn('Data curation is off')
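Every variant above gates curation on the curate_data setting and defaults to off when the key is absent. A self-contained sketch of that default-off gate (dictionary keys here are hypothetical; KeyError stands in for SettingNotFoundException):

def get_flag(run_settings, key, default=0):
    """Default-off lookup mirroring the try/except gate in the examples."""
    try:
        return run_settings[key]
    except KeyError:
        return default

run_settings = {}  # hypothetical: no MyTardis settings present
if get_flag(run_settings, 'http://rmit.edu.au/schemas/input/mytardis/curate_data'):
    print('curating dataset')
else:
    print('Data curation is off')  # matches the logger.warn branch above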