Code example #1
def _upload_variations_inputs(settings, source_url_initial, values_map):
    bdp_username = settings['bdp_username']
    logger.debug("source_url_initial=%s" % source_url_initial)
    encoded_s_url = storage.get_url_with_credentials(settings, source_url_initial)
    logger.debug("encoded_s_url=%s" % encoded_s_url)

    dest_url = _get_dest_bdp_url(settings)

    computation_platform_url = settings['comp_platform_url']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url,
        bdp_username)
    settings.update(comp_pltf_settings)

    encoded_d_url = storage.get_url_with_credentials(settings,
        dest_url, is_relative_path=True, ip_address=settings['host'])

    storage.copy_directories(encoded_s_url, encoded_d_url)

    for content_fname, content in _instantiate_context(
            source_url_initial,
            settings,
            values_map).items():

        content_url = storage.get_url_with_credentials(
            settings,
            os.path.join(dest_url, content_fname),
            is_relative_path=True, ip_address=settings['host'])
        logger.debug("content_url=%s" % content_url)
        storage.put_file(content_url, content.encode('utf-8'))

    _save_values(settings, dest_url, values_map)

    logger.debug("done input upload")
Code example #2
        def copy_files_with_pattern(iter_out_fsys, source_path, dest_path,
                                    pattern, all_settings):
            """
            """
            output_prefix = '%s://%s@' % (all_settings['scheme'],
                                          all_settings['type'])

            logger.debug('source_path=%s, dest_path=%s' %
                         (source_path, dest_path))
            # (scheme, host, iter_output_path, location, query_settings) = storage.parse_bdpurl(source_path)
            _, node_output_fnames = iter_out_fsys.listdir(source_path)
            ip_address = all_settings['ip_address']
            for f in node_output_fnames:
                if fnmatch.fnmatch(f, pattern):
                    source_url = get_url_with_credentials(
                        all_settings,
                        output_prefix +
                        os.path.join(ip_address, source_path, f),
                        is_relative_path=False)
                    dest_url = get_url_with_credentials(
                        all_settings,
                        output_prefix + os.path.join(ip_address, dest_path, f),
                        is_relative_path=False)
                    logger.debug('source_url=%s, dest_url=%s' %
                                 (source_url, dest_url))
                    content = storage.get_file(source_url)
                    storage.put_file(dest_url, content)
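
The loop above copies whole files: each match is read fully into memory with get_file and rewritten with put_file, so nothing is streamed. Reduced to its core, with the URL construction factored into a hypothetical build_url callable (an assumption, not part of the original):

import fnmatch
import os

from chiminey import storage  # assumed import path

def copy_matching(fs, source_path, dest_path, pattern, build_url):
    # build_url stands in for get_url_with_credentials plus the
    # scheme/type/ip prefix assembled in the example above.
    _, fnames = fs.listdir(source_path)
    for f in fnames:
        if fnmatch.fnmatch(f, pattern):
            content = storage.get_file(build_url(os.path.join(source_path, f)))
            storage.put_file(build_url(os.path.join(dest_path, f)), content)
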
Code example #3
def put_proc_ids(relative_path, ids, ip, settings):
    relative_path = os.path.join(relative_path, settings['filename_for_PIDs'])
    logger.debug('put_proc_ids=%s' % relative_path)
    destination = get_url_with_credentials(settings,
                                           relative_path,
                                           is_relative_path=True,
                                           ip_address=ip)
    logger.debug('destination=%s' % destination)
    ids_str = [str(i) for i in ids]
    proc_ids = ("\n".join(ids_str)) + "\n"
    logger.debug('ids_str=%s' % ids_str)
    logger.debug('proc_ids=%s' % proc_ids)
    logger.debug('encoded=%s' % proc_ids.encode('utf-8'))
    put_file(destination, proc_ids.encode('utf-8'))
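
A hypothetical call, to show the resulting file: one process id per line plus a trailing newline, written to settings['filename_for_PIDs'] under the given relative path on the host at ip (the ids, path, and address below are invented):

# Writes "8331\n8332\n" to some_dir/<filename_for_PIDs> on 127.0.0.1.
put_proc_ids('some_dir', [8331, 8332], '127.0.0.1', settings)
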
Code example #4
def put_proc_ids(relative_path, ids, ip, settings):
    relative_path = os.path.join(relative_path,
                                 settings['filename_for_PIDs'])
    logger.debug('put_proc_ids=%s' % relative_path)
    destination = get_url_with_credentials(settings,
        relative_path,
        is_relative_path=True,
        ip_address=ip)
    logger.debug('destination=%s' % destination)
    ids_str = [str(i) for i in ids]
    proc_ids = ("\n".join(ids_str)) + "\n"
    logger.debug('ids_str=%s' % ids_str)
    logger.debug('proc_ids=%s' % proc_ids)
    logger.debug('encoded=%s' % proc_ids.encode('utf-8'))
    put_file(destination, proc_ids.encode('utf-8'))
Code example #5
        def copy_files_with_pattern(iter_out_fsys, source_path,
                                 dest_path, pattern, all_settings):
            """
            """
            output_prefix = '%s://%s@' % (all_settings['scheme'],
                                    all_settings['type'])

            logger.debug('source_path=%s, dest_path=%s' % (source_path, dest_path))
            # (scheme, host, iter_output_path, location, query_settings) = storage.parse_bdpurl(source_path)
            _, node_output_fnames = iter_out_fsys.listdir(source_path)
            ip_address = all_settings['ip_address']
            for f in node_output_fnames:
                if fnmatch.fnmatch(f, pattern):
                    source_url = get_url_with_credentials(all_settings, output_prefix + os.path.join(ip_address, source_path, f), is_relative_path=False)
                    dest_url = get_url_with_credentials(all_settings, output_prefix + os.path.join(ip_address, dest_path, f), is_relative_path=False)
                    logger.debug('source_url=%s, dest_url=%s' % (source_url, dest_url))
                    content = storage.get_file(source_url)
                    storage.put_file(dest_url, content)
Code example #6
File: execute.py Project: Libbum/chiminey
                def put_dest_file(proc, fname,
                                  dest_file_location, resched_file_location,
                                 content):
                    dest_url = get_url_with_credentials(
                            computation_platform_settings, os.path.join(dest_file_location, fname),
                            is_relative_path=True, ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" % output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                                output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")
Code example #7
                def put_dest_file(proc, fname, dest_file_location,
                                  resched_file_location, content):
                    dest_url = get_url_with_credentials(
                        computation_platform_settings,
                        os.path.join(dest_file_location, fname),
                        is_relative_path=True,
                        ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" %
                                     output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                            output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")
Code example #8
        logger.debug("values_source_url=%s" % values_source_url)

        values_dest_url = get_url_with_credentials(
            output_storage_settings,
            output_prefix +
            os.path.join(self.job_dir, self.output_dir, process_id,
                         django_settings.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("values_dest_url=%s" % values_dest_url)
        try:
            logger.debug('reading %s' % values_source_url)
            content = storage.get_file(values_source_url)
        except IOError:
            # no values file yet, so fall back to an empty JSON map
            content = json.dumps({})
        logger.debug('content=%s' % content)
        storage.put_file(values_dest_url, content)

    def process(self, run_settings):
        """
            Check all registered nodes to find whether
            they are running, stopped or in error_nodes
        """

        local_settings = getvals(run_settings,
                                 models.UserProfile.PROFILE_SCHEMA_NS)
        # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
        retrieve_local_settings(run_settings, local_settings)
        logger.debug("local_settings=%s" % local_settings)

        self.contextid = getval(
            run_settings,
            '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
Code example #9
File: execute.py Project: chiminey/chiminey
    def _upload_input_dir_variations(self, processes, local_settings,
                                     computation_platform_settings,
                                     output_storage_settings,
                                     mytardis_settings,
                                     input_dir, run_settings):
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        input_url_with_credentials = get_url_with_credentials(
            output_storage_settings, output_prefix + os.path.join(
                self.iter_inputdir, input_dir),
            is_relative_path=False)
        logger.debug('input_url_with_credentials=%s' %
                     input_url_with_credentials)
        if local_settings['curate_data']:

            try:
                mytardis_platform = jobs.safe_import('chiminey.platform.mytardis.MyTardisPlatform', [], {})
                self.experiment_id = mytardis_platform.create_dataset_for_input(self.experiment_id,
                                                      run_settings, local_settings,
                                                      output_storage_settings,
                                                      mytardis_settings,
                                                      input_url_with_credentials)
            except ImproperlyConfigured as e:
                logger.error("Cannot load mytardis platform hook %s" % e)

        else:
            logger.warn('Data curation is off')

        # get run Map
        parent_stage = self.import_parent_stage(run_settings)
        run_map, self.rand_index = parent_stage.get_internal_sweep_map(local_settings,
                                                                       run_settings=run_settings)

        # load value_map
        values_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir,
                                         input_dir,
                                         self.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("initial values_file=%s" % values_url_with_pkey)
        values = {}
        try:
            values_content = storage.get_file(values_url_with_pkey)
        except IOError:
            logger.warn("no values file found")
        else:
            logger.debug("values_content = %s" % values_content)
            values = dict(json.loads(values_content))
        logger.debug("values=%s" % values)

        # generates a set of variations for the template fname
        logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)
        contexts = self._get_variation_contexts(
            [run_map], values,  self.initial_numbfile)
        self.initial_numbfile += len(contexts)
        logger.debug('contexts = %s ' % contexts)
        logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)

        # for each context, copy each file to dest and any
        # templates to be instantiated, then store in values.

        template_pat = re.compile("(.*)_template")
        relative_path_suffix = self.get_relative_output_path(local_settings)

        for context in contexts:
            logger.debug("context=%s" % context)
            # get list of all files in input_dir
            fname_url_with_pkey = get_url_with_credentials(
                output_storage_settings,
                output_prefix + os.path.join(self.iter_inputdir, input_dir),
                is_relative_path=False)
            input_files = storage.list_dirs(fname_url_with_pkey,
                                            list_files=True)

            # get process information
            run_counter = context['run_counter']
            logger.debug("run_counter=%s" % run_counter)
            proc = None
            for p in processes:
                # TODO: how to handle invalid run_counter
                pid = int(p['id'])
                logger.debug("pid=%s" % pid)
                if pid == run_counter:
                    proc = p
                    break
            else:
                logger.error("no process found matching run_counter")
                raise BadInputException()
            logger.debug("proc=%s" % pformat(proc))

            for fname in input_files:
                logger.debug("fname=%s" % fname)
                templ_mat = template_pat.match(fname)
                fname_url_with_credentials = storage.get_url_with_credentials(
                    output_storage_settings,
                    output_prefix +
                    os.path.join(self.iter_inputdir, input_dir, fname),
                    is_relative_path=False)
                logger.debug("fname_url_with_credentials=%s" %
                             fname_url_with_credentials)

                def put_dest_file(proc, fname,
                                  dest_file_location, resched_file_location,
                                  content):
                    dest_url = get_url_with_credentials(
                        computation_platform_settings, os.path.join(
                            dest_file_location, fname),
                        is_relative_path=True, ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" %
                                     output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                            output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")

                outputs = []
                if templ_mat:
                    base_fname = templ_mat.group(1)
                    template_content = storage.get_file(
                        fname_url_with_credentials)
                    try:
                        templ = Template(template_content)
                    except TemplateSyntaxError as e:
                        logger.error(e)
                        # FIXME: should detect this during submission of job,
                        # as no sensible way to recover here.
                        # TODO: signal error conditions in job status
                        continue
                    new_context = Context(context)
                    logger.debug("new_content=%s" % new_context)
                    render_output = templ.render(new_context)
                    render_output = render_output.encode('utf-8')
                    outputs.append((base_fname, render_output))
                    outputs.append((fname, template_content))

                else:
                    content = storage.get_file(fname_url_with_credentials)
                    outputs.append((fname, content))

                for (new_fname, content) in outputs:
                    dest_file_location = computation_platform_settings['type']\
                        + "@" + os.path.join(relative_path_suffix,
                                             proc['id'],
                                             local_settings['smart_connector_input'])
                    logger.debug("dest_file_location =%s" % dest_file_location)
                    resched_file_location = "%s%s" % (output_prefix, os.path.join(
                        self.job_dir, "input_backup", proc['id']))

                    logger.debug("resched_file_location=%s" %
                                 resched_file_location)
                    put_dest_file(proc, new_fname, dest_file_location,
                                  resched_file_location, content)

            # then copy context new values file
            logger.debug("writing values file")
            values_dest_location = computation_platform_settings['type']\
                + "@" + os.path.join(relative_path_suffix,
                                     proc['id'],
                                     local_settings['smart_connector_input'],
                                     self.VALUES_FNAME)
            logger.debug("values_dest_location =%s" % values_dest_location)

            values_dest_url = get_url_with_credentials(
                computation_platform_settings, values_dest_location,
                is_relative_path=True, ip_address=proc['ip_address'])

            storage.put_file(values_dest_url, json.dumps(context, indent=4))
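
The *_template handling above uses Django's template engine (Template and Context, as imported in this module): the variation context fills the placeholders, the rendered text is uploaded under the base name, and the raw template is re-uploaded unchanged beside it. A small illustration, runnable inside a configured Django project (the template text and values are invented):

from django.template import Context, Template

raw = "run_counter = {{ run_counter }}\n"   # e.g. content of a *_template file
rendered = Template(raw).render(Context({'run_counter': 7}))
assert rendered == "run_counter = 7\n"
# The rendered text is uploaded under the base name; the raw template
# is uploaded as-is alongside it.
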
Code example #10
    def compute_psd_criterion(self, all_settings, node_path):
        import math
        import os
        #globalFileSystem = fs.get_global_filesystem()
        # psd = os.path.join(globalFileSystem,
        #                    self.output_dir, node_output_dir,
        #                    "PSD_output/psd.dat")
        #Fixme replace all reference to files by parameters, e.g PSDCode
        output_prefix = '%s://%s@' % (all_settings['scheme'],
                                      all_settings['type'])
        logger.debug('output_prefix=%s' % output_prefix)
        logger.debug('node_path=%s' % node_path)

        logger.debug('compute psd---')
        psd_url = get_url_with_credentials(all_settings,
                                           os.path.join(
                                               node_path, "PSD_output",
                                               "psd.dat"),
                                           is_relative_path=False)
        logger.debug('psd_url=%s' % psd_url)

        psd = storage.get_filep(psd_url)
        logger.debug('psd=%s' % psd._name)

        # psd_exp = os.path.join(globalFileSystem,
        #                        self.output_dir, node_output_dir,
        #                        "PSD_output/PSD_exp.dat")
        psd_url = get_url_with_credentials(all_settings,
                                           os.path.join(
                                               node_path, "PSD_output",
                                               "PSD_exp.dat"),
                                           is_relative_path=False)
        logger.debug('psd_url=%s' % psd_url)
        psd_exp = storage.get_filep(psd_url)
        logger.debug('psd_exp=%s' % psd_exp._name)

        logger.debug("PSD %s %s " % (psd._name, psd_exp._name))
        x_axis = []
        y1_axis = []
        for line in psd:
            column = line.split()
            #logger.debug(column)
            if len(column) > 0:
                x_axis.append(float(column[0]))
                y1_axis.append(float(column[1]))
        logger.debug("x_axis \n %s" % x_axis)
        logger.debug("y1_axis \n %s" % y1_axis)

        y2_axis = []
        for line in psd_exp:
            column = line.split()
            #logger.debug(column)
            if len(column) > 0:
                y2_axis.append(float(column[1]))

        for i in range(len(x_axis) - len(y2_axis)):
            y2_axis.append(0)
        logger.debug("y2_axis \n %s" % y2_axis)

        criterion = 0
        for i in range(len(y1_axis)):
            criterion += math.pow((y1_axis[i] - y2_axis[i]), 2)
        logger.debug("Criterion %f" % criterion)

        criterion_url = get_url_with_credentials(all_settings,
                                                 os.path.join(
                                                     node_path, "PSD_output",
                                                     "criterion.txt"),
                                                 is_relative_path=False)
        storage.put_file(criterion_url, str(criterion))

        return criterion
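
Stripped of I/O, the criterion is a sum of squared pointwise differences between the computed PSD (second column of psd.dat) and the experimental PSD (second column of PSD_exp.dat), with the experimental series zero-padded to the computed length. As a pure function:

def psd_criterion(y1_axis, y2_axis):
    # Zero-pad the experimental series to the computed length,
    # exactly as the loop above does.
    padded = y2_axis + [0.0] * (len(y1_axis) - len(y2_axis))
    # Sum of squared differences.
    return sum((y1 - y2) ** 2 for y1, y2 in zip(y1_axis, padded))

assert psd_criterion([1.0, 2.0], [1.5]) == 4.25
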
Code example #11
    def process_outputs(self, run_settings, base_dir, output_url, all_settings, offset):

        # output_dir = 118.138.241.232/outptuersdfsd/sweep277/hrmc278/output_1
        # output_prefix = ssh://unix@
        # node_output_dir = 2

        output_prefix = '%s://%s@' % (all_settings['scheme'],
                                    all_settings['type'])

        id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        iter_output_dir = os.path.join(os.path.join(base_dir, "output_%s" % id))
        logger.debug('iter_output_dir=%s' % iter_output_dir)
        logger.debug('output_prefix=%s' % output_prefix)
        #iter_output_dir = "%s%s" % (output_prefix, iter_output_dir)
        logger.debug('output_url=%s' % output_url)
        (scheme, host, iter_output_path, location, query_settings) = storage.parse_bdpurl(output_url)
        logger.debug("iter_output_path=%s" % iter_output_path)
        iter_out_fsys = storage.get_filesystem(output_url)
        logger.debug("iter_out_fsys=%s" % iter_out_fsys)
        node_output_dirnames, _ = iter_out_fsys.listdir(iter_output_path)
        logger.debug('node_output_dirnames=%s' % node_output_dirnames)
        self.audit = ""

        Node_info = namedtuple('Node_info',
            ['dirname', 'number', 'criterion'])

        BASE_FNAME = "HRMC.inp"

        # generate criteria
        self.outputs = []
        for node_output_dirname in node_output_dirnames:
            node_path = output_prefix + os.path.join(iter_output_dir, node_output_dirname)
            criterion = self.compute_psd_criterion(all_settings, node_path)
            #criterion = self.compute_hrmc_criterion(values_map['run_counter'], node_output_dirname, fs,)
            logger.debug("criterion=%s" % criterion)

            try:
                values_url = get_url_with_credentials(
                    all_settings, os.path.join(node_path,
                    '%s_values' % BASE_FNAME), is_relative_path=False)

                values_content = storage.get_file(values_url)

                logger.debug("values_file=%s" % values_url)
            except IOError:
                logger.warn("no values file found")
                values_map = {}
            else:
                values_map = dict(json.loads(values_content))

            self.outputs.append(Node_info(dirname=node_output_dirname,
                           number=values_map['run_counter'], criterion=criterion))

        if not self.outputs:
            logger.error("no ouput found for this iteration")
            return

        self.outputs.sort(key=lambda x: int(x.criterion))
        logger.debug("self.outputs=%s" % self.outputs)

        try:
            # FIXME: need to validate this output to make sure list of int
            threshold = ast.literal_eval(getval(run_settings, '%s/input/hrmc/threshold' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            logger.warn("no threshold found when expected")
            return False
        logger.debug("threshold = %s" % threshold)
        total_picks = 1
        if len(threshold) > 1:
            for i in threshold:
                total_picks *= i  # the values themselves, not threshold[i]
        else:
            total_picks = threshold[0]

        def copy_files_with_pattern(iter_out_fsys, source_path,
                                 dest_path, pattern, all_settings):
            """
            """
            output_prefix = '%s://%s@' % (all_settings['scheme'],
                                    all_settings['type'])

            logger.debug('source_path=%s, dest_path=%s' % (source_path, dest_path))
            # (scheme, host, iter_output_path, location, query_settings) = storage.parse_bdpurl(source_path)
            _, node_output_fnames = iter_out_fsys.listdir(source_path)
            ip_address = all_settings['ip_address']
            for f in node_output_fnames:
                if fnmatch.fnmatch(f, pattern):
                    source_url = get_url_with_credentials(all_settings, output_prefix + os.path.join(ip_address, source_path, f), is_relative_path=False)
                    dest_url = get_url_with_credentials(all_settings, output_prefix + os.path.join(ip_address, dest_path, f), is_relative_path=False)
                    logger.debug('source_url=%s, dest_url=%s' % (source_url, dest_url))
                    content = storage.get_file(source_url)
                    storage.put_file(dest_url, content)

        # Make new input dirs
        new_input_dir = os.path.join(os.path.join(base_dir, "input_%d" % (id + 1)))
        for index in range(0, total_picks):
            node_info = self.outputs[index]  # don't shadow the Node_info class
            logger.debug("node_info.dirname=%s" % node_info.dirname)
            logger.debug("node_info=%s" % str(node_info))

            new_input_path = os.path.join(new_input_dir,
                node_info.dirname)
            logger.debug("New input node dir %s" % new_input_path)

            old_output_path = os.path.join(iter_output_dir, node_info.dirname)

            # Move all existing domain input files unchanged to next input directory
            for f in DOMAIN_INPUT_FILES:
                source_url = get_url_with_credentials(
                    all_settings, output_prefix + os.path.join(old_output_path, f), is_relative_path=False)
                dest_url = get_url_with_credentials(
                    all_settings, output_prefix + os.path.join(new_input_path, f),
                    is_relative_path=False)
                logger.debug('source_url=%s, dest_url=%s' % (source_url, dest_url))

                content = storage.get_file(source_url)
                logger.debug('content collected')
                storage.put_file(dest_url, content)
                logger.debug('put successfully')

            logger.debug('put file successfully')
            pattern = "*_values"
            output_offset = os.path.join(offset, "output_%s" % id, node_info.dirname)
            input_offset = os.path.join(offset, "input_%s" % (id + 1), node_info.dirname)
            copy_files_with_pattern(iter_out_fsys,
                output_offset,
                input_offset, pattern,
                all_settings)

            pattern = "*_template"
            copy_files_with_pattern(iter_out_fsys,
                output_offset,
                input_offset, pattern,
                all_settings)

            # NB: Converge stage triggers based on criterion value from audit.
            logger.debug('starting audit')
            info = "Run %s preserved (error %s)\n" % (Node_info.number, Node_info.criterion)
            audit_url = get_url_with_credentials(
                all_settings, output_prefix +
                os.path.join(new_input_path, 'audit.txt'), is_relative_path=False)
            storage.put_file(audit_url, info)
            logger.debug("audit=%s" % info)
            logger.debug('1:audit_url=%s' % audit_url)
            self.audit += info

            # move xyz_final.xyz to initial.xyz
            source_url = get_url_with_credentials(
                all_settings, output_prefix + os.path.join(old_output_path, "xyz_final.xyz"), is_relative_path=False)
            logger.debug('source_url=%s' % source_url)
            dest_url = get_url_with_credentials(
                all_settings, output_prefix + os.path.join(new_input_path, 'input_initial.xyz'), is_relative_path=False)
            logger.debug('dest_url=%s' % dest_url)
            content = storage.get_file(source_url)
            logger.debug('content=%s' % content)
            storage.put_file(dest_url, content)
            self.audit += "spawning diamond runs\n"

        logger.debug("input_dir=%s" % (output_prefix + os.path.join(new_input_dir, 'audit.txt')))
        audit_url = get_url_with_credentials(
            all_settings, output_prefix + os.path.join(new_input_dir, 'audit.txt'), is_relative_path=False)
        logger.debug('audit_url=%s' % audit_url)
        storage.put_file(audit_url, self.audit)
Code example #12
File: execute.py Project: steveandroulakis/chiminey
    def upload_variation_inputs(self, run_settings, local_settings, variations, processes,
                                 input_dir, output_storage_settings,
                                 computation_platform_settings, mytardis_settings):
        '''
        Create input packages for each variation and upload them to the VMs
        '''
        logger.debug("upload_variation_inputs")
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                    output_storage_settings['type'])
        source_files_url = get_url_with_credentials(
            output_storage_settings, output_prefix + os.path.join(
                self.iter_inputdir, input_dir),
            is_relative_path=False)

        logger.debug('source_files_url=%s' % source_files_url)
        # Copy input directory to mytardis only after saving locally, so if
        # something goes wrong we still have the results
        if local_settings['curate_data']:
            self.experiment_id = self.curate_data(self.experiment_id, local_settings, output_storage_settings,
                             mytardis_settings, source_files_url)
        else:
            logger.warn('Data curation is off')
        #proc_ind = 0
        for var_fname in variations.keys():
            logger.debug("var_fname=%s" % var_fname)
            logger.debug('variations[var_fname]=%s' % variations[var_fname])
            for var_content, values in variations[var_fname]:
                #logger.debug("var_content = %s" % var_content)
                #logger.debug('proc_ind=%s' % proc_ind)
                logger.debug('processes=%s' % processes)
                run_counter = values['run_counter']
                logger.debug("run_counter=%s" % run_counter)
                proc = None
                for p in processes:
                    # TODO: how to handle invalid run_counter
                    pid = int(p['id'])
                    logger.debug("pid=%s" % pid)
                    if pid == run_counter:
                        proc = p
                        break
                else:
                    logger.error("no process found matching run_counter")
                    #smartconnectorscheduler.error(run_settings, "%s: wait" % (self.id + 1))
                    # TODO: catch this error and recover
                    raise BadInputException()

                logger.debug("proc=%s" % pformat(proc))

                #proc = processes[proc_ind]
                #proc_ind += 1
                #ip = botocloudconnector.get_instance_ip(var_node.id, local_settings)
                ip = proc['ip_address']

                #dest_files_location = computation_platform_settings['type'] + "@"\
                #                      + os.path.join(local_settings['payload_destination'],
                #                                     proc['id'],
                #                                     local_settings['payload_cloud_dirname']
                #                                     )
                relative_path_suffix = self.get_relative_output_path(local_settings)
                dest_files_location = computation_platform_settings['type'] + "@"\
                                      + os.path.join(relative_path_suffix,
                                                     proc['id'],
                                                     local_settings['payload_cloud_dirname']
                                                     )

                logger.debug('dest_files_location=%s' % dest_files_location)

                dest_files_url = get_url_with_credentials(
                    computation_platform_settings, dest_files_location,
                    is_relative_path=True, ip_address=ip)
                logger.debug('dest_files_url=%s' % dest_files_url)

                # FIXME: Cleanup any existing runs already there
                # FIXME: keep the compile exec from setup
                #FIXME: exceptions should be given as parameter
                #FIXme we should not delete anyfile. SInce each process runs in its own directory
                exceptions = [local_settings['compile_file'], "..", ".",
                              'PSD', 'PSD.f', 'PSD_exp.dat', 'PSD.inp',
                              'Makefile', 'running.sh',
                              'process_scheduledone.sh', 'process_schedulestart.sh']
                storage.copy_directories(source_files_url, dest_files_url)

                if self.reschedule_failed_procs:
                    input_backup = os.path.join(self.job_dir, "input_backup", proc['id'])
                    backup_url = get_url_with_credentials(
                        output_storage_settings,
                        output_prefix + input_backup, is_relative_path=False)
                    storage.copy_directories(source_files_url, backup_url)

                # Why do we need to create a temporary file to make this copy?
                import uuid
                randsuffix = unicode(uuid.uuid4())  # should use some job id here

                var_url = get_url_with_credentials(local_settings, os.path.join("tmp%s" % randsuffix, "var"),
                    is_relative_path=True)
                logger.debug("var_url=%s" % var_url)
                storage.put_file(var_url, var_content.encode('utf-8'))

                value_url = get_url_with_credentials(local_settings, os.path.join("tmp%s" % randsuffix, "value"),
                    is_relative_path=True)
                logger.debug("value_url=%s" % value_url)
                storage.put_file(value_url, json.dumps(values))

                #local_settings['platform'] should be replaced
                # and overwrite on the remote
                #var_fname_remote = computation_platform_settings['type']\
                #    + "@" + os.path.join(local_settings['payload_destination'],
                #                         proc['id'],
                #                         local_settings['payload_cloud_dirname'],
                #                         var_fname)
                var_fname_remote = computation_platform_settings['type']\
                    + "@" + os.path.join(relative_path_suffix,
                                         proc['id'],
                                         local_settings['payload_cloud_dirname'],
                                         var_fname)

                var_fname_pkey = get_url_with_credentials(
                    computation_platform_settings, var_fname_remote,
                    is_relative_path=True, ip_address=ip)
                var_content = storage.get_file(var_url)
                storage.put_file(var_fname_pkey, var_content)

                logger.debug("var_fname_pkey=%s" % var_fname_pkey)
                values_fname_pkey = get_url_with_credentials(
                    computation_platform_settings,
                    os.path.join(dest_files_location,
                                 "%s_values" % var_fname),
                    is_relative_path=True, ip_address=ip)
                values_content = storage.get_file(value_url)
                storage.put_file(values_fname_pkey, values_content)
                logger.debug("values_fname_pkey=%s" % values_fname_pkey)

                #copying values and var_content to backup folder
                if self.reschedule_failed_procs:
                    value_url = get_url_with_credentials(
                        output_storage_settings,
                        output_prefix + os.path.join(input_backup, "%s_values" % var_fname),
                        is_relative_path=False)
                    logger.debug("value_url=%s" % value_url)
                    storage.put_file(value_url, json.dumps(values))

                    var_fname_pkey = get_url_with_credentials(
                        output_storage_settings,
                        output_prefix + os.path.join(input_backup, var_fname),
                        is_relative_path=False)
                    var_content = storage.get_file(var_url)
                    storage.put_file(var_fname_pkey, var_content)

                # cleanup
                tmp_url = get_url_with_credentials(local_settings, os.path.join("tmp%s" % randsuffix),
                    is_relative_path=True)
                logger.debug("deleting %s" % tmp_url)
Code example #13
    def process_outputs(self, run_settings, base_dir, output_url, all_settings,
                        offset):

        # output_dir = 118.138.241.232/outptuersdfsd/sweep277/hrmc278/output_1
        # output_prefix = ssh://unix@
        # node_output_dir = 2

        output_prefix = '%s://%s@' % (all_settings['scheme'],
                                      all_settings['type'])

        id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        iter_output_dir = os.path.join(os.path.join(base_dir,
                                                    "output_%s" % id))
        logger.debug('iter_output_dir=%s' % iter_output_dir)
        logger.debug('output_prefix=%s' % output_prefix)
        #iter_output_dir = "%s%s" % (output_prefix, iter_output_dir)
        logger.debug('output_url=%s' % output_url)
        (scheme, host, iter_output_path, location,
         query_settings) = storage.parse_bdpurl(output_url)
        logger.debug("iter_output_path=%s" % iter_output_path)
        iter_out_fsys = storage.get_filesystem(output_url)
        logger.debug("iter_out_fsys=%s" % iter_out_fsys)
        node_output_dirnames, _ = iter_out_fsys.listdir(iter_output_path)
        logger.debug('node_output_dirnames=%s' % node_output_dirnames)
        self.audit = ""

        Node_info = namedtuple('Node_info', ['dirname', 'number', 'criterion'])

        BASE_FNAME = "HRMC.inp"

        # generate criteria
        self.outputs = []
        for node_output_dirname in node_output_dirnames:
            node_path = output_prefix + os.path.join(iter_output_dir,
                                                     node_output_dirname)
            criterion = self.compute_psd_criterion(all_settings, node_path)
            #criterion = self.compute_hrmc_criterion(values_map['run_counter'], node_output_dirname, fs,)
            logger.debug("criterion=%s" % criterion)

            try:
                values_url = get_url_with_credentials(
                    all_settings,
                    os.path.join(node_path, '%s_values' % BASE_FNAME),
                    is_relative_path=False)

                values_content = storage.get_file(values_url)

                logger.debug("values_file=%s" % values_url)
            except IOError:
                logger.warn("no values file found")
                values_map = {}
            else:
                values_map = dict(json.loads(values_content))

            self.outputs.append(
                Node_info(dirname=node_output_dirname,
                          number=values_map['run_counter'],
                          criterion=criterion))

        if not self.outputs:
            logger.error("no ouput found for this iteration")
            return

        self.outputs.sort(key=lambda x: int(x.criterion))
        logger.debug("self.outputs=%s" % self.outputs)

        try:
            # FIXME: need to validate this output to make sure list of int
            threshold = ast.literal_eval(
                getval(run_settings, '%s/input/hrmc/threshold' % RMIT_SCHEMA))
        except (SettingNotFoundException, ValueError):
            logger.warn("no threshold found when expected")
            return False
        logger.debug("threshold = %s" % threshold)
        total_picks = 1
        if len(threshold) > 1:
            for i in threshold:
                total_picks *= i  # the values themselves, not threshold[i]
        else:
            total_picks = threshold[0]

        def copy_files_with_pattern(iter_out_fsys, source_path, dest_path,
                                    pattern, all_settings):
            """
            """
            output_prefix = '%s://%s@' % (all_settings['scheme'],
                                          all_settings['type'])

            logger.debug('source_path=%s, dest_path=%s' %
                         (source_path, dest_path))
            # (scheme, host, iter_output_path, location, query_settings) = storage.parse_bdpurl(source_path)
            _, node_output_fnames = iter_out_fsys.listdir(source_path)
            ip_address = all_settings['ip_address']
            for f in node_output_fnames:
                if fnmatch.fnmatch(f, pattern):
                    source_url = get_url_with_credentials(
                        all_settings,
                        output_prefix +
                        os.path.join(ip_address, source_path, f),
                        is_relative_path=False)
                    dest_url = get_url_with_credentials(
                        all_settings,
                        output_prefix + os.path.join(ip_address, dest_path, f),
                        is_relative_path=False)
                    logger.debug('source_url=%s, dest_url=%s' %
                                 (source_url, dest_url))
                    content = storage.get_file(source_url)
                    storage.put_file(dest_url, content)

        # Make new input dirs
        new_input_dir = os.path.join(
            os.path.join(base_dir, "input_%d" % (id + 1)))
        for index in range(0, total_picks):
            node_info = self.outputs[index]  # don't shadow the Node_info class
            logger.debug("node_info.dirname=%s" % node_info.dirname)
            logger.debug("node_info=%s" % str(node_info))

            new_input_path = os.path.join(new_input_dir, node_info.dirname)
            logger.debug("New input node dir %s" % new_input_path)

            old_output_path = os.path.join(iter_output_dir, node_info.dirname)

            # Move all existing domain input files unchanged to next input directory
            for f in DOMAIN_INPUT_FILES:
                source_url = get_url_with_credentials(
                    all_settings,
                    output_prefix + os.path.join(old_output_path, f),
                    is_relative_path=False)
                dest_url = get_url_with_credentials(
                    all_settings,
                    output_prefix + os.path.join(new_input_path, f),
                    is_relative_path=False)
                logger.debug('source_url=%s, dest_url=%s' %
                             (source_url, dest_url))

                content = storage.get_file(source_url)
                logger.debug('content collected')
                storage.put_file(dest_url, content)
                logger.debug('put successfully')

            logger.debug('put file successfully')
            pattern = "*_values"
            output_offset = os.path.join(offset, "output_%s" % id,
                                         node_info.dirname)
            input_offset = os.path.join(offset, "input_%s" % (id + 1),
                                        node_info.dirname)
            copy_files_with_pattern(iter_out_fsys, output_offset, input_offset,
                                    pattern, all_settings)

            pattern = "*_template"
            copy_files_with_pattern(iter_out_fsys, output_offset, input_offset,
                                    pattern, all_settings)

            # NB: Converge stage triggers based on criterion value from audit.
            logger.debug('starting audit')
            info = "Run %s preserved (error %s)\n" % (Node_info.number,
                                                      Node_info.criterion)
            audit_url = get_url_with_credentials(
                all_settings,
                output_prefix + os.path.join(new_input_path, 'audit.txt'),
                is_relative_path=False)
            storage.put_file(audit_url, info)
            logger.debug("audit=%s" % info)
            logger.debug('1:audit_url=%s' % audit_url)
            self.audit += info

            # move xyz_final.xyz to initial.xyz
            source_url = get_url_with_credentials(
                all_settings,
                output_prefix + os.path.join(old_output_path, "xyz_final.xyz"),
                is_relative_path=False)
            logger.debug('source_url=%s' % source_url)
            dest_url = get_url_with_credentials(
                all_settings,
                output_prefix +
                os.path.join(new_input_path, 'input_initial.xyz'),
                is_relative_path=False)
            logger.debug('dest_url=%s' % dest_url)
            content = storage.get_file(source_url)
            logger.debug('content=%s' % content)
            storage.put_file(dest_url, content)
            self.audit += "spawning diamond runs\n"

        logger.debug(
            "input_dir=%s" %
            (output_prefix + os.path.join(new_input_dir, 'audit.txt')))
        audit_url = get_url_with_credentials(
            all_settings,
            output_prefix + os.path.join(new_input_dir, 'audit.txt'),
            is_relative_path=False)
        logger.debug('audit_url=%s' % audit_url)
        storage.put_file(audit_url, self.audit)
Code example #14
    def compute_psd_criterion(self, all_settings, node_path):
        import math
        import os
        #globalFileSystem = fs.get_global_filesystem()
        # psd = os.path.join(globalFileSystem,
        #                    self.output_dir, node_output_dir,
        #                    "PSD_output/psd.dat")
        #Fixme replace all reference to files by parameters, e.g PSDCode
        output_prefix = '%s://%s@' % (all_settings['scheme'],
                                    all_settings['type'])
        logger.debug('output_prefix=%s' % output_prefix)
        logger.debug('node_path=%s' % node_path)

        logger.debug('compute psd---')
        psd_url = get_url_with_credentials(
            all_settings,
            os.path.join(node_path, "PSD_output", "psd.dat"),
            is_relative_path=False)
        logger.debug('psd_url=%s' % psd_url)

        psd = storage.get_filep(psd_url)
        logger.debug('psd=%s' % psd._name)

        # psd_exp = os.path.join(globalFileSystem,
        #                        self.output_dir, node_output_dir,
        #                        "PSD_output/PSD_exp.dat")
        psd_url = get_url_with_credentials(
            all_settings,
            os.path.join(node_path, "PSD_output", "PSD_exp.dat"),
            is_relative_path=False)
        logger.debug('psd_url=%s' % psd_url)
        psd_exp = storage.get_filep(psd_url)
        logger.debug('psd_exp=%s' % psd_exp._name)

        logger.debug("PSD %s %s " % (psd._name, psd_exp._name))
        x_axis = []
        y1_axis = []
        for line in psd:
            column = line.split()
            #logger.debug(column)
            if len(column) > 0:
                x_axis.append(float(column[0]))
                y1_axis.append(float(column[1]))
        logger.debug("x_axis \n %s" % x_axis)
        logger.debug("y1_axis \n %s" % y1_axis)

        y2_axis = []
        for line in psd_exp:
            column = line.split()
            #logger.debug(column)
            if len(column) > 0:
                y2_axis.append(float(column[1]))

        for i in range(len(x_axis) - len(y2_axis)):
            y2_axis.append(0)
        logger.debug("y2_axis \n %s" % y2_axis)

        criterion = 0
        for i in range(len(y1_axis)):
            criterion += math.pow((y1_axis[i] - y2_axis[i]), 2)
        logger.debug("Criterion %f" % criterion)

        criterion_url = get_url_with_credentials(
            all_settings,
            os.path.join(node_path, "PSD_output", "criterion.txt"),
            is_relative_path=False)
        storage.put_file(criterion_url, str(criterion))

        return criterion
Code example #15
File: wait.py Project: chiminey/chiminey
        logger.debug("values_source_url=%s" % values_source_url)

        values_dest_url = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(
                self.job_dir, self.output_dir, process_id, django_settings.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("values_dest_url=%s" % values_dest_url)
        try:
            logger.debug('reading %s' % values_source_url)
            content = storage.get_file(values_source_url)
        except IOError:
            # no values file yet, so fall back to an empty JSON map
            content = json.dumps({})
        logger.debug('content=%s' % content)
        storage.put_file(values_dest_url, content)

    def process(self, run_settings):
        """
            Check all registered nodes to find whether
            they are running, stopped or in error_nodes
        """

        local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
        # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
        retrieve_local_settings(run_settings, local_settings)
        logger.debug("local_settings=%s" % local_settings)

        self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        output_storage_url = getval(run_settings, '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
        output_storage_settings = get_platform_settings(output_storage_url, local_settings['bdp_username'])
Code example #16
def _save_values(settings, url, context):
    values_url = storage.get_url_with_credentials(settings,
        os.path.join(url, VALUES_FNAME),
        is_relative_path=True, ip_address=settings['host'])
    storage.put_file(values_url, json.dumps(context))
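
_save_values serializes the context to JSON, so reading it back is the mirror image, as examples #9 and #13 do with their values files. A hedged sketch of the load side (_load_values itself is hypothetical):

def _load_values(settings, url):
    # Mirror of _save_values: fetch the JSON file and decode it.
    values_url = storage.get_url_with_credentials(settings,
        os.path.join(url, VALUES_FNAME),
        is_relative_path=True, ip_address=settings['host'])
    return dict(json.loads(storage.get_file(values_url)))
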
Code example #17
class Sweep(Stage):

    def __init__(self, user_settings=None):
        self.numbfile = 0
        logger.debug("Sweep stage initialized")

    def is_triggered(self, run_settings):
        logger.debug('run_settings=%s' % run_settings)

        try:
            configure_done = int(getval(run_settings,
                '%s/stages/sweep/sweep_done' % RMIT_SCHEMA))
        except (ValueError, SettingNotFoundException):
            return True

        return not configure_done

    def _get_sweep_name(self, run_settings):
        try:
            sweep_name = getval(run_settings, '%s/directive_profile/sweep_name' % RMIT_SCHEMA)
        except SettingNotFoundException:
            sweep_name = 'unknown_sweep'
        return sweep_name

    def process(self, run_settings):
        logger.debug('run_settings=%s' % run_settings)

        # Need to make copy because we pass on run_settings to sub connector
        # so any changes we make here to run_settings WILL be inherited
        def make_local_settings(run_settings):
            from copy import deepcopy
            local_settings = deepcopy(getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS))

            update(local_settings, run_settings,
                    RMIT_SCHEMA + '/system/platform',
                    # RMIT_SCHEMA + '/input/mytardis/experiment_id',
                    # RMIT_SCHEMA + '/system/random_numbers',
                   )
            local_settings['bdp_username'] = getval(
                run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)
            return local_settings

        local_settings = make_local_settings(run_settings)
        logger.debug('local_settings=%s' % local_settings)

        setval(run_settings,
               '%s/platform/computation/platform_url' % RMIT_SCHEMA,
               getval(run_settings,
                      '%s/input/system/compplatform/computation_platform'
                            % RMIT_SCHEMA))

        def _parse_output_location(run_settings, location):

            loc_list = location.split('/')
            name = loc_list[0]
            offset = ''
            if len(loc_list) > 1:
                offset = os.path.join(*loc_list[1:])
            logger.debug('offset=%s' % offset)
            return name, offset

        contextid = int(getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA))
        logger.debug("contextid=%s" % contextid)
        sweep_name = self._get_sweep_name(run_settings)
        logger.debug("sweep_name=%s" % sweep_name)

        output_loc = self.output_exists(run_settings)
        location = ""
        if output_loc:
            location = getval(run_settings, output_loc)
            output_storage_name, output_storage_offset = \
                _parse_output_location(run_settings, location)
            setval(run_settings,
                   '%s/platform/storage/output/platform_url' % RMIT_SCHEMA,
                   output_storage_name)
            setval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA,
                   os.path.join(output_storage_offset, '%s%s' % (sweep_name, contextid)))

        def _parse_input_location(run_settings, location):
            loc_list = location.split('/')
            name = loc_list[0]
            offset = ''
            if len(loc_list) > 1:
                offset = os.path.join(*loc_list[1:])
            logger.debug('offset=%s' % offset)
            return (name, offset)

        input_loc = self.input_exists(run_settings)
        if input_loc:
            location = getval(run_settings, input_loc)
            input_storage_name, input_storage_offset = \
                _parse_input_location(run_settings, location)
            setval(run_settings, '%s/platform/storage/input/platform_url' % RMIT_SCHEMA,
                   input_storage_name)
            # store offsets
            setval(run_settings,
                   '%s/platform/storage/input/offset' % RMIT_SCHEMA,
                   input_storage_offset)

        # TODO: replace with scratch space computation platform space
        self.scratch_platform = '%s%s%s' % (
            manage.get_scratch_platform(), sweep_name,
            contextid)

        # mytardis

        if output_loc:
            try:
                self.experiment_id = int(getval(run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
            except (KeyError, ValueError):
                self.experiment_id = 0
            try:
                curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
            except SettingNotFoundException:
                curate_data = False
            if curate_data:
                self.experiment_id = self.curate_data(run_settings, location, self.experiment_id)
            setval(run_settings,
                   '%s/input/mytardis/experiment_id' % RMIT_SCHEMA,
                   str(self.experiment_id))

        # generate all variations
        map_text = getval(run_settings, '%s/input/sweep/sweep_map' % RMIT_SCHEMA)
        # map_text = run_settings[RMIT_SCHEMA + '/input/sweep']['sweep_map']
        sweep_map = json.loads(map_text)
        logger.debug("sweep_map=%s" % pformat(sweep_map))
        runs = _expand_variations(maps=[sweep_map], values={})
        logger.debug("runs=%s" % runs)

        # Create random numbers if needed
        # TODO: move iseed out of hrmc into separate generic schema
        # to use on any sweepable connector and make this function
        # completely hrmc independent.

        rands = []

        try:
            self.rand_index = getval(run_settings, '%s/input/hrmc/iseed' % RMIT_SCHEMA)
            logger.debug("rand_index=%s" % self.rand_index)
        except SettingNotFoundException:
            pass
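            # no iseed provided: rands stays empty and subruns are not seeded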
        else:
            # prep random seeds for each run based off original iseed
            # FIXME: inefficient for large random file
            # TODO, FIXME: this is potentially problematic if different
            # runs end up overlapping in the random numbers they utilise.
            # solution is to have separate random files per run or partition
            # big file up.

            try:
                num_url = getval(run_settings, "%s/system/random_numbers" % RMIT_SCHEMA)
                logger.debug('num_url=%s' % num_url)
            except SettingNotFoundException:
                pass
            else:
                try:
                    local_settings['random_numbers'] = num_url
                    rands = generate_rands(settings=local_settings,
                                           start_range=0,
                                           end_range=-1,
                                           num_required=len(runs),
                                           start_index=self.rand_index)
                    logger.debug("rands=%s" % rands)
                except Exception as e:
                    logger.error("failed to generate random numbers: %s" % e)
                    raise
        # load the initial values map from the input directory, which
        # contains variables to use for all subdirectives
        starting_map = {}
        if input_loc:
            input_storage_settings = self.get_platform_settings(
                run_settings, 'http://rmit.edu.au/schemas/platform/storage/input')
            try:
                input_prefix = '%s://%s@' % (input_storage_settings['scheme'],
                                        input_storage_settings['type'])

                values_url = get_url_with_credentials(
                    input_storage_settings,
                    input_prefix + os.path.join(input_storage_settings['ip_address'],
                        input_storage_offset, "initial", VALUES_MAP_FILE),
                    is_relative_path=False)
                logger.debug("values_url=%s" % values_url)

                values_e_url = get_url_with_credentials(
                    local_settings,
                    values_url,
                    is_relative_path=False)
                logger.debug("values_url=%s" % values_e_url)
                values_content = get_file(values_e_url)
                logger.debug("values_content=%s" % values_content)
                starting_map = dict(json.loads(values_content))
            except IOError:
                logger.warn("no starting values file found")
            except ValueError:
                logger.error("problem parsing contents of %s" % VALUES_MAP_FILE)
            logger.debug("starting_map after initial values=%s"
                % pformat(starting_map))

        # Copy form input values into the starting map
        # FIXME: could have name collisions between form inputs and
        # starting values.
        for ns in run_settings:
            if ns.startswith(RMIT_SCHEMA + "/input"):
                # for k, v in run_settings[ns].items():
                for k, v in getvals(run_settings, ns).items():
                    starting_map[k] = v
        logger.debug("starting_map after form=%s" % pformat(starting_map))

        # FIXME: we assume we will always have input directory

        # Get input_url directory
        input_url = ""
        if input_loc:
            input_prefix = '%s://%s@' % (input_storage_settings['scheme'],
                                         input_storage_settings['type'])
            input_url = get_url_with_credentials(
                input_storage_settings,
                input_prefix + os.path.join(
                    input_storage_settings['ip_address'], input_storage_offset),
                is_relative_path=False)
            logger.debug("input_url=%s" % input_url)

        current_context = models.Context.objects.get(id=contextid)
        user = current_context.owner.user.username

        # For each of the generated runs, copy across initial input
        # to individual input directories with variation values,
        # and then schedule subrun of sub directive
        logger.debug("run_settings=%s" % run_settings)
        for i, context in enumerate(runs):

            run_counter = int(context['run_counter'])
            logger.debug("run_counter=%s" % run_counter)
            run_inputdir = os.path.join(self.scratch_platform,
                SUBDIRECTIVE_DIR % {'run_counter': str(run_counter)},
                FIRST_ITERATION_DIR)
            logger.debug("run_inputdir=%s" % run_inputdir)
            run_iter_url = get_url_with_credentials(local_settings,
                run_inputdir, is_relative_path=False)
            logger.debug("run_iter_url=%s" % run_iter_url)

            # Duplicate any input_directory into runX duplicates
            if input_loc:
                logger.debug("context=%s" % context)
                logger.debug("systemsettings=%s"
                         % pformat(getvals(run_settings, RMIT_SCHEMA + '/input/system')))
                copy_directories(input_url, run_iter_url)

            # Need to load any existing values, because the original input_dir
            # could have contained values for the whole run.
            # This code is deprecated in favour of a single values file.
            self.error_detected = False

            try:
                template_name = getval(run_settings,
                                       '%s/stages/sweep/template_name'
                                            % RMIT_SCHEMA)
            except SettingNotFoundException:
                pass
            else:
                logger.debug("template_name=%s" % template_name)
                v_map = {}
                try:
                    values_url = get_url_with_credentials(
                        local_settings,
                        os.path.join(run_inputdir, "initial",
                             VALUES_MAP_TEMPLATE_FILE % {'template_name': template_name}),
                        is_relative_path=False)
                    logger.debug("values_url=%s" % values_url)
                    values_content = get_file(values_url)
                    logger.debug("values_content=%s" % values_content)
                    v_map = dict(json.loads(values_content))
                except IOError:
                    logger.warn("no values file found")
                except ValueError:
                    logger.error("problem parsing contents of %s"
                                 % (VALUES_MAP_TEMPLATE_FILE
                                    % {'template_name': template_name}))
                v_map.update(starting_map)
                v_map.update(context)
                logger.debug("new v_map=%s" % v_map)
                put_file(values_url, json.dumps(v_map, indent=4))

            v_map = {}
            try:
                values_url = get_url_with_credentials(
                    local_settings,
                    os.path.join(run_inputdir, "initial",
                        VALUES_MAP_FILE),
                    is_relative_path=False)
                logger.debug("values_url=%s" % values_url)
                values_content = get_file(values_url)
                logger.debug("values_content=%s" % values_content)
                v_map = dict(json.loads(values_content))
            except IOError:
                logger.warn("no values file found")
            except ValueError:
                logger.error("problem parsing contents of %s" % VALUES_MAP_FILE)
            v_map.update(starting_map)
            v_map.update(context)
            logger.debug("new v_map=%s" % v_map)
            put_file(values_url, json.dumps(v_map, indent=4))

            # Set random numbers for subdirective
            logger.debug("run_settings=%s" % pformat(run_settings))
            if rands:
                setval(run_settings, '%s/input/hrmc/iseed' % RMIT_SCHEMA, rands[i])

            if input_loc:
                # Set revised input_location for subdirective
                setval(run_settings, input_loc,
                    "%s/%s/%s" % (self.scratch_platform,
                                    SUBDIRECTIVE_DIR
                                        % {'run_counter': str(run_counter)},
                                    FIRST_ITERATION_DIR))

            # Redirect input
            run_input_storage_name, run_input_storage_offset = \
                _parse_input_location(run_settings,
                    "local/sweep%s/run%s/input_0" % (contextid, run_counter))
            # setval(run_settings,
            #        '%s/platform/storage/input/platform_url' % RMIT_SCHEMA,
            #        run_input_storage_name)
            # setval(run_settings,
            #        '%s/platform/storage/input/offset' % RMIT_SCHEMA,
            #        run_input_storage_offset)

            logger.debug("run_settings=%s" % pformat(run_settings))
            try:
                _submit_subdirective("nectar", run_settings, user, current_context)
            except Exception as e:
                logger.error(e)
                raise
Code example #18
0
    def _upload_input_dir_variations(self, processes, local_settings,
                                     computation_platform_settings,
                                     output_storage_settings,
                                     mytardis_settings, input_dir,
                                     run_settings):
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])
        input_url_with_credentials = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir, input_dir),
            is_relative_path=False)
        logger.debug('input_url_with_credentials=%s' %
                     input_url_with_credentials)
        if local_settings['curate_data']:
            self.experiment_id = self.curate_data(self.experiment_id,
                                                  local_settings,
                                                  output_storage_settings,
                                                  mytardis_settings,
                                                  input_url_with_credentials)
        else:
            logger.warn('Data curation is off')

        # get the run map from the parent stage
        parent_stage = self.import_parent_stage(run_settings)
        run_map, self.rand_index = parent_stage.get_run_map(
            local_settings, run_settings=run_settings)

        # load value_map
        values_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix +
            os.path.join(self.iter_inputdir, input_dir, self.VALUES_FNAME),
            is_relative_path=False)
        logger.debug("initial values_file=%s" % values_url_with_pkey)
        values = {}
        try:
            values_content = storage.get_file(values_url_with_pkey)
        except IOError:
            logger.warn("no values file found")
        else:
            logger.debug("values_content = %s" % values_content)
            values = dict(json.loads(values_content))
        logger.debug("values=%s" % values)

        # generate the set of variation contexts from the run map
        contexts = self._get_variation_contexts([run_map], values,
                                                self.initial_numbfile)
        self.initial_numbfile += len(contexts)

        # for each context, copy each file to dest and any
        # templates to be instantiated, then store in values.

        template_pat = re.compile("(.*)_template")
        relative_path_suffix = self.get_relative_output_path(local_settings)

        for context in contexts:
            logger.debug("context=%s" % context)
            # get list of all files in input_dir
            fname_url_with_pkey = get_url_with_credentials(
                output_storage_settings,
                output_prefix + os.path.join(self.iter_inputdir, input_dir),
                is_relative_path=False)
            input_files = storage.list_dirs(fname_url_with_pkey,
                                            list_files=True)

            # get process information
            run_counter = context['run_counter']
            logger.debug("run_counter=%s" % run_counter)
            proc = None
            for p in processes:
                # TODO: how to handle invalid run_counter
                pid = int(p['id'])
                logger.debug("pid=%s" % pid)
                if pid == run_counter:
                    proc = p
                    break
            else:
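                # for-else: reached only when no process matched run_counter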
                logger.error("no process found matching run_counter")
                raise BadInputException()
            logger.debug("proc=%s" % pformat(proc))

            for fname in input_files:
                logger.debug("fname=%s" % fname)
                templ_mat = template_pat.match(fname)
                fname_url_with_credentials = storage.get_url_with_credentials(
                    output_storage_settings,
                    output_prefix +
                    os.path.join(self.iter_inputdir, input_dir, fname),
                    is_relative_path=False)
                logger.debug("fname_url_with_credentials=%s" %
                             fname_url_with_credentials)

                def put_dest_file(proc, fname, dest_file_location,
                                  resched_file_location, content):
                    dest_url = get_url_with_credentials(
                        computation_platform_settings,
                        os.path.join(dest_file_location, fname),
                        is_relative_path=True,
                        ip_address=proc['ip_address'])
                    logger.debug("writing to =%s" % dest_url)
                    #logger.debug("content=%s" % content)
                    storage.put_file(dest_url, content)
                    if self.reschedule_failed_procs:
                        logger.debug("resched=%s" % resched_file_location)
                        logger.debug("fname=%s" % fname)
                        logger.debug("output_storage_settings=%s" %
                                     output_storage_settings)

                        logger.debug("here")
                        test = "%s/%s" % (resched_file_location, fname)
                        logger.debug("test=%s" % test)
                        resched_url = get_url_with_credentials(
                            output_storage_settings, test)
                        logger.debug("writing backup to %s" % resched_url)
                        storage.put_file(resched_url, content)
                    logger.debug("done")

                outputs = []
                if templ_mat:
                    base_fname = templ_mat.group(1)
                    template_content = storage.get_file(
                        fname_url_with_credentials)
                    try:
                        templ = Template(template_content)
                    except TemplateSyntaxError as e:
                        logger.error(e)
                        #FIXME: should detect this during submission of job,
                        #as no sensible way to recover here.
                        #TODO: signal error conditions in job status
                        continue
                    new_context = Context(context)
                    logger.debug("new_content=%s" % new_context)
                    render_output = templ.render(new_context)
                    render_output = render_output.encode('utf-8')
                    outputs.append((base_fname, render_output))
                    outputs.append((fname, template_content))
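                    # keep both the rendered file and the original template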

                else:
                    content = storage.get_file(fname_url_with_credentials)
                    outputs.append((fname, content))

                for (new_fname, content) in outputs:
                    dest_file_location = computation_platform_settings['type']\
                        + "@" + os.path.join(relative_path_suffix,
                                             proc['id'],
                                             local_settings['payload_cloud_dirname'])
                    logger.debug("dest_file_location =%s" % dest_file_location)
                    resched_file_location = "%s%s" % (
                        output_prefix,
                        os.path.join(self.job_dir, "input_backup", proc['id']))

                    logger.debug("resched_file_location=%s" %
                                 resched_file_location)
                    put_dest_file(proc, new_fname, dest_file_location,
                                  resched_file_location, content)

            # then copy context new values file
            logger.debug("writing values file")
            values_dest_location = computation_platform_settings['type']\
                + "@" + os.path.join(relative_path_suffix,
                                     proc['id'],
                                     local_settings['payload_cloud_dirname'],
                                     self.VALUES_FNAME)
            logger.debug("values_dest_location =%s" % values_dest_location)

            values_dest_url = get_url_with_credentials(
                computation_platform_settings,
                values_dest_location,
                is_relative_path=True,
                ip_address=proc['ip_address'])

            storage.put_file(values_dest_url, json.dumps(context, indent=4))
Code example #19
0
File: sweep.py Project: chiminey/chiminey
                logger.debug("values_url=%s" % values_url)
                values_content = get_file(values_url)
                logger.debug("values_content=%s" % values_content)
                v_map = dict(json.loads(values_content), )
            except IOError:
                logger.warn("no values file found")
            except ValueError:
                logger.error("problem parsing contents of %s" %
                             VALUES_MAP_FILE)
            v_map.update(starting_map)
            v_map.update(context)
            v_map['run_counter'] = 1

            logger.debug("new v_map=%s" % v_map)
            put_file(values_url, json.dumps(v_map, indent=4))

            # Set random numbers for subdirective
            logger.debug("run_settings=%s" % pformat(run_settings))
            if rands:
                setval(run_settings,
                       '%s/input/hrmc/iseed' % django_settings.SCHEMA_PREFIX,
                       rands[i])

            if input_loc:
                # Set revised input_location for subdirective
                setval(
                    run_settings, input_loc,
                    "%s/%s/%s" % (self.scratch_platform, SUBDIRECTIVE_DIR % {
                        'run_counter': str(run_counter)
                    }, FIRST_ITERATION_DIR))