def get_platform_settings(platform_url, username):
    platform_name = platform_url.split('/')[0]
    if platform_name == "local":
        return {'scheme': 'file', 'type': 'local', 'host': '127.0.0.1'}
    platform_settings, schema_namespace = retrieve_platform(platform_name, username)
    logger.debug("platform_settings=%s" % platform_settings)
    logger.debug("schema_namespace=%s" % schema_namespace)
    try:
        platform_type = platform_settings['platform_type']
    except KeyError:
        logger.error("platform_settings=%s" % platform_settings)
        raise
    platform_settings['type'] = platform_type
    for platform_hook in django_settings.PLATFORM_CLASSES:
        try:
            hook = jobs.safe_import(platform_hook, [], {})
        except ImproperlyConfigured as e:
            logger.error("Cannot load platform hook %s" % e)
            continue
        logger.debug("hook=%s" % hook)
        logger.debug("hook.get_platform_types=%s" % hook.get_platform_types())
        logger.debug("platform_type=%s" % platform_type)
        if platform_type in hook.get_platform_types():
            hook.update_platform_settings(platform_settings)
            break
    platform_settings['bdp_username'] = username
    # TODO: move to HadoopPlatform
    if platform_settings['type'] == 'hadoop':
        platform_settings['root_path'] = '/home/%s' % platform_settings['username']
    return platform_settings

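
# All of these snippets funnel dynamic class loading through
# jobs.safe_import(path, args, kwargs). The helper below is NOT the project's
# implementation; it is a hypothetical sketch of the assumed behaviour:
# resolve a dotted path, import the module, instantiate the named class, and
# raise Django's ImproperlyConfigured on failure so callers can log and
# continue (or re-raise), as the surrounding code does.
import importlib

from django.core.exceptions import ImproperlyConfigured


def safe_import_sketch(path, args, kwargs):
    """Hypothetical stand-in for jobs.safe_import: import and instantiate a class."""
    try:
        module_path, class_name = path.rsplit('.', 1)
    except ValueError:
        raise ImproperlyConfigured("%s isn't a dotted module path" % path)
    try:
        module = importlib.import_module(module_path)
        cls = getattr(module, class_name)
    except (ImportError, AttributeError) as e:
        raise ImproperlyConfigured('Error importing %s: "%s"' % (path, e))
    return cls(*args, **kwargs)


# Example usage (hypothetical class path):
# hook = safe_import_sketch('chiminey.platform.mytardis.MyTardisPlatform', [], {})
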
def load_metadata_builder(self, run_settings):
    if not self.METADATA_BUILDER:
        try:
            self.METADATA_BUILDER = jobs.safe_import(
                run_settings['%s/system' % django_settings.SCHEMA_PREFIX]['metadata_builder'],
                [], {})
        except ImproperlyConfigured as e:
            logger.warn("Cannot load metadata builder class %s" % e)

def setup(self, initialiser, name, description):
    MESSAGE = "This will delete %s smart connector. Are you sure [Yes/No]?" % name
    confirm = raw_input(MESSAGE)
    if confirm != "Yes":
        print "action aborted by user"
        return
    directive = jobs.safe_import(initialiser, [], {})
    directive.delete_directive(name)
    print "done"

def setup(self, initialiser, name, description, sweep=False):
    MESSAGE = "This will add %s to the catalogue of available smart connectors. " \
              "Are you sure [Yes/No]?" % name
    confirm = raw_input(MESSAGE)
    if confirm != "Yes":
        print "action aborted by user"
        return
    directive = jobs.safe_import(initialiser, [], {})
    directive.define_directive(name, description=description, sweep=sweep)
    print "done"

def import_parent_stage(self, run_settings):
    from chiminey.smartconnectorscheduler import jobs
    from django.core.exceptions import ImproperlyConfigured

    directive_name = getval(
        run_settings,
        "http://rmit.edu.au/schemas/directive_profile/directive_name")
    directive = models.Directive.objects.get(name=directive_name)
    logger.debug('directive_name=%s' % directive_name)
    parent_stage = directive.stage
    try:
        logger.debug('parent_package=%s' % parent_stage.package)
        stage = jobs.safe_import(parent_stage.package, [], {})
        logger.debug("stage=%s" % stage)
        return stage
    except ImproperlyConfigured as e:
        logger.debug("Except in import of stage: %s: %s" % (parent_stage.name, e))
        raise

def process(self, run_settings):
    logger.debug('run_settings=%s' % run_settings)

    self.setup_output(run_settings)
    self.setup_input(run_settings)
    self.setup_computation(run_settings)

    messages.info(run_settings, "0: Setting up computation")

    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    # local_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS]
    logger.debug("settings=%s" % pformat(run_settings))

    local_settings['bdp_username'] = getval(
        run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    # local_settings['bdp_username'] = run_settings[
    #     django_settings.SCHEMA_PREFIX + '/bdp_userprofile']['username']
    logger.debug('local_settings=%s' % local_settings)

    self.setup_scratchspace(run_settings)

    output_location = self.output_loc_offset
    # run_settings[django_settings.SCHEMA_PREFIX + '/input/system'][u'output_location']

    try:
        self.experiment_id = int(getval(
            run_settings,
            '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
    except KeyError:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0

    try:
        curate_data = getval(
            run_settings,
            '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = False
    if curate_data:
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            self.experiment_id = mytardis_platform.create_experiment(
                run_settings, output_location, self.experiment_id)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)

def process(self, run_settings):
    try:
        id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        id = 0
    messages.info(run_settings, '%d: converging' % (id + 1))

    def retrieve_local_settings(run_settings, local_settings):
        update(local_settings, run_settings
            # '%s/stages/setup/payload_source' % django_settings.SCHEMA_PREFIX,
            # '%s/stages/setup/payload_destination' % django_settings.SCHEMA_PREFIX,
            # '%s/system/platform' % django_settings.SCHEMA_PREFIX,
            # # '%s/stages/create/custom_prompt' % django_settings.SCHEMA_PREFIX,
            # # '%s/stages/create/cloud_sleep_interval' % django_settings.SCHEMA_PREFIX,
            # # '%s/stages/create/created_nodes' % django_settings.SCHEMA_PREFIX,
            # '%s/system/max_seed_int' % django_settings.SCHEMA_PREFIX,
            # '%s/input/system/cloud/number_vm_instances' % django_settings.SCHEMA_PREFIX,
            # '%s/input/hrmc/iseed' % django_settings.SCHEMA_PREFIX,
            # '%s/input/hrmc/optimisation_scheme' % django_settings.SCHEMA_PREFIX,
            # '%s/input/hrmc/threshold' % django_settings.SCHEMA_PREFIX,
        )
        local_settings['bdp_username'] = getval(
            run_settings,
            '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)

    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    retrieve_local_settings(run_settings, local_settings)
    bdp_username = local_settings['bdp_username']

    # get output
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(
        run_settings,
        '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
    job_dir = manage.get_job_dir(output_storage_settings, offset)

    # get mytardis
    # mytardis_url = getval(run_settings, '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
    # mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

    # setup new paths
    try:
        self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(job_dir, "output_%d" % self.id)
        self.iter_inputdir = os.path.join(job_dir, "input_%d" % (self.id + 1))
        # self.new_iter_inputdir = "input_%d" % (self.id + 1)
    except (SettingNotFoundException, ValueError):
        self.output_dir = os.path.join(job_dir, "output")
        self.iter_inputdir = os.path.join(job_dir, "input")
        self.id = 0
    logger.debug('output_dir=%s iter_inputdir=%s' % (self.output_dir, self.iter_inputdir))

    try:
        self.experiment_id = int(getval(
            run_settings,
            '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
    except SettingNotFoundException:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0

    inputdir_url = get_url_with_credentials(output_storage_settings,
                                            output_prefix + self.iter_inputdir,
                                            is_relative_path=False)
    logger.debug('input_dir_url=%s' % inputdir_url)

    # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(inputdir_url)
    # fsys = storage.get_filesystem(inputdir_url)
    # logger.debug('mypath=%s' % mypath)
    # input_dirs, _ = fsys.listdir(mypath)
    # logger.debug('input_dirs=%s' % input_dirs)

    (self.done_iterating, self.criterion) = self.process_outputs(
        run_settings, job_dir, inputdir_url, output_storage_settings)

    if self.done_iterating:
        logger.debug("Total Iterations: %d" % self.id)

        # output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
        #                               output_storage_settings['type'])
        # new_output_dir = os.path.join(base_dir, 'output')
        output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                      output_storage_settings['type'])

        # get source url
        iter_output_dir = os.path.join(job_dir, "output_%s" % self.id)
        source_url = "%s%s" % (output_prefix, iter_output_dir)

        # get dest url
        new_output_dir = os.path.join(job_dir, 'output')
        dest_url = "%s%s" % (output_prefix, new_output_dir)

        source_url = get_url_with_credentials(output_storage_settings,
                                              output_prefix + iter_output_dir,
                                              is_relative_path=False)
        dest_url = get_url_with_credentials(output_storage_settings,
                                            output_prefix + new_output_dir,
                                            is_relative_path=False)
        storage.copy_directories(source_url, dest_url)

        # curate
        try:
            curate_data = getval(
                run_settings,
                '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:
            mytardis_url = getval(
                run_settings,
                '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
            mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            logger.debug("source_url=%s" % source_url)
            logger.debug("dest_url=%s" % dest_url)
            logger.debug("job_dir=%s" % job_dir)
            try:
                mytardis_platform = jobs.safe_import(
                    'chiminey.platform.mytardis.MyTardisPlatform', [], {})
                self.experiment_id = mytardis_platform.create_dataset_for_final_output(
                    run_settings, self.experiment_id, job_dir, dest_url, all_settings)
            except ImproperlyConfigured as e:
                logger.error("Cannot load mytardis platform hook %s" % e)
        else:
            logger.warn('Data curation is off')

def process(self, run_settings):
    try:
        id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        id = 0
    messages.info(run_settings, '%d: transforming' % (id + 1))

    # self.contextid = getval(run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
    bdp_username = getval(
        run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(
        run_settings,
        '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)

    try:
        self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this is a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)

    try:
        self.experiment_id = int(getval(
            run_settings,
            '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
    except SettingNotFoundException:
        self.experiment_id = 0
    except ValueError:
        self.experiment_id = 0

    output_url = get_url_with_credentials(output_storage_settings,
                                          output_prefix + self.output_dir,
                                          is_relative_path=False)

    # (scheme, host, mypath, location, query_settings) = storage.parse_bdpurl(output_url)
    # fsys = storage.get_filesystem(output_url)
    # node_output_dirs, _ = fsys.listdir(mypath)
    # logger.debug("node_output_dirs=%s" % node_output_dirs)

    outputs = self.process_outputs(run_settings, self.job_dir, output_url,
                                   output_storage_settings, offset)

    try:
        curate_data = getval(
            run_settings,
            '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings,
            '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
        mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)

        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(
            run_settings, '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            logger.debug('self_outputs=%s' % outputs)
            self.experiment_id = mytardis_platform.create_dataset_for_intermediate_output(
                run_settings, self.experiment_id, self.job_dir, output_url,
                all_settings, outputs=outputs)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')

def _upload_input_dir_variations(self, processes, local_settings,
                                 computation_platform_settings,
                                 output_storage_settings,
                                 mytardis_settings, input_dir, run_settings):
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    input_url_with_credentials = get_url_with_credentials(
        output_storage_settings,
        output_prefix + os.path.join(self.iter_inputdir, input_dir),
        is_relative_path=False)
    logger.debug('input_url_with_credentials=%s' % input_url_with_credentials)
    if local_settings['curate_data']:
        try:
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            self.experiment_id = mytardis_platform.create_dataset_for_input(
                self.experiment_id, run_settings, local_settings,
                output_storage_settings, mytardis_settings,
                input_url_with_credentials)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')

    # get run map
    parent_stage = self.import_parent_stage(run_settings)
    run_map, self.rand_index = parent_stage.get_internal_sweep_map(
        local_settings, run_settings=run_settings)

    # load value_map
    values_url_with_pkey = get_url_with_credentials(
        output_storage_settings,
        output_prefix + os.path.join(self.iter_inputdir, input_dir, self.VALUES_FNAME),
        is_relative_path=False)
    logger.debug("initial values_file=%s" % values_url_with_pkey)
    values = {}
    try:
        values_content = storage.get_file(values_url_with_pkey)
    except IOError:
        logger.warn("no values file found")
    else:
        logger.debug("values_content = %s" % values_content)
        values = dict(json.loads(values_content))
    logger.debug("values=%s" % values)

    # generates a set of variations for the template fname
    logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)
    contexts = self._get_variation_contexts([run_map], values, self.initial_numbfile)
    self.initial_numbfile += len(contexts)
    logger.debug('contexts = %s ' % contexts)
    logger.debug('self.initial_numbfile = %s ' % self.initial_numbfile)

    # for each context, copy each file to dest and any
    # templates to be instantiated, then store in values.
    template_pat = re.compile("(.*)_template")
    relative_path_suffix = self.get_relative_output_path(local_settings)

    for context in contexts:
        logger.debug("context=%s" % context)

        # get list of all files in input_dir
        fname_url_with_pkey = get_url_with_credentials(
            output_storage_settings,
            output_prefix + os.path.join(self.iter_inputdir, input_dir),
            is_relative_path=False)
        input_files = storage.list_dirs(fname_url_with_pkey, list_files=True)

        # get process information
        run_counter = context['run_counter']
        logger.debug("run_counter=%s" % run_counter)
        proc = None
        for p in processes:
            # TODO: how to handle invalid run_counter
            pid = int(p['id'])
            logger.debug("pid=%s" % pid)
            if pid == run_counter:
                proc = p
                break
        else:
            logger.error("no process found matching run_counter")
            raise BadInputException()
        logger.debug("proc=%s" % pformat(proc))

        for fname in input_files:
            logger.debug("fname=%s" % fname)
            templ_mat = template_pat.match(fname)
            fname_url_with_credentials = storage.get_url_with_credentials(
                output_storage_settings,
                output_prefix + os.path.join(self.iter_inputdir, input_dir, fname),
                is_relative_path=False)
            logger.debug("fname_url_with_credentials=%s" % fname_url_with_credentials)

            def put_dest_file(proc, fname, dest_file_location,
                              resched_file_location, content):
                dest_url = get_url_with_credentials(
                    computation_platform_settings,
                    os.path.join(dest_file_location, fname),
                    is_relative_path=True, ip_address=proc['ip_address'])
                logger.debug("writing to =%s" % dest_url)
                # logger.debug("content=%s" % content)
                storage.put_file(dest_url, content)
                if self.reschedule_failed_procs:
                    logger.debug("resched=%s" % resched_file_location)
                    logger.debug("fname=%s" % fname)
                    logger.debug("output_storage_settings=%s" % output_storage_settings)
                    resched_path = "%s/%s" % (resched_file_location, fname)
                    logger.debug("resched_path=%s" % resched_path)
                    resched_url = get_url_with_credentials(
                        output_storage_settings, resched_path)
                    logger.debug("writing backup to %s" % resched_url)
                    storage.put_file(resched_url, content)
                logger.debug("done")

            outputs = []
            if templ_mat:
                base_fname = templ_mat.group(1)
                template_content = storage.get_file(fname_url_with_credentials)
                try:
                    templ = Template(template_content)
                except TemplateSyntaxError as e:
                    logger.error(e)
                    # FIXME: should detect this during submission of job,
                    # as no sensible way to recover here.
                    # TODO: signal error conditions in job status
                    continue
                new_context = Context(context)
                logger.debug("new_context=%s" % new_context)
                render_output = templ.render(new_context)
                render_output = render_output.encode('utf-8')
                outputs.append((base_fname, render_output))
                outputs.append((fname, template_content))
            else:
                content = storage.get_file(fname_url_with_credentials)
                outputs.append((fname, content))

            for (new_fname, content) in outputs:
                dest_file_location = computation_platform_settings['type'] \
                    + "@" + os.path.join(relative_path_suffix, proc['id'],
                                         local_settings['smart_connector_input'])
                logger.debug("dest_file_location =%s" % dest_file_location)
                resched_file_location = "%s%s" % (
                    output_prefix,
                    os.path.join(self.job_dir, "input_backup", proc['id']))
                logger.debug("resched_file_location=%s" % resched_file_location)
                put_dest_file(proc, new_fname, dest_file_location,
                              resched_file_location, content)

        # then copy context new values file
        logger.debug("writing values file")
        values_dest_location = computation_platform_settings['type'] \
            + "@" + os.path.join(relative_path_suffix, proc['id'],
                                 local_settings['smart_connector_input'],
                                 self.VALUES_FNAME)
        logger.debug("values_dest_location =%s" % values_dest_location)
        values_dest_url = get_url_with_credentials(
            computation_platform_settings, values_dest_location,
            is_relative_path=True, ip_address=proc['ip_address'])
        storage.put_file(values_dest_url, json.dumps(context, indent=4))

class Bootstrap(Stage):
    """
    Schedules processes on a cloud infrastructure
    """

    def __init__(self, user_settings=None):
        logger.debug('Bootstrap stage initialised')

    def is_triggered(self, run_settings):
        try:
            self.created_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/create/created_nodes' % django_settings.SCHEMA_PREFIX))
            running_created_nodes = [x for x in self.created_nodes
                                     if str(x[3]) == 'running']
            logger.debug('running_created_nodes=%s' % running_created_nodes)
            if len(running_created_nodes) == 0:
                return False
        except (SettingNotFoundException, ValueError):
            return False

        try:
            self.bootstrapped_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/bootstrap/bootstrapped_nodes' % django_settings.SCHEMA_PREFIX))
            logger.debug('bootstrapped nodes=%d, running created nodes = %d'
                         % (len(self.bootstrapped_nodes), len(running_created_nodes)))
            return len(self.bootstrapped_nodes) < len(running_created_nodes)
        except (SettingNotFoundException, ValueError):
            self.bootstrapped_nodes = []
            return True
        return False

    def process(self, run_settings):
        # messages.info(run_settings, "0: bootstrapping nodes")
        comp_pltf_settings = self.get_platform_settings(
            run_settings, '%s/platform/computation' % django_settings.SCHEMA_PREFIX)
        try:
            platform_type = comp_pltf_settings['platform_type']
        except KeyError as e:
            logger.error(e)
            messages.error(run_settings, e)
            return

        # TODO: cache this, as it is unlikely to change during execution
        for platform_hook in django_settings.PLATFORM_CLASSES:
            try:
                hook = jobs.safe_import(platform_hook, [], {})
            except ImproperlyConfigured as e:
                logger.error("Cannot load platform hook %s" % e)
                continue
            logger.debug("hook=%s" % hook)
            logger.debug("hook.get_platform_types=%s" % hook.get_platform_types())
            logger.debug("platform_type=%s" % platform_type)
            if platform_type in hook.get_platform_types():
                self.strategy = hook.get_strategy(platform_type)
                logger.debug("self.strategy=%s" % self.strategy)
                break
        # if platform_type in ['nectar', 'csrack', 'amazon']:
        #     self.strategy = strategies.CloudStrategy()
        # elif platform_type in ['nci']:
        #     self.strategy = strategies.ClusterStrategy()

        local_settings = {}
        try:
            self.strategy.set_bootstrap_settings(run_settings, local_settings)
            local_settings.update(comp_pltf_settings)
        except SettingNotFoundException as e:
            logger.error(e)
            messages.error(run_settings, e)
            return

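
# The dispatch loops in the stage classes assume that each entry in
# django_settings.PLATFORM_CLASSES names a hook class exposing
# get_platform_types(), get_strategy(platform_type) and
# update_platform_settings(settings). The classes below are a hypothetical,
# minimal sketch of that interface, not the project's actual hooks; the
# 'nectar'/'csrack' type names come from the commented-out fallback code in
# the stages above, and the fields they set are illustrative only.


class ExampleCloudStrategy(object):
    """Hypothetical strategy object of the kind returned by get_strategy()."""

    def set_bootstrap_settings(self, run_settings, local_settings):
        # copy whatever bootstrap-related values this platform family needs
        local_settings['example_bootstrap_option'] = True


class ExamplePlatformHook(object):
    """Hypothetical platform hook satisfying the interface assumed by the stages."""

    def get_platform_types(self):
        # platform type strings this hook can handle
        return ['nectar', 'csrack']

    def get_strategy(self, platform_type):
        # pick the strategy for the given platform type
        return ExampleCloudStrategy()

    def update_platform_settings(self, settings):
        # enrich the settings dict in place (illustrative field only)
        settings['platform_family'] = 'cloud'
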
def process(self, run_settings): logger.debug("schedule processing") comp_pltf_settings = self.get_platform_settings( run_settings, '%s/platform/computation' % django_settings.SCHEMA_PREFIX) try: platform_type = comp_pltf_settings['platform_type'] except KeyError, e: logger.error(e) messages.error(run_settings, e) return # TODO: cache is as unlikely to change during execution for platform_hook in django_settings.PLATFORM_CLASSES: try: hook = jobs.safe_import(platform_hook, [], {}) except ImproperlyConfigured as e: logger.error("Cannot load platform hook %s" % e) continue logger.debug("hook=%s" % hook) logger.debug("hook.get_platform_types=%s" % hook.get_platform_types()) logger.debug("platform_type=%s" % platform_type) if platform_type in hook.get_platform_types(): self.strategy = hook.get_strategy(platform_type) logger.debug("self.strategy=%s" % self.strategy) break local_settings = {} try: self.strategy.set_schedule_settings(run_settings, local_settings) local_settings.update(comp_pltf_settings)
class Destroy(stage.Stage):

    def __init__(self, user_settings=None):
        logger.debug('Destroy stage initialised')

    def is_triggered(self, run_settings):
        try:
            converged = int(getval(
                run_settings,
                '%s/stages/converge/converged' % django_settings.SCHEMA_PREFIX))
            logger.debug("converged=%s" % converged)
        except (ValueError, SettingNotFoundException) as e:
            return False
        if converged:
            try:
                run_finished = int(getval(
                    run_settings,
                    '%s/stages/destroy/run_finished' % django_settings.SCHEMA_PREFIX))
            except (ValueError, SettingNotFoundException) as e:
                return True
            return not run_finished
        return False

    def process(self, run_settings):
        try:
            self.id = int(getval(run_settings,
                                 '%s/system/id' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.id = 0
        try:
            self.created_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/create/created_nodes' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.created_nodes = []
        try:
            self.scheduled_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/schedule/scheduled_nodes' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.scheduled_nodes = []
        try:
            self.bootstrapped_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/bootstrap/bootstrapped_nodes' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.bootstrapped_nodes = []
        # messages.info(run_settings, "%d: destroy" % self.id)

        comp_pltf_settings = self.get_platform_settings(
            run_settings, '%s/platform/computation' % django_settings.SCHEMA_PREFIX)
        try:
            platform_type = comp_pltf_settings['platform_type']
        except KeyError as e:
            logger.error(e)
            messages.error(run_settings, e)
            return

        # TODO: cache this, as it is unlikely to change during execution
        for platform_hook in django_settings.PLATFORM_CLASSES:
            try:
                hook = jobs.safe_import(platform_hook, [], {})
            except ImproperlyConfigured as e:
                logger.error("Cannot load platform hook %s" % e)
                continue
            logger.debug("hook=%s" % hook)
            logger.debug("hook.get_platform_types=%s" % hook.get_platform_types())
            logger.debug("platform_type=%s" % platform_type)
            if platform_type in hook.get_platform_types():
                self.strategy = hook.get_strategy(platform_type)
                logger.debug("self.strategy=%s" % self.strategy)
                break
        # if platform_type in ['nectar', 'csrack', 'amazon']:
        #     self.strategy = strategies.CloudStrategy()
        # elif platform_type in ['nci']:
        #     self.strategy = strategies.ClusterStrategy()

        local_settings = {}
        try:
            self.strategy.set_destroy_settings(run_settings, local_settings)
            local_settings.update(comp_pltf_settings)
            logger.debug('local_settings=%s' % local_settings)
        except SettingNotFoundException as e:
            logger.error(e)
            messages.error(run_settings, e)
            return

class Create(Stage):

    def __init__(self, user_settings=None):
        # self.group_id = ''
        self.platform_type = None
        logger.debug("Create stage initialized")

    def is_triggered(self, run_settings):
        """
        Return True if configure done and no nodes are created
        """
        try:
            configure_done = int(getval(
                run_settings,
                '%s/stages/configure/configure_done' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            return False
        try:
            create_done = int(getval(
                run_settings,
                '%s/stages/create/create_done' % django_settings.SCHEMA_PREFIX))
            if create_done:
                return False
        except (SettingNotFoundException, ValueError):
            pass
        try:
            self.created_nodes = ast.literal_eval(getval(
                run_settings,
                '%s/stages/create/created_nodes' % django_settings.SCHEMA_PREFIX))
        except (SettingNotFoundException, ValueError):
            self.created_nodes = []
            return True
        return False

    def process(self, run_settings):
        # messages.info(run_settings, "1: create")
        comp_pltf_settings = self.get_platform_settings(
            run_settings, '%s/platform/computation' % django_settings.SCHEMA_PREFIX)
        logger.debug("comp_pltf_settings=%s" % comp_pltf_settings)
        try:
            platform_type = comp_pltf_settings['platform_type']
        except KeyError as e:
            logger.error(e)
            messages.error(run_settings, e)
            return
        logger.debug("platform_type=%s" % platform_type)

        # TODO: cache this, as it is unlikely to change during execution
        for platform_hook in django_settings.PLATFORM_CLASSES:
            try:
                hook = jobs.safe_import(platform_hook, [], {})
            except ImproperlyConfigured as e:
                logger.error("Cannot load platform hook %s" % e)
                continue
            logger.debug("hook=%s" % hook)
            logger.debug("hook.get_platform_types=%s" % hook.get_platform_types())
            logger.debug("platform_type=%s" % platform_type)
            if platform_type in hook.get_platform_types():
                self.strategy = hook.get_strategy(platform_type)
                logger.debug("self.strategy=%s" % self.strategy)
                break
        # if platform_type in ['nectar', 'csrack', 'amazon']:
        #     self.strategy = strategies.CloudStrategy()
        # elif platform_type in ['nci']:
        #     self.strategy = strategies.ClusterStrategy()

        local_settings = {}
        try:
            self.strategy.set_create_settings(run_settings, local_settings)
            local_settings.update(comp_pltf_settings)
            logger.debug('local_settings=%s' % local_settings)
        except SettingNotFoundException as e:
            logger.error(e)
            messages.error(run_settings, e)
            return

logger.debug("retrieved run_settings=%s" % pformat(run_settings)) # user_settings are r/w during execution, but original values # associated with UserProfile are unchanged as loaded once on # context creation. #user_settings = run_settings[models.UserProfile.PROFILE_SCHEMA_NS] # PROFILE_SCHEMA is now deprecated, so user_settings = {} triggered = 0 for current_stage in stageset: logger.debug("checking stage %s for trigger" % current_stage.name) # get the actual stage object try: stage = jobs.safe_import(current_stage.package, [], {'user_settings': deepcopy(user_settings)}) # obviously need to cache this except ImproperlyConfigured, e: logger.error(e) messages.error(run_settings, "0: internal error (%s stage):%s" % (str(current_stage.name), e)) raise logger.debug("process stage=%s", stage) task_run_settings = deepcopy(run_settings) logger.debug("starting task settings = %s" % pformat(task_run_settings)) # stage_settings are read only as transfered into context here stage_settings = current_stage.get_settings() logger.debug("stage_settings=%s" % stage_settings) # This is nasty task_run_settings = jobs.transfer(task_run_settings, stage_settings)