def _copy_previous_inputs(self, local_settings, output_storage_settings,
                          computation_platform_settings):
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    for proc in self.ready_processes:
        source_location = os.path.join(self.job_dir, "input_backup", proc['id'])
        source_files_url = get_url_with_credentials(
            output_storage_settings, output_prefix + source_location,
            is_relative_path=False)
        relative_path_suffix = self.get_relative_output_path(local_settings)
        # dest_files_location = computation_platform_settings['type'] + "@" \
        #     + os.path.join(local_settings['payload_destination'],
        #                    proc['id'],
        #                    local_settings['process_output_dirname'])
        dest_files_location = computation_platform_settings['type'] + "@" \
            + os.path.join(relative_path_suffix, proc['id'],
                           local_settings['smart_connector_input'])
        logger.debug('dest_files_location=%s' % dest_files_location)
        dest_files_url = get_url_with_credentials(
            computation_platform_settings, dest_files_location,
            is_relative_path=True, ip_address=proc['ip_address'])
        logger.debug('dest_files_url=%s' % dest_files_url)
        storage.copy_directories(source_files_url, dest_files_url)
def _copy_previous_inputs(self, local_settings, output_storage_settings,
                          computation_platform_settings):
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    for proc in self.ready_processes:
        source_location = os.path.join(self.job_dir, "input_backup", proc['id'])
        source_files_url = get_url_with_credentials(
            output_storage_settings, output_prefix + source_location,
            is_relative_path=False)
        relative_path_suffix = self.get_relative_output_path(local_settings)
        # dest_files_location = computation_platform_settings['type'] + "@" \
        #     + os.path.join(local_settings['payload_destination'],
        #                    proc['id'],
        #                    local_settings['payload_cloud_dirname'])
        dest_files_location = computation_platform_settings['type'] + "@" \
            + os.path.join(relative_path_suffix, proc['id'],
                           local_settings['payload_cloud_dirname'])
        logger.debug('dest_files_location=%s' % dest_files_location)
        dest_files_url = get_url_with_credentials(
            computation_platform_settings, dest_files_location,
            is_relative_path=True, ip_address=proc['ip_address'])
        logger.debug('dest_files_url=%s' % dest_files_url)
        storage.copy_directories(source_files_url, dest_files_url)
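# Both variants above compose "BDP URLs" of the form scheme://type@path
# (sometimes scheme://type@host/path), wrap them with
# get_url_with_credentials(), and hand them to storage.copy_directories().
# A minimal illustrative sketch of that composition, using only the
# settings keys already seen above (the helper name is hypothetical):
def _example_backup_source_url(output_storage_settings, job_dir, proc_id):
    # e.g. scheme='ssh', type='unix_fs', job_dir='/scratch/job42',
    # proc_id='7' would yield ssh://unix_fs@/scratch/job42/input_backup/7
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    source_location = os.path.join(job_dir, "input_backup", proc_id)
    return get_url_with_credentials(output_storage_settings,
                                    output_prefix + source_location,
                                    is_relative_path=False)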
def copy_to_scratch_space(self, run_settings, local_settings):
    bdp_username = run_settings[
        'http://rmit.edu.au/schemas/bdp_userprofile']['username']
    output_storage_url = run_settings[
        'http://rmit.edu.au/schemas/platform/storage/output']['platform_url']
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    run_settings['http://rmit.edu.au/schemas/platform/storage/output'][
        'offset'] = self.output_loc_offset
    offset = run_settings[
        'http://rmit.edu.au/schemas/platform/storage/output']['offset']
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    iter_inputdir = os.path.join(self.job_dir, "input_0")
    logger.debug("iter_inputdir=%s" % iter_inputdir)
    input_location = run_settings[
        RMIT_SCHEMA + '/input/system']['input_location']
    logger.debug("input_location=%s" % input_location)
    # todo: input location will eventually be replaced by the scratch
    # space that was used by the sweep
    # todo: the sweep will indicate the location of the scratch space
    # in the run_settings
    # todo: add scheme (ssh) to input location
    source_url = get_url_with_credentials(local_settings, input_location)
    logger.debug("source_url=%s" % source_url)
    destination_url = get_url_with_credentials(
        output_storage_settings,
        '%s://%s@%s' % (output_storage_settings['scheme'],
                        output_storage_settings['type'],
                        iter_inputdir),
        is_relative_path=False)
    logger.debug("destination_url=%s" % destination_url)
    storage.copy_directories(source_url, destination_url)
def _upload_variations_inputs(settings, source_url_initial, values_map):
    bdp_username = settings['bdp_username']
    logger.debug("source_url_initial=%s" % source_url_initial)
    encoded_s_url = storage.get_url_with_credentials(settings,
                                                     source_url_initial)
    logger.debug("encoded_s_url=%s" % encoded_s_url)
    dest_url = _get_dest_bdp_url(settings)
    computation_platform_url = settings['comp_platform_url']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url, bdp_username)
    settings.update(comp_pltf_settings)
    encoded_d_url = storage.get_url_with_credentials(
        settings, dest_url, is_relative_path=True,
        ip_address=settings['host'])
    storage.copy_directories(encoded_s_url, encoded_d_url)
    for content_fname, content in _instantiate_context(
            source_url_initial, settings, values_map).items():
        content_url = storage.get_url_with_credentials(
            settings, os.path.join(dest_url, content_fname),
            is_relative_path=True, ip_address=settings['host'])
        logger.debug("content_url=%s" % content_url)
        storage.put_file(content_url, content.encode('utf-8'))
    _save_values(settings, dest_url, values_map)
    logger.debug("done input upload")
def _get_output(self, local_settings, source_url):
    """
    Retrieve the output from the task on the node
    """
    logger.debug("get_output from %s" % source_url)
    computation_platform_url = local_settings['comp_platform_url']
    bdp_username = local_settings['bdp_username']
    comp_pltf_settings = manage.get_platform_settings(
        computation_platform_url, bdp_username)
    local_settings.update(comp_pltf_settings)
    encoded_s_url = storage.get_url_with_credentials(
        local_settings, source_url,
        is_relative_path=True, ip_address=local_settings['host'])
    (scheme, host, mypath, location, query_settings) = \
        storage.parse_bdpurl(encoded_s_url)
    make_path = os.path.join(query_settings['root_path'], mypath)
    logger.debug("make_path=%s" % make_path)
    output_storage_url = local_settings['storeout_platform_url']
    logger.debug("output_storage_url=%s" % output_storage_url)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    local_settings.update(output_storage_settings)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    dest_url = '%s://%s@%s/%s/make%s' % (
        output_storage_settings['scheme'],
        output_storage_settings['type'],
        output_storage_settings['host'],
        local_settings['storeout_platform_offset'],
        str(local_settings['contextid']))
    logger.debug("Transferring output from %s to %s" % (source_url, dest_url))
    encoded_d_url = storage.get_url_with_credentials(local_settings, dest_url)
    logger.debug("encoded_d_url=%s" % encoded_d_url)
    # FIXME: might want to turn on paramiko compress function
    # storage_files(encoded_d_url, exceptions=[])
    # to speed up this transfer
    try:
        storage.copy_directories(encoded_s_url, encoded_d_url)
    except SSHException as e:
        logger.error(e)
        # FIXME: Could just exit, but need to flag that this data has
        # not been transferred.
        raise
def copy_to_scratch_space(self, run_settings, local_settings, result_offset):
    bdp_username = run_settings[
        '%s/bdp_userprofile' % django_settings.SCHEMA_PREFIX]['username']
    output_storage_url = run_settings[
        '%s/platform/storage/output'
        % django_settings.SCHEMA_PREFIX]['platform_url']
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    run_settings['%s/platform/storage/output'
                 % django_settings.SCHEMA_PREFIX]['offset'] = \
        self.output_loc_offset
    offset = run_settings['%s/platform/storage/output'
                          % django_settings.SCHEMA_PREFIX]['offset']
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    iter_inputdir = os.path.join(self.job_dir, result_offset)
    logger.debug("iter_inputdir=%s" % iter_inputdir)
    input_storage_settings = self.get_platform_settings(
        run_settings,
        '%s/platform/storage/input' % django_settings.SCHEMA_PREFIX)
    # input_location = run_settings[
    #     django_settings.SCHEMA_PREFIX + '/input/system']['input_location']
    try:
        input_location = getval(
            run_settings,
            django_settings.SCHEMA_PREFIX + '/input/system/input_location')
    except SettingNotFoundException:
        try:
            input_location = getval(
                run_settings,
                django_settings.SCHEMA_PREFIX
                + '/input/location/input_location')
        except:
            input_location = getval(
                run_settings,
                django_settings.SCHEMA_PREFIX
                + '/input/location/input/input_location')
    logger.debug("input_location=%s" % input_location)
    # todo: input location will eventually be replaced by the scratch
    # space that was used by the sweep
    # todo: the sweep will indicate the location of the scratch space
    # in the run_settings
    # todo: add scheme (ssh) to input location
    # source_url = get_url_with_credentials(local_settings, input_location)
    input_offset = run_settings['%s/platform/storage/input'
                                % django_settings.SCHEMA_PREFIX]['offset']
    input_url = "%s://%s@%s/%s" % (input_storage_settings['scheme'],
                                   input_storage_settings['type'],
                                   input_storage_settings['host'],
                                   input_offset)
    source_url = get_url_with_credentials(
        input_storage_settings, input_url, is_relative_path=False)
    logger.debug("source_url=%s" % source_url)
    destination_url = get_url_with_credentials(
        output_storage_settings,
        '%s://%s@%s' % (output_storage_settings['scheme'],
                        output_storage_settings['type'],
                        iter_inputdir),
        is_relative_path=False)
    logger.debug("destination_url=%s" % destination_url)
    storage.copy_directories(source_url, destination_url)
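# The nested try/except chain above probes three schema keys in turn for
# input_location. A minimal sketch of an equivalent fallback helper,
# assuming getval() raises SettingNotFoundException for a missing key
# (the helper name is hypothetical; the bare `except:` above is kept
# narrow here):
def _getval_first(run_settings, *keys):
    for key in keys[:-1]:
        try:
            return getval(run_settings, key)
        except SettingNotFoundException:
            continue
    return getval(run_settings, keys[-1])

# usage sketch:
# input_location = _getval_first(
#     run_settings,
#     django_settings.SCHEMA_PREFIX + '/input/system/input_location',
#     django_settings.SCHEMA_PREFIX + '/input/location/input_location',
#     django_settings.SCHEMA_PREFIX + '/input/location/input/input_location')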
def get_output(self, ip_address, process_id, output_dir, local_settings,
               computation_platform_settings, output_storage_settings,
               run_settings):
    """
    Retrieve the output from the task on the node
    """
    logger.debug("get_output of process %s on %s" % (process_id, ip_address))
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    # fixme: add call get_process_output_path
    # cloud_path = os.path.join(local_settings['payload_destination'],
    #                           # str(contextid),  # fixme: uncomment
    #                           str(process_id),
    #                           local_settings['payload_cloud_dirname'])
    relative_path_suffix = self.get_relative_output_path(local_settings)
    cloud_path = os.path.join(relative_path_suffix, str(process_id),
                              local_settings['payload_cloud_dirname'])
    # cloud_path = self.get_process_output_path(run_settings, process_id)
    logger.debug("cloud_path=%s" % cloud_path)
    logger.debug("Transferring output from %s to %s" % (cloud_path,
                                                        output_dir))
    ip = ip_address  # botocloudconnector.get_instance_ip(instance_id, settings)
    # ssh = open_connection(ip_address=ip, settings=settings)
    source_files_location = "%s://%s@%s" % (
        computation_platform_settings['scheme'],
        computation_platform_settings['type'],
        os.path.join(ip, cloud_path))
    source_files_url = get_url_with_credentials(
        computation_platform_settings, source_files_location,
        is_relative_path=False)
    logger.debug('source_files_url=%s' % source_files_url)
    dest_files_url = get_url_with_credentials(
        output_storage_settings,
        output_prefix + os.path.join(self.job_dir, self.output_dir,
                                     process_id),
        is_relative_path=False)
    logger.debug('dest_files_url=%s' % dest_files_url)
    # FIXME: might want to turn on paramiko compress function
    # to speed up this transfer
    storage.copy_directories(source_files_url, dest_files_url)
def _start_bootstrap(instance, ip, settings, source, destination):
    """
    Start the task on the instance, then return
    """
    logger.info("run_task %s" % str(instance))
    copy_directories(source, destination)
    makefile_path = get_make_path(destination)
    # TODO, FIXME: need to have timeout for yum install make
    # and then test can access, otherwise, loop.
    install_make = 'yum install -y make'
    command_out = ''
    errs = ''
    logger.debug("starting command for %s" % ip)
    ssh = ''
    try:
        ssh = open_connection(ip_address=ip, settings=settings)
        command_out, errs = run_command_with_status(ssh, install_make)
        logger.debug("command_out1=(%s, %s)" % (command_out, errs))
        run_make(ssh, makefile_path, 'start_bootstrap')
    except Exception as e:
        # fixme: consider using reliability framework
        logger.error(e)
        raise
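# The TODO above asks for a timeout/retry around `yum install -y make`.
# A minimal sketch under the same call conventions used above
# (run_command_with_status returning (out, errs)); treating empty errs
# as success is an assumption, and the attempt/delay values are
# illustrative only:
import time

def _install_make_with_retry(ssh, attempts=3, delay=30):
    # hypothetical helper for illustration only
    for attempt in range(1, attempts + 1):
        out, errs = run_command_with_status(ssh, 'yum install -y make')
        if not errs:  # assumption: empty stderr means make is installed
            return out
        logger.debug("yum install attempt %d failed: %s" % (attempt, errs))
        time.sleep(delay)
    raise RuntimeError("could not install make after %d attempts" % attempts)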
def process(self, run_settings):
    try:
        id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        id = 0
    messages.info(run_settings, '%d: converging' % (id + 1))

    def retrieve_local_settings(run_settings, local_settings):
        update(local_settings, run_settings
               # '%s/stages/setup/payload_source' % RMIT_SCHEMA,
               # '%s/stages/setup/payload_destination' % RMIT_SCHEMA,
               # '%s/system/platform' % RMIT_SCHEMA,
               # # '%s/stages/create/custom_prompt' % RMIT_SCHEMA,
               # # '%s/stages/create/cloud_sleep_interval' % RMIT_SCHEMA,
               # # '%s/stages/create/created_nodes' % RMIT_SCHEMA,
               # '%s/stages/run/payload_cloud_dirname' % RMIT_SCHEMA,
               # '%s/system/max_seed_int' % RMIT_SCHEMA,
               # '%s/stages/run/compile_file' % RMIT_SCHEMA,
               # '%s/stages/run/retry_attempts' % RMIT_SCHEMA,
               # '%s/input/system/cloud/number_vm_instances' % RMIT_SCHEMA,
               # '%s/input/hrmc/iseed' % RMIT_SCHEMA,
               # '%s/input/hrmc/optimisation_scheme' % RMIT_SCHEMA,
               # '%s/input/hrmc/threshold' % RMIT_SCHEMA,
               )
        local_settings['bdp_username'] = getval(
            run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)

    local_settings = getvals(run_settings,
                             models.UserProfile.PROFILE_SCHEMA_NS)
    retrieve_local_settings(run_settings, local_settings)
    bdp_username = local_settings['bdp_username']

    # get output storage
    output_storage_url = getval(
        run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings,
                    '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    job_dir = manage.get_job_dir(output_storage_settings, offset)

    # get mytardis
    # mytardis_url = getval(run_settings,
    #     '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
    # mytardis_settings = manage.get_platform_settings(mytardis_url,
    #                                                  bdp_username)

    # setup new paths
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(job_dir, "output_%d" % self.id)
        self.iter_inputdir = os.path.join(job_dir, "input_%d" % (self.id + 1))
        # self.new_iter_inputdir = "input_%d" % (self.id + 1)
    except (SettingNotFoundException, ValueError):
        self.output_dir = os.path.join(job_dir, "output")
        self.iter_inputdir = os.path.join(job_dir, "input")
        self.id = 0
    logger.debug('output_dir=%s iter_inputdir=%s' % (self.output_dir,
                                                     self.iter_inputdir))
    try:
        self.experiment_id = int(getval(
            run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0

    inputdir_url = get_url_with_credentials(
        output_storage_settings, output_prefix + self.iter_inputdir,
        is_relative_path=False)
    logger.debug('input_dir_url=%s' % inputdir_url)
    # (scheme, host, mypath, location, query_settings) = \
    #     storage.parse_bdpurl(inputdir_url)
    # fsys = storage.get_filesystem(inputdir_url)
    # logger.debug('mypath=%s' % mypath)
    # input_dirs, _ = fsys.listdir(mypath)
    # logger.debug('input_dirs=%s' % input_dirs)
    (self.done_iterating, self.criterion) = self.process_outputs(
        run_settings, job_dir, inputdir_url, output_storage_settings)

    if self.done_iterating:
        logger.debug("Total Iterations: %d" % self.id)
        # get source url
        iter_output_dir = os.path.join(job_dir, "output_%s" % self.id)
        source_url = "%s%s" % (output_prefix, iter_output_dir)
        # get dest url
        new_output_dir = os.path.join(job_dir, 'output')
        dest_url = "%s%s" % (output_prefix, new_output_dir)
        source_url = get_url_with_credentials(
            output_storage_settings, output_prefix + iter_output_dir,
            is_relative_path=False)
        dest_url = get_url_with_credentials(
            output_storage_settings, output_prefix + new_output_dir,
            is_relative_path=False)
        storage.copy_directories(source_url, dest_url)

        # curate
        try:
            curate_data = getval(run_settings,
                                 '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:
            mytardis_url = getval(
                run_settings,
                '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
            mytardis_settings = manage.get_platform_settings(
                mytardis_url, bdp_username)
            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            logger.debug("source_url=%s" % source_url)
            logger.debug("dest_url=%s" % dest_url)
            logger.debug("job_dir=%s" % job_dir)
            self.experiment_id = self.curate_dataset(
                run_settings, self.experiment_id, job_dir, dest_url,
                all_settings)
        else:
            logger.warn('Data curation is off')
def upload_variation_inputs(self, run_settings, local_settings, variations,
                            processes, input_dir, output_storage_settings,
                            computation_platform_settings, mytardis_settings):
    '''
    Create input packages for each variation and upload to the VMs
    '''
    logger.debug("upload_variation_inputs")
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    source_files_url = get_url_with_credentials(
        output_storage_settings,
        output_prefix + os.path.join(self.iter_inputdir, input_dir),
        is_relative_path=False)
    logger.debug('source_files_url=%s' % source_files_url)

    # Copy input directory to mytardis only after saving locally, so if
    # something goes wrong we still have the results
    if local_settings['curate_data']:
        self.experiment_id = self.curate_data(
            self.experiment_id, local_settings, output_storage_settings,
            mytardis_settings, source_files_url)
    else:
        logger.warn('Data curation is off')

    # proc_ind = 0
    for var_fname in variations.keys():
        logger.debug("var_fname=%s" % var_fname)
        logger.debug('variations[var_fname]=%s' % variations[var_fname])
        for var_content, values in variations[var_fname]:
            # logger.debug("var_content = %s" % var_content)
            # logger.debug('proc_ind=%s' % proc_ind)
            logger.debug('processes=%s' % processes)
            run_counter = values['run_counter']
            logger.debug("run_counter=%s" % run_counter)
            proc = None
            for p in processes:
                # TODO: how to handle invalid run_counter
                pid = int(p['id'])
                logger.debug("pid=%s" % pid)
                if pid == run_counter:
                    proc = p
                    break
            else:
                logger.error("no process found matching run_counter")
                # smartconnectorscheduler.error(run_settings,
                #     "%s: wait" % (self.id + 1))
                # TODO: catch this error and recover
                raise BadInputException()
            logger.debug("proc=%s" % pformat(proc))
            # proc = processes[proc_ind]
            # proc_ind += 1
            # ip = botocloudconnector.get_instance_ip(var_node.id,
            #                                         local_settings)
            ip = proc['ip_address']
            # dest_files_location = computation_platform_settings['type'] \
            #     + "@" + os.path.join(local_settings['payload_destination'],
            #                          proc['id'],
            #                          local_settings['payload_cloud_dirname'])
            relative_path_suffix = self.get_relative_output_path(
                local_settings)
            dest_files_location = computation_platform_settings['type'] + "@" \
                + os.path.join(relative_path_suffix, proc['id'],
                               local_settings['payload_cloud_dirname'])
            logger.debug('dest_files_location=%s' % dest_files_location)
            dest_files_url = get_url_with_credentials(
                computation_platform_settings, dest_files_location,
                is_relative_path=True, ip_address=ip)
            logger.debug('dest_files_url=%s' % dest_files_url)

            # FIXME: Cleanup any existing runs already there
            # FIXME: keep the compile exec from setup
            # FIXME: exceptions should be given as parameter
            # FIXME: we should not delete any file, since each process
            # runs in its own directory
            exceptions = [local_settings['compile_file'], "..", ".",
                          'PSD', 'PSD.f', 'PSD_exp.dat', 'PSD.inp',
                          'Makefile', 'running.sh',
                          'process_scheduledone.sh',
                          'process_schedulestart.sh']
            storage.copy_directories(source_files_url, dest_files_url)

            if self.reschedule_failed_procs:
                input_backup = os.path.join(self.job_dir, "input_backup",
                                            proc['id'])
                backup_url = get_url_with_credentials(
                    output_storage_settings,
                    output_prefix + input_backup, is_relative_path=False)
                storage.copy_directories(source_files_url, backup_url)

            # Why do we need to create a temporary file to make this copy?
            import uuid
            randsuffix = unicode(uuid.uuid4())  # should use some job id here
            var_url = get_url_with_credentials(
                local_settings,
                os.path.join("tmp%s" % randsuffix, "var"),
                is_relative_path=True)
            logger.debug("var_url=%s" % var_url)
            storage.put_file(var_url, var_content.encode('utf-8'))
            value_url = get_url_with_credentials(
                local_settings,
                os.path.join("tmp%s" % randsuffix, "value"),
                is_relative_path=True)
            logger.debug("value_url=%s" % value_url)
            storage.put_file(value_url, json.dumps(values))

            # local_settings['platform'] should be replaced
            # and overwrite on the remote
            # var_fname_remote = computation_platform_settings['type'] \
            #     + "@" + os.path.join(local_settings['payload_destination'],
            #                          proc['id'],
            #                          local_settings['payload_cloud_dirname'],
            #                          var_fname)
            var_fname_remote = computation_platform_settings['type'] \
                + "@" + os.path.join(relative_path_suffix, proc['id'],
                                     local_settings['payload_cloud_dirname'],
                                     var_fname)
            var_fname_pkey = get_url_with_credentials(
                computation_platform_settings, var_fname_remote,
                is_relative_path=True, ip_address=ip)
            var_content = storage.get_file(var_url)
            storage.put_file(var_fname_pkey, var_content)
            logger.debug("var_fname_pkey=%s" % var_fname_pkey)
            values_fname_pkey = get_url_with_credentials(
                computation_platform_settings,
                os.path.join(dest_files_location, "%s_values" % var_fname),
                is_relative_path=True, ip_address=ip)
            values_content = storage.get_file(value_url)
            storage.put_file(values_fname_pkey, values_content)
            logger.debug("values_fname_pkey=%s" % values_fname_pkey)

            # copy values and var_content to backup folder
            if self.reschedule_failed_procs:
                value_url = get_url_with_credentials(
                    output_storage_settings,
                    output_prefix + os.path.join(input_backup,
                                                 "%s_values" % var_fname),
                    is_relative_path=False)
                logger.debug("value_url=%s" % value_url)
                storage.put_file(value_url, json.dumps(values))
                var_fname_pkey = get_url_with_credentials(
                    output_storage_settings,
                    output_prefix + os.path.join(input_backup, var_fname),
                    is_relative_path=False)
                var_content = storage.get_file(var_url)
                storage.put_file(var_fname_pkey, var_content)

            # cleanup
            tmp_url = get_url_with_credentials(
                local_settings, os.path.join("tmp%s" % randsuffix),
                is_relative_path=True)
            logger.debug("deleting %s" % tmp_url)
logger.debug("run_counter=%s" % run_counter) run_inputdir = os.path.join( self.scratch_platform, SUBDIRECTIVE_DIR % {'run_counter': str(run_counter)}, FIRST_ITERATION_DIR, ) logger.debug("run_inputdir=%s" % run_inputdir) run_iter_url = get_url_with_credentials(local_settings, run_inputdir, is_relative_path=False) logger.debug("run_iter_url=%s" % run_iter_url) # Duplicate any input_directory into runX duplicates if input_loc: logger.debug("context=%s" % context) copy_directories(input_url, run_iter_url) # Need to load up existing values, because original input_dir could # have contained values for the whole run # This code is deprecated in favour of single values file. self.error_detected = False # try: # template_name = getval(run_settings, # '%s/stages/sweep/template_name' # % django_settings.SCHEMA_PREFIX) # except SettingNotFoundException: # pass # else: # logger.debug("template_name=%s" % template_name) # v_map = {}
class Sweep(Stage):

    def __init__(self, user_settings=None):
        self.numbfile = 0
        logger.debug("Sweep stage initialized")

    def is_triggered(self, run_settings):
        logger.debug('run_settings=%s' % run_settings)
        try:
            configure_done = int(getval(
                run_settings,
                '%s/stages/sweep/sweep_done' % RMIT_SCHEMA))
        except (ValueError, SettingNotFoundException):
            return True
        return not configure_done

    def _get_sweep_name(self, run_settings):
        try:
            sweep_name = getval(run_settings,
                                '%s/directive_profile/sweep_name'
                                % RMIT_SCHEMA)
        except SettingNotFoundException:
            sweep_name = 'unknown_sweep'
        return sweep_name

    def process(self, run_settings):
        logger.debug('run_settings=%s' % run_settings)

        # Need to make a copy because we pass run_settings on to the sub
        # connector, so any changes we make here WILL be inherited.
        def make_local_settings(run_settings):
            from copy import deepcopy
            local_settings = deepcopy(getvals(
                run_settings, models.UserProfile.PROFILE_SCHEMA_NS))
            update(local_settings, run_settings,
                   RMIT_SCHEMA + '/system/platform',
                   # RMIT_SCHEMA + '/input/mytardis/experiment_id',
                   # RMIT_SCHEMA + '/system/random_numbers',
                   )
            local_settings['bdp_username'] = getval(
                run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)
            return local_settings

        local_settings = make_local_settings(run_settings)
        logger.debug('local_settings=%s' % local_settings)

        setval(run_settings,
               '%s/platform/computation/platform_url' % RMIT_SCHEMA,
               getval(run_settings,
                      '%s/input/system/compplatform/computation_platform'
                      % RMIT_SCHEMA))

        def _parse_output_location(run_settings, location):
            loc_list = location.split('/')
            name = loc_list[0]
            offset = ''
            if len(loc_list) > 1:
                offset = os.path.join(*loc_list[1:])
            logger.debug('offset=%s' % offset)
            return name, offset

        contextid = int(getval(run_settings,
                               '%s/system/contextid' % RMIT_SCHEMA))
        logger.debug("contextid=%s" % contextid)
        sweep_name = self._get_sweep_name(run_settings)
        logger.debug("sweep_name=%s" % sweep_name)

        output_loc = self.output_exists(run_settings)
        location = ""
        if output_loc:
            location = getval(run_settings, output_loc)
            output_storage_name, output_storage_offset = \
                _parse_output_location(run_settings, location)
            setval(run_settings,
                   '%s/platform/storage/output/platform_url' % RMIT_SCHEMA,
                   output_storage_name)
            setval(run_settings,
                   '%s/platform/storage/output/offset' % RMIT_SCHEMA,
                   os.path.join(output_storage_offset,
                                '%s%s' % (sweep_name, contextid)))

        def _parse_input_location(run_settings, location):
            loc_list = location.split('/')
            name = loc_list[0]
            offset = ''
            if len(loc_list) > 1:
                offset = os.path.join(*loc_list[1:])
            logger.debug('offset=%s' % offset)
            return (name, offset)

        input_loc = self.input_exists(run_settings)
        if input_loc:
            location = getval(run_settings, input_loc)
            input_storage_name, input_storage_offset = \
                _parse_input_location(run_settings, location)
            setval(run_settings,
                   '%s/platform/storage/input/platform_url' % RMIT_SCHEMA,
                   input_storage_name)
            # store offsets
            setval(run_settings,
                   '%s/platform/storage/input/offset' % RMIT_SCHEMA,
                   input_storage_offset)

        # TODO: replace with scratch space computation platform space
        self.scratch_platform = '%s%s%s' % (manage.get_scratch_platform(),
                                            sweep_name, contextid)

        # mytardis
        if output_loc:
            try:
                self.experiment_id = int(getval(
                    run_settings,
                    '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
            except (KeyError, ValueError):
                self.experiment_id = 0
            try:
                curate_data = getval(run_settings,
                                     '%s/input/mytardis/curate_data'
                                     % RMIT_SCHEMA)
            except SettingNotFoundException:
                curate_data = False
            if curate_data:
                self.experiment_id = self.curate_data(
                    run_settings, location, self.experiment_id)
            setval(run_settings,
                   '%s/input/mytardis/experiment_id' % RMIT_SCHEMA,
                   str(self.experiment_id))

        # generate all variations
        map_text = getval(run_settings,
                          '%s/input/sweep/sweep_map' % RMIT_SCHEMA)
        # map_text = run_settings[RMIT_SCHEMA + '/input/sweep']['sweep_map']
        sweep_map = json.loads(map_text)
        logger.debug("sweep_map=%s" % pformat(sweep_map))
        runs = _expand_variations(maps=[sweep_map], values={})
        logger.debug("runs=%s" % runs)

        # Create random numbers if needed
        # TODO: move iseed out of hrmc into separate generic schema
        # to use on any sweepable connector and make this function
        # completely hrmc independent.
        rands = []
        try:
            self.rand_index = getval(run_settings,
                                     '%s/input/hrmc/iseed' % RMIT_SCHEMA)
            logger.debug("rand_index=%s" % self.rand_index)
        except SettingNotFoundException:
            pass
        else:
            # prep random seeds for each run based off original iseed
            # FIXME: inefficient for large random file
            # TODO, FIXME: this is potentially problematic if different
            # runs end up overlapping in the random numbers they utilise.
            # solution is to have separate random files per run or
            # partition big file up.
            try:
                num_url = getval(run_settings,
                                 "%s/system/random_numbers" % RMIT_SCHEMA)
                logger.debug('num_url=%s' % num_url)
            except SettingNotFoundException:
                pass
            else:
                try:
                    local_settings['random_numbers'] = num_url
                    rands = generate_rands(settings=local_settings,
                                           start_range=0,
                                           end_range=-1,
                                           num_required=len(runs),
                                           start_index=self.rand_index)
                    logger.debug("rands=%s" % rands)
                except Exception as e:
                    logger.debug('error')
                    logger.error(e)
                    raise

        # load initial values map in the input directory, which contains
        # variables to use for all subdirectives
        starting_map = {}
        if input_loc:
            input_storage_settings = self.get_platform_settings(
                run_settings,
                'http://rmit.edu.au/schemas/platform/storage/input')
            try:
                input_prefix = '%s://%s@' % (input_storage_settings['scheme'],
                                             input_storage_settings['type'])
                values_url = get_url_with_credentials(
                    input_storage_settings,
                    input_prefix + os.path.join(
                        input_storage_settings['ip_address'],
                        input_storage_offset, "initial", VALUES_MAP_FILE),
                    is_relative_path=False)
                logger.debug("values_url=%s" % values_url)
                values_e_url = get_url_with_credentials(
                    local_settings, values_url, is_relative_path=False)
                logger.debug("values_e_url=%s" % values_e_url)
                values_content = get_file(values_e_url)
                logger.debug("values_content=%s" % values_content)
                starting_map = dict(json.loads(values_content))
            except IOError:
                logger.warn("no starting values file found")
            except ValueError:
                logger.error("problem parsing contents of %s"
                             % VALUES_MAP_FILE)
        logger.debug("starting_map after initial values=%s"
                     % pformat(starting_map))

        # Copy form input values into starting map
        # FIXME: could have name collisions between form inputs and
        # starting values.
        for ns in run_settings:
            if ns.startswith(RMIT_SCHEMA + "/input"):
                # for k, v in run_settings[ns].items():
                for k, v in getvals(run_settings, ns).items():
                    starting_map[k] = v
        logger.debug("starting_map after form=%s" % pformat(starting_map))

        # FIXME: we assume we will always have an input directory

        # Get input_url directory
        input_url = ""
        if input_loc:
            input_prefix = '%s://%s@' % (input_storage_settings['scheme'],
                                         input_storage_settings['type'])
            input_url = get_url_with_credentials(
                input_storage_settings,
                input_prefix + os.path.join(
                    input_storage_settings['ip_address'],
                    input_storage_offset),
                is_relative_path=False)
            logger.debug("input_url=%s" % input_url)

        current_context = models.Context.objects.get(id=contextid)
        user = current_context.owner.user.username

        # For each of the generated runs, copy across initial input
        # to individual input directories with variation values,
        # and then schedule a subrun of the sub directive
        logger.debug("run_settings=%s" % run_settings)
        for i, context in enumerate(runs):
            run_counter = int(context['run_counter'])
            logger.debug("run_counter=%s" % run_counter)
            run_inputdir = os.path.join(
                self.scratch_platform,
                SUBDIRECTIVE_DIR % {'run_counter': str(run_counter)},
                FIRST_ITERATION_DIR,)
            logger.debug("run_inputdir=%s" % run_inputdir)
            run_iter_url = get_url_with_credentials(
                local_settings, run_inputdir, is_relative_path=False)
            logger.debug("run_iter_url=%s" % run_iter_url)

            # Duplicate any input_directory into runX duplicates
            if input_loc:
                logger.debug("context=%s" % context)
                logger.debug("systemsettings=%s"
                             % pformat(getvals(run_settings,
                                               RMIT_SCHEMA
                                               + '/input/system')))
                copy_directories(input_url, run_iter_url)

            # Need to load up existing values, because the original
            # input_dir could have contained values for the whole run.
            # This code is deprecated in favour of a single values file.
            self.error_detected = False
            try:
                template_name = getval(run_settings,
                                       '%s/stages/sweep/template_name'
                                       % RMIT_SCHEMA)
            except SettingNotFoundException:
                pass
            else:
                logger.debug("template_name=%s" % template_name)
                v_map = {}
                try:
                    values_url = get_url_with_credentials(
                        local_settings,
                        os.path.join(run_inputdir, "initial",
                                     VALUES_MAP_TEMPLATE_FILE
                                     % {'template_name': template_name}),
                        is_relative_path=False)
                    logger.debug("values_url=%s" % values_url)
                    values_content = get_file(values_url)
                    logger.debug("values_content=%s" % values_content)
                    v_map = dict(json.loads(values_content))
                except IOError:
                    logger.warn("no values file found")
                except ValueError:
                    logger.error("problem parsing contents of %s"
                                 % VALUES_MAP_FILE)
                v_map.update(starting_map)
                v_map.update(context)
                logger.debug("new v_map=%s" % v_map)
                put_file(values_url, json.dumps(v_map, indent=4))

            v_map = {}
            try:
                values_url = get_url_with_credentials(
                    local_settings,
                    os.path.join(run_inputdir, "initial", VALUES_MAP_FILE),
                    is_relative_path=False)
                logger.debug("values_url=%s" % values_url)
                values_content = get_file(values_url)
                logger.debug("values_content=%s" % values_content)
                v_map = dict(json.loads(values_content))
            except IOError:
                logger.warn("no values file found")
            except ValueError:
                logger.error("problem parsing contents of %s"
                             % VALUES_MAP_FILE)
            v_map.update(starting_map)
            v_map.update(context)
            logger.debug("new v_map=%s" % v_map)
            put_file(values_url, json.dumps(v_map, indent=4))

            # Set random numbers for subdirective
            logger.debug("run_settings=%s" % pformat(run_settings))
            if rands:
                setval(run_settings, '%s/input/hrmc/iseed' % RMIT_SCHEMA,
                       rands[i])

            if input_loc:
                # Set revised input_location for subdirective
                setval(run_settings, input_loc,
                       "%s/%s/%s" % (self.scratch_platform,
                                     SUBDIRECTIVE_DIR
                                     % {'run_counter': str(run_counter)},
                                     FIRST_ITERATION_DIR))

            # Redirect input
            run_input_storage_name, run_input_storage_offset = \
                _parse_input_location(run_settings,
                                      "local/sweep%s/run%s/input_0"
                                      % (contextid, run_counter))
            # setval(run_settings,
            #        '%s/platform/storage/input/platform_url' % RMIT_SCHEMA,
            #        run_input_storage_name)
            # setval(run_settings,
            #        '%s/platform/storage/input/offset' % RMIT_SCHEMA,
            #        run_input_storage_offset)
            logger.debug("run_settings=%s" % pformat(run_settings))
            try:
                _submit_subdirective("nectar", run_settings, user,
                                     current_context)
            except Exception as e:
                logger.error(e)
                raise
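# _expand_variations() (called in process() above) is defined elsewhere.
# A minimal sketch of the behaviour the sweep relies on -- a cartesian
# product over the JSON sweep_map, each combination tagged with the
# run_counter that is later matched against process ids -- under the
# assumption that map values are lists of candidate values:
import itertools

def _expand_variations_sketch(maps, values):
    # hypothetical stand-in for illustration only
    runs = []
    run_counter = 1
    for sweep_map in maps:
        keys = sorted(sweep_map.keys())
        for combo in itertools.product(*(sweep_map[k] for k in keys)):
            context = dict(values)
            context.update(dict(zip(keys, combo)))
            context['run_counter'] = run_counter
            run_counter += 1
            runs.append(context)
    return runs

# e.g. _expand_variations_sketch([{'a': [1, 2], 'b': [3]}], {}) yields
# [{'a': 1, 'b': 3, 'run_counter': 1}, {'a': 2, 'b': 3, 'run_counter': 2}]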