def copy_to_scratch_space(self, run_settings, local_settings):
    """Copy the user's initial input data into ``<job_dir>/input_0`` on the
    output storage platform, ready for the first iteration.
    """
    output_schema = 'http://rmit.edu.au/schemas/platform/storage/output'
    bdp_username = run_settings['http://rmit.edu.au/schemas/bdp_userprofile']['username']
    output_storage_url = run_settings[output_schema]['platform_url']
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    # Record this run's offset in run_settings, then derive the job directory from it.
    run_settings[output_schema]['offset'] = self.output_loc_offset
    offset = run_settings[output_schema]['offset']
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    iter_inputdir = os.path.join(self.job_dir, "input_0")
    logger.debug("iter_inputdir=%s" % iter_inputdir)
    input_location = run_settings[RMIT_SCHEMA + '/input/system']['input_location']
    logger.debug("input_location=%s" % input_location)
    # todo: input location will eventually be replaced by the scratch space
    # that was used by the sweep; the sweep will indicate its location in
    # run_settings.  todo: add scheme (ssh) to input_location.
    source_url = get_url_with_credentials(local_settings, input_location)
    logger.debug("source_url=%s" % source_url)
    destination = '%s://%s@%s' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'],
                                  iter_inputdir)
    destination_url = get_url_with_credentials(
        output_storage_settings, destination, is_relative_path=False)
    logger.debug("destination_url=%s" % destination_url)
    storage.copy_directories(source_url, destination_url)
def copy_to_scratch_space(self, run_settings, local_settings):
    """Stage the job's initial input into the iteration-zero input directory
    (``input_0``) under the job's scratch area on the output platform.
    """
    # Resolve the output storage platform for this user.
    username = run_settings['http://rmit.edu.au/schemas/bdp_userprofile']['username']
    platform_url = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['platform_url']
    storage_settings = manage.get_platform_settings(platform_url, username)
    # Publish this run's offset back into run_settings before deriving the job dir.
    run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset'] = self.output_loc_offset
    self.job_dir = manage.get_job_dir(
        storage_settings,
        run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset'])
    initial_input_dir = os.path.join(self.job_dir, "input_0")
    logger.debug("iter_inputdir=%s" % initial_input_dir)
    source_location = run_settings[RMIT_SCHEMA + '/input/system']['input_location']
    logger.debug("input_location=%s" % source_location)
    # todo: input location will eventually be replaced by the scratch space
    # used by the sweep (sweep will record it in run_settings); also add a
    # scheme (ssh) to the input location.
    src_url = get_url_with_credentials(local_settings, source_location)
    logger.debug("source_url=%s" % src_url)
    dst_url = get_url_with_credentials(
        storage_settings,
        '%s://%s@%s' % (storage_settings['scheme'],
                        storage_settings['type'],
                        initial_input_dir),
        is_relative_path=False)
    logger.debug("destination_url=%s" % dst_url)
    storage.copy_directories(src_url, dst_url)
def copy_to_scratch_space(self, run_settings, local_settings, result_offset):
    """Copy input data from the input storage platform into the job's
    ``result_offset`` directory on the output storage platform.

    The input location is looked up from three schema namespaces in turn
    (``input/system``, ``input/location``, ``input/location/input``) because
    different job types register it under different keys.
    """
    bdp_username = run_settings['%s/bdp_userprofile' % django_settings.SCHEMA_PREFIX]['username']
    output_storage_url = run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['platform_url']
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    # Record this run's offset, then derive the job directory from it.
    run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['offset'] = self.output_loc_offset
    offset = run_settings['%s/platform/storage/output' % django_settings.SCHEMA_PREFIX]['offset']
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    iter_inputdir = os.path.join(self.job_dir, result_offset)
    logger.debug("iter_inputdir=%s" % iter_inputdir)
    input_storage_settings = self.get_platform_settings(
        run_settings, '%s/platform/storage/input' % django_settings.SCHEMA_PREFIX)
    # Fall back through the known schema locations for input_location.
    try:
        input_location = getval(run_settings,
                                django_settings.SCHEMA_PREFIX + '/input/system/input_location')
    except SettingNotFoundException:
        try:
            input_location = getval(run_settings,
                                    django_settings.SCHEMA_PREFIX + '/input/location/input_location')
        # was a bare `except:`; narrowed so unrelated errors aren't swallowed
        except SettingNotFoundException:
            input_location = getval(run_settings,
                                    django_settings.SCHEMA_PREFIX + '/input/location/input/input_location')
    logger.debug("input_location=%s" % input_location)
    # todo: input location will eventually be replaced by the scratch space
    # that was used by the sweep; the sweep will record its location in
    # run_settings.  todo: add scheme (ssh) to input location.
    input_offset = run_settings['%s/platform/storage/input' % django_settings.SCHEMA_PREFIX]['offset']
    input_url = "%s://%s@%s/%s" % (input_storage_settings['scheme'],
                                   input_storage_settings['type'],
                                   input_storage_settings['host'],
                                   input_offset)
    source_url = get_url_with_credentials(
        input_storage_settings, input_url, is_relative_path=False)
    logger.debug("source_url=%s" % source_url)
    destination_url = get_url_with_credentials(
        output_storage_settings,
        '%s://%s@%s' % (output_storage_settings['scheme'],
                        output_storage_settings['type'],
                        iter_inputdir),
        is_relative_path=False)
    logger.debug("destination_url=%s" % destination_url)
    storage.copy_directories(source_url, destination_url)
def process(self, run_settings):
    """Transform stage: process this iteration's outputs and, if enabled,
    curate an intermediate dataset into MyTardis.

    Sets ``self.job_dir``, ``self.output_dir``, ``self.input_dir``,
    ``self.new_input_dir`` and ``self.experiment_id`` for later stages.
    """
    try:
        stage_id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        stage_id = 0
    messages.info(run_settings, '%d: transforming' % (stage_id + 1))
    bdp_username = getval(run_settings,
                          '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(run_settings,
                                '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings,
                    '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Bug fix: this line previously reassigned self.output_dir, leaving
        # self.input_dir unset on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(getval(run_settings,
                                        '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(output_storage_settings,
                                          output_prefix + self.output_dir,
                                          is_relative_path=False)
    outputs = self.process_outputs(run_settings, self.job_dir, output_url,
                                   output_storage_settings, offset)
    try:
        curate_data = getval(run_settings,
                             '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(run_settings,
                              '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
        mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(run_settings,
                                           '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        try:
            # Load the MyTardis hook lazily; misconfiguration is logged, not fatal.
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            logger.debug('self_outpus=%s' % outputs)
            self.experiment_id = mytardis_platform.create_dataset_for_intermediate_output(
                run_settings, self.experiment_id, self.job_dir, output_url,
                all_settings, outputs=outputs)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Converge stage: evaluate this iteration's outputs against the
    convergence criterion and, when iteration is complete, copy the final
    iteration's output to ``<job_dir>/output`` and optionally curate it
    into MyTardis.
    """
    try:
        stage_id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        stage_id = 0
    messages.info(run_settings, '%d: converging' % (stage_id + 1))

    def retrieve_local_settings(run_settings, local_settings):
        # Pull selected run settings into local_settings (the historical list
        # of per-stage schema keys was commented out here; see VCS history).
        update(local_settings, run_settings)
        local_settings['bdp_username'] = getval(
            run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)

    local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
    retrieve_local_settings(run_settings, local_settings)
    bdp_username = local_settings['bdp_username']

    # Resolve output storage platform and the job directory for this run.
    output_storage_url = getval(run_settings,
                                '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(output_storage_url, bdp_username)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    job_dir = manage.get_job_dir(output_storage_settings, offset)

    # Set up paths for this iteration and the next one.
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(job_dir, "output_%d" % self.id)
        self.iter_inputdir = os.path.join(job_dir, "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        self.output_dir = os.path.join(job_dir, "output")
        self.iter_inputdir = os.path.join(job_dir, "input")
        self.id = 0
    logger.debug('output_dir=%s iter_inputdir=%s' % (self.output_dir, self.iter_inputdir))

    try:
        self.experiment_id = int(getval(run_settings,
                                        '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0

    inputdir_url = get_url_with_credentials(output_storage_settings,
                                            output_prefix + self.iter_inputdir,
                                            is_relative_path=False)
    logger.debug('input_dir_url=%s' % inputdir_url)

    (self.done_iterating, self.criterion) = self.process_outputs(
        run_settings, job_dir, inputdir_url, output_storage_settings)

    if self.done_iterating:
        logger.debug("Total Iterations: %d" % self.id)
        # Copy the final iteration's output directory to the canonical
        # <job_dir>/output location.  (Dead string-building assignments to
        # source_url/dest_url that were immediately overwritten have been
        # removed, as has a redundant recomputation of output_prefix.)
        iter_output_dir = os.path.join(job_dir, "output_%s" % self.id)
        new_output_dir = os.path.join(job_dir, 'output')
        source_url = get_url_with_credentials(output_storage_settings,
                                              output_prefix + iter_output_dir,
                                              is_relative_path=False)
        dest_url = get_url_with_credentials(output_storage_settings,
                                            output_prefix + new_output_dir,
                                            is_relative_path=False)
        storage.copy_directories(source_url, dest_url)

        # Curate the final result into MyTardis if requested.
        try:
            curate_data = getval(run_settings,
                                 '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
        except SettingNotFoundException:
            curate_data = 0
        if curate_data:
            mytardis_url = getval(run_settings,
                                  '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
            mytardis_settings = manage.get_platform_settings(mytardis_url, bdp_username)
            all_settings = dict(mytardis_settings)
            all_settings.update(output_storage_settings)
            logger.debug("source_url=%s" % source_url)
            logger.debug("dest_url=%s" % dest_url)
            logger.debug("job_dir=%s" % job_dir)
            self.experiment_id = self.curate_dataset(run_settings,
                                                     self.experiment_id,
                                                     job_dir, dest_url,
                                                     all_settings)
        else:
            logger.warn('Data curation is off')
# NOTE(review): fragment of an execute-stage process() method — the enclosing
# `def` and the body of the trailing `try:` are outside this view.
# Gather the user's profile settings and this stage's execution settings.
local_settings = getvals(
    run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
self.set_execute_settings(run_settings, local_settings)
self.contextid = getval(
    run_settings,
    '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
# NB: Don't catch SettingNotFoundException because we can't recover
# run_settings['%s/system' % django_settings.SCHEMA_PREFIX][u'contextid']
logger.debug('contextid=%s' % self.contextid)
# Resolve the output storage platform and the job directory for this run.
output_storage_url = getval(
    run_settings,
    '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
output_storage_settings = manage.get_platform_settings(
    output_storage_url,
    local_settings['bdp_username'])
offset = getval(
    run_settings,
    '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
self.job_dir = manage.get_job_dir(output_storage_settings, offset)
# TODO: we assume initial input is in "%s/input_0" % self.job_dir
# in configure stage we could copy initial data in 'input_location'
# into this location
try:
    # Current iteration id selects input_<id>; absent/invalid id means the
    # first pass, which reads from "input_location" instead.
    self.id = int(getval(run_settings,
                         '%s/system/id' % django_settings.SCHEMA_PREFIX))
    self.iter_inputdir = os.path.join(
        self.job_dir, "input_%s" % self.id)
except (SettingNotFoundException, ValueError):
    self.id = 0
    self.iter_inputdir = os.path.join(self.job_dir, "input_location")
messages.info(run_settings, "%s: Executing" % (self.id + 1))
logger.debug("id = %s" % self.id)
try:
# NOTE(review): fragment of an execute-stage process() method (RMIT_SCHEMA
# variant) — the enclosing `def` and the code that sets initial_numbfile
# after the final warning are outside this view.
logger.debug("processing execute stage")
# Gather the user's profile settings and this stage's execution settings.
local_settings = getvals(run_settings, models.UserProfile.PROFILE_SCHEMA_NS)
#self.retrieve_boto_settings(run_settings, local_settings)
self.set_execute_settings(run_settings, local_settings)
self.contextid = getval(run_settings, '%s/system/contextid' % RMIT_SCHEMA)
# NB: Don't catch SettingNotFoundException because we can't recover
# run_settings['http://rmit.edu.au/schemas/system'][u'contextid']
# Resolve the output storage platform and the job directory for this run.
output_storage_url = getval(run_settings,
                            '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
output_storage_settings = manage.get_platform_settings(output_storage_url,
                                                       local_settings['bdp_username'])
offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
# offset = run_settings['http://rmit.edu.au/schemas/platform/storage/output']['offset']
self.job_dir = manage.get_job_dir(output_storage_settings, offset)
# TODO: we assume initial input is in "%s/input_0" % self.job_dir
# in configure stage we could copy initial data in 'input_location' into this location
try:
    # Current iteration id selects input_<id>; absent/invalid id means the
    # first pass, which reads from "input_location" instead.
    self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    self.iter_inputdir = os.path.join(self.job_dir, "input_%s" % self.id)
except (SettingNotFoundException, ValueError):
    self.id = 0
    self.iter_inputdir = os.path.join(self.job_dir, "input_location")
messages.info(run_settings, "%s: execute" % (self.id + 1))
logger.debug("id = %s" % self.id)
try:
    self.initial_numbfile = int(getval(run_settings,
                                       '%s/stages/run/initial_numbfile' % RMIT_SCHEMA))
except (SettingNotFoundException, ValueError):
    # First iteration: initial_numbfile not yet recorded (set just below,
    # outside this view).
    logger.warn("setting initial_numbfile for first iteration")
def process(self, run_settings):
    """Transform stage (RMIT_SCHEMA variant): process this iteration's
    outputs and, if enabled, curate the dataset into MyTardis.

    Sets ``self.job_dir``, ``self.output_dir``, ``self.input_dir``,
    ``self.new_input_dir`` and ``self.experiment_id`` for later stages.
    """
    bdp_username = getval(run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)
    output_storage_url = getval(
        run_settings, '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Bug fix: this line previously reassigned self.output_dir, leaving
        # self.input_dir unset on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(getval(
            run_settings, '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(
        output_storage_settings,
        output_prefix + self.output_dir, is_relative_path=False)
    self.process_outputs(run_settings, self.job_dir, output_url,
                         output_storage_settings, offset)
    # (A large block of commented-out legacy HRMC post-processing code —
    # criterion computation, output ranking and file shuffling into the next
    # input directory — was removed here; see VCS history if needed.)
    # curate dataset into mytardis
    try:
        curate_data = getval(run_settings, '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings, '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
        mytardis_settings = manage.get_platform_settings(
            mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        self.experiment_id = self.curate_dataset(run_settings,
                                                 self.experiment_id,
                                                 self.job_dir, output_url,
                                                 all_settings)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Transform stage: process this iteration's outputs and, if enabled,
    curate an intermediate dataset into MyTardis via the platform hook.

    Sets ``self.job_dir``, ``self.output_dir``, ``self.input_dir``,
    ``self.new_input_dir`` and ``self.experiment_id`` for later stages.
    """
    try:
        stage_id = int(
            getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        stage_id = 0
    messages.info(run_settings, '%d: transforming' % (stage_id + 1))
    bdp_username = getval(
        run_settings,
        '%s/bdp_userprofile/username' % django_settings.SCHEMA_PREFIX)
    output_storage_url = getval(
        run_settings,
        '%s/platform/storage/output/platform_url' % django_settings.SCHEMA_PREFIX)
    output_storage_settings = manage.get_platform_settings(
        output_storage_url, bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(
        run_settings,
        '%s/platform/storage/output/offset' % django_settings.SCHEMA_PREFIX)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(
            getval(run_settings, '%s/system/id' % django_settings.SCHEMA_PREFIX))
        self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir, "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Bug fix: this line previously reassigned self.output_dir, leaving
        # self.input_dir unset on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(
            getval(run_settings,
                   '%s/input/mytardis/experiment_id' % django_settings.SCHEMA_PREFIX))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(output_storage_settings,
                                          output_prefix + self.output_dir,
                                          is_relative_path=False)
    outputs = self.process_outputs(run_settings, self.job_dir, output_url,
                                   output_storage_settings, offset)
    try:
        curate_data = getval(
            run_settings,
            '%s/input/mytardis/curate_data' % django_settings.SCHEMA_PREFIX)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(
            run_settings,
            '%s/input/mytardis/mytardis_platform' % django_settings.SCHEMA_PREFIX)
        mytardis_settings = manage.get_platform_settings(
            mytardis_url, bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        all_settings['contextid'] = getval(
            run_settings,
            '%s/system/contextid' % django_settings.SCHEMA_PREFIX)
        try:
            # Load the MyTardis hook lazily; misconfiguration is logged, not fatal.
            mytardis_platform = jobs.safe_import(
                'chiminey.platform.mytardis.MyTardisPlatform', [], {})
            logger.debug('self_outpus=%s' % outputs)
            self.experiment_id = mytardis_platform.create_dataset_for_intermediate_output(
                run_settings, self.experiment_id, self.job_dir, output_url,
                all_settings, outputs=outputs)
        except ImproperlyConfigured as e:
            logger.error("Cannot load mytardis platform hook %s" % e)
    else:
        logger.warn('Data curation is off')
def process(self, run_settings):
    """Transform stage (RMIT_SCHEMA variant): process this iteration's
    outputs and, if enabled, curate the dataset into MyTardis.

    Sets ``self.job_dir``, ``self.output_dir``, ``self.input_dir``,
    ``self.new_input_dir`` and ``self.experiment_id`` for later stages.
    """
    bdp_username = getval(run_settings, '%s/bdp_userprofile/username' % RMIT_SCHEMA)
    output_storage_url = getval(run_settings,
                                '%s/platform/storage/output/platform_url' % RMIT_SCHEMA)
    output_storage_settings = manage.get_platform_settings(output_storage_url,
                                                           bdp_username)
    logger.debug("output_storage_settings=%s" % output_storage_settings)
    output_prefix = '%s://%s@' % (output_storage_settings['scheme'],
                                  output_storage_settings['type'])
    offset = getval(run_settings, '%s/platform/storage/output/offset' % RMIT_SCHEMA)
    self.job_dir = manage.get_job_dir(output_storage_settings, offset)
    try:
        self.id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
        self.output_dir = os.path.join(self.job_dir, "output_%s" % self.id)
        self.input_dir = os.path.join(self.job_dir, "input_%d" % self.id)
        self.new_input_dir = os.path.join(self.job_dir,
                                          "input_%d" % (self.id + 1))
    except (SettingNotFoundException, ValueError):
        # FIXME: Not clear that this a valid path through stages
        self.output_dir = os.path.join(self.job_dir, "output")
        # Bug fix: this line previously reassigned self.output_dir, leaving
        # self.input_dir unset on this path.
        self.input_dir = os.path.join(self.job_dir, "input")
        self.new_input_dir = os.path.join(self.job_dir, "input_1")
    logger.debug('self.output_dir=%s' % self.output_dir)
    try:
        self.experiment_id = int(getval(run_settings,
                                        '%s/input/mytardis/experiment_id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        self.experiment_id = 0
    output_url = get_url_with_credentials(
        output_storage_settings,
        output_prefix + self.output_dir, is_relative_path=False)
    self.process_outputs(run_settings, self.job_dir, output_url,
                         output_storage_settings, offset)
    # (A large block of commented-out legacy HRMC post-processing code —
    # criterion computation, output ranking and file shuffling into the next
    # input directory — was removed here; see VCS history if needed.)
    # curate dataset into mytardis
    try:
        curate_data = getval(run_settings,
                             '%s/input/mytardis/curate_data' % RMIT_SCHEMA)
    except SettingNotFoundException:
        curate_data = 0
    if curate_data:
        mytardis_url = getval(run_settings,
                              '%s/input/mytardis/mytardis_platform' % RMIT_SCHEMA)
        mytardis_settings = manage.get_platform_settings(mytardis_url,
                                                         bdp_username)
        all_settings = dict(mytardis_settings)
        all_settings.update(output_storage_settings)
        self.experiment_id = self.curate_dataset(run_settings,
                                                 self.experiment_id,
                                                 self.job_dir, output_url,
                                                 all_settings)
    else:
        logger.warn('Data curation is off')