def update_training_service_config(config, training_service, config_file_path):
    """Merge training-service-specific settings into *config* (in place).

    config            -- experiment configuration dict, updated in place
    training_service  -- training service name ('local', 'kubeflow', 'adl', ...)
    config_file_path  -- path of the test-case config file, used by the adl hack
    """
    ts_config = get_yml_content(os.path.join('config', 'training_service.yml'))
    trial = config['trial']

    if training_service == 'kubeflow':
        # kubeflow keeps the trial command on its worker role, and gpuNum is
        # taken from training_service.yml instead of the test case
        ts_config['kubeflow']['trial']['worker']['command'] = trial['command']
        trial.pop('command')
        trial.pop('gpuNum', None)
    if training_service == 'frameworkcontroller':
        # frameworkcontroller keeps the trial command on its first task role
        ts_config['frameworkcontroller']['trial']['taskRoles'][0]['command'] = trial['command']
        trial.pop('command')
        trial.pop('gpuNum', None)
    if training_service == 'adl':
        # adl runs trials inside a container: translate the repo-relative
        # codeDir of the test case into the matching in-container path
        container_code_dir = trial['codeDir']
        if trial['codeDir'] == '.':
            # metric-test folders map to the folder of the config file
            container_code_dir = '/' + config_file_path[:config_file_path.rfind('/')]
        elif trial['codeDir'] == '../naive_trial':
            container_code_dir = '/test/config/naive_trial'
        elif '../../../' in trial['codeDir']:
            # example folders map to the container root
            container_code_dir = trial['codeDir'].replace('../../../', '/')
        ts_config['adl']['trial']['codeDir'] = container_code_dir
        ts_config['adl']['trial']['command'] = 'cd {0} && {1}'.format(container_code_dir, trial['command'])

    if training_service == 'hybrid':
        # hybrid uses the v2 schema file, which has no 'all' section
        ts_config = get_yml_content(os.path.join('config', 'training_service_v2.yml'))
    else:
        deep_update(config, ts_config['all'])
    deep_update(config, ts_config[training_service])
def prepare_config_file(test_case_config, it_config, args):
    """Build a temporary experiment config file for one integration test case.

    Reads the test case's YAML config, layers test-case-specific overrides
    and training-service settings on top, writes the merged result next to
    the original as <configFile>.tmp and returns that path.
    """
    config_path = args.nni_source_dir + test_case_config['configFile']
    experiment_config = get_yml_content(config_path)

    # test-case-specific overrides win over the base config file
    overrides = test_case_config.get('config')
    if overrides is not None:
        deep_update(experiment_config, overrides)

    # local training on Windows has no 'python3' launcher alias
    if sys.platform == 'win32' and args.ts == 'local':
        command = experiment_config['trial']['command']
        experiment_config['trial']['command'] = command.replace('python3', 'python')

    # training-service settings (gpuNum, logCollection, ...) overwrite the
    # user's values; the kubeflow command hack must be applied last
    update_training_service_config(experiment_config, args.ts,
                                   test_case_config['configFile'],
                                   args.nni_source_dir, args.reuse_mode)

    # dump the merged config so the experiment can be launched from it
    new_config_file = config_path + '.tmp'
    dump_yml_content(new_config_file, experiment_config)
    print(yaml.safe_dump(experiment_config, default_flow_style=False), flush=True)

    return new_config_file
def gen_new_config(config_file, training_service='local'):
    """Generate a temporary config file for an integration test.

    Returns (new_config_file, config); the .tmp file should be deleted
    after testing.
    """
    config = get_yml_content(config_file)
    new_config_file = config_file + '.tmp'
    it_config = get_yml_content('training_service.yml')

    # kubeflow/frameworkcontroller carry the trial command on their own role
    # sections, so move it out of the plain trial config before merging; the
    # test case's gpuNum is dropped in favor of the training-service value
    if training_service in ('kubeflow', 'frameworkcontroller'):
        trial = config['trial']
        ts_trial = it_config[training_service]['trial']
        if training_service == 'kubeflow':
            ts_trial['worker']['command'] = trial['command']
        else:
            ts_trial['taskRoles'][0]['command'] = trial['command']
        trial.pop('command')
        trial.pop('gpuNum', None)

    deep_update(config, it_config['all'])
    deep_update(config, it_config[training_service])
    dump_yml_content(new_config_file, config)
    return new_config_file, config
def update_training_service_config(config, training_service, config_file_path, nni_source_dir):
    """Merge training-service-specific settings into *config* (in place).

    config            -- experiment configuration dict, updated in place
    training_service  -- target training service name
    config_file_path  -- test-case config path, relative to the NNI source dir
    nni_source_dir    -- root of the NNI source checkout
    """
    it_ts_config = get_yml_content(
        os.path.join('config', 'training_service.yml'))
    # hack for kubeflow trial config: the command lives on the worker role,
    # and gpuNum comes from training_service.yml rather than the test case
    if training_service == 'kubeflow':
        it_ts_config[training_service]['trial']['worker']['command'] = config[
            'trial']['command']
        config['trial'].pop('command')
        if 'gpuNum' in config['trial']:
            config['trial'].pop('gpuNum')
    # frameworkcontroller: same hack, but the command lives on the first task role
    if training_service == 'frameworkcontroller':
        it_ts_config[training_service]['trial']['taskRoles'][0][
            'command'] = config['trial']['command']
        config['trial'].pop('command')
        if 'gpuNum' in config['trial']:
            config['trial'].pop('gpuNum')
    if training_service == 'adl':
        # hack for adl trial config, codeDir in adl mode refers to path in container
        containerCodeDir = config['trial']['codeDir']
        # replace metric test folders to container folder
        if config['trial']['codeDir'] == '.':
            containerCodeDir = '/' + config_file_path[:config_file_path.
                                                      rfind('/')]
        elif config['trial']['codeDir'] == '../naive_trial':
            containerCodeDir = '/test/config/naive_trial'
        elif '../../../' in config['trial']['codeDir']:
            # replace example folders to container folder
            containerCodeDir = config['trial']['codeDir'].replace(
                '../../../', '/')
        it_ts_config[training_service]['trial']['codeDir'] = containerCodeDir
        it_ts_config[training_service]['trial'][
            'command'] = 'cd {0} && {1}'.format(containerCodeDir,
                                                config['trial']['command'])
    if training_service == 'remote':
        # keep only the sharedStorage keys relevant to the storage type the
        # test case asks for; drop the section entirely when not requested
        testcase_config = get_yml_content(nni_source_dir + config_file_path)
        sharedStorage = testcase_config.get('sharedStorage')
        if sharedStorage is None:
            it_ts_config[training_service].pop('sharedStorage')
        elif str(sharedStorage.get('storageType')).lower() == 'nfs':
            # NFS does not need the azure account key
            it_ts_config[training_service].get('sharedStorage').pop(
                'storageAccountKey')
        elif str(sharedStorage.get('storageType')).lower() == 'azureblob':
            # azure blob does not need the NFS server/export settings
            it_ts_config[training_service].get('sharedStorage').pop(
                'nfsServer')
            it_ts_config[training_service].get('sharedStorage').pop(
                'exportedDirectory')
        else:
            # unknown storage type: fall back to no shared storage
            it_ts_config[training_service].pop('sharedStorage')
    if training_service == 'hybrid':
        # hybrid uses the v2 schema file, which has no 'all' section
        it_ts_config = get_yml_content(
            os.path.join('config', 'training_service_v2.yml'))
    else:
        deep_update(config, it_ts_config['all'])
    deep_update(config, it_ts_config[training_service])
def update_training_service_config(config, training_service):
    """Merge settings for *training_service* from config/training_service.yml
    into *config* (mutated in place)."""
    ts_config = get_yml_content(os.path.join('config', 'training_service.yml'))

    # kubeflow/frameworkcontroller carry the trial command on their own role
    # sections, so move it out of the plain trial config before merging; the
    # test case's gpuNum is dropped in favor of the training-service value
    if training_service == 'kubeflow':
        ts_config['kubeflow']['trial']['worker']['command'] = config['trial']['command']
        config['trial'].pop('command')
        config['trial'].pop('gpuNum', None)
    elif training_service == 'frameworkcontroller':
        ts_config['frameworkcontroller']['trial']['taskRoles'][0]['command'] = config['trial']['command']
        config['trial'].pop('command')
        config['trial'].pop('gpuNum', None)

    deep_update(config, ts_config['all'])
    deep_update(config, ts_config[training_service])
def __get_config(self, base_filename):
    """Load <base_filename>.json and overlay it on the reference config.

    The reference is always read from config.json.example; when a custom
    config exists its values are deep-merged on top of the reference,
    otherwise the reference is returned unchanged.
    """
    reference = self.read_json("config.json.example")  # always use this filename.
    custom = self.read_json("{}.json".format(base_filename))
    if not custom:
        return reference
    return deep_update(reference, custom)
def get_settings():
    """Return the Toolhub settings dict.

    Project defaults are deep-merged with the optional TOOLHUB dict from
    Django settings; user values win over defaults.
    """
    defaults = {
        "auth": {
            "use_allauth": False,
            "use_password_auth": True,
            "slack": {"required_group": None},
        },
        "messages": {
            "non_member": "You must be a member of the space to access Toolhub"
        },
    }
    overrides = getattr(settings, "TOOLHUB", {})
    return deep_update(defaults, overrides)
def __get_config(self, base_filename):
    """Load <base_filename>.json merged on top of its .example reference.

    Exits the process when the reference config cannot be read, since the
    application cannot run without it.
    """
    filename = "{}.json".format(base_filename)
    reference_filename = "{}.example".format(filename)
    reference_config = self.read_json(reference_filename)
    # Fix: the original tested `reference_filename`, a freshly formatted
    # non-empty string that is always truthy, so a missing or invalid
    # reference file was never detected. Test the loaded content instead
    # (matching the sibling __get_colors/__get_layout helpers).
    if not reference_config:
        debug.error("Invalid {} reference config file. Make sure {} exists.".format(base_filename, base_filename))
        sys.exit(1)
    custom_config = self.read_json(filename)
    if custom_config:
        # user config wins over reference defaults
        new_config = deep_update(reference_config, custom_config)
        return new_config
    return reference_config
def __get_colors(self, base_filename):
    """Return the LED color map for *base_filename*.

    Loads the bundled .example reference and deep-merges any user
    customizations over it. Exits when the reference file is missing.
    """
    colors_path = "ledcolors/{}.json".format(base_filename)
    reference = self.read_json("{}.example".format(colors_path))
    if not reference:
        debug.error("Invalid {} reference color file. Make sure {} exists in ledcolors/".format(base_filename, base_filename))
        sys.exit(1)

    overrides = self.read_json(colors_path)
    if not overrides:
        return reference
    debug.info("Custom '{}.json' colors found. Merging with default reference colors.".format(base_filename))
    return deep_update(reference, overrides)
def __init__(self, config):
    """Set up the trainer from *config* merged over the class defaults.

    config -- user configuration dict; its values override
              self._default_config and must provide 'max_atom_num'
              and a 'model' section of MoldaModel keyword arguments.
    """
    # NOTE(review): if deep_update mutates its first argument in place,
    # this also changes the shared class-level _default_config — confirm
    # deep_update returns a fresh dict.
    self.config = deep_update(self._default_config, config)
    # use the GPU only when CUDA_VISIBLE_DEVICES is set to a non-empty value
    self.device = (torch.device("cuda") if bool(
        os.environ.get("CUDA_VISIBLE_DEVICES", None)) else torch.device("cpu"))
    # maximal atomic number is the x_dim of model
    self.max_atom_num = self.config['max_atom_num']
    self.model = MoldaModel(self.max_atom_num, **self.config['model'])
    # convenience aliases into the composite model
    self.discriminator = self.model.discriminator
    self.generator = self.model.generator
    # total parameter count, for reporting
    self.num_params = sum(p.numel() for p in self.model.parameters())
    # freeze all parameters up front; presumably re-enabled selectively by
    # the training code — confirm
    self.model.requires_grad_(False)
def __get_config(self, base_filename):
    """Load <base_filename>.json merged on top of its .example reference.

    Exits the process when the reference config cannot be read, since the
    application cannot run without it.
    """
    filename = "{}.json".format(base_filename)
    reference_filename = "{}.example".format(filename)
    reference_config = self.read_json(reference_filename)
    # Fix: the original tested `reference_filename`, a freshly formatted
    # non-empty string that is always truthy, so a missing or invalid
    # reference file was never detected. Test the loaded content instead.
    if not reference_config:
        debug.error(
            "Invalid {} reference config file. Make sure {} exists.".
            format(base_filename, base_filename))
        sys.exit(1)
    custom_config = self.read_json(filename)
    if custom_config:
        # user config wins over reference defaults
        new_config = deep_update(reference_config, custom_config)
        return new_config
    return reference_config
def gen_new_config(config_file, training_service='local'):
    """Generate a temporary config file for an integration test.

    Returns (new_config_file, config); the .tmp file should be deleted
    after testing.
    """
    config = get_yml_content(config_file)
    new_config_file = config_file + '.tmp'
    ts = get_yml_content('training_service.yml')[training_service]
    print(config)
    print(ts)

    # kubeflow carries the trial command on its worker role, and gpuNum is
    # taken from the training-service config instead of the test case
    if training_service == 'kubeflow':
        trial = config['trial']
        ts['trial']['worker']['command'] = trial['command']
        trial.pop('command')
        trial.pop('gpuNum', None)

    deep_update(config, ts)
    print(config)
    dump_yml_content(new_config_file, config)
    return new_config_file, config
def __get_layout(self, width, height):
    """Return the LED coordinate layout for a width x height matrix.

    Loads the bundled reference layout and deep-merges any user
    customizations over it. Exits when the dimensions are unsupported
    (no reference layout file exists for them).
    """
    layout_path = "ledcoords/w{}h{}.json".format(width, height)
    reference = self.read_json("{}.example".format(layout_path))
    if not reference:
        # Unsupported coordinates
        debug.error("Invalid matrix dimensions provided. See top of README for supported dimensions.\nIf you would like to see new dimensions supported, please file an issue on GitHub!")
        sys.exit(1)

    # Load and merge any layout customizations
    custom = self.read_json(layout_path)
    if not custom:
        return reference
    debug.info("Custom '{}x{}.json' found. Merging with default reference layout.".format(width, height))
    return deep_update(reference, custom)
def __init__(self, config):
    """Set up the ArwenModel trainer.

    config -- user configuration dict, merged over self._default_config;
              must contain a 'model' section with 'mem_len' and 'z_dim'.
    """
    # NOTE(review): if deep_update mutates its first argument in place,
    # this also changes the shared class-level _default_config — confirm
    # deep_update returns a fresh dict.
    self.config = deep_update(self._default_config, config)
    # use the GPU only when CUDA_VISIBLE_DEVICES is set to a non-empty value
    self.device = (torch.device("cuda") if bool(
        os.environ.get("CUDA_VISIBLE_DEVICES", None)) else torch.device("cpu"))
    # NOTE(review): reads the raw *config*, not the merged self.config, so
    # any 'model' defaults from _default_config are ignored here — confirm
    # this is intended.
    model_config = config['model']
    mem_len = model_config['mem_len']
    z_dim = model_config['z_dim']
    self.model = ArwenModel(device=self.device, mem_len=mem_len, z_dim=z_dim)
    # convenience aliases into the composite model
    self.discriminator = self.model.discriminator
    self.generator = self.model.generator
    # total parameter count, for reporting
    self.num_params = sum(p.numel() for p in self.model.parameters())
    # freeze all parameters up front; presumably re-enabled selectively by
    # the training code — confirm
    self.model.requires_grad_(False)
def batchor(url):
    """Create batch/campaign configuration for newly approved RelVal workflows.

    Scans assignment-approved TaskChain workflows from the configured relval
    users, groups them by campaign (regular and heavy-ion separately),
    registers new batches with a default setup (routed/randomized site
    whitelist), announces new batches by email/log, and retires campaigns
    whose workflows are all past the active statuses.

    url -- request-manager service URL passed through to the workflow queries

    Python 2 code (print statements, filter returning a list).
    """
    UC = unifiedConfiguration()
    SI = global_SI()
    CI = campaignInfo()
    BI = batchInfo()
    ## get all workflows in assignment-approved with SubRequestType = relval
    all_wfs = []
    for user in UC.get("user_relval"):
        all_wfs.extend(
            getWorkflows(url,
                         'assignment-approved',
                         details=True,
                         user=user,
                         rtype='TaskChain'))
    wfs = filter(
        lambda r: r['SubRequestType'] == 'RelVal'
        if 'SubRequestType' in r else False, all_wfs)
    ## need a special treatment for those
    hi_wfs = filter(
        lambda r: r['SubRequestType'] == 'HIRelVal'
        if 'SubRequestType' in r else False, all_wfs)

    # group workflow PrepIDs by campaign
    by_campaign = defaultdict(set)
    by_hi_campaign = defaultdict(set)
    for wf in wfs:
        print "Relval:", wf['RequestName'], wf['Campaign']
        by_campaign[wf['Campaign']].add(wf['PrepID'])

    for wf in hi_wfs:
        print "HI Relval:", wf['RequestName'], wf['Campaign']
        by_hi_campaign[wf['Campaign']].add(wf['PrepID'])

    # default assignment parameters for a new relval batch
    default_setup = {
        "go": True,
        "parameters": {
            "SiteWhitelist": ["T1_US_FNAL"],
            "MergedLFNBase": "/store/relval",
            "Team": "relval",
            "NonCustodialGroup": "RelVal"
        },
        "custodial_override": "notape",
        "phedex_group": "RelVal",
        "lumisize": -1,
        "fractionpass": 0.0,
        "maxcopies": 1
    }
    default_hi_setup = copy.deepcopy(default_setup)

    add_on = {}
    relval_routing = UC.get('relval_routing')

    def pick_one_site(p):
        ## modify the parameters on the spot to have only one site
        # restrict the whitelist to a single, currently-ready site
        if "parameters" in p and "SiteWhitelist" in p["parameters"] and len(
                p["parameters"]["SiteWhitelist"]) > 1:
            choose_from = list(
                set(p["parameters"]["SiteWhitelist"]) & set(SI.sites_ready))
            picked = random.choice(choose_from)
            print "picked", picked, "from", choose_from
            p["parameters"]["SiteWhitelist"] = [picked]

    batches = BI.all()
    for campaign in by_campaign:
        if campaign in batches:
            continue
        ## get a bunch of information
        setup = copy.deepcopy(default_setup)
        for key in relval_routing:
            if key in campaign:
                ## augment with the routing information
                augment_with = relval_routing[key]
                print "Modifying the batch configuration because of keyword", key
                print "with", augment_with
                setup = deep_update(setup, augment_with)
        pick_one_site(setup)
        add_on[campaign] = setup
        sendLog('batchor',
                'Adding the relval campaigns %s with parameters \n%s' %
                (campaign, json.dumps(setup, indent=2)),
                level='critical')
        BI.update(campaign, by_campaign[campaign])

    for campaign in by_hi_campaign:
        if campaign in batches:
            continue
        ## get a bunch of information
        setup = copy.deepcopy(default_hi_setup)
        # heavy-ion relvals run at one of the two supported T1 sites
        possible_sites = set(["T1_DE_KIT", "T1_FR_CCIN2P3"])
        hi_site = random.choice(list(possible_sites))
        setup["parameters"]["SiteWhitelist"] = [hi_site]
        pick_one_site(setup)
        add_on[campaign] = setup
        sendLog('batchor',
                'Adding the HI relval campaigns %s with parameters \n%s' %
                (campaign, json.dumps(setup, indent=2)),
                level='critical')
        BI.update(campaign, by_hi_campaign[campaign])

    ## only new campaigns in announcement
    for new_campaign in list(
            set(add_on.keys()) - set(CI.all(c_type='relval'))):
        ## this is new, and can be announced as such
        print new_campaign, "is new stuff"
        subject = "Request of RelVal samples batch %s" % new_campaign
        text = """Dear all,

A new batch of relval workflows was requested.

Batch ID:

%s

Details of the workflows:

https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

This is an automated message""" % (
            new_campaign,
            new_campaign,
        )
        print subject
        print text
        to = ['*****@*****.**']
        sendEmail(subject, text, destination=to)
        sendLog('batchor', text, level='critical')

    ## go through all existing campaigns and remove the ones not in use anymore ?
    for old_campaign in CI.all(c_type='relval'):
        all_in_batch = getWorkflowByCampaign(url, old_campaign, details=True)
        if not all_in_batch:
            continue
        # batch is done when no workflow is in an active/pending status
        is_batch_done = all(
            map(
                lambda s: not s in [
                    'completed', 'force-complete', 'running-open',
                    'running-closed', 'acquired', 'assigned',
                    'assignment-approved'
                ], [wf['RequestStatus'] for wf in all_in_batch]))
        ## check all statuses
        if is_batch_done:
            #print "batch",old_campaign,"can be closed or removed if necessary"
            #campaigns[old_campaign]['go'] = False ## disable
            CI.pop(old_campaign)  ## or just drop it all together ?
            BI.pop(old_campaign)
            print "batch", old_campaign, " configuration was removed"

    ## merge all anyways
    CI.update(add_on, c_type='relval')
else: # project, resize, image_extents are not there # so remove their children for key in data.keys(): if k in key: data.pop(key) # this dictionary will hold the output out_dict = {} for k, v in data.iteritems(): # all values coming in from the post request # are unicode, convert those values which # should be int or float tdict = gen_nested_dict(k.split("|"), is_num(v)) # deep_update updates the dictionary deep_update(out_dict, tdict) # MODIS only receive l1 or stats modis_list = ['l1'] if 'stats' in landsat_list: modis_list.append('stats') # we dont need these values returned by the available-products query if 'date_restricted' in scene_dict_all_prods: scene_dict_all_prods.pop('date_restricted') for key in scene_dict_all_prods: if 'mod' in key or 'myd' in key: scene_dict_all_prods[key]['products'] = modis_list elif key not in ('not_implemented', 'date_restricted'): # Probably better to let the user know if there
def batchor( url ):
    """Create batch/campaign configuration for newly approved RelVal workflows.

    File-based variant: batches are tracked in batches.json and campaign
    configuration in campaigns.relval.json. Groups assignment-approved
    TaskChain relval workflows by campaign, registers new batches with a
    default setup, announces them by email/log, retires finished campaigns
    and rewrites the campaign file.

    url -- request-manager service URL passed through to the workflow queries

    Python 2 code (print statements, filter returning a list).
    """
    UC = unifiedConfiguration()
    SI = global_SI()
    ## get all workflows in assignment-approved with SubRequestType = relval
    all_wfs = []
    for user in UC.get("user_relval"):
        all_wfs.extend( getWorkflows(url, 'assignment-approved', details=True, user=user, rtype='TaskChain') )

    wfs = filter( lambda r :r['SubRequestType'] == 'RelVal' if 'SubRequestType' in r else False, all_wfs)
    ## need a special treatment for those
    hi_wfs = filter( lambda r :r['SubRequestType'] == 'HIRelVal' if 'SubRequestType' in r else False, all_wfs)

    # group workflow PrepIDs by campaign
    by_campaign = defaultdict(set)
    by_hi_campaign = defaultdict(set)
    for wf in wfs:
        print "Relval:",wf['RequestName'], wf['Campaign']
        #by_campaign[wf['Campaign']].add( wf['RequestName'] )
        by_campaign[wf['Campaign']].add( wf['PrepID'] )

    for wf in hi_wfs:
        print "HI Relval:",wf['RequestName'], wf['Campaign']
        #by_hi_campaign[wf['Campaign']].add( wf['RequestName'] )
        by_hi_campaign[wf['Campaign']].add( wf['PrepID'] )

    # default assignment parameters for a new relval batch
    default_setup = {
        "go" :True,
        "parameters" : {
            "SiteWhitelist": [ "T1_US_FNAL" ],
            "MergedLFNBase": "/store/relval",
            "Team" : "relval",
            "NonCustodialGroup" : "RelVal"
        },
        "custodial" : "T1_US_FNAL_MSS",
        "custodial_override" : ["DQMIO"],
        "phedex_group" : "RelVal",
        "lumisize" : -1,
        "fractionpass" : 0.0,
        "maxcopies" : 1
    }
    default_hi_setup = copy.deepcopy( default_setup )

    add_on = {}
    batches = json.loads( open('batches.json').read() )
    relval_routing = UC.get('relval_routing')

    def pick_one_site( p):
        ## modify the parameters on the spot to have only one site
        # restrict the whitelist to a single, currently-ready site
        if "parameters" in p and "SiteWhitelist" in p["parameters"] and len(p["parameters"]["SiteWhitelist"])>1:
            choose_from = list(set(p["parameters"]["SiteWhitelist"]) & set(SI.sites_ready))
            picked = random.choice( choose_from )
            print "picked",picked,"from",choose_from
            p["parameters"]["SiteWhitelist"] = [picked]

    for campaign in by_campaign:
        if campaign in batches: continue
        ## get a bunch of information
        setup = copy.deepcopy( default_setup )
        for key in relval_routing:
            if key in campaign:
                ## augment with the routing information
                augment_with = relval_routing[key]
                print "Modifying the batch configuration because of keyword",key
                print "with",augment_with
                setup = deep_update( setup, augment_with )
        #if 'cc7' in campaign: setup["parameters"]["SiteWhitelist"] = ["T2_US_Nebraska"]
        pick_one_site( setup )
        add_on[campaign] = setup
        sendLog('batchor','Adding the relval campaigns %s with parameters \n%s'%( campaign, json.dumps( setup, indent=2)),level='critical')
        if not campaign in batches: batches[campaign] = []
        # record the PrepIDs of this campaign in the batch, de-duplicated
        batches[campaign] = list(set(list(copy.deepcopy( by_campaign[campaign] )) + batches[campaign] ))

    for campaign in by_hi_campaign:
        if campaign in batches: continue
        ## get a bunch of information
        setup = copy.deepcopy( default_hi_setup )
        # heavy-ion relvals run at one of the two supported T1 sites
        hi_site = random.choice(["T1_DE_KIT","T1_FR_CCIN2P3"])
        setup["parameters"]["SiteWhitelist"]=[ hi_site ]
        #setup["parameters"]["SiteWhitelist"]=["T1_DE_KIT","T1_FR_CCIN2P3"]
        pick_one_site( setup )
        add_on[campaign] = setup
        sendLog('batchor','Adding the HI relval campaigns %s with parameters \n%s'%( campaign, json.dumps( setup, indent=2)),level='critical')
        if not campaign in batches: batches[campaign] = []
        batches[campaign] = list(set(list(copy.deepcopy( by_hi_campaign[campaign] )) + batches[campaign] ))

    # persist the updated batch bookkeeping
    open('batches.json','w').write( json.dumps( batches , indent=2 ) )

    ## open the campaign configuration
    campaigns = json.loads( open('campaigns.relval.json').read() )

    ## protect for overwriting ??
    for new_campaign in list(set(add_on.keys())-set(campaigns.keys())):
        ## this is new, and can be announced as such
        print new_campaign,"is new stuff"
        subject = "Request of RelVal samples batch %s"% new_campaign
        text="""Dear all,

A new batch of relval workflows was requested.

Batch ID:

%s

Details of the workflows:

https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

This is an automated message"""%( new_campaign,
                                  new_campaign,
                                  )
        print subject
        print text
        to = ['*****@*****.**']
        sendEmail(subject, text, destination=to)
        sendLog('batchor',text, level='critical')

    ## go through all existing campaigns and remove the ones not in use anymore ?
    for old_campaign in campaigns.keys():
        all_in_batch = getWorkflowByCampaign(url, old_campaign, details=True)
        # batch is done when no workflow is in an active/pending status
        is_batch_done = all(map(lambda s : not s in ['completed','running-open','running-closed','acquired','assigned','assignment-approved'], [wf['RequestStatus']for wf in all_in_batch]))
        ## check all statuses
        if is_batch_done:
            #print "batch",old_campaign,"can be closed or removed if necessary"
            #campaigns[old_campaign]['go'] = False ## disable
            campaigns.pop( old_campaign ) ## or just drop it all together ?
            print "batch",old_campaign," configuration was removed"

    ## merge all anyways
    campaigns.update( add_on )

    ## write it out for posterity
    open('campaigns.json.updated','w').write(json.dumps( campaigns , indent=2))
    ## read back
    rread = json.loads(open('campaigns.json.updated').read())

    os.system('mv campaigns.json.updated campaigns.relval.json')