def setOutputDataset(self, **kwargs):
    '''Through this method it is possible to define the pattern of output
    files (like *.root) and the corresponding output dataset. To choose the
    desired dataset, a list of all datasets of the chosen session is printed.'''

    key = raw_input('Enter a pattern (e.g. *.root): ')
    j = self.getJobObject()

    if isinstance(j.inputdata, SBInputDataset.SBInputPersonalProduction):
        if j.inputdata.session == 'FastSim':
            kwargs['session'] = 'fastsim'
        elif j.inputdata.session == 'FullSim':
            kwargs['session'] = 'fullsim'
        else:
            raise GangaException('j.inputdata.session is \'%s\'. It must be \'FastSim\' or \'FullSim\'' % j.inputdata.session)
    else:
        kwargs['session'] = 'analysis'

    kwargs['owner'] = utils.getOwner()
    kwargs['status'] = ['open', 'prepared']

    manager = SBDatasetManager.SBDatasetManager()
    datasets = manager.getDataset(**kwargs)
    # print the datasets and let the user choose one of them
    dataset = manager.printDatasets(datasets)

    self.pairs[key] = dataset['dataset_id']

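# Usage sketch (assumption, not part of the original source): how
# setOutputDataset() is typically driven from an interactive Ganga session
# with the SuperB plugin loaded, where Job, SBApp and SBOutputDataset are
# exposed in the GPI. The method is interactive: it asks for a file pattern
# and then prints the matching datasets so the user can pick one.
#
#   j = Job(application=SBApp())
#   j.outputdata = SBOutputDataset()
#   j.outputdata.setOutputDataset()   # e.g. answer '*.root', then choose a dataset
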
def __changeStatus(self, new_status, **kwargs):
    '''Interactively select an owned dataset and set its status to new_status.'''

    kwargs['owner'] = utils.getOwner()

    datasets = self.getDataset(**kwargs)
    dataset = self.printDatasets(datasets)
    dataset_id = dataset['dataset_id']

    sql = 'UPDATE analysis_dataset SET status = %s WHERE dataset_id = %s'
    db.write(sql, (new_status, r'\x' + dataset_id))

def getDataset(self, **kwargs):
    '''Get all metadata of all datasets. Public method, not exported to GPI.'''

    db_view_column = ['dataset_id', 'creation_date', 'occupancy']
    sql = 'SELECT * FROM dataset_union WHERE true'
    kwargs['owner'] = kwargs.get('owner', ['official', utils.getOwner()])

    # add filters to the query
    if len(kwargs) > 0:
        for key, value in kwargs.iteritems():
            if key in db_view_column:
                sql += " AND %s ILIKE '%s%%'" % (key, value)
            elif key == 'files':
                sql += " AND files > %s" % value
            elif key in ['status', 'session', 'owner']:
                if not isinstance(value, list):
                    value = [value]
                sql += " AND (false"
                for s in value:
                    sql += " OR %s ILIKE '%s%%'" % (key, s)
                sql += ")"
            else:
                sql += " AND parameters->'%s' ILIKE '%s%%'" % (key, value)

    # clean up the query
    sql = sql.replace('false OR ', '')
    sql = sql.replace('true AND ', '')

    # TODO: add control to prevent SQL injection
    datasets = db.read(sql)

    if len(datasets) == 0:
        raise GangaException('No dataset found')

    i = 0
    for dataset in datasets:
        dataset['id'] = i
        i += 1
        dataset['occupancy_human'] = utils.sizeof_fmt_binary(dataset['occupancy'])

        if 'evt_file' in dataset['parameters'] and not 'evt_tot' in dataset['parameters']:
            evt_file = int(dataset['parameters']['evt_file'])
            if dataset['files'] is None:
                dataset['files'] = 0
            files = int(dataset['files'])
            dataset['parameters']['evt_tot'] = evt_file * files

        if 'evt_tot' in dataset['parameters']:
            dataset['parameters']['evt_tot_human'] = utils.sizeof_fmt_decimal(int(dataset['parameters']['evt_tot']))

    return datasets

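# Example (assumption: running inside the Ganga/SuperB environment where
# SBDatasetManager is importable and the bookkeeping DB is reachable).
# Keyword arguments become SQL filters: 'dataset_id', 'creation_date' and
# 'occupancy' are matched as ILIKE prefixes, 'files' is a lower bound,
# 'status'/'session'/'owner' accept a single value or a list, and any other
# key is looked up in the hstore 'parameters' column.
# list_open_fastsim_datasets() is a hypothetical helper, shown only to
# illustrate the calling convention.

def list_open_fastsim_datasets():
    manager = SBDatasetManager.SBDatasetManager()
    datasets = manager.getDataset(session='fastsim', status=['open', 'prepared'])
    for d in datasets:
        print('%s  %s  %s' % (d['dataset_id'], d['status'], d['occupancy_human']))
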
def deleteDataset(self, **kwargs):
    '''Delete an empty dataset (prepared status).'''

    kwargs['owner'] = utils.getOwner()
    kwargs['status'] = ['prepared']

    datasets = self.getDataset(**kwargs)
    dataset = self.printDatasets(datasets)
    dataset_id = dataset['dataset_id']

    sql = 'DELETE FROM analysis_dataset WHERE dataset_id = %s'
    db.write(sql, (r'\x' + dataset_id, ))

def downloadDataset(self, **kwargs):
    '''Retrieve all files belonging to an owned dataset from the GRID to the submission machine.'''

    # TODO: create SURL file lists beside the LFN list to permit an lcg-cp
    # fail-over chain implementation and to permit direct plugin subjob
    # configuration from a user-given list

    kwargs['owner'] = utils.getOwner()
    kwargs['files'] = 0

    datasets = self.getDataset(**kwargs)
    dataset = self.printDatasets(datasets)

    dataset_id = dataset['dataset_id']
    files = dataset['files']
    occupancy_human = dataset['occupancy_human']

    home = os.path.expanduser('~')
    s = os.statvfs(home)
    free_disk = utils.sizeof_fmt_binary(s.f_bsize * s.f_bavail)
    #print('\nFree disk space: %s' % free_disk)
    print('\nTotal download size: %s\n' % occupancy_human)

    sql = 'SELECT lfn FROM analysis_output WHERE dataset_id = %s'
    lfns = db.read(sql, (r'\x' + dataset_id, ))

    localdir = os.path.join(home, dataset_id)
    os.mkdir(localdir)
    print('Downloading to %s ...' % localdir)

    i = 1
    for lfn in lfns:
        source = lfn['lfn']
        destination = os.path.join(localdir, source.split('/')[-1])

        process = subprocess.Popen(['lcg-cp', source, destination],
                                   stdout=subprocess.PIPE, close_fds=True)
        outData, errData = process.communicate()
        retCode = process.poll()

        if retCode != 0:
            raise Exception('lcg-cp failed with return code %d' % retCode)

        sys.stdout.write('\b' * 80 + '%s/%s' % (str(i), str(files)))
        sys.stdout.flush()
        i += 1

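# Sketch (assumption, not part of the original method): the TODO above
# mentions an lcg-cp fail-over chain based on SURL lists. A minimal
# intermediate step is to retry the copy a few times before giving up; the
# call below mirrors the lcg-cp invocation used in downloadDataset().
import subprocess

def copy_with_retry(source, destination, attempts=3):
    '''Hypothetical helper: run lcg-cp up to `attempts` times, return on success.'''
    last_code = None
    for _ in range(attempts):
        process = subprocess.Popen(['lcg-cp', source, destination],
                                   stdout=subprocess.PIPE, close_fds=True)
        process.communicate()
        last_code = process.poll()
        if last_code == 0:
            return
    raise Exception('lcg-cp failed %d times, last return code %d'
                    % (attempts, last_code))
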
def createDataset(self):
    '''Interactive method guiding the user through the dataset creation
    procedure. If the dataset is a 'personal production' type, the user is
    forced to provide a filter key.'''

    def asksParameter(parameter):
        '''Interactive method asking the user for the value of each parameter
        of the chosen session (FastSim, FullSim, Analysis).'''

        if parameter['customValue'] and len(parameter['values']) == 0:
            value = raw_input('\nEnter %s: ' % parameter['label'])
        elif not parameter['customValue'] and len(parameter['values']) == 0:
            raise GangaException('Invalid rule (customValue:False and values=0).')
        else:
            table = list()
            i = 0
            for value in parameter['values']:
                table.append({'id': i, 'value': value})
                i += 1

            if parameter['customValue']:
                table.append({'id': i, 'value': 'Enter a custom value'})

            print('\nChoose %s:' % parameter['label'])
            column_names = ('id', 'value')
            print(utils.format_dict_table(table, column_names))
            index = utils.getIndex(maxExclusive=len(table))

            if parameter['customValue'] and index == len(table) - 1:
                value = raw_input('Custom value: ')
            else:
                value = table[index]['value']

        # insert the parameter into the dictionary; it will subsequently be
        # written to the analysis dataset bookkeeping table (hstore field)
        new_dataset['parameters'][parameter['name']] = value
        return value

    type = [
        dict(id=0, dataset_type='FastSim Personal Production'),
        dict(id=1, dataset_type='FullSim Personal Production'),
        dict(id=2, dataset_type='Analysis'),
    ]

    column_names = ('id', 'dataset_type')
    print(utils.format_dict_table(type, column_names))
    index = utils.getIndex(maxExclusive=len(type))

    new_dataset = dict()
    new_dataset['parameters'] = dict()

    ####################
    # FAST Simulation session
    ####################
    # parameter check: mandatory, free string param management
    # TODO: parameter type check, evaluate the config file option to store parameters
    if index == 0:
        new_dataset['session'] = 'fastsim'

        parameters = [
            {"name": "evt_file", "label": "Events per file", "customValue": True, "values": []},
            {"name": "analysis", "label": "Analysis", "customValue": True,
             "values": ["BtoKNuNu", "BtoKstarNuNu", "DstD0ToXLL", "DstD0ToXLL", "Generics",
                        "HadRecoilCocktail", "KplusNuNu", "SLRecoilCocktail", "tau->3mu"]},
            {"name": "dg", "label": "Geometry", "customValue": True,
             "values": ["DG_4", "DG_4a", "DG_BaBar"]},
            {"name": "generator", "label": "Generator", "customValue": True,
             "values": ["B0B0bar_Btag-HD_Cocktail", "B0B0bar_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail",
                        "B0B0bar_generic", "B0B0bar_K0nunu", "B0B0bar_K0nunu_SL_e_mu_tau",
                        "B0B0bar_Kstar0nunu_Kpi", "B0B0bar_Kstar0nunu_Kpi_SL_e_mu_tau",
                        "B+B-_Btag-HD_Cocktail", "B+B-_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail",
                        "B+B-_generic", "B+B-_K+nunu", "B+B-_K+nunu_SL_e_mu_tau",
                        "B+B-_Kstar+nunu", "B+B-_Kstar+nunu_SL_e_mu_tau", "B+B-_taunu_SL_e_mu_tau",
                        "bhabha_bhwide", "ccbar", "tau+tau-_kk2f", "uds", "udsc",
                        "Upsilon4S_generic"]},
            {"name": "bkg_mixing", "label": "Background Mixing Type", "customValue": True,
             "values": ["All", "NoPair", "NoMixing"]},
            {"name": "analysis_type", "label": "Analysis Type", "customValue": True,
             "values": ["BtoKNuNu", "BtoKstarNuNu", "HadRecoil", "SemiLepKplusNuNu"]}
        ]

        for parameter in parameters:
            asksParameter(parameter)

    ####################
    # FULL Simulation session
    ####################
    elif index == 1:
        new_dataset['session'] = 'fullsim'

        parameters = [
            {"name": "evt_file", "label": "Events per file", "customValue": True, "values": []},
            {"name": "sim_type", "label": "Simulation Type", "customValue": False,
             "values": ["fullsim", "background_frame"]},
            {"name": "generator", "label": "Generator", "customValue": False,
             "values": ["RadBhaBha", "singleparticle"]},
            {"name": "dg", "label": "Geometry", "customValue": True,
             "values": ["Geometry_CIPE", "Geometry_CIPE_BGO", "Geometry_CIPE_CSI",
                        "Geometry_CIPE_V00-00-02"]},
            {"name": "pl", "label": "Physics list", "customValue": True,
             "values": ["QGSP", "QGSP_BERT", "QGSP_BERT_HP"]},
            {"name": "g4ver", "label": "Geant 4 version", "customValue": True,
             "values": ["9.2", "9.3"]},
            {"name": "opt_photons", "label": "Optical Photons", "customValue": False,
             "values": ["OFF", "ON"]}
        ]

        radbhabha = [
            {"name": "brunobbbminde", "label": "Min. Delta E", "customValue": True, "values": []}
        ]

        singleParticle = [
            {"name": "brunopdg", "label": "PDG Code", "customValue": True, "values": []},
            {"name": "brunothetamin", "label": "Theta min.", "customValue": True, "values": []},
            {"name": "brunothetamax", "label": "Theta max.", "customValue": True, "values": []},
            {"name": "brunophimin", "label": "Phi min.", "customValue": True, "values": []},
            {"name": "brunophimax", "label": "Phi max.", "customValue": True, "values": []},
            {"name": "brunoemin", "label": "Energy (GeV) min.", "customValue": True, "values": []},
            {"name": "brunoemax", "label": "Energy (GeV) max.", "customValue": True, "values": []}
        ]

        for parameter in parameters:
            value = asksParameter(parameter)

            # parameter dependencies management
            if parameter['name'] == 'generator':
                if value == 'singleparticle':
                    parameters.extend(singleParticle)
                elif value == 'RadBhaBha':
                    parameters.extend(radbhabha)

    ####################
    # ANALYSIS session
    ####################
    elif index == 2:
        new_dataset['session'] = 'analysis'
    else:
        raise GangaException('Invalid selection.')

    while True:
        free_string = raw_input('\nEnter free string: ')
        max_length = 128

        if len(free_string) <= max_length:
            new_dataset['parameters']['free_string'] = free_string
            break
        else:
            print('Free string must be <= %d char long.' % max_length)

    # dataset-site relation set
    new_dataset['site'] = getConfig('SuperB')['submission_site']

    new_dataset['owner'] = utils.getOwner()
    new_dataset['dataset_id'] = str(objectid.ObjectId())

    print('\nNew dataset details:')
    self.printDatasetDetail(new_dataset)

    value = ''
    while True:
        value = raw_input('Type \'yes\' to confirm the dataset creation or (q)uit: ')
        if value == 'yes':
            break
        elif value == 'q':
            raise utils.QuitException()

    sql = '''INSERT INTO analysis_dataset
        (owner, dataset_id, session, parameters, status)
        VALUES (%s, decode(%s, 'hex'), %s, %s, 'prepared');
        INSERT INTO analysis_dataset_site
        (dataset_id, site)
        VALUES (decode(%s, 'hex'), %s);'''
    params = (new_dataset['owner'],
              new_dataset['dataset_id'],
              new_dataset['session'],
              new_dataset['parameters'],
              new_dataset['dataset_id'],
              new_dataset['site'])
    db.write(sql, params)

def master_configure(self):
    '''This method creates the tar.bz2 archive of the user software directory.
    It is called once per master job.'''

    logger.debug('SBApp master_configure called.')

    self.now = datetime.datetime.now().strftime("%Y%m%d")
    self.os_arch = os.environ['SBROOT'].split('/')[-1]
    self.user_id = utils.getOwner()
    j = self.getJobObject()

    # check the target SE status using the gridmon DB (updated by the nagios monitoring system)
    sql = 'SELECT se_host, nagios_test_service FROM se WHERE name_grid = %s'
    local_SE = db.gridmon(sql, (getConfig('SuperB')['submission_site'], ))

    if local_SE[0]['nagios_test_service'] == 'CRITICAL':
        raise GangaException('Local storage element %s is down.' % local_SE[0]['se_host'])
    #    logger.error('Local storage element %s seems died for gridmon.' % local_SE[0]['se_host'])
    #else:
    #    logger.error('Local storage element %s is back alive for gridmon. !! uncomment exception !!' % local_SE[0]['se_host'])

    # create the software directory archive
    if self.software_dir != '':
        if not os.path.isdir(self.software_dir):
            raise ApplicationConfigurationError(None, 'software_dir must be a directory.')

        # make the tar file and update the sw_archive parameter
        self.software_dir = os.path.normpath(self.software_dir)
        (head, tail) = os.path.split(self.software_dir)
        self.filename = tail
        self.sw_archive = os.path.join(j.inputdir, tail + '.tar.bz2')

        logger.info('Creating archive: %s ...', self.sw_archive)
        logger.info('From: %s', head)
        logger.info('Of: %s', tail)

        #savedir = os.getcwd()
        #os.chdir(self.software_dir)
        #retcode = subprocess.call("tar -cjf %s * 2>/dev/null" % self.sw_archive, shell=True)
        retcode = subprocess.call("tar -cjf %s -C %s %s 2>/dev/null" %
                                  (self.sw_archive, head, tail), shell=True)
        if retcode < 0:
            raise ApplicationConfigurationError(None, 'Error %d while creating archive.' % retcode)
        #os.chdir(savedir)
    else:
        raise ApplicationConfigurationError(None, 'software_dir cannot be empty.')

    if self.executable == '':
        raise ApplicationConfigurationError(None, 'executable cannot be empty.')

    # checking that j.inputdata is a valid object
    if not isinstance(j.inputdata, (SBInputDataset.SBInputPersonalProduction,
                                    SBInputDataset.SBInputProductionAnalysis,
                                    SBInputDataset.SBInputPureAnalysis)):
        msg = 'j.inputdata %s is not allowed' % str(type(j.inputdata))
        raise ApplicationConfigurationError(None, msg)

    # checking that j.inputdata (the input dataset) is a valid dataset
    j.inputdata.check()

    # checking that j.outputdata (the output dataset) is valid
    if isinstance(j.outputdata, SBOutputDataset.SBOutputDataset):
        j.outputdata.check()

    # creating temp dataset
    self.temp_dataset = str(objectid.ObjectId())
    free_string = '%s_%s_%s' % (j.id, j.name, self.filename)

    sql = '''INSERT INTO analysis_dataset
        (owner, dataset_id, session, parameters, status)
        VALUES (%s, decode(%s, 'hex'), %s, %s, 'temp');
        INSERT INTO analysis_dataset_site
        (dataset_id, site)
        VALUES (decode(%s, 'hex'), %s);'''
    params = (utils.getOwner(),
              self.temp_dataset,
              'analysis',
              {'free_string': free_string},
              self.temp_dataset,
              getConfig('SuperB')['submission_site'])
    db.write(sql, params)

    # merger
    j.merger = TextMerger()
    j.merger.files.extend(['severus.log', 'output_files.txt'])
    j.merger.ignorefailed = True
    j.merger.compress = True

    j.splitter = SBSubmission.SBSubmission()

    return (0, None)

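# Design note (sketch, not part of the original plugin): the archive above is
# built through a shell string with shell=True. The same tar invocation can be
# expressed as an argument list, which avoids shell quoting issues if the
# software path ever contains spaces; stderr is silenced explicitly instead of
# through the shell redirect. make_sw_archive() is a hypothetical helper.
import os
import subprocess

def make_sw_archive(sw_archive, head, tail):
    '''Return the tar exit code, mirroring "tar -cjf <archive> -C <head> <tail>".'''
    with open(os.devnull, 'w') as devnull:
        return subprocess.call(['tar', '-cjf', sw_archive, '-C', head, tail],
                               stderr=devnull)
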
def check(self):
    '''This method validates the output file information at submission time.'''

    if len(self.pairs) == 0:
        raise ApplicationConfigurationError(None, 'output dataset pairs cannot be empty')

    for key, value in self.pairs.items():
        kwargs = dict()
        kwargs['dataset_id'] = value
        kwargs['owner'] = utils.getOwner()

        manager = SBDatasetManager.SBDatasetManager()
        datasets = manager.getDataset(**kwargs)

        # only one dataset
        if len(datasets) == 0:
            msg = 'Output dataset %s not found' % value
            raise ApplicationConfigurationError(None, msg)
        assert len(datasets) == 1, 'Dataset consistency error'
        dataset = datasets[0]

        # owner
        if dataset['owner'] != utils.getOwner():
            msg = 'You are not the owner of the output dataset %s' % value
            raise ApplicationConfigurationError(None, msg)

        # status
        if dataset['status'] not in ['open', 'prepared']:
            msg = 'Output dataset %s status is not open or prepared' % value
            raise ApplicationConfigurationError(None, msg)

        # site
        sql = 'SELECT site FROM analysis_dataset_site WHERE dataset_id = %s'
        site = db.read(sql, (r'\x' + value, ))

        if site[0]['site'] != getConfig('SuperB')['submission_site']:
            msg = 'Output site mismatch: the submission site for dataset %s has to be %s' % (value, dataset['site'])
            raise ApplicationConfigurationError(None, msg)

        # session
        j = self.getJobObject()
        if isinstance(j.inputdata, SBInputDataset.SBInputPersonalProduction):
            if j.inputdata.session == 'FullSim' and dataset['session'] != 'fullsim':
                msg = 'Output dataset type should be \'fullsim\''
                raise ApplicationConfigurationError(None, msg)
            if j.inputdata.session == 'FastSim' and dataset['session'] != 'fastsim':
                msg = 'Output dataset type should be \'fastsim\''
                raise ApplicationConfigurationError(None, msg)
        else:
            if dataset['session'] != 'analysis':
                msg = 'Output dataset type should be \'analysis\''
                raise ApplicationConfigurationError(None, msg)

            # parent: exists only for the analysis session
            if j.inputdata.dataset_id is None:
                msg = 'Input dataset is not defined'
                raise ApplicationConfigurationError(None, msg)
            else:
                parent_dataset = j.inputdata.dataset_id

                if 'parent' not in dataset['parameters']:
                    sql = 'UPDATE analysis_dataset SET parameters = parameters || %s WHERE dataset_id = %s'
                    db.write(sql, ({'parent': parent_dataset}, r'\x' + value))
                elif dataset['parameters']['parent'] != parent_dataset:
                    msg = 'Input dataset must be %s' % dataset['parameters']['parent']
                    raise ApplicationConfigurationError(None, msg)

def whoami(self):
    '''Print the User id string'''
    print(utils.getOwner())