Example #1
    def setOutputDataset(self, **kwargs):
        '''Define the pattern of output files (e.g. *.root) and the corresponding output dataset.
        To let the user choose the desired dataset, a list of all datasets of the chosen session is printed.'''

        key = raw_input('Enter a pattern (eg. *.root): ')

        j = self.getJobObject()

        if isinstance(j.inputdata, SBInputDataset.SBInputPersonalProduction):
            if j.inputdata.session == 'FastSim':
                kwargs['session'] = 'fastsim'
            elif j.inputdata.session == 'FullSim':
                kwargs['session'] = 'fullsim'
            else:
                raise GangaException(
                    'j.inputdata.session is \'%s\'. It must be \'FastSim\' or \'FullSim\''
                    % j.inputdata.session)
        else:
            kwargs['session'] = 'analysis'

        kwargs['owner'] = utils.getOwner()
        kwargs['status'] = ['open', 'prepared']

        manager = SBDatasetManager.SBDatasetManager()
        datasets = manager.getDataset(**kwargs)
        dataset = manager.printDatasets(
            datasets)  # print dataset and choose one of them

        self.pairs[key] = dataset['dataset_id']
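A brief usage sketch of the method above, assuming it lives on the output-dataset object attached to a Ganga job (the job handle and attribute name are illustrative, not taken from the snippet):

    # inside an interactive Ganga session (illustrative; j.outputdata is assumed
    # to be the object exposing setOutputDataset above)
    j = jobs(0)                      # an existing SuperB job
    j.outputdata.setOutputDataset()  # asks for a pattern (e.g. *.root), lists the
                                     # matching datasets and records the chosen
                                     # dataset_id in self.pairs under that pattern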
Example #2
 def setOutputDataset(self, **kwargs):
     '''Define the pattern of output files (e.g. *.root) and the corresponding output dataset.
     To let the user choose the desired dataset, a list of all datasets of the chosen session is printed.'''
     
     key = raw_input('Enter a pattern (eg. *.root): ')
     
     j = self.getJobObject()
     
     if isinstance(j.inputdata, SBInputDataset.SBInputPersonalProduction):
         if j.inputdata.session == 'FastSim':
             kwargs['session'] = 'fastsim'
         elif j.inputdata.session == 'FullSim':
             kwargs['session'] = 'fullsim'
         else:
             raise GangaException('j.inputdata.session is \'%s\'. It must be \'FastSim\' or \'FullSim\'' % j.inputdata.session)
     else:
         kwargs['session'] = 'analysis'
     
     kwargs['owner'] = utils.getOwner()
     kwargs['status'] = ['open', 'prepared']
     
     manager = SBDatasetManager.SBDatasetManager()
     datasets = manager.getDataset(**kwargs)
     dataset = manager.printDatasets(datasets) # print dataset and choose one of them
     
     self.pairs[key] = dataset['dataset_id']
Example #3
 def __changeStatus(self, new_status, **kwargs):
     kwargs['owner'] = utils.getOwner()
     
     datasets = self.getDataset(**kwargs)
     dataset = self.printDatasets(datasets)
     dataset_id = dataset['dataset_id']
     
     sql = 'UPDATE analysis_dataset SET status = %s WHERE dataset_id = %s'
     db.write(sql, (new_status, r'\x' + dataset_id))
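The db.write helper is not part of these snippets; the following is a minimal sketch of what an equivalent parameterized write could look like, assuming a PostgreSQL backend accessed through psycopg2 (the connection string and function name are illustrative):

    import psycopg2

    def write(sql, params):
        # hypothetical stand-in for db.write(): one connection, one transaction
        conn = psycopg2.connect('dbname=superb')   # illustrative DSN
        try:
            with conn:                             # commits on success, rolls back on error
                with conn.cursor() as cur:
                    cur.execute(sql, params)       # %s placeholders are bound by the driver
        finally:
            conn.close()

    # The r'\x' prefix turns the hex dataset_id string into PostgreSQL's hex input
    # format for bytea, which matches the decode(%s, 'hex') calls used at insert
    # time elsewhere in these snippets (assumption: dataset_id is a bytea column).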
Example #4
    def __changeStatus(self, new_status, **kwargs):
        kwargs['owner'] = utils.getOwner()

        datasets = self.getDataset(**kwargs)
        dataset = self.printDatasets(datasets)
        dataset_id = dataset['dataset_id']

        sql = 'UPDATE analysis_dataset SET status = %s WHERE dataset_id = %s'
        db.write(sql, (new_status, r'\x' + dataset_id))
Example #5
    def getDataset(self, **kwargs):
        '''Get the metadata of all datasets matching the given filters.
        Public method, not exported to GPI.'''

        db_view_column = ['dataset_id', 'creation_date', 'occupancy']
        sql = 'SELECT * FROM dataset_union WHERE true'
        kwargs['owner'] = kwargs.get('owner', ['official', utils.getOwner()])

        # add filter to query
        if len(kwargs) > 0:
            for key, value in kwargs.iteritems():
                if key in db_view_column:
                    sql += " AND %s ILIKE '%s%%'" % (key, value)
                elif key == 'files':
                    sql += " AND files > %s" % value
                elif key in ['status', 'session', 'owner']:
                    if not isinstance(value, list):
                        value = [value]

                    sql += " AND (false"
                    for s in value:
                        sql += " OR %s ILIKE '%s%%'" % (key, s)
                    sql += ")"

                else:
                    sql += " AND parameters->'%s' ILIKE '%s%%'" % (key, value)

        # clean up the query
        sql = sql.replace('false OR ', '')
        sql = sql.replace('true AND ', '')

        # TODO: add control to prevent sql injection
        datasets = db.read(sql)

        if len(datasets) == 0:
            raise GangaException('No dataset found')

        i = 0
        for dataset in datasets:
            dataset['id'] = i
            i += 1
            dataset['occupancy_human'] = utils.sizeof_fmt_binary(
                dataset['occupancy'])
            if 'evt_file' in dataset[
                    'parameters'] and not 'evt_tot' in dataset['parameters']:
                evt_file = int(dataset['parameters']['evt_file'])
                if dataset['files'] is None:
                    dataset['files'] = 0
                files = int(dataset['files'])
                dataset['parameters']['evt_tot'] = evt_file * files
            if 'evt_tot' in dataset['parameters']:
                dataset['parameters'][
                    'evt_tot_human'] = utils.sizeof_fmt_decimal(
                        int(dataset['parameters']['evt_tot']))

        return datasets
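As a worked illustration of the filter-building logic in getDataset, this standalone sketch rebuilds the SQL text for one hypothetical set of filters ('jdoe' is a made-up owner; clause order in the real method depends on dict iteration order):

    # kwargs = {'session': 'fastsim', 'files': 0, 'owner': ['official', 'jdoe']}
    sql = 'SELECT * FROM dataset_union WHERE true'
    sql += " AND (false OR session ILIKE 'fastsim%')"   # list-style filter
    sql += " AND files > 0"                             # numeric filter
    sql += " AND (false OR owner ILIKE 'official%' OR owner ILIKE 'jdoe%')"
    # the clean-up step strips the 'true'/'false' placeholders:
    sql = sql.replace('false OR ', '').replace('true AND ', '')
    print(sql)
    # SELECT * FROM dataset_union WHERE (session ILIKE 'fastsim%')
    #   AND files > 0 AND (owner ILIKE 'official%' OR owner ILIKE 'jdoe%')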
Example #6
 def deleteDataset(self, **kwargs):
     '''Delete an empty dataset (one still in 'prepared' status).'''
     kwargs['owner'] = utils.getOwner()
     kwargs['status'] = ['prepared']
     
     datasets = self.getDataset(**kwargs)
     dataset = self.printDatasets(datasets)
     dataset_id = dataset['dataset_id']
     
     sql = 'DELETE FROM analysis_dataset WHERE dataset_id = %s'
     db.write(sql, (r'\x' + dataset_id, ))
Example #7
    def deleteDataset(self, **kwargs):
        '''Delete an empty dataset (one still in 'prepared' status).'''
        kwargs['owner'] = utils.getOwner()
        kwargs['status'] = ['prepared']

        datasets = self.getDataset(**kwargs)
        dataset = self.printDatasets(datasets)
        dataset_id = dataset['dataset_id']

        sql = 'DELETE FROM analysis_dataset WHERE dataset_id = %s'
        db.write(sql, (r'\x' + dataset_id, ))
Example #8
 def getDataset(self, **kwargs):
     '''Get the metadata of all datasets matching the given filters.
     Public method, not exported to GPI.'''
     
     db_view_column = ['dataset_id', 'creation_date', 'occupancy']
     sql = 'SELECT * FROM dataset_union WHERE true'
     kwargs['owner'] = kwargs.get('owner', ['official', utils.getOwner()])
     
     # add filter to query
     if len(kwargs) > 0:
         for key, value in kwargs.iteritems():
             if key in db_view_column:
                 sql += " AND %s ILIKE '%s%%'" % (key, value)
             elif key == 'files':
                 sql += " AND files > %s" % value
             elif key in ['status', 'session', 'owner']:
                 if not isinstance(value, list):
                     value = [value]
                 
                 sql += " AND (false"
                 for s in value:
                     sql += " OR %s ILIKE '%s%%'" % (key, s)
                 sql += ")"
                 
             else:
                 sql += " AND parameters->'%s' ILIKE '%s%%'" % (key, value)
     
     # clean up the query
     sql = sql.replace('false OR ', '')
     sql = sql.replace('true AND ', '')
     
     # TODO: add control to prevent sql injection
     datasets = db.read(sql)
     
     if len(datasets) == 0:
         raise GangaException('No dataset found')
     
     i = 0
     for dataset in datasets:
         dataset['id'] = i
         i += 1
         dataset['occupancy_human'] = utils.sizeof_fmt_binary(dataset['occupancy'])
         if 'evt_file' in dataset['parameters'] and not 'evt_tot' in dataset['parameters']:
             evt_file = int(dataset['parameters']['evt_file'])
             if dataset['files'] is None:
                 dataset['files'] = 0
             files = int(dataset['files'])
             dataset['parameters']['evt_tot'] = evt_file * files
         if 'evt_tot' in dataset['parameters']:
             dataset['parameters']['evt_tot_human'] = utils.sizeof_fmt_decimal(int(dataset['parameters']['evt_tot']))
     
     return datasets
Example #9
    def downloadDataset(self, **kwargs):
        '''Retrieve all files belonging to an owned dataset from the GRID to the
        submission machine.'''
        # TODO: create SURL file lists beside the LFN list to permit an lcg-cp
        # fail-over chain implementation and to permit direct plugin subjob
        # configuration from a user-given list

        kwargs['owner'] = utils.getOwner()
        kwargs['files'] = 0

        datasets = self.getDataset(**kwargs)
        dataset = self.printDatasets(datasets)

        dataset_id = dataset['dataset_id']
        files = dataset['files']
        occupancy_human = dataset['occupancy_human']

        home = os.path.expanduser('~')
        s = os.statvfs(home)
        free_disk = utils.sizeof_fmt_binary(s.f_bsize * s.f_bavail)

        #print('\nFree disk space: %s' % free_disk)
        print('\nTotal download size: %s\n' % occupancy_human)

        sql = 'SELECT lfn FROM analysis_output WHERE dataset_id = %s'
        lfns = db.read(sql, (r'\x' + dataset_id, ))

        localdir = os.path.join(home, dataset_id)
        os.mkdir(localdir)

        print('Downloading to %s ...' % localdir)
        i = 1

        for lfn in lfns:
            source = lfn['lfn']
            destination = os.path.join(localdir, source.split('/')[-1])

            process = subprocess.Popen(['lcg-cp', source, destination],
                                       stdout=subprocess.PIPE,
                                       close_fds=True)
            outData, errData = process.communicate()
            retCode = process.poll()

            if retCode != 0:
                raise Exception('lcg-cp failed with return code %d' % retCode)

            sys.stdout.write('\b' * 80 + '%s/%s' % (str(i), str(files)))
            sys.stdout.flush()

            i += 1
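The method above computes free_disk but only prints the total download size; below is a small sketch of the pre-download space check one could add. It mirrors the statvfs arithmetic already used and assumes the raw occupancy value is expressed in bytes:

    import os

    def enough_space(occupancy_bytes, path=os.path.expanduser('~')):
        '''Return True if the filesystem holding path can hold occupancy_bytes.'''
        s = os.statvfs(path)
        return s.f_bsize * s.f_bavail >= occupancy_bytes

    # hypothetical use inside downloadDataset(), before os.mkdir(localdir):
    # if not enough_space(dataset['occupancy']):
    #     raise GangaException('Not enough free disk space for this dataset')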
Example #10
 def downloadDataset(self, **kwargs):
     '''Retrieve all files belonging to an owned dataset from the GRID to the
     submission machine.'''
     # TODO: create SURL file lists beside the LFN list to permit an lcg-cp
     # fail-over chain implementation and to permit direct plugin subjob
     # configuration from a user-given list
     
     kwargs['owner'] = utils.getOwner()
     kwargs['files'] = 0
     
     datasets = self.getDataset(**kwargs)
     dataset = self.printDatasets(datasets)
     
     dataset_id = dataset['dataset_id']
     files = dataset['files']
     occupancy_human = dataset['occupancy_human']
     
     home = os.path.expanduser('~')
     s = os.statvfs(home)
     free_disk = utils.sizeof_fmt_binary(s.f_bsize * s.f_bavail)
     
     #print('\nFree disk space: %s' % free_disk)
     print('\nTotal download size: %s\n' % occupancy_human)
     
     sql = 'SELECT lfn FROM analysis_output WHERE dataset_id = %s'
     lfns = db.read(sql, (r'\x' + dataset_id, ))
     
     localdir = os.path.join(home, dataset_id)
     os.mkdir(localdir)
     
     print('Downloading to %s ...' % localdir)
     i = 1
     
     for lfn in lfns:
         source = lfn['lfn']
         destination = os.path.join(localdir, source.split('/')[-1])
         
         process = subprocess.Popen(['lcg-cp', source, destination], stdout=subprocess.PIPE, close_fds=True)
         outData, errData = process.communicate()
         retCode = process.poll()
         
         if retCode != 0:
             raise Exception('lcg-cp failed with return code %d' % retCode)
         
         sys.stdout.write('\b' * 80 + '%s/%s' % (str(i), str(files)))
         sys.stdout.flush()
         
         i += 1
Example #11
 def createDataset(self):
     '''Interactive method that guides the user through the dataset creation procedure.
     If the dataset is a 'personal production' type, the user is forced to provide
     a filter key.'''
     
     def asksParameter(parameter):
         '''Interactively ask the user for the value of each parameter of the
         chosen session (FastSim, FullSim, Analysis).'''
         if parameter['customValue'] and len(parameter['values']) == 0:
             value = raw_input('\nEnter %s: ' % parameter['label'])
         elif not parameter['customValue'] and len(parameter['values']) == 0:
             raise GangaException('Invalid rule (customValue:False and values=0).')
         else:
             table = list()
             
             i = 0
             for value in parameter['values']:
                 table.append({'id': i, 'value': value})
                 i += 1
             
             if parameter['customValue']:
                 table.append({'id': i, 'value': 'Enter a custom value'})
             
             print('\nChoose %s:' % parameter['label'])
             column_names = ('id', 'value')
             print(utils.format_dict_table(table, column_names))
             index = utils.getIndex(maxExclusive=len(table))
             
             if parameter['customValue'] and index == len(table)-1:
                 value = raw_input('Custom value: ')
             else:
                 value = table[index]['value']
         
         # insert the parameter into the dictionary; it will subsequently be
         # stored in the analysis dataset bookkeeping table (hstore field)
         new_dataset['parameters'][parameter['name']] = value
         
         return value
     
     
     type = [
         dict(id = 0, dataset_type = 'FastSim Personal Production'),
         dict(id = 1, dataset_type = 'FullSim Personal Production'),
         dict(id = 2, dataset_type = 'Analysis'),
         ]
     
     column_names = ('id', 'dataset_type')
     print(utils.format_dict_table(type, column_names))
     index = utils.getIndex(maxExclusive=len(type))
     
     new_dataset = dict()
     new_dataset['parameters'] = dict()
     
     ####################
     # FAST Simulation session
     ####################
     # parameter check: mandatory, free string param management
     # TODO: parameter type check, evaluate the config file option to store parameters
     
     if index == 0:
         new_dataset['session'] = 'fastsim'
         
         parameters = [
             {"name": "evt_file", "label": "Events per file", "customValue": True, "values": []},
             {"name": "analysis", "label": "Analysis", "customValue": True, "values": ["BtoKNuNu", "BtoKstarNuNu", "DstD0ToXLL", "DstD0ToXLL", "Generics", "HadRecoilCocktail", "KplusNuNu", "SLRecoilCocktail", "tau->3mu"]},
             {"name": "dg", "label": "Geometry", "customValue": True, "values": ["DG_4", "DG_4a", "DG_BaBar"]},
             {"name": "generator", "label": "Generator", "customValue": True, "values": ["B0B0bar_Btag-HD_Cocktail", "B0B0bar_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail", "B0B0bar_generic", "B0B0bar_K0nunu", "B0B0bar_K0nunu_SL_e_mu_tau", "B0B0bar_Kstar0nunu_Kpi", "B0B0bar_Kstar0nunu_Kpi_SL_e_mu_tau", "B+B-_Btag-HD_Cocktail", "B+B-_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail", "B+B-_generic", "B+B-_K+nunu", "B+B-_K+nunu_SL_e_mu_tau", "B+B-_Kstar+nunu", "B+B-_Kstar+nunu_SL_e_mu_tau", "B+B-_taunu_SL_e_mu_tau", "bhabha_bhwide", "ccbar", "tau+tau-_kk2f", "uds", "udsc", "Upsilon4S_generic"]},
             {"name": "bkg_mixing", "label": "Background Mixing Type", "customValue": True, "values": ["All", "NoPair", "NoMixing"]},
             {"name": "analysis_type", "label": "Analysis Type", "customValue": True, "values": ["BtoKNuNu", "BtoKstarNuNu", "HadRecoil", "SemiLepKplusNuNu"]}
         ]
         
         for parameter in parameters:
             asksParameter(parameter)
     
     ####################
     # FULL Simulation session
     ####################
     elif index == 1:
         new_dataset['session'] = 'fullsim'
         
         parameters = [
             {"name": "evt_file", "label": "Events per file", "customValue": True, "values": []},
             {"name": "sim_type", "label": "Simulation Type", "customValue": False, "values": ["fullsim", "background_frame"]},
             {"name": "generator", "label": "Generator", "customValue": False, "values": ["RadBhaBha", "singleparticle"]},
             {"name": "dg", "label": "Geometry", "customValue": True, "values": ["Geometry_CIPE", "Geometry_CIPE_BGO", "Geometry_CIPE_CSI", "Geometry_CIPE_V00-00-02"]},
             {"name": "pl", "label": "Physics list", "customValue": True, "values": ["QGSP", "QGSP_BERT", "QGSP_BERT_HP"]},
             {"name": "g4ver", "label": "Geant 4 version", "customValue": True, "values": ["9.2", "9.3"]},
             {"name": "opt_photons", "label": "Optical Photons", "customValue": False, "values": ["OFF", "ON"]}
         ]
         radbhabha = [
             {"name": "brunobbbminde", "label": "Min. Delta E", "customValue": True, "values": []}
         ]
         singleParticle = [
             {"name": "brunopdg", "label": "PDG Code", "customValue": True, "values": []},
             {"name": "brunothetamin", "label": "Theta min.", "customValue": True, "values": []},
             {"name": "brunothetamax", "label": "Theta max.", "customValue": True, "values": []},
             {"name": "brunophimin", "label": "Phi min.", "customValue": True, "values": []},
             {"name": "brunophimax", "label": "Phi max.", "customValue": True, "values": []},
             {"name": "brunoemin", "label": "Energy (GeV) min.", "customValue": True, "values": []},
             {"name": "brunoemax", "label": "Energy (GeV) max.", "customValue": True, "values": []}
         ]
         
         for parameter in parameters:
             value = asksParameter(parameter)
             
             # parameter dependencies management
             if parameter['name'] == 'generator':
                 if value == 'singleparticle':
                     parameters.extend(singleParticle)
                 elif value == 'RadBhaBha':
                     parameters.extend(radbhabha)
     
     ####################
     # ANALYSIS session
     ####################
     elif index == 2:
         new_dataset['session'] = 'analysis'
     else:
         raise GangaException('Invalid selection.')
     
     
     while True:
         free_string = raw_input('\nEnter free string: ')
         max_length = 128
         
         if len(free_string) <= max_length:
             new_dataset['parameters']['free_string'] = free_string
             break
         else:
             print('Free string must be <= %d char long.' % max_length)
     
     # dataset-site relation set
     new_dataset['site'] = getConfig('SuperB')['submission_site']
     new_dataset['owner'] = utils.getOwner()
     new_dataset['dataset_id'] = str(objectid.ObjectId())
     
     print('\nNew dataset details:')
     self.printDatasetDetail(new_dataset)
     
     
     value = ''
     while True:
         value = raw_input('Type \'yes\' to confirm the dataset creation or (q)uit: ')
         if value == 'yes':
             break
         elif value == 'q':
             raise utils.QuitException()
     
     sql = '''INSERT INTO analysis_dataset
         (owner, dataset_id, session, parameters, status)
         VALUES (%s, decode(%s, 'hex'), %s, %s, 'prepared');
         
         INSERT INTO analysis_dataset_site
         (dataset_id, site)
         VALUES (decode(%s, 'hex'), %s);'''
     params = (new_dataset['owner'], 
         new_dataset['dataset_id'], 
         new_dataset['session'], 
         new_dataset['parameters'],
         new_dataset['dataset_id'],
         new_dataset['site'])
     db.write(sql, params)
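db.write receives new_dataset['parameters'] as a plain Python dict; for that to reach the hstore column it has to be adapted by the driver. A minimal sketch of how this works with psycopg2 (an assumption about the backend; the DSN is illustrative and the hstore extension must be installed in the database):

    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect('dbname=superb')    # illustrative DSN
    psycopg2.extras.register_hstore(conn)       # maps Python dicts <-> hstore values
    with conn:
        with conn.cursor() as cur:
            cur.execute('SELECT %s::hstore',
                        ({'free_string': 'demo', 'evt_file': '1000'},))
            print(cur.fetchone()[0])            # comes back as a Python dict
    conn.close()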
Example #12
 def master_configure(self):
     '''This method creates the tar.bz2 archive of the user software directory.
     It is called once per master job.'''
     
     logger.debug('SBApp master_configure called.')
     
     self.now = datetime.datetime.now().strftime("%Y%m%d")
     self.os_arch = os.environ['SBROOT'].split('/')[-1]
     self.user_id = utils.getOwner()
     
     j = self.getJobObject()
     
     # check the target SE status using gridmon DB (updated by nagios monitoring system)
     sql = 'SELECT se_host, nagios_test_service FROM se WHERE name_grid = %s'
     local_SE = db.gridmon(sql, (getConfig('SuperB')['submission_site'], ))
     if local_SE[0]['nagios_test_service'] == 'CRITICAL':
         raise GangaException('Local storage element %s is down.' % local_SE[0]['se_host'])
     #   logger.error('Local storage element %s seems died for gridmon.' % local_SE[0]['se_host'])
     #else:
     #    logger.error('Local storage element %s is back alive for gridmon. !! uncomment exception !!' % local_SE[0]['se_host'])
     
     # create the software directory
     if self.software_dir != '':
         if not os.path.isdir(self.software_dir):
             raise ApplicationConfigurationError(None, 'software_dir must be a directory.')
         
         # make the tar file and update sw_archive parameter
         self.software_dir = os.path.normpath(self.software_dir)
         (head, tail) = os.path.split(self.software_dir)
         self.filename = tail
         self.sw_archive = os.path.join(j.inputdir, tail + '.tar.bz2')
         
         logger.info('Creating archive: %s ...', self.sw_archive)
         logger.info('From: %s', head)
         logger.info('Of: %s', tail)
         
         #savedir = os.getcwd()
         #os.chdir(self.software_dir)
         
         #retcode = subprocess.call("tar -cjf %s * 2>/dev/null" % self.sw_archive, shell=True)
         retcode = subprocess.call("tar -cjf %s -C %s %s 2>/dev/null" % (self.sw_archive, head, tail), shell=True)
         if retcode != 0:
             raise ApplicationConfigurationError(None, 'Error %d while creating archive.' % retcode)
         
         #os.chdir(savedir)
     else:
         raise ApplicationConfigurationError(None, 'software_dir cannot be empty.')
     
     if self.executable == '':
         raise ApplicationConfigurationError(None, 'executable cannot be empty.')
     
     # checking that j.inputdata is a valid object
     if not isinstance(j.inputdata, (SBInputDataset.SBInputPersonalProduction,
                                     SBInputDataset.SBInputProductionAnalysis,
                                     SBInputDataset.SBInputPureAnalysis)):
         msg = 'j.inputdata %s is not allowed' % str(type(j.inputdata))
         raise ApplicationConfigurationError(None, msg)
     
     # checking that j.inputdata (the input dataset) is a valid dataset
     j.inputdata.check()
     
     # checking that j.outputdata (the output dataset) is valid
     if isinstance(j.outputdata, SBOutputDataset.SBOutputDataset):
         j.outputdata.check()
     
     # creating temp dataset
     self.temp_dataset = str(objectid.ObjectId())
     free_string = '%s_%s_%s' % (j.id, j.name, self.filename)
     
     sql = '''INSERT INTO analysis_dataset
         (owner, dataset_id, session, parameters, status)
         VALUES (%s, decode(%s, 'hex'), %s, %s, 'temp');
         
         INSERT INTO analysis_dataset_site
         (dataset_id, site)
         VALUES (decode(%s, 'hex'), %s);'''
     params = (utils.getOwner(), 
         self.temp_dataset, 
         'analysis', 
         {'free_string': free_string},
         self.temp_dataset,
         getConfig('SuperB')['submission_site'])
     db.write(sql, params)
     
     # merger
     j.merger = TextMerger()
     j.merger.files.extend(['severus.log', 'output_files.txt'])
     j.merger.ignorefailed = True
     j.merger.compress = True
     
     j.splitter = SBSubmission.SBSubmission()
     
     return (0, None)
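The archive is built by passing a formatted string to the shell; below is a minimal alternative sketch using the standard tarfile module instead of invoking tar with shell=True (a swapped-in approach, not what master_configure does):

    import os
    import tarfile

    def make_sw_archive(software_dir, archive_path):
        '''Create archive_path (tar.bz2) containing software_dir under its basename,
        equivalent to: tar -cjf <archive_path> -C <head> <tail>'''
        software_dir = os.path.normpath(software_dir)
        with tarfile.open(archive_path, 'w:bz2') as tar:
            tar.add(software_dir, arcname=os.path.basename(software_dir))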
Example #13
    def check(self):
        '''This method validates the output file information at submission phase.'''
        if len(self.pairs) == 0:
            raise ApplicationConfigurationError(
                None, 'output dataset pairs cannot be empty')

        for key, value in self.pairs.items():
            kwargs = dict()
            kwargs['dataset_id'] = value
            kwargs['owner'] = utils.getOwner()

            manager = SBDatasetManager.SBDatasetManager()
            datasets = manager.getDataset(**kwargs)

            # only one dataset
            if len(datasets) == 0:
                msg = 'Output dataset %s not found' % value
                raise ApplicationConfigurationError(None, msg)
            assert len(datasets) == 1, 'Dataset consistency error'
            dataset = datasets[0]

            # owner
            if dataset['owner'] != utils.getOwner():
                msg = 'You are not the owner of the output dataset %s' % value
                raise ApplicationConfigurationError(None, msg)

            # status
            if dataset['status'] not in ['open', 'prepared']:
                msg = 'Output dataset %s status is not open or prepared' % value
                raise ApplicationConfigurationError(None, msg)

            # site
            sql = 'SELECT site FROM analysis_dataset_site WHERE dataset_id = %s'
            site = db.read(sql, (r'\x' + value, ))
            if site[0]['site'] != getConfig('SuperB')['submission_site']:
                msg = 'Output site mismatch: the submission site for dataset %s has to be %s' % (
                    value, dataset['site'])
                raise ApplicationConfigurationError(None, msg)

            # session
            j = self.getJobObject()
            if isinstance(j.inputdata,
                          SBInputDataset.SBInputPersonalProduction):
                if j.inputdata.session == 'FullSim' and dataset[
                        'session'] != 'fullsim':
                    msg = 'Output dataset type should be \'fullsim\''
                    raise ApplicationConfigurationError(None, msg)

                if j.inputdata.session == 'FastSim' and dataset[
                        'session'] != 'fastsim':
                    msg = 'Output dataset type should be \'fastsim\''
                    raise ApplicationConfigurationError(None, msg)
            else:
                if dataset['session'] != 'analysis':
                    msg = 'Output dataset type should be \'analysis\''
                    raise ApplicationConfigurationError(None, msg)

                # parent: exists only for analysis session
                if j.inputdata.dataset_id is None:
                    msg = 'Input dataset is not defined'
                    raise ApplicationConfigurationError(None, msg)
                else:
                    parent_dataset = j.inputdata.dataset_id
                if 'parent' not in dataset['parameters']:
                    sql = 'UPDATE analysis_dataset SET parameters = parameters || %s WHERE dataset_id = %s'
                    db.write(sql, ({'parent': parent_dataset}, r'\x' + value))
                elif dataset['parameters']['parent'] != parent_dataset:
                    msg = 'Input dataset must be %s' % dataset['parameters'][
                        'parent']
                    raise ApplicationConfigurationError(None, msg)
Example #14
    def master_configure(self):
        '''This method creates the tar.bz2 archive of the user software directory.
        It is called once per master job.'''

        logger.debug('SBApp master_configure called.')

        self.now = datetime.datetime.now().strftime("%Y%m%d")
        self.os_arch = os.environ['SBROOT'].split('/')[-1]
        self.user_id = utils.getOwner()

        j = self.getJobObject()

        # check the target SE status using gridmon DB (updated by nagios monitoring system)
        sql = 'SELECT se_host, nagios_test_service FROM se WHERE name_grid = %s'
        local_SE = db.gridmon(sql, (getConfig('SuperB')['submission_site'], ))
        if local_SE[0]['nagios_test_service'] == 'CRITICAL':
            raise GangaException('Local storage element %s is down.' %
                                 local_SE[0]['se_host'])
        #   logger.error('Local storage element %s seems died for gridmon.' % local_SE[0]['se_host'])
        #else:
        #    logger.error('Local storage element %s is back alive for gridmon. !! uncomment exception !!' % local_SE[0]['se_host'])

        # create the software directory
        if self.software_dir != '':
            if not os.path.isdir(self.software_dir):
                raise ApplicationConfigurationError(
                    'software_dir must be a directory.')

            # make the tar file and update sw_archive parameter
            self.software_dir = os.path.normpath(self.software_dir)
            (head, tail) = os.path.split(self.software_dir)
            self.filename = tail
            self.sw_archive = os.path.join(j.inputdir, tail + '.tar.bz2')

            logger.info('Creating archive: %s ...', self.sw_archive)
            logger.info('From: %s', head)
            logger.info('Of: %s', tail)

            #savedir = os.getcwd()
            #os.chdir(self.software_dir)

            #retcode = subprocess.call("tar -cjf %s * 2>/dev/null" % self.sw_archive, shell=True)
            retcode = subprocess.call("tar -cjf %s -C %s %s 2>/dev/null" %
                                      (self.sw_archive, head, tail),
                                      shell=True)
            if retcode != 0:
                raise ApplicationConfigurationError(
                    'Error %d while creating archive.' % retcode)

            #os.chdir(savedir)
        else:
            raise ApplicationConfigurationError(
                'software_dir cannot be empty.')

        if self.executable == '':
            raise ApplicationConfigurationError('executable cannot be empty.')

        # checking that j.inputdata is a valid object
        if not isinstance(j.inputdata,
                          (SBInputDataset.SBInputPersonalProduction,
                           SBInputDataset.SBInputProductionAnalysis,
                           SBInputDataset.SBInputPureAnalysis)):
            msg = 'j.inputdata %s is not allowed' % str(type(j.inputdata))
            raise ApplicationConfigurationError(msg)

        # checking that j.inputdata (the input dataset) is a valid dataset
        j.inputdata.check()

        # checking that j.outputdata (the output dataset) is valid
        if isinstance(j.outputdata, SBOutputDataset.SBOutputDataset):
            j.outputdata.check()

        # creating temp dataset
        self.temp_dataset = str(objectid.ObjectId())
        free_string = '%s_%s_%s' % (j.id, j.name, self.filename)

        sql = '''INSERT INTO analysis_dataset
            (owner, dataset_id, session, parameters, status)
            VALUES (%s, decode(%s, 'hex'), %s, %s, 'temp');
            
            INSERT INTO analysis_dataset_site
            (dataset_id, site)
            VALUES (decode(%s, 'hex'), %s);'''
        params = (utils.getOwner(), self.temp_dataset, 'analysis', {
            'free_string': free_string
        }, self.temp_dataset, getConfig('SuperB')['submission_site'])
        db.write(sql, params)

        # merger
        j.merger = TextMerger()
        j.merger.files.extend(['severus.log', 'output_files.txt'])
        j.merger.ignorefailed = True
        j.merger.compress = True

        j.splitter = SBSubmission.SBSubmission()

        return (0, None)
Example #15
 def whoami(self):
     '''Print the User id string'''
     print(utils.getOwner())
Example #16
    def createDataset(self):
        '''Interactive method that guides the user through the dataset creation procedure.
        If the dataset is a 'personal production' type, the user is forced to provide
        a filter key.'''
        def asksParameter(parameter):
            '''Interactively ask the user for the value of each parameter of the
            chosen session (FastSim, FullSim, Analysis).'''
            if parameter['customValue'] and len(parameter['values']) == 0:
                value = raw_input('\nEnter %s: ' % parameter['label'])
            elif not parameter['customValue'] and len(
                    parameter['values']) == 0:
                raise GangaException(
                    'Invalid rule (customValue:False and values=0).')
            else:
                table = list()

                i = 0
                for value in parameter['values']:
                    table.append({'id': i, 'value': value})
                    i += 1

                if parameter['customValue']:
                    table.append({'id': i, 'value': 'Enter a custom value'})

                print('\nChoose %s:' % parameter['label'])
                column_names = ('id', 'value')
                print(utils.format_dict_table(table, column_names))
                index = utils.getIndex(maxExclusive=len(table))

                if parameter['customValue'] and index == len(table) - 1:
                    value = raw_input('Custom value: ')
                else:
                    value = table[index]['value']

            # insert the parameter into the dictionary; it will subsequently be
            # stored in the analysis dataset bookkeeping table (hstore field)
            new_dataset['parameters'][parameter['name']] = value

            return value

        type = [
            dict(id=0, dataset_type='FastSim Personal Production'),
            dict(id=1, dataset_type='FullSim Personal Production'),
            dict(id=2, dataset_type='Analysis'),
        ]

        column_names = ('id', 'dataset_type')
        print(utils.format_dict_table(type, column_names))
        index = utils.getIndex(maxExclusive=len(type))

        new_dataset = dict()
        new_dataset['parameters'] = dict()

        ####################
        # FAST Simulation session
        ####################
        # parameter check: mandatory, free string param management
        # TODO: parameter type check, evaluate the config file option to store parameters

        if index == 0:
            new_dataset['session'] = 'fastsim'

            parameters = [{
                "name": "evt_file",
                "label": "Events per file",
                "customValue": True,
                "values": []
            }, {
                "name":
                "analysis",
                "label":
                "Analysis",
                "customValue":
                True,
                "values": [
                    "BtoKNuNu", "BtoKstarNuNu", "DstD0ToXLL", "DstD0ToXLL",
                    "Generics", "HadRecoilCocktail", "KplusNuNu",
                    "SLRecoilCocktail", "tau->3mu"
                ]
            }, {
                "name": "dg",
                "label": "Geometry",
                "customValue": True,
                "values": ["DG_4", "DG_4a", "DG_BaBar"]
            }, {
                "name":
                "generator",
                "label":
                "Generator",
                "customValue":
                True,
                "values": [
                    "B0B0bar_Btag-HD_Cocktail",
                    "B0B0bar_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail",
                    "B0B0bar_generic", "B0B0bar_K0nunu",
                    "B0B0bar_K0nunu_SL_e_mu_tau", "B0B0bar_Kstar0nunu_Kpi",
                    "B0B0bar_Kstar0nunu_Kpi_SL_e_mu_tau",
                    "B+B-_Btag-HD_Cocktail",
                    "B+B-_Btag-SL_e_mu_tau_Bsig-HD_SL_Cocktail",
                    "B+B-_generic", "B+B-_K+nunu", "B+B-_K+nunu_SL_e_mu_tau",
                    "B+B-_Kstar+nunu", "B+B-_Kstar+nunu_SL_e_mu_tau",
                    "B+B-_taunu_SL_e_mu_tau", "bhabha_bhwide", "ccbar",
                    "tau+tau-_kk2f", "uds", "udsc", "Upsilon4S_generic"
                ]
            }, {
                "name": "bkg_mixing",
                "label": "Background Mixing Type",
                "customValue": True,
                "values": ["All", "NoPair", "NoMixing"]
            }, {
                "name":
                "analysis_type",
                "label":
                "Analysis Type",
                "customValue":
                True,
                "values":
                ["BtoKNuNu", "BtoKstarNuNu", "HadRecoil", "SemiLepKplusNuNu"]
            }]

            for parameter in parameters:
                asksParameter(parameter)

        ####################
        # FULL Simulation session
        ####################
        elif index == 1:
            new_dataset['session'] = 'fullsim'

            parameters = [{
                "name": "evt_file",
                "label": "Events per file",
                "customValue": True,
                "values": []
            }, {
                "name": "sim_type",
                "label": "Simulation Type",
                "customValue": False,
                "values": ["fullsim", "background_frame"]
            }, {
                "name": "generator",
                "label": "Generator",
                "customValue": False,
                "values": ["RadBhaBha", "singleparticle"]
            }, {
                "name":
                "dg",
                "label":
                "Geometry",
                "customValue":
                True,
                "values": [
                    "Geometry_CIPE", "Geometry_CIPE_BGO", "Geometry_CIPE_CSI",
                    "Geometry_CIPE_V00-00-02"
                ]
            }, {
                "name": "pl",
                "label": "Physics list",
                "customValue": True,
                "values": ["QGSP", "QGSP_BERT", "QGSP_BERT_HP"]
            }, {
                "name": "g4ver",
                "label": "Geant 4 version",
                "customValue": True,
                "values": ["9.2", "9.3"]
            }, {
                "name": "opt_photons",
                "label": "Optical Photons",
                "customValue": False,
                "values": ["OFF", "ON"]
            }]
            radbhabha = [{
                "name": "brunobbbminde",
                "label": "Min. Delta E",
                "customValue": True,
                "values": []
            }]
            singleParticle = [{
                "name": "brunopdg",
                "label": "PDG Code",
                "customValue": True,
                "values": []
            }, {
                "name": "brunothetamin",
                "label": "Theta min.",
                "customValue": True,
                "values": []
            }, {
                "name": "brunothetamax",
                "label": "Theta max.",
                "customValue": True,
                "values": []
            }, {
                "name": "brunophimin",
                "label": "Phi min.",
                "customValue": True,
                "values": []
            }, {
                "name": "brunophimax",
                "label": "Phi max.",
                "customValue": True,
                "values": []
            }, {
                "name": "brunoemin",
                "label": "Energy (GeV) min.",
                "customValue": True,
                "values": []
            }, {
                "name": "brunoemax",
                "label": "Energy (GeV) max.",
                "customValue": True,
                "values": []
            }]

            for parameter in parameters:
                value = asksParameter(parameter)

                # parameter dependencies management
                if parameter['name'] == 'generator':
                    if value == 'singleparticle':
                        parameters.extend(singleParticle)
                    elif value == 'RadBhaBha':
                        parameters.extend(radbhabha)

        ####################
        # ANALYSIS session
        ####################
        elif index == 2:
            new_dataset['session'] = 'analysis'
        else:
            raise GangaException('Invalid selection.')

        while True:
            free_string = raw_input('\nEnter free string: ')
            max_length = 128

            if len(free_string) <= max_length:
                new_dataset['parameters']['free_string'] = free_string
                break
            else:
                print('Free string must be <= %d char long.' % max_length)

        # dataset-site relation set
        new_dataset['site'] = getConfig('SuperB')['submission_site']
        new_dataset['owner'] = utils.getOwner()
        new_dataset['dataset_id'] = str(objectid.ObjectId())

        print('\nNew dataset details:')
        self.printDatasetDetail(new_dataset)

        value = ''
        while True:
            value = raw_input(
                'Type \'yes\' to confirm the dataset creation or (q)uit: ')
            if value == 'yes':
                break
            elif value == 'q':
                raise utils.QuitException()

        sql = '''INSERT INTO analysis_dataset
            (owner, dataset_id, session, parameters, status)
            VALUES (%s, decode(%s, 'hex'), %s, %s, 'prepared');
            
            INSERT INTO analysis_dataset_site
            (dataset_id, site)
            VALUES (decode(%s, 'hex'), %s);'''
        params = (new_dataset['owner'], new_dataset['dataset_id'],
                  new_dataset['session'], new_dataset['parameters'],
                  new_dataset['dataset_id'], new_dataset['site'])
        db.write(sql, params)
Example #17
 def whoami(self):
     '''Print the User id string'''
     print(utils.getOwner())
Example #18
 def check(self):
     '''This method validates the output file information at submission phase.'''
     if len(self.pairs) == 0:
         raise ApplicationConfigurationError(None, 'output dataset pairs cannot be empty')
     
     for key, value in self.pairs.items():
         kwargs = dict()
         kwargs['dataset_id'] = value
         kwargs['owner'] = utils.getOwner()
         
         manager = SBDatasetManager.SBDatasetManager()
         datasets = manager.getDataset(**kwargs)
         
         # only one dataset
         if len(datasets) == 0:
             msg = 'Output dataset %s not found' % value
             raise ApplicationConfigurationError(None, msg)
         assert len(datasets) == 1, 'Dataset consistency error'
         dataset = datasets[0]
         
         # owner
         if dataset['owner'] != utils.getOwner():
             msg = 'You are not the owner of the output dataset %s' % value
             raise ApplicationConfigurationError(None, msg)
         
         # status
         if dataset['status'] not in ['open', 'prepared']:
             msg = 'Output dataset %s status is not open or prepared' % value
             raise ApplicationConfigurationError(None, msg)
         
         # site
         sql = 'SELECT site FROM analysis_dataset_site WHERE dataset_id = %s'
         site = db.read(sql, (r'\x' + value, ))
         if site[0]['site'] != getConfig('SuperB')['submission_site']:
             msg = 'Output site mismatch: the submission site for dataset %s has to be %s' % (value, dataset['site'])
             raise ApplicationConfigurationError(None, msg)
         
         # session
         j = self.getJobObject()
         if isinstance(j.inputdata, SBInputDataset.SBInputPersonalProduction):
             if j.inputdata.session == 'FullSim' and dataset['session'] != 'fullsim':
                 msg = 'Output dataset type should be \'fullsim\''
                 raise ApplicationConfigurationError(None, msg)
             
             if j.inputdata.session == 'FastSim' and dataset['session'] != 'fastsim':
                 msg = 'Output dataset type should be \'fastsim\''
                 raise ApplicationConfigurationError(None, msg)
         else:
             if dataset['session'] != 'analysis':
                 msg = 'Output dataset type should be \'analysis\''
                 raise ApplicationConfigurationError(None, msg)
             
             # parent: exists only for analysis session
             if j.inputdata.dataset_id is None:
                 msg = 'Input dataset is not defined'
                 raise ApplicationConfigurationError(None, msg)
             else:
                 parent_dataset = j.inputdata.dataset_id
             if 'parent' not in dataset['parameters']:
                 sql = 'UPDATE analysis_dataset SET parameters = parameters || %s WHERE dataset_id = %s'
                 db.write(sql, ({'parent': parent_dataset}, r'\x' + value))
             elif dataset['parameters']['parent'] != parent_dataset:
                 msg = 'Input dataset must be %s' % dataset['parameters']['parent']
                 raise ApplicationConfigurationError(None, msg)