Example #1
def main():
    """
    Main function: parse arguments and dump MongoDB collections to the output directory
    """
    parser = argparse.ArgumentParser(description='Mongo DB Backup Script')
    parser.add_argument('--mode',
                        help='Use production (prod) or development (dev) section of config',
                        choices=['prod', 'dev'],
                        required=True)
    parser.add_argument('--config',
                        default='config.cfg',
                        help='Specify a non-standard config file name')
    parser.add_argument('--output',
                        help='Specify directory where output should be put',
                        required=True)
    parser.add_argument('--db',
                        help='Database name',
                        required=True)
    parser.add_argument('--collections',
                        help='Comma separated list of collections to back-up',
                        required=True)
    args = vars(parser.parse_args())
    config = Config.load('../' + args.get('config'), args.get('mode'))
    database_auth = config['database_auth']
    output_dir = args['output']
    db_name = args['db']
    collections = args['collections'].split(',')
    dump_documents(database_auth, output_dir, db_name, collections)
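
For reference, a typical invocation of this entry point could look like the line below; the script file name, output path and database/collection names are placeholders, not taken from the source:

python3 mongodb_backup.py --mode dev --output /tmp/mongo_backup --db rereco --collections requests,tickets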
Example #2
    def force_stats_to_refresh(self, workflows):
        """
        Force Stats2 to update workflows with given workflow names
        """
        if not workflows:
            return

        credentials_file = Config.get('credentials_file')
        with self.locker.get_lock('refresh-stats'):
            workflow_update_commands = [
                'cd /home/pdmvserv/private', 'source setup_credentials.sh',
                'cd /home/pdmvserv/Stats2'
            ]
            for workflow_name in workflows:
                workflow_update_commands.append(
                    f'python3 stats_update.py --action update --name {workflow_name}'
                )

            self.logger.info('Will make Stats2 refresh these workflows: %s',
                             ', '.join(workflows))
            with SSHExecutor('vocms074.cern.ch',
                             credentials_file) as ssh_executor:
                ssh_executor.execute_command(workflow_update_commands)

            self.logger.info('Finished making Stats2 refresh workflows')
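
For two workflows, say 'wf_A' and 'wf_B' (placeholder names), the command list handed to SSHExecutor above ends up as:

workflow_update_commands = [
    'cd /home/pdmvserv/private',
    'source setup_credentials.sh',
    'cd /home/pdmvserv/Stats2',
    'python3 stats_update.py --action update --name wf_A',
    'python3 stats_update.py --action update --name wf_B',
]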
Example #3
    def get_datasets(self, query, exclude_list=None):
        """
        Query DBS for list of datasets
        """
        if not query:
            return []

        grid_cert = Config.get('grid_user_cert')
        grid_key = Config.get('grid_user_key')
        with self.locker.get_lock('get-ticket-datasets'):
            start_time = time.time()
            connection_wrapper = ConnectionWrapper(host='cmsweb-prod.cern.ch',
                                                   port=8443,
                                                   max_attempts=1,
                                                   cert_file=grid_cert,
                                                   key_file=grid_key)
            response = connection_wrapper.api(
                'POST', '/dbs/prod/global/DBSReader/datasetlist', {
                    'dataset': query,
                    'detail': 1
                })

        response = json.loads(response.decode('utf-8'))
        valid_types = ('VALID', 'PRODUCTION')
        datasets = [
            x['dataset'] for x in response
            if x['dataset_access_type'] in valid_types
        ]
        dataset_blacklist = set(Settings().get('dataset_blacklist'))
        datasets = [
            x for x in datasets if x.split('/')[1] not in dataset_blacklist
        ]
        if exclude_list:
            filtered_datasets = []
            for dataset in datasets:
                for exclude in exclude_list:
                    if exclude in dataset:
                        break
                else:
                    filtered_datasets.append(dataset)

            datasets = filtered_datasets

        end_time = time.time()
        self.logger.info('Got %s datasets from DBS for query %s in %.2fs',
                         len(datasets), query, end_time - start_time)
        return datasets
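
The exclude_list handling above relies on Python's for/else construct: the else branch runs only if the inner loop finished without hitting break. A minimal, self-contained sketch with placeholder dataset names:

# Keep only datasets that match none of the exclusion substrings
datasets = ['/RelValA/CMSSW_12_0_0-v1/GEN-SIM', '/RelValB/CMSSW_12_0_0-v1/GEN-SIM']
exclude_list = ['RelValB']
filtered_datasets = []
for dataset in datasets:
    for exclude in exclude_list:
        if exclude in dataset:
            break  # dataset matches an exclusion substring, drop it
    else:
        filtered_datasets.append(dataset)  # no break happened, keep it

# filtered_datasets == ['/RelValA/CMSSW_12_0_0-v1/GEN-SIM']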
Example #4
    def send(self, subject, body, recipients):
        body = body.strip() + '\n\nSincerely,\nRelVal Machine'
        if Config.get('development'):
            subject = f'[RelVal-DEV] {subject}'
        else:
            subject = f'[RelVal] {subject}'

        super().send(subject, body, recipients)
Example #5
    def submit_relval(self, relval, controller):
        """
        Method that is used by submission workers. This is where the actual submission happens
        """
        prepid = relval.get_prepid()
        credentials_file = Config.get('credentials_file')
        workspace_dir = Config.get('remote_path').rstrip('/')
        self.logger.debug('Will try to acquire lock for %s', prepid)
        with Locker().get_lock(prepid):
            self.logger.info('Locked %s for submission', prepid)
            relval_db = Database('relvals')
            relval = controller.get(prepid)
            try:
                self.check_for_submission(relval)
                with SSHExecutor('lxplus.cern.ch', credentials_file) as ssh:
                    # Start executing commands
                    self.prepare_workspace(relval, controller, ssh,
                                           workspace_dir)
                    # Create configs
                    self.generate_configs(relval, ssh, workspace_dir)
                    # Upload configs
                    config_hashes = self.upload_configs(
                        relval, ssh, workspace_dir)
                    # Remove remote relval directory
                    ssh.execute_command([f'rm -rf {workspace_dir}/{prepid}'])

                self.logger.debug(config_hashes)
                # Iterate through uploaded configs and save their hashes in RelVal steps
                self.update_steps_with_config_hashes(relval, config_hashes)
                # Submit job dict to ReqMgr2
                job_dict = controller.get_job_dict(relval)
                cmsweb_url = Config.get('cmsweb_url')
                grid_cert = Config.get('grid_user_cert')
                grid_key = Config.get('grid_user_key')
                connection = ConnectionWrapper(host=cmsweb_url,
                                               cert_file=grid_cert,
                                               key_file=grid_key)
                workflow_name = self.submit_job_dict(job_dict, connection)
                # Update RelVal after successful submission
                relval.set('workflows', [{'name': workflow_name}])
                relval.set('status', 'submitted')
                relval.add_history('submission', 'succeeded', 'automatic')
                relval_db.save(relval.get_json())
                time.sleep(3)
                self.approve_workflow(workflow_name, connection)
                connection.close()
                if not Config.get('development'):
                    refresh_workflows_in_stats([workflow_name])

            except Exception as ex:
                self.__handle_error(relval, str(ex))
                return

            self.__handle_success(relval)

        if not Config.get('development'):
            controller.update_workflows(relval)

        self.logger.info('Successfully finished %s submission', prepid)
Example #6
    def create_relvals_for_ticket(self, ticket):
        """
        Create RelVals from given ticket. Return list of relval prepids
        """
        ticket_db = Database('tickets')
        ticket_prepid = ticket.get_prepid()
        ssh_executor = SSHExecutor('lxplus.cern.ch', Config.get('credentials_file'))
        relval_controller = RelValController()
        created_relvals = []
        with self.locker.get_lock(ticket_prepid):
            ticket = self.get(ticket_prepid)
            rewrite_gt_string = ticket.get('rewrite_gt_string')
            recycle_input_of = ticket.get('recycle_input_of')
            try:
                workflows = self.generate_workflows(ticket, ssh_executor)
                # Iterate through workflows and create RelVal objects
                relvals = []
                for workflow_id, workflow_dict in workflows.items():
                    relvals.append(self.create_relval_from_workflow(ticket,
                                                                    workflow_id,
                                                                    workflow_dict))

                # Handle recycling if needed
                if recycle_input_of:
                    if rewrite_gt_string:
                        self.recycle_input_with_gt_rewrite(relvals,
                                                           rewrite_gt_string,
                                                           recycle_input_of)
                    else:
                        self.recycle_input(relvals,
                                           relval_controller,
                                           recycle_input_of)

                for relval in relvals:
                    relval = relval_controller.create(relval.get_json())
                    created_relvals.append(relval)
                    self.logger.info('Created %s', relval.get_prepid())

                created_relval_prepids = [r.get('prepid') for r in created_relvals]
                ticket.set('created_relvals', created_relval_prepids)
                ticket.set('status', 'done')
                ticket.add_history('created_relvals', created_relval_prepids, None)
                ticket_db.save(ticket.get_json())
            except Exception as ex:
                self.logger.error('Error creating RelVal from ticket: %s', ex)
                # Delete created relvals if there was an Exception
                for created_relval in reversed(created_relvals):
                    relval_controller.delete({'prepid': created_relval.get('prepid')})

                # And reraise the exception
                raise ex
            finally:
                # Close all SSH connections
                ssh_executor.close_connections()

        return [r.get('prepid') for r in created_relvals]
Example #7
def main():
    """
    Main function: start Flask web server
    """
    parser = argparse.ArgumentParser(description='ReReco Machine')
    parser.add_argument('--mode',
                        help='Use production (prod) or development (dev) section of config',
                        choices=['prod', 'dev'],
                        required=True)
    parser.add_argument('--config',
                        default='config.cfg',
                        help='Specify a non-standard config file name')
    parser.add_argument('--debug',
                        help='Run Flask in debug mode',
                        action='store_true')
    args = vars(parser.parse_args())
    config = Config.load(args.get('config'), args.get('mode'))
    database_auth = config.get('database_auth')

    Database.set_database_name('rereco')
    if database_auth:
        Database.set_credentials_file(database_auth)

    Database.add_search_rename('requests', 'runs', 'runs<int>')
    Database.add_search_rename('requests', 'run', 'runs<int>')
    Database.add_search_rename('requests', 'workflows', 'workflows.name')
    Database.add_search_rename('requests', 'workflow', 'workflows.name')
    Database.add_search_rename('requests', 'output_dataset', 'output_datasets')
    Database.add_search_rename('requests', 'input_dataset', 'input.dataset')
    Database.add_search_rename('requests', 'input_request', 'input.request')
    Database.add_search_rename('requests', 'created_on', 'history.0.time')
    Database.add_search_rename('requests', 'created_by', 'history.0.user')
    Database.add_search_rename('subcampaigns', 'created_on', 'history.0.time')
    Database.add_search_rename('subcampaigns', 'created_by', 'history.0.user')
    Database.add_search_rename('tickets', 'created_on', 'history.0.time')
    Database.add_search_rename('tickets', 'created_by', 'history.0.user')
    Database.add_search_rename('tickets', 'subcampaign', 'steps.subcampaign')
    Database.add_search_rename('tickets', 'processing_string', 'steps.processing_string')

    debug = args.get('debug', False)
    port = int(config.get('port', 8002))
    host = config.get('host', '0.0.0.0')
    logger = setup_logging(debug)
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        # Do only once, before the reloader
        pid = os.getpid()
        logger.info('PID: %s', pid)
        with open('rereco.pid', 'w') as pid_file:
            pid_file.write(str(pid))

    logger.info('Starting... Debug: %s, Host: %s, Port: %s', debug, host, port)
    app.run(host=host,
            port=port,
            threaded=True,
            debug=debug)
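
A typical way to start this server in development mode, assuming the module is run directly (the file name is a placeholder):

python3 main.py --mode dev --config config.cfg --debug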
Example #8
    def __reject_workflows(self, workflows):
        """
        Reject or abort list of workflows in ReqMgr2
        """
        cmsweb_url = Config.get('cmsweb_url')
        grid_cert = Config.get('grid_user_cert')
        grid_key = Config.get('grid_user_key')
        connection = ConnectionWrapper(host=cmsweb_url,
                                       keep_open=True,
                                       cert_file=grid_cert,
                                       key_file=grid_key)
        headers = {
            'Content-type': 'application/json',
            'Accept': 'application/json'
        }
        for workflow in workflows:
            workflow_name = workflow['name']
            status_history = workflow.get('status_history')
            if not status_history:
                self.logger.error('%s has no status history', workflow_name)
                status_history = [{'status': '<unknown>'}]

            last_workflow_status = status_history[-1]['status']
            self.logger.info('%s last status is %s', workflow_name,
                             last_workflow_status)
            # Depending on current status of workflow,
            # it might need to be either aborted or rejected
            if last_workflow_status in ('assigned', 'staging', 'staged',
                                        'acquired', 'running-open',
                                        'running-closed'):
                new_status = 'aborted'
            else:
                new_status = 'rejected'

            self.logger.info('Will change %s status %s to %s', workflow_name,
                             last_workflow_status, new_status)
            reject_response = connection.api(
                'PUT', f'/reqmgr2/data/request/{workflow_name}',
                {'RequestStatus': new_status}, headers)
            self.logger.info(reject_response)

        connection.close()
Example #9
    def change_request_priority(self, request, priority):
        """
        Change request priority
        """
        prepid = request.get_prepid()
        request_db = Database('requests')
        cmsweb_url = Config.get('cmsweb_url')
        grid_cert = Config.get('grid_user_cert')
        grid_key = Config.get('grid_user_key')
        self.logger.info('Will try to change %s priority to %s', prepid,
                         priority)
        with self.locker.get_nonblocking_lock(prepid):
            request_json = request_db.get(prepid)
            request = Request(json_input=request_json)
            if request.get('status') != 'submitted':
                raise Exception('It is not allowed to change priority of '
                                'requests that are not in status "submitted"')

            request.set('priority', priority)
            updated_workflows = []
            active_workflows = self.__pick_active_workflows(request)
            connection = ConnectionWrapper(host=cmsweb_url,
                                           keep_open=True,
                                           cert_file=grid_cert,
                                           key_file=grid_key)
            for workflow in active_workflows:
                workflow_name = workflow['name']
                self.logger.info('Changing "%s" priority to %s', workflow_name,
                                 priority)
                response = connection.api(
                    'PUT', f'/reqmgr2/data/request/{workflow_name}',
                    {'RequestPriority': priority})
                updated_workflows.append(workflow_name)
                self.logger.debug(response)

            connection.close()
            # Update priority in Stats2
            self.force_stats_to_refresh(updated_workflows)
            # Finally save the request
            request_db.save(request.get_json())

        return request
Example #10
    def get_job_dict(self, request):
        """
        Return a dictionary for ReqMgr2
        """
        prepid = request.get_prepid()
        self.logger.debug('Getting job dict for %s', prepid)
        sequences = request.get('sequences')
        database_url = Config.get('cmsweb_url') + '/couchdb'
        request_string = request.get_request_string()
        campaign_name = request.get('subcampaign').split('-')[0]
        job_dict = {}
        job_dict['Campaign'] = campaign_name
        job_dict['CMSSWVersion'] = request.get('cmssw_release')
        job_dict['ConfigCacheUrl'] = database_url
        job_dict['CouchURL'] = database_url
        job_dict['EnableHarvesting'] = False
        job_dict['Group'] = 'PPD'
        job_dict['Memory'] = request.get('memory')
        job_dict['RequestType'] = 'ReReco'
        job_dict['PrepID'] = request.get_prepid()
        job_dict['Requestor'] = 'pdmvserv'
        job_dict['RequestPriority'] = request.get('priority')
        job_dict['RequestString'] = request_string
        job_dict['ScramArch'] = get_scram_arch(request.get('cmssw_release'))
        job_dict['SizePerEvent'] = request.get('size_per_event')
        job_dict['TimePerEvent'] = request.get('time_per_event')
        if len(sequences) <= 1:
            job_dict.update(self.get_job_dict_singletask(request, sequences))
        else:
            job_dict.update(self.get_job_dict_taskchain(request, sequences))

        if job_dict.get('EnableHarvesting'):
            if not Config.get('development'):
                # Do not upload to prod DQM GUI in dev
                job_dict['DQMUploadUrl'] = 'https://cmsweb.cern.ch/dqm/offline'
            else:
                # Upload to some dev DQM GUI
                job_dict['DQMUploadUrl'] = 'https://cmsweb-testbed.cern.ch/dqm/dev'

        return job_dict
Example #11
    def __handle_success(self, request):
        """
        Handle notification of successful submission
        """
        prepid = request.get_prepid()
        last_workflow = request.get('workflows')[-1]['name']
        cmsweb_url = Config.get('cmsweb_url')
        self.logger.info('Submission of %s succeeded', prepid)
        service_url = Config.get('service_url')
        emailer = Emailer()
        subject = f'Request {prepid} submission succeeded'
        body = f'Hello,\n\nSubmission of {prepid} succeeded.\n'
        body += (f'You can find this request at '
                 f'{service_url}/requests?prepid={prepid}\n')
        body += f'Workflow in ReqMgr2 {cmsweb_url}/reqmgr2/fetch?rid={last_workflow}'
        if Config.get('development'):
            body += '\nNOTE: This was submitted from a development instance of ReReco machine '
            body += 'and this job will never start running in computing!\n'

        recipients = emailer.get_recipients(request)
        emailer.send(subject, body, recipients)
Example #12
    def get_dataset_runs(self, dataset):
        """
        Fetch a list of runs from DBS for a given dataset
        """
        if not dataset:
            return []

        conn = ConnectionWrapper(host='cmsweb-prod.cern.ch',
                                 port=8443,
                                 cert_file=Config.get('grid_user_cert'),
                                 key_file=Config.get('grid_user_key'))
        with self.locker.get_lock('get-dataset-runs'):
            response = conn.api(
                'GET', f'/dbs/prod/global/DBSReader/runs?dataset={dataset}')

        response = json.loads(response.decode('utf-8'))
        if not response:
            return []

        runs = response[0].get('run_num', [])
        self.logger.debug('Fetched %s runs for %s from DBS', len(runs),
                          dataset)
        return runs
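
The parsing above assumes the DBS runs endpoint returns a JSON list whose first element carries the run numbers, roughly like this (illustrative values):

# Shape implied by response[0].get('run_num', [])
response = [{'run_num': [316569, 316570, 316571]}]
runs = response[0].get('run_num', [])  # -> [316569, 316570, 316571]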
Example #13
    def move_request_to_submitting(self, request):
        """
        Try to move request to submitting status and get it submitted
        """
        self.update_input_dataset(request)
        input_dataset = request.get('input')['dataset']
        if not input_dataset.strip():
            prepid = request.get_prepid()
            raise Exception(f'Could not move {prepid} to submitting '
                            'because it does not have input dataset')

        # Make sure input dataset is VALID
        grid_cert = Config.get('grid_user_cert')
        grid_key = Config.get('grid_user_key')
        dbs_conn = ConnectionWrapper(host='cmsweb-prod.cern.ch',
                                     port=8443,
                                     cert_file=grid_cert,
                                     key_file=grid_key)
        dbs_response = dbs_conn.api('POST',
                                    '/dbs/prod/global/DBSReader/datasetlist', {
                                        'dataset': input_dataset,
                                        'detail': 1
                                    })
        dbs_response = json.loads(dbs_response.decode('utf-8'))
        dataset_access_type = dbs_response[0].get('dataset_access_type',
                                                  'unknown')
        self.logger.info('%s access type is %s', input_dataset,
                         dataset_access_type)
        if dataset_access_type != 'VALID':
            raise Exception(
                f'{input_dataset} type is {dataset_access_type}, it must be VALID'
            )

        RequestSubmitter().add(request, self)
        self.update_status(request, 'submitting')
        return request
Example #14
def main():
    """
    Main function: parse arguments, load config and run move_to_done
    """
    parser = argparse.ArgumentParser(description='RelVal Machine Script')
    parser.add_argument(
        '--mode',
        help='Use production (prod) or development (dev) section of config',
        choices=['prod', 'dev'],
        required=True)
    parser.add_argument('--config',
                        default='config.cfg',
                        help='Specify a non-standard config file name')
    args = vars(parser.parse_args())
    config = Config.load('../' + args.get('config'), args.get('mode'))
    database_auth = config['database_auth']
    port = int(config['port'])
    move_to_done(database_auth, port)
Example #15
    def get_config_upload_file(self, request, for_submission=False):
        """
        Get bash script that would upload config files to ReqMgr2
        """
        self.logger.debug('Getting config upload script for %s',
                          request.get_prepid())
        database_url = Config.get('cmsweb_url').replace('https://',
                                                        '').replace(
                                                            'http://', '')
        command = '#!/bin/bash'
        # Check if all expected config files are present
        common_check_part = '\n\nif [ ! -s "%s.py" ]; then\n'
        common_check_part += '  echo "File %s.py is missing" >&2\n'
        common_check_part += '  exit 1\n'
        common_check_part += 'fi'
        for configs in request.get_config_file_names():
            # Run config uploader
            command += common_check_part % (configs['config'],
                                            configs['config'])
            if configs.get('harvest'):
                command += common_check_part % (configs['harvest'],
                                                configs['harvest'])

        # Set up CMSSW environment
        command += '\n\n'
        command += cmssw_setup(request.get('cmssw_release'),
                               reuse_cmssw=for_submission)
        # Use ConfigCacheLite and TweakMakerLite instead of WMCore
        command += '\n\n'
        command += config_cache_lite_setup(reuse_files=for_submission)
        # Upload command will be identical for all configs
        command += '\n'
        common_upload_part = (
            '\npython config_uploader.py --file $(pwd)/%s.py --label %s '
            f'--group ppd --user $(echo $USER) --db {database_url} || exit $?')
        for configs in request.get_config_file_names():
            # Run config uploader
            command += common_upload_part % (configs['config'],
                                             configs['config'])
            if configs.get('harvest'):
                command += common_upload_part % (configs['harvest'],
                                                 configs['harvest'])

        return command
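
For a single config named step1_cfg (a placeholder), the generated script would start roughly like the snippet below; the fragments produced by cmssw_setup and config_cache_lite_setup are elided since they are built elsewhere, and cmsweb.cern.ch stands in for the configured cmsweb_url:

#!/bin/bash

if [ ! -s "step1_cfg.py" ]; then
  echo "File step1_cfg.py is missing" >&2
  exit 1
fi

# ... lines produced by cmssw_setup(...) ...

# ... lines produced by config_cache_lite_setup(...) ...

python config_uploader.py --file $(pwd)/step1_cfg.py --label step1_cfg --group ppd --user $(echo $USER) --db cmsweb.cern.ch || exit $?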
Example #16
    def __handle_error(self, request, error_message):
        """
        Handle error that occurred during submission, modify request accordingly
        """
        request_db = Database('requests')
        request.set('status', 'new')
        request.add_history('submission', 'failed', 'automatic')
        request_db.save(request.get_json())
        service_url = Config.get('service_url')
        emailer = Emailer()
        prepid = request.get_prepid()
        self.logger.warning('Submission of %s failed', prepid)
        subject = f'Request {prepid} submission failed'
        body = f'Hello,\n\nUnfortunately submission of {prepid} failed.\n'
        body += (f'You can find this request at '
                 f'{service_url}/requests?prepid={prepid}\n')
        body += f'Error message:\n\n{error_message}'
        recipients = emailer.get_recipients(request)
        emailer.send(subject, body, recipients)
Example #17
    def __handle_error(self, relval, error_message):
        """
        Handle error that occurred during submission, modify RelVal accordingly
        """
        self.logger.error(error_message)
        relval_db = Database('relvals')
        relval.set('status', 'new')
        relval.set('campaign_timestamp', 0)
        relval.add_history('submission', 'failed', 'automatic')
        for step in relval.get('steps'):
            step.set('config_id', '')
            step.set('resolved_globaltag', '')

        relval_db.save(relval.get_json())
        service_url = Config.get('service_url')
        emailer = Emailer()
        prepid = relval.get_prepid()
        subject = f'RelVal {prepid} submission failed'
        body = f'Hello,\n\nUnfortunately submission of {prepid} failed.\n'
        body += (f'You can find this relval at '
                 f'{service_url}/relvals?prepid={prepid}\n')
        body += f'Error message:\n\n{error_message}'
        recipients = emailer.get_recipients(relval)
        emailer.send(subject, body, recipients)
Example #18
    def get_job_dict(self, relval):
        #pylint: disable=too-many-statements
        """
        Return a dictionary for ReqMgr2
        """
        prepid = relval.get_prepid()
        self.logger.debug('Getting job dict for %s', prepid)
        job_dict = {}
        job_dict['Group'] = 'PPD'
        job_dict['Requestor'] = 'pdmvserv'
        job_dict['CouchURL'] = Config.get('cmsweb_url') + '/couchdb'
        job_dict['ConfigCacheUrl'] = job_dict['CouchURL']
        job_dict['PrepID'] = prepid
        job_dict['SubRequestType'] = 'RelVal'
        job_dict['RequestString'] = relval.get_request_string()
        job_dict['Campaign'] = relval.get_campaign()
        job_dict['RequestPriority'] = 500000
        job_dict['TimePerEvent'] = relval.get('time_per_event')
        job_dict['SizePerEvent'] = relval.get('size_per_event')
        job_dict['ProcessingVersion'] = 1
        # Harvesting should run on a single core with 3 GB of memory,
        # and each task will have its own core and memory setting
        job_dict['Memory'] = 3000
        job_dict['Multicore'] = 1
        job_dict['EnableHarvesting'] = False
        # Set DbsUrl differently for dev and prod versions
        # "URL to the DBS instance where the input data is registered"
        if not Config.get('development'):
            job_dict['DbsUrl'] = 'https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader'
        else:
            job_dict['DbsUrl'] = 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'

        task_number = 0
        input_step = None
        global_dict_step = None
        for step_index, step in enumerate(relval.get('steps')):
            # If it's input file, it's not a task
            if step.get_step_type() == 'input_file':
                input_step = step
                continue

            if not global_dict_step:
                global_dict_step = step

            # Handle harvesting step quickly
            if step.has_step('HARVESTING'):
                # It is harvesting step
                # It goes in the main job_dict
                job_dict['DQMConfigCacheID'] = step.get('config_id')
                job_dict['EnableHarvesting'] = True
                if not Config.get('development'):
                    # Do not upload to prod DQM GUI in dev
                    job_dict['DQMUploadUrl'] = 'https://cmsweb.cern.ch/dqm/relval'
                else:
                    # Upload to some dev DQM GUI
                    job_dict['DQMUploadUrl'] = 'https://cmsweb-testbed.cern.ch/dqm/dev'

                continue

            # Add task to main dict
            task_number += 1
            job_dict[f'Task{task_number}'] = self.get_task_dict(
                relval, step, step_index)

        # Set values to the main dictionary
        if global_dict_step:
            job_dict['CMSSWVersion'] = global_dict_step.get_release()
            job_dict['ScramArch'] = global_dict_step.get_scram_arch()
            job_dict['AcquisitionEra'] = job_dict['CMSSWVersion']
            resolved_globaltag = global_dict_step.get('resolved_globaltag')
            if resolved_globaltag:
                job_dict['GlobalTag'] = resolved_globaltag

            global_step_index = global_dict_step.get_index_in_parent()
            processing_string = relval.get_processing_string(global_step_index)
            if processing_string:
                job_dict['ProcessingString'] = processing_string

        if task_number > 0:
            # At least one task - TaskChain workflow
            job_dict['RequestType'] = 'TaskChain'
            job_dict['TaskChain'] = task_number

        elif global_dict_step:
            # Only harvesting step - DQMHarvest workflow
            job_dict['RequestType'] = 'DQMHarvest'
            if input_step:
                input_dict = input_step.get('input')
                job_dict['InputDataset'] = input_dict['dataset']
                if input_dict['lumisection']:
                    job_dict['LumiList'] = input_dict['lumisection']
                elif input_dict['run']:
                    job_dict['RunWhitelist'] = input_dict['run']

        job_dict_overwrite = relval.get('job_dict_overwrite')
        if job_dict_overwrite:
            self.logger.info('Overwriting job dict for %s with %s', prepid,
                             job_dict_overwrite)
            self.apply_job_dict_overwrite(job_dict, job_dict_overwrite)

        return job_dict
Example #19
    def resolve_auto_conditions(self, conditions_tree):
        """
        Iterate through conditions tree and resolve global tags
        Conditions tree example:
        {
            "CMSSW_11_2_0_pre9": {
                "slc7_a_b_c": {
                    "auto:phase1_2021_realistic": None
                }
            }
        }
        """
        self.logger.debug('Resolve auto conditions of:\n%s',
                          json.dumps(conditions_tree, indent=2))
        credentials_file = Config.get('credentials_file')
        remote_directory = Config.get('remote_path').rstrip('/')
        command = [f'cd {remote_directory}']
        for cmssw_version, scram_tree in conditions_tree.items():
            for scram_arch, conditions in scram_tree.items():
                # Setup CMSSW environment
                # No need to explicitly reuse CMSSW as this happens in relval_submission directory
                os_name, _, gcc_version = clean_split(scram_arch, '_')
                amd_scram_arch = f'{os_name}_amd64_{gcc_version}'
                command.extend(
                    cmssw_setup(cmssw_version,
                                scram_arch=amd_scram_arch).split('\n'))
                conditions_str = ','.join(list(conditions.keys()))
                command += [(
                    'python3 resolve_auto_global_tag.py ' +
                    f'"{cmssw_version}" "{scram_arch}" "{conditions_str}" || exit $?'
                )]

        self.logger.debug('Resolve auto conditions command:\n%s',
                          '\n'.join(command))
        with SSHExecutor('lxplus.cern.ch', credentials_file) as ssh_executor:
            # Create the remote directory and upload the script that resolves auto globaltags
            stdout, stderr, exit_code = ssh_executor.execute_command(
                f'mkdir -p {remote_directory}')
            if exit_code != 0:
                self.logger.error('Error creating %s:\nstdout:%s\nstderr:%s',
                                  remote_directory, stdout, stderr)
                raise Exception(f'Error creating remote directory: {stderr}')

            ssh_executor.upload_file(
                './core/utils/resolve_auto_global_tag.py',
                f'{remote_directory}/resolve_auto_global_tag.py')
            stdout, stderr, exit_code = ssh_executor.execute_command(command)
            if exit_code != 0:
                self.logger.error(
                    'Error resolving auto global tags:\nstdout:%s\nstderr:%s',
                    stdout, stderr)
                raise Exception(f'Error resolving auto globaltags: {stderr}')

        tags = [
            x for x in clean_split(stdout, '\n') if x.startswith('GlobalTag:')
        ]
        for resolved_tag in tags:
            split_resolved_tag = clean_split(resolved_tag, ' ')
            cmssw_version = split_resolved_tag[1]
            scram_arch = split_resolved_tag[2]
            conditions = split_resolved_tag[3]
            resolved = split_resolved_tag[4]
            self.logger.debug('Resolved %s to %s in %s (%s)', conditions,
                              resolved, cmssw_version, scram_arch)
            conditions_tree[cmssw_version][scram_arch][conditions] = resolved
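
The parsing at the end expects the remote script to print one space-separated line per resolved tag in the form 'GlobalTag: <release> <scram_arch> <auto conditions> <resolved tag>', for example (the resolved tag value is illustrative):

resolved_tag = 'GlobalTag: CMSSW_11_2_0_pre9 slc7_a_b_c auto:phase1_2021_realistic 112X_mcRun3_realistic_v1'
# clean_split(resolved_tag, ' ') -> ['GlobalTag:', 'CMSSW_11_2_0_pre9', 'slc7_a_b_c',
#                                    'auto:phase1_2021_realistic', '112X_mcRun3_realistic_v1']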
Example #20
    def generate_workflows(self, ticket, ssh_executor):
        """
        Remotely run workflow info extraction from CMSSW and return all workflows
        """
        ticket_prepid = ticket.get_prepid()
        remote_directory = Config.get('remote_path').rstrip('/')
        if ticket.get('recycle_gs') and not ticket.get('recycle_input_of'):
            recycle_gs_flag = '-r '
        else:
            recycle_gs_flag = ''

        cmssw_release = ticket.get('cmssw_release')
        scram_arch = ticket.get('scram_arch')
        scram_arch = scram_arch if scram_arch else get_scram_arch(cmssw_release)
        if not scram_arch:
            raise Exception(f'Could not find SCRAM arch of {cmssw_release}')

        matrix = ticket.get('matrix')
        additional_command = ticket.get('command').strip()
        command_steps = ticket.get('command_steps')
        if additional_command:
            additional_command = additional_command.replace('"', '\\"')
            additional_command = f'-c="{additional_command}"'
            if command_steps:
                command_steps = ','.join(command_steps)
                additional_command += f' -cs={command_steps}'
        else:
            additional_command = ''

        workflow_ids = ','.join([str(x) for x in ticket.get('workflow_ids')])
        self.logger.info('Creating RelVals %s for %s', workflow_ids, ticket_prepid)
        # Prepare remote directory with run_the_matrix_pdmv.py
        command = [f'mkdir -p {remote_directory}']
        _, err, code = ssh_executor.execute_command(command)
        if code != 0:
            raise Exception(f'Error code {code} preparing workspace: {err}')

        ssh_executor.upload_file('core/utils/run_the_matrix_pdmv.py',
                                 f'{remote_directory}/run_the_matrix_pdmv.py')
        # Define a name for the output file
        file_name = f'{ticket_prepid}_{int(time.time())}.json'
        # Execute run_the_matrix_pdmv.py
        command = [f'cd {remote_directory}']
        command.extend(cmssw_setup(cmssw_release, scram_arch=scram_arch).split('\n'))
        command += ['python3 run_the_matrix_pdmv.py '
                    f'-l={workflow_ids} '
                    f'-w={matrix} '
                    f'-o={file_name} '
                    f'{additional_command} '
                    f'{recycle_gs_flag}']
        _, err, code = ssh_executor.execute_command(command)
        if code != 0:
            raise Exception(f'Error code {code} creating RelVals: {err}')

        # Download generated json
        ssh_executor.download_file(f'{remote_directory}/{file_name}',
                                   f'/tmp/{file_name}')

        # Cleanup remote directory by removing all ticket jsons
        ssh_executor.execute_command(f'rm -rf {remote_directory}/{ticket_prepid}_*.json')
        with open(f'/tmp/{file_name}', 'r') as workflows_file:
            workflows = json.load(workflows_file)

        os.remove(f'/tmp/{file_name}')
        return workflows
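
To illustrate, for workflow_ids [10824.0, 10825.0], matrix 'standard', no additional command and no recycling (all placeholder values), the final remote command list would look roughly like:

command = [
    'cd /path/to/remote_directory',  # Config.get('remote_path'), placeholder value
    # ... lines produced by cmssw_setup(cmssw_release, scram_arch=scram_arch) ...
    # Trailing spaces come from the empty additional_command and recycle_gs_flag
    'python3 run_the_matrix_pdmv.py -l=10824.0,10825.0 -w=standard '
    '-o=TICKET-00001_1600000000.json  ',
]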
Example #21
    def get_config_upload_file(self, relval, for_submission=False):
        """
        Get bash script that would upload config files to ReqMgr2
        """
        self.logger.debug('Getting config upload script for %s',
                          relval.get_prepid())
        database_url = Config.get('cmsweb_url').replace('https://',
                                                        '').replace(
                                                            'http://', '')
        command = '#!/bin/bash\n\n'
        command += 'export SINGULARITY_CACHEDIR="/tmp/$(whoami)/singularity"\n'
        command += '\n'
        # Check if all expected config files are present
        common_check_part = 'if [ ! -s "%s.py" ]; then\n'
        common_check_part += '  echo "File %s.py is missing" >&2\n'
        common_check_part += '  exit 1\n'
        common_check_part += 'fi\n'
        for step in relval.get('steps'):
            # Run config check
            config_name = step.get_config_file_name()
            if config_name:
                command += common_check_part % (config_name, config_name)

        # Use ConfigCacheLite and TweakMakerLite instead of WMCore
        command += '\n'
        command += config_cache_lite_setup()
        command += '\n\n'

        # Upload command will be identical for all configs
        common_upload_part = (
            '\npython3 config_uploader.py --file $(pwd)/%s.py --label %s '
            f'--group ppd --user $(echo $USER) --db {database_url} || exit $?')
        previous_step_cmssw = None
        previous_step_scram = None
        container_code = ''
        container_steps = []
        default_os = 'slc7_'
        for index, step in enumerate(relval.get('steps')):
            # Run config uploader
            config_name = step.get_config_file_name()
            if not config_name:
                continue

            step_cmssw = step.get_release()
            real_scram_arch = step.get_scram_arch()
            os_name, _, gcc_version = clean_split(real_scram_arch, '_')
            scram_arch = f'{os_name}_amd64_{gcc_version}'

            if step_cmssw != previous_step_cmssw or scram_arch != previous_step_scram:
                if container_code:
                    if not previous_step_scram.startswith(default_os):
                        container_script_name = f'upload-steps-{"-".join(container_steps)}'
                        container_code = run_commands_in_singularity(
                            container_code, previous_step_scram,
                            container_script_name)
                        container_code = '\n'.join(container_code)

                    command += container_code.strip()
                    command += '\n\n\n'
                    container_code = ''
                    container_steps = []

                if real_scram_arch != scram_arch:
                    container_code += f'# Real scram arch is {real_scram_arch}\n'

                container_code += cmssw_setup(step_cmssw,
                                              scram_arch=scram_arch)
                container_code += '\n'

            container_code += common_upload_part % (config_name, config_name)
            container_code += '\n'
            container_steps.append(str(index + 1))
            previous_step_cmssw = step_cmssw
            previous_step_scram = scram_arch

        if not scram_arch.startswith(default_os):
            container_script_name = f'upload-steps-{"-".join(container_steps)}'
            container_code = run_commands_in_singularity(
                container_code, scram_arch, container_script_name)
            container_code = '\n'.join(container_code)

        command += container_code
        return command.strip()