def _get_site_id(name, token):
    """
    Resolve a site name to its id.

    :param name: site name to look up
    :param token: user token used to authenticate the site client
    :return: tuple of (site_client, site_id); site_id is None when no
             site with the given name is known.
    """
    client = SiteClient()
    client.set_token(token)
    # Return the first site whose name matches.
    for site in client.get_sites():
        if site['site_name'] == name:
            return client, site['site_id']
    return client, None
def _get_site_name(site_id, token): """ Get site name from site id. Requires a token. If site name cannot be resolved an empty string is returned. :param site_id: site id :param token: user token :return: site name """ site_name = 'Unknown' if token and site_id is not None: _site_client = SiteClient() _site_client.set_token(token) site_name = _site_client.get_site(site_id)['site_name'] return site_name
def startup_web(config):
    """Configure the web service.

    Attaches the service clients the web front-end needs to the Flask
    application object.

    :param config: web service configuration (currently unused here)
    """
    current_app.log.info("Web interface starting")
    # Clients for the HR (user) and site services.
    current_app.hrclient = HRClient()
    current_app.hrutils = HRUtils()
    current_app.siteclient = SiteClient()
    # site_map starts empty; presumably a site_id -> name cache filled
    # elsewhere — TODO confirm against the request handlers.
    current_app.site_map = {}
def __init__(self, user_token):
    """
    Construct the transfer client.

    Initialises the service clients involved in the transfer
    management: the site client (to resolve site names/ids) and the
    workqueue client (to submit work), and resolves the user id from
    the token via HRService.

    :param user_token: user token used to authenticate all clients
    """
    self.__user_token = user_token
    # endpoint: site client gives us the list of known sites up front
    self.__site_client = SiteClient()
    self.__site_client.set_token(user_token)
    self.__sitelist = self.__site_client.get_sites()
    # get user id
    self.__user_id = HRService.get_token_userid(user_token)
    # work queue client
    self.__wq_client = WorkqueueClient()
    self.__wq_client.set_token(user_token)
def add_site(self, args):
    """
    Add a site to the database.

    Builds the site payload from the parsed CLI arguments, optionally
    attaching user/service CA certificates read from file, and posts it
    to the site service. Does nothing if no token can be obtained.

    :param args: parser arguments (argparse namespace)
    :return: None
    """
    token = UserCommand._get_token(args.token)
    if token:
        # Keep only the real site fields: drop argparse book-keeping keys
        # and any option the user did not set (None values).
        site_info = {key: value for (key, value) in vars(args).iteritems()
                     if value is not None and key not in ('func', 'token', 'config',
                                                          'verbosity', 'service_ca_cert',
                                                          'user_ca_cert')}
        if args.user_ca_cert:
            user_cert = UserCommand._get_cert(args.user_ca_cert)
            if user_cert:
                site_info['user_ca_cert'] = user_cert
            else:
                # Certificate could not be read: abort without contacting the service.
                return None
        if args.service_ca_cert:
            service_cert = UserCommand._get_cert(args.service_ca_cert)
            if service_cert:
                site_info['service_ca_cert'] = service_cert
            else:
                return None
        # NOTE(review): bare dict print — may be intended CLI feedback, but
        # looks like leftover debug output; confirm before release.
        print site_info
        site_client = SiteClient()
        site_client.set_token(token)
        site_client.add_site(site_info)
    return None
def __init__(self, debug=False, n_shot=None, loglevel=logging.INFO):
    """Initialisation.

    Sets up the worker as both a REST client of the workqueue service
    and a daemon, then reads the 'worker' config section. Any config
    key left unconsumed is treated as an error.

    :param debug: run the daemon in debug (foreground) mode
    :param n_shot: number of poll iterations to run (None = run forever)
    :param loglevel: logging level for the daemon
    """
    RESTClient.__init__(self, 'workqueue')
    # Unique pid/log file names so several workers can coexist on one host.
    uid = uuid.uuid4()
    Daemon.__init__(self,
                    pidfile='/tmp/worker-%s.pid' % uid,
                    logfile='/tmp/worker-%s.log' % uid,
                    loglevel=loglevel,
                    target=self.run,
                    debug=debug)
    conf = getConfig('worker')
    # Job types this worker will request from the workqueue.
    self._types = [JobType[type_.upper()] for type_ in  # pylint: disable=unsubscriptable-object
                   conf.pop('types', ('LIST', 'COPY', 'REMOVE', 'MKDIR', 'RENAME'))]
    self._alg = conf.pop('algorithm', 'BY_NUMBER').upper()
    self._alg_args = conf.pop('algorithm.args', {})
    self._interpoll_sleep_time = conf.pop('poll_time', 2)
    # Per-job-type subprocess timeouts (seconds); config may override.
    self._timeouts = {JobType.LIST: 120, JobType.COPY: 3600,
                      JobType.REMOVE: 120, JobType.MKDIR: 120,
                      JobType.RENAME: 120}
    # NOTE(review): iterating conf.pop('timeouts', {}) directly yields only
    # keys for a dict; this unpacking expects (type, timeout) pairs, so the
    # config value is presumably a list of pairs — confirm (or use .items()).
    self._timeouts.update({JobType[type_.upper()]: timeout for type_, timeout in
                           conf.pop('timeouts', {})})
    self._system_ca_dir = conf.pop('system_ca_dir',
                                   os.environ.get('X509_CERT_DIR',
                                                  '/etc/grid-security/certificates'))
    self._script_path = conf.pop('script_path',
                                 os.path.join(os.path.dirname(__file__), 'scripts'))
    self._script_path = os.path.abspath(self._script_path)
    self._site_client = SiteClient()
    self._n_shot = n_shot
    # Handle on the currently running job subprocess (for terminate()).
    self._current_process = None
    # Check for unused config options
    if conf:
        raise ValueError("Unused worker config params: '%s'" % ', '.join(conf.keys()))
class Worker(RESTClient, Daemon):  # pylint: disable=too-many-instance-attributes
    """Worker Daemon.

    Polls the workqueue service for jobs, runs each job's elements in a
    protocol-specific subprocess script, and uploads the results back.
    """

    def __init__(self, debug=False, n_shot=None, loglevel=logging.INFO):
        """Initialisation.

        Sets the worker up as a REST client of the workqueue service and
        as a daemon, then consumes the 'worker' config section.

        :param debug: run the daemon in debug (foreground) mode
        :param n_shot: number of poll iterations to run (None = run forever)
        :param loglevel: logging level for the daemon
        """
        RESTClient.__init__(self, 'workqueue')
        # Unique pid/log file names so several workers can coexist on one host.
        uid = uuid.uuid4()
        Daemon.__init__(self,
                        pidfile='/tmp/worker-%s.pid' % uid,
                        logfile='/tmp/worker-%s.log' % uid,
                        loglevel=loglevel,
                        target=self.run,
                        debug=debug)
        conf = getConfig('worker')
        # Job types this worker will request from the workqueue.
        self._types = [JobType[type_.upper()] for type_ in  # pylint: disable=unsubscriptable-object
                       conf.pop('types', ('LIST', 'COPY', 'REMOVE', 'MKDIR', 'RENAME'))]
        self._alg = conf.pop('algorithm', 'BY_NUMBER').upper()
        self._alg_args = conf.pop('algorithm.args', {})
        self._interpoll_sleep_time = conf.pop('poll_time', 2)
        # Per-job-type subprocess timeouts (seconds); config may override.
        self._timeouts = {JobType.LIST: 120, JobType.COPY: 3600,
                          JobType.REMOVE: 120, JobType.MKDIR: 120,
                          JobType.RENAME: 120}
        # NOTE(review): this unpacking expects (type, timeout) pairs; a plain
        # dict here would yield keys only — confirm the config format.
        self._timeouts.update({JobType[type_.upper()]: timeout for type_, timeout in
                               conf.pop('timeouts', {})})
        self._system_ca_dir = conf.pop('system_ca_dir',
                                       os.environ.get('X509_CERT_DIR',
                                                      '/etc/grid-security/certificates'))
        self._script_path = conf.pop('script_path',
                                     os.path.join(os.path.dirname(__file__), 'scripts'))
        self._script_path = os.path.abspath(self._script_path)
        self._site_client = SiteClient()
        self._n_shot = n_shot
        # Handle on the currently running job subprocess (for terminate()).
        self._current_process = None
        # Check for unused config options
        if conf:
            raise ValueError("Unused worker config params: '%s'" % ', '.join(conf.keys()))

    @property
    def should_run(self):
        """Return if the daemon loop should run.

        With n_shot set, acts as a countdown: returns a truthy value
        n_shot times, then falsy (the counter is decremented as a side
        effect of reading this property).
        """
        if self._n_shot is None:
            return True
        n_shot = max(self._n_shot, 0)
        self._n_shot -= 1
        return n_shot

    def terminate(self, *_):
        """Terminate worker daemon and any in-flight job subprocess."""
        Daemon.terminate(self, *_)
        if self._current_process is not None:
            self._current_process.terminate()

    def _upload(self, target, job_id, element_id, token, data):
        """Upload results to WorkqueueService.

        :param target: URL template with {job_id}/{element_id} placeholders
        :param job_id: job id
        :param element_id: element id within the job
        :param token: per-element token used to authenticate the PUT
        :param data: payload to PUT back
        """
        self._logger.debug("Uploading following data for job.element %s.%s to "
                           "WorkqueueService: %s", job_id, element_id, pformat(data))
        self.set_token(token)
        try:
            self.put(target.format(job_id=job_id, element_id=element_id), data=data)
        except RESTException:
            self._logger.exception("Error trying to PUT back output from subcommand.")
        finally:
            # Always clear the element token again after the upload attempt.
            self.set_token(None)

    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    def run(self):
        """Daemon main method.

        Main poll loop: fetch a workload, prepare endpoints/credentials/CAs
        per job, feed the job elements to the matching script via stdin and
        stream results back via the dispatchers.
        """
        # remove any proxy left around as will mess up copy jobs.
        try:
            os.remove("/tmp/x509up_u%d" % os.getuid())
        except OSError:
            pass
        while self.should_run:
            self._logger.info("Getting workload from WorkqueueService.")
            try:
                workload = self.post('worker/jobs', data={'types': self._types,
                                                          'algorithm': self._alg,
                                                          'algorithm.args': self._alg_args})
            except Timeout:
                self._logger.warning("Timed out contacting the WorkqueueService.")
                continue
            except RESTException as err:
                # 404 means "no work", anything else is a real error.
                if err.code == 404:
                    self._logger.info("WorkqueueService reports no work to be done.")
                else:
                    self._logger.exception("Error trying to get work from WorkqueueService.")
                time.sleep(self._interpoll_sleep_time)
                continue
            self._logger.info("Workload of %d job elements acquired from WorkqueueService.",
                              sum(len(job['elements']) for job in workload))
            for job in workload:
                self._logger.info("Processing job %d", job['id'])
                self._logger.debug("Job %d: %s", job['id'], pformat(job))
                # Get CAs and endpoints for job.
                cas = []
                credentials = [job['src_credentials']]
                template_ca_dir = self._system_ca_dir
                src_endpoint_dict = self._site_client.get_endpoints(job['src_siteid'])
                src_endpoints = src_endpoint_dict['endpoints']
                if 'cas' in src_endpoint_dict:
                    # Site supplies its own CAs: no system template dir needed.
                    cas.extend(src_endpoint_dict['cas'])
                    template_ca_dir = None
                if job['type'] in (JobType.COPY, JobType.RENAME):
                    dst_endpoint_dict = self._site_client.get_endpoints(job['dst_siteid'])
                    dst_endpoints = dst_endpoint_dict['endpoints']
                    if job['type'] == JobType.COPY:
                        credentials.append(job['dst_credentials'])
                    # Keep the system template dir unless BOTH sides supply
                    # their own CAs (presumed intent — confirm).
                    template_ca_dir = self._system_ca_dir
                    if 'cas' in dst_endpoint_dict:
                        cas.extend(dst_endpoint_dict['cas'])
                        if 'cas' in src_endpoint_dict:
                            template_ca_dir = None

                # Set up element id/token map and job stdin data
                token_map = {}
                data = {'files': []}
                options = job['extra_opts']
                if options is not None:
                    data.update(options=options)
                protocol = PROTOCOLMAP[job['protocol']]
                for element in job['elements']:
                    element_id = "%d.%d" % (job['id'], element['id'])
                    token_map[element_id] = element['token']
                    # Pick a random source endpoint for load spreading.
                    src = (element_id, urlunsplit((protocol,
                                                   random.choice(src_endpoints),
                                                   element['src_filepath'], '', '')))
                    if element['type'] in (JobType.COPY, JobType.RENAME):
                        data['files'].append(src + (urlunsplit((protocol,
                                                                random.choice(dst_endpoints),
                                                                element['dst_filepath'],
                                                                '', '')),))
                    # pylint: disable=bad-continuation
                    elif element['type'] == JobType.MKDIR\
                            or (element['type'] == JobType.REMOVE
                                and element['src_filepath'].endswith('/')):
                        # Trailing slash on a REMOVE marks a directory.
                        data.setdefault('dirs', []).append(src)
                    else:
                        data['files'].append(src)

                # Correct command, data options and credentials for LIST component of
                # COPY/REMOVE/RENAME jobs.
                command = shlex.split(COMMANDMAP[job['type']][job['protocol']])
                if job['type'] != JobType.LIST\
                        and len(job['elements']) == 1\
                        and job['elements'][0]['type'] == JobType.LIST:
                    command = shlex.split(COMMANDMAP[JobType.LIST][job['protocol']])
                    data.pop('options', None)  # don't pass COPY/REMOVE options to scripts.
                    if job['type'] == JobType.COPY and len(credentials) == 2:
                        credentials.pop()  # remove dst_creds to get correct proxy env var
                command[0] = os.path.join(self._script_path, command[0])
                self._logger.info("Running elements in subprocess (%s).", command[0])
                # run job in subprocess with temporary proxy files and ca dir
                with temporary_proxy_files(*credentials) as proxy_env_vars,\
                        temporary_ca_dir(cas, template_dir=template_ca_dir) as ca_dir:
                    script_env = dict(os.environ, X509_CERT_DIR=ca_dir, **proxy_env_vars)
                    if self._logger.isEnabledFor(logging.DEBUG):
                        # Log only the env vars we added on top of os.environ.
                        extra_env = {key: script_env[key] for key in
                                     set(script_env.iterkeys())
                                     .difference(os.environ.iterkeys())}
                        self._logger.debug("Extra environment variables: %s",
                                           pformat(extra_env))
                    self._logger.debug("Sending subprocess the following data: %s",
                                       pformat(data))
                    self._current_process = subprocess.Popen(command, bufsize=0,
                                                             stdin=subprocess.PIPE,
                                                             stdout=subprocess.PIPE,
                                                             stderr=subprocess.PIPE,
                                                             env=script_env)
                    json.dump(data, self._current_process.stdin)
                    self._current_process.stdin.write('\n')
                    self._current_process.stdin.flush()
                    # We have to close stdin to force the subprocess to handle the input
                    # Otherwise it assumes there may be more data and hangs...
                    self._current_process.stdin.close()
                    stderr_dispatcher = BufferingDispatcher(self._current_process.stderr)
                    stdout_dispatcher = StdOutDispatcher(self._current_process.stdout,
                                                         token_map,
                                                         stderr_dispatcher,
                                                         self._upload)
                    # Kill the subprocess if it exceeds the per-type timeout.
                    timeout = self._timeouts[job['type']]
                    kill_timer = threading.Timer(timeout, self._current_process.kill)
                    if isinstance(timeout, (int, float)):
                        kill_timer.start()
                    asyncore.loop(timeout=2)
                    kill_timer.cancel()
                    if self._current_process.wait():
                        returncode = self._current_process.returncode
                        extra_log = ''
                        if returncode == -9:
                            # -9 = killed by SIGKILL, i.e. our kill timer fired.
                            extra_log = 'Operation timed out!'
                        stdout_dispatcher.force_complete(returncode=returncode,
                                                         extra_log=extra_log)
                        self._logger.error("Job %s failed with return: %s",
                                           job['id'], returncode)
                        self._logger.info("Job stderr:\n%s", stderr_dispatcher.buffer)
def configure_workqueueservice(config):
    """Setup the WorkqueueService.

    :param config: service configuration; the 'workerlogs' key is consumed.
    """
    # Directory where worker logs are kept (config key 'workerlogs').
    worker_log_dir = config.pop('workerlogs', '/tmp/workers')
    current_app.workqueueservice_workerlogs = worker_log_dir
    # Client used to talk to the site service.
    current_app.site_client = SiteClient()
class TransferClient(object):
    """
    Transfer management client API.

    To list, copy and remove files from remote site. Site names are
    resolved to site ids against the site list fetched at construction
    time; all operations are forwarded to the workqueue client.
    """

    def __init__(self, user_token):
        """
        Constructor initialises all service clients involved in the
        transfer management: the site client and the WorkqueueService
        client, and resolves the user id from the token.

        :param user_token: user token
        """
        self.__user_token = user_token
        # endpoint
        self.__site_client = SiteClient()
        self.__site_client.set_token(user_token)
        self.__sitelist = self.__site_client.get_sites()
        # get user id
        self.__user_id = HRService.get_token_userid(user_token)
        # work queue client
        self.__wq_client = WorkqueueClient()
        self.__wq_client.set_token(user_token)

    def list(self, src_site, src_filepath, **kwargs):
        """
        List a given path. As for all client calls it need a user token set in
        a request beforehand.

        Args:
            src_site (string): The name of the site containing the path to be listed.
            src_filepath (str): The path to list.
            kwargs: keyword arguments containing: protocol, max_tries and priority

        Returns:
            dict: The Python dictionary representing the list of files,
            or an empty list when the site name is unknown.
        """
        # sort out the site ID first:
        src_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == src_site]
        if src_siteid:
            response = self.__wq_client.list(src_siteid[0], src_filepath, **kwargs)
            # max_tries, priority, protocol=JobProtocol.GRIDFTP)
            return response
        return src_siteid  # an empty list

    def output(self, job_id, element_id=None, attempt=None):
        """
        Get job output.

        :param job_id: job id
        :param element_id: optional element id within the job
        :param attempt: optional attempt number
        :return: output as specified by workqueue client
        """
        response = self.__wq_client.output(job_id, element_id=element_id,
                                           attempt=attempt)
        return response

    def status(self, job_id, element_id=None):
        """
        Return status of a job.

        :param job_id: job id to get the status of.
        :param element_id: optional element id within the job
        :return: forward response from
            :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.status`.
        """
        response = self.__wq_client.status(job_id, element_id)
        return response

    def list_sites(self):
        """
        Get list of sites.

        :return: list of dictionaries with all keys but 'site_id'.
        """
        # Deep copy so popping 'site_id' does not mutate the cached list.
        filtered_sites = deepcopy(self.__sitelist)
        for elem in filtered_sites:
            elem.pop('site_id', None)
        return filtered_sites

    def copy(self, src_site, src_filepath, dst_site,  # pylint: disable=too-many-arguments
             dst_filepath, **kwargs):
        """
        Copy files between sites.

        :param src_site: source site
        :param src_filepath: source site path
        :param dst_site: destination site
        :param dst_filepath: destination site path
        :param kwargs:
            * max_tries: maximum number of attempts
            * priority: priority
            * protocol: protocol used, see:
              :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.copy`
        :return: forward response from
            :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.copy`
            or *None* if either of the sites does not exist.
        """
        src_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == src_site]
        dst_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == dst_site]
        if not (src_siteid and dst_siteid):
            return None
        response = self.__wq_client.copy(src_siteid[0], src_filepath,
                                         dst_siteid[0], dst_filepath, **kwargs)
        return response

    def remove(self, src_site, src_filepath, **kwargs):
        """
        Remove files from a given site.

        :param src_site: the site to contact
        :param src_filepath: the path to be removed
        :param kwargs:
            * max_tries: maximum number of attempts
            * priority: priority
            * protocol: protocol used
        :return: forward response from
            :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.remove`
            or *None* if the source site does not exist.
        """
        src_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == src_site]
        if not src_siteid:
            return None
        response = self.__wq_client.remove(src_siteid[0], src_filepath, **kwargs)
        return response

    def mkdir(self, site, dirpath, **kwargs):
        """
        Create a new directory at a site.

        :param site: site name
        :param dirpath: directory path
        :param kwargs:
            * max_tries: maximum number of attempts
            * priority: priority
            * protocol: protocol used,
              see: :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.copy`
        :return: forward response from
            :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.mkdir`
            or *None* if the site does not exist.
        """
        src_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == site]
        if not src_siteid:
            return None
        response = self.__wq_client.mkdir(src_siteid[0], dirpath, **kwargs)
        return response

    def rename(self, site, oldname, newname, **kwargs):
        """
        Rename a file or directory within site.

        :param site: site name
        :param oldname: old file name
        :param newname: new file name
        :param kwargs:
            * max_tries: maximum number of attempts
            * priority: priority
            * protocol: protocol used
        :return: forward response from
            :func:`pdm.workqueue.WorkqueueClient.WorkqueueClient.rename`
            or *None* if the site does not exist.
        """
        src_siteid = [elem['site_id'] for elem in self.__sitelist
                      if elem['site_name'] == site]
        if not src_siteid:
            return None
        response = self.__wq_client.rename(src_siteid[0], oldname, newname, **kwargs)
        return response

    def jobs(self):
        """
        Get user jobs' info.

        :return: forwarded response from \
            :func:`WorkqueueClient.jobs() <pdm.workqueue.WorkqueueClient.WorkqueueClient.jobs>`.
        """
        response = self.__wq_client.jobs()
        return response

    def elements(self, job_id):
        """
        Get job elements information.

        :param job_id: job id
        :return: forwarded response from \
            :func:`WorkqueueClient.jobs() <pdm.workqueue.WorkqueueClient.WorkqueueClient.jobs>`.
        """
        response = self.__wq_client.elements(job_id)
        return response
def load_userconfig(config):
    """
    Configure the HRService application.

    Parses token lifetimes and mail/SMTP settings from the config,
    attaches them to the Flask application, and creates the mail token
    service and the site client.

    :param config: configuration dict; consumed keys are popped.
    :raises ValueError: on malformed token lifetimes or missing/invalid
        mail settings.
    """
    current_app.pwd_len = config.pop("pswd_length", 8)
    # token validity period struct (from: HH:MM:SS)
    try:
        time_struct = time.strptime(
            config.pop("token_validity", "12:00:00"), "%H:%M:%S")
        current_app.token_duration = datetime.timedelta(
            hours=time_struct.tm_hour,
            minutes=time_struct.tm_min,
            seconds=time_struct.tm_sec)
        HRService._logger.info(
            "User login token duration parsed successfully")
        m_time_struct = time.strptime(
            config.pop("mail_token_validity", "23:59:00"), "%H:%M:%S")
        current_app.mail_token_duration = datetime.timedelta(
            hours=m_time_struct.tm_hour,
            minutes=m_time_struct.tm_min,
            seconds=m_time_struct.tm_sec)
        HRService._logger.info(
            "User mail token duration parsed successfully")
    except ValueError as v_err:
        HRService._logger.error(
            " Token lifetime provided in the config "
            "file has wrong format %s. Aborting.", v_err)
        raise ValueError("Token lifetime incorrect format %s" % v_err)
    # verification email:
    current_app.smtp_server = config.pop("smtp_server", None)
    if current_app.smtp_server is None:
        HRService._logger.error(
            " Mail server not provided in the config. Aborting")
        raise ValueError(
            " Mail server not provided in the config. Aborting")
    current_app.smtp_server_port = config.pop("smtp_server_port", None)
    current_app.smtp_server_login = config.pop("smtp_server_login", None)
    current_app.mail_display_from = config.pop("display_from_address", None)
    current_app.smtp_server_pwd = config.pop("smtp_server_pwd", None)
    current_app.mail_subject = config.pop("mail_subject", None)
    current_app.mail_expiry = config.pop("mail_expiry", "12:00:00")
    # starttls / login requirements must be one of these values.
    mail_server_req = ['REQUIRED', 'OPTIONAL', 'OFF']
    allowed_opts = ', '.join(mail_server_req)
    current_app.smtp_server_starttls = config.pop('smtp_starttls',
                                                  'UNDEFINED').upper()
    if current_app.smtp_server_starttls not in mail_server_req:
        HRService._logger.error(
            ' Mail server starttls option invalid (%s). Aborting.',
            current_app.smtp_server_starttls)
        HRService._logger.error("Allowed option values are: %s.", allowed_opts)
        raise ValueError(
            'Mail server starttls option invalid (%s) . Aborting.' %
            current_app.smtp_server_starttls)
    current_app.smtp_server_login_req = config.pop('smtp_login_req',
                                                   'UNDEFINED').upper()
    if current_app.smtp_server_login_req not in mail_server_req:
        HRService._logger.error(
            ' Mail server smtp_server_login_req option invalid (%s).'
            ' Aborting', current_app.smtp_server_login_req)
        HRService._logger.error("Allowed option values are: %s.", allowed_opts)
        raise ValueError(
            'Mail server smtp_server_login_req option invalid (%s). Aborting.' %
            current_app.smtp_server_login_req)
    # Fix: pop with a None default so a missing key reaches the explicit
    # error below instead of raising an unhelpful KeyError.
    current_app.verification_url = config.pop("verification_url", None)
    if current_app.verification_url is None:
        HRService._logger.error(
            " Mail verification URL not provided in the config. Aborting")
        raise ValueError(
            " Mail verification URL not provided in the config. Aborting")
    # A missing mail_token_secret still raises KeyError (no friendly
    # message was defined for it in the original contract).
    current_app.mail_token_secret = config.pop("mail_token_secret")
    # TokenService used to generate a verification email token.
    current_app.mail_token_service = TokenService(
        current_app.mail_token_secret)
    # site client
    current_app.site_client = SiteClient()