def create_hive_job(self, script, params=None, options=None, files=None, archives=None,
                    user_name=DEFAULT_USER_NAME, name_node=DEFAULT_NAME_NODE,
                    job_tracker=DEFAULT_JOB_TRACKER,
                    oozie_libpath=DEFAULT_NAME_NODE + DEFAULT_OOZIE_LIBPATH):
    """
    Submit a Workflow that contains a single HIVE action without writing a workflow.xml.
    Any required Jars or other files must already exist in HDFS.

    The arguments are flattened into a properties dict, converted to a
    configuration document with ``utils.properties_to_config`` and POSTed to the
    jobs endpoint with ``jobtype=hive``.

    :param script: Contains the HIVE script you want to run (the actual script, not a file path)
    :type script: basestring
    :param params: A dict of parameters (variable definition for the script); each entry is
                   sent as a 'key=value' string
    :type params: dict
    :param options: A list of arguments to pass to HIVE. Each option is copied verbatim into
                    the submitted properties; the original documentation notes that options
                    starting with -D are placed into an element of the action (not done by
                    this method -- presumably handled by the server, confirm against Oozie docs)
    :type options: list[basestring]
    :param files: A list of files needed for the script (hdfs location)
    :type files: list[basestring]
    :param archives: A list of archives needed for the script (hdfs location)
    :type archives: list[basestring]
    :param user_name: The username of the user submitting the job
    :type user_name: basestring
    :param name_node: The NameNode (e.g: hdfs://localhost:8020)
    :type name_node: basestring
    :param job_tracker: The JobTracker (e.g: localhost:8021)
    :type job_tracker: basestring
    :param oozie_libpath: A directory in HDFS that contains necessary Jars for your job
                          (e.g: oozie share lib)
    :type oozie_libpath: basestring
    :return: ID of the created workflow
    :rtype : basestring
    :raise errors.OozieError: if the server does not respond with a CREATED response
    """
    # TODO: remove code duplication with pig job creation
    properties = {
        'fs.default.name': name_node,
        'mapred.job.tracker': job_tracker,
        'user.name': user_name,
        'oozie.hive.script': script,
        'oozie.libpath': oozie_libpath,
        'oozie.proxysubmission': 'true',
    }

    # Each entry is sent as "<hdfs path>#<basename>".
    if files:
        properties['oozie.files'] = ','.join(
            "%s#%s" % (f, os.path.basename(f)) for f in files)
    if archives:
        # Iterate over `archives` here: looping over `files` would submit the
        # wrong list and fail outright when only archives are supplied.
        properties['oozie.archives'] = ','.join(
            "%s#%s" % (a, os.path.basename(a)) for a in archives)

    if params:
        properties['oozie.hive.script.params.size'] = len(params)
        # Each (key, value) pair becomes one indexed "key=value" property.
        for i, param in enumerate(params.iteritems()):
            properties['oozie.hive.script.params.%d' % i] = "%s=%s" % param

    if options:
        properties['oozie.hive.options.size'] = len(options)
        for i, option in enumerate(options):
            properties['oozie.hive.options.%d' % i] = option

    config = utils.properties_to_config(properties)
    headers = {'Content-Type': 'application/xml;charset=UTF-8'}
    response = requests.post(self.base_uri + JobsEndPoint, params={'jobtype': 'hive'},
                             headers=headers, data=config)

    if response.status_code != httplib.CREATED:
        raise errors.OozieError(errors.error_message_from_response(response))
    return response.json()['id']
def create_job(self, config):
    # TODO: validate the config xml file
    """
    Create a standard job from an XML configuration document.

    The document is POSTed as-is to the jobs endpoint. The type of job is
    determined by the presence of one of the following 3 properties:

        oozie.wf.application.path     : path to a workflow application directory,
                                        creates a workflow job
        oozie.coord.application.path  : path to a coordinator application file,
                                        creates a coordinator job
        oozie.bundle.application.path : path to a bundle application file,
                                        creates a bundle job

    Or, if none of those are present, the jobtype parameter determines the type
    of job to run. It can either be mapreduce or pig. (This method itself sends
    no jobtype parameter.)

    :type config: basestring
    :param config: XML configuration file, e.g.

        <?xml version="1.0" encoding="UTF-8"?>
        <configuration>
            <property>
                <name>user.name</name>
                <value>bansalm</value>
            </property>
            <property>
                <name>oozie.wf.application.path</name>
                <value>hdfs://foo:8020/user/bansalm/myapp/</value>
            </property>
            ...
        </configuration>

    :rtype : basestring
    :return: Id of the created job
    :raise errors.OozieError: if the server does not respond with a CREATED response
    """
    reply = requests.post(
        self.base_uri + JobsEndPoint,
        headers={'Content-Type': 'application/xml;charset=UTF-8'},
        data=config,
    )
    # Success path first; anything other than CREATED is reported as an error.
    if reply.status_code == httplib.CREATED:
        return reply.json()['id']
    raise errors.OozieError(errors.error_message_from_response(reply))
def get_all_jobs_information(self, timezone='GMT'):
    """
    Retrieves workflow and coordinator jobs information.

    Issues a GET on the jobs endpoint and returns the ``jobs`` entry of the
    decoded JSON body.

    :param timezone: The timezone to use for times
    :type timezone: basestring
    :return: A list of all jobs information
    :rtype : list[dict]
    :raise errors.OozieError: if the server does not respond with an OK response
    """
    response = requests.get(self.base_uri + JobsEndPoint, params={'timezone': timezone})
    if response.status_code != httplib.OK:
        raise errors.OozieError(errors.error_message_from_response(response))
    # `json` is a method on the response (as used by the other calls in this
    # client); it must be called before the result can be subscripted.
    return response.json()['jobs']
def get_job_log(self, job_id):
    """
    Retrieves the log of a job.

    Issues a GET on the job endpoint for ``job_id`` with ``show=log`` and
    returns the raw response body.

    :param job_id: The JOB ID
    :type job_id: basestring
    :return: The job log
    :rtype : basestring
    :raise ValueError: if the server answers BAD_REQUEST (bad job id)
    :raise errors.OozieError: on any other non-OK response
    """
    job_url = self.base_uri + JobEndPoint + "/" + job_id
    reply = requests.get(job_url, params={'show': 'log'})

    status = reply.status_code
    if status == httplib.OK:
        return reply.content
    if status == httplib.BAD_REQUEST:
        raise ValueError('%s is a bad job id' % job_id)
    raise errors.OozieError(errors.error_message_from_response(reply))
def get_job_information(self, job_id, timezone='GMT'):
    """
    Retrieves the information of a single job.

    Issues a GET on the job endpoint for ``job_id`` with ``show=info`` and
    returns the decoded JSON body.

    :param job_id: The JOB ID
    :type job_id: basestring
    :param timezone: The timezone to use for times
    :type timezone: basestring
    :return: The information of the job
    :rtype : dict
    :raise ValueError: if the server answers BAD_REQUEST (bad job id)
    :raise errors.OozieError: on any other non-OK response
    """
    query = {'show': 'info', 'timezone': timezone}
    job_url = self.base_uri + JobEndPoint + "/" + job_id
    reply = requests.get(job_url, params=query)

    status = reply.status_code
    if status == httplib.OK:
        return reply.json()
    if status == httplib.BAD_REQUEST:
        raise ValueError('%s is a bad job id' % job_id)
    raise errors.OozieError(errors.error_message_from_response(reply))
def do_job_action(self, job_id, action, config=None):
    """
    Starts, suspends, resumes, kills, or dryruns a job.
    Rerunning and changing a job require additional parameters, supplied
    through ``config``.

    :param job_id: The workflow to act on
    :type job_id: basestring
    :param action: The action to do ('start', 'suspend', 'resume', 'kill', 'dryrun',
                   'rerun', and 'change')
    :type action: str
    :param config: if rerunning or changing supply with the XML configuration
    :type config: basestring
    :raise ValueError: if ``action`` is not one of the public names of ``JobAction``
    :raise errors.OozieError: if the server does not respond with an OK response
    """
    # NOTE(review): the check is against the public *attribute names* of
    # JobAction, not their values -- confirm the names match the action strings.
    legal_actions = [name for name in dir(JobAction) if not name.startswith('_')]
    if action not in legal_actions:
        raise ValueError('%s is not a legal action' % action)

    # The XML body and its content type are only sent when a config is given.
    request_kwargs = {'params': {'action': action}}
    if config is not None:
        request_kwargs['headers'] = {'Content-Type': 'application/xml;charset=UTF-8'}
        request_kwargs['data'] = config

    reply = requests.put(self.base_uri + JobEndPoint + "/" + job_id, **request_kwargs)
    if reply.status_code == httplib.OK:
        return
    raise errors.OozieError(errors.error_message_from_response(reply))
def _set_system_status(self, status):
    """
    Sets the system mode of the server.

    Issues a PUT on the admin system-status endpoint with ``systemmode=status``.

    :param status: one of SystemStatus.NORMAL, SystemStatus.NOWEBSERVICE or
                   SystemStatus.SAFEMODE
    :raise ValueError: if ``status`` is not one of the values above
    :raise errors.OozieError: if the server does not respond with an OK response
    """
    allowed = (SystemStatus.NORMAL, SystemStatus.NOWEBSERVICE, SystemStatus.SAFEMODE)
    if status not in allowed:
        raise ValueError('%s is not a legall status' % status)

    reply = requests.put(self.base_uri + AdminEndPoint.SYSTEM_STATUS,
                         params={'systemmode': status})
    if reply.status_code == httplib.OK:
        return
    raise errors.OozieError(errors.error_message_from_response(reply))