Beispiel #1
0
    def create_hive_job(self, script, params=None, options=None, files=None, archives=None,
                        user_name=DEFAULT_USER_NAME, name_node=DEFAULT_NAME_NODE, job_tracker=DEFAULT_JOB_TRACKER,
                        oozie_libpath=DEFAULT_NAME_NODE + DEFAULT_OOZIE_LIBPATH):
        """
        Submit a Workflow that contains a single HIVE action without writing a workflow.xml.
        Any required Jars or other files must already exist in HDFS.

        :param script: Contains the HIVE script you want to run (the actual script, not a file path)
        :type script: basestring
        :param params: A dict of parameters (variable definitions for the script); each entry is sent
                       to the server in 'key=value' format
        :type params: dict
        :param options: A list of arguments to pass to HIVE, arguments are sent directly to HIVE without any
                        modification unless they start with -D,
                        in which case they are put into the configuration element of the action
        :type options: list[basestring]
        :param files: A list of files needed for the script (hdfs location)
        :type files: list[basestring]
        :param archives: A list of archives needed for the script (hdfs location)
        :type archives: list[basestring]
        :param user_name: The username of the user submitting the job
        :type user_name: basestring
        :param name_node: The NameNode (e.g: hdfs://localhost:8020)
        :type name_node: basestring
        :param job_tracker: The JobTracker (e.g: localhost:8021)
        :type job_tracker: basestring
        :param oozie_libpath: A directory in HDFS that contains necessary Jars for your job (e.g: oozie share lib)
        :type oozie_libpath: basestring
        :return: ID of the created workflow
        :rtype : basestring
        :raise errors.OozieError: if the server does not respond with a CREATED response
        """
        # TODO: remove code duplication with pig job creation

        def link_by_basename(paths):
            # Each entry becomes "<path>#<basename of path>"; entries are comma separated.
            return ','.join("%s#%s" % (path, os.path.basename(path)) for path in paths)

        properties = {'fs.default.name': name_node,
                      'mapred.job.tracker': job_tracker,
                      'user.name': user_name,
                      'oozie.hive.script': script,
                      'oozie.libpath': oozie_libpath,
                      'oozie.proxysubmission': 'true', }
        if files:
            properties['oozie.files'] = link_by_basename(files)

        if archives:
            # Must be built from ``archives``: iterating ``files`` here would send the wrong
            # paths, and would raise TypeError when archives are given without files.
            properties['oozie.archives'] = link_by_basename(archives)

        if params:
            properties['oozie.hive.script.params.size'] = len(params)
            # items() works on both Python 2 and Python 3 dicts.
            for i, param in enumerate(params.items()):
                # ``param`` is a (key, value) tuple, which fills both %s placeholders.
                properties['oozie.hive.script.params.%d' % i] = "%s=%s" % param

        if options:
            properties['oozie.hive.options.size'] = len(options)
            for i, option in enumerate(options):
                properties['oozie.hive.options.%d' % i] = option

        config = utils.properties_to_config(properties)
        headers = {'Content-Type': 'application/xml;charset=UTF-8'}
        response = requests.post(self.base_uri + JobsEndPoint, params={'jobtype': 'hive'}, headers=headers, data=config)
        if response.status_code != httplib.CREATED:
            raise errors.OozieError(errors.error_message_from_response(response))
        return response.json()['id']
Beispiel #2
0
    def create_job(self, config):
        """
        Submit a standard job described by an XML configuration document.

        The kind of job created depends on which of these 3 properties is present:
            oozie.wf.application.path : path to a workflow application directory, creates a workflow job
            oozie.coord.application.path : path to a coordinator application file, creates a coordinator job
            oozie.bundle.application.path : path to a bundle application file, creates a bundle job
        When none of them is present, the jobtype parameter determines the type of job to run.
        It can either be mapreduce or pig.

        :type config: basestring
        :param config: XML configuration document, posted as the request body, e.g.
        <?xml version="1.0" encoding="UTF-8"?>
        <configuration>
            <property>
                <name>user.name</name>
                <value>bansalm</value>
            </property>
            <property>
                <name>oozie.wf.application.path</name>
                <value>hdfs://foo:8020/user/bansalm/myapp/</value>
            </property>
            ...
        </configuration>
        :rtype : basestring
        :return: Id of the created job
        :raise errors.OozieError: if the server does not respond with a CREATED response
        """
        # TODO: validate the config xml file
        jobs_url = self.base_uri + JobsEndPoint
        reply = requests.post(jobs_url,
                              headers={'Content-Type': 'application/xml;charset=UTF-8'},
                              data=config)
        if reply.status_code == httplib.CREATED:
            return reply.json()['id']
        raise errors.OozieError(errors.error_message_from_response(reply))
Beispiel #3
0
    def get_all_jobs_information(self, timezone='GMT'):
        """
        Retrieves workflow and coordinator jobs information

        :param timezone: The timezone to use for times
        :type timezone: basestring
        :return: A list of all jobs information (the 'jobs' entry of the JSON response)
        :rtype : list[dict]
        :raise errors.OozieError: if the server does not respond with an OK response
        """
        response = requests.get(self.base_uri + JobsEndPoint, params={'timezone': timezone})
        if response.status_code != httplib.OK:
            raise errors.OozieError(errors.error_message_from_response(response))
        # json is a method on the response and must be called before indexing,
        # consistent with every other use of response.json() in this client.
        return response.json()['jobs']
Beispiel #4
0
    def get_job_log(self, job_id):
        """
        Fetch the log of a job (the job resource is requested with show=log).

        :param job_id: The JOB ID
        :type job_id: basestring
        :return: The job log, as the raw content of the response
        :rtype : basestring
        :raise ValueError: if the server answers with BAD_REQUEST
        :raise errors.OozieError: on any other non-OK response
        """
        job_url = self.base_uri + JobEndPoint + "/" + job_id
        reply = requests.get(job_url, params={'show': 'log'})

        status = reply.status_code
        if status == httplib.OK:
            return reply.content
        if status == httplib.BAD_REQUEST:
            raise ValueError('%s is a bad job id' % job_id)
        raise errors.OozieError(errors.error_message_from_response(reply))
Beispiel #5
0
    def get_job_information(self, job_id, timezone='GMT'):
        """
        Fetch the information of a single job (requested with show=info).

        :param job_id: The JOB ID
        :type job_id: basestring
        :param timezone: The timezone to use for times
        :type timezone: basestring
        :return: The decoded JSON body describing the job
        :rtype : dict
        :raise ValueError: if the server answers with BAD_REQUEST
        :raise errors.OozieError: on any other non-OK response
        """
        job_url = self.base_uri + JobEndPoint + "/" + job_id
        query = {'show': 'info', 'timezone': timezone}
        reply = requests.get(job_url, params=query)

        status = reply.status_code
        if status == httplib.OK:
            return reply.json()
        if status == httplib.BAD_REQUEST:
            raise ValueError('%s is a bad job id' % job_id)
        raise errors.OozieError(errors.error_message_from_response(reply))
Beispiel #6
0
    def do_job_action(self, job_id, action, config=None):
        """
        Start, suspend, resume, kill, or dryrun a job.
        Rerunning and changing a job require additional parameters, passed through ``config``.

        :param job_id: The workflow to act on
        :type job_id: basestring
        :param action: The action to do ('start', 'suspend', 'resume', 'kill', 'dryrun', 'rerun', and 'change');
                       it is validated against the public attribute names of JobAction
        :type action: str
        :param config: if rerunning or changing supply with the XML configuration
        :type config: basestring
        :raise ValueError: if the action is not one of the public attribute names of JobAction
        :raise errors.OozieError: if the server does not respond with an OK response
        """
        legal_actions = [attr for attr in dir(JobAction) if not attr.startswith('_')]
        if action not in legal_actions:
            raise ValueError('%s is not a legal action' % action)

        # Only attach a body and content type when a configuration was supplied.
        request_kwargs = {'params': {'action': action}}
        if config is not None:
            request_kwargs['headers'] = {'Content-Type': 'application/xml;charset=UTF-8'}
            request_kwargs['data'] = config

        reply = requests.put(self.base_uri + JobEndPoint + "/" + job_id, **request_kwargs)
        if reply.status_code == httplib.OK:
            return
        raise errors.OozieError(errors.error_message_from_response(reply))
Beispiel #7
0
 def _set_system_status(self, status):
     """
     Ask the server to switch to the given system status.

     :param status: one of SystemStatus.NORMAL, SystemStatus.NOWEBSERVICE or SystemStatus.SAFEMODE
     :raise ValueError: if ``status`` is not one of the three accepted values
     :raise errors.OozieError: if the server does not respond with an OK response
     """
     accepted = (SystemStatus.NORMAL, SystemStatus.NOWEBSERVICE, SystemStatus.SAFEMODE)
     if status not in accepted:
         raise ValueError('%s is not a legall status' % status)

     status_url = self.base_uri + AdminEndPoint.SYSTEM_STATUS
     reply = requests.put(status_url, params={'systemmode': status})
     if reply.status_code == httplib.OK:
         return
     raise errors.OozieError(errors.error_message_from_response(reply))