Example #1
 def __init__(self):
     env = Environment.create()
     config_context = env.get_config_items('webhdfs')
     if config_context is not None and 'port' in config_context:
         self.port = config_context['port']
         # Fall back to 'root' when no user is configured.
         self.user = config_context.get('user', 'root')
         # Treat a missing 'kerberos' key as kerberos disabled rather
         # than raising KeyError.
         self.use_kerberos = config_context.get('kerberos', 'false').lower() == 'true'
     else:
         self.port = None
         self.user = None
         self.use_kerberos = False
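A minimal sketch of how the lookup above degrades across configurations; the dicts below stand in for what env.get_config_items('webhdfs') might return (an assumption -- the real Environment reads them from config.ini):

    # Each stand-in dict mimics a possible 'webhdfs' section.
    for config_context in ({'port': '50070', 'kerberos': 'TRUE'},  # no 'user' key
                           {'port': '50070', 'user': 'hdfs'},      # no 'kerberos' key
                           None):                                  # section missing
        if config_context is not None and 'port' in config_context:
            port = config_context['port']
            user = config_context.get('user', 'root')
            use_kerberos = config_context.get('kerberos', 'false').lower() == 'true'
        else:
            port = user = None
            use_kerberos = False
        print((port, user, use_kerberos))
    # -> ('50070', 'root', True), ('50070', 'hdfs', False), (None, None, False)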
Example #2
 def __init__(self):
     env = Environment.create()
     config_context = env.get_config_items('webhdfs')
     if config_context is not None and 'port' in config_context:
         self.port = config_context['port']
         # Fall back to 'root' when no user is configured.
         self.user = config_context.get('user', 'root')
         # Treat a missing 'kerberos' key as kerberos disabled rather
         # than raising KeyError.
         self.use_kerberos = config_context.get('kerberos', 'false').lower() == 'true'
     else:
         self.port = None
         self.user = None
         self.use_kerberos = False
Example #3
    def spark_cluster_mode():
        """
        Gets the cluster mode.

        Returns
        -------
        out: boolean
            True if spark is running in cluster mode.  Cluster mode means that spark is
            running on a platform separate from the program.  In practice, cluster mode
            means that file arguments must be located on a network filesystem such as
            HDFS or NFS.
        """
        env = Environment.create()
        config = create_spark_config(env)
        return not config.get("spark.master").startswith("local")
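The startswith test above treats anything other than a local master as cluster mode; a quick illustration with representative spark.master values (the values are illustrative, not taken from the source):

    for master in ('local', 'local[4]', 'yarn', 'spark://host:7077', 'mesos://host:5050'):
        print((master, not master.startswith('local')))
    # local and local[4] -> False; the yarn, spark://, and mesos:// masters -> True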
Example #4
    def spark_cluster_mode():
        """
        Gets the cluster mode.

        Returns
        -------
        out: boolean
            True if spark is running in cluster mode.  Cluster mode means that spark is
            running on a platform separate from the program.  In practice, cluster mode
            means that file arguments must be located on a network filesystem such as
            HDFS or NFS.
        """
        env = Environment.create()
        config = create_spark_config(env)
        return not config.get('spark.master').startswith('local')
Example #5
    def __init__(self):
        """
        Create a spark context.

        The spark configuration is taken from xframes/config.ini and from
        the values set in SparkInitContext.set() if this has been called.
        """

        # This is placed here because otherwise it causes an error when used in a spark slave.
        from pyspark import SparkConf, SparkContext, SQLContext, HiveContext
        # This reads from default.ini and then xframes/config.ini
        # if they exist.
        self._env = Environment.create()
        context = create_spark_config(self._env)
        verbose = self._env.get_config('xframes', 'verbose',
                                       'false').lower() == 'true'
        hdfs_user_name = self._env.get_config('webhdfs', 'user', 'hdfs')
        os.environ['HADOOP_USER_NAME'] = hdfs_user_name
        config_pairs = [(k, v) for k, v in context.iteritems()]
        self._config = SparkConf().setAll(config_pairs)
        if verbose:
            print 'Spark Config: {}'.format(config_pairs)

        self._sc = SparkContext(conf=self._config)
        self._sqlc = SQLContext(self._sc)
        self._hivec = HiveContext(self._sc)
        self.zip_path = []
        version = [int(n) for n in self._sc.version.split('.')]
        self.status_tracker = self._sc.statusTracker()
        if cmp(version, [1, 4, 1]) >= 0:
            self.application_id = self._sc.applicationId
        else:
            self.application_id = None

        if verbose:
            print 'Spark Version: {}'.format(self._sc.version)
            if self.application_id:
                print 'Application Id: {}'.format(self.application_id)

        if not context['spark.master'].startswith('local'):
            zip_path = self.build_zip(get_xframes_home())
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

        trace_flag = self._env.get_config('xframes', 'rdd-trace',
                                          'false').lower() == 'true'
        XRdd.set_trace(trace_flag)
        atexit.register(self.close_context)
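The cmp(version, [1, 4, 1]) >= 0 check above leans on element-wise list ordering (cmp is a Python 2 builtin, as is the print statement used throughout this example); the same test can be written as a direct comparison, as a quick sanity check shows:

    version = [int(n) for n in '1.5.2'.split('.')]
    print(cmp(version, [1, 4, 1]) >= 0)    # True: 1.5.2 sorts after 1.4.1
    print(version >= [1, 4, 1])            # True: equivalent direct comparison
    print([1, 3, 0] >= [1, 4, 1])          # False: applicationId would be None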
Example #6
    def __init__(self):
        """
        Create a spark context.

        The spark configuration is taken from xframes/config.ini and from
        the values set in SparkInitContext.set() if this has been called.
        """

        # This is placed here because otherwise it causes an error when used in a spark slave.
        from pyspark import SparkConf, SparkContext, SQLContext, HiveContext

        # This reads from default.ini and then xframes/config.ini
        # if they exist.
        self._env = Environment.create()
        context = create_spark_config(self._env)
        verbose = self._env.get_config("xframes", "verbose", "false").lower() == "true"
        hdfs_user_name = self._env.get_config("webhdfs", "user", "hdfs")
        os.environ["HADOOP_USER_NAME"] = hdfs_user_name
        config_pairs = [(k, v) for k, v in context.iteritems()]
        self._config = SparkConf().setAll(config_pairs)
        if verbose:
            print "Spark Config: {}".format(config_pairs)

        self._sc = SparkContext(conf=self._config)
        self._sqlc = SQLContext(self._sc)
        self._hivec = HiveContext(self._sc)
        self.zip_path = []
        version = [int(n) for n in self._sc.version.split(".")]
        self.status_tracker = self._sc.statusTracker()
        if cmp(version, [1, 4, 1]) >= 0:
            self.application_id = self._sc.applicationId
        else:
            self.application_id = None

        if verbose:
            print "Spark Version: {}".format(self._sc.version)
            if self.application_id:
                print "Application Id: {}".format(self.application_id)

        if not context["spark.master"].startswith("local"):
            zip_path = self.build_zip(get_xframes_home())
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

        trace_flag = self._env.get_config("xframes", "rdd-trace", "false").lower() == "true"
        XRdd.set_trace(trace_flag)
        atexit.register(self.close_context)
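A self-contained sketch of the SparkConf().setAll(...) step used in both __init__ examples; it assumes pyspark is installed, and the pairs are illustrative rather than the values config.ini would actually supply:

    from pyspark import SparkConf

    # setAll takes a list of (key, value) pairs and returns the conf itself,
    # so it can be chained off the constructor as in the examples above.
    config_pairs = [('spark.master', 'local[2]'), ('spark.app.name', 'xframes-demo')]
    conf = SparkConf().setAll(config_pairs)
    print(conf.get('spark.master'))   # 'local[2]'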