Exemple #1
0
    def __init__(self, mode, config_file=None, hosts=None,
                 hadoop_cluster=None):
        """Create a new Spark cluster. It can be created as a standalone
        cluster or on top of YARN.

        Args:
          mode (int):
            The cluster manager that is used (STANDALONE_MODE or YARN_MODE).
          config_file (str, optional):
            The path of the config file to be used. When omitted, only the
            class defaults are used.
          hosts (list of Host, optional):
            The hosts of the cluster (standalone operation). The first host
            is used as master. Takes precedence over hadoop_cluster when
            both are given.
          hadoop_cluster (HadoopCluster, optional):
            The Hadoop cluster to link. Its hosts and master are reused when
            hosts is not given.

        Raises:
          SparkException:
            If neither hosts nor hadoop_cluster is provided, or if mode is
            YARN_MODE and no hadoop_cluster is provided.
        """

        # Load cluster properties: class defaults first, then the optional
        # user-supplied file on top of them.
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Close the file as soon as it has been parsed instead of
            # leaving the handle to the garbage collector.
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        # Deployment properties
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
        self.init_conf_dir = tempfile.mkdtemp(suffix="",
                                              prefix="spark-init-",
                                              dir="/tmp")
        self.conf_mandatory_files = [SPARK_CONF_FILE]

        # Node properties
        self.base_dir = config.get("cluster", "spark_base_dir")
        self.conf_dir = config.get("cluster", "spark_conf_dir")
        self.logs_dir = config.get("cluster", "spark_logs_dir")
        self.evs_log_dir = config.get("cluster", "spark_events_dir")
        self.work_dir = config.get("cluster", "spark_work_dir")
        self.port = config.getint("cluster", "spark_port")

        self.bin_dir = self.base_dir + "/bin"
        self.sbin_dir = self.base_dir + "/sbin"

        self.mode = mode

        self.java_home = None

        # Initialize hosts: explicit hosts win, otherwise reuse the ones of
        # the linked Hadoop cluster.
        if hosts:
            self.hosts = hosts
            self.master = hosts[0]
        elif hadoop_cluster:
            self.hosts = hadoop_cluster.hosts
            self.master = hadoop_cluster.master
        else:
            # Single message shared by the log and the exception so that
            # both stay in sync.
            no_hosts_msg = ("Hosts in the cluster must be specified either "
                            "directly or indirectly through a Hadoop "
                            "cluster.")
            logger.error(no_hosts_msg)
            raise SparkException(no_hosts_msg)

        # Store cluster information
        self.hw = hw_manager.make_deployment_hardware()
        self.hw.add_hosts(self.hosts)
        self.master_cluster = self.hw.get_host_cluster(self.master)

        # Store reference to Hadoop cluster and check if mandatory
        self.hc = hadoop_cluster
        if not self.hc and self.mode == YARN_MODE:
            logger.error("When using a YARN_MODE mode, a reference to the "
                         "Hadoop cluster should be provided.")
            raise SparkException("When using a YARN_MODE mode, a reference "
                                 "to the Hadoop cluster should be provided")

        # Any mode other than STANDALONE_MODE is reported as YARN.
        if self.mode == STANDALONE_MODE:
            mode_text = "in standalone mode"
        else:
            mode_text = "on top of YARN"

        # Only the short host name (text before the first dot) is displayed.
        short_names = ' '.join(style.host(h.address.split('.')[0])
                               for h in self.hosts)
        linked_text = " It is linked to a Hadoop cluster." if self.hc else ""
        logger.info("Spark cluster created %s in hosts %s." + linked_text,
                    mode_text,
                    short_names)
Exemple #2
0
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology. The first host is used as
            master.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal
            to len(topo_list).
          config_file (str, optional):
            The path of the config file to be used. When omitted, only the
            class defaults are used.
        """

        # Load properties: class defaults first, then the optional
        # user-supplied file on top of them.
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Close the file as soon as it has been parsed instead of
            # leaving the handle to the garbage collector.
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        # Deployment properties
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
        self.init_conf_dir = tempfile.mkdtemp(suffix="",
                                              prefix="hadoop-init-",
                                              dir="/tmp")
        self.conf_mandatory_files = [CORE_CONF_FILE,
                                     HDFS_CONF_FILE,
                                     MR_CONF_FILE]

        # Node properties
        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")

        self.bin_dir = self.base_dir + "/bin"
        # NOTE(review): sbin_dir points at "/bin", not "/sbin". This may be
        # intentional for a layout that keeps admin scripts in bin -- confirm
        # before changing.
        self.sbin_dir = self.base_dir + "/bin"

        self.java_home = None

        # Configure master and slaves. The hosts are copied once so that the
        # same snapshot is used everywhere below, even if the caller passed
        # a one-shot iterable.
        self.hosts = list(hosts)
        self.master = self.hosts[0]

        # Create topology
        self.topology = HadoopTopology(self.hosts, topo_list)

        # Store cluster information
        self.hw = hw_manager.make_deployment_hardware()
        self.hw.add_hosts(self.hosts)
        self.master_cluster = self.hw.get_host_cluster(self.master)

        # Create a string to display the topology: group host addresses by
        # the value they are mapped to in the topology.
        hosts_by_rack = {rack: [] for rack in self.topology.topology.values()}
        for host, rack in self.topology.topology.items():
            hosts_by_rack[rack].append(host.address)

        # Only the short host name (text before the first dot) is displayed.
        log_topo = ', '.join(
            style.user2(rack) + ': ' +
            ' '.join(style.host(addr.split('.')[0]) for addr in addresses)
            for rack, addresses in hosts_by_rack.items())

        logger.info("Hadoop cluster created with master %s, hosts %s and "
                    "topology %s",
                    style.host(self.master.address),
                    ' '.join(style.host(h.address.split('.')[0])
                             for h in self.hosts),
                    log_topo)
Exemple #3
0
 def __init__(self, hosts):
     """Create the deployment hardware description and register the hosts.

     Args:
       hosts (iterable of Host):
         The hosts to register; they are copied into a new list first.
     """
     self.hw = hw_manager.make_deployment_hardware()
     host_list = list(hosts)
     self.hw.add_hosts(host_list)