Example 1
import os
import random

# logger, helper, and command are project-level modules assumed by these examples.
def upload_data(cluster, dataset_name, dataset_size):
    logger.info("Prepare data for %s: %s", dataset_name, dataset_size)

    hadoop_dir = helper.get_hadoop_dir()
    dataset_dir = helper.get_dataset_dir()
    dataset_source_dir = helper.get_dataset_source_dir()
    target_dir = "%s_%s" % (dataset_name, dataset_size)
    rm_host = cluster.getMapReduceCluster().getResourceManager().host

    command.execute_remote(
        cluster.getUser(), rm_host,
        "%s/bin/hadoop dfs -mkdir -p %s" % (hadoop_dir, dataset_dir))
    if dataset_name == "terasort":
        real_size = helper.convert_unit(dataset_size)
        # teragen writes 100-byte rows; use integer division so the
        # arguments stay whole numbers under Python 3
        num_rows = real_size * 1024 * 1024 // 100
        num_files = 1 if real_size <= 1024 else real_size // 1024
        output_dir = "%s/%s" % (dataset_dir, target_dir)
        command.execute_remote(
            cluster.getUser(), rm_host,
            "%s/bin/hadoop jar %s/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar teragen -Dmapreduce.job.maps=%s %s  %s"
            % (hadoop_dir, hadoop_dir, num_files, num_rows, output_dir))

    else:
        command.execute_remote(
            cluster.getUser(), rm_host, "%s/bin/hadoop dfs -mkdir -p %s/%s" %
            (hadoop_dir, dataset_dir, target_dir))
        data_nodes = cluster.getHDFSCluster().getDataNodes()
        for f in os.listdir(os.path.join(dataset_source_dir, target_dir)):
            # pick a random DataNode to spread the upload load
            node = random.choice(data_nodes)
            logger.info("upload %s/%s to node %s", target_dir, f, node.host)
            command.execute_remote(
                cluster.getUser(), node.host,
                "%s/bin/hadoop dfs -put %s/%s/%s %s/%s" %
                (hadoop_dir, dataset_source_dir, target_dir, f, dataset_dir,
                 target_dir))
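
A minimal usage sketch; the cluster object, the "10g"/"1g" size strings, and helper.load_cluster are assumptions for illustration, not part of the example above:

cluster = helper.load_cluster("cluster.conf")   # hypothetical loader
upload_data(cluster, "terasort", "10g")         # generated directly on HDFS via teragen
upload_data(cluster, "wordcount", "1g")         # copied from the local dataset source dir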
Example 2
def deploy(cluster, conf_dir):
    for node in cluster.getNodes():
        logger.info("Deploy to %s", node.host)
        command.execute_remote(cluster.user, node.host, "mkdir -p ~/hadoop/conf")
        # workaround for path handling: scp runs locally so the per-host
        # glob under conf_dir resolves before the copy
        command.execute("scp -r %s/%s/* %s@%s:~/hadoop/conf" % (conf_dir, node.host, cluster.user, node.host))
Example 3
def execute(cluster, service, action):
    hadoop_dir = "~/hadoop"
    conf_dir = "%s/conf" % hadoop_dir
    daemon_script = "%s/sbin/hadoop-daemon.sh" % hadoop_dir
    yarn_script = "%s/bin/yarn" % hadoop_dir
    mapreduce_script = "%s/bin/mapred" % hadoop_dir
    hdfs_script = "%s/bin/hdfs" % hadoop_dir

    user = cluster.getUser()
    mapreduce = cluster.getMapReduceCluster()
    hdfs = cluster.getHDFSCluster()

    if service == "all":
        service_list = ["hdfs", "mapreduce", "historyserver"] if action == "start" else ["historyserver", "mapreduce", "hdfs"]
        for service in service_list:
            execute(cluster, service, action)
        return
    elif service == "mapreduce":

        if action == "start":
            # start/stop ResourceManager
            logger.info("[Service] %s ResourceManager at %s" % (action, mapreduce.getResourceManager().host))
            cmd = "%s --config %s --script %s %s resourcemanager" % (dameon_script, conf_dir, yarn_script, action)
            command.execute_remote(user, mapreduce.getResourceManager().host, cmd)
        # start/stop NodeManager
        for node in mapreduce.getNodeManagers():
            logger.info("[Service] %s NodeManager at %s" % (action, node.host))
            cmd = "%s --config %s --script %s %s nodemanager" % (dameon_script, conf_dir, yarn_script, action)
            command.execute_remote(user, node.host, cmd)
        if action == "stop":
            logger.info("[Service] %s ResourceManager at %s" % (action, mapreduce.getResourceManager().host))
            cmd = "%s --config %s --script %s %s resourcemanager" % (dameon_script, conf_dir, yarn_script, action)
            command.execute_remote(user, mapreduce.getResourceManager().host, cmd)
    elif service == "hdfs":
        if action == "format":
            logger.info("[Service] %s NameNode at %s" % (action, hdfs.getNameNode().host))
            cmd = "%s --config %s namenode -format" % (hdfs_script, conf_dir)
            command.execute_remote(user, hdfs.getNameNode().host, cmd)
            for node in hdfs.getDataNodes():
                logger.info("[Service] %s DataNode at %s" % (action, node.host))
                # workaround: the hadoop_runtime path should be configurable
                command.execute_remote(user, node.host, "rm -rf ~/hadoop_runtime/hdfs/datanode/*")
        else:
            if action == "start":
                logger.info("[Service] %s NameNode at %s" % (action, hdfs.getNameNode().host))
                cmd = "%s --config %s --script %s %s namenode" % (dameon_script, conf_dir, hdfs_script, action)
                # start/stop NameNode
                command.execute_remote(user, hdfs.getNameNode().host, cmd)

            # start/stop DataNode
            for node in hdfs.getDataNodes():
                logger.info("[Service] %s DataNode at %s" % (action, node.host))
                cmd = "%s --config %s --script %s %s datanode" % (dameon_script, conf_dir, hdfs_script, action)
                command.execute_remote(user, node.host, cmd)

            if action == "stop":
                logger.info("[Service] %s NameNode at %s" % (action, hdfs.getNameNode().host))
                cmd = "%s --config %s --script %s %s namenode" % (dameon_script, conf_dir, hdfs_script, action)
                command.execute_remote(user, hdfs.getNameNode().host, cmd)

    elif service == "historyserver":
        for node in mapreduce.getNodes():
            logger.info("[Service] %s HistoryServer at %s" % (action, node.host))
            cmd = "%s --config %s --script %s %s historyserver" % (dameon_script, conf_dir, mapreduce_script, action)
            command.execute_remote(user, node.host, cmd)
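
A sketch of the lifecycle this function supports; the ordering comments follow the "all" branch above, and the cluster object is assumed from the surrounding project:

execute(cluster, "hdfs", "format")   # format the NameNode, then wipe the DataNode dirs
execute(cluster, "all", "start")     # hdfs -> mapreduce -> historyserver
execute(cluster, "all", "stop")      # historyserver -> mapreduce -> hdfs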
Example 4
    def start(self):
        # launch dstat detached on every node, appending CSV samples to output_path
        cmd = "nohup dstat -tcly -mg --vm -dr -n --tcp --float --output %s > /dev/null 2>&1 &" % self.output_path
        # clear any stale dstat instance before starting a fresh one
        self.kill_service()
        for node in self.cluster.getNodes():
            command.execute_remote(self.cluster.getUser(), node.host, cmd)
Example 5
    def kill_service(self):
        # find the PIDs of the python-based dstat processes and kill them;
        # "grep python" also drops the grep processes from the ps output
        cmd = "ps aux | grep dstat | grep python | tr -s ' ' | cut -d' ' -f2 | xargs kill -9"
        for node in self.cluster.getNodes():
            command.execute_remote(self.cluster.getUser(), node.host, cmd)
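
A minimal sketch of the class these two methods appear to belong to; the class name, the constructor, and run_benchmark are assumptions inferred from the attributes the methods use:

class DstatMonitor:
    def __init__(self, cluster, output_path):
        self.cluster = cluster          # must provide getNodes() and getUser()
        self.output_path = output_path  # CSV file dstat writes on each node

    # start() and kill_service() as defined in Examples 4 and 5

monitor = DstatMonitor(cluster, "/tmp/dstat.csv")
monitor.start()         # restart dstat collection on every node
run_benchmark()         # placeholder for the measured workload
monitor.kill_service()  # stop collection; the CSV files remain on the nodes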