Esempio n. 1
0
 def hide_link_files(nodes, data_dir):
     """Unlink the symlinks below ``data_dir`` whose path mentions ``roxie``.

     One shell pipeline is submitted for all ``nodes`` through a
     ``parallel.CommandAgent`` whose concurrency equals ``len(nodes)``.

     Args:
         nodes: sized collection of node identifiers handed to
             ``submit_remote_commands``.
         data_dir: directory string spliced verbatim into the ``find`` call.
     """
     runner_ctx = parallel.CommandAgent(concurrency=len(nodes),
                                        show_result=False)
     with runner_ctx as runner:
         pipeline_stages = (
             "find " + data_dir + " -type l",  # symlinks below data_dir
             "grep roxie",  # keep only paths containing "roxie"
             "sudo xargs -I {} unlink {}",  # remove each remaining link
         )
         runner.submit_remote_commands(nodes,
                                       " | ".join(pipeline_stages),
                                       silent=True)
Esempio n. 2
0
 def hide_files(nodes, data_dir):
     """Rename entries in every directory below ``data_dir`` to dot-files.

     For each directory reported by ``find``, entries that ``ls -F`` does not
     suffix with ``/``, ``@``, ``=`` or ``|`` are moved from ``name`` to
     ``.name``. The command is submitted for all ``nodes`` at once.

     Args:
         nodes: sized collection of node identifiers handed to
             ``submit_remote_commands``.
         data_dir: directory string spliced verbatim into the ``find`` call.
     """
     runner_ctx = parallel.CommandAgent(concurrency=len(nodes),
                                        show_result=False)
     with runner_ctx as runner:
         # Executed once per directory bound to $d by the surrounding loop.
         per_dir_pipeline = " | ".join((
             "ls -F $d",
             "grep -v '[/@=|]$'",
             "sudo xargs -I {} mv $d/{} $d/.{}",
         ))
         loop_head = "for d in `find " + data_dir + " -type d`; do echo $d; "
         shell_line = loop_head + per_dir_pipeline + "; done"
         runner.submit_remote_commands(nodes, shell_line, silent=True)
Esempio n. 3
0
def switch_data_placement(data_placement,
                          data_dir="/var/lib/HPCCSystems/hpcc-data/roxie",
                          storage_type='local'):
    """Hide all data files, then re-expose the partitions that were placed.

    Files are hidden by renaming ``name`` to ``.name`` (or, for the
    link-based layouts, by unlinking symlinks). Afterwards every partition
    listed in ``data_placement.locations`` is made visible again for its
    node. Duplicate partitions of a node are processed once.

    Args:
        data_placement: object whose ``locations`` attribute maps a node
            identifier to a list of partition paths.
        data_dir: root directory of the data files. With ``'nfs'`` the
            per-node tree is ``<data_dir>/<node>/roxie``.
        storage_type: selects the layout:

            * ``'nfs'`` -- commands go through ``submit_command`` /
              ``execute`` instead of the remote variants (presumably run
              on the local host against the shared mount -- confirm).
            * ``'local_link'`` -- symlinks are unlinked, then each placed
              ``/dataset...`` partition is re-created as a symlink.
            * ``'local_link_16'`` -- symlinks are unlinked, then each
              placed ``/dataset...`` partition is un-hidden with ``mv``.
            * anything else -- files are hidden, ``.idx*`` files are shown
              again, then each placed partition is un-hidden with ``mv``.
    """
    logger = logging.getLogger('.'.join([__name__, "switch_data_placement"]))
    logger.info("Executing data placement")

    def hide_cmd(root):
        # `ls -F` tags directories, symlinks, sockets and FIFOs with
        # / @ = | respectively; grep -v drops those so they are not renamed.
        return ("for d in `find " + root + " -type d`; do echo $d; "
                "ls -F $d | grep -v '[/@=|]$' | "
                "sudo xargs -I {} mv $d/{} $d/.{}; done")

    def show_index_cmd(root):
        # `cut -c 2-` strips the leading dot so {} is the visible name.
        return ("for d in `find " + root + " -type d`; do echo $d; "
                "ls -a $d | grep '^\\.idx' | cut -c 2- | "
                "xargs -I {} sudo mv $d/.{} $d/{}; done")

    def unlink_cmd(root):
        return ("find " + root +
                " -type l | grep roxie | sudo xargs -I {} unlink {}")

    def run_on_nodes(nodes, cmd):
        # Submit one identical command for all nodes.
        with parallel.CommandAgent(concurrency=len(nodes),
                                   show_result=False) as agent:
            agent.submit_remote_commands(nodes, cmd, silent=True)

    def run_per_node_tree(nodes, build_cmd):
        # NFS layout: one command per node, each targeting that node's tree.
        with parallel.CommandAgent(concurrency=len(nodes),
                                   show_result=False) as agent:
            for node in nodes:
                node_data_dir = os.path.join(
                    data_dir, node, 'roxie')  # default = /dataset/ip/roxie
                agent.submit_command(build_cmd(node_data_dir))

    def get_hidden_partition(partition):
        return os.path.dirname(partition) + "/." + os.path.basename(partition)

    def modify_nfs_path(node_ip, file_path):
        # Insert the node after the first path component:
        # /a/b/c -> /a/<node_ip>/b/c
        parts = file_path.split('/')
        return os.path.join("/", parts[1], node_ip, *parts[2:])

    def to_local_path(partition):
        # Swap only the leading "/dataset"; later occurrences (for example a
        # file name starting with "dataset") must stay untouched.
        return partition.replace("/dataset", data_dir, 1)

    def unhide_cmd(path):
        return "sudo mv {} {}".format(get_hidden_partition(path), path)

    logger.info("Data storage type is {}".format(storage_type))
    logger.info("Data dir is {}".format(data_dir))

    nodes = data_placement.locations.keys()

    if storage_type == 'nfs':
        logger.info("Hiding all data files")
        run_per_node_tree(nodes, hide_cmd)
        logger.info("Showing all index files")
        run_per_node_tree(nodes, show_index_cmd)
        logger.info("Showing necessary data files")
        for node, partition_list in data_placement.locations.items():
            for partition in set(partition_list):
                partition_on_nfs = modify_nfs_path(node, partition)
                execute(unhide_cmd(partition_on_nfs))
    elif storage_type == 'local_link':
        logger.info("Hiding all data files")
        run_on_nodes(nodes, unlink_cmd(data_dir))
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                for partition in set(partition_list):
                    if not partition.startswith('/dataset'):
                        continue
                    # workaround: every partition links to the same file
                    agent.submit_remote_command(
                        node,
                        "sudo ln -s /{}/roxie/mybenchmark/.data_sorted_people_firstname_0._1_of_1 {}"
                        .format(data_dir, to_local_path(partition)),
                        capture=False,
                        silent=True)
    elif storage_type == 'local_link_16':
        logger.info("Hiding all data files")
        run_on_nodes(nodes, unlink_cmd(data_dir))
        # Index files are not touched here: they are all copied already.
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                for partition in set(partition_list):
                    if not partition.startswith('/dataset'):
                        continue
                    agent.submit_remote_command(
                        node,
                        unhide_cmd(to_local_path(partition)),
                        capture=False,
                        silent=True)
    else:
        logger.info("Hiding all data files")
        run_on_nodes(nodes, hide_cmd(data_dir))
        logger.info("Showing all index files")
        run_on_nodes(nodes, show_index_cmd(data_dir))
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                # set() removes duplicate partitions to support monochromatic
                for partition in set(partition_list):
                    agent.submit_remote_command(
                        node,
                        unhide_cmd(partition),
                        capture=False,
                        silent=True)
Esempio n. 4
0
def restore_data_placement(nodes,
                           data_dir="/var/lib/HPCCSystems/hpcc-data/roxie"):
    """Un-hide dot-files whose name contains ``of`` below ``data_dir``.

    For each directory reported by ``find``, entries that contain ``of`` and
    start with a dot are renamed from ``.name`` back to ``name``. The command
    is submitted for all ``nodes`` at once.

    Args:
        nodes: sized collection of node identifiers handed to
            ``submit_remote_commands``.
        data_dir: directory string spliced verbatim into the ``find`` call.
    """
    with parallel.CommandAgent(concurrency=len(nodes),
                               show_result=False) as agent:
        # Raw literal: the backslash in the grep pattern must reach the shell
        # unchanged, and "\." is not a valid Python escape sequence.
        # `cut -c 2-` strips the leading dot so {} is the visible name.
        cmd = ("for d in `find " + data_dir +
               r" -type d`; do echo $d; ls -a $d | grep of | grep '^\.'"
               r" | cut -c 2- | xargs -I {} sudo mv $d/.{} $d/{}; done")
        agent.submit_remote_commands(nodes, cmd, silent=True)
Esempio n. 5
0
 def show_index_files(nodes, data_dir):
     """Un-hide the ``.idx*`` files in every directory below ``data_dir``.

     For each directory reported by ``find``, entries whose name starts with
     ``.idx`` are renamed from ``.name`` back to ``name``. The command is
     submitted for all ``nodes`` at once.

     Args:
         nodes: sized collection of node identifiers handed to
             ``submit_remote_commands``.
         data_dir: directory string spliced verbatim into the ``find`` call.
     """
     with parallel.CommandAgent(concurrency=len(nodes),
                                show_result=False) as agent:
         # Raw literal: the backslash in the grep pattern must reach the shell
         # unchanged, and "\." is not a valid Python escape sequence.
         # `cut -c 2-` strips the leading dot so {} is the visible name.
         cmd = ("for d in `find " + data_dir +
                r" -type d`; do echo $d; ls -a $d | grep '^\.idx'"
                r" | cut -c 2- | xargs -I {} sudo mv $d/.{} $d/{}; done")
         agent.submit_remote_commands(nodes, cmd, silent=True)
Esempio n. 6
0
 def kill_service(self):
     """Send ``kill -9`` to the python dstat processes of each cluster node.

     The PID list comes from ``ps aux`` filtered on ``dstat`` and ``python``;
     the second whitespace-separated column is passed to ``kill``. One
     command is submitted per node returned by ``self.cluster.get_nodes()``.
     """
     kill_pipeline = " | ".join([
         "ps aux",
         "grep dstat",
         "grep python",
         "tr -s ' '",  # squeeze runs of spaces so cut sees single delimiters
         "cut -d' ' -f2",  # second column of `ps aux` output
         "xargs kill -9",
     ])
     pool_size = len(self.cluster.get_nodes())
     with parallel.CommandAgent(pool_size) as agent:
         for host in self.cluster.get_nodes():
             # check=False: presumably tolerates a failing command (for
             # example when nothing matched) -- confirm against CommandAgent.
             agent.submit_remote_command(host, kill_pipeline, check=False)