def hide_link_files(nodes, data_dir):
    """Remove the roxie symbolic links found under ``data_dir`` on every node.

    The remote pipeline lists symbolic links below ``data_dir`` (``find -type l``),
    keeps the paths containing ``roxie`` and unlinks each of them with sudo.

    Args:
        nodes: Sized collection of nodes; its length is used as the agent
            concurrency and it is handed to ``submit_remote_commands`` as is.
        data_dir: Directory path (string) that ``find`` starts from.
    """
    pool_size = len(nodes)
    with parallel.CommandAgent(concurrency=pool_size, show_result=False) as runner:
        list_links = "find " + data_dir + " -type l"
        unlink_pipeline = list_links + " | grep roxie | sudo xargs -I {} unlink {}"
        runner.submit_remote_commands(nodes, unlink_pipeline, silent=True)
def hide_files(nodes, data_dir):
    """Hide the files under ``data_dir`` on every node by dot-prefixing them.

    For each directory found below ``data_dir`` the remote loop echoes the
    directory, lists it with ``ls -F``, drops the entries whose ``ls -F``
    suffix is one of ``/ @ = |`` and renames every remaining entry ``name``
    to ``.name`` with sudo.

    Args:
        nodes: Sized collection of nodes; its length is used as the agent
            concurrency and it is handed to ``submit_remote_commands`` as is.
        data_dir: Directory path (string) that ``find`` starts from.
    """
    pool_size = len(nodes)
    with parallel.CommandAgent(concurrency=pool_size, show_result=False) as runner:
        loop_head = "for d in `find " + data_dir + " -type d`; do echo $d; "
        hide_loop = loop_head + "ls -F $d | grep -v '[/@=|]$' | sudo xargs -I {} mv $d/{} $d/.{}; done"
        runner.submit_remote_commands(nodes, hide_loop, silent=True)
def switch_data_placement(data_placement,
                          data_dir="/var/lib/HPCCSystems/hpcc-data/roxie",
                          storage_type='local'):
    """Expose only the data partitions listed in ``data_placement``.

    Every storage type follows the same outline: first make all data files
    invisible, then make the partitions assigned to each node visible again.
    "Hidden" means renamed to a dot-prefixed name in the same directory,
    except for the ``local_link`` types where the hiding step unlinks
    symbolic links instead.

    Args:
        data_placement: Object whose ``locations`` attribute maps a node to
            an iterable of partition paths. Duplicate paths per node are
            collapsed with ``set`` before use.
        data_dir: Root directory of the roxie data. For ``nfs`` the per-node
            directory ``<data_dir>/<node>/roxie`` is used.
        storage_type: ``'nfs'``, ``'local_link'``, ``'local_link_16'``; any
            other value (including the default ``'local'``) selects the
            plain local layout.
    """
    logger = logging.getLogger('.'.join([__name__, "switch_data_placement"]))
    logger.info("Executing data placement")

    def hide_cmd(directory):
        # Rename every entry that `ls -F` does not suffix with / @ = |
        # from "name" to ".name" in each directory below `directory`.
        return ("for d in `find " + directory + " -type d`; do echo $d; "
                "ls -F $d | grep -v '[/@=|]$' | sudo xargs -I {} mv $d/{} $d/.{}; done")

    def show_index_cmd(directory):
        # Rename ".idx*" entries back to "idx*". The literal is raw so the
        # regex backslash is not an invalid Python escape sequence.
        return ("for d in `find " + directory + " -type d`; do echo $d; "
                r"ls -a $d | grep '^\.idx' | cut -c 2- | xargs -I {} sudo mv $d/.{} $d/{}; done")

    def run_on_nodes(nodes, cmd):
        # Run the same shell command on every node.
        with parallel.CommandAgent(concurrency=len(nodes), show_result=False) as agent:
            agent.submit_remote_commands(nodes, cmd, silent=True)

    def hide_files(nodes, data_dir):
        run_on_nodes(nodes, hide_cmd(data_dir))

    def hide_link_files(nodes, data_dir):
        run_on_nodes(
            nodes,
            "find " + data_dir + " -type l | grep roxie | sudo xargs -I {} unlink {}")

    def show_index_files(nodes, data_dir):
        run_on_nodes(nodes, show_index_cmd(data_dir))

    def get_hidden_partition(partition):
        # "/a/b/name" -> "/a/b/.name"
        return os.path.dirname(partition) + "/." + os.path.basename(partition)

    def nfs_node_dir(data_dir, node):
        # default = /dataset/ip/roxie
        return os.path.join(data_dir, node, 'roxie')

    def hide_files_nfs(nodes, data_dir):
        # NFS variant: one command per node directory, submitted with
        # submit_command rather than submit_remote_commands.
        with parallel.CommandAgent(concurrency=len(nodes), show_result=False) as agent:
            for node in nodes:
                agent.submit_command(hide_cmd(nfs_node_dir(data_dir, node)))

    def show_index_files_nfs(nodes, data_dir):
        with parallel.CommandAgent(concurrency=len(nodes), show_result=False) as agent:
            for node in nodes:
                agent.submit_command(show_index_cmd(nfs_node_dir(data_dir, node)))

    def modify_nfs_path(node_ip, file_path):
        # Insert the node ip after the first path component:
        # "/dataset/roxie/x" -> "/dataset/<node_ip>/roxie/x"
        parts = file_path.split('/')
        return os.path.join("/", parts[1], node_ip, *parts[2:])

    logger.info("Data storage type is {}".format(storage_type))
    logger.info("Data dir is {}".format(data_dir))

    nodes = data_placement.locations.keys()

    if storage_type == 'nfs':
        logger.info("Hiding all data files")
        hide_files_nfs(nodes, data_dir=data_dir)
        logger.info("Showing all index files")
        show_index_files_nfs(nodes, data_dir=data_dir)
        logger.info("Showing necessary data files")
        for node, partition_list in data_placement.locations.items():
            for partition in set(partition_list):
                partition_on_nfs = modify_nfs_path(node, partition)
                execute("sudo mv {} {}".format(
                    get_hidden_partition(partition_on_nfs), partition_on_nfs))
    elif storage_type == 'local_link':
        logger.info("Hiding all data files")
        hide_link_files(nodes, data_dir=data_dir)
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                for partition in set(partition_list):
                    # Partitions that do not start with /dataset are skipped.
                    if partition.startswith('/dataset'):
                        partition_rename = partition.replace("/dataset", data_dir)
                        # workaround: every partition is a symlink to one
                        # fixed hidden file
                        agent.submit_remote_command(
                            node,
                            "sudo ln -s /{}/roxie/mybenchmark/.data_sorted_people_firstname_0._1_of_1 {}"
                            .format(data_dir, partition_rename),
                            capture=False,
                            silent=True)
    elif storage_type == 'local_link_16':
        # hard coded here
        logger.info("Hiding all data files")
        hide_link_files(nodes, data_dir=data_dir)
        # Index files are not touched in this mode (all index files are copied).
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                for partition in set(partition_list):
                    # Partitions that do not start with /dataset are skipped.
                    if partition.startswith('/dataset'):
                        partition_rename = partition.replace("/dataset", data_dir)
                        agent.submit_remote_command(
                            node,
                            "sudo mv {} {}".format(
                                get_hidden_partition(partition_rename),
                                partition_rename),
                            capture=False,
                            silent=True)
    else:
        logger.info("Hiding all data files")
        hide_files(nodes, data_dir=data_dir)
        logger.info("Showing all index files")
        show_index_files(nodes, data_dir=data_dir)
        logger.info("Showing necessary data files")
        with parallel.CommandAgent(concurrency=8, show_result=False) as agent:
            for node, partition_list in data_placement.locations.items():
                # set() removes duplicate partitions to support monochromatic
                for partition in set(partition_list):
                    agent.submit_remote_command(
                        node,
                        "sudo mv {} {}".format(
                            get_hidden_partition(partition), partition),
                        capture=False,
                        silent=True)
def restore_data_placement(nodes, data_dir="/var/lib/HPCCSystems/hpcc-data/roxie"):
    """Make hidden partition files under ``data_dir`` visible again on every node.

    For each directory found below ``data_dir`` the remote loop echoes the
    directory, lists all entries (``ls -a``), keeps those whose name contains
    ``of`` and starts with a dot, and renames each ``.name`` back to ``name``
    with sudo.

    Args:
        nodes: Sized collection of nodes; its length is used as the agent
            concurrency and it is handed to ``submit_remote_commands`` as is.
        data_dir: Directory path (string) that ``find`` starts from.
    """
    with parallel.CommandAgent(concurrency=len(nodes), show_result=False) as agent:
        # The second literal is raw: '\.' is a regex for grep, and in a
        # non-raw literal it is an invalid Python escape sequence.
        cmd = ("for d in `find " + data_dir + " -type d`; do echo $d; "
               r"ls -a $d | grep of | grep '^\.' | cut -c 2- | xargs -I {} sudo mv $d/.{} $d/{}; done")
        agent.submit_remote_commands(nodes, cmd, silent=True)
def show_index_files(nodes, data_dir):
    """Make hidden index files under ``data_dir`` visible again on every node.

    For each directory found below ``data_dir`` the remote loop echoes the
    directory, lists all entries (``ls -a``), keeps those whose name starts
    with ``.idx`` and renames each ``.name`` back to ``name`` with sudo.

    Args:
        nodes: Sized collection of nodes; its length is used as the agent
            concurrency and it is handed to ``submit_remote_commands`` as is.
        data_dir: Directory path (string) that ``find`` starts from.
    """
    with parallel.CommandAgent(concurrency=len(nodes), show_result=False) as agent:
        # The second literal is raw: '\.' is a regex for grep, and in a
        # non-raw literal it is an invalid Python escape sequence.
        cmd = ("for d in `find " + data_dir + " -type d`; do echo $d; "
               r"ls -a $d | grep '^\.idx' | cut -c 2- | xargs -I {} sudo mv $d/.{} $d/{}; done")
        agent.submit_remote_commands(nodes, cmd, silent=True)
def kill_service(self):
    """Send SIGKILL to the dstat python processes on every cluster node.

    The remote pipeline takes the ``ps aux`` lines that contain both
    ``dstat`` and ``python``, extracts the second whitespace-separated
    column and passes those values to ``kill -9``. Commands are submitted
    with ``check=False``.
    """
    stages = [
        "ps aux",
        "grep dstat",
        "grep python",
        "tr -s ' '",
        "cut -d' ' -f2",
        "xargs kill -9",
    ]
    kill_pipeline = " | ".join(stages)
    pool_size = len(self.cluster.get_nodes())
    with parallel.CommandAgent(pool_size) as runner:
        for host in self.cluster.get_nodes():
            runner.submit_remote_command(host, kill_pipeline, check=False)