def process_mwax_stats(
    logger,
    mwax_stats_executable: str,
    full_filename: str,
    numa_node: int,
    timeout: int,
    stats_dump_dir: str,
    metafits_path: str = "/vulcan/metafits",
) -> bool:
    """Run the mwax stats executable against a single file.

    The observation id is taken from the first 10 characters of the base
    name of ``full_filename`` and is used to locate the matching metafits
    file, ``<metafits_path>/<obs_id>_metafits.fits``.

    Args:
        logger: Logger used for progress messages; also handed to
            ``mwax_command.run_command_ext``.
        mwax_stats_executable: Path of the stats executable to run.
        full_filename: File passed to the executable via ``-t``.
        numa_node: Passed through to ``mwax_command.run_command_ext``.
        timeout: Passed through to ``mwax_command.run_command_ext``.
        stats_dump_dir: Output directory passed to the executable via ``-o``.
        metafits_path: Directory holding the ``<obs_id>_metafits.fits``
            files. Defaults to the previously hard-coded location, so
            existing callers build exactly the same command line.

    Returns:
        The first element returned by ``mwax_command.run_command_ext``,
        which this function treats as the success flag.
    """
    obs_id = os.path.basename(full_filename)[0:10]
    metafits_filename = os.path.join(metafits_path, f"{obs_id}_metafits.fits")

    cmd = (f"{mwax_stats_executable} -t {full_filename} -m"
           f" {metafits_filename} -o {stats_dump_dir}")

    logger.info(f"{full_filename}- attempting to run stats: {cmd}")

    start_time = time.time()
    return_value, _ = mwax_command.run_command_ext(
        logger, cmd, numa_node, timeout)
    elapsed = time.time() - start_time

    # Only success is logged here; no failure message is emitted by this
    # function itself.
    if return_value:
        logger.info(f"{full_filename} stats success in {elapsed} seconds")

    return return_value
def run_command(self, filename: str) -> bool:
    """Run the configured executable command line for ``filename``.

    The command template is ``self._executable_path``. Occurrences of
    ``mwax_mover.FILE_REPLACEMENT_TOKEN`` are replaced with ``filename`` and
    occurrences of ``mwax_mover.FILENOEXT_REPLACEMENT_TOKEN`` with
    ``filename`` minus its extension, in that order.

    Args:
        filename: File name substituted into the command template.

    Returns:
        The success flag reported by ``mwax_command.run_command_ext``.
    """
    filename_no_ext = os.path.splitext(filename)[0]

    # Substitute the filename (and its extension-less form) into the command
    command = f"{self._executable_path}"
    command = command.replace(mwax_mover.FILE_REPLACEMENT_TOKEN, filename)
    command = command.replace(mwax_mover.FILENOEXT_REPLACEMENT_TOKEN,
                              filename_no_ext)

    # run_command_ext yields a (success, stdout) pair. Returning the pair
    # itself would always be truthy, so hand back only the success flag to
    # honour the declared bool return type.
    # NOTE(review): -1 and 60 sit in the numa_node and timeout positions;
    # the trailing True is presumably a "use shell" flag - confirm against
    # run_command_ext.
    success, _ = mwax_command.run_command_ext(
        self.logger, command, -1, 60, True)
    return success
def archive_file_rsync(
    logger,
    full_filename: str,
    archive_numa_node: int,
    archive_destination_host: str,
    archive_destination_path: str,
    timeout: int,
):
    """Copy ``full_filename`` to a remote host with rsync over ssh.

    Args:
        logger: Logger used for debug/info/error messages; also handed to
            ``mwax_command.run_command_ext``.
        full_filename: Local file to transfer.
        archive_numa_node: Passed through to ``mwax_command.run_command_ext``.
        archive_destination_host: Remote host part of the rsync target.
        archive_destination_path: Remote path part of the rsync target.
        timeout: Passed through to ``mwax_command.run_command_ext``.

    Returns:
        True when the rsync command reports success, False when the file
        size cannot be determined or the command reports failure.
    """
    logger.debug(f"{full_filename} attempting archive_file_rsync...")

    # The size is only needed for the throughput log line, but failing to
    # read it aborts the transfer.
    try:
        size_bytes = os.path.getsize(full_filename)
    except Exception as e:
        logger.error(
            f"{full_filename}: Error determining file size. Error {e}")
        return False

    # --no-compress: don't try to compress (the data is going to be quite
    # uncompressible).
    # -e "...": strip as much encryption/compression from the ssh connection
    # as possible to speed up the transfer.
    remote_target = f"{archive_destination_host}:{archive_destination_path}"
    rsync_cmd = (
        "rsync --no-compress"
        " -e 'ssh -T -c aes128-cbc -o StrictHostKeyChecking=no"
        " -o Compression=no -x '"
        f" -r {full_filename} {remote_target}"
    )

    started = time.time()

    # run rsync
    succeeded, _ = mwax_command.run_command_ext(
        logger, rsync_cmd, archive_numa_node, timeout, False)

    if not succeeded:
        return False

    elapsed = time.time() - started

    gigabytes = float(size_bytes) / (1000.0 * 1000.0 * 1000.0)
    gigabits_per_sec = (gigabytes * 8) / elapsed

    logger.info(f"{full_filename} archive_file_rsync success"
                f" ({gigabytes:.3f}GB in {elapsed:.3f} seconds at"
                f" {gigabits_per_sec:.3f} Gbps)")
    return True
def do_checksum_md5(logger, full_filename: str, numa_node: int,
                    timeout: int) -> str:
    """Compute the MD5 checksum of a file by running ``md5sum``.

    Args:
        logger: Logger used for progress messages; also handed to
            ``mwax_command.run_command_ext``.
        full_filename: File to checksum.
        numa_node: Passed through to ``mwax_command.run_command_ext``.
        timeout: Passed through to ``mwax_command.run_command_ext``.

    Returns:
        The 32 character hexadecimal MD5 digest.

    Raises:
        OSError: If the size of ``full_filename`` cannot be read.
        Exception: If the command reports failure, or its output does not
            start with a 32 character hexadecimal digest.
    """
    logger.info(f"{full_filename}- running md5sum...")

    cmdline = f"md5sum {full_filename}"

    size = os.path.getsize(full_filename)

    start_time = time.time()
    return_value, md5output = mwax_command.run_command_ext(
        logger, cmdline, numa_node, timeout, False)
    elapsed = time.time() - start_time

    if not return_value:
        raise Exception(
            f"md5sum returned an unexpected return code {return_value}")

    # default output of md5 hash command is:
    # "5ce49e5ebd72c41a1d70802340613757
    # /visdata/incoming/1320133480_20211105074422_ch055_000.fits"
    #
    # Take the first whitespace-delimited token rather than deleting the
    # filename from the output: md5sum may echo the name in an escaped form
    # (the line then starts with a backslash) or prefix it with "*", and in
    # those cases removing `full_filename` does not leave a bare digest.
    tokens = (md5output or "").split()
    checksum = tokens[0].lstrip("\\") if tokens else ""

    # MD5 hash is ALWAYS 32 hexadecimal characters
    hex_digits = "0123456789abcdefABCDEF"
    if len(checksum) != 32 or any(ch not in hex_digits for ch in checksum):
        raise Exception(
            f"Calculated MD5 checksum is not valid: md5 output {md5output}"
        )

    # Throughput is informational only; never let a zero elapsed time turn
    # a successful checksum into a ZeroDivisionError.
    size_megabytes = size / (1000 * 1000)
    mb_per_sec = size_megabytes / elapsed if elapsed > 0 else float("inf")

    logger.info(
        f"{full_filename} md5sum success"
        f" {checksum} ({size_megabytes:.3f}MB in {elapsed} secs at"
        f" {mb_per_sec:.3f} MB/s)")
    return checksum
def _copy_subfile_to_disk(
    self,
    filename: str,
    numa_node: int,
    destination_path: str,
    timeout: int,
) -> bool:
    """Copy ``filename`` into ``destination_path`` using ``cp``.

    Args:
        filename: File to copy.
        numa_node: Passed through to ``mwax_command.run_command_ext``.
        destination_path: Directory the file is copied into.
        timeout: Passed through to ``mwax_command.run_command_ext``.

    Returns:
        The success flag reported by ``mwax_command.run_command_ext``.
    """
    self.logger.info(f"{filename}- Copying file into {destination_path}")

    cp_command = f"cp {filename} {destination_path}/."

    began = time.time()
    copied, _ = mwax_command.run_command_ext(
        self.logger, cp_command, numa_node, timeout, False
    )
    took = time.time() - began

    # Nothing further is logged here when the copy fails.
    if not copied:
        return copied

    self.logger.info(
        f"{filename}- Copying file into {destination_path} was"
        f" successful (took {took} secs."
    )
    return copied
def load_psrdada_ringbuffer(logger, full_filename: str, ringbuffer_key: str,
                            numa_node, timeout: int) -> bool:
    """Load a file into a PSRDADA ring buffer via ``dada_diskdb``.

    Args:
        logger: Logger used for progress messages; also handed to
            ``mwax_command.run_command_ext``.
        full_filename: File passed to ``dada_diskdb`` via ``-f``.
        ringbuffer_key: Ring buffer key passed to ``dada_diskdb`` via ``-k``.
        numa_node: Passed through to ``mwax_command.run_command_ext``.
        timeout: Passed through to ``mwax_command.run_command_ext``.

    Returns:
        The success flag reported by ``mwax_command.run_command_ext``.

    Raises:
        OSError: If the size of ``full_filename`` cannot be read.
    """
    logger.info(
        f"{full_filename}- attempting load_psrdada_ringbuffer {ringbuffer_key}"
    )

    dada_cmd = f"dada_diskdb -k {ringbuffer_key} -f {full_filename}"

    # Size is read up front (before the command runs) for the throughput
    # figure in the success log line.
    n_bytes = os.path.getsize(full_filename)

    t0 = time.time()
    loaded, _ = mwax_command.run_command_ext(
        logger, dada_cmd, numa_node, timeout)
    duration = time.time() - t0

    gigabytes = n_bytes / (1000 * 1000 * 1000)
    gigabits_per_sec = (gigabytes * 8) / duration

    if loaded:
        logger.info(f"{full_filename} load_psrdada_ringbuffer success"
                    f" ({gigabytes:.3f}GB in {duration} sec at"
                    f" {gigabits_per_sec:.3f} Gbps)")

    return loaded
def archive_file_xrootd(
    logger,
    full_filename: str,
    archive_numa_node: int,
    archive_destination_host: str,
    timeout: int,
) -> bool:
    """Archive a file with xrdcp, then rename it into place over ssh.

    The file is first copied to a uniquely named temporary file
    (``<basename>.part<uuid4>``) on the destination, and only once the copy
    succeeds is it renamed to its final name with a remote ``mv``.

    Args:
        logger: Logger used for debug/info/error messages; also handed to
            ``mwax_command.run_command_ext``.
        full_filename: Local file to archive.
        archive_numa_node: Passed through to ``mwax_command.run_command_ext``.
        archive_destination_host: Destination in ``host:path`` form, e.g.
            ``"192.168.120.110://volume2/incoming"``.
        timeout: Passed through to ``mwax_command.run_command_ext`` for both
            the copy and the rename.

    Returns:
        True when both the copy and the remote rename report success.
        False when the file size cannot be determined, the destination is
        not in ``host:path`` form, or either command reports failure.
    """
    logger.debug(f"{full_filename} attempting archive_file_xrootd...")

    # get file size
    try:
        file_size = os.path.getsize(full_filename)
    except Exception as e:
        logger.error(
            f"{full_filename}: Error determining file size. Error {e}")
        return False

    # Archive destination host looks like: "192.168.120.110://volume2/incoming"
    # so just get the bit before the first ":" for the host and everything
    # after it for the path. partition() keeps any later ":" inside the
    # path and lets a missing ":" be reported instead of raising IndexError.
    destination_host, separator, destination_path = (
        archive_destination_host.partition(":"))
    if not separator:
        logger.error(
            f"{full_filename}: archive destination"
            f" '{archive_destination_host}' is not in the form host:path")
        return False

    # Gather some info for later
    filename = os.path.basename(full_filename)
    temp_filename = f"{filename}.part{uuid.uuid4()}"

    full_destination_temp_filename = os.path.join(destination_path,
                                                  temp_filename)
    full_destination_final_filename = os.path.join(destination_path,
                                                   filename)

    # Build final command line
    #
    # --posc         = persist on successful copy. If copy fails either remove
    #                  the file or set it to 0 bytes. Setting to 0 bytes is
    #                  weird, but I'll take it
    # --rm-bad-cksum = Delete dest file if checksums do not match
    #
    cmdline = (
        "/usr/local/bin/xrdcp --cksum adler32 --posc --rm-bad-cksum --silent"
        " --streams 2 --tlsnodata"
        f" {full_filename} xroot://{archive_destination_host}/{temp_filename}")

    start_time = time.time()

    # run xrdcp
    return_val, _ = mwax_command.run_command_ext(
        logger, cmdline, archive_numa_node, timeout, False)

    if not return_val:
        return False

    elapsed = time.time() - start_time

    size_gigabytes = float(file_size) / (1000.0 * 1000.0 * 1000.0)
    gbps_per_sec = (size_gigabytes * 8) / elapsed

    logger.info(f"{full_filename} archive_file_xrootd success"
                f" ({size_gigabytes:.3f}GB in {elapsed:.3f} seconds at"
                f" {gbps_per_sec:.3f} Gbps)")

    cmdline = (
        f"ssh -o StrictHostKeyChecking=no mwa@{destination_host} 'mv"
        f" {full_destination_temp_filename}"
        f" {full_destination_final_filename}'")

    # run the mv command to rename the temp file to the final file
    # If this works, then mwacache will actually do its thing
    return_val, _ = mwax_command.run_command_ext(
        logger, cmdline, archive_numa_node, timeout, False)

    if not return_val:
        return False

    logger.info(
        f"{full_filename} archive_file_xrootd successfully renamed"
        f" {full_destination_temp_filename} to"
        f" {full_destination_final_filename} on the remote host"
        f" {destination_host}")
    return True