def compress(reqPropsObj, parDic):
    """
    Gzip the staging file of the request if the plug-in parameters ask for it.

    When the file is compressed, the result is written next to the staging
    file with a '.gz' suffix, the request's staging filename is updated to
    point at it, the uncompressed staging file is removed and the mime-type
    is reported as 'application/x-gfits'.

    reqPropsObj:  NG/AMS request properties object (ngamsReqProps).

    parDic:       Dictionary with parameters for the DAPI. This is generated
                  with ngamsPlugInApi.parseDapiPlugInPars() (Dictionary).

    Returns:      Tuple containing the uncompressed file size, the archived
                  file size, the format (mime-type) of the resulting data
                  file, the compression method ('' when not compressed) and
                  the value returned by ngamsLib.gzip_compress() (None when
                  not compressed) (tuple).
    """
    staging_file = reqPropsObj.getStagingFilename()
    size_before = ngamsPlugInApi.getFileSize(staging_file)
    mime_type = reqPropsObj.getMimeType()
    requested_compression = parDic.get("compression")

    if not _compress_data(parDic):
        compression_method = ''
        checksum = None
    else:
        logger.debug("Compressing file: %s using: %s", staging_file,
                     requested_compression)

        # Compress *and* calculate checksum on compressed stream.
        # The value crc_name depends on the server configuration and whether
        # the user requested a different variant (see ngamsArchiveUtils)
        compressed_file = '%s.gz' % staging_file
        checksum_info = (
            ngamsFileUtils.get_checksum_info(reqPropsObj['crc_name'])
            if 'crc_name' in reqPropsObj else None
        )

        started = time.time()
        with open(staging_file, 'rb') as source:
            checksum = ngamsLib.gzip_compress(source, compressed_file, 65536,
                                              crc_info=checksum_info)
        elapsed = time.time() - started

        # From here on the compressed file is the staging file
        reqPropsObj.setStagingFilename(compressed_file)
        rmFile(staging_file)
        mime_type = 'application/x-gfits'
        compression_method = 'gzip --no-name'
        logger.debug("File compressed: %s Time: %.3fs", compressed_file,
                     elapsed)

    size_after = ngamsPlugInApi.getFileSize(reqPropsObj.getStagingFilename())
    return size_before, size_after, mime_type, compression_method, checksum
def bbcpFile(srcFilename, targFilename, bparam, crc_name, skip_crc):
    """
    Use bbcp to copy file <srcFilename> to <targFilename>.

    NOTE: This requires remote access to the host as well as a bbcp
    installation on both the remote and local host.

    srcFilename:  Source of the copy, handed to bbcp as-is (string).

    targFilename: Destination of the copy, handed to bbcp as-is (string).

    bparam:       Object whose 'port', 'winsize' and 'num_streams' attributes
                  are turned into the bbcp -Z, -w and -s options when they
                  are truthy.

    crc_name:     Checksum variant to request from bbcp. 'crc32', 'crc32z'
                  and 'crc32c' are supported; None requests no checksum
                  (string or None).

    skip_crc:     If true, no checksum is requested from bbcp (boolean).

    Returns:      The checksum reported by bbcp, converted through the
                  checksum variant's from_bytes() and rendered as a string,
                  or None when no checksum was requested (string or None).

    Raises:       Exception if the checksum variant is not supported or if
                  bbcp exits with a non-zero return code.
    """
    logger.debug("Copying file: %s to filename: %s", srcFilename, targFilename)

    if bparam.port:
        pt = ['-Z', str(bparam.port)]
    else:
        pt = ['-z']

    fw = []
    if bparam.winsize:
        fw = ['-w', str(bparam.winsize)]

    ns = []
    if bparam.num_streams:
        ns = ['-s', str(bparam.num_streams)]

    # bypass password prompt with -oBatchMode=yes this implies you need keys
    # also disable kerberos-based (GSSAPI) authentication
    ssh_src = [
        '-S',
        'ssh -x -a -oBatchMode=yes -oGSSAPIAuthentication=no -oFallBackToRsh=no %4 %I -l %U %H bbcp'
    ]

    # perform checksum on host and compare to target. If it's different bbcp
    # will fail. The option list must exist even when no checksum is wanted,
    # otherwise building the command line below fails.
    want_checksum = not skip_crc and crc_name is not None
    cmd_checksum = []
    if want_checksum:
        cmd_checksum = ['-e', '-E']
        if crc_name in ('crc32', 'crc32z'):
            # c32z is the zip-flavor of CRC32
            # c32 is the POSIX flavour, which yields a different result
            cmd_checksum.append('c32z=/dev/stdout')
        elif crc_name == 'crc32c':
            cmd_checksum.append('c32c=/dev/stdout')
        else:
            raise Exception("Unsupported checksum method in BBCP: %s" %
                            (crc_name, ))

    cmd_list = ['bbcp', '-f', '-V'] + ssh_src + cmd_checksum + fw + ns + [
        '-P', '2'
    ] + pt + [srcFilename, targFilename]

    logger.info("Executing external command: %s",
                subprocess.list2cmdline(cmd_list))
    p1 = subprocess.Popen(cmd_list,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    out, err = p1.communicate()
    if p1.returncode != 0:
        args = (p1.returncode, subprocess.list2cmdline(cmd_list), out, err)
        raise Exception(
            "bbcp returncode: %d. Command line: [%r], out: %s, err: %s" % args)

    # The checksum is only present in the output when it was requested:
    # take the third space-separated token of stdout as the hex digest and
    # convert it with the variant's from_bytes().
    bbcp_checksum = None
    if want_checksum:
        crc_info = ngamsFileUtils.get_checksum_info(crc_name)
        checksum_bytes = codecs.decode(out.split(b' ')[2], 'hex')
        bbcp_checksum = str(crc_info.from_bytes(checksum_bytes))

    # e.g. "1 file copied at effectively 18.9 MB/s"; only logged when stdout
    # has a next-to-last line.
    out_lines = out.split(b'\n')
    if len(out_lines) >= 2:
        logger.info('BBCP final message: %s', out_lines[-2])
    logger.info("File: %s copied to filename: %s", srcFilename, targFilename)

    return bbcp_checksum
def save_to_file(ngams_server, request_properties, target_filename, block_size,
                 start_byte):
    """
    Fetch a file from its mirroring source over HTTP and store it on disk

    The file is requested with the RETRIEVE command from the host named in
    the request's file information, asking for the bytes from ``start_byte``
    onwards. If a resume was requested and the source advertises byte-range
    support, the data is appended to the existing target file; otherwise the
    target file is written from scratch. The checksum is calculated while the
    data is received and compared with the one stored in the request.

    :param ngams_server: Reference to NG/AMS server class object (ngamsServer)
    :param request_properties: NG/AMS Request Properties object (ngamsReqProps)
    :param target_filename: Target name for file where data will be written (string)
    :param block_size: Block size (bytes) to apply when reading the data from the HTTP channel (integer)
    :param start_byte: Start byte offset
    :return: Result of archiving_results() built from the number of bytes read,
        the read, write, checksum and overall durations, the checksum variant
        and the final checksum
    :raises FailedDownloadException: if a read returns no data, if fewer bytes
        than expected arrived, or if the checksum does not match
    """
    file_info = request_properties.fileinfo
    disk_id = file_info['diskId']
    source_host = file_info['sourceHost']
    host_id = file_info['hostId']
    file_version = file_info['fileVersion']
    file_id = file_info['fileId']
    expected_checksum = request_properties.checksum
    crc_variant = request_properties.checksum_plugin

    host, port = source_host.split(":")
    query_parameters = [('disk_id', disk_id), ('host_id', host_id),
                        ('quick_location', '1'),
                        ('file_version', file_version), ('file_id', file_id)]
    request_headers = {'Range': "bytes={:d}-".format(start_byte)}

    # Default receive timeout of 30 minutes unless the configuration says
    # otherwise
    timeout_key = "Mirroring[1].rx_timeout"
    rx_timeout = 30 * 60
    if ngams_server.getCfg().getVal(timeout_key):
        rx_timeout = int(ngams_server.getCfg().getVal(timeout_key))

    url = 'http://{0}:{1}/{2}'.format(host, port, NGAMS_RETRIEVE_CMD)
    auth_header = ngamsSrvUtils.genIntAuthHdr(ngams_server)
    response = ngamsHttpUtils.httpGetUrl(url, query_parameters,
                                         request_headers, rx_timeout,
                                         auth_header)

    # Can we resume a previous download?
    can_resume = 'bytes' in response.getheader("Accept-Ranges", '')

    logger.debug("Creating path: %s", target_filename)
    checkCreatePath(os.path.dirname(target_filename))

    logger.info('Fetching file ID %s, checksum %s, checksum variant %s',
                file_id, expected_checksum, crc_variant)
    crc_info = ngamsFileUtils.get_checksum_info(crc_variant)

    resume_requested = start_byte != 0
    if resume_requested:
        logger.info("Resume requested from start byte %d", start_byte)

    if not (resume_requested and can_resume):
        if start_byte > 0:
            logger.info(
                "Resume of download requested but server does not support it. Starting from byte 0 again."
            )
        out_file = open(target_filename, "wb")
        crc = crc_info.init
    else:
        logger.info(
            "Resume requested and mirroring source supports resume. Appending data to previous staging file"
        )
        # Seed the checksum with the data already present in the target file
        crc = ngamsFileUtils.get_checksum(65536, target_filename, crc_variant)
        request_properties.setBytesReceived(start_byte)
        out_file = open(target_filename, "ab")

    fetch_started = time.time()

    # Distinguish between archive pull and push request
    # By archive pull we may simply read the file descriptor until it returns
    # an empty string
    response_headers = {}
    for header in response.getheaders():
        response_headers[header[0].lower()] = header[1]

    if 'content-length' not in response_headers:
        logger.warning(
            "No Content-Length header found in response. Defaulting to 1e11")
        bytes_left = int(1e11)
    else:
        bytes_left = int(response_headers['content-length'])
        logger.debug("Got Content-Length header value %d in response",
                     bytes_left)

    # Receive the data
    chunk_size = block_size
    crc_duration = 0
    read_duration = 0
    write_duration = 0
    read_total_bytes = 0
    update_crc = crc_info.method
    with contextlib.closing(response), contextlib.closing(out_file):
        while bytes_left > 0:
            # Never ask for more than what is still expected
            chunk_size = min(chunk_size, bytes_left)

            # Read the remote file
            tick = time.time()
            chunk = response.read(chunk_size)
            read_duration += time.time() - tick

            chunk_len = len(chunk)
            read_total_bytes += chunk_len
            if chunk_len == 0:
                raise ngamsFailedDownloadException.FailedDownloadException(
                    "server is unreachable")

            # CRC
            tick = time.time()
            crc = update_crc(chunk, crc)
            crc_duration += time.time() - tick

            bytes_left -= chunk_len
            request_properties.setBytesReceived(
                request_properties.getBytesReceived() + chunk_len)

            # Write the file onto disk
            tick = time.time()
            out_file.write(chunk)
            write_duration += time.time() - tick

    crc = crc_info.final(crc)

    # Avoid divide by zeros later on, let's say it took us 1 [us] to do this
    fetch_duration = (time.time() - fetch_started) or 0.000001

    logger.info(
        "Saved data in file: %s. Bytes received: %d. Time: %.3f s. Rate: %.2f Bytes/s",
        target_filename, int(request_properties.getBytesReceived()),
        fetch_duration,
        (float(request_properties.getBytesReceived()) / fetch_duration))

    # Raise exception if bytes received were less than expected
    if bytes_left != 0:
        raise ngamsFailedDownloadException.FailedDownloadException(
            "Not all expected data arrived, {:d} bytes left to read".format(
                bytes_left))

    # Now check the freshly calculated CRC value against the stored CRC value
    logger.info('Source checksum: %s - received checksum: %d',
                expected_checksum, crc)
    if not crc_info.equals(expected_checksum, crc):
        raise ngamsFailedDownloadException.FailedDownloadException(
            "checksum mismatch: source={:s}, received={:d}".format(
                expected_checksum, crc))

    return archiving_results(read_total_bytes, read_duration, write_duration,
                             crc_duration, fetch_duration, crc_variant, crc)
def saveToFile(srvObj, ngamsCfgObj, reqPropsObj, trgFilename, blockSize,
               startByte):
    """
    Save the data available on an HTTP channel into the given file.

    The file is requested with the RETRIEVE command from the source host
    named in the request's file information. A CRC32 checksum is calculated
    while the data is received and compared with the checksum stored in the
    request.

    srvObj:       Reference to NG/AMS server class object; used to read the
                  'Mirroring[1].rx_timeout' configuration value (ngamsServer).

    ngamsCfgObj:  NG/AMS Configuration object. Not used by this function
                  (ngamsConfig).

    reqPropsObj:  NG/AMS Request Properties object (ngamsReqProps).

    trgFilename:  Target name for file where data will be written (string).

    blockSize:    Block size (bytes) to apply when reading the data
                  from the HTTP channel (integer).

    startByte:    Byte offset from which the data is requested. A non-zero
                  value asks for a resume of a previous download (integer).

    Returns:      List. Element 0: time it took to fetch the file (s),
                  element 1: the calculated checksum (list).

    Raises:       Exception if a read returns no data before everything
                  arrived; FailedDownloadException if fewer bytes than
                  expected arrived or if the checksum does not match.
    """
    disk_id = reqPropsObj.fileinfo['diskId']
    source_host = reqPropsObj.fileinfo['sourceHost']
    host_id = reqPropsObj.fileinfo['hostId']
    file_version = reqPropsObj.fileinfo['fileVersion']
    file_id = reqPropsObj.fileinfo['fileId']

    host, port = source_host.split(":")
    pars = {
        'disk_id': disk_id,
        'host_id': host_id,
        'quick_location': '1',
        'file_version': file_version,
        'file_id': file_id
    }
    # A Range header needs the "bytes=" unit, as used by save_to_file()
    hdrs = {'Range': 'bytes=' + str(startByte) + '-'}

    rx_timeout = 30 * 60
    if srvObj.getCfg().getVal("Mirroring[1].rx_timeout"):
        rx_timeout = int(srvObj.getCfg().getVal("Mirroring[1].rx_timeout"))
    response = ngamsHttpUtils.httpGet(host, int(port), 'RETRIEVE', pars=pars,
                                      hdrs=hdrs, timeout=rx_timeout)

    # can we resume a previous download?
    downloadResumeSupported = 'bytes' in response.getheader("Accept-Ranges", '')

    logger.debug("Creating path: %s", trgFilename)
    checkCreatePath(os.path.dirname(trgFilename))

    crc_info = ngamsFileUtils.get_checksum_info('crc32')
    if startByte != 0:
        logger.info("resume requested")
    if startByte != 0 and downloadResumeSupported:
        logger.info("Resume requested and mirroring source supports resume. Appending data to previously started staging file")
        # Seed the checksum with the data already present in the target file
        crc = ngamsFileUtils.get_checksum(65536, trgFilename, 'crc32')
        reqPropsObj.setBytesReceived(startByte)
        # The payload is binary, so the file must be opened in binary mode
        fdOut = open(trgFilename, "ab")
    else:
        if startByte > 0:
            logger.info("Resume of download requested but server does not support it. Starting from byte 0 again.")
        fdOut = open(trgFilename, "wb")
        crc = crc_info.init

    start = time.time()

    # Distinguish between Archive Pull and Push Request. By Archive
    # Pull we may simply read the file descriptor until it returns "".
    logger.info("It is an HTTP Archive Pull Request: trying to get Content-length")
    # Header names are matched case-insensitively, so normalise them first
    hdrs = {h[0].lower(): h[1] for h in response.getheaders()}
    if 'content-length' in hdrs:
        remSize = int(hdrs['content-length'])
    else:
        logger.warning("Non Content-Lenght header found, defaulting to 1e11")
        remSize = int(1e11)

    # Receive the data.
    rdSize = blockSize
    crc_m = crc_info.method
    with contextlib.closing(response), contextlib.closing(fdOut):
        while remSize > 0:
            if remSize < rdSize:
                rdSize = remSize
            buf = response.read(rdSize)
            sizeRead = len(buf)
            if sizeRead == 0:
                raise Exception("server is unreachable")

            crc = crc_m(buf, crc)
            remSize -= sizeRead
            reqPropsObj.setBytesReceived(reqPropsObj.getBytesReceived() +
                                         sizeRead)
            fdOut.write(buf)
    crc = crc_info.final(crc)

    deltaTime = time.time() - start
    # Avoid a division by zero in the rate calculation below
    if deltaTime == 0.0:
        deltaTime = 0.000001

    msg = "Saved data in file: %s. Bytes received: %d. Time: %.3f s. " +\
          "Rate: %.2f Bytes/s"
    logger.info(msg, trgFilename, int(reqPropsObj.getBytesReceived()),
                deltaTime, (float(reqPropsObj.getBytesReceived()) / deltaTime))

    # Raise exception if fewer bytes were received than expected.
    if remSize != 0:
        msg = "No all expected data arrived, %d bytes left to read" % (remSize,)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    # now check the CRC value against what we expected
    sourceChecksum = reqPropsObj.checksum
    logger.info('source checksum: %s - current checksum: %d', str(sourceChecksum), crc)
    if crc != int(sourceChecksum):
        msg = "checksum mismatch: source=" + str(sourceChecksum) + ", received: " + str(crc)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    return [deltaTime, crc]