Ejemplo n.º 1
0
def compress(reqPropsObj, parDic):
    """
    Gzip the staging file of the request when the DAPI parameters ask for it.

    reqPropsObj:  NG/AMS request properties object (ngamsReqProps).

    parDic:       Dictionary with parameters for the DAPI. This is generated
                  with ngamsPlugInApi.parseDapiPlugInPars() (Dictionary).

    Returns:      Tuple with five elements: uncompressed file size, archived
                  file size, format (mime-type) of the resulting data file,
                  compression description ('' when nothing was compressed)
                  and the value returned by ngamsLib.gzip_compress() (None
                  when nothing was compressed) (tuple).
    """
    original_path = reqPropsObj.getStagingFilename()
    uncomprSize = ngamsPlugInApi.getFileSize(original_path)
    mime = reqPropsObj.getMimeType()
    requested_compression = parDic.get("compression")

    if not _compress_data(parDic):
        # Nothing to compress: the staging file is archived as it is.
        untouched_size = ngamsPlugInApi.getFileSize(
            reqPropsObj.getStagingFilename())
        return uncomprSize, untouched_size, mime, '', None

    logger.debug("Compressing file: %s using: %s", original_path,
                 requested_compression)

    # Compress *and* calculate the checksum on the compressed stream.
    # 'crc_name' depends on the server configuration and on whether the
    # user requested a different variant (see ngamsArchiveUtils).
    if 'crc_name' in reqPropsObj:
        checksum_info = ngamsFileUtils.get_checksum_info(
            reqPropsObj['crc_name'])
    else:
        checksum_info = None
    compressed_path = '%s.gz' % original_path

    started = time.time()
    with open(original_path, 'rb') as source:
        crc = ngamsLib.gzip_compress(source,
                                     compressed_path,
                                     65536,
                                     crc_info=checksum_info)
    elapsed = time.time() - started

    # From here on the compressed file is the staging file; the
    # uncompressed original is no longer needed.
    reqPropsObj.setStagingFilename(compressed_path)
    rmFile(original_path)

    logger.debug("File compressed: %s Time: %.3fs", compressed_path, elapsed)

    compressed_size = ngamsPlugInApi.getFileSize(
        reqPropsObj.getStagingFilename())
    return (uncomprSize, compressed_size, 'application/x-gfits',
            'gzip --no-name', crc)
Ejemplo n.º 2
0
def bbcpFile(srcFilename, targFilename, bparam, crc_name, skip_crc):
    """
    Use bbcp to copy file <srcFilename> to <targFilename>.

    NOTE: This requires remote access to the host as well as
         a bbcp installation on both the remote and local host.

    srcFilename:   Source of the copy, handed to bbcp as-is (string).

    targFilename:  Target of the copy, handed to bbcp as-is (string).

    bparam:        Object providing the attributes 'port', 'winsize' and
                   'num_streams'; each one is only put on the command line
                   when it is truthy.

    crc_name:      Name of the checksum variant bbcp should calculate:
                   'crc32', 'crc32z', 'crc32c', or None for no checksum.

    skip_crc:      If true no checksum is requested, whatever crc_name is.

    Returns:       The checksum reported by bbcp converted to a string, or
                   None when no checksum was requested (string | None).

    Raises:        Exception if crc_name is not supported by this function
                   or if bbcp exits with a non-zero return code.
    """
    logger.debug("Copying file: %s to filename: %s", srcFilename, targFilename)

    pt = ['-Z', str(bparam.port)] if bparam.port else ['-z']
    fw = ['-w', str(bparam.winsize)] if bparam.winsize else []
    ns = ['-s', str(bparam.num_streams)] if bparam.num_streams else []

    # bypass password prompt with -oBatchMode=yes this implies you need keys
    # also disable kerberos-based (GSSAPI) authentication
    ssh_src = [
        '-S',
        'ssh -x -a -oBatchMode=yes -oGSSAPIAuthentication=no -oFallBackToRsh=no %4 %I -l %U %H bbcp'
    ]

    # perform checksum on host and compare to target. If it's different bbcp will fail.
    # cmd_checksum must exist even when no checksum is requested, otherwise
    # building the command line below fails with a NameError.
    checksum_requested = not skip_crc and crc_name is not None
    cmd_checksum = []
    if checksum_requested:
        cmd_checksum = ['-e', '-E']
        if crc_name in ('crc32', 'crc32z'):
            # c32z is the zip-flavor of CRC32
            # c32 is the POSIX flavour, which yields a different result
            cmd_checksum.append('c32z=/dev/stdout')
        elif crc_name == 'crc32c':
            cmd_checksum.append('c32c=/dev/stdout')
        else:
            raise Exception("Unsupported checksum method in BBCP: %s" %
                            (crc_name, ))

    cmd_list = ['bbcp', '-f', '-V'] + ssh_src + cmd_checksum + fw + ns + [
        '-P', '2'
    ] + pt + [srcFilename, targFilename]

    cmd_line = subprocess.list2cmdline(cmd_list)
    logger.info("Executing external command: %s", cmd_line)

    p1 = subprocess.Popen(cmd_list,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    out, err = p1.communicate()
    if p1.returncode != 0:
        args = (p1.returncode, cmd_line, out, err)
        raise Exception(
            "bbcp returncode: %d. Command line: [%r], out: %s, err: %s" % args)

    # The checksum is only present in the output when bbcp was asked to
    # write it to stdout; without a request there is nothing to parse.
    bbcp_checksum = None
    if checksum_requested:
        # extract the checksum (third space-separated token) from the
        # output and convert it with the variant's from_bytes()
        crc_info = ngamsFileUtils.get_checksum_info(crc_name)
        checksum_bytes = codecs.decode(out.split(b' ')[2], 'hex')
        bbcp_checksum = str(crc_info.from_bytes(checksum_bytes))

    # e.g. "1 file copied at effectively 18.9 MB/s"; skipped when the
    # output is too short to contain a second-to-last line.
    out_lines = out.split(b'\n')
    if len(out_lines) >= 2:
        logger.info('BBCP final message: %s', out_lines[-2])
    logger.info("File: %s copied to filename: %s", srcFilename, targFilename)

    return bbcp_checksum
Ejemplo n.º 3
0
def save_to_file(ngams_server, request_properties, target_filename, block_size,
                 start_byte):
    """
    Save the data available on an HTTP channel into the given file

    The file is fetched from the host named in
    ``request_properties.fileinfo['sourceHost']`` with a RETRIEVE request.
    While the data is written to disk a checksum is calculated and finally
    compared against ``request_properties.checksum``.

    :param ngams_server: Reference to NG/AMS server class object (ngamsServer)
    :param request_properties: NG/AMS Request Properties object (ngamsReqProps)
    :param target_filename: Target name for file where data will be written (string)
    :param block_size: Block size (bytes) to apply when reading the data from the HTTP channel (integer)
    :param start_byte: Start byte offset; a non-zero value requests that a previous download is resumed (integer)
    :return: Whatever ``archiving_results`` builds from the number of bytes read, the read, write,
        checksum and total durations (s), the checksum variant and the checksum value
    :raises ngamsFailedDownloadException.FailedDownloadException: if the stream ends before the announced
        number of bytes arrived, or if the checksum of the received data does not match the source checksum
    """
    file_info = request_properties.fileinfo
    disk_id = file_info['diskId']
    source_host = file_info['sourceHost']
    host_id = file_info['hostId']
    file_version = file_info['fileVersion']
    file_id = file_info['fileId']
    checksum = request_properties.checksum
    crc_variant = request_properties.checksum_plugin

    host, port = source_host.split(":")
    parameter_list = [('disk_id', disk_id), ('host_id', host_id),
                      ('quick_location', '1'), ('file_version', file_version),
                      ('file_id', file_id)]
    header_dict = {'Range': "bytes={:d}-".format(start_byte)}

    # 30 minutes unless the configuration says otherwise
    rx_timeout = 30 * 60
    configured_timeout = ngams_server.getCfg().getVal(
        "Mirroring[1].rx_timeout")
    if configured_timeout:
        rx_timeout = int(configured_timeout)

    url = 'http://{0}:{1}/{2}'.format(host, port, NGAMS_RETRIEVE_CMD)
    authorization_header = ngamsSrvUtils.genIntAuthHdr(ngams_server)
    response = ngamsHttpUtils.httpGetUrl(url, parameter_list, header_dict,
                                         rx_timeout, authorization_header)

    crc_duration = 0
    read_duration = 0
    write_duration = 0
    read_total_bytes = 0

    # The response is closed on every exit path, including failures that
    # happen before the first byte is read.
    with contextlib.closing(response):
        # Can we resume a previous download?
        download_resume_supported = 'bytes' in response.getheader(
            "Accept-Ranges", '')

        logger.debug("Creating path: %s", target_filename)
        checkCreatePath(os.path.dirname(target_filename))

        logger.info('Fetching file ID %s, checksum %s, checksum variant %s',
                    file_id, checksum, crc_variant)
        crc_info = ngamsFileUtils.get_checksum_info(crc_variant)
        if start_byte != 0:
            logger.info("Resume requested from start byte %d", start_byte)

        if start_byte != 0 and download_resume_supported:
            logger.info(
                "Resume requested and mirroring source supports resume. Appending data to previous staging file"
            )
            # Continue the running checksum from what is already on disk
            crc = ngamsFileUtils.get_checksum(65536, target_filename,
                                              crc_variant)
            request_properties.setBytesReceived(start_byte)
            open_mode = "ab"
        else:
            if start_byte > 0:
                logger.info(
                    "Resume of download requested but server does not support it. Starting from byte 0 again."
                )
            open_mode = "wb"
            crc = crc_info.init

        with open(target_filename, open_mode) as fd_out:
            fetch_start_time = time.time()

            # Distinguish between archive pull and push request
            # By archive pull we may simply read the file descriptor until it returns an empty string
            response_header_dict = {
                h[0].lower(): h[1]
                for h in response.getheaders()
            }
            length_known = 'content-length' in response_header_dict
            if length_known:
                remaining_size = int(response_header_dict['content-length'])
                logger.debug("Got Content-Length header value %d in response",
                             remaining_size)
            else:
                logger.warning(
                    "No Content-Length header found in response. Defaulting to 1e11"
                )
                remaining_size = int(1e11)

            # Receive the data
            read_size = block_size
            crc_method = crc_info.method
            while remaining_size > 0:
                if remaining_size < read_size:
                    read_size = remaining_size

                # Read the remote file
                read_start_time = time.time()
                data_buffer = response.read(read_size)
                read_duration += time.time() - read_start_time
                size_read = len(data_buffer)

                if size_read == 0:
                    if length_known:
                        raise ngamsFailedDownloadException.FailedDownloadException(
                            "server is unreachable")
                    # The size was only a placeholder, so an empty read is
                    # the regular end of the stream; the checksum comparison
                    # below still validates what was received.
                    remaining_size = 0
                    break

                read_total_bytes += size_read

                # CRC
                crc_start_time = time.time()
                crc = crc_method(data_buffer, crc)
                crc_duration += time.time() - crc_start_time

                remaining_size -= size_read
                request_properties.setBytesReceived(
                    request_properties.getBytesReceived() + size_read)

                # Write the file onto disk
                write_start_time = time.time()
                fd_out.write(data_buffer)
                write_duration += time.time() - write_start_time

    crc = crc_info.final(crc)

    fetch_duration = time.time() - fetch_start_time
    # Avoid divide by zeros later on, let's say it took us 1 [us] to do this
    if fetch_duration == 0.0:
        fetch_duration = 0.000001

    msg = "Saved data in file: %s. Bytes received: %d. Time: %.3f s. Rate: %.2f Bytes/s"
    logger.info(
        msg, target_filename, int(request_properties.getBytesReceived()),
        fetch_duration,
        (float(request_properties.getBytesReceived()) / fetch_duration))

    # Raise exception if bytes received were less than expected
    if remaining_size != 0:
        msg = "Not all expected data arrived, {:d} bytes left to read".format(
            remaining_size)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    # Now check the freshly calculated CRC value against the stored CRC value
    logger.info('Source checksum: %s - received checksum: %d', checksum, crc)
    if not crc_info.equals(checksum, crc):
        # '{}' instead of '{:s}': the latter raises for a non-string checksum
        # and would hide the mismatch behind a formatting error.
        msg = "checksum mismatch: source={}, received={:d}".format(
            checksum, crc)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    return archiving_results(read_total_bytes, read_duration, write_duration,
                             crc_duration, fetch_duration, crc_variant, crc)
Ejemplo n.º 4
0
def saveToFile(srvObj,
               ngamsCfgObj,
               reqPropsObj,
               trgFilename,
               blockSize,
               startByte):
    """
    Save the data available on an HTTP channel into the given file.

    The file is fetched with a RETRIEVE request from the host named in
    reqPropsObj.fileinfo['sourceHost']. A CRC32 checksum is calculated
    while the data is written and is finally compared against
    reqPropsObj.checksum.

    srvObj:          Reference to the NG/AMS server object; only its
                     configuration is read here (ngamsServer).

    ngamsCfgObj:     NG/AMS Configuration object; not used by this
                     function (ngamsConfig).

    reqPropsObj:     NG/AMS Request Properties object (ngamsReqProps).

    trgFilename:     Target name for file where data will be
                     written (string).

    blockSize:       Block size (bytes) to apply when reading the data
                     from the HTTP channel (integer).

    startByte:       Byte offset to start from; a non-zero value requests
                     that a previous download is resumed (integer).

    Returns:         List with two elements: the time it took to fetch
                     the file (s) and the calculated checksum (list).

    Raises:          Exception if the stream ends before the announced
                     number of bytes arrived;
                     ngamsFailedDownloadException.FailedDownloadException
                     if data is missing or the checksum does not match.
    """

    disk_id = reqPropsObj.fileinfo['diskId']
    source_host = reqPropsObj.fileinfo['sourceHost']
    host_id = reqPropsObj.fileinfo['hostId']
    file_version = reqPropsObj.fileinfo['fileVersion']
    file_id = reqPropsObj.fileinfo['fileId']

    host, port = source_host.split(":")
    pars = {
        'disk_id': disk_id,
        'host_id': host_id,
        'quick_location': '1',
        'file_version': file_version,
        'file_id': file_id
    }
    # An HTTP Range value needs the range unit in front of the offsets.
    hdrs = {'Range': 'bytes=' + str(startByte) + '-'}

    # 30 minutes unless the configuration says otherwise.
    rx_timeout = 30 * 60
    cfg_timeout = srvObj.getCfg().getVal("Mirroring[1].rx_timeout")
    if cfg_timeout:
        rx_timeout = int(cfg_timeout)
    response = ngamsHttpUtils.httpGet(host, int(port), 'RETRIEVE', pars=pars, hdrs=hdrs, timeout=rx_timeout)

    # The response is closed on every exit path, including failures that
    # happen before the first byte is read.
    with contextlib.closing(response):
        # can we resume a previous download?
        downloadResumeSupported = 'bytes' in response.getheader("Accept-Ranges", '')

        logger.debug("Creating path: %s", trgFilename)
        checkCreatePath(os.path.dirname(trgFilename))

        crc_info = ngamsFileUtils.get_checksum_info('crc32')
        if startByte != 0:
            logger.info("resume requested")
        if startByte != 0 and downloadResumeSupported:
            logger.info("Resume requested and mirroring source supports resume. Appending data to previously started staging file")
            # Continue the running checksum from what is already on disk.
            crc = ngamsFileUtils.get_checksum(65536, trgFilename, 'crc32')
            reqPropsObj.setBytesReceived(startByte)
            openMode = "ab"
        else:
            if startByte > 0:
                logger.info("Resume of download requested but server does not support it. Starting from byte 0 again.")
            openMode = "wb"
            crc = crc_info.init

        # The payload is arbitrary binary data, hence the binary modes.
        with open(trgFilename, openMode) as fdOut:
            start = time.time()

            # Distinguish between Archive Pull and Push Request. By Archive
            # Pull we may simply read the file descriptor until it returns "".
            logger.info("It is an HTTP Archive Pull Request: trying to get Content-length")
            # Header names are case-insensitive, so normalise before lookup.
            responseHdrs = {h[0].lower(): h[1] for h in response.getheaders()}
            lengthKnown = 'content-length' in responseHdrs
            if lengthKnown:
                remSize = int(responseHdrs['content-length'])
            else:
                logger.warning("Non Content-Lenght header found, defaulting to 1e11")
                remSize = int(1e11)

            # Receive the data.
            rdSize = blockSize
            crc_m = crc_info.method
            while remSize > 0:
                if remSize < rdSize:
                    rdSize = remSize
                buf = response.read(rdSize)
                sizeRead = len(buf)
                if sizeRead == 0:
                    if lengthKnown:
                        raise Exception("server is unreachable")
                    # The size was only a placeholder, so an empty read is
                    # the regular end of the stream; the checksum comparison
                    # below still validates what was received.
                    remSize = 0
                    break
                crc = crc_m(buf, crc)
                remSize -= sizeRead
                reqPropsObj.setBytesReceived(reqPropsObj.getBytesReceived() +
                                             sizeRead)
                fdOut.write(buf)
    crc = crc_info.final(crc)

    deltaTime = time.time() - start
    # Avoid a division by zero in the rate calculation below.
    if deltaTime == 0.0:
        deltaTime = 0.000001
    msg = "Saved data in file: %s. Bytes received: %d. Time: %.3f s. " +\
          "Rate: %.2f Bytes/s"
    logger.info(msg, trgFilename, int(reqPropsObj.getBytesReceived()),
                deltaTime, (float(reqPropsObj.getBytesReceived()) /
                            deltaTime))

    # Raise exception if fewer bytes were received than expected.
    if remSize != 0:
        msg = "No all expected data arrived, %d bytes left to read" % (remSize,)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    # now check the CRC value against what we expected
    sourceChecksum = reqPropsObj.checksum
    logger.info('source checksum: %s - current checksum: %d', str(sourceChecksum), crc)
    if crc != int(sourceChecksum):
        msg = "checksum mismatch: source=" + str(sourceChecksum) + ", received: " + str(crc)
        raise ngamsFailedDownloadException.FailedDownloadException(msg)

    return [deltaTime, crc]