Example #1
# Imports assumed from the fits2hdf package and its pyrap dependency;
# table2hdu (Example #2) is assumed to live in the same module.
import pyrap.tables as pt
from fits2hdf.idi import IdiHdulist
from fits2hdf.printlog import PrintLog


def read_ms(infile, verbosity=1):
    """ Read a Measurement Set into an in-memory HDU list
    :param infile: Measurement Set path
    :param verbosity: level of printed output (0 to 5)
    :return: IdiHdulist version of the Measurement Set
    """
    pp = PrintLog(verbosity=verbosity)
    ms = pt.table(infile)

    # Create a HDU List for storing HDUs
    hdul = IdiHdulist(verbosity=verbosity)

    # Add each column to the main HDU
    hdu_main = table2hdu(ms, "MAIN", verbosity=verbosity, close_after=False)
    hdul["MAIN"] = hdu_main

    # Now look for other keyword tables
    for key, val in ms.getkeywords().items():
        pp.debug(val)
        # String keyword values of the form "Table: <path>" point at subtables
        if isinstance(val, str):
            if val.startswith("Table: "):
                tblpath = val.strip().split("Table: ")[1]
                pp.h2("Opening %s" % key)
                t = pt.table(tblpath)
                t_hdu = table2hdu(t, key, verbosity=verbosity)
                hdul[key] = t_hdu
        else:
            hdul["MAIN"].header.vals[key] = val

    ms.close()
    return hdul
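
A minimal usage sketch (the .ms path and verbosity value are hypothetical; IdiHdulist behaves like a dict of HDUs):

# Read a Measurement Set into memory and inspect the MAIN header keywords
hdul = read_ms("observation.ms", verbosity=2)
print(hdul["MAIN"].header.vals)   # MS keywords copied onto the MAIN header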
Example #2
# Imports assumed from the fits2hdf package:
from fits2hdf.idi import IdiTableHdu
from fits2hdf.printlog import PrintLog


def table2hdu(table, hd, verbosity=1, close_after=True):
    """ Convert an MS table to a Header-Data Unit
    :param table: pyrap table object to convert
    :param hd: header-data unit, either a name string or an existing HDU
    :param close_after: close the pyrap table once it has been read
    :return: IdiTableHdu version of the MS table
    """
    pp = PrintLog(verbosity=verbosity)

    if isinstance(hd, str):
        pp.h3("Creating %s HDU" % hd)
        hd = IdiTableHdu(name=hd)

    colnames = table.colnames()
    keywords = table.getkeywords()

    for colname in colnames:
        try:
            pp.debug("Reading col %s" % colname)
            hd.add_column(table.getcol(colname), name=colname)
        except RuntimeError:
            # This can be raised when no data is in the column
            pp.warn("Could not add %s" % colname)

    for key, val in keywords.items():
        hd.header.vals[key] = val

    if close_after:
        table.close()
    return hd
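
A short usage sketch, assuming an MS subtable on disk (the path is hypothetical):

import pyrap.tables as pt

# Convert the ANTENNA subtable into an HDU; close_after=True (the default)
# closes the pyrap table once its columns have been read
t = pt.table("observation.ms/ANTENNA")
antenna_hdu = table2hdu(t, "ANTENNA", verbosity=2)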
Example #3
# numpy import assumed; create_compressed is a sibling helper in the
# same module that applies the requested compression filters.
import numpy as np

from fits2hdf.printlog import PrintLog


def create_dataset(hgroup, name, data, **kwargs):
    """ Create a dataset from data, attempting compression where possible

    :param hgroup: h5py group in which to add the dataset
    :param name: name of the dataset
    :param data: data to write
    """
    verbosity = kwargs.pop('verbosity', 0)
    pp = PrintLog(verbosity)

    # Numeric (and void) dtypes that the compressed path can handle
    np_types = {
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.int8, np.int16, np.int32, np.int64,
        np.float16, np.float32, np.float64,
        np.complex64, np.complex128, np.void
    }

    if data.dtype.type in np_types:
        pp.debug("Creating compressed %s" % name)
        dset = create_compressed(hgroup, name, data, **kwargs)
    else:
        # h5py raises TypeError if the dtype cannot be written directly
        pp.debug("Creating non-compressed %s" % name)
        dset = hgroup.create_dataset(name, data=data)

    return dset
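
A minimal sketch of driving it from h5py (the file name and data are hypothetical, and create_compressed from the same module must be importable):

import h5py
import numpy as np

with h5py.File("example.h5", "w") as h:
    data = np.arange(100, dtype=np.float32)   # numeric dtype: compressed path
    dset = create_dataset(h, "DATA", data, verbosity=2)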
Example #4
# Imports assumed from the fits2hdf package and its pyrap dependency:
import pyrap.tables as pt
from fits2hdf.idi import IdiHdulist
from fits2hdf.printlog import PrintLog


def export_ms(hdf_file, ms_file, verbosity=1):
    """ Convert an HDF file to a Measurement Set
    :param hdf_file: Input HDF-MS filename
    :param ms_file: Output MS filename

    TODO: Get this working properly.
    """
    pp = PrintLog(verbosity=verbosity)
    hdul = IdiHdulist(verbosity=verbosity)
    hdul.read_hdf(hdf_file)

    main_hdu = hdul["MAIN"]

    vdict = {
        'float32': 'float',
        'float64': 'double',
        'complex64': 'complex',
        'complex128': 'dcomplex',
        'int32': 'int',
        'uint32': 'uint',
        'str': 'string',
        'bool': 'bool'
    }

    col_descs = []
    for col, cdata in main_hdu.data.items():
        col = str(col)
        pp.pp("%16s %s %s" % (col, cdata.shape, cdata.dtype))

        # Map the numpy dtype onto a casacore value type, then build a
        # scalar or array column descriptor as appropriate
        vt = vdict[str(cdata.dtype)]
        if cdata.ndim == 1:
            cdesc = pt.makescacoldesc(col, cdata[0], valuetype=vt)
        else:
            cdesc = pt.makearrcoldesc(col, cdata[0], valuetype=vt)
        col_descs.append(cdesc)

    tdesc = pt.maketabdesc(col_descs)

    t = pt.table(ms_file, tdesc, nrow=main_hdu.n_rows)
    return t
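
A usage sketch (file names hypothetical; note the upstream TODO, so treat the output as experimental):

# Write the MAIN table of an HDF-MS file back out as a Measurement Set
t = export_ms("observation.h5", "observation.ms", verbosity=2)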
Example #5
# Imports assumed from the fits2hdf package layout:
import argparse
import os
import time
import warnings

from fits2hdf.idi import IdiHdulist
from fits2hdf.io.fitsio import read_fits
from fits2hdf.io.hdfio import export_hdf
from fits2hdf.printlog import PrintLog


def convert_fits_to_hdf(args=None):
    """ Convert a FITS file to HDF5 in HDFITS format

    An input and output directory must be specified, and all files with a matching
    extension will be converted. Command line options set the compression algorithm
    and other run-time settings.
    """
    # Parse options and arguments
    parser = argparse.ArgumentParser(description='Convert FITS files to HDF5 files in HDFITS format.')
    parser.add_argument('-c', '--compression', dest='comp', type=str,
                        help='Data compression. Defaults to None, also lzf, bitshuffle, gzip')
    parser.add_argument('-x', '--extension', dest='ext', type=str, default='fits',
                        help='File extension of FITS files. Defaults to .fits')
    parser.add_argument('-v', '--verbosity', dest='verbosity', type=int, default=4,
                        help='verbosity level (default 4, up to 5)')
    parser.add_argument('-s', '--scaleoffset', dest='scale_offset', default=None,
                        help='Add scale offset')
    parser.add_argument('-S', '--shuffle', dest='shuffle', action='store_true', default=None,
                        help='Apply byte shuffle filter')
    parser.add_argument('-t', '--pytables', dest='table_type', action='store_true', default=None,
                        help='Set output tables to be PyTables TABLE class, instead of HDFITS DATA_GROUP')
    parser.add_argument('-C', '--checksum', dest='checksum', action='store_true', default=None,
                        help='Compute fletcher32 checksum on datasets.')
    parser.add_argument('dir_in', help='input directory')
    parser.add_argument('dir_out', help='output directory')

    # Honour an explicit argument list (argparse falls back to sys.argv)
    args = parser.parse_args(args)

    dir_in  = args.dir_in
    dir_out = args.dir_out

    if not os.path.exists(dir_out):
        print("Creating directory %s" % dir_out)
        os.mkdir(dir_out)

    # Form a list of keyword arguments to pass to HDF5 export
    kwargs = {}
    if args.comp is not None:
        kwargs['compression'] = args.comp
    if args.scale_offset is not None:
        kwargs['scaleoffset'] = int(args.scale_offset)
    if args.shuffle is not None:
        kwargs['shuffle'] = args.shuffle
    if args.checksum is not None:
        kwargs['fletcher32'] = args.checksum
    if args.table_type is not None:
        kwargs['table_type'] = 'TABLE'
    else:
        kwargs['table_type'] = 'DATA_GROUP'

    pp = PrintLog(verbosity=args.verbosity)
    if args.verbosity == 0:
        warnings.simplefilter("ignore")

    pp.h1("FITS2HDF")
    pp.pa("Input directory:  %s" % dir_in)
    pp.pa("Output directory: %s" % dir_out)
    pp.pa("Dataset creation arguments:")
    for key, val in kwargs.items():
        pp.pa("%16s: %s" % (key, val))

    # Create list of files to process
    filelist = os.listdir(dir_in)
    filelist = [fn for fn in filelist if fn.endswith(args.ext)]

    t_start = time.time()
    file_count = 0
    for filename in filelist:
        file_in = os.path.join(dir_in, filename)
        file_out = os.path.join(dir_out, filename.split('.' + args.ext)[0] + '.h5')

        a = IdiHdulist()
        try:
            pp.pp("\nReading  %s" % file_in)
            a = read_fits(file_in)
            pp.pp("Creating %s" % file_out)
            t1 = time.time()
            export_hdf(a, file_out, **kwargs)
            t2 = time.time()
            pp.pp("Input  filesize: %sB" % os.path.getsize(file_in))
            pp.pp("Output filesize: %sB" % os.path.getsize(file_out))
            compfact = float(os.path.getsize(file_in)) / float(os.path.getsize(file_out))
            pp.pp("Compression:     %2.2fx" % compfact)
            pp.pp("Comp/write time: %2.2fs" % (t2 - t1))

            file_count += 1

        except IOError:
            pp.err("ERROR: Cannot load %s" % file_in)

    pp.h1("\nSUMMARY")
    pp.pa("Files created: %i" % file_count)
    pp.pa("Time taken:    %2.2fs" % (time.time() - t_start))
Example #6
# Imports assumed from the fits2hdf package layout:
import argparse
import os
import time
import warnings

from fits2hdf.idi import IdiHdulist
from fits2hdf.io.fitsio import export_fits
from fits2hdf.io.hdfio import read_hdf
from fits2hdf.printlog import PrintLog


def convert_hdf_to_fits(args=None):
    """ Convert a HDF5 (in HDFITS format) to a FITS file

    An input and output directory must be specified, and all files with a matching
    extension will be converted. Command line options set the run-time settings.
    """

    # Parse options and arguments
    parser = argparse.ArgumentParser(description='Convert HDF5 files in HDFITS format to FITS files.')
    parser.add_argument('-x', '--extension', dest='ext', type=str, default='h5',
                        help='File extension of HDFITS files. Defaults to .h5')
    parser.add_argument('-v', '--verbosity', dest='verbosity', type=int, default=4,
                        help='verbosity level (default 4, up to 5)')
    parser.add_argument('dir_in', help='input directory')
    parser.add_argument('dir_out', help='output directory')
    args = parser.parse_args(args)

    dir_in  = args.dir_in
    dir_out = args.dir_out

    if not os.path.exists(dir_out):
        print("Creating directory %s" % dir_out)
        os.mkdir(dir_out)

    # Form a list of keyword arguments to pass to HDF5 export
    kwargs = {}

    pp = PrintLog(verbosity=args.verbosity)
    if args.verbosity == 0:
        warnings.simplefilter("ignore")

    pp.h1("HDF2FITS")
    pp.pa("Input directory:  %s" % dir_in)
    pp.pa("Output directory: %s" % dir_out)
    pp.pa("Dataset creation arguments:")
    for key, val in kwargs.items():
        pp.pa("%16s: %s" % (key, val))

    # Create list of files to process
    filelist = os.listdir(dir_in)
    filelist = [fn for fn in filelist if fn.endswith(args.ext)]

    t_start = time.time()
    file_count = 0
    for filename in filelist:
        file_in = os.path.join(dir_in, filename)
        file_out = os.path.join(dir_out, filename.split('.' + args.ext)[0] + '.fits')

        a = IdiHdulist()
        try:
            pp.pp("\nReading  %s" % file_in)
            a = read_hdf(file_in)
            pp.pp("Creating %s" % file_out)
            t1 = time.time()
            export_fits(a, file_out, **kwargs)
            t2 = time.time()
            pp.pp("Input  filesize: %sB" % os.path.getsize(file_in))
            pp.pp("Output filesize: %sB" % os.path.getsize(file_out))
            compfact = float(os.path.getsize(file_in)) / float(os.path.getsize(file_out))
            pp.pp("Compression:     %2.2fx" % compfact)
            pp.pp("Comp/write time: %2.2fs" % (t2 - t1))

            file_count += 1

        except IOError:
            pp.err("ERROR: Cannot load %s" % file_in)

    pp.h1("\nSUMMARY")
    pp.pa("Files created: %i" % file_count)
    pp.pa("Time taken:    %2.2fs" % (time.time() - t_start))